Esempio n. 1
0
def download_data(mind_type="small"):
    """Download any MIND resources that are not already on disk.

    For each split, the news file path reported by ``get_path`` is checked;
    when it does not exist, the matching archive is fetched into the
    ``<data_path>/<split>`` directory. The ``test`` split is only handled
    when ``mind_type`` equals ``"large"``. Finally, the utils archive is
    fetched into ``<root_path>/utils`` when the path returned by
    ``get_yaml_path()`` does not exist.

    Args:
        mind_type: Dataset variant, forwarded unchanged to
            ``get_mind_data_set``, ``get_data_path`` and ``get_path``.
    """
    base_url, train_archive, dev_archive, utils_archive = get_mind_data_set(mind_type)
    root_dir = get_data_path(mind_type)

    # Only the first element of each get_path() result (the news file) is used.
    train_news, _ = get_path("train", mind_type)
    valid_news, _ = get_path("valid", mind_type)
    test_news, _ = get_path("test", mind_type)

    # (file whose presence marks the split as downloaded, sub-directory, archive)
    splits = [
        (train_news, 'train', train_archive),
        (valid_news, 'valid', dev_archive),
    ]
    if mind_type == "large":
        # NOTE(review): the test split is populated from the *dev* archive;
        # get_mind_data_set exposes no test archive here - confirm intended.
        splits.append((test_news, 'test', dev_archive))

    for marker_file, subdir, archive in splits:
        if os.path.exists(marker_file):
            continue
        download_deeprec_resources(base_url, os.path.join(root_dir, subdir), archive)

    if os.path.exists(get_yaml_path()):
        return
    utils_url = r'https://recodatasets.blob.core.windows.net/newsrec/'
    download_deeprec_resources(utils_url, os.path.join(get_root_path(), 'utils'), utils_archive)
Esempio n. 2
0
# Dataset variant handed to get_mind_data_set below.
MIND_type = 'large'

# Root directory for every path built in this script.
data_path = "./test_mind"

# Sub-directories shared by the file paths and the download targets.
_train_dir = os.path.join(data_path, 'train')
_valid_dir = os.path.join(data_path, 'valid')
_utils_dir = os.path.join(data_path, 'utils')

# Training / validation inputs.
train_news_file = os.path.join(_train_dir, 'news.tsv')
train_behaviors_file = os.path.join(_train_dir, 'behaviors.tsv')
valid_news_file = os.path.join(_valid_dir, 'news.tsv')
fast_valid_behaviors_file = os.path.join(_valid_dir, 'behaviors.small.tsv')

# Auxiliary resources expected inside the utils directory.
wordEmb_file = os.path.join(_utils_dir, 'embedding.npy')
userDict_file = os.path.join(_utils_dir, 'uid2index.pkl')
wordDict_file = os.path.join(_utils_dir, 'word_dict.pkl')
# NOTE(review): the config is npa.yaml while model_dir is named "nrms" -
# confirm which model this script is meant to configure.
yaml_file = os.path.join(_utils_dir, 'npa.yaml')
model_dir = os.path.join(data_path, 'nrms')

mind_url, mind_train_dataset, mind_dev_dataset, mind_utils = get_mind_data_set(MIND_type)

# Fetch each resource only when its marker file is absent.
if not os.path.exists(train_news_file):
    download_deeprec_resources(mind_url, _train_dir, mind_train_dataset)

if not os.path.exists(valid_news_file):
    download_deeprec_resources(mind_url, _valid_dir, mind_dev_dataset)

if not os.path.exists(yaml_file):
    download_deeprec_resources(
        'https://recodatasets.blob.core.windows.net/newsrec/',
        _utils_dir,
        mind_utils,
    )

hparams = prepare_hparams(yaml_file, wordEmb_file=wordEmb_file, \
                          wordDict_file=wordDict_file, userDict_file=userDict_file, \
                          epochs=epochs,