def test_model_nrms(mind_resource_path):
    """Check that an NRMS model can be built, evaluated and fitted.

    Any of the ``train``, ``valid`` or ``utils`` resources missing under
    ``mind_resource_path`` is downloaded first.

    Args:
        mind_resource_path: Directory that contains (or will receive) the
            ``train``, ``valid`` and ``utils`` sub-directories.
    """
    resource_url = r"https://recodatasets.z20.web.core.windows.net/newsrec/"

    news_train = os.path.join(mind_resource_path, "train", "news.tsv")
    behaviors_train = os.path.join(mind_resource_path, "train", "behaviors.tsv")
    news_valid = os.path.join(mind_resource_path, "valid", "news.tsv")
    behaviors_valid = os.path.join(mind_resource_path, "valid", "behaviors.tsv")
    embedding_path = os.path.join(mind_resource_path, "utils", "embedding.npy")
    user_dict_path = os.path.join(mind_resource_path, "utils", "uid2index.pkl")
    word_dict_path = os.path.join(mind_resource_path, "utils", "word_dict.pkl")
    yaml_path = os.path.join(mind_resource_path, "utils", "nrms.yaml")

    # (file whose absence triggers the download, target sub-directory, archive)
    # Order matters: it mirrors the sequence of checks train -> valid -> utils.
    required_archives = (
        (news_train, "train", "MINDdemo_train.zip"),
        (news_valid, "valid", "MINDdemo_dev.zip"),
        (yaml_path, "utils", "MINDdemo_utils.zip"),
    )
    for marker_file, subdir, archive_name in required_archives:
        if os.path.exists(marker_file):
            continue
        download_deeprec_resources(
            resource_url,
            os.path.join(mind_resource_path, subdir),
            archive_name,
        )

    hparams = prepare_hparams(
        yaml_path,
        wordEmb_file=embedding_path,
        wordDict_file=word_dict_path,
        userDict_file=user_dict_path,
        epochs=1,
    )
    assert hparams is not None

    model = NRMSModel(hparams, MINDIterator)

    # Evaluation must produce a result before any training has happened.
    assert model.run_eval(news_valid, behaviors_valid) is not None

    # fit() is expected to hand back a BaseModel instance.
    fitted = model.fit(news_train, behaviors_train, news_valid, behaviors_valid)
    assert isinstance(fitted, BaseModel)
Exemple #2
0
def test_model_nrms(tmp):
    """Check that an NRMS model can be built, evaluated and fitted.

    The ``nrms.zip`` resource is downloaded into ``tmp`` when ``nrms.yaml``
    is not already present there.

    Args:
        tmp: Directory that contains (or will receive) the NRMS resources.
    """
    yaml_path, train_path, valid_path, embedding_path = (
        os.path.join(tmp, file_name)
        for file_name in ("nrms.yaml", "train.txt", "test.txt", "embedding.npy")
    )

    yaml_missing = not os.path.exists(yaml_path)
    if yaml_missing:
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/newsrec/",
            tmp,
            "nrms.zip",
        )

    hparams = prepare_hparams(yaml_path, wordEmb_file=embedding_path, epochs=1)
    assert hparams is not None

    model = NRMSModel(hparams, NewsIterator)

    # Evaluation must produce a result before any training has happened.
    assert model.run_eval(valid_path) is not None

    # fit() is expected to hand back a BaseModel instance.
    fitted = model.fit(train_path, valid_path)
    assert isinstance(fitted, BaseModel)
Exemple #3
0
    download_deeprec_resources(mind_url, os.path.join(data_path, 'train'),
                               mind_train_dataset)

# Download the validation archive when its news file is not on disk.
if not os.path.exists(valid_news_file):
    download_deeprec_resources(
        mind_url,
        os.path.join(data_path, 'valid'),
        mind_dev_dataset,
    )

# Download the utils archive when the YAML file is not on disk.
# NOTE(review): this call uses a literal URL rather than `mind_url` -- confirm
# that is intentional.
if not os.path.exists(yaml_file):
    download_deeprec_resources(
        r'https://recodatasets.blob.core.windows.net/newsrec/',
        os.path.join(data_path, 'utils'),
        mind_utils,
    )

hparams = prepare_hparams(
    yaml_file,
    wordEmb_file=wordEmb_file,
    wordDict_file=wordDict_file,
    userDict_file=userDict_file,
    epochs=epochs,
    show_step=10,
)
print("[NRMS] Config,", hparams)

iterator = MINDIterator
model = NRMSModel(hparams, iterator, seed=seed)

# Evaluate once before fitting and print the result.
print(
    "[NRMS] First run:",
    model.run_eval(valid_news_file, fast_valid_behaviors_file),
)

model.fit(
    train_news_file,
    train_behaviors_file,
    valid_news_file,
    fast_valid_behaviors_file,
    model_save_path=model_dir,
)

# Disabled: evaluation against `valid_behaviors_file` after fitting.
# res_syn = model.run_eval(valid_news_file, valid_behaviors_file)
# print(res_syn)
Exemple #4
0
# Pick the iterator class and model class that match `model_type`.
if model_type == 'nrms':
    iterator = MINDIterator
    model = NRMSModel(hparams, iterator, seed=seed)
elif model_type == 'naml':
    iterator = MINDAllIterator
    model = NAMLModel(hparams, iterator, seed=seed)
elif model_type == 'npa':
    iterator = MINDIterator
    model = NPAModel(hparams, iterator, seed=seed)
elif model_type == 'nrmma':
    iterator = MINDAllIterator
    model = NRMMAModel(hparams, iterator, seed=seed)
else:
    # Report the value that was actually dispatched on, so the message names
    # the unsupported model type.
    raise NotImplementedError(f"{model_type} is not implemented")

# In[8]:
# Fit the model, passing a per-model-type path and checkpoint name, then
# evaluate on the validation files and log the result.
model_path = os.path.join(exp_path, model_type)
model_name = model_type + '_ckpt'
model.fit(train_news_file, train_behaviors_file, valid_news_file, valid_behaviors_file,
          model_path=model_path, model_name=model_name)
res_syn = model.run_eval(valid_news_file, valid_behaviors_file)
logging.info(res_syn)


# ## Reference
# \[1\] Wu et al. "Neural News Recommendation with Multi-Head Self-Attention." in Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)<br>
# \[2\] Wu, Fangzhao, et al. "MIND: A Large-scale Dataset for News Recommendation" Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics. https://msnews.github.io/competition.html <br>
# \[3\] GloVe: Global Vectors for Word Representation. https://nlp.stanford.edu/projects/glove/
        os.path.join(data_root, 'utils'), mind_utils)

# Build the hyper-parameters from the YAML file plus explicit overrides.
hparams = prepare_hparams(
    yaml_file,
    wordEmb_file=wordEmb_file,
    wordDict_file=wordDict_file,
    userDict_file=userDict_file,
    batch_size=batch_size,
    epochs=epochs,
    show_step=10,
)
logger.debug(f"hparams: {hparams}")

iterator = MINDIterator
model = NRMSModel(hparams, iterator, seed=seed)

# Evaluate once before fitting and log the result.
logger.info(model.run_eval(valid_news_file, valid_behaviors_file))

model.fit(
    train_news_file,
    train_behaviors_file,
    valid_news_file,
    valid_behaviors_file,
)

# Evaluate again after fitting; the result is logged and passed to sb.glue.
res_syn = model.run_eval(valid_news_file, valid_behaviors_file)
logger.debug(f"res_syn: {res_syn}")
sb.glue("res_syn", res_syn)

# Save the weights under <BASE_DIR>/ckpt, creating the directory if needed.
model_path = os.path.join(BASE_DIR, "ckpt")
os.makedirs(model_path, exist_ok=True)
model.model.save_weights(os.path.join(model_path, "nrms_ckpt"))

group_impr_indexes, group_labels, group_preds = model.run_fast_eval(