def test_npa_component_definition(mind_resource_path):
    """Build an NPA model from the MIND demo utils and check its components.

    ``MINDdemo_utils.zip`` is downloaded into ``<mind_resource_path>/utils``
    when ``npa.yaml`` is not already present there. The test then asserts that
    the constructed model exposes non-``None`` ``model``, ``scorer``, ``loss``
    and ``train_optimizer`` attributes.
    """
    utils_dir = os.path.join(mind_resource_path, "utils")
    config_path = os.path.join(utils_dir, r"npa.yaml")
    embedding_path = os.path.join(utils_dir, "embedding.npy")
    user_index_path = os.path.join(utils_dir, "uid2index.pkl")
    word_index_path = os.path.join(utils_dir, "word_dict.pkl")

    # The YAML config doubles as the marker for "utils archive already fetched".
    if not os.path.exists(config_path):
        download_deeprec_resources(
            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
            utils_dir,
            "MINDdemo_utils.zip",
        )

    hparams = prepare_hparams(
        config_path,
        wordEmb_file=embedding_path,
        wordDict_file=word_index_path,
        userDict_file=user_index_path,
        epochs=1,
    )
    # The iterator class itself (not an instance) is handed to the model.
    npa = NPAModel(hparams, MINDIterator)

    for component in ("model", "scorer", "loss", "train_optimizer"):
        assert getattr(npa, component) is not None
def test_naml_component_definition(tmp):
    """Build a NAML model from the MIND demo utils and check its components.

    ``MINDdemo_utils.zip`` is downloaded into ``<tmp>/utils`` when
    ``naml.yaml`` is missing there. The test then asserts that the model's
    ``model``, ``scorer``, ``loss`` and ``train_optimizer`` attributes are
    all set.
    """
    utils_dir = os.path.join(tmp, "utils")
    config_path = os.path.join(utils_dir, r"naml.yaml")

    # Keyword overrides forwarded to prepare_hparams, keyed by parameter name.
    resource_files = {
        "wordEmb_file": os.path.join(utils_dir, "embedding_all.npy"),
        "wordDict_file": os.path.join(utils_dir, "word_dict_all.pkl"),
        "userDict_file": os.path.join(utils_dir, "uid2index.pkl"),
        "vertDict_file": os.path.join(utils_dir, "vert_dict.pkl"),
        "subvertDict_file": os.path.join(utils_dir, "subvert_dict.pkl"),
    }

    if not os.path.exists(config_path):
        download_deeprec_resources(
            r"https://recodatasets.blob.core.windows.net/newsrec/",
            utils_dir,
            "MINDdemo_utils.zip",
        )

    hparams = prepare_hparams(config_path, **resource_files, epochs=1)
    naml = NAMLModel(hparams, MINDAllIterator)

    for component in ("model", "scorer", "loss", "train_optimizer"):
        assert getattr(naml, component) is not None
Example #3
0
def test_prepare_hparams(must_exist_attributes, deeprec_resource_path):
    """Check that prepared hyper-parameters expose the expected attribute.

    ``MINDdemo_utils.zip`` is downloaded into
    ``<deeprec_resource_path>/mind/utils`` when ``nrms.yaml`` is missing there.
    ``must_exist_attributes`` is passed straight to ``hasattr`` as the
    attribute name to look up on the resulting hparams object.
    """
    utils_dir = os.path.join(deeprec_resource_path, "mind", "utils")
    config_path = os.path.join(utils_dir, r"nrms.yaml")

    if not os.path.exists(config_path):
        download_deeprec_resources(
            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
            utils_dir,
            "MINDdemo_utils.zip",
        )

    hparams = prepare_hparams(
        config_path,
        wordEmb_file=os.path.join(utils_dir, "embedding.npy"),
        wordDict_file=os.path.join(utils_dir, "word_dict.pkl"),
        userDict_file=os.path.join(utils_dir, "uid2index.pkl"),
        epochs=1,
    )
    assert hasattr(hparams, must_exist_attributes)
Example #4
0
def get_params(yaml_name, device_id=0, model_class=None):
    """Return hyper-parameters prepared from a YAML config plus fixed overrides.

    Args:
        yaml_name: File name looked up under ``<get_root_path()>/utils``. When
            falsy, the path returned by ``get_yaml_path()`` is used instead.
        device_id: Forwarded to ``prepare_hparams`` as ``device_id``.
        model_class: Forwarded to ``prepare_hparams`` as ``model_class``.

    Returns:
        Whatever ``prepare_hparams`` returns for the resolved YAML path.
    """
    yaml_path = (
        os.path.join(get_root_path(), "utils", yaml_name)
        if yaml_name
        else get_yaml_path()
    )

    # NOTE: ``epochs`` is not a parameter or local of this function; it is
    # resolved from an enclosing/module scope that is not visible here.
    overrides = dict(
        wordEmb_file=get_emb_path(),
        wordDict_file=get_dict_file(),
        device_id=device_id,
        epochs=epochs,
        show_step=10,
        userDict_file=get_user_dic_path(),
        model_class=model_class,
    )
    return prepare_hparams(yaml_path, **overrides)
def test_naml_iterator(mind_resource_path):
    """Check the first batch produced by ``MINDAllIterator`` for train and valid.

    Missing MIND demo archives (train, dev, utils) are downloaded under
    ``mind_resource_path`` first. For both the training iterator (built with
    ``hparams.npratio``) and the test iterator (built with ``-1``), the first
    item yielded by ``load_data_from_file`` must be a dict with 11 entries.
    """
    base_url = r"https://recodatasets.z20.web.core.windows.net/newsrec/"
    train_dir = os.path.join(mind_resource_path, "train")
    valid_dir = os.path.join(mind_resource_path, "valid")
    utils_dir = os.path.join(mind_resource_path, "utils")

    train_news_file = os.path.join(train_dir, r"news.tsv")
    train_behaviors_file = os.path.join(train_dir, r"behaviors.tsv")
    valid_news_file = os.path.join(valid_dir, r"news.tsv")
    valid_behaviors_file = os.path.join(valid_dir, r"behaviors.tsv")
    yaml_file = os.path.join(utils_dir, r"naml.yaml")

    # (marker file, destination directory, archive name): an archive is
    # fetched only when its marker file is absent.
    for marker, target_dir, archive in (
        (train_news_file, train_dir, "MINDdemo_train.zip"),
        (valid_news_file, valid_dir, "MINDdemo_dev.zip"),
        (yaml_file, utils_dir, "MINDdemo_utils.zip"),
    ):
        if not os.path.exists(marker):
            download_deeprec_resources(base_url, target_dir, archive)

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=os.path.join(utils_dir, "embedding_all.npy"),
        wordDict_file=os.path.join(utils_dir, "word_dict_all.pkl"),
        userDict_file=os.path.join(utils_dir, "uid2index.pkl"),
        vertDict_file=os.path.join(utils_dir, "vert_dict.pkl"),
        subvertDict_file=os.path.join(utils_dir, "subvert_dict.pkl"),
        epochs=1,
        batch_size=1024,
    )
    train_iterator = MINDAllIterator(hparams, hparams.npratio)
    test_iterator = MINDAllIterator(hparams, -1)

    for data_iterator, news_file, behaviors_file in (
        (train_iterator, train_news_file, train_behaviors_file),
        (test_iterator, valid_news_file, valid_behaviors_file),
    ):
        assert data_iterator is not None
        # Only the first yielded batch is inspected.
        for batch in data_iterator.load_data_from_file(news_file, behaviors_file):
            assert isinstance(batch, dict)
            assert len(batch) == 11
            break
def test_model_naml(mind_resource_path):
    """Run evaluation and one training epoch of NAML on the MIND demo data.

    Missing MIND demo archives (train, dev, utils) are downloaded under
    ``mind_resource_path`` first. The test asserts that ``run_eval`` returns
    something other than ``None`` and that ``fit`` returns a ``BaseModel``.
    """
    base_url = r"https://recodatasets.z20.web.core.windows.net/newsrec/"
    train_dir = os.path.join(mind_resource_path, "train")
    valid_dir = os.path.join(mind_resource_path, "valid")
    utils_dir = os.path.join(mind_resource_path, "utils")

    train_news_file = os.path.join(train_dir, r"news.tsv")
    train_behaviors_file = os.path.join(train_dir, r"behaviors.tsv")
    valid_news_file = os.path.join(valid_dir, r"news.tsv")
    valid_behaviors_file = os.path.join(valid_dir, r"behaviors.tsv")
    yaml_file = os.path.join(utils_dir, r"naml.yaml")

    # (marker file, destination directory, archive name): an archive is
    # fetched only when its marker file is absent.
    for marker, target_dir, archive in (
        (train_news_file, train_dir, "MINDdemo_train.zip"),
        (valid_news_file, valid_dir, "MINDdemo_dev.zip"),
        (yaml_file, utils_dir, "MINDdemo_utils.zip"),
    ):
        if not os.path.exists(marker):
            download_deeprec_resources(base_url, target_dir, archive)

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=os.path.join(utils_dir, "embedding_all.npy"),
        wordDict_file=os.path.join(utils_dir, "word_dict_all.pkl"),
        userDict_file=os.path.join(utils_dir, "uid2index.pkl"),
        vertDict_file=os.path.join(utils_dir, "vert_dict.pkl"),
        subvertDict_file=os.path.join(utils_dir, "subvert_dict.pkl"),
        epochs=1,
    )

    naml = NAMLModel(hparams, MINDAllIterator)

    eval_result = naml.run_eval(valid_news_file, valid_behaviors_file)
    assert eval_result is not None

    fitted = naml.fit(
        train_news_file,
        train_behaviors_file,
        valid_news_file,
        valid_behaviors_file,
    )
    assert isinstance(fitted, BaseModel)
def test_news_iterator(tmp):
    """Check the first batch produced by ``MINDIterator`` for train and valid.

    Missing MIND demo archives (train, dev, utils) are downloaded under
    ``tmp`` first. For both the training iterator (built with
    ``hparams.npratio``) and the test iterator (built with ``-1``), the first
    item yielded by ``load_data_from_file`` must be a dict with 5 entries.
    """
    base_url = r"https://recodatasets.blob.core.windows.net/newsrec/"
    train_dir = os.path.join(tmp, "train")
    valid_dir = os.path.join(tmp, "valid")
    utils_dir = os.path.join(tmp, "utils")

    train_news_file = os.path.join(train_dir, r"news.tsv")
    train_behaviors_file = os.path.join(train_dir, r"behaviors.tsv")
    valid_news_file = os.path.join(valid_dir, r"news.tsv")
    valid_behaviors_file = os.path.join(valid_dir, r"behaviors.tsv")
    yaml_file = os.path.join(utils_dir, r"nrms.yaml")

    # (marker file, destination directory, archive name): an archive is
    # fetched only when its marker file is absent.
    for marker, target_dir, archive in (
        (train_news_file, train_dir, "MINDdemo_train.zip"),
        (valid_news_file, valid_dir, "MINDdemo_dev.zip"),
        (yaml_file, utils_dir, "MINDdemo_utils.zip"),
    ):
        if not os.path.exists(marker):
            download_deeprec_resources(base_url, target_dir, archive)

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=os.path.join(utils_dir, "embedding.npy"),
        wordDict_file=os.path.join(utils_dir, "word_dict.pkl"),
        userDict_file=os.path.join(utils_dir, "uid2index.pkl"),
        epochs=1,
    )
    train_iterator = MINDIterator(hparams, hparams.npratio)
    test_iterator = MINDIterator(hparams, -1)

    for data_iterator, news_file, behaviors_file in (
        (train_iterator, train_news_file, train_behaviors_file),
        (test_iterator, valid_news_file, valid_behaviors_file),
    ):
        assert data_iterator is not None
        # Only the first yielded batch is inspected.
        for batch in data_iterator.load_data_from_file(news_file, behaviors_file):
            assert isinstance(batch, dict)
            assert len(batch) == 5
            break
def test_model_nrms(tmp):
    """Run evaluation and one training epoch of NRMS on the MIND demo data.

    Missing MIND demo archives (train, dev, utils) are downloaded under
    ``tmp`` first. The test asserts that hparams were created, that
    ``run_eval`` returns something other than ``None`` and that ``fit``
    returns a ``BaseModel``.
    """
    base_url = r"https://recodatasets.blob.core.windows.net/newsrec/"
    train_dir = os.path.join(tmp, "train")
    valid_dir = os.path.join(tmp, "valid")
    utils_dir = os.path.join(tmp, "utils")

    train_news_file = os.path.join(train_dir, r"news.tsv")
    train_behaviors_file = os.path.join(train_dir, r"behaviors.tsv")
    valid_news_file = os.path.join(valid_dir, r"news.tsv")
    valid_behaviors_file = os.path.join(valid_dir, r"behaviors.tsv")
    yaml_file = os.path.join(utils_dir, r"nrms.yaml")

    # (marker file, destination directory, archive name): an archive is
    # fetched only when its marker file is absent.
    for marker, target_dir, archive in (
        (train_news_file, train_dir, "MINDdemo_train.zip"),
        (valid_news_file, valid_dir, "MINDdemo_dev.zip"),
        (yaml_file, utils_dir, "MINDdemo_utils.zip"),
    ):
        if not os.path.exists(marker):
            download_deeprec_resources(base_url, target_dir, archive)

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=os.path.join(utils_dir, "embedding.npy"),
        wordDict_file=os.path.join(utils_dir, "word_dict.pkl"),
        userDict_file=os.path.join(utils_dir, "uid2index.pkl"),
        epochs=1,
    )
    assert hparams is not None

    nrms = NRMSModel(hparams, MINDIterator)

    eval_result = nrms.run_eval(valid_news_file, valid_behaviors_file)
    assert eval_result is not None

    fitted = nrms.fit(
        train_news_file,
        train_behaviors_file,
        valid_news_file,
        valid_behaviors_file,
    )
    assert isinstance(fitted, BaseModel)
Example #9
0
def test_lstur_component_definition(tmp):
    """Build an LSTUR model from ``lstur.zip`` and check its components.

    ``lstur.zip`` is downloaded into ``tmp`` when ``lstur.yaml`` is not
    already there. The test then asserts that the model's ``model``,
    ``scorer``, ``loss`` and ``train_optimizer`` attributes are all set.
    """
    config_path = os.path.join(tmp, "lstur.yaml")
    embedding_path = os.path.join(tmp, "embedding.npy")

    if not os.path.exists(config_path):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/newsrec/",
            tmp,
            "lstur.zip",
        )

    hparams = prepare_hparams(config_path, wordEmb_file=embedding_path, epochs=1)
    lstur = LSTURModel(hparams, NewsIterator)

    for component in ("model", "scorer", "loss", "train_optimizer"):
        assert getattr(lstur, component) is not None
Example #10
0
def test_model_npa(tmp):
    """Run evaluation and one training epoch of NPA on the ``npa.zip`` data.

    ``npa.zip`` is downloaded into ``tmp`` when ``npa.yaml`` is not already
    there. The test asserts that hparams were created, that ``run_eval``
    returns something other than ``None`` and that ``fit`` returns a
    ``BaseModel``.
    """
    config_path = os.path.join(tmp, "npa.yaml")
    train_path = os.path.join(tmp, "train.txt")
    valid_path = os.path.join(tmp, "test.txt")
    embedding_path = os.path.join(tmp, "embedding.npy")

    if not os.path.exists(config_path):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/newsrec/",
            tmp,
            "npa.zip",
        )

    hparams = prepare_hparams(config_path, wordEmb_file=embedding_path, epochs=1)
    assert hparams is not None

    npa = NPAModel(hparams, NewsIterator)

    eval_result = npa.run_eval(valid_path)
    assert eval_result is not None

    fitted = npa.fit(train_path, valid_path)
    assert isinstance(fitted, BaseModel)
Example #11
0
# Behaviour logs used for training and validation.
train_behaviors_file = os.path.join(data_path, "train", "final_behaviors.tsv")
valid_behaviors_file = os.path.join(data_path, "valid", "final_behaviors.tsv")
# valid_news_file = os.path.join(data_path, 'valid', r'news.tsv')
# test_news_file = os.path.join(data_path, 'test', r'news.tsv')
# test_behaviors_file = os.path.join(data_path, 'test', r'behaviors.tsv')

# Pre-built embeddings and dictionaries under <data_path>/utils.
wordEmb_file = os.path.join(data_path, "utils", "embedding.npy")
wordDict_file = os.path.join(data_path, "utils", "word_dict_all.pkl")
userDict_file = os.path.join(data_path, "utils", "uid2index.pkl")
vertDict_file = os.path.join(data_path, "utils", "vert_dict.pkl")
subvertDict_file = os.path.join(data_path, "utils", "subvert_dict.pkl")

# The YAML config is selected by the model name given in ``opt``.
yaml_file = os.path.join(data_path, "utils", "{}.yaml".format(opt.model_name))

hparams = prepare_hparams(
    yaml_file,
    wordEmb_file=wordEmb_file,
    wordDict_file=wordDict_file,
    userDict_file=userDict_file,
    vertDict_file=vertDict_file,
    subvertDict_file=subvertDict_file,
    batch_size=batch_size,
    epochs=epochs,
)
print(hparams)

# The iterator class itself (not an instance) is handed to the model.
iterator = MINDAllIterator
model = NAMLModel(hparams, iterator, seed=seed)

# print(model.run_slow_eval(news_file, valid_behaviors_file))

# The same news file is passed for both the training and validation splits.
model.fit(news_file, train_behaviors_file, news_file, valid_behaviors_file)

# model_path = os.path.join(model_path, "model")
# os.makedirs(model_path, exist_ok=True)
Example #12
0
# News and behaviour logs for each data split under ``data_path``.
train_behaviors_file = os.path.join(data_path, 'train', r'behaviors.tsv')
valid_news_file = os.path.join(data_path, 'valid', r'news.tsv')
valid_behaviors_file = os.path.join(data_path, 'valid', r'behaviors.tsv')
test_news_file = os.path.join(data_path, 'test', r'news.tsv')
test_behaviors_file = os.path.join(data_path, 'test', r'behaviors.tsv')

# Pre-built embeddings, dictionaries and the model config under <data_path>/utils.
wordEmb_file = os.path.join(data_path, "utils", "embedding_all.npy")
userDict_file = os.path.join(data_path, "utils", "uid2index.pkl")
wordDict_file = os.path.join(data_path, "utils", "word_dict_all.pkl")
yaml_file = os.path.join(data_path, "utils", r'nrms.yaml')
entityDict_file = os.path.join(data_path, "utils", "entity_dict_all.pkl")
entity_embedding_file = os.path.join(data_path, "utils", "entity_embeddings_5w_100_all.npy")
context_embedding_file = os.path.join(data_path, "utils", "context_embeddings_5w_100_all.npy")

# Line continuations are implicit inside the parentheses, so no backslashes.
hparams = prepare_hparams(
    yaml_file,
    wordEmb_file=wordEmb_file,
    wordDict_file=wordDict_file,
    userDict_file=userDict_file,
    epochs=epochs,
    entityEmb_file=entity_embedding_file,
    contextEmb_file=context_embedding_file,
    entityDict_file=entityDict_file,
    show_step=10,
)
print(hparams)

iterator = MINDIterator
model = NRMSModel(hparams, iterator, seed=seed)

# Restore previously saved weights from <data_path>/model/my_nrms_ckpt.
model_path = os.path.join(data_path, "model")
model.model.load_weights(os.path.join(model_path, "my_nrms_ckpt"))

# Count the test samples (one per line). The context manager guarantees the
# file is closed even if reading fails, and the lines are counted lazily
# instead of being loaded into a list.
with open(test_behaviors_file) as f:
    print('total test samples:', sum(1 for _ in f))

group_impr_indexes, group_labels, group_preds = model.run_fast_eval(
    test_news_file, test_behaviors_file, test=1
)

# NOTE(review): imported mid-script, presumably for code that follows this
# fragment; consider moving it to the top-of-file imports.
import numpy as np
Example #13
0
# Resolve the download URL and archive names for the selected MIND variant.
(
    mind_url,
    mind_train_dataset,
    mind_dev_dataset,
    mind_utils,
) = get_mind_data_set(MIND_type)

# Fetch each archive only when its marker file is missing.
if not os.path.exists(train_news_file):
    download_deeprec_resources(
        mind_url,
        os.path.join(data_path, "train"),
        mind_train_dataset,
    )

if not os.path.exists(valid_news_file):
    download_deeprec_resources(
        mind_url,
        os.path.join(data_path, "valid"),
        mind_dev_dataset,
    )

if not os.path.exists(yaml_file):
    download_deeprec_resources(
        r"https://recodatasets.blob.core.windows.net/newsrec/",
        os.path.join(data_path, "utils"),
        mind_utils,
    )

hparams = prepare_hparams(
    yaml_file,
    wordEmb_file=wordEmb_file,
    wordDict_file=wordDict_file,
    userDict_file=userDict_file,
    epochs=epochs,
    show_step=10,
)
print("[NRMS] Config,", hparams)

# The iterator class itself (not an instance) is handed to the model.
iterator = MINDIterator
model = NRMSModel(hparams, iterator, seed=seed)

# Evaluate once before any training.
first_eval = model.run_eval(valid_news_file, fast_valid_behaviors_file)
print("[NRMS] First run:", first_eval)

model.fit(
    train_news_file,
    train_behaviors_file,
    valid_news_file,
    fast_valid_behaviors_file,
    model_save_path=model_dir,
)
Example #14
0
# Fetch each MIND archive only when its marker file is missing.
if not os.path.exists(train_news_file):
    download_deeprec_resources(
        mind_url,
        os.path.join(data_path, "train"),
        mind_train_dataset,
    )
if not os.path.exists(valid_news_file):
    download_deeprec_resources(
        mind_url,
        os.path.join(data_path, "valid"),
        mind_dev_dataset,
    )
if not os.path.exists(test_news_file):
    download_deeprec_resources(
        mind_url,
        os.path.join(data_path, "test"),
        mind_test_dataset,
    )
if not os.path.exists(yaml_file):
    download_deeprec_resources(
        r"https://recodatasets.blob.core.windows.net/newsrec/",
        os.path.join(data_path, "utils"),
        mind_utils,
    )

hparams = prepare_hparams(
    yaml_file,
    wordEmb_file=wordEmb_file,
    wordDict_file=wordDict_file,
    userDict_file=userDict_file,
    batch_size=batch_size,
    epochs=epochs,
    show_step=10,
)
print(hparams)

# The iterator class itself (not an instance) is handed to the model.
iterator = MINDIterator
model = NRMSModel(hparams, iterator, seed=seed)

# print(model.run_eval(valid_news_file, valid_behaviors_file))

model.fit(
    train_news_file,
    train_behaviors_file,
    valid_news_file,
    valid_behaviors_file,
)
# res_syn = model.run_eval(valid_news_file, valid_behaviors_file)
# print(res_syn)

# pm.record("res_syn", res_syn)
Example #15
0
# Fetch each MIND archive only when its marker file is missing, announcing
# which check triggered the download.
if not os.path.exists(train_news_file):
    print("not os.path.exists(train_news_file)")
    download_deeprec_resources(
        mind_url,
        os.path.join(data_path, "train"),
        mind_train_dataset,
    )

if not os.path.exists(valid_news_file):
    print("not os.path.exists(valid_news_file)")
    download_deeprec_resources(
        mind_url,
        os.path.join(data_path, "valid"),
        mind_dev_dataset,
    )

if not os.path.exists(yaml_file):
    print("not os.path.exists(yaml_file)")
    download_deeprec_resources(
        r"https://recodatasets.blob.core.windows.net/newsrec/",
        os.path.join(data_path, "utils"),
        mind_utils,
    )

## Create hyper-parameters
hparams = prepare_hparams(
    yaml_file,
    wordEmb_file=wordEmb_file,
    wordDict_file=wordDict_file,
    userDict_file=userDict_file,
    epochs=epochs,
)
print("hparams:", hparams)
# We generate a word_dict file to transform the words in news titles into
# word indexes, and an embedding matrix is initialized from pretrained GloVe
# embeddings.

iterator = MINDIterator

## Train the LSTUR model
model = LSTURModel(hparams, iterator, seed=seed)

# Evaluate once before training.
# news: 18723, behaviors: 7538 (has an extra first column)
print(model.run_eval(valid_news_file, valid_behaviors_file))

model.fit(
    train_news_file,
    train_behaviors_file,
    valid_news_file,
    valid_behaviors_file,
)