def test_model_dkn(resource_path):
    """Smoke-test DKN: fit for a single epoch, then evaluate.

    The DKN resources live under ``<resource_path>/../resources/deeprec/dkn``
    and are downloaded only when ``dkn.yaml`` is not already present there.
    The same data file is used for both training and validation.

    Args:
        resource_path: Base directory from which the DKN resource folder
            is resolved.
    """
    dkn_dir = os.path.join(resource_path, "..", "resources", "deeprec",
                           "dkn")

    def in_dkn_dir(filename):
        # Resolve a resource file name inside the DKN data directory.
        return os.path.join(dkn_dir, filename)

    config_path = in_dkn_dir("dkn.yaml")
    train_path = in_dkn_dir("final_test_with_entity.txt")
    valid_path = in_dkn_dir("final_test_with_entity.txt")
    word_emb_path = in_dkn_dir("word_embeddings_100.npy")
    entity_emb_path = in_dkn_dir("TransE_entity2vec_100.npy")

    # The YAML config doubles as the "resources already fetched" marker.
    if not os.path.exists(config_path):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            dkn_dir,
            "dknresources.zip",
        )

    overrides = {
        "wordEmb_file": word_emb_path,
        "entityEmb_file": entity_emb_path,
        "epochs": 1,
        "learning_rate": 0.0001,
    }
    hparams = prepare_hparams(config_path, **overrides)
    model = DKN(hparams, DKNTextIterator)

    # fit() is expected to hand back a BaseModel instance.
    assert isinstance(model.fit(train_path, valid_path), BaseModel)
    assert model.run_eval(valid_path) is not None
Example #2
0
def test_model_dkn(resource_path):
    """Smoke-test DKN on the MIND demo data: fit one epoch, then evaluate.

    The ``mind-demo.zip`` archive is requested into
    ``<resource_path>/../resources/deeprec/dkn`` on every run, after which
    the model is trained on the demo training file and evaluated on the
    demo validation file.

    Args:
        resource_path: Base directory from which the DKN resource folder
            is resolved.
    """
    data_path = os.path.join(resource_path, "..", "resources", "deeprec",
                             "dkn")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    train_file = os.path.join(data_path, "train_mind_demo.txt")
    valid_file = os.path.join(data_path, "valid_mind_demo.txt")
    # The previously computed test_mind_demo.txt path was never used by this
    # test, so it is no longer built here.
    news_feature_file = os.path.join(data_path, "doc_feature.txt")
    user_history_file = os.path.join(data_path, "user_history.txt")
    wordEmb_file = os.path.join(data_path, "word_embeddings_100.npy")
    entityEmb_file = os.path.join(data_path, "TransE_entity2vec_100.npy")
    contextEmb_file = os.path.join(data_path, "TransE_context2vec_100.npy")

    download_deeprec_resources(
        "https://recodatasets.blob.core.windows.net/deeprec/",
        data_path,
        "mind-demo.zip",
    )

    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        user_history_file=user_history_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        contextEmb_file=contextEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    model = DKN(hparams, DKNTextIterator)

    # fit() is expected to hand back a BaseModel instance.
    assert isinstance(model.fit(train_file, valid_file), BaseModel)
    assert model.run_eval(valid_file) is not None
def test_model_dkn(resource_path):
    """Train DKN for one epoch and check that evaluation yields a result.

    Resources are downloaded into ``<resource_path>/../resources/deeprec/dkn``
    only if ``dkn.yaml`` is missing. Training and validation both read
    ``final_test_with_entity.txt``.

    Args:
        resource_path: Base directory from which the DKN resource folder
            is resolved.
    """
    root = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    config, samples, word_vectors, entity_vectors = (
        os.path.join(root, name)
        for name in (
            "dkn.yaml",
            "final_test_with_entity.txt",
            "word_embeddings_100.npy",
            "TransE_entity2vec_100.npy",
        )
    )

    # Skip the download when the config file is already on disk.
    config_missing = not os.path.exists(config)
    if config_missing:
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            root,
            "dknresources.zip",
        )

    hparams = prepare_hparams(
        config,
        wordEmb_file=word_vectors,
        entityEmb_file=entity_vectors,
        epochs=1,
        learning_rate=0.0001,
    )
    model = DKN(hparams, DKNTextIterator)

    # The same sample file serves as the training and the validation set.
    assert isinstance(model.fit(samples, samples), BaseModel)
    assert model.run_eval(samples) is not None
def test_dkn_component_definition(resource_path):
    """Check that a freshly built DKN model exposes its core components.

    Builds hyper-parameters from ``dkn.yaml`` (downloading the DKN resources
    first if that file is absent), constructs the model, and verifies that
    ``logit``, ``update`` and ``iterator`` are all set.

    Args:
        resource_path: Base directory from which the DKN resource folder
            is resolved.
    """
    dkn_dir = os.path.join(resource_path, "..", "resources", "deeprec",
                           "dkn")
    config_path = os.path.join(dkn_dir, "dkn.yaml")
    embedding_paths = {
        "wordEmb_file": os.path.join(dkn_dir, "word_embeddings_100.npy"),
        "entityEmb_file": os.path.join(dkn_dir,
                                       "TransE_entity2vec_100.npy"),
    }

    # Only fetch the archive when the config is not already on disk.
    if not os.path.exists(config_path):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            dkn_dir,
            "dknresources.zip",
        )

    hparams = prepare_hparams(
        config_path,
        epochs=1,
        learning_rate=0.0001,
        **embedding_paths,
    )
    assert hparams is not None

    model = DKN(hparams, DKNTextIterator)

    assert model.logit is not None
    assert model.update is not None
    assert model.iterator is not None
Example #5
0
def test_dkn_component_definition(resource_path):
    """Check that DKN built on the MIND demo resources defines its components.

    Requests ``mind-demo.zip`` into the DKN data directory, prepares
    hyper-parameters from ``dkn.yaml`` and verifies that the constructed
    model has ``logit``, ``update`` and ``iterator`` set.

    Args:
        resource_path: Base directory from which the DKN resource folder
            is resolved.
    """
    dkn_dir = os.path.join(resource_path, "..", "resources", "deeprec",
                           "dkn")

    def resource(filename):
        # Resolve a resource file name inside the DKN data directory.
        return os.path.join(dkn_dir, filename)

    config_path = resource("dkn.yaml")
    doc_features = resource("doc_feature.txt")
    user_history = resource("user_history.txt")
    word_vectors = resource("word_embeddings_100.npy")
    entity_vectors = resource("TransE_entity2vec_100.npy")
    context_vectors = resource("TransE_context2vec_100.npy")

    download_deeprec_resources(
        "https://recodatasets.blob.core.windows.net/deeprec/",
        dkn_dir,
        "mind-demo.zip",
    )

    hparams = prepare_hparams(
        config_path,
        news_feature_file=doc_features,
        user_history_file=user_history,
        wordEmb_file=word_vectors,
        entityEmb_file=entity_vectors,
        contextEmb_file=context_vectors,
        epochs=1,
        learning_rate=0.0001,
    )
    assert hparams is not None

    model = DKN(hparams, DKNTextIterator)

    assert model.logit is not None
    assert model.update is not None
    assert model.iterator is not None
def test_dkn_component_definition(deeprec_resource_path):
    """Check component definitions of both DKN and its item2item variant.

    Requests ``mind-demo.zip`` into ``<deeprec_resource_path>/dkn``, then:

    1. builds a ``DKN`` model and verifies ``logit``, ``update`` and
       ``iterator`` are set;
    2. builds a ``DKNItem2Item`` model from a second set of hyper-parameters
       and verifies ``pred_logits``, ``update`` and ``iterator`` are set.

    Args:
        deeprec_resource_path: Directory containing the ``dkn`` resource
            folder.
    """
    data_path = os.path.join(deeprec_resource_path, "dkn")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    news_feature_file = os.path.join(data_path, "doc_feature.txt")
    user_history_file = os.path.join(data_path, "user_history.txt")
    wordEmb_file = os.path.join(data_path, "word_embeddings_100.npy")
    entityEmb_file = os.path.join(data_path, "TransE_entity2vec_100.npy")
    contextEmb_file = os.path.join(data_path, "TransE_context2vec_100.npy")

    download_deeprec_resources(
        "https://recodatasets.z20.web.core.windows.net/deeprec/",
        data_path,
        "mind-demo.zip",
    )

    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        user_history_file=user_history_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        contextEmb_file=contextEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    assert hparams is not None
    model = DKN(hparams, DKNTextIterator)

    assert model.logit is not None
    assert model.update is not None
    assert model.iterator is not None

    ###  test DKN's item2item version
    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        contextEmb_file=contextEmb_file,
        epochs=1,
        is_clip_norm=True,
        max_grad_norm=0.5,
        his_size=20,
        MODEL_DIR=os.path.join(data_path, "save_models"),
        use_entity=True,
        use_context=True,
    )
    # Validate hparams before touching its attributes; asserting after the
    # attribute write could never fail, because the write itself would raise
    # first on a None value.
    assert hparams is not None
    hparams.neg_num = 9
    model_item2item = DKNItem2Item(hparams, DKNItem2itemTextIterator)

    assert model_item2item.pred_logits is not None
    assert model_item2item.update is not None
    assert model_item2item.iterator is not None
def test_model_dkn(resource_path):
    """Fit DKN for one epoch and confirm evaluation returns something.

    The DKN resources are resolved relative to ``resource_path`` and are
    downloaded only when ``dkn.yaml`` does not exist yet. One data file is
    used for both training and validation.

    Args:
        resource_path: Base directory from which the DKN resource folder
            is resolved.
    """
    dkn_dir = os.path.join(resource_path, '../resources/deeprec/dkn')
    config_path = os.path.join(dkn_dir, 'dkn.yaml')
    train_path = os.path.join(dkn_dir, 'final_test_with_entity.txt')
    valid_path = os.path.join(dkn_dir, 'final_test_with_entity.txt')
    word_emb_path = os.path.join(dkn_dir, 'word_embeddings_100.npy')
    entity_emb_path = os.path.join(dkn_dir, 'TransE_entity2vec_100.npy')

    # Skip the download when the config file is already on disk.
    if not os.path.exists(config_path):
        download_deeprec_resources(
            'https://recodatasets.blob.core.windows.net/deeprec/',
            dkn_dir,
            'dknresources.zip',
        )

    hparams = prepare_hparams(
        config_path,
        wordEmb_file=word_emb_path,
        entityEmb_file=entity_emb_path,
        epochs=1,
        learning_rate=0.0001,
    )
    model = DKN(hparams, DKNTextIterator)

    # fit() is expected to hand back a BaseModel instance.
    assert isinstance(model.fit(train_path, valid_path), BaseModel)
    assert model.run_eval(valid_path) is not None
Example #8
0
#     news_entities,
#     train_entities,
#     valid_entities,
#     max_sentence=10,
#     word_embedding_dim=100,
# )

# Train-and-evaluate script section for DKN.
# data_path, epochs, history_size, batch_size, train_file and valid_file are
# defined outside this excerpt.

# Input files expected under data_path.
news_feature_file = os.path.join(data_path, 'doc_feature.txt')
word_embeddings_file = os.path.join(data_path, 'word_embeddings_5w_100.npy')
user_history_file = os.path.join(data_path, 'user_history.txt')
entity_embeddings_file = os.path.join(data_path,
                                      'entity_embeddings_5w_100.npy')
# The config is read from a local file; the download alternative is disabled.
yaml_file = os.path.join(data_path, 'dkn_MINDlarge.yaml')
# yaml_file = maybe_download(url="https://recodatasets.blob.core.windows.net/deeprec/deeprec/dkn/dkn_MINDsmall.yaml",
#                            work_directory=data_path)
# Build hyper-parameters from the YAML config plus the keyword overrides.
hparams = prepare_hparams(yaml_file,
                          news_feature_file=news_feature_file,
                          user_history_file=user_history_file,
                          wordEmb_file=word_embeddings_file,
                          entityEmb_file=entity_embeddings_file,
                          epochs=epochs,
                          history_size=history_size,
                          MODEL_DIR=os.path.join(data_path, 'save_models'),
                          batch_size=batch_size)
model = DKN(hparams, DKNTextIterator)

# Train, then evaluate on the validation file and print the result.
model.fit(train_file, valid_file)

res = model.run_eval(valid_file)
print(res)
Example #9
0
# Script section that loads a saved DKN checkpoint and runs it on test_file.
# data_path, news_feature_file, user_history_file, epochs, history_size,
# batch_size and test_file are defined outside this excerpt.
word_embeddings_file = os.path.join(data_path, "word_embeddings_5w_100.npy")
entity_embeddings_file = os.path.join(data_path,
                                      "entity_embeddings_5w_100.npy")

# Split directories; not referenced again within this excerpt.
train_path = os.path.join(data_path, "train")
valid_path = os.path.join(data_path, "valid")
test_path = os.path.join(data_path, "test")

# Obtain the YAML config via maybe_download, using data_path as the work
# directory.
yaml_file = maybe_download(
    url=
    "https://recodatasets.blob.core.windows.net/deeprec/deeprec/dkn/dkn_MINDsmall.yaml",
    work_directory=data_path)
# Build hyper-parameters from the YAML config plus the keyword overrides.
hparams = prepare_hparams(yaml_file,
                          news_feature_file=news_feature_file,
                          user_history_file=user_history_file,
                          wordEmb_file=word_embeddings_file,
                          entityEmb_file=entity_embeddings_file,
                          epochs=epochs,
                          history_size=history_size,
                          batch_size=batch_size)

# Settings applied directly on the hparams object after it is built.
hparams.save_model = True
hparams.show_step = 5000
hparams.MODEL_DIR = 'para'

model = DKN(hparams, DKNTextIterator)
# Load a previously saved checkpoint from the 'para' directory.
model.load_model('./para/epoch_5')

# NOTE(review): the meaning of the integer passed as the second positional
# argument to run_test is not visible here - confirm against its signature.
# model.run_test(valid_file, 14085557, save_model=True, validate=True)
model.run_test(test_file, 10388965, save_model=False, validate=True)
Example #10
0
        news_words,
        news_entities,
        train_entities,
        valid_entities,
        test_entities=test_entities,
        max_sentence=10,
        word_embedding_dim=100,
    )

# Training script section for DKN.
# data_path, the feature/embedding file paths, epochs, history_size,
# batch_size, train_file and valid_file are defined outside this excerpt.

# Obtain the YAML config via maybe_download, using data_path as the work
# directory.
yaml_file = maybe_download(
    url=
    "https://recodatasets.blob.core.windows.net/deeprec/deeprec/dkn/dkn_MINDsmall.yaml",
    work_directory=data_path)
# Build hyper-parameters from the YAML config plus the keyword overrides.
hparams = prepare_hparams(yaml_file,
                          news_feature_file=news_feature_file,
                          user_history_file=user_history_file,
                          wordEmb_file=word_embeddings_file,
                          entityEmb_file=entity_embeddings_file,
                          epochs=epochs,
                          history_size=history_size,
                          batch_size=batch_size)

# Settings applied directly on the hparams object after it is built.
hparams.save_model = True
hparams.show_step = 20000
hparams.MODEL_DIR = 'para'
hparams.save_epoch = 1
hparams.write_tfevents = False

model = DKN(hparams, DKNTextIterator)
# NOTE(review): the meaning of the third positional argument to fit is not
# visible here - confirm against its signature.
model.fit(train_file, valid_file, 16918280)
Example #11
0
        news_words,
        news_entities,
        train_entities,
        valid_entities,
        test_entities=test_entities,
        max_sentence=10,
        word_embedding_dim=100,
    )

# Training script section for DKN.
# data_path, the feature/embedding file paths, epochs, history_size,
# batch_size, train_file and valid_file are defined outside this excerpt.

# Obtain the YAML config via maybe_download, using data_path as the work
# directory.
yaml_file = maybe_download(
    url=
    "https://recodatasets.blob.core.windows.net/deeprec/deeprec/dkn/dkn_MINDsmall.yaml",
    work_directory=data_path)
# Build hyper-parameters from the YAML config plus the keyword overrides.
hparams = prepare_hparams(yaml_file,
                          news_feature_file=news_feature_file,
                          user_history_file=user_history_file,
                          wordEmb_file=word_embeddings_file,
                          entityEmb_file=entity_embeddings_file,
                          epochs=epochs,
                          history_size=history_size,
                          batch_size=batch_size)

# Settings applied directly on the hparams object after it is built.
hparams.save_model = True
hparams.show_step = 20000
hparams.MODEL_DIR = 'para'
hparams.save_epoch = 1
hparams.write_tfevents = False

model = DKN(hparams, DKNTextIterator)
# NOTE(review): the meaning of the third positional argument to fit is not
# visible here - confirm against its signature.
model.fit(train_file, valid_file, 2853385)
Example #12
0
# Script section that loads a saved DKN checkpoint and runs it on test_file.
# data_path, news_feature_file, user_history_file, word_embeddings_file,
# epochs, history_size, batch_size and test_file are defined outside this
# excerpt.
entity_embeddings_file = os.path.join(data_path,
                                      "entity_embeddings_5w_100.npy")

# Split directories; not referenced again within this excerpt.
train_path = os.path.join(data_path, "train")
valid_path = os.path.join(data_path, "valid")
test_path = os.path.join(data_path, "test")

# Obtain the YAML config via maybe_download, using data_path as the work
# directory.
yaml_file = maybe_download(
    url=
    "https://recodatasets.blob.core.windows.net/deeprec/deeprec/dkn/dkn_MINDsmall.yaml",
    work_directory=data_path)
# Build hyper-parameters from the YAML config plus the keyword overrides.
hparams = prepare_hparams(yaml_file,
                          news_feature_file=news_feature_file,
                          user_history_file=user_history_file,
                          wordEmb_file=word_embeddings_file,
                          entityEmb_file=entity_embeddings_file,
                          epochs=epochs,
                          history_size=history_size,
                          batch_size=batch_size)

# Settings applied directly on the hparams object after it is built.
hparams.save_model = True
hparams.show_step = 5000
hparams.MODEL_DIR = 'para'

model = DKN(hparams, DKNTextIterator)
# Load a previously saved checkpoint from the 'para' directory.
model.load_model('./para/epoch_4')

# NOTE(review): the meaning of the integer passed as the second positional
# argument to run_test is not visible here - confirm against its signature.
# model.run_test(valid_file, 14085557, save_model=True, validate=True)
model.run_test(test_file, 93115001, save_model=True, validate=False)
# model.run_eval(test_file)