def test_model_dkn(resource_path):
    """End-to-end smoke test for DKN: fetch resources, train one epoch, evaluate.

    Asserts that ``fit`` returns the model instance and that ``run_eval``
    produces a (non-None) metrics result on the validation file.
    """
    dkn_dir = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    yaml_file = os.path.join(dkn_dir, "dkn.yaml")
    # Train and validation intentionally share the same small sample file.
    train_file = os.path.join(dkn_dir, "final_test_with_entity.txt")
    valid_file = os.path.join(dkn_dir, "final_test_with_entity.txt")
    wordEmb_file = os.path.join(dkn_dir, "word_embeddings_100.npy")
    entityEmb_file = os.path.join(dkn_dir, "TransE_entity2vec_100.npy")

    # Download the bundled DKN resources only when the config is not cached.
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            dkn_dir,
            "dknresources.zip",
        )

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    model = DKN(hparams, DKNTextIterator)

    # fit() returns the model itself, so the isinstance check doubles as a
    # "training completed" check.
    assert isinstance(model.fit(train_file, valid_file), BaseModel)
    assert model.run_eval(valid_file) is not None
def test_model_dkn(resource_path):
    """Train DKN for one epoch on the MIND demo data and check fit/eval run."""
    dkn_dir = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    yaml_file = os.path.join(dkn_dir, "dkn.yaml")
    train_file = os.path.join(dkn_dir, "train_mind_demo.txt")
    valid_file = os.path.join(dkn_dir, "valid_mind_demo.txt")
    test_file = os.path.join(dkn_dir, "test_mind_demo.txt")  # part of the bundle; unused below
    news_feature_file = os.path.join(dkn_dir, "doc_feature.txt")
    user_history_file = os.path.join(dkn_dir, "user_history.txt")
    wordEmb_file = os.path.join(dkn_dir, "word_embeddings_100.npy")
    entityEmb_file = os.path.join(dkn_dir, "TransE_entity2vec_100.npy")
    contextEmb_file = os.path.join(dkn_dir, "TransE_context2vec_100.npy")

    # Unconditionally fetch the MIND demo bundle into the data directory.
    download_deeprec_resources(
        "https://recodatasets.blob.core.windows.net/deeprec/",
        dkn_dir,
        "mind-demo.zip",
    )

    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        user_history_file=user_history_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        contextEmb_file=contextEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    model = DKN(hparams, DKNTextIterator)

    # fit() returns the model itself; run_eval() returns an eval result.
    assert isinstance(model.fit(train_file, valid_file), BaseModel)
    assert model.run_eval(valid_file) is not None
def test_dkn_component_definition(resource_path):
    """Check that constructing a DKN model defines its core graph components."""
    dkn_dir = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    yaml_file = os.path.join(dkn_dir, "dkn.yaml")
    wordEmb_file = os.path.join(dkn_dir, "word_embeddings_100.npy")
    entityEmb_file = os.path.join(dkn_dir, "TransE_entity2vec_100.npy")

    # Download the bundled DKN resources only when the config is not cached.
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            dkn_dir,
            "dknresources.zip",
        )

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    assert hparams is not None

    model = DKN(hparams, DKNTextIterator)
    # Model construction must have wired up the output, the train op and
    # the data iterator.
    for component in (model.logit, model.update, model.iterator):
        assert component is not None
def test_dkn_component_definition(resource_path):
    """Check DKN graph components are defined after building on MIND demo data."""
    dkn_dir = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    yaml_file = os.path.join(dkn_dir, "dkn.yaml")
    news_feature_file = os.path.join(dkn_dir, "doc_feature.txt")
    user_history_file = os.path.join(dkn_dir, "user_history.txt")
    wordEmb_file = os.path.join(dkn_dir, "word_embeddings_100.npy")
    entityEmb_file = os.path.join(dkn_dir, "TransE_entity2vec_100.npy")
    contextEmb_file = os.path.join(dkn_dir, "TransE_context2vec_100.npy")

    # Unconditionally fetch the MIND demo bundle into the data directory.
    download_deeprec_resources(
        "https://recodatasets.blob.core.windows.net/deeprec/",
        dkn_dir,
        "mind-demo.zip",
    )

    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        user_history_file=user_history_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        contextEmb_file=contextEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    assert hparams is not None

    model = DKN(hparams, DKNTextIterator)
    # Model construction must have wired up the output, the train op and
    # the data iterator.
    for component in (model.logit, model.update, model.iterator):
        assert component is not None
def test_dkn_component_definition(deeprec_resource_path):
    """Check graph components of both the standard DKN and its item2item variant."""
    dkn_dir = os.path.join(deeprec_resource_path, "dkn")
    yaml_file = os.path.join(dkn_dir, "dkn.yaml")
    news_feature_file = os.path.join(dkn_dir, "doc_feature.txt")
    user_history_file = os.path.join(dkn_dir, "user_history.txt")
    wordEmb_file = os.path.join(dkn_dir, "word_embeddings_100.npy")
    entityEmb_file = os.path.join(dkn_dir, "TransE_entity2vec_100.npy")
    contextEmb_file = os.path.join(dkn_dir, "TransE_context2vec_100.npy")

    # Unconditionally fetch the MIND demo bundle into the data directory.
    download_deeprec_resources(
        "https://recodatasets.z20.web.core.windows.net/deeprec/",
        dkn_dir,
        "mind-demo.zip",
    )

    # --- standard user2item DKN ---
    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        user_history_file=user_history_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        contextEmb_file=contextEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    assert hparams is not None
    model = DKN(hparams, DKNTextIterator)
    assert model.logit is not None
    assert model.update is not None
    assert model.iterator is not None

    # --- DKN's item2item variant (no user history; clipped gradients) ---
    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        contextEmb_file=contextEmb_file,
        epochs=1,
        is_clip_norm=True,
        max_grad_norm=0.5,
        his_size=20,
        MODEL_DIR=os.path.join(dkn_dir, "save_models"),
        use_entity=True,
        use_context=True,
    )
    # Number of negatives per positive; set after construction because
    # prepare_hparams does not take it as a keyword here.
    hparams.neg_num = 9
    assert hparams is not None

    model_item2item = DKNItem2Item(hparams, DKNItem2itemTextIterator)
    assert model_item2item.pred_logits is not None
    assert model_item2item.update is not None
    assert model_item2item.iterator is not None
def test_model_dkn(resource_path):
    """One-epoch DKN training smoke test against the packaged entity data."""
    dkn_dir = os.path.join(resource_path, '../resources/deeprec/dkn')
    yaml_file = os.path.join(dkn_dir, 'dkn.yaml')
    # Train and validation intentionally share the same small sample file.
    train_file = os.path.join(dkn_dir, 'final_test_with_entity.txt')
    valid_file = os.path.join(dkn_dir, 'final_test_with_entity.txt')
    wordEmb_file = os.path.join(dkn_dir, 'word_embeddings_100.npy')
    entityEmb_file = os.path.join(dkn_dir, 'TransE_entity2vec_100.npy')

    # Download the bundled DKN resources only when the config is not cached.
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            'https://recodatasets.blob.core.windows.net/deeprec/',
            dkn_dir,
            'dknresources.zip',
        )

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    model = DKN(hparams, DKNTextIterator)

    # fit() returns the model itself; run_eval() returns an eval result.
    assert isinstance(model.fit(train_file, valid_file), BaseModel)
    assert model.run_eval(valid_file) is not None
# news_entities, # train_entities, # valid_entities, # max_sentence=10, # word_embedding_dim=100, # ) news_feature_file = os.path.join(data_path, 'doc_feature.txt') word_embeddings_file = os.path.join(data_path, 'word_embeddings_5w_100.npy') user_history_file = os.path.join(data_path, 'user_history.txt') entity_embeddings_file = os.path.join(data_path, 'entity_embeddings_5w_100.npy') yaml_file = os.path.join(data_path, 'dkn_MINDlarge.yaml') # yaml_file = maybe_download(url="https://recodatasets.blob.core.windows.net/deeprec/deeprec/dkn/dkn_MINDsmall.yaml", # work_directory=data_path) hparams = prepare_hparams(yaml_file, news_feature_file=news_feature_file, user_history_file=user_history_file, wordEmb_file=word_embeddings_file, entityEmb_file=entity_embeddings_file, epochs=epochs, history_size=history_size, MODEL_DIR=os.path.join(data_path, 'save_models'), batch_size=batch_size) model = DKN(hparams, DKNTextIterator) model.fit(train_file, valid_file) res = model.run_eval(valid_file) print(res)
# Paths for preprocessed embeddings and the MIND train/valid/test splits.
# NOTE(review): `data_path`, `news_feature_file`, `user_history_file`,
# `epochs`, `history_size`, `batch_size` and `test_file` are defined earlier
# in the file — not visible in this chunk.
word_embeddings_file = os.path.join(data_path, "word_embeddings_5w_100.npy")
entity_embeddings_file = os.path.join(data_path, "entity_embeddings_5w_100.npy")
train_path = os.path.join(data_path, "train")
valid_path = os.path.join(data_path, "valid")
test_path = os.path.join(data_path, "test")

# Fetch the DKN config for MINDsmall into data_path.
yaml_file = maybe_download(
    url=
    "https://recodatasets.blob.core.windows.net/deeprec/deeprec/dkn/dkn_MINDsmall.yaml",
    work_directory=data_path)

hparams = prepare_hparams(yaml_file,
                          news_feature_file=news_feature_file,
                          user_history_file=user_history_file,
                          wordEmb_file=word_embeddings_file,
                          entityEmb_file=entity_embeddings_file,
                          epochs=epochs,
                          history_size=history_size,
                          batch_size=batch_size)
hparams.save_model = True
hparams.show_step = 5000
hparams.MODEL_DIR = 'para'  # checkpoint directory

# Restore the epoch-5 checkpoint and score the test split.
model = DKN(hparams, DKNTextIterator)
model.load_model('./para/epoch_5')
# model.run_test(valid_file, 14085557, save_model=True, validate=True)
# 10388965 presumably is the number of test instances — TODO confirm.
model.run_test(test_file, 10388965, save_model=False, validate=True)
# Tail of a truncated preprocessing call — the callee and its leading
# arguments are outside this chunk; these args include the test-split
# entities and fix sentence length / embedding size.
    news_words,
    news_entities,
    train_entities,
    valid_entities,
    test_entities=test_entities,
    max_sentence=10,
    word_embedding_dim=100,
)

# Fetch the DKN config for MINDsmall into data_path.
# NOTE(review): `data_path`, `news_feature_file`, `user_history_file`,
# `word_embeddings_file`, `entity_embeddings_file`, `epochs`, `history_size`,
# `batch_size`, `train_file` and `valid_file` come from earlier in the file.
yaml_file = maybe_download(
    url=
    "https://recodatasets.blob.core.windows.net/deeprec/deeprec/dkn/dkn_MINDsmall.yaml",
    work_directory=data_path)

hparams = prepare_hparams(yaml_file,
                          news_feature_file=news_feature_file,
                          user_history_file=user_history_file,
                          wordEmb_file=word_embeddings_file,
                          entityEmb_file=entity_embeddings_file,
                          epochs=epochs,
                          history_size=history_size,
                          batch_size=batch_size)
hparams.save_model = True
hparams.show_step = 20000
hparams.MODEL_DIR = 'para'  # checkpoint directory
hparams.save_epoch = 1
hparams.write_tfevents = False

# Train; 16918280 presumably is the number of training instances — TODO confirm.
model = DKN(hparams, DKNTextIterator)
model.fit(train_file, valid_file, 16918280)
# Tail of a truncated preprocessing call — the callee and its leading
# arguments are outside this chunk; these args include the test-split
# entities and fix sentence length / embedding size.
    news_words,
    news_entities,
    train_entities,
    valid_entities,
    test_entities=test_entities,
    max_sentence=10,
    word_embedding_dim=100,
)

# Fetch the DKN config for MINDsmall into data_path.
# NOTE(review): `data_path`, `news_feature_file`, `user_history_file`,
# `word_embeddings_file`, `entity_embeddings_file`, `epochs`, `history_size`,
# `batch_size`, `train_file` and `valid_file` come from earlier in the file.
yaml_file = maybe_download(
    url=
    "https://recodatasets.blob.core.windows.net/deeprec/deeprec/dkn/dkn_MINDsmall.yaml",
    work_directory=data_path)

hparams = prepare_hparams(yaml_file,
                          news_feature_file=news_feature_file,
                          user_history_file=user_history_file,
                          wordEmb_file=word_embeddings_file,
                          entityEmb_file=entity_embeddings_file,
                          epochs=epochs,
                          history_size=history_size,
                          batch_size=batch_size)
hparams.save_model = True
hparams.show_step = 20000
hparams.MODEL_DIR = 'para'  # checkpoint directory
hparams.save_epoch = 1
hparams.write_tfevents = False

# Train; 2853385 presumably is the number of training instances — TODO confirm.
model = DKN(hparams, DKNTextIterator)
model.fit(train_file, valid_file, 2853385)
# Paths for preprocessed entity embeddings and the MIND train/valid/test splits.
# NOTE(review): `data_path`, `news_feature_file`, `user_history_file`,
# `word_embeddings_file`, `epochs`, `history_size`, `batch_size` and
# `test_file` are defined earlier in the file — not visible in this chunk.
entity_embeddings_file = os.path.join(data_path, "entity_embeddings_5w_100.npy")
train_path = os.path.join(data_path, "train")
valid_path = os.path.join(data_path, "valid")
test_path = os.path.join(data_path, "test")

# Fetch the DKN config for MINDsmall into data_path.
yaml_file = maybe_download(
    url=
    "https://recodatasets.blob.core.windows.net/deeprec/deeprec/dkn/dkn_MINDsmall.yaml",
    work_directory=data_path)

hparams = prepare_hparams(yaml_file,
                          news_feature_file=news_feature_file,
                          user_history_file=user_history_file,
                          wordEmb_file=word_embeddings_file,
                          entityEmb_file=entity_embeddings_file,
                          epochs=epochs,
                          history_size=history_size,
                          batch_size=batch_size)
hparams.save_model = True
hparams.show_step = 5000
hparams.MODEL_DIR = 'para'  # checkpoint directory

# Restore the epoch-4 checkpoint and score the test split without validation.
model = DKN(hparams, DKNTextIterator)
model.load_model('./para/epoch_4')
# model.run_test(valid_file, 14085557, save_model=True, validate=True)
# 93115001 presumably is the number of test instances — TODO confirm.
model.run_test(test_file, 93115001, save_model=True, validate=False)
# model.run_eval(test_file)