def test_prepare_hparams(must_exist_attributes, deeprec_resource_path):
    """prepare_hparams must expose every required attribute for the NRMS config."""
    utils_dir = os.path.join(deeprec_resource_path, "mind", "utils")
    wordEmb_file = os.path.join(utils_dir, "embedding.npy")
    userDict_file = os.path.join(utils_dir, "uid2index.pkl")
    wordDict_file = os.path.join(utils_dir, "word_dict.pkl")
    yaml_file = os.path.join(utils_dir, r"nrms.yaml")
    # Fetch the demo utils bundle only when the config is not cached locally.
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
            utils_dir,
            "MINDdemo_utils.zip",
        )
    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        wordDict_file=wordDict_file,
        userDict_file=userDict_file,
        epochs=1,
    )
    assert hasattr(hparams, must_exist_attributes)
def test_dkn_component_definition(resource_path):
    """Verify that a DKN model builds its core graph components."""
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    wordEmb_file = os.path.join(data_path, "word_embeddings_100.npy")
    entityEmb_file = os.path.join(data_path, "TransE_entity2vec_100.npy")
    # Download the DKN resource bundle on a cache miss.
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "dknresources.zip",
        )
    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    assert hparams is not None
    model = DKN(hparams, DKNTextIterator)
    # The model must expose its prediction, training, and input nodes.
    for node in (model.logit, model.update, model.iterator):
        assert node is not None
def test_model_dkn(resource_path):
    """Train and evaluate a DKN model end to end on the small test set."""
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    # The same small labeled file doubles as train and validation split.
    train_file = os.path.join(data_path, "final_test_with_entity.txt")
    valid_file = os.path.join(data_path, "final_test_with_entity.txt")
    wordEmb_file = os.path.join(data_path, "word_embeddings_100.npy")
    entityEmb_file = os.path.join(data_path, "TransE_entity2vec_100.npy")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "dknresources.zip",
        )
    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    model = DKN(hparams, DKNTextIterator)
    # fit() is expected to hand back the model object itself.
    assert isinstance(model.fit(train_file, valid_file), BaseModel)
    assert model.run_eval(valid_file) is not None
def test_npa_component_definition(mind_resource_path):
    """Verify that an NPA model wires up its Keras components."""
    utils_dir = os.path.join(mind_resource_path, "utils")
    wordEmb_file = os.path.join(utils_dir, "embedding.npy")
    userDict_file = os.path.join(utils_dir, "uid2index.pkl")
    wordDict_file = os.path.join(utils_dir, "word_dict.pkl")
    yaml_file = os.path.join(utils_dir, r"npa.yaml")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            r"https://recodatasets.z20.web.core.windows.net/newsrec/",
            utils_dir,
            "MINDdemo_utils.zip",
        )
    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        wordDict_file=wordDict_file,
        userDict_file=userDict_file,
        epochs=1,
    )
    model = NPAModel(hparams, MINDIterator)
    # All four components must be constructed on instantiation.
    for component in (model.model, model.scorer, model.loss, model.train_optimizer):
        assert component is not None
def test_model_dkn(resource_path):
    """Train and evaluate a DKN model on the MIND demo data.

    Fixes: drop the unused ``test_file`` local and download the demo bundle
    only when the config file is missing, matching the guarded-download
    pattern used by the sibling tests (avoids a redundant network fetch).
    """
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    yaml_file = os.path.join(data_path, r'dkn.yaml')
    train_file = os.path.join(data_path, r'train_mind_demo.txt')
    valid_file = os.path.join(data_path, r'valid_mind_demo.txt')
    news_feature_file = os.path.join(data_path, r'doc_feature.txt')
    user_history_file = os.path.join(data_path, r'user_history.txt')
    wordEmb_file = os.path.join(data_path, r'word_embeddings_100.npy')
    entityEmb_file = os.path.join(data_path, r'TransE_entity2vec_100.npy')
    contextEmb_file = os.path.join(data_path, r'TransE_context2vec_100.npy')
    # Skip the network fetch when the demo bundle is already cached.
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "mind-demo.zip",
        )
    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        user_history_file=user_history_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        contextEmb_file=contextEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    model = DKN(hparams, DKNTextIterator)
    assert isinstance(model.fit(train_file, valid_file), BaseModel)
    assert model.run_eval(valid_file) is not None
def test_model_dkn(resource_path):
    """Fit a DKN model and make sure training and evaluation both succeed."""
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    wordEmb_file = os.path.join(data_path, "word_embeddings_100.npy")
    entityEmb_file = os.path.join(data_path, "TransE_entity2vec_100.npy")
    # One small labeled file serves as both splits.
    train_file = valid_file = os.path.join(data_path, "final_test_with_entity.txt")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "dknresources.zip",
        )
    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    input_creator = DKNTextIterator
    model = DKN(hparams, input_creator)
    assert isinstance(model.fit(train_file, valid_file), BaseModel)
    assert model.run_eval(valid_file) is not None
def test_dkn_component_definition(resource_path):
    """Check DKN graph components after building from the MIND demo data.

    Fix: download the demo bundle only when the config file is missing,
    matching the guarded-download pattern used by the sibling tests.
    """
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    news_feature_file = os.path.join(data_path, r'doc_feature.txt')
    user_history_file = os.path.join(data_path, r'user_history.txt')
    wordEmb_file = os.path.join(data_path, r'word_embeddings_100.npy')
    entityEmb_file = os.path.join(data_path, r'TransE_entity2vec_100.npy')
    contextEmb_file = os.path.join(data_path, r'TransE_context2vec_100.npy')
    # Skip the network fetch when the demo bundle is already cached.
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "mind-demo.zip",
        )
    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        user_history_file=user_history_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        contextEmb_file=contextEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    assert hparams is not None
    model = DKN(hparams, DKNTextIterator)
    assert model.logit is not None
    assert model.update is not None
    assert model.iterator is not None
def test_dkn_component_definition(resource_path):
    """Build a DKN model (configured for 5 epochs) and check its graph handles."""
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    wordEmb_file = os.path.join(data_path, "word_embeddings_100.npy")
    entityEmb_file = os.path.join(data_path, "TransE_entity2vec_100.npy")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "dknresources.zip",
        )
    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        epochs=5,
        learning_rate=0.0001,
    )
    assert hparams is not None
    model = DKN(hparams, DKNTextIterator)
    # Prediction, training, and input nodes must all exist.
    for node in (model.logit, model.update, model.iterator):
        assert node is not None
def test_naml_component_definition(tmp):
    """Verify that a NAML model assembles all of its Keras components."""
    utils_dir = os.path.join(tmp, "utils")
    yaml_file = os.path.join(utils_dir, r"naml.yaml")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            r"https://recodatasets.blob.core.windows.net/newsrec/",
            utils_dir,
            "MINDdemo_utils.zip",
        )
    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=os.path.join(utils_dir, "embedding_all.npy"),
        wordDict_file=os.path.join(utils_dir, "word_dict_all.pkl"),
        userDict_file=os.path.join(utils_dir, "uid2index.pkl"),
        vertDict_file=os.path.join(utils_dir, "vert_dict.pkl"),
        subvertDict_file=os.path.join(utils_dir, "subvert_dict.pkl"),
        epochs=1,
    )
    model = NAMLModel(hparams, MINDAllIterator)
    for component in (model.model, model.scorer, model.loss, model.train_optimizer):
        assert component is not None
def test_DKN_iterator(deeprec_resource_path):
    """Exercise DKNTextIterator and DKNItem2itemTextIterator on the demo data.

    Fix: download the mind-demo bundle only when the config file is missing,
    matching the guarded-download pattern used elsewhere in this suite.
    """
    data_path = os.path.join(deeprec_resource_path, "dkn")
    data_file = os.path.join(data_path, r"train_mind_demo.txt")
    news_feature_file = os.path.join(data_path, r"doc_feature.txt")
    user_history_file = os.path.join(data_path, r"user_history.txt")
    wordEmb_file = os.path.join(data_path, "word_embeddings_100.npy")
    entityEmb_file = os.path.join(data_path, "TransE_entity2vec_100.npy")
    contextEmb_file = os.path.join(data_path, "TransE_context2vec_100.npy")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    # Skip the network fetch when the demo bundle is already cached.
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            data_path,
            "mind-demo.zip",
        )
    # Point-wise iterator: embedding files are not needed to parse the data.
    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        user_history_file=user_history_file,
        wordEmb_file="",
        entityEmb_file="",
        contextEmb_file="",
    )
    iterator = DKNTextIterator(hparams, tf.Graph())
    assert iterator is not None
    for res, impression, data_size in iterator.load_data_from_file(data_file):
        assert isinstance(res, dict)
    ### test DKN item2item iterator
    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        contextEmb_file=contextEmb_file,
        epochs=1,
        is_clip_norm=True,
        max_grad_norm=0.5,
        his_size=20,
        MODEL_DIR=os.path.join(data_path, "save_models"),
        use_entity=True,
        use_context=True,
    )
    hparams.neg_num = 9
    iterator_item2item = DKNItem2itemTextIterator(hparams, tf.Graph())
    assert iterator_item2item is not None
    # Only inspect the first few batches to keep the test fast.
    test_round = 3
    for res, impression, data_size in iterator_item2item.load_data_from_file(
        os.path.join(data_path, "doc_list.txt")
    ):
        assert isinstance(res, dict)
        test_round -= 1
        if test_round <= 0:
            break
def test_prepare_hparams(must_exist_attributes, resource_path):
    """prepare_hparams must expose every required xDeepFM attribute."""
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )
    assert hasattr(prepare_hparams(yaml_file), must_exist_attributes)
def test_dkn_component_definition(deeprec_resource_path):
    """Check that DKN and its item2item variant define their graph components.

    Fix: download the mind-demo bundle only when the config file is missing,
    matching the guarded-download pattern used elsewhere in this suite.
    """
    data_path = os.path.join(deeprec_resource_path, "dkn")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    news_feature_file = os.path.join(data_path, r"doc_feature.txt")
    user_history_file = os.path.join(data_path, r"user_history.txt")
    wordEmb_file = os.path.join(data_path, r"word_embeddings_100.npy")
    entityEmb_file = os.path.join(data_path, r"TransE_entity2vec_100.npy")
    contextEmb_file = os.path.join(data_path, r"TransE_context2vec_100.npy")
    # Skip the network fetch when the demo bundle is already cached.
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            data_path,
            "mind-demo.zip",
        )
    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        user_history_file=user_history_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        contextEmb_file=contextEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    assert hparams is not None
    model = DKN(hparams, DKNTextIterator)
    assert model.logit is not None
    assert model.update is not None
    assert model.iterator is not None
    ### test DKN's item2item version
    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        contextEmb_file=contextEmb_file,
        epochs=1,
        is_clip_norm=True,
        max_grad_norm=0.5,
        his_size=20,
        MODEL_DIR=os.path.join(data_path, "save_models"),
        use_entity=True,
        use_context=True,
    )
    hparams.neg_num = 9
    assert hparams is not None
    model_item2item = DKNItem2Item(hparams, DKNItem2itemTextIterator)
    assert model_item2item.pred_logits is not None
    assert model_item2item.update is not None
    assert model_item2item.iterator is not None
def test_load_yaml_file(tmp):
    """load_yaml must parse the downloaded NRMS config into a non-None object."""
    utils_dir = os.path.join(tmp, "utils")
    yaml_file = os.path.join(utils_dir, r"nrms.yaml")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/newsrec/",
            utils_dir,
            "MINDdemo_utils.zip",
        )
    assert load_yaml(yaml_file) is not None
def test_prepare_hparams(deeprec_resource_path, must_exist_attributes):
    """prepare_hparams must expose every required xDeepFM attribute."""
    data_path = os.path.join(deeprec_resource_path, "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )
    assert hasattr(prepare_hparams(yaml_file), must_exist_attributes)
def test_load_yaml_file(resource_path):
    """load_yaml must parse the xDeepFM config into a non-None object."""
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )
    assert load_yaml(yaml_file) is not None
def test_load_yaml_file(deeprec_resource_path):
    """load_yaml must parse the xDeepFM config into a non-None object."""
    data_path = os.path.join(deeprec_resource_path, "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )
    assert load_yaml(yaml_file) is not None
def test_DKN_iterator(resource_path):
    """DKNTextIterator must yield dict batches from the sample data file."""
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    data_file = os.path.join(data_path, "final_test_with_entity.txt")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "dknresources.zip",
        )
    # Embedding files are irrelevant for parsing, so pass empty paths.
    hparams = prepare_hparams(yaml_file, wordEmb_file="", entityEmb_file="")
    iterator = DKNTextIterator(hparams, tf.Graph())
    assert iterator is not None
    for batch in iterator.load_data_from_file(data_file):
        assert isinstance(batch, dict)
def test_xdeepfm_component_definition(deeprec_resource_path):
    """An XDeepFM model must define its logit, update, and iterator nodes."""
    data_path = os.path.join(deeprec_resource_path, "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )
    model = XDeepFMModel(prepare_hparams(yaml_file), FFMTextIterator)
    for node in (model.logit, model.update, model.iterator):
        assert node is not None
def test_lstur_component_definition(tmp):
    """An LSTUR model must assemble all of its Keras components."""
    yaml_file = os.path.join(tmp, "lstur.yaml")
    wordEmb_file = os.path.join(tmp, "embedding.npy")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/newsrec/", tmp, "lstur.zip"
        )
    hparams = prepare_hparams(yaml_file, wordEmb_file=wordEmb_file, epochs=1)
    model = LSTURModel(hparams, NewsIterator)
    for component in (model.model, model.scorer, model.loss, model.train_optimizer):
        assert component is not None
def test_FFM_iterator(deeprec_resource_path):
    """FFMTextIterator must yield tuple batches from the sample FFM file."""
    data_path = os.path.join(deeprec_resource_path, "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
    data_file = os.path.join(data_path, "sample_FFM_data.txt")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )
    iterator = FFMTextIterator(prepare_hparams(yaml_file), tf.Graph())
    assert iterator is not None
    for batch in iterator.load_data_from_file(data_file):
        assert isinstance(batch, tuple)
def test_FFM_iterator(resource_path):
    """FFMTextIterator must yield dict batches from the sample FFM file."""
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
    data_file = os.path.join(data_path, "sample_FFM_data.txt")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )
    iterator = FFMTextIterator(prepare_hparams(yaml_file), tf.Graph())
    assert iterator is not None
    for batch in iterator.load_data_from_file(data_file):
        assert isinstance(batch, dict)
def test_xdeepfm_component_definition(resource_path):
    """An XDeepFM model must define its logit, update, and iterator nodes."""
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )
    model = XDeepFMModel(prepare_hparams(yaml_file), FFMTextIterator)
    for node in (model.logit, model.update, model.iterator):
        assert node is not None
def test_model_npa(tmp):
    """Train and evaluate an NPA model end to end on the demo data."""
    yaml_file = os.path.join(tmp, "npa.yaml")
    train_file = os.path.join(tmp, "train.txt")
    valid_file = os.path.join(tmp, "test.txt")
    wordEmb_file = os.path.join(tmp, "embedding.npy")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/newsrec/", tmp, "npa.zip"
        )
    hparams = prepare_hparams(yaml_file, wordEmb_file=wordEmb_file, epochs=1)
    assert hparams is not None
    model = NPAModel(hparams, NewsIterator)
    # Evaluate first, then make sure fit() hands back the model object.
    assert model.run_eval(valid_file) is not None
    assert isinstance(model.fit(train_file, valid_file), BaseModel)
def test_model_xdeepfm(resource_path):
    """Evaluate, train, and predict with an XDeepFM model on sample data."""
    data_path = os.path.join(resource_path, '../resources/deeprec/xdeepfm')
    yaml_file = os.path.join(data_path, r'xDeepFM.yaml')
    data_file = os.path.join(data_path, r'sample_FFM_data.txt')
    output_file = os.path.join(data_path, r'output.txt')
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            r'https://recodatasets.blob.core.windows.net/deeprec/',
            data_path,
            'xdeepfmresources.zip',
        )
    hparams = prepare_hparams(yaml_file, learning_rate=0.01)
    assert hparams is not None
    model = XDeepFMModel(hparams, FFMTextIterator)
    # The same file serves as train, validation, and prediction input.
    assert model.run_eval(data_file) is not None
    assert isinstance(model.fit(data_file, data_file), BaseModel)
    assert model.predict(data_file, output_file) is not None
def test_model_xdeepfm(deeprec_resource_path):
    """Evaluate, train, and predict with an XDeepFM model on sample data."""
    data_path = os.path.join(deeprec_resource_path, "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
    data_file = os.path.join(data_path, "sample_FFM_data.txt")
    output_file = os.path.join(data_path, "output.txt")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )
    hparams = prepare_hparams(yaml_file, learning_rate=0.01)
    assert hparams is not None
    input_creator = FFMTextIterator
    model = XDeepFMModel(hparams, input_creator)
    # The same file serves as train, validation, and prediction input.
    assert model.run_eval(data_file) is not None
    assert isinstance(model.fit(data_file, data_file), BaseModel)
    assert model.predict(data_file, output_file) is not None
def test_model_xdeepfm(resource_path):
    """Evaluate, train, and predict with an XDeepFM model on sample data."""
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
    data_file = os.path.join(data_path, "sample_FFM_data.txt")
    output_file = os.path.join(data_path, "output.txt")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )
    hparams = prepare_hparams(yaml_file, learning_rate=0.01)
    assert hparams is not None
    model = XDeepFMModel(hparams, FFMTextIterator)
    assert model.run_eval(data_file) is not None
    assert isinstance(model.fit(data_file, data_file), BaseModel)
    assert model.predict(data_file, output_file) is not None
def download_data(mind_type="small"):
    """Download the MIND dataset splits and utils for *mind_type* if missing.

    Relies on module-level helpers (get_mind_data_set, get_data_path,
    get_path, get_yaml_path, get_root_path) defined elsewhere in this file
    or package; each download is skipped when its target already exists.
    """
    mind_url, mind_train_dataset, mind_dev_dataset, mind_utils = get_mind_data_set(mind_type)
    data_path = get_data_path(mind_type)
    train_news_file, _ = get_path("train", mind_type)
    valid_news_file, _ = get_path("valid", mind_type)
    test_news_file, _ = get_path("test", mind_type)
    if not os.path.exists(train_news_file):
        download_deeprec_resources(mind_url, os.path.join(data_path, 'train'), mind_train_dataset)
    if not os.path.exists(valid_news_file):
        download_deeprec_resources(mind_url, os.path.join(data_path, 'valid'), mind_dev_dataset)
    if mind_type == "large":
        # NOTE(review): the *dev* dataset archive is downloaded into the 'test'
        # folder here; confirm whether a dedicated test archive was intended.
        if not os.path.exists(test_news_file):
            download_deeprec_resources(mind_url, os.path.join(data_path, 'test'), mind_dev_dataset)
    if not os.path.exists(get_yaml_path()):
        utils_url = r'https://recodatasets.blob.core.windows.net/newsrec/'
        download_deeprec_resources(utils_url, os.path.join(get_root_path(), 'utils'), mind_utils)
def test_model_dkn(resource_path):
    """Fit and evaluate a DKN model on the small labeled sample file."""
    data_path = os.path.join(resource_path, '../resources/deeprec/dkn')
    yaml_file = os.path.join(data_path, r'dkn.yaml')
    # One file plays both the train and the validation role.
    train_file = valid_file = os.path.join(data_path, r'final_test_with_entity.txt')
    wordEmb_file = os.path.join(data_path, r'word_embeddings_100.npy')
    entityEmb_file = os.path.join(data_path, r'TransE_entity2vec_100.npy')
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            r'https://recodatasets.blob.core.windows.net/deeprec/',
            data_path,
            'dknresources.zip',
        )
    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    model = DKN(hparams, DKNTextIterator)
    assert isinstance(model.fit(train_file, valid_file), BaseModel)
    assert model.run_eval(valid_file) is not None
def test_naml_iterator(mind_resource_path):
    """MINDAllIterator must yield 11-key dict batches in train and test modes."""
    base_url = r"https://recodatasets.z20.web.core.windows.net/newsrec/"
    utils_dir = os.path.join(mind_resource_path, "utils")
    train_news_file = os.path.join(mind_resource_path, "train", r"news.tsv")
    train_behaviors_file = os.path.join(mind_resource_path, "train", r"behaviors.tsv")
    valid_news_file = os.path.join(mind_resource_path, "valid", r"news.tsv")
    valid_behaviors_file = os.path.join(mind_resource_path, "valid", r"behaviors.tsv")
    yaml_file = os.path.join(utils_dir, r"naml.yaml")
    # Fetch any split that is not already cached on disk.
    if not os.path.exists(train_news_file):
        download_deeprec_resources(
            base_url, os.path.join(mind_resource_path, "train"), "MINDdemo_train.zip"
        )
    if not os.path.exists(valid_news_file):
        download_deeprec_resources(
            base_url, os.path.join(mind_resource_path, "valid"), "MINDdemo_dev.zip"
        )
    if not os.path.exists(yaml_file):
        download_deeprec_resources(base_url, utils_dir, "MINDdemo_utils.zip")
    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=os.path.join(utils_dir, "embedding_all.npy"),
        wordDict_file=os.path.join(utils_dir, "word_dict_all.pkl"),
        userDict_file=os.path.join(utils_dir, "uid2index.pkl"),
        vertDict_file=os.path.join(utils_dir, "vert_dict.pkl"),
        subvertDict_file=os.path.join(utils_dir, "subvert_dict.pkl"),
        epochs=1,
        batch_size=1024,
    )
    train_iterator = MINDAllIterator(hparams, hparams.npratio)
    test_iterator = MINDAllIterator(hparams, -1)
    assert train_iterator is not None
    # Only the first batch of each mode is inspected.
    for batch in train_iterator.load_data_from_file(train_news_file, train_behaviors_file):
        assert isinstance(batch, dict)
        assert len(batch) == 11
        break
    assert test_iterator is not None
    for batch in test_iterator.load_data_from_file(valid_news_file, valid_behaviors_file):
        assert isinstance(batch, dict)
        assert len(batch) == 11
        break
def test_DKN_iterator(resource_path):
    """DKNTextIterator must parse the MIND demo train file into dict batches.

    Fix: download the demo bundle only when the config file is missing,
    matching the guarded-download pattern used by the sibling tests.
    """
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    data_file = os.path.join(data_path, r"train_mind_demo.txt")
    news_feature_file = os.path.join(data_path, r"doc_feature.txt")
    user_history_file = os.path.join(data_path, r"user_history.txt")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    # Skip the network fetch when the demo bundle is already cached.
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "mind-demo.zip",
        )
    # Embedding files are not needed for parsing, so pass empty paths.
    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        user_history_file=user_history_file,
        wordEmb_file="",
        entityEmb_file="",
        contextEmb_file="",
    )
    iterator = DKNTextIterator(hparams, tf.Graph())
    assert iterator is not None
    for res, impression, data_size in iterator.load_data_from_file(data_file):
        assert isinstance(res, dict)
def test_model_naml(mind_resource_path):
    """Train and evaluate a NAML model end to end on the MIND demo data."""
    base_url = r"https://recodatasets.z20.web.core.windows.net/newsrec/"
    utils_dir = os.path.join(mind_resource_path, "utils")
    train_news_file = os.path.join(mind_resource_path, "train", r"news.tsv")
    train_behaviors_file = os.path.join(mind_resource_path, "train", r"behaviors.tsv")
    valid_news_file = os.path.join(mind_resource_path, "valid", r"news.tsv")
    valid_behaviors_file = os.path.join(mind_resource_path, "valid", r"behaviors.tsv")
    yaml_file = os.path.join(utils_dir, r"naml.yaml")
    # Fetch any split that is not already cached on disk.
    if not os.path.exists(train_news_file):
        download_deeprec_resources(
            base_url, os.path.join(mind_resource_path, "train"), "MINDdemo_train.zip"
        )
    if not os.path.exists(valid_news_file):
        download_deeprec_resources(
            base_url, os.path.join(mind_resource_path, "valid"), "MINDdemo_dev.zip"
        )
    if not os.path.exists(yaml_file):
        download_deeprec_resources(base_url, utils_dir, "MINDdemo_utils.zip")
    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=os.path.join(utils_dir, "embedding_all.npy"),
        wordDict_file=os.path.join(utils_dir, "word_dict_all.pkl"),
        userDict_file=os.path.join(utils_dir, "uid2index.pkl"),
        vertDict_file=os.path.join(utils_dir, "vert_dict.pkl"),
        subvertDict_file=os.path.join(utils_dir, "subvert_dict.pkl"),
        epochs=1,
    )
    model = NAMLModel(hparams, MINDAllIterator)
    assert model.run_eval(valid_news_file, valid_behaviors_file) is not None
    trained = model.fit(
        train_news_file, train_behaviors_file, valid_news_file, valid_behaviors_file
    )
    assert isinstance(trained, BaseModel)
def test_news_iterator(tmp):
    """MINDIterator must yield 5-key dict batches in train and test modes."""
    base_url = r"https://recodatasets.blob.core.windows.net/newsrec/"
    utils_dir = os.path.join(tmp, "utils")
    train_news_file = os.path.join(tmp, "train", r"news.tsv")
    train_behaviors_file = os.path.join(tmp, "train", r"behaviors.tsv")
    valid_news_file = os.path.join(tmp, "valid", r"news.tsv")
    valid_behaviors_file = os.path.join(tmp, "valid", r"behaviors.tsv")
    yaml_file = os.path.join(utils_dir, r"nrms.yaml")
    # Fetch any split that is not already cached on disk.
    if not os.path.exists(train_news_file):
        download_deeprec_resources(base_url, os.path.join(tmp, "train"), "MINDdemo_train.zip")
    if not os.path.exists(valid_news_file):
        download_deeprec_resources(base_url, os.path.join(tmp, "valid"), "MINDdemo_dev.zip")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(base_url, utils_dir, "MINDdemo_utils.zip")
    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=os.path.join(utils_dir, "embedding.npy"),
        wordDict_file=os.path.join(utils_dir, "word_dict.pkl"),
        userDict_file=os.path.join(utils_dir, "uid2index.pkl"),
        epochs=1,
    )
    train_iterator = MINDIterator(hparams, hparams.npratio)
    test_iterator = MINDIterator(hparams, -1)
    assert train_iterator is not None
    # Only the first batch of each mode is inspected.
    for batch in train_iterator.load_data_from_file(train_news_file, train_behaviors_file):
        assert isinstance(batch, dict)
        assert len(batch) == 5
        break
    assert test_iterator is not None
    for batch in test_iterator.load_data_from_file(valid_news_file, valid_behaviors_file):
        assert isinstance(batch, dict)
        assert len(batch) == 5
        break
def test_model_nrms(tmp):
    """Train and evaluate an NRMS model end to end on the MIND demo data."""
    base_url = r"https://recodatasets.blob.core.windows.net/newsrec/"
    utils_dir = os.path.join(tmp, "utils")
    train_news_file = os.path.join(tmp, "train", r"news.tsv")
    train_behaviors_file = os.path.join(tmp, "train", r"behaviors.tsv")
    valid_news_file = os.path.join(tmp, "valid", r"news.tsv")
    valid_behaviors_file = os.path.join(tmp, "valid", r"behaviors.tsv")
    yaml_file = os.path.join(utils_dir, r"nrms.yaml")
    # Fetch any split that is not already cached on disk.
    if not os.path.exists(train_news_file):
        download_deeprec_resources(base_url, os.path.join(tmp, "train"), "MINDdemo_train.zip")
    if not os.path.exists(valid_news_file):
        download_deeprec_resources(base_url, os.path.join(tmp, "valid"), "MINDdemo_dev.zip")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(base_url, utils_dir, "MINDdemo_utils.zip")
    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=os.path.join(utils_dir, "embedding.npy"),
        wordDict_file=os.path.join(utils_dir, "word_dict.pkl"),
        userDict_file=os.path.join(utils_dir, "uid2index.pkl"),
        epochs=1,
    )
    assert hparams is not None
    model = NRMSModel(hparams, MINDIterator)
    assert model.run_eval(valid_news_file, valid_behaviors_file) is not None
    trained = model.fit(
        train_news_file, train_behaviors_file, valid_news_file, valid_behaviors_file
    )
    assert isinstance(trained, BaseModel)
# Resolve MIND data-file locations under data_path (defined earlier in the script).
train_news_file = os.path.join(data_path, 'train', r'news.tsv')
train_behaviors_file = os.path.join(data_path, 'train', r'behaviors.tsv')
valid_news_file = os.path.join(data_path, 'valid', r'news.tsv')
# NOTE(review): a reduced behaviors file — presumably for faster validation; confirm upstream.
fast_valid_behaviors_file = os.path.join(data_path, 'valid', r'behaviors.small.tsv')
wordEmb_file = os.path.join(data_path, "utils", "embedding.npy")
userDict_file = os.path.join(data_path, "utils", "uid2index.pkl")
wordDict_file = os.path.join(data_path, "utils", "word_dict.pkl")
# NOTE(review): config is npa.yaml but the model dir and log line say NRMS —
# confirm the intended configuration file.
yaml_file = os.path.join(data_path, "utils", r'npa.yaml')
model_dir = os.path.join(data_path, "nrms")
mind_url, mind_train_dataset, mind_dev_dataset, mind_utils = get_mind_data_set(MIND_type)
# Download each split only if its news file is not already present locally.
if not os.path.exists(train_news_file):
    download_deeprec_resources(mind_url, os.path.join(data_path, 'train'), mind_train_dataset)
if not os.path.exists(valid_news_file):
    download_deeprec_resources(mind_url, os.path.join(data_path, 'valid'), mind_dev_dataset)
if not os.path.exists(yaml_file):
    download_deeprec_resources(r'https://recodatasets.blob.core.windows.net/newsrec/',
                               os.path.join(data_path, 'utils'), mind_utils)
# epochs is expected to be defined earlier in the script.
hparams = prepare_hparams(yaml_file, wordEmb_file=wordEmb_file,
                          wordDict_file=wordDict_file, userDict_file=userDict_file,
                          epochs=epochs, show_step=10)
print("[NRMS] Config,", hparams)
iterator = MINDIterator
# Locate the mind-demo-dkn data relative to the current working directory.
data_path = os.path.abspath(os.path.join(os.getcwd(), "../../data/mind-demo-dkn"))
data_path1 = os.path.abspath(os.path.join(os.getcwd(), "../../data"))
print('data_path:', data_path)
print('data_path1:', data_path1)
# Input files shipped inside the mind-demo-dkn bundle.
yaml_file = os.path.join(data_path, r'dkn.yaml')
train_file = os.path.join(data_path, r'train_mind_demo.txt')
valid_file = os.path.join(data_path, r'valid_mind_demo.txt')
test_file = os.path.join(data_path, r'test_mind_demo.txt')
news_feature_file = os.path.join(data_path, r'doc_feature.txt')
user_history_file = os.path.join(data_path, r'user_history.txt')
wordEmb_file = os.path.join(data_path, r'word_embeddings_100.npy')
entityEmb_file = os.path.join(data_path, r'TransE_entity2vec_100.npy')
contextEmb_file = os.path.join(data_path, r'TransE_context2vec_100.npy')
# Download into the parent data dir; the zip presumably unpacks into
# mind-demo-dkn/ — confirm against the archive layout.
if not os.path.exists(yaml_file):
    download_deeprec_resources(r'https://recodatasets.blob.core.windows.net/deeprec/',
                               data_path1, 'mind-demo-dkn.zip')
## Create hyper-parameters
epochs = 10
history_size = 50
batch_size = 100
hparams = prepare_hparams(yaml_file,
                          news_feature_file = news_feature_file,
                          user_history_file = user_history_file,
                          wordEmb_file=wordEmb_file,
                          entityEmb_file=entityEmb_file,
                          contextEmb_file=contextEmb_file,
                          epochs=epochs,
                          history_size=history_size,
                          batch_size=batch_size)
# Report the FFM field/feature statistics produced by the converter
# (defined earlier in the script).
print('There are in total {0} fields and {1} features.'.format(
    converter.field_count, converter.feature_count))
# Stage all working files inside a throwaway temp directory.
tmpdir = TemporaryDirectory()
data_path = tmpdir.name
yaml_file = os.path.join(data_path, YAML_FILE_NAME)
train_file = os.path.join(data_path, TRAIN_FILE_NAME)
valid_file = os.path.join(data_path, VALID_FILE_NAME)
test_file = os.path.join(data_path, TEST_FILE_NAME)
model_file = os.path.join(data_path, MODEL_FILE_NAME)
output_file = os.path.join(data_path, OUTPUT_FILE_NAME)
# tmpdir is always fresh, so this effectively downloads on every run.
if not os.path.exists(yaml_file):
    download_deeprec_resources(
        r'https://recodatasets.blob.core.windows.net/deeprec/',
        data_path,
        'xdeepfmresources.zip')
# Training task
ffm_model = xl.create_ffm()  # Use field-aware factorization machine (ffm)
ffm_model.setTrain(train_file)  # Set the path of training dataset
ffm_model.setValidate(valid_file)  # Set the path of validation dataset
# Parameters:
# 0. task: binary classification
# 1. learning rate: 0.2
# 2. regular lambda: 0.002
# 3. evaluation metric: auc
# 4. number of epochs: 10
# 5. optimization method: sgd
param = {