def test_DKN_iterator(deeprec_resource_path):
    """Smoke-test ``DKNTextIterator`` and ``DKNItem2itemTextIterator``.

    The ``mind-demo.zip`` archive is requested into the ``dkn`` resource
    folder, then each iterator is built on a fresh ``tf.Graph`` and the
    batches it yields are checked to be dictionaries.
    """
    data_path = os.path.join(deeprec_resource_path, "dkn")

    def _in_data_dir(file_name):
        # All resources used by this test live directly under ``data_path``.
        return os.path.join(data_path, file_name)

    data_file = _in_data_dir(r"train_mind_demo.txt")
    news_feature_file = _in_data_dir(r"doc_feature.txt")
    user_history_file = _in_data_dir(r"user_history.txt")
    yaml_file = _in_data_dir("dkn.yaml")

    download_deeprec_resources(
        "https://recodatasets.z20.web.core.windows.net/deeprec/",
        data_path,
        "mind-demo.zip",
    )

    # Plain DKN iterator: the embedding file paths are passed as empty strings.
    text_hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        user_history_file=user_history_file,
        wordEmb_file="",
        entityEmb_file="",
        contextEmb_file="",
    )
    text_iterator = DKNTextIterator(text_hparams, tf.Graph())
    assert text_iterator is not None
    for batch, _impression, _data_size in text_iterator.load_data_from_file(
        data_file
    ):
        assert isinstance(batch, dict)

    # test DKN item2item iterator
    item2item_overrides = {
        "news_feature_file": news_feature_file,
        "wordEmb_file": _in_data_dir("word_embeddings_100.npy"),
        "entityEmb_file": _in_data_dir("TransE_entity2vec_100.npy"),
        "contextEmb_file": _in_data_dir("TransE_context2vec_100.npy"),
        "epochs": 1,
        "is_clip_norm": True,
        "max_grad_norm": 0.5,
        "his_size": 20,
        "MODEL_DIR": _in_data_dir("save_models"),
        "use_entity": True,
        "use_context": True,
    }
    item2item_hparams = prepare_hparams(yaml_file, **item2item_overrides)
    item2item_hparams.neg_num = 9

    iterator_item2item = DKNItem2itemTextIterator(item2item_hparams, tf.Graph())
    assert iterator_item2item is not None

    # Only the first few batches are inspected.
    max_batches = 3
    batches = iterator_item2item.load_data_from_file(_in_data_dir("doc_list.txt"))
    for seen, (batch, _impression, _data_size) in enumerate(batches, start=1):
        assert isinstance(batch, dict)
        if seen >= max_batches:
            break
def test_model_dkn(deeprec_resource_path):
    """Train DKN for one epoch on the MIND demo data and evaluate it.

    The ``mind-demo.zip`` archive is requested into the ``dkn`` resource
    folder before the hyper-parameters are prepared. The test passes when
    ``fit`` returns a ``BaseModel`` and ``run_eval`` returns something other
    than ``None``.
    """
    data_path = os.path.join(deeprec_resource_path, "dkn")
    yaml_file = os.path.join(data_path, r"dkn.yaml")
    train_file = os.path.join(data_path, r"train_mind_demo.txt")
    valid_file = os.path.join(data_path, r"valid_mind_demo.txt")
    # NOTE: the unused ``test_file`` local was dropped; this test only reads
    # the train and validation splits.
    news_feature_file = os.path.join(data_path, r"doc_feature.txt")
    user_history_file = os.path.join(data_path, r"user_history.txt")
    wordEmb_file = os.path.join(data_path, r"word_embeddings_100.npy")
    entityEmb_file = os.path.join(data_path, r"TransE_entity2vec_100.npy")
    contextEmb_file = os.path.join(data_path, r"TransE_context2vec_100.npy")

    download_deeprec_resources(
        "https://recodatasets.z20.web.core.windows.net/deeprec/",
        data_path,
        "mind-demo.zip",
    )

    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        user_history_file=user_history_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        contextEmb_file=contextEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )

    # The iterator class itself (not an instance) is handed to the model.
    input_creator = DKNTextIterator
    model = DKN(hparams, input_creator)

    assert isinstance(model.fit(train_file, valid_file), BaseModel)
    assert model.run_eval(valid_file) is not None
def test_model_sum(deeprec_resource_path, deeprec_config_path):
    """Evaluate, train for one epoch and predict with ``SUMModel``.

    If the training file is not present under the ``slirec`` resource folder,
    the reviews and meta JSON files are downloaded and preprocessed first.
    """
    data_path = os.path.join(deeprec_resource_path, "slirec")

    def _in_data_dir(file_name):
        return os.path.join(data_path, file_name)

    yaml_file = os.path.join(deeprec_config_path, "sum.yaml")
    train_file = _in_data_dir(r"train_data")
    valid_file = _in_data_dir(r"valid_data")
    output_file = _in_data_dir("output.txt")

    # number of negative instances with a positive instance for training
    train_num_ngs = 4
    # number of negative instances with a positive instance for validation
    valid_num_ngs = 4
    # number of negative instances with a positive instance for testing
    test_num_ngs = 9

    if not os.path.exists(train_file):
        reviews_name = "reviews_Movies_and_TV_5.json"
        meta_name = "meta_Movies_and_TV.json"
        reviews_file = _in_data_dir(reviews_name)
        meta_file = _in_data_dir(meta_name)

        download_and_extract(reviews_name, reviews_file)
        download_and_extract(meta_name, meta_file)

        data_preprocessing(
            reviews_file,
            meta_file,
            train_file,
            valid_file,
            _in_data_dir(r"test_data"),
            _in_data_dir(r"user_vocab.pkl"),
            _in_data_dir(r"item_vocab.pkl"),
            _in_data_dir(r"category_vocab.pkl"),
            # sample a small item set for training and testing here for example
            sample_rate=0.005,
            valid_num_ngs=valid_num_ngs,
            test_num_ngs=test_num_ngs,
        )

    hparams = prepare_hparams(
        yaml_file, learning_rate=0.01, epochs=1, train_num_ngs=train_num_ngs
    )
    assert hparams is not None

    model = SUMModel(hparams, SequentialIterator)

    # Evaluation is exercised once before any training has happened.
    assert model.run_eval(valid_file, num_ngs=valid_num_ngs) is not None

    fitted = model.fit(train_file, valid_file, valid_num_ngs=valid_num_ngs)
    assert isinstance(fitted, BaseModel)

    assert model.predict(valid_file, output_file) is not None
def test_dkn_component_definition(dkn_files):
    """Check that a freshly built ``DKN`` model exposes its core components.

    The ``dkn_files`` fixture is unpacked as a 7-tuple; its first element is
    not used by this test.
    """
    # Load params from fixture
    (
        _,
        config_path,
        news_features,
        user_history,
        word_emb,
        entity_emb,
        context_emb,
    ) = dkn_files

    # Test DKN model
    overrides = {
        "news_feature_file": news_features,
        "user_history_file": user_history,
        "wordEmb_file": word_emb,
        "entityEmb_file": entity_emb,
        "contextEmb_file": context_emb,
        "epochs": 1,
        "learning_rate": 0.0001,
    }
    hparams = prepare_hparams(config_path, **overrides)
    assert hparams is not None

    model = DKN(hparams, DKNTextIterator)

    for component in ("logit", "update", "iterator"):
        assert getattr(model, component) is not None
def test_sum_component_definition(sequential_files, deeprec_config_path):
    """Check that a freshly built ``SUMModel`` exposes its core components.

    The ``sequential_files`` fixture is unpacked as
    ``(data_path, user_vocab, item_vocab, cate_vocab)``.
    """
    data_path, user_vocab, item_vocab, cate_vocab = sequential_files
    config_path = os.path.join(deeprec_config_path, "sum.yaml")

    # SUM model
    overrides = {
        "train_num_ngs": 4,
        "embed_l2": 0.0,
        "layer_l2": 0.0,
        "learning_rate": 0.001,
        "epochs": 1,
        "MODEL_DIR": os.path.join(data_path, "model"),
        "SUMMARIES_DIR": os.path.join(data_path, "summary"),
        "user_vocab": user_vocab,
        "item_vocab": item_vocab,
        "cate_vocab": cate_vocab,
        "need_sample": True,
    }
    hparams_sum = prepare_hparams(config_path, **overrides)
    assert hparams_sum is not None

    model_sum = SUMModel(hparams_sum, SequentialIterator)

    for component in ("logit", "update", "iterator"):
        assert getattr(model_sum, component) is not None
def test_dkn_item2item_component_definition(dkn_files):
    """Check that a freshly built ``DKNItem2Item`` model exposes its components.

    The ``dkn_files`` fixture is unpacked as a 7-tuple; its fourth element is
    not used by this test.
    """
    # Load params from fixture
    (
        data_path,
        config_path,
        news_features,
        _,
        word_emb,
        entity_emb,
        context_emb,
    ) = dkn_files

    # Test DKN's item2item version
    overrides = {
        "news_feature_file": news_features,
        "wordEmb_file": word_emb,
        "entityEmb_file": entity_emb,
        "contextEmb_file": context_emb,
        "epochs": 1,
        "is_clip_norm": True,
        "max_grad_norm": 0.5,
        "his_size": 20,
        "MODEL_DIR": os.path.join(data_path, "save_models"),
        "use_entity": True,
        "use_context": True,
    }
    hparams = prepare_hparams(config_path, **overrides)
    assert hparams is not None
    # ``neg_num`` is set on the prepared object rather than passed as an override.
    hparams.neg_num = 9

    model_item2item = DKNItem2Item(hparams, DKNItem2itemTextIterator)

    for component in ("pred_logits", "update", "iterator"):
        assert getattr(model_item2item, component) is not None
def test_prepare_hparams(deeprec_resource_path, must_exist_attributes):
    """Check that ``prepare_hparams`` yields the attribute named by the fixture.

    The xDeepFM resources are downloaded only when the YAML config is missing.
    """
    resource_dir = os.path.join(deeprec_resource_path, "xdeepfm")
    config_path = os.path.join(resource_dir, "xDeepFM.yaml")

    config_missing = not os.path.exists(config_path)
    if config_missing:
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            resource_dir,
            "xdeepfmresources.zip",
        )

    prepared = prepare_hparams(config_path)
    assert hasattr(prepared, must_exist_attributes)
def test_Sequential_Iterator(deeprec_resource_path, deeprec_config_path):
    """Smoke-test ``SequentialIterator`` on the ``slirec`` training file.

    If the training file is missing, the reviews and meta JSON files are
    downloaded and preprocessed first. Every item yielded by the iterator is
    then checked to be a dictionary.
    """
    data_path = os.path.join(deeprec_resource_path, "slirec")

    def _in_data_dir(file_name):
        return os.path.join(data_path, file_name)

    yaml_file = os.path.join(deeprec_config_path, "sli_rec.yaml")
    train_file = _in_data_dir(r"train_data")

    if not os.path.exists(train_file):
        reviews_name = "reviews_Movies_and_TV_5.json"
        meta_name = "meta_Movies_and_TV.json"
        reviews_file = _in_data_dir(reviews_name)
        meta_file = _in_data_dir(meta_name)

        download_and_extract(reviews_name, reviews_file)
        download_and_extract(meta_name, meta_file)

        data_preprocessing(
            reviews_file,
            meta_file,
            train_file,
            _in_data_dir(r"valid_data"),
            _in_data_dir(r"test_data"),
            _in_data_dir(r"user_vocab.pkl"),
            _in_data_dir(r"item_vocab.pkl"),
            _in_data_dir(r"category_vocab.pkl"),
            # sample a small item set for training and testing here for example
            sample_rate=0.01,
            # number of negative instances with a positive instance for validation
            valid_num_ngs=4,
            # number of negative instances with a positive instance for testing
            test_num_ngs=9,
        )

    hparams = prepare_hparams(yaml_file)
    seq_iterator = SequentialIterator(hparams, tf.Graph())
    assert seq_iterator is not None

    for batch in seq_iterator.load_data_from_file(train_file):
        assert isinstance(batch, dict)
def test_FFM_iterator(deeprec_resource_path):
    """Smoke-test ``FFMTextIterator`` on the sample FFM data file.

    The xDeepFM resources are downloaded only when the YAML config is missing.
    Every item yielded by the iterator is checked to be a tuple.
    """
    resource_dir = os.path.join(deeprec_resource_path, "xdeepfm")
    config_path = os.path.join(resource_dir, "xDeepFM.yaml")
    sample_path = os.path.join(resource_dir, "sample_FFM_data.txt")

    config_missing = not os.path.exists(config_path)
    if config_missing:
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            resource_dir,
            "xdeepfmresources.zip",
        )

    ffm_iterator = FFMTextIterator(prepare_hparams(config_path), tf.Graph())
    assert ffm_iterator is not None

    for batch in ffm_iterator.load_data_from_file(sample_path):
        assert isinstance(batch, tuple)
def test_xdeepfm_component_definition(deeprec_resource_path):
    """Check that a freshly built ``XDeepFMModel`` exposes its core components.

    The xDeepFM resources are downloaded only when the YAML config is missing.
    """
    resource_dir = os.path.join(deeprec_resource_path, "xdeepfm")
    config_path = os.path.join(resource_dir, "xDeepFM.yaml")

    config_missing = not os.path.exists(config_path)
    if config_missing:
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            resource_dir,
            "xdeepfmresources.zip",
        )

    model = XDeepFMModel(prepare_hparams(config_path), FFMTextIterator)

    for component in ("logit", "update", "iterator"):
        assert getattr(model, component) is not None
def test_model_lightgcn(deeprec_resource_path, deeprec_config_path):
    """Run LightGCN end to end on MovieLens 100k for one epoch.

    Covers evaluation, training, top-k recommendation and exporting the user
    and item embeddings to CSV files, which must be non-empty afterwards.
    """
    # The embedding files are written under the ``dkn`` resource folder.
    output_dir = os.path.join(deeprec_resource_path, "dkn")
    config_path = os.path.join(deeprec_config_path, "lightgcn.yaml")
    user_file = os.path.join(output_dir, r"user_embeddings.csv")
    item_file = os.path.join(output_dir, r"item_embeddings.csv")

    ratings = movielens.load_pandas_df(size="100k")
    train, test = python_stratified_split(ratings, ratio=0.75)
    dataset = ImplicitCF(train=train, test=test)

    model = LightGCN(prepare_hparams(config_path, epochs=1), dataset)

    # Evaluation is exercised once before any training has happened.
    assert model.run_eval() is not None
    model.fit()
    assert model.recommend_k_items(test) is not None

    model.infer_embedding(user_file, item_file)
    for exported in (user_file, item_file):
        assert os.path.getsize(exported) != 0
def test_lightgcn_component_definition(deeprec_config_path):
    """Check that a freshly built ``LightGCN`` model exposes its components.

    Besides the not-``None`` checks, the user and item embedding shapes are
    compared against ``[n_users, embed_size]`` and ``[n_items, embed_size]``.
    """
    config_path = os.path.join(deeprec_config_path, "lightgcn.yaml")

    ratings = movielens.load_pandas_df(size="100k")
    train, test = python_stratified_split(ratings, ratio=0.75)
    data = ImplicitCF(train=train, test=test)

    embed_size = 64
    model = LightGCN(prepare_hparams(config_path, embed_size=embed_size), data)

    assert model.norm_adj is not None
    assert model.ua_embeddings.shape == [data.n_users, embed_size]
    assert model.ia_embeddings.shape == [data.n_items, embed_size]

    remaining_components = (
        "u_g_embeddings",
        "pos_i_g_embeddings",
        "neg_i_g_embeddings",
        "batch_ratings",
        "loss",
        "opt",
    )
    for component in remaining_components:
        assert getattr(model, component) is not None
def test_model_xdeepfm(deeprec_resource_path):
    """Evaluate, train and predict with ``XDeepFMModel`` on the sample data.

    The xDeepFM resources are downloaded only when the YAML config is missing.
    The same sample file serves as both training and validation input.
    """
    resource_dir = os.path.join(deeprec_resource_path, "xdeepfm")
    config_path = os.path.join(resource_dir, "xDeepFM.yaml")
    sample_path = os.path.join(resource_dir, "sample_FFM_data.txt")
    prediction_path = os.path.join(resource_dir, "output.txt")

    config_missing = not os.path.exists(config_path)
    if config_missing:
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            resource_dir,
            "xdeepfmresources.zip",
        )

    hparams = prepare_hparams(config_path, learning_rate=0.01)
    assert hparams is not None

    model = XDeepFMModel(hparams, FFMTextIterator)

    # Evaluation is exercised once before any training has happened.
    assert model.run_eval(sample_path) is not None

    fitted = model.fit(sample_path, sample_path)
    assert isinstance(fitted, BaseModel)

    assert model.predict(sample_path, prediction_path) is not None
def train_lightgcn(params, data):
    """Build a ``LightGCN`` model from ``params`` and time its ``fit()`` call.

    Args:
        params (dict): Keyword arguments unpacked into ``prepare_hparams``.
        data: Object passed as the second argument to ``LightGCN``.

    Returns:
        tuple: The model after ``fit()`` has run, and the object bound by
        ``with Timer() as ...`` around that call.
    """
    model = LightGCN(prepare_hparams(**params), data)
    with Timer() as fit_timer:
        model.fit()
    return model, fit_timer