The snippets below are collected from tests and scripts built on the deeprec components of Microsoft Recommenders. They assume a common set of imports, sketched here under the older reco_utils layout (which the config paths inside the snippets suggest); newer releases expose the same names under recommenders.models and recommenders.datasets.

# Shared imports assumed by the snippets below; module paths vary across
# Recommenders releases, so treat these as a best-effort reconstruction.
import os

import tensorflow as tf

from reco_utils.common.timer import Timer
from reco_utils.dataset import movielens
from reco_utils.dataset.amazon_reviews import data_preprocessing, download_and_extract
from reco_utils.dataset.python_splitters import python_stratified_split
from reco_utils.recommender.deeprec.DataModel.ImplicitCF import ImplicitCF
from reco_utils.recommender.deeprec.deeprec_utils import (
    download_deeprec_resources,
    prepare_hparams,
)
from reco_utils.recommender.deeprec.io.dkn_item2item_iterator import (
    DKNItem2itemTextIterator,
)
from reco_utils.recommender.deeprec.io.dkn_iterator import DKNTextIterator
from reco_utils.recommender.deeprec.io.iterator import FFMTextIterator
from reco_utils.recommender.deeprec.io.nextitnet_iterator import NextItNetIterator
from reco_utils.recommender.deeprec.io.sequential_iterator import SequentialIterator
from reco_utils.recommender.deeprec.models.base_model import BaseModel
from reco_utils.recommender.deeprec.models.dkn import DKN
from reco_utils.recommender.deeprec.models.dkn_item2item import DKNItem2Item
from reco_utils.recommender.deeprec.models.graphrec.lightgcn import LightGCN
from reco_utils.recommender.deeprec.models.sequential.nextitnet import NextItNetModel
from reco_utils.recommender.deeprec.models.sequential.sli_rec import SLI_RECModel
from reco_utils.recommender.deeprec.models.sequential.sum import SUMModel
from reco_utils.recommender.deeprec.models.xDeepFM import XDeepFMModel

Example #1
def test_DKN_iterator(deeprec_resource_path):
    data_path = os.path.join(deeprec_resource_path, "dkn")
    data_file = os.path.join(data_path, r"train_mind_demo.txt")
    news_feature_file = os.path.join(data_path, r"doc_feature.txt")
    user_history_file = os.path.join(data_path, r"user_history.txt")
    wordEmb_file = os.path.join(data_path, "word_embeddings_100.npy")
    entityEmb_file = os.path.join(data_path, "TransE_entity2vec_100.npy")
    contextEmb_file = os.path.join(data_path, "TransE_context2vec_100.npy")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    download_deeprec_resources(
        "https://recodatasets.z20.web.core.windows.net/deeprec/",
        data_path,
        "mind-demo.zip",
    )

    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        user_history_file=user_history_file,
        wordEmb_file="",
        entityEmb_file="",
        contextEmb_file="",
    )
    iterator = DKNTextIterator(hparams, tf.Graph())
    assert iterator is not None
    for res, impression, data_size in iterator.load_data_from_file(data_file):
        assert isinstance(res, dict)

    ###  test DKN item2item iterator
    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        contextEmb_file=contextEmb_file,
        epochs=1,
        is_clip_norm=True,
        max_grad_norm=0.5,
        his_size=20,
        MODEL_DIR=os.path.join(data_path, "save_models"),
        use_entity=True,
        use_context=True,
    )
    hparams.neg_num = 9

    iterator_item2item = DKNItem2itemTextIterator(hparams, tf.Graph())
    assert iterator_item2item is not None
    test_round = 3
    for res, impression, data_size in iterator_item2item.load_data_from_file(
        os.path.join(data_path, "doc_list.txt")
    ):
        assert isinstance(res, dict)
        test_round -= 1
        if test_round <= 0:
            break
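The fixtures these tests receive (resource_path, deeprec_resource_path, deeprec_config_path) are not shown in this excerpt. A minimal conftest.py sketch, with paths assumed from how the fixtures are used here rather than taken from the source:

# conftest.py -- hypothetical fixture definitions mirroring observed usage;
# the real repository ships its own versions.
import os

import pytest


@pytest.fixture
def resource_path():
    # directory containing this conftest.py
    return os.path.dirname(os.path.realpath(__file__))


@pytest.fixture
def deeprec_resource_path(resource_path):
    # shared download/cache directory for deeprec test resources
    return os.path.join(resource_path, "..", "resources", "deeprec")


@pytest.fixture
def deeprec_config_path(resource_path):
    # packaged deeprec YAML configs (older reco_utils layout)
    return os.path.join(
        resource_path, "..", "..", "reco_utils", "recommender", "deeprec", "config"
    )

Example #2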
def test_dkn_component_definition(deeprec_resource_path):
    data_path = os.path.join(deeprec_resource_path, "dkn")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    news_feature_file = os.path.join(data_path, r"doc_feature.txt")
    user_history_file = os.path.join(data_path, r"user_history.txt")
    wordEmb_file = os.path.join(data_path, r"word_embeddings_100.npy")
    entityEmb_file = os.path.join(data_path, r"TransE_entity2vec_100.npy")
    contextEmb_file = os.path.join(data_path, r"TransE_context2vec_100.npy")

    download_deeprec_resources(
        "https://recodatasets.z20.web.core.windows.net/deeprec/",
        data_path,
        "mind-demo.zip",
    )

    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        user_history_file=user_history_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        contextEmb_file=contextEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    assert hparams is not None
    model = DKN(hparams, DKNTextIterator)

    assert model.logit is not None
    assert model.update is not None
    assert model.iterator is not None

    ###  test DKN's item2item version
    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        contextEmb_file=contextEmb_file,
        epochs=1,
        is_clip_norm=True,
        max_grad_norm=0.5,
        his_size=20,
        MODEL_DIR=os.path.join(data_path, "save_models"),
        use_entity=True,
        use_context=True,
    )
    hparams.neg_num = 9
    assert hparams is not None
    model_item2item = DKNItem2Item(hparams, DKNItem2itemTextIterator)

    assert model_item2item.pred_logits is not None
    assert model_item2item.update is not None
    assert model_item2item.iterator is not None
Example #3
def test_dkn_component_definition(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    wordEmb_file = os.path.join(data_path, "word_embeddings_100.npy")
    entityEmb_file = os.path.join(data_path, "TransE_entity2vec_100.npy")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "dknresources.zip",
        )

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        epochs=5,
        learning_rate=0.0001,
    )
    assert hparams is not None
    model = DKN(hparams, DKNTextIterator)

    assert model.logit is not None
    assert model.update is not None
    assert model.iterator is not None
Example #4
def test_dkn_component_definition(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec",
                             "dkn")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    wordEmb_file = os.path.join(data_path, "word_embeddings_100.npy")
    entityEmb_file = os.path.join(data_path, "TransE_entity2vec_100.npy")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "dknresources.zip",
        )

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    assert hparams is not None
    model = DKN(hparams, DKNTextIterator)

    assert model.logit is not None
    assert model.update is not None
    assert model.iterator is not None
Example #5
def test_model_lightgcn(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec",
                             "dkn")
    yaml_file = os.path.join(
        resource_path,
        "..",
        "..",
        "reco_utils",
        "recommender",
        "deeprec",
        "config",
        "lightgcn.yaml",
    )
    user_file = os.path.join(data_path, r"user_embeddings.csv")
    item_file = os.path.join(data_path, r"item_embeddings.csv")

    df = movielens.load_pandas_df(size="100k")
    train, test = python_stratified_split(df, ratio=0.75)

    data = ImplicitCF(train=train, test=test)

    hparams = prepare_hparams(yaml_file, epochs=1)
    model = LightGCN(hparams, data)

    assert model.run_eval() is not None
    model.fit()
    assert model.recommend_k_items(test) is not None
    model.infer_embedding(user_file, item_file)
    assert os.path.getsize(user_file) != 0
    assert os.path.getsize(item_file) != 0
Example #6
def test_model_dkn(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec",
                             "dkn")
    yaml_file = os.path.join(data_path, r'dkn.yaml')
    train_file = os.path.join(data_path, r'train_mind_demo.txt')
    valid_file = os.path.join(data_path, r'valid_mind_demo.txt')
    test_file = os.path.join(data_path, r'test_mind_demo.txt')
    news_feature_file = os.path.join(data_path, r'doc_feature.txt')
    user_history_file = os.path.join(data_path, r'user_history.txt')
    wordEmb_file = os.path.join(data_path, r'word_embeddings_100.npy')
    entityEmb_file = os.path.join(data_path, r'TransE_entity2vec_100.npy')
    contextEmb_file = os.path.join(data_path, r'TransE_context2vec_100.npy')

    download_deeprec_resources(
        "https://recodatasets.blob.core.windows.net/deeprec/",
        data_path,
        "mind-demo.zip",
    )

    hparams = prepare_hparams(yaml_file,
                              news_feature_file=news_feature_file,
                              user_history_file=user_history_file,
                              wordEmb_file=wordEmb_file,
                              entityEmb_file=entityEmb_file,
                              contextEmb_file=contextEmb_file,
                              epochs=1,
                              learning_rate=0.0001)
    input_creator = DKNTextIterator
    model = DKN(hparams, input_creator)

    assert isinstance(model.fit(train_file, valid_file), BaseModel)
    assert model.run_eval(valid_file) is not None
Example #7
def test_model_dkn(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    train_file = os.path.join(data_path, "final_test_with_entity.txt")
    valid_file = os.path.join(data_path, "final_test_with_entity.txt")
    wordEmb_file = os.path.join(data_path, "word_embeddings_100.npy")
    entityEmb_file = os.path.join(data_path, "TransE_entity2vec_100.npy")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "dknresources.zip",
        )

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    input_creator = DKNTextIterator
    model = DKN(hparams, input_creator)

    assert isinstance(model.fit(train_file, valid_file), BaseModel)
    assert model.run_eval(valid_file) is not None
Example #8
def test_lightgcn_component_definition(resource_path):
    yaml_file = os.path.join(
        resource_path,
        "..",
        "..",
        "reco_utils",
        "recommender",
        "deeprec",
        "config",
        "lightgcn.yaml",
    )

    df = movielens.load_pandas_df(size="100k")
    train, test = python_stratified_split(df, ratio=0.75)

    data = ImplicitCF(train=train, test=test)

    embed_size = 64
    hparams = prepare_hparams(yaml_file, embed_size=embed_size)
    model = LightGCN(hparams, data)

    assert model.norm_adj is not None
    assert model.ua_embeddings.shape == [data.n_users, embed_size]
    assert model.ia_embeddings.shape == [data.n_items, embed_size]
    assert model.u_g_embeddings is not None
    assert model.pos_i_g_embeddings is not None
    assert model.neg_i_g_embeddings is not None
    assert model.batch_ratings is not None
    assert model.loss is not None
    assert model.opt is not None
Example #9
def test_dkn_component_definition(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec",
                             "dkn")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    news_feature_file = os.path.join(data_path, r'doc_feature.txt')
    user_history_file = os.path.join(data_path, r'user_history.txt')
    wordEmb_file = os.path.join(data_path, r'word_embeddings_100.npy')
    entityEmb_file = os.path.join(data_path, r'TransE_entity2vec_100.npy')
    contextEmb_file = os.path.join(data_path, r'TransE_context2vec_100.npy')

    download_deeprec_resources(
        "https://recodatasets.blob.core.windows.net/deeprec/",
        data_path,
        "mind-demo.zip",
    )

    hparams = prepare_hparams(yaml_file,
                              news_feature_file=news_feature_file,
                              user_history_file=user_history_file,
                              wordEmb_file=wordEmb_file,
                              entityEmb_file=entityEmb_file,
                              contextEmb_file=contextEmb_file,
                              epochs=1,
                              learning_rate=0.0001)
    assert hparams is not None
    model = DKN(hparams, DKNTextIterator)

    assert model.logit is not None
    assert model.update is not None
    assert model.iterator is not None
Example #10
def test_model_dkn(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec",
                             "dkn")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    train_file = os.path.join(data_path, "final_test_with_entity.txt")
    valid_file = os.path.join(data_path, "final_test_with_entity.txt")
    wordEmb_file = os.path.join(data_path, "word_embeddings_100.npy")
    entityEmb_file = os.path.join(data_path, "TransE_entity2vec_100.npy")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "dknresources.zip",
        )

    hparams = prepare_hparams(
        yaml_file,
        wordEmb_file=wordEmb_file,
        entityEmb_file=entityEmb_file,
        epochs=1,
        learning_rate=0.0001,
    )
    input_creator = DKNTextIterator
    model = DKN(hparams, input_creator)

    assert isinstance(model.fit(train_file, valid_file), BaseModel)
    assert model.run_eval(valid_file) is not None
Example #11
def test_Sequential_Iterator(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "slirec")
    yaml_file = os.path.join(
        resource_path,
        "..",
        "..",
        "reco_utils",
        "recommender",
        "deeprec",
        "config",
        "sli_rec.yaml",
    )
    train_file = os.path.join(data_path, r"train_data")

    if not os.path.exists(train_file):
        valid_file = os.path.join(data_path, r"valid_data")
        test_file = os.path.join(data_path, r"test_data")
        user_vocab = os.path.join(data_path, r"user_vocab.pkl")
        item_vocab = os.path.join(data_path, r"item_vocab.pkl")
        cate_vocab = os.path.join(data_path, r"category_vocab.pkl")

        reviews_name = "reviews_Movies_and_TV_5.json"
        meta_name = "meta_Movies_and_TV.json"
        reviews_file = os.path.join(data_path, reviews_name)
        meta_file = os.path.join(data_path, meta_name)
        valid_num_ngs = (
            4  # number of negative instances with a positive instance for validation
        )
        test_num_ngs = (
            9  # number of negative instances with a positive instance for testing
        )
        sample_rate = (
            0.01  # sample a small item set for training and testing here for example
        )

        input_files = [
            reviews_file,
            meta_file,
            train_file,
            valid_file,
            test_file,
            user_vocab,
            item_vocab,
            cate_vocab,
        ]
        download_and_extract(reviews_name, reviews_file)
        download_and_extract(meta_name, meta_file)
        data_preprocessing(
            *input_files,
            sample_rate=sample_rate,
            valid_num_ngs=valid_num_ngs,
            test_num_ngs=test_num_ngs
        )

    hparams = prepare_hparams(yaml_file)
    iterator = SequentialIterator(hparams, tf.Graph())
    assert iterator is not None
    for res in iterator.load_data_from_file(train_file):
        assert isinstance(res, dict)
Example #12
def test_model_sum(deeprec_resource_path, deeprec_config_path):
    data_path = os.path.join(deeprec_resource_path, "slirec")
    yaml_file = os.path.join(deeprec_config_path, "sum.yaml")
    train_file = os.path.join(data_path, r"train_data")
    valid_file = os.path.join(data_path, r"valid_data")
    test_file = os.path.join(data_path, r"test_data")
    output_file = os.path.join(data_path, "output.txt")
    train_num_ngs = (
        4  # number of negative instances with a positive instance for training
    )
    valid_num_ngs = (
        4  # number of negative instances with a positive instance for validation
    )
    test_num_ngs = (
        9  # number of negative instances with a positive instance for testing
    )

    if not os.path.exists(train_file):
        user_vocab = os.path.join(data_path, r"user_vocab.pkl")
        item_vocab = os.path.join(data_path, r"item_vocab.pkl")
        cate_vocab = os.path.join(data_path, r"category_vocab.pkl")
        reviews_name = "reviews_Movies_and_TV_5.json"
        meta_name = "meta_Movies_and_TV.json"
        reviews_file = os.path.join(data_path, reviews_name)
        meta_file = os.path.join(data_path, meta_name)
        sample_rate = (
            0.005  # sample a small item set for training and testing here for example
        )

        input_files = [
            reviews_file,
            meta_file,
            train_file,
            valid_file,
            test_file,
            user_vocab,
            item_vocab,
            cate_vocab,
        ]
        download_and_extract(reviews_name, reviews_file)
        download_and_extract(meta_name, meta_file)
        data_preprocessing(*input_files,
                           sample_rate=sample_rate,
                           valid_num_ngs=valid_num_ngs,
                           test_num_ngs=test_num_ngs)

    hparams = prepare_hparams(yaml_file,
                              learning_rate=0.01,
                              epochs=1,
                              train_num_ngs=train_num_ngs)
    assert hparams is not None

    input_creator = SequentialIterator
    model = SUMModel(hparams, input_creator)
    assert model.run_eval(valid_file, num_ngs=valid_num_ngs) is not None
    assert isinstance(
        model.fit(train_file, valid_file, valid_num_ngs=valid_num_ngs),
        BaseModel)
    assert model.predict(valid_file, output_file) is not None
Example #13
def test_prepare_hparams(must_exist_attributes, resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )
    hparams = prepare_hparams(yaml_file)
    assert hasattr(hparams, must_exist_attributes)
Example #14
def test_prepare_hparams(deeprec_resource_path, must_exist_attributes):
    data_path = os.path.join(deeprec_resource_path, "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )
    hparams = prepare_hparams(yaml_file)
    assert hasattr(hparams, must_exist_attributes)
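In the two tests above, must_exist_attributes is supplied through pytest parametrization rather than a fixture. A plausible sketch, where the attribute names are assumptions drawn from typical xDeepFM configs, not from this excerpt:

# Hypothetical parametrization for must_exist_attributes.
import pytest


@pytest.mark.parametrize(
    "must_exist_attributes", ["FEATURE_COUNT", "data_format", "dim"]
)
def test_prepare_hparams(deeprec_resource_path, must_exist_attributes):
    ...  # body as in the second test above

Example #15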
# Method excerpted from a LightGCN wrapper class (class definition not shown).
def train(self, dataset: RecommendationDataset) -> None:
    hparams = prepare_hparams(
        "./recommenders/reco_utils/recommender/deeprec/config/lightgcn.yaml",
        n_layers=3,
        batch_size=1024,
        epochs=self.epochs,
        learning_rate=0.005,
        top_k=self.top_size,
    )
    self.model = LightGCN(hparams, self._wrap_dataset(dataset), seed=42)
    self.model.fit()
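The _wrap_dataset helper is not shown above; presumably it adapts the caller's RecommendationDataset to the ImplicitCF structure that LightGCN consumes. A minimal sketch, assuming the dataset exposes train/test DataFrames (the field names are assumptions):

# Hypothetical adapter; train_df/test_df are assumed field names.
def _wrap_dataset(self, dataset: RecommendationDataset) -> ImplicitCF:
    return ImplicitCF(train=dataset.train_df, test=dataset.test_df, seed=42)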
Example #16
def test_DKN_iterator(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    data_file = os.path.join(data_path, "final_test_with_entity.txt")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "dknresources.zip",
        )

    hparams = prepare_hparams(yaml_file, wordEmb_file="", entityEmb_file="")
    iterator = DKNTextIterator(hparams, tf.Graph())
    assert iterator is not None
    for res in iterator.load_data_from_file(data_file):
        assert isinstance(res, dict)
Example #17
def test_FFM_iterator(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
    data_file = os.path.join(data_path, "sample_FFM_data.txt")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )

    hparams = prepare_hparams(yaml_file)
    iterator = FFMTextIterator(hparams, tf.Graph())
    assert iterator is not None
    for res in iterator.load_data_from_file(data_file):
        assert isinstance(res, dict)
Example #18
def test_xdeepfm_component_definition(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )

    hparams = prepare_hparams(yaml_file)
    model = XDeepFMModel(hparams, FFMTextIterator)

    assert model.logit is not None
    assert model.update is not None
    assert model.iterator is not None
Example #19
def test_xdeepfm_component_definition(deeprec_resource_path):
    data_path = os.path.join(deeprec_resource_path, "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )

    hparams = prepare_hparams(yaml_file)
    model = XDeepFMModel(hparams, FFMTextIterator)

    assert model.logit is not None
    assert model.update is not None
    assert model.iterator is not None
Example #20
def test_FFM_iterator(deeprec_resource_path):
    data_path = os.path.join(deeprec_resource_path, "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
    data_file = os.path.join(data_path, "sample_FFM_data.txt")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )

    hparams = prepare_hparams(yaml_file)
    iterator = FFMTextIterator(hparams, tf.Graph())
    assert iterator is not None
    for res in iterator.load_data_from_file(data_file):
        assert isinstance(res, tuple)
Example #21
def test_model_xdeepfm(resource_path):
    data_path = os.path.join(resource_path, '../resources/deeprec/xdeepfm')
    yaml_file = os.path.join(data_path, r'xDeepFM.yaml')
    data_file = os.path.join(data_path, r'sample_FFM_data.txt')
    output_file = os.path.join(data_path, r'output.txt')

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            r'https://recodatasets.blob.core.windows.net/deeprec/', data_path,
            'xdeepfmresources.zip')

    hparams = prepare_hparams(yaml_file, learning_rate=0.01)
    assert hparams is not None

    input_creator = FFMTextIterator
    model = XDeepFMModel(hparams, input_creator)

    assert model.run_eval(data_file) is not None
    assert isinstance(model.fit(data_file, data_file), BaseModel)
    assert model.predict(data_file, output_file) is not None
Example #22
def test_model_xdeepfm(deeprec_resource_path):
    data_path = os.path.join(deeprec_resource_path, "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
    data_file = os.path.join(data_path, "sample_FFM_data.txt")
    output_file = os.path.join(data_path, "output.txt")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )

    hparams = prepare_hparams(yaml_file, learning_rate=0.01)
    assert hparams is not None

    input_creator = FFMTextIterator
    model = XDeepFMModel(hparams, input_creator)

    assert model.run_eval(data_file) is not None
    assert isinstance(model.fit(data_file, data_file), BaseModel)
    assert model.predict(data_file, output_file) is not None
Example #23
def test_model_xdeepfm(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "xdeepfm")
    yaml_file = os.path.join(data_path, "xDeepFM.yaml")
    data_file = os.path.join(data_path, "sample_FFM_data.txt")
    output_file = os.path.join(data_path, "output.txt")

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            "https://recodatasets.blob.core.windows.net/deeprec/",
            data_path,
            "xdeepfmresources.zip",
        )

    hparams = prepare_hparams(yaml_file, learning_rate=0.01)
    assert hparams is not None

    input_creator = FFMTextIterator
    model = XDeepFMModel(hparams, input_creator)

    assert model.run_eval(data_file) is not None
    assert isinstance(model.fit(data_file, data_file), BaseModel)
    assert model.predict(data_file, output_file) is not None
Example #24
def test_model_dkn(resource_path):
    data_path = os.path.join(resource_path, '../resources/deeprec/dkn')
    yaml_file = os.path.join(data_path, r'dkn.yaml')
    train_file = os.path.join(data_path, r'final_test_with_entity.txt')
    valid_file = os.path.join(data_path, r'final_test_with_entity.txt')
    wordEmb_file = os.path.join(data_path, r'word_embeddings_100.npy')
    entityEmb_file = os.path.join(data_path, r'TransE_entity2vec_100.npy')

    if not os.path.exists(yaml_file):
        download_deeprec_resources(
            r'https://recodatasets.blob.core.windows.net/deeprec/', data_path,
            'dknresources.zip')

    hparams = prepare_hparams(yaml_file,
                              wordEmb_file=wordEmb_file,
                              entityEmb_file=entityEmb_file,
                              epochs=1,
                              learning_rate=0.0001)
    input_creator = DKNTextIterator
    model = DKN(hparams, input_creator)

    assert isinstance(model.fit(train_file, valid_file), BaseModel)
    assert model.run_eval(valid_file) is not None
Example #25
def test_DKN_iterator(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec", "dkn")
    data_file = os.path.join(data_path, r"train_mind_demo.txt")
    news_feature_file = os.path.join(data_path, r"doc_feature.txt")
    user_history_file = os.path.join(data_path, r"user_history.txt")
    yaml_file = os.path.join(data_path, "dkn.yaml")
    download_deeprec_resources(
        "https://recodatasets.blob.core.windows.net/deeprec/",
        data_path,
        "mind-demo.zip",
    )

    hparams = prepare_hparams(
        yaml_file,
        news_feature_file=news_feature_file,
        user_history_file=user_history_file,
        wordEmb_file="",
        entityEmb_file="",
        contextEmb_file="",
    )
    iterator = DKNTextIterator(hparams, tf.Graph())
    assert iterator is not None
    for res, impression, data_size in iterator.load_data_from_file(data_file):
        assert isinstance(res, dict)
Example #26
#     news_entities,
#     train_entities,
#     valid_entities,
#     max_sentence=10,
#     word_embedding_dim=100,
# )
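This excerpt opens mid-script (the commented lines above are the tail of an elided call), so several names are assumed to have been defined earlier. A minimal sketch of that missing setup, with illustrative placeholder values:

# Assumed earlier definitions -- placeholder values, not from the source.
data_path = os.path.join("data", "mind")
train_file = os.path.join(data_path, "train_mind.txt")
valid_file = os.path.join(data_path, "valid_mind.txt")
epochs = 10
history_size = 50
batch_size = 100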

news_feature_file = os.path.join(data_path, 'doc_feature.txt')
word_embeddings_file = os.path.join(data_path, 'word_embeddings_5w_100.npy')
user_history_file = os.path.join(data_path, 'user_history.txt')
entity_embeddings_file = os.path.join(data_path,
                                      'entity_embeddings_5w_100.npy')
yaml_file = os.path.join(data_path, 'dkn_MINDlarge.yaml')
# yaml_file = maybe_download(url="https://recodatasets.blob.core.windows.net/deeprec/deeprec/dkn/dkn_MINDsmall.yaml",
#                            work_directory=data_path)
hparams = prepare_hparams(yaml_file,
                          news_feature_file=news_feature_file,
                          user_history_file=user_history_file,
                          wordEmb_file=word_embeddings_file,
                          entityEmb_file=entity_embeddings_file,
                          epochs=epochs,
                          history_size=history_size,
                          MODEL_DIR=os.path.join(data_path, 'save_models'),
                          batch_size=batch_size)
model = DKN(hparams, DKNTextIterator)

model.fit(train_file, valid_file)

res = model.run_eval(valid_file)
print(res)
Example #27
def test_slirec_component_definition(resource_path):
    data_path = os.path.join(resource_path, "..", "resources", "deeprec",
                             "slirec")
    yaml_file = os.path.join(
        resource_path,
        "..",
        "..",
        "reco_utils",
        "recommender",
        "deeprec",
        "config",
        "sli_rec.yaml",
    )
    yaml_file_nextitnet = os.path.join(
        resource_path,
        "..",
        "..",
        "reco_utils",
        "recommender",
        "deeprec",
        "config",
        "nextitnet.yaml",
    )
    train_file = os.path.join(data_path, r"train_data")

    if not os.path.exists(train_file):
        valid_file = os.path.join(data_path, r"valid_data")
        test_file = os.path.join(data_path, r"test_data")
        user_vocab = os.path.join(data_path, r"user_vocab.pkl")
        item_vocab = os.path.join(data_path, r"item_vocab.pkl")
        cate_vocab = os.path.join(data_path, r"category_vocab.pkl")

        reviews_name = "reviews_Movies_and_TV_5.json"
        meta_name = "meta_Movies_and_TV.json"
        reviews_file = os.path.join(data_path, reviews_name)
        meta_file = os.path.join(data_path, meta_name)
        valid_num_ngs = (
            4  # number of negative instances with a positive instance for validation
        )
        test_num_ngs = (
            9  # number of negative instances with a positive instance for testing
        )
        sample_rate = (
            0.01  # sample a small item set for training and testing here for example
        )

        input_files = [
            reviews_file,
            meta_file,
            train_file,
            valid_file,
            test_file,
            user_vocab,
            item_vocab,
            cate_vocab,
        ]
        download_and_extract(reviews_name, reviews_file)
        download_and_extract(meta_name, meta_file)
        data_preprocessing(*input_files,
                           sample_rate=sample_rate,
                           valid_num_ngs=valid_num_ngs,
                           test_num_ngs=test_num_ngs)

    hparams = prepare_hparams(
        yaml_file, train_num_ngs=4
    )  # confirm the train_num_ngs when initializing a SLi_Rec model.
    model = SLI_RECModel(hparams, SequentialIterator)
    # nextitnet model
    hparams_nextitnet = prepare_hparams(yaml_file_nextitnet, train_num_ngs=4)
    model_nextitnet = NextItNetModel(hparams_nextitnet, NextItNetIterator)

    assert model.logit is not None
    assert model.update is not None
    assert model.iterator is not None

    assert model_nextitnet.logit is not None
    assert model_nextitnet.update is not None
    assert model_nextitnet.iterator is not None
Example #28
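This excerpt also starts mid-script; the names below are assumed earlier definitions with illustrative placeholder values (the archive name suggests the MIND DKN demo data):

# Assumed earlier definitions -- placeholder values, not from the source.
data_path = os.path.join("data", "mind-demo-dkn")
yaml_file = os.path.join(data_path, "dkn.yaml")
news_feature_file = os.path.join(data_path, "doc_feature.txt")
user_history_file = os.path.join(data_path, "user_history.txt")
train_file = os.path.join(data_path, "train_mind_demo.txt")
valid_file = os.path.join(data_path, "valid_mind_demo.txt")
test_file = os.path.join(data_path, "test_mind_demo.txt")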
wordEmb_file = os.path.join(data_path, r'word_embeddings_100.npy')
entityEmb_file = os.path.join(data_path, r'TransE_entity2vec_100.npy')
contextEmb_file = os.path.join(data_path, r'TransE_context2vec_100.npy')
if not os.path.exists(yaml_file):
    download_deeprec_resources(r'https://recodatasets.blob.core.windows.net/deeprec/', data_path, 'mind-demo-dkn.zip')


## Create hyper-parameters
epochs = 10
history_size = 50
batch_size = 100
hparams = prepare_hparams(yaml_file,
                          news_feature_file=news_feature_file,
                          user_history_file=user_history_file,
                          wordEmb_file=wordEmb_file,
                          entityEmb_file=entityEmb_file,
                          contextEmb_file=contextEmb_file,
                          epochs=epochs,
                          history_size=history_size,
                          batch_size=batch_size)
print(hparams)


## Train the DKN model
model = DKN(hparams, DKNTextIterator)
print(model.run_eval(valid_file))
model.fit(train_file, valid_file)


## Evaluate the DKN model
res = model.run_eval(test_file)
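Example #29

This xDeepFM script fragment assumes a few globals defined in an elided part of the script; illustrative placeholders:

# Assumed earlier definitions -- placeholder values, not from the source.
data_path = os.path.join("data", "xdeepfm")
EPOCHS = 10
BATCH_SIZE = 4096
RANDOM_SEED = 42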
yaml_file = os.path.join(data_path, r'xDeepFM.yaml')
train_file = os.path.join(data_path, r'synthetic_part_0')
valid_file = os.path.join(data_path, r'synthetic_part_1')
test_file = os.path.join(data_path, r'synthetic_part_2')
output_file = os.path.join(data_path, r'output.txt')

if not os.path.exists(yaml_file):
    download_deeprec_resources(r'https://recodatasets.blob.core.windows.net/deeprec/', data_path, 'xdeepfmresources.zip')

print("Data gathering complete")

# 1. prepare hyper-parameters
hparams = prepare_hparams(yaml_file,
                          FEATURE_COUNT=1000,
                          FIELD_COUNT=10,
                          cross_l2=0.0001,
                          embed_l2=0.0001,
                          learning_rate=0.001,
                          epochs=EPOCHS,
                          batch_size=BATCH_SIZE)
print("Hyper-parameters: ")
print(hparams)
# 2. create data loader
# designate a data iterator for xDeepFM model (FFMTextIterator)
input_creator = FFMTextIterator

# 3. create model
model = XDeepFMModel(hparams, input_creator, seed=RANDOM_SEED)
# we can also load a pre-trained model with model.load_model(r'model_path')

# untrained model's performance
print("Untrained model's performance: {}".format(model.run_eval(test_file)))
test_file = os.path.join(data_path, r'criteo_tiny_test')

if not os.path.exists(yaml_file):
    download_deeprec_resources(
        r'https://recodatasets.blob.core.windows.net/deeprec/', data_path,
        'xdeepfmresources.zip')

# set hyper-parameters
hparams = prepare_hparams(yaml_file,
                          FEATURE_COUNT=2300000,
                          FIELD_COUNT=39,
                          cross_l2=0.01,
                          embed_l2=0.01,
                          layer_l2=0.01,
                          learning_rate=0.002,
                          batch_size=BATCH_SIZE,
                          epochs=EPOCHS,
                          cross_layer_sizes=[20, 10],
                          init_value=0.1,
                          layer_sizes=[20, 20],
                          use_Linear_part=True,
                          use_CIN_part=True,
                          use_DNN_part=True)

# make model
model = XDeepFMModel(hparams, FFMTextIterator, seed=RANDOM_SEED)

# train model
model.fit(train_file, valid_file)
# profiling
model.train_timeliner.save('xDeepFM-timeliner.json')
Example #31
def train_lightgcn(params, data):
    hparams = prepare_hparams(**params)
    model = LightGCN(hparams, data)
    with Timer() as t:
        model.fit()
    return model, t
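A hedged call-site sketch for train_lightgcn; the params values and YAML location are assumptions:

# Hypothetical usage; the yaml path and hyper-parameter values are illustrative.
df = movielens.load_pandas_df(size="100k")
train, test = python_stratified_split(df, ratio=0.75)
data = ImplicitCF(train=train, test=test)

params = {"yaml_file": "lightgcn.yaml", "epochs": 1, "batch_size": 1024}
model, timer = train_lightgcn(params, data)
print("Training took {:.1f} seconds".format(timer.interval))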