Example #1
0
def test_load_pandas_df(
    size,
    num_samples,
    num_movies,
    movie_example,
    title_example,
    genres_example,
    year_example,
    tmp,
):
    """Check load_pandas_df for a MovieLens ``size`` in three scenarios.

    1. A custom three-column header with ``tmp`` as the local cache path.
    2. A five-column header together with title, genres and year columns;
       this call is expected to emit a warning.
    3. Default arguments only.
    """
    # Scenario 1: custom header, files cached under ``tmp``.
    short_header = ["a", "b", "c"]
    ratings = load_pandas_df(size=size, local_cache_path=tmp, header=short_header)
    assert len(ratings) == num_samples
    assert len(ratings.columns) == len(short_header)
    # The raw zip, the rating file and the item file should all be cached.
    cached_entries = os.listdir(tmp)
    assert len(cached_entries) == 3

    # Scenario 2: item features (title, genres, release year) are requested.
    long_header = ["a", "b", "c", "d", "e"]
    expected_features = (
        ("Title", title_example),
        ("Genres", genres_example),
        ("Year", year_example),
    )
    with pytest.warns(Warning):
        with_features = load_pandas_df(
            size=size,
            header=long_header,
            local_cache_path=tmp,
            title_col="Title",
            genres_col="Genres",
            year_col="Year",
        )
        assert len(with_features) == num_samples
        # 4 header columns (user, item, rating, timestamp) plus 3 feature columns.
        assert len(with_features.columns) == 7
        # Only the first 4 header names are used, so the fifth is dropped.
        assert "e" not in with_features.columns
        # Two records of the same item must carry identical item features,
        # and those features must match the expected example values.
        is_example_movie = with_features["b"] == movie_example
        same_movie_rows = with_features.loc[is_example_movie][:2]
        for feature_col, expected_value in expected_features:
            observed = same_movie_rows[feature_col].values
            assert observed[0] == observed[1]
            assert observed[0] == expected_value

    # Scenario 3: default arguments yield user, item, rating and timestamp.
    defaults_df = load_pandas_df(size)
    assert len(defaults_df) == num_samples
    assert len(defaults_df.columns) == 4
def test_load_pandas_df_mock_100__with_custom_param__succeed():
    """Load the "mock100" dataset with title and genre columns requested.

    Checks that both requested columns come back as pandas Series, that the
    frame has 100 rows, and that the first row holds the expected mock values.
    """
    df = load_pandas_df(
        "mock100", title_col=DEFAULT_TITLE_COL, genres_col=DEFAULT_GENRE_COL
    )
    # isinstance (rather than an exact type comparison) also accepts Series
    # subclasses, which is the idiomatic way to check a type.
    assert isinstance(df[DEFAULT_TITLE_COL], Series)
    assert isinstance(df[DEFAULT_GENRE_COL], Series)
    assert len(df) == 100
    # The first row's genre value must contain a "|" character.
    assert "|" in df.loc[0, DEFAULT_GENRE_COL]
    # The first row's title is expected to be the literal "foo".
    assert df.loc[0, DEFAULT_TITLE_COL] == "foo"
def test_model_lightgcn(deeprec_resource_path, deeprec_config_path):
    """Run a LightGCN model end to end on MovieLens 100k.

    The model is built from ``lightgcn.yaml`` with ``epochs=1``, then
    evaluated, fitted, asked for recommendations on the test split, and
    finally asked to infer embeddings into two CSV paths, both of which must
    be non-empty afterwards.
    """
    resource_dir = os.path.join(deeprec_resource_path, "dkn")
    config_path = os.path.join(deeprec_config_path, "lightgcn.yaml")
    user_embedding_path = os.path.join(resource_dir, "user_embeddings.csv")
    item_embedding_path = os.path.join(resource_dir, "item_embeddings.csv")

    ratings = movielens.load_pandas_df(size="100k")
    train_split, test_split = python_stratified_split(ratings, ratio=0.75)

    dataset = ImplicitCF(train=train_split, test=test_split)

    model = LightGCN(prepare_hparams(config_path, epochs=1), dataset)

    eval_result = model.run_eval()
    assert eval_result is not None

    model.fit()

    recommendations = model.recommend_k_items(test_split)
    assert recommendations is not None

    model.infer_embedding(user_embedding_path, item_embedding_path)
    for embedding_path in (user_embedding_path, item_embedding_path):
        assert os.path.getsize(embedding_path) != 0
def test_lightgcn_component_definition(deeprec_config_path):
    """Check that a freshly constructed LightGCN exposes its components.

    The model is built from ``lightgcn.yaml`` with ``embed_size=64`` on a
    75/25 stratified split of MovieLens 100k. No training is run; only the
    presence of attributes and the embedding shapes are checked.
    """
    config_path = os.path.join(deeprec_config_path, "lightgcn.yaml")

    ratings = movielens.load_pandas_df(size="100k")
    train_split, test_split = python_stratified_split(ratings, ratio=0.75)

    dataset = ImplicitCF(train=train_split, test=test_split)

    embed_size = 64
    model = LightGCN(prepare_hparams(config_path, embed_size=embed_size), dataset)

    assert model.norm_adj is not None

    # User and item embedding shapes follow the dataset's user/item counts.
    expected_user_shape = [dataset.n_users, embed_size]
    expected_item_shape = [dataset.n_items, embed_size]
    assert model.ua_embeddings.shape == expected_user_shape
    assert model.ia_embeddings.shape == expected_item_shape

    # The remaining components only need to be defined.
    for component in (
        "u_g_embeddings",
        "pos_i_g_embeddings",
        "neg_i_g_embeddings",
        "batch_ratings",
        "loss",
        "opt",
    ):
        assert getattr(model, component) is not None
def test_load_pandas_df_mock_100__with_default_param__succeed():
    """Load the "mock100" dataset with default arguments.

    Checks that the result is a pandas DataFrame with 100 rows and that no
    (user, item) pair appears more than once.
    """
    df = load_pandas_df("mock100")
    # isinstance (rather than an exact type comparison) also accepts
    # DataFrame subclasses, which is the idiomatic way to check a type.
    assert isinstance(df, pandas.DataFrame)
    assert len(df) == 100
    assert not df[[DEFAULT_USER_COL, DEFAULT_ITEM_COL]].duplicated().any()