Example #1
0
def test_he_sampling_correctness(sample_dataframes):
    """Each row sampled by he_sampling shares exactly one item with the
    user's true (held-out) items."""
    frame, _, _ = sample_dataframes

    # one interaction per (user, item) pair
    frame = frame.drop_duplicates(subset=["user", "item"])

    enc = DatasetEncoder()
    enc.fit(frame["user"], frame["item"])

    train_frame, test_frame = split(frame, n_test=1)

    # all datasets share the encoder and an all-ones (implicit) normalization
    full_dataset = Dataset.from_df(
        frame, encoder=enc, normalize=lambda v: ones_like(v)
    )
    train_dataset = Dataset.from_df(
        train_frame, encoder=enc, normalize=lambda v: ones_like(v)
    )
    test_dataset = Dataset.from_df(
        test_frame, encoder=enc, normalize=lambda v: ones_like(v)
    )

    users, items = he_sampling(test_dataset, train_dataset)

    user_ids, item_ids, _ = full_dataset.to_components()
    all_users, all_items = groupby(user_ids, item_ids)

    for idx, sampled in enumerate(items):
        # sampled row intersects the user's true items in exactly one element
        assert len(intersect1d(sampled, all_items[idx])) == 1
Example #2
0
def test_fit_transform_user_item_succeeds(sample_dataset):
    """fit_transform encodes users/items shape-preservingly and bijectively."""
    raw_users = sample_dataset[:, 0]
    raw_items = sample_dataset[:, 1]

    encoder = DatasetEncoder()
    encoded = encoder.fit_transform(users=raw_users, items=raw_items)

    # encoded arrays mirror the input shapes
    assert encoded["users"].shape == raw_users.shape
    assert encoded["items"].shape == raw_items.shape

    # the encoding does not merge or split distinct values
    assert np.unique(encoded["users"]).shape == np.unique(raw_users).shape
    assert np.unique(encoded["items"]).shape == np.unique(raw_items).shape
Example #3
0
def test_partial_fit_item_user_succeeds(sample_dataset):
    """Fitting incrementally in two batches yields a complete encoding."""
    users, items = sample_dataset[:, 0], sample_dataset[:, 1]
    half = int(users.shape[0] / 2)

    encoder = DatasetEncoder()
    # feed the first half, then the second half
    encoder.partial_fit(users=users[:half], items=items[:half])
    encoder.partial_fit(users=users[half:], items=items[half:])

    encoded = encoder.transform(users=users, items=items)

    # shapes preserved end-to-end
    assert encoded["users"].shape == users.shape
    assert encoded["items"].shape == items.shape

    # every distinct raw value maps to a distinct code
    assert np.unique(encoded["users"]).shape == np.unique(users).shape
    assert np.unique(encoded["items"]).shape == np.unique(items).shape
Example #4
0
def test_shared_encoder_interactions_shapes(sample_dataframes):
    """
    Datasets using the same DatasetEncoder produce interactions matrices of the
    same shape, regardless of which subset of rows each dataset contains.
    """
    interactions, _, _ = sample_dataframes

    encoder = DatasetEncoder()
    encoder.fit(interactions["user"], interactions["item"])

    mid = int(len(interactions) / 2)
    # BUG FIX: the original sliced `[:mid]` for BOTH train and test, so the two
    # datasets held identical rows and the shape assertions were trivially
    # true. Use disjoint halves so the shared encoder actually matters.
    train_interactions = interactions.iloc[:mid]
    test_interactions = interactions.iloc[mid:]

    train_dataset = Dataset.from_df(train_interactions, encoder=encoder)
    test_dataset = Dataset.from_df(test_interactions, encoder=encoder)
    # NOTE(review): the full dataset is built without the shared encoder; it
    # covers every user/item, so its matrix shape should match regardless —
    # confirm this is intentional.
    dataset = Dataset.from_df(interactions)

    assert dataset.interactions.shape == train_dataset.interactions.shape
    assert dataset.interactions.shape == test_dataset.interactions.shape
Example #5
0
def test_to_df_fails_with_mismatched_inputs(sample_dataset):
    """to_df raises ValueError when recommendation rows don't match users."""
    users = sample_dataset[:, 0]
    items = sample_dataset[:, 1]

    encoder = DatasetEncoder()
    encoder.fit(users=users, items=items)
    encoded = encoder.transform(users=users, items=items)

    # deliberately build only half as many recommendation lists as users
    half = int(len(users) / 2)
    recommended = []
    for _ in range(half):
        recommended.append(np.random.choice(encoded["items"], 10, replace=False))

    with pytest.raises(ValueError):
        encoder.to_df(users, recommended)
Example #6
0
def test_transform_fails_for_unknown_elements(sample_dataset):
    """transform raises KeyError for users/items the encoder has not seen."""
    users, items = sample_dataset[:, 0], sample_dataset[:, 1]
    n = int(users.shape[0] / 4)

    a_users, a_items = users[:n], items[:n]
    b_users, b_items = users[n:], items[n:]

    d = DatasetEncoder()
    # fit only on the first quarter, so the remainder contains unseen values
    d.fit(users=a_users, items=a_items)

    # BUG FIX: only the call under test belongs inside the raises block; the
    # original wrapped setup and fit too, so a KeyError from fit would have
    # made the test pass for the wrong reason.
    with pytest.raises(KeyError):
        d.transform(users=b_users, items=b_items)
Example #7
0
def test_fit_reversible_user_item_transform(sample_dataset):
    """Round-tripping through transform then inverse_transform is lossless."""
    users = sample_dataset[:, 0]
    items = sample_dataset[:, 1]

    encoder = DatasetEncoder()
    encoder.fit(users=users, items=items)

    forward = encoder.transform(users=users, items=items)
    restored = encoder.inverse_transform(**forward)

    # shapes and values both survive the round trip
    for key, original in (("users", users), ("items", items)):
        assert restored[key].shape == original.shape
        assert np.all(restored[key] == original)
Example #8
0
def test_to_df_succeeds(sample_dataset):
    """to_df decodes users and recommendation lists back to raw labels."""
    users = sample_dataset[:, 0]
    items = sample_dataset[:, 1]

    encoder = DatasetEncoder()
    encoder.fit(users=users, items=items)
    encoded = encoder.transform(users=users, items=items)

    # one 10-item recommendation list per user
    recommended = [
        np.random.choice(encoded["items"], 10, replace=False) for _ in users
    ]

    frame = encoder.to_df(encoded["users"], recommended, target_col="user")

    # user column decodes back to the original labels, in order
    assert np.all(frame["user"].values == users)

    # every recommendation row decodes back to the raw item labels
    for row_index, row in enumerate(frame.values[:, 1:]):
        decoded = encoder.inverse_transform(items=recommended[row_index])["items"]
        assert np.all(row == decoded)
Example #9
0
def test_inverse_fit_transform_using_metadata(sample_metadata_dataset):
    """Round-trip with user/item tag metadata preserves shapes and values."""
    columns = dict(
        users=sample_metadata_dataset[:, 0],
        items=sample_metadata_dataset[:, 1],
        user_tags=sample_metadata_dataset[:, 2],
        item_tags=sample_metadata_dataset[:, 3],
    )

    encoder = DatasetEncoder()
    encoder.fit(**columns)

    restored = encoder.inverse_transform(**encoder.transform(**columns))

    # every field survives encode/decode unchanged
    for key, original in columns.items():
        assert restored[key].shape == original.shape
        assert np.all(restored[key] == original)
Example #10
0
# BUG FIX: `pd` was used below but pandas was never imported in this snippet.
import pandas as pd

from xanthus.models import GeneralizedMatrixFactorizationModel as GMFModel
from xanthus.datasets import Dataset, DatasetEncoder, utils
from xanthus.evaluate import leave_one_out, score, metrics, he_sampling

# Load MovieLens-100k ratings and replace raw movie ids with titles.
ratings = pd.read_csv("data/movielens-100k/ratings.csv")
movies = pd.read_csv("data/movielens-100k/movies.csv")
title_mapping = dict(zip(movies["movieId"], movies["title"]))

ratings = ratings.rename(columns={"userId": "user", "movieId": "item"})
ratings.loc[:, "item"] = ratings["item"].apply(lambda _: title_mapping[_])

# keep only positive interactions (rating above 3)
ratings = ratings[ratings["rating"] > 3.0]

# hold out one interaction per user for evaluation
train_df, test_df = leave_one_out(ratings)

# fit the encoder on ALL data so train/test share one id space
encoder = DatasetEncoder()
encoder.fit(ratings["user"], ratings["item"])

train_ds = Dataset.from_df(train_df,
                           normalize=utils.as_implicit,
                           encoder=encoder)
test_ds = Dataset.from_df(test_df,
                          normalize=utils.as_implicit,
                          encoder=encoder)

model = GMFModel(fit_params=dict(epochs=10, batch_size=256),
                 n_factors=32,
                 negative_samples=4)

model.fit(train_ds)
Example #11
0
def test_fit_user_item_succeeds(sample_dataset):
    """fit returns the encoder itself (fluent interface)."""
    encoder = DatasetEncoder()
    fitted = encoder.fit(users=sample_dataset[:, 0], items=sample_dataset[:, 1])
    assert fitted == encoder