예제 #1
0
def test_batch_predict_with_items():
    """Batch scoring on an item subset must agree with ``model.predict``.

    For every user, the scores returned by ``predict_for_user`` with
    ``top_k=0`` are compared against ``predict`` on the same item ids, and
    the ids returned with ``top_k=5`` are compared against the five
    best-scoring ids according to ``predict``.
    """
    no_components = 2
    ds = RandomDataset(density=1.0)

    model = LightFM(no_components=no_components)
    model.fit_partial(ds.train,
                      user_features=ds.user_features,
                      item_features=ds.item_features)
    model.batch_setup(item_chunks={0: ds.item_ids},
                      user_features=ds.user_features,
                      item_features=ds.item_features)

    # Sample WITHOUT replacement: np.random.choice draws with replacement by
    # default, and a duplicated item id yields tied scores, which makes the
    # ordered top-k id comparison below depend on tie-breaking.
    n_items = min(10, len(ds.item_ids))
    item_ids = np.random.choice(ds.item_ids, n_items, replace=False)

    for uid in range(ds.no_users):

        original_scores = model.predict(
            np.repeat(uid, n_items),
            item_ids=item_ids,
            user_features=ds.user_features,
            item_features=ds.item_features,
        )

        # Check scores
        _, batch_predicted_scores = model.predict_for_user(user_id=uid,
                                                           item_ids=item_ids,
                                                           top_k=0)
        assert_array_almost_equal(original_scores, batch_predicted_scores)

        # Check ids
        original_ids = item_ids[np.argsort(-original_scores)[:5]]
        batch_ids, _ = model.predict_for_user(user_id=uid,
                                              item_ids=item_ids,
                                              top_k=5)
        assert_array_equal(original_ids, batch_ids)
예제 #2
0
def test_predict_for_user_with_items():
    """``predict_for_user`` needs ``batch_setup`` and honours ``top_k``.

    Before ``batch_setup`` the call is expected to raise
    ``EnvironmentError``; afterwards it must return ``top_k`` item ids and
    ``top_k`` scores for every user.
    """
    no_components = 2
    ds = RandomDataset(no_items=5, no_users=2, density=1.)
    model = LightFM(no_components=no_components)
    model.fit_partial(ds.train,
                      user_features=ds.user_features,
                      item_features=ds.item_features)
    # Drop any batch state so the "not set up" path is what gets exercised.
    inference._batch_cleanup()

    with pytest.raises(EnvironmentError):
        model.predict_for_user(user_id=0, top_k=2, item_ids=np.arange(2))

    model.batch_setup(
        item_chunks={0: ds.item_ids},
        user_features=ds.user_features,
        item_features=ds.item_features,
    )

    for user_id in range(ds.no_users):
        # The call returns an (item_ids, scores) pair. Taking len() of the
        # pair itself is always 2, so unpack and check each array instead.
        rec_item_ids, rec_scores = model.predict_for_user(
            user_id=user_id,
            top_k=2,
            item_ids=np.arange(2),
        )
        assert len(rec_item_ids) == 2
        assert len(rec_scores) == 2
예제 #3
0
def test_matrix_types():
    """Smoke-test fit, predict and predict_rank for each matrix type/dtype.

    Every constructor in ``mattypes`` is combined with every entry of
    ``dtypes``; the test passes as long as none of the calls raises.
    """
    no_users, no_items = 10, 100
    no_features = 20
    n_samples = 10

    for make_matrix in mattypes:
        for dtype in dtypes:
            interactions = make_matrix((no_users, no_items), dtype=dtype)

            # Same feature matrices are reused for fitting and prediction.
            features = {
                'user_features': make_matrix((no_users, no_features),
                                             dtype=dtype),
                'item_features': make_matrix((no_items, no_features),
                                             dtype=dtype),
            }

            model = LightFM()
            model.fit_partial(interactions, **features)

            sampled_users = np.random.randint(0, no_users, n_samples)
            sampled_items = np.random.randint(0, no_items, n_samples)
            model.predict(sampled_users.astype(np.int32),
                          sampled_items.astype(np.int32),
                          **features)

            model.predict_rank(interactions, **features)
예제 #4
0
def test_predict():
    """A scalar user id must score like the same id repeated per item."""
    no_users, no_items = 10, 100

    interactions = sp.coo_matrix((no_users, no_items), dtype=np.int32)
    model = LightFM()
    model.fit_partial(interactions)

    for uid in range(no_users):
        # Array form: one user id per item.
        from_array = model.predict(np.repeat(uid, no_items),
                                   np.arange(no_items))
        # Scalar form: a single integer user id.
        from_scalar = model.predict(uid, np.arange(no_items))

        assert np.allclose(from_array, from_scalar)
예제 #5
0
def test_not_enough_features_fails():
    """Fitting must fail when the feature matrices are one row short."""
    no_users, no_items = 10, 100
    no_features = 20

    interactions = sp.coo_matrix((no_users, no_items), dtype=np.int32)

    # Both feature matrices have one row fewer than the interaction matrix
    # has users / items.
    short_features = dict(
        user_features=sp.csr_matrix((no_users - 1, no_features),
                                    dtype=np.int32),
        item_features=sp.csr_matrix((no_items - 1, no_features),
                                    dtype=np.int32),
    )

    model = LightFM()
    with pytest.raises(Exception):
        model.fit_partial(interactions, **short_features)
예제 #6
0
def test_batch_predict_user_recs_per_user_wo_features():
    """Per-user recommendations without features come back sorted.

    For every user, ``predict_for_user`` with ``top_k=5`` must return five
    scores in non-increasing order.
    """
    no_components = 2
    ds = RandomDataset()

    model = LightFM(no_components=no_components)
    model.fit_partial(ds.train)

    # Set up batch state for THIS model so the test does not rely on state
    # left behind by an earlier test.
    # NOTE(review): assumes batch_setup accepts item chunks alone for a
    # model fitted without features — confirm against its signature.
    model.batch_setup(item_chunks={0: ds.item_ids})

    for uid in range(ds.no_users):
        rec_item_ids, rec_scores = model.predict_for_user(
            user_id=uid,
            top_k=5,
            item_ids=ds.item_ids,
        )
        assert len(rec_scores) == 5
        # Scores must equal their own descending sort.
        assert_array_almost_equal(rec_scores, -1 * np.sort(-1 * rec_scores))
예제 #7
0
def test_return_self():
    """``fit_partial`` and ``fit`` both return the model they were called on."""
    no_users, no_items = 10, 100
    interactions = sp.coo_matrix((no_users, no_items), dtype=np.int32)

    model = LightFM()

    partial_result = model.fit_partial(interactions)
    assert partial_result is model

    full_result = model.fit(interactions)
    assert full_result is model
예제 #8
0
def test_batch_predict():
    """Batch prediction over all items must agree with ``model.predict``.

    After ``batch_setup`` the cached user/item representations must be
    non-zero and have shapes ``(no_users, no_components)`` and
    ``(no_components, no_items)``. For every user, batch scores and the
    top-5 ids must match those derived from ``predict``, and the
    predictions must not be all zeros for every user.
    """
    no_components = 2
    ds = RandomDataset(density=1.0)

    model = LightFM(no_components=no_components)
    model.fit_partial(ds.train,
                      user_features=ds.user_features,
                      item_features=ds.item_features)

    model.batch_setup(
        item_chunks={0: ds.item_ids},
        user_features=ds.user_features,
        item_features=ds.item_features,
    )
    user_repr = inference._user_repr
    item_repr = inference._item_repr
    # np.any instead of a truthy np.sum: a sum can cancel to zero even when
    # the array holds non-zero values.
    assert np.any(user_repr)
    assert user_repr.shape == (ds.no_users, no_components)
    assert np.any(item_repr)
    assert item_repr.shape == (no_components, ds.no_items)

    zeros = 0

    for uid in range(ds.no_users):

        original_scores = model.predict(
            np.repeat(uid, ds.no_items),
            np.arange(ds.no_items),
            user_features=ds.user_features,
            item_features=ds.item_features,
        )

        # Check scores
        _, batch_predicted_scores = model.predict_for_user(
            user_id=uid, top_k=0, item_ids=ds.item_ids)
        assert_array_almost_equal(original_scores, batch_predicted_scores)

        # Check ids
        original_ids = np.argsort(-original_scores)[:5]
        batch_ids, _ = model.predict_for_user(user_id=uid,
                                              top_k=5,
                                              item_ids=ds.item_ids)
        assert np.array_equal(original_ids, batch_ids)

        # Count users whose predictions are entirely zero.
        if not np.any(batch_predicted_scores):
            zeros += 1
    assert zeros < ds.no_users, 'predictions seems to be all zeros'
예제 #9
0
def test_predict_ranks():
    """Check ``predict_rank`` output for full, excluded and tied inputs.

    Covers four cases: ranks over all items form a permutation of
    ``0..no_items - 1``; excluding a dense train set yields all-zero ranks;
    excluding a sparse train set lowers each row's maximum rank by its
    number of train positives; and fully tied predictions give every item
    the maximum rank. A wrongly shaped input must raise ``ValueError``.
    """
    no_users, no_items = 10, 100
    # Highest possible rank; derived from no_items rather than hard-coded.
    max_rank = no_items - 1

    train = sp.rand(no_users, no_items, format='csr', random_state=42)

    model = LightFM()
    model.fit_partial(train)

    # Compute ranks for all items
    rank_input = sp.csr_matrix(np.ones((no_users, no_items)))
    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == max_rank)

    for row in range(no_users):
        assert np.all(np.sort(ranks[row]) == np.arange(no_items))

    # Train set exclusions. All ranks should be zero
    # if train interactions is dense.
    ranks = model.predict_rank(rank_input,
                               train_interactions=rank_input).todense()
    assert np.all(ranks == 0)

    # Max rank should be num_items - 1 - number of positives
    # in train in that row
    ranks = model.predict_rank(rank_input, train_interactions=train).todense()
    assert np.all(
        np.squeeze(np.array(ranks.max(axis=1))) == max_rank -
        np.squeeze(np.array(train.getnnz(axis=1))))

    # Make sure ranks are computed pessimistically when
    # there are ties (that is, equal predictions for every
    # item will assign maximum rank to each).
    model.user_embeddings = np.zeros_like(model.user_embeddings)
    model.item_embeddings = np.zeros_like(model.item_embeddings)
    model.user_biases = np.zeros_like(model.user_biases)
    model.item_biases = np.zeros_like(model.item_biases)

    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == max_rank)
    assert np.all(ranks.max(axis=1) == max_rank)

    # Wrong input dimensions
    with pytest.raises(ValueError):
        model.predict_rank(sp.csr_matrix((5, 5)), num_threads=2)
예제 #10
0
def test_full_batch_predict_wo_features():
    """``batch_predict`` on a feature-less model returns ``top_k`` per user."""
    no_components = 2
    top_k = 5
    ds = RandomDataset(density=1.0)

    model = LightFM(no_components=no_components)
    model.fit_partial(ds.train)
    user_ids = [0, 1, 2]

    # batch_setup is called with its default process count
    # (presumably single-process — verify against its signature).
    model.batch_setup({0: ds.item_ids})
    recoms = model.batch_predict(user_ids=user_ids, chunk_id=0, top_k=top_k)

    for user_id in user_ids:
        assert user_id in recoms
        # First element of each entry is checked for length top_k.
        first_entry = recoms[user_id][0]
        assert len(first_entry) == top_k
예제 #11
0
def test_full_batch_predict():
    """``batch_predict`` gives the same results with one and two processes.

    Runs the same batch prediction with ``n_process=1`` and ``n_process=2``
    and checks that every requested user is present, that the
    single-process run returns ``top_k`` entries per user, and that the
    two-process results match the single-process ones.

    Each ``batch_setup`` is paired with ``batch_cleanup`` in a ``finally``
    clause so that batch state is released even when an assertion fails
    and is not left behind for later tests.
    """
    no_components = 2
    top_k = 5
    ds = RandomDataset()

    model = LightFM(no_components=no_components)
    model.fit_partial(ds.train,
                      user_features=ds.user_features,
                      item_features=ds.item_features)
    user_ids = [0, 1, 2]
    chunks = {0: ds.item_ids}

    # Single process
    model.batch_setup(item_chunks=chunks,
                      user_features=ds.user_features,
                      item_features=ds.item_features,
                      n_process=1)
    try:
        recoms = model.batch_predict(
            user_ids=user_ids,
            chunk_id=0,
            top_k=top_k,
        )
        for user_id in user_ids:
            assert user_id in recoms
            assert len(recoms[user_id][0]) == top_k
        initial_recoms = recoms
    finally:
        model.batch_cleanup()

    # Multiple processes
    model.batch_setup(item_chunks=chunks,
                      user_features=ds.user_features,
                      item_features=ds.item_features,
                      n_process=2)
    try:
        recoms = model.batch_predict(
            user_ids=user_ids,
            chunk_id=0,
            top_k=top_k,
        )
        for user_id in user_ids:
            assert user_id in recoms
            assert_array_almost_equal(recoms[user_id],
                                      initial_recoms[user_id])
    finally:
        model.batch_cleanup()
예제 #12
0
def test_feature_inference_fails():
    """Predicting with ids beyond the fitted feature count must raise.

    The model is fitted with explicit feature matrices; ``predict`` is then
    called without them, using an id equal to the number of features, and
    is expected to raise ``ValueError``.
    """
    no_users, no_items = 10, 100
    no_features = 20

    interactions = sp.coo_matrix((no_users, no_items), dtype=np.int32)
    fit_features = dict(
        user_features=sp.csr_matrix((no_users, no_features), dtype=np.int32),
        item_features=sp.csr_matrix((no_items, no_features), dtype=np.int32),
    )

    model = LightFM()
    model.fit_partial(interactions, **fit_features)

    # Ids one past the last valid feature index.
    bad_user_ids = np.array([no_features], dtype=np.int32)
    bad_item_ids = np.array([no_features], dtype=np.int32)

    with pytest.raises(ValueError):
        model.predict(bad_user_ids, bad_item_ids)
예제 #13
0
def test_empty_matrix():
    """Fitting on an interaction matrix with no stored entries must not raise."""
    shape = (10, 100)  # (no_users, no_items)
    interactions = sp.coo_matrix(shape, dtype=np.int32)

    model = LightFM()
    model.fit_partial(interactions)