Ejemplo n.º 1
0
def test__similar_users__bad():
    """Ensure similar_users() raises AssertionError for a user not in the training data."""

    # Build and fit outside the raises-context: an AssertionError thrown during
    # setup must fail the test rather than satisfy the expectation below.
    model = RankFM(factors=2)
    model.fit(intx_train_pd_int)

    with pytest.raises(AssertionError):
        model.similar_users(9, n_users=1)
Ejemplo n.º 2
0
def test__similar_users__good():
    """Test the similar_users() method for a valid [user_id].

    Checks that the result has the requested length and that every returned
    value appears in the [user_id] column of the training interactions.
    """

    model = RankFM(factors=2)
    model.fit(intx_train_pd_int)
    similar = model.similar_users(1, n_users=2)

    # One assert per condition so a failure report names the violated check.
    assert similar.shape == (2, )
    assert np.isin(similar, intx_train_pd_int['user_id'].unique()).all()
Ejemplo n.º 3
0
def test__fit__good(interactions, user_features, item_features):
    """Verify that fitting on the supplied inputs leaves the model flagged as fit."""

    fit_options = {'epochs': 2, 'verbose': True}

    fm = RankFM(factors=2)
    fm.fit(interactions, user_features, item_features, **fit_options)
    assert fm.is_fit
Ejemplo n.º 4
0
def test__similar_items__good():
    """Test the similar_items() method for a valid [item_id].

    Checks that the result has the requested length and that every returned
    value appears in the [item_id] column of the training interactions.
    """

    model = RankFM(factors=2)
    model.fit(intx_train_pd_int)
    similar = model.similar_items(1, n_items=3)

    # One assert per condition so a failure report names the violated check.
    assert similar.shape == (3, )
    assert np.isin(similar, intx_train_pd_int['item_id'].unique()).all()
Ejemplo n.º 5
0
def test__predict__good__disjoint_drop():
    """Test the predict() method on disjoint validation pairs with the cold_start='drop' option.

    Checks the length and dtype of the returned scores and that none are NaN.
    """

    model = RankFM(factors=2)
    model.fit(intx_train_pd_int)
    scores = model.predict(intx_valid_disjoint, cold_start='drop')

    # One assert per condition so a failure report names the violated check.
    assert scores.shape == (5, )
    assert scores.dtype == np.float32
    assert not np.isnan(scores).any()
Ejemplo n.º 6
0
def test__predict__good__train():
    """Test the predict() method on the training inputs.

    Checks the length and dtype of the returned scores and that none are NaN.
    """

    model = RankFM(factors=2)
    model.fit(intx_train_pd_int)
    scores = model.predict(intx_train_pd_int)

    # One assert per condition so a failure report names the violated check.
    assert scores.shape == (9, )
    assert scores.dtype == np.float32
    assert not np.isnan(scores).any()
Ejemplo n.º 7
0
def test__recommend__good__train():
    """Test the recommend() method on the training users.

    Checks the result type and shape, that the index matches [train_users]
    in order, and that every recommended value is a training [item_id].
    """

    model = RankFM(factors=2)
    model.fit(intx_train_pd_int)
    recs = model.recommend(train_users, n_items=3)

    # One assert per condition so a failure report names the violated check.
    assert isinstance(recs, pd.DataFrame)
    assert recs.shape == (3, 3)
    assert np.array_equal(recs.index.values, train_users)
    assert recs.isin(intx_train_pd_int['item_id'].values).all().all()
Ejemplo n.º 8
0
def test__recommend__good__valid__nan():
    """Test the recommend() method on the validation users with cold_start='nan'.

    Every validation user must appear in the result index; rows for users
    that are not among [train_users] must be entirely null, and all non-null
    rows must contain only training [item_id] values.
    """

    model = RankFM(factors=2)
    model.fit(intx_train_pd_int)
    recs = model.recommend(valid_users, n_items=3, cold_start='nan')

    # One assert per condition so a failure report names the violated check.
    assert isinstance(recs, pd.DataFrame)
    assert recs.shape == (4, 3)
    assert np.array_equal(sorted(recs.index.values), sorted(valid_users))
    assert recs.dropna().isin(intx_train_pd_int['item_id'].values).all().all()

    # users requested for recommendations but absent from the training users
    new_users = list(set(valid_users) - set(train_users))
    assert recs.loc[new_users].isnull().all().all()
Ejemplo n.º 9
0
def test__recommend__good__valid__drop():
    """Test the recommend() method on the validation users with cold_start='drop'.

    The result index must consist of exactly the validation users that are
    also among [train_users], and all recommended values must be training
    [item_id] values.
    """

    model = RankFM(factors=2)
    model.fit(intx_train_pd_int)
    recs = model.recommend(valid_users, n_items=3, cold_start='drop')

    # One assert per condition so a failure report names the violated check.
    assert isinstance(recs, pd.DataFrame)
    assert recs.shape == (2, 3)
    assert np.isin(recs.index.values, valid_users).all()
    assert recs.dropna().isin(intx_train_pd_int['item_id'].values).all().all()

    # users present in both the validation and the training user sets
    same_users = list(set(valid_users) & set(train_users))
    assert np.array_equal(sorted(same_users), sorted(recs.index.values))
Ejemplo n.º 10
0
def test__recommend__good__train__filter():
    """Test the recommend() method on the training users with filter_previous=True.

    Besides the type/shape/index/item checks, verifies that no recommended
    (user_id, item_id) pair also occurs in the training interactions.
    """

    model = RankFM(factors=2)
    model.fit(intx_train_pd_int)
    recs = model.recommend(train_users, n_items=3, filter_previous=True)

    # One assert per condition so a failure report names the violated check.
    assert isinstance(recs, pd.DataFrame)
    assert recs.shape == (3, 3)
    assert np.array_equal(recs.index.values, train_users)
    assert recs.isin(intx_train_pd_int['item_id'].values).all().all()

    # Reshape the wide recommendations to long (user_id, item_id) rows;
    # 'level_1' is presumably the auto-named column holding the former
    # column labels after stack()/reset_index().
    recs_long = recs.stack().reset_index().drop('level_1', axis=1)
    recs_long.columns = ['user_id', 'item_id']

    # An inner join with the training interactions must yield no rows.
    overlap = pd.merge(intx_train_pd_int,
                       recs_long,
                       on=['user_id', 'item_id'],
                       how='inner')
    assert overlap.empty
Ejemplo n.º 11
0
def model(interactions, item_features, factors, max_samples, epochs):
    """Train a RankFM factorization machine recommender using WARP loss.

    Args:
        interactions: A dataframe of user/item interactions.
        item_features: A dataframe holding the item features.
        factors: The latent factor rank (hyperparameter).
        max_samples: The maximum number of negative samples to draw for WARP loss (hyperparameter).
        epochs: Number of training epochs.

    Returns:
        The RankFM model object after fit() has been called on it.
    """

    # Tunable values come from the arguments; the rest are fixed settings.
    hyperparams = dict(
        factors=factors,
        loss='warp',
        max_samples=max_samples,
        alpha=0.01,
        sigma=0.1,
        learning_rate=0.1,
        learning_schedule='invscaling',
    )

    recommender = RankFM(**hyperparams)
    recommender.fit(interactions, item_features=item_features, epochs=epochs, verbose=True)
    return recommender
Ejemplo n.º 12
0
def test__fit__bad__if_str_cols():
    """Ensure fit() raises ValueError when [item_features] contains string columns."""

    # Construct outside the raises-context so only fit() can satisfy it.
    model = RankFM(factors=2)

    with pytest.raises(ValueError):
        model.fit(intx_train_pd_int, item_features=if_str_cols)
Ejemplo n.º 13
0
def test__fit__bad__if_no_id():
    """Ensure fit() raises KeyError when [item_features] lacks an [item_id] column."""

    # Construct outside the raises-context so only fit() can satisfy it.
    model = RankFM(factors=2)

    with pytest.raises(KeyError):
        model.fit(intx_train_pd_int, item_features=if_no_id)
Ejemplo n.º 14
0
def test__fit__bad__rating_col():
    """Ensure that having more than 2 columns in the interaction data causes an assertion failure."""

    # Construct outside the raises-context so only fit() can satisfy it.
    model = RankFM(factors=2)

    with pytest.raises(AssertionError):
        model.fit(intx_train_pd_rating)