def test__similar_users__bad():
    """Ensure similar_users() raises an AssertionError for a user not in the training data."""
    # Build and fit the model outside the `raises` block: an AssertionError
    # coming from setup must fail the test rather than satisfy it.
    model = RankFM(factors=2)
    model.fit(intx_train_pd_int)

    # Only the call under test is allowed to raise.
    with pytest.raises(AssertionError):
        model.similar_users(9, n_users=1)
def test__similar_users__good():
    """Check similar_users() for a valid [user_id]: expected shape and only known user ids."""
    fm = RankFM(factors=2)
    fm.fit(intx_train_pd_int)
    neighbors = fm.similar_users(1, n_users=2)

    # Evaluate every condition first, then assert each one on its own.
    has_expected_shape = neighbors.shape == (2, )
    known_users = intx_train_pd_int['user_id'].unique()
    only_known_users = np.isin(neighbors, known_users).all()

    assert has_expected_shape
    assert only_known_users
def test__fit__good(interactions, user_features, item_features):
    """Verify that fitting on the given inputs leaves the model flagged as fit."""
    fm = RankFM(factors=2)
    fm.fit(
        interactions,
        user_features,
        item_features,
        epochs=2,
        verbose=True,
    )

    assert fm.is_fit
def test__similar_items__good():
    """Check similar_items() for a valid [item_id]: expected shape and only known item ids."""
    fm = RankFM(factors=2)
    fm.fit(intx_train_pd_int)
    neighbors = fm.similar_items(1, n_items=3)

    # Evaluate every condition first, then assert each one on its own.
    has_expected_shape = neighbors.shape == (3, )
    known_items = intx_train_pd_int['item_id'].unique()
    only_known_items = np.isin(neighbors, known_items).all()

    assert has_expected_shape
    assert only_known_items
def test__predict__good__disjoint_drop():
    """Check predict() on disjoint validation pairs when cold_start='drop' is used."""
    fm = RankFM(factors=2)
    fm.fit(intx_train_pd_int)
    preds = fm.predict(intx_valid_disjoint, cold_start='drop')

    has_expected_shape = preds.shape == (5, )
    is_float32 = preds.dtype == np.float32
    # No score may be NaN.
    no_missing = not np.isnan(preds).any()

    assert has_expected_shape
    assert is_float32
    assert no_missing
def test__predict__good__train():
    """Check predict() when scoring the same interactions the model was fit on."""
    fm = RankFM(factors=2)
    fm.fit(intx_train_pd_int)
    preds = fm.predict(intx_train_pd_int)

    has_expected_shape = preds.shape == (9, )
    is_float32 = preds.dtype == np.float32
    # No score may be NaN.
    no_missing = not np.isnan(preds).any()

    assert has_expected_shape
    assert is_float32
    assert no_missing
def test__recommend__good__train():
    """Check recommend() for the training users: type, shape, index and item ids."""
    fm = RankFM(factors=2)
    fm.fit(intx_train_pd_int)
    top_items = fm.recommend(train_users, n_items=3)

    is_frame = isinstance(top_items, pd.DataFrame)
    has_expected_shape = top_items.shape == (3, 3)
    # The result index must match the requested users, in order.
    index_matches = np.array_equal(top_items.index.values, train_users)
    known_items = intx_train_pd_int['item_id'].values
    only_known_items = top_items.isin(known_items).all().all()

    assert is_frame
    assert has_expected_shape
    assert index_matches
    assert only_known_items
def test__recommend__good__valid__nan():
    """Check recommend() on a disjoint set of validation users with cold_start='nan'."""
    fm = RankFM(factors=2)
    fm.fit(intx_train_pd_int)
    top_items = fm.recommend(valid_users, n_items=3, cold_start='nan')

    is_frame = isinstance(top_items, pd.DataFrame)
    has_expected_shape = top_items.shape == (4, 3)
    # Compare the index against the requested users irrespective of order.
    index_matches = np.array_equal(sorted(top_items.index.values), sorted(valid_users))
    known_items = intx_train_pd_int['item_id'].values
    only_known_items = top_items.dropna().isin(known_items).all().all()

    # Users requested but absent from the training users must have all-null rows.
    unseen_users = list(set(valid_users) - set(train_users))
    unseen_all_null = top_items.loc[unseen_users].isnull().all().all()

    assert is_frame
    assert has_expected_shape
    assert index_matches
    assert only_known_items
    assert unseen_all_null
def test__recommend__good__valid__drop():
    """Check recommend() on a disjoint set of validation users with cold_start='drop'."""
    fm = RankFM(factors=2)
    fm.fit(intx_train_pd_int)
    top_items = fm.recommend(valid_users, n_items=3, cold_start='drop')

    is_frame = isinstance(top_items, pd.DataFrame)
    has_expected_shape = top_items.shape == (2, 3)
    index_is_subset = np.isin(top_items.index.values, valid_users).all()
    known_items = intx_train_pd_int['item_id'].values
    only_known_items = top_items.dropna().isin(known_items).all().all()

    # The returned users must be exactly those present in both user sets.
    shared_users = list(set(valid_users) & set(train_users))
    users_match = np.array_equal(sorted(shared_users), sorted(top_items.index.values))

    assert is_frame
    assert has_expected_shape
    assert index_is_subset
    assert only_known_items
    assert users_match
def test__recommend__good__train__filter():
    """Check recommend() for the training users when previously seen items are filtered."""
    fm = RankFM(factors=2)
    fm.fit(intx_train_pd_int)
    top_items = fm.recommend(train_users, n_items=3, filter_previous=True)

    is_frame = isinstance(top_items, pd.DataFrame)
    has_expected_shape = top_items.shape == (3, 3)
    index_matches = np.array_equal(top_items.index.values, train_users)
    known_items = intx_train_pd_int['item_id'].values
    only_known_items = top_items.isin(known_items).all().all()

    # Reshape the wide recommendations into long (user_id, item_id) pairs.
    pairs = top_items.stack().reset_index()
    pairs = pairs.drop('level_1', axis=1)
    pairs.columns = ['user_id', 'item_id']

    # No recommended pair may also appear in the training interactions.
    overlap = pd.merge(intx_train_pd_int, pairs, on=['user_id', 'item_id'], how='inner')
    no_overlap = overlap.empty

    assert is_frame
    assert has_expected_shape
    assert index_matches
    assert only_known_items
    assert no_overlap
def model(interactions, item_features, factors, max_samples, epochs):
    """Build and fit a RankFM factorization machine recommendation model.

    Args:
        interactions: A dataframe of user/item interactions.
        item_features: A dataframe of item features.
        factors: The latent factor rank (hyperparameter).
        max_samples: The maximum number of negative samples to draw for
            WARP loss (hyperparameter).
        epochs: Number of training epochs.

    Returns:
        The RankFM model object after `fit` has been called on it.
    """
    # Only `factors` and `max_samples` come from the caller; the rest are fixed here.
    hyperparams = {
        'factors': factors,
        'loss': 'warp',
        'max_samples': max_samples,
        'alpha': 0.01,
        'sigma': 0.1,
        'learning_rate': 0.1,
        'learning_schedule': 'invscaling',
    }
    rankfm = RankFM(**hyperparams)
    rankfm.fit(interactions, item_features=item_features, epochs=epochs, verbose=True)
    return rankfm
def test__fit__bad__if_str_cols():
    """Ensure fit() raises a ValueError when [item_features] contains string columns."""
    # Construct the model outside the `raises` block so that only fit() can
    # satisfy the expected exception.
    model = RankFM(factors=2)

    with pytest.raises(ValueError):
        model.fit(intx_train_pd_int, item_features=if_str_cols)
def test__fit__bad__if_no_id():
    """Ensure fit() raises a KeyError when [item_features] lacks an [item_id] column."""
    # Construct the model outside the `raises` block so that only fit() can
    # satisfy the expected exception.
    model = RankFM(factors=2)

    with pytest.raises(KeyError):
        model.fit(intx_train_pd_int, item_features=if_no_id)
def test__fit__bad__rating_col():
    """Ensure that more than 2 columns in the interaction data causes an assertion failure."""
    # Construct the model outside the `raises` block: an AssertionError from
    # the constructor must not be mistaken for the fit() failure under test.
    model = RankFM(factors=2)

    with pytest.raises(AssertionError):
        model.fit(intx_train_pd_rating)