def test_tf_bmf_batch_accuracy(tf_session):
    """Cross-validated accuracy check for the TensorFlow BiasedMF model.

    Runs 5-fold user-partitioned cross-validation on ML-100K, predicting each
    held-out sample with BiasedMF (falling back to a damped Bias model), and
    asserts overall MAE and mean per-user RMSE stay near known-good values.
    """
    from lenskit.algorithms import basic
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    algo = lktf.BiasedMF(25, damping=10, batch_size=1024, epochs=20, rng_spec=42)
    # fall back to a plain bias model for users/items BiasedMF cannot score
    algo = basic.Fallback(algo, bias.Bias(damping=10))

    # renamed from `eval` — do not shadow the builtin
    def eval_fold(train, test):
        "Fit on the training fold and predict the held-out test ratings."
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(algo, test)

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = pd.concat(eval_fold(train, test) for (train, test) in folds)

    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.83, abs=0.025)

    user_rmse = preds.groupby('user').apply(
        lambda df: pm.rmse(df.prediction, df.rating))
    assert user_rmse.mean() == approx(1.03, abs=0.05)
def test_fsvd_batch_accuracy():
    """Cross-validated accuracy check for FunkSVD.

    5-fold user-partitioned cross-validation on ML-100K with FunkSVD (bias
    fallback); asserts overall MAE and mean per-user RMSE against expected
    baselines.
    """
    from lenskit.algorithms import basic
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    svd_algo = svd.FunkSVD(25, 125, damping=10)
    # fall back to a damped bias model where FunkSVD has no prediction
    algo = basic.Fallback(svd_algo, bias.Bias(damping=10))

    # renamed from `eval` — do not shadow the builtin
    def eval_fold(train, test):
        "Fit on the training fold and predict the held-out test ratings."
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(algo, test)

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = pd.concat(eval_fold(train, test) for (train, test) in folds)

    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.74, abs=0.025)

    user_rmse = preds.groupby('user').apply(
        lambda df: pm.rmse(df.prediction, df.rating))
    assert user_rmse.mean() == approx(0.92, abs=0.05)
def test_batch_rmse():
    """Batch-predict a Bias model over 5 user partitions and check RMSE coverage.

    Verifies that every user in ML-100K gets a per-user RMSE, that no RMSE is
    missing, and that the mean per-user RMSE is near the expected value.
    """
    import lenskit.crossfold as xf
    import lenskit.batch as batch
    import lenskit.algorithms.bias as bs

    ratings = lktu.ml100k.ratings
    algo = bs.Bias(damping=5)

    # renamed from `eval` — do not shadow the builtin
    def eval_fold(train, test):
        "Fit on the training fold; return predictions indexed by (user, item)."
        algo.fit(train)
        preds = batch.predict(algo, test)
        return preds.set_index(['user', 'item'])

    results = pd.concat(
        (eval_fold(train, test)
         for (train, test) in xf.partition_users(ratings, 5, xf.SampleN(5))))

    user_rmse = results.groupby('user').apply(
        lambda df: pm.rmse(df.prediction, df.rating))

    # we should have all users
    users = ratings.user.unique()
    assert len(user_rmse) == len(users)
    missing = np.setdiff1d(users, user_rmse.index)
    assert len(missing) == 0

    # we should not have any missing values
    assert all(user_rmse.notna())

    # we should have a reasonable mean
    assert user_rmse.mean() == approx(0.93, abs=0.05)
def test_bias_batch_predict(ncpus):
    """Parallel batch prediction with a Bias model.

    Runs 5-fold user-partitioned prediction with ``n_jobs=ncpus`` (fixture)
    and checks that overall RMSE is within the expected range.
    """
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings
    algo = bias.Bias(damping=5)

    # renamed from `eval` — do not shadow the builtin
    def eval_fold(train, test):
        "Fit on the training fold and predict the held-out ratings in parallel."
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        recs = batch.predict(algo, test, n_jobs=ncpus)
        return recs

    preds = pd.concat(
        (eval_fold(train, test)
         for (train, test) in xf.partition_users(ratings, 5, xf.SampleFrac(0.2))))

    _log.info('analyzing predictions')
    rmse = pm.rmse(preds.prediction, preds.rating)
    _log.info('RMSE is %f', rmse)
    assert rmse == pytest.approx(0.95, abs=0.1)
def test_batch_predict_preshared():
    "Test batch prediction with isolated training and a pre-serialized algorithm."
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf

    algo = bias.Bias()
    # take a single train/test split of 100 sampled users, 5 ratings each
    pairs = xf.sample_users(lktu.ml_test.ratings, 1, 100, xf.SampleN(5))
    train, test = next(pairs)

    # train in an isolated process and predict from the shared result
    trained = lkb.train_isolated(algo, train)
    preds = lkb.predict(trained, test)

    assert len(preds) == len(test)
    assert preds['prediction'].notna().all()
def test_uu_batch_accuracy():
    """Cross-validated accuracy for user-user kNN (with bias fallback).

    Evaluates 5 user partitions of ML-100K and checks overall MAE and mean
    per-user RMSE against expected baselines.
    """
    from lenskit.algorithms import basic
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    uu_algo = knn.UserUser(30)
    algo = basic.Fallback(uu_algo, bias.Bias())

    splits = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = pd.concat(__batch_eval((algo, train, test))
                      for (train, test) in splits)

    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.71, abs=0.05)

    user_rmse = preds.groupby('user').apply(
        lambda frame: pm.rmse(frame.prediction, frame.rating))
    assert user_rmse.mean() == approx(0.91, abs=0.055)
def test_topn_big():
    """Spot-check TopN recommendations for 100 random users.

    Each user's top-100 recommendations must exclude items they already rated
    and must match the 100 highest-scoring unrated predictions.
    """
    ratings = lktu.ml_test.ratings
    users = ratings.user.unique()
    items = ratings.item.unique()
    user_items = ratings.set_index('user').item

    algo = basic.TopN(bias.Bias())
    ret = algo.fit(ratings)
    # fit returns the algorithm itself
    assert ret is algo

    # test 100 random users
    for user in np.random.choice(users, 100, False):
        recs = algo.recommend(user, 100)
        assert len(recs) == 100

        rated = user_items.loc[user]
        # no recommended item may already be rated by this user
        assert not recs['item'].isin(rated).any()

        unrated = np.setdiff1d(items, rated)
        scores = algo.predictor.predict_for_user(user, unrated)
        top = scores.nlargest(100)
        assert top.values == approx(recs.score.values)