def test_als_batch_accuracy():
    """Compare LU- and CD-trained ``als.BiasedMF`` models on ML-100K folds.

    Both models are fit on every training split yielded by
    ``xf.partition_users(ratings, 5, xf.SampleFrac(0.2))`` and asked to
    predict the matching test split.  The pooled MAE and the mean of the
    per-user RMSE are then checked against fixed reference values for each
    solver.  The absolute difference between the two solvers' predictions
    is only logged, not asserted.
    """
    import lenskit.crossfold as xf
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    lu_algo = als.BiasedMF(25, iterations=20, damping=5, method='lu')
    cd_algo = als.BiasedMF(25, iterations=25, damping=5, method='cd')

    def predict_fold(train, test):
        """Fit both solvers on *train* and attach their predictions to *test*."""
        _log.info('training LU')
        lu_algo.fit(train)
        _log.info('training CD')
        cd_algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return test.assign(lu_pred=lu_algo.predict(test),
                           cd_pred=cd_algo.predict(test))

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = pd.concat(predict_fold(train, test) for (train, test) in folds)

    # Diagnostic output only: shows where the two solvers disagree the most.
    preds['abs_diff'] = np.abs(preds.lu_pred - preds.cd_pred)
    _log.info('predictions:\n%s', preds.sort_values('abs_diff', ascending=False))
    _log.info('diff summary:\n%s', preds.abs_diff.describe())

    lu_mae = pm.mae(preds.lu_pred, preds.rating)
    assert lu_mae == approx(0.73, abs=0.025)
    cd_mae = pm.mae(preds.cd_pred, preds.rating)
    assert cd_mae == approx(0.73, abs=0.025)

    lu_user_rmse = preds.groupby('user').apply(
        lambda df: pm.rmse(df.lu_pred, df.rating))
    assert lu_user_rmse.mean() == approx(0.91, abs=0.05)
    cd_user_rmse = preds.groupby('user').apply(
        lambda df: pm.rmse(df.cd_pred, df.rating))
    assert cd_user_rmse.mean() == approx(0.91, abs=0.05)
def test_mae_series_two():
    """Check ``pm.mae`` on pairs of two-element pandas Series."""
    # Identical series: the error is zero and the result is a ``float``.
    left = pd.Series([1, 2])
    right = pd.Series([1, 2])
    identical = pm.mae(left, right)
    assert isinstance(identical, float)
    assert identical == approx(0)

    # Every element differs by one.
    left = pd.Series([1, 1])
    right = pd.Series([2, 2])
    off_by_one = pm.mae(left, right)
    assert off_by_one == approx(1)

    # Swapped values: every element differs by two.
    left = pd.Series([1, 3])
    right = pd.Series([3, 1])
    swapped = pm.mae(left, right)
    assert swapped == approx(2)
def test_mae_array_two():
    """Check ``pm.mae`` on pairs of two-element NumPy arrays."""
    # Identical arrays: the error is zero and the result is a ``float``.
    first = np.array([1, 2])
    second = np.array([1, 2])
    identical = pm.mae(first, second)
    assert isinstance(identical, float)
    assert identical == approx(0)

    # Every element differs by one.
    first = np.array([1, 1])
    second = np.array([2, 2])
    off_by_one = pm.mae(first, second)
    assert off_by_one == approx(1)

    # Swapped values: every element differs by two.
    first = np.array([1, 3])
    second = np.array([3, 1])
    swapped = pm.mae(first, second)
    assert swapped == approx(2)
def test_mae_two():
    """Check ``pm.mae`` on pairs of two-element Python lists."""
    # The first case additionally pins the return type.
    identical = pm.mae([1, 2], [1, 2])
    assert isinstance(identical, float)
    assert identical == approx(0)

    # Remaining cases: (first argument, second argument, expected MAE).
    cases = [
        ([1, 1], [2, 2], 1),
        ([1, 3], [3, 1], 2),
        ([1, 3], [3, 2], 1.5),
    ]
    for first, second, expected in cases:
        result = pm.mae(first, second)
        assert result == approx(expected)
def test_fsvd_batch_accuracy():
    """Check FunkSVD (with a bias fallback) accuracy on ML-100K folds.

    A ``basic.Fallback`` of ``svd.FunkSVD(25, 125, damping=10)`` and
    ``bias.Bias(damping=10)`` is refit on each training split and scored with
    ``batch.predict``.  The pooled MAE and the mean per-user RMSE are compared
    against fixed reference values.
    """
    from lenskit.algorithms import basic
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    data = lktu.ml100k.ratings

    funk = svd.FunkSVD(25, 125, damping=10)
    model = basic.Fallback(funk, bias.Bias(damping=10))

    def run_fold(train, test):
        """Fit the model on *train* and return batch predictions for *test*."""
        _log.info('running training')
        model.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(model, test)

    def user_rmse_of(group):
        """RMSE of one user's predictions."""
        return pm.rmse(group.prediction, group.rating)

    splits = xf.partition_users(data, 5, xf.SampleFrac(0.2))
    results = pd.concat(run_fold(train, test) for (train, test) in splits)

    overall_mae = pm.mae(results.prediction, results.rating)
    assert overall_mae == approx(0.74, abs=0.025)

    per_user = results.groupby('user').apply(user_rmse_of)
    assert per_user.mean() == approx(0.92, abs=0.05)
def test_ii_batch_accuracy():
    """Check item-item k-NN (with a bias fallback) accuracy on ML-100K folds.

    A ``basic.Fallback`` of ``knn.ItemItem(30)`` and ``basic.Bias()`` is refit
    on each training split and scored with ``batch.predict(..., n_jobs=4)``.
    The pooled MAE and the mean per-user RMSE are compared against fixed
    reference values.
    """
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    data = lktu.ml100k.ratings

    item_knn = knn.ItemItem(30)
    model = basic.Fallback(item_knn, basic.Bias())

    def run_fold(train, test):
        """Fit the model on *train* and return batch predictions for *test*."""
        _log.info('running training')
        model.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(model, test, n_jobs=4)

    def user_rmse_of(group):
        """RMSE of one user's predictions."""
        return pm.rmse(group.prediction, group.rating)

    splits = xf.partition_users(data, 5, xf.SampleFrac(0.2))
    results = pd.concat(run_fold(train, test) for (train, test) in splits)

    overall_mae = pm.mae(results.prediction, results.rating)
    assert overall_mae == approx(0.70, abs=0.025)

    per_user = results.groupby('user').apply(user_rmse_of)
    assert per_user.mean() == approx(0.90, abs=0.05)
def test_tf_bmf_batch_accuracy(tf_session):
    """Check ``lktf.BiasedMF`` (with a bias fallback) accuracy on ML-100K folds.

    The model is refit on each training split and scored with
    ``batch.predict``; the pooled MAE and the mean per-user RMSE are compared
    against fixed reference values.  ``tf_session`` is requested as a fixture
    and is not referenced in the body.
    """
    from lenskit.algorithms import basic
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    data = lktu.ml100k.ratings

    factorizer = lktf.BiasedMF(25, damping=10, batch_size=1024, epochs=20, rng_spec=42)
    model = basic.Fallback(factorizer, bias.Bias(damping=10))

    def run_fold(train, test):
        """Fit the model on *train* and return batch predictions for *test*."""
        _log.info('running training')
        model.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(model, test)

    def user_rmse_of(group):
        """RMSE of one user's predictions."""
        return pm.rmse(group.prediction, group.rating)

    splits = xf.partition_users(data, 5, xf.SampleFrac(0.2))
    results = pd.concat(run_fold(train, test) for (train, test) in splits)

    overall_mae = pm.mae(results.prediction, results.rating)
    assert overall_mae == approx(0.83, abs=0.025)

    per_user = results.groupby('user').apply(user_rmse_of)
    assert per_user.mean() == approx(1.03, abs=0.05)
def test_tf_isvd(ml20m):
    """Check ``lenskit_tf.IntegratedBiasMF(20)`` accuracy on sampled ``ml20m`` users.

    Splits come from ``xf.sample_users(ml20m, 2, 5000, xf.SampleFrac(0.2))``.
    The model is refit on each training split and scored with
    ``batch.predict``; the pooled MAE and the mean per-user RMSE are compared
    against fixed reference values.
    """
    model = lenskit_tf.IntegratedBiasMF(20)

    def run_fold(train, test):
        """Fit the model on *train* and return batch predictions for *test*."""
        _log.info('running training')
        model.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(model, test)

    def user_rmse_of(group):
        """RMSE of one user's predictions."""
        return pm.rmse(group.prediction, group.rating)

    splits = xf.sample_users(ml20m, 2, 5000, xf.SampleFrac(0.2))
    results = pd.concat(run_fold(train, test) for (train, test) in splits)

    overall_mae = pm.mae(results.prediction, results.rating)
    assert overall_mae == approx(0.60, abs=0.025)

    per_user = results.groupby('user').apply(user_rmse_of)
    assert per_user.mean() == approx(0.92, abs=0.05)
def test_global_metric():
    """Check that ``pm.global_metric`` matches calling the metric directly.

    A ``Bias`` model is fit on the first split from ``xf.sample_users`` and
    used to predict the test rows.  ``pm.global_metric`` is then compared,
    with exact equality, against ``pm.rmse`` (no ``metric`` argument) and
    ``pm.mae`` (``metric=pm.mae``) applied to the prediction and rating
    columns.
    """
    import lenskit.crossfold as xf
    import lenskit.batch as batch
    from lenskit.algorithms.bias import Bias

    # Only the first (train, test) pair is needed.
    splits = xf.sample_users(lktu.ml_test.ratings, 1, 200, xf.SampleFrac(0.5))
    train, test = next(splits)

    model = Bias()
    model.fit(train)

    scored = batch.predict(model, test)

    global_rmse = pm.global_metric(scored)
    assert global_rmse == pm.rmse(scored.prediction, scored.rating)

    global_mae = pm.global_metric(scored, metric=pm.mae)
    assert global_mae == pm.mae(scored.prediction, scored.rating)
def test_uu_batch_accuracy():
    """Check user-user k-NN (with a bias fallback) accuracy on ML-100K folds.

    Each ``(algo, train, test)`` job is handed to the module-level
    ``__batch_eval`` helper; the collected results are concatenated, and the
    pooled MAE and the mean per-user RMSE are compared against fixed reference
    values.
    """
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    import lenskit.metrics.predict as pm

    data = lktu.ml100k.ratings

    user_knn = knn.UserUser(30)
    model = basic.Fallback(user_knn, basic.Bias())

    def user_rmse_of(group):
        """RMSE of one user's predictions."""
        return pm.rmse(group.prediction, group.rating)

    # Evaluate every fold first, then combine the per-fold frames.
    fold_results = []
    for train, test in xf.partition_users(data, 5, xf.SampleFrac(0.2)):
        fold_results.append(__batch_eval((model, train, test)))
    results = pd.concat(fold_results)

    overall_mae = pm.mae(results.prediction, results.rating)
    assert overall_mae == approx(0.71, abs=0.028)

    per_user = results.groupby('user').apply(user_rmse_of)
    assert per_user.mean() == approx(0.91, abs=0.055)
def test_user_metric():
    """Check that ``pm.user_metric`` matches the mean of per-user metric values.

    A ``Bias`` model is fit on the first split from ``xf.sample_users`` and
    used to predict the test rows.  ``pm.user_metric`` is compared against the
    mean of the metric computed per ``'user'`` group, once without a
    ``metric`` argument (checked against ``pm.rmse``) and once with
    ``metric=pm.mae``.
    """
    import lenskit.crossfold as xf
    import lenskit.batch as batch
    from lenskit.algorithms.bias import Bias

    # Only the first (train, test) pair is needed.
    splits = xf.sample_users(lktu.ml_test.ratings, 1, 200, xf.SampleFrac(0.5))
    train, test = next(splits)

    model = Bias()
    model.fit(train)

    scored = batch.predict(model, test)

    def rmse_of(group):
        """RMSE of one user's predictions."""
        return pm.rmse(group.prediction, group.rating)

    def mae_of(group):
        """MAE of one user's predictions."""
        return pm.mae(group.prediction, group.rating)

    reported_rmse = pm.user_metric(scored)
    expected_rmse = scored.groupby('user').apply(rmse_of)
    assert reported_rmse == approx(expected_rmse.mean())

    reported_mae = pm.user_metric(scored, metric=pm.mae)
    expected_mae = scored.groupby('user').apply(mae_of)
    assert reported_mae == approx(expected_mae.mean())
def test_als_batch_accuracy():
    """Check ``als.BiasedMF`` (with a bias fallback) accuracy on ML-100K folds.

    A ``basic.Fallback`` of ``als.BiasedMF(25, iterations=20, damping=5)`` and
    ``basic.Bias(damping=5)`` is refit on each training split; its
    ``predict`` output is attached to the test rows as ``prediction``.  The
    pooled MAE and the mean per-user RMSE are compared against fixed
    reference values.
    """
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    import lenskit.metrics.predict as pm

    data = lktu.ml100k.load_ratings()

    factorizer = als.BiasedMF(25, iterations=20, damping=5)
    model = basic.Fallback(factorizer, basic.Bias(damping=5))

    def run_fold(train, test):
        """Fit the model on *train* and attach its predictions to *test*."""
        _log.info('running training')
        model.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return test.assign(prediction=model.predict(test))

    def user_rmse_of(group):
        """RMSE of one user's predictions."""
        return pm.rmse(group.prediction, group.rating)

    splits = xf.partition_users(data, 5, xf.SampleFrac(0.2))
    results = pd.concat(run_fold(train, test) for (train, test) in splits)

    overall_mae = pm.mae(results.prediction, results.rating)
    assert overall_mae == approx(0.73, abs=0.025)

    per_user = results.groupby('user').apply(user_rmse_of)
    assert per_user.mean() == approx(0.91, abs=0.05)