def test_bias_batch_recommend():
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    from lenskit import batch, topn

    if not os.path.exists('ml-100k/u.data'):
        pytest.skip('ml-100k data not available')
    ratings = pd.read_csv('ml-100k/u.data', sep='\t',
                          names=['user', 'item', 'rating', 'timestamp'])

    algo = basic.Bias(damping=5)
    algo = basic.TopN(algo)

    def eval(train, test):
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        recs = batch.recommend(algo, test.user.unique(), 100)
        return recs

    folds = list(xf.partition_users(ratings, 5, xf.SampleFrac(0.2)))
    test = pd.concat(y for (x, y) in folds)

    recs = pd.concat(eval(train, test) for (train, test) in folds)

    _log.info('analyzing recommendations')
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    results = rla.compute(recs, test)
    dcg = results.ndcg
    _log.info('nDCG for %d users is %f (max=%f)', len(dcg), dcg.mean(), dcg.max())
    assert dcg.mean() > 0
def test_run_two():
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.precision)
    rla.add_metric(topn.recall)
    rla.add_metric(topn.ndcg)

    recs = pd.DataFrame({
        'data': 'a',
        'user': ['a', 'a', 'a', 'b', 'b'],
        'item': [2, 3, 1, 4, 5],
        'rank': [1, 2, 3, 1, 2]
    })
    truth = pd.DataFrame({
        'user': ['a', 'a', 'a', 'b', 'b', 'b'],
        'item': [1, 2, 3, 1, 5, 6],
        'rating': [3.0, 5.0, 4.0, 3.0, 5.0, 4.0]
    })

    def prog(inner):
        assert len(inner) == 2
        return inner

    res = rla.compute(recs, truth, progress=prog)
    print(res)

    assert len(res) == 2
    assert res.index.nlevels == 2
    assert res.index.names == ['data', 'user']
    assert all(res.index.levels[0] == 'a')
    assert all(res.index.levels[1] == ['a', 'b'])
    assert all(res.reset_index().user == ['a', 'b'])

    partial_ndcg = _dcg([0.0, 5.0]) / _dcg([5, 4, 3])
    assert res.ndcg.values == approx([1.0, partial_ndcg])
    assert res.precision.values == approx([1.0, 1 / 2])
    assert res.recall.values == approx([1.0, 1 / 3])
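# The expected values above (and in test_spec_group_cols below) are derived with
# the private helper `_dcg` from lenskit.metrics.topn. For readers, a minimal
# standalone sketch of that discounted-gain sum -- an assumption for
# illustration only; the tests import the library's implementation, not this:
import numpy as np

def _dcg_sketch(scores):
    scores = np.nan_to_num(np.asarray(scores, dtype='float64'))
    ranks = np.arange(1, len(scores) + 1)
    disc = np.maximum(np.log2(ranks), 1.0)  # ranks 1-2 undiscounted, log2 beyond
    return np.sum(scores / disc)

# e.g. partial_ndcg above is _dcg_sketch([0.0, 5.0]) / _dcg_sketch([5, 4, 3])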
def test_als_implicit_batch_accuracy():
    import lenskit.crossfold as xf
    from lenskit import batch
    from lenskit import topn

    ratings = lktu.ml100k.load_ratings()

    algo = als.ImplicitMF(25, iterations=20)

    def eval(train, test):
        _log.info('running training')
        train['rating'] = train.rating.astype(np.float_)
        algo.fit(train)
        users = test.user.unique()
        _log.info('testing %d users', len(users))
        candidates = topn.UnratedCandidates(train)
        recs = batch.recommend(algo, users, 100, candidates)
        return recs

    folds = list(xf.partition_users(ratings, 5, xf.SampleFrac(0.2)))
    test = pd.concat(te for (tr, te) in folds)
    recs = pd.concat(eval(train, test) for (train, test) in folds)

    _log.info('analyzing recommendations')
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    results = rla.compute(recs, test)
    _log.info('nDCG for users is %.4f', results.ndcg.mean())
    assert results.ndcg.mean() > 0
def test_implicit_als_batch_accuracy():
    import lenskit.crossfold as xf
    from lenskit import batch, topn

    ratings = lktu.ml100k.ratings

    algo_t = ALS(25)

    def eval(train, test):
        _log.info('running training')
        train['rating'] = train.rating.astype(np.float_)
        algo = util.clone(algo_t)
        algo.fit(train)
        users = test.user.unique()
        _log.info('testing %d users', len(users))
        recs = batch.recommend(algo, users, 100)
        return recs

    folds = list(xf.partition_users(ratings, 5, xf.SampleFrac(0.2)))
    test = pd.concat(f.test for f in folds)
    recs = pd.concat(eval(train, test) for (train, test) in folds)

    _log.info('analyzing recommendations')
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    results = rla.compute(recs, test)
    dcg = results.ndcg
    _log.info('nDCG for %d users is %.4f', len(dcg), dcg.mean())
    assert dcg.mean() > 0
def test_ii_batch_recommend(ncpus):
    import lenskit.crossfold as xf
    from lenskit import batch, topn

    ratings = lktu.ml100k.ratings

    def eval(train, test):
        _log.info('running training')
        algo = knn.ItemItem(30)
        algo = Recommender.adapt(algo)
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        recs = batch.recommend(algo, test.user.unique(), 100, n_jobs=ncpus)
        return recs

    test_frames = []
    recs = []
    for train, test in xf.partition_users(ratings, 5, xf.SampleFrac(0.2)):
        test_frames.append(test)
        recs.append(eval(train, test))

    test = pd.concat(test_frames)
    recs = pd.concat(recs)

    _log.info('analyzing recommendations')
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    results = rla.compute(recs, test)
    dcg = results.ndcg
    _log.info('nDCG for %d users is %f', len(dcg), dcg.mean())
    assert dcg.mean() > 0.03
def test_ii_batch_recommend(ncpus):
    import lenskit.crossfold as xf
    from lenskit import batch, topn

    if not os.path.exists('ml-100k/u.data'):
        pytest.skip('ml-100k data not available')
    ratings = pd.read_csv('ml-100k/u.data', sep='\t',
                          names=['user', 'item', 'rating', 'timestamp'])

    def eval(train, test):
        _log.info('running training')
        algo = knn.ItemItem(30)
        algo = Recommender.adapt(algo)
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        recs = batch.recommend(algo, test.user.unique(), 100, n_jobs=ncpus)
        return recs

    test_frames = []
    recs = []
    for train, test in xf.partition_users(ratings, 5, xf.SampleFrac(0.2)):
        test_frames.append(test)
        recs.append(eval(train, test))

    test = pd.concat(test_frames)
    recs = pd.concat(recs)

    _log.info('analyzing recommendations')
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    results = rla.compute(recs, test)
    dcg = results.ndcg
    _log.info('nDCG for %d users is %f', len(dcg), dcg.mean())
    assert dcg.mean() > 0.03
def test_inner_format():
    rla = topn.RecListAnalysis()

    recs = pd.DataFrame({
        'data': 'a',
        'user': ['a', 'a', 'a', 'b', 'b'],
        'item': [2, 3, 1, 4, 5],
        'rank': [1, 2, 3, 1, 2]
    })
    truth = pd.DataFrame({
        'user': ['a', 'a', 'a', 'b', 'b', 'b'],
        'item': [1, 2, 3, 1, 5, 6],
        'rating': [3.0, 5.0, 4.0, 3.0, 5.0, 4.0]
    })

    def inner(recs, truth, foo='a'):
        assert foo == 'b'
        assert set(recs.columns) == set(['data', 'user', 'item', 'rank'])
        assert len(recs[['data', 'user']].drop_duplicates()) == 1
        assert truth.index.name == 'item'
        assert truth.index.is_unique
        assert all(truth.columns == ['rating'])
        return len(recs.join(truth, on='item', how='inner'))

    rla.add_metric(inner, name='bob', foo='b')

    res = rla.compute(recs, truth)
    print(res)

    assert len(res) == 2
    assert res.index.nlevels == 2
    assert res.index.names == ['data', 'user']
    assert all(res.index.levels[0] == 'a')
    assert all(res.index.levels[1] == ['a', 'b'])
    assert all(res.reset_index().user == ['a', 'b'])
    assert all(res['bob'] == [3, 1])
def eval_ndcg(all_recs, test_data):
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    results = rla.compute(all_recs, test_data)
    print('Normalized Discounted Cumulative Gain table head:')
    print(results.head())
    return results.groupby('Algorithm').ndcg.mean()
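# Hedged usage sketch for eval_ndcg: the groupby above requires an 'Algorithm'
# column on the recommendation frame, so per-algorithm frames must be tagged
# before concatenation (the names recs_ii, recs_als, test_data are
# hypothetical here):
#
#     recs_ii['Algorithm'] = 'ItemItem'
#     recs_als['Algorithm'] = 'ALS'
#     all_recs = pd.concat([recs_ii, recs_als], ignore_index=True)
#     mean_ndcg = eval_ndcg(all_recs, test_data)  # Series indexed by algorithm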
def test_tf_bpr_batch_accuracy(tf_session):
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    from lenskit import batch, topn

    ratings = lktu.ml100k.ratings

    algo = lktf.BPR(20, batch_size=1024, epochs=20, rng_spec=42)
    algo = Recommender.adapt(algo)

    all_recs = []
    all_test = []
    for train, test in xf.partition_users(ratings, 5, xf.SampleFrac(0.2)):
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        recs = batch.recommend(algo, np.unique(test.user), 50)
        all_recs.append(recs)
        all_test.append(test)

    _log.info('analyzing results')
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    rla.add_metric(topn.recip_rank)
    scores = rla.compute(pd.concat(all_recs, ignore_index=True),
                         pd.concat(all_test, ignore_index=True),
                         include_missing=True)
    scores.fillna(0, inplace=True)
    _log.info('MRR: %f', scores['recip_rank'].mean())
    _log.info('nDCG: %f', scores['ndcg'].mean())
    assert scores['ndcg'].mean() > 0.1
def test_spec_group_cols():
    rla = topn.RecListAnalysis(group_cols=['data', 'user'])
    rla.add_metric(topn.precision)
    rla.add_metric(topn.recall)
    rla.add_metric(topn.ndcg)

    recs = pd.DataFrame({
        'data': 'a',
        'user': ['a', 'a', 'a', 'b', 'b'],
        'item': [2, 3, 1, 4, 5],
        'rank': [1, 2, 3, 1, 2],
        'wombat': np.random.randn(5)
    })
    truth = pd.DataFrame({
        'user': ['a', 'a', 'a', 'b', 'b', 'b'],
        'item': [1, 2, 3, 1, 5, 6],
        'rating': [3.0, 5.0, 4.0, 3.0, 5.0, 4.0]
    })

    res = rla.compute(recs, truth)
    print(res)

    assert len(res) == 2
    assert res.index.nlevels == 2
    assert res.index.names == ['data', 'user']
    assert all(res.index.levels[0] == 'a')
    assert all(res.index.levels[1] == ['a', 'b'])
    assert all(res.reset_index().user == ['a', 'b'])

    partial_ndcg = _dcg([0.0, 5.0]) / _dcg([5, 4, 3])
    assert res.ndcg.values == approx([1.0, partial_ndcg])
    assert res.precision.values == approx([1.0, 1 / 2])
    assert res.recall.values == approx([1.0, 1 / 3])
def test_run_one():
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.precision)
    rla.add_metric(topn.recall)

    recs = pd.DataFrame({'user': 1, 'item': [2]})
    recs.name = 'recs'
    truth = pd.DataFrame({
        'user': 1,
        'item': [1, 2, 3],
        'rating': [3.0, 5.0, 4.0]
    })
    truth.name = 'truth'
    print(recs)
    print(truth)

    res = rla.compute(recs, truth)
    print(res)

    assert res.index.name == 'user'
    assert res.index.is_unique
    assert len(res) == 1
    assert all(res.index == 1)
    assert all(res.precision == 1.0)
    assert res.recall.values == approx(1 / 3)
def test_fill_users():
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.precision)
    rla.add_metric(topn.recall)

    algo = UserUser(20, min_nbrs=10)
    algo = Recommender.adapt(algo)

    splits = xf.sample_users(ml_test.ratings, 1, 50, xf.SampleN(5))
    train, test = next(splits)
    algo.fit(train)

    rec_users = test['user'].sample(50).unique()
    recs = batch.recommend(algo, rec_users, 25)

    scores = rla.compute(recs, test, include_missing=True)
    assert len(scores) == test['user'].nunique()
    assert scores['recall'].notna().sum() == len(rec_users)
    assert all(scores['ntruth'] == 5)

    mscores = rla.compute(recs, test)
    assert len(mscores) < len(scores)

    recall = scores.loc[scores['recall'].notna(), 'recall'].copy()
    recall, mrecall = recall.align(mscores['recall'])
    assert all(recall == mrecall)
def _measure_recs(recs, test):
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.recall)
    rla.add_metric(topn.recip_rank)
    rla.add_metric(topn.ndcg)
    return rla.compute(recs[['user', 'item', 'score']], test, include_missing=True)
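# Note: with include_missing=True, test users who received no recommendations
# come back as NaN rows. A hedged follow-up sketch, mirroring the fillna(0)
# pattern used in _run_eval below (`recs` and `test` assumed in scope):
#
#     scores = _measure_recs(recs, test)
#     mrr = scores['recip_rank'].fillna(0).mean()  # count missing users as 0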
def test_uu_implicit_batch_accuracy():
    from lenskit import batch, topn
    import lenskit.crossfold as xf

    ratings = lktu.ml100k.ratings

    algo = knn.UserUser(30, center=False, aggregate='sum')

    folds = list(xf.partition_users(ratings, 5, xf.SampleFrac(0.2)))
    all_test = pd.concat(f.test for f in folds)

    rec_lists = []
    for train, test in folds:
        _log.info('running training')
        rec_algo = Recommender.adapt(algo)
        rec_algo.fit(train.loc[:, ['user', 'item']])
        _log.info('testing %d users', test.user.nunique())
        recs = batch.recommend(rec_algo, test.user.unique(), 100, n_jobs=2)
        rec_lists.append(recs)
    recs = pd.concat(rec_lists)

    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    results = rla.compute(recs, all_test)
    user_dcg = results.ndcg

    dcg = user_dcg.mean()
    assert dcg >= 0.03
def _run_eval(self, params):
    timer = Stopwatch()
    _log.info('evaluating at %s', params)
    if self.retrainer:
        if not self.retrainer.initialized:
            self.retrainer.fit_initial(self.train)
        algo = self.retrainer.instantiate(params)
    else:
        algo = self.module.instantiate(params, not self.explicit)
        algo = Recommender.adapt(algo)

    _log.info('[%s] train %s', timer, algo)
    algo.fit(self.train)

    _log.info('[%s] recommend %s', timer, algo)
    users = self.test['user'].unique()
    recs = batch.recommend(algo, users, self.n_recs, n_jobs=self.n_jobs)
    if len(recs) == 0:
        _log.info('[%s] %s produced no recommendations', timer, algo)
        return 0

    _log.info('[%s] evaluate %s', timer, algo)
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.recip_rank)
    rla.add_metric(topn.recall)
    scores = rla.compute(recs, self.test, include_missing=True)
    assert len(scores) == len(self.test)
    mrr = scores['recip_rank'].fillna(0).mean()
    hr = scores['recall'].fillna(0).mean()
    _log.info('%s had MRR of %.3f', algo, mrr)
    _log.info('%s had hit rate of %.3f', algo, hr)
    return -mrr
def getMetrics(df_test, df, N):
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    rla.add_metric(topn.recall)
    # RecListAnalysis.compute expects (recommendations, truth), in that order
    results = rla.compute(df_test, df)
    ndcg_mean = results['ndcg'].mean()
    recall_mean = results['recall'].mean()
    return ndcg_mean, recall_mean
def check_positive_ndcg(self, recs):
    _log.info('analyzing recommendations')
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    results = rla.compute(recs, self.test)
    dcg = results.ndcg
    _log.info('nDCG for %d users is %f (max=%f)', len(dcg), dcg.mean(), dcg.max())
    assert dcg.mean() > 0
def metrics(recs, truth):
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    rla.add_metric(topn.precision)
    results = rla.compute(recs, truth)
    # results.head()
    print(results.groupby('Algorithm').precision.mean())
    print(results.groupby('Algorithm').ndcg.mean())
def analyze_performance(self, recs):
    """Compute the algorithm's performance, using the nDCG metric."""
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    results = rla.compute(recs, self.test)
    print(results.groupby('Algorithm').ndcg.mean())
    return results
class LegMedLensKit():
    def loadData():
        ratings = pd.read_csv('/Users/josse/Desktop/ratings.dat', sep='::',
                              names=['user', 'item', 'rating', 'timestamp'])
        print(ratings.head())
        return ratings

    ratings = loadData()
    data_matrix = np.array(ratings.pivot(index='item', columns='user', values='rating'))
    print(data_matrix)
    data_matrix_rev = np.nan_to_num(data_matrix)
    print(data_matrix_rev)

    algo_ii = knn.ItemItem(20)
    algo_als = als.BiasedMF(50)

    def eval(aname, algo, train, test):
        print("test")
        fittable = util.clone(algo)
        fittable = Recommender.adapt(fittable)
        fittable.fit(train)
        users = test.user.unique()
        # now we run the recommender
        recs = batch.recommend(fittable, users, 100)
        # add the algorithm name for analyzability
        recs['Algorithm'] = aname
        print("recs")
        print(recs.head())
        return recs

    all_recs = []
    test_data = []
    for train, test in xf.partition_users(ratings[['user', 'item', 'rating']], 1, xf.SampleFrac(0.2)):
        test_data.append(test)
        all_recs.append(eval('ItemItem', algo_ii, train, test))
        all_recs.append(eval('ALS', algo_als, train, test))

    print("test2")
    all_recs = pd.concat(all_recs, ignore_index=True)
    print(all_recs.head())
    test_data = pd.concat(test_data, ignore_index=True)

    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    results = rla.compute(all_recs, test_data)
    results.head()
    results.groupby('Algorithm').ndcg.mean()
    results.groupby('Algorithm').ndcg.mean().plot.bar()
def test_adv_fill_users():
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.precision)
    rla.add_metric(topn.recall)

    a_uu = UserUser(30, min_nbrs=10)
    a_uu = Recommender.adapt(a_uu)
    a_ii = ItemItem(20, min_nbrs=4)
    a_ii = Recommender.adapt(a_ii)

    splits = xf.sample_users(ml_test.ratings, 2, 50, xf.SampleN(5))
    all_recs = {}
    all_test = {}
    for i, (train, test) in enumerate(splits):
        a_uu.fit(train)
        rec_users = test['user'].sample(50).unique()
        all_recs[(i + 1, 'UU')] = batch.recommend(a_uu, rec_users, 25)

        a_ii.fit(train)
        rec_users = test['user'].sample(50).unique()
        all_recs[(i + 1, 'II')] = batch.recommend(a_ii, rec_users, 25)

        all_test[i + 1] = test

    recs = pd.concat(all_recs, names=['part', 'algo'])
    recs.reset_index(['part', 'algo'], inplace=True)
    recs.reset_index(drop=True, inplace=True)

    test = pd.concat(all_test, names=['part'])
    test.reset_index(['part'], inplace=True)
    test.reset_index(drop=True, inplace=True)

    scores = rla.compute(recs, test, include_missing=True)
    inames = scores.index.names
    scores.sort_index(inplace=True)
    assert len(scores) == 50 * 4
    assert all(scores['ntruth'] == 5)
    assert scores['recall'].isna().sum() > 0
    _log.info('scores:\n%s', scores)

    ucounts = scores.reset_index().groupby('algo')['user'].agg(['count', 'nunique'])
    assert all(ucounts['count'] == 100)
    assert all(ucounts['nunique'] == 100)

    mscores = rla.compute(recs, test)
    mscores = mscores.reset_index().set_index(inames)
    mscores.sort_index(inplace=True)
    assert len(mscores) < len(scores)
    _log.info('mscores:\n%s', mscores)

    recall = scores.loc[scores['recall'].notna(), 'recall'].copy()
    recall, mrecall = recall.align(mscores['recall'])
    assert all(recall == mrecall)
def test_java_equiv():
    dir = Path(__file__).parent
    metrics = pd.read_csv(str(dir / 'topn-java-metrics.csv'))
    recs = pd.read_csv(str(dir / 'topn-java-recs.csv'))
    truth = pd.read_csv(str(dir / 'topn-java-truth.csv'))

    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    res = rla.compute(recs, truth)

    umm = pd.merge(metrics, res.reset_index())
    umm['err'] = umm['ndcg'] - umm['Java.nDCG']
    _log.info('merged:\n%s', umm)
    assert umm['err'].values == approx(0, abs=1.0e-6)
def test_precision_bulk_k(demo_recs):
    "bulk and normal match"
    train, test, recs = demo_recs
    assert test['user'].value_counts().max() > 5

    rla = topn.RecListAnalysis()
    rla.add_metric(precision, name='pk', k=5)
    rla.add_metric(precision)
    # metric without the bulk capabilities
    rla.add_metric(lambda *a, **k: precision(*a, **k), name='ind_pk', k=5)
    rla.add_metric(lambda *a: precision(*a), name='ind_p')
    res = rla.compute(recs, test)

    assert res.precision.values == approx(res.ind_p.values)
    assert res.pk.values == approx(res.ind_pk.values)
def evaluate_model_recommendations(recommendations, test, metrics) -> pd.DataFrame:
    """Evaluate a model via its recommendations.

    :param recommendations: pd.DataFrame with at least the following columns:
        'user', 'item', 'score', 'rank'
    :param test: pd.DataFrame. The testing data.
    :param metrics: list. A list of metric names (see recodiv.model.METRICS).
    """
    analysis = topn.RecListAnalysis(n_jobs=20)
    users = test.user.unique()
    rec_users = recommendations['user'].unique()

    for metric_name in metrics:
        analysis.add_metric(METRICS[metric_name])

    return analysis.compute(recommendations, test)
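# evaluate_model_recommendations looks metrics up by name in a METRICS dict.
# A minimal sketch of what such a registry could look like -- an assumed shape
# for illustration; the real mapping lives in recodiv.model.METRICS:
from lenskit import topn

METRICS_SKETCH = {
    'ndcg': topn.ndcg,
    'precision': topn.precision,
    'recall': topn.recall,
    'recip_rank': topn.recip_rank,
}

# e.g. evaluate_model_recommendations(recs, test, ['ndcg', 'recall'])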
def test_recall_bulk_k(demo_recs):
    "bulk and normal match"
    train, test, recs = demo_recs
    assert test['user'].value_counts().max() > 5

    rla = topn.RecListAnalysis()
    rla.add_metric(recall, name='rk', k=5)
    rla.add_metric(recall)
    # metric without the bulk capabilities
    rla.add_metric(lambda *a, **k: recall(*a, **k), name='ind_rk', k=5)
    rla.add_metric(lambda *a: recall(*a), name='ind_r')
    res = rla.compute(recs, test)
    print(res)
    _log.info('recall mismatches:\n%s', res[res.recall != res.ind_r])

    assert res.recall.values == approx(res.ind_r.values)
    assert res.rk.values == approx(res.ind_rk.values)
def test_pr_bulk_match(demo_recs, drop_rating):
    "bulk and normal match"
    train, test, recs = demo_recs
    if drop_rating:
        test = test[['user', 'item']]

    rla = topn.RecListAnalysis()
    rla.add_metric(precision)
    rla.add_metric(recall)
    # metric without the bulk capabilities
    rla.add_metric(lambda *a: precision(*a), name='ind_p')
    rla.add_metric(lambda *a: recall(*a), name='ind_r')
    res = rla.compute(recs, test)
    print(res)
    _log.info('precision mismatches:\n%s', res[res.precision != res.ind_p])
    _log.info('recall mismatches:\n%s', res[res.recall != res.ind_r])

    assert res.precision.values == approx(res.ind_p.values)
    assert res.recall.values == approx(res.ind_r.values)
def test_als_implicit_batch_accuracy():
    import lenskit.crossfold as xf
    from lenskit import batch
    from lenskit import topn

    ratings = lktu.ml100k.ratings

    def eval(train, test):
        train['rating'] = train.rating.astype(np.float_)
        _log.info('training CG')
        cg_algo = als.ImplicitMF(25, iterations=20, method='cg')
        cg_algo = Recommender.adapt(cg_algo)
        cg_algo.fit(train)
        _log.info('training LU')
        lu_algo = als.ImplicitMF(25, iterations=20, method='lu')
        lu_algo = Recommender.adapt(lu_algo)
        lu_algo.fit(train)
        users = test.user.unique()
        _log.info('testing %d users', len(users))
        cg_recs = batch.recommend(cg_algo, users, 100, n_jobs=2)
        lu_recs = batch.recommend(lu_algo, users, 100, n_jobs=2)
        return pd.concat({
            'CG': cg_recs,
            'LU': lu_recs
        }, names=['Method']).reset_index('Method')

    folds = list(xf.partition_users(ratings, 5, xf.SampleFrac(0.2)))
    test = pd.concat(te for (tr, te) in folds)
    recs = pd.concat((eval(train, test) for (train, test) in folds),
                     ignore_index=True)

    _log.info('analyzing recommendations')
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    results = rla.compute(recs, test)
    results = results.groupby('Method')['ndcg'].mean()
    _log.info('LU nDCG for users is %.4f', results.loc['LU'].mean())
    _log.info('CG nDCG for users is %.4f', results.loc['CG'].mean())
    assert all(results > 0.28)
    assert results.loc['LU'] == approx(results.loc['CG'], rel=0.05)
def objective_fn(params: Dict[str, Any]):
    algo = als.BiasedMF(
        features=params["features"],
        iterations=params["iteration"],
        reg=0.1,
        damping=5,
    )
    model = util.clone(algo)
    model = Recommender.adapt(model)
    model.fit(train_df)
    recs = batch.recommend(model, test_users, recsize)

    rla = topn.RecListAnalysis()
    rla.add_metric(topn.ndcg)
    results = rla.compute(recs, test_df)
    target_metric = -results.ndcg.mean()

    return {"loss": target_metric, "status": STATUS_OK}
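# Hedged usage sketch: objective_fn follows the hyperopt protocol (STATUS_OK),
# so it would typically be minimized with fmin over a search space such as the
# one below. The space values are illustrative assumptions; train_df,
# test_users, test_df, and recsize must already be defined in the enclosing
# scope.
from hyperopt import fmin, tpe, hp

space = {
    "features": hp.choice("features", [25, 50, 100]),
    "iteration": hp.choice("iteration", [10, 20]),
}
best = fmin(objective_fn, space, algo=tpe.suggest, max_evals=20)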
def test_split_keys():
    rla = topn.RecListAnalysis()
    recs, truth = topn._df_keys(['algorithm', 'user', 'item', 'rank', 'score'],
                                ['user', 'item', 'rating'])
    assert truth == ['user']
    assert recs == ['algorithm', 'user']
def RR(rec, truth):
    # recs = pd.read_parquet(file_name)
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.recip_rank)
    RR_result = rla.compute(rec, truth)
    return RR_result
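# Hedged usage sketch for RR: the result is a per-user frame with a
# 'recip_rank' column, so mean reciprocal rank is just its average
# (`recs` and `truth` are hypothetical frames with user/item columns):
#
#     mrr = RR(recs, truth)['recip_rank'].mean()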