def test_uu_train():
    """Fit UserUser on ml_ratings and verify its learned state.

    Checks that fit() returns the algorithm itself, that the stored
    per-user means match a pandas groupby, and that the transpose
    matrix's (mean-centered) values reconstruct the original ratings.
    """
    algo = knn.UserUser(30)
    ret = algo.fit(ml_ratings)
    assert ret is algo

    # it should have computed correct means
    umeans = ml_ratings.groupby('user').rating.mean()
    mlmeans = pd.Series(algo.user_means_, index=algo.user_index_, name='mean')
    umeans, mlmeans = umeans.align(mlmeans)
    assert mlmeans.values == approx(umeans.values)

    # we should be able to reconstruct rating values
    uir = ml_ratings.set_index(['user', 'item']).rating
    r_items = algo.transpose_matrix_.rowinds()
    ui_rbdf = pd.DataFrame({
        'user': algo.user_index_[algo.transpose_matrix_.colinds],
        'item': algo.item_index_[r_items],
        'nrating': algo.transpose_matrix_.values
    }).set_index(['user', 'item'])
    ui_rbdf = ui_rbdf.join(mlmeans)
    # stored values are mean-centered, so adding the user mean back
    # should recover the original rating exactly
    ui_rbdf['rating'] = ui_rbdf['nrating'] + ui_rbdf['mean']
    ui_rbdf['orig_rating'] = uir
    assert ui_rbdf.rating.values == approx(ui_rbdf.orig_rating.values)
def test_uu_known_preds():
    """Regression-test UserUser against a CSV of previously computed predictions.

    Every pair with a known (non-NA) expected value must be predicted,
    and all predictions must agree with the stored values within 0.01.
    """
    from lenskit import batch

    algo = knn.UserUser(30, min_sim=1.0e-6)
    _log.info('training %s on ml data', algo)
    algo.fit(lktu.ml_test.ratings)

    # known predictions live next to this test file
    dir = Path(__file__).parent
    pred_file = dir / 'user-user-preds.csv'
    _log.info('reading known predictions from %s', pred_file)
    known_preds = pd.read_csv(str(pred_file))
    pairs = known_preds.loc[:, ['user', 'item']]

    _log.info('generating %d known predictions', len(pairs))
    preds = batch.predict(algo, pairs)
    merged = pd.merge(known_preds.rename(columns={'prediction': 'expected'}), preds)
    assert len(merged) == len(preds)
    merged['error'] = merged.expected - merged.prediction
    # no prediction may be missing where an expected value exists;
    # on failure, log the offending rows before re-raising
    try:
        assert not any(merged.prediction.isna() & merged.expected.notna())
    except AssertionError as e:
        bad = merged[merged.prediction.isna() & merged.expected.notna()]
        _log.error('%d missing predictions:\n%s', len(bad), bad)
        raise e

    err = merged.error
    err = err[err.notna()]
    # tolerance of 0.01 against the stored reference predictions
    try:
        assert all(err.abs() < 0.01)
    except AssertionError as e:
        bad = merged[merged.error.notna() & (merged.error.abs() >= 0.01)]
        _log.error('%d erroneous predictions:\n%s', len(bad), bad)
        raise e
def test_uu_predict_unknown_empty():
    "An unknown user with no supplied ratings gets only NaN predictions."
    uu = knn.UserUser(30, min_nbrs=2)
    uu.fit(ml_ratings)

    scores = uu.predict_for_user(-28018, [1016, 2091])
    assert len(scores) == 2
    assert scores.isna().all()
def test_uu_implicit_batch_accuracy():
    "Check recommendation NDCG of implicit-feedback UserUser over 5 CV folds."
    from lenskit import batch, topn
    import lenskit.crossfold as xf

    ratings = lktu.ml100k.ratings
    uu = knn.UserUser(30, center=False, aggregate='sum')

    splits = list(xf.partition_users(ratings, 5, xf.SampleFrac(0.2)))
    all_test = pd.concat(f.test for f in splits)

    rec_frames = []
    for train, test in splits:
        _log.info('running training')
        rec = Recommender.adapt(uu)
        # implicit: train on user/item pairs only, no rating column
        rec.fit(train.loc[:, ['user', 'item']])
        _log.info('testing %d users', test.user.nunique())
        rec_frames.append(batch.recommend(rec, test.user.unique(), 100, n_jobs=2))

    recs = pd.concat(rec_frames)

    analysis = topn.RecListAnalysis()
    analysis.add_metric(topn.ndcg)
    results = analysis.compute(recs, all_test)
    assert results.ndcg.mean() >= 0.03
def user_movie_recommend(ratings, optionList, userId):
    """Produce recommendations for one user with each selected algorithm.

    Parameters
    ----------
    ratings :
        Ratings data passed through to ``user_eval``.
    optionList : iterable of int
        Algorithm selectors 1-6; unrecognized options are silently
        skipped, matching the original if-chain behavior.
    userId :
        Identifier of the user to recommend for.

    Returns
    -------
    pandas.DataFrame
        Concatenation of every selected algorithm's ``user_eval`` result.
    """
    # Replaces six near-identical `if option == N` branches with a
    # dispatch table. Models are built lazily (only when requested),
    # exactly as the original chain constructed them on demand.
    factories = {
        1: ('BasicBias', lambda: basic.Bias()),
        2: ('ItemItem', lambda: iknn.ItemItem(20)),
        3: ('UserUser', lambda: uknn.UserUser(20)),
        4: ('ALS-Biased', lambda: als.BiasedMF(50)),
        5: ('ALS-Implicit', lambda: als.ImplicitMF(50)),
        6: ('FunkSVD', lambda: funksvd.FunkSVD(50)),
    }

    all_recs = []
    for option in optionList:
        entry = factories.get(option)
        if entry is not None:
            name, make = entry
            all_recs.append(user_eval(name, make(), ratings, userId))

    # Note: pd.concat raises on an empty list, as the original did when
    # no option matched.
    return pd.concat(all_recs, ignore_index=True)
def test_alogrithms():
    "End-to-end comparison of several algorithms on the ML-1M data set."
    data = ML1M('ml-1m')
    ratings = data.ratings
    print('Initial ratings table head:')
    print(ratings.head())

    algorithms = [
        basic.Bias(damping=5),
        basic.Popular(),
        item_knn.ItemItem(20),
        user_knn.UserUser(20),
        als.BiasedMF(50),
        als.ImplicitMF(50),
        funksvd.FunkSVD(50),
    ]

    # 5-fold cross-validation, holding out 20% of each user's ratings
    folds = list(
        partition_users(ratings[['user', 'item', 'rating']], 5, SampleFrac(0.2)))
    eval_algorithms(dataset=folds, algorithms=algorithms)

    runs = display_runs()
    recs = display_recommendations()
    truth = pd.concat((fold.test for fold in folds), ignore_index=True)
    means = check_recommendations(runs, recs, truth)
    print('NDCG means:')
    print(means)
    plot_comparison(means)
def test_uu_predict_one():
    "Predict a single known item for a known user."
    uu = knn.UserUser(30)
    uu.fit(ml_ratings)

    result = uu.predict_for_user(4, [1016])
    assert len(result) == 1
    assert result.index == [1016]
    assert result.values == approx([3.62221550680778])
def test_uu_predict_too_few_blended():
    "Items lacking min_nbrs neighbors are NaN while others still score."
    uu = knn.UserUser(30, min_nbrs=2)
    uu.fit(ml_ratings)

    result = uu.predict_for_user(4, [1016, 2091])
    assert len(result) == 2
    assert np.isnan(result.loc[2091])
    assert result.loc[1016] == approx(3.62221550680778)
def test_uu_predict_too_few():
    "An item with fewer than min_nbrs neighbors yields NaN."
    uu = knn.UserUser(30, min_nbrs=2)
    uu.fit(ml_ratings)

    result = uu.predict_for_user(4, [2091])
    assert len(result) == 1
    assert result.index == [2091]
    assert result.isna().all()
def userKNN(self, nnbrs, aggregate, center, min_nbrs=3):
    """Build and evaluate a user-user KNN algorithm.

    Parameters
    ----------
    nnbrs : int
        Maximum number of neighbors to consider.
    aggregate : str
        Aggregation mode passed to ``user_knn.UserUser``.
    center : bool
        Whether to mean-center ratings.
    min_nbrs : int, optional
        Minimum neighbors required to score an item (default 3).

    Returns
    -------
    Whatever ``self.eval`` returns for the configured algorithm.
    """
    algoname = "userKNN"
    user_user = user_knn.UserUser(nnbrs=nnbrs, min_nbrs=min_nbrs,
                                  aggregate=aggregate, center=center)
    # renamed local from `eval` — it shadowed the builtin of that name
    result = self.eval(algoname, user_user)
    print("UserKNN was fitted.")
    return result
def test_uu_train_adapt():
    "Test training an adapted user-user (#129)."
    from lenskit.algorithms import Recommender

    inner = knn.UserUser(30)
    rec = Recommender.adapt(inner)
    fitted = rec.fit(ml_ratings)
    assert fitted is rec
    assert isinstance(rec.predictor, knn.UserUser)
def test_uu_predict_live_ratings():
    "Predict using ratings supplied at query time rather than at fit time."
    uu = knn.UserUser(30, min_nbrs=2)
    without_user4 = ml_ratings[ml_ratings.user != 4]
    uu.fit(without_user4)

    # user 4's ratings are handed in live, under a fresh user id
    live = ml_ratings[ml_ratings.user == 4].set_index('item').rating
    result = uu.predict_for_user(20381, [1016, 2091], live)
    assert len(result) == 2
    assert np.isnan(result.loc[2091])
    assert result.loc[1016] == approx(3.62221550680778)
def test_uu_implicit():
    "Train and use user-user on an implicit data set."
    # NOTE(review): a second test with this same name appears later in this
    # file; under pytest the later definition shadows this one — rename one.
    algo = knn.UserUser(20, center=False, aggregate='sum')
    data = ml_ratings.loc[:, ['user', 'item']]

    algo.fit(data)
    # implicit training keeps no user means
    assert algo.user_means_ is None

    # each user's row should be L2-normalized to unit length
    mat = matrix.csr_to_scipy(algo.rating_matrix_)
    norms = sps.linalg.norm(mat, 2, 1)
    assert norms == approx(1.0)

    preds = algo.predict_for_user(50, [1, 2, 42])
    # implicit scores, where defined, must be positive
    assert all(preds[preds.notna()] > 0)
def test_uu_implicit():
    "Train and use user-user on an implicit data set."
    # NOTE(review): duplicates the name of an earlier test in this file
    # (older matrix API); this later definition shadows it under pytest.
    algo = knn.UserUser(20, feedback='implicit')
    data = ml_ratings.loc[:, ['user', 'item']]

    algo.fit(data)
    # implicit training keeps no user means
    assert algo.user_means_ is None

    # each user's row should be L2-normalized to unit length
    mat = algo.rating_matrix_.to_scipy()
    norms = spla.norm(mat, 2, 1)
    assert norms == approx(1.0)

    preds = algo.predict_for_user(50, [1, 2, 42])
    # implicit scores, where defined, must be positive
    assert all(preds[preds.notna()] > 0)
def test_uu_save_load(tmp_path):
    """Save a trained UserUser with algo.save() and reload with algo.load(),
    then verify the restored means and rating reconstruction.

    NOTE(review): a later test in this file reuses this name (pickle-based);
    under pytest the later definition shadows this one — rename one.
    """
    tmp_path = lktu.norm_path(tmp_path)
    orig = knn.UserUser(30)
    _log.info('training model')
    orig.fit(ml_ratings)

    fn = tmp_path / 'uu.model'
    _log.info('saving to %s', fn)
    orig.save(fn)

    _log.info('reloading model')
    algo = knn.UserUser(30)
    algo.load(fn)

    _log.info('checking model')
    # it should have computed correct means
    umeans = ml_ratings.groupby('user').rating.mean()
    mlmeans = pd.Series(algo.user_means_, index=algo.user_index_, name='mean')
    umeans, mlmeans = umeans.align(mlmeans)
    assert mlmeans.values == approx(umeans.values)

    # we should be able to reconstruct rating values
    uir = ml_ratings.set_index(['user', 'item']).rating
    r_items = matrix.csr_rowinds(algo.transpose_matrix_)
    ui_rbdf = pd.DataFrame({
        'user': algo.user_index_[algo.transpose_matrix_.colinds],
        'item': algo.item_index_[r_items],
        'nrating': algo.transpose_matrix_.values
    }).set_index(['user', 'item'])
    ui_rbdf = ui_rbdf.join(mlmeans)
    # stored values are mean-centered; add user means back to recover ratings
    ui_rbdf['rating'] = ui_rbdf['nrating'] + ui_rbdf['mean']
    ui_rbdf['orig_rating'] = uir
    assert ui_rbdf.rating.values == approx(ui_rbdf.orig_rating.values)
def test_uu_save_load_implicit(tmp_path):
    "Save and load user-user on an implicit data set."
    # NOTE(review): a later test in this file reuses this name (pickle-based);
    # under pytest the later definition shadows this one — rename one.
    tmp_path = lktu.norm_path(tmp_path)
    orig = knn.UserUser(20, center=False, aggregate='sum')
    data = ml_ratings.loc[:, ['user', 'item']]
    orig.fit(data)
    orig.save(tmp_path / 'uu.mod')

    algo = knn.UserUser(20, center=False, aggregate='sum')
    algo.load(tmp_path / 'uu.mod')

    # implicit training keeps no user means
    assert algo.user_means_ is None
    assert all(algo.user_index_ == orig.user_index_)
    assert all(algo.item_index_ == orig.item_index_)

    # the CSR rating matrix must round-trip exactly
    assert all(algo.rating_matrix_.rowptrs == orig.rating_matrix_.rowptrs)
    assert all(algo.rating_matrix_.colinds == orig.rating_matrix_.colinds)
    assert all(algo.rating_matrix_.values == orig.rating_matrix_.values)

    assert all(
        algo.transpose_matrix_.rowptrs == orig.transpose_matrix_.rowptrs)
    assert all(
        algo.transpose_matrix_.colinds == orig.transpose_matrix_.colinds)
    # the transpose matrix is structure-only (no values) for implicit data
    assert algo.transpose_matrix_.values is None
def get_algo_class(self, algo):
    "Construct the algorithm object named by `algo`, or None if unknown."
    # Lazy builders so only the requested algorithm is constructed,
    # mirroring the original if/elif chain.
    builders = {
        'popular': lambda: basic.Popular(),
        'bias': lambda: basic.Bias(users=False),
        'topn': lambda: basic.TopN(basic.Bias()),
        'itemitem': lambda: iknn.ItemItem(nnbrs=-1),
        'useruser': lambda: uknn.UserUser(nnbrs=5),
        'biasedmf': lambda: als.BiasedMF(50, iterations=10),
        'implicitmf': lambda: als.ImplicitMF(20, iterations=10),
        'funksvd': lambda: svd.FunkSVD(20, iterations=20),
    }
    make = builders.get(algo)
    # unknown keys fall through to None, like the original elif chain
    return make() if make is not None else None
def get_topn_algo_class(algo):
    "Construct a top-N-ready algorithm for the given name, or None if unknown."
    # Lazy builders so only the requested algorithm is constructed,
    # mirroring the original if/elif chain.
    builders = {
        'popular': lambda: basic.Popular(),
        'bias': lambda: basic.TopN(basic.Bias()),
        'itemitem': lambda: basic.TopN(
            iknn.ItemItem(nnbrs=-1, center=False, aggregate='sum')),
        'useruser': lambda: basic.TopN(
            uknn.UserUser(nnbrs=5, center=False, aggregate='sum')),
        'biasedmf': lambda: basic.TopN(als.BiasedMF(50, iterations=10)),
        'implicitmf': lambda: basic.TopN(als.ImplicitMF(20, iterations=10)),
        'funksvd': lambda: basic.TopN(svd.FunkSVD(20, iterations=20)),
        'bpr': lambda: basic.TopN(BPR(25)),
    }
    make = builders.get(algo)
    # unknown keys fall through to None, like the original elif chain
    return make() if make is not None else None
def test_uu_batch_accuracy():
    """Measure UserUser prediction accuracy over 5 CV folds.

    Uses a Bias fallback for pairs UserUser cannot score, and checks
    MAE and mean per-user RMSE against expected tolerances.
    """
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    uu_algo = knn.UserUser(30)
    # fall back to a bias model when UserUser yields no prediction
    algo = basic.Fallback(uu_algo, basic.Bias())

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = [__batch_eval((algo, train, test)) for (train, test) in folds]
    preds = pd.concat(preds)
    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.71, abs=0.028)

    user_rmse = preds.groupby('user').apply(
        lambda df: pm.rmse(df.prediction, df.rating))
    assert user_rmse.mean() == approx(0.91, abs=0.055)
def test_uu_save_load(tmp_path):
    """Round-trip a trained UserUser through pickle and verify the reloaded
    model's means, matrices, and a known prediction.

    NOTE(review): an earlier test in this file also uses this name
    (save()/load()-based); this later definition shadows it under pytest.
    """
    orig = knn.UserUser(30)
    _log.info('training model')
    orig.fit(ml_ratings)

    fn = tmp_path / 'uu.model'
    _log.info('saving to %s', fn)
    with fn.open('wb') as f:
        pickle.dump(orig, f)

    _log.info('reloading model')
    with fn.open('rb') as f:
        algo = pickle.load(f)

    _log.info('checking model')
    # it should have computed correct means
    umeans = ml_ratings.groupby('user').rating.mean()
    mlmeans = pd.Series(algo.user_means_, index=algo.user_index_, name='mean')
    umeans, mlmeans = umeans.align(mlmeans)
    assert mlmeans.values == approx(umeans.values)

    # we should be able to reconstruct rating values
    uir = ml_ratings.set_index(['user', 'item']).rating
    r_items = algo.transpose_matrix_.rowinds()
    ui_rbdf = pd.DataFrame({
        'user': algo.user_index_[algo.transpose_matrix_.colinds],
        'item': algo.item_index_[r_items],
        'nrating': algo.transpose_matrix_.values
    }).set_index(['user', 'item'])
    ui_rbdf = ui_rbdf.join(mlmeans)
    # stored values are mean-centered; add user means back to recover ratings
    ui_rbdf['rating'] = ui_rbdf['nrating'] + ui_rbdf['mean']
    ui_rbdf['orig_rating'] = uir
    assert ui_rbdf.rating.values == approx(ui_rbdf.orig_rating.values)

    # running the predictor should work
    preds = algo.predict_for_user(4, [1016])
    assert len(preds) == 1
    assert preds.index == [1016]
    assert preds.values == approx([3.62221550680778])
def test_alogrithms():
    "Compare several named algorithms on ml-latest-small by NDCG."
    data = MovieLens('ml-latest-small')
    ratings = data.ratings
    print('Initial ratings table head:')
    print(ratings.head())

    algorithms = {
        'Bias': basic.Bias(damping=5),
        'Popular': basic.Popular(),
        'ItemItem': item_knn.ItemItem(20),
        'UserUser': user_knn.UserUser(20),
        'BiasedMF': als.BiasedMF(50),
        'ImplicitMF': als.ImplicitMF(50),
        'FunkSVD': funksvd.FunkSVD(50),
    }

    recommendations, held_out = eval_algos(ratings, algorithms)
    means = eval_ndcg(recommendations, held_out)
    print('NDCG means:')
    print(means)
    plot_comparison(means)
def test_uu_save_load_implicit(tmp_path):
    "Save and load user-user on an implicit data set."
    # NOTE(review): an earlier test in this file has this same name
    # (save()/load()-based); this later definition shadows it under pytest.
    orig = knn.UserUser(20, feedback='implicit')
    data = ml_ratings.loc[:, ['user', 'item']]
    orig.fit(data)
    ser = pickle.dumps(orig)

    algo = pickle.loads(ser)

    # implicit training keeps no user means
    assert algo.user_means_ is None
    assert all(algo.user_index_ == orig.user_index_)
    assert all(algo.item_index_ == orig.item_index_)

    # the CSR rating matrix must round-trip exactly through pickle
    assert all(algo.rating_matrix_.rowptrs == orig.rating_matrix_.rowptrs)
    assert all(algo.rating_matrix_.colinds == orig.rating_matrix_.colinds)
    assert all(algo.rating_matrix_.values == orig.rating_matrix_.values)
    assert all(algo.transpose_matrix_.rowptrs == orig.transpose_matrix_.rowptrs)
    assert all(algo.transpose_matrix_.colinds == orig.transpose_matrix_.colinds)
    # the transpose matrix is structure-only (no values) for implicit data
    assert algo.transpose_matrix_.values is None
def run(self, strategy_context: RecommenderAlgorithmStrategyContext
        ) -> np.ndarray:
    """Train a user-user recommender on a single 80/20 user split and return
    recommended item ids as a (num_users, number_of_recommendations) array.
    """
    data_set_source = strategy_context.data_set_source
    data_frame_reader: DataFrameReaderStrategy = self.data_frame_reader_factory.create(
        data_set_source)
    data_set: DataFrame = data_frame_reader.parse(
        DataFrameReaderStrategyContext(data_set_source))
    # single fold: hold out 20% of each user's rows as the test set
    partition = list(
        partition_users(data=data_set, partitions=1,
                        method=crossfold.SampleFrac(0.2)))[0]
    test, train = partition.test, partition.train
    number_of_recommendations = strategy_context.number_of_recommendations
    # neighborhood size is set to the recommendation count here —
    # NOTE(review): confirm that coupling is intentional
    algorithm = Recommender.adapt(
        user_knn.UserUser(number_of_recommendations))
    trained_algorithm = algorithm.fit(train)

    recommendations = lenskit.batch.recommend(trained_algorithm,
                                              test['user'].unique(),
                                              number_of_recommendations)
    # NOTE(review): the identity apply flattens items in per-user group
    # order; the reshape assumes every user received exactly
    # number_of_recommendations items — verify for users with few candidates.
    return recommendations.groupby('user')['item'].apply(
        lambda x: x).to_numpy().reshape((-1, number_of_recommendations))
def all_movie_recommends(ratings, optionList):
    """Evaluate the selected algorithms over 5 cross-validation folds.

    Parameters
    ----------
    ratings :
        Ratings frame with 'user', 'item', 'rating' columns.
    optionList : iterable of int
        Algorithm selectors 1-6; unrecognized options are skipped.

    Returns
    -------
    (all_recs, test_data)
        Concatenated recommendations from every fold/algorithm, and the
        concatenated held-out test data from every fold.
    """
    all_recs = []
    test_data = []
    # Declare algorithm models.
    # NOTE(review): a single instance per algorithm is shared across all
    # folds — confirm batch_eval refits it each time.
    basic_bias_model = basic.Bias()
    knn_model = iknn.ItemItem(20)
    knn_u_model = uknn.UserUser(20)
    als_b_model = als.BiasedMF(50)
    als_i_model = als.ImplicitMF(50)
    funk_model = funksvd.FunkSVD(50)

    for train, test in xf.partition_users(ratings[['user', 'item', 'rating']],
                                          5, xf.SampleFrac(0.2)):
        test_data.append(test)
        for option in optionList:
            if option == 1:
                all_recs.append(
                    batch_eval('BasicBias', basic_bias_model, train, test))
            if option == 2:
                all_recs.append(batch_eval('ItemItem', knn_model, train, test))
            if option == 3:
                all_recs.append(
                    batch_eval('UserUser', knn_u_model, train, test))
            if option == 4:
                all_recs.append(
                    batch_eval('ALS-Biased', als_b_model, train, test))
            if option == 5:
                all_recs.append(
                    batch_eval('ALS-Implicit', als_i_model, train, test))
            if option == 6:
                all_recs.append(batch_eval('FunkSVD', funk_model, train, test))

    all_recs = pd.concat(all_recs, ignore_index=True)
    test_data = pd.concat(test_data, ignore_index=True)
    return all_recs, test_data
def test_uu_implicit_batch_accuracy():
    """Check mean DCG of implicit-feedback UserUser over 5 CV folds.

    NOTE(review): this file contains another definition of this name using
    a newer batch/topn API; the later definition wins at import time.
    """
    from lenskit import batch, topn
    import lenskit.crossfold as xf
    import lenskit.metrics.topn as lm

    # NOTE(review): load_ratings() is the older ml100k accessor; other tests
    # here use the .ratings attribute — confirm which API this file targets.
    ratings = lktu.ml100k.load_ratings()

    algo = knn.UserUser(30, center=False, aggregate='sum')

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    rec_lists = []
    for train, test in folds:
        _log.info('running training')
        # implicit: fit on user/item pairs only
        algo.fit(train.loc[:, ['user', 'item']])
        cands = topn.UnratedCandidates(train)
        _log.info('testing %d users', test.user.nunique())
        recs = batch.recommend(algo, test.user.unique(), 100, cands, test)
        rec_lists.append(recs)
    recs = pd.concat(rec_lists)

    user_dcg = recs.groupby('user').rating.apply(lm.dcg)
    dcg = user_dcg.mean()
    assert dcg >= 0.1
def get_algo_class(algo):
    "Construct the algorithm object named by `algo`, or None if unknown."
    # Lazy builders so only the requested algorithm is constructed,
    # mirroring the original if/elif chain.
    factories = {
        'popular': lambda: basic.Popular(),
        'bias': lambda: basic.Bias(users=False),
        'topn': lambda: basic.TopN(basic.Bias()),
        'itemitem': lambda: iknn.ItemItem(nnbrs=-1),
        'useruser': lambda: uknn.UserUser(nnbrs=5),
        'biasedmf': lambda: als.BiasedMF(50, iterations=10),
        'implicitmf': lambda: als.ImplicitMF(20, iterations=10),
        'funksvd': lambda: svd.FunkSVD(20, iterations=20),
        'tf_bpr': lambda: lktf.BPR(20, batch_size=1024, epochs=5,
                                   neg_count=2, rng_spec=42),
    }
    make = factories.get(algo)
    # unknown names yield None, matching the original elif fall-through
    return make() if make is not None else None
def test_uu_imp_clone():
    "Cloning an implicit-feedback UserUser preserves params and attributes."
    original = knn.UserUser(30, feedback='implicit')
    duplicate = clone(original)
    assert duplicate.get_params() == original.get_params()
    assert duplicate.__dict__ == original.__dict__
def test_uu_imp_config():
    "feedback='implicit' selects sum aggregation, no centering, no ratings."
    uu = knn.UserUser(30, feedback='implicit')
    assert uu.nnbrs == 30
    assert uu.aggregate == 'sum'
    assert not uu.center
    assert not uu.use_ratings
def test_uu_exp_config():
    "feedback='explicit' selects weighted-average, centering, and ratings."
    uu = knn.UserUser(30, feedback='explicit')
    assert uu.nnbrs == 30
    assert uu.aggregate == 'weighted-average'
    assert uu.center
    assert uu.use_ratings
def test_uu_dft_config():
    "The default configuration matches explicit-feedback behavior."
    uu = knn.UserUser(30)
    assert uu.nnbrs == 30
    assert uu.aggregate == 'weighted-average'
    assert uu.center
    assert uu.use_ratings