def test_fsvd_save_load(tmp_path):
    tmp_path = lktu.norm_path(tmp_path)
    mod_file = tmp_path / 'funksvd.npz'
    ratings = lktu.ml_pandas.renamed.ratings

    original = svd.FunkSVD(20, iterations=20)
    original.fit(ratings)
    assert original.global_bias_ == approx(ratings.rating.mean())
    assert original.item_features_.shape == (ratings.item.nunique(), 20)
    assert original.user_features_.shape == (ratings.user.nunique(), 20)

    original.save(mod_file)
    assert mod_file.exists()

    algo = svd.FunkSVD(20, iterations=20)
    algo.load(mod_file)
    assert algo.global_bias_ == original.global_bias_
    assert np.all(algo.user_bias_ == original.user_bias_)
    assert np.all(algo.item_bias_ == original.item_bias_)
    assert np.all(algo.user_features_ == original.user_features_)
    assert np.all(algo.item_features_ == original.item_features_)
    assert np.all(algo.item_index_ == original.item_index_)
    assert np.all(algo.user_index_ == original.user_index_)
def user_movie_recommend(ratings, optionList, userId):
    all_recs = []
    for option in optionList:
        if option == 1:
            basic_bias_model = basic.Bias()
            all_recs.append(user_eval('BasicBias', basic_bias_model, ratings, userId))
        if option == 2:
            knn_model = iknn.ItemItem(20)
            all_recs.append(user_eval('ItemItem', knn_model, ratings, userId))
        if option == 3:
            knn_u_model = uknn.UserUser(20)
            all_recs.append(user_eval('UserUser', knn_u_model, ratings, userId))
        if option == 4:
            als_b_model = als.BiasedMF(50)
            all_recs.append(user_eval('ALS-Biased', als_b_model, ratings, userId))
        if option == 5:
            als_i_model = als.ImplicitMF(50)
            all_recs.append(user_eval('ALS-Implicit', als_i_model, ratings, userId))
        if option == 6:
            funk_model = funksvd.FunkSVD(50)
            all_recs.append(user_eval('FunkSVD', funk_model, ratings, userId))
    all_recs = pd.concat(all_recs, ignore_index=True)
    return all_recs
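# Hedged usage sketch (not part of the original code): assuming `ratings` is a
# user/item/rating frame and `user_eval` fits the given model and scores items
# for one user, a call enabling the Bias, ItemItem, and FunkSVD options might
# look like this.
recs = user_movie_recommend(ratings, [1, 2, 6], 42)
print(recs.head())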
def test_algorithms():
    # data = MovieLens('ml-latest-small')
    data = ML1M('ml-1m')
    ratings = data.ratings
    print('Initial ratings table head:')
    print(ratings.head())

    algorithms = [
        basic.Bias(damping=5),
        basic.Popular(),
        item_knn.ItemItem(20),
        user_knn.UserUser(20),
        als.BiasedMF(50),
        als.ImplicitMF(50),
        funksvd.FunkSVD(50)
    ]

    pairs = list(partition_users(ratings[['user', 'item', 'rating']], 5, SampleFrac(0.2)))
    eval_algorithms(dataset=pairs, algorithms=algorithms)
    runs = display_runs()
    recs = display_recommendations()
    truth = pd.concat((p.test for p in pairs), ignore_index=True)
    ndcg_means = check_recommendations(runs, recs, truth)
    print('NDCG means:')
    print(ndcg_means)
    plot_comparison(ndcg_means)
def test_fsvd_batch_accuracy():
    from lenskit.algorithms import basic
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    svd_algo = svd.FunkSVD(25, 125, damping=10)
    algo = basic.Fallback(svd_algo, bias.Bias(damping=10))

    def eval(train, test):
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(algo, test)

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = pd.concat(eval(train, test) for (train, test) in folds)
    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.74, abs=0.025)

    user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating))
    assert user_rmse.mean() == approx(0.92, abs=0.05)
def test_fsvd_clamp_build():
    algo = svd.FunkSVD(20, iterations=20, range=(1, 5))
    algo.fit(simple_df)
    assert algo.global_bias_ == approx(simple_df.rating.mean())
    assert algo.item_features_.shape == (3, 20)
    assert algo.user_features_.shape == (3, 20)
def test_fsvd_basic_build():
    algo = svd.FunkSVD(20, iterations=20)
    algo.fit(simple_df)
    assert algo.bias.mean_ == approx(simple_df.rating.mean())
    assert algo.item_features_.shape == (3, 20)
    assert algo.user_features_.shape == (3, 20)
def test_fsvd_train_binary():
    ratings = lktu.ml_test.ratings.drop(columns=['rating', 'timestamp'])
    original = svd.FunkSVD(20, iterations=20, bias=False)
    original.fit(ratings)
    assert original.global_bias_ == 0
    assert original.item_features_.shape == (ratings.item.nunique(), 20)
    assert original.user_features_.shape == (ratings.user.nunique(), 20)
def test_fsvd_predict_bad_item_clamp():
    algo = svd.FunkSVD(20, iterations=20, range=(1, 5))
    algo.fit(simple_df)
    assert algo.global_bias_ == approx(simple_df.rating.mean())
    assert algo.item_features_.shape == (3, 20)
    assert algo.user_features_.shape == (3, 20)

    preds = algo.predict_for_user(10, [4])
    assert len(preds) == 1
    assert preds.index[0] == 4
    assert np.isnan(preds.loc[4])
def test_fsvd_no_bias():
    algo = svd.FunkSVD(20, iterations=20, bias=None)
    algo.fit(simple_df)
    assert algo.bias is None
    assert algo.item_features_.shape == (3, 20)
    assert algo.user_features_.shape == (3, 20)

    preds = algo.predict_for_user(10, [3])
    assert len(preds) == 1
    assert preds.index[0] == 3
    assert all(preds.notna())
def test_fsvd_predict_bad_user():
    algo = svd.FunkSVD(20, iterations=20)
    algo.fit(simple_df)
    assert algo.bias.mean_ == approx(simple_df.rating.mean())
    assert algo.item_features_.shape == (3, 20)
    assert algo.user_features_.shape == (3, 20)

    preds = algo.predict_for_user(50, [3])
    assert len(preds) == 1
    assert preds.index[0] == 3
    assert np.isnan(preds.loc[3])
def test_fsvd_predict_basic():
    algo = svd.FunkSVD(20, iterations=20)
    algo.fit(simple_df)
    assert algo.global_bias_ == approx(simple_df.rating.mean())
    assert algo.item_features_.shape == (3, 20)
    assert algo.user_features_.shape == (3, 20)

    preds = algo.predict_for_user(10, [3])
    assert len(preds) == 1
    assert preds.index[0] == 3
    assert preds.loc[3] >= 0
    assert preds.loc[3] <= 5
def test_fsvd_predict_clamp():
    algo = svd.FunkSVD(20, iterations=20, range=(1, 5))
    algo.fit(simple_df)
    assert algo.bias.mean_ == approx(simple_df.rating.mean())
    assert algo.item_features_.shape == (3, 20)
    assert algo.user_features_.shape == (3, 20)

    preds = algo.predict_for_user(10, [3])
    assert isinstance(preds, pd.Series)
    assert len(preds) == 1
    assert preds.index[0] == 3
    assert preds.loc[3] >= 1
    assert preds.loc[3] <= 5
def get_algo_class(self, algo):
    if algo == 'popular':
        return basic.Popular()
    elif algo == 'bias':
        return basic.Bias(users=False)
    elif algo == 'topn':
        return basic.TopN(basic.Bias())
    elif algo == 'itemitem':
        return iknn.ItemItem(nnbrs=-1)
    elif algo == 'useruser':
        return uknn.UserUser(nnbrs=5)
    elif algo == 'biasedmf':
        return als.BiasedMF(50, iterations=10)
    elif algo == 'implicitmf':
        return als.ImplicitMF(20, iterations=10)
    elif algo == 'funksvd':
        return svd.FunkSVD(20, iterations=20)
def get_topn_algo_class(algo):
    if algo == 'popular':
        return basic.Popular()
    elif algo == 'bias':
        return basic.TopN(basic.Bias())
    elif algo == 'itemitem':
        return basic.TopN(iknn.ItemItem(nnbrs=-1, center=False, aggregate='sum'))
    elif algo == 'useruser':
        return basic.TopN(uknn.UserUser(nnbrs=5, center=False, aggregate='sum'))
    elif algo == 'biasedmf':
        return basic.TopN(als.BiasedMF(50, iterations=10))
    elif algo == 'implicitmf':
        return basic.TopN(als.ImplicitMF(20, iterations=10))
    elif algo == 'funksvd':
        return basic.TopN(svd.FunkSVD(20, iterations=20))
    elif algo == 'bpr':
        return basic.TopN(BPR(25))
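# Usage sketch (assumption: `ratings` is a user/item/rating frame): each TopN
# wrapper returned above is fit like any other LensKit algorithm and queried
# through the Recommender interface.
algo = get_topn_algo_class('funksvd')
algo.fit(ratings)
recs = algo.recommend(42, n=10)  # 10-item ranked list for user 42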
def test_algorithms():
    data = MovieLens('ml-latest-small')
    # data = ML1M('ml-1m')
    ratings = data.ratings
    print('Initial ratings table head:')
    print(ratings.head())

    algorithms = {
        'Bias': basic.Bias(damping=5),
        'Popular': basic.Popular(),
        'ItemItem': item_knn.ItemItem(20),
        'UserUser': user_knn.UserUser(20),
        'BiasedMF': als.BiasedMF(50),
        'ImplicitMF': als.ImplicitMF(50),
        'FunkSVD': funksvd.FunkSVD(50)
    }

    all_recs, test_data = eval_algos(ratings, algorithms)
    ndcg_means = eval_ndcg(all_recs, test_data)
    print('NDCG means:')
    print(ndcg_means)
    plot_comparison(ndcg_means)
def test_fsvd_save_load():
    ratings = lktu.ml_test.ratings

    original = svd.FunkSVD(20, iterations=20)
    original.fit(ratings)
    assert original.global_bias_ == approx(ratings.rating.mean())
    assert original.item_features_.shape == (ratings.item.nunique(), 20)
    assert original.user_features_.shape == (ratings.user.nunique(), 20)

    mod = pickle.dumps(original)
    _log.info('serialized to %d bytes', len(mod))
    algo = pickle.loads(mod)

    assert algo.global_bias_ == original.global_bias_
    assert np.all(algo.user_bias_ == original.user_bias_)
    assert np.all(algo.item_bias_ == original.item_bias_)
    assert np.all(algo.user_features_ == original.user_features_)
    assert np.all(algo.item_features_ == original.item_features_)
    assert np.all(algo.item_index_ == original.item_index_)
    assert np.all(algo.user_index_ == original.user_index_)
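# A minimal persistence sketch, assuming pickling to disk is acceptable: this
# test round-trips the model in memory with pickle (newer LensKit style),
# whereas the earlier save/load test relied on the older save()/load() methods.
with open('funksvd.pkl', 'wb') as f:
    pickle.dump(original, f)
with open('funksvd.pkl', 'rb') as f:
    algo = pickle.load(f)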
def all_movie_recommends(ratings, optionList):
    all_recs = []
    test_data = []

    # Declare algorithm models
    basic_bias_model = basic.Bias()
    knn_model = iknn.ItemItem(20)
    knn_u_model = uknn.UserUser(20)
    als_b_model = als.BiasedMF(50)
    als_i_model = als.ImplicitMF(50)
    funk_model = funksvd.FunkSVD(50)

    for train, test in xf.partition_users(ratings[['user', 'item', 'rating']], 5,
                                          xf.SampleFrac(0.2)):
        test_data.append(test)
        for option in optionList:
            if option == 1:
                all_recs.append(batch_eval('BasicBias', basic_bias_model, train, test))
            if option == 2:
                all_recs.append(batch_eval('ItemItem', knn_model, train, test))
            if option == 3:
                all_recs.append(batch_eval('UserUser', knn_u_model, train, test))
            if option == 4:
                all_recs.append(batch_eval('ALS-Biased', als_b_model, train, test))
            if option == 5:
                all_recs.append(batch_eval('ALS-Implicit', als_i_model, train, test))
            if option == 6:
                all_recs.append(batch_eval('FunkSVD', funk_model, train, test))

    all_recs = pd.concat(all_recs, ignore_index=True)
    test_data = pd.concat(test_data, ignore_index=True)
    return all_recs, test_data
def get_algo_class(algo):
    if algo == 'popular':
        return basic.Popular()
    elif algo == 'bias':
        return basic.Bias(users=False)
    elif algo == 'topn':
        return basic.TopN(basic.Bias())
    elif algo == 'itemitem':
        return iknn.ItemItem(nnbrs=-1)
    elif algo == 'useruser':
        return uknn.UserUser(nnbrs=5)
    elif algo == 'biasedmf':
        return als.BiasedMF(50, iterations=10)
    elif algo == 'implicitmf':
        return als.ImplicitMF(20, iterations=10)
    elif algo == 'funksvd':
        return svd.FunkSVD(20, iterations=20)
    elif algo == 'tf_bpr':
        return lktf.BPR(20, batch_size=1024, epochs=5, neg_count=2, rng_spec=42)
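# Usage sketch (assumption, not from the original module): most objects
# returned above are rating predictors rather than top-N recommenders;
# LensKit's Recommender.adapt can wrap a predictor for recommendation.
from lenskit.algorithms import Recommender

rec = Recommender.adapt(get_algo_class('funksvd'))
rec.fit(ratings)  # `ratings` assumed to be a user/item/rating frame
recs = rec.recommend(42, n=10)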
def test_fsvd_known_preds():
    algo = svd.FunkSVD(15, iterations=125, lrate=0.001)
    _log.info('training %s on ml data', algo)
    algo.fit(lktu.ml_test.ratings)

    dir = Path(__file__).parent
    pred_file = dir / 'funksvd-preds.csv'
    _log.info('reading known predictions from %s', pred_file)
    known_preds = pd.read_csv(str(pred_file))
    pairs = known_preds.loc[:, ['user', 'item']]

    preds = algo.predict(pairs)
    known_preds.rename(columns={'prediction': 'expected'}, inplace=True)
    merged = known_preds.assign(prediction=preds)
    merged['error'] = merged.expected - merged.prediction
    assert not any(merged.prediction.isna() & merged.expected.notna())

    err = merged.error
    err = err[err.notna()]
    try:
        assert all(err.abs() < 0.01)
    except AssertionError as e:
        bad = merged[merged.error.notna() & (merged.error.abs() >= 0.01)]
        _log.error('erroneous predictions:\n%s', bad)
        raise e
import pandas as pd

from lenskit.algorithms import funksvd
# from lenskit.algorithms import als
from lenskit.algorithms import item_knn as knn

import random
import time
from itertools import product
import statistics

startTime = time.time()

# read in the MovieLens 100k ratings with pandas
ratings = pd.read_csv('ml-100k/u.data', sep='\t',
                      names=['user', 'item', 'rating', 'timestamp'])

# algo = knn.ItemItem(20)
algo = funksvd.FunkSVD(50)
# algo = als.BiasedMF(50)  # use ALS, parallelizing the computation

'''
# train and test
# split the data into a test set and a training set, k-fold with xf
num_folds = 5
splits = xf.partition_users(ratings, num_folds, xf.SampleFrac(0.2))
for (trainSet, testSet) in splits:
    train = trainSet  # ?
    test = testSet  # ?
'''

startTime = time.time()

# train the model (fit() is the current LensKit training method)
model = algo.fit(ratings)
spentTime = time.time() - startTime
import pandas as pd

from lenskit import batch, topn
from lenskit import crossfold as xf
from lenskit.algorithms import item_knn as knn
from lenskit.algorithms import funksvd as funk
from lenskit.algorithms import als
from flask import make_response, abort, jsonify

# read in the MovieLens 100k ratings with pandas
# https://grouplens.org/datasets/movielens/100k/
ratings = pd.read_csv('ml-100k/u.data', sep='\t',
                      names=['user', 'item', 'rating', 'timestamp'])

algoKNN = knn.ItemItem(30)
algoFunk = funk.FunkSVD(2)
algoAls = als.BiasedMF(20)

# split the data into a test and a training set;
# for each user, leave one row out for test purposes
data = ratings
nb_partitions = 1
splits = xf.partition_users(data, nb_partitions, xf.SampleN(1))
for (trainSet, testSet) in splits:
    train = trainSet
    test = testSet

# train the models
modelKNN = algoKNN.fit(train)
modelFunk = algoFunk.fit(train)
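# Hedged follow-up sketch (assumption: one prediction per held-out pair is
# wanted): once fit, each model can score a user's held-out item with
# predict_for_user, the standard LensKit prediction method.
sample = test.iloc[0]
preds = modelFunk.predict_for_user(sample.user, [sample.item])
print('FunkSVD predicts %.3f for user %s, item %s'
      % (preds.iloc[0], sample.user, sample.item))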
# split once, duplicating the fold iterator: one copy for MultiEval, one for truth
xf_dataset_batch, xf_dataset_test = tee(
    xf.partition_users(ratings[['user', 'item', 'rating']], 5, xf.SampleFrac(0.2)))
truth = pd.concat([test for _, test in xf_dataset_test], ignore_index=True)

runner = batch.MultiEval('result', False, nprocs=4)
runner.add_algorithms(
    [item_knn.ItemItem(10), item_knn.ItemItem(20), item_knn.ItemItem(30)],
    False, ['nnbrs'])
runner.add_algorithms(
    [user_knn.UserUser(10), user_knn.UserUser(20), user_knn.UserUser(30)],
    True, ['nnbrs'])
runner.add_algorithms(
    [funksvd.FunkSVD(40, damping=0), funksvd.FunkSVD(50, damping=5),
     funksvd.FunkSVD(60, damping=10)],
    False, ['features', 'damping'])
runner.add_datasets(xf_dataset_batch)
runner.run()

# load the run metadata written by MultiEval
runs = pd.read_parquet('result/runs.parquet',
                       columns=('AlgoClass', 'RunId', 'damping', 'features', 'nnbrs'))
runs.rename({'AlgoClass': 'Algorithm'}, axis='columns', inplace=True)

def extract_config(x):
    from math import isnan
    damping, features, nnbrs = x
    result = ''
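# A hedged scoring sketch (assumes MultiEval wrote its default
# result/recommendations.parquet): the recommendation lists can be scored
# against `truth` with a RecListAnalysis.
from lenskit import topn

recs = pd.read_parquet('result/recommendations.parquet')
rla = topn.RecListAnalysis()
rla.add_metric(topn.ndcg)
scores = rla.compute(recs, truth)
print(scores.ndcg.mean())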
""" Basic algorithm definitions as starting points. """ from lenskit.algorithms import item_knn, user_knn, als, funksvd from lenskit.algorithms import basic Bias = basic.Bias(damping=5) Pop = basic.Popular() II = item_knn.ItemItem(20, save_nbrs=2500) UU = user_knn.UserUser(30) ALS = als.BiasedMF(50) IALS = als.ImplicitMF(50) MFSGD = funksvd.FunkSVD(50)
random = basic.Random()
popular = basic.Popular()
item_to_item_100 = item_knn.ItemItem(100)
item_to_item_200 = item_knn.ItemItem(200)
item_to_item_500 = item_knn.ItemItem(500)
user_to_user_100 = user_knn.UserUser(100)
user_to_user_200 = user_knn.UserUser(200)
user_to_user_500 = user_knn.UserUser(500)
biased_mf_50 = als.BiasedMF(50)
biased_mf_100 = als.BiasedMF(100)
biased_mf_200 = als.BiasedMF(200)
implicit_mf_50 = als.ImplicitMF(50)
implicit_mf_100 = als.ImplicitMF(100)
implicit_mf_200 = als.ImplicitMF(200)
funk_svd_mf_50 = funksvd.FunkSVD(50)
funk_svd_mf_100 = funksvd.FunkSVD(100)
funk_svd_mf_200 = funksvd.FunkSVD(200)
bayesian = BPR()
hierarchical_poisson_fact_50 = HPF(50)
hierarchical_poisson_fact_100 = HPF(100)
hierarchical_poisson_fact_200 = HPF(200)

train, test = train_test_split(ratings[['user', 'item', 'rating']], test_size=0.2)

eval = batch.MultiEval('../recs/cf', recommend=NUM_OF_RECS)
eval.add_datasets((train, test), name='ml-1m')
eval.add_algorithms(random, name='random')
eval.add_algorithms(popular, name='popular')
eval.add_algorithms(item_to_item_100, name='item_to_item_100')