def main(args):
    """Cross-validate one Surprise algorithm and pickle the resulting scores.

    The first ``args.num_rows`` ratings of ``all_ratings_with_indices.parquet``
    (read from the current working directory) are cross-validated with
    ``args.cv_folds`` folds, and the returned scores are pickled to
    ``<exp_name>_<algorithm>_<sim_name>_<user|item>_based_<num_rows>.pkl``.

    Args:
        args: Namespace-like object providing ``exp_name``, ``algorithm``,
            ``sim_name``, ``item_based``, ``num_rows`` and ``cv_folds``.

    Raises:
        SystemExit: With status 1 when ``args.algorithm`` is not a supported
            choice or when the output file already exists.
    """
    # Algorithm name -> (class name inside ``surprise``, accepts sim_options).
    # Class names are resolved lazily so that the validation below happens
    # before any data is loaded.
    algo_specs = {
        'knn': ('KNNBasic', True),
        'baseline': ('BaselineOnly', False),
        'normal': ('NormalPredictor', False),
        'knn_zscore': ('KNNWithZScore', True),
        'svd': ('SVD', False),
        'nmf': ('NMF', False),
    }
    if args.algorithm not in algo_specs:
        # Fail fast: otherwise the run would load the whole dataset and then
        # crash on an unbound ``algo``.
        print(f'Algorithm {args.algorithm} is not a valid choice.')
        sys.exit(1)

    user_item_based = 'item_based' if args.item_based else 'user_based'
    filename = '_'.join([
        args.exp_name,
        args.algorithm,
        args.sim_name,
        user_item_based,
        str(args.num_rows),
    ]) + '.pkl'
    output_file = Path(filename)
    if output_file.exists():
        # Never overwrite the results of an earlier experiment.
        print(f'ERROR! Output file {output_file} already exists. Exiting!')
        sys.exit(1)
    print(f'Saving scores in {output_file}\n')

    reader = surprise.Reader(rating_scale=(1, 5))
    df = pq.read_table(
        'all_ratings_with_indices.parquet',
        columns=['user_idx', 'movie_idx', 'rating'],
    ).to_pandas()
    # Downcast to the narrow unsigned types chosen for each column.
    df.user_idx = df.user_idx.astype(np.uint32)
    df.movie_idx = df.movie_idx.astype(np.uint16)
    df.rating = df.rating.astype(np.uint8)
    print(df.dtypes)
    data = surprise.Dataset.load_from_df(df[:args.num_rows], reader=reader)
    # Drop the local reference to the full frame once the dataset is built.
    del df

    sim_options = {
        'name': args.sim_name,
        'user_based': not args.item_based,
    }
    class_name, takes_sim_options = algo_specs[args.algorithm]
    algo_kwargs = {'sim_options': sim_options} if takes_sim_options else {}
    algo = getattr(surprise, class_name)(**algo_kwargs)

    scores = surprise.model_selection.cross_validate(
        algo, data, cv=args.cv_folds, verbose=True, n_jobs=-1)
    # Context manager guarantees the pickle is flushed and closed.
    with open(output_file, 'wb') as fh:
        pickle.dump(scores, fh)
def algo_tester(data_object):
    '''
    Benchmarks the surprise prediction algorithms with 3-fold cross
    validation and collects the fold-averaged results in one dataframe

    ---Parameters---
    data_object(variable) dataset handed unchanged to cross_validate
    (created from the read_data_surprise function)

    ---Returns---
    returns a dataframe indexed by algorithm name and sorted by ascending
    'test_rmse', where you can compare the performance of different
    algorithms
    '''
    algos = [
        sp.SVDpp(),
        sp.SVD(),
        sp.SlopeOne(),
        sp.NMF(),
        sp.NormalPredictor(),
        sp.KNNBaseline(),
        sp.KNNBasic(),
        sp.KNNWithMeans(),
        sp.KNNWithZScore(),
        sp.BaselineOnly(),
        sp.CoClustering(),
    ]

    benchmark = []
    for algorithm in algos:
        # Perform cross validation
        results = cross_validate(algorithm, data_object, measures=['RMSE'],
                                 cv=3, verbose=False)

        # Average every reported column over the folds
        fold_means = pd.DataFrame.from_dict(results).mean(axis=0)

        # Tag the row with the algorithm's class name. pd.concat replaces
        # Series.append, which was removed in pandas 2.0.
        label = pd.Series([type(algorithm).__name__], index=['Algorithm'])
        benchmark.append(pd.concat([fold_means, label]))

    return (pd.DataFrame(benchmark)
            .set_index('Algorithm')
            .sort_values('test_rmse'))
epochs=2, validation_split=0.1, shuffle=True) y_pred = model.predict([df_hybrid_test['User'], df_hybrid_test['Movie'], test_tfidf]) y_true = df_hybrid_test['Rating'].values rmse = np.sqrt(mean_squared_error(y_pred=y_pred, y_true=y_true)) print('\n\nTesting Result With Keras Hybrid Deep Learning: {:.4f} RMSE'.format(rmse)) # Load dataset into surprise specific data-structure data = sp.Dataset.load_from_df(df_filterd[['User', 'Movie', 'Rating']].sample(20000), sp.Reader()) benchmark = [] # Iterate over all algorithms for algorithm in [sp.SVD(), sp.SVDpp(), sp.SlopeOne(), sp.NMF(), sp.NormalPredictor(), sp.KNNBaseline(), sp.KNNBasic(), sp.KNNWithMeans(), sp.KNNWithZScore(), sp.BaselineOnly(), sp.CoClustering()]: # Perform cross validation results = cross_validate(algorithm, data, measures=['RMSE', 'MAE'], cv=3, verbose=False) # Get results & append algorithm name tmp = pd.DataFrame.from_dict(results).mean(axis=0) tmp = tmp.append(pd.Series([str(algorithm).split(' ')[0].split('.')[-1]], index=['Algorithm'])) # Store data benchmark.append(tmp) # Store results surprise_results = pd.DataFrame(benchmark).set_index('Algorithm').sort_values('test_rmse', ascending=False) # Get data data = surprise_results[['test_rmse', 'test_mae']]
def _cross_validate_and_report(algo, dataset, banner):
    """Run 5-fold RMSE/MAE cross validation, then print banner and scores."""
    scores = surprise.model_selection.cross_validate(
        algo, dataset, measures=['RMSE', 'MAE'], cv=5, verbose=True)
    print(banner)
    print(scores)
    return scores


# Load the ';'-separated ratings file and keep only the three columns that
# are handed to surprise.
rating_df = pd.read_csv(
    '/Users/mac/Desktop/推荐系统/RecommendedSystemCallPackage/data_set/MovieLens/ratings.csv',
    sep=';')
rating_df = rating_df[['UserID', 'MovieID', 'Rating']]

reader = surprise.Reader(rating_scale=(1, 5))
rating_data = surprise.Dataset.load_from_df(rating_df, reader=reader)

# Evaluate each algorithm in turn with its default configuration.
svd = surprise.SVD()
svd_temp = _cross_validate_and_report(svd, rating_data, 'SVD--------------')

normalPredictor = surprise.NormalPredictor()
normalPredictor_temp = _cross_validate_and_report(
    normalPredictor, rating_data, 'normalPredictor--------------')

baselineOnly = surprise.BaselineOnly()
baselineOnly_temp = _cross_validate_and_report(
    baselineOnly, rating_data, 'baselineOnly-----------------')

knnBasic = surprise.KNNBasic()
knnBasic_temp = _cross_validate_and_report(
    knnBasic, rating_data, 'knnBasic-----------------')

knnWithMeans = surprise.KNNWithMeans()
train, test = train_test_split(data, random_state=123, test_size=0.1) #%%训练模型(未调参) algo = SVDpp() #声明模型 algo.biased = False algo.fit(train) predictions = algo.test(test) accuracy.mae(predictions) a = algo.predict('15cbc496d67626ad90514b4243e7c045', '2204590') print(a) dump.dump(file_name='SVDmodel.pkl', algo=algo) #%% algo = dump.load('best_model.pkl')[1] #%%瞎猜模型(供对比) algocompare = surprise.NormalPredictor() algocompare.fit(train) preCompare = algocompare.test(test) accuracy.mae(preCompare) #%%计算precision and recall ## code from scikit-surprise documentation FAQs from collections import defaultdict def precision_recall_at_k(predictions, k=10, threshold=3.5): '''Return precision and recall at k metrics for each user.''' # First map the predictions to each user. user_est_true = defaultdict(list) for uid, _, true_r, est, _ in predictions:
# Number of distinct items / users in the training split.
n_items_train = len(np.unique(train['ISBN']))
n_users_train = len(np.unique(train['User-ID']))

reader = surprise.Reader(rating_scale=(1, 10))
data = surprise.Dataset.load_from_df(
    train[['User-ID', 'ISBN', 'Book-Rating']], reader)

test = pd.read_csv('test.csv')
n_items_test = len(np.unique(test['ISBN']))
# Users are counted on the test split, mirroring n_items_test.
n_users_test = len(np.unique(test['User-ID']))
# Test ratings as (user, item, rating) tuples.
t = [tuple(x) for x in test[['User-ID', 'ISBN', 'Book-Rating']].values]

# --- Training ---
# Alternative baseline that was tried: surprise.BaselineOnly()
algo = surprise.NormalPredictor()

sim_options = {'name': 'pearson', 'user_based': False}
algo_knn = surprise.KNNBasic(k=5, sim_options=sim_options)

# Alternative SVD setting that was tried: n_factors=5, lr_all=0.01, reg_all=1.3
algo_svd = surprise.SVD(n_factors=10, lr_all=0.001, reg_all=1)

# --- Baseline ---
# Parenthesised form prints the same text under Python 2 and Python 3.
print("\n Baseline\n")

# retrain on the whole train set
trainset = data.build_full_trainset()
# NOTE(review): `train` appears to be the legacy Surprise spelling of `fit`;
# confirm against the installed Surprise version before upgrading.
algo.train(trainset)

# Compute biased accuracy on train set (the model is scored on the same
# ratings it was trained on).
predictions = algo.test(trainset.build_testset())
precision, recall = precision_recall_at_k(predictions, k=10, threshold=7)
data = sp.Dataset.load_from_file(file_name, reader=reader) print("Done.") # defining the number of folds = 5 print("Performing splits...") kf = sp.model_selection.KFold(n_splits=5, random_state=0) print("Done.") ### ### PART 1.1 ### ''' application of all algorithms for recommendation made available by “Surprise” libraries, according to their default configuration. ''' algorithms = [sp.NormalPredictor(), sp.BaselineOnly(), sp.KNNBasic(),\ sp.KNNWithMeans(), sp.KNNWithZScore(), sp.KNNBaseline(),\ sp.SVD(), sp.SVDpp(), sp.NMF(), sp.SlopeOne(), sp.CoClustering()] for elem in algorithms: start_time = time.time() algo = elem sp.model_selection.cross_validate(algo, data, measures=['RMSE'], \ cv=kf, n_jobs = 2, verbose=True) print("--- %s seconds ---" % (time.time() - start_time)) print() ### ### PART 1.2 ### ''' Improvement of the quality of both KNNBaseline and SVD methods,
'NormalPredictor', 'BaselineOnly', 'KNNBasic', 'KNNWithMeans', 'KNNWithZScore', 'KNNBaseline', 'SVD', 'SVDpp', 'NMF', 'SlopeOne', 'CoClustering' ]) args = parser.parse_args() train_path = path + '/Data/train_format.txt' train_reader = Reader(line_format='user item rating timestamp', sep=',', rating_scale=(0, 5)) trainset = Dataset.load_from_file(train_path, reader=train_reader) trainset = trainset.build_full_trainset() if args.model == 'NormalPredictor': model = surprise.NormalPredictor() elif args.model == 'BaselineOnly': model = surprise.BaselineOnly() elif args.model == 'KNNBasic': model = surprise.KNNBasic() elif args.model == 'KNNWithMeans': model = surprise.KNNWithMeans() elif args.model == 'KNNWithZScore': model = surprise.KNNWithZScore() elif args.model == 'KNNBaseline': model = surprise.KNNBaseline() elif args.model == 'SVD': model = surprise.SVD() elif args.model == 'SVDpp': model = surprise.SVDpp(verbose=True) elif args.model == 'NMF':