def matrix_factorization_param(data_cv):
    """Benchmark several Surprise algorithms with 5-fold CV, then grid-search
    SVD hyper-parameters and return the estimator with the lowest RMSE.

    Parameters
    ----------
    data_cv : surprise.Dataset
        Dataset to cross-validate and grid-search on.

    Returns
    -------
    surprise.SVD
        Unfitted SVD estimator configured with the best-found parameters.
    """
    # Benchmark each candidate algorithm with 5-fold cross validation.
    benchmark = []
    for algorithm in [SVD(), SVDpp(), NMF(), SlopeOne(), NormalPredictor(),
                      CoClustering()]:
        results = model_selection.cross_validate(
            algorithm, data_cv, measures=['RMSE', 'MAE'], cv=5, verbose=False)
        # Mean of the per-fold metrics, plus the algorithm's class name.
        tmp = pd.DataFrame.from_dict(results).mean(axis=0)
        # FIX: pd.Series.append was deprecated in pandas 1.4 and removed in
        # pandas 2.0 -- use pd.concat instead.
        algo_name = str(algorithm).split(' ')[0].split('.')[-1]
        tmp = pd.concat([tmp, pd.Series([algo_name], index=['Algorithm'])])
        benchmark.append(tmp)

    # NOTE(review): this table is named `rmse` but is sorted by 'test_mae';
    # kept as-is for compatibility -- consider sorting by 'test_rmse' or
    # renaming the variable.
    rmse = pd.DataFrame(benchmark).set_index('Algorithm').sort_values(
        'test_mae')
    #print(rmse)

    # Grid-search SVD hyper-parameters, scored on RMSE.
    param_grid = {
        'n_factors': [100, 150, 200],
        'n_epochs': [20, 40],
        'lr_all': [0.001, 0.005, 0.008],
        'reg_all': [0.075, 0.1, 0.15],
    }
    algorithm_gs = model_selection.GridSearchCV(
        SVD, param_grid, measures=['rmse'], cv=5, n_jobs=-1)
    algorithm_gs.fit(data_cv)

    # Estimator pre-configured with the parameter combination that achieved
    # the lowest cross-validated RMSE.
    best_algo = algorithm_gs.best_estimator['rmse']
    return best_algo
rating_scale=(1, 5), skip_lines=1) data_train = Dataset.load_from_file(file_path, reader=reader) # %% Hyper parameter tuning and CV analysis # Algorithm: SVD Hyper_Params = { 'n_epochs': [10], 'n_factors': [50, 100, 150, 200], 'biased': [False], 'lr_all': [0.005], 'reg_all': [0.01, 0.1, 0.3, 1.0] } Train_CV = Grid_Search_Result = model_selection.GridSearchCV( SVD, Hyper_Params, measures=['rmse', 'mae'], cv=3, n_jobs=3) Train_CV.fit(data_train) # %% Figures plt.figure(figsize=(20, 12)) plt.rcParams.update({'font.size': 12}) plt.plot(Train_CV.cv_results['param_reg_all'], Train_CV.cv_results['mean_test_rmse'], '.k') plt.xscale('log') plt.xlabel('Regularization Parameter ($\lambda$)') plt.ylabel('RMSE') plt.grid() plt.title('3-Fold CV - Regularization Parameter ($\lambda$)') plt.savefig('3_fold_CV_Reg_Param.png')
# Parse CSV rows as "item,user,rating", skipping the header; ratings in [1, 5].
reader = Reader(line_format='item user rating', sep=',',
                rating_scale=(1, 5), skip_lines=1)
data_train = Dataset.load_from_file(file_path, reader=reader)

# %% Hyper parameter tuning and CV analysis
# Grid over the ALS baseline's user/item regularization at a fixed epoch count.
Hyper_Params = {'bsl_options': {'method': ['als'],
                                'n_epochs': [20],
                                'reg_u': [1, 3, 10],
                                'reg_i': [1, 3, 10]}}
# Wall-clock timing of the full 3-fold grid search (3 parallel jobs).
start = time.time()
Train_CV = Grid_Search_Result = model_selection.GridSearchCV(
    BaselineOnly, Hyper_Params, measures=['rmse'], cv=3, n_jobs=3,
    return_train_measures=True, joblib_verbose=3)
Train_CV.fit(data_train)
end = time.time()
print("***********************************************")
print("Exe time:")
print(end - start)

# %% Figures
# Collect the (reg_u, reg_i) values tried, for plotting; the loop body
# continues past this view.
reg_i = []
reg_u = []
for i in Train_CV.cv_results['param_bsl_options']:
user_ratings.sort(key=lambda x: x[1], reverse=True) top_n[uid] = user_ratings[:n] return top_n movie_train = Dataset.load_builtin('ml-100k') print(movie_train.raw_ratings) svd_estimator = surprise.SVD svd_grid = { 'n_factors': [50, 100, 150], 'reg_all': [0, 0.4, 0.6], 'biased': ['True', 'False'] } gs = model_selection.GridSearchCV(svd_estimator, svd_grid, measures=['rmse'], cv=3) gs.fit(movie_train) print(gs.best_score['rmse']) print(gs.best_params['rmse']) final_estimator = gs.best_estimator['rmse'] #build final model on entire train data movie_train = movie_train.build_full_trainset() final_estimator.fit(movie_train) movie_test = movie_train.build_anti_testset() predictions = final_estimator.test(movie_test) top_n = get_top_n(predictions, n=3)
movie_train = read_train_data('E:/train_v2.csv')
print(movie_train.raw_ratings)

# BUG FIX: GridSearchCV expects the algorithm CLASS (it instantiates it per
# parameter combination); the original passed an instance,
# surprise.KNNWithMeans(), which breaks the search. Drop the parentheses --
# compare the KNNBasic usage elsewhere in this file.
knn_estimator = surprise.KNNWithMeans
knn_grid = {
    'k': [10, 20],
    'sim_options': {
        'name': ['cosine'],
        'min_support': [1, 5],
        'user_based': [False],
    },
}
knn_grid_estimator = model_selection.GridSearchCV(knn_estimator, knn_grid,
                                                  measures=['rmse'], cv=3)
# Do grid search using the CV strategy.
knn_grid_estimator.fit(movie_train)
print(knn_grid_estimator.best_score['rmse'])
print(knn_grid_estimator.best_params['rmse'])
results_df = pd.DataFrame.from_dict(knn_grid_estimator.cv_results)

# Build the final model using the best parameters from the grid search.
final_model = knn_grid_estimator.best_estimator['rmse']
trainSet = movie_train.build_full_trainset()
final_model.fit(trainSet)

# BUG FIX: the original leaked the file handle via csv.reader(open(...));
# read it under a context manager (newline='' per the csv module docs).
with open('F:/test_v2.csv', newline='') as test_file:
    rows = list(csv.reader(test_file))
rows.pop(0)  # drop the header row
return data movie_train = read_train_data( 'C:\\Users\\Algorithmica\\Downloads\\train_v2.csv') print(movie_train.raw_ratings) knn_grid = { 'k': [10, 20], 'sim_options': { 'name': ['cosine'], 'min_support': [1, 5], 'user_based': [False] } } gs = model_selection.GridSearchCV(KNNBasic, knn_grid, measures=['rmse'], cv=3) gs.fit(movie_train) print(gs.best_score['rmse']) print(gs.best_params['rmse']) results_df = pd.DataFrame.from_dict(gs.cv_results) algo = gs.best_estimator['rmse'] trainSet = movie_train.build_full_trainset() algo.fit(trainSet) rows = csv.reader(open('F:/test_v2.csv')) rows = list(rows) rows.pop(0) f = open('F:/submission.csv', 'w', newline='') writer = csv.writer(f) writer.writerow(['ID', 'rating'])