# Plot the mean cross-validated test RMSE against the number of factors tried
# in the search stored in Train_CV, and save the figure to disk.
plt.figure(figsize=(20, 12))
plt.rcParams.update({'font.size': 12})
plt.plot(
    Train_CV.cv_results['param_n_factors'],
    Train_CV.cv_results['mean_test_rmse'],
    '.k',
)
plt.xlabel('Number of Factors')
plt.ylabel('RMSE')
plt.grid()
plt.title('3-Fold CV - Number of Factors')
plt.savefig('3_fold_CV_Reg_Param_NMF_n_factors.png')

# %% Best Hyper-parameters Training
# NOTE(review): the plot above reads Train_CV while the hyper-parameters below
# come from Grid_Search_Result -- confirm both names refer to the intended
# search object.
best_params_rmse = Grid_Search_Result.best_params['rmse']

# Hyper-parameters go through the constructor (the documented API) instead of
# being assigned as attributes on a default-constructed model.
alg = NMF(
    biased=best_params_rmse['biased'],
    n_epochs=best_params_rmse['n_epochs'],
    n_factors=best_params_rmse['n_factors'],
    reg_pu=best_params_rmse['reg_pu'],
    reg_qi=best_params_rmse['reg_qi'],
)

# Time the fit on the full (un-split) training set.
start = time.time()
alg.fit(data_train.build_full_trainset())
end = time.time()
print("***********************************************")
print("Exe time:")
print(end - start)

# %% Loading Test Data
file_path = "Data/sample_submission.csv"
# Append the best parameters and the full CV summary to the report file.
# NOTE(review): `file` is opened before this excerpt; wrapping all the writes
# in a `with open(...)` block would guarantee it is closed if a write fails.
file.write("+ Best Param: \n \n")
file.write(str(Train_CV.best_params) + "\n \n")
file.write("************************************************************ \n")
file.write("+ CV Summary: \n \n")
file.write(str(Train_CV.cv_results) + "\n \n")
file.write("************************************************************ \n")
file.close()

# *****************************************************************************
# %% Best Hyper-parameters Training:
# Training over whole training dataset, using best hyper-parameters
best_params_rmse = Train_CV.best_params['rmse']

# Hyper-parameters go through the constructor (the documented API) instead of
# being assigned as attributes on a default-constructed model.
alg = NMF(
    biased=best_params_rmse['biased'],
    n_epochs=best_params_rmse['n_epochs'],
    n_factors=best_params_rmse['n_factors'],
    reg_pu=best_params_rmse['reg_pu'],
    reg_qi=best_params_rmse['reg_qi'],
    reg_bu=best_params_rmse['reg_bu'],
    reg_bi=best_params_rmse['reg_bi'],
    verbose=True,
    random_state=0,  # fixed seed for the final fit
)
alg.fit(data_train.build_full_trainset())

# *****************************************************************************
# %% Loading Test Data
file_path = "Data/sample_submission.csv"
data_test = utils.load_data_desired(file_path)
# %% Load Project Dataset in a Surprise format
# Comma-separated rows in "item user rating" order, ratings on a 1-5 scale;
# the first line of the file is skipped (presumably a header row).
file_path = "Data/data_train_Surprise_format.csv"
reader = Reader(
    line_format='item user rating',
    sep=',',
    rating_scale=(1, 5),
    skip_lines=1,
)
data_train = Dataset.load_from_file(file_path, reader=reader)

# %% Best Hyper-parameters Training - NMF
# Hyper-parameters go through the constructor (the documented API) instead of
# being assigned as attributes on a default-constructed model.
alg_NMF = NMF(
    biased=False,
    n_epochs=50,
    n_factors=35,
    reg_pu=0.1,
    reg_qi=0.1,
    verbose=True,
)

# Time the fit on the full (un-split) training set.
start = time.time()
alg_NMF.fit(data_train.build_full_trainset())
end = time.time()
print("***********************************************")
print("Exe time:")
print(end - start)

# %% Best Hyper-parameters Training - SVD
# %% Load Project Dataset in a Surprise format
# Comma-separated rows in "item user rating" order, ratings on a 1-5 scale;
# the first line of the file is skipped (presumably a header row).
file_path = "Data/data_train_preprocessed_surprise_format.csv"
reader = Reader(
    line_format='item user rating',
    sep=',',
    rating_scale=(1, 5),
    skip_lines=1,
)
data_train = Dataset.load_from_file(file_path, reader=reader)

# %% Best Hyper-parameters Training
# Hyper-parameters go through the constructor (the documented API) instead of
# being assigned as attributes on a default-constructed model.
alg = NMF(
    biased=False,
    n_epochs=50,
    n_factors=35,
    reg_pu=0.1,
    reg_qi=0.1,
    verbose=True,
)

# Time the fit on the full (un-split) training set.
start = time.time()
alg.fit(data_train.build_full_trainset())
end = time.time()
print("***********************************************")
print("Exe time:")
print(end - start)

# %% Loading train data