# Stack the interaction frame `remark` (built earlier in this file) on top of the
# pickled negative samples.
# NOTE(review): presumably both frames carry User/Item/rate columns -- confirm.
samplingDF = pd.read_pickle('negativeSampling.pkl')
merge = pd.concat([remark, samplingDF])
# Renumber the rows, discard the old index that reset_index() turns into a
# column, and keep only the three columns handed to Surprise, in that order.
merge = merge.reset_index().drop(columns='index')[['User', 'Item', 'rate']]
del remark, samplingDF  # the source frames are not needed past this point

# %%
# Ratings are declared to lie on a 0..1 scale.
reader = surprise.Reader(rating_scale=(0, 1))
data = Dataset.load_from_df(merge, reader)
del merge
# Hold out 10% of the ratings; the fixed seed keeps the split reproducible.
train, test = train_test_split(data, test_size=0.1, random_state=123)

# %% Train the model (no hyper-parameter tuning)
algo = SVDpp()  # instantiate the model with its default settings
# NOTE(review): SVDpp's documented constructor has no `biased` option, so this
# assignment may have no effect on training -- confirm before relying on it.
algo.biased = False
algo.fit(train)
predictions = algo.test(test)
accuracy.mae(predictions)

# Spot-check a single (user, item) pair.
a = algo.predict('15cbc496d67626ad90514b4243e7c045', '2204590')
print(a)
dump.dump('SVDmodel.pkl', algo=algo)

# %%
# Replace the freshly trained model with the one stored in best_model.pkl;
# element 1 of the loaded tuple is the algorithm object.
algo = dump.load('best_model.pkl')[1]

# %% Random-guess model (baseline for comparison)
algocompare = surprise.NormalPredictor()
algocompare.fit(train)
preCompare = algocompare.test(test)
accuracy.mae(preCompare)
# Save the figure that was being drawn before this point.
plt.savefig('3_fold_CV_Reg_Param.png')

# New figure: mean cross-validated test RMSE against the number of factors,
# one black dot per parameter combination in the CV results.
plt.figure(figsize=(20, 12))
plt.rcParams.update({'font.size': 12})
plt.plot(
    Train_CV.cv_results['param_n_factors'],
    Train_CV.cv_results['mean_test_rmse'],
    '.k',
)
plt.xlabel('Number of Factores')
plt.ylabel('RMSE')
plt.grid()
plt.title('3-Fold CV - Number of Factors')
plt.savefig('3_fold_CV_Factors.png')

# %% Best Hyper-parameters Training
# Parameter combination that achieved the best RMSE in the grid search.
_best_rmse_params = Grid_Search_Result.best_params['rmse']

alg = SVDpp()
# NOTE(review): SVDpp's documented constructor has no `biased` option, so this
# attribute may be ignored during training -- confirm.
alg.biased = _best_rmse_params['biased']
alg.n_epochs = _best_rmse_params['n_epochs']
alg.n_factors = _best_rmse_params['n_factors']

# `reg_all` is a single value that applies to every regularisation term.
alg.reg_pu = _best_rmse_params['reg_all']
alg.reg_qi = _best_rmse_params['reg_all']
alg.reg_yj = _best_rmse_params['reg_all']
alg.reg_bi = _best_rmse_params['reg_all']
alg.reg_bu = _best_rmse_params['reg_all']

# Likewise `lr_all` must reach every learning rate. Previously only lr_pu and
# lr_qi were set, so lr_bu, lr_bi and lr_yj kept the library default and the
# final model did not use the learning rate the search had selected for them.
alg.lr_pu = _best_rmse_params['lr_all']
alg.lr_qi = _best_rmse_params['lr_all']
alg.lr_yj = _best_rmse_params['lr_all']
alg.lr_bi = _best_rmse_params['lr_all']
alg.lr_bu = _best_rmse_params['lr_all']

# Time the fit on the full training set (no hold-out).
start = time.time()
alg.fit(data_train.build_full_trainset())
end = time.time()