コード例 #1
0
ファイル: recsys.py プロジェクト: WangLiuying/Python-scripts
# Load the pre-computed negative samples and stack them under the observed
# ratings held in `remark`.
# NOTE(review): pd.read_pickle unpickles arbitrary objects; only load pickle
# files that come from a trusted source.
samplingDF = pd.read_pickle('negativeSampling.pkl')
# ignore_index=True renumbers the rows 0..n-1 directly, so no temporary
# 'index' column has to be created by reset_index() and deleted afterwards
# (which breaks when the index is named or an 'index' column already exists).
merge = pd.concat([remark, samplingDF], ignore_index=True)
del remark, samplingDF  # the source frames are no longer needed once merged
# Keep only the three columns handed to Surprise, in user / item / rating order.
merge = merge[['User', 'Item', 'rate']]

#%%

# Ratings are declared to lie on a 0-1 scale.
reader = surprise.Reader(rating_scale=(0, 1))
data = Dataset.load_from_df(merge, reader)
del merge

# Hold out 10% of the ratings for testing; the fixed seed keeps the split
# reproducible between runs.
train, test = train_test_split(data, random_state=123, test_size=0.1)
#%% Train the model (hyper-parameters not tuned)
algo = SVDpp()  # instantiate the model with the library defaults
# NOTE(review): `biased` is a documented option of Surprise's SVD, not of
# SVDpp -- confirm this assignment actually changes the fitted model.
algo.biased = False

algo.fit(train)

# Score the held-out split and compute the mean absolute error.
predictions = algo.test(test)
accuracy.mae(predictions)
# Spot-check one prediction (arguments are presumably a raw user id and a raw
# item id -- verify against the data).
a = algo.predict('15cbc496d67626ad90514b4243e7c045', '2204590')
print(a)
# Persist the fitted algorithm to disk.
dump.dump(file_name='SVDmodel.pkl', algo=algo)
#%%
# Rebind `algo` to element [1] of what is stored in 'best_model.pkl'
# (presumably the algorithm half of the (predictions, algo) pair returned by
# Surprise's dump.load -- confirm). Note this is a different file from the
# 'SVDmodel.pkl' written above.
algo = dump.load('best_model.pkl')[1]
#%% Random-guess model (baseline for comparison)
algocompare = surprise.NormalPredictor()
algocompare.fit(train)
preCompare = algocompare.test(test)
accuracy.mae(preCompare)
plt.savefig('3_fold_CV_Reg_Param.png')

# Plot the mean test RMSE recorded in the cross-validation results against
# the number of factors tried, then save the figure to disk.
plt.figure(figsize=(20, 12))
plt.rcParams.update({'font.size': 12})
plt.plot(Train_CV.cv_results['param_n_factors'],
         Train_CV.cv_results['mean_test_rmse'], '.k')  # '.k': black point markers
plt.xlabel('Number of Factors')  # label typo fixed; now matches the title
plt.ylabel('RMSE')
plt.grid()
plt.title('3-Fold CV - Number of Factors')
plt.savefig('3_fold_CV_Factors.png')

# %% Best Hyper-parameters Training
# Re-train one SVDpp model using the settings that gave the best RMSE in the
# grid search, and record wall-clock timestamps around the fit.
best_rmse_params = Grid_Search_Result.best_params['rmse']

# Passing lr_all / reg_all to the constructor applies the tuned values to
# every parameter group (bu, bi, pu, qi, yj). Assigning individual attributes
# afterwards previously left lr_bu, lr_bi and lr_yj at the library defaults,
# so the model was not trained with the selected learning rate.
alg = SVDpp(
    n_factors=best_rmse_params['n_factors'],
    n_epochs=best_rmse_params['n_epochs'],
    lr_all=best_rmse_params['lr_all'],
    reg_all=best_rmse_params['reg_all'],
)
# Kept as an attribute for backward compatibility with the original script.
# NOTE(review): `biased` is a documented option of Surprise's SVD, not of
# SVDpp -- confirm it has any effect on this model.
alg.biased = best_rmse_params['biased']

start = time.time()

# Fit on the full training set built from `data_train`.
alg.fit(data_train.build_full_trainset())

end = time.time()