def KNN_pred(self, is_total=0, combin_func='avg'):
    """Evaluate the fitted models on ``self.testdatas`` and combine them.

    ``self.algos[0]`` is tested on the ``'total'`` column and
    ``self.algos[i]`` on column ``'c<i>'`` for ``i`` in
    ``1..self.no_of_criteria``. The per-criterion estimates are then merged
    into one multi-criteria estimate per row.

    Args:
        is_total: ``0`` combines the criteria only; any other value also
            feeds the overall-rating estimate into the average.
        combin_func: ``'avg'`` for a plain mean, or ``'total_reg'`` for the
            linear combination ``sum(self.k[j] * est_j) + self.b``
            (only available when ``is_total == 0``).

    Returns:
        ``(s_mae, m_mae, total_p, multi_p)``: the MAE of the overall-rating
        predictions and of the combined predictions (both rounded to four
        decimals), followed by the two prediction lists.

    Raises:
        ValueError: if ``combin_func`` is not supported for the chosen
            ``is_total``. Previously these cases left the combined list
            empty and only failed later inside ``accuracy.mae``.
    """
    use_total = not (is_total == 0)
    if use_total and combin_func != 'avg':
        # Message: the overall score as a criterion cannot be used with the
        # regression aggregation function.
        raise ValueError('总分作为准则不适合用于回归聚合函数')
    if combin_func not in ('avg', 'total_reg'):
        raise ValueError('unsupported combin_func: %r' % (combin_func,))

    df = self.testdatas
    n_criteria = self.no_of_criteria

    total_p = self.algos[0].test(np.array(df[['uid', 'iid', 'total']]))

    # Rows are handed over as plain arrays, exactly like the overall ratings
    # above. The earlier code went through
    # Dataset.load_from_df(...).build_full_trainset() and an unfinished
    # ``.build_`` attribute; besides not running, a trainset round-trip would
    # no longer follow the DataFrame's row order, and the combination below
    # pairs predictions purely by position.
    criteria_p = [
        self.algos[c].test(np.array(df[['uid', 'iid', 'c' + str(c)]]))
        for c in range(1, n_criteria + 1)
    ]

    if combin_func == 'total_reg':
        k = self.k
        b = self.b

    multi_p = []
    for idx, base in enumerate(total_p):
        ests = [preds[idx].est for preds in criteria_p]
        if combin_func == 'total_reg':
            est = sum(k[j] * e for j, e in enumerate(ests)) + b
        elif use_total:
            est = (sum(ests) + base.est) / (n_criteria + 1)
        else:
            est = sum(ests) / n_criteria
        multi_p.append(
            predictions.Prediction(base.uid, base.iid, base.r_ui, est,
                                   base.details))

    s_mae = round(accuracy.mae(total_p), 4)
    m_mae = round(accuracy.mae(multi_p), 4)
    return s_mae, m_mae, total_p, multi_p
def transtoPrediction(multi, single):
    """Join combined estimates with reference predictions on their first two fields.

    Every row of ``multi`` is compared with every row of ``single``. For
    each pair whose fields 0 and 1 are equal, a ``predictions.Prediction``
    is created from ``(m[0], m[1], s[2], m[2], s[4])``, i.e. the estimate
    comes from ``multi`` while fields 2 and 4 come from ``single``.

    Returns:
        The list of created predictions, ordered by ``multi`` first and
        ``single`` second.
    """
    return [
        predictions.Prediction(agg[0], agg[1], ref[2], agg[2], ref[4])
        for agg in multi
        for ref in single
        if agg[0] == ref[0] and agg[1] == ref[1]
    ]
def avg(P, total_p):
    """Average six rating columns of each row into one combined prediction.

    For row ``i`` the estimate is the mean of ``P[i][2]`` through
    ``P[i][7]``. Column 2 (the overall rating) is counted as one more
    criterion next to columns 3-7; to leave it out, average columns 3-7 and
    divide by 5 instead.

    Args:
        P: indexable rows; fields 0 and 1 are copied into the result.
        total_p: indexable rows aligned with ``P`` by position; fields 2 and
            4 are copied into the result.

    Returns:
        A list with one ``predictions.Prediction`` per row of ``P``.
    """
    combined = []
    # Positional indexing is kept so any container supporting len() and
    # integer indexing behaves the same as before.
    for idx in range(len(P)):
        row = P[idx]
        reference = total_p[idx]
        estimate = (row[3] + row[4] + row[5] + row[6] + row[7] + row[2]) / 6
        combined.append(
            predictions.Prediction(row[0], row[1], reference[2], estimate,
                                   reference[4]))
    return combined
def totaldata_regModel(P, k, b, total_p):
    """Combine five criterion columns with a linear model.

    For row ``i`` the estimate is
    ``P[i][3]*k[0] + P[i][4]*k[1] + P[i][5]*k[2] + P[i][6]*k[3] + P[i][7]*k[4] + b``.
    Column 2 (the overall rating) is not used as a criterion here.

    Args:
        P: indexable rows; fields 0 and 1 are copied into the result.
        k: indexable weights, one per criterion column 3-7.
        b: intercept added after the weighted sum.
        total_p: indexable rows aligned with ``P`` by position; fields 2 and
            4 are copied into the result.

    Returns:
        A list with one ``predictions.Prediction`` per row of ``P``.
    """
    combined = []
    for idx in range(len(P)):
        row = P[idx]
        reference = total_p[idx]
        # Accumulate left to right so the floating-point result matches the
        # written-out sum term for term.
        estimate = row[3] * k[0]
        for offset in range(1, 5):
            estimate = estimate + row[3 + offset] * k[offset]
        estimate = estimate + b
        combined.append(
            predictions.Prediction(row[0], row[1], reference[2], estimate,
                                   reference[4]))
    return combined
def Predict(self, min_or_more='min', pred_options=None):
    """Evaluate the fitted models on one of the stored test splits.

    ``self.algos[0]`` is tested on the ``'total'`` column and
    ``self.algos[i]`` on column ``'c<i>'`` for ``i`` in
    ``1..self.no_of_criteria``; the per-criterion estimates are then merged
    into one combined estimate per row.

    Args:
        min_or_more: ``'min'`` evaluates ``self.min_test``; any other value
            evaluates ``self.more_test``.
        pred_options: optional dict with the keys ``'is_total'`` (truthy to
            include the overall-rating estimate in the average) and
            ``'combin_func'`` (``'avg'``, ``'total_reg'`` or
            ``'info_entropy'``). Missing keys, or ``None``, fall back to
            ``{'is_total': False, 'combin_func': 'avg'}``.

    Returns:
        ``(s_mae, m_mae)``: MAE of the overall-rating predictions and of the
        combined predictions, both rounded to four decimals.

    Raises:
        ValueError: if ``combin_func`` is not supported for the chosen
            ``is_total``. Previously these cases left the combined list
            empty and only failed later inside ``accuracy.mae``.
    """
    options = {'is_total': False, 'combin_func': 'avg'}
    if pred_options is not None:
        options.update(pred_options)
    use_total = bool(options['is_total'])
    combin_func = options['combin_func']

    # Reject unusable configurations before running any model.
    if use_total and combin_func != 'avg':
        # Message: the overall score as a criterion cannot be used with the
        # regression aggregation function.
        raise ValueError('总分作为准则不适合用于回归聚合函数')
    if combin_func not in ('avg', 'total_reg', 'info_entropy'):
        raise ValueError('unsupported combin_func: %r' % (combin_func,))

    source = self.min_test if min_or_more == 'min' else self.more_test
    # Every test array below is cut from this one sorted frame, so the
    # prediction lists line up row by row and can be combined by position.
    df = source.sort_values(by='uid')
    n_criteria = self.no_of_criteria

    total_p = self.algos[0].test(np.array(df[['uid', 'iid', 'total']]))
    criteria_p = [
        self.algos[c].test(np.array(df[['uid', 'iid', 'c' + str(c)]]))
        for c in range(1, n_criteria + 1)
    ]

    if combin_func == 'total_reg':
        k = self.k
        b = self.b
    elif combin_func == 'info_entropy':
        # Info_Entropy() is expected to (re)populate self.H.
        # NOTE(review): presumably one row per user holding the user id,
        # per-criterion weights and their normaliser - confirm.
        self.Info_Entropy()
        H = np.array(self.H)

    multi_p = []
    for idx, base in enumerate(total_p):
        ests = [preds[idx].est for preds in criteria_p]
        if use_total:
            est = (sum(ests) + base.est) / (n_criteria + 1)
        elif combin_func == 'avg':
            est = sum(ests) / n_criteria
        elif combin_func == 'total_reg':
            est = sum(k[j] * e for j, e in enumerate(ests)) + b
        else:
            matches = np.argwhere(H[:, 0] == base.uid)
            if len(matches):
                # First row of H whose first column equals this user id;
                # columns 1..n weight the criteria, column n + 1 divides.
                h = H[matches][0][0]
                est = sum(h[j] * e / h[n_criteria + 1]
                          for j, e in enumerate(ests, start=1))
            else:
                # No row for this user: fall back to the plain mean.
                est = sum(ests) / n_criteria
        multi_p.append(
            predictions.Prediction(base.uid, base.iid, base.r_ui, est,
                                   base.details))

    s_mae = round(accuracy.mae(total_p), 4)
    m_mae = round(accuracy.mae(multi_p), 4)
    return s_mae, m_mae
def SVD(self, seed=3, n_factor=20, n_epoch=20):
    """Train one SVD model per rating column and compare single vs. combined MAE.

    The overall rating (``'total'``) and every criterion column ``'c<i>'``
    of ``self.datas`` are each split into train/test sets with the same
    seed, fitted with their own SVD model and tested. The per-criterion
    estimates are then merged according to ``self.is_total`` and
    ``self.combin_func``:

    * ``is_total == 0`` and ``'avg'``: mean of the criterion estimates.
    * ``is_total == 0`` and ``'total_reg'``: linear regression of the
      overall training ratings on the criterion training ratings, applied
      to the criterion estimates.
    * ``is_total != 0`` and ``'avg'``: mean of the criterion estimates and
      the overall estimate.

    Args:
        seed: random state used for every train/test split.
        n_factor: number of latent factors passed to each SVD model.
        n_epoch: number of training epochs passed to each SVD model.

    Returns:
        ``(s_mae, m_mae)``: MAE of the overall-rating predictions and of the
        combined predictions, both rounded to four decimals.

    Raises:
        ValueError: if ``self.combin_func`` is not supported for the chosen
            ``self.is_total``. Previously these cases left the combined list
            empty and only failed later inside ``accuracy.mae``.
    """
    use_total = not (self.is_total == 0)
    combin_func = self.combin_func

    # Reject unusable configurations before any training starts.
    if use_total and combin_func != 'avg':
        # Message: the overall score as a criterion cannot be used with the
        # regression aggregation function.
        raise ValueError('总分作为准则不适合用于回归聚合函数')
    if combin_func not in ('avg', 'total_reg'):
        raise ValueError('unsupported combin_func: %r' % (combin_func,))

    df = self.datas
    n_criteria = self.no_of_criteria
    reader = Reader(rating_scale=(1, 5))

    def _fit_and_test(column):
        """Split one rating column, fit an SVD model, return (trainset, predictions)."""
        data = Dataset.load_from_df(df[['uid', 'iid', column]], reader=reader)
        train, test = train_test_split(data, random_state=seed)
        # NOTE(review): inside this method ``SVD`` resolves to the
        # module-level name (the algorithm class), not to this method.
        algo = SVD(n_factors=n_factor, n_epochs=n_epoch, verbose=True)
        algo.fit(train)
        return train, algo.test(test)

    # Load and split the data, then train and predict, column by column.
    total_train, total_p = _fit_and_test('total')
    criteria = [_fit_and_test('c' + str(c)) for c in range(1, n_criteria + 1)]
    criteria_p = [preds for _, preds in criteria]

    if combin_func == 'total_reg':
        # Only the rating values enter the regression. Keeping ids out of
        # the array makes it numeric regardless of the id type, and the
        # result is always 2-D (samples x criteria), even for one criterion.
        # NOTE(review): relies on identical seeds producing the same row
        # order in every training set - confirm.
        x = np.array(
            [[rating for (_, _, rating) in train.build_testset()]
             for train, _ in criteria],
            dtype=float).T
        y = np.array(
            [rating for (_, _, rating) in total_train.build_testset()],
            dtype=float)
        reg = LinearRegression()
        reg.fit(x, y)
        k = reg.coef_
        b = reg.intercept_

    multi_p = []
    for idx, base in enumerate(total_p):
        ests = [preds[idx].est for preds in criteria_p]
        if combin_func == 'total_reg':
            est = sum(k[j] * e for j, e in enumerate(ests)) + b
        elif use_total:
            est = (sum(ests) + base.est) / (n_criteria + 1)
        else:
            est = sum(ests) / n_criteria
        multi_p.append(
            predictions.Prediction(base.uid, base.iid, base.r_ui, est,
                                   base.details))

    s_mae = round(accuracy.mae(total_p), 4)
    m_mae = round(accuracy.mae(multi_p), 4)
    return s_mae, m_mae
def transtoPredictions(multi_P, total_P, P):
    """Append joined predictions to ``P`` in place.

    Every row of ``multi_P`` is compared with every row of ``total_P``. For
    each pair whose fields 0 and 1 are equal, a ``predictions.Prediction``
    built from ``(m[0], m[1], t[2], m[2], t[4])`` is appended to ``P``.

    Args:
        multi_P: rows supplying fields 0, 1 and the estimate (field 2).
        total_P: rows supplying fields 2 and 4 of the result.
        P: output container; extended via ``append``.

    Returns:
        None. The result is delivered through ``P``.
    """
    for agg in multi_P:
        for ref in total_P:
            if not (agg[0] == ref[0] and agg[1] == ref[1]):
                continue
            joined = predictions.Prediction(agg[0], agg[1], ref[2], agg[2],
                                            ref[4])
            P.append(joined)
'_test'] = train_test_split( names.get('c' + str(i)), random_state=3) names['algo_c' + str(i)] = SVD(n_factors=20, n_epochs=20, verbose=True) names.get('algo_c' + str(i)).fit(names.get('c' + str(i) + '_train')) names['c' + str(i) + '_p'] = names.get('algo_c' + str(i)).test( names.get('c' + str(i) + '_test')) # multi_p = [] for i in range(1, 6): names['x' + str(i)] = np.array( names.get('c' + str(i) + '_train').build_testset())[:, 2] if i == 1: x = names.get('x' + str(i)) else: x = np.vstack((x, names.get('x' + str(i)))) x = x.T y = np.array(total_train.build_testset())[:, 2] reg = LinearRegression() reg.fit(x, y) k = reg.coef_ b = reg.intercept_ for i in range(len(total_p)): s = 0 for j in range(5): s = s + k[j] * names.get('c' + str(j + 1) + '_p')[i].est s = s + b p = predictions.Prediction(total_p[i].uid, total_p[i].iid, total_p[i].r_ui, s, total_p[i].details) multi_p.append(p)