예제 #1
0
    def KNN_pred(self, is_total=0, combin_func='avg'):
        """Compare the overall-rating model with the aggregated criterion models.

        ``self.algos[0]`` is tested on the ``total`` column of
        ``self.testdatas`` and ``self.algos[i]`` on column ``c<i>`` for
        ``i = 1..self.no_of_criteria``.  The criterion estimates of every test
        row are then merged into a single estimate.

        Args:
            is_total: ``0`` merges the criterion estimates only; any other
                value also includes the overall-rating estimate in the mean.
            combin_func: ``'avg'`` for the arithmetic mean, or ``'total_reg'``
                for ``sum(self.k[j] * est_j) + self.b``.  ``'total_reg'`` is
                only available when ``is_total == 0``.

        Returns:
            ``(s_mae, m_mae, total_p, multi_p)``: the MAE of the overall-rating
            predictions and of the merged predictions (both rounded to four
            decimals), followed by the two prediction lists.

        Raises:
            ValueError: if ``is_total`` / ``combin_func`` is not a supported
                combination.  Raised before any model is run; without this
                check the merged list would stay empty and ``accuracy.mae``
                would fail on it.
        """
        use_total = is_total != 0
        if use_total and combin_func != 'avg':
            # The overall rating cannot be a criterion of the regression
            # aggregation (message kept from the original diagnostic).
            raise ValueError('总分作为准则不适合用于回归聚合函数')
        if not use_total and combin_func not in ('avg', 'total_reg'):
            raise ValueError('unsupported combin_func: %r' % (combin_func,))

        reader = Reader(rating_scale=(1, 5))
        df = self.testdatas
        n_criteria = self.no_of_criteria

        total_test = np.array(df[['uid', 'iid', 'total']])
        total_p = self.algos[0].test(total_test)

        # criteria_p[j] holds the predictions of criterion j + 1.
        criteria_p = []
        for c in range(1, n_criteria + 1):
            data = Dataset.load_from_df(df[['uid', 'iid', 'c' + str(c)]],
                                        reader=reader)
            # ``test()`` needs (uid, iid, rating) tuples, so the frame is
            # round-tripped through a full trainset.
            # NOTE(review): build_testset() yields ratings in trainset order;
            # this assumes that order matches the rows of ``total_test`` --
            # confirm (e.g. that ``self.testdatas`` is grouped by uid).
            testset = data.build_full_trainset().build_testset()
            criteria_p.append(self.algos[c].test(testset))

        if combin_func == 'total_reg':
            # Only read the regression parameters when they are needed.
            weights = self.k
            bias = self.b

        multi_p = []
        for idx, base in enumerate(total_p):
            if combin_func == 'avg':
                est = 0
                for crit in criteria_p:
                    est = est + crit[idx].est
                if use_total:
                    est = (est + base.est) / (n_criteria + 1)
                else:
                    est = est / n_criteria
            else:
                est = 0
                for j in range(n_criteria):
                    est = est + weights[j] * criteria_p[j][idx].est
                est = est + bias
            multi_p.append(
                predictions.Prediction(base.uid, base.iid, base.r_ui, est,
                                       base.details))

        s_mae = round(accuracy.mae(total_p), 4)
        m_mae = round(accuracy.mae(multi_p), 4)
        return s_mae, m_mae, total_p, multi_p
예제 #2
0
def transtoPrediction(multi, single):
    """Join multi-criteria estimates with single-rating predictions.

    Every entry ``m`` of ``multi`` is paired with every entry ``s`` of
    ``single`` that has the same first two fields (``m[0] == s[0]`` and
    ``m[1] == s[1]``).  Each pair yields
    ``Prediction(m[0], m[1], s[2], m[2], s[4])``.

    Args:
        multi: iterable of indexable records; fields 0 and 1 are the join
            key, field 2 is used as the estimate.
        single: iterable of indexable records; fields 0 and 1 are the join
            key, fields 2 and 4 are copied into the result.

    Returns:
        List of ``predictions.Prediction`` in ``multi`` order; within one
        ``multi`` entry the matches follow ``single`` order.
    """
    # Index ``single`` once by its key instead of rescanning it for every
    # entry of ``multi``; lists keep duplicate keys and their order.
    by_key = {}
    for s in single:
        by_key.setdefault((s[0], s[1]), []).append(s)

    return [
        predictions.Prediction(m[0], m[1], s[2], m[2], s[4])
        for m in multi
        for s in by_key.get((m[0], m[1]), ())
    ]
예제 #3
0
def avg(P, total_p, use_total=True):
    """Average the per-criterion estimates of every row of ``P``.

    Args:
        P: indexable collection of rows.  Fields 0 and 1 of a row are copied
            into the result, fields 3..7 are the five criterion estimates and
            field 2 is the overall-rating estimate.
        total_p: indexable collection aligned with ``P``; fields 2 and 4 of
            ``total_p[i]`` are copied into the i-th result.
        use_total: when true (default, the original behaviour) the
            overall-rating estimate is averaged in as a sixth criterion;
            when false only the five criterion estimates are averaged.

    Returns:
        List of ``predictions.Prediction``, one per row of ``P``.
    """
    # Column order mirrors the original expression so the floating-point
    # result is unchanged.
    columns = (3, 4, 5, 6, 7, 2) if use_total else (3, 4, 5, 6, 7)

    multi_p = []
    for i in range(len(P)):
        row = P[i]
        acc = row[columns[0]]
        for col in columns[1:]:
            acc = acc + row[col]
        pred = acc / len(columns)
        p = predictions.Prediction(row[0], row[1], total_p[i][2], pred,
                                   total_p[i][4])
        multi_p.append(p)
    return multi_p
예제 #4
0
def totaldata_regModel(P, k, b, total_p):
    """Combine five criterion estimates per row with a linear model.

    For every row of ``P`` the estimate is
    ``row[3]*k[0] + row[4]*k[1] + row[5]*k[2] + row[6]*k[3] + row[7]*k[4] + b``
    (the overall rating is not used as a criterion).

    Args:
        P: indexable collection of rows; fields 0 and 1 are copied into the
            result, fields 3..7 are the criterion estimates.
        k: indexable with at least five coefficients.
        b: intercept added to the weighted sum.
        total_p: indexable collection aligned with ``P``; fields 2 and 4 of
            ``total_p[i]`` are copied into the i-th result.

    Returns:
        List of ``predictions.Prediction``, one per row of ``P``.
    """
    combined = []
    for idx in range(len(P)):
        row = P[idx]
        # Accumulate left to right so the result matches the written-out sum.
        estimate = row[3] * k[0]
        for offset in range(1, 5):
            estimate = estimate + row[3 + offset] * k[offset]
        estimate = estimate + b
        combined.append(
            predictions.Prediction(row[0], row[1], total_p[idx][2], estimate,
                                   total_p[idx][4]))
    return combined
예제 #5
0
    def Predict(self,
                min_or_more='min',
                pred_options=None):
        """Compare the overall-rating model with the aggregated criterion models.

        ``self.algos[0]`` is tested on the ``total`` column and
        ``self.algos[i]`` on column ``c<i>`` (``i = 1..self.no_of_criteria``)
        of the selected test frame, sorted by ``uid``.  The criterion
        estimates of every row are then merged into a single estimate.

        Args:
            min_or_more: ``'min'`` selects ``self.min_test``; any other value
                selects ``self.more_test``.
            pred_options: dict with the keys ``'is_total'`` (truthy to also
                average in the overall-rating estimate) and ``'combin_func'``
                (``'avg'``, ``'total_reg'`` or ``'info_entropy'``).  Missing
                keys, or ``None``, fall back to
                ``{'is_total': False, 'combin_func': 'avg'}``.

        Returns:
            ``(s_mae, m_mae)``: MAE of the overall-rating predictions and of
            the merged predictions, both rounded to four decimals.

        Raises:
            ValueError: if the option combination is not supported (only
                ``'avg'`` is available when ``'is_total'`` is truthy).
                Raised before any model is run; without this check the merged
                list would stay empty and ``accuracy.mae`` would fail on it.
        """
        options = {'is_total': False, 'combin_func': 'avg'}
        if pred_options is not None:
            options.update(pred_options)
        use_total = bool(options['is_total'])
        combin_func = options['combin_func']

        if use_total and combin_func != 'avg':
            # The overall rating cannot be a criterion of the regression
            # aggregation (message kept from the original diagnostic).
            raise ValueError('总分作为准则不适合用于回归聚合函数')
        if not use_total and combin_func not in ('avg', 'total_reg',
                                                 'info_entropy'):
            raise ValueError('unsupported combin_func: %r' % (combin_func,))

        if min_or_more == 'min':
            df = self.min_test.sort_values(by='uid')
        else:
            df = self.more_test.sort_values(by='uid')
        n_criteria = self.no_of_criteria

        total_test = np.array(df[['uid', 'iid', 'total']])
        total_p = self.algos[0].test(total_test)

        # criteria_p[j] holds the predictions of criterion j + 1, row-aligned
        # with total_p because both are built from the same sorted frame.
        criteria_p = []
        for c in range(1, n_criteria + 1):
            criterion_test = np.array(df[['uid', 'iid', 'c' + str(c)]])
            criteria_p.append(self.algos[c].test(criterion_test))

        if combin_func == 'total_reg':
            weights = self.k
            bias = self.b
        elif combin_func == 'info_entropy':
            self.Info_Entropy()
            # Column 0 of H is matched against the uid, columns 1..n are the
            # per-criterion weights and column n + 1 is the divisor.
            H = np.array(self.H)

        multi_p = []
        for idx, base in enumerate(total_p):
            ests = [crit[idx].est for crit in criteria_p]

            weighted_row = None
            if combin_func == 'info_entropy':
                matches = np.argwhere(H[:, 0] == base.uid)
                if len(matches):
                    # First row of H recorded for this user.
                    weighted_row = H[matches][0][0]

            est = 0
            if combin_func == 'total_reg':
                for j in range(n_criteria):
                    est = est + weights[j] * ests[j]
                est = est + bias
            elif weighted_row is not None:
                for j in range(1, n_criteria + 1):
                    est = est + (weighted_row[j] * ests[j - 1] /
                                 weighted_row[n_criteria + 1])
            else:
                # Plain mean; also the fallback for users without a row in H.
                for value in ests:
                    est = est + value
                if use_total:
                    est = (est + base.est) / (n_criteria + 1)
                else:
                    est = est / n_criteria

            multi_p.append(
                predictions.Prediction(base.uid, base.iid, base.r_ui, est,
                                       base.details))

        s_mae = round(accuracy.mae(total_p), 4)
        m_mae = round(accuracy.mae(multi_p), 4)
        return s_mae, m_mae
예제 #6
0
 def SVD(self, seed=3, n_factor=20, n_epoch=20):
     """Train one SVD model per rating column and compare the resulting MAEs.

     A model is fitted on the ``total`` column of ``self.datas`` and on
     every criterion column ``c1..c<self.no_of_criteria>``, each with the
     same train/test split seed.  The criterion estimates are merged
     according to ``self.is_total`` and ``self.combin_func``.

     Args:
         seed: random state passed to every ``train_test_split`` call.
         n_factor: ``n_factors`` passed to each SVD model.
         n_epoch: ``n_epochs`` passed to each SVD model.

     Returns:
         ``(s_mae, m_mae)``: MAE of the overall-rating predictions and of
         the merged predictions, both rounded to four decimals.

     Raises:
         ValueError: if ``self.is_total`` / ``self.combin_func`` is not a
             supported combination (``'avg'`` or ``'total_reg'`` when
             ``is_total == 0``, only ``'avg'`` otherwise).  Raised before
             any training; without this check the merged list would stay
             empty and ``accuracy.mae`` would fail on it.
     """
     use_total = self.is_total != 0
     combin_func = self.combin_func
     if use_total and combin_func != 'avg':
         # The overall rating cannot be a criterion of the regression
         # aggregation (message kept from the original diagnostic).
         raise ValueError('总分作为准则不适合用于回归聚合函数')
     if not use_total and combin_func not in ('avg', 'total_reg'):
         raise ValueError('unsupported combin_func: %r' % (combin_func,))

     df = self.datas
     reader = Reader(rating_scale=(1, 5))
     n_criteria = self.no_of_criteria

     def fit_and_test(column):
         # Load, split, train and predict for one rating column.
         data = Dataset.load_from_df(df[['uid', 'iid', column]],
                                     reader=reader)
         train, test = train_test_split(data, random_state=seed)
         # ``SVD`` resolves to the module-level algorithm class here, not to
         # this method.
         algo = SVD(n_factors=n_factor, n_epochs=n_epoch, verbose=True)
         algo.fit(train)
         return train, algo.test(test)

     def train_ratings(trainset):
         # Pull only the rating field so that non-numeric user/item ids
         # cannot turn the whole array into strings.
         return np.array([row[2] for row in trainset.build_testset()],
                         dtype=float)

     total_train, total_p = fit_and_test('total')
     criteria_train = []
     criteria_p = []
     for c in range(1, n_criteria + 1):
         train, preds = fit_and_test('c' + str(c))
         criteria_train.append(train)
         criteria_p.append(preds)

     if combin_func == 'total_reg':
         # Regress the overall training rating on the criterion training
         # ratings: one column per criterion (stays 2-D for one criterion).
         # NOTE(review): relies on every split using the same seed so the
         # training rows line up across columns -- confirm.
         x = np.column_stack([train_ratings(t) for t in criteria_train])
         y = train_ratings(total_train)
         reg = LinearRegression()
         reg.fit(x, y)
         k = reg.coef_
         b = reg.intercept_

     multi_p = []
     for idx, base in enumerate(total_p):
         est = 0
         if combin_func == 'avg':
             for crit in criteria_p:
                 est = est + crit[idx].est
             if use_total:
                 est = (est + base.est) / (n_criteria + 1)
             else:
                 est = est / n_criteria
         else:
             for j in range(n_criteria):
                 est = est + k[j] * criteria_p[j][idx].est
             est = est + b
         multi_p.append(
             predictions.Prediction(base.uid, base.iid, base.r_ui, est,
                                    base.details))

     s_mae = round(accuracy.mae(total_p), 4)
     m_mae = round(accuracy.mae(multi_p), 4)
     return s_mae, m_mae
예제 #7
0
def transtoPredictions(multi_P, total_P, P):
    """Append joined predictions to ``P`` in place.

    Every entry ``m`` of ``multi_P`` is paired with every entry ``t`` of
    ``total_P`` that has the same first two fields (``m[0] == t[0]`` and
    ``m[1] == t[1]``).  Each pair appends
    ``Prediction(m[0], m[1], t[2], m[2], t[4])`` to ``P``.

    Args:
        multi_P: iterable of indexable records; fields 0 and 1 are the join
            key, field 2 is used as the estimate.
        total_P: iterable of indexable records; fields 0 and 1 are the join
            key, fields 2 and 4 are copied into the result.
        P: output collection; results are added with ``P.append``.

    Returns:
        None.  ``P`` is modified in place.
    """
    # Index ``total_P`` once by its key instead of rescanning it for every
    # entry of ``multi_P``; lists keep duplicate keys and their order.
    by_key = {}
    for t in total_P:
        by_key.setdefault((t[0], t[1]), []).append(t)

    for m in multi_P:
        for t in by_key.get((m[0], m[1]), ()):
            P.append(predictions.Prediction(m[0], m[1], t[2], m[2], t[4]))
예제 #8
0
                                          '_test'] = train_test_split(
                                              names.get('c' + str(i)),
                                              random_state=3)
    names['algo_c' + str(i)] = SVD(n_factors=20, n_epochs=20, verbose=True)
    names.get('algo_c' + str(i)).fit(names.get('c' + str(i) + '_train'))
    names['c' + str(i) + '_p'] = names.get('algo_c' + str(i)).test(
        names.get('c' + str(i) + '_test'))
    #
# Merge the per-criterion predictions with a linear regression fitted on the
# training data.  `names`, `total_train` and `total_p` are defined earlier in
# the script, outside this excerpt.
multi_p = []
# Stack, for criteria 1..5, column 2 of each criterion's training testset
# (presumably the rating value -- confirm) as one row per criterion.
for i in range(1, 6):
    names['x' + str(i)] = np.array(
        names.get('c' + str(i) + '_train').build_testset())[:, 2]
    if i == 1:
        x = names.get('x' + str(i))
    else:
        x = np.vstack((x, names.get('x' + str(i))))
# Transpose so that every column corresponds to one criterion.
x = x.T
# Regression target: column 2 of the overall-rating training testset.
y = np.array(total_train.build_testset())[:, 2]
reg = LinearRegression()
reg.fit(x, y)
k = reg.coef_
b = reg.intercept_
# Merged estimate per test row: sum(k[j] * estimate of criterion j + 1) + b.
# All other fields are copied from the overall-rating prediction.
for i in range(len(total_p)):
    s = 0
    for j in range(5):
        s = s + k[j] * names.get('c' + str(j + 1) + '_p')[i].est
    s = s + b
    p = predictions.Prediction(total_p[i].uid, total_p[i].iid, total_p[i].r_ui,
                               s, total_p[i].details)
    multi_p.append(p)