def main(lineRate, alpha, topN, testN):
    """Recommend for up to ``testN`` test users and show the evaluation.

    Loads a train/test split with ``tool.splitData(lineRate)``, derives the
    model data with ``initData(train)``, builds a ranking for each processed
    test user via ``recommend`` and hands everything to ``tool.Evaluator``.

    Args:
        lineRate: Forwarded to ``tool.splitData``; also reported to the
            evaluator as ``dataScale``.
        alpha: Forwarded unchanged to ``recommend``.
        topN: Forwarded to ``recommend`` and to the evaluator.
        testN: Number of test users to process before stopping; iteration
            also stops when the test set is exhausted.
    """
    print("加载数据...")
    train, test = tool.splitData(lineRate)
    # train, test = tool.splitDataMock()
    M, i2rI, r2iU, iStart = initData(train)

    rankList = dict()
    print("进行推荐...")
    tN = 0
    # Only the users actually processed are evaluated, so collect them
    # into their own test dict.
    testNew = dict()
    for user in test:
        if tN == testN:
            break
        tN += 1
        # "\r" rewrites the same console line to act as a progress counter.
        print("\r%d/%d" % (tN, testN), end='')
        testNew[user] = test[user]
        if user not in train:
            # No training data for this user: record an empty ranking and
            # skip the model call, which would otherwise look up
            # train[user] and fail (or overwrite this empty result).
            rankList[user] = []
            continue
        rankList[user] = recommend(
            r2iU[user], M, alpha, iStart, i2rI, topN, train[user]
        )
    # Terminate the progress line.
    print()

    eva = tool.Evaluator(train, testNew, rankList, dataScale=lineRate, topN=topN)
    eva.show()
def splitDataNumpy(M, k, seed=1):
    """Split the data and also return the training set as a 0/1 matrix.

    Calls ``tool.splitData(1, M, k, seed)`` and marks every (user, item)
    pair of the training split with ``1`` in a 6040 x 3952 matrix. IDs are
    shifted down by one to obtain row/column indices.

    Args:
        M: Forwarded to ``tool.splitData``.
        k: Forwarded to ``tool.splitData``.
        seed: Forwarded to ``tool.splitData``.

    Returns:
        A tuple ``(trainM, train, test)``: the filled matrix followed by the
        two splits exactly as returned by ``tool.splitData``.
    """
    train, test = tool.splitData(1, M, k, seed)

    # Fixed matrix dimensions (rows = users, columns = items) --
    # presumably the ID ranges of the dataset in use; confirm before reuse.
    n_users, n_items = 6040, 3952

    # Keep the default float dtype; it is the better choice for matrix math.
    trainM = mat(zeros((n_users, n_items)))

    interactions = (
        (uid, iid)
        for uid, liked in train.items()
        for iid in liked
    )
    for uid, iid in interactions:
        trainM[uid - 1, iid - 1] = 1

    return trainM, train, test
def itemCF(K, topItem, dataScale):
    """Run the item-based pipeline: split, similarity, recommend, evaluate.

    Args:
        K: Forwarded to ``recommendation`` and reported to the evaluator as
            ``topN``.
        topItem: Forwarded to ``recommendation``.
        dataScale: Forwarded to ``tool.splitData`` and to the evaluator.
    """
    print("加载数据...")
    train, test = tool.splitData(dataScale, 8, 0)
    W = itemSimilartiy(train)

    print("进行推荐...")
    # Produce a ranking for every user of the test set.
    rankList = {
        user: recommendation(train, user, W, K, topItem)
        for user in test.keys()
    }

    print("进行评估...")
    tool.Evaluator(train, test, rankList, topN=K, dataScale=dataScale).show()
def userCF(userN, itemN, dataScale):
    """Run the user-based pipeline: split, similarity, recommend, evaluate.

    Args:
        userN: Forwarded to ``recommend`` and passed to the evaluator as its
            fourth positional argument.
        itemN: Forwarded to ``recommend``.
        dataScale: Forwarded to ``tool.splitData`` and passed to the
            evaluator as its fifth positional argument.
    """
    print("加载数据...")
    train, test = tool.splitData(dataScale, 8, 0)
    W = userSimilarity(train)

    print("进行推荐...")
    # Produce a ranking for every user of the test set.
    rankList = {
        user: recommend(user, train, W, userN, itemN)
        for user in test.keys()
    }

    print("进行评估...")
    tool.Evaluator(train, test, rankList, userN, dataScale).show()
def lfmRecommend(ratio, N, lineRate, topN=10):
    """Fit the latent factor model, recommend for test users and evaluate.

    Args:
        ratio: Forwarded to ``latentFactorModel`` and to the evaluator.
        N: Forwarded to ``latentFactorModel``; reported to the evaluator as
            ``loopN``.
        lineRate: Forwarded to ``splitData``; reported to the evaluator as
            ``dataScale``.
        topN: Forwarded to ``recommend``.
    """
    print("加载数据...")
    train, test = splitData(lineRate)

    print("生成参数...")
    P, Q = latentFactorModel(train, ratio, N)

    print("进行推荐...")
    # Users that never appear in the training split get an empty result
    # instead of a model call.
    rankList = {
        user: recommend(user, P, Q, topN, train) if user in train else {}
        for user in test.keys()
    }

    print("进行评估...")
    Evaluator(
        train, test, rankList, ratio=ratio, dataScale=lineRate, loopN=N
    ).show()
def lfmRecommend(N, alpha, rate, topN=10):
    """Fit the latent factor model, rank items per test user and evaluate.

    For every test user present in the training split, the first ``topN``
    entries of ``sortedUserItem[user]`` that the user has not already got in
    the training split, and that occur somewhere in the train or test data,
    are kept as ``(item, 1)`` pairs.

    Args:
        N: Forwarded to ``latentFactorModel``; reported to the evaluator as
            ``loopN``.
        alpha: Forwarded to ``latentFactorModel``.
        rate: Forwarded to ``latentFactorModel``.
        topN: Maximum number of recommendations kept per user.
    """
    print("加载数据...")
    train, test = splitData(1)

    print("生成参数...")
    sortedUserItem = latentFactorModel(train, N, alpha, rate)

    print("进行推荐...")
    # Every item that occurs in either split.
    itemSet = set()
    for split in (train, test):
        for items in split.values():
            itemSet.update(items)

    rankList = dict()
    for user in test.keys():
        if user in train:
            candidates = []
            for item in sortedUserItem[user]:
                if item in train[user] or item not in itemSet:
                    continue
                candidates.append((item, 1))
            rankList[user] = candidates[:topN]
        else:
            # Unknown user: nothing to recommend from.
            rankList[user] = {}

    print("进行评估...")
    Evaluator(train, test, rankList, loopN=N).show()
def RFsDP(self, hX, hy, privacy, num_learner, max_height, n_rows):
    """Local RFsDP for a single user.

    Splits the user's data into training and validation parts, builds a
    ``MyRandomForest`` on the training part and scores it on the validation
    part.

    Args:
        hX: The user's feature data; ``hX.shape[0]`` is used as the user's
            sample count.
        hy: The user's labels.
        privacy: Passed to ``MyRandomForest`` as ``privacy_p``.
        num_learner: Passed to ``MyRandomForest`` as ``num_learners``.
        max_height: Passed to ``MyRandomForest`` as ``max_depth``.
        n_rows: Total sample count N used as the denominator of lambda_p.

    Returns:
        A tuple ``(lamb_acc, rfs_p)`` where ``lamb_acc`` is
        ``exp(lambda_p) * accuracy`` and ``rfs_p`` is the fitted forest.
    """
    # lambda_p = N_p / N for user p. Convert before dividing so the ratio is
    # not truncated to 0 by integer division on interpreters where ``/`` on
    # two ints floors.
    lamb = float(hX.shape[0]) / n_rows

    # Training and validation sets of user p.
    hX_train, hy_train, hX_val, hy_val = splitData(hX, hy)

    rfs_p = MyRandomForest(
        X=hX_train,
        y=hy_train,
        privacy_p=privacy,
        num_learners=num_learner,
        max_depth=max_height,
    )
    predRF_p = rfs_p.predict(hX_val)

    # Classifier accuracy of user p on the validation set.
    acc = accuracy_score(hy_val, predRF_p, normalize=True)

    # Value of the w function for user p.
    lamb_acc = math.exp(lamb) * acc
    return lamb_acc, rfs_p
def AdaBoostDP(self, hX, hy, privacy, num_learner, height, n_rows):
    """Local AdaboostDP for a single user.

    Splits the user's data into training and validation parts, fits an
    ``AdaboostDTBinaryClassifier`` on the training part and scores it on the
    validation part.

    Args:
        hX: The user's feature data; ``hX.shape[0]`` is used as the user's
            sample count.
        hy: The user's labels.
        privacy: Passed to the classifier as ``privacy_p``.
        num_learner: Passed to the classifier as ``num_learners``.
        height: Passed to the classifier as ``max_depth``.
        n_rows: Total sample count N used as the denominator of lambda_p.

    Returns:
        A tuple ``(lamb_acc, ada_p)`` where ``lamb_acc`` is
        ``exp(lambda_p) * accuracy`` and ``ada_p`` is the fitted classifier.
    """
    # lambda_p = N_p / N for user p. Convert before dividing so the ratio is
    # not truncated to 0 by integer division on interpreters where ``/`` on
    # two ints floors.
    lamb = float(hX.shape[0]) / n_rows

    # Training and validation sets of user p.
    hX_train, hy_train, hX_val, hy_val = splitData(hX, hy)

    ada_p = AdaboostDTBinaryClassifier(
        privacy_p=privacy, num_learners=num_learner, max_depth=height
    )
    ada_p.fit(hX_train, hy_train)

    # Kept for comparison with reference [13] (2016):
    # alpha_p.append(ada_p.learner_weight_)
    # Ada_set_p.append(ada_p.learners_)

    # Predict on the validation set.
    predAda_p = ada_p.predict(hX_val)

    # Classifier accuracy of user p on the validation set.
    acc = accuracy_score(hy_val, predAda_p, normalize=True)

    lamb_acc = math.exp(lamb) * acc
    return lamb_acc, ada_p