def getAllUserRating(n=1, k=20, similarity=sim_pearson):
    """Predict a rating for every (user, item) pair of the test set.

    With ``n == 1`` the pre-split files ``ml-100k/u1.base`` and
    ``ml-100k/u1.test`` are loaded. For any other ``n`` the file
    ``ml-1m/ratings.dat`` is read and each rating is assigned at random to
    the training set (probability 0.7) or, otherwise, to the test set.

    Args:
        n: Dataset selector; 1 selects the pre-split files.
        k: Forwarded to ``getRating`` as its fourth argument (the original
            comment describes it as an upper bound on the number of users
            considered).
        similarity: Accepted but not used by this function.

    Returns:
        A list of ``[userid, item, actual_rating, predicted_rating]`` lists.
        The same list is handed to ``SaveRecords`` before it is returned.
    """
    if n == 1:
        traindata = loadMovieLensTrain('ml-100k/u1.base')  # training set
        testdata = loadMovieLensTest('ml-100k/u1.test')  # test set
    else:
        train_share = 0.7
        traindata, testdata = {}, {}
        for row in loadfile('ml-1m/ratings.dat'):
            user, movie, rating, _ = row.split('::')
            # One random draw per rating decides which split receives it.
            bucket = traindata if random.random() < train_share else testdata
            bucket.setdefault(user, {})[movie] = int(rating)

    records = []
    for userid, actual_ratings in testdata.items():
        for item, actual in actual_ratings.items():
            # Predict the held-out rating from the training data only.
            predicted = getRating(traindata, userid, item, k)
            records.append([userid, item, actual, predicted])

    SaveRecords(records)
    return records
def getAllUserRating(fileTrain='u1.base', fileTest='u1.test', k=20, similarity=sim_pearson):
    """Predict a rating for every (user, item) pair of the test file.

    Args:
        fileTrain: File name passed to ``loadMovieLensTrain``.
        fileTest: File name passed to ``loadMovieLensTest``.
        k: Forwarded to ``getRating`` as its fourth argument.
        similarity: Accepted but not used by this function.

    Returns:
        A list of ``[userid, item, actual_rating, predicted_rating]`` lists,
        one per rating found in the test data.
    """
    traindata = loadMovieLensTrain(fileTrain)  # training set
    testdata = loadMovieLensTest(fileTest)  # test set

    # For each test-set rating, predict it from the training data.
    records = [
        [userid, item, actual, getRating(traindata, userid, item, k)]
        for userid, actual_ratings in testdata.items()
        for item, actual in actual_ratings.items()
    ]

    # The reported total is the number of predictions made.
    print("-------------Completed!!-----------", len(records))
    return records
def getAllUserRating(trainfile, testfile, k=20, similarity=sim_pearson):
    """Predict a rating for every (user, item) pair of the test file.

    Args:
        trainfile: File name passed to ``loadMovieLensTrain``.
        testfile: File name passed to ``loadMovieLensTest``.
        k: Forwarded to ``getRating`` as its fourth argument.
        similarity: Forwarded to ``getRating`` as its fifth argument.

    Returns:
        A list of ``[userid, item, actual_rating, predicted_rating]`` lists.
        The same list is handed to ``SaveRecords`` before it is returned.
    """
    traindata = loadMovieLensTrain(trainfile)  # training set
    testdata = loadMovieLensTest(testfile)  # test set

    records = []
    for userid, actual_ratings in testdata.items():
        # One record per item this test user rated, predicted from the
        # training data.
        records.extend(
            [userid, item, actual,
             getRating(traindata, userid, item, k, similarity)]
            for item, actual in actual_ratings.items()
        )

    SaveRecords(records)
    return records
def getAllUserRating(fileTrain='u1.base', fileTest='u1.test', fileResult='result.txt', similarity=sim_pearson, k=20):
    """Predict every test-set rating and append the predictions to a file.

    A dashed separator line is appended to ``fileResult`` first, followed by
    one ``userid<TAB>item<TAB>predicted_rating`` line per rating found in the
    test data. The number of predictions is printed at the end.

    Args:
        fileTrain: File name passed to ``loadMovieLensTrain``.
        fileTest: File name passed to ``loadMovieLensTest``.
        fileResult: Path of the output file, opened in append mode.
        similarity: Accepted but not used by this function.
        k: Forwarded to ``getRating`` as its fourth argument; defaults to
            the previously hard-coded value of 20.

    Returns:
        None.
    """
    prefer1 = loadMovieLensTrain(fileTrain)  # training set
    prefer2 = loadMovieLensTest(fileTest)  # test set
    inAllnum = 0

    # The context manager closes the file even if a prediction raises.
    with open(fileResult, 'a') as result_file:
        result_file.write("%s\n" % ("------------------------------------------------------"))
        for userid in prefer2:  # every user in the test set
            for item in prefer2[userid]:  # every item that user rated
                # Predict the rating from the training data.
                rating = getRating(prefer1, userid, item, k)
                result_file.write('%s\t%s\t%s\n' % (userid, item, rating))
                inAllnum += 1

    print("-------------Completed!!-----------", inAllnum)
def getAllUserRating(fileTrain='u1.base', fileTest='u1.test', fileResult='result.txt', similarity=sim_pearson, k=20):
    """Predict every test-set rating and append the predictions to a file.

    Appends a dashed separator line to ``fileResult`` and then one
    ``userid<TAB>item<TAB>predicted_rating`` line for each rating in the test
    data. Prints the number of predictions when finished.

    Args:
        fileTrain: File name passed to ``loadMovieLensTrain``.
        fileTest: File name passed to ``loadMovieLensTest``.
        fileResult: Path of the output file, opened in append mode.
        similarity: Accepted but not used by this function.
        k: Forwarded to ``getRating`` as its fourth argument; defaults to
            the previously hard-coded value of 20.

    Returns:
        None.
    """
    prefer1 = loadMovieLensTrain(fileTrain)
    prefer2 = loadMovieLensTest(fileTest)
    inAllnum = 0

    # ``with`` guarantees the handle is released if getRating raises midway.
    with open(fileResult, 'a') as out:
        out.write("%s\n" % ("------------------------------------------------------"))
        for userid in prefer2:
            for item in prefer2[userid]:
                # Predict the user's rating based on the train data.
                rating = getRating(prefer1, userid, item, k)
                out.write('%s\t%s\t%s\n' % (userid, item, rating))
                inAllnum += 1

    print("-------------Completed!!-----------", inAllnum)
def readData(self, train=None, test=None):
    """Load the training and test rating sets onto this instance.

    Args:
        train: Training file name. When falsy, the current
            ``self.trainfile`` is kept and used.
        test: Test file name. When falsy, the current ``self.testfile`` is
            kept and used.

    Side effects:
        Updates ``self.trainfile`` / ``self.testfile`` and stores the loaded
        data in ``self.traindata`` / ``self.testdata``.
    """
    self.trainfile = train or self.trainfile
    self.testfile = test or self.testfile
    # Load from the resolved paths rather than the raw arguments, so that an
    # omitted argument falls back to the stored file name instead of None.
    self.traindata = loadMovieLensTrain(self.trainfile)  # training set
    self.testdata = loadMovieLensTest(self.testfile)  # test set
def loadMovieLensResult(fileName='result.txt'):
    """Read predicted ratings from a tab-separated result file.

    Each data line holds three tab-separated fields: user id, item id and
    rating. Lines that contain no tab at all (blank lines, dashed separator
    lines) are skipped. A line that has tabs but not exactly three fields
    still raises ``ValueError``.

    Args:
        fileName: File name, resolved relative to the current directory.

    Returns:
        A nested dict ``{userid: {movieid: rating}}`` with string keys and
        float ratings. If a pair occurs more than once, the last line wins.
    """
    str1 = './'  # relative location of the result file
    prefer = {}
    # The context manager closes the handle once the file has been read.
    with open(str1 + fileName, 'r') as result_file:
        for line in result_file:
            if '\t' not in line:
                # Not a data row (e.g. a run separator): nothing to parse.
                continue
            (userid, movieid, rating) = line.split('\t')
            prefer.setdefault(userid, {})[movieid] = float(rating)
    return prefer


def _scorePredictions(prefer_result, prefer_test):
    """Compare predicted and actual ratings for the pairs present in both.

    Returns:
        A ``(count, win_count)`` tuple: the number of (user, item) pairs
        found in both mappings, and how many of those predictions differ
        from the actual rating by less than 1.
    """
    count = 0
    win_count = 0
    for user, predicted in prefer_result.items():
        if user not in prefer_test:
            continue
        actual = prefer_test[user]
        for item, rating in predicted.items():
            if item not in actual:
                continue
            if abs(rating - actual[item]) < 1:
                win_count += 1
            count += 1
    return count, win_count


if __name__ == "__main__":
    print("\n--------------准确率生成中 -----------\n")
    prefer_result = loadMovieLensResult()
    prefer_test = loadMovieLensTest()
    count, win_count = _scorePredictions(prefer_result, prefer_test)
    # Report 0.0 instead of dividing by zero when no pair could be compared.
    accuracy = float(win_count) / count if count else 0.0
    print('准确率为:' + str(accuracy))
    print('预测次数为:' + str(count))
    print('正确预测次数为:' + str(win_count))