def testSystem(F_list, step_list, LearnRating_list, penalty_list):
    """Grid-search the training hyper-parameters and log each run's test RMSE.

    For every combination drawn from the four argument lists a model is
    trained with ``LearningBiasLFM`` and scored on the test set. The RMSE is
    printed and one row ``F,step,LearnRating,penalty,mu,RMSE`` is appended
    to ``result_ItemCF.txt``.

    Args:
        F_list: values forwarded to ``LearningBiasLFM`` as ``F``.
        step_list: values forwarded to ``LearningBiasLFM`` as ``step``.
        LearnRating_list: values forwarded as ``LearnRating``.
        penalty_list: values forwarded as ``penalty``.

    Raises:
        ZeroDivisionError: if the test data set is empty.
    """
    trainDataSet = FO.readDataSet('trainDataSet.txt')
    testDataSet = FO.readDataSet('testDataSet.txt')

    # Build the user -> [(item, rating), ...] inverted index from the
    # training data; keys of the data set are indexed as (user, item).
    user_items = dict()
    for key, score in trainDataSet.items():
        user_items.setdefault(key[0], list()).append((key[1], score))

    mu = calMu(trainDataSet)

    for F in F_list:
        for step in step_list:
            for LearnRating in LearnRating_list:
                for penalty in penalty_list:
                    bu, bi, p, q = LearningBiasLFM(
                        user_items, trainDataSet, testDataSet,
                        F, step, LearnRating, penalty, mu)

                    # Sum of squared prediction errors over the test set.
                    squared_error = sum(
                        (Predict(pair[0], pair[1], p, q, bu, bi, mu) - actual) ** 2
                        for pair, actual in testDataSet.items())
                    RMSE = math.sqrt(squared_error / len(testDataSet))
                    print('RMSE=%s' % RMSE)

                    fields = (F, step, LearnRating, penalty, mu, RMSE)
                    row = ','.join(str(field) for field in fields)
                    with open('result_ItemCF.txt', 'a') as fileObject:
                        fileObject.write(row + '\n')
def initializeSystem(F_list, step_list, LearnRating_list, penalty_list):
    """Train on train+test data for every parameter combination and save the result.

    The training and test sets are merged (test entries win on duplicate
    keys), a model is learned with ``LearningBiasLFM`` for each combination,
    and the returned ``bu``, ``bi``, ``p`` and ``q`` are written under
    ``SVD++/Model_Parameter/<F>-<step>-<LearnRating>-<penalty>``.

    Args:
        F_list: values forwarded to ``LearningBiasLFM`` as ``F``.
        step_list: values forwarded to ``LearningBiasLFM`` as ``step``.
        LearnRating_list: values forwarded as ``LearnRating``.
        penalty_list: values forwarded as ``penalty``.
    """
    trainDataSet = FO.readDataSet('trainDataSet.txt')
    testDataSet = FO.readDataSet('testDataSet.txt')

    dataSet = dict(trainDataSet)
    dataSet.update(testDataSet)
    mu = calMu(dataSet)

    # Inverted index: user -> [(item, rating), ...] over the merged data.
    user_items = dict()
    for key, score in dataSet.items():
        user_items.setdefault(key[0], list()).append((key[1], score))

    for F in F_list:
        for step in step_list:
            for LearnRating in LearnRating_list:
                for penalty in penalty_list:
                    print(F, step, LearnRating, penalty)
                    # An empty dict is passed where the test set would go.
                    bu, bi, p, q = LearningBiasLFM(
                        user_items, dataSet, dict(),
                        F, step, LearnRating, penalty, mu)

                    tag = '-'.join(
                        str(part) for part in (F, step, LearnRating, penalty))
                    path = 'SVD++/Model_Parameter/' + tag
                    if not os.path.exists(path):
                        os.makedirs(path)

                    FO.WriteBuDict(path, bu)
                    FO.WriteBiDict(path, bi)
                    FO.WritePDict(path, p, F)
                    FO.WriteQDict(path, q, F)
def fillMatrix(F_list, step_list, LearnRating_list, penalty_list, N=30):
    """Predict ratings for unrated items and append a random sample per user.

    For each parameter combination the saved model is loaded with
    ``ReadParameter``. For every user, a rating is predicted for each item
    the user has not rated in the merged train+test data; up to ``N`` of
    those predictions are chosen at random and appended to
    ``SVD++/Matrix/<F>-<step>-<LearnRating>-<penalty>/new_ratings.txt`` as
    ``user::item::rating`` lines. Predicted ratings are rounded with
    ``round``; NaN predictions are dropped.

    Args:
        F_list: values passed to ``ReadParameter`` as ``F``.
        step_list: values passed to ``ReadParameter`` as ``step``.
        LearnRating_list: values passed as ``LearnRating``.
        penalty_list: values passed as ``penalty``.
        N: maximum number of predicted ratings written per user.
    """
    trainDataSet = FO.readDataSet('trainDataSet.txt')
    testDataSet = FO.readDataSet('testDataSet.txt')
    userList = FO.readUserList('data/users.txt')
    # Item catalogue; coerced to a set so the set difference below works
    # whatever container the reader returns.
    itemList = set(FO.readItemList('data/movies.txt'))
    dataSet = {**trainDataSet, **testDataSet}
    mu = calMu(dataSet)

    # Index the rated items per user once instead of rescanning the whole
    # data set for every user and every parameter combination.
    rated_by_user = dict()
    for user_item in dataSet:
        rated_by_user.setdefault(user_item[0], set()).add(user_item[1])

    for F in F_list:
        for step in step_list:
            for LearnRating in LearnRating_list:
                for penalty in penalty_list:
                    print(F, step, LearnRating, penalty)
                    path = ('SVD++/Matrix/' + str(F) + '-' + str(step) + '-'
                            + str(LearnRating) + '-' + str(penalty))
                    if not os.path.exists(path):
                        os.makedirs(path)
                    bu, bi, p, q = ReadParameter(F, step, LearnRating, penalty)

                    for user in userList:
                        print(user)
                        UnRatedList = itemList - rated_by_user.get(user, set())

                        rank = dict()
                        for item in UnRatedList:
                            rating = Predict(user, item, p, q, bu, bi, mu)
                            if math.isnan(float(rating)):
                                continue
                            rank[item] = round(rating)
                        if not rank:
                            continue

                        # Sample only items that have a usable prediction,
                        # so every chosen item is a key of ``rank``. The
                        # size is a per-user local: N itself is never
                        # modified, so one sparse user cannot shrink the
                        # sample of later users. random.sample needs a
                        # sequence, hence the list().
                        sample_size = min(N, len(rank))
                        chooseList = random.sample(list(rank), sample_size)

                        with open(path + '/new_ratings.txt', 'a') as fileObject:
                            fileObject.writelines(
                                str(user) + '::' + str(choose) + '::'
                                + str(rank[choose]) + '\n'
                                for choose in chooseList)
def initializeSystem(F_list, step_list, LearnRating_list, penalty_list):
    """Train on train+test data for every parameter combination and save the result.

    The training and test sets are merged (test entries win on duplicate
    keys), a model is learned with ``LearningBiasLFM`` for each combination,
    and the returned ``bu``, ``bi``, ``p`` and ``q`` are written under
    ``SVD/Model_Parameter/<F>-<step>-<LearnRating>-<penalty>``.

    Args:
        F_list: values forwarded to ``LearningBiasLFM`` as ``F``.
        step_list: values forwarded to ``LearningBiasLFM`` as ``step``.
        LearnRating_list: values forwarded as ``LearnRating``.
        penalty_list: values forwarded as ``penalty``.
    """
    trainDataSet = FO.readDataSet('trainDataSet.txt')
    testDataSet = FO.readDataSet('testDataSet.txt')

    dataSet = dict(trainDataSet)
    dataSet.update(testDataSet)
    mu = calMu(dataSet)

    for F in F_list:
        for step in step_list:
            for LearnRating in LearnRating_list:
                for penalty in penalty_list:
                    print(F, step, LearnRating, penalty)
                    # An empty dict is passed where the test set would go.
                    bu, bi, p, q = LearningBiasLFM(
                        dataSet, dict(), F, step, LearnRating, penalty, mu)

                    tag = '-'.join(
                        str(part) for part in (F, step, LearnRating, penalty))
                    path = 'SVD/Model_Parameter/' + tag
                    if not os.path.exists(path):
                        os.makedirs(path)

                    FO.WriteBuDict(path, bu)
                    FO.WriteBiDict(path, bi)
                    FO.WritePDict(path, p, F)
                    FO.WriteQDict(path, q, F)
def calUserRatingMean(dataSet):
    """Return the mean rating of every user listed in ``data/users.txt``.

    Args:
        dataSet: mapping whose keys are indexed as ``(user, item)`` and
            whose values are ratings.

    Returns:
        dict mapping user -> mean of that user's ratings in ``dataSet``.
        Users that have no rating in ``dataSet`` are omitted, because a
        mean is undefined for them.
    """
    userList = FO.readUserList('data/users.txt')

    # Group the ratings per user in a single pass instead of rescanning
    # the whole data set once per user.
    ratings_by_user = dict()
    for user_item, value in dataSet.items():
        ratings_by_user.setdefault(user_item[0], list()).append(value)

    userMeanDict = dict()
    for user in userList:
        print(user)
        UserRating = ratings_by_user.get(user)
        if not UserRating:
            # No ratings for this user: skip rather than divide by zero.
            continue
        userMeanDict[user] = sum(UserRating) / len(UserRating)
    return userMeanDict


if __name__ == '__main__':
    trainDataSet = FO.readDataSet('trainDataSet.txt')
    testDataSet = FO.readDataSet('testDataSet.txt')
    #addDataSet = FO.readDataSet('SVD++/Matrix/5-5-0.01-0.1/new_ratings.txt')
    trainDataSet = {**trainDataSet, **testDataSet}
    userMeanDict = calUserRatingMean(trainDataSet)
    FO.WirteRatingMean('UserCF/UserMean/DataSetUserRating_mean.txt', userMeanDict)
    #calUserSimilarity(trainDataSet)
    '''
    userList = FO.readUserList('data/users.txt')
    userMeanDict =calUserRatingMean(trainDataSet)
    FO.WirteUserRatingMean('trainDataSetUserRatingMean.txt',userMeanDict)
    data = FO.readFromFile('data/ratings.txt', 'ratings')
    allUserMeanDict = calUserRatingMean(data)

    trainDataSet = FO.readDataSet('UserCF/trainDataSet.txt')
    testDataSet = FO.readDataSet('UserCF/testDataSet.txt')
    calUserSimilarity(trainDataSet,'explicit',cosSim)
    '''
    '''
    trainDataSet = FO.readDataSet('UserCF/trainDataSet.txt')
    testDataSet = FO.readDataSet('UserCF/testDataSet.txt')
    itemList = FO.readItemList('data/movies.txt')
    rank = predictSystem(trainDataSet, testDataSet, 1, itemList)
    print(rank)
    '''
    timeList = []  # records program run time
    timeList.append(datetime.datetime.now())

    # Load the data.
    trainDataSet = FO.readDataSet('UserCF/trainDataSet.txt')
    testDataSet = FO.readDataSet('UserCF/testDataSet.txt')

    KList = [10, 20, 30, 40]
    NList = [10, 20, 30, 40]
    type_list = ['implicit', 'cosSim', 'ecludSim']

    # Evaluate every (type, K, N) combination and append one result row
    # per run to that type's result file. The loop variable is named
    # ``sim_type`` so the builtin ``type`` is not shadowed; the keyword
    # passed to testSystem is still ``type``.
    for sim_type in type_list:
        for k in KList:
            for n in NList:
                recall, precision, coverage, popularity = testSystem(
                    trainDataSet, testDataSet, K=k, N=n, type=sim_type)
                with open('UserCF/Result/result_' + sim_type + '.txt', 'a') as fileObject:
                    fileObject.write(
                        str(k) + ',' + str(n) + ',' + str(recall) + ',' +
                        str(precision) + ',' + str(coverage) + ',' +
                        str(popularity) + '\n')
W = FO.readItemSimilarity(item, type) # 获取当前用户与其他用户的相似度 topItem = dict( sorted(W.items(), key=operator.itemgetter(1), reverse=True)[:K]) topSimItem = set([items[1] for items, value in topItem.items()]) topLoveItem = userLove & topSimItem if len(topLoveItem) == 0: continue # print(topLoveItem) for LoveItem in topLoveItem: wji = W[item, LoveItem] rui = dataSet[user, LoveItem] rank[item] = rank.get(item, 0) + wji * rui return dict( sorted(rank.items(), key=operator.itemgetter(1), reverse=True)[:N]) if __name__ == '__main__': trainDataSet = FO.readDataSet('ItemCF/trainDataSet.txt') testDataSet = FO.readDataSet('ItemCF/testDataSet.txt') ''' calItemSimilarity(trainDataSet, 'implicit') # 计算隐式相似度 calItemSimilarity(trainDataSet, 'explicit', cosSim) # 以余弦相似度计算显式相似度 calItemSimilarity(trainDataSet, 'explicit', ecludSim) # 以欧氏距离计算显式相似度 ''' itemList = FO.readItemList('data/movies.txt') for i in range(1, 11): rank = predictSystem(trainDataSet, testDataSet, i, itemList) print(rank)
value = sum(itemRating) / len(itemRating) itemMeanDict[item] = value return itemMeanDict def calUserRatingMean(dataSet): userList = FO.readUserList('data/users.txt') userMeanDict = dict() for user in userList: print(user) UserRating = [value for user_item, value in dataSet.items() if user_item[0] == user] userMeanDict[user] = sum(UserRating) / len(UserRating) return userMeanDict if __name__ == '__main__': trainDataSet = FO.readDataSet('trainDataSet.txt') testDataSet = FO.readDataSet('testDataSet.txt') addDataSet = FO.readDataSet('SVD++/Matrix/10-10-0.01-0.1/new_ratings.txt') trainDataSet = {**trainDataSet, **addDataSet} itemMeanDict = calItemRatingMean(trainDataSet) FO.WirteRatingMean('ItemCF/ItemMean/add_trainDataSet_mean.txt', itemMeanDict) userMeanDict = calUserRatingMean(trainDataSet) FO.WirteRatingMean('ItemCF/UserMean/trainDataSet_mean.txt', userMeanDict) calItemSimilarity(trainDataSet) ''' itemMeanDict = calItemRatingMean(trainDataSet)