def fillMatrix(F_list, step_list, LearnRating_list, penalty_list, N = 30):
    """Append sampled rating predictions for every hyper-parameter combination.

    For each (F, step, LearnRating, penalty) combination the stored model
    parameters are loaded with ``ReadParameter``. Then, for every user, a
    rating is predicted for each item the user has not rated in the merged
    train + test data, at most ``N`` of the successfully predicted items are
    drawn at random, and one ``user::item::rating`` line per drawn item is
    appended to ``SVD++/Matrix/<F>-<step>-<LearnRating>-<penalty>/new_ratings.txt``.

    :param F_list: values of F to iterate over (forwarded to ``ReadParameter``)
    :param step_list: values of step to iterate over
    :param LearnRating_list: values of LearnRating to iterate over
    :param penalty_list: values of penalty to iterate over
    :param N: maximum number of predicted ratings written per user
    :return: None; results are appended to files on disk
    """
    trainDataSet = FO.readDataSet('trainDataSet.txt')
    testDataSet = FO.readDataSet('testDataSet.txt')
    userList = FO.readUserList('data/users.txt')
    itemList = FO.readItemList('data/movies.txt')  # load the item list
    dataSet = {**trainDataSet, **testDataSet}
    mu = calMu(dataSet)

    # Index the rated items per user once, instead of rescanning the whole
    # data set for every user of every hyper-parameter combination.
    rated_by_user = {}
    for user_item in dataSet:
        rated_by_user.setdefault(user_item[0], set()).add(user_item[1])

    for F in F_list:
        for step in step_list:
            for LearnRating in LearnRating_list:
                for penalty in penalty_list:
                    print(F, step, LearnRating, penalty)
                    path = 'SVD++/Matrix/' + str(F) + '-' + str(step) + '-' + str(LearnRating) + '-' + str(penalty)
                    os.makedirs(path, exist_ok=True)
                    bu, bi, p, q = ReadParameter(F, step, LearnRating, penalty)
                    for user in userList:
                        print(user)
                        unrated = itemList - rated_by_user.get(user, set())
                        rank = {}
                        for item in unrated:
                            rating = Predict(user, item, p, q, bu, bi, mu)
                            if math.isnan(float(rating)):
                                continue  # no usable prediction for this item
                            rank[item] = round(rating)
                        if not rank:
                            continue
                        # Sample only items that received a prediction, so the
                        # rank[...] lookup below cannot fail; random.sample
                        # also requires a sequence, not a set.
                        candidates = list(rank)
                        # Keep N untouched: capping it in place would shrink
                        # the sample size for every subsequent user.
                        sample_size = min(N, len(candidates))
                        chooseList = random.sample(candidates, sample_size)
                        with open(path + '/new_ratings.txt', 'a') as fileObject:
                            for choose in chooseList:
                                fileObject.write(str(user) + '::' + str(choose) + '::' + str(rank[choose]) + '\n')
def calUserSimilarity(trainDataSet):
    '''
    Compute the similarity of every user to the others and write one
    result file per user.

    :param trainDataSet: training data; its keys are indexed as
                         key[0] = user and key[1] = item
    :return: None; each user's similarities are written to
             'UserCF/Similarity_add/<user>.txt'
    '''
    # NOTE(review): a second calUserSimilarity with extra parameters appears
    # further down in this source; if both live in the same module the later
    # definition replaces this one - confirm which is intended.

    # Build the inverted index: item -> set of users who rated it.
    item_users = dict()
    for key, _rating in trainDataSet.items():
        rater, rated_item = key[0], key[1]
        item_users.setdefault(rated_item, set()).add(rater)

    # Load the user list and the per-user rating means.
    userList = FO.readUserList('data/users.txt')
    UserRatingMean = FO.ReadRatingMean(
        'UserCF/UserMean/add_trainDataSetRating_mean.txt')

    # Walk over every user, compute the similarities and persist them.
    for current in userList:
        print(current)
        similarities = UserSimilarity(
            trainDataSet, item_users, userList, UserRatingMean, current)
        target = 'UserCF/Similarity_add/' + str(current) + '.txt'
        FO.WirteSimilarty(target, similarities)
def calUserRatingMean(dataSet):
    """Compute the mean rating of every user listed in 'data/users.txt'.

    :param dataSet: mapping whose keys are indexable with the user at
                    index 0 and whose values are numeric ratings
    :return: dict mapping user -> mean of that user's ratings in dataSet;
             users without any rating in dataSet are omitted, because a
             mean is undefined for them
    """
    userList = FO.readUserList('data/users.txt')

    # Group the ratings per user in a single pass instead of rescanning the
    # whole data set once for every user.
    ratings_by_user = {}
    for user_item, value in dataSet.items():
        ratings_by_user.setdefault(user_item[0], []).append(value)

    userMeanDict = dict()
    for user in userList:
        print(user)
        UserRating = ratings_by_user.get(user)
        if not UserRating:
            # No ratings for this user: skip rather than divide by zero.
            continue
        userMeanDict[user] = sum(UserRating) / len(UserRating)
    return userMeanDict
def calUserSimilarity(trainDataSet, type='implicit', simMeas=cosSim):
    '''
    Compute user-to-user similarities and write one result file per user.

    :param trainDataSet: training data; its keys are indexed as
                         key[0] = user and key[1] = item
    :param type: how similarity is computed, 'implicit' or 'explicit'
    :param simMeas: similarity function, used in 'explicit' mode only; its
                    __name__ becomes part of the output directory
    :return: None; results are written under
             'UserCF/UserSimilarity/implicit/' or
             'UserCF/UserSimilarity/explicit/<simMeas name>/'
    :raises ValueError: if type is neither 'implicit' nor 'explicit'
    '''
    import os  # local import so this block does not rely on file-level imports

    # Reject unknown modes up front; previously they failed on the first
    # user with an unbound-variable error.
    if type not in ('implicit', 'explicit'):
        raise ValueError(
            "type must be 'implicit' or 'explicit', got " + repr(type))

    # Build the inverted index (item -> users who rated it). Only the
    # explicit computation consumes it, so skip the work otherwise.
    item_users = dict()
    if type == 'explicit':
        for user_item in trainDataSet:
            item_users.setdefault(user_item[1], set()).add(user_item[0])

    # Load the user list.
    userList = FO.readUserList('data/users.txt')

    if type == 'implicit':
        out_dir = 'UserCF/UserSimilarity/implicit/'
    else:
        out_dir = 'UserCF/UserSimilarity/explicit/' + str(simMeas.__name__) + '/'
    # Make sure the target directory exists before any file is opened.
    os.makedirs(out_dir, exist_ok=True)

    # Walk over every user and compute the similarities.
    for user in userList:
        print(user)
        if type == 'implicit':
            W = UserSimilarity_implicit(userList, user)
        else:
            W = UserSimilarity_explicit(trainDataSet, item_users, userList, user, simMeas)
        filename = out_dir + str(user) + '.txt'

        # Each key of W is iterated element by element; every element is
        # written followed by '::', then the value ends the line.
        with open(filename, 'w') as fileObject:
            for users, values in W.items():
                for u in users:
                    fileObject.write(str(u) + '::')
                fileObject.write(str(values) + '\n')