Beispiel #1
0
def fillMatrix(F_list, step_list, LearnRating_list, penalty_list, N = 30):
    """Append predicted ratings for randomly sampled unrated items to disk.

    For every (F, step, LearnRating, penalty) combination the parameters are
    loaded with ``ReadParameter``. For each user, a rating is predicted with
    ``Predict`` for every item the user has not rated (in the merged train
    and test data), rounded, and up to ``N`` of those items are sampled at
    random. Each sampled prediction is appended to
    ``SVD++/Matrix/<F>-<step>-<LearnRating>-<penalty>/new_ratings.txt`` as a
    ``user::item::rating`` line.

    :param F_list: values of ``F`` to iterate over (passed to ``ReadParameter``)
    :param step_list: values of ``step`` to iterate over
    :param LearnRating_list: values of ``LearnRating`` to iterate over
    :param penalty_list: values of ``penalty`` to iterate over
    :param N: maximum number of predicted ratings written per user
    :return: None
    """
    trainDataSet = FO.readDataSet('trainDataSet.txt')
    testDataSet = FO.readDataSet('testDataSet.txt')
    userList = FO.readUserList('data/users.txt')
    itemList = FO.readItemList('data/movies.txt')  # collection of all items
    dataSet = {**trainDataSet, **testDataSet}
    mu = calMu(dataSet)

    # Index the rated items by user once, instead of rescanning the whole
    # data set for every user in every parameter combination.
    ratedByUser = dict()
    for user_item in dataSet:
        ratedByUser.setdefault(user_item[0], set()).add(user_item[1])

    for F in F_list:
        for step in step_list:
            for LearnRating in LearnRating_list:
                for penalty in penalty_list:
                    print(F,step,LearnRating,penalty)
                    path = 'SVD++/Matrix/' + str(F) + '-' + str(step) + '-' + str(LearnRating) + '-' + str(penalty)
                    os.makedirs(path, exist_ok=True)
                    bu, bi, p, q = ReadParameter(F, step, LearnRating, penalty)
                    for user in userList:
                        print(user)
                        rank = dict()
                        UnRatedList = itemList - ratedByUser.get(user, set())
                        for item in UnRatedList:
                            rating = Predict(user, item, p, q, bu, bi, mu)
                            if math.isnan(float(rating)):
                                continue
                            rank[item] = round(rating)
                        if not rank:
                            continue
                        # Sample only items that actually received a
                        # prediction (NaN predictions are absent from rank),
                        # and from a list, because random.sample rejects sets
                        # on recent Python versions.
                        candidates = list(rank)
                        # Cap with a local value so that N itself is not
                        # shrunk for the users processed afterwards.
                        sampleSize = min(N, len(candidates))
                        chooseList = random.sample(candidates, sampleSize)
                        with open(path + '/new_ratings.txt', 'a') as fileObject:
                            fileObject.writelines(
                                str(user) + '::' + str(choose) + '::' + str(rank[choose]) + '\n'
                                for choose in chooseList
                            )
Beispiel #2
0
def calUserSimilarity(trainDataSet):
    '''
    Compute a similarity result for every user and hand it to the file writer.

    :param trainDataSet: training data set; each key is indexed as
        key[0] = user and key[1] = item
    :return: None
    For each user read from 'data/users.txt', the result of UserSimilarity is
    passed to FO.WirteSimilarty together with the path
    'UserCF/Similarity_add/<user>.txt'.
    '''
    # Inverted index: item -> set of users that appear with that item
    users_by_item = {}
    for key, _ in trainDataSet.items():
        users_by_item.setdefault(key[1], set()).add(key[0])

    # Load the user list and the per-user rating means
    all_users = FO.readUserList('data/users.txt')
    mean_by_user = FO.ReadRatingMean(
        'UserCF/UserMean/add_trainDataSetRating_mean.txt')

    # Process the users one at a time
    for current in all_users:
        print(current)
        similarity = UserSimilarity(
            trainDataSet, users_by_item, all_users, mean_by_user, current)
        FO.WirteSimilarty(
            'UserCF/Similarity_add/' + str(current) + '.txt', similarity)
def calUserRatingMean(dataSet):
    """Return the mean rating of every user listed in 'data/users.txt'.

    :param dataSet: mapping whose keys are indexed as key[0] = user and whose
        values are the ratings
    :return: dict mapping user -> mean of that user's ratings. Users from the
        list that have no rating in ``dataSet`` are omitted, instead of
        raising ZeroDivisionError.
    """
    userList = FO.readUserList('data/users.txt')

    # Group the ratings by user in a single pass over the data set rather
    # than rescanning the whole data set once per user.
    ratingsByUser = dict()
    for user_item, value in dataSet.items():
        ratingsByUser.setdefault(user_item[0], []).append(value)

    userMeanDict = dict()
    for user in userList:
        print(user)
        UserRating = ratingsByUser.get(user)
        if not UserRating:
            # No ratings for this user: a mean is undefined, so skip it.
            continue
        userMeanDict[user] = sum(UserRating) / len(UserRating)
    return userMeanDict
Beispiel #4
0
def calUserSimilarity(trainDataSet, type='implicit', simMeas=cosSim):
    '''
    Compute a similarity result for every user and write it to a text file.

    :param trainDataSet: training data set; each key is indexed as
        key[0] = user and key[1] = item
    :param type: how the similarity is computed, 'implicit' or 'explicit'
    :param simMeas: similarity function, used only in 'explicit' mode; its
        __name__ becomes part of the output directory
    :return: None
    :raises ValueError: if ``type`` is neither 'implicit' nor 'explicit'

    One file per user is written to 'UserCF/UserSimilarity/implicit/' or
    'UserCF/UserSimilarity/explicit/<simMeas name>/'. Each line holds the
    elements of one key of the similarity result, each followed by '::',
    and then the value.
    '''
    import os  # local import: only needed to create the output directory

    # Reject unsupported modes up front; otherwise W and filename would be
    # unbound inside the loop below.
    if type not in ('implicit', 'explicit'):
        raise ValueError(
            "type must be 'implicit' or 'explicit', got %r" % (type,))

    if type == 'explicit':
        # Inverted index item -> users; only the explicit computation uses it
        item_users = dict()
        for user_item in trainDataSet:
            item_users.setdefault(user_item[1], set()).add(user_item[0])
        outDir = 'UserCF/UserSimilarity/explicit/' + str(simMeas.__name__)
    else:
        outDir = 'UserCF/UserSimilarity/implicit'

    # Make sure the target directory exists before any result is written
    os.makedirs(outDir, exist_ok=True)

    # Read the user list
    userList = FO.readUserList('data/users.txt')

    # Process the users one at a time
    for user in userList:
        print(user)
        if type == 'implicit':
            W = UserSimilarity_implicit(userList, user)
        else:
            W = UserSimilarity_explicit(trainDataSet, item_users, userList, user, simMeas)
        filename = outDir + '/' + str(user) + '.txt'
        # Write the result to the file
        with open(filename, 'w') as fileObject:
            for users, values in W.items():
                prefix = ''.join(str(u) + '::' for u in users)
                fileObject.write(prefix + str(values) + '\n')