예제 #1
0
def main(arg, f):
    """Run one offline ItemCF evaluation and write its metrics to ``f``.

    Reads the first two ``::``-separated fields of every line of the
    MovieLens ml-1m ratings file, splits the records into a training and a
    test set, builds the item-similarity structure from the training set and
    writes ``precision,recall,coverage,novelty,F1,`` to ``f`` (every value is
    followed by a comma).

    Args:
        arg: Indexable settings; ``arg[1]`` and ``arg[0]`` are forwarded, in
            that order, as the trailing arguments of the metric helpers.
        f: Writable text file-like object that receives the metric values.
    """
    # The dataset is split uniformly into M parts: M-1 parts form the
    # training set and the remaining part is the test set.
    M = 8
    k = 0
    seed = 42  # random seed

    # ml-1m dataset on Win10. The context manager closes the file handle,
    # which the bare open(...).readlines() call used to leak.
    with open(
        'G:/master/python/PycharmProjects/RecommendationSystem/ItemCF/MovieLens/data/ml-1m/ratings.dat'
    ) as ratings_file:
        data = [tuple(line.split('::')[:2]) for line in ratings_file]
    # Alternative (ml-latest-small dataset, comma separated):
    # 'G:/Recommend/User-CF/MovieLens/ml-latest-small/ratings_test.csv'

    train, test = SplitData(data, M, k, seed)

    # Compute the item similarity from the training set.
    W = ItemCF.ItemSimilarity(train)

    # Offline metrics; each one is written as soon as it has been computed.
    precision, recall = PrecisionRecall(train, test, W, arg[1], arg[0])
    f.write(str(precision) + ',' + str(recall) + ',')

    coverage = Coverge(train, W, arg[1], arg[0])
    f.write(str(coverage) + ',')

    novelty = Novelty(train, W, arg[1], arg[0])
    f.write(str(novelty) + ',')

    # F1 is undefined when precision and recall are both zero; report 0.0
    # instead of raising ZeroDivisionError.
    denominator = precision + recall
    F1 = 2 * precision * recall / denominator if denominator else 0.0
    f.write(str(F1) + ',')

    # print(f'precision:{precision}\trecall:{recall}\tcoverage:{coverage}\tpopularity:{novelty}\tF1:{F1}')
    '''