Example #1
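These snippets come from a single test module, so the shared imports are not repeated in each one. A minimal sketch of the imports they appear to rely on (the project-specific modules are assumptions inferred from the calls below and are left commented out):

import time
from multiprocessing import Pool, Process, Queue

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Project-specific modules (assumed names, inferred from usage in the examples):
#   import Evaluation as eva   # readData, SplitData, ItemsPopularity, metric helpers
#   import UserCF              # UserSimilarityVersion3
#   import ItemCF              # ItemSimilarityVersion1/2, ItemSimilarityNorm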
def TestUserCFMult():
    start = time.time()
    file_path = '~/file/rs/dataset/ml-1m/ratings.dat'
    d_file = eva.readData(file_path, '::')
    M = 8  # number of data splits (folds)
    N = 10  # number of recommendations per user
    K = [5, 10, 20, 40, 80, 120]
    #  K = [40, 80]
    #  seeds = np.arange(M)
    seeds = [0]
    columns_list = ['Precision', 'Recall', 'Coverage', 'Popularity']
    d = pd.DataFrame(np.zeros([len(K), len(columns_list)]),
                     index=K,
                     columns=columns_list)
    for index, seed in enumerate(seeds):
        train, test = eva.SplitData(d_file, M, seed)
        item_popularity = eva.ItemsPopularity(d_file, M, seed)
        W_user = UserCF.UserSimilarityVersion3(train)
        q = Queue()
        workers = []
        for k in K:
            pw = Process(target=Evaluation,
                         args=(q, k, train, test, item_popularity, W_user, N))
            pw.start()  # start a writer process for this K
            workers.append(pw)
        pr = Process(target=WriteIntoD, args=(q, d))
        pr.start()  # start the reader process
        for pw in workers:
            pw.join()  # wait for all writer processes to finish
        end = time.time()
        print('Total Time: %.2fs' % (end - start))
        pr.join()  # wait for the reader process to finish
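The `Evaluation` worker and `WriteIntoD` reader started above are not shown in this example. A plausible sketch, assuming the worker wraps the `eva.PrecisionAndRecallAndCoverageAndPopularity` call used in the later examples and the reader drains one result per K from the queue (both bodies are guesses, not the project's actual code):

def Evaluation(q, k, train, test, item_popularity, W, N):
    # Hypothetical worker: evaluate one value of K and post the four metrics on the queue.
    precision, recall, coverage, popularity = eva.PrecisionAndRecallAndCoverageAndPopularity(
        train, test, item_popularity, k, W, N, 1)
    q.put((k, [precision, recall, coverage, popularity]))

def WriteIntoD(q, d, n_results=6):
    # Hypothetical reader: collect one result per K value and write it into the DataFrame.
    # Note that d is copied into the child process, so writes made here do not reach the
    # parent's DataFrame; the Pool-based examples below return results to the parent instead.
    for _ in range(n_results):
        k, row = q.get()
        d.loc[k] = row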
Example #2
def TestRandomMostPopupar():
    file_path = '~/file/rs/dataset/ml-1m/ratings.dat'
    d_file = eva.readData(file_path, '::')
    M = 8  # number of data splits (folds)
    N = 10  # number of recommendations per user
    #  seeds = np.arange(M)
    seeds = [0]
    columns_list = ['Precision', 'Recall', 'Coverage', 'Popularity']
    d = pd.DataFrame(np.zeros([2, len(columns_list)]),
                     index=['Random', 'MostPopular'],
                     columns=columns_list)
    for index, seed in enumerate(seeds):
        train, test = eva.SplitData(d_file, M, seed)
        item_popularity = eva.ItemsPopularity(d_file, M, seed)
        precision, recall, coverage, popularity = eva.RandomResult(
            train, test, item_popularity, N)
        d.loc['Random',
              columns_list] += [precision, recall, coverage, popularity]
        precision, recall, coverage, popularity = eva.MostPopularResult(
            train, test, item_popularity, N)
        d.loc['MostPopular',
              columns_list] += [precision, recall, coverage, popularity]
    d /= len(seeds)  # average the accumulated metrics over the seeds

    print(d)
    d.to_excel('Result-Random-Popular.xlsx', 'Random-Popular')
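`eva.RandomResult` and `eva.MostPopularResult` evaluate the two standard baselines. Their implementations are not shown here; purely as a generic illustration of the idea (assuming `train` maps each user to the items they have interacted with and `item_popularity` maps items to interaction counts), the two recommenders usually look like this:

import random

def random_recommend(train, user, all_items, N):
    # Recommend N items drawn uniformly at random from the items the user has not seen.
    seen = set(train.get(user, []))
    candidates = [item for item in all_items if item not in seen]
    return random.sample(candidates, min(N, len(candidates)))

def most_popular_recommend(train, user, item_popularity, N):
    # Recommend the N globally most popular items the user has not seen yet.
    seen = set(train.get(user, []))
    ranked = sorted(item_popularity.items(), key=lambda kv: kv[1], reverse=True)
    return [item for item, _ in ranked if item not in seen][:N]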
Example #3
def main():
    # load the ratings dataset
    file_path = '~/file/rs/dataset/ml-1m/ratings.dat'
    d_file = eva.readData(file_path, '::')
    M = 8  # number of data splits (folds)
    N = 10  # number of recommendations per user
    K = [5, 10, 20, 40, 80, 120, 160]
    #  seeds = np.arange(M)
    seeds = [0]
    columns_list = [
        'PrecisionUserCF', 'PrecisionItemCF', 'RecallUserCF', 'RecallItemCF',
        'CoverageUserCF', 'CoverageItemCF', 'PopularityUserCF',
        'PopularityItemCF'
    ]
    userCF_columns = [
        'PrecisionUserCF', 'RecallUserCF', 'CoverageUserCF', 'PopularityUserCF'
    ]
    itemCF_columns = [
        'PrecisionItemCF', 'RecallItemCF', 'CoverageItemCF', 'PopularityItemCF'
    ]
    d = pd.DataFrame(np.zeros([len(K), len(columns_list)]),
                     index=K,
                     columns=columns_list)
    for index, seed in enumerate(seeds):
        train, test = eva.SplitData(d_file, M, seed)
        item_popularity = eva.ItemsPopularity(d_file, M, seed)
        W_user = UserCF.UserSimilarityVersion3(train)
        W_item = ItemCF.ItemSimilarityVersion2(train)
        for k in K:
            precision, recall, coverage, popularity = eva.PrecisionAndRecallAndCoverageAndPopularity(
                train, test, item_popularity, k, W_user, N, 1)
            d.loc[k,
                  userCF_columns] += [precision, recall, coverage, popularity]
            precision, recall, coverage, popularity = eva.PrecisionAndRecallAndCoverageAndPopularity(
                train, test, item_popularity, k, W_item, N, 2)
            d.loc[k,
                  itemCF_columns] += [precision, recall, coverage, popularity]
    d /= len(seeds)  # average the accumulated metrics over the seeds

    d.to_excel('Result-UserCF-ItemCF-K.xlsx', 'UserCF-ItemCF-K')
    fig, axes = plt.subplots(2, 2)
    axes[0][0].set_title('Precision')
    d.iloc[:, 0:2].plot(ax=axes[0][0], style=['o-', 'o-'])
    axes[0][1].set_title('Recall')
    d.iloc[:, 2:4].plot(ax=axes[0][1], style=['o-', 'o-'])
    axes[1][0].set_title('Coverage')
    d.iloc[:, 4:6].plot(ax=axes[1][0], style=['o-', 'o-'])
    axes[1][1].set_title('Popularity')
    d.iloc[:, 6:8].plot(ax=axes[1][1], style=['o-', 'o-'])

    plt.show()
Example #4
def TestUserCF():
    # load the ratings dataset
    file_path = '~/file/rs/dataset/ml-1m/ratings.dat'
    d_file = eva.readData(file_path, '::')
    M = 8  # number of data splits (folds)
    N = 10  # number of recommendations per user
    #  K = [5, 10]
    K = [5, 10, 20, 40, 80, 120, 160]
    #  seeds = np.arange(M)
    seeds = [0]
    columns_list = ['Precision', 'Recall', 'Coverage', 'Popularity']
    userCF_columns = ['Precision', 'Recall', 'Coverage', 'Popularity']
    d = pd.DataFrame(np.zeros([len(K), len(columns_list)]),
                     index=K,
                     columns=columns_list)
    for index, seed in enumerate(seeds):
        train, test = eva.SplitData(d_file, M, seed)
        item_popularity = eva.ItemsPopularity(d_file, M, seed)
        W_user = UserCF.UserSimilarityVersion3(train)
        for k in K:
            print(k)
            precision, recall, coverage, popularity = eva.PrecisionAndRecallAndCoverageAndPopularity(
                train, test, item_popularity, k, W_user, N, 1)
            d.loc[k,
                  userCF_columns] += [precision, recall, coverage, popularity]
    d /= len(seeds)  # average the accumulated metrics over the seeds

    d.to_excel('Result-UserCF-K.xlsx', 'UserCF-K')
    fig, axes = plt.subplots(2, 2)
    axes[0][0].set_title('Precision')
    axes[0][0].plot(d.iloc[:, 0], 'o-', label='precision')
    axes[0][1].set_title('Recall')
    axes[0][1].plot(d.iloc[:, 1], 'o-', label='recall')
    axes[1][0].set_title('Coverage')
    axes[1][0].plot(d.iloc[:, 2], 'o-', label='coverage')
    axes[1][1].set_title('Popularity')
    axes[1][1].plot(d.iloc[:, 3], 'o-', label='popularity')
    for ax in axes.flat:
        ax.legend()
    plt.show()
Example #5
def test_recommend():
    """ 对比原版推荐算法和改进后的推荐算法

    Desc:


    Args:


    Returns:


    """
    file_path = '~/file/rs/dataset/ml-1m/ratings.dat'
    d_file = eva.readData(file_path, '::')
    M = 8  # number of data splits (folds)
    N = 10  # number of recommendations per user
    K = [5, 10, 20, 40, 80, 120, 160]
    #  K = [5, 10, 20, 40]
    train, test, train_items_list, item_popularity, items_user = SplitData(d_file, M, 0)  # 0: seed
    #  W = ItemCF.ItemSimilarityVersion1(train, item_popularity, items_user)
    W = ItemCF.ItemSimilarityVersion2(train, item_popularity, items_user)
    columns_list = [
        'Precision-I', 'Precision-II', 'Recall-I', 'Recall-II',
        'Coverage-I', 'Coverage-II', 'Popularity-I',
        'Popularity-II'
    ]
    I_columns = [
        'Precision-I', 'Recall-I', 'Coverage-I', 'Popularity-I'
    ]
    II_columns = [
        'Precision-II', 'Recall-II', 'Coverage-II', 'Popularity-II'
    ]
    d = pd.DataFrame(
        np.zeros([len(K), len(columns_list)]), index=K, columns=columns_list)

    # ItemCF
    p = Pool(4)
    resultI = dict()
    resultII = dict()
    for k in K:
        resultI[k] = p.apply_async(Evaluation, args=(k, train, test, item_popularity, W, N, 1))
        resultII[k] = p.apply_async(Evaluation, args=(k, train, test, item_popularity, W, N, 2))
    p.close()
    p.join()  # wait for all worker processes to finish
    for k, v in resultI.items():
        d.loc[k, I_columns] += v.get()
    for k, v in resultII.items():
        d.loc[k, II_columns] += v.get()

    d.to_excel('Result-ItemCF-Recommend-K.xlsx', 'ItemCF-K')
    fig, axes = plt.subplots(2, 2)
    axes[0][0].set_title('Precision')
    d.iloc[:, 0:2].plot(ax=axes[0][0], style=['o-', 'o-'])
    axes[0][1].set_title('Recall')
    d.iloc[:, 2:4].plot(ax=axes[0][1], style=['o-', 'o-'])
    axes[1][0].set_title('Coverage')
    d.iloc[:, 4:6].plot(ax=axes[1][0], style=['o-', 'o-'])
    axes[1][1].set_title('Popularity')
    d.iloc[:, 6:8].plot(ax=axes[1][1], style=['o-', 'o-'])
    plt.legend()
    plt.show()
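In this example `Evaluation` is submitted through `Pool.apply_async` and its result is added to a DataFrame row via `v.get()`, so it presumably returns the four metrics as a list. A hedged sketch of such a worker (the real function is not shown; the metric call is borrowed from the earlier examples):

def Evaluation(k, train, test, item_popularity, W, N, version=1):
    # Hypothetical Pool worker: evaluate one value of K and return the metrics so that
    # apply_async(...).get() yields a list that can be added to a DataFrame row.
    precision, recall, coverage, popularity = eva.PrecisionAndRecallAndCoverageAndPopularity(
        train, test, item_popularity, k, W, N, version)
    return [precision, recall, coverage, popularity]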
Example #6
def TestItemCF_Norm():
    """ 对比 ItemCF 和 ItemCF-Norm

    Desc:


    Args:


    Returns:


    """
    file_path = '~/file/rs/dataset/ml-1m/ratings.dat'
    start = time.time()
    d_file = eva.readData(file_path, '::')
    M = 8  # number of data splits (folds)
    N = 10  # number of recommendations per user
    K = [5, 10, 20, 40, 80, 120, 160]
    train, test, train_items_list, item_popularity, items_user = SplitData(d_file, M, 0)  # 0: seed
    W_ItemCF = ItemCF.ItemSimilarityVersion1(train, item_popularity, items_user)
    W_Norm = ItemCF.ItemSimilarityNorm(train, item_popularity, items_user)
    columns_list = [
        'Precision-ItemCF', 'Precision-Norm', 'Recall-ItemCF', 'Recall-Norm',
        'Coverage-ItemCF', 'Coverage-Norm', 'Popularity-ItemCF',
        'Popularity-Norm'
    ]
    I_columns = [
        'Precision-ItemCF', 'Recall-ItemCF', 'Coverage-ItemCF', 'Popularity-ItemCF'
    ]
    II_columns = [
        'Precision-Norm', 'Recall-Norm', 'Coverage-Norm', 'Popularity-Norm'
    ]
    d = pd.DataFrame(
        np.zeros([len(K), len(columns_list)]), index=K, columns=columns_list)

    # ItemCF
    p = Pool(4)
    resultItemCF = dict()
    resultNorm = dict()
    for k in K:
        resultItemCF[k] = p.apply_async(Evaluation, args=(k, train, test, item_popularity, W_ItemCF, N))
        resultNorm[k] = p.apply_async(Evaluation, args=(k, train, test, item_popularity, W_Norm, N))
    p.close()
    p.join()  # wait for all worker processes to finish
    for k, v in resultItemCF.items():
        d.loc[k, I_columns] += v.get()
    for k, v in resultNorm.items():
        d.loc[k, II_columns] += v.get()

    end = time.time()
    print('total time: %.2fs' % (end - start))

    d.to_excel('Result-ItemCF-Norm-K.xlsx', 'ItemCF-K')
    fig, axes = plt.subplots(2, 2)
    axes[0][0].set_title('Precision')
    d.iloc[:, 0:2].plot(ax=axes[0][0], style=['o-', 'o-'])
    axes[0][1].set_title('Recall')
    d.iloc[:, 2:4].plot(ax=axes[0][1], style=['o-', 'o-'])
    axes[1][0].set_title('Coverage')
    d.iloc[:, 4:6].plot(ax=axes[1][0], style=['o-', 'o-'])
    axes[1][1].set_title('Popularity')
    d.iloc[:, 6:8].plot(ax=axes[1][1], style=['o-', 'o-'])
    plt.legend()
    plt.show()
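Since several of these examples start worker processes, they should be called from under a main guard (required on platforms that use the spawn start method, and good practice elsewhere), for example:

if __name__ == '__main__':
    TestItemCF_Norm()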