예제 #1
0
def main():
    """Evaluate UserCF and ItemCF side by side for several values of ``k``.

    Reads the ratings file, and for every seed in ``seeds`` splits it into
    train/test sets, builds the user and item similarity data, and collects
    precision, recall, coverage and popularity for each ``k`` in ``K``.
    The metrics are summed per ``k`` over all seeds and then averaged.
    The resulting table is written to ``Result-UserCF-ItemCF-K.xlsx`` and
    plotted as a 2x2 grid (one subplot per metric, one line per algorithm).

    Returns:
        None. The function's outputs are the Excel file and the plot window.
    """
    # Load the data set.
    file_path = '~/file/rs/dataset/ml-1m/ratings.dat'
    d_file = eva.readData(file_path, '::')
    M = 8  # number of groups used when splitting the data
    N = 10  # number of recommendations
    K = [5, 10, 20, 40, 80, 120, 160]
    # A single seed means a single split; use `seeds = np.arange(M)` to
    # average the metrics over M different seeds instead.
    seeds = [0]
    # Column order matters: the plotting code below slices the frame in
    # consecutive (UserCF, ItemCF) pairs, one pair per metric.
    columns_list = [
        'PrecisionUserCF', 'PrecisionItemCF', 'RecallUserCF', 'RecallItemCF',
        'CoverageUserCF', 'CoverageItemCF', 'PopularityUserCF',
        'PopularityItemCF'
    ]
    userCF_columns = [
        'PrecisionUserCF', 'RecallUserCF', 'CoverageUserCF', 'PopularityUserCF'
    ]
    itemCF_columns = [
        'PrecisionItemCF', 'RecallItemCF', 'CoverageItemCF', 'PopularityItemCF'
    ]
    # One row per k, one column per (metric, algorithm); starts at zero so
    # per-seed results can be accumulated with `+=`.
    d = pd.DataFrame(np.zeros([len(K), len(columns_list)]),
                     index=K,
                     columns=columns_list)
    for seed in seeds:
        train, test = eva.SplitData(d_file, M, seed)
        item_popularity = eva.ItemsPopularity(d_file, M, seed)
        W_user = UserCF.UserSimilarityVersion3(train)
        W_item = ItemCF.ItemSimilarityVersion2(train)
        for k in K:
            # NOTE(review): the trailing 1 / 2 presumably selects the UserCF /
            # ItemCF recommender inside the evaluation helper - confirm.
            precision, recall, coverage, popularity = eva.PrecisionAndRecallAndCoverageAndPopularity(
                train, test, item_popularity, k, W_user, N, 1)
            d.loc[k,
                  userCF_columns] += [precision, recall, coverage, popularity]
            precision, recall, coverage, popularity = eva.PrecisionAndRecallAndCoverageAndPopularity(
                train, test, item_popularity, k, W_item, N, 2)
            d.loc[k,
                  itemCF_columns] += [precision, recall, coverage, popularity]

    # Every cell now holds the sum of one value per seed, so a single
    # division of the whole frame yields the mean for every k. Dividing
    # inside the seed loop (or only one row) would skew the averages.
    if seeds:
        d /= len(seeds)

    # sheet_name is passed by keyword: newer pandas releases deprecate
    # positional arguments after the writer/path.
    d.to_excel('Result-UserCF-ItemCF-K.xlsx', sheet_name='UserCF-ItemCF-K')

    fig, axes = plt.subplots(2, 2)
    metric_titles = ['Precision', 'Recall', 'Coverage', 'Popularity']
    # axes.flat walks the grid row by row, matching the column-pair order
    # of columns_list.
    for pos, (ax, title) in enumerate(zip(axes.flat, metric_titles)):
        ax.set_title(title)
        d.iloc[:, 2 * pos:2 * pos + 2].plot(ax=ax, style=['o-', 'o-'])

    plt.show()
예제 #2
0
def test_recommend():
    """Compare the original recommendation algorithm with the improved one.

    Reads the ratings file, splits it once (seed 0), builds the item
    similarity data with ``ItemCF.ItemSimilarityVersion2`` and then runs
    ``Evaluation`` for every ``k`` in ``K`` in two variants (last argument
    1 and 2) on a pool of 4 worker processes. Results for variant 1 go into
    the ``*-I`` columns and results for variant 2 into the ``*-II`` columns.
    The table is written to ``Result-ItemCF-Recommend-K.xlsx`` and plotted
    as a 2x2 grid, one subplot per metric.

    Returns:
        None. The function's outputs are the Excel file and the plot window.
    """
    file_path = '~/file/rs/dataset/ml-1m/ratings.dat'
    d_file = eva.readData(file_path, '::')
    M = 8  # number of groups used when splitting the data
    N = 10  # number of recommendations
    K = [5, 10, 20, 40, 80, 120, 160]
    train, test, train_items_list, item_popularity, items_user = SplitData(d_file, M, 0)  # 0: seed
    # ItemCF.ItemSimilarityVersion1 takes the same arguments and can be
    # swapped in here to evaluate the other similarity variant.
    W = ItemCF.ItemSimilarityVersion2(train, item_popularity, items_user)
    # Column order matters: the plotting code below slices the frame in
    # consecutive (I, II) pairs, one pair per metric.
    columns_list = [
        'Precision-I', 'Precision-II', 'Recall-I', 'Recall-II',
        'Coverage-I', 'Coverage-II', 'Popularity-I',
        'Popularity-II'
    ]
    I_columns = [
        'Precision-I', 'Recall-I', 'Coverage-I', 'Popularity-I'
    ]
    II_columns = [
        'Precision-II', 'Recall-II', 'Coverage-II', 'Popularity-II'
    ]
    d = pd.DataFrame(
        np.zeros([len(K), len(columns_list)]), index=K, columns=columns_list)

    # ItemCF: evaluate both variants for every k in parallel.
    resultI = {}
    resultII = {}
    # The context manager makes sure worker processes are torn down even if
    # submitting a task or fetching a result raises.
    with Pool(4) as p:
        for k in K:
            resultI[k] = p.apply_async(Evaluation, args=(k, train, test, item_popularity, W, N, 1))
            resultII[k] = p.apply_async(Evaluation, args=(k, train, test, item_popularity, W, N, 2))
        p.close()
        p.join()  # wait for all worker processes to finish
        # .get() re-raises any exception that occurred in the worker.
        # NOTE(review): assumes Evaluation returns four values ordered as
        # precision, recall, coverage, popularity - confirm.
        for k, v in resultI.items():
            d.loc[k, I_columns] += v.get()
        for k, v in resultII.items():
            d.loc[k, II_columns] += v.get()

    # sheet_name is passed by keyword: newer pandas releases deprecate
    # positional arguments after the writer/path.
    d.to_excel('Result-ItemCF-Recommend-K.xlsx', sheet_name='ItemCF-K')

    fig, axes = plt.subplots(2, 2)
    metric_titles = ['Precision', 'Recall', 'Coverage', 'Popularity']
    # axes.flat walks the grid row by row, matching the column-pair order
    # of columns_list.
    for pos, (ax, title) in enumerate(zip(axes.flat, metric_titles)):
        ax.set_title(title)
        d.iloc[:, 2 * pos:2 * pos + 2].plot(ax=ax, style=['o-', 'o-'])
    plt.legend()
    plt.show()