예제 #1
0
def TestItemCF() -> None:
    """Evaluate ItemCF for several values of K and plot the metrics.

    Desc:
        Reads the ratings file, splits it with ``SplitData``, builds the
        item similarity data with ``ItemCF.ItemSimilarityVersion1`` and runs
        ``Evaluation`` once per K in a pool of 4 worker processes. The
        per-K results are stored in a DataFrame (rows: K, columns:
        precision / recall / coverage / popularity), written to
        ``Result-ItemCF-K.xlsx`` and shown as a 2x2 grid of line plots.

    Args:
        None.

    Returns:
        None. Side effects: prints the elapsed time, writes an Excel file
        in the current directory and opens a matplotlib window.
    """
    start = time.time()
    path = '~/file/rs/dataset/ml-1m/ratingsTest.dat'
    # A multi-character separator is only supported by the Python parser;
    # naming the engine explicitly avoids pandas' ParserWarning fallback.
    # NOTE(review): no ``header=`` is given, so pandas treats the first line
    # of the file as column names -- confirm the file has a header row.
    d_file = pd.read_csv(path, sep='::', usecols=[0, 1, 2], engine='python')
    M = 8  # number of groups
    N = 10  # number of recommendations
    K = [5, 10, 20, 40, 80, 120, 160]
    # The third argument of SplitData is the seed (1 here).
    train, test, _, item_popularity, items_user = SplitData(d_file, M, 1)
    w_item = ItemCF.ItemSimilarityVersion1(train, item_popularity, items_user)
    columns_list = [
        'precision', 'recall', 'coverage', 'popularity'
    ]
    d = pd.DataFrame(
        np.zeros([len(K), len(columns_list)]), index=K, columns=columns_list)

    # The context manager guarantees the workers are released even if
    # submitting a task raises.
    with Pool(4) as p:
        # NOTE(review): the trailing ``2`` is forwarded to Evaluation; its
        # meaning is not visible from this function.
        result = {
            k: p.apply_async(
                Evaluation,
                args=(k, train, test, item_popularity, w_item, N, 2))
            for k in K
        }
        p.close()
        p.join()  # wait for all worker processes to finish
    for k, v in result.items():
        # Assumes Evaluation returns four values ordered like columns_list.
        d.loc[k, columns_list] += v.get()
    end = time.time()
    print('total time: %.2fs' % (end - start))

    # sheet_name must be passed by keyword: positional use is deprecated and
    # becomes an error in pandas 3.0.
    d.to_excel('Result-ItemCF-K.xlsx', sheet_name='ItemCF-K')
    fig, axes = plt.subplots(2, 2)
    # axes.flat walks the grid row by row, matching the column order.
    for column, ax in zip(columns_list, axes.flat):
        ax.set_title(column.capitalize())
        ax.plot(d[column], 'o-', label=column)
        # plt.legend() would only affect the last axes; give every panel
        # its own legend so each label is actually displayed.
        ax.legend()
    plt.show()
예제 #2
0
def TestItemCF_Norm() -> None:
    """Compare ItemCF with ItemCF-Norm for several values of K.

    Desc:
        Reads the ratings file with ``eva.readData``, splits it with
        ``SplitData`` and builds two sets of item similarities
        (``ItemCF.ItemSimilarityVersion1`` and ``ItemCF.ItemSimilarityNorm``).
        ``Evaluation`` is run once per K and per similarity variant in a
        pool of 4 worker processes. The results are stored side by side in
        a DataFrame, written to ``Result-ItemCF-Norm-K.xlsx`` and shown as
        a 2x2 grid of plots, each comparing the two variants on one metric.

    Args:
        None.

    Returns:
        None. Side effects: prints the elapsed time, writes an Excel file
        in the current directory and opens a matplotlib window.
    """
    file_path = '~/file/rs/dataset/ml-1m/ratings.dat'
    start = time.time()
    d_file = eva.readData(file_path, '::')
    M = 8  # number of groups
    N = 10  # number of recommendations
    K = [5, 10, 20, 40, 80, 120, 160]
    # The third argument of SplitData is the seed (0 here).
    train, test, _, item_popularity, items_user = SplitData(d_file, M, 0)
    W_ItemCF = ItemCF.ItemSimilarityVersion1(train, item_popularity, items_user)
    W_Norm = ItemCF.ItemSimilarityNorm(train, item_popularity, items_user)
    # Columns are interleaved (ItemCF, Norm) per metric so that each pair of
    # adjacent columns can be plotted together below.
    columns_list = [
        'Precision-ItemCF', 'Precision-Norm', 'Recall-ItemCF', 'Recall-Norm',
        'Coverage-ItemCF', 'Coverage-Norm', 'Popularity-ItemCF',
        'Popularity-Norm'
    ]
    I_columns = [
        'Precision-ItemCF', 'Recall-ItemCF', 'Coverage-ItemCF', 'Popularity-ItemCF'
    ]
    II_columns = [
        'Precision-Norm', 'Recall-Norm', 'Coverage-Norm', 'Popularity-Norm'
    ]
    d = pd.DataFrame(
        np.zeros([len(K), len(columns_list)]), index=K, columns=columns_list)

    # Submit both variants for every K. The context manager guarantees the
    # workers are released even if submitting a task raises.
    resultItemCF = dict()
    resultNorm = dict()
    with Pool(4) as p:
        for k in K:
            resultItemCF[k] = p.apply_async(
                Evaluation, args=(k, train, test, item_popularity, W_ItemCF, N))
            resultNorm[k] = p.apply_async(
                Evaluation, args=(k, train, test, item_popularity, W_Norm, N))
        p.close()
        p.join()  # wait for all worker processes to finish
    # Assumes Evaluation returns four values ordered as precision, recall,
    # coverage, popularity -- the order of I_columns / II_columns.
    for k, v in resultItemCF.items():
        d.loc[k, I_columns] += v.get()
    for k, v in resultNorm.items():
        d.loc[k, II_columns] += v.get()

    end = time.time()
    print('total time: %.2fs' % (end - start))

    # sheet_name must be passed by keyword: positional use is deprecated and
    # becomes an error in pandas 3.0.
    d.to_excel('Result-ItemCF-Norm-K.xlsx', sheet_name='ItemCF-K')
    fig, axes = plt.subplots(2, 2)
    titles = ['Precision', 'Recall', 'Coverage', 'Popularity']
    # axes.flat walks the grid row by row; panel i shows columns 2i, 2i+1.
    for i, (ax, title) in enumerate(zip(axes.flat, titles)):
        ax.set_title(title)
        d.iloc[:, 2 * i:2 * i + 2].plot(ax=ax, style=['o-', 'o-'])
    plt.legend()
    plt.show()