def TestItemCF():
    """Run the ItemCF evaluation for several K values and plot the results.

    Desc:
        Reads the ratings file, splits it with ``SplitData``, builds the
        item-similarity structure with ``ItemCF.ItemSimilarityVersion1`` and
        schedules one ``Evaluation`` call per entry of ``K`` on a 4-worker
        process pool. The result of each call is added to the row of that K
        under the columns precision / recall / coverage / popularity. The
        table is written to ``Result-ItemCF-K.xlsx`` and each column is drawn
        on its own subplot of a 2x2 grid.

    Args:
        None.

    Returns:
        None.
    """
    start = time.time()
    path = '~/file/rs/dataset/ml-1m/ratingsTest.dat'
    # A multi-character separator is only handled by the Python parser.
    # Naming the engine explicitly avoids the ParserWarning pandas emits when
    # it has to fall back to that engine on its own.
    d_file = pd.read_csv(path, sep='::', usecols=[0, 1, 2], engine='python')

    M = 8  # number of groups
    N = 10  # number of recommendations
    # Values passed as the first argument of Evaluation, one run per value.
    K = [5, 10, 20, 40, 80, 120, 160]

    # NOTE(review): the original comment on this call read "0: seed" although
    # 1 is passed -- confirm the meaning of the third argument in SplitData.
    train, test, train_items_list, item_popularity, items_user = SplitData(
        d_file, M, 1)
    w_item = ItemCF.ItemSimilarityVersion1(train, item_popularity, items_user)

    columns_list = ['precision', 'recall', 'coverage', 'popularity']
    d = pd.DataFrame(
        np.zeros([len(K), len(columns_list)]),
        index=K,
        columns=columns_list)

    p = Pool(4)
    try:
        result = dict()
        for k in K:
            result[k] = p.apply_async(
                Evaluation,
                args=(k, train, test, item_popularity, w_item, N, 2))
        p.close()
        p.join()  # wait for all child processes to finish
        for k, v in result.items():
            d.loc[k, columns_list] += v.get()
    finally:
        # Make sure no worker is left behind if scheduling or evaluation
        # raised; after a normal close()/join() this has nothing left to stop.
        p.terminate()

    end = time.time()
    print('total time: %.2fs' % (end - start))

    # sheet_name is passed by keyword: positional use is deprecated in
    # recent pandas releases.
    d.to_excel('Result-ItemCF-K.xlsx', sheet_name='ItemCF-K')

    fig, axes = plt.subplots(2, 2)
    titles = ['Precision', 'Recall', 'Coverage', 'Popularity']
    # axes.flat walks the grid row by row, which matches the column order.
    for idx, (ax, title) in enumerate(zip(axes.flat, titles)):
        ax.set_title(title)
        ax.plot(d.iloc[:, idx], 'o-', label=columns_list[idx])
        # plt.legend() only affects the current axes, so the legend is
        # requested per subplot to show every label.
        ax.legend()
    plt.show()
def TestItemCF_Norm():
    """Compare ItemCF and ItemCF-Norm over several K values and plot both.

    Desc:
        Loads the ratings with ``eva.readData``, splits them with
        ``SplitData`` and builds two similarity structures:
        ``ItemCF.ItemSimilarityVersion1`` and ``ItemCF.ItemSimilarityNorm``.
        For every entry of ``K`` one ``Evaluation`` call per structure is
        scheduled on a 4-worker process pool. Results are added to the
        ``*-ItemCF`` and ``*-Norm`` columns of the row of that K. The table
        is written to ``Result-ItemCF-Norm-K.xlsx`` and each metric is drawn
        on its own subplot, with both variants on the same axes.

    Args:
        None.

    Returns:
        None.
    """
    file_path = '~/file/rs/dataset/ml-1m/ratings.dat'
    start = time.time()
    d_file = eva.readData(file_path, '::')

    M = 8  # number of groups
    N = 10  # number of recommendations
    # Values passed as the first argument of Evaluation, one run per value.
    K = [5, 10, 20, 40, 80, 120, 160]

    # Third argument: 0 is the seed according to the original comment.
    train, test, train_items_list, item_popularity, items_user = SplitData(
        d_file, M, 0)
    W_ItemCF = ItemCF.ItemSimilarityVersion1(
        train, item_popularity, items_user)
    W_Norm = ItemCF.ItemSimilarityNorm(train, item_popularity, items_user)

    # Derive all column names from one metric list so the three lists cannot
    # drift apart; the generated strings are identical to the original ones.
    metrics = ['Precision', 'Recall', 'Coverage', 'Popularity']
    I_columns = [name + '-ItemCF' for name in metrics]
    II_columns = [name + '-Norm' for name in metrics]
    columns_list = []
    for item_col, norm_col in zip(I_columns, II_columns):
        # Interleave so that each metric occupies two adjacent columns.
        columns_list.extend([item_col, norm_col])

    d = pd.DataFrame(
        np.zeros([len(K), len(columns_list)]),
        index=K,
        columns=columns_list)

    p = Pool(4)
    try:
        resultItemCF = dict()
        resultNorm = dict()
        for k in K:
            resultItemCF[k] = p.apply_async(
                Evaluation,
                args=(k, train, test, item_popularity, W_ItemCF, N))
            resultNorm[k] = p.apply_async(
                Evaluation,
                args=(k, train, test, item_popularity, W_Norm, N))
        p.close()
        p.join()  # wait for all child processes to finish
        for k, v in resultItemCF.items():
            d.loc[k, I_columns] += v.get()
        for k, v in resultNorm.items():
            d.loc[k, II_columns] += v.get()
    finally:
        # Make sure no worker is left behind if scheduling or evaluation
        # raised; after a normal close()/join() this has nothing left to stop.
        p.terminate()

    end = time.time()
    print('total time: %.2fs' % (end - start))

    # sheet_name is passed by keyword: positional use is deprecated in
    # recent pandas releases.
    d.to_excel('Result-ItemCF-Norm-K.xlsx', sheet_name='ItemCF-K')

    fig, axes = plt.subplots(2, 2)
    # axes.flat walks the grid row by row; metric i lives in columns
    # 2*i and 2*i + 1 of the table.
    for idx, (ax, title) in enumerate(zip(axes.flat, metrics)):
        ax.set_title(title)
        first = 2 * idx
        d.iloc[:, first:first + 2].plot(ax=ax, style=['o-', 'o-'])
    # Kept from the original; this call applies to the current axes only.
    plt.legend()
    plt.show()