def main():
    """Compare UserCF and ItemCF over several values of K, then export and plot.

    Loads the ratings file through ``eva.readData``. For every seed it splits
    the data, computes similarities with ``UserCF.UserSimilarityVersion3`` and
    ``ItemCF.ItemSimilarityVersion2``, and accumulates precision, recall,
    coverage and popularity for each K. The table, averaged over the seeds,
    is written to ``Result-UserCF-ItemCF-K.xlsx`` and drawn as a 2x2 grid of
    line plots (one subplot per metric, one line per algorithm).
    """
    # Load the dataset.
    file_path = '~/file/rs/dataset/ml-1m/ratings.dat'
    d_file = eva.readData(file_path, '::')

    M = 8  # number of groups
    N = 10  # number of recommendations
    # Values handed to the evaluation routine as ``k`` (presumably the
    # neighbourhood size -- confirm against eva).
    K = [5, 10, 20, 40, 80, 120, 160]
    # Use np.arange(M) instead to run, and average over, seeds 0..M-1.
    seeds = [0]

    metrics = ['Precision', 'Recall', 'Coverage', 'Popularity']
    # Columns are interleaved so that each adjacent pair holds one metric
    # for both algorithms; the plotting loop below relies on that layout.
    columns_list = [
        'PrecisionUserCF', 'PrecisionItemCF',
        'RecallUserCF', 'RecallItemCF',
        'CoverageUserCF', 'CoverageItemCF',
        'PopularityUserCF', 'PopularityItemCF',
    ]
    userCF_columns = [
        'PrecisionUserCF', 'RecallUserCF', 'CoverageUserCF', 'PopularityUserCF'
    ]
    itemCF_columns = [
        'PrecisionItemCF', 'RecallItemCF', 'CoverageItemCF', 'PopularityItemCF'
    ]
    d = pd.DataFrame(np.zeros([len(K), len(columns_list)]),
                     index=K,
                     columns=columns_list)

    for seed in seeds:
        train, test = eva.SplitData(d_file, M, seed)
        item_popularity = eva.ItemsPopularity(d_file, M, seed)
        W_user = UserCF.UserSimilarityVersion3(train)
        W_item = ItemCF.ItemSimilarityVersion2(train)
        # The trailing 1 / 2 is paired with the user / item similarity
        # (presumably an algorithm selector -- confirm against eva).
        runs = (
            (userCF_columns, W_user, 1),
            (itemCF_columns, W_item, 2),
        )
        for k in K:
            for columns, similarity, mode in runs:
                precision, recall, coverage, popularity = \
                    eva.PrecisionAndRecallAndCoverageAndPopularity(
                        train, test, item_popularity, k, similarity, N, mode)
                d.loc[k, columns] += [precision, recall, coverage, popularity]

    # Turn the per-seed sums into means. This has to scale the whole table
    # once, after all seeds were accumulated; dividing a single row (or
    # dividing repeatedly while accumulating) skews the result as soon as
    # more than one seed is used.
    d /= len(seeds)

    # sheet_name is passed by keyword: newer pandas releases deprecate
    # positional arguments after the writer/path.
    d.to_excel('Result-UserCF-ItemCF-K.xlsx', sheet_name='UserCF-ItemCF-K')

    _, axes = plt.subplots(2, 2)
    for pos, title in enumerate(metrics):
        ax = axes[pos // 2][pos % 2]
        ax.set_title(title)
        # Columns 2*pos and 2*pos + 1 are the UserCF / ItemCF pair.
        d.iloc[:, 2 * pos:2 * pos + 2].plot(ax=ax, style=['o-', 'o-'])
    plt.show()
def test_recommend():
    """Compare the original recommendation algorithm with the improved one.

    Loads the ratings file through ``eva.readData``, splits it with
    ``SplitData`` (seed 0) and computes item similarities with
    ``ItemCF.ItemSimilarityVersion2``. ``Evaluation`` is then run in a pool
    of four worker processes for every K, once with a trailing argument of 1
    (stored in the ``*-I`` columns) and once with 2 (stored in the ``*-II``
    columns). The table is written to ``Result-ItemCF-Recommend-K.xlsx`` and
    drawn as a 2x2 grid of line plots, one subplot per metric.

    Returns:
        None.
    """
    file_path = '~/file/rs/dataset/ml-1m/ratings.dat'
    d_file = eva.readData(file_path, '::')

    M = 8  # number of groups
    N = 10  # number of recommendations
    # Values handed to Evaluation as ``k``; [5, 10, 20, 40] is a shorter
    # alternative list.
    K = [5, 10, 20, 40, 80, 120, 160]

    # 0 is the seed. The second returned value pair member that is not
    # needed here (the train items list) is discarded.
    train, test, _, item_popularity, items_user = SplitData(d_file, M, 0)
    # ItemCF.ItemSimilarityVersion1 takes the same arguments and can be
    # swapped in here.
    W = ItemCF.ItemSimilarityVersion2(train, item_popularity, items_user)

    metrics = ['Precision', 'Recall', 'Coverage', 'Popularity']
    # Columns are interleaved so that each adjacent pair holds one metric
    # for both variants; the plotting loop below relies on that layout.
    columns_list = [
        'Precision-I', 'Precision-II',
        'Recall-I', 'Recall-II',
        'Coverage-I', 'Coverage-II',
        'Popularity-I', 'Popularity-II',
    ]
    I_columns = ['Precision-I', 'Recall-I', 'Coverage-I', 'Popularity-I']
    II_columns = ['Precision-II', 'Recall-II', 'Coverage-II', 'Popularity-II']
    d = pd.DataFrame(np.zeros([len(K), len(columns_list)]),
                     index=K,
                     columns=columns_list)

    # ItemCF: schedule both variants for every K on the worker pool.
    resultI = {}
    resultII = {}
    # The context manager guarantees the workers are released even if
    # scheduling raises; close()/join() still run on the normal path so
    # every submitted task completes before results are collected.
    with Pool(4) as pool:
        for k in K:
            # The trailing 1 / 2 presumably selects the original / improved
            # recommendation variant -- confirm against Evaluation.
            resultI[k] = pool.apply_async(
                Evaluation, args=(k, train, test, item_popularity, W, N, 1))
            resultII[k] = pool.apply_async(
                Evaluation, args=(k, train, test, item_popularity, W, N, 2))
        pool.close()
        pool.join()  # wait for all child processes to finish

    for k, pending in resultI.items():
        d.loc[k, I_columns] += pending.get()
    for k, pending in resultII.items():
        d.loc[k, II_columns] += pending.get()

    # sheet_name is passed by keyword: newer pandas releases deprecate
    # positional arguments after the writer/path.
    d.to_excel('Result-ItemCF-Recommend-K.xlsx', sheet_name='ItemCF-K')

    _, axes = plt.subplots(2, 2)
    for pos, title in enumerate(metrics):
        ax = axes[pos // 2][pos % 2]
        ax.set_title(title)
        # Columns 2*pos and 2*pos + 1 are the I / II pair for this metric.
        d.iloc[:, 2 * pos:2 * pos + 2].plot(ax=ax, style=['o-', 'o-'])
    plt.legend()
    plt.show()