def plot_EM_results():
    """Draw and save the per-component test-result figures of the EM experiment.

    Reads per-indicator result CSVs from ``./results/EM/`` and writes one
    figure per pollutant component under ``./plot_results/EM/<indicator>/``.
    """
    from param_options import args_parser
    from plot_indexes_resluts import plot_indicator_results
    from util_tools import mkdir

    args = args_parser()
    args.dataset_path = './constructed_datasets_6_30(1)/'
    exp_name = 'EM'
    results_root = './results/' + exp_name + '/'
    plots_root = './plot_results/' + exp_name + '/'
    print(
        '====================== Save every component indicator result ========================'
    )
    for indicator in ['rmse', 'd2', 'r2']:
        print('{} results'.format(indicator))
        fig_dir = plots_root + indicator + '/'
        mkdir(fig_dir)
        # One figure per selected pollutant component.
        for component in args.select_dim:
            log_dirs = [results_root + indicator + '/']
            plot_indicator_results(log_dirs,
                                   fig_dir,
                                   component,
                                   xaxis='station',
                                   leg=['EM'])
    print(">>> Finished save resluts figures!")
def save_model(self, G, D, save_file='', station_name=''):
    """Persist the generator and discriminator weights for one station.

    :param G: generator module whose ``state_dict`` is saved.
    :param D: discriminator module whose ``state_dict`` is saved.
    :param save_file: sub-folder name under ``./saved_model/``.
    :param station_name: station identifier embedded in the file names.
    """
    model_dir = './saved_model/' + save_file + '/'
    mkdir(model_dir)
    # Both checkpoints share the same "independent_<station>" prefix.
    prefix = model_dir + 'independent_' + station_name
    torch.save(D.state_dict(), prefix + '_D.pkl')
    torch.save(G.state_dict(), prefix + '_G.pkl')
def show_fed_eval_results(args, exp_name, results_saved_file,
                          results_plot_file, cross_validation_sets):
    """Aggregate and visualize federated-vs-independent evaluation results.

    For every indicator, averages the repeated-experiment results of each
    cross-validation dataset, then re-averages those means and draws the
    mean/variance line plot per station.

    :param args: parsed experiment options; must expose ``select_dim``.
    :param exp_name: experiment folder name under the result/plot roots.
    :param results_saved_file: root folder holding the raw result CSVs.
    :param results_plot_file: root folder where figures are written.
    :param cross_validation_sets: number of cross-validation datasets.
    """
    result_save_root = './{}/'.format(results_saved_file) + exp_name + '/'
    plots_save_root = './{}/'.format(results_plot_file) + exp_name + '/'
    indicator_list = ['rmse', 'd2', 'r2', 'all_rmse']
    leg = ['Federated', 'Independent']
    for indicator_name in indicator_list:
        # Create the folders that receive the averaged results.
        indicator_avg_results_csv_save_fpth = (result_save_root + 'avg_' +
                                               indicator_name + '/')
        for mode in leg:
            mkdir(indicator_avg_results_csv_save_fpth + mode + '/')
        # Average the repeated experiments of every cross-validation dataset.
        for c in range(cross_validation_sets):
            results_logdir = [
                result_save_root + 'datasets_' + str(c) + '/' +
                indicator_name + '/' + model_name + '/'
                for model_name in ['fed', 'idpt']
            ]
            compute_avg_of_data_in_file(args, c, results_logdir,
                                        indicator_avg_results_csv_save_fpth,
                                        indicator_name, leg)
        # Plot the test-result figures.
        print(
            '\033[0;34;40m [Visulize] Save every component indicator result \033[0m'
        )
        print('[Visulize] {} results'.format(indicator_name))
        results_logdir = [
            result_save_root + 'avg_' + indicator_name + '/' + model_name +
            '/' for model_name in leg
        ]
        fig_save_path = plots_save_root + indicator_name + '/'
        csv_save_fpth = result_save_root + 'avg_' + indicator_name + '/'
        mkdir(fig_save_path)
        # Re-average the per-dataset means and draw the mean/variance plot.
        # BUG FIX: the original passed ``select_dim=arg.select_dim`` where
        # ``arg`` is undefined (NameError at runtime); the parameter is
        # ``args``.
        plot_indicator_avg_results_m(results_logdir,
                                     fig_save_path,
                                     'station',
                                     indicator_name,
                                     csv_save_fpth,
                                     select_dim=args.select_dim)
        # plot_fed_avg_acc(results_logdir, indicator_name, fig_save_path)
        # plot_indicator_results(results_logdir, fig_save_path, indicator_name)
        print(
            "\033[0;34;40m >>>[Visulize] Finished save {} resluts figures! \033[0m"
            .format(indicator_name))
def plot_federated_indicator_avg_results(dataset_number):
    """Plot the averaged 'all_rmse' comparison (Fed vs Independent) for one
    dataset group.

    :param dataset_number: dataset-group identifier used to build the
        experiment folder name ``<dataset_number>_latest``.
    """
    args = args_parser()
    # dataset_number = 'one_mi_v1((A5)_1)'
    results_saved_file = 'results_one_dn'
    results_plot_file = 'plot_results_one_dn'
    exp_name = '{}_latest'.format(dataset_number)
    result_save_root = './{}/'.format(results_saved_file) + exp_name + '/'
    plots_save_root = './{}/'.format(results_plot_file) + exp_name + '/'
    indicator_name = 'all_rmse'
    leg = ['Fed', 'Independent']
    avg_dir = result_save_root + 'avg_' + indicator_name + '/'
    results_logdir = [avg_dir + mode + '/' for mode in leg]
    fig_save_path = plots_save_root + indicator_name + '/'
    mkdir(fig_save_path)
    # Re-average the per-dataset means and draw the mean/variance line plot.
    plot_indicator_avg_results_m(results_logdir, fig_save_path, 'station',
                                 indicator_name, avg_dir)
test_param_name = 'missing_rate' Dname_prefix = '(A{})' Dname_suffix = 'norm_1r_1P' elif generate_condition == 'one_time': params_test_list = [1] test_param_name = 'One_time' for param in params_test_list: if generate_condition == 'missing_rate': args.missing_ratios = [param / 100] * len(args.selected_stations) dataset_name = Dname_prefix.format(param, param, param) + '_' + Dname_suffix print( '===================== Missing ratio {}========================' .format(param)) elif generate_condition == 'one_time': dataset_name = dataset_name_temp + '_' + test_param_name dataset_path = './constructed_datasets/{}/'.format(dataset_name) # 构造所有站点不同的缺失率数据 for exp_n in range(exp_num): args.dataset_path = dataset_path + str(exp_n) + '/' mkdir(args.dataset_path) all_station_datasets = generate_selected_station_save_datasets( args, if_random=True) # 载入数据集(用于测试函数) # all_station_datasets = get_saved_datasets(args) print(all_station_datasets)
def plot_missing_data_statistics(dataset_name_l, item_missing_stat_l,
                                 data_missing_number_l, r_stat_l, c_stat_l,
                                 data_len_l, select_dim):
    """Plot and persist missing-data statistics for a group of station datasets.

    Produces five figures — overall missing rate, per-pollutant missing
    rate, row-gap histogram, column-gap histogram, and row/column totals
    and ratios — and saves each as .eps/.svg plus JSON summaries under a
    hard-coded output directory.

    :param dataset_name_l: dataset (station) names, one per dataset.
    :param item_missing_stat_l: per-dataset mapping of pollutant -> missing count.
    :param data_missing_number_l: per-dataset total count of missing points.
    :param r_stat_l: per-dataset dict {row-gap length: occurrence count}.
    :param c_stat_l: per-dataset dict {column-gap length: occurrence count}.
    :param data_len_l: per-dataset number of records.
    :param select_dim: list of pollutant column names considered.
    """
    # NOTE(review): reconstructed from a whitespace-collapsed source; the
    # nesting of axis-formatting / append statements inside the per-dataset
    # loops follows the apparent intent (the later figures require one
    # appended total per dataset) -- confirm against the original layout.
    # Totals of contiguous row / column gaps per station dataset.
    r_stat_sum_l = []
    c_stat_sum_l = []
    # Total number of data points falling in contiguous column gaps.
    c_mp_num_l = []
    # Overall missing rate of each station dataset.
    fig, ax = plt.subplots()
    y = [
        data_missing_number_l[i] / (data_len_l[i] * len(select_dim)) * 100
        for i in range(len(dataset_name_l))
    ]
    ax.plot(dataset_name_l, y)
    ax.tick_params(labelsize=13)
    ax.set_xlabel('Statioin', size=13)  # sic: label typo kept ("Station")
    ax.set_ylabel('Missing rate(%)', size=13)
    # ax.set_title('Missing rate of each station')
    save_path = 'E:/zx/Fed-AQ/experiments_results/Figures/Dataset_overview_v2/'
    # save_path = save_path + dataset_name + '/'
    mkdir(save_path)
    plt.savefig(save_path + 'missing_rate' + '.eps')
    plt.savefig(save_path + 'missing_rate' '.svg')
    # Save the curve data as local JSON.
    save2json(save_path + 'missing_rate.json', y, dataset_name_l)
    # Missing-rate analysis for every pollutant component.
    plt.style.use('ggplot')
    fig1, ax1 = plt.subplots()
    for dataset_name, item_missing_stat, data_len in zip(
            dataset_name_l, item_missing_stat_l, data_len_l):
        y = [item_missing_stat[d] / data_len * 100 for d in select_dim]
        ax1.plot(select_dim, y, label=dataset_name)
        # Save the per-pollutant curve as local JSON.
        save2json(save_path + dataset_name + '_pollutants.json', y, select_dim)
    ax1.tick_params(labelsize=13)
    ax1.set_xlabel('Pollutants', size=13)
    ax1.set_ylabel('Missing rate(%)', size=13)
    # ax1.set_title('Missing rate of each pollutant')
    ax1.legend()
    # save_path = 'E:/zx/Fed-AQ/experiments_results/Figures/Dataset_overview/'
    # save_path = save_path + dataset_name + '/'
    # mkdir(save_path)
    plt.savefig(save_path + 'pollutants_stat' + '.eps')
    plt.savefig(save_path + 'pollutants_stat' '.svg')
    # Row-wise gaps.
    fig2, ax2 = plt.subplots()
    for i, dataset_name, r_stat in zip(range(len(dataset_name_l)),
                                       dataset_name_l, r_stat_l):
        # bar graphs
        y = [v for v in r_stat.values()]
        x = [k for k in r_stat.keys()]
        y = np.array(y)
        x = np.array(x)
        width = 0.25
        ax2.bar(x + width * i, y, width, label=dataset_name)
        ax2.tick_params(labelsize=13)
        ax2.set_xlabel('Gap lengths', size=13)
        ax2.set_ylabel('Number', size=13)
        # ax2.set_title(' Row-wise area')
        ax2.set_xticks(x + width * i / 2)
        ax2.set_xticklabels(x)
        ax2.legend()
        r_stat_sum = np.sum(y)  # total number of row gaps in this dataset
        r_stat_sum_l.append(r_stat_sum)
    save_name = save_path + '/Row-wise-missing-data-stat'
    plt.savefig(save_name + '.eps')
    plt.savefig(save_name + '.svg')
    # Column-wise gaps.
    fig3, ax3 = plt.subplots()
    for i, dataset_name, c_stat in zip(range(len(dataset_name_l)),
                                       dataset_name_l, c_stat_l):
        # bar graphs
        # y = [v for v in c_stat.values()]
        # x = [k for k in c_stat.keys()]
        x_split = ['2', '3', '4', '5', '6-15', '16-30', '>30']
        y_split = [0 for _ in range(len(x_split))]
        # Bucket the column-gap lengths; plotting each raw length directly
        # would make the chart unreadable.
        c_missing_num = 0
        for x_, y_ in zip(c_stat.keys(), c_stat.values()):
            # Each gap of length x_ occurring y_ times covers x_*y_ points.
            c_missing_num += x_ * y_
            if x_ == 2:
                y_split[0] += y_
            if x_ == 3:
                y_split[1] += y_
            if x_ == 4:
                y_split[2] += y_
            if x_ == 5:
                y_split[3] += y_
            if 5 < x_ <= 15:
                y_split[4] += y_
            if 15 < x_ <= 30:
                y_split[5] += y_
            if 30 < x_:
                y_split[6] += y_
        y = np.array(y_split)
        x = np.array(np.arange(len(x_split)))
        width = 0.25
        ax3.bar(x + width * i, y, width, label=dataset_name)
        ax3.tick_params(labelsize=13)
        ax3.set_xlabel('Gap lengths', size=13)
        ax3.set_ylabel('Number', size=13)
        # ax3.set_title(' Column-wise area')
        ax3.set_xticks(x + width * i / 2)
        ax3.set_xticklabels(x_split)
        plt.xticks(rotation=-15)  # rotate the x tick labels
        plt.tick_params(axis='x', labelsize=10)  # x tick label size
        ax3.legend()
        c_stat_sum = np.sum(y)  # total number of column gaps in this dataset
        c_stat_sum_l.append(c_stat_sum)
        c_mp_num_l.append(c_missing_num)
    save_name = save_path + '/Column-wise-missing-data-stat'
    plt.savefig(save_name + '.eps')
    plt.savefig(save_name + '.svg')
    # Total counts of row and column gaps.
    fig4, ax4 = plt.subplots()
    for i, dataset_name, r_stat_sum, c_stat_sum in zip(
            range(len(dataset_name_l)), dataset_name_l, r_stat_sum_l,
            c_stat_sum_l):
        y = [r_stat_sum, c_stat_sum]
        x_ticklable = ['row', 'column']
        x = np.arange(len(x_ticklable))
        width = 0.25
        ax4.bar(x + width * i, y, width, label=dataset_name)
        ax4.tick_params(labelsize=13)
        ax4.set_ylabel('Number', size=13)
        # ax4.set_title('Numbers of missing row and column')
        ax4.set_xticks(x + width * i / 2)
        ax4.set_xticklabels(x_ticklable)
        ax4.legend()
    save_name = save_path + '/Sum-missing-data-stat'
    plt.savefig(save_name + '.eps')
    plt.savefig(save_name + '.svg')
    # Share of row/column gap data relative to the whole dataset.
    fig5, ax5 = plt.subplots()
    for i, dataset_name, r_stat_sum, c_missing_num, data_len in zip(
            range(len(dataset_name_l)), dataset_name_l, r_stat_sum_l,
            c_mp_num_l, data_len_l):
        y = [
            r_stat_sum / data_len * 100,
            c_missing_num / (data_len * len(select_dim)) * 100
        ]
        x_ticklable = ['row', 'column']
        x = np.arange(len(x_ticklable))
        width = 0.25
        ax5.plot(x_ticklable, y, label=dataset_name)
        # Save the ratios as local JSON.
        save2json(save_path + dataset_name + '_rc.json', y, x_ticklable)
    ax5.tick_params(labelsize=13)
    ax5.set_ylabel('Ratio(%)', size=13)
    # ax5.set_title('Ratio of missing data in row and column gaps')
    # ax5.set_xticks(x + width * i / 2)
    # ax5.set_xticklabels(x_ticklable)
    ax5.legend()
    save_name = save_path + '/Ratio-of-missing-data-stat_for_r&c'
    plt.savefig(save_name + '.eps')
    plt.savefig(save_name + '.svg')
# dataset_number = '30' # exp_name = 'EM_6_dn({})'.format(dataset_number) for param in params_test_list: print('** {} params test: {} **'.format(test_param_name, param)) # dataset_number = 'one_mi_v1((A{})_1r)'.format(param) dataset_number = 'one_mi((A5_A20_A30)_111)' exp_name = 'EM_{}_lastest_1'.format(dataset_number) result_save_file = './{}/'.format(results_saved_file) + exp_name + '/' # 用于统计各种指标,建立相对应的文件夹 for ind in indicator_list: test_result_save_path = result_save_file + ind + '/' mkdir(test_result_save_path) for i in range(cross_validation_sets_num): print('============= Start training at datasets {} =============='. format(i)) # 用于统计各种指标,建立相对应的文件夹 # result_save_file = './{}/'.format(results_saved_file) + exp_name + '/datasets_{}/'.format(i) # plots_file = './plot_results/' + exp_name + '/datasets_{}/'.format(i) # 当前数据集 args.dataset_path = './constructed_datasets/{}/{}/'.format( dataset_number, i) # 载入数据 station_datasets = get_saved_datasets(args) rmse_on_test_results = []
'G_hidden_n': args.G_hidden_dim, 'D_hidden_n': args.D_hidden_dim, 'activate_function': 'ReLU', 'optimizer': 'Adam', 'idpt_d_lr': args.d_lr, 'idpt_g_lr': args.g_lr, 'lr_decay': args.g_lr_decay, 'decay_step': args.g_lr_decay_step, 'clip_value': args.clip_value, 'p_hint': args.p_hint, 'alpha': args.alpha } # 存储主文件路径 result_save_main_file = './{}/'.format(results_saved_file) + exp_name + '/' mkdir(result_save_main_file) # 保存参数配置 params_save_name = result_save_main_file + 'params_settings.json' with open(params_save_name, 'w+') as jsonf: json.dump(ex_params_settings, jsonf) for i in range(cross_validation_sets): print('============= Start training at datasets {} =============='.format(i)) # 用于统计各种指标,建立相对应的文件夹 result_save_file = './{}/'.format(results_saved_file) + exp_name + '/datasets_{}/'.format(i) for index in indicator_list: for model_name in model_name_list: test_result_save_path = result_save_file + index + '/' + model_name mkdir(test_result_save_path)
'optimizer': 'Adam', 'fed_d_lr': args.fed_d_lr, 'fed_g_lr': args.fed_g_lr, 'idpt_d_lr': args.d_lr, 'idpt_g_lr': args.g_lr, 'lr_decay': args.g_lr_decay, 'decay_step': args.g_lr_decay_step, 'clip_value': args.clip_value, 'p_hint': args.p_hint, 'alpha': args.alpha } # 存储主文件路径 result_save_main_file = './{}/'.format( results_saved_file) + exp_name + '/' mkdir(result_save_main_file) # 保存参数配置 params_save_name = result_save_main_file + 'params_settings.json' with open(params_save_name, 'w+') as jsonf: json.dump(ex_params_settings, jsonf) for i in range(cross_validation_sets): print( '\033[1;32m ============= Start training at datasets {} ==============\033[0m' .format(i)) # 用于统计各种指标,建立相对应的文件夹 result_save_file = './{}/'.format( results_saved_file) + exp_name + '/datasets_{}/'.format(i) for index in indicator_list:
def run_multi_cross_validation_datasets_test():
    """Average the per-dataset test results across the cross-validation
    datasets and draw the overall mean/variance figure.

    The training / per-dataset averaging passes are disabled in this
    version; only the final re-plotting of results already on disk runs.
    :return:
    """
    args = args_parser()
    # Experiment configuration.
    exp_total_time = 10
    cross_validation_sets = 5
    exp_name = 'FedWeightAvg(soft)_6_dn(15)_100_32(0.001_0.001_bs_128)_32(phint_0.95)'
    indicator_list = ['rmse', 'd2', 'r2', 'all_rmse']
    result_save_root = './results_v3/' + exp_name + '/'
    plots_save_root = './plot_results/' + exp_name + '/'
    indicator_name = 'all_rmse'
    leg = ['Fed', 'Independent']
    # Draw the test-result figure.
    print(
        '====================== Save every component indicator result ========================'
    )
    print('{} results'.format(indicator_name))
    results_logdir = [
        result_save_root + indicator_name + '/' + mode + '/' for mode in leg
    ]
    fig_save_path = plots_save_root + indicator_name + '/'
    csv_save_fpth = result_save_root + indicator_name + '/'
    mkdir(fig_save_path)
    # Average the repeated experiments and plot the mean/variance curves.
    plot_indicator_avg_results_m(results_logdir, fig_save_path, 'station',
                                 indicator_name, csv_save_fpth)
    # plot_indicator_results(results_logdir, fig_save_path, indicator_name)
    print(">>> Finished save resluts figures!")
def plot_all_algorithm_indicator_results(args,
                                         indicator_list,
                                         results_file_saved_path,
                                         fig_save_file='',
                                         leg=None):
    """Plot the averaged indicator results of all algorithms.

    For every indicator except 'all_rmse', loads the per-model result CSVs,
    averages each model's values over all experiment units, saves the
    averages to CSV, and draws a 2x3 subplot grid (one panel per pollutant
    component, stations on the x axis). The 'all_rmse' indicator is handed
    to ``plot_indicator_results`` instead.

    :param args: experiment options; must expose ``select_dim`` and
        ``selected_stations``.
    :param indicator_list: indicator names to process.
    :param results_file_saved_path: root folder of the saved result CSVs.
    :param fig_save_file: optional alternative root for the figures.
    :param leg: legend labels / condition names; defaults to
        ['Fed', 'Independent'].
    :return: True on completion.
    """
    # NOTE(review): reconstructed from a whitespace-collapsed source; the
    # placement of plt.clf()/plt.close() after the loop follows the
    # apparent intent -- confirm against the original layout.
    data = None
    if leg is None:
        leg = ['Fed', 'Independent']
    # Output locations for figures and CSV summaries.
    if fig_save_file == '':
        fig_save_fpath = results_file_saved_path + 'All_indicator_avg_results/'
    else:
        fig_save_fpath = fig_save_file + 'All_indicator_avg_results/'
    csv_save_fpath = results_file_saved_path + 'All_indicator_avg_results/'
    mkdir(fig_save_fpath)  # folder for the figures
    mkdir(csv_save_fpath)  # folder for the CSV data
    # Plot the test-result figures.
    print(
        '====================== Save every component indicator result ========================'
    )
    for indicator in indicator_list:
        print('{} results'.format(indicator))
        if indicator != 'all_rmse':
            _results_logdir = [
                results_file_saved_path + indicator + '/' + model_name + '/'
                for model_name in ['fed', 'idpt', 'em']
            ]
            datas = get_all_datasets(_results_logdir, leg)
            # ``data`` keeps its previous value when ``datas`` is not a
            # list; presumably get_all_datasets returns a list of frames
            # on success -- TODO confirm.
            if isinstance(datas, list):
                data = pd.concat(datas)
            unit_sets = data['Unit'].values.tolist()
            unit_sets = set(unit_sets)
            indicator_avg_list = []
            for mode in leg:
                # Sum the per-unit value matrices, then divide by the
                # number of units to get the per-station averages.
                avg_t = 0
                for u in unit_sets:
                    fed_avg_data = data[data.Condition == mode]
                    fed_avg_data = fed_avg_data[fed_avg_data.Unit == u][
                        args.select_dim].values
                    avg_t += fed_avg_data
                indicator_avg = avg_t / len(unit_sets)
                indicator_avg_list.append(indicator_avg)
                # Persist the averaged values locally.
                fed_save_csv_pt = csv_save_fpath + mode + '_' + indicator + '_avg_resluts.csv'
                save_all_avg_results(fed_save_csv_pt, indicator_avg,
                                     args.select_dim, args.selected_stations)
                print('***** ' + mode + ' avg: ', indicator_avg)
            c = len(args.select_dim)
            r = len(args.selected_stations)
            x = [h for h in range(r)]
            # plot style: one panel per component in a 2x3 grid.
            fig, axs = plt.subplots(2, 3, constrained_layout=True)
            for i in range(c):
                for j in range(len(indicator_avg_list)):
                    axs[i // 3, i % 3].plot(x,
                                            indicator_avg_list[j][:, i],
                                            label=leg[j])
                axs[i // 3, i % 3].set_xlabel('Station')
                axs[i // 3, i % 3].set_ylabel(indicator)
                axs[i // 3, i % 3].set_title(args.select_dim[i])
                # if indicator == 'rmse':
                #     axs[i // 3, i % 3].set_ylim(0, 0.1)
                # else:
                #     axs[i // 3, i % 3].set_ylim(0, 1.0)
                # axs[i // 3, i % 3].legend(loc='upper right', fontsize=8)
            plt.legend(loc='upper right', fontsize=8)
            save_path = fig_save_fpath + indicator + '_avg_resluts.svg'
            plt.savefig(save_path)
            # plot_indicator_results(results_logdir, fig_save_path, component)
            print('')
        elif indicator == 'all_rmse':
            # Compute the all_rmse values and save them to the table.
            _results_logdir = [results_file_saved_path + 'all_rmse/']
            plot_indicator_results(_results_logdir,
                                   fig_save_fpath,
                                   'all_rmse',
                                   xaxis='station',
                                   leg=leg)
    plt.clf()
    plt.close()
    return True
# dataset_number = 'one_mi((A5_B10_E15)_111)' dataset_name = '(A5_A10_A15)_nCO_532r_One_time' # dataset_name = '(1P10_2P20_3P30)_532r_One_time' for param in params_test_list: print('** {} params test: {} **'.format(test_param_name, param)) if training_model == 'Many_time': dataset_name = Dname_prefix.format(param, param, param) + '_' + Dname_suffix # dataset_name = 'one_mi_v1((A{})_1r_v3)'.format(param) exp_name = 'C_Test_{}_FedWGAI_T1'.format(dataset_name) # 存储主文件路径 result_save_main_file = './{}/'.format( results_saved_file) + exp_name + '/' mkdir(result_save_main_file) for i in range(cross_validation_sets): print('============= Start training at datasets {} =============='. format(i)) # 用于统计各种指标,建立相对应的文件夹 result_save_file = './{}/'.format( results_saved_file) + exp_name + '/datasets_{}/'.format(i) for index in indicator_list: for model_name in ['fed', 'idpt']: test_result_save_path = result_save_file + index + '/' + model_name mkdir(test_result_save_path) for exp_t in range(exp_total_time): # 当前数据集
format(i)) args.dataset_path = './constructed_datasets/{}/{}/'.format( dataset_name, i) station_datasets = get_saved_datasets(args) # 用于统计各种指标,建立相对应的文件夹 result_save_file = './{}/'.format( results_saved_file) + exp_name + '/datasets_{}/'.format(i) # result_save_file = './{}/'.format(results_saved_file) + exp_name + '/' # 用于统计各种指标,建立相对应的文件夹 for index in indicator_list: test_result_save_path = result_save_file + index + '/' mkdir(test_result_save_path) for exp_t in range(exp_total_time): all_rmse_on_test_results = [] # 用于存储每个数据集测试结果 # 对每个数据集的每个站点数据进行计算 for dataset, station in zip(station_datasets, args.selected_stations): # r = Low_rank_completion(dataset, l=1, mu=0.02, initial=0) # LRC method # Linear, Spline, Cubic, MultiBayesian, RandomTrees if method_name == 'Linear': a_rmse = pandas_linear_methods(dataset) # Linear elif method_name == 'Spline': a_rmse = pandas_spline_method(dataset) # Spline elif method_name == 'KNN': a_rmse = pandas_nearest_method(dataset) # KNN elif method_name == 'Cubic':