def GAIN_main(args, save_path=''):
    # parse args
    args.device = torch.device(
        'cuda' if torch.cuda.is_available() and args.gpu != -1 else 'cpu')

    data_dim = len(args.select_dim)  # real data dimension, i.e. the generator output size
    input_dim = args.input_dim  # dimension of the random input vector
    G_H_Dim = args.G_hidden_dim  # number of units in G's hidden layers
    D_H_Dim = args.G_hidden_dim  # number of units in D's hidden layers (note: reuses args.G_hidden_dim)

    # selected stations
    stations = args.selected_stations

    # load and construct the data
    # load the datasets of all selected stations
    all_station_datasets = get_saved_datasets(args)
    # all_station_datasets = get_saved_datasets_vall(args)

    # every participant performs independent local training
    print('Start independent training!\n')
    # record the independent local users' test-set results
    num_p = len(args.selected_stations)
    num_d = args.num_d
    for p, d, dataset, station in zip(range(num_p), num_d,
                                      all_station_datasets, stations):
        # build the GAIN model
        # create the generator network G
        G = Generator(input_dim, G_H_Dim, data_dim).to(args.device)
        # create the discriminator network D
        D = Discriminator(data_dim, D_H_Dim).to(args.device)
        print('Generator network:\n', G)
        print('Discriminator network:\n', D)
        G.apply(weights_init)
        D.apply(weights_init)

        station_name = station + '{}{}'.format(p, d)
        local = LocalUpdate(args=args)
        local_g = local.independent_training(G, D, dataset, station_name,
                                             save_path)
        # free the GPU memory used by the previous independent-training iteration
        torch.cuda.empty_cache()

    plt.cla()  # clear previous plots
    plt.close()
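# --- Illustrative sketch (not part of the original file) ----------------------
# GAIN_main applies weights_init to both networks; the actual initializer is
# imported from GAIN_model and is not shown here. The helper below is only a
# hypothetical example of such an initializer, assuming the Generator and
# Discriminator are built from nn.Linear layers.
import torch.nn as nn


def weights_init_sketch(m):
    # Initialize Linear layers with a small-variance normal distribution and
    # zero biases; leave every other module type untouched.
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, mean=0.0, std=0.02)
        if m.bias is not None:
            nn.init.zeros_(m.bias)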
for ind in indicator_list:
    test_result_save_path = result_save_file + ind + '/'
    mkdir(test_result_save_path)

for i in range(cross_validation_sets_num):
    print('============= Start training at datasets {} =============='.format(i))
    # create the corresponding folders for the collected metrics
    # result_save_file = './{}/'.format(results_saved_file) + exp_name + '/datasets_{}/'.format(i)
    # plots_file = './plot_results/' + exp_name + '/datasets_{}/'.format(i)

    # current dataset
    args.dataset_path = './constructed_datasets/{}/{}/'.format(
        dataset_number, i)
    # load the data
    station_datasets = get_saved_datasets(args)

    rmse_on_test_results = []
    d2_on_test_results = []
    r2_on_test_results = []
    all_rmse_on_test_results = []
    for dataset, station in zip(station_datasets, args.selected_stations):
        r, rmse, d2, r2, all_rmse = impute_em(
            dataset,
            result_save_file,
        )
        impute_r = r['X_imputed']
        source_x = r['X']

        rmse_on_test_results.append(rmse)
        d2_on_test_results.append(d2)
def gain_test_exp(args, save_path='', exp_num=None):
    if args.gan_categories == 'wGAN':
        from WAGIN_model import Generator, Discriminator
    else:
        from GAIN_model import Generator, Discriminator

    # parse args
    # args = args_parser()
    args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    data_dim = len(args.select_dim)  # real data dimension, i.e. the generator output size
    input_dim = args.input_dim  # dimension of the random input vector
    G_H_Dim = args.G_hidden_dim  # number of units in G's hidden layers
    D_H_Dim = args.G_hidden_dim  # number of units in D's hidden layers (note: reuses args.G_hidden_dim)

    # selected stations
    stations = args.selected_stations

    # load and construct the data
    # load the datasets of all selected stations
    all_station_datasets = get_saved_datasets(args)
    # all_station_datasets = get_saved_datasets_vall(args)

    # build the GAIN model
    # create the generator network G
    G = Generator(input_dim, G_H_Dim, data_dim).to(args.device)
    # create the discriminator network D
    D = Discriminator(data_dim, D_H_Dim).to(args.device)

    # model storage folder
    # model_file = save_path.split('/')[2]
    model_file = save_path.split('/')
    model_file = model_file[2] + '/' + model_file[3]

    if args.independent_usrs_training:
        # build an independent copy for every client
        independent_usrs_G = []
        for i, s in enumerate(args.selected_stations):
            # create an independent network for each station and load its local model
            independent_G = deepcopy(G)
            independent_D = deepcopy(D)
            # load the locally trained independent model of this station
            station_name = s + '{}{}'.format(i, args.num_d[i])
            independent_G, _ = load_model(independent_G, independent_D, 'idpt',
                                          station_name, model_file)
            independent_usrs_G.append(independent_G)

        # evaluate each station's independent model on its test set
        print('\033[0;31;40m[Eval] Independent station Test \033[0m')
        idpt_mse_on_test_results = []
        idpt_d2_on_test_results = []
        idpt_r2_on_test_results = []
        idpt_all_rmse_on_test_results = []
        for idpt_g, dataset, station in zip(independent_usrs_G,
                                            all_station_datasets, stations):
            mse, d2, r2, all_rmse = fed_test(idpt_g, input_dim, dataset,
                                             station, args.select_dim)
            idpt_mse_on_test_results.append(mse)
            idpt_d2_on_test_results.append(d2)
            idpt_r2_on_test_results.append(r2)
            idpt_all_rmse_on_test_results.append(all_rmse)
            print(
                '\033[0;32;40m [Eval] - Station {} complete independent evaluation! \033[0m\n'
                .format(station))

        idpt_save_csv_pt = save_path + 'rmse/idpt/idpt_rmse_test_results_' + str(
            exp_num) + '.csv'
        # save_as_csv(idpt_save_csv_pt, idpt_mse_on_test_results, stations, 'MSE')
        save2csv(idpt_save_csv_pt, idpt_mse_on_test_results, stations,
                 args.select_dim)

        idpt_save_csv_pt = save_path + 'd2/idpt/idpt_d2_test_results_' + str(
            exp_num) + '.csv'
        # save_as_csv(idpt_save_csv_pt, idpt_d2_on_test_results, stations, 'D2')
        save2csv(idpt_save_csv_pt, idpt_d2_on_test_results, stations,
                 args.select_dim)

        idpt_save_csv_pt = save_path + 'r2/idpt/idpt_r2_test_results_' + str(
            exp_num) + '.csv'
        # save_as_csv(idpt_save_csv_pt, idpt_r2_on_test_results, stations, 'R2')
        save2csv(idpt_save_csv_pt, idpt_r2_on_test_results, stations,
                 args.select_dim)

        # save all_rmse
        fed_save_csv_pt = save_path + 'all_rmse/idpt/idpt_all_rmse_test_results_' + str(
            exp_num) + '.csv'
        save_as_csv(fed_save_csv_pt, idpt_all_rmse_on_test_results, 'all_rmse')

        print(
            '\033[0;33;40m >>> [Eval] Finished Idpt model evaluation on Test datasets! \033[0m\n'
        )
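# --- Illustrative sketch (not part of the original file) ----------------------
# gain_test_exp restores per-station checkpoints through
# load_model(G, D, prefix, station, save_file), which is defined elsewhere.
# The version below is only a plausible sketch: the directory layout and the
# file-name pattern ('./models/...') are assumptions, not the project's real
# convention.
import torch


def load_model_sketch(G, D, prefix, station, save_file):
    # Restore the generator/discriminator weights saved under the experiment folder.
    g_path = './models/{}/{}_{}_G.pth'.format(save_file, prefix, station)
    d_path = './models/{}/{}_{}_D.pth'.format(save_file, prefix, station)
    G.load_state_dict(torch.load(g_path, map_location='cpu'))
    D.load_state_dict(torch.load(d_path, map_location='cpu'))
    return G, D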
def fed_main(args, save_path=''):
    if args.gan_categories == 'wGAN':
        # W-GAN
        from WAGIN_model import Generator, Discriminator, weights_init
        from WGAN_LocalUpdate import GANUpdate as LocalUpdate
    else:
        # vanilla GAN
        from Update import LocalUpdate
        from GAIN_model import Generator, Discriminator, weights_init

    # parse args
    args.device = torch.device(
        'cuda' if torch.cuda.is_available() and args.gpu != -1 else 'cpu')

    data_dim = len(args.select_dim)  # real data dimension, i.e. the generator output size
    input_dim = args.input_dim  # dimension of the random input vector
    G_H_Dim = args.G_hidden_dim  # number of units in G's hidden layers
    D_H_Dim = args.G_hidden_dim  # number of units in D's hidden layers (note: reuses args.G_hidden_dim)

    # selected stations
    stations = args.selected_stations

    # load and construct the data
    # load the datasets of all selected stations
    all_station_datasets = get_saved_datasets(args)

    # build the GAIN model
    # create the generator network G
    G = Generator(input_dim, G_H_Dim, data_dim).to(args.device)
    # create the discriminator network D
    D = Discriminator(data_dim, D_H_Dim).to(args.device)
    G.apply(weights_init)
    D.apply(weights_init)

    # build an independent copy for every client
    independent_usrs_G = []
    independent_usrs_D = []
    for _ in range(len(stations)):
        # create an independent network for each station
        independent_G = deepcopy(G)
        independent_D = deepcopy(D)
        independent_usrs_G.append(independent_G)
        independent_usrs_D.append(independent_D)

    print('Generator network:\n', G)
    print('Discriminator network:\n', D)
    G.train()  # generator network
    D.train()  # discriminator network

    # copy the network parameters
    g_w_glob = G.state_dict()
    d_w_glob = D.state_dict()

    # training
    # records of the training process
    g_loss_train, d_loss_train = [], []
    g_mse_train_loss_avg_l = []
    g_mse_test_loss_avg_l = []

    # every participant performs independent local training
    if args.independent_usrs_training:
        print('Start independent training!\n')
        # record the independent local users' test-set results
        local_mse_test_l = []
        num_p = len(args.selected_stations)
        num_d = args.num_d
        for p, d, idp_G, idp_D, dataset, station in zip(
                range(num_p), num_d, independent_usrs_G, independent_usrs_D,
                all_station_datasets, stations):
            # for load_dataset_v2, each participant holds its own set of station data
            if type(station) == list:
                station = 'P'
            station_name = station + '{}{}'.format(p, d)
            local = LocalUpdate(args=args)
            local_g = local.independent_training(idp_G, idp_D, dataset,
                                                 station_name, save_path)
            # free the GPU memory used by the previous independent-training iteration
            torch.cuda.empty_cache()

    # open the training-process log
    fw_name = save_path + 'Fed_main_training_' + 'log.txt'
    fw_fed_main = open(fw_name, 'w+')
    fw_fed_main.write(
        'iter\t G_loss\t D_loss\t G_train_MSE_loss\t G_test_RMSE_loss\t \n')

    # federated learning main loop
    with tqdm(range(args.epochs)) as tq:
        for iter in tq:  # self.args.local_ep is temporarily disabled
            tq.set_description('Federated Updating')
            g_w_locals, g_loss_locals, d_w_locals, d_loss_locals = [], [], [], []
            usrs_weights = []
            local_g_mse_train_loss, local_g_rmse_test_loss = [], []

            # optionally sample a fraction of participants for each round
            # m = max(int(args.frac * args.num_users), 1)
            # idxs_users = np.random.choice(range(args.num_users), m, replace=False)
            idxs_users = range(len(stations))  # participants selected manually

            for idx in idxs_users:
                local = LocalUpdate(args=args, idx=idx)
                w_g, w_d, g_loss, d_loss, g_mse_train_loss, g_rmse_test_loss, train_no = local.train(
                    G=copy.deepcopy(G).to(args.device),
                    D=copy.deepcopy(D).to(args.device),
                    dataset=all_station_datasets[idx],
                )
                # record the local weights
                g_w_locals.append(copy.deepcopy(w_g))
                d_w_locals.append(copy.deepcopy(w_d))
                # record the aggregation weight of this participant
                usrs_weights.append(train_no)
                # record the losses
                g_loss_locals.append(g_loss)
                d_loss_locals.append(d_loss)
                # record the G MSE losses
                local_g_mse_train_loss.append(g_mse_train_loss)
                local_g_rmse_test_loss.append(g_rmse_test_loss)

            # update the global weights with the federated aggregation algorithm
            if args.weights_avg:
                if args.wa_type == 'missing_ratio':
                    w = norm(args.missing_ratios, if_verse=True)
                elif args.wa_type == 'missing_number':
                    w = norm(usrs_weights)
                g_w_glob = FedWeightedAvg(g_w_locals, w, use_soft=True)
                d_w_glob = FedWeightedAvg(d_w_locals, w, use_soft=True)
            else:
                g_w_glob = FedAvg(g_w_locals)
                d_w_glob = FedAvg(d_w_locals)

            # load the federated-averaged parameters into the global models
            G.load_state_dict(g_w_glob)
            D.load_state_dict(d_w_glob)

            # learning-rate decay
            # (d_lr and g_lr are assumed to be initialized from args before this loop)
            if (iter + 1) % args.d_lr_decay_step == 0:
                d_lr = d_lr * args.d_lr_decay
            if (iter + 1) % args.fed_g_lr_decay_step == 0:
                g_lr = g_lr * args.fed_g_lr_decay

            # report the training losses
            g_loss_avg = sum(g_loss_locals) / len(g_loss_locals)
            d_loss_avg = sum(d_loss_locals) / len(d_loss_locals)
            g_mse_train_loss_avg = sum(local_g_mse_train_loss) / len(
                local_g_mse_train_loss)
            g_rmse_test_loss_avg = sum(local_g_rmse_test_loss) / len(
                local_g_rmse_test_loss)
            g_mse_train_loss_avg_l.append(g_mse_train_loss_avg)
            g_mse_test_loss_avg_l.append(g_rmse_test_loss_avg)
            # print('Fed Main Loop Round {:3d}, Average G loss {:.3f}, Average D loss {:.3f}'.format(iter, g_loss_avg, d_loss_avg))
            # print('Train_MSE: {:.4}'.format(g_mse_train_loss_avg.item()))
            # print('Test_RMSE: {:.4}'.format(g_rmse_test_loss_avg.item()))
            tq.set_postfix(Avg_G_loss=g_loss_avg.item(),
                           Avg_D_loss=d_loss_avg.item(),
                           Fed_train_MSE=g_mse_train_loss_avg.item(),
                           Fed_test_RMSE=g_rmse_test_loss_avg.item())
            g_loss_train.append(g_loss_avg)
            d_loss_train.append(d_loss_avg)

            # save the models
            if iter % 5 == 0:
                # save_model_file = save_path.split('/')[2]
                file_name = save_path.split('/')
                save_model_file = file_name[2] + '/' + file_name[3]
                save_model(G, D, save_model_file)

            fw_fed_main.write(
                '{}\t {:.5f}\t {:.5f}\t {:.5f}\t {:.5f}\t \n'.format(
                    iter, g_loss_avg, d_loss_avg, g_mse_train_loss_avg,
                    g_rmse_test_loss_avg))

    # plot the training curves
    fig, axs = plt.subplots(nrows=2, ncols=2, constrained_layout=True)
    loss_plot(axs[0, 0], g_loss_train, 'G train loss')
    loss_plot(axs[0, 1], d_loss_train, 'D train loss')
    loss_plot(axs[1, 0], g_mse_train_loss_avg_l, 'G MSE training loss')
    loss_plot(axs[1, 1], g_mse_test_loss_avg_l, 'RMSE on training dataset')
    plt.savefig(save_path + 'fed_{}.eps'.format(args.epochs))
    plt.savefig(save_path + 'fed_{}.png'.format(args.epochs))

    # close the log file
    fw_fed_main.close()
    plt.cla()  # clear previous plots
    plt.close()
    # clear the GPU cache
    torch.cuda.empty_cache()
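# --- Illustrative sketch (not part of the original file) ----------------------
# fed_main aggregates the client updates with FedWeightedAvg(w_locals, w,
# use_soft=True), where the weight vector comes from norm(...) applied either
# to the missing ratios or to the per-client sample counts. Neither helper is
# defined in this file; the sketch below shows one plausible implementation of
# weighted federated averaging over state_dicts. The softmax weighting and the
# if_verse inversion are assumptions about their behaviour.
import copy
import torch


def norm_sketch(values, if_verse=False):
    # Normalize client weights to sum to one; if_verse inverts the values so
    # that, e.g., clients with higher missing ratios receive lower weight.
    v = torch.tensor(values, dtype=torch.float32)
    if if_verse:
        v = 1.0 / v
    return v / v.sum()


def fed_weighted_avg_sketch(w_locals, weights, use_soft=False):
    # Weighted average of a list of model state_dicts.
    weights = torch.as_tensor(weights, dtype=torch.float32)
    if use_soft:
        weights = torch.softmax(weights, dim=0)
    w_avg = copy.deepcopy(w_locals[0])
    for k in w_avg.keys():
        w_avg[k] = sum(wi * sd[k] for wi, sd in zip(weights, w_locals))
    return w_avg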
def fed_gain_test_exp(args, save_path='', exp_num=None):
    if args.gan_categories == 'wGAN':
        from WAGIN_model import Generator, Discriminator
    else:
        from GAIN_model import Generator, Discriminator

    # parse args
    # args = args_parser()
    args.device = torch.device('cuda:{}'.format(
        args.gpu) if torch.cuda.is_available() and args.gpu != -1 else 'cpu')

    data_dim = len(args.select_dim)  # real data dimension, i.e. the generator output size
    input_dim = args.input_dim  # dimension of the random input vector
    G_H_Dim = args.G_hidden_dim  # number of units in G's hidden layers
    D_H_Dim = args.G_hidden_dim  # number of units in D's hidden layers (note: reuses args.G_hidden_dim)

    # selected stations
    stations = args.selected_stations
    # for load_dataset_v2, each participant holds its own set of station data
    if type(stations) == list:
        s_name = []
        for si in range(len(stations)):
            s_name.append('P' + str(si))
        stations = s_name

    # load and construct the data
    # load the datasets of all selected stations
    all_station_datasets = get_saved_datasets(args)

    # build the GAIN model
    # create the generator network G
    G = Generator(input_dim, G_H_Dim, data_dim).to(args.device)
    # create the discriminator network D
    D = Discriminator(data_dim, D_H_Dim).to(args.device)

    # model storage folder
    # model_file = save_path.split('/')[2]
    model_file = save_path.split('/')
    model_file = model_file[2] + '/' + model_file[3]

    if args.independent_usrs_training:
        # build an independent copy for every client
        independent_usrs_G = []
        independent_usrs_D = []
        for i, s in enumerate(args.selected_stations):
            # create an independent network for each station and load its local model
            independent_G = deepcopy(G)
            independent_D = deepcopy(D)
            # load the locally trained independent model of this station
            if type(s) == list:
                s = 'P'
            station_name = s + '{}{}'.format(i, args.num_d[i])
            load_model(independent_G, independent_D, 'idpt', station_name,
                       model_file)
            independent_usrs_G.append(independent_G)
            independent_usrs_D.append(independent_D)

        # evaluate each station's independent model on its test set
        print(
            '============== Independent station Test =======================')
        idpt_mse_on_test_results = []
        idpt_d2_on_test_results = []
        idpt_r2_on_test_results = []
        idpt_all_rmse_on_test_results = []
        for idpt_g, dataset, station in zip(independent_usrs_G,
                                            all_station_datasets, stations):
            mse, d2, r2, all_rmse = fed_test(idpt_g, input_dim, dataset,
                                             station, args.select_dim)
            idpt_mse_on_test_results.append(mse)
            idpt_d2_on_test_results.append(d2)
            idpt_r2_on_test_results.append(r2)
            idpt_all_rmse_on_test_results.append(all_rmse)
            print(
                '[Idpt eval] - Station {} complete independent evaluation!\n'.
                format(station))
        idpt_save_csv_pt = save_path + 'rmse/idpt/idpt_mse_test_results_' + str(
            exp_num) + '.csv'
        # save_as_csv(idpt_save_csv_pt, idpt_mse_on_test_results, stations, 'MSE')
        save2csv(idpt_save_csv_pt, idpt_mse_on_test_results, stations,
                 args.select_dim)

        idpt_save_csv_pt = save_path + 'd2/idpt/idpt_d2_test_results_' + str(
            exp_num) + '.csv'
        # save_as_csv(idpt_save_csv_pt, idpt_d2_on_test_results, stations, 'D2')
        save2csv(idpt_save_csv_pt, idpt_d2_on_test_results, stations,
                 args.select_dim)

        idpt_save_csv_pt = save_path + 'r2/idpt/idpt_r2_test_results_' + str(
            exp_num) + '.csv'
        # save_as_csv(idpt_save_csv_pt, idpt_r2_on_test_results, stations, 'R2')
        save2csv(idpt_save_csv_pt, idpt_r2_on_test_results, stations,
                 args.select_dim)

        # save all_rmse
        fed_save_csv_pt = save_path + 'all_rmse/idpt/idpt_all_rmse_test_results_' + str(
            exp_num) + '.csv'
        save_as_csv(fed_save_csv_pt, idpt_all_rmse_on_test_results, 'all_rmse')

        print('[Idpt eval] >>> Finished Idpt model evaluation on Test datasets!\n')

    # load the federated global model parameters
    load_model(G, D, 'fed', station='', save_file=model_file)

    # evaluate the federated model on the test sets
    print(
        '[Fed eval] ===================== Federated station Test ======================='
    )
    fed_mse_on_test_results = []
    fed_d2_on_test_results = []
    fed_r2_on_test_results = []
    all_rmse_on_test_results = []
    for dataset, station in zip(all_station_datasets, stations):
        mse, d2, r2, all_rmse = fed_test(G, input_dim, dataset, station,
                                         args.select_dim)
        fed_mse_on_test_results.append(mse)
        fed_d2_on_test_results.append(d2)
        fed_r2_on_test_results.append(r2)
        all_rmse_on_test_results.append(all_rmse)
        print('[Fed Eval] Station {} complete federated evaluation!\n'.format(
            station))

    # save the results locally
    fed_save_csv_pt = save_path + 'rmse/fed/fed_mse_test_results_' + str(
        exp_num) + '.csv'
    # save_as_csv(fed_save_csv_pt, fed_mse_on_test_results, stations, 'MSE')
    save2csv(fed_save_csv_pt, fed_mse_on_test_results, stations,
             args.select_dim)

    fed_save_csv_pt = save_path + 'd2/fed/fed_d2_test_results_' + str(
        exp_num) + '.csv'
    # save_as_csv(fed_save_csv_pt, fed_d2_on_test_results, stations, 'D2')
    save2csv(fed_save_csv_pt, fed_d2_on_test_results, stations,
             args.select_dim)

    fed_save_csv_pt = save_path + 'r2/fed/fed_r2_test_results_' + str(
        exp_num) + '.csv'
    # save_as_csv(fed_save_csv_pt, fed_r2_on_test_results, stations, 'R2')
    save2csv(fed_save_csv_pt, fed_r2_on_test_results, stations,
             args.select_dim)

    # save all_rmse
    fed_save_csv_pt = save_path + 'all_rmse/fed/fed_all_rmse_test_results_' + str(
        exp_num) + '.csv'
    save_as_csv(fed_save_csv_pt, all_rmse_on_test_results, 'all_rmse')

    print('>>> [Fed Eval] Finished Fed model evaluation on Test datasets!')
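# --- Illustrative sketch (not part of the original file) ----------------------
# The evaluation routines above persist per-station, per-feature metrics with
# save2csv(path, results, stations, select_dim). The real helper is defined
# elsewhere; the sketch below only shows the kind of table it is expected to
# write (one row per station, one column per selected feature). The
# pandas-based implementation is an assumption.
import os

import pandas as pd


def save2csv_sketch(csv_path, results, stations, columns):
    # Write the metric matrix as a station-by-feature CSV table.
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
    pd.DataFrame(results, index=stations, columns=columns).to_csv(csv_path)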