def multi_train_local_dif(q_l, q_w, arguemnts, idx, loss, local_train_loader, non_iid, model):
    """Worker-process entry: train one client's model locally and report results.

    Args:
        q_l: multiprocessing queue that receives the client's training loss.
        q_w: multiprocessing queue that receives the client's updated weights.
        arguemnts: parsed experiment args (misspelled name kept so keyword
            callers keep working); must provide ``.device``.
        idx: index of the user/client to train.
        loss: loss function forwarded to ``LocalUpdate``.
        local_train_loader: DataLoader whose ``.dataset`` holds the train data.
        non_iid: mapping from user index to that user's sample indices.
        model: global model to be trained locally on this client's data.
    """
    # FIX: the original print was corrupted ('"training user: "******"ye22: "...'),
    # which is a syntax error; reduced to a single well-formed status line.
    print("training user: " + str(idx))
    # print("ye22: " + str(non_iid[idx]))  # debug: this user's sample indices
    local = LocalUpdate(args=arguemnts, user_num=idx, loss_func=loss,
                        dataset=local_train_loader.dataset, idxs=non_iid[idx])
    w, loss = local.train(net=model.to(arguemnts.device))
    q_l.put(loss)
    q_w.put(w)
    # Give the parent process time to drain the queues before this worker exits.
    time.sleep(5)
def train(net_glob, db, w_glob, args):
    """Mediator-based federated training loop (FedAvg over mediators).

    Args:
        net_glob: global model, updated in place each round.
        db: data provider wrapper; ``db.dp`` holds the dataset plus
            ``mediator`` (client groupings) and ``local_train_index``.
        w_glob: initial global state_dict (used to pre-fill ``w_locals``
            when aggregating over all clients).
        args: experiment options (``all_clients``, ``epochs``, ``device``, ...).

    Returns:
        The trained global model ``net_glob``.
    """
    # training bookkeeping (several of these are kept for parity with the
    # original template even though they are unused here)
    loss_train = []
    cv_loss, cv_acc = [], []
    val_loss_pre, counter = 0, 0
    net_best = None
    best_loss = None
    val_acc_list, net_list = [], []

    # originally assign clients and FedAvg -> mediator FedAvg
    if args.all_clients:
        print("Aggregation over all clients")
        w_locals = [w_glob for i in range(len(db.dp.mediator))]

    # 3: for each synchronization round r = 1, 2, ..., R
    for iter in range(args.epochs):
        # Reset the per-round accumulators before visiting the mediators.
        loss_locals = []
        if not args.all_clients:
            w_locals = []
        # 4: for each mediator m in 1, 2, ..., M (conceptually in parallel)
        # FIX: normalized to db.dp.mediator — the all_clients branch above
        # already sizes w_locals from db.dp.mediator, and `db.mediator` was
        # inconsistent with it. TODO(review): confirm against db's definition.
        for i, mdt in enumerate(db.dp.mediator):
            need_index = [db.dp.local_train_index[k] for k in mdt]
            # FIX: `dataset=dp` referenced an undefined name (NameError);
            # db.dp is the only dataset-like object in scope. TODO confirm.
            local = LocalUpdate(args=args, dataset=db.dp,
                                idxs=np.hstack(need_index))
            # local epochs (for lEpoch in range(E)) happen inside local.train
            w, loss = local.train(net=copy.deepcopy(net_glob).to(args.device))
            if args.all_clients:
                w_locals[i] = copy.deepcopy(w)
            else:
                w_locals.append(copy.deepcopy(w))
            loss_locals.append(copy.deepcopy(loss))

        # update global weights and copy them into net_glob
        w_glob = FedAvg(w_locals)
        net_glob.load_state_dict(w_glob)

        # print average training loss for this round
        loss_avg = sum(loss_locals) / len(loss_locals)
        print('Round {:3d}, Average loss {:.3f}'.format(iter, loss_avg))
        loss_train.append(loss_avg)

    # plot loss curve
    plt.figure()
    plt.plot(range(len(loss_train)), loss_train)
    plt.ylabel('train_loss')
    plt.savefig('./save/fed_{}_{}_{}_C{}_iid{}.png'.format(
        args.dataset, args.model, args.epochs, args.frac, args.iid))
    return net_glob
def multi_train_local_dif(q_l, q_w, arguemnts, idx, loss, data_loader, distribution, model):
    """Train user `idx` locally in a worker process and push the results.

    The updated weights go to ``q_w`` and the training loss to ``q_l``; a
    short sleep keeps the worker alive while the parent drains the queues.
    """
    print("training user: " + str(idx))
    updater = LocalUpdate(
        args=arguemnts,
        user_num=idx,
        loss_func=loss,
        dataset=data_loader.dataset,
        idxs=distribution[idx],
    )
    weights, train_loss = updater.train(net=model.to(arguemnts.device))
    q_l.put(train_loss)
    q_w.put(weights)
    time.sleep(5)
def run(rank, world_size, loss_train, acc_train, dataset_train, idxs_users, net_glob, grc):
    """Distributed training loop with gradient compression (grc).

    Each entry of ``idxs_users`` is one communication round; every rank trains
    on its own user's data, gradients are compressed via ``grc``, and losses/
    accuracies are reduced onto rank 0 which logs to TensorBoard and
    checkpoints the model.

    Relies on module-level ``args`` and ``dict_users`` (defined elsewhere in
    this file). Only rank 0 creates the SummaryWriter, and all uses of ``tb``
    are guarded by ``rank == 0``.
    """
    # net_glob.load_state_dict(torch.load('net_state_dict.pt'))
    if rank == 0:
        # run name encodes compressor, epochs, and compression ratio
        foldername = f'{args.compressor}epoch{args.epochs}ratio{args.gsr}'
        tb = SummaryWriter("runs/" + foldername)
    rnd = 0  # FIX: renamed from `round`, which shadowed the builtin
    for i in idxs_users:  # one communication round per entry
        idx = dict_users[i[rank]]  # this rank's user for the round
        epoch_loss = torch.zeros(1)
        optimizer = torch.optim.SGD(net_glob.parameters(), lr=args.lr,
                                    momentum=args.momentum)
        local = LocalUpdate(args=args, dataset=dataset_train, idxs=idx)
        train_loss = local.train(net=net_glob)  # train locally
        # Compress every parameter gradient in place before stepping.
        for index, (name, parameter) in enumerate(net_glob.named_parameters()):
            grad = parameter.grad.data
            grc.acc(grad)
            new_tensor = grc.step(grad, name)
            grad.copy_(new_tensor)
        optimizer.step()
        net_glob.zero_grad()
        epoch_loss += train_loss
        dist.reduce(epoch_loss, 0, dist.ReduceOp.SUM)  # sum losses onto rank 0
        net_glob.eval()
        train_acc = torch.zeros(1)
        acc, loss = local.inference(net_glob, dataset_train, idx)
        train_acc += acc
        dist.reduce(train_acc, 0, dist.ReduceOp.SUM)
        if rank == 0:
            torch.save(net_glob.state_dict(), 'net_state_dict.pt')
            # reduced values are sums over ranks; average them
            epoch_loss /= world_size
            train_acc /= world_size
            loss_train[rnd] = epoch_loss[0]
            acc_train[rnd] = train_acc[0]
            tb.add_scalar("Loss", epoch_loss[0], rnd)
            tb.add_scalar("Accuracy", train_acc[0], rnd)
            tb.add_scalar("Uncompressed Size", grc.uncompressed_size, rnd)
            tb.add_scalar("Compressed Size", grc.size, rnd)
            if rnd % 50 == 0:
                print('Round {:3d}, Rank {:1d}, Average loss {:.6f}, Average Accuracy {:.2f}%'.format(
                    rnd, dist.get_rank(), epoch_loss[0], train_acc[0]))
        rnd += 1
    if rank == 0:
        tb.close()
        print("Printing Compression Stats...")
        grc.printr()
def run(rank, world_size, loss_train, acc_train, epoch, dataset_train, idx, net_glob):
    """One global round of distributed training with Deep Gradient Compression.

    Restores the model and DGC state from disk, runs ``args.local_ep`` local
    epochs on this rank's data slice ``idx``, reduces loss/accuracy onto
    rank 0, and has rank 0 checkpoint both state dicts and record metrics in
    ``loss_train[epoch]`` / ``acc_train[epoch]``.

    Relies on module-level ``args`` (defined elsewhere in this file).
    """
    net_glob.load_state_dict(torch.load('net_state_dict.pt'))
    dgc_trainer = DGC(model=net_glob, rank=rank, size=world_size,
                      momentum=args.momentum, full_update_layers=[4],
                      percentage=args.dgc)
    dgc_trainer.load_state_dict(torch.load('dgc_state_dict.pt'))
    epoch_loss = torch.zeros(1)
    for local_ep in range(args.local_ep):  # FIX: was `iter`, shadowing builtin
        local = LocalUpdate(args=args, dataset=dataset_train, idxs=idx)
        b_loss = local.train(net=net_glob, world_size=world_size, rank=rank)
        epoch_loss += b_loss
        if rank == 0:
            print("Local Epoch: {}, Local Epoch Loss: {}".format(local_ep, b_loss))
        # NOTE(review): placed at loop level (every rank, every local epoch);
        # the mangled source is ambiguous here — confirm against DGC's API.
        dgc_trainer.gradient_update()
    epoch_loss /= args.local_ep
    dist.reduce(epoch_loss, 0, dist.ReduceOp.SUM)  # sum losses onto rank 0
    net_glob.eval()
    train_acc = torch.zeros(1)
    local = LocalUpdate(args=args, dataset=dataset_train, idxs=idx)
    acc, loss = local.inference(net_glob, dataset_train, idx)
    train_acc += acc
    dist.reduce(train_acc, 0, dist.ReduceOp.SUM)
    if rank == 0:
        torch.save(net_glob.state_dict(), 'net_state_dict.pt')
        torch.save(dgc_trainer.state_dict(), 'dgc_state_dict.pt')
        # reduced values are sums over ranks; average them
        epoch_loss /= world_size
        train_acc /= world_size
        loss_train[epoch] = epoch_loss[0]
        acc_train[epoch] = train_acc[0]
        print('Round {:3d}, Rank {:1d}, Average loss {:.6f}, Average Accuracy {:.2f}%'
              .format(epoch, dist.get_rank(), epoch_loss[0], train_acc[0]))
net_tmp = copy.deepcopy(net_glob) # 遍历簇内的每个用户 for user_key, user_val in _users.items(): idx_users.append(int(user_key)) # 该簇内的所有用户idx # print(idx_users) # shuffle the in-cluster sequential order and randomly select a CH random.shuffle(idx_users) # each cluster is performed parallel start_time = time.time() for idx in idx_users: local = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users[idx]) w, loss = local.train(net=copy.deepcopy(net_tmp).to( args.device), stepsize=step_size) loss_local.append(copy.deepcopy(loss)) # 用相邻节点的 model初始化下一节点的 model net_tmp.load_state_dict(w) # 一个簇内的用户按 seq 方式训练完成后,记录每个簇参与上传的 model end_time = time.time() w_clusters.append(copy.deepcopy(w)) loss_clusters.append(sum(loss_local) / len(loss_local)) comp_time.append(end_time - start_time) loss_avg = sum(loss_clusters) / len(loss_clusters) # 对每个簇产生的 model进行Aggregation start_time = time.time() w_glob = Semi_FedAvg(w_clusters) end_time = time.time() fed_time = end_time - start_time
def mainFl(
        net_glob_mainFL: Any,
        dict_users_mainFL: Dict[int, Any],
        dict_labels_counter_mainFL,
        args,
        cost,
        dataset_train,
        dataset_test,
        small_shared_dataset
):
    """Run the baseline (main) FL process and collect per-round statistics.

    Args:
        net_glob_mainFL (torch.nn.Module): global model.
        dict_users_mainFL: dict mapping user index -> that user's data indexes.
        dict_labels_counter_mainFL: per-user label counts (currently unused).
        args: all options (see utils/options.py).
        cost: per-user cost of uploading a locally updated model.
        dataset_train (torch dataset): full training dataset (dict_users holds
            only indexes into it).
        dataset_test (torch dataset): full test dataset.
        small_shared_dataset (torch dataset): small shared dataset used only
            for tracking/comparison, not for decision making.

    Returns:
        Tuple of (final test loss over the main test dataset,
        dict_workers_index of selected workers per round,
        Final_LargeDataSetTest_MainFL macroscopic stats dict,
        data_Global_main per-round stats dict).
    """
    data_Global_main = {"C": [], "Round": [], "Average Loss Train": [],
                        "SDS Loss": [], "SDS Accuracy": [],
                        "Workers Number": [], "Large Test Loss": [],
                        "Large Test Accuracy": [], "Communication Cost": []}
    Final_LargeDataSetTest_MainFL = {"C": [], "Test Accuracy": [],
                                     "Test Loss": [], "Train Loss": [],
                                     "Train Accuracy": [], "Total Rounds": [],
                                     "Communication Cost": []}
    # saving index of workers selected in each round
    dict_workers_index = defaultdict(list)

    # n_k[i] = number of samples held by user i (FedAvg weighting)
    n_k = np.zeros(shape=(args.num_users))
    for i in range(len(dict_users_mainFL)):
        n_k[i] = len(dict_users_mainFL[i])
    # print(n_k)

    # Main FL
    loss_train_mainFL = []  # average client train loss per round
    # global-model loss/accuracy over the small shared dataset, per round
    Loss_local_each_global_total_mainFL = []
    Accuracy_local_each_global_total_mainFL = []
    # loss of each worker over the small shared dataset in each round
    loss_workers_total_mainFL = np.zeros(shape=(args.num_users, args.epochs))
    label_workers_mainFL = {i: np.array(
        [], dtype='int64') for i in range(args.num_users)}
    # FIX: this initialization was commented out in the original, but
    # validation_test_mainFed.append(...) runs every round below -> NameError.
    validation_test_mainFed = []
    acc_test, loss_test = test_img(net_glob_mainFL, dataset_test, args)
    workers_participation_main_fd = np.zeros((args.num_users, args.epochs))
    workers_percent_main = []

    net_glob_mainFL.eval()
    acc_test_final_mainFL, loss_test_final_mainFL = test_img(
        net_glob_mainFL, dataset_test, args)
    print("main fl initial loss is ", loss_test_final_mainFL)

    # while counter initialization
    iter_mainFL = 0
    # assign index to each worker in workers_mainFL arr
    workers_mainFL = []
    for i in range(args.num_users):
        workers_mainFL.append(i)
    temp_netglob_mainFL = copy.deepcopy(net_glob_mainFL)
    selected_clients_costs_total = []
    total_rounds_mainFL = 0
    pre_net_glob = copy.deepcopy(net_glob_mainFL)

    while iter_mainFL < (args.epochs):
        # print(f"iter {iter_mainFL} is started")
        selected_clients_costs_round = []
        w_locals_mainFL, loss_locals_mainFL = [], []
        m_mainFL = max(int(args.frac * args.num_users), 1)
        # select some clients randomly; save their indexes for reuse by
        # the other algorithms
        list_of_random_workers = random.sample(workers_mainFL, m_mainFL)
        # print("list of random workers is ", list_of_random_workers)
        for i in range(len(list_of_random_workers)):
            dict_workers_index[iter_mainFL].append(list_of_random_workers[i])

        # record the global model's loss over the small shared dataset
        x_mainFL = copy.deepcopy(net_glob_mainFL)
        x_mainFL.eval()
        acc_test_global_mainFL, loss_test_global_mainFL = test_img(
            x_mainFL, small_shared_dataset, args)
        Loss_local_each_global_total_mainFL.append(loss_test_global_mainFL)
        Accuracy_local_each_global_total_mainFL.append(acc_test_global_mainFL)
        # print("loss global is ", loss_test_global_mainFL)
        # print("accuracy global is ", acc_test_global_mainFL)

        workers_count_mainFL = 0
        for idx in list_of_random_workers:
            # start training each selected client
            # print("idx is ", idx)
            local_mainFL = LocalUpdate(
                args=args, dataset=dataset_train, idxs=dict_users_mainFL[idx])
            w_mainFL, loss_mainFL = local_mainFL.train(
                net=copy.deepcopy(net_glob_mainFL).to(args.device))
            # copy its updated weights
            w_locals_mainFL.append(copy.deepcopy(w_mainFL))
            # copy the training loss of that client
            loss_locals_mainFL.append(loss_mainFL)
            temp_netglob_mainFL.load_state_dict(w_mainFL)
            # test the locally updated model over the small shared dataset
            # and save its loss and accuracy for the record
            temp_netglob_mainFL.eval()
            acc_test_local_mainFL, loss_test_local_mainFL = test_img(
                temp_netglob_mainFL, small_shared_dataset, args)
            # print("client loss is ", loss_test_local_mainFL)
            # print("accuracy of client is ", acc_test_local_mainFL)
            # loss_workers_total_mainFL[idx, iter_mainFL] = acc_test_local_mainFL
            # record that this client participated this round
            workers_participation_main_fd[idx][iter_mainFL] = 1
            # total number of clients participating this round (== C*N)
            workers_count_mainFL += 1
            selected_clients_costs_round.append(cost[idx])

        # Add other clients' weights who did not participate
        # for i in range(args.num_users - len(list_of_random_workers)):
        #     w_locals_mainFL.append(pre_weights.state_dict())
        # update global weights
        # w_glob_mainFL = FedAvg(w_locals_mainFL)
        for n in range(args.num_users - m_mainFL):
            w_locals_mainFL.append(pre_net_glob.state_dict())

        # NOTE: Updated weights (@author Nathaniel).
        w_glob_mainFL = fed_avg(w_locals_mainFL, n_k)
        # copy weight to net_glob
        net_glob_mainFL.load_state_dict(w_glob_mainFL)
        # print("after ", net_glob_mainFL)

        # calculating average training loss
        # print(loss_locals_mainFL)
        loss_avg_mainFL = sum(loss_locals_mainFL) / len(loss_locals_mainFL)
        loss_train_mainFL.append(loss_avg_mainFL)
        # print(loss_avg_mainFL)

        # calculating test loss and accuracy over the main large test dataset
        acc_test_round_mainfed, loss_test_round_mainfed = test_img(
            net_glob_mainFL, dataset_test, args)
        validation_test_mainFed.append(acc_test_round_mainfed)
        workers_percent_main.append(workers_count_mainFL / args.num_users)

        # calculating accuracy and loss over the small shared dataset
        acc_test_final_mainFL, loss_test_final_mainFL = test_img(
            net_glob_mainFL, dataset_test, args)

        data_Global_main["Round"].append(iter_mainFL)
        data_Global_main["C"].append(args.frac)
        data_Global_main["Average Loss Train"].append(float(loss_avg_mainFL))
        data_Global_main["SDS Loss"].append(float(loss_test_global_mainFL))
        data_Global_main["SDS Accuracy"].append(float(acc_test_global_mainFL))
        data_Global_main["Workers Number"].append(float(workers_count_mainFL))
        data_Global_main["Large Test Loss"].append(
            float(loss_test_final_mainFL))
        data_Global_main["Large Test Accuracy"].append(
            float(acc_test_final_mainFL))
        data_Global_main["Communication Cost"].append(
            sum(selected_clients_costs_round))

        # TODO: This doesn't make sense?
        selected_clients_costs_total.append(sum(selected_clients_costs_round))
        iter_mainFL += 1
        # total_rounds_mainFL = iter_mainFL
        pre_net_glob = copy.deepcopy(net_glob_mainFL)
        # print(f"iter {iter_mainFL} is finished")

    # calculating the percentage of each worker's participation
    workers_percent_final_mainFL = np.zeros(args.num_users)
    workers_name_mainFL = np.empty(args.num_users)
    for i in range(len(workers_participation_main_fd[:, 1])):
        workers_percent_final_mainFL[i] = sum(
            workers_participation_main_fd[i, :]) / args.epochs
        workers_name_mainFL[i] = i

    net_glob_mainFL.eval()
    # print("train test started")
    acc_train_final_main, loss_train_final_main = test_img(
        net_glob_mainFL, dataset_train, args)
    # print("train test finished")
    acc_test_final_main, loss_test_final_main = test_img(
        net_glob_mainFL, dataset_test, args)

    Final_LargeDataSetTest_MainFL["C"].append(args.frac)
    Final_LargeDataSetTest_MainFL["Test Loss"].append(
        float(loss_test_final_main))
    Final_LargeDataSetTest_MainFL["Test Accuracy"].append(
        float(acc_test_final_main))
    Final_LargeDataSetTest_MainFL["Train Loss"].append(
        float(loss_train_final_main))
    Final_LargeDataSetTest_MainFL["Train Accuracy"].append(
        float(acc_train_final_main))
    Final_LargeDataSetTest_MainFL["Communication Cost"].append(
        sum(selected_clients_costs_total))
    Final_LargeDataSetTest_MainFL["Total Rounds"].append(args.epochs)

    return float(loss_test_final_main), dict_workers_index, Final_LargeDataSetTest_MainFL, data_Global_main
def run_all(clf_all1, clf_all2, adv_all1, adv_all2, adv_all3):
    """Fair federated learning on the adult/ICU dataset.

    Pretrains per-user local classifiers and adversaries (fairness via
    adversarial debiasing), then federates a global classifier head with
    FedAvg, and finally appends per-user evaluation metrics to the five
    accumulator lists passed in (which are also returned).

    NOTE(review): reconstructed from whitespace-mangled source — the nesting
    of the helper defs/classes inside run_all follows the only reading that
    makes the later call sites resolve; confirm against the original repo.
    """
    # parse args
    args = args_parser()
    args.device = torch.device('cuda:{}'.format(
        args.gpu) if torch.cuda.is_available() and args.gpu != -1 else 'cpu')

    # load ICU dataset and split users
    # load ICU data set
    X, y, Z = load_ICU_data('../fairness-in-ml/data/adult.data')

    if not args.iid:
        # cap the dataset size for the non-iid sharding scheme
        X = X[:30000]
        y = y[:30000]
        Z = Z[:30000]

    n_points = X.shape[0]
    n_features = X.shape[1]
    n_sensitive = Z.shape[1]

    # split into train/test set
    (X_train, X_test, y_train, y_test, Z_train, Z_test) = train_test_split(
        X, y, Z, test_size=0.5, stratify=y, random_state=7)

    # standardize the data
    scaler = StandardScaler().fit(X_train)
    scale_df = lambda df, scaler: pd.DataFrame(
        scaler.transform(df), columns=df.columns, index=df.index)
    X_train = X_train.pipe(scale_df, scaler)
    X_test = X_test.pipe(scale_df, scaler)

    class PandasDataSet(TensorDataset):
        # TensorDataset built directly from pandas DataFrames/Series.
        def __init__(self, *dataframes):
            tensors = (self._df_to_tensor(df) for df in dataframes)
            super(PandasDataSet, self).__init__(*tensors)

        def _df_to_tensor(self, df):
            if isinstance(df, pd.Series):
                df = df.to_frame('dummy')
            return torch.from_numpy(df.values).float()

    # Free-standing twin of the method above; used below where no
    # PandasDataSet instance is available.
    def _df_to_tensor(df):
        if isinstance(df, pd.Series):
            df = df.to_frame('dummy')
        return torch.from_numpy(df.values).float()

    train_data = PandasDataSet(X_train, y_train, Z_train)
    test_data = PandasDataSet(X_test, y_test, Z_test)

    print('# train samples:', len(train_data))  # 15470
    print('# test samples:', len(test_data))

    batch_size = 32
    train_loader = DataLoader(train_data, batch_size=batch_size,
                              shuffle=True, drop_last=True)
    test_loader = DataLoader(test_data, batch_size=len(test_data),
                             shuffle=True, drop_last=True)

    # sample users
    if args.iid:
        dict_users_train = fair_iid(train_data, args.num_users)
        dict_users_test = fair_iid(test_data, args.num_users)
    else:
        train_data = [
            _df_to_tensor(X_train),
            _df_to_tensor(y_train),
            _df_to_tensor(Z_train)
        ]
        test_data = [
            _df_to_tensor(X_test),
            _df_to_tensor(y_test),
            _df_to_tensor(Z_test)
        ]
        # import pdb; pdb.set_trace()
        dict_users_train, rand_set_all = fair_noniid(
            train_data, args.num_users, num_shards=100, num_imgs=150,
            train=True)
        dict_users_test, _ = fair_noniid(
            test_data, args.num_users, num_shards=100, num_imgs=150,
            train=False, rand_set_all=rand_set_all)

    # Rebuild the tensor-list form unconditionally: the per-user indexing
    # below (bb[list(...)]) needs plain tensors, not a Dataset.
    train_data = [
        _df_to_tensor(X_train),
        _df_to_tensor(y_train),
        _df_to_tensor(Z_train)
    ]
    test_data = [
        _df_to_tensor(X_test),
        _df_to_tensor(y_test),
        _df_to_tensor(Z_test)
    ]

    class LocalClassifier(nn.Module):
        # Two-stage MLP: network1 produces a shared representation ("mid"),
        # network2 turns it into the income prediction.
        def __init__(self, n_features, n_hidden=32, p_dropout=0.2):
            super(LocalClassifier, self).__init__()
            self.network1 = nn.Sequential(
                nn.Linear(n_features, n_hidden), nn.ReLU(),
                nn.Dropout(p_dropout),
                nn.Linear(n_hidden, n_hidden), nn.ReLU(),
                nn.Dropout(p_dropout),
                nn.Linear(n_hidden, n_hidden))
            self.network2 = nn.Sequential(
                nn.ReLU(), nn.Dropout(p_dropout), nn.Linear(n_hidden, 1))

        def forward(self, x):
            mid = self.network1(x)
            final = torch.sigmoid(self.network2(mid))
            return mid, final

    def pretrain_classifier(clf, data_loader, optimizer, criterion):
        # One epoch of supervised training for a local classifier.
        losses = 0.0
        for x, y, _ in data_loader:
            x = x.to(args.device)
            y = y.to(args.device)
            clf.zero_grad()
            mid, p_y = clf(x)
            loss = criterion(p_y, y)
            loss.backward()
            optimizer.step()
            losses += loss.item()
        print('loss', losses / len(data_loader))
        return clf

    def test_classifier(clf, data_loader):
        # Expects a single-batch loader (batch_size == len(dataset)).
        losses = 0
        assert len(data_loader) == 1
        with torch.no_grad():
            for x, y_test, _ in data_loader:
                x = x.to(args.device)
                mid, y_pred = clf(x)
                y_pred = y_pred.cpu()
                clf_accuracy = metrics.accuracy_score(
                    y_test, y_pred > 0.5) * 100
        return clf_accuracy

    class Adversary(nn.Module):
        # Predicts the sensitive attributes from the shared representation.
        def __init__(self, n_sensitive, n_hidden=32):
            super(Adversary, self).__init__()
            self.network = nn.Sequential(
                nn.Linear(n_hidden, n_hidden), nn.ReLU(),
                nn.Linear(n_hidden, n_hidden), nn.ReLU(),
                nn.Linear(n_hidden, n_hidden), nn.ReLU(),
                nn.Linear(n_hidden, n_sensitive),
            )

        def forward(self, x):
            return torch.sigmoid(self.network(x))

    def pretrain_adversary(adv, clf, data_loader, optimizer, criterion):
        # One epoch of adversary training on detached classifier features.
        losses = 0.0
        for x, _, z in data_loader:
            x = x.to(args.device)
            z = z.to(args.device)
            mid, p_y = clf(x)
            mid = mid.detach()
            p_y = p_y.detach()
            adv.zero_grad()
            p_z = adv(mid)
            loss = (criterion(p_z.to(args.device), z.to(args.device)) *
                    lambdas.to(args.device)).mean()
            loss.backward()
            optimizer.step()
            losses += loss.item()
        print('loss', losses / len(data_loader))
        return adv

    def test_adversary(adv, clf, data_loader):
        # Per-sensitive-attribute accuracy; expects a single-batch loader.
        losses = 0
        adv_accuracies = []
        assert len(data_loader) == 1
        with torch.no_grad():
            for x, _, z_test in data_loader:
                x = x.to(args.device)
                mid, p_y = clf(x)
                mid = mid.detach()
                p_y = p_y.detach()
                p_z = adv(mid)
                for i in range(p_z.shape[1]):
                    z_test_i = z_test[:, i]
                    z_pred_i = p_z[:, i]
                    z_pred_i = z_pred_i.cpu()
                    adv_accuracy = metrics.accuracy_score(
                        z_test_i, z_pred_i > 0.5) * 100
                    adv_accuracies.append(adv_accuracy)
        return adv_accuracies

    def train_both(clf, adv, data_loader, clf_criterion, adv_criterion,
                   clf_optimizer, adv_optimizer, lambdas):
        # Train adversary
        adv_losses = 0.0
        for x, y, z in data_loader:
            x = x.to(args.device)
            z = z.to(args.device)
            local, p_y = clf(x)
            adv.zero_grad()
            p_z = adv(local)
            loss_adv = (adv_criterion(p_z.to(args.device), z.to(args.device))
                        * lambdas.to(args.device)).mean()
            loss_adv.backward()
            adv_optimizer.step()
            adv_losses += loss_adv.item()
        print('adversarial loss', adv_losses / len(data_loader))

        # Train classifier on single batch
        clf_losses = 0.0
        # Exhaust the loader just to grab its LAST batch; training below
        # then runs once on that batch only.
        for x, y, z in data_loader:
            pass
        x = x.to(args.device)
        y = y.to(args.device)
        z = z.to(args.device)
        local, p_y = clf(x)
        p_z = adv(local)
        clf.zero_grad()
        if args.adv:
            # Classifier objective minus the (weighted) adversary objective:
            # be accurate while making the sensitive attributes unpredictable.
            clf_loss = clf_criterion(p_y.to(args.device), y.to(
                args.device)) - (
                    adv_criterion(p_z.to(args.device), z.to(args.device)) *
                    lambdas.to(args.device)).mean()
        else:
            clf_loss = clf_criterion(p_y.to(args.device), y.to(args.device))
        clf_loss.backward()
        clf_optimizer.step()
        clf_losses += clf_loss.item()
        print('classifier loss', clf_losses / len(data_loader))
        return clf, adv

    def eval_performance_text(test_loader_i, local_clf_i, adv_i):
        # NOTE(review): closes over `idx`, `y_test`, `dict_users_train`,
        # `Z_test` from the enclosing scope; `idx` is late-bound to the
        # loop variable current at call time.
        with torch.no_grad():
            for test_x, test_y, test_z in test_loader_i:
                test_x = test_x.to(args.device)
                local_pred, clf_pred = local_clf_i(test_x)
                adv_pred = adv_i(local_pred)
                y_post_clf = pd.Series(
                    clf_pred.cpu().numpy().ravel(),
                    index=y_test[list(dict_users_train[idx])].index)
                Z_post_adv = pd.DataFrame(
                    adv_pred.cpu().numpy(), columns=Z_test.columns)
                clf_roc_auc, clf_accuracy, adv_acc1, adv_acc2, adv_roc_auc = \
                    _performance_text(test_y, test_z, y_post_clf, Z_post_adv,
                                      epoch=None)
        return clf_roc_auc, clf_accuracy, adv_acc1, adv_acc2, adv_roc_auc

    def eval_global_performance_text(test_loader_i, local_clf_i, adv_i,
                                     global_clf):
        # Same as eval_performance_text, but scores the federated global
        # head applied to the local representation.
        with torch.no_grad():
            for test_x, test_y, test_z in test_loader_i:
                test_x = test_x.to(args.device)
                local_pred, clf_pred = local_clf_i(test_x)
                adv_pred = adv_i(local_pred)
                global_pred = global_clf(local_pred)
                y_post_clf = pd.Series(
                    global_pred.cpu().numpy().ravel(),
                    index=y_test[list(dict_users_train[idx])].index)
                Z_post_adv = pd.DataFrame(
                    adv_pred.cpu().numpy(), columns=Z_test.columns)
                clf_roc_auc, clf_accuracy, adv_acc1, adv_acc2, adv_roc_auc = \
                    _performance_text(test_y, test_z, y_post_clf, Z_post_adv,
                                      epoch=None)
        return clf_roc_auc, clf_accuracy, adv_acc1, adv_acc2, adv_roc_auc

    # per-sensitive-attribute adversarial loss weights
    lambdas = torch.Tensor([30.0, 30.0])

    net_local_list = []

    print(
        '\n\n======================== STARTING LOCAL TRAINING ========================\n\n\n'
    )

    for idx in range(args.num_users):
        print(
            '\n======================== LOCAL TRAINING, USER %d ========================\n\n\n'
            % idx)
        train_data_i_raw = [
            torch.FloatTensor(bb[list(dict_users_train[idx])])
            for bb in train_data
        ]
        train_data_i = TensorDataset(train_data_i_raw[0], train_data_i_raw[1],
                                     train_data_i_raw[2])
        train_loader_i = torch.utils.data.DataLoader(train_data_i,
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     num_workers=4)

        test_data_i_raw = [
            torch.FloatTensor(bb[list(dict_users_train[idx])])
            for bb in test_data
        ]
        test_data_i = TensorDataset(test_data_i_raw[0], test_data_i_raw[1],
                                    test_data_i_raw[2])
        test_loader_i = torch.utils.data.DataLoader(
            test_data_i, batch_size=len(test_data_i), shuffle=False,
            num_workers=4)

        local_clf_i = LocalClassifier(n_features=n_features).to(args.device)
        local_clf_criterion_i = nn.BCELoss().to(args.device)
        local_clf_optimizer_i = optim.SGD(local_clf_i.parameters(), lr=0.1)

        adv_i = Adversary(Z_train.shape[1]).to(args.device)
        adv_criterion_i = nn.BCELoss(reduce=False).to(args.device)
        adv_optimizer_i = optim.SGD(adv_i.parameters(), lr=0.1)

        net_local_list.append([
            train_loader_i, test_loader_i, local_clf_i, local_clf_optimizer_i,
            local_clf_criterion_i, adv_i, adv_criterion_i, adv_optimizer_i
        ])

        N_CLF_EPOCHS = 10
        for epoch in range(N_CLF_EPOCHS):
            print(
                '======================== pretrain_classifier epoch %d ========================'
                % epoch)
            local_clf = pretrain_classifier(local_clf_i, train_loader_i,
                                            local_clf_optimizer_i,
                                            local_clf_criterion_i)
        # test classifier
        # print ('\npretrained test accuracy on income prediction', test_classifier(local_clf_i, test_loader))
        # print ()
        print(
            '======================== local classifier pretraining: evaluating _performance_text on device %d ========================'
            % idx)
        eval_performance_text(test_loader_i, local_clf_i, adv_i)

        N_ADV_EPOCHS = 10
        for epoch in range(N_ADV_EPOCHS):
            print(
                '======================== pretrain_adversary epoch %d ========================'
                % epoch)
            pretrain_adversary(adv_i, local_clf_i, train_loader_i,
                               adv_optimizer_i, adv_criterion_i)
        # test adversary
        # print ('\npretrained adversary accuracy on race, sex prediction', test_adversary(adv_i, local_clf_i, test_loader))
        # print ()
        print(
            '======================== local adversary pretraining: evaluating _performance_text on device %d ========================'
            % idx)
        eval_performance_text(test_loader_i, local_clf_i, adv_i)
        print(
            '======================== by now both the local classifier and the local adversary should do well ========================'
        )

        # train both
        N_EPOCH_COMBINED = 0  #250
        for epoch in range(N_EPOCH_COMBINED):
            print(
                '======================== combined training epoch %d ========================'
                % epoch)
            clf, adv = train_both(local_clf_i, adv_i, train_loader_i,
                                  local_clf_criterion_i, adv_criterion_i,
                                  local_clf_optimizer_i, adv_optimizer_i,
                                  lambdas)
            # test classifier
            #print ('final test accuracy on income prediction', test_classifier(clf, test_loader))
            # test adversary
            #print ('final adversary accuracy on race, sex prediction', test_adversary(adv, clf, test_loader))
        print(
            '======================== local classifier and adversary pretraining: evaluating _performance_text on device %d ========================'
            % idx)
        eval_performance_text(test_loader_i, local_clf_i, adv_i)
        print(
            '======================== by now the local classifier should do well but the local adversary should not do well ========================'
        )
    print(
        '======================== done pretraining local classifiers and adversaries ========================'
    )

    class GlobalClassifier(nn.Module):
        # Federated head: maps the shared local representation to the label.
        def __init__(self, n_hidden=32, p_dropout=0.2):
            super(GlobalClassifier, self).__init__()
            self.global_network = nn.Sequential(
                nn.Linear(n_hidden, n_hidden), nn.ReLU(),
                nn.Dropout(p_dropout),
                nn.Linear(n_hidden, n_hidden), nn.ReLU(),
                nn.Dropout(p_dropout),
                nn.Linear(n_hidden, 1),
            )

        def forward(self, local):
            final = torch.sigmoid(self.global_network(local))
            return final

    # build global model
    global_clf = GlobalClassifier().to(args.device)
    global_clf_criterion = nn.BCELoss().to(args.device)
    global_clf_optimizer = optim.Adam(global_clf.parameters(), lr=0.01)

    # copy weights
    w_glob = global_clf.state_dict()

    print(
        '\n\n======================== STARTING GLOBAL TRAINING ========================\n\n\n'
    )
    global_epochs = 10
    for iter in range(global_epochs):
        w_locals, loss_locals = [], []
        for idx in range(args.num_users):
            print(
                '\n\n======================== GLOBAL TRAINING, ITERATION %d, USER %d ========================\n\n\n'
                % (iter, idx))
            train_loader_i, test_loader_i, local_clf_i, local_clf_optimizer_i, \
                local_clf_criterion_i, adv_i, adv_criterion_i, \
                adv_optimizer_i = net_local_list[idx]

            # train both local models: classifier and adversary
            if iter % 2 == 0:
                N_EPOCH_COMBINED = 0  #65
                for epoch in range(N_EPOCH_COMBINED):
                    print(
                        '======================== combined training epoch %d ========================'
                        % epoch)
                    local_clf_i, adv_i = train_both(
                        local_clf_i, adv_i, train_loader_i,
                        local_clf_criterion_i, adv_criterion_i,
                        local_clf_optimizer_i, adv_optimizer_i, lambdas)

            local = LocalUpdate(args=args, dataset=train_loader_i)
            w, loss = local.train(local_net=local_clf_i,
                                  local_opt=local_clf_optimizer_i,
                                  local_adv=adv_i,
                                  adv_opt=adv_optimizer_i,
                                  global_net=copy.deepcopy(global_clf).to(
                                      args.device),
                                  global_opt=global_clf_optimizer)
            w_locals.append(copy.deepcopy(w))
            loss_locals.append(copy.deepcopy(loss))
        w_glob = FedAvg(w_locals)
        # copy weight to net_glob
        global_clf.load_state_dict(w_glob)

    for idx in range(args.num_users):
        train_loader_i, test_loader_i, local_clf_i, local_clf_optimizer_i, \
            local_clf_criterion_i, adv_i, adv_criterion_i, \
            adv_optimizer_i = net_local_list[idx]
        print(
            '======================== local and global training: evaluating _performance_text on device %d ========================'
            % idx)
        eval_performance_text(test_loader_i, local_clf_i, adv_i)
        print(
            '======================== by now the local classifier should do well but the local adversary should not do well ========================'
        )
        print(
            '======================== local and global training: evaluating _global_performance_text on device %d ========================'
            % idx)
        clf_roc_auc, clf_accuracy, adv_acc1, adv_acc2, adv_roc_auc = \
            eval_global_performance_text(test_loader_i, local_clf_i, adv_i,
                                         global_clf)
        print(
            '======================== by now the global classifier should work better than local classifier ========================'
        )

        clf_all1.append(clf_roc_auc)
        clf_all2.append(clf_accuracy)
        adv_all1.append(adv_acc1)
        adv_all2.append(adv_acc2)
        adv_all3.append(adv_roc_auc)

    print('clf_all1', np.mean(np.array(clf_all1)), np.std(np.array(clf_all1)))
    print('clf_all2', np.mean(np.array(clf_all2)), np.std(np.array(clf_all2)))
    print('adv_all1', np.mean(np.array(adv_all1)), np.std(np.array(adv_all1)))
    print('adv_all2', np.mean(np.array(adv_all2)), np.std(np.array(adv_all2)))
    print('adv_all3', np.mean(np.array(adv_all3)), np.std(np.array(adv_all3)))

    return clf_all1, clf_all2, adv_all1, adv_all2, adv_all3
# One communication round: train the clients scheduled in valid_list[round]
# and FedAvg their weights into the global model.
# NOTE(review): fragment — `valid_list`, `round`, `loss_locals`, `global_net`,
# `dataset_train`, `dict_users`, and `args` come from the enclosing scope;
# `loss_locals` in particular must already be initialised there.
w_locals = []
round_idx = valid_list[round]
# entries of -1 mark empty scheduling slots
user_idx_this_round = round_idx[np.where(round_idx != -1)]
# random alternative:
# user_idx_this_round = np.random.choice(range(args.num_users), 10, replace=False)
if args.all_clients:
    # FIX: the original assigned w_locals[idx] into an empty list when
    # all_clients was set (guaranteed IndexError); pre-fill one slot per
    # user with the current global weights, matching the standard FedAvg
    # template where non-participants keep the global model.
    w_locals = [copy.deepcopy(global_net.state_dict())
                for _ in range(args.num_users)]
if len(user_idx_this_round) > 0:
    for idx in user_idx_this_round:
        local = LocalUpdate(args=args, dataset=dataset_train,
                            idxs=dict_users[idx])
        weight, loss = local.train(
            net=copy.deepcopy(global_net).to(args.device))
        if args.all_clients:
            w_locals[idx] = copy.deepcopy(weight)
        else:
            w_locals.append(copy.deepcopy(weight))
        loss_locals.append(copy.deepcopy(loss))
    # update global weights
    w_glob = FedAvg(w_locals)
    # copy weight to net_glob
    global_net.load_state_dict(w_glob)
# print loss
# Client Sampling m = max(int(args.frac * args.num_users), 1) idxs_users = np.random.choice(range(args.num_users), m, replace=False) # print("Round {}, lr: {:.6f}, {}".format(iter, lr, idxs_users)) # Local Updates for idx in idxs_users: local = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users_train[idx]) net_local = copy.deepcopy(net_local_list[idx]) if args.local_upt_part == 'body': w_local, loss = local.train(net=net_local.to(args.device), body_lr=lr, head_lr=0.) if args.local_upt_part == 'head': w_local, loss = local.train(net=net_local.to(args.device), body_lr=0., head_lr=lr) if args.local_upt_part == 'full': w_local, loss = local.train(net=net_local.to(args.device), body_lr=lr, head_lr=lr) loss_locals.append(copy.deepcopy(loss)) if w_glob is None: w_glob = copy.deepcopy(w_local) else:
def Proposed_G1(net_glob, dict_workers_index, dict_users_data, dict_labels_counter_mainFL, args, cost,
                dataset_train, dataset_test, valid_ds, loss_test_final_main, optimal_clients_number,
                optimal_delay):
    """Run the G1 client-selection FL variant until the global test loss is
    within 0.05 of ``loss_test_final_main`` (the mainFL reference loss).

    Per round: train a set of workers, keep only updates whose validation loss
    meets the selection rule and whose communication cost is within
    ``optimal_delay``, pad the update list with the previous global weights,
    and aggregate with sample-size-weighted ``fed_avg``.

    Returns a (final-metrics dict, per-round-metrics dict) pair.
    NOTE(review): ``dict_labels_counter_mainFL`` and ``optimal_clients_number``
    are accepted but never read in this body — presumably kept for signature
    parity with sibling strategies; confirm against callers.
    """
    # Per-round bookkeeping, serialized to JSON by the caller.
    data_Global_DCFL = {
        "C": [], "Round": [], "Average Loss Train": [], "SDS Loss": [], "SDS Accuracy": [],
        "Workers Number": [], "Large Test Loss": [], "Large Test Accuracy": [],
        "Communication Cost": []
    }
    # Final (end-of-run) metrics.
    Final_LargeDataSetTest_DCFL = {
        "C": [], "Test Accuracy": [], "Test Loss": [], "Train Loss": [], "Train Accuracy": [],
        "Total Rounds": [], "Communication Cost": []
    }
    # copy weights
    # w_glob = net_glob.state_dict()
    temp = copy.deepcopy(net_glob)  # scratch model used to evaluate each client update
    # training
    loss_train = []
    Loss_local_each_global_total = []
    selected_clients_costs_total = []
    # Trackers are over-allocated (100 * epochs) because the while loop's
    # round count is not known up front.
    loss_workers_total = np.zeros(shape=(args.num_users, 100 * args.epochs))
    workers_percent_dist = []
    workers_participation = np.zeros((args.num_users, 100 * args.epochs))
    workers = []
    for i in range(args.num_users):
        workers.append(i)
    # n_k[i] = number of samples held by client i (weights for fed_avg).
    n_k = np.zeros(shape=(args.num_users))
    for i in range(len(dict_users_data)):
        n_k[i] = len(dict_users_data[i])
    Global_Accuracy_Tracker = np.zeros(100 * args.epochs)
    Global_Loss_Tracker = np.zeros(100 * args.epochs)
    Goal_Loss = float(loss_test_final_main)  # stop once this close to mainFL's loss
    net_glob.eval()
    acc_test_final, loss_test_final = test_img(net_glob, dataset_test, args)
    while_counter = float(loss_test_final)
    iter = 0
    total_rounds_dcfl = 0
    pre_net_glob = copy.deepcopy(net_glob)  # previous round's global weights, used as padding
    while abs(while_counter - Goal_Loss) >= 0.05:
        # print("G1 Loss is ", while_counter)
        selected_clients_costs_round = []
        w_locals, loss_locals = [], []
        m = max(int(args.frac * args.num_users), 1)
        # Evaluate the current global model on the shared validation set.
        x = net_glob
        x.eval()
        acc_test_global, loss_test_global = test_img(x, valid_ds, args)
        Loss_local_each_global_total.append(acc_test_global)
        Global_Accuracy_Tracker[iter] = acc_test_global
        Global_Loss_Tracker[iter] = loss_test_global
        workers_count = 0
        temp_w_locals = []
        temp_workers_loss = np.zeros(args.num_users)
        temp_workers_accuracy = np.zeros(args.num_users)
        temp_workers_loss_test = np.zeros(args.num_users)
        # column 0 = client id, column 1 = post-training validation loss
        temp_workers_loss_difference = np.zeros((args.num_users, 2))
        flag = np.zeros(args.num_users)
        list_of_random_workers_newfl = []
        if iter < (args.epochs):
            # Replay the exact worker schedule recorded by the mainFL run so
            # both strategies see the same clients per round.
            for key, value in dict_workers_index.items():
                if key == iter:
                    list_of_random_workers_newfl = dict_workers_index[key]
        else:
            list_of_random_workers_newfl = random.sample(workers, m)
        initial_global_model = copy.deepcopy(net_glob).to(args.device)
        initial_global_model.eval()
        for idx in list_of_random_workers_newfl:
            local = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users_data[idx])
            w, loss = local.train(net=copy.deepcopy(net_glob).to(args.device))
            temp_w_locals.append(copy.deepcopy(w))
            temp_workers_loss[idx] = copy.deepcopy(loss)
            # Score this client's update on the shared validation set.
            temp.load_state_dict(w)
            temp.eval()
            acc_test_local_after, loss_test_local_after = test_img(temp, valid_ds, args)
            temp_workers_accuracy[idx] = acc_test_local_after
            temp_workers_loss_test[idx] = loss_test_local_after
            temp_workers_loss_difference[idx, 0] = int(idx)
            temp_workers_loss_difference[idx, 1] = (loss_test_local_after)
        global_loss_diff = (Global_Loss_Tracker[iter])
        if global_loss_diff >= 0:
            # print("yes")
            # Accept updates that are cheap enough (cost <= optimal_delay) and
            # whose validation loss is at least the current global loss.
            for i in range(len(temp_w_locals)):
                if cost[int(temp_workers_loss_difference[i, 0])] <= optimal_delay and\
                        temp_workers_loss_difference[i, 1] >= global_loss_diff:
                    w_locals.append(copy.deepcopy(temp_w_locals[i]))
                    loss_locals.append(temp_workers_loss[int(temp_workers_loss_difference[i, 0])])
                    flag[int(temp_workers_loss_difference[i, 0])] = 1
                    workers_count += 1
                    workers_participation[int(temp_workers_loss_difference[i, 0])][iter] = 1
                    selected_clients_costs_round.append(cost[int(temp_workers_loss_difference[i, 0])])
        if len(w_locals) < 1:
            # Fallback: nobody qualified, so accept every trained update to
            # keep the round (and FedAvg) well-defined.
            for i in range(len(temp_w_locals)):
                w_locals.append(copy.deepcopy(temp_w_locals[i]))
                loss_locals.append(temp_workers_loss[int(temp_workers_loss_difference[i, 0])])
                flag[int(temp_workers_loss_difference[i, 0])] = 1
                workers_count += 1
                workers_participation[int(temp_workers_loss_difference[i, 0])][iter] = 1
                selected_clients_costs_round.append(cost[int(temp_workers_loss_difference[i, 0])])
        # update global weights
        # w_glob = FedAvg(w_locals)
        # Pad with last round's global weights so the weighted average always
        # spans num_users entries (non-participants contribute the old model).
        for n in range(args.num_users - len(w_locals)):
            w_locals.append(pre_net_glob.state_dict())
        w_glob = fed_avg(w_locals, n_k)
        # copy weight to net_glob
        net_glob.load_state_dict(w_glob)
        # print("round completed")
        if len(loss_locals) > 0:
            loss_avg = sum(loss_locals) / len(loss_locals)
        else:
            loss_avg = None
        loss_train.append(loss_avg)
        workers_percent_dist.append(workers_count / args.num_users)
        print(iter, " round G1 fl finished")
        acc_test_final, loss_test_final = test_img(net_glob, dataset_test, args)
        while_counter = loss_test_final
        data_Global_DCFL["Round"].append(iter)
        data_Global_DCFL["C"].append(args.frac)
        data_Global_DCFL["Average Loss Train"].append(loss_avg)
        data_Global_DCFL["SDS Accuracy"].append(Global_Accuracy_Tracker[iter])
        data_Global_DCFL["SDS Loss"].append(Global_Loss_Tracker[iter])
        data_Global_DCFL["Workers Number"].append(workers_count)
        data_Global_DCFL["Large Test Loss"].append(float(loss_test_final))
        data_Global_DCFL["Large Test Accuracy"].append(float(acc_test_final))
        data_Global_DCFL["Communication Cost"].append(sum(selected_clients_costs_round))
        selected_clients_costs_total.append(sum(selected_clients_costs_round))
        iter += 1
        total_rounds_dcfl = iter
        pre_net_glob = copy.deepcopy(net_glob)
    # plot workers percent of participating
    workers_percent_final = np.zeros(args.num_users)
    workers_name = np.zeros(args.num_users)
    # NOTE(review): divides by (iter - 1); if the while loop ran exactly once
    # this is a division by zero — confirm intended round counting.
    for i in range(len(workers_participation[:, 1])):
        workers_percent_final[i] = sum(workers_participation[i, :]) / (iter - 1)
        workers_name[i] = i
    # testing
    net_glob.eval()
    acc_train_final, loss_train_final = test_img(net_glob, dataset_train, args)
    acc_test_final, loss_test_final = test_img(net_glob, dataset_test, args)
    Final_LargeDataSetTest_DCFL["C"].append(args.frac)
    Final_LargeDataSetTest_DCFL["Test Loss"].append(float(loss_test_final))
    Final_LargeDataSetTest_DCFL["Test Accuracy"].append(float(acc_test_final))
    Final_LargeDataSetTest_DCFL["Train Loss"].append(float(loss_train_final))
    Final_LargeDataSetTest_DCFL["Train Accuracy"].append(float(acc_train_final))
    Final_LargeDataSetTest_DCFL["Total Rounds"].append(int(total_rounds_dcfl))
    Final_LargeDataSetTest_DCFL["Communication Cost"].append(sum(selected_clients_costs_total))
    return Final_LargeDataSetTest_DCFL, data_Global_DCFL
# Federated training of a CNNLSTM over per-vehicle datasets: every user
# participates in every round; weights are FedAvg-aggregated each epoch.
# NOTE(review): `device` is created but never used below — the model is not
# moved to it; confirm whether CPU training is intended here.
device = torch.device('cuda:0')
dict_users = balanced_dataset(USER)
# dict_users=unbalanced_dataset(USER)
net_glob = CNNLSTM()
net_glob.train()
w_glob = net_glob.state_dict()
loss_train = []
acc = 0
for iter in range(EPOCH):
    w_locals, loss_locals = [], []
    idxs_users = USER  # USER is a count: all users train each round
    for idx in range(idxs_users):
        local = LocalUpdate(dataset=Vehicle_train(datasets[idx]), idxs=dict_users[idx])
        # local = LocalUpdate( dataset=Vehicle_train(unbalanced_datasets[idx]), idxs=dict_users[idx])
        # deepcopy keeps the global model unchanged during local training
        w, loss = local.train(net=copy.deepcopy(net_glob))
        w_locals.append(copy.deepcopy(w))
        loss_locals.append(copy.deepcopy(loss))
    w_glob = FedAvg(w_locals)
    net_glob.load_state_dict(w_glob)
    net_glob.eval()
    acc_test, loss_test = test_img(net_glob, Vehicle_test())
    loss_avg = sum(loss_locals) / len(loss_locals)
    print('Round {:3d}, Average loss {:.3f}'.format(iter, loss_avg))
    loss_train.append(loss_avg)
    # checkpoint the loss history every round
    np.save('loss', loss_train)
# testing
# net_glob=torch.load('model_fed.pkl')
# torch.save(net_glob,'model_fed.pkl')
net_glob.eval()
# One round: sample a client fraction, print the ground-truth label
# composition of the sampled clients, run local training, then build
# per-class "monitor" updates from the auxiliary ratio splits.
w_locals, loss_locals, ac_locals, num_samples = [], [], [], []
m = max(int(args.frac * args.num_users), 1)
idxs_users = np.random.choice(range(args.num_users), m, replace=False)
# 26 = number of classes used by ground_truth_composition here
pro_ground_truth = ground_truth_composition(dict_users, idxs_users, 26, label_train)
print(pro_ground_truth)
for idx in idxs_users:
    local = LocalUpdate(args=args, dataset=dataset_train, label=label_train,
                        idxs=dict_users[idx], alpha=ratio, size_average=True)
    # train() here also returns a per-client accuracy term `ac`
    w, loss, ac = local.train(net=copy.deepcopy(net_glob).to(args.device))
    w_locals.append(copy.deepcopy(w))
    loss_locals.append(copy.deepcopy(loss))
    ac_locals.append(copy.deepcopy(ac))
    num_samples.append(len(dict_users[idx]))  # sample counts for weighted aggregation
# monitor
cc_net, cc_loss = [], []
aux_class = [i for i in range(26)]
for i in aux_class:
    # alpha=None: monitor updates use the plain loss on the class-i subset
    cc_local = LocalUpdate(args=args, dataset=dataset_train, label=label_train,
                           idxs=dict_ratio[i], alpha=None, size_average=True)
def main():
    """Compare plain federated averaging ("main FL") against a
    threshold-based client-selection variant ("DCFL") over a sweep of
    client fractions, dumping per-round and final metrics to JSON files.
    """
    # parse args
    args = args_parser()
    args.device = torch.device('cuda:{}'.format(args.gpu) if torch.cuda.is_available() and args.gpu != -1 else 'cpu')
    # load dataset and split users
    if args.dataset == 'mnist':
        trans_mnist = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
        dataset_train = datasets.MNIST('../data/mnist/', train=True, download=True, transform=trans_mnist)
        dataset_test = datasets.MNIST('../data/mnist/', train=False, download=True, transform=trans_mnist)
        # sample users
        if args.iid:
            dict_users = mnist_iid(dataset_train, args.num_users)
        else:
            dict_users, dict_labels_counter = mnist_noniid(dataset_train, args.num_users)
        # NOTE(review): in the iid branch dict_labels_counter is never assigned,
        # so the next line raises UnboundLocalError — confirm iid is unused here.
        dict_users_mainFL, dict_labels_counter_mainFL = dict_users, dict_labels_counter
    elif args.dataset == 'cifar':
        trans_cifar = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
        dataset_train = datasets.CIFAR10('../data/cifar', train=True, download=True, transform=trans_cifar)
        dataset_test = datasets.CIFAR10('../data/cifar', train=False, download=True, transform=trans_cifar)
        if args.iid:
            dict_users = cifar_iid(dataset_train, args.num_users)
        else:
            dict_users, dict_labels_counter = cifar_noniid(dataset_train, args.num_users)
        dict_users_mainFL, dict_labels_counter_mainFL = dict_users, dict_labels_counter
    elif args.dataset == 'fmnist':
        trans_fmnist = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
        dataset_train = datasets.FashionMNIST('../data/fmnist', train=True, download=True, transform=trans_fmnist)
        dataset_test = datasets.FashionMNIST('../data/fmnist', train=False, download=True, transform=trans_fmnist)
        if args.iid:
            dict_users = mnist_iid(dataset_train, args.num_users)
        else:
            dict_users, dict_labels_counter = mnist_noniid(dataset_train, args.num_users)
        dict_users_mainFL, dict_labels_counter_mainFL = dict_users, dict_labels_counter
    else:
        exit('Error: unrecognized dataset')
    img_size = dataset_train[0][0].shape

    # Cross-fraction result accumulators.
    acc_full_distributed = []
    acc_full_main = []
    loss_full_ditributed = []
    loss_full_main = []
    SD_acc_full_distributed = []
    SD_acc_full_main = []
    SD_loss_full_ditributed = []
    SD_loss_full_main = []
    workers_percent_full_distributed = []
    workers_percent_full_main = []
    variable_start = 0.1
    variable_end = 1.0
    while_counter = 0.1
    counter_array = []
    Accuracy_Fraction = []
    Workers_Fraction = []
    accuracy_fraction_each_round_newFL = 0
    workers_fraction_each_round_newFL = 0
    accuracy_fraction_each_round_mainFL = 0
    workers_fraction_each_round_mainFL = 0
    # Per-round records (keyed 'round_N') and global per-round metric tables.
    data_main = {}
    data_DCFL = {}
    data_Global_main = {"C": [], "Round": [], "Average Loss Train": [], "Average Loss Test": [],
                        "Accuracy Test": [], "Workers Number": [], "Large Test Loss": [],
                        "Large Test Accuracy": []}
    data_Global_DCFL = {"C": [], "Round": [], "Average Loss Train": [], "Average Loss Test": [],
                        "Accuracy Test": [], "Workers Number": [], "Large Test Loss": [],
                        "Large Test Accuracy": []}
    Final_LargeDataSetTest_DCFL = {"C": [], "Test Accuracy": [], "Test Loss": [], "Train Loss": [],
                                   "Train Accuracy": [], "Total Rounds": []}
    Final_LargeDataSetTest_MainFL = {"C": [], "Test Accuracy": [], "Test Loss": [], "Train Loss": [],
                                     "Train Accuracy": []}
    # build model
    args.frac = variable_start
    # Hold out a small shared validation set from the test data.
    test_ds, valid_ds_before = torch.utils.data.random_split(dataset_test, (9500, 500))
    valid_ds = create_shared_dataset(valid_ds_before, 200)
    # while variable_start <= variable_end:
    # Sweep client fraction C = c_counter/10 (0.1, 0.4, 0.7, 1.0).
    for c_counter in range(1, 11, 3):
        if args.model == 'cnn' and args.dataset == 'cifar':
            net_glob = CNNCifar(args=args).to(args.device)
            net_glob_mainFL = copy.deepcopy(net_glob)
        elif args.model == 'cnn' and args.dataset == 'mnist':
            net_glob = CNNMnist(args=args).to(args.device)
            net_glob_mainFL = copy.deepcopy(net_glob)
        elif args.model == 'cnn' and args.dataset == 'fmnist':
            net_glob = CNNFashion_Mnist(args=args).to(args.device)
            net_glob_mainFL = copy.deepcopy(net_glob)
        elif args.model == 'mlp':
            len_in = 1
            for x in img_size:
                len_in *= x
            net_glob = MLP(dim_in=len_in, dim_hidden=200, dim_out=args.num_classes).to(args.device)
        else:
            exit('Error: unrecognized model')
        counter_array.append((c_counter/10))
        args.frac = (c_counter/10)
        ###### saving index of workers
        # dict_workers_index records which workers mainFL picked per round so
        # the DCFL run below can replay the same schedule.
        dict_workers_index = defaultdict(list)
        ############# Main FL
        w_glob_mainFL = net_glob_mainFL.state_dict()
        loss_train_mainFL = []
        # cv_loss_2, cv_acc_2 = [], []
        # val_loss_pre_2, counter_2 = 0, 0
        # net_best_2 = None
        # best_loss_2 = None
        # val_acc_list_2, net_list_2 = [], []
        Loss_local_each_global_total_mainFL = []
        Accuracy_local_each_global_total_mainFL = []
        loss_workers_total_mainFL = np.zeros(shape=(args.num_users, args.epochs))
        label_workers_mainFL = {i: np.array([], dtype='int64') for i in range(args.num_users)}
        validation_test_mainFed = []
        acc_test, loss_test = test_img(net_glob_mainFL, dataset_test, args)
        workers_participation_main_fd = np.zeros((args.num_users, args.epochs))
        workers_percent_main = []
        # for iter in range(args.epochs):
        net_glob_mainFL.eval()
        acc_test_final_mainFL, loss_test_final_mainFL = test_img(net_glob_mainFL, dataset_test, args)
        while_counter_mainFL = loss_test_final_mainFL
        iter_mainFL = 0
        workers_mainFL = []
        for i in range(args.num_users):
            workers_mainFL.append(i)
        # NOTE(review): this is an alias, NOT a copy — load_state_dict on
        # temp_netglob_mainFL below also mutates net_glob_mainFL mid-round;
        # confirm whether copy.deepcopy(net_glob_mainFL) was intended.
        temp_netglob_mainFL = net_glob_mainFL
        while iter_mainFL < (args.epochs/2):
            data_main['round_{}'.format(iter_mainFL)] = []
            # data_Global_main['round_{}'.format(iter)] = []
            # print("round started")
            Loss_local_each_global_mainFL = []
            loss_workers_mainFL = np.zeros((args.num_users, args.epochs))
            w_locals_mainFL, loss_locals_mainFL = [], []
            m_mainFL = max(int(args.frac * args.num_users), 1)
            idxs_users_mainFL = np.random.choice(range(args.num_users), m_mainFL, replace=False)
            list_of_random_workers = random.sample(workers_mainFL, m_mainFL)
            for i in range(len(list_of_random_workers)):
                dict_workers_index[iter_mainFL].append(list_of_random_workers[i])
            x_mainFL = net_glob_mainFL
            x_mainFL.eval()
            acc_test_global_mainFL, loss_test_global_mainFL = test_img(x_mainFL, valid_ds, args)
            Loss_local_each_global_total_mainFL.append(loss_test_global_mainFL)
            Accuracy_local_each_global_total_mainFL.append(acc_test_global_mainFL)
            SD_acc_full_main.append(acc_test_global_mainFL)
            SD_loss_full_main.append(loss_test_global_mainFL)
            workers_count_mainFL = 0
            temp_accuracy = np.zeros(1)
            temp_loss_test = np.zeros(1)
            temp_loss_train = np.zeros(1)
            for idx in list_of_random_workers:
                # print("train started")
                local_mainFL = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users_mainFL[idx])
                w_mainFL, loss_mainFL = local_mainFL.train(net=copy.deepcopy(net_glob_mainFL).to(args.device))
                # print(w)
                # print("train completed")
                w_locals_mainFL.append(copy.deepcopy(w_mainFL))
                loss_locals_mainFL.append(copy.deepcopy(loss_mainFL))
                # temp = FedAvg(w)
                # Score this client's update on the shared validation set
                # (see aliasing NOTE above).
                temp_netglob_mainFL.load_state_dict(w_mainFL)
                temp_netglob_mainFL.eval()
                print(pnorm_2(temp_netglob_mainFL, 2))
                acc_test_local_mainFL, loss_test_local_mainFL = test_img(temp_netglob_mainFL, valid_ds, args)
                temp_accuracy[0] = acc_test_local_mainFL
                temp_loss_test[0] = loss_test_local_mainFL
                temp_loss_train[0] = loss_mainFL
                loss_workers_total_mainFL[idx, iter_mainFL] = acc_test_local_mainFL
                workers_participation_main_fd[idx][iter_mainFL] = 1
                workers_count_mainFL += 1
                data_main['round_{}'.format(iter_mainFL)].append({
                    'C': args.frac,
                    'User ID': idx,
                    # 'Local Update': copy.deepcopy(w_mainFL),
                    'Loss Train': temp_loss_train[0],
                    'Loss Test': temp_loss_test[0],
                    'Accuracy': temp_accuracy[0]
                })
            # update global weights
            w_glob_mainFL = FedAvg(w_locals_mainFL)
            # copy weight to net_glob
            net_glob_mainFL.load_state_dict(w_glob_mainFL)
            # print("round completed")
            loss_avg_mainFL = sum(loss_locals_mainFL) / len(loss_locals_mainFL)
            # print('Round {:3d}, Average loss {:.3f}'.format(iter, loss_avg_mainFL))
            loss_train_mainFL.append(loss_avg_mainFL)
            # print("round completed")
            acc_test_round_mainfed, loss_test_round_mainfed = test_img(net_glob_mainFL, dataset_test, args)
            validation_test_mainFed.append(acc_test_round_mainfed)
            workers_percent_main.append(workers_count_mainFL / args.num_users)
            # plot workers percent of participating
            print(iter_mainFL, " round main fl finished")
            acc_test_final_mainFL, loss_test_final_mainFL = test_img(net_glob_mainFL, dataset_test, args)
            while_counter_mainFL = loss_test_final_mainFL
            data_Global_main["Round"].append(iter_mainFL)
            data_Global_main["C"].append(args.frac)
            data_Global_main["Average Loss Train"].append(float(loss_avg_mainFL))
            data_Global_main["Average Loss Test"].append(float(loss_test_global_mainFL))
            data_Global_main["Accuracy Test"].append(float(acc_test_global_mainFL))
            data_Global_main["Workers Number"].append(float(workers_count_mainFL))
            data_Global_main["Large Test Loss"].append(float(loss_test_final_mainFL))
            data_Global_main["Large Test Accuracy"].append(float(acc_test_final_mainFL))
            iter_mainFL = iter_mainFL + 1
        # Per-worker participation rate for the mainFL phase.
        workers_percent_final_mainFL = np.zeros(args.num_users)
        workers_name_mainFL = np.empty(args.num_users)
        for i in range(len(workers_participation_main_fd[:, 1])):
            workers_percent_final_mainFL[i] = sum(workers_participation_main_fd[i, :]) / args.epochs
            workers_name_mainFL[i] = i
        net_glob_mainFL.eval()
        # print("train test started")
        acc_train_final_main, loss_train_final_main = test_img(net_glob_mainFL, dataset_train, args)
        # print("train test finished")
        acc_test_final_main, loss_test_final_main = test_img(net_glob_mainFL, dataset_test, args)
        Final_LargeDataSetTest_MainFL["C"].append(args.frac)
        Final_LargeDataSetTest_MainFL["Test Loss"].append(float(loss_test_final_main))
        Final_LargeDataSetTest_MainFL["Test Accuracy"].append(float(acc_test_final_main))
        Final_LargeDataSetTest_MainFL["Train Loss"].append(float(loss_train_final_main))
        Final_LargeDataSetTest_MainFL["Train Accuracy"].append(float(acc_train_final_main))
        # copy weights
        w_glob = net_glob.state_dict()
        temp_after = copy.deepcopy(net_glob)
        temp_before = copy.deepcopy(net_glob)
        # training
        loss_train = []
        # cv_loss, cv_acc = [], []
        # val_loss_pre, counter = 0, 0
        # net_best = None
        #
        best_loss = None
        # val_acc_list, net_list = [], []
        Loss_local_each_global_total = []
        # valid_ds = create_shared_dataset(dataset_test, 500)
        loss_workers_total = np.zeros(shape=(args.num_users, args.epochs))
        label_workers = {i: np.array([], dtype='int64') for i in range(args.num_users)}
        workers_percent_dist = []
        validation_test_newFed = []
        workers_participation = np.zeros((args.num_users, args.epochs))
        workers = []
        for i in range(args.num_users):
            workers.append(i)
        counter_threshold_decrease = np.zeros(args.epochs)
        Global_Accuracy_Tracker = np.zeros(args.epochs)
        Global_Loss_Tracker = np.zeros(args.epochs)
        threshold = 0.5
        alpha = 0.5  ## decrease parameter
        beta = 0.1  ## delta accuracy controller
        gamma = 0.5  ## threshold decrease parameter
        # DCFL runs until it matches mainFL's final test loss (or epoch cap).
        Goal_Loss = float(loss_test_final_main)
        # for iter in range(args.epochs):
        net_glob.eval()
        acc_test_final, loss_test_final = test_img(net_glob, dataset_test, args)
        while_counter = float(loss_test_final)
        iter = 0
        total_rounds_dcfl = 0
        while (while_counter + 0.01) > Goal_Loss and iter <= args.epochs:
            data_DCFL['round_{}'.format(iter)] = []
            Loss_local_each_global = []
            loss_workers = np.zeros((args.num_users, args.epochs))
            w_locals, loss_locals = [], []
            m = max(int(args.frac * args.num_users), 1)
            idxs_users = np.random.choice(range(args.num_users), m, replace=False)
            counter_threshold = 0
            print(iter, " in dist FL started")
            # if iter % 5 == 0:
            x = copy.deepcopy(net_glob)
            x.eval()
            acc_test_global, loss_test_global = test_img(x, valid_ds, args)
            Loss_local_each_global_total.append(acc_test_global)
            Global_Accuracy_Tracker[iter] = acc_test_global
            Global_Loss_Tracker[iter] = loss_test_global
            # NOTE(review): `&` binds tighter than `>` in Python, so this
            # parses as `iter > (0 & (...))`, i.e. effectively `iter > 0` —
            # the loss-delta condition is never applied. Likely meant
            # `iter > 0 and (...)`; cannot change here without a behavior fix.
            if iter > 0 & (Global_Loss_Tracker[iter-1] - Global_Loss_Tracker[iter] <= beta):
                threshold = threshold - gamma
                if threshold == 0.0:
                    threshold = 1.0
                print("threshold decreased to", threshold)
            workers_count = 0
            SD_acc_full_distributed.append(acc_test_global)
            SD_loss_full_ditributed.append(loss_test_global)
            temp_w_locals = []
            temp_workers_loss = np.empty(args.num_users)
            temp_workers_accuracy = np.empty(args.num_users)
            temp_workers_loss_test = np.empty(args.num_users)
            temp_workers_loss_differenc = np.empty(args.num_users)
            temp_workers_accuracy_differenc = np.empty(args.num_users)
            flag = np.zeros(args.num_users)
            list_of_random_workers_newfl = []
            if iter < (args.epochs/2):
                # Replay the worker schedule recorded during the mainFL phase.
                for key, value in dict_workers_index.items():
                    # print(value)
                    if key == iter:
                        list_of_random_workers_newfl = dict_workers_index[key]
            else:
                list_of_random_workers_newfl = random.sample(workers, m)
            for idx in list_of_random_workers_newfl:
                # print("train started")
                # before starting train
                temp_before = copy.deepcopy(net_glob)
                # temp_before.load_state_dict(w)
                temp_before.eval()
                acc_test_local_before, loss_test_local_before = test_img(temp_before, valid_ds, args)
                local = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users[idx])
                w, loss = local.train(net=copy.deepcopy(net_glob).to(args.device))
                # print(w)
                # print("train completed")
                # print("type of idx is ", type(temp_w_locals))
                temp_w_locals.append(copy.deepcopy(w))
                temp_workers_loss[idx] = copy.deepcopy(loss)
                temp_after = copy.deepcopy(net_glob)
                temp_after.load_state_dict(w)
                temp_after.eval()
                acc_test_local_after, loss_test_local_after = test_img(temp_after, valid_ds, args)
                loss_workers_total[idx, iter] = loss_test_local_after
                temp_workers_accuracy[idx] = acc_test_local_after
                temp_workers_loss_test[idx] = loss_test_local_after
                # Improvement of this client's update over the current global model.
                temp_workers_loss_differenc[idx] = loss_test_local_before - loss_test_local_after
                temp_workers_accuracy_differenc[idx] = acc_test_local_after - acc_test_local_before
            print("train finished")
            # Keep halving the threshold until at least one update qualifies.
            while len(w_locals) < 1:
                # print("recieving started")
                index = 0
                for idx in list_of_random_workers_newfl:
                    # print("acc is ", temp_workers_accuracy[idx])
                    # print(temp_workers_loss_differenc)
                    if workers_count >= m:
                        break
                    elif temp_workers_loss_differenc[idx] >= (threshold) \
                            and temp_workers_loss_differenc[idx] > 0 \
                            and flag[idx] == 0:
                        print("Update Received")
                        w_locals.append(copy.deepcopy(temp_w_locals[index]))
                        # print(temp_w_locals[index])
                        loss_locals.append(temp_workers_loss[idx])
                        flag[idx] = 1
                        workers_count += 1
                        workers_participation[idx][iter] = 1
                        data_DCFL['round_{}'.format(iter)].append({
                            'C': args.frac,
                            'User ID': idx,
                            'Loss Train': loss_workers_total[idx, iter],
                            'Loss Test': temp_workers_loss[idx],
                            'Accuracy': temp_workers_accuracy[idx]
                        })
                    index += 1
                if len(w_locals) < 1:
                    threshold = threshold / 2
                    if threshold == -np.inf:
                        threshold = 1
                    print("threshold increased to ", threshold)
            # update global weights
            w_glob = FedAvg(w_locals)
            # copy weight to net_glob
            net_glob.load_state_dict(w_glob)
            # print("round completed")
            loss_avg = sum(loss_locals) / len(loss_locals)
            loss_train.append(loss_avg)
            workers_percent_dist.append(workers_count/args.num_users)
            counter_threshold_decrease[iter] = counter_threshold
            print(iter, " round dist fl finished")
            acc_test_final, loss_test_final = test_img(net_glob, dataset_test, args)
            while_counter = loss_test_final
            data_Global_DCFL["Round"].append(iter)
            data_Global_DCFL["C"].append(args.frac)
            data_Global_DCFL["Average Loss Train"].append(loss_avg)
            data_Global_DCFL["Accuracy Test"].append(Global_Accuracy_Tracker[iter])
            data_Global_DCFL["Average Loss Test"].append(Global_Loss_Tracker[iter])
            data_Global_DCFL["Workers Number"].append(workers_count)
            data_Global_DCFL["Large Test Loss"].append(float(loss_test_final))
            data_Global_DCFL["Large Test Accuracy"].append(float(acc_test_final))
            total_rounds_dcfl = iter
            iter = iter + 1
        # plot workers percent of participating
        workers_percent_final = np.zeros(args.num_users)
        workers_name = np.empty(args.num_users)
        # print(workers_participation)
        for i in range(len(workers_participation[:, 1])):
            workers_percent_final[i] = sum(workers_participation[i, :])/args.epochs
            workers_name[i] = i
        workers_fraction_each_round_newFL = sum(workers_percent_final)/len(workers_percent_final)
        # testing
        # print("testing started")
        net_glob.eval()
        # print("train test started")
        acc_train_final, loss_train_final = test_img(net_glob, dataset_train, args)
        # print("train test finished")
        acc_test_final, loss_test_final = test_img(net_glob, dataset_test, args)
        acc_full_distributed.append(acc_test_final)
        loss_full_ditributed.append(loss_test_final)
        Final_LargeDataSetTest_DCFL["C"].append(args.frac)
        Final_LargeDataSetTest_DCFL["Test Loss"].append(float(loss_test_final))
        Final_LargeDataSetTest_DCFL["Test Accuracy"].append(float(acc_test_final))
        Final_LargeDataSetTest_DCFL["Train Loss"].append(float(loss_train_final))
        Final_LargeDataSetTest_DCFL["Train Accuracy"].append(float(acc_train_final))
        Final_LargeDataSetTest_DCFL["Total Rounds"].append(int(total_rounds_dcfl))
        variable_start = variable_start + while_counter
        print("C is ", c_counter/10)
    # Persist all collected metrics once the fraction sweep is complete.
    with open('CIFAR_100users_data_main_1229-2020.json', 'w') as outfile:
        json.dump(data_main, outfile)
    with open('CIFAR_100users_data_DCFL_1229-2020.json', 'w') as outfile:
        json.dump(data_DCFL, outfile)
    with open('CIFAR_100users_data_DCFL_Global_1229-2020.json', 'w') as outfile:
        json.dump(data_Global_DCFL, outfile)
    with open('CIFAR_100users_data_main_Global_1229-2020.json', 'w') as outfile:
        json.dump(data_Global_main, outfile)
    with open('Final-CIFAR_100users_data_main_Global_1229-2020.json', 'w') as outfile:
        json.dump(Final_LargeDataSetTest_MainFL, outfile)
    with open('Final-CIFAR_100users_data_DCFL_Global_1229-2020.json', 'w') as outfile:
        json.dump(Final_LargeDataSetTest_DCFL, outfile)
    return 1
# FedAvg training loop. With --all_clients, every client has a fixed slot in
# w_locals (initialized to the global weights) so non-sampled clients still
# contribute their stale copy to the average; otherwise only sampled clients'
# updates are aggregated.
if args.all_clients:
    print("Aggregation over all clients")
    w_locals = [w_glob for i in range(args.num_users)]
    dict_locals = [dict_glob for i in range(args.num_users)]
for iter in range(args.epochs):
    loss_locals = []
    if not args.all_clients:
        w_locals = []
    m = max(int(args.frac * args.num_users), 1)
    idxs_users = np.random.choice(range(args.num_users), m, replace=False)
    for i, idx in enumerate(idxs_users):
        local_model = LocalUpdate(args=args, index=idx, dataset=dataset_train, idxs=dict_users[idx])
        # train() also returns the per-epoch loss curve for this client
        w, loss, epoch_losses = local_model.train(net=copy.deepcopy(net_glob).to(args.device))
        epoch_loss[i] += epoch_losses
        if args.all_clients:
            w_locals[idx] = copy.deepcopy(w)
            # dict_locals[idx] = copy.deepcopy(dict)
        else:
            w_locals.append(copy.deepcopy(w))
            # dict_locals.append(copy.deepcopy(dict))
        loss_locals.append(copy.deepcopy(loss))
    # update global weights
    w_glob = FedAvg(w_locals)
    # for d in dict_locals:
    #     dict_glob.update(d)
    # copy weight to net_glob
    net_glob.load_state_dict(w_glob)
def muhammed(network: torch.nn.Module, user_data_indices: Dict[int, Any], labels_counter: Dict[int, Any],
             args: Any, cost: Any, train_data: DataLoader, test_data: DataLoader,
             shared_data: DataLoader, **kwargs) -> Tuple:
    """Secretary-style client selection FL ("Muhammed" baseline).

    Each round: (1) train the first alpha* shuffled clients to establish an
    accuracy benchmark, (2) accept subsequent clients that beat it (up to R),
    (3) train the accepted set and aggregate with sample-weighted fed_avg,
    padding with the previous global weights. Returns (final metrics dict,
    per-round metrics dict).
    NOTE(review): ``labels_counter`` is accepted but never read in this body.
    """
    global_data = defaultdict(list)
    global_eval_data = defaultdict(list)
    workers_index = defaultdict(list)
    client_participation_counter = defaultdict(int)
    # r1=1 and r2=4 are the best values based on Muhammed's results.
    r1 = kwargs.get("r1", 1)
    r2 = kwargs.get("r2", 4)
    comm_cost_total = []
    # n_k[i] = sample count of client i (fed_avg weights).
    n_k = np.zeros(shape=(args.num_users))
    for i in range(len(user_data_indices)):
        n_k[i] = len(user_data_indices[i])
    pre_net_glob = copy.deepcopy(network)

    def compute_alpha_star(n_clients, r1, r2) -> int:
        # Fraction of clients to "observe" before accepting any (secretary rule).
        # NOTE(review): returns a float despite the -> int annotation; callers
        # wrap it in int() below.
        fact = math.factorial
        coeff = math.exp(-(fact(r2) / fact(r1 - 1))**(1 / (r2 - r1 + 1)))
        return n_clients * coeff

    for comm_round in range(args.epochs):
        selected = set()
        comm_cost = []
        randomized_clients = list(user_data_indices.keys())
        random.shuffle(randomized_clients)
        network.eval()
        with torch.no_grad():
            sds_test_acc, sds_test_loss = test_img(network, shared_data, args)
        # _______________________________________________ #
        # =================== STAGE 1 =================== #
        alpha_star = compute_alpha_star(len(randomized_clients), r1, r2)
        acc_best = 0
        for m in range(int(alpha_star)):
            # Train client `m` using a copy of the global model and then test its
            # accuracy on the test data set. This is to find the "optimal" test threshold
            # value for client selection.
            # NOTE(review): indexes user_data_indices[m] / cost[m] by loop
            # position rather than randomized_clients[m] — confirm whether the
            # shuffle was meant to apply here too.
            trainer = LocalUpdate(args, train_data, user_data_indices[m])
            local_model, local_loss = trainer.train(net=copy.deepcopy(network).to(args.device))
            local_network = copy.deepcopy(network)
            local_network.load_state_dict(local_model)
            local_network.eval()
            acc_client, loss_client = test_img(local_network, datatest=test_data, args=args)
            comm_cost.append(cost[m])
            if acc_client > acc_best:
                acc_best = acc_client
                # selected[clients[m]] = False
        # _______________________________________________ #
        # =================== STAGE 2 =================== #
        set_best = set()
        num_best = 0
        R = max(int(args.frac * args.num_users), 1)  # target number of accepted clients
        for m in range(int(alpha_star), len(randomized_clients)):
            if num_best == R:
                continue  # "Rejects" the client m.
            elif (len(randomized_clients) - m) <= (R - num_best):
                # Not enough candidates left: accept unconditionally to fill R.
                c = randomized_clients[m]
                selected.add(c)
                set_best.add(c)
                num_best += 1
            else:
                # client data m
                # acc_client, loss_client = test_img_user(network, datatest=train_data, idxs=user_data_indices[m], args=args)
                acc_client, loss_client = test_img(network, datatest=test_data, args=args)
                comm_cost.append(cost[m])
                if acc_client > acc_best:
                    c = randomized_clients[m]
                    selected.add(c)
                    set_best.add(c)
                    num_best += 1
        # _______________________________________________ #
        # =================== STAGE 3 =================== #
        # NOTE: Just use 1 to make the algorithm make sense for our setup.
        K = 1
        for _ in range(K):
            local_models, local_losses = [], []
            for client in selected:
                trainer = LocalUpdate(args, train_data, user_data_indices[client])
                local_model, local_loss = trainer.train(net=copy.deepcopy(network).to(args.device))
                local_models.append(local_model)
                local_losses.append(local_loss)
                comm_cost.append(cost[client])
                client_participation_counter[client] += 1
            # Pad to num_users entries with last round's global weights so
            # non-participants contribute the old model to the average.
            for n in range(args.num_users - len(local_models)):
                local_models.append(pre_net_glob.state_dict())
            new_weights = fed_avg(local_models, n_k)
            # new_weights = FedAvg(local_models)
            network.load_state_dict(new_weights)
        pre_net_glob = copy.deepcopy(network)
        # _______________________________________________ #
        # ================= DATA SAVING ================= #
        network.eval()
        with torch.no_grad():
            test_acc, test_loss = test_img(network, test_data, args)
            global_data["Round"].append(comm_round)
            global_data["C"].append(args.frac)
            global_data["Average Loss Train"].append(np.mean(local_losses))
            global_data["SDS Loss"].append(float(sds_test_loss))
            global_data["SDS Accuracy"].append(float(sds_test_acc))
            global_data["Workers Number"].append(int(len(selected)))
            global_data["Large Test Loss"].append(float(test_loss))
            global_data["Large Test Accuracy"].append(float(test_acc))
            global_data["Communication Cost"].append(sum(comm_cost))
            comm_cost_total.append(sum(comm_cost))
    # Calculate the percentage of each workers' participation.
    for client in client_participation_counter:
        client_participation_counter[client] /= args.epochs
    final_train_acc, final_train_loss = test_img(network, train_data, args)
    final_test_acc, final_test_loss = test_img(network, test_data, args)
    network.eval()
    with torch.no_grad():
        global_eval_data["C"].append(args.frac)
        global_eval_data["Test Loss"].append(float(final_test_loss))
        global_eval_data["Test Accuracy"].append(float(final_test_acc))
        global_eval_data["Train Loss"].append(float(final_train_loss))
        global_eval_data["Train Accuracy"].append(float(final_train_acc))
        global_eval_data["Communication Cost"].append(sum(comm_cost_total))
        global_eval_data["Total Rounds"].append(args.epochs)
    return global_eval_data, global_data
# Per-user-model FL loop (separate body/head learning rates). Global weights
# are the plain mean over the m sampled clients' local state dicts, but only
# the keys in w_glob_keys are accumulated/averaged — the remaining keys keep
# the first sampled client's values (personalized-layer scheme).
for iter in range(args.epochs):
    w_glob = {}
    loss_locals = []
    m = max(int(args.frac * args.num_users), 1)
    idxs_users = np.random.choice(range(args.num_users), m, replace=False)
    w_keys_epoch = w_glob_keys
    for idx in idxs_users:
        local = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users_train[idx])
        # each user keeps its own persistent local model
        net_local = net_local_list[idx]
        w_local, loss = local.train(net=net_local.to(args.device), body_lr=lr, head_lr=lr)
        loss_locals.append(copy.deepcopy(loss))
        # sum up weights
        if len(w_glob) == 0:
            # first client seeds all keys
            w_glob = copy.deepcopy(w_local)
        else:
            for k in w_keys_epoch:
                w_glob[k] += w_local[k]
    loss_avg = sum(loss_locals) / len(loss_locals)
    loss_train.append(loss_avg)
    # get weighted average for global weights
    for k in w_keys_epoch:
        w_glob[k] = torch.div(w_glob[k], m)
# FL loop where the LocalUpdate object owns the training (net passed to the
# constructor, local.train() takes no args) and aggregation runs over the
# persistent nets_users table rather than a per-round list.
for iter in range(args.epochs):
    # net_glob.train()
    w_locals, loss_locals = [], []
    m = max(int(args.frac * args.num_users), 1)
    idxs_users = np.random.choice(range(args.num_users), m, replace=False)
    for order, idx in enumerate(idxs_users):
        local = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users[idx],
                            net=copy.deepcopy(net_glob).to(args.device), epochs=iter)
        # w, loss = local.train(net=copy.deepcopy(net_glob).to(args.device))
        # w_locals.append(copy.deepcopy(w))
        # loss_locals.append(copy.deepcopy(loss))
        # w, loss = local.train(net=copy.deepcopy(net_glob).to(args.device))
        w, loss = local.train()  # returns the trained model object, not a state dict
        print(
            '\rEpochs: {}\tUserID: {}\tSequence: {}\tLoss: {:.6f}'.format(
                iter, idx, order, loss))
        loss_locals.append(copy.deepcopy(loss))
        # w_locals.append(copy.deepcopy(w.state_dict()))
        # mark this user as having a fresh update and store its CPU state dict
    nets_users[idx][0] = 1
        nets_users[idx][1] = copy.deepcopy(w.to(torch.device('cpu')).state_dict())
    # update global weights
    w_glob = FedAvg(nets_users)
    # copy weight to net_glob
    net_glob.load_state_dict(w_glob)
# Log the pre-training (round -1) baseline, then run the same
# sum-and-average personalized FL loop as the body/head variant, except
# local.train takes a single lr.
results.append(np.array([-1, acc_test_local, acc_test_avg, acc_test_local, None, None]))
print('Round {:3d}, Acc (local): {:.2f}, Acc (avg): {:.2f}, Acc (local-best): {:.2f}'.format(
    -1, acc_test_local, acc_test_avg, acc_test_local))
for iter in range(args.epochs):
    w_glob = {}
    loss_locals = []
    m = max(int(args.frac * args.num_users), 1)
    idxs_users = np.random.choice(range(args.num_users), m, replace=False)
    w_keys_epoch = w_glob_keys
    for idx in idxs_users:
        local = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users_train[idx])
        # each user keeps its own persistent local model
        net_local = net_local_list[idx]
        w_local, loss = local.train(net=net_local.to(args.device), lr=args.lr)
        loss_locals.append(copy.deepcopy(loss))
        # sum up weights
        if len(w_glob) == 0:
            # first client seeds all keys; non-w_glob_keys entries stay as-is
            w_glob = copy.deepcopy(w_local)
        else:
            for k in w_keys_epoch:
                w_glob[k] += w_local[k]
    loss_avg = sum(loss_locals) / len(loss_locals)
    loss_train.append(loss_avg)
    # get weighted average for global weights
    for k in w_keys_epoch:
        w_glob[k] = torch.div(w_glob[k], m)
def main():
    """Entry point: parse args, load and partition the dataset across users,
    and build the global model(s) used by the two training passes below."""
    # parse args
    args = args_parser()
    args.device = torch.device('cuda:{}'.format(
        args.gpu) if torch.cuda.is_available() and args.gpu != -1 else 'cpu')
    # load dataset and split users
    if args.dataset == 'mnist':
        trans_mnist = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])
        dataset_train = datasets.MNIST('../data/mnist/',
                                       train=True,
                                       download=True,
                                       transform=trans_mnist)
        dataset_test = datasets.MNIST('../data/mnist/',
                                      train=False,
                                      download=True,
                                      transform=trans_mnist)
        print("type of test dataset", type(dataset_test))
        # sample users
        if args.iid:
            dict_users = mnist_iid(dataset_train, args.num_users)
        else:
            dict_users, dict_labels_counter = mnist_noniid(
                dataset_train, args.num_users)
            # the second training pass reuses the same split/counters
            # NOTE(review): dict_users_2 is only defined on this non-IID
            # branch although later code uses it — confirm the IID path.
            dict_users_2, dict_labels_counter_2 = dict_users, dict_labels_counter
            #dict_users, dict_labels_counter = mnist_noniid_unequal(dataset_train, args.num_users)
    elif args.dataset == 'cifar':
        trans_cifar = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        dataset_train = datasets.CIFAR10('../data/cifar',
                                         train=True,
                                         download=True,
                                         transform=trans_cifar)
        dataset_test = datasets.CIFAR10('../data/cifar',
                                        train=False,
                                        download=True,
                                        transform=trans_cifar)
        if args.iid:
            dict_users = cifar_iid(dataset_train, args.num_users)
        else:
            exit('Error: only consider IID setting in CIFAR10')
    else:
        exit('Error: unrecognized dataset')
    img_size = dataset_train[0][0].shape
    # build model
    if args.model == 'cnn' and args.dataset == 'cifar':
        net_glob = CNNCifar(args=args).to(args.device)
        net_glob_2 = CNNCifar(args=args).to(args.device)
    elif args.model == 'cnn' and args.dataset == 'mnist':
        net_glob = CNNMnist(args=args).to(args.device)
        net_glob_2 = CNNMnist(args=args).to(args.device)
    elif args.model == 'mlp':
        len_in = 1
        for x in img_size:
            len_in *= x
        # NOTE(review): no net_glob_2 is built on the mlp branch although the
        # second pass uses one — confirm mlp is not exercised here.
        net_glob = MLP(dim_in=len_in,
                       dim_hidden=200,
                       dim_out=args.num_classes).to(args.device)
    else:
        exit('Error: unrecognized model')
    #print(net_glob)
    #net_glob.train()
# First training pass: pre-select clients whose locally-trained model
# reaches >= 70% of the global model's validation accuracy, then run
# selective FedAvg rounds over that roster.
acc_test, loss_test = test_img(net_glob, dataset_test, args)
print("val test finished")
print("{:.2f}".format(acc_test))
# temp/temp_2 are scratch nets used to evaluate each client's weights
temp = net_glob
#net_glob_2 = net_glob
temp_2 = net_glob_2
# copy weights
w_glob = net_glob.state_dict()
# training bookkeeping
loss_train = []
cv_loss, cv_acc = [], []
val_loss_pre, counter = 0, 0
net_best = None
best_loss = None
val_acc_list, net_list = [], []
Loss_local_each_global_total = []
# hold out a small validation split from the test set
test_ds, valid_ds = torch.utils.data.random_split(dataset_test, (9500, 500))
loss_workers_total = np.zeros(shape=(args.num_users, args.epochs))
label_workers = {
    i: np.array([], dtype='int64')
    for i in range(args.num_users)
}
workers_percent = []
workers_count = 0
# FIX: originally `test_img(x, valid_ds, args)` — `x` is not defined until
# inside the round loop below, so this raised NameError at runtime.
net_glob.eval()
acc_test_global, loss_test_global = test_img(net_glob, valid_ds, args)
selected_users_index = []
for idx in range(args.num_users):
    # print("train started")
    local = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users[idx])
    w, loss = local.train(net=copy.deepcopy(net_glob).to(args.device))
    # print(w)
    # print("train completed")
    # temp = FedAvg(w)
    temp.load_state_dict(w)
    temp.eval()
    acc_test_local, loss_test_local = test_img(temp, valid_ds, args)
    # FIX: originally indexed with the builtin `iter` (the round loop has
    # not started yet); record this pre-selection pass in column 0.
    loss_workers_total[idx, 0] = acc_test_local
    if workers_count >= (args.num_users / 2):
        break
    elif acc_test_local >= (0.7 * acc_test_global):
        selected_users_index.append(idx)
for iter in range(args.epochs):
    print("round started")
    Loss_local_each_global = []
    loss_workers = np.zeros((args.num_users, args.epochs))
    w_locals, loss_locals = [], []
    m = max(int(args.frac * args.num_users), 1)
    #idxs_users = np.random.choice(range(args.num_users), m, replace=False)
    #if iter % 5 == 0:
    # Minoo
    x = net_glob
    x.eval()
    # FIX: re-evaluate the current global model each round (mirrors the
    # second training pass); previously the stale pre-round value was
    # appended every iteration.
    acc_test_global, loss_test_global = test_img(x, valid_ds, args)
    Loss_local_each_global_total.append(acc_test_global)
    for idx in selected_users_index:
        #print("train started")
        local = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users[idx])
        w, loss = local.train(net=copy.deepcopy(net_glob).to(args.device))
        #print(w)
        #print("train completed")
        #temp = FedAvg(w)
        temp.load_state_dict(w)
        temp.eval()
        acc_test_local, loss_test_local = test_img(temp, valid_ds, args)
        loss_workers_total[idx, iter] = acc_test_local
        if workers_count >= (args.num_users / 2):
            break
        elif acc_test_local >= (0.7 * acc_test_global):
            # keep only updates that beat the accuracy bar
            w_locals.append(copy.deepcopy(w))
            loss_locals.append(copy.deepcopy(loss))
            print("Update Received")
            workers_count += 1
    # update global weights
    w_glob = FedAvg(w_locals)
    # copy weight to net_glob
    net_glob.load_state_dict(w_glob)
    print("round completed")
    loss_avg = sum(loss_locals) / len(loss_locals)
    print('Round {:3d}, Average loss {:.3f}'.format(iter, loss_avg))
    loss_train.append(loss_avg)
    workers_percent.append(workers_count)
# plot number of participating workers per round
plt.figure()
plt.plot(range(len(workers_percent)), workers_percent)
plt.ylabel('train_loss')
plt.savefig(
    './save/Newfed_WorkersPercent_0916_{}_{}_{}_C{}_iid{}.png'.format(
        args.dataset, args.model, args.epochs, args.frac, args.iid))
# print(loss_workers_total)
# plot loss curve
# plt.figure()
# plt.plot(range(len(loss_train)), loss_train)
# plt.ylabel('train_loss')
# plt.savefig('./save/Newfed_0916_{}_{}_{}_C{}_iid{}.png'.format(args.dataset, args.model, args.epochs, args.frac, args.iid))
# plt.figure()
for i in range(args.num_users):
    plot = plt.plot(range(len(loss_workers_total[i, :])),
                    loss_workers_total[i, :],
                    label="Worker {}".format(i))
plot5 = plt.plot(range(len(Loss_local_each_global_total)),
                 Loss_local_each_global_total,
                 color='000000',
                 label="Global")
plt.legend(loc='best')
plt.ylabel('Small Test Set Accuracy of workers')
plt.xlabel('Number of Rounds')
plt.savefig(
    './save/NewFed_2workers_Acc_0916_{}_{}_{}_C{}_iid{}.png'.format(
        args.dataset, args.model, args.epochs, args.frac, args.iid))
# plt.figure()
# bins = np.linspace(0, 9, 3)
# a = dict_labels_counter[:, 0].ravel()
# print(type(a))
# b = dict_labels_counter[:, 1].ravel()
# x_labels = ['0', '1', '2', '3','4','5','6','7','8','9']
# # Set plot parameters
# fig, ax = plt.subplots()
# width = 0.1 # width of bar
# x = np.arange(10)
# ax.bar(x, dict_labels_counter[:, 0], width,
# (continuation of the commented-out ax.bar call from the previous chunk)
#        color='#000080', label='Worker 1')
# ax.bar(x + width, dict_labels_counter[:, 1], width, color='#73C2FB', label='Worker 2')
# ax.bar(x + 2*width, dict_labels_counter[:, 2], width, color='#ff0000', label='Worker 3')
# ax.bar(x + 3*width, dict_labels_counter[:, 3], width, color='#32CD32', label='Worker 4')
# ax.set_ylabel('Number of Labels')
# ax.set_xticks(x + width + width / 2)
# ax.set_xticklabels(x_labels)
# ax.set_xlabel('Labels')
# ax.legend()
# plt.grid(True, 'major', 'y', ls='--', lw=.5, c='k', alpha=.3)
# fig.tight_layout()
# plt.savefig(
#     './save/Newfed_2workersLabels_0916_{}_{}_{}_C{}_iid{}.png'.format(args.dataset, args.model, args.epochs, args.frac,
#                                                                       args.iid))
# testing of the first-pass global model
print("testing started")
net_glob.eval()
print("train test started")
acc_train_final, loss_train_final = test_img(net_glob, dataset_train, args)
print("train test finished")
acc_test_final, loss_test_final = test_img(net_glob, dataset_test, args)
print("val test finished")
#print("Training accuracy: {:.2f}".format(acc_train))
#print("Testing accuracy: {:.2f}".format(acc_test))
print("{:.2f}".format(acc_test_final))
#print("{:.2f".format(Loss_local_each_worker))
# training — second pass: plain FedAvg baseline on net_glob_2
w_glob_2 = net_glob_2.state_dict()
loss_train_2 = []
cv_loss_2, cv_acc_2 = [], []
val_loss_pre_2, counter_2 = 0, 0
net_best_2 = None
best_loss_2 = None
val_acc_list_2, net_list_2 = [], []
Loss_local_each_global_total_2 = []
loss_workers_total_2 = np.zeros(shape=(args.num_users, args.epochs))
label_workers_2 = {
    i: np.array([], dtype='int64')
    for i in range(args.num_users)
}
for iter in range(args.epochs):
    print("round started")
    Loss_local_each_global_2 = []
    loss_workers_2 = np.zeros((args.num_users, args.epochs))
    w_locals_2, loss_locals_2 = [], []
    m_2 = max(int(args.frac * args.num_users), 1)
    idxs_users_2 = np.random.choice(range(args.num_users),
                                    m_2,
                                    replace=False)
    # Minoo
    # evaluate the current global model on the small validation split
    x_2 = net_glob_2
    x_2.eval()
    acc_test_global_2, loss_test_global_2 = test_img(x_2, valid_ds, args)
    Loss_local_each_global_total_2.append(acc_test_global_2)
    for idx in idxs_users_2:
        #print("train started")
        local_2 = LocalUpdate(args=args,
                              dataset=dataset_train,
                              idxs=dict_users_2[idx])
        w_2, loss_2 = local_2.train(
            net=copy.deepcopy(net_glob_2).to(args.device))
        #print(w)
        #print("train completed")
        w_locals_2.append(copy.deepcopy(w_2))
        loss_locals_2.append(copy.deepcopy(loss_2))
        #temp = FedAvg(w)
        # score this client's weights on the validation split
        temp_2.load_state_dict(w_2)
        temp_2.eval()
        acc_test_local_2, loss_test_local_2 = test_img(
            temp_2, valid_ds, args)
        loss_workers_total_2[idx, iter] = acc_test_local_2
    # update global weights
    w_glob_2 = FedAvg(w_locals_2)
    # copy weight to net_glob
    net_glob_2.load_state_dict(w_glob_2)
    print("round completed")
    loss_avg_2 = sum(loss_locals_2) / len(loss_locals_2)
    print('Round {:3d}, Average loss {:.3f}'.format(iter, loss_avg_2))
    loss_train_2.append(loss_avg_2)
    print("round completed")
# plot training-loss curves for both passes
plt.figure()
plt.plot(range(len(loss_train_2)),
         loss_train_2,
         color='#000000',
         label="Main FL")
plt.plot(range(len(loss_train)),
         loss_train,
         color='#ff0000',
         label="Centralized Algorithm")
plt.ylabel('train_loss')
plt.savefig('./save/main_fed_0916_{}_{}_{}_C{}_iid{}.png'.format(
    args.dataset, args.model, args.epochs, args.frac, args.iid))
# print(loss_workers_total)
plt.figure()
for i in range(args.num_users):
    plot = plt.plot(range(len(loss_workers_total_2[i, :])),
                    loss_workers_total_2[i, :],
                    label="Worker {}".format(i))
plot5 = plt.plot(range(len(Loss_local_each_global_total_2)),
                 Loss_local_each_global_total_2,
                 color='000000',
                 label="Global")
plt.legend(loc='best')
plt.ylabel('Small Test Set Accuracy of workers')
plt.xlabel('Number of Rounds')
plt.savefig('./save/mainfed_Acc_0916_{}_{}_{}_C{}_iid{}.png'.format(
    args.dataset, args.model, args.epochs, args.frac, args.iid))
# plt.figure()
# bins = np.linspace(0, 9, 3)
# a = dict_labels_counter_2[:, 0].ravel()
# print(type(a))
# b = dict_labels_counter_2[:, 1].ravel()
# x_labels = ['0', '1', '2', '3','4','5','6','7','8','9']
# # Set
# (continuation of the commented-out bar-chart code from the previous chunk)
# plot parameters
# fig, ax = plt.subplots()
# width = 0.1 # width of bar
# x = np.arange(10)
# ax.bar(x, dict_labels_counter_2[:, 0], width, color='#000080', label='Worker 1')
# ax.bar(x + width, dict_labels_counter_2[:, 1], width, color='#73C2FB', label='Worker 2')
# ax.bar(x + 2*width, dict_labels_counter_2[:, 2], width, color='#ff0000', label='Worker 3')
# ax.bar(x + 3*width, dict_labels_counter_2[:, 3], width, color='#32CD32', label='Worker 4')
# ax.set_ylabel('Number of Labels')
# ax.set_xticks(x + width + width / 2)
# ax.set_xticklabels(x_labels)
# ax.set_xlabel('Labels')
# ax.legend()
# plt.grid(True, 'major', 'y', ls='--', lw=.5, c='k', alpha=.3)
# fig.tight_layout()
# plt.savefig(
#     './save/main_fed_2workersLabels_0916_{}_{}_{}_C{}_iid{}.png'.format(args.dataset, args.model, args.epochs, args.frac,
#                                                                         args.iid))
# testing — final evaluation of the first-pass global model
print("testing started")
net_glob.eval()
print("train test started")
acc_train_final, loss_train_final = test_img(net_glob, dataset_train, args)
print("train test finished")
acc_test_final, loss_test_final = test_img(net_glob, dataset_test, args)
print("val test finished")
#print("Training accuracy: {:.2f}".format(acc_train))
#print("Testing accuracy: {:.2f}".format(acc_test))
print("{:.2f}".format(acc_test_final))
#print("{:.2f".format(Loss_local_each_worker))
# NOTE(review): only net_glob is re-tested here; net_glob_2's final
# accuracy is never reported — confirm that is intended.
return loss_test_final, loss_train_final
# Semi-supervised FL round: train both the student and EMA (teacher) models
# on each sampled user, then FedAvg both sets of weights.
w_locals, w_ema_locals, loss_locals, loss_consistent_locals = [], [], [], []
m = max(int(args.frac * args.num_users), 1)
#choice trained users
idxs_users = np.random.choice(range(args.num_users), m, replace=False)
for idx in idxs_users:
    # bump this user's personal epoch counter (fed to local training)
    dict_userepoch[idx] = dict_userepoch[idx] + 1
    local = LocalUpdate(args=args,
                        dataset=dataset_train,
                        dataset_ema=dataset_train_ema,
                        idxs=dict_users[idx],
                        idxs_labeled=dict_users_labeled[idx],
                        pseudo_label=pseudo_label)
    w, w_ema, loss, loss_consistent = local.train(
        net=copy.deepcopy(net_glob).to(args.device),
        net_ema=copy.deepcopy(net_ema_glob).to(args.device),
        args=args,
        iter_glob=iter + 1,
        user_epoch=dict_userepoch[idx])
    w_locals.append(copy.deepcopy(w))
    w_ema_locals.append(copy.deepcopy(w_ema))
    loss_locals.append(copy.deepcopy(loss))
    loss_consistent_locals.append(copy.deepcopy(loss_consistent))
# aggregate student and teacher weights separately
w_glob = FedAvg(w_locals)
w_ema_glob = FedAvg(w_ema_locals)
net_glob.load_state_dict(w_glob)
net_ema_glob.load_state_dict(w_ema_glob)
net_glob.eval()
w_locals, loss_locals = [], [] #w_locals = array of local_weights w_locals_1, loss_locals_1 = [],[] w_locals_5, loss_locals_5 = [],[] w_locals_7, loss_locals_7 = [],[] w_locals_10, loss_locals_10 = [],[] m = max(int(args.frac * args.num_users), 1) #m = number of users used in one ROUND/EPOCH, check utils.options for more clarity on this idxs_users = np.random.choice(range(args.num_users), m, replace=False) #Randomly selecting m users out of 32 users. NEED TO REPLACE THIS WITH OUR SAMPLING MECHANISM for idx in idxs_users: local = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users[idx]) local1 = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users[idx]) local5 = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users[idx]) local7 = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users[idx]) local10 = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users[idx]) w, loss = local.train(net=copy.deepcopy(net_glob).to(args.device)) w1, loss1 = local1.train(net=copy.deepcopy(net_glob1).to(args.device)) w5, loss5 = local5.train(net=copy.deepcopy(net_glob5).to(args.device)) w7, loss7 = local7.train(net=copy.deepcopy(net_glob7).to(args.device)) w10, loss10 = local10.train(net=copy.deepcopy(net_glob10).to(args.device)) print("***BLAH BLAH BLAH***") if idx==fixed_agent_1: if updates_recorded_1: w1 = copy.deepcopy(fixed_agent_storage_1) elif not updates_recorded_1: fixed_agent_storage_1 = copy.deepcopy(w1) updates_recorded_1 = True if idx in fixed_agent_5:
def main():
    """Entry point: seed all RNGs for reproducibility, load/partition the
    chosen dataset, build the model, and run FedAvg with per-round tests."""
    # parse args
    args = args_parser()
    # NOTE(review): os.environ values must be str — assumes args.gpu is
    # parsed as a string; confirm against the argument parser.
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    dataPath = args.datasetPath
    # random seed — make cuDNN deterministic for reproducibility
    np.random.seed(args.seed)
    cudnn.benchmark = False
    cudnn.deterministic = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    # load dataset and split users
    if args.dataset == 'cifar10':
        _CIFAR_TRAIN_TRANSFORMS = [
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010)),
        ]
        dataset_train = datasets.CIFAR10(
            dataPath,
            train=True,
            download=True,
            transform=transforms.Compose(_CIFAR_TRAIN_TRANSFORMS))
        _CIFAR_TEST_TRANSFORMS = [
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010)),
        ]
        dataset_test = datasets.CIFAR10(
            dataPath,
            train=False,
            transform=transforms.Compose(_CIFAR_TEST_TRANSFORMS))
        if args.iid == 0:  # IID
            dict_users = cifar_iid(dataset_train, args.num_users)
        elif args.iid == 2:  # non-IID
            dict_users = cifar_noniid_2(dataset_train, args.num_users)
        else:
            exit('Error: unrecognized class')
    elif args.dataset == 'emnist':
        # pad 28x28 EMNIST letters to 32x32
        _MNIST_TRAIN_TRANSFORMS = _MNIST_TEST_TRANSFORMS = [
            transforms.ToTensor(),
            transforms.ToPILImage(),
            transforms.Pad(2),
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ]
        dataset_train = datasets.EMNIST(
            dataPath,
            train=True,
            download=True,
            transform=transforms.Compose(_MNIST_TRAIN_TRANSFORMS),
            split='letters')
        dataset_test = datasets.EMNIST(
            dataPath,
            train=False,
            download=True,
            transform=transforms.Compose(_MNIST_TEST_TRANSFORMS),
            split='letters')
        dict_users = femnist_star(dataset_train, args.num_users)
    elif args.dataset == 'cifar100':
        _CIFAR_TRAIN_TRANSFORMS = [
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010)),
        ]
        dataset_train = datasets.CIFAR100(
            dataPath,
            train=True,
            download=True,
            transform=transforms.Compose(_CIFAR_TRAIN_TRANSFORMS))
        _CIFAR_TEST_TRANSFORMS = [
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010)),
        ]
        dataset_test = datasets.CIFAR100(
            dataPath,
            train=False,
            transform=transforms.Compose(_CIFAR_TEST_TRANSFORMS))
        if args.iid == 0:  # IID
            dict_users = cifar_100_iid(dataset_train, args.num_users)
        elif args.iid == 2:  # non-IID
            dict_users = cifar_100_noniid(dataset_train, args.num_users)
        else:
            exit('Error: unrecognized dataset')
    # build model
    if args.dataset == 'cifar10':
        if args.model == "CNNStd5":
            net_glob = CNNCifarStd5().cuda()
        else:
            exit('Error: unrecognized model')
    elif args.dataset == 'emnist':
        if args.model == "CNNStd5":
            net_glob = CNNEmnistStd5().cuda()
        else:
            exit('Error: unrecognized model')
    elif args.dataset == 'cifar100':
        if args.model == "CNNStd5":
            net_glob = CNNCifar100Std5().cuda()
        else:
            exit('Error: unrecognized model')
    else:
        exit('Error: unrecognized model')
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in net_glob.parameters()])))
    net_glob.train()
    learning_rate = args.lr
    test_acc = []
    avg_loss = []
    # Train
    for iter in range(args.epochs):
        m = max(int(args.frac * args.num_users), 1)
        idxs_users = np.random.choice(range(args.num_users), m, replace=False)
        w_locals, loss_locals = [], []
        for i, idx in enumerate(idxs_users):
            print('user: {:d}'.format(idx))
            local = LocalUpdate(args=args,
                                dataset=dataset_train,
                                idxs=dict_users[idx])
            w, loss = local.train(model=copy.deepcopy(net_glob).cuda(),
                                  lr=learning_rate)
            w_locals.append(copy.deepcopy(w))
            loss_locals.append(copy.deepcopy(loss))
        # update global weights
        w_glob = FedAvg(w_locals)
        # copy weight to net_glob
        net_glob.load_state_dict(w_glob)
        # print loss
        loss_avg = sum(loss_locals) / len(loss_locals)
        print('Round {:3d}, Average loss {:.6f}'.format(iter, loss_avg))
        acc_test, _ = test_img(net_glob.cuda(), dataset_test, args)
        print("test accuracy: {:.4f}".format(acc_test))
        test_acc.append(acc_test)
        avg_loss.append(loss_avg)
        # decay the learning rate between rounds
        learning_rate = adjust_learning_rate(learning_rate, args.lr_drop)
    # persist per-round accuracy/loss under a name encoding the run config
    filename = './accuracy-' + str(args.dataset) + '-iid' + str(args.iid) + '-' + str(args.epochs) + '-seed' \
               + str(args.seed) + '-' + str(args.loss_type) + '-beta' + str(args.beta) + '-mu' + str(args.mu)
    save_result(test_acc, avg_loss, filename)
# One step of the CL/FL comparison: update the centralized (CL) model, then
# run local FL updates and track smoothness constants beta and lambda.
w_cl, loss_cl, delta_loss_cl = glob_cl.cltrain(
    net=copy.deepcopy(net_glob_cl).to(args.device))
net_glob_cl.load_state_dict(w_cl)  # update the CL w
# FL setting
# M clients local update
m = args.num_users  # num of selected users
idxs_users = np.random.choice(
    range(args.num_users), m, replace=False)  # select randomly m clients
Ld_temp = []
for idx in idxs_users:
    glob_fl = LocalUpdate(args=args,
                          dataset=dataset_train,
                          idxs=dict_users[idx])  # data select
    w_fl, loss, delta_loss_fl = glob_fl.train(
        net=copy.deepcopy(net_local_fl[idx]).to(args.device))
    # only collect local models on the last local epoch
    if iter_local == args.local_ep - 1:
        w_locals.append(copy.deepcopy(w_fl))  # collect local model
    net_local_fl[idx].load_state_dict(w_fl)  # update the FL w
    loss_locals.append(loss)  # collect local loss function
    # Compute beta and lambda as the ratio of the gradient difference to the
    # first-layer weight difference between the CL and FL models
    temp = torch.norm(delta_loss_cl - delta_loss_fl).item(
    ) / torch.norm(w_cl['layer_input.weight'] -
                   w_fl['layer_input.weight']).item()
    if temp > beta:
        beta = temp
    if temp < lamb:
        lamb = temp
# Classic FedAvg: optionally keep a slot per client, otherwise collect only
# the sampled clients' weights each round.
if args.all_clients:
    print("Aggregation over all clients")
    w_locals = [w_glob for i in range(args.num_users)]
for iter in range(args.epochs):  # one global-model update round
    loss_locals = []
    if not args.all_clients:
        w_locals = []
    m = max(int(args.frac * args.num_users), 1)
    # randomly select a subset of clients: selecting everyone increases
    # communication cost and may not improve results
    idxs_users = np.random.choice(
        range(args.num_users), m,
        replace=False)
    for idx in idxs_users:  # conceptually done in parallel per client
        # models/Update.py: client-side update returning this client's weights
        local = LocalUpdate(
            args=args, dataset=dataset_train, idxs=dict_users[idx]
        )
        # key step: the server hands the current global model to the client
        w, loss = local.train(net=copy.deepcopy(net_glob).to(
            args.device))
        if args.all_clients:
            w_locals[idx] = copy.deepcopy(w)
        else:
            w_locals.append(copy.deepcopy(w))
        loss_locals.append(copy.deepcopy(loss))
    # update global weights
    w_glob = FedAvg(w_locals)  # models/Fed.py: aggregate client weights
    # copy weight to net_glob
    net_glob.load_state_dict(w_glob)
    # print loss
    loss_avg = sum(loss_locals) / len(loss_locals)
    print('Round {:3d}, Average loss {:.3f}'.format(iter, loss_avg))
    loss_train.append(loss_avg)  # no convergence check on the weights here
# FedAvg round with CSV logging of accuracy and loss per round.
acc_train_fl_his.append(acc_test_fl)
# append this round's test accuracy to the CSV log
filename = 'result/MLP/' + "Accuracy_FedAvg_unbalance_MLP.csv"
with open(filename, "a") as myfile:
    myfile.write(str(acc_test_fl) + ',')
w_locals, loss_locals = [], []
# M clients local update
m = max(int(args.frac * args.num_users), 1)  # num of selected users
idxs_users = np.random.choice(range(
    args.num_users), m, replace=False)  # select randomly m clients
for idx in idxs_users:
    local = LocalUpdate(args=args,
                        dataset=dataset_train,
                        idxs=dict_users[idx])  # data select
    w, loss = local.train(
        net=copy.deepcopy(net_glob_fl).to(args.device))
    w_locals.append(copy.deepcopy(w))  # collect local model
    loss_locals.append(
        copy.deepcopy(loss))  # collect local loss function
w_glob_fl = FedAvg(w_locals)  # update the global model
net_glob_fl.load_state_dict(w_glob_fl)  # copy weight to net_glob
# Loss
loss = sum(loss_locals) / len(loss_locals)
print('fl,iter = ', iter, 'loss=', loss)
filename = 'result/MLP/' + "Loss_FedAvg_unbalance_MLP.csv"
with open(filename, "a") as myfile:
    myfile.write(str(loss) + ',')
# FL_Optimize setting
# Round loop with a fixed roster of the first ten users (sampling disabled);
# after aggregation, per-layer weight statistics are recorded.
for iter in range(args.epochs):
    #agent_found_count = 0
    w_locals, loss_locals = [], []  #w_locals = array of local_weights
    m = max(
        int(args.frac * args.num_users), 1
    )  #m = number of users used in one ROUND/EPOCH, check utils.options for more clarity on this
    users_list = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    idxs_users = np.asarray(users_list)
    for idx in idxs_users:
        local = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users[idx])
        w, loss = local.train(net=copy.deepcopy(net_glob).to(args.device))
        print("***BLAH BLAH BLAH***")
        #NO ATTACK
        w_locals.append(copy.deepcopy(w))
        loss_locals.append(copy.deepcopy(loss))
    # update global weights
    w_glob = FedAvg(w_locals)
    # record per-layer statistics of the aggregated weights for this round
    #dict_structure = {epoch: {layer:[size,mean,std,min,max]}}
    conv1 = w_glob['conv1.weight']
    conv2 = w_glob['conv2.weight']
    fc1 = w_glob['fc1.weight']
    fc2 = w_glob['fc2.weight']
    # (statement continues in the next chunk)
    descriptive_stats[str(iter+1)] = {'conv1':[conv1.size(),conv1.mean().item(),conv1.std().item(),conv1.min().item(),conv1.max().item()],\
def ICC_FL(net_glob, dict_workers_index, dict_users_data, dict_labels_counter_mainFL,
           args, cost, dataset_train, dataset_test, valid_ds,
           loss_test_final_main, optimal_delay):
    """Cost/loss-aware client-selection FL (DCFL).

    Runs rounds until the global test loss is within 0.05 of the baseline
    `loss_test_final_main`, selecting only clients whose post-training
    validation loss passes an adaptive threshold and whose cost fits the
    delay budget.

    Returns:
        tuple: (Final_LargeDataSetTest_DCFL, data_Global_DCFL) — final
        summary metrics and per-round tracking dicts.
    """
    # per-round and final metric accumulators
    data_Global_DCFL = {"C": [], "Round": [], "Average Loss Train": [],
                        "SDS Loss": [], "SDS Accuracy": [], "Workers Number": [],
                        "Large Test Loss": [], "Large Test Accuracy": [],
                        "Communication Cost": []}
    Final_LargeDataSetTest_DCFL = {"C": [], "Test Accuracy": [], "Test Loss": [],
                                   "Train Loss": [], "Train Accuracy": [],
                                   "Total Rounds": [], "Communication Cost": []}
    # copy weights
    w_glob = net_glob.state_dict()
    temp = copy.deepcopy(net_glob)  # scratch net for evaluating client weights

    # training bookkeeping (sized for up to 10x the nominal epoch budget,
    # since the while-loop may run past args.epochs)
    loss_train = []
    Loss_local_each_global_total = []
    selected_clients_costs_total = []
    loss_workers_total = np.zeros(shape=(args.num_users, 10 * args.epochs))
    workers_percent_dist = []
    workers_participation = np.zeros((args.num_users, 10 * args.epochs))
    workers = []
    for i in range(args.num_users):
        workers.append(i)
    # n_k: per-user data sizes (used only by the commented-out fed_avg below)
    n_k = np.zeros(shape=(args.num_users))
    for i in range(len(dict_users_data)):
        n_k[i] = len(dict_users_data[i])

    counter_threshold_decrease = np.zeros(10 * args.epochs)
    Global_Accuracy_Tracker = np.zeros(10 * args.epochs)
    Global_Loss_Tracker = np.zeros(10 * args.epochs)
    threshold = 1.0
    beta = 0.1  ## delta accuracy controller
    gamma = 0.05  ## threshold decrease parameter

    Goal_Loss = float(loss_test_final_main)

    net_glob.eval()
    acc_test_final, loss_test_final = test_img(net_glob, dataset_test, args)
    while_counter = float(loss_test_final)
    iter = 0
    total_rounds_dcfl = 0
    pre_net_glob = copy.deepcopy(net_glob)

    # keep running rounds until within 0.05 of the baseline loss
    while abs(while_counter - Goal_Loss) >= 0.05:
        selected_clients_costs_round = []
        w_locals, loss_locals = [], []
        m = max(int(args.frac * args.num_users), 1)
        counter_threshold = 0
        # evaluate the current global model on the small validation split
        x = net_glob
        x.eval()
        acc_test_global, loss_test_global = test_img(x, valid_ds, args)
        Loss_local_each_global_total.append(acc_test_global)
        Global_Accuracy_Tracker[iter] = acc_test_global
        Global_Loss_Tracker[iter] = loss_test_global
        # FIX: the original wrote `if iter > 0 & (...)`; `&` binds tighter
        # than `>`, so the loss-improvement test was never evaluated and the
        # condition reduced to `iter > 0`.  Use boolean `and`.
        if iter > 0 and (Global_Loss_Tracker[iter - 1] -
                         Global_Loss_Tracker[iter] <= beta):
            # loss not improving fast enough: tighten the selection threshold
            threshold = threshold - gamma
            # NOTE(review): exact float comparison — repeated subtraction of
            # gamma can skip 0.0 exactly; confirm whether `<= 0.0` was meant.
            if threshold == 0.0:
                threshold = 1.0
        workers_count = 0
        temp_w_locals = []
        temp_workers_loss = np.zeros(args.num_users)
        temp_workers_accuracy = np.zeros(args.num_users)
        temp_workers_loss_test = np.zeros(args.num_users)
        temp_workers_loss_difference = np.zeros(args.num_users)
        flag = np.zeros(args.num_users)
        list_of_random_workers_newfl = []
        # reuse the main-FL roster for the first args.epochs rounds, then
        # fall back to uniform random sampling
        if iter < (args.epochs):
            for key, value in dict_workers_index.items():
                if key == iter:
                    list_of_random_workers_newfl = dict_workers_index[key]
        else:
            list_of_random_workers_newfl = random.sample(workers, m)

        for idx in list_of_random_workers_newfl:
            # validation loss of the global model before this client trains
            initial_global_model = copy.deepcopy(net_glob).to(args.device)
            initial_global_model.eval()
            acc_test_local_initial, loss_test_local_initial = test_img(
                initial_global_model, valid_ds, args)
            local = LocalUpdate(args=args,
                                dataset=dataset_train,
                                idxs=dict_users_data[idx])
            w, loss = local.train(net=copy.deepcopy(net_glob).to(args.device))
            temp_w_locals.append(copy.deepcopy(w))
            temp_workers_loss[idx] = copy.deepcopy(loss)
            # evaluate this client's trained weights on the validation split
            temp.load_state_dict(w)
            temp.eval()
            acc_test_local_after, loss_test_local_after = test_img(
                temp, valid_ds, args)
            loss_workers_total[idx, iter] = loss_test_local_after
            temp_workers_accuracy[idx] = acc_test_local_after
            temp_workers_loss_test[idx] = loss_test_local_after
            temp_workers_loss_difference[idx] = abs(loss_test_local_after -
                                                    loss_test_local_initial)

        # accept clients whose validation loss and cost pass the bar;
        # double the threshold until at least one client qualifies
        while len(w_locals) < 1:
            index = 0
            for idx in list_of_random_workers_newfl:
                if workers_count >= m:
                    break
                elif temp_workers_loss_test[idx] <= threshold \
                        and flag[idx] == 0 and cost[idx] <= optimal_delay:
                    w_locals.append(copy.deepcopy(temp_w_locals[index]))
                    loss_locals.append(temp_workers_loss[idx])
                    flag[idx] = 1  # never accept the same client twice
                    workers_count += 1
                    workers_participation[idx][iter] = 1
                    selected_clients_costs_round.append(cost[idx])
                index += 1
            if len(w_locals) < 1:
                threshold = threshold * 2

        # update global weights
        w_glob = FedAvg(w_locals)
        # for n in range(args.num_users - len(w_locals)):
        #     w_locals.append(pre_net_glob.state_dict())
        # w_glob = fed_avg(w_locals, n_k)

        # copy weight to net_glob
        net_glob.load_state_dict(w_glob)

        loss_avg = sum(loss_locals) / len(loss_locals)
        loss_train.append(loss_avg)
        workers_percent_dist.append(workers_count / args.num_users)
        counter_threshold_decrease[iter] = counter_threshold
        print(iter, " round dist fl finished")
        acc_test_final, loss_test_final = test_img(net_glob, dataset_test, args)
        while_counter = loss_test_final

        data_Global_DCFL["Round"].append(iter)
        data_Global_DCFL["C"].append(args.frac)
        data_Global_DCFL["Average Loss Train"].append(loss_avg)
        data_Global_DCFL["SDS Accuracy"].append(Global_Accuracy_Tracker[iter])
        data_Global_DCFL["SDS Loss"].append(Global_Loss_Tracker[iter])
        data_Global_DCFL["Workers Number"].append(workers_count)
        data_Global_DCFL["Large Test Loss"].append(float(loss_test_final))
        data_Global_DCFL["Large Test Accuracy"].append(float(acc_test_final))
        data_Global_DCFL["Communication Cost"].append(
            sum(selected_clients_costs_round))

        selected_clients_costs_total.append(sum(selected_clients_costs_round))
        iter += 1
        total_rounds_dcfl = iter
        pre_net_glob = copy.deepcopy(net_glob)

    # per-worker participation rate across the rounds that actually ran
    workers_percent_final = np.zeros(args.num_users)
    workers_name = np.zeros(args.num_users)
    for i in range(len(workers_participation[:, 1])):
        # NOTE(review): divides by (iter - 1) although `iter` rounds ran —
        # confirm whether plain `iter` was intended.
        workers_percent_final[i] = sum(workers_participation[i, :]) / (iter - 1)
        workers_name[i] = i
    # selected_clients_costs_total.append(sum(selected_clients_costs_round))

    # final testing on the full train and test sets
    net_glob.eval()
    acc_train_final, loss_train_final = test_img(net_glob, dataset_train, args)
    acc_test_final, loss_test_final = test_img(net_glob, dataset_test, args)
    Final_LargeDataSetTest_DCFL["C"].append(args.frac)
    Final_LargeDataSetTest_DCFL["Test Loss"].append(float(loss_test_final))
    Final_LargeDataSetTest_DCFL["Test Accuracy"].append(float(acc_test_final))
    Final_LargeDataSetTest_DCFL["Train Loss"].append(float(loss_train_final))
    Final_LargeDataSetTest_DCFL["Train Accuracy"].append(float(acc_train_final))
    Final_LargeDataSetTest_DCFL["Total Rounds"].append(int(total_rounds_dcfl))
    Final_LargeDataSetTest_DCFL["Communication Cost"].append(
        sum(selected_clients_costs_total))
    return Final_LargeDataSetTest_DCFL, data_Global_DCFL
# (continuation of a LocalUpdate(...) call begun in the previous chunk)
dataset=dataset_train, idxs=dict_users[idx])
# one independent LocalUpdate per model variant (identical data shard)
local15 = LocalUpdate(args=args,
                      dataset=dataset_train,
                      idxs=dict_users[idx])
local20 = LocalUpdate(args=args,
                      dataset=dataset_train,
                      idxs=dict_users[idx])
local25 = LocalUpdate(args=args,
                      dataset=dataset_train,
                      idxs=dict_users[idx])
local30 = LocalUpdate(args=args,
                      dataset=dataset_train,
                      idxs=dict_users[idx])
# train each variant from a fresh copy of its own global model
w, loss = local.train(net=copy.deepcopy(net_glob).to(args.device))
w1, loss1 = local1.train(
    net=copy.deepcopy(net_glob1).to(args.device))
w5, loss5 = local5.train(
    net=copy.deepcopy(net_glob5).to(args.device))
w10, loss10 = local10.train(
    net=copy.deepcopy(net_glob10).to(args.device))
w15, loss15 = local15.train(
    net=copy.deepcopy(net_glob15).to(args.device))
w20, loss20 = local20.train(
    net=copy.deepcopy(net_glob20).to(args.device))
w25, loss25 = local25.train(
    net=copy.deepcopy(net_glob25).to(args.device))
w30, loss30 = local30.train(
    net=copy.deepcopy(net_glob30).to(args.device))
print("***BLAH BLAH BLAH***")
# Load or create the local model, train it on one user's shard, and report
# train/test accuracy for that single client.
if args.new:
    print('======created a new model========:\n', net_local)
else:
    checkpoint = torch.load(args.base_file)
    net_local.load_state_dict(checkpoint['state_dict'])
print(type(dataset_train))
# fraction of all data held by one user, scaled down by 100
data_weight = len(dataset_train) / args.num_users / 100
# pick a user either at random or from the CLI argument
if args.random_idx:
    idx = random.randint(0, args.num_users - 1)
else:
    idx = args.idx
local = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users[idx])
w, loss = local.train(net=copy.deepcopy(net_local))
print(loss)
net_local.load_state_dict(w)
#Here let's just define the trained portion of train_set for finding acccuracy
acc_train, loss_train = test_img(
    net_local, DatasetSplit(dataset_train, dict_users[idx]), args)
#acc_train, loss_train = test_img(net_local, dataset_train, args)
acc_test, loss_test = test_img(net_local, dataset_test, args)
print("Training accuracy: {:.2f}".format(acc_train))
print("Testing accuracy: {:.2f}".format(acc_test))
#w_locals.append(copy.deepcopy(w))
#loss_locals.append(copy.deepcopy(loss))
#w['epoch']=0