""" # ============== TRAIN ============== global_model.train() m = max(int(args.frac * args.num_users), 1) # C = args.frac. Setting number of clients m for training idxs_users = np.random.choice( range(args.num_users), m, replace=False ) # args.num_users=100 total clients. Choosing a random array of indices. Subset of clients. for idx in idxs_users: # For each client in the subset. local_model = LocalUpdate(args=args, dataset=train_dataset, idxs=user_groups[idx], logger=logger) w, loss = local_model.update_weights( # update_weights() contain multiple prints model=copy.deepcopy(global_model), global_round=epoch) # w = local model weights local_weights.append(copy.deepcopy(w)) local_losses.append(copy.deepcopy(loss)) # Averaging m local client weights global_weights = average_weights(local_weights) # update global weights global_model.load_state_dict(global_weights) loss_avg = sum(local_losses) / len(local_losses) train_loss.append(loss_avg) # Performance measure # ============== EVAL ============== # Calculate avg training accuracy over all users at every epoch
def main_test(args): start_time = time.time() now = datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S') # define paths logger = SummaryWriter('../logs') # easydict 사용하는 경우 주석처리 # args = args_parser() # checkpoint 생성위치 args.save_path = os.path.join(args.save_path, args.exp_folder) if not os.path.exists(args.save_path): os.makedirs(args.save_path) save_path_tmp = os.path.join(args.save_path, 'tmp_{}'.format(now)) if not os.path.exists(save_path_tmp): os.makedirs(save_path_tmp) SAVE_PATH = os.path.join(args.save_path, '{}_{}_T[{}]_C[{}]_iid[{}]_E[{}]_B[{}]'. format(args.dataset, args.model, args.epochs, args.frac, args.iid, args.local_ep, args.local_bs)) # 시드 고정 torch.backends.cudnn.benchmark = False torch.backends.cudnn.deterministic = True torch.manual_seed(args.seed) torch.cuda.manual_seed(args.seed) random.seed(args.seed) np.random.seed(args.seed) # torch.cuda.set_device(0) device = torch.device("cuda:{}".format(args.gpu) if torch.cuda.is_available() else "cpu") cpu_device = torch.device('cpu') # log 파일 생성 log_path = os.path.join('../logs', args.exp_folder) if not os.path.exists(log_path): os.makedirs(log_path) loggertxt = get_logger( os.path.join(log_path, '{}_{}_{}_{}.log'.format(args.model, args.optimizer, args.norm, now))) logging.info(args) # csv csv_save = '../csv/' + now csv_path = os.path.join(csv_save, 'accuracy.csv') csv_logger_keys = ['train_loss', 'accuracy'] csvlogger = CSVLogger(csv_path, csv_logger_keys) # load dataset and user groups train_dataset, test_dataset, client_loader_dict = get_dataset(args) # cifar-100의 경우 자동 설정 if args.dataset == 'cifar100': args.num_classes = 100 # BUILD MODEL if args.model == 'cnn': # Convolutional neural network if args.dataset == 'mnist': global_model = CNNMnist(args=args) elif args.dataset == 'fmnist': global_model = CNNFashion_Mnist(args=args) elif args.dataset == 'cifar': global_model = CNNCifar(args=args) elif args.dataset == 'cifar100': global_model = CNNCifar(args=args) elif args.model == 'mlp': # Multi-layer preceptron img_size = train_dataset[0][0].shape len_in = 1 for x in img_size: len_in *= x global_model = MLP(dim_in=len_in, dim_hidden=64, dim_out=args.num_classes) elif args.model == 'cnn_vc': global_model = CNNCifar_fedVC(args=args) elif args.model == 'cnn_vcbn': global_model = CNNCifar_VCBN(args=args) elif args.model == 'cnn_vcgn': global_model = CNNCifar_VCGN(args=args) elif args.model == 'resnet18_ws': global_model = resnet18(num_classes=args.num_classes, weight_stand=1) elif args.model == 'resnet18': global_model = resnet18(num_classes=args.num_classes, weight_stand=0) elif args.model == 'resnet32': global_model = ResNet32_test(num_classes=args.num_classes) elif args.model == 'resnet18_mabn': global_model = resnet18_mabn(num_classes=args.num_classes) elif args.model == 'vgg': global_model = vgg11() elif args.model == 'cnn_ws': global_model = CNNCifar_WS(args=args) else: exit('Error: unrecognized model') # Set the model to train and send it to device. 
    loggertxt.info(global_model)

    # Per-client models kept for GN/BN layers that are not communicated (as in FedBN)
    client_models = [copy.deepcopy(global_model) for idx in range(args.num_users)]

    # copy weights
    global_weights = global_model.state_dict()
    global_model.to(device)
    global_model.train()

    # Training
    train_loss, train_accuracy = [], []
    val_acc_list, net_list = [], []

    # Per-client tracking for the "how does BN help" analysis
    client_loss = [[] for i in range(args.num_users)]
    client_conv_grad = [[] for i in range(args.num_users)]
    client_fc_grad = [[] for i in range(args.num_users)]
    client_total_grad_norm = [[] for i in range(args.num_users)]  # overall loss/gradient tracking

    # Resume from checkpoint
    if args.resume:
        checkpoint = torch.load(SAVE_PATH)
        global_model.load_state_dict(checkpoint['global_model'])
        if args.hold_normalize:
            for client_idx in range(args.num_users):
                client_models[client_idx].load_state_dict(checkpoint['model_{}'.format(client_idx)])
        else:
            for client_idx in range(args.num_users):
                client_models[client_idx].load_state_dict(checkpoint['global_model'])
        resume_iter = int(checkpoint['a_iter']) + 1
        print('Resuming training from epoch {}'.format(resume_iter))
    else:
        resume_iter = 0

    # learning rate scheduler
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, gamma=0.1, step_size=500)

    # start training
    for epoch in tqdm(range(args.epochs)):
        local_weights, local_losses = [], []
        if args.verbose:
            print(f'\n | Global Training Round : {epoch + 1} |\n')

        global_model.train()
        m = max(int(args.frac * args.num_users), 1)
        idxs_users = np.random.choice(range(args.num_users), m, replace=False)

        for idx in idxs_users:
            """
            for key in global_model.state_dict().keys():
                if args.hold_normalize:
                    if 'bn' not in key:
                        client_models[idx].state_dict()[key].data.copy_(global_model.state_dict()[key])
                else:
                    client_models[idx].state_dict()[key].data.copy_(global_model.state_dict()[key])
            """
            torch.cuda.empty_cache()
            local_model = LocalUpdate(args=args, logger=logger,
                                      train_loader=client_loader_dict[idx], device=device)
            w, loss, batch_loss, conv_grad, fc_grad, total_grad_norm = local_model.update_weights(
                model=copy.deepcopy(global_model), global_round=epoch, idx_user=idx)
            local_weights.append(copy.deepcopy(w))
            # Client's average loss over one local epoch, e.g. 0.35 (i.e. the mean of its batch losses)
            local_losses.append(copy.deepcopy(loss))

            # per-round scheduler
            # scheduler.step()

            # For the loss graph: store the loss trajectory separately for every client
            client_loss[idx].append(batch_loss)
            client_conv_grad[idx].append(conv_grad)
            client_fc_grad[idx].append(fc_grad)
            client_total_grad_norm[idx].append(total_grad_norm)
            # print(total_grad_norm)

            # copy GN/BN statistics back to the client model
            # client_models[idx].load_state_dict(w)
            del local_model
            del w

        # update global weights
        global_weights = average_weights(local_weights, client_loader_dict, idxs_users)

        # server-side optimizer step on the aggregated update
        # opt = OptRepo.name2cls('adam')(global_model.parameters(), lr=0.01, betas=(0.9, 0.99), eps=1e-3)
        opt = OptRepo.name2cls('sgd')(global_model.parameters(), lr=10, momentum=0.9)
        opt.zero_grad()
        opt_state = opt.state_dict()
        global_weights = aggregation(global_weights, global_model)
        global_model.load_state_dict(global_weights)
        opt = OptRepo.name2cls('sgd')(global_model.parameters(), lr=10, momentum=0.9)
        # opt = OptRepo.name2cls('adam')(global_model.parameters(), lr=0.01, betas=(0.9, 0.99), eps=1e-3)
        opt.load_state_dict(opt_state)
        opt.step()

        loss_avg = sum(local_losses) / len(local_losses)
        train_loss.append(loss_avg)

        global_model.eval()
        # for c in range(args.num_users):
        #     local_model = LocalUpdate(args=args, dataset=train_dataset,
        #                               idxs=user_groups[idx], logger=logger)
        #     acc, loss = local_model.inference(model=global_model)
        #     list_acc.append(acc)
        #     list_loss.append(loss)
        # train_accuracy.append(sum(list_acc)/len(list_acc))
        train_accuracy = test_inference(args, global_model, test_dataset, device=device)
        val_acc_list.append(train_accuracy)

        # print global training loss after every 'i' rounds
        # if (epoch+1) % print_every == 0:
        loggertxt.info(f' \nAvg Training Stats after {epoch + 1} global rounds:')
        loggertxt.info(f'Training Loss : {loss_avg}')
        loggertxt.info('Train Accuracy: {:.2f}% \n'.format(100 * train_accuracy))
        csvlogger.write_row([loss_avg, 100 * train_accuracy])

        if (epoch + 1) % 100 == 0:
            tmp_save_path = os.path.join(save_path_tmp, 'tmp_{}.pt'.format(epoch + 1))
            torch.save(global_model.state_dict(), tmp_save_path)

    # Test inference after completion of training
    test_acc = test_inference(args, global_model, test_dataset, device=device)

    print(' Saving checkpoints to {}...'.format(SAVE_PATH))
    if args.hold_normalize:
        client_dict = {}
        for idx, model in enumerate(client_models):
            client_dict['model_{}'.format(idx)] = model.state_dict()
        torch.save(client_dict, SAVE_PATH)
    else:
        torch.save({'global_model': global_model.state_dict()}, SAVE_PATH)

    loggertxt.info(f' \n Results after {args.epochs} global rounds of training:')
    # loggertxt.info("|---- Avg Train Accuracy: {:.2f}%".format(100*train_accuracy[-1]))
    loggertxt.info("|---- Test Accuracy: {:.2f}%".format(100 * test_acc))

    # Does not work well when frac != 1:
    # batch_loss_list = np.array(client_loss).sum(axis=0) / args.num_users
    # conv_grad_list = np.array(client_conv_grad).sum(axis=0) / args.num_users
    # fc_grad_list = np.array(client_fc_grad).sum(axis=0) / args.num_users
    # total_grad_list = np.array(client_total_grad_norm).sum(axis=0) / args.num_users
    # The intent was to average over clients, but currently only client 0 is returned;
    # expect a bug if clients have different numbers of batches.
    return train_loss, val_acc_list, client_loss[0], client_conv_grad[0], client_fc_grad[0], client_total_grad_norm[0]
for epoch in tqdm(range(args.epochs)):
    local_weights, local_losses = [], []
    print(f'\n | Global Training Round : {epoch+1} |\n')

    global_model.train()
    m = max(int(args.frac * args.num_users), 1)
    idxs_users = np.random.choice(range(args.num_users), m, replace=False)

    for idx in idxs_users:
        local_model = LocalUpdate(args=args, dataset=train_dataset,
                                  idxs=user_groups[idx], logger=logger)
        w, loss, batch_loss, conv_grad, fc_grad = local_model.update_weights(
            model=copy.deepcopy(global_model), global_round=epoch, idx_user=idx)
        local_weights.append(copy.deepcopy(w))
        # Client's average loss over one local epoch, e.g. 0.35 (i.e. the mean of its batch losses)
        local_losses.append(copy.deepcopy(loss))

        # For the loss graph: store the loss trajectory per client
        client_loss[idx].append(batch_loss)
        client_conv_grad[idx].append(conv_grad)
        client_fc_grad[idx].append(fc_grad)
        # loggergrad.info('user:{} , total_gradient_norm:{}'.format(idx, log_grad))

    # update global weights
    global_weights = average_weights(local_weights, user_groups, idxs_users)
def train(args, global_model, raw_data_train, raw_data_test): start_time = time.time() user_list = list(raw_data_train[2].keys()) user_weights = [None for _ in range(len(user_list))] user_assignments = [i % args.clusters for i in range(len(user_list))] # global_model.to(device) # global_weights = global_model.state_dict() global_models = [copy.deepcopy(global_model) for _ in range(args.clusters)] for m in global_models: m.to(device) # if args.frac == -1: # m = args.cpr # if m > len(user_list): # raise ValueError(f"Clients Per Round: {args.cpr} is greater than number of users: {len(user_list)}") # else: # m = max(int(args.frac * len(user_list)), 1) # print(f"Training {m} users each round") train_loss, train_accuracy = [], [] for epoch in range(args.epochs): print(f"Global Training Round: {epoch + 1}/{args.epochs}") local_losses = [] for modelidx, cluster_model in tqdm(enumerate(global_models)): local_weights = [] for useridx, (user, user_assign) in enumerate( zip(user_list, user_assignments)): if user_assign == modelidx: local_model = LocalUpdate(args=args, raw_data=raw_data_train, user=user) w, loss = local_model.update_weights( copy.deepcopy(cluster_model)) local_weights.append(w) local_losses.append(loss) user_weights[useridx] = w if local_weights: cluster_model.load_state_dict(average_weights(local_weights)) train_loss.append(sum(local_losses) / len(local_losses)) # sampled_users = random.sample(user_list, m) # for user in tqdm(sampled_users): # FedSEM cluster reassignment step print(f"Calculating User Assignments") dists = np.zeros((len(user_list), len(global_models))) for cidx, cluster_model in enumerate(global_models): for ridx, user_weight in enumerate(user_weights): dists[ridx, cidx] = weight_dist(user_weight, cluster_model.state_dict()) user_assignments = list(np.argmin(dists, axis=1)) print("Cluster: number of clients in that cluster index") print(Counter(user_assignments)) print(f"") # Calculate avg training accuracy over all users at every epoch test_acc, test_loss = [], [] for modelidx, cluster_model in enumerate(global_models): local_weights = [] for user, user_assign in zip(user_list, user_assignments): if modelidx == user_assign: local_model = LocalUpdate(args=args, raw_data=raw_data_test, user=user) acc, loss = local_model.inference(model=cluster_model) test_acc.append(acc) test_loss.append(loss) train_accuracy.append(sum(test_acc) / len(test_acc)) wandb.log({ "Train Loss": train_loss[-1], "Test Accuracy": (100 * train_accuracy[-1]) }) print( f"Train Loss: {train_loss[-1]:.4f}\t Test Accuracy: {(100 * train_accuracy[-1]):.2f}%" ) print(f"Results after {args.epochs} global rounds of training:") print("Avg Train Accuracy: {:.2f}%".format(100 * train_accuracy[-1])) print(f"Total Run Time: {(time.time() - start_time):0.4f}")
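# --- Not part of the original source: a hypothetical sketch of the `weight_dist`
# helper used in the FedSEM reassignment step above, assuming it returns the L2
# distance between two state_dicts over their floating-point parameters.
import torch

def weight_dist(w_a, w_b):
    """Square root of the summed squared parameter differences (assumed)."""
    dist_sq = 0.0
    for key in w_a.keys():
        if torch.is_floating_point(w_a[key]):
            dist_sq += torch.sum((w_a[key] - w_b[key]) ** 2).item()
    return dist_sq ** 0.5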
def poisoned_1to7_NoDefense(seed=1): start_time = time.time() # define paths path_project = os.path.abspath('..') logger = SummaryWriter('../logs') args = args_parser() exp_details(args) # set seed torch.manual_seed(seed) np.random.seed(seed) # device device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # load dataset and user groups train_dataset, test_dataset, user_groups = get_dataset(args) # BUILD MODEL if args.model == 'cnn': # Convolutional neural netork if args.dataset == 'mnist': global_model = CNNMnist(args=args) elif args.dataset == 'fmnist': global_model = CNNFashion_Mnist(args=args) elif args.dataset == 'cifar': global_model = CNNCifar(args=args) elif args.model == 'mlp': # Multi-layer preceptron img_size = train_dataset[0][0].shape len_in = 1 for x in img_size: len_in *= x global_model = MLP(dim_in=len_in, dim_hidden=64, dim_out=args.num_classes) else: exit('Error: unrecognized model') # Set the model to train and send it to device. global_model.to(device) global_model.train() print(global_model) # copy weights global_weights = global_model.state_dict() # testing accuracy for global model testing_accuracy = [0.1] backdoor_accuracy = [0.1] for epoch in tqdm(range(args.epochs)): local_del_w, local_norms = [], [] print(f'\n | Global Training Round : {epoch+1} |\n') global_model.train() m = max(int(args.frac * args.num_users), 1) idxs_users = np.random.choice(range(args.num_users), m, replace=False) # Adversary updates print("Evil norms:") for idx in idxs_users[0:args.nb_attackers]: local_model = LocalUpdate(args=args, dataset=train_dataset, idxs=user_groups[idx], logger=logger) del_w, zeta = local_model.poisoned_1to7( model=copy.deepcopy(global_model), change=1) local_del_w.append(copy.deepcopy(del_w)) local_norms.append(copy.deepcopy(zeta)) print(zeta) # Non-adversarial updates print("Good norms:") for idx in idxs_users[args.nb_attackers:]: print(idx) local_model = LocalUpdate(args=args, dataset=train_dataset, idxs=user_groups[idx], logger=logger) del_w, zeta = local_model.update_weights( model=copy.deepcopy(global_model), change=1) local_del_w.append(copy.deepcopy(del_w)) local_norms.append(copy.deepcopy(zeta)) print(zeta) # average local updates average_del_w = average_weights(local_del_w) # Update global model: w_{t+1} = w_{t} + average_del_w for param, param_del_w in zip(global_weights.values(), average_del_w.values()): param += param_del_w global_model.load_state_dict(global_weights) # test accuracy, backdoor accuracy test_acc, test_loss, back_acc = test_inference1to7( args, global_model, test_dataset) testing_accuracy.append(test_acc) backdoor_accuracy.append(back_acc) print("Test & Backdoor accuracy") print(testing_accuracy) print(backdoor_accuracy) # save accuracy np.savetxt( '../save/1to7Attack/TestAcc/NoDefense_{}_{}_seed{}.txt'.format( args.dataset, args.model, s), testing_accuracy) np.savetxt( '../save/1to7Attack/BackAcc/NoDefense_{}_{}_seed{}.txt'.format( args.dataset, args.model, s), backdoor_accuracy)
def poisoned_pixel_CDP(norm_bound, noise_scale, nb_attackers, seed=1): start_time = time.time() # define paths path_project = os.path.abspath('..') logger = SummaryWriter('../logs') args = args_parser() exp_details(args) # set seed torch.manual_seed(seed) np.random.seed(seed) # device device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # load dataset and user groups train_dataset, test_dataset, user_groups = get_dataset(args) # BUILD MODEL if args.model == 'cnn': # Convolutional neural netork if args.dataset == 'mnist': global_model = CNNMnist(args=args) elif args.dataset == 'fmnist': global_model = CNNFashion_Mnist(args=args) elif args.dataset == 'cifar': global_model = CNNCifar(args=args) elif args.model == 'mlp': # Multi-layer preceptron img_size = train_dataset[0][0].shape len_in = 1 for x in img_size: len_in *= x global_model = MLP(dim_in=len_in, dim_hidden=64, dim_out=args.num_classes) else: exit('Error: unrecognized model') # Set the model to train and send it to device. global_model.to(device) global_model.train() print(global_model) # copy weights global_weights = global_model.state_dict() # load poisoned model backdoor_model = copy.deepcopy(global_model) backdoor_model.load_state_dict(torch.load('../save/poison_model.pth')) # testing accuracy for global model testing_accuracy = [0.1] backdoor_accuracy = [0.1] for epoch in tqdm(range(args.epochs)): local_del_w, local_norms = [], [] print(f'\n | Global Training Round : {epoch + 1} |\n') global_model.train() m = max(int(args.frac * args.num_users), 1) idxs_users = np.random.choice(range(args.num_users), m, replace=False) # Adversary updates print("Evil") for idx in idxs_users[0:nb_attackers]: # backdoor model w = copy.deepcopy(backdoor_model) # compute change in parameters and norm zeta = 0 for del_w, w_old in zip(w.parameters(), global_model.parameters()): del_w.data = del_w.data - copy.deepcopy(w_old.data) zeta += del_w.norm(2).item()**2 zeta = zeta**(1. / 2) del_w = w.state_dict() print("EVIL") print(zeta) # add to global round local_del_w.append(copy.deepcopy(del_w)) local_norms.append(copy.deepcopy(zeta)) # Non-adversarial updates for idx in idxs_users[nb_attackers:]: local_model = LocalUpdate(args=args, dataset=train_dataset, idxs=user_groups[idx], logger=logger) del_w, zeta = local_model.update_weights( model=copy.deepcopy(global_model), change=1) local_del_w.append(copy.deepcopy(del_w)) local_norms.append(copy.deepcopy(zeta)) print("good") #print(zeta) # norm bound (e.g. median of norms) clip_factor = norm_bound #min(norm_bound, np.median(local_norms)) print(clip_factor) # clip updates for i in range(len(idxs_users)): for param in local_del_w[i].values(): print(max(1, local_norms[i] / clip_factor)) param /= max(1, local_norms[i] / clip_factor) # average local model updates average_del_w = average_weights(local_del_w) # Update model and add noise # w_{t+1} = w_{t} + avg(del_w1 + del_w2 + ... 
        #   + del_wc) + Noise
        for param, param_del_w in zip(global_weights.values(), average_del_w.values()):
            param += param_del_w
            param += torch.randn(param.size()) * noise_scale * norm_bound / len(idxs_users)
        global_model.load_state_dict(global_weights)

        # test accuracy
        test_acc, test_loss, backdoor = test_backdoor_pixel(args, global_model, test_dataset)
        testing_accuracy.append(test_acc)
        backdoor_accuracy.append(backdoor)

        print("Testing & Backdoor accuracies")
        print(testing_accuracy)
        print(backdoor_accuracy)

    # save test accuracy
    np.savetxt('../save/PixelAttack/TestAcc/iid_GDP_{}_{}_clip{}_scale{}_attackers{}_seed{}.txt'
               .format(args.dataset, args.model, norm_bound, noise_scale, nb_attackers, seed),
               testing_accuracy)
    np.savetxt('../save/PixelAttack/BackdoorAcc/iid_GDP_{}_{}_clip{}_scale{}_attackers{}_seed{}.txt'
               .format(args.dataset, args.model, norm_bound, noise_scale, nb_attackers, seed),
               backdoor_accuracy)
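# --- Not part of the original sources: a hedged sketch of the clip-and-noise step
# that the central-DP variants above implement inline. Each client delta is scaled
# by 1 / max(1, ||delta|| / C) and the averaged update is perturbed with Gaussian
# noise of scale noise_scale * C. Note the fragments above divide the noise either
# by the number of sampled clients m or by sqrt(m); this sketch uses m.
# `clip_and_noise` is a hypothetical helper name; `average_weights` refers to the
# plain-mean helper sketched earlier.
import torch

def clip_and_noise(local_del_w, local_norms, norm_bound, noise_scale):
    """Clip client deltas to `norm_bound`, average them, and add Gaussian noise (sketch)."""
    m = len(local_del_w)
    for delta, norm in zip(local_del_w, local_norms):
        factor = max(1.0, norm / norm_bound)
        for param in delta.values():
            param /= factor
    averaged = average_weights(local_del_w)
    for param in averaged.values():
        param += torch.randn(param.size(), device=param.device) * noise_scale * norm_bound / m
    return averaged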
print(f'\n | Global Training Round : {epoch+1} |\n') global_model.train() m = max(int(args.frac * args.num_users), 1) idxs_users = np.random.choice(range(args.num_users), m, replace=False) num_data_per_client.update((key, len(value)) for key, value in user_groups.items() if key in idxs_users) for idx in idxs_users: rm_list, rv_list = [], [] local_model = LocalUpdate(args=args, dataset=train_dataset, idxs=user_groups[idx], logger=logger) w, loss, accuracy, optimizer = local_model.update_weights( model=copy.deepcopy(global_model), global_round=epoch) w_copy = copy.deepcopy(w) local_weights.append(copy.deepcopy(w)) local_losses.append(copy.deepcopy(loss)) local_accuracies.append(copy.deepcopy(accuracy)) # Saving the objects train_loss and train_accuracy: if (epoch + 1) % save_every == 0: save_model(epoch + 1, global_model, optimizer, filepath) # update global weights global_weights = average_weights_baseline(local_weights)
print_every = 5
val_loss_pre, counter = 0, 0

# tqdm progress bar
for epoch in tqdm(range(args.epochs)):
    print(f'\n | Global Training Round : {epoch+1} |\n')

    global_model.train()  # set to training mode
    idxs_users = range(args.num_users)

    for idx in idxs_users:
        print("Training at user %d/%d." % (idx + 1, args.num_users))
        local_model = LocalUpdate(args=args, dataset=train_dataset,
                                  idxs=user_groups[idx], logger=logger)
        w, loss, global_model = local_model.update_weights(
            model=global_model, global_round=epoch)

    # update global weights: the next client starts from the model the previous client just trained
    # global_model.load_state_dict(w)

    # loss_avg = sum(local_losses) / len(local_losses)
    # train_loss.append(loss_avg)

    # Calculate avg training accuracy over all users at every epoch
    list_acc, list_loss = [], []
    global_model.eval()
    # for c in range(args.num_users):
    #     local_model = LocalUpdate(args=args, dataset=train_dataset,
    #                               idxs=user_groups[idx], logger=logger)  # only constructs the LocalUpdate object
    #     acc, loss = local_model.inference(model=global_model)  # only uses local_model's data split, i.e. evaluates global_model on the training data
    #     list_acc.append(acc)
def main(): start_time = time.time() # define paths path_project = os.path.abspath('..') logger = SummaryWriter('../logs') args = args_parser() args = adatok.arguments(args) exp_details(args) if args.gpu: torch.cuda.set_device(args.gpu) device = 'cuda' if args.gpu else 'cpu' # load dataset and user groups train_dataset, test_dataset, user_groups = get_dataset(args) if adatok.data.image_initialization == True: adatok.data.image_initialization = False return # BUILD MODEL if args.model == 'cnn': # Convolutional neural netork if args.dataset == 'mnist': global_model = CNNMnist(args=args) elif args.dataset == 'fmnist': global_model = CNNFashion_Mnist(args=args) elif args.dataset == 'cifar': global_model = CNNCifar(args=args) elif args.model == 'mlp': # Multi-layer preceptron img_size = train_dataset[0][0].shape len_in = 1 for x in img_size: len_in *= x global_model = MLP(dim_in=len_in, dim_hidden=64, dim_out=args.num_classes) else: exit('Error: unrecognized model') # Set the model to train and send it to device. global_model.to(device) global_model.train() #print(global_model) # copy weights global_weights = global_model.state_dict() # Training train_loss, train_accuracy = [], [] val_acc_list, net_list = [], [] cv_loss, cv_acc = [], [] print_every = 2 val_loss_pre, counter = 0, 0 for epoch in tqdm(range(args.epochs)): local_weights, local_losses = [], [] #print(f'\n | Global Training Round : {epoch+1} |\n') global_model.train() m = max(int(args.frac * args.num_users), 1) idxs_users = np.random.choice(range(args.num_users), m, replace=False) for idx in idxs_users: local_model = LocalUpdate(args=args, dataset=train_dataset, idxs=user_groups[idx], logger=logger) w, loss = local_model.update_weights( model=copy.deepcopy(global_model), global_round=epoch) local_weights.append(copy.deepcopy(w)) local_losses.append(copy.deepcopy(loss)) # update global weights global_weights = average_weights(local_weights) # update global weights global_model.load_state_dict(global_weights) loss_avg = sum(local_losses) / len(local_losses) train_loss.append(loss_avg) # Calculate avg training accuracy over all users at every epoch list_acc, list_loss = [], [] global_model.eval() for c in range(args.num_users): local_model = LocalUpdate(args=args, dataset=train_dataset, idxs=user_groups[idx], logger=logger) acc, loss = local_model.inference(model=global_model) list_acc.append(acc) list_loss.append(loss) train_accuracy.append(sum(list_acc) / len(list_acc)) # print global training loss after every 'i' rounds '''if (epoch+1) % print_every == 0: print(f' \nAvg Training Stats after {epoch+1} global rounds:') print(f'Training Loss : {np.mean(np.array(train_loss))}') print('Train Accuracy: {:.2f}% \n'.format(100*train_accuracy[-1]))''' # Test inference after completion of training for i in adatok.data.test_groups_in_binary: adatok.data.actual_test_group_in_binary = i test_acc, test_loss = test_inference(args, global_model, test_dataset) print("Resoults") print(epoch) print(adatok.data.actual_train_group_in_binary) print(adatok.data.actual_test_group_in_binary) print(test_acc) print(test_loss) '''
def train(args, global_model, raw_data_train, raw_data_test): start_time = time.time() user_list = list(raw_data_train[2].keys()) global_model.to(device) global_weights = global_model.state_dict() if args.frac == -1: m = args.cpr if m > len(user_list): raise ValueError( f"Clients Per Round: {args.cpr} is greater than number of users: {len(user_list)}" ) else: m = max(int(args.frac * len(user_list)), 1) print(f"Training {m} users each round") train_loss, train_accuracy = [], [] for epoch in range(args.epochs): local_weights, local_losses = [], [] print(f"Global Training Round: {epoch + 1}/{args.epochs}") if args.sample_dist == "uniform": sampled_users = random.sample(user_list, m) else: xs = np.linspace(-args.sigm_domain, args.sigm_domain, len(user_list)) sigmdist = 1 / (1 + np.exp(-xs)) sampled_users = np.random.choice(user_list, m, p=sigmdist / sigmdist.sum()) for user in tqdm(sampled_users): local_model = LocalUpdate(args=args, raw_data=raw_data_train, user=user) w, loss = local_model.update_weights(copy.deepcopy(global_model)) local_weights.append(copy.deepcopy(w)) local_losses.append(loss) # update global weights global_weights = average_weights(local_weights) global_model.load_state_dict(global_weights) train_loss.append(sum(local_losses) / len(local_losses)) # Calculate avg training accuracy over all users at every epoch test_acc, test_loss = [], [] for user in user_list: local_model = LocalUpdate(args=args, raw_data=raw_data_test, user=user) acc, loss = local_model.inference(model=global_model) test_acc.append(acc) test_loss.append(loss) train_accuracy.append(sum(test_acc) / len(test_acc)) wandb.log({ "Train Loss": train_loss[-1], "Test Accuracy": (100 * train_accuracy[-1]) }) print( f"Train Loss: {train_loss[-1]:.4f}\t Test Accuracy: {(100 * train_accuracy[-1]):.2f}%" ) print(f"Results after {args.epochs} global rounds of training:") print("Avg Train Accuracy: {:.2f}%".format(100 * train_accuracy[-1])) print(f"Total Run Time: {(time.time() - start_time):0.4f}")
epoch = comm.recv( source=0, tag=rank ) # receive the epoch/communication round that the parameter server is in if epoch == -1: break #The server sends the latest weight aggregate to this worker, #based on which the local model is updated before starting to train. if (epoch < args.epochs - 1) and (epoch > 0): enc_global_aggregate = comm.recv(source=0, tag=rank) ## decrypt and recompose enc_global_aggregate global_aggregate = dec_recompose(enc_global_aggregate) model.load_state_dict(global_aggregate) u_step += 1 # Now perform one iteration w, loss, u_step = local_model.update_weights(model=model, global_round=epoch, u_step=u_step) comm.send(u_step, dest=0, tag=rank) # send the step number if epoch < args.epochs - 1: send_enc( ) # send encrypted model update parameters to the global agent elif epoch == args.epochs - 1: comm.send( w, dest=0, tag=rank ) # send unencrypted model update parameters to the global agent break if rank == 0: # Test inference after completion of training test_acc, test_loss = test_inference(args, global_model, test_dataset)
def poisoned_NoDefense(nb_attackers, seed=1): # define paths path_project = os.path.abspath('..') logger = SummaryWriter('../logs') args = args_parser() exp_details(args) # set seed torch.manual_seed(seed) np.random.seed(seed) # device device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # load dataset and user groups train_dataset, test_dataset, user_groups = get_dataset(args) # BUILD MODEL if args.model == 'cnn': # Convolutional neural netork if args.dataset == 'mnist': global_model = CNNMnist(args=args) elif args.dataset == 'fmnist': global_model = CNNFashion_Mnist(args=args) elif args.dataset == 'cifar': global_model = CNNCifar(args=args) elif args.model == 'mlp': # Multi-layer preceptron img_size = train_dataset[0][0].shape len_in = 1 for x in img_size: len_in *= x global_model = MLP(dim_in=len_in, dim_hidden=64, dim_out=args.num_classes) else: exit('Error: unrecognized model') # Set the model to train and send it to device. global_model.to(device) global_model.train() print(global_model) # copy weights global_weights = global_model.state_dict() # backdoor model dummy_model = copy.deepcopy(global_model) dummy_model.load_state_dict(torch.load('../save/all_5_model.pth')) dummy_norm = 0 for x in dummy_model.state_dict().values(): dummy_norm += x.norm(2).item() ** 2 dummy_norm = dummy_norm ** (1. / 2) # testing accuracy for global model testing_accuracy = [0.1] for epoch in tqdm(range(args.epochs)): local_del_w = [] print(f'\n | Global Training Round : {epoch+1} |\n') global_model.train() m = max(int(args.frac * args.num_users), 1) idxs_users = np.random.choice(range(args.num_users), m, replace=False) # Adversary updates for idx in idxs_users[0:nb_attackers]: print("evil") local_model = LocalUpdate(args=args, dataset=train_dataset, idxs=user_groups[idx], logger=logger) #del_w, _ = local_model.poisoned_SGA(model=copy.deepcopy(global_model), change=1) w = copy.deepcopy(dummy_model) # compute change in parameters and norm zeta = 0 for del_w, w_old in zip(w.parameters(), global_model.parameters()): del_w.data -= copy.deepcopy(w_old.data) del_w.data *= m / nb_attackers del_w.data += copy.deepcopy(w_old.data) zeta += del_w.norm(2).item() ** 2 zeta = zeta ** (1. / 2) del_w = copy.deepcopy(w.state_dict()) local_del_w.append(copy.deepcopy(del_w)) # Non-adversarial updates for idx in idxs_users[nb_attackers:]: print("good") local_model = LocalUpdate(args=args, dataset=train_dataset, idxs=user_groups[idx], logger=logger) del_w, _ = local_model.update_weights(model=copy.deepcopy(global_model), change=1) local_del_w.append(copy.deepcopy(del_w)) # average local updates average_del_w = average_weights(local_del_w) # Update global model: w_{t+1} = w_{t} + average_del_w for param, param_del_w in zip(global_weights.values(), average_del_w.values()): param += param_del_w global_model.load_state_dict(global_weights) # test accuracy test_acc, test_loss = test_inference(args, global_model, test_dataset) testing_accuracy.append(test_acc) print("Test accuracy") print(testing_accuracy) # save test accuracy np.savetxt('../save/RandomAttack/NoDefense_iid_{}_{}_attackers{}_seed{}.txt'. format(args.dataset, args.model, nb_attackers, s), testing_accuracy)
""" # ============== TRAIN ============== global_model.train() m = max(int(args.frac * args.num_users), 1) # C = args.frac. Setting number of clients m for training idxs_users = np.random.choice( range(args.num_users), m, replace=False ) # args.num_users=100 total clients. Choosing a random array of indices. Subset of clients. for idx in idxs_users: # For each client in the subset. local_model = LocalUpdate(args=args, dataset=train_dataset, idxs=user_groups[idx], logger=logger) w, loss = local_model.update_weights( # update_weights() contain multiple prints model=copy.deepcopy(global_model), global_round=epoch, dtype=torch.float16) # w = local model weights local_weights.append(copy.deepcopy(w)) local_losses.append(copy.deepcopy(loss)) # Averaging m local client weights global_weights = average_weights(local_weights) # update global weights global_model.load_state_dict(global_weights) loss_avg = sum(local_losses) / len(local_losses) train_loss.append(loss_avg) # Performance measure # ============== EVAL ==============
global_model.train()
for r in range(args.num_users):
    m = max(int(args.frac * args.num_users), 1)
    # Randomly select a fraction `frac` of the num_users clients for training
    idxs_users = np.random.choice(range(args.num_users), m, replace=False)
    print("Users selected:", idxs_users)

    for idx in idxs_users:
        local_model = LocalUpdate(args=args, dataset=train_dataset,
                                  idxs=user_groups[idx], logger=logger)
        w, loss, t_model = local_model.update_weights(
            model=copy.deepcopy(models[idx]), global_round=epoch)
        # print("local losses:", loss)
        models[idx].load_state_dict(w)
        version_matrix[idx, idx] = version_matrix[idx, idx] + 1

    idx_user = np.random.choice(range(args.num_users), 1, replace=False)[0]
    v_old = np.reshape(version_matrix[idx_user, :], -1)
    v_new = np.zeros(args.num_users)
    for i in range(args.num_users):
        v_new[i] = version_matrix[i, i]

    # Model aggregation
    w_avg = copy.deepcopy(models[idx_user].state_dict())
    n_participants = 1  # count of participating models
    for i in range(args.num_users):
def main(): start_time = time.time() # define paths path_project = os.path.abspath('..') logger = SummaryWriter('../logs') args = args_parser() exp_details(args) if args.gpu: torch.cuda.set_device(0) device = 'cuda' if args.gpu else 'cpu' # load dataset and user groups train_dataset, test_dataset, user_groups = get_dataset(args) args.num_users = len(user_groups) # BUILD MODEL if args.model == 'cnn': # Convolutional neural netork if args.dataset == 'mnist': global_model = CNNMnist(args=args) elif args.dataset == 'fmnist': global_model = CNNFashion_Mnist(args=args) elif args.dataset == 'cifar': global_model = CNNCifar(args=args) elif args.model == 'mlp': # Multi-layer preceptron img_size = train_dataset[0][0].shape len_in = 1 for x in img_size: len_in *= x global_model = MLP(dim_in=len_in, dim_hidden=64, dim_out=args.num_classes) else: exit('Error: unrecognized model') # Set the model to train and send it to device. global_model.to(device) global_model.train() # copy weights global_weights = global_model.state_dict() # Training train_loss, train_accuracy = [], [] val_acc_list, net_list = [], [] cv_loss, cv_acc = [], [] print_every = 2 val_loss_pre, counter = 0, 0 #Beolvassuk, hogy éppen mely résztvevők vesznek részt a tanításban (0 jelentése, hogy benne van, 1 az hogy nincs) users = [] fp = open('users.txt', "r") x = fp.readline().split(' ') for i in x: if i != '': users.append(int(i)) fp.close() #for epoch in tqdm(range(args.epochs)): for epoch in range(args.epochs): local_weights, local_losses = [], [] #print(f'\n | Global Training Round : {epoch+1} |\n') global_model.train() m = max(int(args.frac * args.num_users), 1) idxs_users = np.random.choice(range(args.num_users), m, replace=False) for idx in idxs_users: local_model = LocalUpdate(args=args, dataset=train_dataset, idxs=user_groups[idx], logger=logger) w, loss = local_model.update_weights( model=copy.deepcopy(global_model), global_round=epoch) local_weights.append(copy.deepcopy(w)) local_losses.append(copy.deepcopy(loss)) global_weights = average_weights(local_weights) # update global weights global_model.load_state_dict(global_weights) loss_avg = sum(local_losses) / len(local_losses) train_loss.append(loss_avg) # Calculate avg training accuracy over all users at every epoch list_acc, list_loss = [], [] global_model.eval() for c in range(args.num_users): local_model = LocalUpdate(args=args, dataset=train_dataset, idxs=user_groups[idx], logger=logger) acc, loss = local_model.inference(model=global_model) list_acc.append(acc) list_loss.append(loss) train_accuracy.append(sum(list_acc) / len(list_acc)) # print global training loss after every 'i' rounds '''if (epoch+1) % print_every == 0: print(f' \nAvg Training Stats after {epoch+1} global rounds:') print(f'Training Loss : {np.mean(np.array(train_loss))}') print('Train Accuracy: {:.2f}% \n'.format(100*train_accuracy[-1]))''' # Test inference after completion of training #Beolvassuk hogy mely résztvevőnek mely labeleket osztottuk ki. 
ftrain = open('traindataset.txt')
testlabels = []
line = ftrain.readline()
while line != "":
    sor = line.split(' ')
    array = []
    for i in sor:
        array.append(int(i))
    testlabels.append(array)
    line = ftrain.readline()
ftrain.close()
print("USERS LABELS")
print(testlabels)

# Run the test for every possible coalition of participants
for j in range((2**args.num_users) - 1):
    binary = numberToBinary(j, len(users))
    test_acc, test_loss = test_inference(args, global_model, test_dataset,
                                         testlabels, binary, len(binary))

    # Print the test results ("RESZTVEVOK" = participants)
    print("RESZTVEVOK")
    print(users)
    print("TEST NUMBER")
    print(j)
    print("TEST BINARY")
    print(binary)
    print("TEST LABELS")
    print(testlabels)
    print("Test Accuracy")
    print("{:.2f}%".format(100 * test_acc))
    print()

# Saving the objects train_loss and train_accuracy:
'''file_name = '../save/objects/{}_{}_{}_C[{}]_iid[{}]_E[{}]_B[{}].pkl'.\
# Extract baseline model weights baseline_weights = extract_weights(global_model) # record number of samples num_samples_list = [] for idx in idxs_users: idxs = user_groups[idx] num_samples_list.append(len(idxs)) for idx in idxs_users: local_model = LocalUpdate(args=args, dataset=train_dataset, idxs=user_groups[idx], logger=logger) w, local_model_weight, loss = local_model.update_weights( model=copy.deepcopy(global_model), global_round=epoch ) # clients send local model weights to server # compute local delta weights w_s.append(copy.deepcopy(w)) local_delta_update = [] for i, (name, weight) in enumerate(local_model_weight): bl_name, baseline = baseline_weights[i] # Ensure correct weight is being updated assert name == bl_name # Calculate update delta = weight - baseline local_delta_update.append((name, delta))
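# --- Not part of the original source: a hypothetical sketch of how the per-client
# deltas and sample counts collected above could be combined on the server,
# FedAvg-style, weighting each client by its number of samples.
# `aggregate_deltas` and `local_delta_updates` (a list of per-client
# [(name, delta_tensor), ...] lists) are illustrative names, not from the source.
def aggregate_deltas(local_delta_updates, num_samples_list):
    """Sample-weighted average of per-client (name, delta) updates (sketch)."""
    total = float(sum(num_samples_list))
    aggregated = []
    for entries in zip(*local_delta_updates):
        name = entries[0][0]
        agg = sum(delta * (n / total) for (_, delta), n in zip(entries, num_samples_list))
        aggregated.append((name, agg))
    return aggregated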
local_weights, local_losses, local_accuracies, local_bn_rm, local_bn_rv = [], [], [], [], [] print(f'\n | Global Training Round : {epoch+1} |\n') global_model.train() m = max(int(args.frac * args.num_users), 1) #idxs_users = np.random.choice(range(args.num_users), m, replace=False) #idxs_users = [76, 54, 80, 33, 85, 84, 5, 73, 12, 91] num_users = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30] idxs_users = np.random.choice(range(30), m, replace=False) for idx in idxs_users: print("======================================================== client id : ", idxs_users) local_model = LocalUpdate(args=args, dataset=train_dataset, idxs=user_groups[idx], logger=logger) w, loss, accuracy, list_rm, list_rv = local_model.update_weights( model=copy.deepcopy(global_model), global_round=epoch) #print("---------------------------", loss) local_weights.append(copy.deepcopy(w)) local_losses.append(copy.deepcopy(loss)) local_accuracies.append(copy.deepcopy(accuracy)) ''' print(list_rm.count(None)) if(list_rm.count(None) == 0): local_BN_Statistics.append(list_rm) rm_dict[idx] = list_rm rv_dict[idx] = list_rv ''' rm_dict[idx].append(list_rm) rv_dict[idx].append(list_rv)
def poisoned_random_CDP(seed=1): # Central DP to protect against attackers start_time = time.time() # define paths path_project = os.path.abspath('..') logger = SummaryWriter('../logs') args = args_parser() exp_details(args) # set seed torch.manual_seed(seed) np.random.seed(seed) # device device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # load dataset and user groups train_dataset, test_dataset, user_groups = get_dataset(args) # BUILD MODEL if args.model == 'cnn': # Convolutional neural netork if args.dataset == 'mnist': global_model = CNNMnist(args=args) elif args.dataset == 'fmnist': global_model = CNNFashion_Mnist(args=args) elif args.dataset == 'cifar': global_model = CNNCifar(args=args) elif args.model == 'mlp': # Multi-layer preceptron img_size = train_dataset[0][0].shape len_in = 1 for x in img_size: len_in *= x global_model = MLP(dim_in=len_in, dim_hidden=64, dim_out=args.num_classes) else: exit('Error: unrecognized model') # Set the model to train and send it to device. global_model.to(device) global_model.train() print(global_model) # copy weights global_weights = global_model.state_dict() # testing accuracy for global model testing_accuracy = [0.1] backdoor_accuracy = [0.1] for epoch in tqdm(range(args.epochs)): local_del_w, local_norms = [], [] print(f'\n | Global Training Round : {epoch+1} |\n') global_model.train() m = max(int(args.frac * args.num_users), 1) idxs_users = np.random.choice(range(args.num_users), m, replace=False) # Adversaries' update for idx in idxs_users[0:args.nb_attackers]: local_model = LocalUpdate(args=args, dataset=train_dataset, idxs=user_groups[idx], logger=logger) del_w, zeta = local_model.poisoned_1to7( model=copy.deepcopy(global_model), change=1) local_del_w.append(copy.deepcopy(del_w)) local_norms.append(copy.deepcopy(zeta)) # Non-adversary updates for idx in idxs_users[args.nb_attackers:]: local_model = LocalUpdate(args=args, dataset=train_dataset, idxs=user_groups[idx], logger=logger) del_w, zeta = local_model.update_weights( model=copy.deepcopy(global_model), change=1) local_del_w.append(copy.deepcopy(del_w)) local_norms.append(copy.deepcopy(zeta)) # norm bound (e.g. median of norms) median_norms = args.norm_bound #np.median(local_norms) #args.norm_bound print(median_norms) # clip weight updates for i in range(len(idxs_users)): for param in local_del_w[i].values(): param /= max(1, local_norms[i] / median_norms) # average the clipped weight updates average_del_w = average_weights(local_del_w) # Update global model using clipped weight updates, and add noise # w_{t+1} = w_{t} + avg(del_w1 + del_w2 + ... + del_wc) + Noise for param, param_del_w in zip(global_weights.values(), average_del_w.values()): param += param_del_w param += torch.randn( param.size()) * args.noise_scale * median_norms / ( len(idxs_users)**0.5) global_model.load_state_dict(global_weights) # test accuracy test_acc, test_loss = test_inference(args, global_model, test_dataset) testing_accuracy.append(test_acc) print("Test accuracy") print(testing_accuracy) # save test accuracy np.savetxt( '../save/1to7Attack/GDP_{}_{}_seed{}_clip{}_scale{}.txt'.format( args.dataset, args.model, s, args.norm_bound, args.noise_scale), testing_accuracy)
for epoch in tqdm(range(args.epochs)): local_del_w, local_norms = [], [] print(f'\n | Global Training Round : {epoch+1} |\n') global_model.train() m = max(int(args.frac * args.num_users), 1) idxs_users = np.random.choice(range(args.num_users), m, replace=False) for idx in idxs_users: local_model = LocalUpdate(args=args, dataset=train_dataset, idxs=user_groups[idx], logger=logger) # Update local model idx del_w, zeta = local_model.update_weights( model=copy.deepcopy(global_model), change=1) local_del_w.append(copy.deepcopy(del_w)) local_norms.append(copy.deepcopy(zeta)) # median of norms median_norms = 100 #np.median(local_norms) # clip norms #for i in range(len(idxs_users)): # for param in local_del_w[i].values(): # param /= max(1, local_norms[i] / median_norms) # average local model weights average_del_w = average_weights(local_del_w) # Update model and add noise
local_weights, local_losses = [], []
# print(f'\n | Global Training Round : {epoch+1} |\n')

global_model.train()
m = max(int(args.frac * args.num_users), 1)
idxs_users = np.random.choice(range(args.num_users), m, replace=False)
count = 0

for idx in idxs_users:
    local_model = LocalUpdate(args=args, dataset=train_dataset,
                              idxs=user_groups[idx], logger=logger)
    w, loss = local_model.update_weights(model=copy.deepcopy(global_model))

    # inverting the gradient
    if cheat[idx] == -1:
        for key in w:
            w[key] = 2 * global_model.state_dict()[key] - w[key]

    # weighting the contribution
    if args.weight != 0.:
        for key in w:
            w[key] = global_model.state_dict()[key] + \
                (w[key] - global_model.state_dict()[key]) * weight[epoch, idx]

    # add the new weights to the list in every case except free-riding
    if cheat[idx] != 1:
def train(args, global_model, raw_data_train, raw_data_test): start_time = time.time() user_list = list(raw_data_train[2].keys())[:100] nusers = len(user_list) cluster_models = [copy.deepcopy(global_model)] del global_model cluster_models[0].to(device) cluster_assignments = [ user_list.copy() ] # all users assigned to single cluster_model in beginning if args.cfl_wsharing: shaccumulator = Accumulator() if args.frac == -1: m = args.cpr if m > nusers: raise ValueError( f"Clients Per Round: {args.cpr} is greater than number of users: {nusers}" ) else: m = max(int(args.frac * nusers), 1) print(f"Training {m} users each round") print(f"Trying to split after every {args.cfl_split_every} rounds") train_loss, train_accuracy = [], [] for epoch in range(args.epochs): # CFL if (epoch + 1) % args.cfl_split_every == 0: all_losses = [] new_cluster_models, new_cluster_assignments = [], [] for cidx, (cluster_model, assignments) in enumerate( tzip(cluster_models, cluster_assignments, desc="Try to split each cluster")): # First, train all models in cluster local_weights = [] for user in tqdm(assignments, desc="Train ALL users in the cluster", leave=False): local_model = LocalUpdate(args=args, raw_data=raw_data_train, user=user) w, loss = local_model.update_weights( copy.deepcopy(cluster_model), local_ep_override=args.cfl_local_epochs) local_weights.append(copy.deepcopy(w)) all_losses.append(loss) # record shared weights so far if args.cfl_wsharing: shaccumulator.add(local_weights) weight_updates = subtract_weights(local_weights, cluster_model.state_dict(), args) similarities = pairwise_cossim(weight_updates) max_norm = compute_max_update_norm(weight_updates) mean_norm = compute_mean_update_norm(weight_updates) # wandb.log({"mean_norm / eps1": mean_norm, "max_norm / eps2": max_norm}, commit=False) split = mean_norm < args.cfl_e1 and max_norm > args.cfl_e2 and len( assignments) > args.cfl_min_size print(f"CIDX: {cidx}[{len(assignments)}] elem") print( f"mean_norm: {(mean_norm):.4f}; max_norm: {(max_norm):.4f}" ) print(f"split? 
{split}") if split: c1, c2 = cluster_clients(similarities) assignments1 = [assignments[i] for i in c1] assignments2 = [assignments[i] for i in c2] new_cluster_assignments += [assignments1, assignments2] print( f"Cluster[{cidx}][{len(assignments)}] -> ({len(assignments1)}, {len(assignments2)})" ) local_weights1 = [local_weights[i] for i in c1] local_weights2 = [local_weights[i] for i in c2] cluster_model.load_state_dict( average_weights(local_weights1)) new_cluster_models.append(cluster_model) cluster_model2 = copy.deepcopy(cluster_model) cluster_model2.load_state_dict( average_weights(local_weights2)) new_cluster_models.append(cluster_model2) else: cluster_model.load_state_dict( average_weights(local_weights)) new_cluster_models.append(cluster_model) new_cluster_assignments.append(assignments) # Write everything cluster_models = new_cluster_models if args.cfl_wsharing: shaccumulator.write(cluster_models) shaccumulator.flush() cluster_assignments = new_cluster_assignments train_loss.append(sum(all_losses) / len(all_losses)) # Regular FedAvg else: all_losses = [] # Do FedAvg for each cluster for cluster_model, assignments in tzip( cluster_models, cluster_assignments, desc="Train each cluster through FedAvg"): if args.sample_dist == "uniform": sampled_users = random.sample(assignments, m) else: xs = np.linspace(-args.sigm_domain, args.sigm_domain, len(assignments)) sigmdist = 1 / (1 + np.exp(-xs)) sampled_users = np.random.choice(assignments, m, p=sigmdist / sigmdist.sum()) local_weights = [] for user in tqdm(sampled_users, desc="Training Selected Users", leave=False): local_model = LocalUpdate(args=args, raw_data=raw_data_train, user=user) w, loss = local_model.update_weights( copy.deepcopy(cluster_model)) local_weights.append(copy.deepcopy(w)) all_losses.append(loss) # update global and shared weights if args.cfl_wsharing: shaccumulator.add(local_weights) new_cluster_weights = average_weights(local_weights) cluster_model.load_state_dict(new_cluster_weights) if args.cfl_wsharing: shaccumulator.write(cluster_models) shaccumulator.flush() train_loss.append(sum(all_losses) / len(all_losses)) # Calculate avg training accuracy over all users at every epoch # regardless if it was a CFL step or not test_acc, test_loss = [], [] for cluster_model, assignments in zip(cluster_models, cluster_assignments): for user in assignments: local_model = LocalUpdate(args=args, raw_data=raw_data_test, user=user) acc, loss = local_model.inference(model=cluster_model) test_acc.append(acc) test_loss.append(loss) train_accuracy.append(sum(test_acc) / len(test_acc)) wandb.log({ "Train Loss": train_loss[-1], "Test Accuracy": (100 * train_accuracy[-1]), "Clusters": len(cluster_models) }) print( f"Train Loss: {train_loss[-1]:.4f}\t Test Accuracy: {(100 * train_accuracy[-1]):.2f}%" ) print(f"Results after {args.epochs} global rounds of training:") print("Avg Train Accuracy: {:.2f}%".format(100 * train_accuracy[-1])) print(f"Total Run Time: {(time.time() - start_time):0.4f}")
val_loss_pre, counter = 0, 0 for epoch in tqdm(range(args.epochs)): local_weights, local_losses = [], [] print(f'\n | Global Training Round : {epoch+1} |\n') global_model.train() m = max(int(args.frac * args.num_users), 1) idxs_users = np.random.choice(range(args.num_users), m, replace=False) for idx in idxs_users: local_model = LocalUpdate(args=args, dataset=train_dataset, idxs=user_groups[idx], logger=logger) w, loss = local_model.update_weights( model=copy.deepcopy(global_model), global_round=epoch) local_weights.append(copy.deepcopy(w)) local_losses.append(copy.deepcopy(loss)) # update global weights global_weights = average_weights(local_weights) # update global weights global_model.load_state_dict(global_weights) loss_avg = sum(local_losses) / len(local_losses) train_loss.append(loss_avg) # Calculate avg training accuracy over all users at every epoch list_acc, list_loss = [], [] global_model.eval()
print_every = 2 val_loss_pre, counter = 0, 0 for epoch in tqdm(range(args.epochs)): local_weights, local_losses = [], [] print(f'\n | Global Training Round : {epoch + 1} |\n') global_model.train() m = max(int(args.frac * args.num_users), 1) idxs_users = np.random.choice(range(args.num_users), m, replace=False) for idx in idxs_users: # print("user id ", idx) local_model = LocalUpdate(args=args, dataset=train_dataset, idxs=user_groups[idx], logger=logger) w, loss = local_model.update_weights( copy.deepcopy(global_model), epoch, args) local_weights.append(copy.deepcopy(w)) local_losses.append(copy.deepcopy(loss)) # update global weights global_weights = average_weights(local_weights) # update global weights global_model.load_state_dict(global_weights) loss_avg = sum(local_losses) / len(local_losses) print('Round {:3d}, Average loss {:.3f}'.format(epoch, loss_avg)) train_loss.append(loss_avg) # Calculate avg training accuracy over all users at every epoch list_acc, list_loss = [], []