for epoch in tqdm(range(args.epochs)):
    local_weights, local_losses = [], []
    print(f'\n | Global Training Round : {epoch + 1} |\n')

    global_model.train()
    for r in range(args.num_users):
        m = max(int(args.frac * args.num_users), 1)
        # Randomly select a `frac` fraction of the `num_users` users for training
        idxs_users = np.random.choice(range(args.num_users), m, replace=False)
        print("Users selected:", idxs_users)

        for idx in idxs_users:
            local_model = LocalUpdate(args=args, dataset=train_dataset,
                                      idxs=user_groups[idx], logger=logger)
            w, loss, t_model = local_model.update_weights(
                model=copy.deepcopy(models[idx]), global_round=epoch)
            # print("local losses:", loss)
            models[idx].load_state_dict(w)
            # collect losses for the round average computed below
            local_losses.append(copy.deepcopy(loss))
            version_matrix[idx, idx] = version_matrix[idx, idx] + 1

        idx_user = np.random.choice(range(args.num_users), 1, replace=False)[0]
        v_old = np.reshape(version_matrix[idx_user, :], -1)
        v_new = np.zeros(args.num_users)
        for i in range(args.num_users):
            v_new[i] = version_matrix[i, i]

    # model aggregation
    loss_avg = sum(local_losses) / len(local_losses)
    train_loss.append(loss_avg)

    # ============== EVAL ==============
    # Calculate avg training accuracy over all users at every epoch
    list_acc, list_loss = [], []
    global_model.eval()
    # print("========== idx ========== ", idx)
    for c in range(args.num_users):
        # for c in range(cluster_size):
        # C = np.random.choice(keylist, int(args.frac * args.num_users), replace=False)  # random set of clients
        # print("C: ", C)
        # for c in C:
        local_model = LocalUpdate(args=args, dataset=train_dataset,
                                  idxs=user_groups[c], logger=logger)
        acc, loss = local_model.inference(model=global_model, dtype=torch.float16)
        list_acc.append(acc)
        list_loss.append(loss)
    train_accuracy.append(sum(list_acc) / len(list_acc))

    testacc_check = 100 * train_accuracy[-1]

    # print global training loss after every 'print_every' rounds
    if (epoch + 1) % print_every == 0:
        print(f' \nAvg Training Stats after {epoch + 1} global rounds:')
        print(f'Training Loss : {np.mean(np.array(train_loss))}')
        print('Train Accuracy: {:.2f}% \n'.format(100 * train_accuracy[-1]))
NUM_TRAINING_IMAGES = 60000
priv_accountant = accountant.GaussianMomentsAccountant(args.num_users)

for epoch in tqdm(range(args.epochs)):
    local_weights, local_losses = [], []
    print(f'\n | Global Training Round : {epoch + 1} |\n')

    global_model.train()
    m = max(int(args.frac * args.num_users), 1)
    idxs_users = np.random.choice(range(args.num_users), m, replace=False)

    l2norms = []
    for idx in idxs_users:
        print("user id ", idx)
        local_model = LocalUpdate(args=args, dataset=train_dataset,
                                  idxs=user_groups[idx], logger=logger)
        w, loss, l2norm = local_model.central_update_weights(
            copy.deepcopy(global_model), epoch, args)
        l2norms.append(l2norm)
        local_weights.append(copy.deepcopy(w))
        local_losses.append(copy.deepcopy(loss))

    # sensitivity from the median client norm sets the noise level
    sensitivity = statistics.median(l2norms).item()
    args.sigma = (sensitivity * args.noise_scale) / (args.frac * args.num_users)

    # update global weights
    global_weights = average_weights(local_weights)
    global_model.load_state_dict(global_weights)

    # add noise
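    # A minimal sketch of one plausible completion of the "add noise" step
    # above, assuming Gaussian noise with std args.sigma is added to every
    # floating-point tensor of the averaged global weights (integer buffers
    # such as BatchNorm counters are skipped). This is an assumption about
    # the codebase, not its confirmed implementation.
    with torch.no_grad():
        for key, param in global_weights.items():
            if param.dtype.is_floating_point:
                param.add_(torch.randn_like(param) * args.sigma)
    global_model.load_state_dict(global_weights)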
def poisoned_NoDefense(nb_attackers, seed=1):
    # define paths
    path_project = os.path.abspath('..')
    logger = SummaryWriter('../logs')

    args = args_parser()
    exp_details(args)

    # set seed
    torch.manual_seed(seed)
    np.random.seed(seed)

    # device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # load dataset and user groups
    train_dataset, test_dataset, user_groups = get_dataset(args)

    # BUILD MODEL
    if args.model == 'cnn':
        # Convolutional neural network
        if args.dataset == 'mnist':
            global_model = CNNMnist(args=args)
        elif args.dataset == 'fmnist':
            global_model = CNNFashion_Mnist(args=args)
        elif args.dataset == 'cifar':
            global_model = CNNCifar(args=args)
    elif args.model == 'mlp':
        # Multi-layer perceptron
        img_size = train_dataset[0][0].shape
        len_in = 1
        for x in img_size:
            len_in *= x
        global_model = MLP(dim_in=len_in, dim_hidden=64,
                           dim_out=args.num_classes)
    else:
        exit('Error: unrecognized model')

    # Set the model to train and send it to device.
    global_model.to(device)
    global_model.train()
    print(global_model)

    # copy weights
    global_weights = global_model.state_dict()

    # backdoor model
    dummy_model = copy.deepcopy(global_model)
    dummy_model.load_state_dict(torch.load('../save/all_5_model.pth'))
    dummy_norm = 0
    for x in dummy_model.state_dict().values():
        dummy_norm += x.norm(2).item() ** 2
    dummy_norm = dummy_norm ** (1. / 2)

    # testing accuracy for global model
    testing_accuracy = [0.1]

    for epoch in tqdm(range(args.epochs)):
        local_del_w = []
        print(f'\n | Global Training Round : {epoch + 1} |\n')

        global_model.train()
        m = max(int(args.frac * args.num_users), 1)
        idxs_users = np.random.choice(range(args.num_users), m, replace=False)

        # Adversary updates: submit the backdoor model, boosted so it survives averaging
        for idx in idxs_users[0:nb_attackers]:
            print("evil")
            local_model = LocalUpdate(args=args, dataset=train_dataset,
                                      idxs=user_groups[idx], logger=logger)
            # del_w, _ = local_model.poisoned_SGA(model=copy.deepcopy(global_model), change=1)
            w = copy.deepcopy(dummy_model)

            # compute change in parameters, scaled by m / nb_attackers, and its norm
            zeta = 0
            for del_w, w_old in zip(w.parameters(), global_model.parameters()):
                del_w.data -= copy.deepcopy(w_old.data)
                del_w.data *= m / nb_attackers
                del_w.data += copy.deepcopy(w_old.data)
                zeta += del_w.norm(2).item() ** 2
            zeta = zeta ** (1. / 2)

            del_w = copy.deepcopy(w.state_dict())
            local_del_w.append(copy.deepcopy(del_w))

        # Non-adversarial updates
        for idx in idxs_users[nb_attackers:]:
            print("good")
            local_model = LocalUpdate(args=args, dataset=train_dataset,
                                      idxs=user_groups[idx], logger=logger)
            del_w, _ = local_model.update_weights(
                model=copy.deepcopy(global_model), change=1)
            local_del_w.append(copy.deepcopy(del_w))

        # average local updates
        average_del_w = average_weights(local_del_w)

        # Update global model: w_{t+1} = w_t + average_del_w
        for param, param_del_w in zip(global_weights.values(),
                                      average_del_w.values()):
            param += param_del_w
        global_model.load_state_dict(global_weights)

        # test accuracy
        test_acc, test_loss = test_inference(args, global_model, test_dataset)
        testing_accuracy.append(test_acc)

        print("Test accuracy")
        print(testing_accuracy)

    # save test accuracy
    np.savetxt('../save/RandomAttack/NoDefense_iid_{}_{}_attackers{}_seed{}.txt'.
               format(args.dataset, args.model, nb_attackers, seed),
               testing_accuracy)
def FedProto_taskheter(args, train_dataset, test_dataset, user_groups,
                       user_groups_lt, local_model_list, classes_list):
    summary_writer = SummaryWriter('../tensorboard/' + args.dataset +
                                   '_fedproto_' + str(args.ways) + 'w' +
                                   str(args.shots) + 's' + str(args.stdev) +
                                   'e_' + str(args.num_users) + 'u_' +
                                   str(args.rounds) + 'r')

    global_protos = []
    idxs_users = np.arange(args.num_users)

    train_loss, train_accuracy = [], []
    for round in tqdm(range(args.rounds)):
        local_weights, local_losses, local_protos = [], [], {}
        print(f'\n | Global Training Round : {round + 1} |\n')

        proto_loss = 0
        for idx in idxs_users:
            local_model = LocalUpdate(args=args, dataset=train_dataset,
                                      idxs=user_groups[idx])
            w, loss, acc, protos = local_model.update_weights_het(
                args, idx, global_protos,
                model=copy.deepcopy(local_model_list[idx]), global_round=round)
            agg_protos = agg_func(protos)

            local_weights.append(copy.deepcopy(w))
            local_losses.append(copy.deepcopy(loss['total']))
            local_protos[idx] = agg_protos

            summary_writer.add_scalar('Train/Loss/user' + str(idx + 1),
                                      loss['total'], round)
            summary_writer.add_scalar('Train/Loss1/user' + str(idx + 1),
                                      loss['1'], round)
            summary_writer.add_scalar('Train/Loss2/user' + str(idx + 1),
                                      loss['2'], round)
            summary_writer.add_scalar('Train/Acc/user' + str(idx + 1),
                                      acc, round)
            proto_loss += loss['2']

        # update local model weights
        local_weights_list = local_weights
        for idx in idxs_users:
            local_model = copy.deepcopy(local_model_list[idx])
            local_model.load_state_dict(local_weights_list[idx], strict=True)
            local_model_list[idx] = local_model

        # update global prototypes
        global_protos = proto_aggregation(local_protos)

        loss_avg = sum(local_losses) / len(local_losses)
        train_loss.append(loss_avg)

    acc_list_l, acc_list_g, loss_list = test_inference_new_het_lt(
        args, local_model_list, test_dataset, classes_list, user_groups_lt,
        global_protos)
    print('For all users (with protos), mean of test acc is {:.5f}, '
          'std of test acc is {:.5f}'.format(np.mean(acc_list_g),
                                             np.std(acc_list_g)))
    print('For all users (w/o protos), mean of test acc is {:.5f}, '
          'std of test acc is {:.5f}'.format(np.mean(acc_list_l),
                                             np.std(acc_list_l)))
    print('For all users (with protos), mean of proto loss is {:.5f}, '
          'std of proto loss is {:.5f}'.format(np.mean(loss_list),
                                               np.std(loss_list)))

    # save protos
    if args.dataset == 'mnist':
        save_protos(args, local_model_list, test_dataset, user_groups_lt)
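# Hedged sketches of the two prototype helpers called above (agg_func,
# proto_aggregation); FedProto averages prototypes class-wise, first within a
# client, then across clients. These signatures and the dict-of-lists layout
# are assumptions matching the call sites, not the confirmed implementation.
import torch

def agg_func(protos):
    """Average the per-sample prototypes of each class within one client."""
    return {label: torch.stack(plist).mean(dim=0) if isinstance(plist, list) else plist
            for label, plist in protos.items()}

def proto_aggregation(local_protos):
    """Average each class prototype over all clients that hold that class."""
    collected = {}
    for protos in local_protos.values():
        for label, proto in protos.items():
            collected.setdefault(label, []).append(proto)
    return {label: torch.stack(plist).mean(dim=0)
            for label, plist in collected.items()}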
def poisoned_pixel_LDP(norm_bound, noise_scale, nb_attackers, seed=1):
    start_time = time.time()

    # define paths
    path_project = os.path.abspath('..')
    logger = SummaryWriter('../logs')

    args = args_parser()
    exp_details(args)

    # set seed
    torch.manual_seed(seed)
    np.random.seed(seed)

    # device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # load dataset and user groups
    train_dataset, test_dataset, user_groups = get_dataset(args)

    # BUILD MODEL
    if args.model == 'cnn':
        # Convolutional neural network
        if args.dataset == 'mnist':
            global_model = CNNMnist(args=args)
        elif args.dataset == 'fmnist':
            global_model = CNNFashion_Mnist(args=args)
        elif args.dataset == 'cifar':
            global_model = CNNCifar(args=args)
    elif args.model == 'mlp':
        # Multi-layer perceptron
        img_size = train_dataset[0][0].shape
        len_in = 1
        for x in img_size:
            len_in *= x
        global_model = MLP(dim_in=len_in, dim_hidden=64,
                           dim_out=args.num_classes)
    else:
        exit('Error: unrecognized model')

    # Set the model to train and send it to device.
    global_model.to(device)
    global_model.train()
    print(global_model)

    # copy weights
    global_weights = global_model.state_dict()

    # testing accuracy for global model
    testing_accuracy = [0.1]
    backdoor_accuracy = [0.1]

    for epoch in tqdm(range(args.epochs)):
        local_w, local_norms = [], []
        print(f'\n | Global Training Round : {epoch + 1} |\n')

        global_model.train()
        m = max(int(args.frac * args.num_users), 1)
        idxs_users = np.random.choice(range(args.num_users), m, replace=False)

        # Poisonous updates
        for idx in idxs_users[0:nb_attackers]:
            local_model = LocalUpdate(args=args, dataset=train_dataset,
                                      idxs=user_groups[idx], logger=logger)
            w, _ = local_model.pixel_ldp(model=copy.deepcopy(global_model),
                                         norm_bound=norm_bound,
                                         noise_scale=noise_scale)
            local_w.append(copy.deepcopy(w))

        # Regular updates
        for idx in idxs_users[nb_attackers:]:
            local_model = LocalUpdate(args=args, dataset=train_dataset,
                                      idxs=user_groups[idx], logger=logger)
            w, _ = local_model.dp_sgd(model=copy.deepcopy(global_model),
                                      norm_bound=norm_bound,
                                      noise_scale=noise_scale)
            local_w.append(copy.deepcopy(w))

        # update global weights
        global_weights = average_weights(local_w)
        global_model.load_state_dict(global_weights)

        # test accuracy
        test_acc, test_loss, backdoor = test_backdoor_pixel(
            args, global_model, test_dataset)
        testing_accuracy.append(test_acc)
        backdoor_accuracy.append(backdoor)

        print("Testing & Backdoor accuracies")
        print(testing_accuracy)
        print(backdoor_accuracy)

    # save test accuracy
    np.savetxt(
        '../save/PixelAttack/TestAcc/LDP_iid_{}_{}_clip{}_scale{}_attackers{}_seed{}.txt'
        .format(args.dataset, args.model, norm_bound, noise_scale,
                nb_attackers, seed), testing_accuracy)
    np.savetxt(
        '../save/PixelAttack/BackdoorAcc/LDP_iid_{}_{}_clip{}_scale{}_attackers{}_seed{}.txt'
        .format(args.dataset, args.model, norm_bound, noise_scale,
                nb_attackers, seed), backdoor_accuracy)
val_loss_pre, counter = 0, 0

# testing accuracy for global model
testing_accuracy = [0]

for epoch in tqdm(range(args.epochs)):
    local_del_w, local_norms = [], []
    print(f'\n | Global Training Round : {epoch + 1} |\n')

    global_model.train()
    m = max(int(args.frac * args.num_users), 1)
    idxs_users = np.random.choice(range(args.num_users), m, replace=False)

    for idx in idxs_users:
        local_model = LocalUpdate(args=args, dataset=train_dataset,
                                  idxs=user_groups[idx], logger=logger)
        # Update local model idx
        del_w, zeta = local_model.update_weights(
            model=copy.deepcopy(global_model), change=1)
        local_del_w.append(copy.deepcopy(del_w))
        local_norms.append(copy.deepcopy(zeta))

    # norm bound, fixed here instead of the median of the local norms
    median_norms = 100  # np.median(local_norms)

    # clip norms
    # for i in range(len(idxs_users)):
    #     for param in local_del_w[i].values():
    #         param /= max(1, local_norms[i] / median_norms)
def poisoned_random_CDP(seed=1):
    # Central DP to protect against attackers
    start_time = time.time()

    # define paths
    path_project = os.path.abspath('..')
    logger = SummaryWriter('../logs')

    args = args_parser()
    exp_details(args)

    # set seed
    torch.manual_seed(seed)
    np.random.seed(seed)

    # device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # load dataset and user groups
    train_dataset, test_dataset, user_groups = get_dataset(args)

    # BUILD MODEL
    if args.model == 'cnn':
        # Convolutional neural network
        if args.dataset == 'mnist':
            global_model = CNNMnist(args=args)
        elif args.dataset == 'fmnist':
            global_model = CNNFashion_Mnist(args=args)
        elif args.dataset == 'cifar':
            global_model = CNNCifar(args=args)
    elif args.model == 'mlp':
        # Multi-layer perceptron
        img_size = train_dataset[0][0].shape
        len_in = 1
        for x in img_size:
            len_in *= x
        global_model = MLP(dim_in=len_in, dim_hidden=64,
                           dim_out=args.num_classes)
    else:
        exit('Error: unrecognized model')

    # Set the model to train and send it to device.
    global_model.to(device)
    global_model.train()
    print(global_model)

    # copy weights
    global_weights = global_model.state_dict()

    # testing accuracy for global model
    testing_accuracy = [0.1]
    backdoor_accuracy = [0.1]

    for epoch in tqdm(range(args.epochs)):
        local_del_w, local_norms = [], []
        print(f'\n | Global Training Round : {epoch + 1} |\n')

        global_model.train()
        m = max(int(args.frac * args.num_users), 1)
        idxs_users = np.random.choice(range(args.num_users), m, replace=False)

        # Adversaries' updates
        for idx in idxs_users[0:args.nb_attackers]:
            local_model = LocalUpdate(args=args, dataset=train_dataset,
                                      idxs=user_groups[idx], logger=logger)
            del_w, zeta = local_model.poisoned_1to7(
                model=copy.deepcopy(global_model), change=1)
            local_del_w.append(copy.deepcopy(del_w))
            local_norms.append(copy.deepcopy(zeta))

        # Non-adversary updates
        for idx in idxs_users[args.nb_attackers:]:
            local_model = LocalUpdate(args=args, dataset=train_dataset,
                                      idxs=user_groups[idx], logger=logger)
            del_w, zeta = local_model.update_weights(
                model=copy.deepcopy(global_model), change=1)
            local_del_w.append(copy.deepcopy(del_w))
            local_norms.append(copy.deepcopy(zeta))

        # norm bound (e.g. median of norms)
        median_norms = args.norm_bound  # np.median(local_norms)
        print(median_norms)

        # clip weight updates
        for i in range(len(idxs_users)):
            for param in local_del_w[i].values():
                param /= max(1, local_norms[i] / median_norms)

        # average the clipped weight updates
        average_del_w = average_weights(local_del_w)

        # Update global model using clipped weight updates, and add noise:
        # w_{t+1} = w_t + avg(del_w1 + del_w2 + ... + del_wc) + Noise
        for param, param_del_w in zip(global_weights.values(),
                                      average_del_w.values()):
            param += param_del_w
            param += torch.randn(param.size()) * args.noise_scale * \
                median_norms / (len(idxs_users) ** 0.5)
        global_model.load_state_dict(global_weights)

        # test accuracy
        test_acc, test_loss = test_inference(args, global_model, test_dataset)
        testing_accuracy.append(test_acc)

        print("Test accuracy")
        print(testing_accuracy)

    # save test accuracy
    np.savetxt(
        '../save/1to7Attack/GDP_{}_{}_seed{}_clip{}_scale{}.txt'.format(
            args.dataset, args.model, seed, args.norm_bound,
            args.noise_scale), testing_accuracy)
def main():
    start_time = time.time()

    # define paths
    path_project = os.path.abspath('..')
    logger = SummaryWriter('../logs')

    args = args_parser()
    exp_details(args)

    if args.gpu:
        torch.cuda.set_device(0)
    device = 'cuda' if args.gpu else 'cpu'

    # load dataset and user groups
    train_dataset, test_dataset, user_groups = get_dataset(args)
    args.num_users = len(user_groups)

    # BUILD MODEL
    if args.model == 'cnn':
        # Convolutional neural network
        if args.dataset == 'mnist':
            global_model = CNNMnist(args=args)
        elif args.dataset == 'fmnist':
            global_model = CNNFashion_Mnist(args=args)
        elif args.dataset == 'cifar':
            global_model = CNNCifar(args=args)
    elif args.model == 'mlp':
        # Multi-layer perceptron
        img_size = train_dataset[0][0].shape
        len_in = 1
        for x in img_size:
            len_in *= x
        global_model = MLP(dim_in=len_in, dim_hidden=64,
                           dim_out=args.num_classes)
    else:
        exit('Error: unrecognized model')

    # Set the model to train and send it to device.
    global_model.to(device)
    global_model.train()

    # copy weights
    global_weights = global_model.state_dict()

    # Training
    train_loss, train_accuracy = [], []
    val_acc_list, net_list = [], []
    cv_loss, cv_acc = [], []
    print_every = 2
    val_loss_pre, counter = 0, 0

    # Read in which participants currently take part in the training
    # (0 means the participant is included, 1 means it is not)
    users = []
    fp = open('users.txt', "r")
    x = fp.readline().split(' ')
    for i in x:
        if i != '':
            users.append(int(i))
    fp.close()

    # for epoch in tqdm(range(args.epochs)):
    for epoch in range(args.epochs):
        local_weights, local_losses = [], []
        # print(f'\n | Global Training Round : {epoch+1} |\n')

        global_model.train()
        m = max(int(args.frac * args.num_users), 1)
        idxs_users = np.random.choice(range(args.num_users), m, replace=False)

        for idx in idxs_users:
            local_model = LocalUpdate(args=args, dataset=train_dataset,
                                      idxs=user_groups[idx], logger=logger)
            w, loss = local_model.update_weights(
                model=copy.deepcopy(global_model), global_round=epoch)
            local_weights.append(copy.deepcopy(w))
            local_losses.append(copy.deepcopy(loss))

        # update global weights
        global_weights = average_weights(local_weights)
        global_model.load_state_dict(global_weights)

        loss_avg = sum(local_losses) / len(local_losses)
        train_loss.append(loss_avg)

        # Calculate avg training accuracy over all users at every epoch
        list_acc, list_loss = [], []
        global_model.eval()
        for c in range(args.num_users):
            local_model = LocalUpdate(args=args, dataset=train_dataset,
                                      idxs=user_groups[c], logger=logger)
            acc, loss = local_model.inference(model=global_model)
            list_acc.append(acc)
            list_loss.append(loss)
        train_accuracy.append(sum(list_acc) / len(list_acc))

        # print global training loss after every 'print_every' rounds
        '''if (epoch+1) % print_every == 0:
            print(f' \nAvg Training Stats after {epoch+1} global rounds:')
            print(f'Training Loss : {np.mean(np.array(train_loss))}')
            print('Train Accuracy: {:.2f}% \n'.format(100*train_accuracy[-1]))'''

    # Test inference after completion of training
    # Read in which labels were assigned to each participant.
    ftrain = open('traindataset.txt')
    testlabels = []
    line = ftrain.readline()
    while line != "":
        sor = line.split(' ')
        array = []
        for i in sor:
            array.append(int(i))
        testlabels.append(array)
        line = ftrain.readline()
    ftrain.close()

    print("USERS LABELS")
    print(testlabels)

    # Run the test for every possible coalition of participants
    for j in range((2 ** args.num_users) - 1):
        binary = numberToBinary(j, len(users))
        test_acc, test_loss = test_inference(args, global_model, test_dataset,
                                             testlabels, binary, len(binary))

        # Print the test results
        print("PARTICIPANTS")
        print(users)
        print("TEST NUMBER")
        print(j)
        print("TEST BINARY")
        print(binary)
        print("TEST LABELS")
        print(testlabels)
        print("Test Accuracy")
        print("{:.2f}%".format(100 * test_acc))
        print()

    # Saving the objects train_loss and train_accuracy:
    '''file_name = '../save/objects/{}_{}_{}_C[{}]_iid[{}]_E[{}]_B[{}].pkl'.\
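    # Hypothetical implementation of the numberToBinary helper used in the
    # coalition loop above: it expands coalition index j into a fixed-width
    # 0/1 membership vector, one entry per participant. A sketch matching the
    # call site, not the confirmed helper.
    def numberToBinary(j, width):
        return [int(bit) for bit in format(j, '0{}b'.format(width))]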
for epoch in tqdm(range(args.epochs)):
    local_weights, local_losses, local_accuracies = [], [], []
    local_bn_rm, local_bn_rv = [], []
    print(f'\n | Global Training Round : {epoch + 1} |\n')

    global_model.train()
    m = max(int(args.frac * args.num_users), 1)
    # idxs_users = np.random.choice(range(args.num_users), m, replace=False)
    # idxs_users = [76, 54, 80, 33, 85, 84, 5, 73, 12, 91]
    num_users = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
                 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
    idxs_users = np.random.choice(range(30), m, replace=False)

    for idx in idxs_users:
        print("======================================================== client id : ", idx)
        local_model = LocalUpdate(args=args, dataset=train_dataset,
                                  idxs=user_groups[idx], logger=logger)
        w, loss, accuracy, list_rm, list_rv = local_model.update_weights(
            model=copy.deepcopy(global_model), global_round=epoch)
        # print("---------------------------", loss)
        local_weights.append(copy.deepcopy(w))
        local_losses.append(copy.deepcopy(loss))
        local_accuracies.append(copy.deepcopy(accuracy))
        '''
        print(list_rm.count(None))
        if list_rm.count(None) == 0:
            local_BN_Statistics.append(list_rm)
        rm_dict[idx] = list_rm
        rv_dict[idx] = list_rv
        '''
def train(args, global_model, raw_data_train, raw_data_test):
    start_time = time.time()

    user_list = list(raw_data_train[2].keys())
    global_model.to(device)
    global_weights = global_model.state_dict()

    if args.frac == -1:
        m = args.cpr
        if m > len(user_list):
            raise ValueError(
                f"Clients Per Round: {args.cpr} is greater than number of users: {len(user_list)}")
    else:
        m = max(int(args.frac * len(user_list)), 1)
    print(f"Training {m} users each round")

    train_loss, train_accuracy = [], []
    for epoch in range(args.epochs):
        local_weights, local_losses = [], []
        print(f"Global Training Round: {epoch + 1}/{args.epochs}")

        if args.sample_dist == "uniform":
            sampled_users = random.sample(user_list, m)
        else:
            xs = np.linspace(-args.sigm_domain, args.sigm_domain,
                             len(user_list))
            sigmdist = 1 / (1 + np.exp(-xs))
            sampled_users = np.random.choice(user_list, m,
                                             p=sigmdist / sigmdist.sum())

        for user in tqdm(sampled_users):
            local_model = LocalUpdate(args=args, raw_data=raw_data_train,
                                      user=user)
            w, loss = local_model.update_weights(copy.deepcopy(global_model))
            local_weights.append(copy.deepcopy(w))
            local_losses.append(loss)

        # update global weights
        global_weights = average_weights(local_weights)
        global_model.load_state_dict(global_weights)
        train_loss.append(sum(local_losses) / len(local_losses))

        # Calculate avg training accuracy over all users at every epoch
        test_acc, test_loss = [], []
        for user in user_list:
            local_model = LocalUpdate(args=args, raw_data=raw_data_test,
                                      user=user)
            acc, loss = local_model.inference(model=global_model)
            test_acc.append(acc)
            test_loss.append(loss)
        train_accuracy.append(sum(test_acc) / len(test_acc))

        wandb.log({
            "Train Loss": train_loss[-1],
            "Test Accuracy": (100 * train_accuracy[-1])
        })
        print(f"Train Loss: {train_loss[-1]:.4f}\t "
              f"Test Accuracy: {(100 * train_accuracy[-1]):.2f}%")

    print(f"Results after {args.epochs} global rounds of training:")
    print("Avg Train Accuracy: {:.2f}%".format(100 * train_accuracy[-1]))
    print(f"Total Run Time: {(time.time() - start_time):0.4f}")
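# A minimal sketch of the average_weights helper assumed throughout these
# training loops: an unweighted element-wise mean of the clients' state_dicts.
# (The variants called elsewhere with extra arguments presumably weight
# clients, e.g. by sample count; that is not shown here.)
import copy
import torch

def average_weights(local_weights):
    """Return the element-wise average of a list of model state_dicts."""
    w_avg = copy.deepcopy(local_weights[0])
    for key in w_avg.keys():
        for w in local_weights[1:]:
            w_avg[key] += w[key]
        w_avg[key] = torch.div(w_avg[key], len(local_weights))
    return w_avg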
def main():
    start_time = time.time()

    # define paths
    path_project = os.path.abspath('..')
    logger = SummaryWriter('../logs')

    args = args_parser()
    args = adatok.arguments(args)
    exp_details(args)

    if args.gpu:
        torch.cuda.set_device(args.gpu)
    device = 'cuda' if args.gpu else 'cpu'

    # load dataset and user groups
    train_dataset, test_dataset, user_groups = get_dataset(args)

    if adatok.data.image_initialization == True:
        adatok.data.image_initialization = False
        return

    # BUILD MODEL
    if args.model == 'cnn':
        # Convolutional neural network
        if args.dataset == 'mnist':
            global_model = CNNMnist(args=args)
        elif args.dataset == 'fmnist':
            global_model = CNNFashion_Mnist(args=args)
        elif args.dataset == 'cifar':
            global_model = CNNCifar(args=args)
    elif args.model == 'mlp':
        # Multi-layer perceptron
        img_size = train_dataset[0][0].shape
        len_in = 1
        for x in img_size:
            len_in *= x
        global_model = MLP(dim_in=len_in, dim_hidden=64,
                           dim_out=args.num_classes)
    else:
        exit('Error: unrecognized model')

    # Set the model to train and send it to device.
    global_model.to(device)
    global_model.train()
    # print(global_model)

    # copy weights
    global_weights = global_model.state_dict()

    # Training
    train_loss, train_accuracy = [], []
    val_acc_list, net_list = [], []
    cv_loss, cv_acc = [], []
    print_every = 2
    val_loss_pre, counter = 0, 0

    for epoch in tqdm(range(args.epochs)):
        local_weights, local_losses = [], []
        # print(f'\n | Global Training Round : {epoch+1} |\n')

        global_model.train()
        m = max(int(args.frac * args.num_users), 1)
        idxs_users = np.random.choice(range(args.num_users), m, replace=False)

        for idx in idxs_users:
            local_model = LocalUpdate(args=args, dataset=train_dataset,
                                      idxs=user_groups[idx], logger=logger)
            w, loss = local_model.update_weights(
                model=copy.deepcopy(global_model), global_round=epoch)
            local_weights.append(copy.deepcopy(w))
            local_losses.append(copy.deepcopy(loss))

        # update global weights
        global_weights = average_weights(local_weights)
        global_model.load_state_dict(global_weights)

        loss_avg = sum(local_losses) / len(local_losses)
        train_loss.append(loss_avg)

        # Calculate avg training accuracy over all users at every epoch
        list_acc, list_loss = [], []
        global_model.eval()
        for c in range(args.num_users):
            local_model = LocalUpdate(args=args, dataset=train_dataset,
                                      idxs=user_groups[c], logger=logger)
            acc, loss = local_model.inference(model=global_model)
            list_acc.append(acc)
            list_loss.append(loss)
        train_accuracy.append(sum(list_acc) / len(list_acc))

        # print global training loss after every 'print_every' rounds
        '''if (epoch+1) % print_every == 0:
            print(f' \nAvg Training Stats after {epoch+1} global rounds:')
            print(f'Training Loss : {np.mean(np.array(train_loss))}')
            print('Train Accuracy: {:.2f}% \n'.format(100*train_accuracy[-1]))'''

        # Test inference after completion of training
        for i in adatok.data.test_groups_in_binary:
            adatok.data.actual_test_group_in_binary = i
            test_acc, test_loss = test_inference(args, global_model,
                                                 test_dataset)
            print("Results")
            print(epoch)
            print(adatok.data.actual_train_group_in_binary)
            print(adatok.data.actual_test_group_in_binary)
            print(test_acc)
            print(test_loss)
'''
        # Printing the ids of workers which are still listening for the next
        # round's communication.
        print("Residue workers: ", str(workers))
        for i in workers:
            print("Sending exit signal to residue worker: ", str(i))
            comm.send(-1, dest=i, tag=i)
        break  # break out of the epoch loop
    elif epoch < args.epochs - 1:
        # Add the encrypted weights received
        global_weights = average_weights_enc()
elif rank >= 1:
    local_model = LocalUpdate(args=args, dataset=train_dataset, u_id=rank,
                              idxs=user_groups, logger=logger)
    u_step = 0
    # global model is received from the parameter server
    model = comm.recv(source=0, tag=rank)
    print("Worker ", str(rank), " received global model")
    flattened_lengths = {
        param_tensor: model.state_dict()[param_tensor].numel()
        for param_tensor in model.state_dict()
    }
    # dictionary of shapes of tensors
    shapes = {
        param_tensor: list(model.state_dict()[param_tensor].size())
        for param_tensor in model.state_dict()
    }
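# Hedged sketch of how the flattened_lengths/shapes bookkeeping above is
# typically used: serialize a state_dict into one flat vector for MPI
# transport, then rebuild it from the recorded lengths and shapes. The helper
# names are hypothetical; the worker code may pack tensors differently.
import torch

def flatten_state_dict(state_dict):
    """Concatenate all parameter tensors into a single 1-D tensor."""
    return torch.cat([t.flatten() for t in state_dict.values()])

def unflatten_state_dict(flat, flattened_lengths, shapes):
    """Rebuild a state_dict from a flat tensor using recorded lengths/shapes."""
    out, offset = {}, 0
    for name, length in flattened_lengths.items():
        out[name] = flat[offset:offset + length].view(shapes[name])
        offset += length
    return out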
train_loss, train_accuracy = [], []
val_acc_list, net_list = [], []
cv_loss, cv_acc = [], []
print_every = 5
val_loss_pre, counter = 0, 0

# tqdm progress bar
for epoch in tqdm(range(args.epochs)):
    print(f'\n | Global Training Round : {epoch + 1} |\n')

    global_model.train()  # set training mode
    idxs_users = range(args.num_users)

    for idx in idxs_users:
        print("Training at user %d/%d." % (idx + 1, args.num_users))
        local_model = LocalUpdate(args=args, dataset=train_dataset,
                                  idxs=user_groups[idx], logger=logger)
        w, loss, global_model = local_model.update_weights(
            model=global_model, global_round=epoch)

    # update global weights: the next user starts from the model the previous
    # user just trained (sequential training rather than averaging)
    # global_model.load_state_dict(w)

    # loss_avg = sum(local_losses) / len(local_losses)
    # train_loss.append(loss_avg)

    # Calculate avg training accuracy over all users at every epoch
    list_acc, list_loss = [], []
    global_model.eval()
    # for c in range(args.num_users):
    #     local_model = LocalUpdate(args=args, dataset=train_dataset,
def train(args, global_model, raw_data_train, raw_data_test):
    start_time = time.time()

    user_list = list(raw_data_train[2].keys())[:100]
    nusers = len(user_list)

    cluster_models = [copy.deepcopy(global_model)]
    del global_model
    cluster_models[0].to(device)
    # all users assigned to a single cluster_model in the beginning
    cluster_assignments = [user_list.copy()]

    if args.cfl_wsharing:
        shaccumulator = Accumulator()

    if args.frac == -1:
        m = args.cpr
        if m > nusers:
            raise ValueError(
                f"Clients Per Round: {args.cpr} is greater than number of users: {nusers}")
    else:
        m = max(int(args.frac * nusers), 1)
    print(f"Training {m} users each round")
    print(f"Trying to split after every {args.cfl_split_every} rounds")

    train_loss, train_accuracy = [], []
    for epoch in range(args.epochs):
        # CFL step: try to split each cluster
        if (epoch + 1) % args.cfl_split_every == 0:
            all_losses = []
            new_cluster_models, new_cluster_assignments = [], []
            for cidx, (cluster_model, assignments) in enumerate(
                    tzip(cluster_models, cluster_assignments,
                         desc="Try to split each cluster")):
                # First, train all models in the cluster
                local_weights = []
                for user in tqdm(assignments,
                                 desc="Train ALL users in the cluster",
                                 leave=False):
                    local_model = LocalUpdate(args=args,
                                              raw_data=raw_data_train,
                                              user=user)
                    w, loss = local_model.update_weights(
                        copy.deepcopy(cluster_model),
                        local_ep_override=args.cfl_local_epochs)
                    local_weights.append(copy.deepcopy(w))
                    all_losses.append(loss)

                # record shared weights so far
                if args.cfl_wsharing:
                    shaccumulator.add(local_weights)

                weight_updates = subtract_weights(
                    local_weights, cluster_model.state_dict(), args)
                similarities = pairwise_cossim(weight_updates)

                max_norm = compute_max_update_norm(weight_updates)
                mean_norm = compute_mean_update_norm(weight_updates)
                # wandb.log({"mean_norm / eps1": mean_norm, "max_norm / eps2": max_norm}, commit=False)

                split = (mean_norm < args.cfl_e1 and max_norm > args.cfl_e2
                         and len(assignments) > args.cfl_min_size)
                print(f"CIDX: {cidx}[{len(assignments)}] elem")
                print(f"mean_norm: {(mean_norm):.4f}; max_norm: {(max_norm):.4f}")
                print(f"split? {split}")

                if split:
                    c1, c2 = cluster_clients(similarities)
                    assignments1 = [assignments[i] for i in c1]
                    assignments2 = [assignments[i] for i in c2]
                    new_cluster_assignments += [assignments1, assignments2]
                    print(f"Cluster[{cidx}][{len(assignments)}] -> "
                          f"({len(assignments1)}, {len(assignments2)})")

                    local_weights1 = [local_weights[i] for i in c1]
                    local_weights2 = [local_weights[i] for i in c2]

                    cluster_model.load_state_dict(
                        average_weights(local_weights1))
                    new_cluster_models.append(cluster_model)

                    cluster_model2 = copy.deepcopy(cluster_model)
                    cluster_model2.load_state_dict(
                        average_weights(local_weights2))
                    new_cluster_models.append(cluster_model2)
                else:
                    cluster_model.load_state_dict(
                        average_weights(local_weights))
                    new_cluster_models.append(cluster_model)
                    new_cluster_assignments.append(assignments)

            # Write everything
            cluster_models = new_cluster_models
            if args.cfl_wsharing:
                shaccumulator.write(cluster_models)
                shaccumulator.flush()
            cluster_assignments = new_cluster_assignments
            train_loss.append(sum(all_losses) / len(all_losses))

        # Regular FedAvg step
        else:
            all_losses = []
            # Do FedAvg for each cluster
            for cluster_model, assignments in tzip(
                    cluster_models, cluster_assignments,
                    desc="Train each cluster through FedAvg"):
                if args.sample_dist == "uniform":
                    sampled_users = random.sample(assignments, m)
                else:
                    xs = np.linspace(-args.sigm_domain, args.sigm_domain,
                                     len(assignments))
                    sigmdist = 1 / (1 + np.exp(-xs))
                    sampled_users = np.random.choice(
                        assignments, m, p=sigmdist / sigmdist.sum())

                local_weights = []
                for user in tqdm(sampled_users,
                                 desc="Training Selected Users", leave=False):
                    local_model = LocalUpdate(args=args,
                                              raw_data=raw_data_train,
                                              user=user)
                    w, loss = local_model.update_weights(
                        copy.deepcopy(cluster_model))
                    local_weights.append(copy.deepcopy(w))
                    all_losses.append(loss)

                # update global and shared weights
                if args.cfl_wsharing:
                    shaccumulator.add(local_weights)
                new_cluster_weights = average_weights(local_weights)
                cluster_model.load_state_dict(new_cluster_weights)

            if args.cfl_wsharing:
                shaccumulator.write(cluster_models)
                shaccumulator.flush()
            train_loss.append(sum(all_losses) / len(all_losses))

        # Calculate avg training accuracy over all users at every epoch,
        # regardless of whether it was a CFL step or not
        test_acc, test_loss = [], []
        for cluster_model, assignments in zip(cluster_models,
                                              cluster_assignments):
            for user in assignments:
                local_model = LocalUpdate(args=args, raw_data=raw_data_test,
                                          user=user)
                acc, loss = local_model.inference(model=cluster_model)
                test_acc.append(acc)
                test_loss.append(loss)
        train_accuracy.append(sum(test_acc) / len(test_acc))

        wandb.log({
            "Train Loss": train_loss[-1],
            "Test Accuracy": (100 * train_accuracy[-1]),
            "Clusters": len(cluster_models)
        })
        print(f"Train Loss: {train_loss[-1]:.4f}\t "
              f"Test Accuracy: {(100 * train_accuracy[-1]):.2f}%")

    print(f"Results after {args.epochs} global rounds of training:")
    print("Avg Train Accuracy: {:.2f}%".format(100 * train_accuracy[-1]))
    print(f"Total Run Time: {(time.time() - start_time):0.4f}")
val_loss_pre, counter = 0, 0

# testing accuracy for global model
testing_accuracy = [0]

for epoch in tqdm(range(args.epochs)):
    local_weights, local_losses = [], []
    print(f'\n | Global Training Round : {epoch + 1} |\n')

    global_model.train()
    m = max(int(args.frac * args.num_users), 1)
    idxs_users = np.random.choice(range(args.num_users), m, replace=False)

    for idx in idxs_users:
        local_model = LocalUpdate(args=args, dataset=train_dataset,
                                  idxs=user_groups[idx], logger=logger)
        # Update local model idx with DP-SGD
        w, loss = local_model.dp_sgd(model=copy.deepcopy(global_model),
                                     global_round=epoch,
                                     norm_bound=args.norm_bound,
                                     noise_scale=args.noise_scale)
        local_weights.append(copy.deepcopy(w))
        local_losses.append(copy.deepcopy(loss))

    # average local model weights, update global model
    global_weights = average_weights(local_weights)
    global_model.load_state_dict(global_weights)

    # test accuracy
cv_loss, cv_acc = [], []
print_every = 5
val_loss_pre, counter = 0, 0

# tqdm progress bar
for epoch in tqdm(range(args.epochs)):
    local_weights, local_losses = [], []
    print(f'\n | Global Training Round : {epoch + 1} |\n')

    global_model.train()
    m = max(int(args.frac * args.num_users), 1)
    # Randomly select a `frac` fraction of the `num_users` users for training
    idxs_users = np.random.choice(range(args.num_users), m, replace=False)
    print("Users selected:", idxs_users)

    for idx in idxs_users:
        local_model = LocalUpdate(args=args, dataset=train_dataset,
                                  idxs=user_groups[idx], logger=logger)
        w, loss, t_model = local_model.update_weights(
            model=copy.deepcopy(global_model), global_round=epoch)
        local_weights.append(copy.deepcopy(w))
        local_losses.append(copy.deepcopy(loss))

    # update global weights: the global model is simply the average of the
    # selected local models
    global_weights = average_weights(local_weights, args)
    global_model.load_state_dict(global_weights)

    loss_avg = sum(local_losses) / len(local_losses)
    train_loss.append(loss_avg)
def poisoned_1to7_NoDefense(seed=1):
    start_time = time.time()

    # define paths
    path_project = os.path.abspath('..')
    logger = SummaryWriter('../logs')

    args = args_parser()
    exp_details(args)

    # set seed
    torch.manual_seed(seed)
    np.random.seed(seed)

    # device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # load dataset and user groups
    train_dataset, test_dataset, user_groups = get_dataset(args)

    # BUILD MODEL
    if args.model == 'cnn':
        # Convolutional neural network
        if args.dataset == 'mnist':
            global_model = CNNMnist(args=args)
        elif args.dataset == 'fmnist':
            global_model = CNNFashion_Mnist(args=args)
        elif args.dataset == 'cifar':
            global_model = CNNCifar(args=args)
    elif args.model == 'mlp':
        # Multi-layer perceptron
        img_size = train_dataset[0][0].shape
        len_in = 1
        for x in img_size:
            len_in *= x
        global_model = MLP(dim_in=len_in, dim_hidden=64,
                           dim_out=args.num_classes)
    else:
        exit('Error: unrecognized model')

    # Set the model to train and send it to device.
    global_model.to(device)
    global_model.train()
    print(global_model)

    # copy weights
    global_weights = global_model.state_dict()

    # testing accuracy for global model
    testing_accuracy = [0.1]
    backdoor_accuracy = [0.1]

    for epoch in tqdm(range(args.epochs)):
        local_del_w, local_norms = [], []
        print(f'\n | Global Training Round : {epoch + 1} |\n')

        global_model.train()
        m = max(int(args.frac * args.num_users), 1)
        idxs_users = np.random.choice(range(args.num_users), m, replace=False)

        # Adversary updates
        print("Evil norms:")
        for idx in idxs_users[0:args.nb_attackers]:
            local_model = LocalUpdate(args=args, dataset=train_dataset,
                                      idxs=user_groups[idx], logger=logger)
            del_w, zeta = local_model.poisoned_1to7(
                model=copy.deepcopy(global_model), change=1)
            local_del_w.append(copy.deepcopy(del_w))
            local_norms.append(copy.deepcopy(zeta))
            print(zeta)

        # Non-adversarial updates
        print("Good norms:")
        for idx in idxs_users[args.nb_attackers:]:
            print(idx)
            local_model = LocalUpdate(args=args, dataset=train_dataset,
                                      idxs=user_groups[idx], logger=logger)
            del_w, zeta = local_model.update_weights(
                model=copy.deepcopy(global_model), change=1)
            local_del_w.append(copy.deepcopy(del_w))
            local_norms.append(copy.deepcopy(zeta))
            print(zeta)

        # average local updates
        average_del_w = average_weights(local_del_w)

        # Update global model: w_{t+1} = w_t + average_del_w
        for param, param_del_w in zip(global_weights.values(),
                                      average_del_w.values()):
            param += param_del_w
        global_model.load_state_dict(global_weights)

        # test accuracy, backdoor accuracy
        test_acc, test_loss, back_acc = test_inference1to7(
            args, global_model, test_dataset)
        testing_accuracy.append(test_acc)
        backdoor_accuracy.append(back_acc)

        print("Test & Backdoor accuracy")
        print(testing_accuracy)
        print(backdoor_accuracy)

    # save accuracy
    np.savetxt(
        '../save/1to7Attack/TestAcc/NoDefense_{}_{}_seed{}.txt'.format(
            args.dataset, args.model, seed), testing_accuracy)
    np.savetxt(
        '../save/1to7Attack/BackAcc/NoDefense_{}_{}_seed{}.txt'.format(
            args.dataset, args.model, seed), backdoor_accuracy)
def train(args, global_model, raw_data_train, raw_data_test):
    start_time = time.time()

    user_list = list(raw_data_train[2].keys())
    user_weights = [None for _ in range(len(user_list))]
    user_assignments = [i % args.clusters for i in range(len(user_list))]

    # global_model.to(device)
    # global_weights = global_model.state_dict()
    global_models = [copy.deepcopy(global_model) for _ in range(args.clusters)]
    for m in global_models:
        m.to(device)

    # if args.frac == -1:
    #     m = args.cpr
    #     if m > len(user_list):
    #         raise ValueError(f"Clients Per Round: {args.cpr} is greater than number of users: {len(user_list)}")
    # else:
    #     m = max(int(args.frac * len(user_list)), 1)
    # print(f"Training {m} users each round")

    train_loss, train_accuracy = [], []
    for epoch in range(args.epochs):
        print(f"Global Training Round: {epoch + 1}/{args.epochs}")

        local_losses = []
        for modelidx, cluster_model in tqdm(enumerate(global_models)):
            local_weights = []
            for useridx, (user, user_assign) in enumerate(
                    zip(user_list, user_assignments)):
                if user_assign == modelidx:
                    local_model = LocalUpdate(args=args,
                                              raw_data=raw_data_train,
                                              user=user)
                    w, loss = local_model.update_weights(
                        copy.deepcopy(cluster_model))
                    local_weights.append(w)
                    local_losses.append(loss)
                    user_weights[useridx] = w
            if local_weights:
                cluster_model.load_state_dict(average_weights(local_weights))
        train_loss.append(sum(local_losses) / len(local_losses))

        # sampled_users = random.sample(user_list, m)
        # for user in tqdm(sampled_users):

        # FedSEM cluster reassignment step: each user joins the cluster whose
        # model is closest to its own weights
        print("Calculating User Assignments")
        dists = np.zeros((len(user_list), len(global_models)))
        for cidx, cluster_model in enumerate(global_models):
            for ridx, user_weight in enumerate(user_weights):
                dists[ridx, cidx] = weight_dist(user_weight,
                                                cluster_model.state_dict())
        user_assignments = list(np.argmin(dists, axis=1))
        print("Cluster: number of clients in that cluster index")
        print(Counter(user_assignments))

        # Calculate avg training accuracy over all users at every epoch
        test_acc, test_loss = [], []
        for modelidx, cluster_model in enumerate(global_models):
            for user, user_assign in zip(user_list, user_assignments):
                if modelidx == user_assign:
                    local_model = LocalUpdate(args=args,
                                              raw_data=raw_data_test,
                                              user=user)
                    acc, loss = local_model.inference(model=cluster_model)
                    test_acc.append(acc)
                    test_loss.append(loss)
        train_accuracy.append(sum(test_acc) / len(test_acc))

        wandb.log({
            "Train Loss": train_loss[-1],
            "Test Accuracy": (100 * train_accuracy[-1])
        })
        print(f"Train Loss: {train_loss[-1]:.4f}\t "
              f"Test Accuracy: {(100 * train_accuracy[-1]):.2f}%")

    print(f"Results after {args.epochs} global rounds of training:")
    print("Avg Train Accuracy: {:.2f}%".format(100 * train_accuracy[-1]))
    print(f"Total Run Time: {(time.time() - start_time):0.4f}")
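# A plausible weight_dist for the FedSEM reassignment step above: the L2
# distance between two state_dicts, accumulated over parameter tensors. A
# hedged sketch matching the call site, not the confirmed helper.
def weight_dist(w1, w2):
    """L2 distance between two model state_dicts with identical keys."""
    return sum((w1[k].float() - w2[k].float()).norm(2).item() ** 2
               for k in w1.keys()) ** 0.5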
    np.save(f, np.array([100 * round(test_acc, 6)]))

for epoch in tqdm(range(args.epochs)):
    local_weights, local_losses = [], []
    # print(f'\n | Global Training Round : {epoch+1} |\n')

    global_model.train()
    m = max(int(args.frac * args.num_users), 1)
    idxs_users = np.random.choice(range(args.num_users), m, replace=False)

    count = 0
    for idx in idxs_users:
        local_model = LocalUpdate(args=args, dataset=train_dataset,
                                  idxs=user_groups[idx], logger=logger)
        w, loss = local_model.update_weights(model=copy.deepcopy(global_model))

        # inverting the gradient: a cheating client (cheat is defined
        # upstream) reflects its update around the global weights
        if cheat[idx] == -1:
            for key in w:
                w[key] = 2 * global_model.state_dict()[key] - w[key]

        # weighting the contribution (weight is defined upstream)
        if args.weight != 0.:
            for key in w:
                w[key] = (global_model.state_dict()[key] +
                          (w[key] - global_model.state_dict()[key]) *
                          weight[epoch, idx])
""" model.train() tells your model that you are training the model. So effectively layers like dropout, batchnorm etc. which behave different on the train and test procedures know what is going on and hence can behave accordingly. More details: It sets the mode to train (see source code). You can call either model.eval() or model.train(mode=False) to tell that you are testing. It is somewhat intuitive to expect train function to train model but it does not do that. It just sets the mode. """ # ============== TRAIN ============== global_model.train() m = max(int(args.frac * args.num_users), 1) # C = args.frac. Setting number of clients m for training idxs_users = np.random.choice( range(args.num_users), m, replace=False ) # args.num_users=100 total clients. Choosing a random array of indices. Subset of clients. for idx in idxs_users: # For each client in the subset. local_model = LocalUpdate(args=args, dataset=train_dataset, idxs=user_groups[idx], logger=logger) w, loss = local_model.update_weights( # update_weights() contain multiple prints model=copy.deepcopy(global_model), global_round=epoch) # w = local model weights local_weights.append(copy.deepcopy(w)) local_losses.append(copy.deepcopy(loss)) # Averaging m local client weights global_weights = average_weights(local_weights) # update global weights global_model.load_state_dict(global_weights) loss_avg = sum(local_losses) / len(local_losses) train_loss.append(loss_avg) # Performance measure
def poisoned_pixel_CDP(norm_bound, noise_scale, nb_attackers, seed=1):
    start_time = time.time()

    # define paths
    path_project = os.path.abspath('..')
    logger = SummaryWriter('../logs')

    args = args_parser()
    exp_details(args)

    # set seed
    torch.manual_seed(seed)
    np.random.seed(seed)

    # device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # load dataset and user groups
    train_dataset, test_dataset, user_groups = get_dataset(args)

    # BUILD MODEL
    if args.model == 'cnn':
        # Convolutional neural network
        if args.dataset == 'mnist':
            global_model = CNNMnist(args=args)
        elif args.dataset == 'fmnist':
            global_model = CNNFashion_Mnist(args=args)
        elif args.dataset == 'cifar':
            global_model = CNNCifar(args=args)
    elif args.model == 'mlp':
        # Multi-layer perceptron
        img_size = train_dataset[0][0].shape
        len_in = 1
        for x in img_size:
            len_in *= x
        global_model = MLP(dim_in=len_in, dim_hidden=64,
                           dim_out=args.num_classes)
    else:
        exit('Error: unrecognized model')

    # Set the model to train and send it to device.
    global_model.to(device)
    global_model.train()
    print(global_model)

    # copy weights
    global_weights = global_model.state_dict()

    # load poisoned model
    backdoor_model = copy.deepcopy(global_model)
    backdoor_model.load_state_dict(torch.load('../save/poison_model.pth'))

    # testing accuracy for global model
    testing_accuracy = [0.1]
    backdoor_accuracy = [0.1]

    for epoch in tqdm(range(args.epochs)):
        local_del_w, local_norms = [], []
        print(f'\n | Global Training Round : {epoch + 1} |\n')

        global_model.train()
        m = max(int(args.frac * args.num_users), 1)
        idxs_users = np.random.choice(range(args.num_users), m, replace=False)

        # Adversary updates
        print("Evil")
        for idx in idxs_users[0:nb_attackers]:
            # backdoor model
            w = copy.deepcopy(backdoor_model)

            # compute change in parameters and its norm
            zeta = 0
            for del_w, w_old in zip(w.parameters(), global_model.parameters()):
                del_w.data = del_w.data - copy.deepcopy(w_old.data)
                zeta += del_w.norm(2).item() ** 2
            zeta = zeta ** (1. / 2)
            del_w = w.state_dict()
            print("EVIL")
            print(zeta)

            # add to global round
            local_del_w.append(copy.deepcopy(del_w))
            local_norms.append(copy.deepcopy(zeta))

        # Non-adversarial updates
        for idx in idxs_users[nb_attackers:]:
            local_model = LocalUpdate(args=args, dataset=train_dataset,
                                      idxs=user_groups[idx], logger=logger)
            del_w, zeta = local_model.update_weights(
                model=copy.deepcopy(global_model), change=1)
            local_del_w.append(copy.deepcopy(del_w))
            local_norms.append(copy.deepcopy(zeta))
            print("good")
            # print(zeta)

        # norm bound (e.g. median of norms)
        clip_factor = norm_bound  # min(norm_bound, np.median(local_norms))
        print(clip_factor)

        # clip updates
        for i in range(len(idxs_users)):
            for param in local_del_w[i].values():
                print(max(1, local_norms[i] / clip_factor))
                param /= max(1, local_norms[i] / clip_factor)

        # average local model updates
        average_del_w = average_weights(local_del_w)

        # Update model and add noise:
        # w_{t+1} = w_t + avg(del_w1 + del_w2 + ... + del_wc) + Noise
        for param, param_del_w in zip(global_weights.values(),
                                      average_del_w.values()):
            param += param_del_w
            param += torch.randn(param.size()) * noise_scale * norm_bound / \
                len(idxs_users)
        global_model.load_state_dict(global_weights)

        # test accuracy
        test_acc, test_loss, backdoor = test_backdoor_pixel(
            args, global_model, test_dataset)
        testing_accuracy.append(test_acc)
        backdoor_accuracy.append(backdoor)

        print("Testing & Backdoor accuracies")
        print(testing_accuracy)
        print(backdoor_accuracy)

    # save test accuracy
    np.savetxt(
        '../save/PixelAttack/TestAcc/iid_GDP_{}_{}_clip{}_scale{}_attackers{}_seed{}.txt'
        .format(args.dataset, args.model, norm_bound, noise_scale,
                nb_attackers, seed), testing_accuracy)
    np.savetxt(
        '../save/PixelAttack/BackdoorAcc/iid_GDP_{}_{}_clip{}_scale{}_attackers{}_seed{}.txt'
        .format(args.dataset, args.model, norm_bound, noise_scale,
                nb_attackers, seed), backdoor_accuracy)
def main_test(args):
    start_time = time.time()
    now = datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S')

    # define paths
    logger = SummaryWriter('../logs')

    # comment back in when not using easydict
    # args = args_parser()

    # where checkpoints are created
    args.save_path = os.path.join(args.save_path, args.exp_folder)
    if not os.path.exists(args.save_path):
        os.makedirs(args.save_path)
    save_path_tmp = os.path.join(args.save_path, 'tmp_{}'.format(now))
    if not os.path.exists(save_path_tmp):
        os.makedirs(save_path_tmp)
    SAVE_PATH = os.path.join(args.save_path,
                             '{}_{}_T[{}]_C[{}]_iid[{}]_E[{}]_B[{}]'.format(
                                 args.dataset, args.model, args.epochs,
                                 args.frac, args.iid, args.local_ep,
                                 args.local_bs))

    # fix seeds
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    random.seed(args.seed)
    np.random.seed(args.seed)
    # torch.cuda.set_device(0)
    device = torch.device("cuda:{}".format(args.gpu)
                          if torch.cuda.is_available() else "cpu")
    cpu_device = torch.device('cpu')

    # create log file
    log_path = os.path.join('../logs', args.exp_folder)
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    loggertxt = get_logger(
        os.path.join(log_path, '{}_{}_{}_{}.log'.format(
            args.model, args.optimizer, args.norm, now)))
    logging.info(args)

    # csv
    csv_save = '../csv/' + now
    csv_path = os.path.join(csv_save, 'accuracy.csv')
    csv_logger_keys = ['train_loss', 'accuracy']
    csvlogger = CSVLogger(csv_path, csv_logger_keys)

    # load dataset and user groups
    train_dataset, test_dataset, client_loader_dict = get_dataset(args)

    # set automatically for cifar-100
    if args.dataset == 'cifar100':
        args.num_classes = 100

    # BUILD MODEL
    if args.model == 'cnn':
        # Convolutional neural network
        if args.dataset == 'mnist':
            global_model = CNNMnist(args=args)
        elif args.dataset == 'fmnist':
            global_model = CNNFashion_Mnist(args=args)
        elif args.dataset == 'cifar':
            global_model = CNNCifar(args=args)
        elif args.dataset == 'cifar100':
            global_model = CNNCifar(args=args)
    elif args.model == 'mlp':
        # Multi-layer perceptron
        img_size = train_dataset[0][0].shape
        len_in = 1
        for x in img_size:
            len_in *= x
        global_model = MLP(dim_in=len_in, dim_hidden=64,
                           dim_out=args.num_classes)
    elif args.model == 'cnn_vc':
        global_model = CNNCifar_fedVC(args=args)
    elif args.model == 'cnn_vcbn':
        global_model = CNNCifar_VCBN(args=args)
    elif args.model == 'cnn_vcgn':
        global_model = CNNCifar_VCGN(args=args)
    elif args.model == 'resnet18_ws':
        global_model = resnet18(num_classes=args.num_classes, weight_stand=1)
    elif args.model == 'resnet18':
        global_model = resnet18(num_classes=args.num_classes, weight_stand=0)
    elif args.model == 'resnet32':
        global_model = ResNet32_test(num_classes=args.num_classes)
    elif args.model == 'resnet18_mabn':
        global_model = resnet18_mabn(num_classes=args.num_classes)
    elif args.model == 'vgg':
        global_model = vgg11()
    elif args.model == 'cnn_ws':
        global_model = CNNCifar_WS(args=args)
    else:
        exit('Error: unrecognized model')

    # Set the model to train and send it to device.
    loggertxt.info(global_model)

    # FedBN-style: per-client copies so normalization layers are not communicated
    client_models = [copy.deepcopy(global_model)
                     for idx in range(args.num_users)]

    # copy weights
    global_weights = global_model.state_dict()
    global_model.to(device)
    global_model.train()

    # Training
    train_loss, train_accuracy = [], []
    val_acc_list, net_list = [], []

    # per-client loss/gradient tracking for the "How Does BN Help" analysis
    client_loss = [[] for i in range(args.num_users)]
    client_conv_grad = [[] for i in range(args.num_users)]
    client_fc_grad = [[] for i in range(args.num_users)]
    client_total_grad_norm = [[] for i in range(args.num_users)]

    # resume
    if args.resume:
        checkpoint = torch.load(SAVE_PATH)
        global_model.load_state_dict(checkpoint['global_model'])
        if args.hold_normalize:
            for client_idx in range(args.num_users):
                client_models[client_idx].load_state_dict(
                    checkpoint['model_{}'.format(client_idx)])
        else:
            for client_idx in range(args.num_users):
                client_models[client_idx].load_state_dict(
                    checkpoint['global_model'])
        resume_iter = int(checkpoint['a_iter']) + 1
        print('Resume training from epoch {}'.format(resume_iter))
    else:
        resume_iter = 0

    # learning rate scheduler
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, gamma=0.1, step_size=500)

    # start training
    for epoch in tqdm(range(args.epochs)):
        local_weights, local_losses = [], []
        if args.verbose:
            print(f'\n | Global Training Round : {epoch + 1} |\n')

        global_model.train()
        m = max(int(args.frac * args.num_users), 1)
        idxs_users = np.random.choice(range(args.num_users), m, replace=False)

        for idx in idxs_users:
            """
            for key in global_model.state_dict().keys():
                if args.hold_normalize:
                    if 'bn' not in key:
                        client_models[idx].state_dict()[key].data.copy_(global_model.state_dict()[key])
                else:
                    client_models[idx].state_dict()[key].data.copy_(global_model.state_dict()[key])
            """
            torch.cuda.empty_cache()
            local_model = LocalUpdate(args=args, logger=logger,
                                      train_loader=client_loader_dict[idx],
                                      device=device)
            w, loss, batch_loss, conv_grad, fc_grad, total_grad_norm = \
                local_model.update_weights(model=copy.deepcopy(global_model),
                                           global_round=epoch, idx_user=idx)
            local_weights.append(copy.deepcopy(w))
            # the client's average loss over one local epoch, e.g. 0.35
            # (i.e., the mean of its batch losses)
            local_losses.append(copy.deepcopy(loss))

            # scheduler over all rounds
            # scheduler.step()

            # for loss graphs: store each client's loss trajectory per client
            client_loss[idx].append(batch_loss)
            client_conv_grad[idx].append(conv_grad)
            client_fc_grad[idx].append(fc_grad)
            client_total_grad_norm[idx].append(total_grad_norm)
            # print(total_grad_norm)

            # copy gn/bn
            # client_models[idx].load_state_dict(w)
            del local_model
            del w

        # update global weights
        global_weights = average_weights(local_weights, client_loader_dict,
                                         idxs_users)

        # server-side optimizer step applied to the aggregated weights
        # opt = OptRepo.name2cls('adam')(global_model.parameters(), lr=0.01, betas=(0.9, 0.99), eps=1e-3)
        opt = OptRepo.name2cls('sgd')(global_model.parameters(), lr=10,
                                      momentum=0.9)
        opt.zero_grad()
        opt_state = opt.state_dict()
        global_weights = aggregation(global_weights, global_model)
        global_model.load_state_dict(global_weights)
        opt = OptRepo.name2cls('sgd')(global_model.parameters(), lr=10,
                                      momentum=0.9)
        # opt = OptRepo.name2cls('adam')(global_model.parameters(), lr=0.01, betas=(0.9, 0.99), eps=1e-3)
        opt.load_state_dict(opt_state)
        opt.step()

        loss_avg = sum(local_losses) / len(local_losses)
        train_loss.append(loss_avg)

        global_model.eval()
        # for c in range(args.num_users):
        #     local_model = LocalUpdate(args=args, dataset=train_dataset,
        #                               idxs=user_groups[idx], logger=logger)
        #     acc, loss = local_model.inference(model=global_model)
        #     list_acc.append(acc)
        #     list_loss.append(loss)
        # train_accuracy.append(sum(list_acc)/len(list_acc))
        train_accuracy = test_inference(args, global_model, test_dataset,
                                        device=device)
        val_acc_list.append(train_accuracy)

        # print global training loss after every round
        # if (epoch+1) % print_every == 0:
        loggertxt.info(f' \nAvg Training Stats after {epoch + 1} global rounds:')
        loggertxt.info(f'Training Loss : {loss_avg}')
        loggertxt.info('Train Accuracy: {:.2f}% \n'.format(100 * train_accuracy))
        csvlogger.write_row([loss_avg, 100 * train_accuracy])

        if (epoch + 1) % 100 == 0:
            tmp_save_path = os.path.join(save_path_tmp,
                                         'tmp_{}.pt'.format(epoch + 1))
            torch.save(global_model.state_dict(), tmp_save_path)

    # Test inference after completion of training
    test_acc = test_inference(args, global_model, test_dataset, device=device)

    print(' Saving checkpoints to {}...'.format(SAVE_PATH))
    if args.hold_normalize:
        client_dict = {}
        for idx, model in enumerate(client_models):
            client_dict['model_{}'.format(idx)] = model.state_dict()
        torch.save(client_dict, SAVE_PATH)
    else:
        torch.save({'global_model': global_model.state_dict()}, SAVE_PATH)

    loggertxt.info(f' \n Results after {args.epochs} global rounds of training:')
    # loggertxt.info("|---- Avg Train Accuracy: {:.2f}%".format(100*train_accuracy[-1]))
    loggertxt.info("|---- Test Accuracy: {:.2f}%".format(100 * test_acc))

    # does not work well when frac != 1
    # batch_loss_list = np.array(client_loss).sum(axis=0) / args.num_users
    # conv_grad_list = np.array(client_conv_grad).sum(axis=0) / args.num_users
    # fc_grad_list = np.array(client_fc_grad).sum(axis=0) / args.num_users
    # total_grad_list = np.array(client_total_grad_norm).sum(axis=0) / args.num_users
    # the average over clients was intended, but currently only client 0 is
    # returned; a bug is likely if clients see different numbers of batches
    return (train_loss, val_acc_list, client_loss[0], client_conv_grad[0],
            client_fc_grad[0], client_total_grad_norm[0])
val_acc_list, net_list = [], []
cv_loss, cv_acc = [], []
print_every = 2
val_loss_pre, counter = 0, 0

for epoch in tqdm(range(args.epochs)):
    local_weights, local_losses = [], []
    print(f'\n | Global Training Round : {epoch + 1} |\n')

    global_model.train()
    m = max(int(args.frac * args.num_users), 1)
    idxs_users = np.random.choice(range(args.num_users), m, replace=False)

    for idx in idxs_users:
        local_model = LocalUpdate(args=args, dataset=train_dataset,
                                  idxs=user_groups[idx], logger=logger)
        w, loss = local_model.update_weights(
            model=copy.deepcopy(global_model), global_round=epoch)
        local_weights.append(copy.deepcopy(w))
        local_losses.append(copy.deepcopy(loss))

    # update global weights
    global_weights = average_weights(local_weights)
    global_model.load_state_dict(global_weights)

    loss_avg = sum(local_losses) / len(local_losses)
    train_loss.append(loss_avg)
# per-client tracking for the "How Does BN Help" analysis
client_loss = [[] for i in range(args.num_users)]
client_conv_grad = [[] for i in range(args.num_users)]
client_fc_grad = [[] for i in range(args.num_users)]

for epoch in tqdm(range(args.epochs)):
    local_weights, local_losses = [], []
    print(f'\n | Global Training Round : {epoch + 1} |\n')

    global_model.train()
    m = max(int(args.frac * args.num_users), 1)
    idxs_users = np.random.choice(range(args.num_users), m, replace=False)

    for idx in idxs_users:
        local_model = LocalUpdate(args=args, dataset=train_dataset,
                                  idxs=user_groups[idx], logger=logger)
        w, loss, batch_loss, conv_grad, fc_grad = local_model.update_weights(
            model=copy.deepcopy(global_model), global_round=epoch,
            idx_user=idx)
        local_weights.append(copy.deepcopy(w))
        # the client's average loss over one local epoch, e.g. 0.35
        # (i.e., the mean of its batch losses)
        local_losses.append(copy.deepcopy(loss))

        # for loss graphs: store each client's loss trajectory
        client_loss[idx].append(batch_loss)
        client_conv_grad[idx].append(conv_grad)
        client_fc_grad[idx].append(fc_grad)
        # loggergrad.info('user:{} , total_gradient_norm:{}'.format(idx, log_grad))