def create_client_server():
    # Split the training data evenly across users (i.i.d. partition).
    num_items = int(len(dataset_train) / args.num_users)
    clients, all_idxs = [], [i for i in range(len(dataset_train))]
    # Initialize every client and the server from the same global model weights.
    net_glob = CNNMnist(args=args).to(args.device)
    for i in range(args.num_users):
        new_idxs = set(np.random.choice(all_idxs, num_items, replace=False))
        all_idxs = list(set(all_idxs) - new_idxs)
        new_client = Client(args=args, dataset=dataset_train, idxs=new_idxs,
                            w=copy.deepcopy(net_glob.state_dict()))
        clients.append(new_client)
    server = Server(args=args, w=copy.deepcopy(net_glob.state_dict()))
    return clients, server
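# `DatasetSplit` is used by the client DataLoaders in this section but is not defined
# here. A minimal sketch of the usual wrapper (an assumption, not necessarily the
# original implementation): it restricts a torch Dataset to a given set of indices.
from torch.utils.data import Dataset

class DatasetSplit(Dataset):
    def __init__(self, dataset, idxs):
        self.dataset = dataset
        self.idxs = list(idxs)

    def __len__(self):
        return len(self.idxs)

    def __getitem__(self, item):
        image, label = self.dataset[self.idxs[item]]
        return image, label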
class FL_client():
    def __init__(self, args):
        if args.dataset == 'cifar':
            self.net = CNNCifar(args=args).to(args.device)
        else:
            self.net = CNNMnist(args=args).to(args.device)
        self.net.train()
        self.loss_func = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.SGD(self.net.parameters(), lr=args.lr)
        self.args = args
        self.w_glob = []
        # key exchange
        self.x = self.gx = 0
        self.keys = defaultdict(int)

    def set_data(self, dataset, idxs):
        self.data = DataLoader(DatasetSplit(dataset, idxs),
                               batch_size=self.args.local_bs, shuffle=True)

    def load_state(self, state_dict):
        self.net.load_state_dict(state_dict)

    def train(self):
        epoch_loss = []
        for _ in range(self.args.local_ep):
            batch_loss = []
            for _, (images, labels) in enumerate(self.data):
                images, labels = images.to(self.args.device), labels.to(self.args.device)
                pred = self.net(images)
                loss = self.loss_func(pred, labels)
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                batch_loss.append(loss.item())
            epoch_loss.append(sum(batch_loss) / len(batch_loss))
        return self.net.state_dict(), sum(epoch_loss) / len(epoch_loss)
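# The `x`, `gx`, and `keys` fields above suggest a Diffie-Hellman style key exchange
# between clients (for example, to derive pairwise keys for masking or encryption).
# The exchange itself does not appear in this section; the following is a minimal
# sketch under that assumption, with `PRIME` and `G` as hypothetical public parameters.
import random

PRIME = 2 ** 127 - 1   # hypothetical public modulus (a Mersenne prime)
G = 5                  # hypothetical public generator

def dh_generate(client):
    # Pick a private exponent x and publish g^x mod p.
    client.x = random.randint(2, PRIME - 2)
    client.gx = pow(G, client.x, PRIME)
    return client.gx

def dh_shared_key(client, peer_id, peer_gx):
    # Shared secret with a peer: (g^y)^x mod p, stored per peer id.
    client.keys[peer_id] = pow(peer_gx, client.x, PRIME)
    return client.keys[peer_id]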
        len_in *= x
    net_glob = MLP(dim_in=len_in, dim_hidden=64,
                   dim_out=args.num_classes).to(args.device)
else:
    exit('Error: unrecognized model')
print(net_glob)
net_glob.train()
net_glob1.train()
net_glob5.train()
net_glob10.train()
net_glob15.train()
net_glob20.train()
net_glob25.train()
net_glob30.train()

# copy weights
w_glob = net_glob.state_dict()
w_glob1 = net_glob1.state_dict()
w_glob5 = net_glob5.state_dict()
w_glob10 = net_glob10.state_dict()
w_glob15 = net_glob15.state_dict()
w_glob20 = net_glob20.state_dict()
w_glob25 = net_glob25.state_dict()
w_glob30 = net_glob30.state_dict()

# training - NO ATTACK
loss_train = []
cv_loss, cv_acc = [], []
val_loss_pre, counter = 0, 0
net_best = None
best_loss = None
val_acc_list, net_list = [], []
    len_in = 1
    for x in img_size:
        len_in *= x
    net_glob = MLP(dim_in=len_in, dim_hidden=64,
                   dim_out=args.num_classes).to(args.device)
else:
    exit('Error: unrecognized model')
print(net_glob)
net_glob.train()
net_glob1.train()
net_glob5.train()
net_glob7.train()
net_glob10.train()

# copy weights
w_glob = net_glob.state_dict()
w_glob1 = net_glob1.state_dict()
w_glob5 = net_glob5.state_dict()
w_glob7 = net_glob7.state_dict()
w_glob10 = net_glob10.state_dict()

# training - NO ATTACK
loss_train = []
cv_loss, cv_acc = [], []
val_loss_pre, counter = 0, 0
net_best = None
best_loss = None
val_acc_list, net_list = [], []

# VIVEK constant attack experiment - 1 MALICIOUS
loss_train_1 = []
fixed_agent_1 = random.randint(0, 10)  # one fixed malicious agent, index between 0 and 10
updates_recorded_1 = False
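# The constant-attack bookkeeping above records the malicious agent's update once
# (`updates_recorded_1` flips to True) and then replays it in every later round.
# A minimal sketch of that logic, assuming each round collects per-agent state_dicts
# into a list; the names `w_locals` and `recorded_update` are illustrative only and
# are not taken from the original script:
import copy

def apply_constant_attack(w_locals, fixed_agent, already_recorded, recorded_update):
    if not already_recorded:
        # First round for this agent: keep a copy of its genuine update.
        recorded_update = copy.deepcopy(w_locals[fixed_agent])
        already_recorded = True
    else:
        # Later rounds: overwrite the genuine update with the recorded one.
        w_locals[fixed_agent] = copy.deepcopy(recorded_update)
    return w_locals, already_recorded, recorded_update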
                              user_epoch=dict_userepoch[idx],
                              diff_w_old=diff_w_old)
    for i in list(w_ema.keys()):
        diff_w_old_dic.append(diff_w_ema[i])
    epoch_comu.append(comu_w / (comu_w + comu_w_ema))
    if args.dataset == 'mnist':
        net_glob_new = CNNMnist(args=args).to(args.device)
    else:
        net_glob_new = CNNCifar(args=args).to(args.device)
    w_new = net_glob_new.state_dict()
    w_new.update(w_dic)
    w_new.update(w_ema_dic)
    w = copy.deepcopy(w_new)
    w_locals.append(copy.deepcopy(w))
    w_ema_locals.append(copy.deepcopy(w_ema))
    loss_locals.append(copy.deepcopy(loss))
    loss_consistent_locals.append(copy.deepcopy(loss_consistent))
glob_comu.append(sum(epoch_comu) / len(epoch_comu))
diff_w_old = get_median(diff_w_old_dic, iter, args)
w_glob = FedAvg(w_locals)
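# `FedAvg` is called above but not defined in this section. The standard implementation
# (a minimal sketch, consistent with `w_locals` being a list of state_dicts) averages
# every parameter tensor element-wise across the participating clients:
import copy
import torch

def FedAvg(w_locals):
    w_avg = copy.deepcopy(w_locals[0])
    for k in w_avg.keys():
        for i in range(1, len(w_locals)):
            w_avg[k] += w_locals[i][k]
        w_avg[k] = torch.div(w_avg[k], len(w_locals))
    return w_avg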
def main_worker(gpu, ngpus_per_node, args):
    print("gpu:", gpu)
    args.gpu = gpu
    # The first machine only has three GPUs, so its ranks need special handling.
    if args.rank == 0:
        newrank = args.rank * ngpus_per_node + gpu
    else:
        newrank = args.rank * ngpus_per_node + gpu - 1

    # Initialize the process group; processes communicate over TCP.
    print("begin init")
    dist.init_process_group(init_method=args.init_method, backend="nccl",
                            world_size=args.world_size, rank=newrank)
    print("end init")

    # Build communication groups: rank 0 acts as the server, and broadcast is used to
    # emulate send/recv, so the server needs a separate group with each client.
    group = []
    for i in range(1, args.world_size):
        group.append(dist.new_group([0, i]))
    allgroup = dist.new_group([i for i in range(args.world_size)])

    if newrank == 0:
        """server"""
        print("Using server {}'s GPU {} as the server".format(args.rank, gpu))
        # During training the server only aggregates and redistributes parameters;
        # it does not take part in any local computation.
        # set device
        args.device = torch.device(
            'cuda:{}'.format(args.gpu)
            if torch.cuda.is_available() and args.gpu != -1 else 'cpu')
        net = CNNMnist().to(args.device)
        w_avg = copy.deepcopy(net.state_dict())
        for j in range(args.epochs):
            if j == args.epochs - 1:
                for i in w_avg.keys():
                    temp = w_avg[i].to(args.device)
                    w_avg[i] = average_gradients(temp, group, allgroup)
            else:
                for i in w_avg.keys():
                    temp = w_avg[i].to(args.device)
                    average_gradients(temp, group, allgroup)
        torch.save(w_avg, 'w_wag')
        net.load_state_dict(w_avg)

        # load the test data
        trans_mnist = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])
        dataset_test = datasets.MNIST('data/', train=False, download=True,
                                      transform=trans_mnist)
        test_set = torch.utils.data.DataLoader(dataset_test, batch_size=args.bs)
        test_accuracy, test_loss = test(net, test_set, args)
        print("Testing accuracy: {:.2f}".format(test_accuracy))
        print("Testing loss: {:.2f}".format(test_loss))
    else:
        """clients"""
        print("Using server {}'s GPU {} as client {}".format(args.rank, gpu, newrank))
        # set device
        args.device = torch.device(
            'cuda:{}'.format(args.gpu)
            if torch.cuda.is_available() and args.gpu != -1 else 'cpu')
        print("begin train...")
        net = CNNMnist().to(args.device)
        print(net)
        data = torch.load("data/distributed/data_of_client{}".format(newrank))
        bsz = 64
        train_set = torch.utils.data.DataLoader(data, batch_size=bsz)
        optimizer = torch.optim.SGD(net.parameters(), lr=args.lr, momentum=0.5)
        num_batches = ceil(len(train_set.dataset) / float(bsz))
        start = time.time()
        for epoch in range(args.epochs):
            for iter in range(3):
                epoch_loss = 0.0
                for data, target in train_set:
                    data, target = data.to(args.device), target.to(args.device)
                    data, target = Variable(data), Variable(target)
                    optimizer.zero_grad()
                    output = net(data)
                    loss = F.nll_loss(output, target)
                    epoch_loss += loss.item()
                    loss.backward()
                    optimizer.step()
                if iter == 3 - 1:
                    print('Rank ', dist.get_rank(), ', epoch ', epoch, ': ',
                          epoch_loss / num_batches)
            """federated learning"""
            w_avg = copy.deepcopy(net.state_dict())
            for k in w_avg.keys():
                print("k:", k)
                temp = average_gradients(w_avg[k].to(args.device), group, allgroup)
                w_avg[k] = temp
            net.load_state_dict(w_avg)
        end = time.time()
        print("training time: {}".format(end - start))
        train_accuracy, train_loss = test(net, train_set, args)
        print("Training accuracy: {:.2f}".format(train_accuracy))
        print("Training loss: {:.2f}".format(train_loss))
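# `average_gradients` is used above but not defined in this section. Given the comment
# that broadcast is used to emulate send/recv over the pairwise groups, a plausible
# sketch (an assumption, not necessarily the original implementation) is: each client
# broadcasts its tensor to the server over its pairwise group, the server averages
# the received tensors, and the result is broadcast back to everyone over `allgroup`.
import torch
import torch.distributed as dist

def average_gradients(tensor, group, allgroup):
    world_size = dist.get_world_size()
    if dist.get_rank() == 0:
        # Server: "receive" one tensor from each client and average them.
        total = torch.zeros_like(tensor)
        for i, g in enumerate(group, start=1):
            recv = torch.zeros_like(tensor)
            dist.broadcast(recv, src=i, group=g)
            total += recv
        tensor = total / (world_size - 1)
    else:
        # Client: "send" the local tensor to the server over its pairwise group.
        dist.broadcast(tensor, src=dist.get_rank(),
                       group=group[dist.get_rank() - 1])
    # The server broadcasts the averaged tensor to all processes.
    dist.broadcast(tensor, src=0, group=allgroup)
    return tensor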
#     total_norm += param_norm.item() ** p
# total_norm = total_norm ** (1. / p)
# return total_norm

def norm1(x, p):
    """First-pass implementation of the p-norm."""
    return (abs(x) ** p).sum() ** (1. / p)

# print(norm1(net_glob.state_dict(), 1))
# net_glob.load_state_dict()
m = net_glob.train()
net_glob.eval()
# print(type(net_glob.state_dict()))
# print(net_glob.state_dict().keys())
# data = list(net_glob.state_dict().items())
# an_array = np.array(data)
# for layer in net_glob.ordered_layers:
#     norm_grad = layer.weight.grad.norm()
#     tone = f + ((norm_grad.numpy()) * 100.0)
best_state = copy.deepcopy(net_glob.state_dict())
# print(net_glob.grad)
print(pnorm(net_glob, 2))
# print(torch.norm(net_glob, dim=None, p=2))
# print(net_glob.state_dict().grad.norm(1))
# print(torch.nn.utils.clip_grad_norm(net_glob, 1, 1))
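# `pnorm` is called above but not defined in this section. A minimal sketch, consistent
# with the commented-out draft at the top of this fragment (accumulate per-parameter
# norms raised to the p-th power, then take the p-th root over the whole model):
def pnorm(model, p):
    total_norm = 0.0
    for param in model.parameters():
        total_norm += param.data.norm(p).item() ** p
    return total_norm ** (1. / p)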
class Client():
    def __init__(self, args, dataset=None, idxs=None, w=None, C=0.5, sigma=0.05):
        self.args = args
        self.loss_func = nn.CrossEntropyLoss()
        self.ldr_train = DataLoader(DatasetSplit(dataset, idxs),
                                    batch_size=self.args.local_bs, shuffle=True)
        self.model = CNNMnist(args=args).to(args.device)
        self.model.load_state_dict(w)
        self.C = C          # clipping bound for the DP mechanism
        self.sigma = sigma  # noise parameter for the DP mechanism
        if self.args.mode == 'Paillier':
            self.pub = pub
            self.priv = priv

    def train(self):
        w_old = copy.deepcopy(self.model.state_dict())
        net = copy.deepcopy(self.model)
        net.train()
        # train and update
        optimizer = torch.optim.SGD(net.parameters(), lr=self.args.lr,
                                    momentum=self.args.momentum)
        for iter in range(self.args.local_ep):
            batch_loss = []
            for batch_idx, (images, labels) in enumerate(self.ldr_train):
                images, labels = images.to(self.args.device), labels.to(self.args.device)
                net.zero_grad()
                log_probs = net(images)
                loss = self.loss_func(log_probs, labels)
                loss.backward()
                optimizer.step()
                batch_loss.append(loss.item())
        w_new = net.state_dict()

        update_w = {}
        if self.args.mode == 'plain':
            for k in w_new.keys():
                update_w[k] = w_new[k] - w_old[k]
        # 1. part one: DP mechanism
        elif self.args.mode == 'DP':
            for k in w_new.keys():
                # calculate the update
                update_w[k] = w_new[k] - w_old[k]
                # clip the update
                update_w[k] = update_w[k] / max(1, torch.norm(update_w[k], 2) / self.C)
                # add noise: the raw update could leak the user's data, so noise is
                # added before it is sent to the server
                update_w[k] += np.random.normal(0.0, self.sigma ** 2 * self.C ** 2)
        # 2. part two: Paillier encryption
        elif self.args.mode == 'Paillier':
            print(len(w_new.keys()))
            for k in w_new.keys():
                print("start ", k, flush=True)
                update_w[k] = w_new[k] - w_old[k]
                update_w_list = update_w[k].view(-1).cpu().tolist()
                for iter, w in enumerate(update_w_list):
                    update_w_list[iter] = self.pub.encrypt(w)
                update_w[k] = update_w_list
                print("end ", flush=True)
        else:
            exit()
        return update_w, sum(batch_loss) / len(batch_loss)

    def update(self, w_glob):
        if self.args.mode == 'plain':
            self.model.load_state_dict(w_glob)
        elif self.args.mode == 'DP':
            self.model.load_state_dict(w_glob)
        elif self.args.mode == 'Paillier':
            w_glob_ciph = copy.deepcopy(w_glob)
            for k in w_glob_ciph.keys():
                for iter, item in enumerate(w_glob_ciph[k]):
                    w_glob_ciph[k][iter] = self.priv.decrypt(item)
                shape = list(self.model.state_dict()[k].size())
                w_glob_ciph[k] = torch.FloatTensor(w_glob_ciph[k]).to(
                    self.args.device).view(*shape)
                self.model.state_dict()[k] += w_glob_ciph[k]
        else:
            exit()
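# The module-level `pub` and `priv` referenced above are not defined in this section.
# Their `.encrypt()` / `.decrypt()` usage matches the python-paillier (`phe`) package,
# so a plausible sketch (an assumption about the original setup) is a shared keypair
# generated once and handed to every client:
from phe import paillier

pub, priv = paillier.generate_paillier_keypair(n_length=1024)

ciphertext = pub.encrypt(0.25)   # encrypt a single weight delta
print(priv.decrypt(ciphertext))  # -> 0.25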
print(net_glob)
net_glob.train()
net_glob5.train()
net_glob10.train()

# STRUCTURE: KEY = ROUND, VAL = [training_loss, {agentId: flattened_updates}]
malicious_structure5 = defaultdict()
malicious_structure10 = defaultdict()

# STRUCTURE: KEY = ROUND, VAL = [training_loss, {agentId: flattened_updates}]
non_malicious_structure = defaultdict()
non_malicious_structure5 = defaultdict()
non_malicious_structure10 = defaultdict()

# copy weights
w_glob = net_glob.state_dict()
w_glob5 = net_glob5.state_dict()
w_glob10 = net_glob10.state_dict()

# training - NO ATTACK
loss_train = []
cv_loss, cv_acc = [], []
val_loss_pre, counter = 0, 0
net_best = None
best_loss = None
val_acc_list, net_list = [], []

# VIVEK constant attack experiment - 5 MALICIOUS
loss_train_5 = []
fixed_agent_5 = random.sample(range(32), 5)
updates_recorded_mapping_5 = defaultdict(bool)
for i in fixed_agent_5:
class Server():
    def __init__(self, args, w):
        self.args = args
        self.clients_update_w = []
        self.clients_loss = []
        self.model = CNNMnist(args=args).to(args.device)
        self.model.load_state_dict(w)

    def FedAvg(self):
        # 1. part one: DP mechanism
        # Noise is added on the client side, so averaging is the same as in plain mode.
        if self.args.mode == 'plain' or self.args.mode == 'DP':
            update_w_avg = copy.deepcopy(self.clients_update_w[0])
            for k in update_w_avg.keys():
                for i in range(1, len(self.clients_update_w)):
                    update_w_avg[k] += self.clients_update_w[i][k]
                update_w_avg[k] = torch.div(update_w_avg[k],
                                            len(self.clients_update_w))
                self.model.state_dict()[k] += update_w_avg[k]
            return copy.deepcopy(self.model.state_dict()), sum(
                self.clients_loss) / len(self.clients_loss)
        # 2. part two: Paillier addition (ciphertexts are summed, then averaged)
        elif self.args.mode == 'Paillier':
            update_w_avg = copy.deepcopy(self.clients_update_w[0])
            for k in update_w_avg.keys():
                client_num = len(self.clients_update_w)
                for i in range(1, client_num):
                    for iter in range(len(update_w_avg[k])):
                        update_w_avg[k][iter] += self.clients_update_w[i][k][iter]
                for iter in range(len(update_w_avg[k])):
                    update_w_avg[k][iter] /= client_num
            return update_w_avg, sum(self.clients_loss) / len(self.clients_loss)
        else:
            exit()

    def test(self, datatest):
        self.model.eval()
        # testing
        test_loss = 0
        correct = 0
        data_loader = DataLoader(datatest, batch_size=self.args.bs)
        for idx, (data, target) in enumerate(data_loader):
            if self.args.gpu != -1:
                data, target = data.cuda(), target.cuda()
            log_probs = self.model(data)
            # sum up the batch loss
            test_loss += F.cross_entropy(log_probs, target, reduction='sum').item()
            # get the index of the max log-probability
            y_pred = log_probs.data.max(1, keepdim=True)[1]
            correct += y_pred.eq(target.data.view_as(y_pred)).long().cpu().sum()
        test_loss /= len(data_loader.dataset)
        accuracy = 100.00 * correct / len(data_loader.dataset)
        return accuracy, test_loss
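# A minimal sketch of how the pieces above fit together in one communication round,
# assuming `create_client_server()` from the top of this section; the loop variable
# names are illustrative only:
clients, server = create_client_server()
for rnd in range(args.epochs):
    server.clients_update_w, server.clients_loss = [], []
    for client in clients:
        update_w, loss = client.train()
        server.clients_update_w.append(update_w)
        server.clients_loss.append(loss)
    w_glob, loss_avg = server.FedAvg()     # aggregated weights (or encrypted update)
    for client in clients:
        client.update(w_glob)              # clients apply / decrypt the global update
    print('Round {:3d}, average loss {:.3f}'.format(rnd, loss_avg))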