def __init__(self, args):
    if args.dataset == 'cifar':
        self.net = CNNCifar(args=args).to(args.device)
    else:
        self.net = CNNMnist(args=args).to(args.device)
    self.net.train()
    self.loss_func = nn.CrossEntropyLoss()
    self.optimizer = torch.optim.SGD(self.net.parameters(), lr=args.lr)
    self.args = args
    self.w_glob = []
    # key exchange
    self.x = self.gx = 0
    self.keys = defaultdict(int)
def __init__(self, args, dataset=None, idxs=None, w=None, C=0.5, sigma=0.05):
    self.args = args
    self.loss_func = nn.CrossEntropyLoss()
    self.ldr_train = DataLoader(DatasetSplit(dataset, idxs),
                                batch_size=self.args.local_bs, shuffle=True)
    self.model = CNNMnist(args=args).to(args.device)
    self.model.load_state_dict(w)
    self.C = C          # clipping bound for DP updates
    self.sigma = sigma  # noise multiplier for DP updates
    if self.args.mode == 'Paillier':
        self.pub = pub
        self.priv = priv
def create_client_server():
    # Split the training data evenly across users (i.i.d.) and initialize
    # every client with the same global model parameters.
    num_items = int(len(dataset_train) / args.num_users)
    clients, all_idxs = [], [i for i in range(len(dataset_train))]
    net_glob = CNNMnist(args=args).to(args.device)
    for i in range(args.num_users):
        new_idxs = set(np.random.choice(all_idxs, num_items, replace=False))
        all_idxs = list(set(all_idxs) - new_idxs)
        new_client = Client(args=args, dataset=dataset_train, idxs=new_idxs,
                            w=copy.deepcopy(net_glob.state_dict()))
        clients.append(new_client)
    server = Server(args=args, w=copy.deepcopy(net_glob.state_dict()))
    return clients, server
def build_model(args):
    # build model
    if args.model == 'cnn' and args.dataset == 'cifar':
        net_glob = CNNCifar(args=args).to(args.device)
    elif args.model == 'cnn' and args.dataset == 'mnist':
        net_glob = CNNMnist(args=args).to(args.device)
    elif args.model == 'LeNet' and args.dataset == 'traffic':
        net_glob = LeNet(args=args).to(args.device)
    else:
        exit('Error: unrecognized model')
    return net_glob
def get_model(args):
    if args.model == 'cnn' and args.dataset in ['cifar10', 'cifar100']:
        net_glob = CNNCifar(args=args).to(args.device)
    elif args.model == 'cnn' and args.dataset == 'mnist':
        net_glob = CNNMnist(args=args).to(args.device)
    elif args.model == 'mlp' and args.dataset == 'mnist':
        net_glob = MLP(dim_in=784, dim_hidden=256,
                       dim_out=args.num_classes).to(args.device)
    else:
        exit('Error: unrecognized model')
    print(net_glob)
    return net_glob
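# A minimal call-site sketch for the model factories above (a hedged example:
# the argument values and the args_parser helper are assumed from the
# surrounding snippets, not taken from this file).
args = args_parser()
args.model, args.dataset = 'cnn', 'mnist'
net_glob = get_model(args)  # build_model(args) dispatches the same way
net_glob.train()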
def build_model():
    # build model
    if args.model == 'cnn' and args.dataset == 'cifar':
        net_glob = CNNCifar(args=args).to(args.device)
    elif args.model == 'cnn' and args.dataset == 'mnist':
        net_glob = CNNMnist(args=args).to(args.device)
    # elif args.model == 'mlp':
    #     len_in = 1
    #     for x in img_size:
    #         len_in *= x
    #     net_glob = MLP(dim_in=len_in, dim_hidden=200,
    #                    dim_out=args.num_classes).to(args.device)
    else:
        exit('Error: unrecognized model')
    return net_glob
class FL_client():
    def __init__(self, args):
        if args.dataset == 'cifar':
            self.net = CNNCifar(args=args).to(args.device)
        else:
            self.net = CNNMnist(args=args).to(args.device)
        self.net.train()
        self.loss_func = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.SGD(self.net.parameters(), lr=args.lr)
        self.args = args
        self.w_glob = []
        # key exchange
        self.x = self.gx = 0
        self.keys = defaultdict(int)

    def set_data(self, dataset, idxs):
        self.data = DataLoader(DatasetSplit(dataset, idxs),
                               batch_size=self.args.local_bs, shuffle=True)

    def load_state(self, state_dict):
        self.net.load_state_dict(state_dict)

    def train(self):
        epoch_loss = []
        for _ in range(self.args.local_ep):
            batch_loss = []
            for _, (images, labels) in enumerate(self.data):
                images, labels = images.to(self.args.device), labels.to(self.args.device)
                pred = self.net(images)
                loss = self.loss_func(pred, labels)
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                batch_loss.append(loss.item())
            epoch_loss.append(sum(batch_loss) / len(batch_loss))
        return self.net.state_dict(), sum(epoch_loss) / len(epoch_loss)
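# Hedged usage sketch for FL_client: dataset_train, dict_users and net_glob
# are assumed to come from the data-loading and model-building snippets in
# this collection; the call sequence is illustrative, not the repo's exact
# driver code.
client = FL_client(args)
client.set_data(dataset_train, dict_users[0])  # hand the client its data shard
client.load_state(net_glob.state_dict())       # sync with the global model
w_local, avg_loss = client.train()             # local_ep epochs of local SGD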
dataset_valid = dataset_test
if args.iid == 'noniid_ssl' and args.dataset == 'cifar':
    dict_users, dict_users_labeled, pseudo_label = noniid_ssl(
        dataset_train, args.num_users, args.label_rate)
else:
    dict_users, dict_users_labeled, pseudo_label = sample(
        dataset_train, args.num_users, args.label_rate, args.iid)
if args.dataset == 'cifar':
    net_glob = CNNCifar(args=args).to(args.device)
    net_ema_glob = CNNCifar(args=args).to(args.device)
elif args.dataset == 'mnist':
    net_glob = CNNMnist(args=args).to(args.device)
    net_ema_glob = CNNMnist(args=args).to(args.device)
elif args.dataset == 'svhn':
    net_glob = CNNCifar(args=args).to(args.device)
    net_ema_glob = CNNCifar(args=args).to(args.device)
else:
    exit('Error: unrecognized dataset')
net_glob.train()
net_ema_glob.train()

# copy weights
w_glob = net_glob.state_dict()
w_ema_glob = net_ema_glob.state_dict()
def modelBuild():
    """Build the basic training network and return the related args."""
    # build model
    args = args_parser()
    args.device = torch.device('cuda:{}'.format(args.gpu)
                               if torch.cuda.is_available() and args.gpu != -1 else 'cpu')

    # load dataset and split users
    if args.dataset == 'mnist':
        trans_mnist = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])
        dataset_train = datasets.MNIST('../data/mnist/', train=True,
                                       download=True, transform=trans_mnist)
        dataset_test = datasets.MNIST('../data/mnist/', train=False,
                                      download=True, transform=trans_mnist)
        # sample users
        if args.iid:
            # allocate the dataset indices to users
            dict_users = mnist_iid(dataset_train, args.num_users)
        else:
            dict_users = mnist_noniid(dataset_train, args.num_users)
    elif args.dataset == 'cifar':
        trans_cifar = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        dataset_train = datasets.CIFAR10('../data/cifar', train=True,
                                         download=True, transform=trans_cifar)
        dataset_test = datasets.CIFAR10('../data/cifar', train=False,
                                        download=True, transform=trans_cifar)
        if args.iid:
            dict_users = cifar_iid(dataset_train, args.num_users)
        else:
            exit('Error: only consider IID setting in CIFAR10')
    else:
        exit('Error: unrecognized dataset')
    print("The para of iid is " + str(args.iid))
    img_size = dataset_train[0][0].shape

    if args.model == 'cnn' and args.dataset == 'cifar':
        net_glob = CNNCifar(args=args).to(args.device)
    elif args.model == 'cnn' and args.dataset == 'mnist':
        net_glob = CNNMnist(args=args).to(args.device)
    elif args.model == 'mlp':
        len_in = 1
        for x in img_size:
            len_in *= x
        net_glob = MLP(dim_in=len_in, dim_hidden=200,
                       dim_out=args.num_classes).to(args.device)
    else:
        exit('Error: unrecognized model')
    print("********************************")
    print(net_glob)
    print("********************************")
    return net_glob, args, dataset_train, dataset_test, dict_users
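# Hedged call-site sketch for modelBuild (the real driver is not shown here).
net_glob, args, dataset_train, dataset_test, dict_users = modelBuild()
net_glob.train()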
    test_set = datasets.CIFAR10('./data/cifar', train=False,
                                download=False, transform=transform)
else:
    exit('Error: unrecognized dataset...')

# split dataset {user_id: [list of data index]}
dict_users_train, ratio = noniid_train(train_set, args.num_users)
dict_users_test = noniid_test(test_set, args.num_users, ratio)
print('Data finished...')

# load global model
net_glob = CNNCifar(args=args).to(args.device) if args.dataset == 'cifar' \
    else CNNMnist(args=args).to(args.device)
net_glob.train()

# parameters
w_glob = net_glob.state_dict()
loss_train = []

# meta-learning for global initial parameters
for epoch in range(args.meta_epochs):
    loss_locals = []
    w_locals = []
    m = max(int(args.frac * args.num_users), 1)
    idxs_users = np.random.choice(range(args.num_users), m, replace=False)
    for idx in idxs_users:
        client = Client(args=args,
def main_worker(gpu, ngpus_per_node, args):
    print("gpu:", gpu)
    args.gpu = gpu
    if args.rank == 0:
        # (the first machine only has three GPUs, so it needs special handling)
        newrank = args.rank * ngpus_per_node + gpu
    else:
        newrank = args.rank * ngpus_per_node + gpu - 1

    # initialize the process group, communicating over tcp
    print("begin init")
    dist.init_process_group(init_method=args.init_method, backend="nccl",
                            world_size=args.world_size, rank=newrank)
    print("end init")

    # build communication groups: rank 0 acts as the server and broadcast is
    # used to emulate send/recv, so the server needs a group with every client
    group = []
    for i in range(1, args.world_size):
        group.append(dist.new_group([0, i]))
    allgroup = dist.new_group([i for i in range(args.world_size)])

    if newrank == 0:
        """server"""
        print("Using GPU {} of node {} as the server".format(gpu, args.rank))
        # during training the server only aggregates and redistributes the
        # parameters; it does not take part in any computation
        args.device = torch.device('cuda:{}'.format(args.gpu)
                                   if torch.cuda.is_available() and args.gpu != -1 else 'cpu')
        net = CNNMnist().to(args.device)
        w_avg = copy.deepcopy(net.state_dict())
        for j in range(args.epochs):
            if j == args.epochs - 1:
                for i in w_avg.keys():
                    temp = w_avg[i].to(args.device)
                    w_avg[i] = average_gradients(temp, group, allgroup)
            else:
                for i in w_avg.keys():
                    temp = w_avg[i].to(args.device)
                    average_gradients(temp, group, allgroup)
        torch.save(w_avg, 'w_wag')
        net.load_state_dict(w_avg)

        # load the test data
        trans_mnist = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])
        dataset_test = datasets.MNIST('data/', train=False, download=True,
                                      transform=trans_mnist)
        test_set = torch.utils.data.DataLoader(dataset_test, batch_size=args.bs)
        test_accuracy, test_loss = test(net, test_set, args)
        print("Testing accuracy: {:.2f}".format(test_accuracy))
        print("Testing loss: {:.2f}".format(test_loss))
    else:
        """clients"""
        print("Using GPU {} of node {} as client {}".format(gpu, args.rank, newrank))
        args.device = torch.device('cuda:{}'.format(args.gpu)
                                   if torch.cuda.is_available() and args.gpu != -1 else 'cpu')
        print("begin train...")
        net = CNNMnist().to(args.device)
        print(net)
        data = torch.load("data/distributed/data_of_client{}".format(newrank))
        bsz = 64
        train_set = torch.utils.data.DataLoader(data, batch_size=bsz)
        optimizer = torch.optim.SGD(net.parameters(), lr=args.lr, momentum=0.5)
        num_batches = ceil(len(train_set.dataset) / float(bsz))
        start = time.time()
        for epoch in range(args.epochs):
            for iter in range(3):
                epoch_loss = 0.0
                for data, target in train_set:
                    data, target = data.to(args.device), target.to(args.device)
                    data, target = Variable(data), Variable(target)
                    optimizer.zero_grad()
                    output = net(data)
                    loss = F.nll_loss(output, target)
                    epoch_loss += loss.item()
                    loss.backward()
                    optimizer.step()
                if iter == 3 - 1:
                    print('Rank ', dist.get_rank(), ', epoch ', epoch, ': ',
                          epoch_loss / num_batches)
            """federated learning"""
            w_avg = copy.deepcopy(net.state_dict())
            for k in w_avg.keys():
                print("k:", k)
                temp = average_gradients(w_avg[k].to(args.device), group, allgroup)
                w_avg[k] = temp
            net.load_state_dict(w_avg)
        end = time.time()
        print("training time: {}".format(end - start))
        train_accuracy, train_loss = test(net, train_set, args)
        print("Training accuracy: {:.2f}".format(train_accuracy))
        print("Training loss: {:.2f}".format(train_loss))
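# The average_gradients helper used above is not shown in this file. A minimal
# sketch of what it is assumed to do, matching the broadcast-based send/recv
# emulation: the server collects one tensor per client through the pairwise
# groups, averages them, and shares the result through allgroup. The real
# implementation may differ.
def average_gradients(tensor, group, allgroup):
    rank = dist.get_rank()
    if rank == 0:
        # server: gather each client's tensor via its pairwise group
        parts = []
        for i, g in enumerate(group):
            tmp = tensor.clone()
            dist.broadcast(tensor=tmp, src=i + 1, group=g)
            parts.append(tmp)
        tensor = sum(parts) / len(parts)
    else:
        # client: send the local tensor to the server
        dist.broadcast(tensor=tensor, src=rank, group=group[rank - 1])
    # server shares the averaged tensor with everyone
    dist.broadcast(tensor=tensor, src=0, group=allgroup)
    return tensor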
                                     train=False, download=True, transform=trans_cifar)
    if args.iid:
        dict_users = cifar_iid(dataset_train, args.num_users)
    else:
        exit('Error: only consider IID setting in CIFAR10')
else:
    exit('Error: unrecognized dataset')
img_size = dataset_train[0][0].shape

# build model
if args.model == 'cnn' and args.dataset == 'cifar':
    net_glob = CNNCifar(args=args).to(args.device)
elif args.model == 'cnn' and args.dataset == 'mnist':
    net_glob = CNNMnist(args=args).to(args.device)
    net_glob5 = CNNMnist(args=args).to(args.device)
    net_glob10 = CNNMnist(args=args).to(args.device)
elif args.model == 'mlp':
    len_in = 1
    for x in img_size:
        len_in *= x
    net_glob = MLP(dim_in=len_in, dim_hidden=64,
                   dim_out=args.num_classes).to(args.device)
else:
    exit('Error: unrecognized model')
print(net_glob)
net_glob.train()
net_glob5.train()
net_glob10.train()
if args.iid == 'noniid_ssl' and args.dataset == 'cifar':
    dict_users, dict_users_labeled, pseudo_label = noniid_ssl(
        dataset_train_weak, args.num_users, args.label_rate)
else:
    dict_users, dict_users_labeled, pseudo_label = sample(
        dataset_train_weak, args.num_users, args.label_rate, args.iid)

if args.dataset == 'cifar':
    net_glob = CNNCifar(args=args).to(args.device)
    net_glob_helper_1 = CNNCifar(args=args).to(args.device)
    net_glob_helper_2 = CNNCifar(args=args).to(args.device)
    net_glob_valid = CNNCifar(args=args).to(args.device)
elif args.dataset == 'mnist':
    net_glob = CNNMnist(args=args).to(args.device)
    net_glob_helper_1 = CNNMnist(args=args).to(args.device)
    net_glob_helper_2 = CNNMnist(args=args).to(args.device)
    net_glob_valid = CNNMnist(args=args).to(args.device)
elif args.dataset == 'svhn':
    net_glob = CNNCifar(args=args).to(args.device)
    net_glob_helper_1 = CNNCifar(args=args).to(args.device)
    net_glob_helper_2 = CNNCifar(args=args).to(args.device)
    net_glob_valid = CNNCifar(args=args).to(args.device)
else:
    exit('Error: unrecognized dataset')

print("\n Begin Train")
net_glob.train()
    trans_cifar = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    dataset_train = datasets.CIFAR10('data/cifar', train=True,
                                     download=True, transform=trans_cifar)
    dataset_test = datasets.CIFAR10('data/cifar', train=False,
                                    download=True, transform=trans_cifar)
    if args.iid:
        dict_users = cifar_iid(dataset_train, args.num_users)
    else:
        exit('Error: only consider IID setting in CIFAR10')
else:
    exit('Error: unrecognized dataset')
img_size = dataset_train[0][0].shape

# build model
if args.model == 'cnn' and args.dataset == 'cifar':
    net_glob = CNNCifar(args=args).to(args.device)
elif args.model == 'cnn' and args.dataset == 'mnist':
    net_glob = CNNMnist(args=args).to(args.device)
    net_glob1 = CNNMnist(args=args).to(args.device)
    net_glob5 = CNNMnist(args=args).to(args.device)
    net_glob7 = CNNMnist(args=args).to(args.device)
    net_glob10 = CNNMnist(args=args).to(args.device)
elif args.model == 'mlp':
    len_in = 1
    for x in img_size:
        len_in *= x
    net_glob = MLP(dim_in=len_in, dim_hidden=64,
                   dim_out=args.num_classes).to(args.device)
else:
    exit('Error: unrecognized model')
print(net_glob)
net_glob.train()
net_glob1.train()
net_glob5.train()
### ANALYZING END ###
    if args.iid:
        dict_users = cifar_iid(dataset_train, args.num_users)
    else:
        exit('Error: only consider IID setting in CIFAR10')
else:
    exit('Error: unrecognized dataset')
img_size = dataset_train[0][0].shape

# build model
if args.model == 'cnn' and args.dataset == 'cifar':
    net_glob = CNNCifar(args=args).to(args.device)
elif args.model == 'cnn' and args.dataset == 'mnist':
    net_glob = CNNMnist(args=args).to(args.device)
    net_glob1 = CNNMnist(args=args).to(args.device)
    net_glob5 = CNNMnist(args=args).to(args.device)
    net_glob10 = CNNMnist(args=args).to(args.device)
    net_glob15 = CNNMnist(args=args).to(args.device)
    net_glob20 = CNNMnist(args=args).to(args.device)
    net_glob25 = CNNMnist(args=args).to(args.device)
    net_glob30 = CNNMnist(args=args).to(args.device)
elif args.model == 'mlp':
    len_in = 1
    for x in img_size:
        len_in *= x
    net_glob = MLP(dim_in=len_in, dim_hidden=64,
                   dim_out=args.num_classes).to(args.device)
else:
    exit('Error: unrecognized model')
                                     train=False, download=True, transform=trans_cifar)
    if args.iid:
        dict_users = cifar_iid(dataset_train, args.num_users)
    else:
        exit('Error: only consider IID setting in CIFAR10')
else:
    exit('Error: unrecognized dataset')
img_size = dataset_train[0][0].shape

# build model
if args.model == 'cnn' and args.dataset == 'cifar':
    net_glob = CNNCifar(args=args).to(args.device)
elif args.model == 'cnn' and args.dataset == 'mnist':
    net_glob = CNNMnist(args=args).to(args.device)
elif args.model == 'mlp':
    len_in = 1
    for x in img_size:
        len_in *= x
    net_glob = MLP(dim_in=len_in, dim_hidden=64,
                   dim_out=args.num_classes).to(args.device)
else:
    exit('Error: unrecognized model')
print(net_glob)
net_glob.train()

# copy weights
w_glob = net_glob.state_dict()

# training - NO ATTACK
            if key_item_1[0] == key_item_2[0]:
                print('Mismatch found at', key_item_1[0])
            else:
                raise Exception
    if models_differ == 0:
        print('Models match perfectly! :)')


args = args_parser()
args.gpu = -1
args.device = torch.device('cuda:{}'.format(args.gpu)
                           if torch.cuda.is_available() and args.gpu != -1 else 'cpu')
# trans_fmnist = transforms.Compose([transforms.ToTensor(),
#                                    transforms.Normalize((0.1307,), (0.3081,))])
# dataset_train = datasets.FashionMNIST('../data/fmnist', train=True,
#                                       download=True, transform=trans_fmnist)
# dict_users, dict_labels_counter = mnist_noniid(dataset_train, args.num_users)
net_glob = CNNMnist(args=args).to(args.device)
# print(net_glob)
m = net_glob
m.train()
for p1, p2 in zip(m.parameters(), net_glob.parameters()):
    if p1.data.ne(p2.data).sum() > 0:
        print(False)
print(True)
# local_mainFL = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users[5])
# w_mainFL, loss_mainFL = local_mainFL.train(net=copy.deepcopy(net_glob_mainFL).to(args.device))
# compare_models(net_glob, w_mainFL)
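# The fragment above appears to come from a compare_models helper; a hedged
# reconstruction of the full routine for context (state_dict comparison,
# reporting the first differing key):
def compare_models(model_1, model_2):
    models_differ = 0
    for key_item_1, key_item_2 in zip(model_1.state_dict().items(),
                                      model_2.state_dict().items()):
        if torch.equal(key_item_1[1], key_item_2[1]):
            continue
        models_differ += 1
        if key_item_1[0] == key_item_2[0]:
            print('Mismatch found at', key_item_1[0])
        else:
            raise Exception  # the two state_dicts are not aligned
    if models_differ == 0:
        print('Models match perfectly! :)')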
                                    download=True, transform=trans_cifar)
    if args.iid:
        dict_users = cifar_iid(dataset_train, args.num_users, args.seed)
    else:
        exit('Error: only consider IID setting in CIFAR10')
else:
    exit('Error: unrecognized dataset')
img_size = dataset_train[0][0].shape

# build model
if args.model == 'cnn' and args.dataset == 'cifar':
    net_local = CNNCifar(args=args).to(args.device)
elif args.model == 'cnn' and args.dataset == 'mnist':
    net_local = CNNMnist(args=args).to(args.device)
elif args.model == 'mlp':
    len_in = 1
    for x in img_size:
        len_in *= x
    net_local = MLP(dim_in=len_in, dim_hidden=64,
                    dim_out=args.num_classes).to(args.device)
else:
    exit('Error: unrecognized model')

# Let's train the model for a few epochs first
n_epochs = 3
batch_size_train = 64
batch_size_test = 1000
learning_rate = 0.01
def main_worker(gpu, ngpus_per_node, args):
    print("gpu:", gpu)
    args.gpu = gpu
    if args.rank == 0:
        # (the first machine only has three GPUs, so it needs special handling)
        newrank = args.rank * ngpus_per_node + gpu
    else:
        newrank = args.rank * ngpus_per_node + gpu - 1

    # initialize the process group, communicating over tcp
    dist.init_process_group(init_method=args.init_method, backend="nccl",
                            world_size=args.world_size, rank=newrank)

    # build communication groups: rank 0 acts as the server and broadcast is
    # used to emulate send/recv, so the server needs a group with every client
    group = []
    for i in range(1, args.world_size):
        group.append(dist.new_group([0, i]))
    allgroup = dist.new_group([i for i in range(args.world_size)])

    if newrank == 0:
        """server"""
        print("Using GPU {} of node {} as the server".format(gpu, args.rank))
        # during training the server only aggregates and redistributes the
        # parameters; it does not take part in any computation
        args.device = torch.device('cuda:{}'.format(args.gpu)
                                   if torch.cuda.is_available() and args.gpu != -1 else 'cpu')

        # load the test data
        trans_mnist = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])
        dataset_test = datasets.MNIST('data/', train=False, download=True,
                                      transform=trans_mnist)
        test_set = torch.utils.data.DataLoader(dataset_test, batch_size=args.bs)

        """calculate influence function"""
        model = CNNMnist().to(args.device)
        model.load_state_dict(torch.load('w_wag'))
        test_id = 0  # id of the chosen test example
        data, target = test_set.dataset[test_id]
        data = test_set.collate_fn([data])
        target = test_set.collate_fn([target])
        print("begin grad")
        grad_test = grad_z(data, target, model, gpu, create_graph=False)
        print("end grad")
        v = grad_test

        """server and clients jointly compute s_test (via the rka algorithm)"""
        # count the total number of model parameters
        num_parameters = 0
        for i in list(model.parameters()):
            # number of entries in each tensor
            temp = 1
            for j in i.size():
                temp *= j
            num_parameters += temp

        # send grad_test to every client
        for i in range(args.world_size - 1):
            print("send grad_test to client:", i + 1)
            for j in v:
                temp = j
                dist.broadcast(src=0, tensor=temp, group=group[i])

        for k in range(args.num_sample_rka):
            # send the sampled coordinate id to the clients
            id = torch.tensor(random.randint(0, num_parameters - 1)).to(args.device)
            for i in range(args.world_size - 1):
                dist.broadcast(src=0, tensor=id, group=group[i])

            # receive the second-order derivatives from the clients
            sec_grad = []
            second_grad = [torch.zeros(list(model.parameters())[i].size()).to(args.device)
                           for i in range(len(list(model.parameters())))]
            for i in range(args.world_size - 1):
                temp = copy.deepcopy(second_grad)
                for j in temp:
                    dist.broadcast(src=i + 1, tensor=j, group=group[i])
                sec_grad.append(temp)

            # aggregate the second-order derivatives, then redistribute them
            e_second_grad = sec_grad[0]
            for i in range(1, args.world_size - 1):
                e_second_grad = [i + j for i, j in six.moves.zip(e_second_grad, sec_grad[i])]
            e_second_grad = [i / (args.world_size - 1) for i in e_second_grad]
            for j in e_second_grad:
                temp = j
                dist.broadcast(src=0, tensor=temp, group=allgroup)
        """end of interaction"""

        # receive the influence values from the clients
        print("rec influence")
        allinfluence = []
        influence = torch.tensor([i for i in range(4285)], dtype=torch.float32)
        influence = influence.to(args.device)
        for i in range(args.world_size - 1):
            dist.broadcast(src=i + 1, tensor=influence, group=group[i])
            temp = copy.deepcopy(influence)
            allinfluence.append(temp)
        torch.save(allinfluence, 'influence/influence')
    else:
        """clients"""
        print("Using GPU {} of node {} as client {}".format(gpu, args.rank, newrank))
        args.device = torch.device('cuda:{}'.format(args.gpu)
                                   if torch.cuda.is_available() and args.gpu != -1 else 'cpu')

        # load the training data
        data = torch.load("data/distributedData/data_of_client{}".format(newrank))
        bsz = 64
        train_set = torch.utils.data.DataLoader(data, batch_size=bsz)
        model = CNNMnist().to(args.device)
        model.load_state_dict(torch.load('w_wag'))  # load the model
        data, target = train_set.dataset[0]
        data = train_set.collate_fn([data])
        target = train_set.collate_fn([target])
        grad_v = grad_z(data, target, model, gpu=gpu, create_graph=False)
        grad_test = copy.deepcopy(grad_v)

        """calculate influence function"""
        """interact with the server to compute s_test, iterating (rka algorithm)"""
        # receive grad_test from the server
        for i in grad_test:
            dist.broadcast(src=0, tensor=i, group=group[newrank - 1])
        stest = copy.deepcopy(grad_test)
        for k in range(args.num_sample_rka):
            # receive the sampled coordinate id from the server and compute
            # the second-order derivative
            id = torch.tensor([0]).to(args.device)
            dist.broadcast(src=0, tensor=id, group=group[newrank - 1])
            idt = id.item()
            second_grad = hessian(model, train_set, idt, gpu=args.gpu)
            # send the second-order derivative to the server
            for i in second_grad:
                temp = i
                dist.broadcast(src=newrank, tensor=temp, group=group[newrank - 1])
            # receive the aggregated second-order derivative from the server
            for i in second_grad:
                temp = i
                dist.broadcast(src=0, tensor=temp, group=allgroup)
            # run one rka update to refine stest
            stest = rka(stest, second_grad, grad_test)
        s_test_fin = stest

        """s_test finished; with the final s_test_fin, start computing influence"""
        print("client:", newrank, "calculate influence")
        n = len(train_set.dataset)
        influence = np.array([i for i in range(n)], dtype='float32')
        for i in utility.create_progressbar(len(train_set.dataset),
                                            desc='influence', start=0):
            # per-example gradient
            data, target = train_set.dataset[i]
            data = train_set.collate_fn([data])
            target = train_set.collate_fn([target])
            grad_z_vec = grad_z(data, target, model, gpu=gpu)
            # influence value
            inf_tmp = -sum([torch.sum(k * j).data.cpu().numpy()
                            for k, j in six.moves.zip(grad_z_vec, s_test_fin)]) / n
            influence[i] = inf_tmp
        influence = torch.tensor(influence).to(args.device)

        # send the influence values to the server
        print("client:", newrank, "send influence to server")
        dist.broadcast(src=newrank, tensor=influence, group=group[newrank - 1])
        print("client:", newrank, "end send influence to server")
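# The rka helper above is assumed to implement a randomized-Kaczmarz-style
# update for solving H * s = v, where `row` is the Hessian row sampled at the
# server-chosen coordinate. A heavily simplified sketch over flattened
# parameter lists; the repo's real signature and update rule may differ
# (row_id is a hypothetical extra argument).
def rka(s, row, v, row_id=0):
    s_flat = torch.cat([t.view(-1) for t in s])
    h_flat = torch.cat([t.view(-1) for t in row])
    v_flat = torch.cat([t.view(-1) for t in v])
    # project s onto the hyperplane defined by the sampled row
    resid = (v_flat[row_id] - torch.dot(h_flat, s_flat)) / (h_flat.norm() ** 2 + 1e-12)
    s_flat = s_flat + resid * h_flat
    # unflatten back into the parameter-shaped list
    out, offset = [], 0
    for t in s:
        n = t.numel()
        out.append(s_flat[offset:offset + n].view_as(t))
        offset += n
    return out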
class Client():
    def __init__(self, args, dataset=None, idxs=None, w=None, C=0.5, sigma=0.05):
        self.args = args
        self.loss_func = nn.CrossEntropyLoss()
        self.ldr_train = DataLoader(DatasetSplit(dataset, idxs),
                                    batch_size=self.args.local_bs, shuffle=True)
        self.model = CNNMnist(args=args).to(args.device)
        self.model.load_state_dict(w)
        self.C = C          # clipping bound
        self.sigma = sigma  # noise multiplier
        if self.args.mode == 'Paillier':
            self.pub = pub
            self.priv = priv

    def train(self):
        w_old = copy.deepcopy(self.model.state_dict())
        net = copy.deepcopy(self.model)
        net.train()
        # train and update
        optimizer = torch.optim.SGD(net.parameters(), lr=self.args.lr,
                                    momentum=self.args.momentum)
        for iter in range(self.args.local_ep):
            batch_loss = []
            for batch_idx, (images, labels) in enumerate(self.ldr_train):
                images, labels = images.to(self.args.device), labels.to(self.args.device)
                net.zero_grad()
                log_probs = net(images)
                loss = self.loss_func(log_probs, labels)
                loss.backward()
                optimizer.step()
                batch_loss.append(loss.item())

        w_new = net.state_dict()
        update_w = {}
        if self.args.mode == 'plain':
            for k in w_new.keys():
                update_w[k] = w_new[k] - w_old[k]
        # 1. part one: DP mechanism
        elif self.args.mode == 'DP':
            for k in w_new.keys():
                # calculate update_w
                update_w[k] = w_new[k] - w_old[k]
                # clip the update
                update_w[k] = update_w[k] / max(1, torch.norm(update_w[k], 2) / self.C)
                # add per-element Gaussian noise (std = sigma * C): the raw
                # update could still leak the user's data, so noise is added
                # before sending it to the server
                update_w[k] += torch.normal(0.0, self.sigma * self.C,
                                            size=update_w[k].shape).to(self.args.device)
        # 2. part two: Paillier encryption
        elif self.args.mode == 'Paillier':
            print(len(w_new.keys()))
            for k in w_new.keys():
                print("start ", k, flush=True)
                update_w[k] = w_new[k] - w_old[k]
                update_w_list = update_w[k].view(-1).cpu().tolist()
                for iter, w in enumerate(update_w_list):
                    update_w_list[iter] = self.pub.encrypt(w)
                update_w[k] = update_w_list
                print("end ", flush=True)
        else:
            exit()
        return update_w, sum(batch_loss) / len(batch_loss)

    def update(self, w_glob):
        if self.args.mode == 'plain':
            self.model.load_state_dict(w_glob)
        elif self.args.mode == 'DP':
            self.model.load_state_dict(w_glob)
        elif self.args.mode == 'Paillier':
            w_glob_ciph = copy.deepcopy(w_glob)
            for k in w_glob_ciph.keys():
                for iter, item in enumerate(w_glob_ciph[k]):
                    w_glob_ciph[k][iter] = self.priv.decrypt(item)
                shape = list(self.model.state_dict()[k].size())
                w_glob_ciph[k] = torch.FloatTensor(w_glob_ciph[k]).to(
                    self.args.device).view(*shape)
                self.model.state_dict()[k] += w_glob_ciph[k]
        else:
            exit()
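# The pub/priv pair referenced by Client is assumed to be a module-level
# Paillier keypair. With the python-paillier (`phe`) package it could be set
# up as below; the key size is illustrative, not the repo's actual choice.
from phe import paillier

pub, priv = paillier.generate_paillier_keypair(n_length=1024)

# round-trip check: encrypt one weight delta and decrypt it again
ciphertext = pub.encrypt(0.25)
assert abs(priv.decrypt(ciphertext) - 0.25) < 1e-9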
def main():
    manualSeed = 1
    np.random.seed(manualSeed)
    random.seed(manualSeed)
    torch.manual_seed(manualSeed)
    # if you are using GPU
    torch.cuda.manual_seed(manualSeed)
    torch.cuda.manual_seed_all(manualSeed)
    torch.backends.cudnn.enabled = False
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    # parse args
    args = args_parser()
    args.device = torch.device('cuda:{}'.format(args.gpu)
                               if torch.cuda.is_available() and args.gpu != -1 else 'cpu')

    # load dataset and split users
    if args.dataset == 'mnist':
        trans_mnist = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])
        dataset_train = datasets.MNIST('../data/mnist/', train=True,
                                       download=True, transform=trans_mnist)
        dataset_test = datasets.MNIST('../data/mnist/', train=False,
                                      download=True, transform=trans_mnist)
        # sample users
        if args.iid:
            dict_users_DCFL = mnist_iid(dataset_train, args.num_users)
        else:
            dict_users_DCFL, dict_labels_counter = mnist_noniid(dataset_train, args.num_users)
            dict_users_mainFL, dict_labels_counter_mainFL = dict_users_DCFL, dict_labels_counter
    elif args.dataset == 'cifar':
        trans_cifar = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        dataset_train = datasets.CIFAR10('../data/cifar', train=True,
                                         download=True, transform=trans_cifar)
        dataset_test = datasets.CIFAR10('../data/cifar', train=False,
                                        download=True, transform=trans_cifar)
        if args.iid:
            dict_users_DCFL = cifar_iid(dataset_train, args.num_users)
            dict_users_mainFL = dict_users_DCFL
            dict_labels_counter_mainFL = dict()
            dict_labels_counter = dict()
        else:
            dict_users_DCFL, dict_labels_counter = cifar_noniid(dataset_train, args.num_users)
            dict_users_mainFL, dict_labels_counter_mainFL = dict_users_DCFL, dict_labels_counter
    elif args.dataset == 'fmnist':
        trans_fmnist = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])
        dataset_train = datasets.FashionMNIST('../data/fmnist', train=True,
                                              download=True, transform=trans_fmnist)
        dataset_test = datasets.FashionMNIST('../data/fmnist', train=False,
                                             download=True, transform=trans_fmnist)
        if args.iid:
            print("iid")
            dict_users_DCFL = mnist_iid(dataset_train, args.num_users)
        else:
            print("non iid")
            dict_users_DCFL, dict_labels_counter = mnist_noniid(dataset_train, args.num_users)
            dict_users_mainFL, dict_labels_counter_mainFL = dict_users_DCFL, dict_labels_counter
    else:
        exit('Error: unrecognized dataset')
    img_size = dataset_train[0][0].shape

    # Small shared dataset
    test_ds, valid_ds_before = torch.utils.data.random_split(dataset_test, (9500, 500))
    small_shared_dataset = create_shared_dataset(valid_ds_before, 200)

    optimal_delay = 1.0

    # Start the process for each fraction C
    for c_counter in range(3, 3 + 1, 2):
        if args.model == 'cnn' and args.dataset == 'cifar':
            net_glob = CNNCifar(args=args).to(args.device)
            # net_glob_mainFL = copy.deepcopy(net_glob)
        elif args.model == 'cnn' and args.dataset == 'mnist':
            net_glob = CNNMnist(args=args).to(args.device)
            # net_glob_mainFL = copy.deepcopy(net_glob)
        elif args.model == 'cnn' and args.dataset == 'fmnist':
            net_glob = CNNFashion_Mnist(args=args).to(args.device)
            # net_glob_mainFL = copy.deepcopy(net_glob)
        elif args.model == 'mlp':
            len_in = 1
            for x in img_size:
                len_in *= x
            net_glob = MLP(dim_in=len_in, dim_hidden=200,
                           dim_out=args.num_classes).to(args.device)
        else:
            exit('Error: unrecognized model')

        # Saving data: the per-round and final logs share the same schema for
        # every algorithm, so build them from two key lists
        round_log_keys = ["C", "Round", "Average Loss Train", "SDS Loss",
                          "SDS Accuracy", "Workers Number", "Large Test Loss",
                          "Large Test Accuracy", "Communication Cost"]
        final_log_keys = ["C", "Test Accuracy", "Test Loss", "Train Loss",
                          "Train Accuracy", "Total Rounds", "Communication Cost"]
        data_Global_main = {k: [] for k in round_log_keys}
        Final_LargeDataSetTest_MainFL = {k: [] for k in final_log_keys}
        data_Global_DCFL = {k: [] for k in round_log_keys}
        Final_LargeDataSetTest_DCFL = {k: [] for k in final_log_keys}
        data_Global_G1 = {k: [] for k in round_log_keys}
        Final_LargeDataSetTest_G1 = {k: [] for k in final_log_keys}
        data_Global_G2 = {k: [] for k in round_log_keys}
        Final_LargeDataSetTest_G2 = {k: [] for k in final_log_keys}
        data_Global_Muhammed = {k: [] for k in round_log_keys}
        Final_LargeDataSetTest_Muhammed = {k: [] for k in final_log_keys}
        data_Global_Cho = {k: [] for k in round_log_keys}
        Final_LargeDataSetTest_Cho = {k: [] for k in final_log_keys}

        net_glob.train()
        net_glob_mainFL = copy.deepcopy(net_glob)
        net_glob_G1 = copy.deepcopy(net_glob)
        net_glob_G2 = copy.deepcopy(net_glob)

        cost = np.random.rand(args.num_users)
        R_G1 = 5
        args.frac = (c_counter / 10)

        # Main FL
        loss_main, dict_workers_index, Final_LargeDataSetTest_MainFL_temp, data_Global_main_temp = mainFl(
            net_glob_mainFL, dict_users_mainFL, dict_labels_counter_mainFL,
            args, cost, dataset_train, dataset_test, small_shared_dataset)
        Final_LargeDataSetTest_MainFL = merge(Final_LargeDataSetTest_MainFL,
                                              Final_LargeDataSetTest_MainFL_temp)
        data_Global_main = merge(data_Global_main, data_Global_main_temp)

        # with open(os.path.join(OUT_DIR, f"dict_users_mainFL-C-{args.frac}-{args.dataset}.pkl"), 'wb') as file:
        #     pickle.dump(dict_users_mainFL, file)
        # with open(os.path.join(OUT_DIR, f"dict_users_mainFL-C-{args.frac}-{args.dataset}.pkl"), 'rb') as file:
        #     dict_users_mainFL = pickle.load(file)
        # with open(os.path.join(OUT_DIR, f"workers_index-C-{args.frac}-{args.dataset}.pkl"), 'wb') as file:
        #     pickle.dump(dict_workers_index, file)
        # with open(os.path.join(OUT_DIR, f"cost-C-{args.frac}-{args.dataset}.pkl"), 'wb') as file:
        #     pickle.dump(cost, file)
        # with open(os.path.join(OUT_DIR, f"cost-C-{args.frac}-{args.dataset}.pkl"), 'rb') as file:
        #     cost = pickle.load(file)
        # print(cost)
        # with open(os.path.join(OUT_DIR, f"GoalLoss-C-{args.frac}-{args.dataset}.pkl"), 'wb') as file:
        #     pickle.dump(loss_main, file)

        date = datetime.now()
        _dir = os.path.join(OUT_DIR, str(date.date()))
        if not os.path.exists(_dir):
            os.makedirs(_dir)
        save_time = time.strftime("%Y%m%d-%H%M%S")

        Final_LargeDataSetTest_MainFL = pd.DataFrame.from_dict(Final_LargeDataSetTest_MainFL)
        data_Global_main = pd.DataFrame.from_dict(data_Global_main)
        Final_LargeDataSetTest_MainFL.to_csv(os.path.join(
            _dir, f"{save_time}-{args.dataset}-Final_LargeDataSetTest_MainFL.csv"))
        data_Global_main.to_csv(os.path.join(
            _dir, f"{save_time}-{args.dataset}-data_Global_main.csv"))

        # Proposed G1
        Final_LargeDataSetTest_G1_temp, data_Global_G1_temp = Proposed_G1(
            net_glob_G1, dict_workers_index, dict_users_DCFL, dict_labels_counter_mainFL,
            args, cost, dataset_train, dataset_test, small_shared_dataset,
            loss_main, R_G1, optimal_delay)
        Final_LargeDataSetTest_G1 = merge(Final_LargeDataSetTest_G1,
                                          Final_LargeDataSetTest_G1_temp)
        data_Global_G1 = merge(data_Global_G1, data_Global_G1_temp)
        Final_LargeDataSetTest_G1 = pd.DataFrame.from_dict(Final_LargeDataSetTest_G1)
        data_Global_G1 = pd.DataFrame.from_dict(data_Global_G1)
        Final_LargeDataSetTest_G1.to_csv(os.path.join(
            _dir, f"{save_time}-{args.dataset}-Final_LargeDataSetTest_G1.csv"))
        data_Global_G1.to_csv(os.path.join(
            _dir, f"{save_time}-{args.dataset}-data_Global_G1.csv"))
        print("G1 alg is done")
def main():
    # parse args
    args = args_parser()
    args.device = torch.device('cuda:{}'.format(args.gpu)
                               if torch.cuda.is_available() and args.gpu != -1 else 'cpu')

    # load dataset and split users
    if args.dataset == 'mnist':
        trans_mnist = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])
        dataset_train = datasets.MNIST('../data/mnist/', train=True,
                                       download=True, transform=trans_mnist)
        dataset_test = datasets.MNIST('../data/mnist/', train=False,
                                      download=True, transform=trans_mnist)
        print("type of test dataset", type(dataset_test))
        # sample users
        if args.iid:
            dict_users = mnist_iid(dataset_train, args.num_users)
        else:
            dict_users, dict_labels_counter = mnist_noniid(dataset_train, args.num_users)
            dict_users_2, dict_labels_counter_2 = dict_users, dict_labels_counter
            # dict_users, dict_labels_counter = mnist_noniid_unequal(dataset_train, args.num_users)
    elif args.dataset == 'cifar':
        trans_cifar = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        dataset_train = datasets.CIFAR10('../data/cifar', train=True,
                                         download=True, transform=trans_cifar)
        dataset_test = datasets.CIFAR10('../data/cifar', train=False,
                                        download=True, transform=trans_cifar)
        if args.iid:
            dict_users = cifar_iid(dataset_train, args.num_users)
        else:
            exit('Error: only consider IID setting in CIFAR10')
    else:
        exit('Error: unrecognized dataset')
    img_size = dataset_train[0][0].shape

    # build model
    if args.model == 'cnn' and args.dataset == 'cifar':
        net_glob = CNNCifar(args=args).to(args.device)
        net_glob_2 = CNNCifar(args=args).to(args.device)
    elif args.model == 'cnn' and args.dataset == 'mnist':
        net_glob = CNNMnist(args=args).to(args.device)
        net_glob_2 = CNNMnist(args=args).to(args.device)
    elif args.model == 'mlp':
        len_in = 1
        for x in img_size:
            len_in *= x
        net_glob = MLP(dim_in=len_in, dim_hidden=200,
                       dim_out=args.num_classes).to(args.device)
    else:
        exit('Error: unrecognized model')
    # print(net_glob)
    # net_glob.train()

    acc_test, loss_test = test_img(net_glob, dataset_test, args)
    print("val test finished")
    print("{:.2f}".format(acc_test))
    temp = net_glob
    # net_glob_2 = net_glob
    temp_2 = net_glob_2

    # copy weights
    w_glob = net_glob.state_dict()

    # training
    loss_train = []
    cv_loss, cv_acc = [], []
    val_loss_pre, counter = 0, 0
    net_best = None
    best_loss = None
    val_acc_list, net_list = [], []
    Loss_local_each_global_total = []

    test_ds, valid_ds = torch.utils.data.random_split(dataset_test, (9500, 500))

    loss_workers_total = np.zeros(shape=(args.num_users, args.epochs))
    label_workers = {i: np.array([], dtype='int64') for i in range(args.num_users)}
    workers_percent = []
    workers_count = 0

    # evaluate the initial global model on the shared validation set
    x = net_glob
    x.eval()
    acc_test_global, loss_test_global = test_img(x, valid_ds, args)

    # pre-select workers whose local model reaches at least 70% of the
    # global accuracy on the shared validation set
    selected_users_index = []
    for idx in range(args.num_users):
        # print("train started")
        local = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users[idx])
        w, loss = local.train(net=copy.deepcopy(net_glob).to(args.device))
        # print(w)
        # print("train completed")
        # temp = FedAvg(w)
        temp.load_state_dict(w)
        temp.eval()
        acc_test_local, loss_test_local = test_img(temp, valid_ds, args)
        loss_workers_total[idx, 0] = acc_test_local  # recorded in slot 0 (pre-selection pass)
        if workers_count >= (args.num_users / 2):
            break
        elif acc_test_local >= (0.7 * acc_test_global):
            selected_users_index.append(idx)

    for iter in range(args.epochs):
        print("round started")
        Loss_local_each_global = []
        loss_workers = np.zeros((args.num_users, args.epochs))
        w_locals, loss_locals = [], []
        m = max(int(args.frac * args.num_users), 1)
        # idxs_users = np.random.choice(range(args.num_users), m, replace=False)
        # if iter % 5 == 0:

        x = net_glob
        x.eval()
        Loss_local_each_global_total.append(acc_test_global)

        for idx in selected_users_index:
            # print("train started")
            local = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users[idx])
            w, loss = local.train(net=copy.deepcopy(net_glob).to(args.device))
            # print(w)
            # print("train completed")
            # temp = FedAvg(w)
            temp.load_state_dict(w)
            temp.eval()
            acc_test_local, loss_test_local = test_img(temp, valid_ds, args)
            loss_workers_total[idx, iter] = acc_test_local
            if workers_count >= (args.num_users / 2):
                break
            elif acc_test_local >= (0.7 * acc_test_global):
                w_locals.append(copy.deepcopy(w))
                loss_locals.append(copy.deepcopy(loss))
                print("Update Received")
                workers_count += 1

        # update global weights
        w_glob = FedAvg(w_locals)
        # copy weight to net_glob
        net_glob.load_state_dict(w_glob)
        print("round completed")
        loss_avg = sum(loss_locals) / len(loss_locals)
        print('Round {:3d}, Average loss {:.3f}'.format(iter, loss_avg))
        loss_train.append(loss_avg)
        workers_percent.append(workers_count)

    # plot the number of participating workers per round
    plt.figure()
    plt.plot(range(len(workers_percent)), workers_percent)
    plt.ylabel('train_loss')
    plt.savefig('./save/Newfed_WorkersPercent_0916_{}_{}_{}_C{}_iid{}.png'.format(
        args.dataset, args.model, args.epochs, args.frac, args.iid))

    # print(loss_workers_total)
    # plot loss curve
    # plt.figure()
    # plt.plot(range(len(loss_train)), loss_train)
    # plt.ylabel('train_loss')
    # plt.savefig('./save/Newfed_0916_{}_{}_{}_C{}_iid{}.png'.format(
    #     args.dataset, args.model, args.epochs, args.frac, args.iid))
    # plt.figure()
    for i in range(args.num_users):
        plot = plt.plot(range(len(loss_workers_total[i, :])),
                        loss_workers_total[i, :], label="Worker {}".format(i))
    plot5 = plt.plot(range(len(Loss_local_each_global_total)),
                     Loss_local_each_global_total, color='000000', label="Global")
    plt.legend(loc='best')
    plt.ylabel('Small Test Set Accuracy of workers')
    plt.xlabel('Number of Rounds')
    plt.savefig('./save/NewFed_2workers_Acc_0916_{}_{}_{}_C{}_iid{}.png'.format(
        args.dataset, args.model, args.epochs, args.frac, args.iid))

    # plt.figure()
    # bins = np.linspace(0, 9, 3)
    # a = dict_labels_counter[:, 0].ravel()
    # print(type(a))
    # b = dict_labels_counter[:, 1].ravel()
    # x_labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    # # Set plot parameters
    # fig, ax = plt.subplots()
    # width = 0.1  # width of bar
    # x = np.arange(10)
    # ax.bar(x, dict_labels_counter[:, 0], width, color='#000080', label='Worker 1')
    # ax.bar(x + width, dict_labels_counter[:, 1], width, color='#73C2FB', label='Worker 2')
    # ax.bar(x + 2*width, dict_labels_counter[:, 2], width, color='#ff0000', label='Worker 3')
    # ax.bar(x + 3*width, dict_labels_counter[:, 3], width, color='#32CD32', label='Worker 4')
    # ax.set_ylabel('Number of Labels')
    # ax.set_xticks(x + width + width / 2)
    # ax.set_xticklabels(x_labels)
    # ax.set_xlabel('Labels')
    # ax.legend()
    # plt.grid(True, 'major', 'y', ls='--', lw=.5, c='k', alpha=.3)
    # fig.tight_layout()
    # plt.savefig('./save/Newfed_2workersLabels_0916_{}_{}_{}_C{}_iid{}.png'.format(
    #     args.dataset, args.model, args.epochs, args.frac, args.iid))

    # testing
    print("testing started")
    net_glob.eval()
    print("train test started")
    acc_train_final, loss_train_final = test_img(net_glob, dataset_train, args)
    print("train test finished")
    acc_test_final, loss_test_final = test_img(net_glob, dataset_test, args)
    print("val test finished")
    # print("Training accuracy: {:.2f}".format(acc_train))
    # print("Testing accuracy: {:.2f}".format(acc_test))
    print("{:.2f}".format(acc_test_final))
    # print("{:.2f}".format(Loss_local_each_worker))

    # training (baseline FL for comparison)
    w_glob_2 = net_glob_2.state_dict()
    loss_train_2 = []
    cv_loss_2, cv_acc_2 = [], []
    val_loss_pre_2, counter_2 = 0, 0
    net_best_2 = None
    best_loss_2 = None
    val_acc_list_2, net_list_2 = [], []
    Loss_local_each_global_total_2 = []
    loss_workers_total_2 = np.zeros(shape=(args.num_users, args.epochs))
    label_workers_2 = {i: np.array([], dtype='int64') for i in range(args.num_users)}

    for iter in range(args.epochs):
        print("round started")
        Loss_local_each_global_2 = []
        loss_workers_2 = np.zeros((args.num_users, args.epochs))
        w_locals_2, loss_locals_2 = [], []
        m_2 = max(int(args.frac * args.num_users), 1)
        idxs_users_2 = np.random.choice(range(args.num_users), m_2, replace=False)

        x_2 = net_glob_2
        x_2.eval()
        acc_test_global_2, loss_test_global_2 = test_img(x_2, valid_ds, args)
        Loss_local_each_global_total_2.append(acc_test_global_2)

        for idx in idxs_users_2:
            # print("train started")
            local_2 = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users_2[idx])
            w_2, loss_2 = local_2.train(net=copy.deepcopy(net_glob_2).to(args.device))
            # print(w)
            # print("train completed")
            w_locals_2.append(copy.deepcopy(w_2))
            loss_locals_2.append(copy.deepcopy(loss_2))
            # temp = FedAvg(w)
            temp_2.load_state_dict(w_2)
            temp_2.eval()
            acc_test_local_2, loss_test_local_2 = test_img(temp_2, valid_ds, args)
            loss_workers_total_2[idx, iter] = acc_test_local_2

        # update global weights
        w_glob_2 = FedAvg(w_locals_2)
        # copy weight to net_glob
        net_glob_2.load_state_dict(w_glob_2)
        loss_avg_2 = sum(loss_locals_2) / len(loss_locals_2)
        print('Round {:3d}, Average loss {:.3f}'.format(iter, loss_avg_2))
        loss_train_2.append(loss_avg_2)
        print("round completed")

    # plot loss curves
    plt.figure()
    plt.plot(range(len(loss_train_2)), loss_train_2, color='#000000', label="Main FL")
    plt.plot(range(len(loss_train)), loss_train, color='#ff0000', label="Centralized Algorithm")
    plt.ylabel('train_loss')
    plt.savefig('./save/main_fed_0916_{}_{}_{}_C{}_iid{}.png'.format(
        args.dataset, args.model, args.epochs, args.frac, args.iid))

    # print(loss_workers_total)
    plt.figure()
    for i in range(args.num_users):
        plot = plt.plot(range(len(loss_workers_total_2[i, :])),
                        loss_workers_total_2[i, :], label="Worker {}".format(i))
    plot5 = plt.plot(range(len(Loss_local_each_global_total_2)),
                     Loss_local_each_global_total_2, color='000000', label="Global")
    plt.legend(loc='best')
    plt.ylabel('Small Test Set Accuracy of workers')
    plt.xlabel('Number of Rounds')
    plt.savefig('./save/mainfed_Acc_0916_{}_{}_{}_C{}_iid{}.png'.format(
        args.dataset, args.model, args.epochs, args.frac, args.iid))

    # plt.figure()
    # bins = np.linspace(0, 9, 3)
    # a = dict_labels_counter_2[:, 0].ravel()
    # print(type(a))
    # b = dict_labels_counter_2[:, 1].ravel()
    # x_labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    # # Set plot parameters
    # fig, ax = plt.subplots()
    # width = 0.1  # width of bar
    # x = np.arange(10)
    # ax.bar(x, dict_labels_counter_2[:, 0], width, color='#000080', label='Worker 1')
    # ax.bar(x + width, dict_labels_counter_2[:, 1], width, color='#73C2FB', label='Worker 2')
    # ax.bar(x + 2*width, dict_labels_counter_2[:, 2], width, color='#ff0000', label='Worker 3')
    # ax.bar(x + 3*width, dict_labels_counter_2[:, 3], width, color='#32CD32', label='Worker 4')
    # ax.set_ylabel('Number of Labels')
    # ax.set_xticks(x + width + width / 2)
    # ax.set_xticklabels(x_labels)
    # ax.set_xlabel('Labels')
    # ax.legend()
    # plt.grid(True, 'major', 'y', ls='--', lw=.5, c='k', alpha=.3)
    # fig.tight_layout()
    # plt.savefig('./save/main_fed_2workersLabels_0916_{}_{}_{}_C{}_iid{}.png'.format(
    #     args.dataset, args.model, args.epochs, args.frac, args.iid))

    # testing
    print("testing started")
    net_glob.eval()
    print("train test started")
    acc_train_final, loss_train_final = test_img(net_glob, dataset_train, args)
    print("train test finished")
    acc_test_final, loss_test_final = test_img(net_glob, dataset_test, args)
    print("val test finished")
    # print("Training accuracy: {:.2f}".format(acc_train))
    # print("Testing accuracy: {:.2f}".format(acc_test))
    print("{:.2f}".format(acc_test_final))
    # print("{:.2f}".format(Loss_local_each_worker))
    return loss_test_final, loss_train_final
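# For reference, the FedAvg helper called throughout main() is the standard
# element-wise average of the collected local state_dicts. A hedged sketch
# (the repo's utils module may implement it slightly differently):
def FedAvg(w):
    w_avg = copy.deepcopy(w[0])
    for k in w_avg.keys():
        for i in range(1, len(w)):
            w_avg[k] += w[i][k]
        w_avg[k] = torch.div(w_avg[k], len(w))
    return w_avg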
def __init__(self, args, w):
    self.args = args
    self.clients_update_w = []
    self.clients_loss = []
    self.model = CNNMnist(args=args).to(args.device)
    self.model.load_state_dict(w)
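# A hedged sketch of one communication round tying this Server to the Client
# class above: the aggregation method name (FedAvg) and its return values are
# assumptions, not the repo's confirmed API.
clients, server = create_client_server()
for rnd in range(args.epochs):
    server.clients_update_w, server.clients_loss = [], []
    for c in clients:
        update_w, loss = c.train()            # local training, returns the delta
        server.clients_update_w.append(update_w)
        server.clients_loss.append(loss)
    w_glob, loss_avg = server.FedAvg()        # assumed server-side aggregation
    for c in clients:
        c.update(w_glob)                      # apply the aggregated update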
def main_worker(gpu, ngpus_per_node, args):
    print("gpu:", gpu)
    args.gpu = gpu
    if args.rank == 0:
        # (the first machine only has three GPUs, so it needs special handling)
        newrank = args.rank * ngpus_per_node + gpu
    else:
        newrank = args.rank * ngpus_per_node + gpu - 1

    # initialize the process group, communicating over tcp
    dist.init_process_group(init_method=args.init_method, backend="nccl",
                            world_size=args.world_size, rank=newrank)

    # build communication groups: rank 0 acts as the server and broadcast is
    # used to emulate send/recv, so the server needs a group with every client
    group = []
    for i in range(1, args.world_size):
        group.append(dist.new_group([0, i]))
    allgroup = dist.new_group([i for i in range(args.world_size)])

    if newrank == 0:
        """server"""
        print("Using GPU {} of node {} as the server".format(gpu, args.rank))
        # during training the server only aggregates and redistributes the
        # parameters; it does not take part in any computation
        args.device = torch.device('cuda:{}'.format(args.gpu)
                                   if torch.cuda.is_available() and args.gpu != -1 else 'cpu')

        # load the test data
        trans_mnist = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])
        dataset_test = datasets.MNIST('data/', train=False, download=True,
                                      transform=trans_mnist)
        test_set = torch.utils.data.DataLoader(dataset_test, batch_size=args.bs)

        """calculate influence function"""
        model = CNNMnist().to(args.device)
        model.load_state_dict(torch.load('w_wag'))
        test_id = 0  # id of the chosen test example
        data, target = test_set.dataset[test_id]
        data = test_set.collate_fn([data])
        target = test_set.collate_fn([target])
        print("begin grad")
        grad_test = grad_z(data, target, model, gpu, create_graph=False)  # initial v
        print("end grad")
        v = grad_test
        s_test = []

        """server and clients jointly compute s_test"""
        for i in range(args.world_size - 1):
            # id_client = random.randint(1, args.world_size)  # pick a client
            # send the current v to the chosen client
            print("send v to client:", i + 1)
            for j in v:
                temp = j
                dist.broadcast(src=0, tensor=temp, group=group[i])
            # once the client has finished, receive its v, ready for the next client
            print("rec v from client:", i + 1)
            v_new = copy.deepcopy(v)
            for j in v_new:
                temp = j
                dist.broadcast(src=i + 1, tensor=temp, group=group[i])
            s_test.append(v_new)

        # s_test finished: send the final s_test to all clients
        e_s_test = s_test[0]
        for i in range(1, args.world_size - 1):
            e_s_test = [i + j for i, j in six.moves.zip(e_s_test, s_test[i])]
        for j in e_s_test:
            temp = j
            dist.broadcast(src=0, tensor=temp, group=allgroup)
        """end of interaction"""

        # receive the influence values from the clients
        print("rec influence")
        allinfluence = []
        influence = torch.tensor([i for i in range(4285)], dtype=torch.float32)
        influence = influence.to(args.device)
        for i in range(args.world_size - 1):
            dist.broadcast(src=i + 1, tensor=influence, group=group[i])
            temp = copy.deepcopy(influence)
            allinfluence.append(temp)
        torch.save(allinfluence, 'influence')
    else:
        """clients"""
        print("Using GPU {} of node {} as client {}".format(gpu, args.rank, newrank))
        args.device = torch.device('cuda:{}'.format(args.gpu)
                                   if torch.cuda.is_available() and args.gpu != -1 else 'cpu')

        # load the training data
        data = torch.load("data/distributed/data_of_client{}".format(newrank))
        bsz = 64
        train_set = torch.utils.data.DataLoader(data, batch_size=bsz)
        model = CNNMnist().to(args.device)
        model.load_state_dict(torch.load('w_wag'))  # load the model
        data, target = train_set.dataset[0]
        data = train_set.collate_fn([data])
        target = train_set.collate_fn([target])
        grad_v = grad_z(data, target, model, gpu=gpu)
        v = grad_v

        """calculate influence function"""
        v_new = []
        # receive v from the server; this exchange can be iterated with the
        # server to refine s_test (currently a single iteration, no loop)
        for i in v:
            temp = i
            dist.broadcast(src=0, tensor=temp, group=group[newrank - 1])
            v_new.append(temp)
        s_test = stest(v_new, model, train_set, gpu,
                       damp=0.01, scale=1000.0, repeat=5)  # compute s_test
        # send s_test to the server for the next iteration
        for i in s_test:
            temp = copy.copy(i)
            dist.broadcast(src=newrank, tensor=temp, group=group[newrank - 1])

        # after the iterations, receive the final s_test from the server and
        # compute the influence function
        s_test_fin = []
        for i in s_test:
            temp = copy.copy(i)
            dist.broadcast(src=0, tensor=temp, group=allgroup)
            s_test_fin.append(temp)

        """s_test finished; with the final s_test_fin, start computing influence"""
        print("client:", newrank, "calculate influence")
        n = len(train_set.dataset)
        influence = np.array([i for i in range(n)], dtype='float32')
        for i in utility.create_progressbar(len(train_set.dataset),
                                            desc='influence', start=0):
            # per-example gradient
            data, target = train_set.dataset[i]
            data = train_set.collate_fn([data])
            target = train_set.collate_fn([target])
            grad_z_vec = grad_z(data, target, model, gpu=gpu)
            # influence value
            inf_tmp = -sum([torch.sum(k * j).data.cpu().numpy()
                            for k, j in six.moves.zip(grad_z_vec, s_test_fin)]) / n
            influence[i] = inf_tmp
        influence = torch.tensor(influence).to(args.device)

        # send the influence values to the server
        print("client:", newrank, "send influence to server")
        dist.broadcast(src=newrank, tensor=influence, group=group[newrank - 1])
        print("client:", newrank, "end send influence to server")
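# The grad_z helper assumed throughout the influence-function code computes the
# per-example loss gradient w.r.t. all model parameters. A minimal sketch (the
# repo's version may handle devices and the loss choice differently):
def grad_z(data, target, model, gpu=-1, create_graph=True):
    model.eval()
    if gpu >= 0:
        data, target = data.cuda(gpu), target.cuda(gpu)
    loss = F.nll_loss(model(data), target)  # the CNNs here output log-probs
    params = [p for p in model.parameters() if p.requires_grad]
    return list(torch.autograd.grad(loss, params, create_graph=create_graph))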
    train_set = datasets.MNIST(root='./data/mnist', train=True,
                               download=False, transform=transform)
    test_set = datasets.MNIST(root='./data/mnist', train=False,
                              download=False, transform=transform)
elif args.dataset == 'cifar':
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    train_set = datasets.CIFAR10('./data/cifar', train=True,
                                 download=False, transform=transform)
    test_set = datasets.CIFAR10('./data/cifar', train=False,
                                download=False, transform=transform)
else:
    exit('Error: unrecognized dataset...')

dict_users_train, ratio = noniid_train2(train_set, args.num_users)
dict_users_test = noniid_test(test_set, args.num_users, ratio)
print('Data finished...')

# load global model
net_glob = CNNCifar(args=args).to(args.device) if args.dataset == 'cifar' \
    else CNNMnist(args=args).to(args.device)
net_glob.train()

# parameters
w_glob = net_glob.state_dict()

# test each of the clients
test_acc = [0 for i in range(args.num_users)]
test_loss = [0 for i in range(args.num_users)]
for idx in range(args.num_users):
    # every client starts with the same global parameters
    net_glob.load_state_dict(w_glob)
    client = Client(args=args, dataset=train_set, idxs=dict_users_train[idx],
                    bs=args.train_bs)
    w_client = client.local_train(net=copy.deepcopy(net_glob).to(args.device))
    client = Client(args=args, dataset=test_set, idxs=dict_users_test[idx],
                    bs=args.test_bs)
    dataset_test = datasets.CIFAR10('../data/cifar', train=False,
                                    download=True, transform=trans_cifar)
    if args.iid:
        dict_users = cifar_iid(dataset_train, args.num_users)
    else:
        dict_users = cifar_noniid(dataset_train, args.num_users, min_train=200,
                                  max_train=1000, main_label_prop=0.8, other=9)
else:
    exit('Error: unrecognized dataset')
img_size = dataset_train[0][0].shape

# build model
if args.model == 'cnn' and args.dataset == 'cifar':
    # global_net = CNNCifar(args=args).to(args.device)
    global_net = CNNCifarPlus(args=args).to(args.device)
elif args.model == 'cnn' and args.dataset == 'mnist':
    global_net = CNNMnist(args=args).to(args.device)
elif args.model == 'mlp':
    len_in = 1
    for x in img_size:
        len_in *= x
    global_net = MLP(dim_in=len_in, dim_hidden=200,
                     dim_out=args.num_classes).to(args.device)
else:
    exit('Error: unrecognized model')
print(global_net)
global_net.train()

# start time
# time_start = time.time()
def main():
    # parse args
    args = args_parser()
    args.device = torch.device('cuda:{}'.format(args.gpu)
                               if torch.cuda.is_available() and args.gpu != -1 else 'cpu')

    # load dataset and split users
    if args.dataset == 'mnist':
        trans_mnist = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])
        dataset_train = datasets.MNIST('../data/mnist/', train=True,
                                       download=True, transform=trans_mnist)
        dataset_test = datasets.MNIST('../data/mnist/', train=False,
                                      download=True, transform=trans_mnist)
        # sample users
        if args.iid:
            dict_users = mnist_iid(dataset_train, args.num_users)
        else:
            dict_users, dict_labels_counter = mnist_noniid(dataset_train, args.num_users)
            dict_users_mainFL, dict_labels_counter_mainFL = dict_users, dict_labels_counter
    elif args.dataset == 'cifar':
        trans_cifar = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        dataset_train = datasets.CIFAR10('../data/cifar', train=True,
                                         download=True, transform=trans_cifar)
        dataset_test = datasets.CIFAR10('../data/cifar', train=False,
                                        download=True, transform=trans_cifar)
        if args.iid:
            dict_users = cifar_iid(dataset_train, args.num_users)
        else:
            dict_users, dict_labels_counter = cifar_noniid(dataset_train, args.num_users)
            dict_users_mainFL, dict_labels_counter_mainFL = dict_users, dict_labels_counter
    elif args.dataset == 'fmnist':
        trans_fmnist = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])
        dataset_train = datasets.FashionMNIST('../data/fmnist', train=True,
                                              download=True, transform=trans_fmnist)
        dataset_test = datasets.FashionMNIST('../data/fmnist', train=False,
                                             download=True, transform=trans_fmnist)
        if args.iid:
            dict_users = mnist_iid(dataset_train, args.num_users)
        else:
            dict_users, dict_labels_counter = mnist_noniid(dataset_train, args.num_users)
            dict_users_mainFL, dict_labels_counter_mainFL = dict_users, dict_labels_counter
    else:
        exit('Error: unrecognized dataset')
    img_size = dataset_train[0][0].shape

    acc_full_distributed = []
    acc_full_main = []
    loss_full_ditributed = []
    loss_full_main = []
    SD_acc_full_distributed = []
    SD_acc_full_main = []
    SD_loss_full_ditributed = []
    SD_loss_full_main = []
    workers_percent_full_distributed = []
    workers_percent_full_main = []
    variable_start = 0.1
    variable_end = 1.0
    while_counter = 0.1
    counter_array = []
    Accuracy_Fraction = []
    Workers_Fraction = []
    accuracy_fraction_each_round_newFL = 0
    workers_fraction_each_round_newFL = 0
    accuracy_fraction_each_round_mainFL = 0
    workers_fraction_each_round_mainFL = 0

    data_main = {}
    data_DCFL = {}
    data_Global_main = {"C": [], "Round": [], "Average Loss Train": [],
                        "Average Loss Test": [], "Accuracy Test": [],
                        "Workers Number": [], "Large Test Loss": [],
                        "Large Test Accuracy": []}
    data_Global_DCFL = {"C": [], "Round": [], "Average Loss Train": [],
                        "Average Loss Test": [], "Accuracy Test": [],
                        "Workers Number": [], "Large Test Loss": [],
                        "Large Test Accuracy": []}
    Final_LargeDataSetTest_DCFL = {"C": [], "Test Accuracy": [], "Test Loss": [],
                                   "Train Loss": [], "Train Accuracy": [],
                                   "Total Rounds": []}
    Final_LargeDataSetTest_MainFL = {"C": [], "Test Accuracy": [], "Test Loss": [],
                                     "Train Loss": [], "Train Accuracy": []}

    # build model
    args.frac = variable_start
    test_ds, valid_ds_before = torch.utils.data.random_split(dataset_test, (9500, 500))
    valid_ds = create_shared_dataset(valid_ds_before, 200)

    # while variable_start <= variable_end:
    for c_counter in range(1, 11, 3):
        if args.model == 'cnn' and args.dataset == 'cifar':
            net_glob = CNNCifar(args=args).to(args.device)
            net_glob_mainFL = copy.deepcopy(net_glob)
        elif args.model == 'cnn' and args.dataset == 'mnist':
            net_glob = CNNMnist(args=args).to(args.device)
            net_glob_mainFL = copy.deepcopy(net_glob)
        elif args.model == 'cnn' and args.dataset == 'fmnist':
            net_glob = CNNFashion_Mnist(args=args).to(args.device)
            net_glob_mainFL = copy.deepcopy(net_glob)
        elif args.model == 'mlp':
            len_in = 1
            for x in img_size:
                len_in *= x
            net_glob = MLP(dim_in=len_in, dim_hidden=200,
                           dim_out=args.num_classes).to(args.device)
        else:
            exit('Error: unrecognized model')

        counter_array.append((c_counter / 10))
        args.frac = (c_counter / 10)

        ###### saving the indices of the workers
        dict_workers_index = defaultdict(list)

        ############# Main FL
        w_glob_mainFL = net_glob_mainFL.state_dict()
        loss_train_mainFL = []
        # cv_loss_2, cv_acc_2 = [], []
        # val_loss_pre_2, counter_2 = 0, 0
        # net_best_2 = None
        # best_loss_2 = None
        # val_acc_list_2, net_list_2 = [], []
        Loss_local_each_global_total_mainFL = []
        Accuracy_local_each_global_total_mainFL = []
        loss_workers_total_mainFL = np.zeros(shape=(args.num_users, args.epochs))
        label_workers_mainFL = {i: np.array([], dtype='int64')
                                for i in range(args.num_users)}
        validation_test_mainFed = []
        acc_test, loss_test = test_img(net_glob_mainFL, dataset_test, args)
        workers_participation_main_fd = np.zeros((args.num_users, args.epochs))
        workers_percent_main = []

        # for iter in range(args.epochs):
        net_glob_mainFL.eval()
        acc_test_final_mainFL, loss_test_final_mainFL = test_img(net_glob_mainFL,
                                                                 dataset_test, args)
        while_counter_mainFL = loss_test_final_mainFL
        iter_mainFL = 0

        workers_mainFL = []
        for i in range(args.num_users):
            workers_mainFL.append(i)

        # separate copy for per-worker evaluation, so load_state_dict below
        # does not overwrite the global model
        temp_netglob_mainFL = copy.deepcopy(net_glob_mainFL)

        while iter_mainFL < (args.epochs / 2):
            data_main['round_{}'.format(iter_mainFL)] = []
            # data_Global_main['round_{}'.format(iter)] = []
            # print("round started")
            Loss_local_each_global_mainFL = []
            loss_workers_mainFL = np.zeros((args.num_users, args.epochs))
            w_locals_mainFL, loss_locals_mainFL = [], []
            m_mainFL = max(int(args.frac * args.num_users), 1)
            idxs_users_mainFL = np.random.choice(range(args.num_users),
                                                 m_mainFL, replace=False)
            list_of_random_workers = random.sample(workers_mainFL, m_mainFL)
            for i in range(len(list_of_random_workers)):
                dict_workers_index[iter_mainFL].append(list_of_random_workers[i])

            x_mainFL = net_glob_mainFL
            x_mainFL.eval()
            acc_test_global_mainFL, loss_test_global_mainFL = test_img(x_mainFL,
                                                                       valid_ds, args)
            Loss_local_each_global_total_mainFL.append(loss_test_global_mainFL)
            Accuracy_local_each_global_total_mainFL.append(acc_test_global_mainFL)
            SD_acc_full_main.append(acc_test_global_mainFL)
            SD_loss_full_main.append(loss_test_global_mainFL)
            workers_count_mainFL = 0

            temp_accuracy = np.zeros(1)
            temp_loss_test = np.zeros(1)
            temp_loss_train = np.zeros(1)
            for idx in list_of_random_workers:
                # print("train started")
                local_mainFL = LocalUpdate(args=args, dataset=dataset_train,
                                           idxs=dict_users_mainFL[idx])
                w_mainFL, loss_mainFL = local_mainFL.train(
                    net=copy.deepcopy(net_glob_mainFL).to(args.device))
                # print(w)
                # print("train completed")
                w_locals_mainFL.append(copy.deepcopy(w_mainFL))
                loss_locals_mainFL.append(copy.deepcopy(loss_mainFL))
                # temp = FedAvg(w)
                temp_netglob_mainFL.load_state_dict(w_mainFL)
                temp_netglob_mainFL.eval()
                print(pnorm_2(temp_netglob_mainFL, 2))
                acc_test_local_mainFL, loss_test_local_mainFL = test_img(
                    temp_netglob_mainFL, valid_ds, args)
                temp_accuracy[0] = acc_test_local_mainFL
                temp_loss_test[0] = loss_test_local_mainFL
                temp_loss_train[0] = loss_mainFL
                loss_workers_total_mainFL[idx, iter_mainFL] = acc_test_local_mainFL
                workers_participation_main_fd[idx][iter_mainFL] = 1
                workers_count_mainFL += 1
                data_main['round_{}'.format(iter_mainFL)].append({
                    'C': args.frac,
                    'User ID': idx,
                    # 'Local Update': copy.deepcopy(w_mainFL),
                    'Loss Train': temp_loss_train[0],
                    'Loss Test': temp_loss_test[0],
                    'Accuracy': temp_accuracy[0]
                })

            # update global weights
            w_glob_mainFL = FedAvg(w_locals_mainFL)
            # copy weight to net_glob
            net_glob_mainFL.load_state_dict(w_glob_mainFL)
            # print("round completed")
            loss_avg_mainFL = sum(loss_locals_mainFL) / len(loss_locals_mainFL)
            # print('Round {:3d}, Average loss {:.3f}'.format(iter, loss_avg_mainFL))
            loss_train_mainFL.append(loss_avg_mainFL)
            # print("round completed")

            acc_test_round_mainfed, loss_test_round_mainfed = test_img(
                net_glob_mainFL, dataset_test, args)
            validation_test_mainFed.append(acc_test_round_mainfed)
            workers_percent_main.append(workers_count_mainFL / args.num_users)

            print(iter_mainFL, " round main fl finished")
            acc_test_final_mainFL, loss_test_final_mainFL = test_img(
                net_glob_mainFL, dataset_test, args)
            while_counter_mainFL = loss_test_final_mainFL

            data_Global_main["Round"].append(iter_mainFL)
            data_Global_main["C"].append(args.frac)
            data_Global_main["Average Loss Train"].append(float(loss_avg_mainFL))
            data_Global_main["Average Loss Test"].append(float(loss_test_global_mainFL))
            data_Global_main["Accuracy Test"].append(float(acc_test_global_mainFL))
            data_Global_main["Workers Number"].append(float(workers_count_mainFL))
            data_Global_main["Large Test Loss"].append(float(loss_test_final_mainFL))
            data_Global_main["Large Test Accuracy"].append(float(acc_test_final_mainFL))

            iter_mainFL = iter_mainFL + 1

        workers_percent_final_mainFL = np.zeros(args.num_users)
        workers_name_mainFL = np.empty(args.num_users)
        for i in range(len(workers_participation_main_fd[:, 1])):
            workers_percent_final_mainFL[i] = sum(
                workers_participation_main_fd[i, :]) / args.epochs
            workers_name_mainFL[i] = i

        net_glob_mainFL.eval()
        # print("train test started")
        acc_train_final_main, loss_train_final_main = test_img(net_glob_mainFL,
                                                               dataset_train, args)
        # print("train test finished")
        acc_test_final_main, loss_test_final_main = test_img(net_glob_mainFL,
                                                             dataset_test, args)
        Final_LargeDataSetTest_MainFL["C"].append(args.frac)
        Final_LargeDataSetTest_MainFL["Test Loss"].append(float(loss_test_final_main))
        Final_LargeDataSetTest_MainFL["Test Accuracy"].append(float(acc_test_final_main))
        Final_LargeDataSetTest_MainFL["Train Loss"].append(float(loss_train_final_main))
        Final_LargeDataSetTest_MainFL["Train Accuracy"].append(float(acc_train_final_main))

        # copy weights
        w_glob = net_glob.state_dict()
        temp_after = copy.deepcopy(net_glob)
        temp_before = copy.deepcopy(net_glob)

        # training
        loss_train = []
        # cv_loss, cv_acc = [], []
        # val_loss_pre, counter = 0, 0
        # net_best = None
        # best_loss = None
        # val_acc_list, net_list = [], []
        Loss_local_each_global_total = []
        # valid_ds = create_shared_dataset(dataset_test, 500)
        loss_workers_total = np.zeros(shape=(args.num_users, args.epochs))
        label_workers = {i: np.array([], dtype='int64')
                         for i in range(args.num_users)}
        workers_percent_dist = []
        validation_test_newFed = []
        workers_participation = np.zeros((args.num_users, args.epochs))
        workers = []
        for i in range(args.num_users):
            workers.append(i)
        counter_threshold_decrease = np.zeros(args.epochs)
        Global_Accuracy_Tracker = np.zeros(args.epochs)
        Global_Loss_Tracker = np.zeros(args.epochs)
        threshold = 0.5
        alpha = 0.5  # decrease parameter
        beta = 0.1   # delta-accuracy controller
        gamma = 0.5  # threshold decrease parameter

        Goal_Loss = float(loss_test_final_main)

        # for iter in range(args.epochs):
        net_glob.eval()
        acc_test_final, loss_test_final = test_img(net_glob, dataset_test, args)
        while_counter = float(loss_test_final)
        iter = 0
        total_rounds_dcfl = 0

        while (while_counter + 0.01) > Goal_Loss and iter <= args.epochs:
            data_DCFL['round_{}'.format(iter)] = []
            Loss_local_each_global = []
            loss_workers = np.zeros((args.num_users, args.epochs))
            w_locals, loss_locals = [], []
            m = max(int(args.frac * args.num_users), 1)
            idxs_users = np.random.choice(range(args.num_users), m, replace=False)
            counter_threshold = 0
            print(iter, " in dist FL started")

            # if iter % 5 == 0:
            x = copy.deepcopy(net_glob)
            x.eval()
            acc_test_global, loss_test_global = test_img(x, valid_ds, args)
            Loss_local_each_global_total.append(acc_test_global)
            Global_Accuracy_Tracker[iter] = acc_test_global
            Global_Loss_Tracker[iter] = loss_test_global
            if iter > 0 and (Global_Loss_Tracker[iter - 1] - Global_Loss_Tracker[iter] <= beta):
                threshold = threshold - gamma
                if threshold == 0.0:
                    threshold = 1.0
                print("threshold decreased to", threshold)
            workers_count = 0
            SD_acc_full_distributed.append(acc_test_global)
            SD_loss_full_ditributed.append(loss_test_global)

            temp_w_locals = []
            temp_workers_loss = np.empty(args.num_users)
            temp_workers_accuracy = np.empty(args.num_users)
            temp_workers_loss_test = np.empty(args.num_users)
            temp_workers_loss_differenc = np.empty(args.num_users)
            temp_workers_accuracy_differenc = np.empty(args.num_users)
            flag = np.zeros(args.num_users)

            list_of_random_workers_newfl = []
            if iter < (args.epochs / 2):
                for key, value in dict_workers_index.items():
                    # print(value)
                    if key == iter:
                        list_of_random_workers_newfl = dict_workers_index[key]
            else:
                list_of_random_workers_newfl = random.sample(workers, m)

            for idx in list_of_random_workers_newfl:
                # print("train started")
                # evaluate before local training
                temp_before = copy.deepcopy(net_glob)
                # temp_before.load_state_dict(w)
                temp_before.eval()
                acc_test_local_before, loss_test_local_before = test_img(
                    temp_before, valid_ds, args)

                local = LocalUpdate(args=args, dataset=dataset_train,
                                    idxs=dict_users[idx])
                w, loss = local.train(net=copy.deepcopy(net_glob).to(args.device))
                # print(w)
                # print("train completed")
                temp_w_locals.append(copy.deepcopy(w))
                temp_workers_loss[idx] = copy.deepcopy(loss)

                # evaluate after local training
                temp_after = copy.deepcopy(net_glob)
                temp_after.load_state_dict(w)
                temp_after.eval()
                acc_test_local_after, loss_test_local_after = test_img(
                    temp_after, valid_ds, args)
                loss_workers_total[idx, iter] = loss_test_local_after
                temp_workers_accuracy[idx] = acc_test_local_after
                temp_workers_loss_test[idx] = loss_test_local_after
                temp_workers_loss_differenc[idx] = loss_test_local_before - loss_test_local_after
                temp_workers_accuracy_differenc[idx] = acc_test_local_after - acc_test_local_before
            print("train finished")

            while len(w_locals) < 1:
                # print("receiving started")
                index = 0
                for idx in list_of_random_workers_newfl:
                    # print("acc is ", temp_workers_accuracy[idx])
                    # print(temp_workers_loss_differenc)
                    if workers_count >= m:
                        break
                    elif temp_workers_loss_differenc[idx] >= threshold \
                            and temp_workers_loss_differenc[idx] > 0 \
                            and flag[idx] == 0:
                        print("Update Received")
                        w_locals.append(copy.deepcopy(temp_w_locals[index]))
                        # print(temp_w_locals[index])
                        loss_locals.append(temp_workers_loss[idx])
                        flag[idx] = 1
                        workers_count += 1
                        workers_participation[idx][iter] = 1
                        data_DCFL['round_{}'.format(iter)].append({
                            'C': args.frac,
                            'User ID': idx,
                            'Loss Train': loss_workers_total[idx, iter],
                            'Loss Test':
temp_workers_loss[idx], 'Accuracy': temp_workers_accuracy[idx] }) index += 1 if len(w_locals) < 1: threshold = threshold / 2 if threshold == -np.inf: threshold = 1 print("threshold increased to ", threshold) # update global weights w_glob = FedAvg(w_locals) # copy weight to net_glob net_glob.load_state_dict(w_glob) #print("round completed") loss_avg = sum(loss_locals) / len(loss_locals) loss_train.append(loss_avg) workers_percent_dist.append(workers_count/args.num_users) counter_threshold_decrease[iter] = counter_threshold print(iter, " round dist fl finished") acc_test_final, loss_test_final = test_img(net_glob, dataset_test, args) while_counter = loss_test_final data_Global_DCFL["Round"].append(iter) data_Global_DCFL["C"].append(args.frac) data_Global_DCFL["Average Loss Train"].append(loss_avg) data_Global_DCFL["Accuracy Test"].append(Global_Accuracy_Tracker[iter]) data_Global_DCFL["Average Loss Test"].append(Global_Loss_Tracker[iter]) data_Global_DCFL["Workers Number"].append(workers_count) data_Global_DCFL["Large Test Loss"].append(float(loss_test_final)) data_Global_DCFL["Large Test Accuracy"].append(float(acc_test_final)) total_rounds_dcfl = iter iter = iter + 1 #plot workers percent of participating workers_percent_final = np.zeros(args.num_users) workers_name = np.empty(args.num_users) #print(workers_participation) for i in range(len(workers_participation[:, 1])): workers_percent_final[i] = sum(workers_participation[i, :])/args.epochs workers_name[i] = i workers_fraction_each_round_newFL = sum(workers_percent_final)/len(workers_percent_final) # testing #print("testing started") net_glob.eval() #print("train test started") acc_train_final, loss_train_final = test_img(net_glob, dataset_train, args) #print("train test finished") acc_test_final, loss_test_final = test_img(net_glob, dataset_test, args) acc_full_distributed.append(acc_test_final) loss_full_ditributed.append(loss_test_final) Final_LargeDataSetTest_DCFL["C"].append(args.frac) Final_LargeDataSetTest_DCFL["Test Loss"].append(float(loss_test_final)) Final_LargeDataSetTest_DCFL["Test Accuracy"].append(float(acc_test_final)) Final_LargeDataSetTest_DCFL["Train Loss"].append(float(loss_train_final)) Final_LargeDataSetTest_DCFL["Train Accuracy"].append(float(acc_train_final)) Final_LargeDataSetTest_DCFL["Total Rounds"].append(int(total_rounds_dcfl)) variable_start = variable_start + while_counter print("C is ", c_counter/10) with open('CIFAR_100users_data_main_1229-2020.json', 'w') as outfile: json.dump(data_main, outfile) with open('CIFAR_100users_data_DCFL_1229-2020.json', 'w') as outfile: json.dump(data_DCFL, outfile) with open('CIFAR_100users_data_DCFL_Global_1229-2020.json', 'w') as outfile: json.dump(data_Global_DCFL, outfile) with open('CIFAR_100users_data_main_Global_1229-2020.json', 'w') as outfile: json.dump(data_Global_main, outfile) with open('Final-CIFAR_100users_data_main_Global_1229-2020.json', 'w') as outfile: json.dump(Final_LargeDataSetTest_MainFL, outfile) with open('Final-CIFAR_100users_data_DCFL_Global_1229-2020.json', 'w') as outfile: json.dump(Final_LargeDataSetTest_DCFL, outfile) return 1
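# main() calls FedAvg(w_locals) but the helper is defined elsewhere in the repo.
# A minimal sketch of the coordinate-wise averaging it presumably performs
# (standard FedAvg, matching the plain/DP branch of Server.FedAvg below);
# the project's own version may differ. Each entry of w_locals is a state_dict.
import copy
import torch

def FedAvg(w_locals):
    # start from a copy of the first client's weights
    w_avg = copy.deepcopy(w_locals[0])
    for k in w_avg.keys():
        # accumulate the remaining clients' tensors for this key
        for i in range(1, len(w_locals)):
            w_avg[k] += w_locals[i][k]
        # element-wise mean over all clients
        w_avg[k] = torch.div(w_avg[k], len(w_locals))
    return w_avg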
else:
    exit('Error: unrecognized dataset')
img_size = dataset_train[0][0].shape

# build model
if args.model == 'cnn' and args.dataset == 'cifar':
    net_glob = customCNNCifar(args=args).to(args.device)
    net_glob1 = customCNNCifar(args=args).to(args.device)
    net_glob5 = customCNNCifar(args=args).to(args.device)
    net_glob10 = customCNNCifar(args=args).to(args.device)
    net_glob15 = customCNNCifar(args=args).to(args.device)
    net_glob20 = customCNNCifar(args=args).to(args.device)
    net_glob25 = customCNNCifar(args=args).to(args.device)
    net_glob30 = customCNNCifar(args=args).to(args.device)
elif args.model == 'cnn' and args.dataset == 'mnist':
    net_glob = CNNMnist(args=args).to(args.device)
elif args.model == 'mlp':
    len_in = 1
    for x in img_size:
        len_in *= x
    net_glob = MLP(dim_in=len_in, dim_hidden=64, dim_out=args.num_classes).to(args.device)
else:
    exit('Error: unrecognized model')
print(net_glob)
net_glob.train()
net_glob1.train()
net_glob5.train()
net_glob10.train()
net_glob15.train()
net_glob20.train()
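# The numbered replicas (net_glob1 ... net_glob30) appear to reserve slots for
# snapshots of the global model at particular rounds. A sketch of that pattern
# with a dict instead of numbered variables; snapshot_rounds and
# maybe_snapshot are illustrative names, not part of the original code.
import copy

snapshot_rounds = {1, 5, 10, 15, 20, 25, 30}  # assumed from the variable names
snapshots = {}

def maybe_snapshot(net_glob, round_idx):
    # store an independent copy so later training cannot mutate the snapshot
    if round_idx in snapshot_rounds:
        snapshots[round_idx] = copy.deepcopy(net_glob)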
class Server():
    def __init__(self, args, w):
        self.args = args
        self.clients_update_w = []
        self.clients_loss = []
        self.model = CNNMnist(args=args).to(args.device)
        self.model.load_state_dict(w)

    def FedAvg(self):
        # 1. plain / DP
        # Noise is added on the client side, so DP aggregation is identical
        # to plain averaging of the received weight updates.
        if self.args.mode == 'plain' or self.args.mode == 'DP':
            update_w_avg = copy.deepcopy(self.clients_update_w[0])
            for k in update_w_avg.keys():
                for i in range(1, len(self.clients_update_w)):
                    update_w_avg[k] += self.clients_update_w[i][k]
                update_w_avg[k] = torch.div(update_w_avg[k], len(self.clients_update_w))
            # apply the averaged update to the global model explicitly, rather
            # than mutating tensors returned by state_dict() in place
            w_glob = self.model.state_dict()
            for k in update_w_avg.keys():
                w_glob[k] += update_w_avg[k]
            self.model.load_state_dict(w_glob)
            return copy.deepcopy(self.model.state_dict()), sum(self.clients_loss) / len(self.clients_loss)
        # 2. Paillier
        # Ciphertexts support addition and division by a plain scalar, so the
        # server can average without decrypting; clients decrypt the result.
        elif self.args.mode == 'Paillier':
            update_w_avg = copy.deepcopy(self.clients_update_w[0])
            client_num = len(self.clients_update_w)
            for k in update_w_avg.keys():
                for i in range(1, client_num):
                    for j in range(len(update_w_avg[k])):
                        update_w_avg[k][j] += self.clients_update_w[i][k][j]
                for j in range(len(update_w_avg[k])):
                    update_w_avg[k][j] /= client_num
            return update_w_avg, sum(self.clients_loss) / len(self.clients_loss)
        else:
            exit('Error: unrecognized mode')

    def test(self, datatest):
        self.model.eval()
        # testing
        test_loss = 0
        correct = 0
        data_loader = DataLoader(datatest, batch_size=self.args.bs)
        for idx, (data, target) in enumerate(data_loader):
            if self.args.gpu != -1:
                data, target = data.cuda(), target.cuda()
            log_probs = self.model(data)
            # sum up batch loss
            test_loss += F.cross_entropy(log_probs, target, reduction='sum').item()
            # get the index of the max log-probability
            y_pred = log_probs.data.max(1, keepdim=True)[1]
            correct += y_pred.eq(target.data.view_as(y_pred)).long().cpu().sum()
        test_loss /= len(data_loader.dataset)
        accuracy = 100.00 * correct / len(data_loader.dataset)
        return accuracy, test_loss
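# A sketch of how Server might be driven for one plain/DP round, assuming
# clients whose train() returns (update_w, loss) and that expose a setter for
# the new global weights; `clients`, `client.update`, and the loop names are
# illustrative, not confirmed by the snippets above.
for rnd in range(args.epochs):
    server.clients_update_w, server.clients_loss = [], []
    for client in clients:
        update_w, loss = client.train()           # local training on the client's shard
        server.clients_update_w.append(update_w)  # weight update (delta), per FedAvg above
        server.clients_loss.append(loss)
    w_glob, loss_avg = server.FedAvg()            # fold averaged updates into the global model
    for client in clients:
        client.update(w_glob)                     # assumed client-side setter for new weights
    acc, test_loss = server.test(dataset_test)
    print('round {:3d}: loss {:.4f}, acc {:.2f}%'.format(rnd, loss_avg, acc))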