Example #1
 def __init__(self, args):
     if args.dataset == 'cifar':
         self.net = CNNCifar(args=args).to(args.device)
     else:
         self.net = CNNMnist(args=args).to(args.device)
     self.net.train()
     self.loss_func = nn.CrossEntropyLoss()
     self.optimizer = torch.optim.SGD(self.net.parameters(), lr=args.lr)
     self.args = args
     self.w_glob = []
     # key exchange
     self.x = self.gx = 0
     self.keys = defaultdict(int)
Example #2
 def __init__(self,
              args,
              dataset=None,
              idxs=None,
              w=None,
              C=0.5,
              sigma=0.05):
     self.args = args
     self.loss_func = nn.CrossEntropyLoss()
     self.ldr_train = DataLoader(DatasetSplit(dataset, idxs),
                                 batch_size=self.args.local_bs,
                                 shuffle=True)
     self.model = CNNMnist(args=args).to(args.device)
     self.model.load_state_dict(w)
     self.C = C
     self.sigma = sigma
     if self.args.mode == 'Paillier':
         self.pub = pub
         self.priv = priv
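The pub/priv pair referenced above is a Paillier key pair created at module level in the source repo. A plausible setup with the python-paillier (phe) library, shown here as an assumption since the snippet does not include its imports:

# Assumption: the repo uses python-paillier; key size is illustrative.
from phe import paillier

pub, priv = paillier.generate_paillier_keypair(n_length=1024)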
Example #3
def create_client_server():
    num_items = int(len(dataset_train) / args.num_users)
    clients, all_idxs = [], [i for i in range(len(dataset_train))]
    net_glob = CNNMnist(args=args).to(args.device)

    # split the training data evenly, i.i.d.
    # initialize every client's model with the same parameters
    for i in range(args.num_users):
        new_idxs = set(np.random.choice(all_idxs, num_items, replace=False))
        all_idxs = list(set(all_idxs) - new_idxs)
        new_client = Client(args=args,
                            dataset=dataset_train,
                            idxs=new_idxs,
                            w=copy.deepcopy(net_glob.state_dict()))
        clients.append(new_client)

    server = Server(args=args, w=copy.deepcopy(net_glob.state_dict()))

    return clients, server
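A minimal driver sketch for these pieces, assuming Client.train() returns (update_w, loss) and Client.update() applies the aggregated weights as in Example #20; server.aggregate is a placeholder name, not this repo's confirmed API:

clients, server = create_client_server()
for rnd in range(args.epochs):
    updates, losses = [], []
    for client in clients:
        update_w, loss = client.train()
        updates.append(update_w)
        losses.append(loss)
    w_glob = server.aggregate(updates)  # hypothetical aggregation method
    for client in clients:
        client.update(w_glob)
    print('Round {:3d}, average local loss {:.3f}'.format(
        rnd, sum(losses) / len(losses)))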
Example #4
def build_model(args):
    # build model
    if args.model == 'cnn' and args.dataset == 'cifar':
        net_glob = CNNCifar(args=args).to(args.device)
    elif args.model == 'cnn' and args.dataset == 'mnist':
        net_glob = CNNMnist(args=args).to(args.device)
    elif args.model == 'LeNet' and args.dataset == 'traffic':
        net_glob = LeNet(args=args).to(args.device)
    else:
        exit('Error: unrecognized model')
    return net_glob
Example #5
def get_model(args):
    if args.model == 'cnn' and args.dataset in ['cifar10', 'cifar100']:
        net_glob = CNNCifar(args=args).to(args.device)
    elif args.model == 'cnn' and args.dataset == 'mnist':
        net_glob = CNNMnist(args=args).to(args.device)
    elif args.model == 'mlp' and args.dataset == 'mnist':
        net_glob = MLP(dim_in=784, dim_hidden=256,
                       dim_out=args.num_classes).to(args.device)
    else:
        exit('Error: unrecognized model')
    print(net_glob)

    return net_glob
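For context, a sketch of the argparse namespace these factory functions expect; the field names are inferred from the snippets on this page and may not match every repo's args_parser():

import torch
from types import SimpleNamespace

# Assumed fields: model, dataset, num_classes, num_channels, device.
args = SimpleNamespace(model='cnn', dataset='mnist', num_classes=10,
                       num_channels=1,
                       device=torch.device('cuda:0' if torch.cuda.is_available()
                                           else 'cpu'))
net = get_model(args)  # builds CNNMnist on the chosen device and prints it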
Example #6
def build_model():
    # build model
    if args.model == 'cnn' and args.dataset == 'cifar':
        net_glob = CNNCifar(args=args).to(args.device)
    elif args.model == 'cnn' and args.dataset == 'mnist':
        net_glob = CNNMnist(args=args).to(args.device)
    # elif args.model == 'mlp':
    #     len_in = 1
    #     for x in img_size:
    #         len_in *= x
    #     net_glob = MLP(dim_in=len_in, dim_hidden=200, dim_out=args.num_classes).to(args.device)
    else:
        exit('Error: unrecognized model')
    return net_glob
Example #7
class FL_client():
    def __init__(self, args):
        if args.dataset == 'cifar':
            self.net = CNNCifar(args=args).to(args.device)
        else:
            self.net = CNNMnist(args=args).to(args.device)
        self.net.train()
        self.loss_func = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.SGD(self.net.parameters(), lr=args.lr)
        self.args = args
        self.w_glob = []
        # key exchange
        self.x = self.gx = 0
        self.keys = defaultdict(int)

    def set_data(self, dataset, idxs):
        self.data = DataLoader(DatasetSplit(dataset, idxs),
                               batch_size=self.args.local_bs,
                               shuffle=True)

    def load_state(self, state_dict):
        self.net.load_state_dict(state_dict)

    def train(self):
        epoch_loss = []
        for _ in range(self.args.local_ep):
            batch_loss = []
            for _, (images, labels) in enumerate(self.data):
                images, labels = images.to(self.args.device), labels.to(
                    self.args.device)
                pred = self.net(images)
                loss = self.loss_func(pred, labels)
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                batch_loss.append(loss.item())
            epoch_loss.append(sum(batch_loss) / len(batch_loss))
        return self.net.state_dict(), sum(epoch_loss) / len(epoch_loss)
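The state dicts returned by FL_client.train() are typically combined with coordinate-wise FedAvg. A standard, self-contained sketch (not code from this repo):

import copy
import torch

def fed_avg(w_list):
    """Average a list of model state dicts key by key."""
    w_avg = copy.deepcopy(w_list[0])
    for k in w_avg.keys():
        for w in w_list[1:]:
            w_avg[k] += w[k]
        w_avg[k] = torch.div(w_avg[k], len(w_list))
    return w_avg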
Example #8
    dataset_valid = dataset_test

    if args.iid == 'noniid_ssl' and args.dataset == 'cifar':
        dict_users, dict_users_labeled, pseudo_label = noniid_ssl(dataset_train, args.num_users, args.label_rate)
    else:
        dict_users, dict_users_labeled, pseudo_label = sample(dataset_train, args.num_users, args.label_rate, args.iid)



    if args.dataset == 'cifar':
        net_glob = CNNCifar(args=args).to(args.device)
        net_ema_glob = CNNCifar(args=args).to(args.device)

    elif args.dataset == 'mnist':
        net_glob = CNNMnist(args=args).to(args.device)
        net_ema_glob = CNNMnist(args=args).to(args.device)

    elif args.dataset == 'svhn':
        net_glob = CNNCifar(args=args).to(args.device)
        net_ema_glob = CNNCifar(args=args).to(args.device)

    else:
        exit('Error: unrecognized model')

    net_glob.train()
    net_ema_glob.train()

    # copy weights
    w_glob = net_glob.state_dict()
    w_ema_glob = net_ema_glob.state_dict()
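The paired net_glob/net_ema_glob models suggest a mean-teacher setup; a sketch of the usual exponential-moving-average update, continuing from the variables above (the decay value alpha is illustrative, not taken from this code):

alpha = 0.999  # assumed EMA decay
for k in w_ema_glob.keys():
    w_ema_glob[k] = alpha * w_ema_glob[k] + (1 - alpha) * w_glob[k]
net_ema_glob.load_state_dict(w_ema_glob)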
Example #9
def modelBuild():
    """
    Build the basic training network and return the related args.
    """
    # build model
    args = args_parser()
    args.device = torch.device('cuda:{}'.format(
        args.gpu) if torch.cuda.is_available() and args.gpu != -1 else 'cpu')

    # load dataset and split users
    if args.dataset == 'mnist':
        trans_mnist = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])
        dataset_train = datasets.MNIST('../data/mnist/',
                                       train=True,
                                       download=True,
                                       transform=trans_mnist)
        dataset_test = datasets.MNIST('../data/mnist/',
                                      train=False,
                                      download=True,
                                      transform=trans_mnist)
        # sample users
        if args.iid:
            # allocate the dataset index to users
            dict_users = mnist_iid(dataset_train, args.num_users)
        else:
            dict_users = mnist_noniid(dataset_train, args.num_users)
    elif args.dataset == 'cifar':
        trans_cifar = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        dataset_train = datasets.CIFAR10('../data/cifar',
                                         train=True,
                                         download=True,
                                         transform=trans_cifar)
        dataset_test = datasets.CIFAR10('../data/cifar',
                                        train=False,
                                        download=True,
                                        transform=trans_cifar)
        if args.iid:
            dict_users = cifar_iid(dataset_train, args.num_users)
        else:
            exit('Error: only consider IID setting in CIFAR10')
    else:
        exit('Error: unrecognized dataset')

    print("The para of iid is " + str(args.iid))

    img_size = dataset_train[0][0].shape
    if args.model == 'cnn' and args.dataset == 'cifar':
        net_glob = CNNCifar(args=args).to(args.device)
    elif args.model == 'cnn' and args.dataset == 'mnist':
        net_glob = CNNMnist(args=args).to(args.device)
    elif args.model == 'mlp':
        len_in = 1
        for x in img_size:
            len_in *= x
        net_glob = MLP(dim_in=len_in, dim_hidden=200,
                       dim_out=args.num_classes).to(args.device)
    else:
        exit('Error: unrecognized model')

    print("********************************")
    print(net_glob)
    print("********************************")

    return net_glob, args, dataset_train, dataset_test, dict_users
Example #10
        test_set = datasets.CIFAR10('./data/cifar',
                                    train=False,
                                    download=False,
                                    transform=transform)
    else:
        exit('Error: unrecognized dataset...')

    # split dataset {user_id: [list of data index]}
    dict_users_train, ratio = noniid_train(train_set, args.num_users)
    dict_users_test = noniid_test(test_set, args.num_users, ratio)

    print('Data finished...')

    # load global model
    net_glob = (CNNCifar(args=args) if args.dataset == 'cifar'
                else CNNMnist(args=args)).to(args.device)
    net_glob.train()

    # parameters
    w_glob = net_glob.state_dict()

    loss_train = []

    # meta-learning for global initial parameters
    for epoch in range(args.meta_epochs):
        loss_locals = []
        w_locals = []
        m = max(int(args.frac * args.num_users), 1)
        idxs_users = np.random.choice(range(args.num_users), m, replace=False)
        for idx in idxs_users:
            client = Client(args=args,
Example #11
def main_worker(gpu, ngpus_per_node, args):
    print("gpu:", gpu)
    args.gpu = gpu
    if args.rank == 0:  # (the first server has only three GPUs, so it needs special handling)
        newrank = args.rank * ngpus_per_node + gpu
    else:
        newrank = args.rank * ngpus_per_node + gpu - 1
    # initialization: communicate over TCP
    print("begin init")
    dist.init_process_group(init_method=args.init_method,
                            backend="nccl",
                            world_size=args.world_size,
                            rank=newrank)
    print("end init")

    # build communication groups: rank 0 acts as the server; broadcast simulates send/recv, so the server needs a group with every client
    group = []
    for i in range(1, args.world_size):
        group.append(dist.new_group([0, i]))
    allgroup = dist.new_group([i for i in range(args.world_size)])

    if newrank == 0:
        """ server"""

        print("使用{}号服务器的第{}块GPU作为server".format(args.rank, gpu))

        # during training the server only aggregates and redistributes parameters; it does no computation itself
        # set device
        args.device = torch.device(
            'cuda:{}'.format(args.gpu)
            if torch.cuda.is_available() and args.gpu != -1 else 'cpu')

        net = CNNMnist().to(args.device)
        w_avg = copy.deepcopy(net.state_dict())
        for j in range(args.epochs):
            if j == args.epochs - 1:
                for i in w_avg.keys():
                    temp = w_avg[i].to(args.device)
                    w_avg[i] = average_gradients(temp, group, allgroup)
            else:
                for i in w_avg.keys():
                    temp = w_avg[i].to(args.device)
                    average_gradients(temp, group, allgroup)
        torch.save(w_avg, 'w_wag')
        net.load_state_dict(w_avg)
        # load the test data
        trans_mnist = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])
        dataset_test = datasets.MNIST('data/',
                                      train=False,
                                      download=True,
                                      transform=trans_mnist)
        test_set = torch.utils.data.DataLoader(dataset_test,
                                               batch_size=args.bs)
        test_accuracy, test_loss = test(net, test_set, args)
        print("Testing accuracy: {:.2f}".format(test_accuracy))
        print("Testing loss: {:.2f}".format(test_loss))

    else:
        """clents"""

        print("使用{}号服务器的第{}块GPU作为第{}个client".format(args.rank, gpu, newrank))

        # set device
        args.device = torch.device(
            'cuda:{}'.format(args.gpu)
            if torch.cuda.is_available() and args.gpu != -1 else 'cpu')

        print("begin train...")
        net = CNNMnist().to(args.device)
        print(net)
        data = torch.load("data/distributed/data_of_client{}".format(newrank))
        bsz = 64
        train_set = torch.utils.data.DataLoader(data, batch_size=bsz)

        optimizer = torch.optim.SGD(net.parameters(), lr=args.lr, momentum=0.5)
        num_batches = ceil(len(train_set.dataset) / float(bsz))
        start = time.time()
        for epoch in range(args.epochs):
            for iter in range(3):
                epoch_loss = 0.0
                for data, target in train_set:
                    data, target = data.to(args.device), target.to(args.device)
                    data, target = Variable(data), Variable(target)
                    optimizer.zero_grad()
                    output = net(data)
                    loss = F.nll_loss(output, target)
                    epoch_loss += loss.item()
                    loss.backward()
                    optimizer.step()
                if iter == 3 - 1:
                    print('Rank ', dist.get_rank(), ', epoch ', epoch, ': ',
                          epoch_loss / num_batches)
            """federated learning"""
            w_avg = copy.deepcopy(net.state_dict())

            for k in w_avg.keys():
                print("k:", k)
                temp = average_gradients(w_avg[k].to(args.device), group,
                                         allgroup)
                w_avg[k] = temp
            net.load_state_dict(w_avg)

        end = time.time()
        print(" training time:{}".format((end - start)))

        train_accuracy, train_loss = test(net, train_set, args)
        print("Training accuracy: {:.2f}".format(train_accuracy))
        print("Training loss: {:.2f}".format(train_loss))
Example #12
                                        train=False,
                                        download=True,
                                        transform=trans_cifar)
        if args.iid:
            dict_users = cifar_iid(dataset_train, args.num_users)
        else:
            exit('Error: only consider IID setting in CIFAR10')
    else:
        exit('Error: unrecognized dataset')
    img_size = dataset_train[0][0].shape

    # build model
    if args.model == 'cnn' and args.dataset == 'cifar':
        net_glob = CNNCifar(args=args).to(args.device)
    elif args.model == 'cnn' and args.dataset == 'mnist':
        net_glob = CNNMnist(args=args).to(args.device)
        net_glob5 = CNNMnist(args=args).to(args.device)
        net_glob10 = CNNMnist(args=args).to(args.device)

    elif args.model == 'mlp':
        len_in = 1
        for x in img_size:
            len_in *= x
        net_glob = MLP(dim_in=len_in, dim_hidden=64,
                       dim_out=args.num_classes).to(args.device)
    else:
        exit('Error: unrecognized model')
    print(net_glob)
    net_glob.train()
    net_glob5.train()
    net_glob10.train()
Example #13
    if args.iid == 'noniid_ssl' and args.dataset == 'cifar':
        dict_users, dict_users_labeled, pseudo_label = noniid_ssl(
            dataset_train_weak, args.num_users, args.label_rate)
    else:
        dict_users, dict_users_labeled, pseudo_label = sample(
            dataset_train_weak, args.num_users, args.label_rate, args.iid)

    if args.dataset == 'cifar':
        net_glob = CNNCifar(args=args).to(args.device)
        net_glob_helper_1 = CNNCifar(args=args).to(args.device)
        net_glob_helper_2 = CNNCifar(args=args).to(args.device)
        net_glob_valid = CNNCifar(args=args).to(args.device)

    elif args.dataset == 'mnist':
        net_glob = CNNMnist(args=args).to(args.device)
        net_glob_helper_1 = CNNMnist(args=args).to(args.device)
        net_glob_helper_2 = CNNMnist(args=args).to(args.device)
        net_glob_valid = CNNMnist(args=args).to(args.device)
    elif args.dataset == 'svhn':
        net_glob = CNNCifar(args=args).to(args.device)
        net_glob_helper_1 = CNNCifar(args=args).to(args.device)
        net_glob_helper_2 = CNNCifar(args=args).to(args.device)
        net_glob_valid = CNNCifar(args=args).to(args.device)

    else:
        exit('Error: unrecognized model')

    print("\n Begin Train")

    net_glob.train()
Example #14
        trans_cifar = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
        dataset_train = datasets.CIFAR10('data/cifar', train=True, download=True, transform=trans_cifar)
        dataset_test = datasets.CIFAR10('data/cifar', train=False, download=True, transform=trans_cifar)
        if args.iid:
            dict_users = cifar_iid(dataset_train, args.num_users)
        else:
            exit('Error: only consider IID setting in CIFAR10')
    else:
        exit('Error: unrecognized dataset')
    img_size = dataset_train[0][0].shape

    # build model
    if args.model == 'cnn' and args.dataset == 'cifar':
        net_glob = CNNCifar(args=args).to(args.device)
    elif args.model == 'cnn' and args.dataset == 'mnist':
        net_glob = CNNMnist(args=args).to(args.device)
        net_glob1 = CNNMnist(args=args).to(args.device)
        net_glob5 = CNNMnist(args=args).to(args.device)
        net_glob7 = CNNMnist(args=args).to(args.device)
        net_glob10 = CNNMnist(args=args).to(args.device)
    elif args.model == 'mlp':
        len_in = 1
        for x in img_size:
            len_in *= x
        net_glob = MLP(dim_in=len_in, dim_hidden=64, dim_out=args.num_classes).to(args.device)
    else:
        exit('Error: unrecognized model')
    print(net_glob)
    net_glob.train()
    net_glob1.train()
    net_glob5.train()
Example #15
        ###ANALYZING END
        ###

        if args.iid:
            dict_users = cifar_iid(dataset_train, args.num_users)
        else:
            exit('Error: only consider IID setting in CIFAR10')
    else:
        exit('Error: unrecognized dataset')
    img_size = dataset_train[0][0].shape

    # build model
    if args.model == 'cnn' and args.dataset == 'cifar':
        net_glob = CNNCifar(args=args).to(args.device)
    elif args.model == 'cnn' and args.dataset == 'mnist':
        net_glob = CNNMnist(args=args).to(args.device)
        net_glob1 = CNNMnist(args=args).to(args.device)
        net_glob5 = CNNMnist(args=args).to(args.device)
        net_glob10 = CNNMnist(args=args).to(args.device)
        net_glob15 = CNNMnist(args=args).to(args.device)
        net_glob20 = CNNMnist(args=args).to(args.device)
        net_glob25 = CNNMnist(args=args).to(args.device)
        net_glob30 = CNNMnist(args=args).to(args.device)
    elif args.model == 'mlp':
        len_in = 1
        for x in img_size:
            len_in *= x
        net_glob = MLP(dim_in=len_in, dim_hidden=64,
                       dim_out=args.num_classes).to(args.device)
    else:
        exit('Error: unrecognized model')
Example #16
                                        train=False,
                                        download=True,
                                        transform=trans_cifar)
        if args.iid:
            dict_users = cifar_iid(dataset_train, args.num_users)
        else:
            exit('Error: only consider IID setting in CIFAR10')
    else:
        exit('Error: unrecognized dataset')
    img_size = dataset_train[0][0].shape

    # build model
    if args.model == 'cnn' and args.dataset == 'cifar':
        net_glob = CNNCifar(args=args).to(args.device)
    elif args.model == 'cnn' and args.dataset == 'mnist':
        net_glob = CNNMnist(args=args).to(args.device)
    elif args.model == 'mlp':
        len_in = 1
        for x in img_size:
            len_in *= x
        net_glob = MLP(dim_in=len_in, dim_hidden=64,
                       dim_out=args.num_classes).to(args.device)
    else:
        exit('Error: unrecognized model')
    print(net_glob)
    net_glob.train()

    # copy weights
    w_glob = net_glob.state_dict()

    # training - NO ATTACK
Example #17
            if (key_item_1[0] == key_item_2[0]):
                print('Mismatch found at', key_item_1[0])
            else:
                raise Exception
    if models_differ == 0:
        print('Models match perfectly! :)')


args = args_parser()
args.gpu = -1
args.device = torch.device('cuda:{}'.format(
    args.gpu) if torch.cuda.is_available() and args.gpu != -1 else 'cpu')
#trans_fmnist = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
#dataset_train = datasets.FashionMNIST('../data/fmnist', train=True, download=True, transform=trans_fmnist)
#dict_users, dict_labels_counter = mnist_noniid(dataset_train, args.num_users)
net_glob = CNNMnist(args=args).to(args.device)
#print(net_glob)

m = net_glob
m.train()

for p1, p2 in zip(m.parameters(), net_glob.parameters()):
    if p1.data.ne(p2.data).sum() > 0:
        print(False)
print(True)

#local_mainFL = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users[5])
#w_mainFL, loss_mainFL=local_mainFL.train(net=copy.deepcopy(net_glob_mainFL).to(args.device))
#compare_models(net_glob, w_mainFL)

Example #18
                                        download=True,
                                        transform=trans_cifar)
        if args.iid:
            dict_users = cifar_iid(dataset_train, args.num_users, args.seed)
        else:
            exit('Error: only consider IID setting in CIFAR10')
    else:
        exit('Error: unrecognized dataset')
    img_size = dataset_train[0][0].shape

    # build model

    if args.model == 'cnn' and args.dataset == 'cifar':
        net_local = CNNCifar(args=args).to(args.device)
    elif args.model == 'cnn' and args.dataset == 'mnist':
        net_local = CNNMnist(args=args).to(args.device)
    elif args.model == 'mlp':
        len_in = 1
        for x in img_size:
            len_in *= x
        net_local = MLP(dim_in=len_in, dim_hidden=64,
                        dim_out=args.num_classes).to(args.device)
    else:
        exit('Error: unrecognized model')

    # Let's train the model for a few epochs first

    n_epochs = 3
    batch_size_train = 64
    batch_size_test = 1000
    learning_rate = 0.01
Example #19
def main_worker(gpu, ngpus_per_node, args):

    print("gpu:", gpu)
    args.gpu = gpu
    if args.rank == 0:  # (the first server has only three GPUs, so it needs special handling)
        newrank = args.rank * ngpus_per_node + gpu
    else:
        newrank = args.rank * ngpus_per_node + gpu-1
    # initialization: communicate over TCP
    dist.init_process_group(init_method=args.init_method, backend="nccl", world_size=args.world_size, rank=newrank)

    # build communication groups: rank 0 acts as the server; broadcast simulates send/recv, so the server needs a group with every client
    group = []
    for i in range(1, args.world_size):
        group.append(dist.new_group([0, i]))
    allgroup = dist.new_group([i for i in range(args.world_size)])

    if newrank == 0:
        """ server"""

        print("{}号服务器的第{}块GPU作为server".format(args.rank, gpu))

        # during training the server only aggregates and redistributes parameters; it does no computation itself
        # set device
        args.device = torch.device(
            'cuda:{}'.format(args.gpu) if torch.cuda.is_available() and args.gpu != -1 else 'cpu')

        # load the test data
        trans_mnist = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
        dataset_test = datasets.MNIST('data/', train=False, download=True, transform=trans_mnist)
        test_set = torch.utils.data.DataLoader(dataset_test, batch_size=args.bs)

        """calculate influence function"""
        model = CNNMnist().to(args.device)
        model.load_state_dict(torch.load('w_wag'))

        test_id = 0  # id of the selected test example
        data, target = test_set.dataset[test_id]
        data = test_set.collate_fn([data])
        target = test_set.collate_fn([target])

        print("begin grad")
        grad_test = grad_z(data, target, model, gpu, create_graph=False)  # grad_test
        print("end grad")
        v = grad_test


        """server与client交互计算s_test(采用rka算法)"""
        #计算模型总参数
        num_parameters=0
        for i in list(model.parameters()):
            # 首先求出每个tensor中所含参数的个数
            temp = 1
            for j in i.size():
                temp *= j
            num_parameters+=temp
        # send grad_test to each client
        for i in range(args.world_size - 1):
            print("send grad_test to client:", i+1)
            for j in v:
                temp = j
                dist.broadcast(src=0, tensor=temp, group=group[i])

        for k in range(args.num_sample_rka):

            # send the sampled parameter id to the clients
            sample_id = torch.tensor(random.randint(0, num_parameters - 1)).to(args.device)
            for i in range(args.world_size - 1):
                dist.broadcast(src=0, tensor=sample_id, group=group[i])

            # receive the second-order gradients from the clients
            sec_grad = []
            second_grad = [torch.zeros(list(model.parameters())[i].size()).to(args.device) for i in
                           range(len(list(model.parameters())))]
            for i in range(args.world_size - 1):
                temp = copy.deepcopy(second_grad)
                for j in temp:
                    dist.broadcast(src=i + 1, tensor=j, group=group[i])
                sec_grad.append(temp)

            # aggregate the second-order gradients, then distribute them to the clients
            e_second_grad = sec_grad[0]
            for i in range(1, args.world_size - 1):
                e_second_grad = [i + j for i, j in six.moves.zip(e_second_grad, sec_grad[i])]
            e_second_grad = [i / (args.world_size - 1) for i in e_second_grad]
            for j in e_second_grad:
                temp = j
                dist.broadcast(src=0, tensor=temp, group=allgroup)
        """交互结束"""

        # receive the influence values from the clients
        print("rec influence")
        allinfluence = []
        influence = torch.tensor([i for i in range(4285)], dtype=torch.float32)
        influence = influence.to(args.device)

        for i in range(args.world_size - 1):
            dist.broadcast(src=i + 1, tensor=influence, group=group[i])
            temp = copy.deepcopy(influence)
            allinfluence.append(temp)
        torch.save(allinfluence, 'influence/influence')


    else:
        """clents"""

        print("{}号服务器的第{}号GPU作为第{}个client".format(args.rank, gpu, newrank))

        # set device
        args.device = torch.device(
            'cuda:{}'.format(args.gpu) if torch.cuda.is_available() and args.gpu != -1 else 'cpu')
        # load the training data
        data = torch.load("data/distributedData/data_of_client{}".format(newrank))
        bsz = 64
        train_set = torch.utils.data.DataLoader(data, batch_size=bsz)
        model = CNNMnist().to(args.device)
        model.load_state_dict(torch.load('w_wag'))  # load the model
        data, target = train_set.dataset[0]
        data = train_set.collate_fn([data])
        target = train_set.collate_fn([target])
        grad_v = grad_z(data, target, model, gpu=gpu, create_graph=False)
        grad_test = copy.deepcopy(grad_v)

        """calculate influence function"""

        """ 和server交互计算s_test,可以循环迭代(采用rka算法)"""

        # receive grad_test from the server
        for i in grad_test:
            dist.broadcast(src=0, tensor=i, group=group[newrank - 1])

        stest = copy.deepcopy(grad_test)
        for k in range(args.num_sample_rka):
            # receive the sampled id from the server and compute the second-order gradient
            sample_id = torch.tensor([0]).to(args.device)
            dist.broadcast(src=0, tensor=sample_id, group=group[newrank - 1])
            idt = sample_id.item()
            second_grad = hessian(model, train_set, idt, gpu=args.gpu)

            # send the second-order gradient to the server
            for i in second_grad:
                temp = i
                dist.broadcast(src=newrank, tensor=temp, group=group[newrank - 1])

            # receive the aggregated second-order gradient from the server
            for i in second_grad:
                temp = i
                dist.broadcast(src=0, tensor=temp, group=allgroup)
            # update stest with one RKA step
            stest = rka(stest, second_grad, grad_test)

            s_test_fin = stest
            """s_test is finished; with the final s_test_fin, start computing the influence."""

        print("client:", newrank, "calculate influence")
        n = len(train_set.dataset)
        influence = np.array([i for i in range(n)], dtype='float32')
        for i in utility.create_progressbar(len(train_set.dataset), desc='influence', start=0):
            # compute the gradient
            data, target = train_set.dataset[i]
            data = train_set.collate_fn([data])
            target = train_set.collate_fn([target])
            grad_z_vec = grad_z(data, target, model, gpu=gpu)
            # compute the influence
            inf_tmp = -sum(
                [torch.sum(k * j).data.cpu().numpy() for k, j in six.moves.zip(grad_z_vec, s_test_fin)]) / n
            influence[i] = inf_tmp
        influence = torch.tensor(influence).to(args.device)
        # send the influence to the server
        print("client:", newrank, "send influence to server")
        dist.broadcast(src=newrank, tensor=influence, group=group[newrank - 1])
        print("client:", newrank, "end send influence to server")
Example #20
class Client():
    def __init__(self,
                 args,
                 dataset=None,
                 idxs=None,
                 w=None,
                 C=0.5,
                 sigma=0.05):
        self.args = args
        self.loss_func = nn.CrossEntropyLoss()
        self.ldr_train = DataLoader(DatasetSplit(dataset, idxs),
                                    batch_size=self.args.local_bs,
                                    shuffle=True)
        self.model = CNNMnist(args=args).to(args.device)
        self.model.load_state_dict(w)
        self.C = C
        self.sigma = sigma
        if self.args.mode == 'Paillier':
            self.pub = pub
            self.priv = priv

    def train(self):
        w_old = copy.deepcopy(self.model.state_dict())
        net = copy.deepcopy(self.model)

        net.train()

        #train and update
        optimizer = torch.optim.SGD(net.parameters(),
                                    lr=self.args.lr,
                                    momentum=self.args.momentum)
        for iter in range(self.args.local_ep):
            batch_loss = []
            for batch_idx, (images, labels) in enumerate(self.ldr_train):
                images, labels = images.to(self.args.device), labels.to(
                    self.args.device)
                net.zero_grad()
                log_probs = net(images)
                loss = self.loss_func(log_probs, labels)
                loss.backward()
                optimizer.step()
                batch_loss.append(loss.item())

        w_new = net.state_dict()

        update_w = {}
        if self.args.mode == 'plain':
            for k in w_new.keys():
                update_w[k] = w_new[k] - w_old[k]
        # 1. part one
        #     DP mechanism
        elif self.args.mode == 'DP':
            for k in w_new.keys():
                # calculate update_w
                update_w[k] = w_new[k] - w_old[k]
                # clip the update
                update_w[k] = update_w[k] / max(
                    1,
                    torch.norm(update_w[k], 2) / self.C)
                # add noise: the update itself might reveal the user's data,
                # so add per-coordinate Gaussian noise (std sigma * C, the
                # Gaussian mechanism) before sending it to the server
                update_w[k] += torch.randn_like(update_w[k]) * self.sigma * self.C
        # 2. part two
        #     Paillier enc
        elif self.args.mode == 'Paillier':
            print(len(w_new.keys()))
            for k in w_new.keys():
                print("start  ", k, flush=True)
                update_w[k] = w_new[k] - w_old[k]
                update_w_list = update_w[k].view(-1).cpu().tolist()
                for iter, w in enumerate(update_w_list):
                    update_w_list[iter] = self.pub.encrypt(w)
                update_w[k] = update_w_list
                print("end ", flush=True)
        else:
            exit()
        return update_w, sum(batch_loss) / len(batch_loss)

    def update(self, w_glob):
        if self.args.mode == 'plain':
            self.model.load_state_dict(w_glob)
        elif self.args.mode == 'DP':
            self.model.load_state_dict(w_glob)
        elif self.args.mode == 'Paillier':
            w_glob_ciph = copy.deepcopy(w_glob)
            for k in w_glob_ciph.keys():
                for iter, item in enumerate(w_glob_ciph[k]):
                    w_glob_ciph[k][iter] = self.priv.decrypt(item)
                shape = list(self.model.state_dict()[k].size())
                w_glob_ciph[k] = torch.FloatTensor(w_glob_ciph[k]).to(
                    self.args.device).view(*shape)
                self.model.state_dict()[k] += w_glob_ciph[k]
        else:
            exit()
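A tiny numeric check of the clipping rule used in the DP branch above: after dividing by max(1, ||w||_2 / C), the update's L2 norm is at most C (self-contained sketch, not repo code):

import torch

u = torch.randn(1000) * 5.0          # a large fake update
C = 0.5                              # clipping bound, as in Client's default
clipped = u / max(1, torch.norm(u, 2).item() / C)
assert torch.norm(clipped, 2).item() <= C + 1e-6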
Example #21
def main():

    manualSeed = 1

    np.random.seed(manualSeed)
    random.seed(manualSeed)
    torch.manual_seed(manualSeed)
    # if you are using the GPU
    torch.cuda.manual_seed(manualSeed)
    torch.cuda.manual_seed_all(manualSeed)

    torch.backends.cudnn.enabled = False
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    # parse args
    args = args_parser()
    args.device = torch.device('cuda:{}'.format(
        args.gpu) if torch.cuda.is_available() and args.gpu != -1 else 'cpu')

    # load dataset and split users
    if args.dataset == 'mnist':
        trans_mnist = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])
        dataset_train = datasets.MNIST('../data/mnist/',
                                       train=True,
                                       download=True,
                                       transform=trans_mnist)
        dataset_test = datasets.MNIST('../data/mnist/',
                                      train=False,
                                      download=True,
                                      transform=trans_mnist)
        # sample users
        if args.iid:
            dict_users_DCFL = mnist_iid(dataset_train, args.num_users)
            # mirror the non-iid branch so these names exist later
            dict_users_mainFL = dict_users_DCFL
            dict_labels_counter = dict()
            dict_labels_counter_mainFL = dict()
        else:
            dict_users_DCFL, dict_labels_counter = mnist_noniid(
                dataset_train, args.num_users)
            dict_users_mainFL, dict_labels_counter_mainFL = dict_users_DCFL, dict_labels_counter
    elif args.dataset == 'cifar':
        trans_cifar = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        dataset_train = datasets.CIFAR10('../data/cifar',
                                         train=True,
                                         download=True,
                                         transform=trans_cifar)
        dataset_test = datasets.CIFAR10('../data/cifar',
                                        train=False,
                                        download=True,
                                        transform=trans_cifar)
        if args.iid:
            dict_users_DCFL = cifar_iid(dataset_train, args.num_users)
            dict_users_mainFL = dict_users_DCFL
            dict_labels_counter_mainFL = dict()
            dict_labels_counter = dict()
        else:
            dict_users_DCFL, dict_labels_counter = cifar_noniid(
                dataset_train, args.num_users)
            dict_users_mainFL, dict_labels_counter_mainFL = dict_users_DCFL, dict_labels_counter
    elif args.dataset == 'fmnist':
        trans_fmnist = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])
        dataset_train = datasets.FashionMNIST('../data/fmnist',
                                              train=True,
                                              download=True,
                                              transform=trans_fmnist)
        dataset_test = datasets.FashionMNIST('../data/fmnist',
                                             train=False,
                                             download=True,
                                             transform=trans_fmnist)
        if args.iid:
            print("iid")
            dict_users_DCFL = mnist_iid(dataset_train, args.num_users)
            # mirror the non-iid branch so these names exist later
            dict_users_mainFL = dict_users_DCFL
            dict_labels_counter = dict()
            dict_labels_counter_mainFL = dict()
        else:
            print("non iid")
            dict_users_DCFL, dict_labels_counter = mnist_noniid(
                dataset_train, args.num_users)
            dict_users_mainFL, dict_labels_counter_mainFL = dict_users_DCFL, dict_labels_counter
    else:
        exit('Error: unrecognized dataset')

    img_size = dataset_train[0][0].shape

    # Small shared dataset
    test_ds, valid_ds_before = torch.utils.data.random_split(
        dataset_test, (9500, 500))
    small_shared_dataset = create_shared_dataset(valid_ds_before, 200)

    optimal_delay = 1.0

    # Start process for each fraction of c
    for c_counter in range(3, 3 + 1, 2):
        if args.model == 'cnn' and args.dataset == 'cifar':
            net_glob = CNNCifar(args=args).to(args.device)
            # net_glob_mainFL = copy.deepcopy(net_glob)
        elif args.model == 'cnn' and args.dataset == 'mnist':
            net_glob = CNNMnist(args=args).to(args.device)
            # net_glob_mainFL = copy.deepcopy(net_glob)
        elif args.model == 'cnn' and args.dataset == 'fmnist':
            net_glob = CNNFashion_Mnist(args=args).to(args.device)
            # net_glob_mainFL = copy.deepcopy(net_glob)
        elif args.model == 'mlp':
            len_in = 1
            for x in img_size:
                len_in *= x
            net_glob = MLP(dim_in=len_in,
                           dim_hidden=200,
                           dim_out=args.num_classes).to(args.device)
        else:
            exit('Error: unrecognized model')

        # Saving data
        data_Global_main = {
            "C": [],
            "Round": [],
            "Average Loss Train": [],
            "SDS Loss": [],
            "SDS Accuracy": [],
            "Workers Number": [],
            "Large Test Loss": [],
            "Large Test Accuracy": [],
            "Communication Cost": []
        }
        Final_LargeDataSetTest_MainFL = {
            "C": [],
            "Test Accuracy": [],
            "Test Loss": [],
            "Train Loss": [],
            "Train Accuracy": [],
            "Total Rounds": [],
            "Communication Cost": []
        }

        data_Global_DCFL = {
            "C": [],
            "Round": [],
            "Average Loss Train": [],
            "SDS Loss": [],
            "SDS Accuracy": [],
            "Workers Number": [],
            "Large Test Loss": [],
            "Large Test Accuracy": [],
            "Communication Cost": []
        }
        Final_LargeDataSetTest_DCFL = {
            "C": [],
            "Test Accuracy": [],
            "Test Loss": [],
            "Train Loss": [],
            "Train Accuracy": [],
            "Total Rounds": [],
            "Communication Cost": []
        }

        data_Global_G1 = {
            "C": [],
            "Round": [],
            "Average Loss Train": [],
            "SDS Loss": [],
            "SDS Accuracy": [],
            "Workers Number": [],
            "Large Test Loss": [],
            "Large Test Accuracy": [],
            "Communication Cost": []
        }
        Final_LargeDataSetTest_G1 = {
            "C": [],
            "Test Accuracy": [],
            "Test Loss": [],
            "Train Loss": [],
            "Train Accuracy": [],
            "Total Rounds": [],
            "Communication Cost": []
        }

        data_Global_G2 = {
            "C": [],
            "Round": [],
            "Average Loss Train": [],
            "SDS Loss": [],
            "SDS Accuracy": [],
            "Workers Number": [],
            "Large Test Loss": [],
            "Large Test Accuracy": [],
            "Communication Cost": []
        }
        Final_LargeDataSetTest_G2 = {
            "C": [],
            "Test Accuracy": [],
            "Test Loss": [],
            "Train Loss": [],
            "Train Accuracy": [],
            "Total Rounds": [],
            "Communication Cost": []
        }

        data_Global_Muhammed = {
            "C": [],
            "Round": [],
            "Average Loss Train": [],
            "SDS Loss": [],
            "SDS Accuracy": [],
            "Workers Number": [],
            "Large Test Loss": [],
            "Large Test Accuracy": [],
            "Communication Cost": []
        }
        Final_LargeDataSetTest_Muhammed = {
            "C": [],
            "Test Accuracy": [],
            "Test Loss": [],
            "Train Loss": [],
            "Train Accuracy": [],
            "Total Rounds": [],
            "Communication Cost": []
        }

        data_Global_Cho = {
            "C": [],
            "Round": [],
            "Average Loss Train": [],
            "SDS Loss": [],
            "SDS Accuracy": [],
            "Workers Number": [],
            "Large Test Loss": [],
            "Large Test Accuracy": [],
            "Communication Cost": []
        }
        Final_LargeDataSetTest_Cho = {
            "C": [],
            "Test Accuracy": [],
            "Test Loss": [],
            "Train Loss": [],
            "Train Accuracy": [],
            "Total Rounds": [],
            "Communication Cost": []
        }

        net_glob.train()
        net_glob_mainFL = copy.deepcopy(net_glob)
        net_glob_G1 = copy.deepcopy(net_glob)
        net_glob_G2 = copy.deepcopy(net_glob)
        cost = np.random.rand(args.num_users)

        R_G1 = 5
        args.frac = (c_counter / 10)

        # Main FL
        loss_main, dict_workers_index, Final_LargeDataSetTest_MainFL_temp, data_Global_main_temp = mainFl(
            net_glob_mainFL, dict_users_mainFL, dict_labels_counter_mainFL,
            args, cost, dataset_train, dataset_test, small_shared_dataset)

        Final_LargeDataSetTest_MainFL = merge(
            Final_LargeDataSetTest_MainFL, Final_LargeDataSetTest_MainFL_temp)
        data_Global_main = merge(data_Global_main, data_Global_main_temp)

        # with open(os.path.join(OUT_DIR, f"dict_users_mainFL-C-{args.frac}-{args.dataset}.pkl"), 'wb') as file:
        #     pickle.dump(dict_users_mainFL, file)

        # with open(os.path.join(OUT_DIR, f"dict_users_mainFL-C-{args.frac}-{args.dataset}.pkl"), 'rb') as file:
        #     dict_users_mainFL = pickle.load(file)

        # with open(os.path.join(OUT_DIR, f"workers_index-C-{args.frac}-{args.dataset}.pkl"), 'wb') as file:
        #     pickle.dump(dict_workers_index, file)

        # with open(os.path.join(OUT_DIR, f"cost-C-{args.frac}-{args.dataset}.pkl"), 'wb') as file:
        #     pickle.dump(cost, file)

        # with open(os.path.join(OUT_DIR, f"cost-C-{args.frac}-{args.dataset}.pkl"), 'rb') as file:
        #     cost = pickle.load(file)

        # print(cost)

        # with open(os.path.join(OUT_DIR, f"GoalLoss-C-{args.frac}-{args.dataset}.pkl"), 'wb') as file:
        #     pickle.dump(loss_main, file)

        date = datetime.now()
        _dir = os.path.join(OUT_DIR, str(date.date()))
        if not os.path.exists(_dir):
            os.makedirs(_dir)
        save_time = time.strftime("%Y%m%d-%H%M%S")

        Final_LargeDataSetTest_MainFL = pd.DataFrame.from_dict(
            Final_LargeDataSetTest_MainFL)
        data_Global_main = pd.DataFrame.from_dict(data_Global_main)
        Final_LargeDataSetTest_MainFL.to_csv(
            os.path.join(
                _dir,
                f"{save_time}-{args.dataset}-Final_LargeDataSetTest_MainFL.csv"
            ))
        data_Global_main.to_csv(
            os.path.join(_dir,
                         f"{save_time}-{args.dataset}-data_Global_main.csv"))

        # Proposed G1
        Final_LargeDataSetTest_G1_temp, data_Global_G1_temp = Proposed_G1(
            net_glob_G1, dict_workers_index, dict_users_DCFL,
            dict_labels_counter_mainFL, args, cost, dataset_train,
            dataset_test, small_shared_dataset, loss_main, R_G1, optimal_delay)
        Final_LargeDataSetTest_G1 = merge(Final_LargeDataSetTest_G1,
                                          Final_LargeDataSetTest_G1_temp)
        data_Global_G1 = merge(data_Global_G1, data_Global_G1_temp)

        Final_LargeDataSetTest_G1 = pd.DataFrame.from_dict(
            Final_LargeDataSetTest_G1)
        data_Global_G1 = pd.DataFrame.from_dict(data_Global_G1)
        Final_LargeDataSetTest_G1.to_csv(
            os.path.join(
                _dir,
                f"{save_time}-{args.dataset}-Final_LargeDataSetTest_G1.csv"))
        data_Global_G1.to_csv(
            os.path.join(_dir,
                         f"{save_time}-{args.dataset}-data_Global_G1.csv"))

        print("G1 alg is done")
Example #22
def main():
    # parse args
    args = args_parser()
    args.device = torch.device('cuda:{}'.format(
        args.gpu) if torch.cuda.is_available() and args.gpu != -1 else 'cpu')

    # load dataset and split users
    if args.dataset == 'mnist':
        trans_mnist = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])
        dataset_train = datasets.MNIST('../data/mnist/',
                                       train=True,
                                       download=True,
                                       transform=trans_mnist)
        dataset_test = datasets.MNIST('../data/mnist/',
                                      train=False,
                                      download=True,
                                      transform=trans_mnist)
        print("type of test dataset", type(dataset_test))
        # sample users
        if args.iid:
            dict_users = mnist_iid(dataset_train, args.num_users)
            # mirror the non-iid branch so dict_users_2 exists later
            dict_users_2 = dict_users
        else:
            dict_users, dict_labels_counter = mnist_noniid(
                dataset_train, args.num_users)
            dict_users_2, dict_labels_counter_2 = dict_users, dict_labels_counter
            #dict_users, dict_labels_counter = mnist_noniid_unequal(dataset_train, args.num_users)
    elif args.dataset == 'cifar':
        trans_cifar = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        dataset_train = datasets.CIFAR10('../data/cifar',
                                         train=True,
                                         download=True,
                                         transform=trans_cifar)
        dataset_test = datasets.CIFAR10('../data/cifar',
                                        train=False,
                                        download=True,
                                        transform=trans_cifar)
        if args.iid:
            dict_users = cifar_iid(dataset_train, args.num_users)
        else:
            exit('Error: only consider IID setting in CIFAR10')
    else:
        exit('Error: unrecognized dataset')
    img_size = dataset_train[0][0].shape

    # build model
    if args.model == 'cnn' and args.dataset == 'cifar':
        net_glob = CNNCifar(args=args).to(args.device)
        net_glob_2 = CNNCifar(args=args).to(args.device)
    elif args.model == 'cnn' and args.dataset == 'mnist':
        net_glob = CNNMnist(args=args).to(args.device)
        net_glob_2 = CNNMnist(args=args).to(args.device)
    elif args.model == 'mlp':
        len_in = 1
        for x in img_size:
            len_in *= x
        net_glob = MLP(dim_in=len_in, dim_hidden=200,
                       dim_out=args.num_classes).to(args.device)
    else:
        exit('Error: unrecognized model')

    #print(net_glob)

    #net_glob.train()

    acc_test, loss_test = test_img(net_glob, dataset_test, args)
    print("val test finished")
    print("{:.2f}".format(acc_test))
    temp = copy.deepcopy(net_glob)  # separate copy for evaluating local updates

    #net_glob_2 = net_glob
    temp_2 = copy.deepcopy(net_glob_2)

    # copy weights
    w_glob = net_glob.state_dict()

    # training
    loss_train = []
    cv_loss, cv_acc = [], []
    val_loss_pre, counter = 0, 0
    net_best = None
    best_loss = None
    val_acc_list, net_list = [], []

    Loss_local_each_global_total = []

    test_ds, valid_ds = torch.utils.data.random_split(dataset_test,
                                                      (9500, 500))
    loss_workers_total = np.zeros(shape=(args.num_users, args.epochs))
    label_workers = {
        i: np.array([], dtype='int64')
        for i in range(args.num_users)
    }

    workers_percent = []
    workers_count = 0
    acc_test_global, loss_test_global = test_img(net_glob, valid_ds, args)
    selected_users_index = []

    for idx in range(args.num_users):
        # print("train started")
        local = LocalUpdate(args=args,
                            dataset=dataset_train,
                            idxs=dict_users[idx])
        w, loss = local.train(net=copy.deepcopy(net_glob).to(args.device))
        # print(w)
        # print("train completed")

        # temp = FedAvg(w)
        temp.load_state_dict(w)
        temp.eval()
        acc_test_local, loss_test_local = test_img(temp, valid_ds, args)
        loss_workers_total[idx, 0] = acc_test_local  # pre-selection pass, before the round loop

        if workers_count >= (args.num_users / 2):
            break
        elif acc_test_local >= (0.7 * acc_test_global):
            selected_users_index.append(idx)

    for iter in range(args.epochs):
        print("round started")
        Loss_local_each_global = []
        loss_workers = np.zeros((args.num_users, args.epochs))
        w_locals, loss_locals = [], []
        m = max(int(args.frac * args.num_users), 1)
        #idxs_users = np.random.choice(range(args.num_users), m, replace=False)

        #if iter % 5 == 0:
        # Minoo
        x = net_glob
        x.eval()
        acc_test_global, loss_test_global = test_img(x, valid_ds, args)
        Loss_local_each_global_total.append(acc_test_global)

        for idx in selected_users_index:
            #print("train started")
            local = LocalUpdate(args=args,
                                dataset=dataset_train,
                                idxs=dict_users[idx])
            w, loss = local.train(net=copy.deepcopy(net_glob).to(args.device))
            #print(w)
            #print("train completed")

            #temp = FedAvg(w)
            temp.load_state_dict(w)
            temp.eval()
            acc_test_local, loss_test_local = test_img(temp, valid_ds, args)
            loss_workers_total[idx, iter] = acc_test_local

            if workers_count >= (args.num_users / 2):
                break
            elif acc_test_local >= (0.7 * acc_test_global):
                w_locals.append(copy.deepcopy(w))
                loss_locals.append(copy.deepcopy(loss))
                print("Update Received")
                workers_count += 1

        # update global weights
        w_glob = FedAvg(w_locals)

        # copy weight to net_glob
        net_glob.load_state_dict(w_glob)

        print("round completed")
        loss_avg = sum(loss_locals) / len(loss_locals)
        print('Round {:3d}, Average loss {:.3f}'.format(iter, loss_avg))
        loss_train.append(loss_avg)
        workers_percent.append(workers_count)

    # plot loss curve
    plt.figure()
    plt.plot(range(len(workers_percent)), workers_percent)
    plt.ylabel('train_loss')
    plt.savefig(
        './save/Newfed_WorkersPercent_0916_{}_{}_{}_C{}_iid{}.png'.format(
            args.dataset, args.model, args.epochs, args.frac, args.iid))
    # print(loss_workers_total)

    # plot loss curve
    # plt.figure()
    # plt.plot(range(len(loss_train)), loss_train)
    # plt.ylabel('train_loss')
    # plt.savefig('./save/Newfed_0916_{}_{}_{}_C{}_iid{}.png'.format(args.dataset, args.model, args.epochs, args.frac, args.iid))
    #

    plt.figure()
    for i in range(args.num_users):
        plot = plt.plot(range(len(loss_workers_total[i, :])),
                        loss_workers_total[i, :],
                        label="Worker {}".format(i))
    plot5 = plt.plot(range(len(Loss_local_each_global_total)),
                     Loss_local_each_global_total,
                     color='000000',
                     label="Global")
    plt.legend(loc='best')
    plt.ylabel('Small Test Set Accuracy of workers')
    plt.xlabel('Number of Rounds')
    plt.savefig(
        './save/NewFed_2workers_Acc_0916_{}_{}_{}_C{}_iid{}.png'.format(
            args.dataset, args.model, args.epochs, args.frac, args.iid))

    # plt.figure()
    # bins = np.linspace(0, 9, 3)
    # a = dict_labels_counter[:, 0].ravel()
    # print(type(a))
    # b = dict_labels_counter[:, 1].ravel()
    # x_labels = ['0', '1', '2', '3','4','5','6','7','8','9']
    # # Set plot parameters
    # fig, ax = plt.subplots()
    # width = 0.1  # width of bar
    # x = np.arange(10)
    # ax.bar(x, dict_labels_counter[:, 0], width, color='#000080', label='Worker 1')
    # ax.bar(x + width, dict_labels_counter[:, 1], width, color='#73C2FB', label='Worker 2')
    # ax.bar(x + 2*width, dict_labels_counter[:, 2], width, color='#ff0000', label='Worker 3')
    # ax.bar(x + 3*width, dict_labels_counter[:, 3], width, color='#32CD32', label='Worker 4')
    # ax.set_ylabel('Number of Labels')
    # ax.set_xticks(x + width + width / 2)
    # ax.set_xticklabels(x_labels)
    # ax.set_xlabel('Labels')
    # ax.legend()
    # plt.grid(True, 'major', 'y', ls='--', lw=.5, c='k', alpha=.3)
    # fig.tight_layout()
    # plt.savefig(
    #     './save/Newfed_2workersLabels_0916_{}_{}_{}_C{}_iid{}.png'.format(args.dataset, args.model, args.epochs, args.frac,
    #                                                                args.iid))

    # testing
    print("testing started")
    net_glob.eval()
    print("train test started")
    acc_train_final, loss_train_final = test_img(net_glob, dataset_train, args)
    print("train test finished")
    acc_test_final, loss_test_final = test_img(net_glob, dataset_test, args)
    print("val test finished")
    #print("Training accuracy: {:.2f}".format(acc_train))
    #print("Testing accuracy: {:.2f}".format(acc_test))
    print("{:.2f}".format(acc_test_final))
    #print("{:.2f".format(Loss_local_each_worker))

    # training
    w_glob_2 = net_glob_2.state_dict()

    loss_train_2 = []
    cv_loss_2, cv_acc_2 = [], []
    val_loss_pre_2, counter_2 = 0, 0
    net_best_2 = None
    best_loss_2 = None
    val_acc_list_2, net_list_2 = [], []

    Loss_local_each_global_total_2 = []

    loss_workers_total_2 = np.zeros(shape=(args.num_users, args.epochs))
    label_workers_2 = {
        i: np.array([], dtype='int64')
        for i in range(args.num_users)
    }

    for iter in range(args.epochs):
        print("round started")
        Loss_local_each_global_2 = []
        loss_workers_2 = np.zeros((args.num_users, args.epochs))
        w_locals_2, loss_locals_2 = [], []
        m_2 = max(int(args.frac * args.num_users), 1)
        idxs_users_2 = np.random.choice(range(args.num_users),
                                        m_2,
                                        replace=False)

        # Minoo
        x_2 = net_glob_2
        x_2.eval()
        acc_test_global_2, loss_test_global_2 = test_img(x_2, valid_ds, args)
        Loss_local_each_global_total_2.append(acc_test_global_2)

        for idx in idxs_users_2:
            #print("train started")
            local_2 = LocalUpdate(args=args,
                                  dataset=dataset_train,
                                  idxs=dict_users_2[idx])
            w_2, loss_2 = local_2.train(
                net=copy.deepcopy(net_glob_2).to(args.device))
            #print(w)
            #print("train completed")
            w_locals_2.append(copy.deepcopy(w_2))
            loss_locals_2.append(copy.deepcopy(loss_2))
            #temp = FedAvg(w)
            temp_2.load_state_dict(w_2)
            temp_2.eval()
            acc_test_local_2, loss_test_local_2 = test_img(
                temp_2, valid_ds, args)
            loss_workers_total_2[idx, iter] = acc_test_local_2

        # update global weights
        w_glob_2 = FedAvg(w_locals_2)

        # copy weight to net_glob
        net_glob_2.load_state_dict(w_glob_2)

        print("round completed")
        loss_avg_2 = sum(loss_locals_2) / len(loss_locals_2)
        print('Round {:3d}, Average loss {:.3f}'.format(iter, loss_avg_2))
        loss_train_2.append(loss_avg_2)
        print("round completed")

        # plot loss curve
    plt.figure()
    plt.plot(range(len(loss_train_2)),
             loss_train_2,
             color='#000000',
             label="Main FL")
    plt.plot(range(len(loss_train)),
             loss_train,
             color='#ff0000',
             label="Centralized Algorithm")
    plt.ylabel('train_loss')
    plt.savefig('./save/main_fed_0916_{}_{}_{}_C{}_iid{}.png'.format(
        args.dataset, args.model, args.epochs, args.frac, args.iid))
    # print(loss_workers_total)

    plt.figure()
    for i in range(args.num_users):
        plot = plt.plot(range(len(loss_workers_total_2[i, :])),
                        loss_workers_total_2[i, :],
                        label="Worker {}".format(i))
    plot5 = plt.plot(range(len(Loss_local_each_global_total_2)),
                     Loss_local_each_global_total_2,
                     color='#000000',
                     label="Global")
    plt.legend(loc='best')
    plt.ylabel('Small Test Set Accuracy of workers')
    plt.xlabel('Number of Rounds')
    plt.savefig('./save/mainfed_Acc_0916_{}_{}_{}_C{}_iid{}.png'.format(
        args.dataset, args.model, args.epochs, args.frac, args.iid))

    # plt.figure()
    # bins = np.linspace(0, 9, 3)
    # a = dict_labels_counter_2[:, 0].ravel()
    # print(type(a))
    # b = dict_labels_counter_2[:, 1].ravel()
    # x_labels = ['0', '1', '2', '3','4','5','6','7','8','9']
    # # Set plot parameters
    # fig, ax = plt.subplots()
    # width = 0.1  # width of bar
    # x = np.arange(10)
    # ax.bar(x, dict_labels_counter_2[:, 0], width, color='#000080', label='Worker 1')
    # ax.bar(x + width, dict_labels_counter_2[:, 1], width, color='#73C2FB', label='Worker 2')
    # ax.bar(x + 2*width, dict_labels_counter_2[:, 2], width, color='#ff0000', label='Worker 3')
    # ax.bar(x + 3*width, dict_labels_counter_2[:, 3], width, color='#32CD32', label='Worker 4')
    # ax.set_ylabel('Number of Labels')
    # ax.set_xticks(x + width + width / 2)
    # ax.set_xticklabels(x_labels)
    # ax.set_xlabel('Labels')
    # ax.legend()
    # plt.grid(True, 'major', 'y', ls='--', lw=.5, c='k', alpha=.3)
    # fig.tight_layout()
    # plt.savefig(
    #     './save/main_fed_2workersLabels_0916_{}_{}_{}_C{}_iid{}.png'.format(args.dataset, args.model, args.epochs, args.frac,
    #                                                                args.iid))

    # testing
    print("testing started")
    net_glob_2.eval()
    print("train test started")
    acc_train_final, loss_train_final = test_img(net_glob_2, dataset_train, args)
    print("train test finished")
    acc_test_final, loss_test_final = test_img(net_glob_2, dataset_test, args)
    print("test set evaluation finished")
    #print("Training accuracy: {:.2f}".format(acc_train_final))
    #print("Testing accuracy: {:.2f}".format(acc_test_final))
    print("{:.2f}".format(acc_test_final))
    #print("{:.2f}".format(Loss_local_each_worker))

    return loss_test_final, loss_train_final
Ejemplo n.º 23
0
 def __init__(self, args, w):
     self.args = args
     self.clients_update_w = []
     self.clients_loss = []
     self.model = CNNMnist(args=args).to(args.device)
     self.model.load_state_dict(w)
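For context, here is a minimal, self-contained sketch of the state-dict averaging a Server like this collects updates for (it mirrors the plain FedAvg shown in example n.º 29 below; the toy single-tensor "models" are hypothetical stand-ins, not from the source):

import copy
import torch

def fed_avg(states):
    # element-wise average of a list of model state_dicts
    w_avg = copy.deepcopy(states[0])
    for k in w_avg.keys():
        for w in states[1:]:
            w_avg[k] += w[k]
        w_avg[k] = torch.div(w_avg[k], len(states))
    return w_avg

# two fake client updates for a one-tensor "model"
a = {'w': torch.ones(2, 2)}
b = {'w': torch.zeros(2, 2)}
print(fed_avg([a, b]))  # {'w': tensor of 0.5s}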
Ejemplo n.º 24
0
def main_worker(gpu, ngpus_per_node, args):
    print("gpu:", gpu)
    args.gpu = gpu
    if args.rank == 0:  # (the first node has only three GPUs, so its ranks need special handling)
        newrank = args.rank * ngpus_per_node + gpu
    else:
        newrank = args.rank * ngpus_per_node + gpu - 1
    # initialization: communicate over TCP
    dist.init_process_group(init_method=args.init_method,
                            backend="nccl",
                            world_size=args.world_size,
                            rank=newrank)

    # build communication groups: rank 0 acts as the server, and broadcast is
    # used to emulate send/recv, so the server needs a group with every client
    group = []
    for i in range(1, args.world_size):
        group.append(dist.new_group([0, i]))
    allgroup = dist.new_group([i for i in range(args.world_size)])

    if newrank == 0:
        """ server"""

        print("{}号服务器的第{}块GPU作为server".format(args.rank, gpu))

        #在模型训练期间,server只负责整合参数并分发,不参与任何计算
        #设置cpu
        args.device = torch.device(
            'cuda:{}'.format(args.gpu)
            if torch.cuda.is_available() and args.gpu != -1 else 'cpu')

        # load the test data
        trans_mnist = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])
        dataset_test = datasets.MNIST('data/',
                                      train=False,
                                      download=True,
                                      transform=trans_mnist)
        test_set = torch.utils.data.DataLoader(dataset_test,
                                               batch_size=args.bs)
        """calculate influence function"""
        model = CNNMnist().to(args.device)
        model.load_state_dict(torch.load('w_wag'))

        test_id = 0  # index of the selected test sample
        data, target = test_set.dataset[test_id]
        data = test_set.collate_fn([data])
        target = test_set.collate_fn([target])

        print("begin grad")
        grad_test = grad_z(data, target, model, gpu, create_graph=False)  # initial value of v
        print("end grad")
        v = grad_test
        s_test = []
        """server与client交互计算s_test"""
        for i in range(args.world_size - 1):
            #id_client=random.randint(1,args.world_size) #选择client
            #向选择的client发送当前v
            print("send v to client:", i + 1)
            for j in v:
                temp = j
                dist.broadcast(src=0, tensor=temp, group=group[i])
            # once the client finishes, receive v from it, ready to pass to the next client
            print("rec v from client:", i + 1)
            v_new = copy.deepcopy(v)
            for j in v_new:
                temp = j
                dist.broadcast(src=i + 1, tensor=temp, group=group[i])
            s_test.append(v_new)

        # s_test finished: broadcast the final s_test to every client
        e_s_test = s_test[0]
        for i in range(1, args.world_size - 1):
            e_s_test = [i + j for i, j in six.moves.zip(e_s_test, s_test[i])]
        for j in e_s_test:
            temp = j
            dist.broadcast(src=0, tensor=temp, group=allgroup)
        """交互结束"""

        # receive the influence values from the clients
        print("rec influence")
        allinfluence = []
        influence = torch.tensor([i for i in range(4285)], dtype=torch.float32)  # receive buffer (4285 appears to be the per-client training-set size)
        influence = influence.to(args.device)

        for i in range(args.world_size - 1):
            dist.broadcast(src=i + 1, tensor=influence, group=group[i])
            temp = copy.deepcopy(influence)
            allinfluence.append(temp)
        torch.save(allinfluence, 'influence')
    else:
        """clents"""

        print("{}号服务器的第{}号GPU作为第{}个client".format(args.rank, gpu, newrank))

        # select the device
        args.device = torch.device(
            'cuda:{}'.format(args.gpu)
            if torch.cuda.is_available() and args.gpu != -1 else 'cpu')
        # load the training data
        data = torch.load("data/distributed/data_of_client{}".format(newrank))
        bsz = 64
        train_set = torch.utils.data.DataLoader(data, batch_size=bsz)
        model = CNNMnist().to(args.device)
        model.load_state_dict(torch.load('w_wag'))  # load the trained weights
        data, target = train_set.dataset[0]
        data = train_set.collate_fn([data])
        target = train_set.collate_fn([target])
        grad_v = grad_z(data, target, model, gpu=gpu)
        v = grad_v
        """calculate influence function"""
        v_new = []
        # receive v from the server
        """interact with the server to compute s_test; this could be iterated
        (currently a single pass, no loop)"""
        for i in v:
            temp = i
            dist.broadcast(src=0, tensor=temp, group=group[newrank - 1])
            v_new.append(temp)
        s_test = stest(v_new,
                       model,
                       train_set,
                       gpu,
                       damp=0.01,
                       scale=1000.0,
                       repeat=5)  # compute s_test
        # send s_test to the server for the next iteration
        for i in s_test:
            temp = copy.copy(i)
            dist.broadcast(src=newrank, tensor=temp, group=group[newrank - 1])
        # once iteration is done, receive the final s_test from the server and compute the influence function
        s_test_fin = []
        for i in s_test:
            temp = copy.copy(i)
            dist.broadcast(src=0, tensor=temp, group=allgroup)
            s_test_fin.append(temp)
        """s_test计算结束,得到最终的s_test_fin,开始计算influence"""
        print("client:", newrank, "calculate influence")
        n = len(train_set.dataset)
        influence = np.array([i for i in range(n)], dtype='float32')
        for i in utility.create_progressbar(len(train_set.dataset),
                                            desc='influence',
                                            start=0):

            # per-sample gradient
            data, target = train_set.dataset[i]
            data = train_set.collate_fn([data])
            target = train_set.collate_fn([target])
            grad_z_vec = grad_z(data, target, model, gpu=gpu)
            # influence of training point i: -<grad_z, s_test> / n
            inf_tmp = -sum([
                torch.sum(k * j).data.cpu().numpy()
                for k, j in six.moves.zip(grad_z_vec, s_test_fin)
            ]) / n
            influence[i] = inf_tmp
        influence = torch.tensor(influence).to(args.device)
        # send the influence values back to the server
        print("client:", newrank, "send influence to server")
        dist.broadcast(src=newrank, tensor=influence, group=group[newrank - 1])
        print("client:", newrank, "end send influence to server")
Ejemplo n.º 25
0
    if args.dataset == 'mnist':
        transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
        train_set = datasets.MNIST(root='./data/mnist', train=True, download=False, transform=transform)
        test_set = datasets.MNIST(root='./data/mnist', train=False, download=False, transform=transform)
    elif args.dataset == 'cifar':
        transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
        train_set = datasets.CIFAR10('./data/cifar', train=True, download=False, transform=transform)
        test_set = datasets.CIFAR10('./data/cifar', train=False, download=False, transform=transform)
    else:
        exit('Error: unrecognized dataset...')

    dict_users_train, ratio = noniid_train2(train_set, args.num_users)
    dict_users_test = noniid_test(test_set, args.num_users, ratio)

    print('Data finished...')

    # load global model
    net_glob = CNNCifar(args=args).to(args.device) if args.dataset == 'cifar' else CNNMnist(args=args).to(args.device)
    net_glob.train()

    # parameters
    w_glob = net_glob.state_dict()

    # test each of clients
    test_acc = [0 for i in range(args.num_users)]
    test_loss = [0 for i in range(args.num_users)]
    for idx in range(args.num_users):
        # every time start with the same global parameters
        net_glob.load_state_dict(w_glob)
        client = Client(args=args, dataset=train_set, idxs=dict_users_train[idx], bs=args.train_bs)
        w_client = client.local_train(net=copy.deepcopy(net_glob).to(args.device))

        client = Client(args=args, dataset=test_set, idxs=dict_users_test[idx], bs=args.test_bs)
Ejemplo n.º 26
0
        dataset_test = datasets.CIFAR10('../data/cifar', train=False, download=True, transform=trans_cifar)
        if args.iid:
            dict_users = cifar_iid(dataset_train, args.num_users)
        else:
            dict_users = cifar_noniid(dataset_train, args.num_users, min_train=200, max_train=1000, main_label_prop=0.8, other=9)
    else:
        exit('Error: unrecognized dataset')

    img_size = dataset_train[0][0].shape

    # build model
    if args.model == 'cnn' and args.dataset == 'cifar':
        # global_net = CNNCifar(args=args).to(args.device)
        global_net = CNNCifarPlus(args=args).to(args.device)
    elif args.model == 'cnn' and args.dataset == 'mnist':
        global_net = CNNMnist(args=args).to(args.device)
    elif args.model == 'mlp':
        len_in = 1
        for x in img_size:
            len_in *= x
        global_net = MLP(dim_in=len_in, dim_hidden=200, dim_out=args.num_classes).to(args.device)
    else:
        exit('Error: unrecognized model')


    print(global_net)

    global_net.train()

    # start time
    # time_start = time.time()
Ejemplo n.º 27
0
def main():
    # parse args
    args = args_parser()
    args.device = torch.device('cuda:{}'.format(args.gpu) if torch.cuda.is_available() and args.gpu != -1 else 'cpu')

    # load dataset and split users
    if args.dataset == 'mnist':
        trans_mnist = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
        dataset_train = datasets.MNIST('../data/mnist/', train=True, download=True, transform=trans_mnist)
        dataset_test = datasets.MNIST('../data/mnist/', train=False, download=True, transform=trans_mnist)
        # sample users
        if args.iid:
            dict_users = mnist_iid(dataset_train, args.num_users)
        else:
            dict_users, dict_labels_counter = mnist_noniid(dataset_train, args.num_users)
            dict_users_mainFL, dict_labels_counter_mainFL = dict_users, dict_labels_counter
    elif args.dataset == 'cifar':
        trans_cifar = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
        dataset_train = datasets.CIFAR10('../data/cifar', train=True, download=True, transform=trans_cifar)
        dataset_test = datasets.CIFAR10('../data/cifar', train=False, download=True, transform=trans_cifar)
        if args.iid:
            dict_users = cifar_iid(dataset_train, args.num_users)
        else:
            dict_users, dict_labels_counter = cifar_noniid(dataset_train, args.num_users)
            dict_users_mainFL, dict_labels_counter_mainFL = dict_users, dict_labels_counter
    elif args.dataset == 'fmnist':
        trans_fmnist = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
        dataset_train = datasets.FashionMNIST('../data/fmnist', train=True, download=True, transform=trans_fmnist)
        dataset_test = datasets.FashionMNIST('../data/fmnist', train=False, download=True, transform=trans_fmnist)
        if args.iid:
            dict_users = mnist_iid(dataset_train, args.num_users)
        else:
            dict_users, dict_labels_counter = mnist_noniid(dataset_train, args.num_users)
            dict_users_mainFL, dict_labels_counter_mainFL = dict_users, dict_labels_counter
    else:
        exit('Error: unrecognized dataset')


    img_size = dataset_train[0][0].shape

    acc_full_distributed = []
    acc_full_main = []
    loss_full_distributed = []
    loss_full_main = []

    SD_acc_full_distributed = []
    SD_acc_full_main = []
    SD_loss_full_distributed = []
    SD_loss_full_main = []

    workers_percent_full_distributed = []
    workers_percent_full_main = []
    variable_start = 0.1
    variable_end = 1.0
    while_counter = 0.1
    counter_array = []
    Accuracy_Fraction = []
    Workers_Fraction = []

    accuracy_fraction_each_round_newFL = 0
    workers_fraction_each_round_newFL = 0
    accuracy_fraction_each_round_mainFL = 0
    workers_fraction_each_round_mainFL = 0

    data_main = {}
    data_DCFL = {}
    data_Global_main = {"C": [], "Round":[], "Average Loss Train": [], "Average Loss Test": [], "Accuracy Test": [],
                        "Workers Number": [], "Large Test Loss":[], "Large Test Accuracy":[]}
    data_Global_DCFL = {"C": [], "Round":[], "Average Loss Train": [], "Average Loss Test": [], "Accuracy Test": [],
                        "Workers Number": [], "Large Test Loss":[], "Large Test Accuracy":[]}
    Final_LargeDataSetTest_DCFL = {"C":[], "Test Accuracy":[], "Test Loss":[], "Train Loss":[], "Train Accuracy":[],
                                   "Total Rounds":[]}
    Final_LargeDataSetTest_MainFL = {"C":[], "Test Accuracy": [], "Test Loss": [], "Train Loss": [], "Train Accuracy":[]}



    # build model
    args.frac = variable_start

    # hold out 500 of the 10000 test images, then sample a 200-image shared validation set
    test_ds, valid_ds_before = torch.utils.data.random_split(dataset_test, (9500, 500))
    valid_ds = create_shared_dataset(valid_ds_before, 200)

    #while variable_start <= variable_end:
    for c_counter in range(1, 11, 3):
        if args.model == 'cnn' and args.dataset == 'cifar':
            net_glob = CNNCifar(args=args).to(args.device)
            net_glob_mainFL = copy.deepcopy(net_glob)
        elif args.model == 'cnn' and args.dataset == 'mnist':
            net_glob = CNNMnist(args=args).to(args.device)
            net_glob_mainFL = copy.deepcopy(net_glob)
        elif args.model == 'cnn' and args.dataset == 'fmnist':
            net_glob = CNNFashion_Mnist(args=args).to(args.device)
            net_glob_mainFL = copy.deepcopy(net_glob)
        elif args.model == 'mlp':
            len_in = 1
            for x in img_size:
                len_in *= x
            net_glob = MLP(dim_in=len_in, dim_hidden=200, dim_out=args.num_classes).to(args.device)
        else:
            exit('Error: unrecognized model')

        counter_array.append((c_counter/10))
        args.frac = (c_counter/10)

        # save the indices of the workers selected in each round
        dict_workers_index = defaultdict(list)


        # Main FL baseline

        w_glob_mainFL = net_glob_mainFL.state_dict()

        loss_train_mainFL = []
        # cv_loss_2, cv_acc_2 = [], []
        # val_loss_pre_2, counter_2 = 0, 0
        # net_best_2 = None
        # best_loss_2 = None
        # val_acc_list_2, net_list_2 = [], []

        Loss_local_each_global_total_mainFL = []
        Accuracy_local_each_global_total_mainFL = []

        loss_workers_total_mainFL = np.zeros(shape=(args.num_users, args.epochs))
        label_workers_mainFL = {i: np.array([], dtype='int64') for i in range(args.num_users)}

        validation_test_mainFed = []
        acc_test, loss_test = test_img(net_glob_mainFL, dataset_test, args)
        workers_participation_main_fd = np.zeros((args.num_users, args.epochs))
        workers_percent_main = []

        # for iter in range(args.epochs):
        net_glob_mainFL.eval()
        acc_test_final_mainFL, loss_test_final_mainFL = test_img(net_glob_mainFL, dataset_test, args)
        while_counter_mainFL = loss_test_final_mainFL
        iter_mainFL = 0

        workers_mainFL = []
        for i in range(args.num_users):
            workers_mainFL.append(i)

        temp_netglob_mainFL = copy.deepcopy(net_glob_mainFL)  # separate copy so per-worker evaluation does not overwrite the global model

        while iter_mainFL < (args.epochs/2):

            data_main['round_{}'.format(iter_mainFL)] = []
            # data_Global_main['round_{}'.format(iter)] = []
            # print("round started")
            Loss_local_each_global_mainFL = []
            loss_workers_mainFL = np.zeros((args.num_users, args.epochs))
            w_locals_mainFL, loss_locals_mainFL = [], []
            m_mainFL = max(int(args.frac * args.num_users), 1)
            idxs_users_mainFL = np.random.choice(range(args.num_users), m_mainFL, replace=False)
            list_of_random_workers = random.sample(workers_mainFL, m_mainFL)
            for i in range(len(list_of_random_workers)):
                dict_workers_index[iter_mainFL].append(list_of_random_workers[i])

            x_mainFL = copy.deepcopy(net_glob_mainFL)
            x_mainFL.eval()
            acc_test_global_mainFL, loss_test_global_mainFL = test_img(x_mainFL, valid_ds, args)
            Loss_local_each_global_total_mainFL.append(loss_test_global_mainFL)
            Accuracy_local_each_global_total_mainFL.append(acc_test_global_mainFL)
            SD_acc_full_main.append(acc_test_global_mainFL)
            SD_loss_full_main.append(loss_test_global_mainFL)

            workers_count_mainFL = 0
            temp_accuracy = np.zeros(1)
            temp_loss_test = np.zeros(1)
            temp_loss_train = np.zeros(1)
            for idx in list_of_random_workers:
                # print("train started")
                local_mainFL = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users_mainFL[idx])
                w_mainFL, loss_mainFL = local_mainFL.train(net=copy.deepcopy(net_glob_mainFL).to(args.device))
                # print(w)
                # print("train completed")
                w_locals_mainFL.append(copy.deepcopy(w_mainFL))
                loss_locals_mainFL.append(copy.deepcopy(loss_mainFL))
                # temp = FedAvg(w)
                temp_netglob_mainFL.load_state_dict(w_mainFL)
                temp_netglob_mainFL.eval()
                print(pnorm_2(temp_netglob_mainFL, 2))
                acc_test_local_mainFL, loss_test_local_mainFL = test_img(temp_netglob_mainFL, valid_ds, args)
                temp_accuracy[0] = acc_test_local_mainFL
                temp_loss_test[0] = loss_test_local_mainFL
                temp_loss_train[0] = loss_mainFL
                loss_workers_total_mainFL[idx, iter_mainFL] = acc_test_local_mainFL
                workers_participation_main_fd[idx][iter_mainFL] = 1
                workers_count_mainFL += 1
                data_main['round_{}'.format(iter_mainFL)].append({
                    'C': args.frac,
                    'User ID': idx,
                    # 'Local Update': copy.deepcopy(w_mainFL),
                    'Loss Train': temp_loss_train[0],
                    'Loss Test': temp_loss_test[0],
                    'Accuracy': temp_accuracy[0]
                })

            # update global weights
            w_glob_mainFL = FedAvg(w_locals_mainFL)

            # copy weight to net_glob
            net_glob_mainFL.load_state_dict(w_glob_mainFL)

            # print("round completed")
            loss_avg_mainFL = sum(loss_locals_mainFL) / len(loss_locals_mainFL)
            # print('Round {:3d}, Average loss {:.3f}'.format(iter, loss_avg_mainFL))
            loss_train_mainFL.append(loss_avg_mainFL)
            # print("round completed")

            acc_test_round_mainfed, loss_test_round_mainfed = test_img(net_glob_mainFL, dataset_test, args)
            validation_test_mainFed.append(acc_test_round_mainfed)
            workers_percent_main.append(workers_count_mainFL / args.num_users)

            # end-of-round logging
            print(iter_mainFL, " round main fl finished")

            acc_test_final_mainFL, loss_test_final_mainFL = test_img(net_glob_mainFL, dataset_test, args)
            while_counter_mainFL = loss_test_final_mainFL

            data_Global_main["Round"].append(iter_mainFL)
            data_Global_main["C"].append(args.frac)
            data_Global_main["Average Loss Train"].append(float(loss_avg_mainFL))
            data_Global_main["Average Loss Test"].append(float(loss_test_global_mainFL))
            data_Global_main["Accuracy Test"].append(float(acc_test_global_mainFL))
            data_Global_main["Workers Number"].append(float(workers_count_mainFL))
            data_Global_main["Large Test Loss"].append(float(loss_test_final_mainFL))
            data_Global_main["Large Test Accuracy"].append(float(acc_test_final_mainFL))

            iter_mainFL = iter_mainFL + 1

        workers_percent_final_mainFL = np.zeros(args.num_users)
        workers_name_mainFL = np.empty(args.num_users)
        for i in range(len(workers_participation_main_fd[:, 1])):
            workers_percent_final_mainFL[i] = sum(workers_participation_main_fd[i, :]) / args.epochs
            workers_name_mainFL[i] = i

        net_glob_mainFL.eval()
        # print("train test started")
        acc_train_final_main, loss_train_final_main = test_img(net_glob_mainFL, dataset_train, args)
        # print("train test finished")
        acc_test_final_main, loss_test_final_main = test_img(net_glob_mainFL, dataset_test, args)

        Final_LargeDataSetTest_MainFL["C"].append(args.frac)
        Final_LargeDataSetTest_MainFL["Test Loss"].append(float(loss_test_final_main))
        Final_LargeDataSetTest_MainFL["Test Accuracy"].append(float(acc_test_final_main))
        Final_LargeDataSetTest_MainFL["Train Loss"].append(float(loss_train_final_main))
        Final_LargeDataSetTest_MainFL["Train Accuracy"].append(float(acc_train_final_main))






        # copy weights
        w_glob = net_glob.state_dict()

        temp_after = copy.deepcopy(net_glob)
        temp_before = copy.deepcopy(net_glob)

        # training
        loss_train = []
        # cv_loss, cv_acc = [], []
        # val_loss_pre, counter = 0, 0
        # net_best = None
        # best_loss = None
        # val_acc_list, net_list = [], []

        Loss_local_each_global_total = []


        # valid_ds = create_shared_dataset(dataset_test, 500)
        loss_workers_total = np.zeros(shape=(args.num_users, args.epochs))
        label_workers = {i: np.array([], dtype='int64') for i in range(args.num_users)}

        workers_percent_dist = []
        validation_test_newFed = []
        workers_participation = np.zeros((args.num_users, args.epochs))
        workers = []
        for i in range(args.num_users):
            workers.append(i)

        counter_threshold_decrease = np.zeros(args.epochs)
        Global_Accuracy_Tracker = np.zeros(args.epochs)
        Global_Loss_Tracker = np.zeros(args.epochs)
        threshold = 0.5
        alpha = 0.5  # decrease parameter
        beta = 0.1   # delta-accuracy controller
        gamma = 0.5  # threshold decrease step


        Goal_Loss = float(loss_test_final_main)

        #for iter in range(args.epochs):

        net_glob.eval()
        acc_test_final, loss_test_final = test_img(net_glob, dataset_test, args)
        while_counter = float(loss_test_final)
        iter = 0

        total_rounds_dcfl = 0

        while (while_counter + 0.01) > Goal_Loss and iter < args.epochs:  # iter must stay a valid round index

            data_DCFL['round_{}'.format(iter)] = []
            Loss_local_each_global = []
            loss_workers = np.zeros((args.num_users, args.epochs))
            w_locals, loss_locals = [], []
            m = max(int(args.frac * args.num_users), 1)
            idxs_users = np.random.choice(range(args.num_users), m, replace=False)
            counter_threshold = 0
            print(iter, " in dist FL started")
            #if iter % 5 == 0:

            x = copy.deepcopy(net_glob)
            x.eval()
            acc_test_global, loss_test_global = test_img(x, valid_ds, args)
            Loss_local_each_global_total.append(acc_test_global)
            Global_Accuracy_Tracker[iter] = acc_test_global
            Global_Loss_Tracker[iter] = loss_test_global
            if iter > 0 & (Global_Loss_Tracker[iter-1] - Global_Loss_Tracker[iter] <= beta):
                threshold = threshold - gamma
                if threshold == 0.0:
                    threshold = 1.0
                print("threshold decreased to", threshold)
            workers_count = 0

            SD_acc_full_distributed.append(acc_test_global)
            SD_loss_full_distributed.append(loss_test_global)


            temp_w_locals = []
            temp_workers_loss = np.empty(args.num_users)
            temp_workers_accuracy = np.empty(args.num_users)
            temp_workers_loss_test = np.empty(args.num_users)
            temp_workers_loss_difference = np.empty(args.num_users)
            temp_workers_accuracy_difference = np.empty(args.num_users)
            flag = np.zeros(args.num_users)

            if iter < (args.epochs/2):
                # replay the same workers Main FL used in this round
                list_of_random_workers_newfl = dict_workers_index[iter]
            else:
                list_of_random_workers_newfl = random.sample(workers, m)


            for idx in list_of_random_workers_newfl:
                #print("train started")

                # before starting train
                temp_before = copy.deepcopy(net_glob)
                # temp_before.load_state_dict(w)
                temp_before.eval()
                acc_test_local_before, loss_test_local_before = test_img(temp_before, valid_ds, args)

                local = LocalUpdate(args=args, dataset=dataset_train, idxs=dict_users[idx])
                w, loss = local.train(net=copy.deepcopy(net_glob).to(args.device))
                #print(w)
                #print("train completed")

                #print("type of idx is ", type(temp_w_locals))
                temp_w_locals.append(copy.deepcopy(w))
                temp_workers_loss[idx] = copy.deepcopy(loss)

                temp_after = copy.deepcopy(net_glob)

                temp_after.load_state_dict(w)
                temp_after.eval()
                acc_test_local_after, loss_test_local_after = test_img(temp_after, valid_ds, args)
                loss_workers_total[idx, iter] = loss_test_local_after
                temp_workers_accuracy[idx] = acc_test_local_after
                temp_workers_loss_test[idx] = loss_test_local_after
                temp_workers_loss_difference[idx] = loss_test_local_before - loss_test_local_after
                temp_workers_accuracy_difference[idx] = acc_test_local_after - acc_test_local_before

            print("train finished")
            while len(w_locals) < 1:
                #print("recieving started")
                index = 0
                for idx in list_of_random_workers_newfl:
                    #print("acc is ", temp_workers_accuracy[idx])
                    # print(temp_workers_loss_differenc)
                    if workers_count >= m:
                        break
                    elif temp_workers_loss_differenc[idx] >= (threshold) \
                            and temp_workers_loss_differenc[idx] > 0 \
                            and flag[idx]==0:
                        print("Update Received")
                        w_locals.append(copy.deepcopy(temp_w_locals[index]))
                        #print(temp_w_locals[index])
                        loss_locals.append(temp_workers_loss[idx])
                        flag[idx] = 1
                        workers_count += 1
                        workers_participation[idx][iter] = 1

                        data_DCFL['round_{}'.format(iter)].append({
                            'C': args.frac,
                            'User ID': idx,
                            'Loss Train': temp_workers_loss[idx],
                            'Loss Test': loss_workers_total[idx, iter],
                            'Accuracy': temp_workers_accuracy[idx]
                        })
                    index += 1
                if len(w_locals) < 1:
                    # no worker cleared the bar: relax the threshold and retry
                    threshold = threshold / 2
                    if threshold <= 0:
                        threshold = 1
                    print("threshold relaxed to", threshold)

            # update global weights
            w_glob = FedAvg(w_locals)

            # copy weight to net_glob
            net_glob.load_state_dict(w_glob)

            #print("round completed")
            loss_avg = sum(loss_locals) / len(loss_locals)
            loss_train.append(loss_avg)
            workers_percent_dist.append(workers_count/args.num_users)


            counter_threshold_decrease[iter] = counter_threshold
            print(iter, " round dist fl finished")


            acc_test_final, loss_test_final = test_img(net_glob, dataset_test, args)
            while_counter = loss_test_final


            data_Global_DCFL["Round"].append(iter)
            data_Global_DCFL["C"].append(args.frac)
            data_Global_DCFL["Average Loss Train"].append(loss_avg)
            data_Global_DCFL["Accuracy Test"].append(Global_Accuracy_Tracker[iter])
            data_Global_DCFL["Average Loss Test"].append(Global_Loss_Tracker[iter])
            data_Global_DCFL["Workers Number"].append(workers_count)
            data_Global_DCFL["Large Test Loss"].append(float(loss_test_final))
            data_Global_DCFL["Large Test Accuracy"].append(float(acc_test_final))

            total_rounds_dcfl = iter

            iter = iter + 1


        # fraction of rounds each worker participated in
        workers_percent_final = np.zeros(args.num_users)
        workers_name = np.empty(args.num_users)
        #print(workers_participation)
        for i in range(len(workers_participation[:, 1])):
            workers_percent_final[i] = sum(workers_participation[i, :])/args.epochs
            workers_name[i] = i



        workers_fraction_each_round_newFL = sum(workers_percent_final)/len(workers_percent_final)


        # testing
        #print("testing started")
        net_glob.eval()
        #print("train test started")
        acc_train_final, loss_train_final = test_img(net_glob, dataset_train, args)
        #print("train test finished")
        acc_test_final, loss_test_final = test_img(net_glob, dataset_test, args)

        acc_full_distributed.append(acc_test_final)
        loss_full_distributed.append(loss_test_final)

        Final_LargeDataSetTest_DCFL["C"].append(args.frac)
        Final_LargeDataSetTest_DCFL["Test Loss"].append(float(loss_test_final))
        Final_LargeDataSetTest_DCFL["Test Accuracy"].append(float(acc_test_final))
        Final_LargeDataSetTest_DCFL["Train Loss"].append(float(loss_train_final))
        Final_LargeDataSetTest_DCFL["Train Accuracy"].append(float(acc_train_final))
        Final_LargeDataSetTest_DCFL["Total Rounds"].append(int(total_rounds_dcfl))

        variable_start = variable_start + while_counter  # leftover from the commented-out while-loop variant above

        print("C is ", c_counter/10)

    with open('CIFAR_100users_data_main_1229-2020.json', 'w') as outfile:
        json.dump(data_main, outfile)

    with open('CIFAR_100users_data_DCFL_1229-2020.json', 'w') as outfile:
        json.dump(data_DCFL, outfile)

    with open('CIFAR_100users_data_DCFL_Global_1229-2020.json', 'w') as outfile:
        json.dump(data_Global_DCFL, outfile)

    with open('CIFAR_100users_data_main_Global_1229-2020.json', 'w') as outfile:
        json.dump(data_Global_main, outfile)

    with open('Final-CIFAR_100users_data_main_Global_1229-2020.json', 'w') as outfile:
        json.dump(Final_LargeDataSetTest_MainFL, outfile)

    with open('Final-CIFAR_100users_data_DCFL_Global_1229-2020.json', 'w') as outfile:
        json.dump(Final_LargeDataSetTest_DCFL, outfile)


    return 1
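The gating rule in the DCFL loop above, reduced to a self-contained sketch (all numbers hypothetical): a worker's update is accepted only when its validation loss improved by at least the current threshold.

import numpy as np

threshold = 0.5
loss_before = np.array([1.00, 0.90, 0.80])  # validation loss before local training
loss_after = np.array([0.30, 0.85, 0.10])   # validation loss after local training
improvement = loss_before - loss_after      # cf. temp_workers_loss_difference
selected = np.where((improvement >= threshold) & (improvement > 0))[0]
print(selected)  # [0 2] -- workers 0 and 2 pass; worker 1 (0.05) is filtered out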
Ejemplo n.º 28
0
    else:
        exit('Error: unrecognized dataset')
    img_size = dataset_train[0][0].shape

    # build model
    if args.model == 'cnn' and args.dataset == 'cifar':
        net_glob = customCNNCifar(args=args).to(args.device)
        net_glob1 = customCNNCifar(args=args).to(args.device)
        net_glob5 = customCNNCifar(args=args).to(args.device)
        net_glob10 = customCNNCifar(args=args).to(args.device)
        net_glob15 = customCNNCifar(args=args).to(args.device)
        net_glob20 = customCNNCifar(args=args).to(args.device)
        net_glob25 = customCNNCifar(args=args).to(args.device)
        net_glob30 = customCNNCifar(args=args).to(args.device)
    elif args.model == 'cnn' and args.dataset == 'mnist':
        net_glob = CNNMnist(args=args).to(args.device)

    elif args.model == 'mlp':
        len_in = 1
        for x in img_size:
            len_in *= x
        net_glob = MLP(dim_in=len_in, dim_hidden=64, dim_out=args.num_classes).to(args.device)
    else:
        exit('Error: unrecognized model')
    print(net_glob)
    net_glob.train()
    net_glob1.train()
    net_glob5.train()
    net_glob10.train()
    net_glob15.train()
    net_glob20.train()
Ejemplo n.º 29
0
class Server():
    def __init__(self, args, w):
        self.args = args
        self.clients_update_w = []
        self.clients_loss = []
        self.model = CNNMnist(args=args).to(args.device)
        self.model.load_state_dict(w)

    def FedAvg(self):
        # 1. plain / DP:
        # since the noise is added on the client side, the averaging for DP
        # is identical to plain aggregation
        if self.args.mode == 'plain' or self.args.mode == 'DP':
            update_w_avg = copy.deepcopy(self.clients_update_w[0])
            for k in update_w_avg.keys():
                for i in range(1, len(self.clients_update_w)):
                    update_w_avg[k] += self.clients_update_w[i][k]
                update_w_avg[k] = torch.div(update_w_avg[k],
                                            len(self.clients_update_w))
                # state_dict() tensors share storage with the model parameters,
                # so this in-place add applies the averaged update to the model
                self.model.state_dict()[k] += update_w_avg[k]
            return copy.deepcopy(self.model.state_dict()), sum(
                self.clients_loss) / len(self.clients_loss)

        # 2. Paillier:
        # ciphertexts are additively homomorphic, so the encrypted client
        # updates can be summed and averaged without decryption
        elif self.args.mode == 'Paillier':
            update_w_avg = copy.deepcopy(self.clients_update_w[0])
            for k in update_w_avg.keys():
                client_num = len(self.clients_update_w)
                for i in range(1, client_num):
                    for iter in range(len(update_w_avg[k])):
                        update_w_avg[k][iter] += self.clients_update_w[i][k][
                            iter]
                for iter in range(len(update_w_avg[k])):
                    update_w_avg[k][iter] /= client_num
            return update_w_avg, sum(self.clients_loss) / len(
                self.clients_loss)
        else:
            exit('Error: unrecognized mode')

    def test(self, datatest):
        self.model.eval()

        # testing
        test_loss = 0
        correct = 0
        data_loader = DataLoader(datatest, batch_size=self.args.bs)
        for idx, (data, target) in enumerate(data_loader):
            if self.args.gpu != -1:
                data, target = data.cuda(), target.cuda()
            log_probs = self.model(data)

            # sum up batch loss
            test_loss += F.cross_entropy(log_probs, target,
                                         reduction='sum').item()

            # get the index of the max log-probability
            y_pred = log_probs.data.max(1, keepdim=True)[1]
            correct += y_pred.eq(
                target.data.view_as(y_pred)).long().cpu().sum()

        test_loss /= len(data_loader.dataset)
        accuracy = 100.00 * correct / len(data_loader.dataset)
        return accuracy, test_loss
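Why the Paillier branch can average ciphertexts directly: a hedged sketch assuming the python-paillier (`phe`) package, with a locally generated keypair standing in for the `pub`/`priv` globals these snippets import.

from phe import paillier

pub, priv = paillier.generate_paillier_keypair(n_length=1024)
updates = [0.25, -0.15]                  # two clients' plaintext weight updates
cts = [pub.encrypt(u) for u in updates]  # encryption happens client-side
avg_ct = (cts[0] + cts[1]) / len(cts)    # homomorphic add, then scalar divide
print(priv.decrypt(avg_ct))              # ≈ 0.05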