Ejemplo n.º 1
0
def test_federated_dataloader_shuffle(workers):
    """Shuffled batches must still be served worker-by-worker (all of bob's
    batches before alice's), and ``num_iterators`` must be capped at
    ``n_workers - 1`` when as many iterators as workers are requested."""
    bob = workers["bob"]
    alice = workers["alice"]
    # bob holds 2 samples (1 batch), alice holds 4 samples (2 batches).
    datasets = [
        fl.BaseDataset(th.tensor([1, 2]), th.tensor([1, 2])).send(bob),
        fl.BaseDataset(th.tensor([3, 4, 5, 6]), th.tensor([3, 4, 5,
                                                           6])).send(alice),
    ]
    fed_dataset = sy.FederatedDataset(datasets)

    fdataloader = sy.FederatedDataLoader(fed_dataset,
                                         batch_size=2,
                                         shuffle=True)
    # Several epochs to check that shuffling never interleaves workers.
    for epoch in range(3):
        counter = 0
        for batch_idx, (data, target) in enumerate(fdataloader):
            if counter < 1:  # one batch for bob, two batches for alice (batch_size == 2)
                assert (
                    data.location.id == "bob"
                ), f"id should be bob, counter = {counter}, epoch = {epoch}"
            else:
                assert (
                    data.location.id == "alice"
                ), f"id should be alice, counter = {counter}, epoch = {epoch}"
            counter += 1
        assert counter == len(fdataloader), f"{counter} == {len(fdataloader)}"

    num_iterators = 2
    fdataloader = sy.FederatedDataLoader(fed_dataset,
                                         batch_size=2,
                                         num_iterators=num_iterators,
                                         shuffle=True)
    # With 2 workers the loader keeps at most num_workers - 1 iterators.
    assert (fdataloader.num_iterators == num_iterators -
            1), f"{fdataloader.num_iterators} == {num_iterators - 1}"
Ejemplo n.º 2
0
def test_federated_dataloader(workers):
    """Iterating a FederatedDataLoader must yield exactly len(loader)
    batches, both with and without ``drop_last``."""
    bob = workers["bob"]
    alice = workers["alice"]
    remote_datasets = [
        fl.BaseDataset(th.tensor([1, 2]), th.tensor([1, 2])).send(bob),
        fl.BaseDataset(
            th.tensor([3, 4, 5, 6]), th.tensor([3, 4, 5, 6])
        ).send(alice),
    ]
    fed_dataset = sy.FederatedDataset(remote_datasets)

    # Default loader (keeps the final partial batch).
    fdataloader = sy.FederatedDataLoader(fed_dataset, batch_size=2)
    counter = sum(1 for _batch in fdataloader)
    assert counter == len(fdataloader), f"{counter} == {len(fdataloader)}"

    # Same check with the last incomplete batch dropped.
    fdataloader = sy.FederatedDataLoader(
        fed_dataset, batch_size=2, drop_last=True
    )
    counter = sum(1 for _batch in fdataloader)
    assert counter == len(fdataloader), f"{counter} == {len(fdataloader)}"
Ejemplo n.º 3
0
def get_dataloaders(file,
                    logs,
                    tr_data_dstr,
                    test_data_distr,
                    num_workers=4,
                    train_batch_size=16,
                    test_batch_size=16,
                    size_split=None):
    """Split a CSV dataset among virtual workers and build federated loaders.

    Args:
        file: Path to the CSV file holding the full dataset.
        logs: Logger object used to plot and save the resulting distributions.
        tr_data_dstr: Name of the split strategy for the training data.
        test_data_distr: Name of the split strategy for the test data.
        num_workers: Number of virtual workers (forced to 4 for "by_attack").
        train_batch_size: Batch size for the federated train loader.
        test_batch_size: Batch size for the federated test loader.
        size_split: Optional size hint forwarded to the train split.

    Returns:
        (fed_loader_train, fed_loader_test, workers, worker_sizes)
    """
    # ``squeeze=True`` was deprecated in pandas 1.4 and removed in 2.0;
    # DataFrame.squeeze("columns") is the supported equivalent (collapses
    # a single-column frame to a Series, otherwise returns it unchanged).
    dataset = pd.read_csv(file, low_memory=False).squeeze("columns")

    # Create Virtual Workers
    hook = sy.TorchHook(torch)
    workers = [
        sy.VirtualWorker(hook, id="worker" + str(idx))
        for idx in range(num_workers)
    ]

    # Set aside the test dataset, which will be the same for all the workers
    train_data, test_data = _train_validation_split(dataset, 10)

    # If by_attack - ignore the number of workers.
    # NOTE(review): the worker list above was built with the original
    # num_workers; if "by_attack" changes it, the length assert below may
    # fire — confirm intended.
    if tr_data_dstr == "by_attack" or test_data_distr == "by_attack":
        num_workers = 4

    distr = data_distribution.Distribute(num_workers)

    train_data_subsets, train_distribution = distr.perform_split(
        tr_data_dstr, train_data, size=size_split)
    test_data_subsets, test_distribution = distr.perform_split(
        test_data_distr, test_data)

    logs.plot_distribution(train_distribution, "train_distribution")
    logs.plot_distribution(test_distribution, "test_distribution")
    logs.save_loaders(train_data_subsets, test_data_subsets)

    # Remember how many samples each worker has (needed for FedAvg)
    worker_sizes = [len(value) for value in train_data_subsets.values()]
    assert len(worker_sizes) == len(workers)

    fed_dataset_train = _distribute_among_workers(train_data_subsets, workers)
    fed_dataset_test = _distribute_among_workers(test_data_subsets, workers)

    fed_loader_train = sy.FederatedDataLoader(fed_dataset_train,
                                              batch_size=train_batch_size,
                                              shuffle=True)
    fed_loader_test = sy.FederatedDataLoader(fed_dataset_test,
                                             batch_size=test_batch_size,
                                             shuffle=True)

    return fed_loader_train, fed_loader_test, workers, worker_sizes
Ejemplo n.º 4
0
    def construct_FL_loader(data_pointer, **kwargs):
        """Cast paired data & labels into a configured federated dataloader.

        Args:
            data_pointer (list(sy.BaseDataset)): Remote dataset shards to wrap
            kwargs: Additional parameters forwarded to sy.FederatedDataLoader
        Returns:
            Configured federated dataloader (sy.FederatedDataLoader)
        """
        federated_dataset = sy.FederatedDataset(data_pointer)

        # `model_hyperparams` is captured from the enclosing scope; a falsy
        # batch_size (None/0) falls back to full-batch loading.
        federated_data_loader = sy.FederatedDataLoader(
            federated_dataset,
            batch_size=(
                model_hyperparams['batch_size']
                if model_hyperparams['batch_size']
                else len(federated_dataset)
            ),
            shuffle=True,
            iter_per_worker=True, # for subsequent parallelization
            **kwargs
        )

        return federated_data_loader
Ejemplo n.º 5
0
def get_dataloaders(batch_size: int, federate_workers: list = None, **kwargs):
    """Return (train_loader, test_loader); the training loader is federated
    across *federate_workers* when they are given, otherwise local."""
    train_dataset, test_dataset = get_datasets()

    if federate_workers is None:
        # Plain local loader when no workers were supplied.
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=batch_size, shuffle=True, **kwargs)
    else:
        # Shard the training data across the supplied workers.
        train_loader = sy.FederatedDataLoader(
            train_dataset.federate(federate_workers),  #pylint: disable=no-member
            batch_size=batch_size,
            shuffle=True,
            **kwargs)

    # The test loader is always a regular (non-federated) DataLoader.
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=batch_size, shuffle=True, **kwargs)

    return train_loader, test_loader
Ejemplo n.º 6
0
def load_data():
    """Load CIFAR10 from torchvision and distribute the training split to
    (bob, alice) via PySyft's FederatedDataLoader; the test split stays
    local in a regular DataLoader."""
    cifar_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    train_set = datasets.CIFAR10('../data',
                                 train=True,
                                 download=True,
                                 transform=cifar_transform)
    # federate() turns the torchvision dataset into a FederatedDataset
    # spread across both workers.
    federated_train_loader = sy.FederatedDataLoader(
        train_set.federate((bob, alice)),
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs)

    test_set = datasets.CIFAR10('../data',
                                train=False,
                                transform=cifar_transform)
    test_loader = torch.utils.data.DataLoader(
        test_set,
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs)

    return federated_train_loader, test_loader
Ejemplo n.º 7
0
def test_federated_dataset(workers):
    """Smoke test: build a FederatedDataset from VirtualGrid search results
    and iterate it for two epochs without error."""
    bob = workers["bob"]
    alice = workers["alice"]

    grid = sy.VirtualGrid(*[bob, alice])

    # Push tagged tensors to each worker so the grid search can find them.
    for worker in (bob, alice):
        th.Tensor(th.zeros(1000, 100)).tag("data").send(worker)
        th.Tensor(th.zeros(1000, 100)).tag("target").send(worker)

    data, _ = grid.search("data")
    target, _ = grid.search("target")

    dataset = sy.FederatedDataset(data, target)
    train_loader = sy.FederatedDataLoader(
        dataset, batch_size=4, shuffle=False, drop_last=False
    )

    for _epoch in range(1, 3):
        for _batch_idx, (data, target) in enumerate(train_loader):
            pass
Ejemplo n.º 8
0
def test_federated_dataset_search(workers):
    """Build a FederatedDataset from PrivateGridNetwork search results and
    check the loader yields exactly len(train_loader) batches."""
    bob = workers["bob"]
    alice = workers["alice"]

    grid = sy.PrivateGridNetwork(*[bob, alice])

    # Push tagged tensors to each worker so grid.search can locate them.
    train_bob = th.Tensor(th.zeros(1000, 100)).tag("data").send(bob)
    target_bob = th.Tensor(th.zeros(1000, 100)).tag("target").send(bob)

    train_alice = th.Tensor(th.zeros(1000, 100)).tag("data").send(alice)
    target_alice = th.Tensor(th.zeros(1000, 100)).tag("target").send(alice)

    data = grid.search("data")
    target = grid.search("target")

    datasets = [
        BaseDataset(data["bob"][0], target["bob"][0]),
        BaseDataset(data["alice"][0], target["alice"][0]),
    ]

    fed_dataset = sy.FederatedDataset(datasets)
    train_loader = sy.FederatedDataLoader(fed_dataset,
                                          batch_size=4,
                                          shuffle=False,
                                          drop_last=False)

    counter = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        counter += 1

    # Bug fix: the failure message used to interpolate len(fed_dataset)
    # (number of samples) while the comparison is against len(train_loader)
    # (number of batches) -- the message now matches the assertion.
    assert counter == len(train_loader), f"{counter} == {len(train_loader)}"
Ejemplo n.º 9
0
def experiment(num_workers, no_cuda):
    """Run one federated MNIST training experiment and report memory exchanged.

    Args:
        num_workers: Number of virtual clients to federate the data across.
        no_cuda: If True, force CPU execution.

    Returns:
        torch.Tensor of shape (num_workers,) accumulating per-client
        memory exchanged (filled in by train()).
    """
    # Creating num_workers clients
    clients = []
    hook = sy.TorchHook(torch)
    clients_mem = torch.zeros(num_workers)
    for i in range(num_workers):
        clients.append(sy.VirtualWorker(hook, id="c " + str(i)))

    # Initializing arguments, with GPU usage or not
    args = Arguments(no_cuda)

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    if use_cuda:
        # TODO Quickhack. Actually need to fix the problem moving the model to CUDA
        torch.set_default_tensor_type(torch.cuda.FloatTensor)

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 0, 'pin_memory': False} if use_cuda else {}

    # Federated data loader: the MNIST training set is split across clients.
    federated_train_loader = sy.FederatedDataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])).federate(clients),
        batch_size=args.batch_size, shuffle=True, iter_per_worker=True,
        **kwargs)

    # The test set stays local in a regular DataLoader.
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)

    start = time.time()
    model = Net().to(device)
    # Cleanup: an optim.SGD optimizer was created here but never used --
    # train() receives args.lr and manages its own parameter updates.

    for epoch in range(1, args.epochs + 1):
        model = train(args, model, device, federated_train_loader, args.lr,
                      args.federate_after_n_batches, epoch, clients_mem)
        test(args, model, device, test_loader)
        t = time.time()
        print(t - start)
    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")

    end = time.time()
    print(end - start)
    print("Memory exchanged : ", clients_mem)
    return clients_mem
Ejemplo n.º 10
0
def experiment(num_workers, no_cuda):
    """Run one federated CIFAR10 training experiment with VGG11 and return
    the per-client memory-exchanged counters.

    Args:
        num_workers: Number of virtual clients the training data is split over.
        no_cuda: If True, force CPU execution.

    Returns:
        torch.Tensor of shape (num_workers,) accumulating per-client
        memory exchanged (filled in by train()).
    """
    # Creating num_workers clients
    clients = []
    hook = sy.TorchHook(torch)
    clients_mem = torch.zeros(num_workers)
    for i in range(num_workers):
        clients.append(sy.VirtualWorker(hook, id="c " + str(i)))

    # Initializing arguments, with GPU usage or not
    args = Arguments(no_cuda)

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    if use_cuda:
        torch.set_default_tensor_type(torch.cuda.FloatTensor)

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 0, 'pin_memory': False} if use_cuda else {}

    # Federated data loader
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    federated_train_loader = sy.FederatedDataLoader(
        datasets.CIFAR10('../data',
                         train=True,
                         download=True,
                         transform=transform).federate(clients),
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs)

    # Test data stays local in a regular DataLoader.
    test_loader = torch.utils.data.DataLoader(datasets.CIFAR10(
        '../data', train=False, transform=transform),
                                              batch_size=args.test_batch_size,
                                              shuffle=True,
                                              **kwargs)

    start = time.time()
    model = vgg11().to(device)
    optimizer = optim.SGD(model.parameters(), lr=args.lr)
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, federated_train_loader, optimizer, epoch,
              clients_mem)
        test(args, model, device, test_loader)
        t = time.time()
        print(t - start)
    # NOTE(review): filename says "mnist" but this saves the CIFAR10/VGG
    # model -- confirm the intended checkpoint name.
    if (args.save_model):
        torch.save(model.state_dict(), "mnist_cnn.pt")

    end = time.time()
    print(end - start)
    print("Memory exchanged : ", clients_mem)
    return clients_mem
Ejemplo n.º 11
0
def get_dataloaders(tr_data_dstr,
                    test_data_distr,
                    dataset_name,
                    train_batch_size,
                    test_batch_size,
                    num_workers,
                    sub_sample=None):
    """Create federated train/test loaders for a named dataset.

    Args:
        tr_data_dstr: Split strategy for the training data.
        test_data_distr: Split strategy for the test data.
        dataset_name: Name understood by _get_data().
        train_batch_size: Batch size for the federated train loader.
        test_batch_size: Batch size for the federated test loader.
        num_workers: Number of virtual workers to create.
        sub_sample: Unused here -- TODO confirm whether it should be
            forwarded to the samplers.

    Returns:
        (fed_loader_train, fed_loader_test, workers)
    """
    # Create Virtual Workers
    hook = sy.TorchHook(torch)
    workers = []
    for idx in range(num_workers):
        workers.append(sy.VirtualWorker(hook, id="worker" + str(idx)))

    # Load the dataset
    trainset, testset = _get_data(dataset_name)
    print("Total number in trainset", len(trainset))

    n_classes = max(len(set(trainset.targets)), len(set(testset.targets)))
    _check_users_validity(tr_data_dstr, n_classes)

    # NOTE(review): samplers are capped at the first 10 -- presumably a cap
    # on the number of agents; confirm intent when num_workers > 10.
    train_samplers = _create_samplers(trainset, tr_data_dstr)[:10]
    test_samplers = _create_samplers(testset, test_data_distr)[:10]

    print("The number of train samples per agent ",
          [len(s) for i, s in enumerate(train_samplers)])
    print("The number of test samples per agent ",
          [len(s) for i, s in enumerate(test_samplers)])

    fed_dataset_train = _distribute_among_workers(train_samplers, trainset,
                                                  workers)
    fed_dataset_test = _distribute_among_workers(test_samplers, testset,
                                                 workers)

    print(fed_dataset_train, "\n", fed_dataset_test)
    fed_loader_train = sy.FederatedDataLoader(fed_dataset_train,
                                              batch_size=train_batch_size)
    fed_loader_test = sy.FederatedDataLoader(fed_dataset_test,
                                             batch_size=test_batch_size)

    return fed_loader_train, fed_loader_test, workers
Ejemplo n.º 12
0
def test_federated_dataloader_one_worker(workers):
    """A loader over a single-worker dataset exposes exactly one iterator."""
    bob = workers["bob"]

    datasets = [
        fl.BaseDataset(th.tensor([3, 4, 5, 6]), th.tensor([3, 4, 5,
                                                           6])).send(bob)
    ]

    fed_dataset = sy.FederatedDataset(datasets)
    # Cleanup: a `num_iterators = len(datasets)` local was computed here but
    # never used -- removed.
    fdataloader = sy.FederatedDataLoader(fed_dataset,
                                         batch_size=2,
                                         shuffle=True)
    assert fdataloader.num_iterators == 1, f"{fdataloader.num_iterators} == {1}"
def malaria(federate, num_worker=2):
    """
    Function to benchmark different numbers of workers and compare it with regular execution

    :param bool federate: Whether to use federated training
    :param int num_worker:  Number of workers
    :return: time for training
    """
    image_size = 128  # NOTE(review): unused local; Simple_CNN_e2 is given 128 directly
    args = Arguments()
    use_cuda = False  # not args.no_cuda and torch.cuda.is_available()
    # torch.manual_seed(args.seed)
    device = torch.device("cpu")
    kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {}

    model = Simple_CNN_e2(128).to(device)
    optimizer = optim.SGD(model.parameters(), lr=args.lr)
    train_set, test_set = create_federated_dataset()
    train_dataset = DatasetFromSubset(train_set)

    # Only difference in setup: federated runs shard the data across the
    # module-level workers (bob, alice, mike, zoe).
    if federate:
        if num_worker == 2:
            workers = (bob, alice)
        elif num_worker == 3:
            workers = (bob, alice, mike)
        elif num_worker == 4:
            workers = (bob, alice, mike, zoe)
        else:
            raise NotImplementedError

        train_loader = sy.FederatedDataLoader(
            train_dataset.federate(workers),
            batch_size=args.batch_size,
            shuffle=True,
            **kwargs
        )
    else:
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=args.batch_size, shuffle=True, **kwargs
        )

    # Train cycles: only the training loop is timed, not the setup above.
    start = timeit.default_timer()
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch, federate)
    end = timeit.default_timer()

    return end - start
Ejemplo n.º 14
0
def main():
    """Federated MNIST training across n_hospitals virtual workers using a
    collaborative-learning loop (colearn_train)."""
    args = Arguments()

    hospitals = []
    for i in range(args.n_hospitals):
        hospitals.append(sy.VirtualWorker(hook, id="hospital " + str(i)))

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # Training data is federated across the hospitals.
    # NOTE(review): iter_per_worker presumably makes the loader yield batches
    # grouped per worker -- confirm against the PySyft version in use.
    federated_train_loader = sy.FederatedDataLoader(datasets.MNIST(
        '../data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])).federate(hospitals),
                                                    batch_size=args.batch_size,
                                                    shuffle=True,
                                                    iter_per_worker=True,
                                                    **kwargs)

    # The test set stays local in a regular DataLoader.
    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                              batch_size=args.test_batch_size,
                                              shuffle=True,
                                              **kwargs)

    model = Net().to(device)
    optimizer = optim.SGD(
        model.parameters(),
        lr=args.lr)  # TODO momentum is not supported at the moment

    for epoch in range(1, args.epochs + 1):
        colearn_train(args, model, device, federated_train_loader, optimizer,
                      epoch, hospitals)
        test(args, model, device, test_loader)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
Ejemplo n.º 15
0
def main():
    """Collaborative federated training on an X-ray dataset: each epoch one
    hospital proposes an update, the others vote whether to accept it."""
    args = Arguments()
    torch.manual_seed(args.seed)

    hospitals = []
    for i in range(args.n_hospitals):
        hospitals.append(sy.VirtualWorker(hook, id="hospital " + str(i)))

    model = Net()
    summary(model, input_size=(1, 128, 128))

    # make an unfederated data loader for testing
    test_dataset = XrayDataset(args.data_dir,
                               train=False,
                               train_ratio=args.train_ratio)

    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.test_batch_size,
                                              shuffle=True)

    # show initial accuracy
    test(args, model, test_loader)

    # make a federated data loader for training
    train_dataset = XrayDataset(args.data_dir, train_ratio=args.train_ratio)

    fed_train_dataset = train_dataset.federate(hospitals)
    fed_train_loader = sy.FederatedDataLoader(fed_train_dataset,
                                              batch_size=args.batch_size,
                                              iter_per_worker=True,
                                              shuffle=True)
    optimizer = optim.SGD(model.parameters(), lr=args.lr)

    # current_performance = test_on_training_set(args, model, fdataloader, hospitals)
    # Every hospital's recorded performance starts at zero.
    current_performance = {w: 0 for w in hospitals}
    for epoch in range(1, args.epochs + 1):
        # Snapshot weights so a rejected update can be rolled back.
        # NOTE(review): state_dict() returns tensor references, not copies --
        # confirm the rollback below actually restores the old weights.
        current_weights = model.state_dict()
        proposer = colearn_train(args, model, fed_train_loader, optimizer,
                                 epoch, hospitals)
        update_accepted, new_performance = vote(model, args,
                                                current_performance,
                                                fed_train_loader, hospitals,
                                                proposer)
        if update_accepted:
            current_performance = new_performance
            test(args, model, test_loader)
        else:
            # load the old weights into the model
            model.load_state_dict(current_weights)
Ejemplo n.º 16
0
def GetTrainLoader(workers: tuple, args):
    """Build a federated MNIST training loader spread over *workers*."""
    mnist_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, )),
    ])
    train_set = datasets.MNIST(
        "../data",
        train=True,
        download=True,
        transform=mnist_transform,
    )
    # federate() shards the dataset across every worker in the tuple.
    loader = sy.FederatedDataLoader(
        train_set.federate(tuple(workers)),
        batch_size=args.batch_size,
        shuffle=True,
        iter_per_worker=True,
    )
    return loader
    def prepare_federated_iid_data_parallel(self, train=True):
        """Distribute either the train or the test data IID across workers.

        Each worker ends up holding several batches.

        Args:
            train (bool): Use self.train_data when True, else self.test_data.
        Returns:
            syft.FederatedDataLoader spanning all of self.workers.
        """

        data = self.train_data if train == True else self.test_data

        print("Distributing data...")
        federated_iid_data_loader = syft.FederatedDataLoader(
            data.federate(self.workers),
            batch_size=self.batch_size,
            shuffle=True)

        return federated_iid_data_loader
def getTrainDataLoader(workers, batch_size):
    """Return a federated MNIST train loader sharded over the module-level
    workers (bob, alice).

    NOTE: the *workers* parameter is kept for interface compatibility but is
    not consulted -- the original implementation ignored it too.
    """
    mnist_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])

    mnist_train = datasets.MNIST('../data', train=True,
                                 download=True, transform=mnist_transform)

    federated_train_loader = sy.FederatedDataLoader(
        mnist_train.federate((bob, alice)),
        batch_size=batch_size, shuffle=True,
        num_iterators=2, iter_per_worker=True)

    return federated_train_loader
Ejemplo n.º 19
0
def test_extract_batches_per_worker(workers):
    """Every worker holding data must appear as a key in the dictionary
    returned by utils.extract_batches_per_worker."""
    bob = workers["bob"]
    alice = workers["alice"]

    remote_sets = [
        fl.BaseDataset(th.tensor([1, 2]), th.tensor([1, 2])).send(bob),
        fl.BaseDataset(
            th.tensor([3, 4, 5, 6]), th.tensor([3, 4, 5, 6])
        ).send(alice),
    ]
    loader = sy.FederatedDataLoader(
        sy.FederatedDataset(remote_sets), batch_size=2, shuffle=True
    )

    per_worker = utils.extract_batches_per_worker(loader)

    assert len(per_worker.keys()) == len(
        remote_sets
    ), "each worker should appear as key in the batches dictionary"
def mnist(federate):
    """Time one MNIST training run, federated across (bob, alice) or local.

    :param bool federate: use a FederatedDataLoader when True
    :return: wall-clock seconds spent in the training loop
    """
    args = Arguments()
    use_cuda = False
    # torch.manual_seed(args.seed)
    device = torch.device("cpu")
    kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {}

    model = Net().to(device)
    optimizer = optim.SGD(
        model.parameters(), lr=args.lr
    )  # TODO momentum is not supported at the moment

    if federate:
        train_loader = sy.FederatedDataLoader(
            datasets.MNIST(
                Path("../../data"),
                train=True,
                download=True,
                transform=transforms.Compose(
                    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
                ),
            ).federate((bob, alice)),
            batch_size=args.batch_size,
            shuffle=True,
            **kwargs
        )

    else:
        # NOTE(review): the non-federated branch trains on the MNIST *test*
        # split (train=False) with test_batch_size -- confirm this asymmetry
        # with the federated branch is intended for the benchmark.
        train_loader = torch.utils.data.DataLoader(
            datasets.MNIST(
                Path("../../data"),
                train=False,
                transform=transforms.Compose(
                    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
                ),
            ),
            batch_size=args.test_batch_size,
            shuffle=True,
            **kwargs
        )

    # Only the training loop itself is timed.
    start = timeit.default_timer()
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch, federate)
    end = timeit.default_timer()
    return end - start
Ejemplo n.º 21
0
    def collect_datasets(self, grid):
        """Search the grid for #X/#Y tensors in each split (train/valid/test)
        and build one FederatedDataLoader per split.

        Returns:
            list of three sy.FederatedDataLoader in train/valid/test order.
        """
        loaders = []
        for split in ('train', 'valid', 'test'):
            found_X = grid.search("#X", f"#{split}")
            found_y = grid.search("#Y", f"#{split}")

            # Pair each worker's X pointer with its Y pointer.
            parts = [
                sy.BaseDataset(found_X[worker][0], found_y[worker][0])
                for worker in found_X.keys()
            ]

            loaders.append(
                sy.FederatedDataLoader(
                    sy.FederatedDataset(parts),
                    batch_size=self.model_config.batch_size))

        return loaders
Ejemplo n.º 22
0
def load_data():
    """Return a federated CIFAR10 train loader over (bob, alice) and a
    regular local test loader."""
    normalize = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    train_set = datasets.CIFAR10('../data', train=True, download=True,
                                 transform=normalize)
    federated_train_loader = sy.FederatedDataLoader(
        train_set.federate((bob, alice)),
        batch_size=args.batch_size, shuffle=True, **kwargs)

    test_set = datasets.CIFAR10('../data', train=False, transform=normalize)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=args.test_batch_size, shuffle=True, **kwargs)

    return federated_train_loader, test_loader
Ejemplo n.º 23
0
def run(constant_overwrites):
    """Federated MNIST training across two virtual organisations.

    Args:
        constant_overwrites: dict of hyperparameter overrides merged over the
            values loaded from hyperparams.yml.
    """
    config_path = ROOT_DIR / 'hyperparams.yml'
    constants = merge_dict(load_hyperparams(config_path), constant_overwrites)
    use_cuda = constants['cuda'] and torch.cuda.is_available()
    hook = sy.TorchHook(torch)

    # The organisations that will participate in training
    org1 = sy.VirtualWorker(hook, id="org1")
    org2 = sy.VirtualWorker(hook, id="org2")

    torch.manual_seed(constants['seed'])
    device = torch.device('cuda' if use_cuda else 'cpu')
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    batch_size = constants['batch_size']
    test_batch_size = constants['test_batch_size']
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])
    train_dataset = datasets.MNIST('../data',
                                   train=True,
                                   download=True,
                                   transform=transform)
    federated_train_loader = sy.FederatedDataLoader(train_dataset.federate(
        (org1, org2)),
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    **kwargs)

    # Bug fix: the test loader previously reused the *training* dataset, so
    # the model was evaluated on data it had been trained on. Evaluate on
    # the held-out MNIST test split instead.
    test_dataset = datasets.MNIST('../data',
                                  train=False,
                                  download=True,
                                  transform=transform)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=test_batch_size,
                                              shuffle=True,
                                              **kwargs)

    model = CNN().to(device)
    optimizer = optim.SGD(model.parameters(), lr=constants['learning_rate'])

    for epoch in range(1, constants['n_epochs'] + 1):
        train(constants, model, device, federated_train_loader, optimizer,
              epoch)
        test(constants, model, device, test_loader)

    if constants['save_model']:
        torch.save(model.state_dict(), 'mnist_cnn.pt')
Ejemplo n.º 24
0
def test_federated_dataloader_num_iterators(workers):
    """With multiple iterators, each step yields a dict of one batch per
    active worker; the loader caps iterators at n_workers - 1 and rotates a
    new worker in (james replaces bob) when one is exhausted."""
    bob = workers["bob"]
    alice = workers["alice"]
    james = workers["james"]
    # bob: 1 batch, alice: 2 batches, james: 2 batches (batch_size == 2).
    datasets = [
        fl.BaseDataset(th.tensor([1, 2]), th.tensor([1, 2])).send(bob),
        fl.BaseDataset(th.tensor([3, 4, 5, 6]), th.tensor([3, 4, 5,
                                                           6])).send(alice),
        fl.BaseDataset(th.tensor([7, 8, 9, 10]), th.tensor([7, 8, 9,
                                                            10])).send(james),
    ]

    fed_dataset = sy.FederatedDataset(datasets)
    num_iterators = len(datasets)
    fdataloader = sy.FederatedDataLoader(fed_dataset,
                                         batch_size=2,
                                         num_iterators=num_iterators,
                                         shuffle=True)
    # Requesting as many iterators as workers: capped at num_workers - 1.
    assert (fdataloader.num_iterators == num_iterators -
            1), f"{fdataloader.num_iterators} == {num_iterators - 1}"
    counter = 0
    for batch_idx, batches in enumerate(fdataloader):
        assert (len(batches.keys()) == num_iterators -
                1), f"len(batches.keys()) == {num_iterators} - 1"
        if batch_idx < 1:
            data_bob, target_bob = batches[bob]
            assert data_bob.location.id == "bob", "id should be bob, batch_idx = {0}".format(
                batch_idx)
        else:  # bob is replaced by james
            data_james, target_james = batches[james]
            assert data_james.location.id == "james", "id should be james, batch_idx = {0}".format(
                batch_idx)
        if batch_idx < 2:
            data_alice, target_alice = batches[alice]
            assert data_alice.location.id == "alice", "id should be alice, batch_idx = {0}".format(
                batch_idx)
        counter += 1
    epochs = num_iterators - 1
    # Each step consumed (num_iterators - 1) batches, so the totals match.
    assert counter * (num_iterators - 1) == epochs * len(
        fdataloader), " == epochs * len(fdataloader)"
def load_data():
    """Load the MNIST dataset and return (federated_train_loader, test_loader).

    The training split is distributed across the (bob, alice) workers via
    PySyft's ``.federate()`` and wrapped in a FederatedDataLoader; the test
    split stays local behind a standard torch DataLoader.

    Fix: the previous docstring claimed CIFAR was loaded — the code loads
    MNIST from torchvision.
    """
    # .federate() turns the torchvision dataset into a FederatedDataset
    # spread over the two remote workers.
    train_dataset = torchvision.datasets.MNIST(
        './mnist/',
        train=True,
        download=DOWNLOAD_MNIST,
        transform=torchvision.transforms.ToTensor(),
    ).federate((bob, alice))
    federated_train_loader = sy.FederatedDataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs)

    # Evaluation data remains local, so a plain DataLoader suffices.
    test_dataset = torchvision.datasets.MNIST(
        './mnist/', train=False, transform=torchvision.transforms.ToTensor())
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs)

    return federated_train_loader, test_loader
Ejemplo n.º 26
0
    def create_federated_mnist(self, dataset, destination_idx, batch_size,
                               shuffle):
        """Federate *dataset* across the selected workers and wrap it in a
        FederatedDataLoader.

        Args:
            dataset (FLCustomDataset): Dataset to be federated.
            destination_idx (list[str]): Ids of the destination workers;
                the special id "server" routes everything to the central
                server instead.
            batch_size (int): Batch size for the returned loader.
            shuffle (bool): Whether the loader shuffles the data.

        Returns:
            sy.FederatedDataLoader: Loader over the federated dataset.
            ``drop_last=True``, so a trailing partial batch is dropped.
        """
        workers = []
        if "server" in destination_idx:
            workers.append(self.server)
        else:
            # Keep only the workers whose id was requested.
            # (Replaces the previous `cond and list.append(...)` trick with
            # an explicit `if` — same behavior, idiomatic form.)
            for worker_id, worker in self.workers.items():
                if worker_id in destination_idx:
                    workers.append(worker)

        fed_dataloader = sy.FederatedDataLoader(dataset.federate(workers),
                                                batch_size=batch_size,
                                                shuffle=shuffle,
                                                drop_last=True)

        return fed_dataloader
Ejemplo n.º 27
0
def test_federated_dataloader_iter_per_worker(workers):
    """With iter_per_worker=True the loader keeps one iterator per worker
    and yields, at each step, a dict mapping every worker to its batch."""
    bob = workers["bob"]
    alice = workers["alice"]
    james = workers["james"]
    datasets = [
        fl.BaseDataset(th.tensor([1, 2]), th.tensor([1, 2])).send(bob),
        fl.BaseDataset(th.tensor([3, 4, 5, 6]), th.tensor([3, 4, 5,
                                                           6])).send(alice),
        fl.BaseDataset(th.tensor([7, 8, 9, 10]), th.tensor([7, 8, 9,
                                                            10])).send(james),
    ]

    fed_dataset = sy.FederatedDataset(datasets)
    fdataloader = sy.FederatedDataLoader(fed_dataset,
                                         batch_size=2,
                                         iter_per_worker=True,
                                         shuffle=True)
    nr_workers = len(datasets)
    # Fix: message typo "number or workers" -> "number of workers".
    assert (fdataloader.num_iterators == nr_workers
            ), "num_iterators should be equal to number of workers"
    for batch_idx, batches in enumerate(fdataloader):
        assert len(
            batches.keys()) == nr_workers, "return a batch for each worker"
Ejemplo n.º 28
0
def init_syft_workers(eth_accounts):
    """Create the virtual PySyft workers, bind each one to an Ethereum
    account, and build a federated train loader plus a local test loader.

    Args:
        eth_accounts (list): Ethereum accounts. Index 0 is skipped —
            presumably reserved for the aggregator; TODO confirm.

    Returns:
        tuple: (workers, federated_train_loader, test_loader,
        worker_eth_accounts).
    """
    # Create workers and split the data between them.
    # Aggregator for now just: hosts the data, tests the model.
    # For future discussion: MPC, rewards, crypto provider.
    workers = data_utils.generate_virtual_workers(number_of_workers, hook)

    # Assign each worker a unique Ethereum account (accounts 1..n).
    worker_eth_accounts = {
        worker.id: eth_accounts[i + 1] for i, worker in enumerate(workers)
    }

    # Kept for its side effect of registering the aggregator with the hook.
    central_server = sy.VirtualWorker(hook, id="aggregator")

    # Use sklearn to split into train and test, stratified on the label.
    # Fix: positional `axis` in DataFrame.drop was removed in pandas 2.0;
    # use the explicit `columns=` keyword instead.
    X_train, X_val, y_train, y_val = train_test_split(
        df.drop(columns=["ICU"]),
        df["ICU"],
        test_size=0.2,
        random_state=101,
        stratify=df["ICU"]
    )

    # Create a federated dataset using BaseDataset for all train
    # frames and randomly share them in an IID manner between clients.
    record_list, result_list = data_utils.split_into_lists(X_train, y_train)
    record_list = data_utils.convert_to_tensors(record_list)
    base_federated_set = sy.BaseDataset(
        record_list, result_list).federate(workers)
    federated_train_loader = sy.FederatedDataLoader(base_federated_set)

    test_list, test_labels = data_utils.split_into_lists(X_val, y_val)
    test_list = data_utils.convert_to_tensors(test_list)
    test_dataset = sy.BaseDataset(test_list, test_labels)
    test_loader = torch.utils.data.DataLoader(test_dataset)
    # TODO: Implement, make necessary imports and
    # update requirements.txt file!
    return workers, federated_train_loader, test_loader, worker_eth_accounts
Ejemplo n.º 29
0
# Define train/test process
for itr in range(1, args.itr_numbers + 1):
    # Select the participants from the total users with the given probability:
    # one binomial draw of size Users_num_total with p = user_sel_prob gives
    # the number of users taking part in this round.
    Users_Current = np.random.binomial(Users_num_total, args.user_sel_prob,
                                       1).sum()
    if Users_Current == 0:
        Users_Current = 1  # guarantee at least one participant per round
    # Compute the standard variance B
    # NOTE(review): ComputeB and S are defined elsewhere in the script —
    # their semantics are not visible from this chunk.
    B = ComputeB(S)

    # Load samples from the participants with the given probability or mini-batch size args.batch_size
    federated_train_loader = sy.FederatedDataLoader(
        Federate_Dataset,
        batch_size=args.batch_size,
        shuffle=True,
        worker_num=Users_Current,
        batch_num=args.batchs_round,
        **kwargs)
    workers_list = federated_train_loader.workers  # List of participants at the current iteration

    # Next two lines are only necessary for synchronous aggregation
    # for idx in range(len(workers_list)):
    #     models[workers_list[idx]] = model

    # Initialize the same model-structure tensor with zero elements
    Collect_Gradients = ZerosGradients(Layers_shape)
    Loss_train = torch.tensor(0.)
    for idx_outer, (train_data,
                    train_targets) in enumerate(federated_train_loader):
        # Look up the local model of the worker holding this batch
        # (keyed by the batch's remote location id).
        model_round = models[train_data.location.id]
#If you have your workers operating remotely, like on Raspberry PIs
#kwargs_websocket_alice = {"host": "35.193.97.131", "hook": hook}
#alice = WebsocketClientWorker(id="alice", port=8777, **kwargs_websocket_alice)
#kwargs_websocket_bob = {"host": "34.68.237.214", "hook": hook}
#bob = WebsocketClientWorker(id="bob", port=8778, **kwargs_websocket_bob)
#workers_virtual = [alice, bob]


# array_lines_proper_dimension = our data points (X)
# categories_numpy = our labels (Y)
langDataset =  LanguageDataset(array_lines_proper_dimension, categories_numpy)

# Assign the data points and the corresponding categories to workers:
# .federate() splits langDataset across workers_virtual, and the
# FederatedDataLoader then serves batches remotely, per worker.
federated_train_loader = sy.FederatedDataLoader(
            langDataset
            .federate(workers_virtual),
            batch_size=args.batch_size)
            
def categoryFromOutput(output):
    """Map a network output tensor to ``(category_name, category_index)``.

    Takes the argmax of *output* via ``topk(1)`` and looks the index up in
    the module-level ``all_categories`` list.
    """
    _, top_i = output.topk(1)  # fix: the value (previously top_n) was unused
    category_i = top_i[0].item()
    return all_categories[category_i], category_i

def timeSince(since):
    """Return the time elapsed since *since* formatted as 'Xm Ys'."""
    elapsed = time.time() - since
    # divmod performs the same floor-division / remainder split as the
    # original floor + subtract sequence.
    minutes, seconds = divmod(elapsed, 60)
    return '%dm %ds' % (minutes, seconds)