def test_federated_dataloader_shuffle(workers):
    """Shuffled FederatedDataLoader visits bob's single batch first, then alice's,
    and reports num_iterators == requested - 1 for a two-worker dataset."""
    bob = workers["bob"]
    alice = workers["alice"]

    datasets = [
        fl.BaseDataset(th.tensor([1, 2]), th.tensor([1, 2])).send(bob),
        fl.BaseDataset(th.tensor([3, 4, 5, 6]), th.tensor([3, 4, 5, 6])).send(alice),
    ]
    fed_dataset = sy.FederatedDataset(datasets)
    fdataloader = sy.FederatedDataLoader(fed_dataset, batch_size=2, shuffle=True)

    for epoch in range(3):
        counter = 0
        for batch_idx, (data, target) in enumerate(fdataloader):
            # one batch for bob, two batches for alice (batch_size == 2)
            if counter < 1:
                assert (
                    data.location.id == "bob"
                ), f"id should be bob, counter = {counter}, epoch = {epoch}"
            else:
                assert (
                    data.location.id == "alice"
                ), f"id should be alice, counter = {counter}, epoch = {epoch}"
            counter += 1
        assert counter == len(fdataloader), f"{counter} == {len(fdataloader)}"

    # With two workers, asking for 2 iterators leaves num_iterators at 1.
    num_iterators = 2
    fdataloader = sy.FederatedDataLoader(
        fed_dataset, batch_size=2, num_iterators=num_iterators, shuffle=True
    )
    assert (
        fdataloader.num_iterators == num_iterators - 1
    ), f"{fdataloader.num_iterators} == {num_iterators - 1}"
def test_federated_dataloader(workers):
    """The number of batches yielded must equal len(loader), with and without drop_last."""
    bob = workers["bob"]
    alice = workers["alice"]

    datasets = [
        fl.BaseDataset(th.tensor([1, 2]), th.tensor([1, 2])).send(bob),
        fl.BaseDataset(th.tensor([3, 4, 5, 6]), th.tensor([3, 4, 5, 6])).send(alice),
    ]
    fed_dataset = sy.FederatedDataset(datasets)

    # Default loader (keeps the last partial batch).
    fdataloader = sy.FederatedDataLoader(fed_dataset, batch_size=2)
    counter = sum(1 for _ in fdataloader)
    assert counter == len(fdataloader), f"{counter} == {len(fdataloader)}"

    # Same dataset, but dropping the last partial batch.
    fdataloader = sy.FederatedDataLoader(fed_dataset, batch_size=2, drop_last=True)
    counter = sum(1 for _ in fdataloader)
    assert counter == len(fdataloader), f"{counter} == {len(fdataloader)}"
def get_dataloaders(file, logs, tr_data_dstr, test_data_distr, num_workers=4,
                    train_batch_size=16, test_batch_size=16, size_split=None):
    """Build federated train/test dataloaders from a CSV dataset.

    Args:
        file: Path of the CSV file containing the samples.
        logs: Logging helper used to plot and persist the data distributions.
        tr_data_dstr: Train split strategy understood by Distribute.perform_split.
        test_data_distr: Test split strategy.
        num_workers: Number of virtual workers to create (forced to 4 for "by_attack").
        train_batch_size: Batch size of the federated train loader.
        test_batch_size: Batch size of the federated test loader.
        size_split: Optional size argument forwarded to the train split.

    Returns:
        (train loader, test loader, list of workers, per-worker train sample counts)
    """
    # Fix: `squeeze=True` was deprecated in pandas 1.4 and removed in 2.0;
    # `.squeeze("columns")` is the documented equivalent (collapses a
    # single-column DataFrame into a Series).
    dataset = pd.read_csv(file, low_memory=False).squeeze("columns")

    # Create virtual workers.
    hook = sy.TorchHook(torch)
    workers = [sy.VirtualWorker(hook, id="worker" + str(idx))
               for idx in range(num_workers)]

    # Set aside the test dataset, which will be the same for all the workers.
    train_data, test_data = _train_validation_split(dataset, 10)

    # If by_attack - ignore the number of workers.
    if tr_data_dstr == "by_attack" or test_data_distr == "by_attack":
        num_workers = 4

    distr = data_distribution.Distribute(num_workers)
    train_data_subsets, train_distribution = distr.perform_split(
        tr_data_dstr, train_data, size=size_split)
    test_data_subsets, test_distribution = distr.perform_split(
        test_data_distr, test_data)

    logs.plot_distribution(train_distribution, "train_distribution")
    logs.plot_distribution(test_distribution, "test_distribution")
    logs.save_loaders(train_data_subsets, test_data_subsets)

    # Remember how many samples each worker has (needed for FedAvg weighting).
    worker_sizes = [len(value) for value in train_data_subsets.values()]
    assert len(worker_sizes) == len(workers)

    fed_dataset_train = _distribute_among_workers(train_data_subsets, workers)
    fed_dataset_test = _distribute_among_workers(test_data_subsets, workers)
    fed_loader_train = sy.FederatedDataLoader(
        fed_dataset_train, batch_size=train_batch_size, shuffle=True)
    fed_loader_test = sy.FederatedDataLoader(
        fed_dataset_test, batch_size=test_batch_size, shuffle=True)
    return fed_loader_train, fed_loader_test, workers, worker_sizes
def construct_FL_loader(data_pointer, **kwargs):
    """ Cast paired data & labels into a configured federated dataloader

        Fix: the previous docstring documented a nonexistent `dataset`
        argument and claimed a th.utils.data.DataLoader return type.

        Args:
            data_pointer (list(sy.BaseDataset)): Datasets/pointers to wrap
            **kwargs: Additional parameters for sy.FederatedDataLoader
        Returns:
            Configured federated dataloader (sy.FederatedDataLoader)
    """
    federated_dataset = sy.FederatedDataset(data_pointer)

    # Fall back to whole-dataset batches when no batch size is configured.
    batch_size = (
        model_hyperparams['batch_size']
        if model_hyperparams['batch_size']
        else len(federated_dataset)
    )

    federated_data_loader = sy.FederatedDataLoader(
        federated_dataset,
        batch_size=batch_size,
        shuffle=True,
        iter_per_worker=True,  # for subsequent parallelization
        **kwargs
    )

    return federated_data_loader
def get_dataloaders(batch_size: int, federate_workers: list = None, **kwargs):
    """Return (train_loader, test_loader).

    The train loader is federated across `federate_workers` when given,
    otherwise a plain DataLoader. The test loader is always local.
    """
    train_dataset, test_dataset = get_datasets()

    if federate_workers is not None:
        train_loader = sy.FederatedDataLoader(
            train_dataset.federate(federate_workers),  #pylint: disable=no-member
            batch_size=batch_size,
            shuffle=True,
            **kwargs)
    else:
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=batch_size, shuffle=True, **kwargs)

    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=batch_size, shuffle=True, **kwargs)

    return train_loader, test_loader
def load_data():
    '''Load the CIFAR10 dataset from torchvision and distribute the training
    split to the workers using PySyft's FederatedDataLoader.'''
    cifar_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    # Training data is split across (bob, alice) -> FederatedDataset.
    federated_train_loader = sy.FederatedDataLoader(
        datasets.CIFAR10('../data', train=True, download=True,
                         transform=cifar_transform).federate((bob, alice)),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    # Test data stays local in a regular DataLoader.
    test_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10('../data', train=False, transform=cifar_transform),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)
    return federated_train_loader, test_loader
def test_federated_dataset(workers):
    """Search a VirtualGrid for tagged tensors and iterate the resulting dataset."""
    bob = workers["bob"]
    alice = workers["alice"]

    grid = sy.VirtualGrid(*[bob, alice])

    # Fix: th.zeros already returns a FloatTensor; wrapping it in
    # th.Tensor(...) only produced a redundant copy.
    train_bob = th.zeros(1000, 100).tag("data").send(bob)
    target_bob = th.zeros(1000, 100).tag("target").send(bob)
    train_alice = th.zeros(1000, 100).tag("data").send(alice)
    target_alice = th.zeros(1000, 100).tag("target").send(alice)

    data, _ = grid.search("data")
    target, _ = grid.search("target")

    dataset = sy.FederatedDataset(data, target)
    train_loader = sy.FederatedDataLoader(
        dataset, batch_size=4, shuffle=False, drop_last=False)

    epochs = 2
    for epoch in range(1, epochs + 1):
        for batch_idx, (data, target) in enumerate(train_loader):
            pass
def test_federated_dataset_search(workers):
    """Build a FederatedDataset from grid search results and count its batches."""
    bob = workers["bob"]
    alice = workers["alice"]

    grid = sy.PrivateGridNetwork(*[bob, alice])

    train_bob = th.Tensor(th.zeros(1000, 100)).tag("data").send(bob)
    target_bob = th.Tensor(th.zeros(1000, 100)).tag("target").send(bob)
    train_alice = th.Tensor(th.zeros(1000, 100)).tag("data").send(alice)
    target_alice = th.Tensor(th.zeros(1000, 100)).tag("target").send(alice)

    data = grid.search("data")
    target = grid.search("target")

    datasets = [
        BaseDataset(data["bob"][0], target["bob"][0]),
        BaseDataset(data["alice"][0], target["alice"][0]),
    ]
    fed_dataset = sy.FederatedDataset(datasets)
    train_loader = sy.FederatedDataLoader(
        fed_dataset, batch_size=4, shuffle=False, drop_last=False)

    counter = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        counter += 1

    # Fix: the assertion compares against len(train_loader), but the old
    # failure message misleadingly printed len(fed_dataset).
    assert counter == len(train_loader), f"{counter} == {len(train_loader)}"
def experiment(num_workers, no_cuda):
    """Run one federated MNIST training experiment and return per-client traffic.

    Args:
        num_workers: Number of virtual clients to create.
        no_cuda: Disable CUDA even when it is available.

    Returns:
        torch.Tensor of memory exchanged per client (filled in by train()).
    """
    # Creating num_workers clients
    clients = []
    hook = sy.TorchHook(torch)
    clients_mem = torch.zeros(num_workers)
    for i in range(num_workers):
        clients.append(sy.VirtualWorker(hook, id="c " + str(i)))

    # Initializing arguments, with GPU usage or not
    args = Arguments(no_cuda)
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    if use_cuda:
        # TODO Quickhack. Actually need to fix the problem moving the model to CUDA
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 0, 'pin_memory': False} if use_cuda else {}

    # Federated data loader: the dataset is distributed across all clients.
    federated_train_loader = sy.FederatedDataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])).federate(clients),
        batch_size=args.batch_size, shuffle=True, iter_per_worker=True, **kwargs)

    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)

    start = time.time()
    model = Net().to(device)
    # Fix: train() receives the learning rate directly, so the previously
    # constructed (and never used) optim.SGD optimizer has been removed.
    for epoch in range(1, args.epochs + 1):
        model = train(args, model, device, federated_train_loader, args.lr,
                      args.federate_after_n_batches, epoch, clients_mem)
        test(args, model, device, test_loader)
        t = time.time()
        print(t - start)

    if (args.save_model):
        torch.save(model.state_dict(), "mnist_cnn.pt")

    end = time.time()
    print(end - start)
    print("Memory exchanged : ", clients_mem)
    return clients_mem
def experiment(num_workers, no_cuda):
    """Run one federated CIFAR10 training experiment and return per-client traffic."""
    # Create the virtual clients.
    hook = sy.TorchHook(torch)
    clients_mem = torch.zeros(num_workers)
    clients = [sy.VirtualWorker(hook, id="c " + str(i)) for i in range(num_workers)]

    # Arguments and device selection.
    args = Arguments(no_cuda)
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    if use_cuda:
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 0, 'pin_memory': False} if use_cuda else {}

    # Federated data loader over CIFAR10, shared transform for both splits.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    federated_train_loader = sy.FederatedDataLoader(
        datasets.CIFAR10('../data', train=True, download=True,
                         transform=transform).federate(clients),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10('../data', train=False, transform=transform),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)

    start = time.time()
    model = vgg11().to(device)
    optimizer = optim.SGD(model.parameters(), lr=args.lr)
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, federated_train_loader, optimizer, epoch, clients_mem)
        test(args, model, device, test_loader)
        t = time.time()
        print(t - start)

    if (args.save_model):
        # NOTE(review): the filename says "mnist" although this trains CIFAR10 —
        # kept as-is to preserve behavior; confirm with callers before renaming.
        torch.save(model.state_dict(), "mnist_cnn.pt")

    end = time.time()
    print(end - start)
    print("Memory exchanged : ", clients_mem)
    return clients_mem
def get_dataloaders(tr_data_dstr, test_data_distr, dataset_name, train_batch_size,
                    test_batch_size, num_workers, sub_sample=None):
    """Create federated train/test loaders for a named dataset.

    Args:
        tr_data_dstr: Train split strategy for _create_samplers.
        test_data_distr: Test split strategy.
        dataset_name: Name resolved by _get_data into (trainset, testset).
        train_batch_size / test_batch_size: Loader batch sizes.
        num_workers: Number of virtual workers to create.
        sub_sample: Unused here; kept for interface compatibility.

    Returns:
        (federated train loader, federated test loader, list of workers)
    """
    # Create virtual workers.
    hook = sy.TorchHook(torch)
    workers = [sy.VirtualWorker(hook, id="worker" + str(idx))
               for idx in range(num_workers)]

    # Load the dataset.
    trainset, testset = _get_data(dataset_name)
    print("Total number in trainset", len(trainset))
    n_classes = max(len(set(trainset.targets)), len(set(testset.targets)))
    _check_users_validity(tr_data_dstr, n_classes)

    # NOTE(review): samplers are capped at 10 regardless of num_workers —
    # confirm this is intended rather than a leftover debug limit.
    train_samplers = _create_samplers(trainset, tr_data_dstr)[:10]
    test_samplers = _create_samplers(testset, test_data_distr)[:10]
    # Fix: enumerate() was unnecessary — only the sampler lengths are printed.
    print("The number of train samples per agent ", [len(s) for s in train_samplers])
    print("The number of test samples per agent ", [len(s) for s in test_samplers])

    fed_dataset_train = _distribute_among_workers(train_samplers, trainset, workers)
    fed_dataset_test = _distribute_among_workers(test_samplers, testset, workers)
    print(fed_dataset_train, "\n", fed_dataset_test)

    fed_loader_train = sy.FederatedDataLoader(fed_dataset_train, batch_size=train_batch_size)
    fed_loader_test = sy.FederatedDataLoader(fed_dataset_test, batch_size=test_batch_size)
    return fed_loader_train, fed_loader_test, workers
def test_federated_dataloader_one_worker(workers):
    """A loader over a single-worker dataset reports exactly one iterator."""
    bob = workers["bob"]

    datasets = [
        fl.BaseDataset(th.tensor([3, 4, 5, 6]), th.tensor([3, 4, 5, 6])).send(bob)
    ]

    fed_dataset = sy.FederatedDataset(datasets)
    fdataloader = sy.FederatedDataLoader(fed_dataset, batch_size=2, shuffle=True)
    # Fix: removed the unused `num_iterators = len(datasets)` local; the
    # assertion only ever compared against the literal 1.
    assert fdataloader.num_iterators == 1, f"{fdataloader.num_iterators} == {1}"
def malaria(federate, num_worker=2):
    """
    Function to benchmark different numbers of workers and compare it with regular execution
    :param bool federate: Whether to use federated training
    :param int num_worker: Number of workers (2-4 supported)
    :return: time for training
    """
    image_size = 128
    args = Arguments()
    use_cuda = False  # not args.no_cuda and torch.cuda.is_available()
    # torch.manual_seed(args.seed)
    device = torch.device("cpu")
    kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {}
    # Fix: reuse image_size instead of repeating the literal 128.
    model = Simple_CNN_e2(image_size).to(device)
    optimizer = optim.SGD(model.parameters(), lr=args.lr)
    train_set, test_set = create_federated_dataset()
    train_dataset = DatasetFromSubset(train_set)

    # Only difference in setup: the if/elif chain over worker counts is
    # replaced by slicing the ordered worker tuple.
    if federate:
        available_workers = (bob, alice, mike, zoe)
        if not 2 <= num_worker <= len(available_workers):
            raise NotImplementedError
        workers = available_workers[:num_worker]
        train_loader = sy.FederatedDataLoader(
            train_dataset.federate(workers),
            batch_size=args.batch_size,
            shuffle=True,
            **kwargs
        )
    else:
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=args.batch_size, shuffle=True, **kwargs
        )

    # Train cycles
    start = timeit.default_timer()
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch, federate)
    end = timeit.default_timer()
    return end - start
def main():
    """Federated MNIST training across virtual 'hospital' workers."""
    args = Arguments()
    hospitals = [sy.VirtualWorker(hook, id="hospital " + str(i))
                 for i in range(args.n_hospitals)]

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # Same normalization for the federated train loader and the local test loader.
    mnist_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, ))
    ])
    federated_train_loader = sy.FederatedDataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=mnist_transform).federate(hospitals),
        batch_size=args.batch_size, shuffle=True, iter_per_worker=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False, transform=mnist_transform),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)

    model = Net().to(device)
    optimizer = optim.SGD(
        model.parameters(),
        lr=args.lr)  # TODO momentum is not supported at the moment

    for epoch in range(1, args.epochs + 1):
        colearn_train(args, model, device, federated_train_loader, optimizer,
                      epoch, hospitals)
        test(args, model, device, test_loader)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
def main():
    """Collaborative training on X-ray data with per-epoch vote gating:
    each epoch one hospital proposes new weights, the group votes, and a
    rejected update rolls the model back to the previous weights."""
    args = Arguments()
    torch.manual_seed(args.seed)
    hospitals = [sy.VirtualWorker(hook, id="hospital " + str(i))
                 for i in range(args.n_hospitals)]

    model = Net()
    summary(model, input_size=(1, 128, 128))

    # Plain (non-federated) loader over the held-out test split.
    test_dataset = XrayDataset(args.data_dir, train=False, train_ratio=args.train_ratio)
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=args.test_batch_size, shuffle=True)

    # Report accuracy before any training.
    test(args, model, test_loader)

    # Federated loader over the training split.
    train_dataset = XrayDataset(args.data_dir, train_ratio=args.train_ratio)
    fed_train_loader = sy.FederatedDataLoader(
        train_dataset.federate(hospitals),
        batch_size=args.batch_size, iter_per_worker=True, shuffle=True)

    optimizer = optim.SGD(model.parameters(), lr=args.lr)
    # current_performance = test_on_training_set(args, model, fdataloader, hospitals)
    current_performance = {w: 0 for w in hospitals}

    for epoch in range(1, args.epochs + 1):
        snapshot = model.state_dict()  # weights to restore if the vote fails
        proposer = colearn_train(args, model, fed_train_loader, optimizer,
                                 epoch, hospitals)
        update_accepted, new_performance = vote(
            model, args, current_performance, fed_train_loader, hospitals, proposer)
        if update_accepted:
            current_performance = new_performance
            test(args, model, test_loader)
        else:
            # Vote rejected: load the old weights back into the model.
            model.load_state_dict(snapshot)
def GetTrainLoader(workers: tuple, args):
    """Build a federated MNIST train loader over the given workers."""
    mnist_train = datasets.MNIST(
        "../data",
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ]),
    )
    return sy.FederatedDataLoader(
        mnist_train.federate(tuple(workers)),
        batch_size=args.batch_size,
        shuffle=True,
        iter_per_worker=True)
def prepare_federated_iid_data_parallel(self, train=True):
    '''Distribute the data in batches to workers.

    Each worker holds several batches.

    Args:
        train: When truthy, federate the training split; otherwise the test split.

    Returns:
        syft.FederatedDataLoader over the selected split.
    '''
    # Fix: `train == True` is an anti-idiom; a plain truth test expresses
    # the intended semantics.
    data = self.train_data if train else self.test_data
    print("Distributing data...")
    federated_iid_data_loader = syft.FederatedDataLoader(
        data.federate(self.workers),
        batch_size=self.batch_size,
        shuffle=True)
    return federated_iid_data_loader
def getTrainDataLoader(workers, batch_size):
    """Create a federated MNIST train loader over the given workers.

    Args:
        workers: Iterable of syft workers to federate the dataset across.
        batch_size: Batch size of the loader.

    Returns:
        sy.FederatedDataLoader with one iterator per worker.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    train_mnist = datasets.MNIST('../data', train=True, download=True,
                                 transform=transform)
    # Fix: the `workers` parameter was ignored — the dataset was always
    # federated across the global (bob, alice) pair regardless of the
    # argument. Federate across the workers the caller actually passed.
    federated_train_loader = sy.FederatedDataLoader(
        train_mnist.federate(tuple(workers)),
        batch_size=batch_size,
        shuffle=True,
        num_iterators=2,  # NOTE(review): hardcoded; iter_per_worker=True likely overrides — confirm
        iter_per_worker=True)
    return federated_train_loader
def test_extract_batches_per_worker(workers):
    """extract_batches_per_worker keys its result by participating worker."""
    bob = workers["bob"]
    alice = workers["alice"]

    remote_datasets = [
        fl.BaseDataset(th.tensor([1, 2]), th.tensor([1, 2])).send(bob),
        fl.BaseDataset(th.tensor([3, 4, 5, 6]), th.tensor([3, 4, 5, 6])).send(alice),
    ]
    loader = sy.FederatedDataLoader(
        sy.FederatedDataset(remote_datasets), batch_size=2, shuffle=True)

    batches = utils.extract_batches_per_worker(loader)

    assert len(batches.keys()) == len(
        remote_datasets
    ), "each worker should appear as key in the batches dictionary"
def mnist(federate):
    """Time MNIST training, federated or local, and return the elapsed seconds.

    Args:
        federate: Run the federated variant (True) or the local baseline (False).

    Returns:
        Wall-clock training time in seconds.
    """
    args = Arguments()
    use_cuda = False
    # torch.manual_seed(args.seed)
    device = torch.device("cpu")
    kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {}
    model = Net().to(device)
    optimizer = optim.SGD(
        model.parameters(), lr=args.lr
    )  # TODO momentum is not supported at the moment

    mnist_transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
    )
    if federate:
        train_loader = sy.FederatedDataLoader(
            datasets.MNIST(
                Path("../../data"), train=True, download=True,
                transform=mnist_transform,
            ).federate((bob, alice)),
            batch_size=args.batch_size,
            shuffle=True,
            **kwargs
        )
    else:
        # Fix: the local baseline previously trained on the TEST split
        # (train=False) with args.test_batch_size, making the federated vs
        # local timing comparison meaningless. Use the same training split
        # and batch size as the federated branch.
        train_loader = torch.utils.data.DataLoader(
            datasets.MNIST(
                Path("../../data"), train=True, download=True,
                transform=mnist_transform,
            ),
            batch_size=args.batch_size,
            shuffle=True,
            **kwargs
        )

    start = timeit.default_timer()
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch, federate)
    end = timeit.default_timer()
    return end - start
def collect_datasets(self, grid):
    """Gather tagged (#X, #Y) tensors for train/valid/test splits from the grid
    and wrap each split in a federated dataloader.

    Returns a list of three sy.FederatedDataLoader objects, ordered
    [train, valid, test].
    """
    loaders = []
    for tag in ('train', 'valid', 'test'):
        found_X = grid.search("#X", f"#{tag}")
        found_y = grid.search("#Y", f"#{tag}")
        split_datasets = [
            sy.BaseDataset(found_X[worker][0], found_y[worker][0])
            for worker in found_X.keys()
        ]
        loaders.append(
            sy.FederatedDataLoader(
                sy.FederatedDataset(split_datasets),
                batch_size=self.model_config.batch_size))
    return loaders
def load_data():
    """Return a federated CIFAR10 train loader over (bob, alice) and a local test loader."""
    normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    train_set = datasets.CIFAR10(
        '../data', train=True, download=True,
        transform=transforms.Compose([transforms.ToTensor(), normalize]))
    test_set = datasets.CIFAR10(
        '../data', train=False,
        transform=transforms.Compose([transforms.ToTensor(), normalize]))

    federated_train_loader = sy.FederatedDataLoader(
        train_set.federate((bob, alice)),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=args.test_batch_size, shuffle=True, **kwargs)
    return federated_train_loader, test_loader
def run(constant_overwrites):
    """Federated MNIST training between two virtual organisations.

    Args:
        constant_overwrites: dict of hyperparameter overrides merged over
            the values loaded from hyperparams.yml.
    """
    config_path = ROOT_DIR / 'hyperparams.yml'
    constants = merge_dict(load_hyperparams(config_path), constant_overwrites)
    use_cuda = constants['cuda'] and torch.cuda.is_available()
    hook = sy.TorchHook(torch)

    # The organisations that will participate in training
    org1 = sy.VirtualWorker(hook, id="org1")
    org2 = sy.VirtualWorker(hook, id="org2")

    torch.manual_seed(constants['seed'])
    device = torch.device('cuda' if use_cuda else 'cpu')
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    batch_size = constants['batch_size']
    test_batch_size = constants['test_batch_size']

    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.1307, ), (0.3081, ))])
    train_dataset = datasets.MNIST('../data', train=True, download=True,
                                   transform=transform)
    # Fix: the test loader previously reused the TRAINING dataset, so the
    # model was evaluated on the same data it trained on. Use the held-out
    # test split instead.
    test_dataset = datasets.MNIST('../data', train=False, transform=transform)

    federated_train_loader = sy.FederatedDataLoader(
        train_dataset.federate((org1, org2)),
        batch_size=batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=test_batch_size, shuffle=True, **kwargs)

    model = CNN().to(device)
    optimizer = optim.SGD(model.parameters(), lr=constants['learning_rate'])
    for epoch in range(1, constants['n_epochs'] + 1):
        train(constants, model, device, federated_train_loader, optimizer, epoch)
        test(constants, model, device, test_loader)

    if constants['save_model']:
        torch.save(model.state_dict(), 'mnist_cnn.pt')
def test_federated_dataloader_num_iterators(workers):
    """With num_iterators == number of workers, the loader runs
    num_iterators - 1 iterators concurrently and rotates bob out for james."""
    bob = workers["bob"]
    alice = workers["alice"]
    james = workers["james"]

    datasets = [
        fl.BaseDataset(th.tensor([1, 2]), th.tensor([1, 2])).send(bob),
        fl.BaseDataset(th.tensor([3, 4, 5, 6]), th.tensor([3, 4, 5, 6])).send(alice),
        fl.BaseDataset(th.tensor([7, 8, 9, 10]), th.tensor([7, 8, 9, 10])).send(james),
    ]
    fed_dataset = sy.FederatedDataset(datasets)
    num_iterators = len(datasets)
    loader = sy.FederatedDataLoader(
        fed_dataset, batch_size=2, num_iterators=num_iterators, shuffle=True)
    assert (
        loader.num_iterators == num_iterators - 1
    ), f"{loader.num_iterators} == {num_iterators - 1}"

    counter = 0
    for batch_idx, batches in enumerate(loader):
        # Every iteration yields one batch per active iterator.
        assert (
            len(batches.keys()) == num_iterators - 1
        ), f"len(batches.keys()) == {num_iterators} - 1"
        if batch_idx < 1:
            # bob's two samples fit into a single batch of size 2
            data_bob, target_bob = batches[bob]
            assert data_bob.location.id == "bob", "id should be bob, batch_idx = {0}".format(
                batch_idx)
        else:
            # bob is replaced by james
            data_james, target_james = batches[james]
            assert data_james.location.id == "james", "id should be james, batch_idx = {0}".format(
                batch_idx)
        if batch_idx < 2:
            data_alice, target_alice = batches[alice]
            assert data_alice.location.id == "alice", "id should be alice, batch_idx = {0}".format(
                batch_idx)
        counter += 1

    epochs = num_iterators - 1
    assert counter * (num_iterators - 1) == epochs * len(
        loader), " == epochs * len(fdataloader)"
def load_data():
    '''Load the MNIST dataset from torchvision and distribute the training
    split to the workers using PySyft's FederatedDataLoader.

    Fix: the previous docstring claimed this loads CIFAR, but the code
    loads MNIST.
    '''
    federated_train_loader = sy.FederatedDataLoader(  # <-- this is now a FederatedDataLoader
        torchvision.datasets.MNIST(
            './mnist/',
            train=True,
            download=DOWNLOAD_MNIST,
            transform=torchvision.transforms.ToTensor(),
        ).federate((bob, alice)),  # <-- distribute the dataset across the workers: FederatedDataset
        batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        torchvision.datasets.MNIST(
            './mnist/', train=False,
            transform=torchvision.transforms.ToTensor()),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)
    return federated_train_loader, test_loader
def create_federated_mnist(self, dataset, destination_idx, batch_size, shuffle):
    """Federate a dataset across the selected workers and wrap it in a loader.

    Fix: the Args section previously described `destination_idx` as a config
    file path; it is a list of worker IDs. Also replaced the
    `cond and list.append(...)` side-effect trick with an explicit `if`.

    Args:
        dataset (FLCustomDataset): Dataset to be federated.
        destination_idx (list[str]): IDs of destination workers, or a list
            containing "server" to target the central server only.
        batch_size (int): Batch size for the loader.
        shuffle (bool): Whether the loader shuffles batches.

    Returns:
        sy.FederatedDataLoader: Loader over the federated dataset (drop_last=True).
    """
    workers = []
    if "server" in destination_idx:
        workers.append(self.server)
    else:
        for worker_id, worker in self.workers.items():
            if worker_id in destination_idx:
                workers.append(worker)

    fed_dataloader = sy.FederatedDataLoader(
        dataset.federate(workers),
        batch_size=batch_size,
        shuffle=shuffle,
        drop_last=True)
    return fed_dataloader
def test_federated_dataloader_iter_per_worker(workers):
    """iter_per_worker=True runs one iterator per worker, so each iteration
    yields a dict with one batch for every worker."""
    bob = workers["bob"]
    alice = workers["alice"]
    james = workers["james"]

    datasets = [
        fl.BaseDataset(th.tensor([1, 2]), th.tensor([1, 2])).send(bob),
        fl.BaseDataset(th.tensor([3, 4, 5, 6]), th.tensor([3, 4, 5, 6])).send(alice),
        fl.BaseDataset(th.tensor([7, 8, 9, 10]), th.tensor([7, 8, 9, 10])).send(james),
    ]
    fed_dataset = sy.FederatedDataset(datasets)
    loader = sy.FederatedDataLoader(
        fed_dataset, batch_size=2, iter_per_worker=True, shuffle=True)

    nr_workers = len(datasets)
    assert (loader.num_iterators == nr_workers
            ), "num_iterators should be equal to number or workers"
    for batch_idx, batches in enumerate(loader):
        assert len(
            batches.keys()) == nr_workers, "return a batch for each worker"
def init_syft_workers(eth_accounts):
    """Create syft workers bound to ethereum accounts plus federated loaders.

    Args:
        eth_accounts: List of ethereum accounts; index 0 is implicitly the
            aggregator's, workers receive accounts 1..n in order.

    Returns:
        (workers, federated_train_loader, test_loader, worker_eth_accounts)
    """
    # create 3 workers and split the data between them
    # aggregator for now just: hosts the data, tests the model
    # for future discussion: MPC, Rewards, Crypto provider
    workers = data_utils.generate_virtual_workers(number_of_workers, hook)
    worker_eth_accounts = dict()
    # assign each worker a unique ethereum account (account 0 is the aggregator's)
    for i, worker in enumerate(workers):
        worker_eth_accounts[worker.id] = eth_accounts[i + 1]
    central_server = sy.VirtualWorker(hook, id="aggregator")

    # Use sklearn to split into train and test.
    # Fix: `df.drop(["ICU"], 1)` relied on the positional `axis` argument,
    # which was deprecated in pandas 1.x and removed in 2.0; use the
    # explicit `columns=` keyword instead.
    X_train, X_val, y_train, y_val = train_test_split(
        df.drop(columns=["ICU"]),
        df["ICU"],
        test_size=0.2,
        random_state=101,
        stratify=df["ICU"]
    )

    # Create a federated dataset using BaseDataset for all train
    # frames and randomly share them in an IID manner between clients
    record_list, result_list = data_utils.split_into_lists(X_train, y_train)
    record_list = data_utils.convert_to_tensors(record_list)
    base_federated_set = sy.BaseDataset(record_list, result_list).federate(workers)
    federated_train_loader = sy.FederatedDataLoader(base_federated_set)

    test_list, test_labels = data_utils.split_into_lists(X_val, y_val)
    test_list = data_utils.convert_to_tensors(test_list)
    test_dataset = sy.BaseDataset(test_list, test_labels)
    test_loader = torch.utils.data.DataLoader(test_dataset)

    # TODO: Implement, make necessary imports and
    # update requirements.txt file!
    return workers, federated_train_loader, test_loader, worker_eth_accounts
# Define train/test process for itr in range(1, args.itr_numbers + 1): # Select the participants from the total users with the given probability Users_Current = np.random.binomial(Users_num_total, args.user_sel_prob, 1).sum() if Users_Current == 0: Users_Current = 1 # Compute the standard variance B B = ComputeB(S) # Load samples from the participants with the given probability or mini-batch size args.batch_size federated_train_loader = sy.FederatedDataLoader( Federate_Dataset, batch_size=args.batch_size, shuffle=True, worker_num=Users_Current, batch_num=args.batchs_round, **kwargs) workers_list = federated_train_loader.workers # List of participants at the current iteration # Next two lines are only necessary for synchronous aggregation # for idx in range(len(workers_list)): # models[workers_list[idx]] = model # Initialize the same model-structure tensor with zero elements Collect_Gradients = ZerosGradients(Layers_shape) Loss_train = torch.tensor(0.) for idx_outer, (train_data, train_targets) in enumerate(federated_train_loader): model_round = models[train_data.location.id]
#If you have your workers operating remotely, like on Raspberry PIs
#kwargs_websocket_alice = {"host": "35.193.97.131", "hook": hook}
#alice = WebsocketClientWorker(id="alice", port=8777, **kwargs_websocket_alice)
#kwargs_websocket_bob = {"host": "34.68.237.214", "hook": hook}
#bob = WebsocketClientWorker(id="bob", port=8778, **kwargs_websocket_bob)
#workers_virtual = [alice, bob]

# array_lines_proper_dimension = our data points (X)
# categories_numpy = our labels (Y)
langDataset = LanguageDataset(array_lines_proper_dimension, categories_numpy)

# Assign the data points and the corresponding categories to the workers.
federated_train_loader = sy.FederatedDataLoader(
    langDataset.federate(workers_virtual),
    batch_size=args.batch_size)


def categoryFromOutput(output):
    """Map a network output vector to its most likely (category name, index)."""
    _, top_i = output.topk(1)
    category_i = top_i[0].item()
    return all_categories[category_i], category_i


def timeSince(since):
    """Format the seconds elapsed since `since` as 'Xm Ys'."""
    elapsed = time.time() - since
    minutes = math.floor(elapsed / 60)
    seconds = elapsed - minutes * 60
    return '%dm %ds' % (minutes, seconds)