def start_websocket_server_worker(id, host, port, hook, verbose, keep_labels=None, training=True):
    """Helper function for spinning up a websocket server and setting up the local datasets."""
    server = WebsocketServerWorker(id=id, host=host, port=port, hook=hook, verbose=verbose)

    # Setup toy data (mnist example)
    mnist_dataset = datasets.MNIST(
        root="./data",
        train=training,
        download=True,
        transform=transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
        ),
    )

    if training:
        indices = np.isin(mnist_dataset.targets, keep_labels).astype("uint8")
        logger.info("number of true indices: %s", indices.sum())
        selected_data = (
            torch.native_masked_select(mnist_dataset.data.transpose(0, 2), torch.tensor(indices))
            .view(28, 28, -1)
            .transpose(2, 0)
        )
        logger.info("after selection: %s", selected_data.shape)
        selected_targets = torch.native_masked_select(mnist_dataset.targets, torch.tensor(indices))

        dataset = sy.BaseDataset(
            data=selected_data, targets=selected_targets, transform=mnist_dataset.transform
        )
        key = "mnist"
    else:
        dataset = sy.BaseDataset(
            data=mnist_dataset.data,
            targets=mnist_dataset.targets,
            transform=mnist_dataset.transform,
        )
        key = "mnist_testing"

    server.add_dataset(dataset, key=key)
    logger.info("datasets: %s", server.datasets)
    if training:
        logger.info("len(datasets[mnist]): %s", len(server.datasets["mnist"]))

    server.start()
    return server
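# Usage sketch (illustrative): the client side that would attach to a worker
# started by the helper above. The host/port values are assumptions and must
# match whatever was passed to start_websocket_server_worker; the import path
# follows PySyft 0.2.x and may differ across versions.
import torch
import syft as sy
from syft.workers.websocket_client import WebsocketClientWorker

hook = sy.TorchHook(torch)
alice = WebsocketClientWorker(id="alice", host="localhost", port=8777, hook=hook)

# The remote "mnist" dataset registered via server.add_dataset(...) can now be
# trained against with sy.TrainConfig and alice.fit(dataset_key="mnist"),
# as the TrainConfig examples further down illustrate.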
def setup_FL_env(training_datasets, validation_datasets, testing_dataset, is_shared=False):
    """ Sets up a basic federated learning environment using virtual workers,
        with an allocated arbiter (i.e. TTP) to facilitate model development
        & utilisation, and deploys datasets to their respective workers

    Args:
        training_datasets (dict(tuple(th.Tensor))): Datasets to be used for training
        validation_datasets (dict(tuple(th.Tensor))): Datasets to be used for validation
        testing_dataset (tuple(th.Tensor)): Dataset to be used for testing
        is_shared (bool): Toggles if SMPC encryption protocols are active
    Returns:
        training_pointers (dict(sy.BaseDataset))
        validation_pointers (dict(sy.BaseDataset))
        testing_pointer (sy.BaseDataset)
        workers (list(sy.VirtualWorker))
        crypto_provider (sy.VirtualWorker)
    """
    # Simulate FL computation amongst K worker nodes,
    # where K is the no. of datasets to be federated
    workers = connect_to_workers(n_workers=len(training_datasets))

    # Allow for 1 exchanger/arbiter (i.e. TTP)
    crypto_provider = connect_to_crypto_provider()
    crypto_provider.clear_objects()
    assert len(crypto_provider._objects) == 0

    # Send training & validation datasets to their respective workers
    training_pointers = {}
    validation_pointers = {}
    for w_idx in range(len(workers)):
        # Retrieve & prepare worker for receiving dataset
        curr_worker = workers[w_idx]
        curr_worker.clear_objects()
        assert len(curr_worker._objects) == 0

        train_data = training_datasets[w_idx]
        validation_data = validation_datasets[w_idx]

        # Cast dataset into a Tensor & send it to the relevant worker
        train_pointer = sy.BaseDataset(*train_data).send(curr_worker)
        validation_pointer = sy.BaseDataset(*validation_data).send(curr_worker)

        # Store data pointers for subsequent reference
        training_pointers[curr_worker] = train_pointer
        validation_pointers[curr_worker] = validation_pointer

    # 'Me' serves as the client -> test pointer stays with me, but is shared via SMPC
    testing_pointer = sy.BaseDataset(*testing_dataset).send(crypto_provider)

    return training_pointers, validation_pointers, testing_pointer, workers, crypto_provider
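# Usage sketch (illustrative): feeding setup_FL_env with three parties, each
# contributing one (x, y) tensor pair. Shapes and the party count are arbitrary
# assumptions; connect_to_workers/connect_to_crypto_provider are the same
# helpers the function above already relies on.
import torch as th

training_datasets = {i: (th.randn(10, 4), th.randint(0, 2, (10, 1)).float()) for i in range(3)}
validation_datasets = {i: (th.randn(4, 4), th.randint(0, 2, (4, 1)).float()) for i in range(3)}
testing_dataset = (th.randn(6, 4), th.randint(0, 2, (6, 1)).float())

train_ptrs, val_ptrs, test_ptr, workers, ttp = setup_FL_env(
    training_datasets, validation_datasets, testing_dataset
)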
def start_websocket_server_worker(id, host, port, hook, verbose, keep_labels=None, training=True):
    """Helper function for spinning up a websocket server and setting up the local datasets."""
    d = load_cnn_virus()

    server = websocket_server.WebsocketServerWorker(
        id=id, host=host, port=port, hook=hook, verbose=verbose
    )

    if training:
        indices = np.isin(d[1], keep_labels).astype("uint8")
        logger.info("number of true indices: %s", indices.sum())
        selected_data = (
            torch.native_masked_select(d[0].transpose(0, 1), torch.tensor(indices))
            .view(470, -1)
            .transpose(1, 0)
            .to(device)
        )
        logger.info("after selection: %s", selected_data.shape)
        selected_targets = torch.native_masked_select(d[1], torch.tensor(indices)).to(device)

        dataset = sy.BaseDataset(data=selected_data, targets=selected_targets)
        key = "mnist"
    else:
        dataset = sy.BaseDataset(data=d[0].to(device), targets=d[1].to(device))
        key = "mnist_testing"

    server.add_dataset(dataset, key=key)
    count = [0] * 5
    logger.info(
        "Dataset (%s set), available labels on %s: ", "train" if training else "test", id
    )
    for i in range(5):
        count[i] = (dataset.targets == i).sum().item()
        logger.info("  %s: %s", i, count[i])

    logger.info("datasets: %s", server.datasets)
    if training:
        logger.info("len(datasets[mnist]): %s", len(server.datasets[key]))

    server.start()
    return server
def prepare_training(hook, alice):  # pragma: no cover
    data, target = utils.create_gaussian_mixture_toy_data(nr_samples=100)
    dataset_key = "gaussian_mixture"
    dataset = sy.BaseDataset(data, target)
    alice.add_dataset(dataset, key=dataset_key)

    @hook.torch.jit.script
    def loss_fn(pred, target):
        return ((pred - target.unsqueeze(1)) ** 2).mean()

    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.fc1 = nn.Linear(2, 3)
            self.fc2 = nn.Linear(3, 2)
            self.fc3 = nn.Linear(2, 1)

        def forward(self, x):
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            return x

    model_untraced = Net()
    model = torch.jit.trace(model_untraced, data)

    pred = model(data)
    loss_before = loss_fn(target=target, pred=pred)

    return model, loss_fn, data, target, loss_before, dataset_key
def main(number, start_slice, end_slice):
    mnist_dataset = TrainDataset(
        transform=transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
        ),
        number=number,
        start_slice=start_slice,
        end_slice=end_slice,
    )

    _id = 'h%s' % number
    ip = '10.0.0.%s' % number

    hook = syft.TorchHook(torch)
    server = WebsocketServerWorker(id=_id, host=ip, port=8778, hook=hook, verbose=True)

    print("Worker:{}, Dataset contains {}".format(_id, str(len(mnist_dataset.data))))

    dataset = syft.BaseDataset(
        data=mnist_dataset.data, targets=mnist_dataset.target, transform=mnist_dataset.transform
    )
    key = "targeted"
    server.add_dataset(dataset, key=key)
    server.start()
def get_mnist_dataset(keep_labels, training=True):
    """Sets up the MNIST dataset for training or testing."""
    mnist_dataset = datasets.MNIST(
        root="./data",
        train=training,
        download=True,
        transform=transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
        ),
    )

    # Keep only the samples whose labels are in keep_labels
    indices = np.isin(mnist_dataset.targets, keep_labels).astype("uint8")
    logger.info("number of true indices: %s", indices.sum())
    selected_data = (
        torch.masked_select(mnist_dataset.data.transpose(0, 2), torch.tensor(indices))
        .view(28, 28, -1)
        .transpose(2, 0)
    )
    logger.info("after selection: %s", selected_data.shape)
    selected_targets = torch.masked_select(mnist_dataset.targets, torch.tensor(indices))

    return sy.BaseDataset(
        data=selected_data, targets=selected_targets, transform=mnist_dataset.transform
    )
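# Usage sketch (illustrative): federating the filtered MNIST subset across two
# hooked virtual workers. The worker names and batch size are assumptions;
# BaseDataset.federate and sy.FederatedDataLoader are used the same way in the
# later examples of this collection.
dataset = get_mnist_dataset(keep_labels=[0, 1, 2], training=True)

federated_dataset = dataset.federate((bob, alice))  # splits the data across bob and alice
train_loader = sy.FederatedDataLoader(federated_dataset, batch_size=64, shuffle=True)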
def dataset_federate_noniid(dataset, workers, Ratio=[1, 1, 1], net='NOT CNN'):
    """
    Add a method to easily transform a torch.Dataset or a sy.BaseDataset
    into a sy.FederatedDataset. The given dataset is split into len(workers)
    parts and each part is sent to one worker.
    """
    logger.info(f"Scanning and sending data to {', '.join([w.id for w in workers])}...")

    datasets = []
    N = 0
    dataset_list = list(dataset)
    for n in range(0, len(workers)):
        ratio = Ratio[n] / sum(Ratio)  # compute this worker's share
        num = round(ratio * len(dataset))  # number of samples to draw for this share
        Subset = dataset_list[N:N + num]  # extract the slice
        N = N + num

        data = []
        targets = []
        for d, t in Subset:
            data.append(d)
            targets.append(t)
        data = torch.cat(data)
        if net == 'CNN':
            data = torch.unsqueeze(data, 1)
        targets = torch.tensor(targets)

        worker = workers[n]
        logger.debug("Sending data to worker %s", worker.id)
        data = data.send(worker)
        targets = targets.send(worker)
        datasets.append(sy.BaseDataset(data, targets))  # .send(worker)

    logger.debug("Done!")
    return sy.FederatedDataset(datasets)
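# Usage sketch (illustrative): iterating the non-IID FederatedDataset returned
# above. Worker names and the 2:1:1 ratio are assumptions; each batch comes
# back as pointer tensors living on one of the workers.
fed_dataset = dataset_federate_noniid(mnist_dataset, [bob, alice, charlie], Ratio=[2, 1, 1])
fed_loader = sy.FederatedDataLoader(fed_dataset, batch_size=32, shuffle=True)

for data, target in fed_loader:
    # data.location identifies the worker holding the current batch
    pass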
def run_server(i, indices):
    import torch  # Each process should import torch to allow parallelization?
    hook = sy.TorchHook(torch)

    server = CustomWebsocketServerWorker(
        id=f"dataserver-{i}", host="0.0.0.0", port=f"{8777 + i}", hook=hook
    )

    mnist = datasets.MNIST(
        root="./data",
        train=True,
        download=True,
        transform=transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
        ),
    )

    is_kept_mask = torch.tensor([x in indices for x in range(len(mnist.targets))])

    dataset = sy.BaseDataset(
        data=torch.masked_select(mnist.data.transpose(0, 2), is_kept_mask)
        .view(28, 28, -1)
        .transpose(2, 0),
        targets=torch.masked_select(mnist.targets, is_kept_mask),
        transform=mnist.transform,
    )

    server.add_dataset(dataset, key="mnist")
    print(f"Server {i} started")
    server.start()
def test_train_config_with_jit_trace(hook, workers):  # pragma: no cover
    alice = workers["alice"]
    me = workers["me"]

    data = torch.tensor([[-1, 2.0], [0, 1.1], [-1, 2.1], [0, 1.2]], requires_grad=True)
    target = torch.tensor([[1], [0], [1], [0]])

    dataset = sy.BaseDataset(data, target)
    alice.add_dataset(dataset, key="vectors")

    @hook.torch.jit.script
    def loss_fn(real, pred):
        return ((real.float() - pred.float()) ** 2).mean()

    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.fc1 = nn.Linear(2, 3)
            self.fc2 = nn.Linear(3, 2)
            self.fc3 = nn.Linear(2, 1)

        def forward(self, x):
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            return x

    model_untraced = Net()
    model = torch.jit.trace(model_untraced, data)

    model_with_id = pointers.ObjectWrapper(model, sy.ID_PROVIDER.pop())
    loss_fn_with_id = pointers.ObjectWrapper(loss_fn, sy.ID_PROVIDER.pop())

    model_ptr = me.send(model_with_id, alice)
    loss_fn_ptr = me.send(loss_fn_with_id, alice)

    print("Evaluation before training")
    pred = model(data)
    loss_before = loss_fn(real=target, pred=pred)
    print("Loss: {}".format(loss_before))

    # Create and send train config
    train_config = sy.TrainConfig(model=model, loss_fn=loss_fn, batch_size=2)
    train_config.send(alice)

    for epoch in range(5):
        loss = alice.fit(dataset_key="vectors")
        print("-" * 50)
        print("Iteration %s: alice's loss: %s" % (epoch, loss))

    print("Evaluation after training:")
    new_model = model_ptr.get()
    pred = new_model.obj(data)
    loss_after = loss_fn(real=target, pred=pred)
    print("Loss: {}".format(loss_after))

    assert loss_after < loss_before
def test_train_config_with_jit_trace(hook, workers):  # pragma: no cover
    alice = workers["alice"]

    data = torch.tensor([[-1, 2.0], [0, 1.1], [-1, 2.1], [0, 1.2]], requires_grad=True)
    target = torch.tensor([[1], [0], [1], [0]])

    dataset = sy.BaseDataset(data, target)
    alice.add_dataset(dataset, key="gaussian_mixture")

    @hook.torch.jit.script
    def loss_fn(pred, target):
        return ((target.float() - pred.float()) ** 2).mean()

    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.fc1 = nn.Linear(2, 3)
            self.fc2 = nn.Linear(3, 2)
            self.fc3 = nn.Linear(2, 1)

        def forward(self, x):
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            return x

    model_untraced = Net()
    model = torch.jit.trace(model_untraced, data)

    if PRINT_IN_UNITTESTS:
        print("Evaluation before training")

    pred = model(data)
    loss_before = loss_fn(target=target, pred=pred)

    if PRINT_IN_UNITTESTS:
        print("Loss: {}".format(loss_before))

    # Create and send train config
    train_config = sy.TrainConfig(model=model, loss_fn=loss_fn, batch_size=2)
    train_config.send(alice)

    for epoch in range(5):
        loss = alice.fit(dataset_key="gaussian_mixture")
        if PRINT_IN_UNITTESTS:  # pragma: no cover
            print("-" * 50)
            print("Iteration %s: alice's loss: %s" % (epoch, loss))

    new_model = train_config.model_ptr.get()
    pred = new_model.obj(data)
    loss_after = loss_fn(target=target, pred=pred)

    if PRINT_IN_UNITTESTS:  # pragma: no cover
        print("Loss before training: {}".format(loss_before))
        print("Loss after training: {}".format(loss_after))

    assert loss_after < loss_before
def test_fit():
    data = torch.tensor([[-1, 2.0], [0, 1.1], [-1, 2.1], [0, 1.2]], requires_grad=True)
    target = torch.tensor([[1], [0], [1], [0]])

    fed_client = federated.FederatedClient()
    dataset = sy.BaseDataset(data, target)
    fed_client.add_dataset(dataset, key="vectors")

    def loss_fn(real, pred):
        return ((real.float() - pred.float()) ** 2).mean()

    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.fc1 = torch.nn.Linear(2, 3)
            self.fc2 = torch.nn.Linear(3, 2)
            self.fc3 = torch.nn.Linear(2, 1)

        def forward(self, x):
            x = torch.nn.functional.relu(self.fc1(x))
            x = torch.nn.functional.relu(self.fc2(x))
            x = self.fc3(x)
            return x

    model_untraced = Net()
    model = torch.jit.trace(model_untraced, data)

    model_id = 0
    model_ow = pointers.ObjectWrapper(obj=model, id=model_id)
    loss_id = 1
    loss_ow = pointers.ObjectWrapper(obj=loss_fn, id=loss_id)

    print("Evaluation before training")
    pred = model(data)
    loss_before = loss_fn(real=target, pred=pred)
    print("Loss: {}".format(loss_before))

    # Create and send train config
    train_config = sy.TrainConfig(
        batch_size=1, model=None, loss_fn=None, model_id=model_id, loss_fn_id=loss_id
    )

    fed_client.set_obj(model_ow)
    fed_client.set_obj(loss_ow)
    fed_client.set_obj(train_config)

    for epoch in range(5):
        loss = fed_client.fit(dataset_key="vectors")
        print("-" * 50)
        print("Iteration %s: alice's loss: %s" % (epoch, loss))

    print("Evaluation after training:")
    new_model = fed_client.get_obj(model_id)
    pred = new_model.obj(data)
    loss_after = loss_fn(real=target, pred=pred)
    print("Loss: {}".format(loss_after))

    assert loss_after < loss_before
def _distribute_among_workers(dataset, workers):
    datasets = []

    for i, data in dataset.items():
        x_train, y_train = _data_target_split(data)
        data = x_train.send(workers[i])
        targets = y_train.send(workers[i])
        datasets.append(sy.BaseDataset(data, targets))

    return sy.FederatedDataset(datasets)
def get_federated_dataset(data, users, context_size, hook):
    users_data = []
    workers = []
    for user in users:
        user_worker = sy.VirtualWorker(hook, id=user)
        cur_data = data[data.user == user]
        X, Y = extend_data(cur_data.X, cur_data.Y, context_size)
        X = th.tensor(X)
        Y = th.tensor(Y)
        users_data.append(sy.BaseDataset(X, Y).send(user_worker))
        workers.append(user_worker)

    return sy.FederatedDataset(users_data), workers
def main():
    hook = syft.TorchHook(torch)
    data = torch.tensor([[1.0], [2.0], [3.0], [4.0]], requires_grad=True)
    target = torch.tensor([[1.0], [2.0], [3.0], [4.0]], requires_grad=False)
    dataset = syft.BaseDataset(data, target)
    h1 = WebsocketServerWorker(id="h1", host="10.0.0.1", port="8778", hook=hook)
    h1.add_dataset(dataset, key="train")
    h1.start()
    return h1
def init_syft_workers(eth_accounts):
    # Create 3 workers and split the data between them.
    # The aggregator for now just hosts the data and tests the model.
    # For future discussion: MPC, rewards, crypto provider.
    workers = data_utils.generate_virtual_workers(number_of_workers, hook)
    worker_eth_accounts = dict()

    # Assign each worker a unique Ethereum account
    for i, worker in enumerate(workers):
        worker_eth_accounts[worker.id] = eth_accounts[i + 1]

    central_server = sy.VirtualWorker(hook, id="aggregator")

    # Use sklearn to split into train and test
    X_train, X_val, y_train, y_val = train_test_split(
        df.drop(["ICU"], axis=1), df["ICU"], test_size=0.2, random_state=101, stratify=df["ICU"]
    )

    # Create a federated dataset using BaseDataset for all train
    # frames and randomly share them in an IID manner between clients
    record_list, result_list = data_utils.split_into_lists(X_train, y_train)
    record_list = data_utils.convert_to_tensors(record_list)
    base_federated_set = sy.BaseDataset(record_list, result_list).federate(workers)
    federated_train_loader = sy.FederatedDataLoader(base_federated_set)

    test_list, test_labels = data_utils.split_into_lists(X_val, y_val)
    test_list = data_utils.convert_to_tensors(test_list)
    test_dataset = sy.BaseDataset(test_list, test_labels)
    test_loader = torch.utils.data.DataLoader(test_dataset)

    # TODO: Implement, make necessary imports and
    # update requirements.txt file!
    return workers, federated_train_loader, test_loader, worker_eth_accounts
def test_fl_with_trainconfig(isolated_filesystem, start_remote_server_worker_only, hook):
    os.chdir("advanced/Federated Learning with TrainConfig/")
    notebook = "Introduction to TrainConfig.ipynb"
    p_name = Path("examples/tutorials/advanced/Federated Learning with TrainConfig/")
    tested_notebooks.append(str(p_name / notebook))
    hook.local_worker.remove_worker_from_registry("alice")
    kwargs = {"id": "alice", "host": "localhost", "port": 8777, "hook": hook}
    data = torch.tensor([[0.0, 1.0], [1.0, 0.0], [1.0, 1.0], [0.0, 0.0]], requires_grad=True)
    target = torch.tensor([[1.0], [1.0], [0.0], [0.0]], requires_grad=False)
    dataset = sy.BaseDataset(data, target)
    process_remote_worker = start_remote_server_worker_only(dataset=(dataset, "xor"), **kwargs)
    res = pm.execute_notebook(notebook, "/dev/null", timeout=300)
    assert isinstance(res, nbformat.notebooknode.NotebookNode)
    process_remote_worker.terminate()
    sy.VirtualWorker(id="alice", hook=hook, is_client_worker=False)
def collect_datasets(self, grid):
    loaders = []
    tags = ['train', 'valid', 'test']
    for tag in tags:
        found_X = grid.search("#X", f"#{tag}")
        found_y = grid.search("#Y", f"#{tag}")

        datasets = []
        for worker in found_X.keys():
            datasets.append(sy.BaseDataset(found_X[worker][0], found_y[worker][0]))

        dataset = sy.FederatedDataset(datasets)
        loaders.append(sy.FederatedDataLoader(dataset, batch_size=self.model_config.batch_size))

    return loaders
def create_femnist_datasets(self, raw_dataset, workers_idx):
    datasets = dict()
    for worker_id in workers_idx:
        images = tensor(raw_dataset[worker_id]['x'], dtype=float32)
        labels = tensor(raw_dataset[worker_id]['y'].ravel(), dtype=int64)
        dataset = sy.BaseDataset(
            images,
            labels,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(
                    (raw_dataset[worker_id]['x'].mean(),),
                    (raw_dataset[worker_id]['x'].std(),),
                ),
            ]),
        )
        datasets[worker_id] = dataset
    return datasets
def create_femnist_fed_dataset(self, raw_data, workers_idx, percentage):
    """ Assume this is only used for preparing an aggregated dataset for the server.
    Args:
        raw_data (dict): Raw samples/labels keyed by worker id
        workers_idx (list(int)): Workers whose data is sampled
        percentage (float): Out of 100, amount of public data of each user
    Returns:
        sy.FederatedDataset: The sampled public data, federated to the server worker
    """
    logging.info("Creating the dataset from {}% of {} selected users' data...".format(
        percentage, len(workers_idx)))

    # Fraction of public data of each user, to be shared with the server
    server_images = tensor([], dtype=float32).view(-1, 28, 28)
    server_labels = tensor([], dtype=int64)

    for worker_id in workers_idx:
        worker_samples_num = len(raw_data[worker_id]['y'])
        num_samples_for_server = math.floor((percentage / 100.0) * worker_samples_num)
        logging.debug("Sending {} samples from worker {} with total {}".format(
            num_samples_for_server, worker_id, worker_samples_num))
        indices = sample(range(worker_samples_num), num_samples_for_server)
        images = tensor([raw_data[worker_id]['x'][i] for i in indices],
                        dtype=float32).view(-1, 28, 28)
        labels = tensor([raw_data[worker_id]['y'][i] for i in indices],
                        dtype=int64).view(-1)
        server_images = cat((server_images, images))
        server_labels = cat((server_labels, labels))

    logging.info("Selected {} samples in total for the server from {} users.".format(
        server_images.shape, len(workers_idx)))

    return sy.BaseDataset(
        server_images,
        server_labels,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(
                (server_images.mean().item(),),
                (server_images.std().item(),),
            ),
        ]),
    ).federate([self.server])
def _distribute_among_workers(samplers, dataset, workers):
    datasets = []

    # Each worker has its own sampler; len(samplers) == len(workers)
    for idx, sampler in enumerate(samplers):
        loader = DataLoader(dataset, batch_size=len(sampler), shuffle=False, sampler=sampler)

        # The loader always contains exactly one batch (because batch_size=len(sampler))
        for batch in loader:
            data = batch[0].send(workers[idx].id)
            targets = batch[1].send(workers[idx].id)
            datasets.append(sy.BaseDataset(data, targets))

    return sy.FederatedDataset(datasets)
def get_dataset_pointers(self):
    self.worker_handles = [
        ResearcherWorker(hook, PROXY_URL, PROXY_PORT, cookie=cookie,
                         verbose=self.verbose, id=this_id, is_client_worker=True)
        for cookie, this_id in self.tokens.items()
    ]

    self.datasets = dict()
    self.workers = []
    for worker in self.worker_handles:
        print(worker)
        this_dataset = worker.search(self.dataset_key)
        this_targets = worker.search(self.target_key)
        remote_dataset = sy.BaseDataset(this_dataset, this_targets)
        self.datasets[worker.id] = remote_dataset
        self.workers.append(worker.id)
def main(**kwargs):  # pragma: no cover
    """Helper function for spinning up a websocket participant."""

    # Create websocket worker
    worker = WebsocketServerWorker(**kwargs)

    # Setup toy data (xor example)
    data = th.tensor([[0.0, 1.0], [1.0, 0.0], [1.0, 1.0], [0.0, 0.0]], requires_grad=True)
    target = th.tensor([[1.0], [1.0], [0.0], [0.0]], requires_grad=False)

    # Create a dataset using the toy data
    dataset = sy.BaseDataset(data, target)

    # Tell the worker about the dataset
    worker.add_dataset(dataset, key="xor")

    # Start worker
    worker.start()

    return worker
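# Usage sketch (illustrative): a client-side driver for the xor worker above,
# modeled on the TrainConfig pattern used in the other examples here. The
# host/port values and the traced model/loss_fn are assumed to exist already.
remote_worker = WebsocketClientWorker(id="xor", host="localhost", port=8777, hook=hook)

train_config = sy.TrainConfig(model=model, loss_fn=loss_fn, batch_size=2, epochs=1)
train_config.send(remote_worker)

for epoch in range(5):
    loss = remote_worker.fit(dataset_key="xor")
    print("Epoch %s: remote loss: %s" % (epoch, loss))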
def prepare_training(hook, alice):  # pragma: no cover
    data = torch.tensor([[-1, 2.0], [0, 1.1], [-1, 2.1], [0, 1.2]], requires_grad=True)
    target = torch.tensor([[1], [0], [1], [0]])

    dataset = sy.BaseDataset(data, target)
    alice.add_dataset(dataset, key="vectors")

    @hook.torch.jit.script
    def loss_fn(real, pred):
        return ((real.float() - pred.float()) ** 2).mean()

    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.fc1 = nn.Linear(2, 3)
            self.fc2 = nn.Linear(3, 2)
            self.fc3 = nn.Linear(2, 1)

            nn.init.xavier_uniform_(self.fc1.weight)
            nn.init.xavier_uniform_(self.fc2.weight)
            nn.init.xavier_uniform_(self.fc3.weight)

        def forward(self, x):
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            return x

    model_untraced = Net()
    model = torch.jit.trace(model_untraced, data)

    print("Evaluation before training")
    pred = model(data)
    loss_before = loss_fn(real=target, pred=pred)
    print("Loss: {}".format(loss_before))

    return model, loss_fn, data, target, loss_before
def get_eicu_dataset(hospitalid, outcome):
    """Sets up the eICU dataset for training or testing."""
    df_x = pd.read_csv('x.csv')
    df_y = pd.read_csv('y.csv')

    # Delete rows where the outcome is missing
    to_keep = ~(pd.isnull(df_y).sum(axis=1) > 0)
    df_x = df_x[to_keep]
    df_y = df_y[to_keep]

    # Restrict x and y to the required hospital or test set
    to_keep = df_x.hospitalid.values == hospitalid
    df_x.drop('hospitalid', axis=1, inplace=True)
    df_x = df_x[to_keep]

    scaler = RobustScaler(quantile_range=(10.0, 90.0))
    x = scaler.fit_transform(df_x.values)
    y = df_y[outcome][to_keep].values

    return sy.BaseDataset(
        data=torch.from_numpy(x.astype('float32')),
        targets=torch.from_numpy(y.astype('float32')),
    )
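# Usage sketch (illustrative): combining per-hospital eICU datasets into a
# FederatedDataset, following the send/FederatedDataset pattern used elsewhere
# in these examples. The hospital ids, worker names, and outcome column are
# assumptions made for the sketch.
hook = sy.TorchHook(torch)
hospital_a = sy.VirtualWorker(hook, id="hospital_a")
hospital_b = sy.VirtualWorker(hook, id="hospital_b")

datasets = [
    get_eicu_dataset(hospitalid=73, outcome="mortality").send(hospital_a),
    get_eicu_dataset(hospitalid=110, outcome="mortality").send(hospital_b),
]
fed_loader = sy.FederatedDataLoader(sy.FederatedDataset(datasets), batch_size=32)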
def start_websocket_server_worker(id, host, port, hook, verbose, dataset, training=True):
    """Helper function for spinning up a websocket server and setting up the local datasets."""
    server = WebsocketServerWorker(id=id, host=host, port=port, hook=hook, verbose=verbose)
    dataset_key = dataset

    # If we are in the training loop
    if training:
        with open("./data/split/%d" % int(id), "rb") as fp:  # Unpickling
            data = pickle.load(fp)

        dataset_data, dataset_target = readnpy(data)
        logger.info("Number of samples for client %s is %s", id, len(dataset_data))
        dataset = sy.BaseDataset(data=dataset_data, targets=dataset_target)
        key = dataset_key
        nb_labels = len(torch.unique(dataset_target))

        server.add_dataset(dataset, key=key)
        count = [0] * nb_labels
        logger.info("Dataset (train set), available numbers on %s: ", id)
        for i in range(nb_labels):
            count[i] = (dataset.targets == i).sum().item()
            logger.info("  %s: %s", i, count[i])

    logger.info("datasets: %s", server.datasets)
    if training:
        logger.info("len(datasets): %s", len(server.datasets[key]))

    server.start()
    return server
def create_mnist_fed_datasets(self, raw_dataset):
    """
    raw_dataset (dict), e.g.
        data:  raw_dataset['worker_1']['x']
        label: raw_dataset['worker_1']['y']
    """
    fed_datasets = dict()
    for ww_id, ww_data in raw_dataset.items():
        images = tensor(ww_data['x'], dtype=float32)
        labels = tensor(ww_data['y'].ravel(), dtype=int64)
        dataset = sy.BaseDataset(
            images,
            labels,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(
                    (ww_data['x'].mean(),),
                    (ww_data['x'].std(),),
                ),
            ]),
        ).federate([self.workers[ww_id]])
        fed_datasets[ww_id] = dataset
    return fed_datasets
def test_train_config_with_jit_trace_sync(hook, start_remote_worker):  # pragma: no cover
    data, target = utils.create_gaussian_mixture_toy_data(100)
    dataset = sy.BaseDataset(data, target)
    dataset_key = "gaussian_mixture"

    server, remote_proxy = start_remote_worker(
        id="sync_fit", hook=hook, port=9000, dataset=(dataset, dataset_key)
    )

    @hook.torch.jit.script
    def loss_fn(pred, target):
        return ((target.view(pred.shape).float() - pred.float()) ** 2).mean()

    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.fc1 = nn.Linear(2, 3)
            self.fc2 = nn.Linear(3, 2)
            self.fc3 = nn.Linear(2, 1)

        def forward(self, x):
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            return x

    model_untraced = Net()
    model = torch.jit.trace(model_untraced, data)

    pred = model(data)
    loss_before = loss_fn(pred=pred, target=target)

    # Create and send train config
    train_config = sy.TrainConfig(model=model, loss_fn=loss_fn, batch_size=2, epochs=1)
    train_config.send(remote_proxy)

    for epoch in range(5):
        loss = remote_proxy.fit(dataset_key=dataset_key)
        if PRINT_IN_UNITTESTS:  # pragma: no cover
            print("-" * 50)
            print("Iteration %s: alice's loss: %s" % (epoch, loss))

    new_model = train_config.model_ptr.get()

    # Assert that the new model has updated (modified) parameters
    assert not (
        (model.fc1._parameters["weight"] - new_model.obj.fc1._parameters["weight"]).abs() < 10e-3
    ).all()
    assert not (
        (model.fc2._parameters["weight"] - new_model.obj.fc2._parameters["weight"]).abs() < 10e-3
    ).all()
    assert not (
        (model.fc3._parameters["weight"] - new_model.obj.fc3._parameters["weight"]).abs() < 10e-3
    ).all()
    assert not (
        (model.fc1._parameters["bias"] - new_model.obj.fc1._parameters["bias"]).abs() < 10e-3
    ).all()
    assert not (
        (model.fc2._parameters["bias"] - new_model.obj.fc2._parameters["bias"]).abs() < 10e-3
    ).all()
    assert not (
        (model.fc3._parameters["bias"] - new_model.obj.fc3._parameters["bias"]).abs() < 10e-3
    ).all()

    new_model.obj.eval()
    pred = new_model.obj(data)
    loss_after = loss_fn(pred=pred, target=target)

    if PRINT_IN_UNITTESTS:  # pragma: no cover
        print("Loss before training: {}".format(loss_before))
        print("Loss after training: {}".format(loss_after))

    remote_proxy.close()
    server.terminate()

    assert loss_after < loss_before
def start_websocket_server_worker(
    id, host, port, hook, verbose, keep_labels=None, training=True
):  # pragma: no cover
    """Helper function for spinning up a websocket server and setting up the local datasets."""
    server = WebsocketServerWorker(id=id, host=host, port=port, hook=hook, verbose=verbose)

    # Setup toy data (mnist example)
    mnist_dataset = datasets.MNIST(
        root="./data",
        train=training,
        download=True,
        transform=transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
        ),
    )

    if training:
        indices = np.isin(mnist_dataset.targets, keep_labels).astype("uint8")
        logger.info("number of true indices: %s", indices.sum())
        selected_data = (
            torch.native_masked_select(mnist_dataset.data.transpose(0, 2), torch.tensor(indices))
            .view(28, 28, -1)
            .transpose(2, 0)
        )
        logger.info("after selection: %s", selected_data.shape)
        selected_targets = torch.native_masked_select(mnist_dataset.targets, torch.tensor(indices))

        dataset = sy.BaseDataset(
            data=selected_data, targets=selected_targets, transform=mnist_dataset.transform
        )
        key = "mnist"
    else:
        dataset = sy.BaseDataset(
            data=mnist_dataset.data,
            targets=mnist_dataset.targets,
            transform=mnist_dataset.transform,
        )
        key = "mnist_testing"

    server.add_dataset(dataset, key=key)

    # Setup toy data (vectors example)
    data_vectors = torch.tensor([[-1, 2.0], [0, 1.1], [-1, 2.1], [0, 1.2]], requires_grad=True)
    target_vectors = torch.tensor([[1], [0], [1], [0]])
    server.add_dataset(sy.BaseDataset(data_vectors, target_vectors), key="vectors")

    # Setup toy data (xor example)
    data_xor = torch.tensor([[0.0, 1.0], [1.0, 0.0], [1.0, 1.0], [0.0, 0.0]], requires_grad=True)
    target_xor = torch.tensor([1.0, 1.0, 0.0, 0.0], requires_grad=False)
    server.add_dataset(sy.BaseDataset(data_xor, target_xor), key="xor")

    # Setup gaussian mixture dataset
    data, target = utils.create_gaussian_mixture_toy_data(nr_samples=100)
    server.add_dataset(sy.BaseDataset(data, target), key="gaussian_mixture")

    # Setup partial iris dataset
    data, target = utils.iris_data_partial()
    dataset = sy.BaseDataset(data, target)
    dataset_key = "iris"
    server.add_dataset(dataset, key=dataset_key)

    logger.info("datasets: %s", server.datasets)
    if training:
        logger.info("len(datasets[mnist]): %s", len(server.datasets["mnist"]))

    server.start()
    return server
def main():
    args = define_and_get_arguments()

    hook = sy.TorchHook(torch)

    if args.use_virtual:
        alice = VirtualWorker(id="alice", hook=hook, verbose=args.verbose)
        bob = VirtualWorker(id="bob", hook=hook, verbose=args.verbose)
    else:
        kwargs_websocket = {"host": "localhost", "hook": hook, "verbose": args.verbose}
        alice = WebsocketClientWorker(id="alice", port=8777, **kwargs_websocket)
        bob = WebsocketClientWorker(id="bob", port=8778, **kwargs_websocket)

    workers = [alice, bob]

    use_cuda = args.cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {}

    if DATATHON:
        bob_data, bob_target = preprocessed_data("EICU_DB", True, 0)
        alice_data, alice_target = preprocessed_data("MIMIC_DB", True, 1)

        alice_train_dataset = sy.BaseDataset(alice_data, alice_target).send(alice)
        bob_train_dataset = sy.BaseDataset(bob_data, bob_target).send(bob)
        federated_train_dataset = sy.FederatedDataset([alice_train_dataset, bob_train_dataset])
        federated_train_loader = sy.FederatedDataLoader(
            federated_train_dataset,
            shuffle=True,
            batch_size=args.batch_size,
            iter_per_worker=True,
            **kwargs,
        )

        test_loader_mimic = get_dataloader(is_train=False, batch_size=args.batch_size, is_mimic=1)
        test_loader_eicu = get_dataloader(is_train=False, batch_size=args.batch_size, is_mimic=0)
    else:
        federated_train_loader = sy.FederatedDataLoader(
            datasets.MNIST(
                "../data",
                train=True,
                download=True,
                transform=transforms.Compose(
                    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
                ),
            ).federate(tuple(workers)),
            batch_size=args.batch_size,
            shuffle=True,
            iter_per_worker=True,
            **kwargs,
        )

        test_loader = torch.utils.data.DataLoader(
            datasets.MNIST(
                "../data",
                train=False,
                transform=transforms.Compose(
                    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
                ),
            ),
            batch_size=args.test_batch_size,
            shuffle=True,
            **kwargs,
        )

    model = Net().to(device)

    for epoch in range(1, args.epochs + 1):
        logger.info("Starting epoch %s/%s", epoch, args.epochs)
        model = train(model, device, federated_train_loader, args.lr, args.federate_after_n_batches)
        test(model, device, test_loader_mimic, 1)
        test(model, device, test_loader_eicu, 0)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
""" import numpy as np from torch.utils.data.sampler import SubsetRandomSampler df = pd.read_csv('mnist_train.csv') y = df['label'].values X = df.drop(['label'], 1).values X = X[:len(X) // 2] y = y[:len(y) // 2] torch_X_train = torch.from_numpy(X).type(torch.LongTensor) torch_y_train = torch.from_numpy(y).type(torch.LongTensor) # data type is long base = sy.BaseDataset(torch_X_train, torch_y_train) base_federated = base.federate((bob, alice)) train_loader = sy.FederatedDataLoader(base_federated, batch_size=args.batch_size, shuffle=True, **kwargs) test_loader = torch.utils.data.DataLoader( datasets.MNIST('../data', train=False, transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,)) ])), batch_size=args.test_batch_size, shuffle=True, **kwargs) """### CNN specification Here we use exactly the same CNN as in the official example. """