def test_vertically_federated_raises_if_more_than_two_workers_are_provided():
    sy.local_worker.clear_objects()
    alice = sy.VirtualWorker(id="alice", hook=hook, is_client_worker=False)
    bob = sy.VirtualWorker(id="bob", hook=hook, is_client_worker=False)
    charlie = sy.VirtualWorker(id="charlie", hook=hook, is_client_worker=False)

    inputs = th.tensor([1, 2, 3, 4.0])
    targets = th.tensor([1, 2, 3, 4.0])
    dataset = PartitionedDataset(inputs, targets)

    with pytest.raises(AssertionError):
        dataset.vertically_federate((alice, bob, charlie))

    alice.remove_worker_from_local_worker_registry()
    bob.remove_worker_from_local_worker_registry()
    charlie.remove_worker_from_local_worker_registry()
def test_plan_built_on_method(hook):
    """Test @sy.method2plan and plan send / get / send."""
    hook.local_worker.is_client_worker = False

    x11 = th.tensor([-1, 2.0]).tag("input_data")
    x21 = th.tensor([-1, 2.0]).tag("input_data")

    device_1 = sy.VirtualWorker(hook, id="device_1", data=(x11,))
    device_2 = sy.VirtualWorker(hook, id="device_2", data=(x21,))

    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.fc1 = nn.Linear(2, 3)
            self.fc2 = nn.Linear(3, 2)

        @sy.method2plan
        def forward(self, x):
            x = F.relu(self.fc1(x))
            x = self.fc2(x)
            return F.log_softmax(x, dim=0)

    net = Net()

    # build the plan by tracing it on dummy input
    net.forward.build(th.tensor([1, 2.0]))

    net.send(device_1)
    pointer_to_data = device_1.search("input_data")[0]
    pointer_to_result = net(pointer_to_data)
    assert isinstance(pointer_to_result.get(), th.Tensor)

    net.get()
    net.send(device_2)
    pointer_to_data = device_2.search("input_data")[0]
    pointer_to_result = net(pointer_to_data)
    assert isinstance(pointer_to_result.get(), th.Tensor)

    hook.local_worker.is_client_worker = True
def run(constant_overwrites):
    config_path = ROOT_DIR / 'hyperparams.yml'
    constants = merge_dict(load_hyperparams(config_path), constant_overwrites)
    use_cuda = constants['cuda'] and torch.cuda.is_available()
    hook = sy.TorchHook(torch)

    # The organisations that will participate in training
    org1 = sy.VirtualWorker(hook, id="org1")
    org2 = sy.VirtualWorker(hook, id="org2")

    torch.manual_seed(constants['seed'])
    device = torch.device('cuda' if use_cuda else 'cpu')
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    batch_size = constants['batch_size']
    test_batch_size = constants['test_batch_size']

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    train_dataset = datasets.MNIST('../data', train=True, download=True, transform=transform)
    federated_train_loader = sy.FederatedDataLoader(
        train_dataset.federate((org1, org2)),
        batch_size=batch_size, shuffle=True, **kwargs)

    # Evaluate on the held-out test split rather than the training data
    test_dataset = datasets.MNIST('../data', train=False, download=True, transform=transform)
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=test_batch_size, shuffle=True, **kwargs)

    model = CNN().to(device)
    optimizer = optim.SGD(model.parameters(), lr=constants['learning_rate'])

    for epoch in range(1, constants['n_epochs'] + 1):
        train(constants, model, device, federated_train_loader, optimizer, epoch)
        test(constants, model, device, test_loader)

    if constants['save_model']:
        torch.save(model.state_dict(), 'mnist_cnn.pt')
def _update_node_infos(self, node_id: str):
    """Create a new virtual worker to store/compute datasets owned by this peer.

    Args:
        node_id: ID used to identify this peer.
    """
    worker = sy.VirtualWorker(sy.hook, id=node_id)
    sy.local_worker._known_workers[node_id] = worker
    sy.local_worker.is_client_worker = False
    return worker
def test_encode_virtualWorker(self):
    # Given
    obj = sy.VirtualWorker()
    expected = {"mode": "subscribe", "obj": {"__worker__": 0}}

    # When
    result = self.cut.encode(obj)

    # Then
    self.assertDictEqual(expected, result)
def test_serde_virtual_worker(hook):
    virtual_worker = syft.VirtualWorker(hook=hook, id="deserialized_worker1")

    # Populate worker
    tensor1, tensor2 = torch.tensor([1.0, 2.0]), torch.tensor([0.0])
    ptr1, ptr2 = tensor1.send(virtual_worker), tensor2.send(virtual_worker)

    serialized_worker = serde.serialize(virtual_worker, force_full_simplification=False)
    deserialized_worker = serde.deserialize(serialized_worker)

    assert virtual_worker.id == deserialized_worker.id
def generate_workers(num_workers):
    """Generates a given number of PySyft virtual workers."""
    workers_list = []

    # init workers
    for i in range(num_workers):
        worker = sy.VirtualWorker(hook, id=str(i))
        workers_list.append(worker)

    return workers_list
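# Hedged usage sketch (not from the original source): generate_workers reads a
# module-level `hook`, so one must exist before calling it.
import torch
import syft as sy

hook = sy.TorchHook(torch)
workers = generate_workers(3)                    # workers with ids "0", "1", "2"
ptr = torch.tensor([1.0, 2.0]).send(workers[0])  # tensor now lives on worker "0"
assert ptr.get().tolist() == [1.0, 2.0]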
def get_fog_graph(hook, num_workers, num_clusters,
                  shuffle_workers=True, uniform_clusters=True, fog=True):
    # Define workers and layers
    workers = {}
    agg_map = {}
    layer = 0
    for id_ in range(num_workers):
        name = 'L{}_W{}'.format(layer, id_)
        workers[name] = sy.VirtualWorker(hook, id=name)
    layer = 1

    if not fog:
        # single-layer model-averaging FL
        name = 'L1_W0'
        workers[name] = sy.VirtualWorker(hook, id=name)
        worker_ids = [_ for _ in workers.keys() if 'L0' in _]
        agg_map[name] = worker_ids

        return agg_map, workers

    # multi-layer aggregation for fog learning
    for num_cluster in num_clusters:
        for id_ in range(num_cluster):
            name = 'L{}_W{}'.format(layer, id_)
            workers[name] = sy.VirtualWorker(hook, id=name)
        layer += 1

    for level in range(1, len(num_clusters) + 1):
        cluster_ids = [_ for _ in workers.keys() if 'L{}'.format(level) in _]
        worker_ids = [_ for _ in workers.keys() if 'L{}'.format(level - 1) in _]
        if shuffle_workers:
            worker_ids = list(np.array(worker_ids)[
                np.random.permutation(len(worker_ids))])
        cluster_sizes = get_cluster_sizes(
            len(worker_ids), len(cluster_ids), uniform_clusters)
        indices = [sum(cluster_sizes[:id_])
                   for id_ in range(len(cluster_sizes) + 1)]
        for id_ in range(len(cluster_ids)):
            agg_map[cluster_ids[id_]] = worker_ids[indices[id_]: indices[id_ + 1]]

    return agg_map, workers
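# Hedged usage sketch for get_fog_graph (illustrative, assumes an existing
# `hook` and the module's get_cluster_sizes helper): build a two-layer fog
# hierarchy from 8 leaf workers, aggregated by 4 mid-level workers and 1 root.
agg_map, fog_workers = get_fog_graph(hook, num_workers=8, num_clusters=[4, 1])
for aggregator, children in agg_map.items():
    print(aggregator, '<-', children)  # e.g. L1_W0 <- ['L0_W2', 'L0_W5']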
def connect_to_crypto_provider():
    """
    Simulates the existence of an arbiter to facilitate model generation
    & client-side utilisation.

    Returns:
        Arbiter (i.e. TTP) (sy.VirtualWorker)
    """
    return sy.VirtualWorker(pt_hook, id="crypto_provider").clear_objects()
def workers(hook):
    alice = syft.VirtualWorker(id="alice", hook=hook, is_client_worker=False)
    bob = syft.VirtualWorker(id="bob", hook=hook, is_client_worker=False)
    james = syft.VirtualWorker(id="james", hook=hook, is_client_worker=False)

    bob.add_workers([alice, james])
    alice.add_workers([bob, james])
    james.add_workers([bob, alice])
    hook.local_worker.add_workers([alice, bob, james])

    # TODO: should one set this boolean to true?
    # It was done previously in self.setUp() from `test_hook.py`
    # hook.local_worker.is_client_worker = True

    output = {}
    output["me"] = hook.local_worker
    output["alice"] = alice
    output["bob"] = bob
    output["james"] = james

    return output
def create_workers(self, workers_id_list):
    logging.info("Creating workers...")
    for worker_id in workers_id_list:
        if worker_id not in self.workers:
            logging.debug("Creating the worker: {}".format(worker_id))
            self.workers[worker_id] = sy.VirtualWorker(self.hook, id=worker_id)
        else:
            logging.debug(
                "Worker {} exists. Skip creating this worker".format(worker_id))
def test_spinup_time(hook):
    """Tests to ensure that virtual workers initialized with 10000 data points
    load in under 1 second. This is needed to ensure that virtual workers spun
    up inside web frameworks are created quickly enough to not cause timeout
    errors."""
    data = []
    for i in range(10000):
        data.append(torch.Tensor(5, 5).random_(100))

    start_time = time()
    dummy = sy.VirtualWorker(hook, id="dummy", data=data)
    end_time = time()

    assert (end_time - start_time) < 1
def create_virtual_workers(num_workers=None, id_list=None):
    if id_list is None:
        if num_workers is None:
            raise ValueError('Either num_workers or id_list must be provided.')
        id_list = [f'worker_{i}' for i in range(num_workers)]
    elif num_workers is not None and len(id_list) != num_workers:
        logger.warning(
            'Number of workers and id_list length not matching.'
            ' id_list will be taken and num_workers will be ignored.')
    workers = [sy.VirtualWorker(hook, id=i) for i in id_list]
    return workers, id_list
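# Hedged usage sketch for create_virtual_workers (assumes the module-level
# `hook` the function reads): either argument is enough, and explicit ids win
# when both are given.
workers, ids = create_virtual_workers(num_workers=2)             # ids: worker_0, worker_1
workers, ids = create_virtual_workers(id_list=['alice', 'bob'])  # explicit ids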
def test_pointer_tensor_simplify():
    """Test the simplification of PointerTensor."""
    alice = syft.VirtualWorker(syft.torch.hook, id="alice")
    input_tensor = PointerTensor(id=1000, location=alice, owner=alice)

    output = serde._simplify(input_tensor)

    assert output[1][0] == input_tensor.id
    assert output[1][1] == input_tensor.id_at_location
    assert output[1][2] == input_tensor.owner.id
def test_vertically_federate_raises_if_dataset_does_not_have_data_and_targets():
    sy.local_worker.clear_objects()
    alice = sy.VirtualWorker(id="alice", hook=hook, is_client_worker=False)
    bob = sy.VirtualWorker(id="bob", hook=hook, is_client_worker=False)

    inputs = th.tensor([1, 2, 3, 4.0])
    targets = th.tensor([1, 2, 3, 4.0])

    data_only_dataset = PartitionedDataset(data=inputs)
    with pytest.raises(AssertionError):
        data_only_dataset.vertically_federate((alice, bob))

    targets_only_dataset = PartitionedDataset(targets=targets)
    with pytest.raises(AssertionError):
        targets_only_dataset.vertically_federate((alice, bob))

    alice.remove_worker_from_local_worker_registry()
    bob.remove_worker_from_local_worker_registry()
def init_workers(self, num_workers):
    workers = []
    for i in range(num_workers):
        worker = {
            "instance": syft.VirtualWorker(hook, id=f"worker_{i}"),
            "model": None,
            "optim": None,
            "criterion": None,
            "loss": None,
        }
        workers.append(worker)
    return workers
def setUpModule():
    print("setup module")

    global me
    global bob
    global alice
    global james
    global hook

    hook = sy.TorchHook(verbose=True)

    me = hook.local_worker
    me.is_client_worker = False

    bob = sy.VirtualWorker(id="bob", hook=hook, is_client_worker=False)
    alice = sy.VirtualWorker(id="alice", hook=hook, is_client_worker=False)
    james = sy.VirtualWorker(id="james", hook=hook, is_client_worker=False)

    bob.add_workers([alice, james])
    alice.add_workers([bob, james])
    james.add_workers([bob, alice])
def test_clear_object_for_worker_created_with_pre_existing_id(hook):
    worker = sy.VirtualWorker(hook, id="worker")
    worker.clear_objects()

    ptr = th.tensor([1, 2, 3]).send(worker)
    assert len(worker._known_workers[worker.id]._objects) == len(worker._objects)
    assert len(worker._objects) == 1

    # create worker with pre-existing id
    worker = sy.VirtualWorker(hook, id="worker")
    worker.clear_objects()
    assert len(worker._known_workers[worker.id]._objects) == len(worker._objects)
    assert len(worker._objects) == 0

    ptr = th.tensor([1, 2, 3]).send(worker)
    assert len(worker._known_workers[worker.id]._objects) == len(worker._objects)
    assert len(worker._objects) == 1
def train_federated(trainset, num_workers=10, epochs=5, batch_size=64):
    manager = sy.VirtualWorker(hook, id="manager")
    workers = create_workers(num_workers)
    worker_data = send_train_data_to_workers(workers, trainset, batch_size)

    model = MNISTClassifier()
    criterion = nn.NLLLoss()

    for e in range(epochs):
        models = [model.copy().send(worker) for worker in workers]
        optimizers = [optim.Adam(model.parameters(), lr=0.003) for model in models]

        mean_loss = 0
        for i, worker in enumerate(worker_data):
            model = models[i]
            optimizer = optimizers[i]

            # batches
            for images, labels in worker:
                optimizer.zero_grad()
                log_ps = model(images.float())
                loss = criterion(log_ps, labels)
                loss.backward()
                optimizer.step()

            mean_loss += (loss.get().item() / len(worker_data))

        for model in models:
            model.move(manager)

        with torch.no_grad():
            fc_layers = ("fc1", "fc2", "fc3", "fc4")
            for layer in fc_layers:
                weights = torch.stack([getattr(m, layer).weight for m in models])
                biases = torch.stack([getattr(m, layer).bias for m in models])
                # average element-wise across the workers (dim 0 indexes the models)
                mean_weight = torch.mean(weights, dim=0).get()
                mean_bias = torch.mean(biases, dim=0).get()
                model_layer = getattr(model, layer)
                model_layer.weight.set_(mean_weight)
                model_layer.bias.set_(mean_bias)
            model = model.get()

        print("Epoch: {:0>2d}/{:0>2d} | ".format(e + 1, epochs),
              "Mean training loss: {:.3f}".format(mean_loss))
def get_federated_dataset(data, users, context_size, hook):
    users_data = []
    workers = []

    for user in users:
        user_worker = sy.VirtualWorker(hook, id=user)
        cur_data = data[data.user == user]

        X, Y = extend_data(cur_data.X, cur_data.Y, context_size)
        X = th.tensor(X)
        Y = th.tensor(Y)

        users_data.append(sy.BaseDataset(X, Y).send(user_worker))
        workers.append(user_worker)

    return sy.FederatedDataset(users_data), workers
def setUp(self):
    hook = sy.TorchHook(torch, verbose=True)

    self.me = hook.local_worker
    self.me.is_client_worker = True

    instance_id = str(int(10e10 * random.random()))
    bob = sy.VirtualWorker(id=f"bob{instance_id}", hook=hook, is_client_worker=False)
    alice = sy.VirtualWorker(id=f"alice{instance_id}", hook=hook, is_client_worker=False)
    james = sy.VirtualWorker(id=f"james{instance_id}", hook=hook, is_client_worker=False)

    bob.add_workers([alice, james])
    alice.add_workers([bob, james])
    james.add_workers([bob, alice])

    self.hook = hook
    self.bob = bob
    self.alice = alice
    self.james = james

    # A Toy Dataset
    data = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1.0]], requires_grad=True)
    target = torch.tensor([[0], [0], [1], [1.0]], requires_grad=True)

    # get pointers to training data on each worker by
    # sending some training data to bob and alice
    data_bob = data[0:2]
    target_bob = target[0:2]
    data_alice = data[2:]
    target_alice = target[2:]

    data_bob = data_bob.send(bob)
    data_alice = data_alice.send(alice)
    target_bob = target_bob.send(bob)
    target_alice = target_alice.send(alice)

    # organize pointers into a list
    self.datasets = [(data_bob, target_bob), (data_alice, target_alice)]
def test_generic_federated_learning():
    hook = sy.TorchHook(torch)
    bob = sy.VirtualWorker(hook, id="bob")
    alice = sy.VirtualWorker(hook, id="alice")

    data = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1.0]], requires_grad=True)
    target = torch.tensor([[0], [0], [1], [1.0]], requires_grad=True)

    data_bob = data[0:2]
    target_bob = target[0:2]
    data_alice = data[2:]
    target_alice = target[2:]

    model = nn.Linear(2, 1)

    data_bob = data_bob.send(bob)
    data_alice = data_alice.send(alice)
    target_bob = target_bob.send(bob)
    target_alice = target_alice.send(alice)

    datasets = [(data_bob, target_bob), (data_alice, target_alice)]

    def train():
        opt = optim.SGD(params=model.parameters(), lr=0.1)
        for _ in range(2):
            for data, target in datasets:
                model.send(data.location)
                opt.zero_grad()
                pred = model(data)
                loss = ((pred - target) ** 2).sum()
                loss.backward()
                opt.step()
                model.get()

    train()
def test_plan_built_on_method(hook):
    """Test @sy.method2plan and plan send / get / send."""
    x11 = torch.tensor([-1, 2.0]).tag("input_data")
    x12 = torch.tensor([1, -2.0]).tag("input_data2")
    x21 = torch.tensor([-1, 2.0]).tag("input_data")
    x22 = torch.tensor([1, -2.0]).tag("input_data2")

    device_1 = sy.VirtualWorker(hook, id="device_1", data=(x11, x12))
    device_2 = sy.VirtualWorker(hook, id="device_2", data=(x21, x22))

    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.fc1 = nn.Linear(2, 3)
            self.fc2 = nn.Linear(3, 2)

        @sy.method2plan
        def forward(self, x):
            x = F.relu(self.fc1(x))
            x = self.fc2(x)
            return F.log_softmax(x, dim=0)

    net = Net()

    net.send(device_1)
    net.forward.send(device_1)

    pointer_to_data = device_1.search("input_data")[0]
    pointer_to_result = net(pointer_to_data)
    pointer_to_result.get()

    net.get()
    net.forward.get()

    net.send(device_2)
    net.forward.send(device_2)

    pointer_to_data = device_2.search("input_data")[0]
    pointer_to_result = net(pointer_to_data)
    pointer_to_result.get()
def get_dataloaders(file, logs, tr_data_dstr, test_data_distr, num_workers=4,
                    train_batch_size=16, test_batch_size=16, size_split=None):
    dataset = pd.read_csv(file, low_memory=False, squeeze=True)

    # Create virtual workers
    hook = sy.TorchHook(torch)
    workers = []
    for idx in range(num_workers):
        workers.append(sy.VirtualWorker(hook, id="worker" + str(idx)))

    # Set aside the test dataset, which will be the same for all the workers
    train_data, test_data = _train_validation_split(dataset, 10)

    # If splitting by attack, ignore the requested number of workers
    if tr_data_dstr == "by_attack" or test_data_distr == "by_attack":
        num_workers = 4

    distr = data_distribution.Distribute(num_workers)
    train_data_subsets, train_distribution = distr.perform_split(
        tr_data_dstr, train_data, size=size_split)
    test_data_subsets, test_distribution = distr.perform_split(
        test_data_distr, test_data)

    logs.plot_distribution(train_distribution, "train_distribution")
    logs.plot_distribution(test_distribution, "test_distribution")
    logs.save_loaders(train_data_subsets, test_data_subsets)

    # Remember how many samples each worker has (needed for FedAvg)
    worker_sizes = []
    for value in train_data_subsets.values():
        worker_sizes.append(len(value))
    assert len(worker_sizes) == len(workers)

    fed_dataset_train = _distribute_among_workers(train_data_subsets, workers)
    fed_dataset_test = _distribute_among_workers(test_data_subsets, workers)

    fed_loader_train = sy.FederatedDataLoader(
        fed_dataset_train, batch_size=train_batch_size, shuffle=True)
    fed_loader_test = sy.FederatedDataLoader(
        fed_dataset_test, batch_size=test_batch_size, shuffle=True)

    return fed_loader_train, fed_loader_test, workers, worker_sizes
def test_send_frozen():
    hook = syft.TorchHook(torch)
    worker = syft.VirtualWorker(hook, id="worker")

    d_in, h, d_out = 1000, 100, 10
    model = torch.nn.Sequential(
        torch.nn.Linear(d_in, h),
        torch.nn.ReLU(),
        torch.nn.Linear(h, d_out),
    )
    for param in model.parameters():
        param.requires_grad = False

    model.send(worker)
def serialize_model_params(params):
    """Serializes a list of tensors into a State protobuf."""
    model_params_state = State(state_placeholders=[
        PlaceHolder().instantiate(param) for param in params
    ])

    # make fake local worker for serialization
    worker = sy.VirtualWorker(hook=None)

    pb = protobuf.serde._bufferize(worker, model_params_state)
    serialized_state = pb.SerializeToString()

    return serialized_state
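# Hedged usage sketch for serialize_model_params (assumes the PySyft 0.2.x
# State/PlaceHolder/protobuf imports used above are in scope):
import torch.nn as nn

linear = nn.Linear(2, 1)
blob = serialize_model_params(list(linear.parameters()))
print(len(blob))  # size in bytes of the serialized State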
def test_that_vertical_dataset_can_return_datasets():
    sy.local_worker.clear_objects()
    alice = sy.VirtualWorker(id="alice", hook=hook, is_client_worker=False)
    bob = sy.VirtualWorker(id="bob", hook=hook, is_client_worker=False)

    inputs = th.tensor([1, 2, 3, 4.0])
    targets = th.tensor([1, 2, 3, 4.0])
    dataset = PartitionedDataset(data=inputs, targets=targets)
    vertical_dataset = dataset.vertically_federate((alice, bob))
    assert vertical_dataset.workers == ["alice", "bob"]

    # Collect alice's dataset
    alice_dataset = vertical_dataset.get_dataset("alice")

    # VerticalDataset should only have bob now
    assert vertical_dataset.workers == ["bob"]

    alice.remove_worker_from_local_worker_registry()
    bob.remove_worker_from_local_worker_registry()
def test_keras_activations_fn():
    hook = syft.TensorFlowHook(tf)
    bob = syft.VirtualWorker(hook, id="bob")

    x_to_give = tf.constant([-2.0, 3.0, 5.0])
    expected = tf.keras.activations.relu(x_to_give)

    x_ptr = x_to_give.send(bob)
    relu_ptr = tf.keras.activations.relu(x_ptr)
    actual = relu_ptr.get()

    assert tf.math.equal(actual, expected).numpy().all()
def force_detail(worker: AbstractWorker, worker_tuple: tuple) -> "sy.VirtualWorker":
    worker_id, _objects, auto_add = worker_tuple
    worker_id = sy.serde.msgpack.serde._detail(worker, worker_id)

    result = sy.VirtualWorker(sy.hook, worker_id, auto_add=auto_add)
    _objects = sy.serde.msgpack.serde._detail(worker, _objects)
    result._objects = _objects

    # make sure they weren't accidentally double registered
    for _, obj in _objects.items():
        if obj.id in worker._objects:
            del worker._objects[obj.id]

    return result
def connect_to_workers(n_workers):
    """
    Simulates the existence of N workers

    Args:
        n_workers (int): No. of virtual workers to simulate
    Returns:
        N virtual workers (list(sy.VirtualWorker))
    """
    return [
        sy.VirtualWorker(pt_hook, id=f"worker{i+1}").clear_objects()
        for i in range(n_workers)
    ]
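# Hedged usage sketch: combine connect_to_workers with connect_to_crypto_provider
# above for additive secret sharing (assumes a module-level
# `pt_hook = sy.TorchHook(torch)`, as both helpers do).
workers = connect_to_workers(3)
crypto_provider = connect_to_crypto_provider()
x_shared = torch.tensor([1, 2, 3]).share(*workers, crypto_provider=crypto_provider)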