Example 1
def test_vertically_federated_raises_if_more_than_two_workers_are_provided():
    sy.local_worker.clear_objects()

    alice = sy.VirtualWorker(id="alice", hook=hook, is_client_worker=False)
    bob = sy.VirtualWorker(id="bob", hook=hook, is_client_worker=False)
    charlie = sy.VirtualWorker(id="charlie", hook=hook, is_client_worker=False)

    inputs = th.tensor([1, 2, 3, 4.0])
    targets = th.tensor([1, 2, 3, 4.0])

    dataset = PartitionedDataset(inputs, targets)

    with pytest.raises(AssertionError):
        vertical_dataset = dataset.vertically_federate((alice, bob, charlie))

    alice.remove_worker_from_local_worker_registry()
    bob.remove_worker_from_local_worker_registry()
    charlie.remove_worker_from_local_worker_registry()
Example 2
def test_plan_built_on_method(hook):
    """
    Test @sy.method2plan and plan send / get / send
    """
    hook.local_worker.is_client_worker = False

    x11 = th.tensor([-1, 2.0]).tag("input_data")
    x21 = th.tensor([-1, 2.0]).tag("input_data")

    device_1 = sy.VirtualWorker(hook, id="device_1", data=(x11, ))
    device_2 = sy.VirtualWorker(hook, id="device_2", data=(x21, ))

    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.fc1 = nn.Linear(2, 3)
            self.fc2 = nn.Linear(3, 2)

        @sy.method2plan
        def forward(self, x):
            x = F.relu(self.fc1(x))
            x = self.fc2(x)
            return F.log_softmax(x, dim=0)

    net = Net()

    # build
    net.forward.build(th.tensor([1, 2.0]))

    net.send(device_1)
    pointer_to_data = device_1.search("input_data")[0]
    pointer_to_result = net(pointer_to_data)

    assert isinstance(pointer_to_result.get(), th.Tensor)

    net.get()
    net.send(device_2)

    pointer_to_data = device_2.search("input_data")[0]
    pointer_to_result = net(pointer_to_data)

    assert isinstance(pointer_to_result.get(), th.Tensor)

    hook.local_worker.is_client_worker = True
Example 3
def run(constant_overwrites):
    config_path = ROOT_DIR / 'hyperparams.yml'
    constants = merge_dict(load_hyperparams(config_path), constant_overwrites)
    use_cuda = constants['cuda'] and torch.cuda.is_available()
    hook = sy.TorchHook(torch)

    # The organisations that will participate in training
    org1 = sy.VirtualWorker(hook, id="org1")
    org2 = sy.VirtualWorker(hook, id="org2")

    torch.manual_seed(constants['seed'])
    device = torch.device('cuda' if use_cuda else 'cpu')
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    batch_size = constants['batch_size']
    test_batch_size = constants['test_batch_size']
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])
    dataset = datasets.MNIST('../data',
                             train=True,
                             download=True,
                             transform=transform)
    federated_train_loader = sy.FederatedDataLoader(dataset.federate(
        (org1, org2)),
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    **kwargs)

    test_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=test_batch_size,
                                              shuffle=True,
                                              **kwargs)

    model = CNN().to(device)
    optimizer = optim.SGD(model.parameters(), lr=constants['learning_rate'])

    for epoch in range(1, constants['n_epochs'] + 1):
        train(constants, model, device, federated_train_loader, optimizer,
              epoch)
        test(constants, model, device, test_loader)

    if constants['save_model']:
        torch.save(model.state_dict(), 'mnist_cnn.pt')
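The train() helper used above is not shown in this example. Below is a minimal sketch of the usual PySyft 0.2.x federated loop it likely implements; the 'log_interval' key and the functional import are assumptions.

import torch.nn.functional as F

def train(constants, model, device, federated_train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(federated_train_loader):
        # Send the model to whichever worker holds this batch.
        model.send(data.location)
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        # Retrieve the updated model before moving to the next batch.
        model.get()
        if batch_idx % constants.get('log_interval', 10) == 0:
            # Retrieve the loss value for logging.
            print('Train Epoch: {} Batch: {} Loss: {:.6f}'.format(
                epoch, batch_idx, loss.get().item()))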
Example 4
    def _update_node_infos(self, node_id: str):
        """ Create a new virtual worker to store/compute datasets owned by this peer.

            Args:
                node_id: ID used to identify this peer.
        """
        worker = sy.VirtualWorker(sy.hook, id=node_id)
        sy.local_worker._known_workers[node_id] = worker
        sy.local_worker.is_client_worker = False
        return worker
Example 5
    def test_encode_virtualWorker(self):
        # Given
        obj = sy.VirtualWorker()
        expected = {"mode": "subscribe", "obj": {"__worker__": 0}}

        # When
        result = self.cut.encode(obj)

        # Then
        self.assertDictEqual(expected, result)
Example 6
def test_serde_virtual_worker(hook):
    virtual_worker = syft.VirtualWorker(hook=hook, id="deserialized_worker1")
    # Populate worker
    tensor1, tensor2 = torch.tensor([1.0, 2.0]), torch.tensor([0.0])
    ptr1, ptr2 = tensor1.send(virtual_worker), tensor2.send(virtual_worker)

    serialized_worker = serde.serialize(virtual_worker, force_full_simplification=False)
    deserialized_worker = serde.deserialize(serialized_worker)

    assert virtual_worker.id == deserialized_worker.id
Example 7
def generate_workers(num_workers):
    """Generates a given number of PySyft's virtual workers"""

    workers_list = []
    # init workers
    for i in range(num_workers):
        worker = sy.VirtualWorker(hook, id=str(i))
        workers_list.append(worker)

    return workers_list
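Hypothetical usage, assuming a module-level hook created earlier with sy.TorchHook(torch):

workers = generate_workers(3)
print([w.id for w in workers])  # ['0', '1', '2']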
Example 8
def get_fog_graph(hook, num_workers, num_clusters,
                  shuffle_workers=True, uniform_clusters=True, fog=True):
    # Define workers and layers
    workers = {}
    agg_map = {}
    layer = 0
    for id_ in range(num_workers):
        name = 'L{}_W{}'.format(layer, id_)
        workers[name] = sy.VirtualWorker(hook, id=name)

    layer = 1

    if not fog:
        # single layer model averaging fl
        name = 'L1_W0'
        workers[name] = sy.VirtualWorker(hook, id=name)
        worker_ids = [_ for _ in workers.keys() if 'L0' in _]
        agg_map[name] = worker_ids

        return agg_map, workers

    for num_cluster in num_clusters:
        # multi layer aggregation fog learning
        for id_ in range(num_cluster):
            name = 'L{}_W{}'.format(layer, id_)
            workers[name] = sy.VirtualWorker(hook, id=name)
        layer += 1

    for level in range(1, len(num_clusters)+1):
        cluster_ids = [_ for _ in workers.keys() if 'L{}'.format(level) in _]
        worker_ids = [_ for _ in workers.keys() if 'L{}'.format(level-1) in _]
        if shuffle_workers:
            worker_ids = list(np.array(worker_ids)[
                np.random.permutation(len(worker_ids))])
        cluster_sizes = get_cluster_sizes(len(worker_ids),
                                          len(cluster_ids), uniform_clusters)
        indices = [sum(cluster_sizes[:id_])
                   for id_ in range(len(cluster_sizes)+1)]
        for id_ in range(len(cluster_ids)):
            agg_map[cluster_ids[id_]] = worker_ids[indices[id_]: indices[id_+1]]

    return agg_map, workers
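A hedged usage sketch, assuming a module-level hook: num_clusters is a list with one entry per aggregation layer, so the call below builds 8 leaf workers, 2 mid-layer aggregators, and 1 root.

agg_map, workers = get_fog_graph(hook, num_workers=8, num_clusters=[2, 1])
# agg_map maps each aggregator id (e.g. 'L1_W0') to the worker ids it averages.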
Example 9
def connect_to_crypto_provider():
    """ Simulates the existence of an arbitor to facilitate
        model generation & client-side utilisation

    Returns:
        Arbiter (i.e. TTP) (sy.VirtualWorker)
    """
    return sy.VirtualWorker(
        pt_hook,
        id="crypto_provider"
    ).clear_objects()
Example 10
def workers(hook):
    alice = syft.VirtualWorker(id="alice", hook=hook, is_client_worker=False)
    bob = syft.VirtualWorker(id="bob", hook=hook, is_client_worker=False)
    james = syft.VirtualWorker(id="james", hook=hook, is_client_worker=False)

    bob.add_workers([alice, james])
    alice.add_workers([bob, james])
    james.add_workers([bob, alice])
    hook.local_worker.add_workers([alice, bob, james])
    # TODO: should one set this boolean to true?
    # It was done previously in self.setUp() from `test_hook.py`
    # hook.local_worker.is_client_worker = True

    output = {}
    output["me"] = hook.local_worker
    output["alice"] = alice
    output["bob"] = bob
    output["james"] = james

    return output
Example 11
 def create_workers(self, workers_id_list):
     logging.info("Creating workers...")
     for worker_id in workers_id_list:
         if worker_id not in self.workers:
             logging.debug("Creating the worker: {}".format(worker_id))
             self.workers[worker_id] = sy.VirtualWorker(self.hook,
                                                        id=worker_id)
         else:
             logging.debug(
                 "Worker {} exists. Skip creating this worker".format(
                     worker_id))
Example 12
def test_spinup_time(hook):
    """Tests to ensure that virtual workers intialized with 10000 data points
    load in under 1 seconds. This is needed to ensure that virtual workers
    spun up inside web frameworks are created quickly enough to not cause timeout errors"""
    data = []
    for i in range(10000):
        data.append(torch.Tensor(5, 5).random_(100))
    start_time = time()
    dummy = sy.VirtualWorker(hook, id="dummy", data=data)
    end_time = time()
    assert (end_time - start_time) < 1
Example 13
def create_virtual_workers(num_workers=None, id_list=None):
    if id_list is None:
        if num_workers is None:
            raise ValueError('Either num_workers or id_list must be provided.')
        id_list = [f'worker_{i}' for i in range(num_workers)]
    elif num_workers is not None and len(id_list) != num_workers:
        logger.warning(
            'Number of workers and id_list length do not match.'
            ' id_list will be used and num_workers will be ignored.')

    workers = [sy.VirtualWorker(hook, id=i) for i in id_list]
    return workers, id_list
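Hypothetical usage:

workers, ids = create_virtual_workers(num_workers=2)
# ids == ['worker_0', 'worker_1']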
Example 14
def test_pointer_tensor_simplify():
    """Test the simplification of PointerTensor"""

    alice = syft.VirtualWorker(syft.torch.hook, id="alice")
    input_tensor = PointerTensor(id=1000, location=alice, owner=alice)

    output = serde._simplify(input_tensor)

    assert output[1][0] == input_tensor.id
    assert output[1][1] == input_tensor.id_at_location
    assert output[1][2] == input_tensor.owner.id
Example 15
def test_vertically_federate_raises_if_dataset_does_not_have_data_and_targets():
    sy.local_worker.clear_objects()

    alice = sy.VirtualWorker(id="alice", hook=hook, is_client_worker=False)
    bob = sy.VirtualWorker(id="bob", hook=hook, is_client_worker=False)

    inputs = th.tensor([1, 2, 3, 4.0])
    targets = th.tensor([1, 2, 3, 4.0])

    data_only_dataset = PartitionedDataset(data=inputs)

    with pytest.raises(AssertionError):
        vertical_dataset = data_only_dataset.vertically_federate((alice, bob))

    targets_only_dataset = PartitionedDataset(targets=targets)

    with pytest.raises(AssertionError):
        vertical_dataset = targets_only_dataset.vertically_federate((alice, bob))

    alice.remove_worker_from_local_worker_registry()
    bob.remove_worker_from_local_worker_registry()
Example 16
    def init_workers(self, num_workers):
        workers = []
        for i in range(num_workers):
            worker = {}
            worker["instance"] = syft.VirtualWorker(hook, id=f"worker_{i}")
            worker["model"] = None
            worker["optim"] = None
            worker["criterion"] = None
            worker["loss"] = None

            workers.append(worker)
        return workers
Example 17
def setUpModule():
    print("setup module")

    global me
    global bob
    global alice
    global james
    global hook

    hook = sy.TorchHook(verbose=True)

    me = hook.local_worker
    me.is_client_worker = False

    bob = sy.VirtualWorker(id="bob", hook=hook, is_client_worker=False)
    alice = sy.VirtualWorker(id="alice", hook=hook, is_client_worker=False)
    james = sy.VirtualWorker(id="james", hook=hook, is_client_worker=False)

    bob.add_workers([alice, james])
    alice.add_workers([bob, james])
    james.add_workers([bob, alice])
Example 18
def test_clear_object_for_worker_created_with_pre_existing_id(hook):

    worker = sy.VirtualWorker(hook, id="worker")
    worker.clear_objects()

    ptr = th.tensor([1, 2, 3]).send(worker)

    assert len(worker._known_workers[worker.id]._objects) == len(worker._objects)
    assert len(worker._objects) == 1

    # create worker with pre-existing id
    worker = sy.VirtualWorker(hook, id="worker")
    worker.clear_objects()

    assert len(worker._known_workers[worker.id]._objects) == len(worker._objects)
    assert len(worker._objects) == 0

    ptr = th.tensor([1, 2, 3]).send(worker)

    assert len(worker._known_workers[worker.id]._objects) == len(worker._objects)
    assert len(worker._objects) == 1
Example 19
def train_federated(trainset, num_workers=10, epochs=5, batch_size=64):
    manager = sy.VirtualWorker(hook, id="manager")
    workers = create_workers(num_workers)
    worker_data = send_train_data_to_workers(workers, trainset, batch_size)

    model = MNISTClassifier()
    criterion = nn.NLLLoss()

    for e in range(epochs):
        models = [model.copy().send(worker) for worker in workers]
        optimizers = [
            optim.Adam(model.parameters(), lr=0.003) for model in models
        ]

        mean_loss = 0

        for i, worker in enumerate(worker_data):
            model = models[i]
            optimizer = optimizers[i]

            # batches
            for images, labels in worker:
                optimizer.zero_grad()

                log_ps = model(images.float())
                loss = criterion(log_ps, labels)
                loss.backward()
                optimizer.step()

                mean_loss += (loss.get().item() / len(worker_data))

        for model in models:
            model.move(manager)

        with torch.no_grad():
            fc_layers = ("fc1", "fc2", "fc3", "fc4")

            for layer in fc_layers:
                weights = torch.stack(
                    [getattr(m, layer).weight for m in models])
                biases = torch.stack([getattr(m, layer).bias for m in models])

                # Average across the stacked model dimension (dim=0) so that
                # parameter shapes are preserved; a plain mean would collapse
                # each stack to a scalar.
                mean_weight = torch.mean(weights, dim=0).get()
                mean_bias = torch.mean(biases, dim=0).get()

                model_layer = getattr(model, layer)
                model_layer.weight.set_(mean_weight)
                model_layer.bias.set_(mean_bias)

        model = model.get()

        print("Epoch: {:0>2d}/{:0>2d} | ".format(e + 1, epochs),
              "Mean training Loss: {:.3f} ".format(mean_loss))
Example 20
def get_federated_dataset(data, users, context_size, hook):
    users_data = []
    workers = []
    for user in users:
        user_worker = sy.VirtualWorker(hook, id=user)
        cur_data = data[data.user == user]
        X, Y = extend_data(cur_data.X, cur_data.Y, context_size)
        X = th.tensor(X)
        Y = th.tensor(Y)
        users_data.append(sy.BaseDataset(X, Y).send(user_worker))
        workers.append(user_worker)
    return sy.FederatedDataset(users_data), workers
Example 21
    def setUp(self):
        hook = sy.TorchHook(torch, verbose=True)

        self.me = hook.local_worker
        self.me.is_client_worker = True

        instance_id = str(int(10e10 * random.random()))
        bob = sy.VirtualWorker(id=f"bob{instance_id}", hook=hook, is_client_worker=False)
        alice = sy.VirtualWorker(id=f"alice{instance_id}", hook=hook, is_client_worker=False)
        james = sy.VirtualWorker(id=f"james{instance_id}", hook=hook, is_client_worker=False)

        bob.add_workers([alice, james])
        alice.add_workers([bob, james])
        james.add_workers([bob, alice])

        self.hook = hook

        self.bob = bob
        self.alice = alice
        self.james = james

        # A Toy Dataset
        data = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1.0]], requires_grad=True)
        target = torch.tensor([[0], [0], [1], [1.0]], requires_grad=True)

        # get pointers to training data on each worker by
        # sending some training data to bob and alice
        data_bob = data[0:2]
        target_bob = target[0:2]

        data_alice = data[2:]
        target_alice = target[2:]

        data_bob = data_bob.send(bob)
        data_alice = data_alice.send(alice)
        target_bob = target_bob.send(bob)
        target_alice = target_alice.send(alice)

        # organize pointers into a list
        self.datasets = [(data_bob, target_bob), (data_alice, target_alice)]
Example 22
def test_generic_federated_learning():
    """

    """
    hook = sy.TorchHook(torch)

    bob = sy.VirtualWorker(hook, id="bob")
    alice = sy.VirtualWorker(hook, id="alice")

    data = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1.]], requires_grad=True)
    target = torch.tensor([[0], [0], [1], [1.]], requires_grad=True)

    data_bob = data[0:2]
    target_bob = target[0:2]

    data_alice = data[2:]
    target_alice = target[2:]

    model = nn.Linear(2, 1)
    opt = optim.SGD(params=model.parameters(), lr=0.1)

    data_bob = data_bob.send(bob)
    data_alice = data_alice.send(alice)
    target_bob = target_bob.send(bob)
    target_alice = target_alice.send(alice)
    datasets = [(data_bob, target_bob), (data_alice, target_alice)]

    def train():
        opt = optim.SGD(params=model.parameters(), lr=0.1)
        for _ in range(2):
            for data, target in datasets:
                model.send(data.location)
                opt.zero_grad()
                pred = model(data)
                loss = ((pred - target)**2).sum()
                loss.backward()
                opt.step()
                model.get()

    train()
Example 23
def test_plan_built_on_method(hook):
    """
    Test @sy.method2plan and plan send / get / send
    """
    x11 = torch.tensor([-1, 2.0]).tag("input_data")
    x12 = torch.tensor([1, -2.0]).tag("input_data2")
    x21 = torch.tensor([-1, 2.0]).tag("input_data")
    x22 = torch.tensor([1, -2.0]).tag("input_data2")

    device_1 = sy.VirtualWorker(hook, id="device_1", data=(x11, x12))
    device_2 = sy.VirtualWorker(hook, id="device_2", data=(x21, x22))

    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.fc1 = nn.Linear(2, 3)
            self.fc2 = nn.Linear(3, 2)

        @sy.method2plan
        def forward(self, x):
            x = F.relu(self.fc1(x))
            x = self.fc2(x)
            return F.log_softmax(x, dim=0)

    net = Net()

    net.send(device_1)
    net.forward.send(device_1)
    pointer_to_data = device_1.search("input_data")[0]
    pointer_to_result = net(pointer_to_data)
    pointer_to_result.get()

    net.get()
    net.forward.get()

    net.send(device_2)
    net.forward.send(device_2)
    pointer_to_data = device_2.search("input_data")[0]
    pointer_to_result = net(pointer_to_data)
    pointer_to_result.get()
Example 24
def get_dataloaders(file,
                    logs,
                    tr_data_dstr,
                    test_data_distr,
                    num_workers=4,
                    train_batch_size=16,
                    test_batch_size=16,
                    size_split=None):

    dataset = pd.read_csv(file, low_memory=False, squeeze=True)

    # Create Virtual Workers
    hook = sy.TorchHook(torch)
    workers = []
    for idx in range(num_workers):
        workers.append(sy.VirtualWorker(hook, id="worker" + str(idx)))

    # Set aside the test dataset, which will be the same for all the workers
    train_data, test_data = _train_validation_split(dataset, 10)

    # If by_attack - ignore the number of workers
    if tr_data_dstr == "by_attack" or test_data_distr == "by_attack":
        num_workers = 4

    distr = data_distribution.Distribute(num_workers)

    train_data_subsets, train_distribution = distr.perform_split(
        tr_data_dstr, train_data, size=size_split)
    test_data_subsets, test_distribution = distr.perform_split(
        test_data_distr, test_data)

    logs.plot_distribution(train_distribution, "train_distribution")
    logs.plot_distribution(test_distribution, "test_distribution")
    logs.save_loaders(train_data_subsets, test_data_subsets)

    # Remember how many samples each worker has (needed for FedAvg)
    worker_sizes = []
    for value in train_data_subsets.values():
        worker_sizes.append(len(value))
    assert len(worker_sizes) == len(workers)

    fed_dataset_train = _distribute_among_workers(train_data_subsets, workers)
    fed_dataset_test = _distribute_among_workers(test_data_subsets, workers)

    fed_loader_train = sy.FederatedDataLoader(fed_dataset_train,
                                              batch_size=train_batch_size,
                                              shuffle=True)
    fed_loader_test = sy.FederatedDataLoader(fed_dataset_test,
                                             batch_size=test_batch_size,
                                             shuffle=True)

    return fed_loader_train, fed_loader_test, workers, worker_sizes
Example 25
def test_send_frozen():
    hook = syft.TorchHook(torch)
    worker = syft.VirtualWorker(hook, id="worker")

    d_in, h, d_out = 1000, 100, 10

    model = torch.nn.Sequential(torch.nn.Linear(d_in, h), torch.nn.ReLU(),
                                torch.nn.Linear(h, d_out))

    for param in model.parameters():
        param.requires_grad = False

    model.send(worker)
Example 26
    def serialize_model_params(params):
        """Serializes list of tensors into State/protobuf."""
        model_params_state = State(state_placeholders=[
            PlaceHolder().instantiate(param) for param in params
        ])

        # make fake local worker for serialization
        worker = sy.VirtualWorker(hook=None)

        pb = protobuf.serde._bufferize(worker, model_params_state)
        serialized_state = pb.SerializeToString()

        return serialized_state
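Hypothetical usage, assuming torch is imported:

params = [torch.tensor([1.0, 2.0]), torch.tensor([[0.5]])]
blob = serialize_model_params(params)  # bytes, ready to store or transmit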
Example 27
def test_that_vertical_dataset_can_return_datasets():
    sy.local_worker.clear_objects()

    alice = sy.VirtualWorker(id="alice", hook=hook, is_client_worker=False)
    bob = sy.VirtualWorker(id="bob", hook=hook, is_client_worker=False)

    inputs = th.tensor([1, 2, 3, 4.0])
    targets = th.tensor([1, 2, 3, 4.0])

    dataset = PartitionedDataset(data=inputs, targets=targets)

    vertical_dataset = dataset.vertically_federate((alice, bob))
    assert vertical_dataset.workers == ["alice", "bob"]

    # Collect alice's dataset
    alice_dataset = vertical_dataset.get_dataset("alice")

    # VerticalDataset should only have bob now
    assert vertical_dataset.workers == ["bob"]

    alice.remove_worker_from_local_worker_registry()
    bob.remove_worker_from_local_worker_registry()
Example 28
def test_keras_activations_fn():
    hook = syft.TensorFlowHook(tf)
    bob = syft.VirtualWorker(hook, id="bob")

    x_to_give = tf.constant([-2.0, 3.0, 5.0])
    expected = tf.keras.activations.relu(x_to_give)

    x_ptr = x_to_give.send(bob)

    relu_ptr = tf.keras.activations.relu(x_ptr)
    actual = relu_ptr.get()

    assert tf.math.equal(actual, expected).numpy().all()
Example 29
    def force_detail(worker: AbstractWorker, worker_tuple: tuple) -> tuple:
        worker_id, _objects, auto_add = worker_tuple
        worker_id = sy.serde.msgpack.serde._detail(worker, worker_id)

        result = sy.VirtualWorker(sy.hook, worker_id, auto_add=auto_add)
        _objects = sy.serde.msgpack.serde._detail(worker, _objects)
        result._objects = _objects

        # make sure they weren't accidentally double registered
        for _, obj in _objects.items():
            if obj.id in worker._objects:
                del worker._objects[obj.id]

        return result
Example 30
def connect_to_workers(n_workers):
    """ Simulates the existence of N workers

    Args:
        n_workers (int): No. of virtual workers to simulate
    Returns:
        N virtual workers (list(sy.VirtualWorker))
    """
    return [
        sy.VirtualWorker(
            pt_hook, id=f"worker{i+1}"
        ).clear_objects(
        ) for i in range(n_workers)
    ]
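Hypothetical usage, pairing this helper with connect_to_crypto_provider from Example 9:

workers = connect_to_workers(3)  # [worker1, worker2, worker3]
crypto_provider = connect_to_crypto_provider()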