Example #1
def start_websocket_server_worker(id,
                                  host,
                                  port,
                                  hook,
                                  verbose,
                                  keep_labels=None,
                                  training=True):
    """Helper function for spinning up a websocket server and setting up the local datasets."""

    server = WebsocketServerWorker(id=id,
                                   host=host,
                                   port=port,
                                   hook=hook,
                                   verbose=verbose)

    # Setup toy data (mnist example)
    mnist_dataset = datasets.MNIST(
        root="./data",
        train=training,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ]),
    )

    if training:
        indices = np.isin(mnist_dataset.targets, keep_labels).astype("uint8")
        logger.info("number of true indices: %s", indices.sum())
        selected_data = (torch.native_masked_select(
            mnist_dataset.data.transpose(0, 2),
            torch.tensor(indices)).view(28, 28, -1).transpose(2, 0))
        logger.info("after selection: %s", selected_data.shape)
        selected_targets = torch.native_masked_select(mnist_dataset.targets,
                                                      torch.tensor(indices))

        dataset = sy.BaseDataset(data=selected_data,
                                 targets=selected_targets,
                                 transform=mnist_dataset.transform)
        key = "mnist"
    else:
        dataset = sy.BaseDataset(
            data=mnist_dataset.data,
            targets=mnist_dataset.targets,
            transform=mnist_dataset.transform,
        )
        key = "mnist_testing"

    server.add_dataset(dataset, key=key)

    logger.info("datasets: %s", server.datasets)
    if training:
        logger.info("len(datasets[mnist]): %s", len(server.datasets["mnist"]))

    server.start()
    return server
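
A minimal launch sketch for the helper above. It assumes PySyft 0.2.x with syft importable as sy and start_websocket_server_worker in scope; the id, host, port, and label values are illustrative, and start() inside the helper blocks until the server is stopped.

import torch
import syft as sy

hook = sy.TorchHook(torch)  # hook PyTorch so syft tensors and workers are available

# Serve only MNIST digits 0-3 for training (illustrative values)
server = start_websocket_server_worker(
    id="worker-0",
    host="0.0.0.0",
    port=8777,
    hook=hook,
    verbose=True,
    keep_labels=[0, 1, 2, 3],
    training=True,
)
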
Example #2
def setup_FL_env(training_datasets, validation_datasets,
                 testing_dataset, is_shared=False):
    """ Sets up a basic federated learning environment using virtual workers,
        with a allocated arbiter (i.e. TTP) to faciliate in model development
        & utilisation, and deploys datasets to their respective workers

    Args:

        training_datasets   (dict(tuple(th.Tensor))): Datasets to be used for training
        validation_datasets (dict(tuple(th.Tensor))): Datasets to be used for validation
        testing_dataset           (tuple(th.Tensor)): Datasets to be used for testing
        is_shared (bool): Toggles if SMPC encryption protocols are active
    Returns:
        training_pointers   (dict(sy.BaseDataset))
        validation_pointers (dict(sy.BaseDataset))
        testing_pointer     (sy.BaseDataset)
        workers            (list(sy.VirtualWorker))
        crypto_provider    (sy.VirtualWorker)
    """
    # Simulate FL computation amongst K worker nodes,
    # where K is the no. of datasets to be federated
    workers = connect_to_workers(n_workers=len(training_datasets))

    # Allow for 1 exchanger/Arbiter (i.e. TTP)
    crypto_provider = connect_to_crypto_provider()
    crypto_provider.clear_objects()

    assert (len(crypto_provider._objects) == 0)

    # Send training & validation datasets to their respective workers
    training_pointers = {}
    validation_pointers = {}
    for w_idx in range(len(workers)):

        # Retrieve & prepare worker for receiving dataset
        curr_worker = workers[w_idx]
        curr_worker.clear_objects()

        assert (len(curr_worker._objects) == 0)

        train_data = training_datasets[w_idx]
        validation_data = validation_datasets[w_idx]

        # Cast dataset into a Tensor & send it to the relevant worker
        train_pointer = sy.BaseDataset(*train_data).send(curr_worker)
        validation_pointer = sy.BaseDataset(*validation_data).send(curr_worker)

        # Store data pointers for subsequent reference
        training_pointers[curr_worker] = train_pointer
        validation_pointers[curr_worker] = validation_pointer

    # The testing dataset is hosted by the crypto provider (arbiter/TTP) rather than a training worker
    testing_pointer = sy.BaseDataset(*testing_dataset).send(crypto_provider)

    return training_pointers, validation_pointers, testing_pointer, workers, crypto_provider
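
A sketch of how the returned pointers might be consumed. It assumes the connect_to_workers and connect_to_crypto_provider helpers referenced above are available, and that training_datasets / validation_datasets are dicts of (data, target) tensor tuples keyed 0..K-1; the printout is illustrative only.

training_ptrs, validation_ptrs, testing_ptr, workers, crypto_provider = setup_FL_env(
    training_datasets, validation_datasets, testing_dataset, is_shared=False
)

for worker, dataset_ptr in training_ptrs.items():
    # dataset_ptr.data and dataset_ptr.targets are expected to be pointer tensors living on `worker`
    print(worker.id, dataset_ptr.data)
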
Example #3
def start_websocket_server_worker(id,
                                  host,
                                  port,
                                  hook,
                                  verbose,
                                  keep_labels=None,
                                  training=True):
    """Helper function for spinning up a websocket server and setting up the local datasets."""
    d = load_cnn_virus()
    server = websocket_server.WebsocketServerWorker(id=id,
                                                    host=host,
                                                    port=port,
                                                    hook=hook,
                                                    verbose=verbose)

    if training:
        #print(d[0].shape)
        #print(mnist_dataset.data.transpose(0, 2).shape)
        indices = np.isin(d[1], keep_labels).astype("uint8")
        #print((torch.tensor(indices)).shape)
        logger.info("number of true indices: %s", indices.sum())
        selected_data = (torch.native_masked_select(d[0].transpose(
            0, 1), torch.tensor(indices)).view(470,
                                               -1).transpose(1, 0).to(device))
        logger.info("after selection: %s", selected_data.shape)
        selected_targets = torch.native_masked_select(
            d[1], torch.tensor(indices)).to(device)

        dataset = sy.BaseDataset(data=selected_data, targets=selected_targets)
        key = "mnist"
    else:
        dataset = sy.BaseDataset(
            data=d[0].to(device),
            targets=d[1].to(device),
        )
        key = "mnist_testing"

    server.add_dataset(dataset, key=key)
    count = [0] * 5
    logger.info("MNIST dataset (%s set), available numbers on %s: ",
                "train" if training else "test", id)
    for i in range(5):
        count[i] = (dataset.targets == i).sum().item()
        logger.info("      %s: %s", i, count[i])

    logger.info("datasets: %s", server.datasets)
    if training:
        logger.info("len(datasets[mnist]): %s", len(server.datasets[key]))

    server.start()
    return server
Example #4
def prepare_training(hook, alice):  # pragma: no cover

    data, target = utils.create_gaussian_mixture_toy_data(nr_samples=100)
    dataset_key = "gaussian_mixture"

    dataset = sy.BaseDataset(data, target)
    alice.add_dataset(dataset, key=dataset_key)

    @hook.torch.jit.script
    def loss_fn(pred, target):
        return ((pred - target.unsqueeze(1))**2).mean()

    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.fc1 = nn.Linear(2, 3)
            self.fc2 = nn.Linear(3, 2)
            self.fc3 = nn.Linear(2, 1)

        def forward(self, x):
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            return x

    model_untraced = Net()

    model = torch.jit.trace(model_untraced, data)

    pred = model(data)
    loss_before = loss_fn(target=target, pred=pred)
    return model, loss_fn, data, target, loss_before, dataset_key
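
The returned objects feed directly into a TrainConfig-driven fit loop; a short sketch, assuming the sy.TrainConfig / worker.fit API used in the other examples on this page and an existing hook and alice worker.

model, loss_fn, data, target, loss_before, dataset_key = prepare_training(hook, alice)

# Ship the traced model and loss function to alice and train remotely
train_config = sy.TrainConfig(model=model, loss_fn=loss_fn, batch_size=2, epochs=1)
train_config.send(alice)

for epoch in range(5):
    loss = alice.fit(dataset_key=dataset_key)
    print("epoch {}: remote loss {}".format(epoch, loss))
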
Example #5
def main(number, start_slice, end_slice):
    mnist_dataset = TrainDataset(transform=transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))]),
                                 number=number,
                                 start_slice=start_slice,
                                 end_slice=end_slice)
    _id = 'h%s' % number
    ip = '10.0.0.%s' % number

    hook = syft.TorchHook(torch)

    server = WebsocketServerWorker(id=_id,
                                   host=ip,
                                   port=8778,
                                   hook=hook,
                                   verbose=True)
    print("Worker:{}, Dataset contains {}".format(_id,
                                                  str(len(
                                                      mnist_dataset.data))))
    dataset = syft.BaseDataset(data=mnist_dataset.data,
                               targets=mnist_dataset.target,
                               transform=mnist_dataset.transform)
    key = "targeted"
    server.add_dataset(dataset, key=key)
    server.start()
Example #6
def get_mnist_dataset(keep_labels, training=True):
    """
    Sets up MNIST dataset for training or testing.
    """
    mnist_dataset = datasets.MNIST(
        root="./data",
        train=training,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ]),
    )

    # create mnist training
    indices = np.isin(mnist_dataset.targets, keep_labels).astype("uint8")
    logger.info("number of true indices: %s", indices.sum())
    selected_data = (torch.masked_select(mnist_dataset.data.transpose(0, 2),
                                         torch.tensor(indices)).view(
                                             28, 28, -1).transpose(2, 0))
    logger.info("after selection: %s", selected_data.shape)
    selected_targets = torch.masked_select(mnist_dataset.targets,
                                           torch.tensor(indices))

    return sy.BaseDataset(data=selected_data,
                          targets=selected_targets,
                          transform=mnist_dataset.transform)
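
One way the returned BaseDataset could be used, sketched under the assumption of PySyft 0.2.x virtual workers; the worker ids and batch size are placeholders.

import torch
import syft as sy

hook = sy.TorchHook(torch)
alice = sy.VirtualWorker(hook, id="alice")
bob = sy.VirtualWorker(hook, id="bob")

# Keep only digits 0-4 and spread the resulting dataset across the two workers
mnist_base = get_mnist_dataset(keep_labels=[0, 1, 2, 3, 4], training=True)
federated_mnist = mnist_base.federate((alice, bob))

train_loader = sy.FederatedDataLoader(federated_mnist, batch_size=64, shuffle=True)
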
Example #7
def dataset_federate_noniid(dataset, workers, Ratio=[1, 1, 1], net='NOT CNN'):
    """
    Add a method to easily transform a torch.Dataset or a sy.BaseDataset
    into a sy.FederatedDataset. The dataset given is split in len(workers)
    part and sent to each workers
    """
    logger.info(
        f"Scanning and sending data to {', '.join([w.id for w in workers])}..."
    )
    datasets = []
    N = 0
    dataset_list = list(dataset)
    for n in range(0, len(workers)):
        ratio = Ratio[n] / sum(Ratio)  # compute this worker's share of the data
        num = round(ratio * len(dataset))  # number of samples to draw for this share
        Subset = dataset_list[N:N + num]  # slice out this worker's samples
        N = N + num
        data = []
        targets = []
        for d, t in Subset:
            data.append(d)
            targets.append(t)

        data = torch.cat(data)
        if net == 'CNN':
            data = torch.unsqueeze(data, 1)
        targets = torch.tensor(targets)
        worker = workers[n]
        logger.debug("Sending data to worker %s", worker.id)
        data = data.send(worker)
        targets = targets.send(worker)
        datasets.append(sy.BaseDataset(data, targets))  # .send(worker)

    logger.debug("Done!")
    return sy.FederatedDataset(datasets)
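
A usage sketch for the non-IID splitter above, assuming PySyft 0.2.x virtual workers and torchvision's MNIST; the 2:1:1 ratio and batch size are only examples.

import torch
import syft as sy
from torchvision import datasets, transforms

hook = sy.TorchHook(torch)
workers = [sy.VirtualWorker(hook, id="worker%d" % i) for i in range(3)]

mnist = datasets.MNIST(
    "./data",
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, ))
    ]),
)

# Give the first worker twice as much data as each of the other two
federated_mnist = dataset_federate_noniid(mnist, workers, Ratio=[2, 1, 1], net='CNN')
train_loader = sy.FederatedDataLoader(federated_mnist, batch_size=64, shuffle=True)
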
Example #8
def run_server(i, indices):
    import torch  # Each process should import torch to allow parallelization?

    hook = sy.TorchHook(torch)
    server = CustomWebsocketServerWorker(id=f"dataserver-{i}",
                                         host="0.0.0.0",
                                         port=f"{8777 + i}",
                                         hook=hook)

    mnist = datasets.MNIST(
        root="./data",
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ]),
    )

    is_kept_mask = torch.tensor(
        [x in indices for x in range(len(mnist.targets))])

    dataset = sy.BaseDataset(
        data=torch.masked_select(mnist.data.transpose(0, 2),
                                 is_kept_mask).view(28, 28,
                                                    -1).transpose(2, 0),
        targets=torch.masked_select(mnist.targets, is_kept_mask),
        transform=mnist.transform)

    server.add_dataset(dataset, key="mnist")
    print(f"Server {i} started")
    server.start()
Example #9
def test_train_config_with_jit_trace(hook, workers):  # pragma: no cover
    alice = workers["alice"]
    me = workers["me"]

    data = torch.tensor([[-1, 2.0], [0, 1.1], [-1, 2.1], [0, 1.2]],
                        requires_grad=True)
    target = torch.tensor([[1], [0], [1], [0]])

    dataset = sy.BaseDataset(data, target)
    alice.add_dataset(dataset, key="vectors")

    @hook.torch.jit.script
    def loss_fn(real, pred):
        return ((real.float() - pred.float())**2).mean()

    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.fc1 = nn.Linear(2, 3)
            self.fc2 = nn.Linear(3, 2)
            self.fc3 = nn.Linear(2, 1)

        def forward(self, x):
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            return x

    model_untraced = Net()

    model = torch.jit.trace(model_untraced, data)
    model_with_id = pointers.ObjectWrapper(model, sy.ID_PROVIDER.pop())

    loss_fn_with_id = pointers.ObjectWrapper(loss_fn, sy.ID_PROVIDER.pop())

    model_ptr = me.send(model_with_id, alice)
    loss_fn_ptr = me.send(loss_fn_with_id, alice)

    print("Evaluation before training")
    pred = model(data)
    loss_before = loss_fn(real=target, pred=pred)
    print("Loss: {}".format(loss_before))

    # Create and send train config
    train_config = sy.TrainConfig(model=model, loss_fn=loss_fn, batch_size=2)
    train_config.send(alice)

    for epoch in range(5):
        loss = alice.fit(dataset="vectors")
        print("-" * 50)
        print("Iteration %s: alice's loss: %s" % (epoch, loss))

    print("Evaluation after training:")
    new_model = model_ptr.get()
    pred = new_model.obj(data)
    loss_after = loss_fn(real=target, pred=pred)
    print("Loss: {}".format(loss_after))

    assert loss_after < loss_before
Example #10
def test_train_config_with_jit_trace(hook, workers):  # pragma: no cover
    alice = workers["alice"]

    data = torch.tensor([[-1, 2.0], [0, 1.1], [-1, 2.1], [0, 1.2]],
                        requires_grad=True)
    target = torch.tensor([[1], [0], [1], [0]])

    dataset = sy.BaseDataset(data, target)
    alice.add_dataset(dataset, key="gaussian_mixture")

    @hook.torch.jit.script
    def loss_fn(pred, target):
        return ((target.float() - pred.float())**2).mean()

    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.fc1 = nn.Linear(2, 3)
            self.fc2 = nn.Linear(3, 2)
            self.fc3 = nn.Linear(2, 1)

        def forward(self, x):
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            return x

    model_untraced = Net()

    model = torch.jit.trace(model_untraced, data)

    if PRINT_IN_UNITTESTS:
        print("Evaluation before training")
    pred = model(data)
    loss_before = loss_fn(target=target, pred=pred)

    if PRINT_IN_UNITTESTS:
        print("Loss: {}".format(loss_before))

    # Create and send train config
    train_config = sy.TrainConfig(model=model, loss_fn=loss_fn, batch_size=2)
    train_config.send(alice)

    for epoch in range(5):
        loss = alice.fit(dataset_key="gaussian_mixture")
        if PRINT_IN_UNITTESTS:  # pragma: no cover
            print("-" * 50)
            print("Iteration %s: alice's loss: %s" % (epoch, loss))

    new_model = train_config.model_ptr.get()
    pred = new_model.obj(data)
    loss_after = loss_fn(target=target, pred=pred)

    if PRINT_IN_UNITTESTS:  # pragma: no cover
        print("Loss before training: {}".format(loss_before))
        print("Loss after training: {}".format(loss_after))

    assert loss_after < loss_before
Example #11
def test_fit():
    data = torch.tensor([[-1, 2.0], [0, 1.1], [-1, 2.1], [0, 1.2]], requires_grad=True)
    target = torch.tensor([[1], [0], [1], [0]])

    fed_client = federated.FederatedClient()
    dataset = sy.BaseDataset(data, target)
    fed_client.add_dataset(dataset, key="vectors")

    def loss_fn(real, pred):
        return ((real.float() - pred.float()) ** 2).mean()

    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.fc1 = torch.nn.Linear(2, 3)
            self.fc2 = torch.nn.Linear(3, 2)
            self.fc3 = torch.nn.Linear(2, 1)

        def forward(self, x):
            x = torch.nn.functional.relu(self.fc1(x))
            x = torch.nn.functional.relu(self.fc2(x))
            x = self.fc3(x)
            return x

    model_untraced = Net()
    model = torch.jit.trace(model_untraced, data)
    model_id = 0
    model_ow = pointers.ObjectWrapper(obj=model, id=model_id)
    loss_id = 1
    loss_ow = pointers.ObjectWrapper(obj=loss_fn, id=loss_id)

    print("Evaluation before training")
    pred = model(data)
    loss_before = loss_fn(real=target, pred=pred)
    print("Loss: {}".format(loss_before))

    # Create and send train config
    train_config = sy.TrainConfig(
        batch_size=1, model=None, loss_fn=None, model_id=model_id, loss_fn_id=loss_id
    )

    fed_client.set_obj(model_ow)
    fed_client.set_obj(loss_ow)
    fed_client.set_obj(train_config)

    for epoch in range(5):
        loss = fed_client.fit(dataset_key="vectors")
        print("-" * 50)
        print("Iteration %s: alice's loss: %s" % (epoch, loss))

    print("Evaluation after training:")
    new_model = fed_client.get_obj(model_id)
    pred = new_model.obj(data)
    loss_after = loss_fn(real=target, pred=pred)
    print("Loss: {}".format(loss_after))

    assert loss_after < loss_before
Example #12
def _distribute_among_workers(dataset, workers):
    datasets = []

    for i, data in dataset.items():
        x_train, y_train = _data_target_split(data)

        data = x_train.send(workers[i])
        targets = y_train.send(workers[i])
        datasets.append(sy.BaseDataset(data, targets))

    return sy.FederatedDataset(datasets)
Example #13
def get_federated_dataset(data, users, context_size, hook):
    users_data = []
    workers = []
    for user in users:
        user_worker = sy.VirtualWorker(hook, id = user)
        cur_data = data[data.user == user]
        X, Y = extend_data(cur_data.X, cur_data.Y, context_size)
        X = th.tensor(X)
        Y = th.tensor(Y)
        users_data.append(sy.BaseDataset(X, Y).send(user_worker))
        workers.append(user_worker)
    return sy.FederatedDataset(users_data), workers
Example #14
File: Simple-h1.py  Project: iansee/FYP
def main():
    hook = syft.TorchHook(torch)
    data = torch.tensor([[1.0], [2.0], [3.0], [4.0]], requires_grad=True)
    target = torch.tensor([[1.0], [2.0], [3.0], [4.0]], requires_grad=False)
    dataset = syft.BaseDataset(data, target)

    h1 = WebsocketServerWorker(id="h1",
                               host="10.0.0.1",
                               port="8778",
                               hook=hook)
    h1.add_dataset(dataset, key="train")
    h1.start()
    return h1
Example #15
def init_syft_workers(eth_accounts):
    # create 3 workers and split the data between them
    # aggregator for now just: hosts the data, tests the model
    # for future discussion: MPC, Rewards, Crypto provider
    workers = data_utils.generate_virtual_workers(number_of_workers, hook)
    worker_eth_accounts = dict()

    # assign each worker a unique Ethereum account
    for i, worker in enumerate(workers):
        worker_eth_accounts[worker.id] = eth_accounts[i+1]

    central_server = sy.VirtualWorker(hook, id="aggregator")

    # Use sklearn to split into train and test
    X_train, X_val, y_train, y_val = train_test_split(
        df.drop(["ICU"], 1),
        df["ICU"],
        test_size=0.2,
        random_state=101,
        stratify=df["ICU"]
    )

    # Create a federated dataset using BaseDataset for all train
    # frames and randomly share them in an IID manner between clients
    record_list, result_list = data_utils.split_into_lists(X_train, y_train)
    record_list = data_utils.convert_to_tensors(record_list)
    base_federated_set = sy.BaseDataset(
        record_list, result_list).federate(workers)
    federated_train_loader = sy.FederatedDataLoader(base_federated_set)

    test_list, test_labels = data_utils.split_into_lists(X_val, y_val)
    test_list = data_utils.convert_to_tensors(test_list)
    test_dataset = sy.BaseDataset(test_list, test_labels)
    test_loader = torch.utils.data.DataLoader(test_dataset)
    # TODO: Implement, make necessary imports and
    # update requirements.txt file!
    return workers, federated_train_loader, test_loader, worker_eth_accounts
Example #16
def test_fl_with_trainconfig(isolated_filesystem, start_remote_server_worker_only, hook):
    os.chdir("advanced/Federated Learning with TrainConfig/")
    notebook = "Introduction to TrainConfig.ipynb"
    p_name = Path("examples/tutorials/advanced/Federated Learning with TrainConfig/")
    tested_notebooks.append(str(p_name / notebook))
    hook.local_worker.remove_worker_from_registry("alice")
    kwargs = {"id": "alice", "host": "localhost", "port": 8777, "hook": hook}
    data = torch.tensor([[0.0, 1.0], [1.0, 0.0], [1.0, 1.0], [0.0, 0.0]], requires_grad=True)
    target = torch.tensor([[1.0], [1.0], [0.0], [0.0]], requires_grad=False)
    dataset = sy.BaseDataset(data, target)
    process_remote_worker = start_remote_server_worker_only(dataset=(dataset, "xor"), **kwargs)
    res = pm.execute_notebook(notebook, "/dev/null", timeout=300)
    assert isinstance(res, nbformat.notebooknode.NotebookNode)
    process_remote_worker.terminate()
    sy.VirtualWorker(id="alice", hook=hook, is_client_worker=False)
Example #17
    def collect_datasets(self, grid):
        loaders = []
        tags = ['train', 'valid', 'test']
        for tag in tags:
            found_X = grid.search("#X", f"#{tag}")
            found_y = grid.search("#Y", f"#{tag}")

            datasets = []
            for worker in found_X.keys():
                datasets.append(
                    sy.BaseDataset(found_X[worker][0], found_y[worker][0]))

            dataset = sy.FederatedDataset(datasets)
            loaders.append(
                sy.FederatedDataLoader(
                    dataset, batch_size=self.model_config.batch_size))

        return loaders
Example #18
    def create_femnist_datasets(self, raw_dataset, workers_idx):
        datasets = dict()

        for worker_id in workers_idx:
            images = tensor(raw_dataset[worker_id]['x'], dtype=float32)
            labels = tensor(raw_dataset[worker_id]['y'].ravel(), dtype=int64)
            dataset = sy.BaseDataset(
                images,
                labels,
                transform=transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Normalize(
                        (raw_dataset[worker_id]['x'].mean(), ),
                        (raw_dataset[worker_id]['x'].std(), ))
                ]))
            datasets[worker_id] = dataset

        return datasets
Example #19
    def create_femnist_fed_dataset(self, raw_data, workers_idx, percentage):
        """ 
        Assume this only used for preparing aggregated dataset for the server
        Args:
            raw_data (dict): 
            workers_idx (list(int)): 
            percentage (float): Out of 100, amount of public data of each user
        Returns:
        """
        logging.info(
            "Creating the dataset from {}% of {} selected users' data...".
            format(percentage, len(workers_idx)))
        # Fraction of each user's public data, which will be shared with the server
        server_images = tensor([], dtype=float32).view(-1, 28, 28)
        server_labels = tensor([], dtype=int64)
        # server_images = np.array([], dtype = np.float32).reshape(-1, 28, 28)
        # server_labels = np.array([], dtype = np.int64)
        for worker_id in workers_idx:
            worker_samples_num = len(raw_data[worker_id]['y'])
            num_samples_for_server = math.floor(
                (percentage / 100.0) * worker_samples_num)
            logging.debug(
                "Sending {} samples from worker {} with total {}".format(
                    num_samples_for_server, worker_id, worker_samples_num))
            indices = sample(range(worker_samples_num), num_samples_for_server)
            images = tensor([raw_data[worker_id]['x'][i] for i in indices],
                            dtype=float32).view(-1, 28, 28)
            labels = tensor([raw_data[worker_id]['y'][i] for i in indices],
                            dtype=int64).view(-1)
            server_images = cat((server_images, images))
            server_labels = cat((server_labels, labels))

        logging.info(
            "Selected {} samples in total for the server from {} users.".
            format(server_images.shape, len(workers_idx)))

        return sy.BaseDataset(server_images,
                              server_labels,
                              transform=transforms.Compose([
                                  transforms.ToTensor(),
                                  transforms.Normalize(
                                      (server_images.mean().item(), ),
                                      (server_images.std().item(), ))
                              ])).federate([self.server])
Example #20
def _distribute_among_workers(samplers, dataset, workers):

    datasets = []

    # Each worker has its own sampler; len(samplers) == len(workers)
    for idx, sampler in enumerate(samplers):

        loader = DataLoader(dataset,
                            batch_size=len(sampler),
                            shuffle=False,
                            sampler=sampler)

        # Loader always contains only one batch (because batch_size=len(sampler))
        for batch in loader:
            data = batch[0].send(workers[idx].id)
            targets = batch[1].send(workers[idx].id)
            datasets.append(sy.BaseDataset(data, targets))

    return sy.FederatedDataset(datasets)
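
A sketch of how the sampler-based splitter might be driven. The toy TensorDataset and the even index split via SubsetRandomSampler are assumptions, not part of the original helper, and PySyft 0.2.x virtual workers are assumed to be registered with the hook so the worker ids resolve.

import numpy as np
import torch
import syft as sy
from torch.utils.data import TensorDataset
from torch.utils.data.sampler import SubsetRandomSampler

hook = sy.TorchHook(torch)
workers = [sy.VirtualWorker(hook, id="worker%d" % i) for i in range(2)]

# Toy dataset: 100 samples with 10 features each and binary targets
dataset = TensorDataset(torch.randn(100, 10), torch.randint(0, 2, (100,)))

# One sampler per worker over disjoint index ranges
index_splits = np.array_split(np.arange(len(dataset)), len(workers))
samplers = [SubsetRandomSampler(idx.tolist()) for idx in index_splits]

federated = _distribute_among_workers(samplers, dataset, workers)
train_loader = sy.FederatedDataLoader(federated, batch_size=32, shuffle=True)
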
Example #21
    def get_dataset_pointers(self):
        self.worker_handles = [ResearcherWorker(hook, PROXY_URL, PROXY_PORT, cookie = cookie, verbose = self.verbose, id = this_id, is_client_worker = True) for cookie, this_id in self.tokens.items()]

        self.datasets = dict()
        self.workers = []
        for worker in self.worker_handles:
            print(worker)
            # print(worker.test_hello_world())
            # print(worker._objects)
            # help(worker.list_objects_remote())
            # print(worker._remote_objects)
            this_dataset = worker.search(self.dataset_key)
            this_targets = worker.search(self.target_key)
            # this_dataset.location = worker
            # this_targets.location = worker
            remote_dataset = sy.BaseDataset(this_dataset, this_targets)
            # remote_dataset.send(worker)
            self.datasets[worker.id] = remote_dataset

            self.workers.append(worker.id)
Example #22
def main(**kwargs):  # pragma: no cover
    """Helper function for spinning up a websocket participant."""

    # Create websocket worker
    worker = WebsocketServerWorker(**kwargs)

    # Setup toy data (xor example)
    data = th.tensor([[0.0, 1.0], [1.0, 0.0], [1.0, 1.0], [0.0, 0.0]], requires_grad=True)
    target = th.tensor([[1.0], [1.0], [0.0], [0.0]], requires_grad=False)

    # Create a dataset using the toy data
    dataset = sy.BaseDataset(data, target)

    # Tell the worker about the dataset
    worker.add_dataset(dataset, key="xor")

    # Start worker
    worker.start()

    return worker
Example #23
def prepare_training(hook, alice):  # pragma: no cover

    data = torch.tensor([[-1, 2.0], [0, 1.1], [-1, 2.1], [0, 1.2]],
                        requires_grad=True)
    target = torch.tensor([[1], [0], [1], [0]])

    dataset = sy.BaseDataset(data, target)
    alice.add_dataset(dataset, key="vectors")

    @hook.torch.jit.script
    def loss_fn(real, pred):
        return ((real.float() - pred.float())**2).mean()

    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.fc1 = nn.Linear(2, 3)
            self.fc2 = nn.Linear(3, 2)
            self.fc3 = nn.Linear(2, 1)

            nn.init.xavier_uniform_(self.fc1.weight)
            nn.init.xavier_uniform_(self.fc2.weight)
            nn.init.xavier_uniform_(self.fc3.weight)

        def forward(self, x):
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            return x

    model_untraced = Net()

    model = torch.jit.trace(model_untraced, data)

    print("Evaluation before training")
    pred = model(data)
    loss_before = loss_fn(real=target, pred=pred)
    print("Loss: {}".format(loss_before))

    return model, loss_fn, data, target, loss_before
Example #24
def get_eicu_dataset(hospitalid, outcome):
    """
    Sets up the eICU dataset for training or testing.
    """
    df_x = pd.read_csv('x.csv')
    df_y = pd.read_csv('y.csv')

    # delete rows where the outcome is missing
    to_keep = ~(pd.isnull(df_y).sum(axis=1) > 0)
    df_x = df_x[to_keep]
    df_y = df_y[to_keep]

    # restrict x and y to the required hospital or test set
    to_keep = df_x.hospitalid.values == hospitalid
    df_x.drop('hospitalid', axis=1, inplace=True)
    df_x = df_x[to_keep]
    scaler = RobustScaler(quantile_range=(10.0, 90.0))
    x = scaler.fit_transform(df_x.values)
    y = df_y[outcome][to_keep].values

    return sy.BaseDataset(data=torch.from_numpy(x.astype('float32')),
                          targets=torch.from_numpy(y.astype('float32')))
Example #25
def start_websocket_server_worker(id,
                                  host,
                                  port,
                                  hook,
                                  verbose,
                                  dataset,
                                  training=True):
    """Helper function for spinning up a websocket server and setting up the local datasets."""

    server = WebsocketServerWorker(id=id,
                                   host=host,
                                   port=port,
                                   hook=hook,
                                   verbose=verbose)
    dataset_key = dataset
    # If we are in the training loop
    if training:
        with open("./data/split/%d" % int(id), "rb") as fp:  # Unpickling
            data = pickle.load(fp)
        dataset_data, dataset_target = readnpy(data)
        print(type(dataset_data.long()))
        logger.info("Number of samples for client %s is %s : ", id,
                    len(dataset_data))
        dataset = sy.BaseDataset(data=dataset_data, targets=dataset_target)
        key = dataset_key

    nb_labels = len(torch.unique(dataset_target))
    server.add_dataset(dataset, key=key)
    count = [0] * nb_labels
    logger.info("Dataset(train set) ,available numbers on %s: ", id)
    for i in range(nb_labels):
        count[i] = (dataset.targets == i).sum().item()
        logger.info("      %s: %s", i, count[i])
    logger.info("datasets: %s", server.datasets)
    if training:
        logger.info("len(datasets): %s", len(server.datasets[key]))

    server.start()
    return server
Example #26
    def create_mnist_fed_datasets(self, raw_dataset):
        """
        raw_datasets (dict)
        ex.
            data: raw_datasets['worker_1']['x']
            label: raw_datasets['worker_1']['y']
        """
        fed_datasets = dict()

        for ww_id, ww_data in raw_dataset.items():
            images = tensor(ww_data['x'], dtype=float32)
            labels = tensor(ww_data['y'].ravel(), dtype=int64)
            dataset = sy.BaseDataset(images,
                                     labels,
                                     transform=transforms.Compose([
                                         transforms.ToTensor(),
                                         transforms.Normalize(
                                             (ww_data['x'].mean(), ),
                                             (ww_data['x'].std(), ))
                                     ])).federate([self.workers[ww_id]])
            fed_datasets[ww_id] = dataset

        return fed_datasets
Example #27
def test_train_config_with_jit_trace_sync(
        hook, start_remote_worker):  # pragma: no cover
    data, target = utils.create_gaussian_mixture_toy_data(100)
    dataset = sy.BaseDataset(data, target)
    dataset_key = "gaussian_mixture"

    server, remote_proxy = start_remote_worker(id="sync_fit",
                                               hook=hook,
                                               port=9000,
                                               dataset=(dataset, dataset_key))

    @hook.torch.jit.script
    def loss_fn(pred, target):
        return ((target.view(pred.shape).float() - pred.float())**2).mean()

    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.fc1 = nn.Linear(2, 3)
            self.fc2 = nn.Linear(3, 2)
            self.fc3 = nn.Linear(2, 1)

        def forward(self, x):
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            return x

    model_untraced = Net()

    model = torch.jit.trace(model_untraced, data)

    pred = model(data)
    loss_before = loss_fn(pred=pred, target=target)

    # Create and send train config
    train_config = sy.TrainConfig(model=model,
                                  loss_fn=loss_fn,
                                  batch_size=2,
                                  epochs=1)
    train_config.send(remote_proxy)

    for epoch in range(5):
        loss = remote_proxy.fit(dataset_key=dataset_key)
        if PRINT_IN_UNITTESTS:  # pragma: no cover
            print("-" * 50)
            print("Iteration %s: alice's loss: %s" % (epoch, loss))

    new_model = train_config.model_ptr.get()

    # assert that the new model has updated (modified) parameters
    assert not ((model.fc1._parameters["weight"] -
                 new_model.obj.fc1._parameters["weight"]).abs() < 10e-3).all()
    assert not ((model.fc2._parameters["weight"] -
                 new_model.obj.fc2._parameters["weight"]).abs() < 10e-3).all()
    assert not ((model.fc3._parameters["weight"] -
                 new_model.obj.fc3._parameters["weight"]).abs() < 10e-3).all()
    assert not ((model.fc1._parameters["bias"] -
                 new_model.obj.fc1._parameters["bias"]).abs() < 10e-3).all()
    assert not ((model.fc2._parameters["bias"] -
                 new_model.obj.fc2._parameters["bias"]).abs() < 10e-3).all()
    assert not ((model.fc3._parameters["bias"] -
                 new_model.obj.fc3._parameters["bias"]).abs() < 10e-3).all()

    new_model.obj.eval()
    pred = new_model.obj(data)
    loss_after = loss_fn(pred=pred, target=target)

    if PRINT_IN_UNITTESTS:  # pragma: no cover
        print("Loss before training: {}".format(loss_before))
        print("Loss after training: {}".format(loss_after))

    remote_proxy.close()
    server.terminate()

    assert loss_after < loss_before
Example #28
def start_websocket_server_worker(
    id, host, port, hook, verbose, keep_labels=None, training=True
):  # pragma: no cover
    """Helper function for spinning up a websocket server and setting up the local datasets."""

    server = WebsocketServerWorker(id=id, host=host, port=port, hook=hook, verbose=verbose)

    # Setup toy data (mnist example)
    mnist_dataset = datasets.MNIST(
        root="./data",
        train=training,
        download=True,
        transform=transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
        ),
    )

    if training:
        indices = np.isin(mnist_dataset.targets, keep_labels).astype("uint8")
        logger.info("number of true indices: %s", indices.sum())
        selected_data = (
            torch.native_masked_select(mnist_dataset.data.transpose(0, 2), torch.tensor(indices))
            .view(28, 28, -1)
            .transpose(2, 0)
        )
        logger.info("after selection: %s", selected_data.shape)
        selected_targets = torch.native_masked_select(mnist_dataset.targets, torch.tensor(indices))

        dataset = sy.BaseDataset(
            data=selected_data, targets=selected_targets, transform=mnist_dataset.transform
        )
        key = "mnist"
    else:
        dataset = sy.BaseDataset(
            data=mnist_dataset.data,
            targets=mnist_dataset.targets,
            transform=mnist_dataset.transform,
        )
        key = "mnist_testing"

    server.add_dataset(dataset, key=key)

    # Setup toy data (vectors example)
    data_vectors = torch.tensor([[-1, 2.0], [0, 1.1], [-1, 2.1], [0, 1.2]], requires_grad=True)
    target_vectors = torch.tensor([[1], [0], [1], [0]])

    server.add_dataset(sy.BaseDataset(data_vectors, target_vectors), key="vectors")

    # Setup toy data (xor example)
    data_xor = torch.tensor([[0.0, 1.0], [1.0, 0.0], [1.0, 1.0], [0.0, 0.0]], requires_grad=True)
    target_xor = torch.tensor([1.0, 1.0, 0.0, 0.0], requires_grad=False)

    server.add_dataset(sy.BaseDataset(data_xor, target_xor), key="xor")

    # Setup gaussian mixture dataset
    data, target = utils.create_gaussian_mixture_toy_data(nr_samples=100)
    server.add_dataset(sy.BaseDataset(data, target), key="gaussian_mixture")

    # Setup partial iris dataset
    data, target = utils.iris_data_partial()
    dataset = sy.BaseDataset(data, target)
    dataset_key = "iris"
    server.add_dataset(dataset, key=dataset_key)

    logger.info("datasets: %s", server.datasets)
    if training:
        logger.info("len(datasets[mnist]): %s", len(server.datasets["mnist"]))

    server.start()
    return server
Example #29
def main():
    args = define_and_get_arguments()

    hook = sy.TorchHook(torch)

    if args.use_virtual:
        alice = VirtualWorker(id="alice", hook=hook, verbose=args.verbose)
        bob = VirtualWorker(id="bob", hook=hook, verbose=args.verbose)
    else:
        kwargs_websocket = {"host": "localhost", "hook": hook, "verbose": args.verbose}
        alice = WebsocketClientWorker(id="alice", port=8777, **kwargs_websocket)
        bob = WebsocketClientWorker(id="bob", port=8778, **kwargs_websocket)

    workers = [alice, bob]

    use_cuda = args.cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {}

    if DATATHON:
        bob_data, bob_target = preprocessed_data("EICU_DB", True, 0)
        alice_data, alice_target = preprocessed_data("MIMIC_DB", True, 1)
        alice_train_dataset = sy.BaseDataset(alice_data, alice_target).send(alice)
        bob_train_dataset = sy.BaseDataset(bob_data, bob_target).send(bob)

        federated_train_dataset = sy.FederatedDataset([alice_train_dataset, bob_train_dataset])

        federated_train_loader = sy.FederatedDataLoader(
            federated_train_dataset,
            shuffle=True,
            batch_size=args.batch_size,
            iter_per_worker=True,
            **kwargs,
        )

        test_loader_mimic = get_dataloader(is_train=False, batch_size=args.batch_size, is_mimic=1)
        test_loader_eicu = get_dataloader(is_train=False, batch_size=args.batch_size, is_mimic=0)

    else:
        federated_train_loader = sy.FederatedDataLoader(
            datasets.MNIST(
                "../data",
                train=True,
                download=True,
                transform=transforms.Compose(
                    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
                ),
            ).federate(tuple(workers)),
            batch_size=args.batch_size,
            shuffle=True,
            iter_per_worker=True,
            **kwargs,
        )

        test_loader = torch.utils.data.DataLoader(
            datasets.MNIST(
                "../data",
                train=False,
                transform=transforms.Compose(
                    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
                ),
            ),
            batch_size=args.test_batch_size,
            shuffle=True,
            **kwargs,
        )

    model = Net().to(device)

    for epoch in range(1, args.epochs + 1):
        logger.info("Starting epoch %s/%s", epoch, args.epochs)
        model = train(model, device, federated_train_loader, args.lr, args.federate_after_n_batches)
        test(model, device, test_loader_mimic, 1)
        test(model, device, test_loader_eicu, 0)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
Example #30
"""

import numpy as np
from torch.utils.data.sampler import SubsetRandomSampler

df = pd.read_csv('mnist_train.csv')

y = df['label'].values
X = df.drop(['label'], 1).values
X = X[:len(X) // 2]
y = y[:len(y) // 2]

torch_X_train = torch.from_numpy(X).type(torch.LongTensor)
torch_y_train = torch.from_numpy(y).type(torch.LongTensor) # data type is long

base = sy.BaseDataset(torch_X_train, torch_y_train)
base_federated = base.federate((bob, alice))

train_loader = sy.FederatedDataLoader(base_federated, batch_size=args.batch_size, shuffle=True, **kwargs)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=args.test_batch_size, shuffle=True, **kwargs)


"""### CNN specification
Here we use exactly the same CNN as in the official example.
"""