Code example #1
0
File: test_dataset.py — Project: zeta1999/PySyft
def test_federated_dataset_search(workers):
    """Build a FederatedDataset from grid-search results and check that
    iterating its FederatedDataLoader yields the expected batch count.
    """
    bob = workers["bob"]
    alice = workers["alice"]

    grid = sy.PrivateGridNetwork(*[bob, alice])

    # Tag tensors on each worker so grid.search can find them; keep the
    # returned pointers alive for the duration of the test.
    train_bob = th.Tensor(th.zeros(1000, 100)).tag("data").send(bob)
    target_bob = th.Tensor(th.zeros(1000, 100)).tag("target").send(bob)

    train_alice = th.Tensor(th.zeros(1000, 100)).tag("data").send(alice)
    target_alice = th.Tensor(th.zeros(1000, 100)).tag("target").send(alice)

    data = grid.search("data")
    target = grid.search("target")

    datasets = [
        BaseDataset(data["bob"][0], target["bob"][0]),
        BaseDataset(data["alice"][0], target["alice"][0]),
    ]

    fed_dataset = sy.FederatedDataset(datasets)
    train_loader = sy.FederatedDataLoader(fed_dataset,
                                          batch_size=4,
                                          shuffle=False,
                                          drop_last=False)

    counter = 0
    # Renamed loop vars: the originals shadowed the `data`/`target` dicts above.
    for batch_idx, (batch_data, batch_target) in enumerate(train_loader):
        counter += 1

    # Fixed assert message: it previously reported len(fed_dataset) while the
    # comparison was against len(train_loader).
    assert counter == len(train_loader), f"{counter} != {len(train_loader)}"
Code example #2
0
def main():
    """Coordinator entry point: trace a model and loss function into PySyft
    Plans, connect to remote workers, and push the model to every worker each
    round, profiling the send time.

    NOTE(review): relies on module-level names (sy, torch, F, nll_loss, Net,
    NodeClient, gevent, send_model_to_worker, logger, time) defined elsewhere
    in this file.
    """
    hook = sy.TorchHook(torch)  # patch torch so tensors gain PySyft methods
    device = torch.device("cpu")
    model = Net()
    # Trace the model into a Plan using a dummy 1x1x28x28 (MNIST-shaped) input.
    model.build(torch.zeros([1, 1, 28, 28], dtype=torch.float).to(device))
    # model.build(torch.zeros([1, node_num], dtype=torch.float).to(device))

    @sy.func2plan()
    def loss_fn(pred, target):
        # Negative log-likelihood loss, traced into a serializable Plan.
        return nll_loss(input=pred, target=target)

    # Build the loss Plan from dummy tensors of a representative shape.
    input_num = torch.randn(3, 5, requires_grad=True)
    target = torch.tensor([1, 0, 4])
    dummy_pred = F.log_softmax(input_num, dim=1)
    loss_fn.build(dummy_pred, target)

    built_model = model
    built_loss_fn = loss_fn

    # Hyper-parameters. learning_rate/batch_size/optimizer_args are unused in
    # the visible body — presumably consumed by code outside this excerpt.
    epoch_num = 21
    batch_size = 64
    lr = 0.1
    learning_rate = lr
    optimizer_args = {"lr": lr}

    # Connect to the remote worker over a plain websocket.
    alice = NodeClient(hook, "ws://10.0.17.6:6666", id="alice")
    # bob = NodeClient(hook, "ws://172.16.179.21:6667" , id="bob")
    # charlie = NodeClient(hook, "ws://172.16.179.22:6668", id="charlie")

    worker_list = [alice]
    # worker_list = [alice]
    grid = sy.PrivateGridNetwork(*worker_list)

    for epoch in range(epoch_num):

        logger.info("round %s/%s", epoch, epoch_num)

        epoch_start = time.time()

        # Push the model to all workers concurrently via gevent greenlets.
        jobs = [
            gevent.spawn(send_model_to_worker, worker, built_model)
            for worker in worker_list
        ]

        gevent.joinall(jobs)

        # results = await asyncio.gather(
        #     *[
        #         send_model_to_worker(
        #             worker=worker,
        #             built_model=built_model,
        #         )
        #         for worker in worker_list
        #     ]
        # )
        print("[PROF]", "AllWorkerSend", "duration", "COORD",
              time.time() - epoch_start)

        # Drop pointer bookkeeping so the next round re-sends fresh copies.
        built_model.pointers = {}
        built_loss_fn.pointers = {}
Code example #3
0
    def distribute_dataset(self, X, y, train_idx, test_idx, workers):
        """Split X/y into train/valid/test partitions, shard each partition
        across the given workers, and return a PrivateGridNetwork over them.

        Tensors are sent with garbage_collect_data=False so the remote copies
        survive after the local pointers go away.
        """
        features = torch.tensor(X).float()
        labels = torch.tensor(y).float()

        # Carve a validation slice off the front of the training indices.
        n_valid = int(
            np.floor(self.model_config.validation_split * len(train_idx)))
        valid_idx = train_idx[:n_valid]
        train_idx = train_idx[n_valid:]

        partitions = (
            ('train', train_idx),
            ('valid', valid_idx),
            ('test', test_idx),
        )

        for tag, idx in partitions:
            shards = self.node_distribution(features[idx], labels[idx],
                                            len(workers))
            for worker_pos, (part_x, part_y) in enumerate(shards):
                tagged_x = part_x.tag("#X", f"#{tag}").describe("")
                tagged_y = part_y.tag("#Y", f"#{tag}").describe("")

                tagged_x.send(workers[worker_pos], garbage_collect_data=False)
                tagged_y.send(workers[worker_pos], garbage_collect_data=False)

        return sy.PrivateGridNetwork(*workers)
Code example #4
0
def test_virtual_grid(workers):
    """Test PrivateGridNetwork search across several workers.

    An empty search returns one entry per worker that holds any tagged
    tensor; tag searches return entries only for the workers whose tensors
    carry a matching tag.

    (The previous docstring described tuple serialization and was copied
    from an unrelated test.)
    """
    bob = workers["bob"]
    alice = workers["alice"]
    james = workers["james"]

    grid = sy.PrivateGridNetwork(*[bob, alice, james])

    # Keep the pointers alive so the remote tensors survive during search.
    x = torch.tensor([1, 2, 3, 4]).tag("#bob", "#male").send(bob)
    y = torch.tensor([1, 2, 3, 4]).tag("#alice", "#female").send(alice)
    z = torch.tensor([1, 2, 3, 4]).tag("#james", "#male").send(james)

    # Empty search: every worker with tagged data appears.
    results = grid.search()
    assert len(results) == 3

    assert "bob" in results.keys()
    assert "alice" in results.keys()
    assert "james" in results.keys()

    # Searching a worker-specific tag returns only that worker.
    results = grid.search("#bob")
    assert len(results["bob"]) == 1
    assert "alice" not in results
    assert "james" not in results

    # Searching a shared tag returns every worker holding it.
    results = grid.search("#male")
    assert len(results["bob"]) == 1
    assert "alice" not in results
    assert len(results["james"]) == 1
Code example #5
0
async def main():
    """Coordinator: trace model/loss Plans, ship a ModelConfig to remote
    workers, have every worker secret-share its model with the others, then
    sum the encrypted parameters locally, profiling each stage.

    NOTE(review): relies on module-level names (sy, torch, Net, NodeClient,
    flvm_ip, asyncio, time) defined elsewhere in this file.
    """
    hook = sy.TorchHook(torch)  # patch torch so tensors gain PySyft methods
    device = torch.device("cpu")
    optimizer = "SGD"
    epochs = 1
    shuffle = True
    model = Net()
    # Trace the model into a Plan using a dummy 1x1x28x28 (MNIST-shaped) input.
    model.build(torch.zeros([1, 1, 28, 28], dtype=torch.float).to(device))
    # model.build(torch.zeros([2], dtype=torch.float).to(device))

    @sy.func2plan(args_shape=[(-1, 1), (-1, 1)])
    def loss_fn(target, pred):
        # Mean squared error between target and prediction.
        return ((target.view(pred.shape).float() - pred.float())**2).mean()

    # Training hyper-parameters (learning_rate itself is unused below).
    batch_size = 64
    lr = 0.1
    learning_rate = lr
    optimizer_args = {"lr": lr}
    model_config = sy.ModelConfig(model=model,
                                  loss_fn=loss_fn,
                                  optimizer=optimizer,
                                  batch_size=batch_size,
                                  optimizer_args=optimizer_args,
                                  epochs=epochs,
                                  shuffle=shuffle)

    # alice = NodeClient(hook, "ws://172.16.179.20:6666" , id="alice")
    # bob = NodeClient(hook, "ws://172.16.179.21:6667" , id="bob")
    # charlie = NodeClient(hook, "ws://172.16.179.22:6668", id="charlie")
    #     testing = NodeClient(hook, "ws://localhost:6669" , id="testing")

    # worker_list = [alice, bob, charlie]

    # Connect to 12 workers at the IPs flvm_ip[2] .. flvm_ip[13].
    worker_list = []
    for i in range(2, 2 + 12):
        worker = NodeClient(hook,
                            "ws://" + flvm_ip[i] + ":6666",
                            id="flvm-" + str(i))
        worker_list.append(worker)

    for worker in worker_list:
        model_config.send(worker)
    grid = sy.PrivateGridNetwork(*worker_list)

    # One return id per model parameter tensor: "p0", "p1", ...
    # NOTE(review): assumes model.parameters() returns a sized sequence
    # (PySyft Plan behavior), not a plain torch generator — confirm.
    num_of_parameters = len(model.parameters())
    return_ids = []
    for i in range(num_of_parameters):
        return_ids.append("p" + str(i))

    start = time.time()
    # worker_0 = worker_list[0]
    # worker_1 = worker_list[1]
    # worker_2 = worker_list[2]
    # Every worker secret-shares its model with all workers, concurrently.
    enc_results = await asyncio.gather(*[
        worker.async_model_share(worker_list, return_ids=return_ids)
        for worker in worker_list
    ])
    end = time.time()

    ## aggregation
    # Sum each worker's encrypted parameters into the first result, in place,
    # printing per-add and per-layer timings.
    dst_enc_model = enc_results[0]
    agg_start = time.time()
    with torch.no_grad():
        for i in range(len(dst_enc_model)):
            layer_start = time.time()
            for j in range(1, len(enc_results)):
                add_start = time.time()
                dst_enc_model[i] += enc_results[j][i]
                print("[PROF]", "AddParams", time.time() - add_start)
            print("[PROF]", "Layer" + str(i), time.time() - layer_start)
    print("[PROF]", "AggTime", time.time() - agg_start)
Code example #6
0
async def main():
    """Coordinator: trace model/loss Plans, connect to a single worker over
    ws or wss (depending on the module-level ssl_args flag), and repeatedly
    send the model, timing each send.

    NOTE(review): relies on module-level names (sy, torch, F, nll_loss, Net,
    NodeClient, ssl_args, logger, time) defined elsewhere in this file.
    """
    hook = sy.TorchHook(torch)  # patch torch so tensors gain PySyft methods
    device = torch.device("cpu")
    model = Net()
    # Trace the model into a Plan using a dummy 1x1x28x28 (MNIST-shaped) input.
    model.build(torch.zeros([1, 1, 28, 28], dtype=torch.float).to(device))
    # model.build(torch.zeros([1, node_num], dtype=torch.float).to(device))

    @sy.func2plan()
    def loss_fn(pred, target):
        # Negative log-likelihood loss, traced into a serializable Plan.
        return nll_loss(input=pred, target=target)

    # Build the loss Plan from dummy tensors of a representative shape.
    input_num = torch.randn(3, 5, requires_grad=True)
    target = torch.tensor([1, 0, 4])
    dummy_pred = F.log_softmax(input_num, dim=1)
    loss_fn.build(dummy_pred, target)

    built_model = model
    built_loss_fn = loss_fn

    # Hyper-parameters; batch_size/optimizer_args are unused in this body.
    epoch_num = 21
    batch_size = 64
    lr = 0.1
    learning_rate = lr
    optimizer_args = {"lr": lr}

    # Choose TLS (wss) or plain (ws) transport based on the ssl_args flag.
    if ssl_args == "ssl_true":
        alice = NodeClient(hook, "wss://10.0.17.6:6666", id="alice")
    else:
        alice = NodeClient(hook, "ws://10.0.17.6:6666", id="alice")


#     bob = NodeClient(hook, "ws://172.16.179.22:6667" , id="bob")
#     charlie = NodeClient(hook, "ws://172.16.179.23:6668", id="charlie")
#     med24 = NodeClient(hook, "ws://172.16.179.24:6669", id="med24")
#     testing = NodeClient(hook, "ws://localhost:6669" , id="testing")

# worker_list = [alice, bob, charlie]
    worker_list = [alice]
    grid = sy.PrivateGridNetwork(*worker_list)

    for epoch in range(epoch_num):

        logger.info("round %s/%s", epoch, epoch_num)

        for worker in worker_list:

            # Fixed id so the worker sees the same model object each round.
            built_model.id = "GlobalModel"
            # built_loss_fn.id = "LossFunc"
            # model_config = sy.ModelConfig(model=built_model,
            #                           loss_fn=built_loss_fn,
            #                           optimizer="SGD",
            #                           batch_size=batch_size,
            #                           optimizer_args={"lr": lr},
            #                           epochs=1,
            #                           max_nr_batches=-1)
            model_send_start = time.time()
            ##pdb.set_trace()
            built_model.send(worker)
            model_send_end = time.time()
            # print("[TEST]", "ModelSend", "time", model_send_start, model_send_end)
            print("[trace] ModelSend duration", worker.id,
                  model_send_end - model_send_start)

            # Drop pointer bookkeeping so the next send re-serializes the model.
            built_model.pointers = {}
            built_loss_fn.pointers = {}

            # decay learning rate
            learning_rate = max(0.98 * learning_rate, lr * 0.01)
Code example #7
0
def run(args):
    """Run encrypted training, evaluation, or an inference speed test, as
    selected by the parsed CLI `args`.

    Sets up workers (remote websocket or in-process virtual), builds private
    and public data loaders, encrypts the model unless --public, then trains
    and/or tests, printing timing/accuracy results.

    NOTE(review): relies on module-level names (sy, torch, optim,
    DataCentricFLClient, build_prepocessing, get_data_loaders, get_model,
    get_number_classes, load_state_dict, train, test) defined elsewhere.
    """
    if args.train:
        print(f"Training over {args.epochs} epochs")
    elif args.test:
        print("Running a full evaluation")
    else:
        print("Running inference speed test")
    print("model:\t\t", args.model)
    print("dataset:\t", args.dataset)
    print("batch_size:\t", args.batch_size)

    hook = sy.TorchHook(torch)

    # Remote websocket workers, or local virtual workers for simulation.
    if args.websockets:
        alice = DataCentricFLClient(hook, "ws://localhost:7600")
        bob = DataCentricFLClient(hook, "ws://localhost:7601")
        crypto_provider = DataCentricFLClient(hook, "ws://localhost:7602")
        my_grid = sy.PrivateGridNetwork(alice, bob, crypto_provider)
        sy.local_worker.object_store.garbage_delay = 1

    else:
        bob = sy.VirtualWorker(hook, id="bob")
        alice = sy.VirtualWorker(hook, id="alice")
        crypto_provider = sy.VirtualWorker(hook, id="crypto_provider")

    workers = [alice, bob]
    sy.local_worker.clients = workers

    # Arguments controlling how tensors/models get encrypted (shared).
    encryption_kwargs = dict(workers=workers,
                             crypto_provider=crypto_provider,
                             protocol=args.protocol)
    kwargs = dict(
        requires_grad=args.requires_grad,
        precision_fractional=args.precision_fractional,
        dtype=args.dtype,
        **encryption_kwargs,
    )

    if args.preprocess:
        build_prepocessing(args.model, args.dataset, args.batch_size, workers,
                           args)

    # Private (encrypted) and public (plaintext) loaders over the same data.
    private_train_loader, private_test_loader = get_data_loaders(args,
                                                                 kwargs,
                                                                 private=True)
    public_train_loader, public_test_loader = get_data_loaders(args,
                                                               kwargs,
                                                               private=False)

    model = get_model(args.model,
                      args.dataset,
                      out_features=get_number_classes(args.dataset))

    # Evaluation-only runs start from pretrained weights.
    if args.test and not args.train:
        load_state_dict(model, args.model, args.dataset)

    model.eval()

    if torch.cuda.is_available():
        sy.cuda_force = True

    if not args.public:
        model.encrypt(**kwargs)
        if args.fp_only:  # Just keep the (Autograd+) Fixed Precision feature
            model.get()

    if args.train:
        for epoch in range(args.epochs):
            # A fresh optimizer each epoch; fixed-precision when encrypted.
            optimizer = optim.SGD(model.parameters(),
                                  lr=args.lr,
                                  momentum=args.momentum)

            if not args.public:
                optimizer = optimizer.fix_precision(
                    precision_fractional=args.precision_fractional,
                    dtype=args.dtype)
            train_time = train(args, model, private_train_loader, optimizer,
                               epoch)
            test_time, accuracy = test(args, model, private_test_loader)
    else:
        test_time, accuracy = test(args, model, private_test_loader)
        if not args.test:
            # Inference speed test: report per-item latency.
            print(
                f"{ 'Online' if args.preprocess else 'Total' } time (s):\t",
                round(test_time / args.batch_size, 4),
            )
        else:
            # Compare with clear text accuracy
            print("Clear text accuracy is:")
            model = get_model(args.model,
                              args.dataset,
                              out_features=get_number_classes(args.dataset))
            load_state_dict(model, args.model, args.dataset)
            test(args, model, public_test_loader)

    # Report any preprocessed crypto material that ran out during the run.
    if args.preprocess:
        missing_items = [len(v) for k, v in sy.preprocessed_material.items()]
        if sum(missing_items) > 0:
            print("MISSING preprocessed material")
            for key, value in sy.preprocessed_material.items():
                print(f"'{key}':", value, ",")
Code example #8
0
async def main():
    """Federated-learning coordinator for a VGG16 model: each round, fit the
    model on every worker concurrently, aggregate the returned encrypted
    parameters, decrypt and average them, then write them back into the model.

    NOTE(review): relies on module-level names (sy, torch, F, nll_loss, vgg,
    NodeClient, fit_model_on_worker, logger, asyncio, time) defined elsewhere
    in this file.
    """
    hook = sy.TorchHook(torch)  # patch torch so tensors gain PySyft methods
    device = torch.device("cpu")
    model = vgg.vgg16(pretrained=False)

    # pdb.set_trace()
    # Trace the model into a Plan using a dummy CIFAR-shaped batch (64x3x32x32).
    model.build(torch.zeros([64, 3, 32, 32], dtype=torch.float).to(device))
    # pdb.set_trace()

    @sy.func2plan()
    def loss_fn(pred, target):
        # Negative log-likelihood loss, traced into a serializable Plan.
        return nll_loss(input=pred, target=target)

    # Build the loss Plan from dummy tensors of a representative shape.
    input_num = torch.randn(3, 5, requires_grad=True)
    target = torch.tensor([1, 0, 4])
    dummy_pred = F.log_softmax(input_num, dim=1)
    loss_fn.build(dummy_pred, target)

    # Hyper-parameters; learning_rate decays at the end of each round below.
    epoch_num = 11
    batch_size = 64
    lr = 0.05
    learning_rate = lr
    optimizer_args = {"lr": lr}

    # Connect to the remote workers over websockets.
    alice = NodeClient(hook, "ws://172.16.179.20:6666", id="alice")
    bob = NodeClient(hook, "ws://172.16.179.21:6667", id="bob")
    charlie = NodeClient(hook, "ws://172.16.179.22:6668", id="charlie")
    #     testing = NodeClient(hook, "ws://localhost:6669" , id="testing")

    worker_list = [alice, bob, charlie]
    grid = sy.PrivateGridNetwork(*worker_list)

    for epoch in range(epoch_num):

        logger.info("Training round %s/%s", epoch, epoch_num)

        round_start_time = time.time()

        # Train on all workers concurrently; each worker returns its id, its
        # encrypted parameters, its loss, and its training-set size.
        results = await asyncio.gather(*[
            fit_model_on_worker(
                worker=worker,
                built_model=model,
                built_loss_fn=loss_fn,
                encrypters=worker_list,
                batch_size=batch_size,
                curr_round=epoch,
                max_nr_batches=-1,
                lr=0.1,
            ) for worker in worker_list
        ])

        local_train_end_time = time.time()
        print("[trace]", "AllWorkersTrainingTime", "duration", "COORD",
              local_train_end_time - round_start_time)

        # Collect per-worker results, skipping workers that returned nothing.
        enc_models = {}
        loss_values = {}
        data_amounts = {}
        total_data_amount = 0

        for worker_id, enc_params, worker_loss, num_of_training_data in results:
            if enc_params is not None:
                enc_models[worker_id] = enc_params
                loss_values[worker_id] = worker_loss
                data_amounts[worker_id] = num_of_training_data
                total_data_amount += num_of_training_data

        ## aggregation
        # Sum every worker's encrypted parameters into the first model in place.
        nr_enc_models = len(enc_models)
        enc_models_list = list(enc_models.values())
        data_amounts_list = list(data_amounts.values())  ##
        dst_enc_model = enc_models_list[0]

        aggregation_start_time = time.time()
        with torch.no_grad():
            for i in range(len(dst_enc_model)):
                for j in range(1, nr_enc_models):
                    dst_enc_model[i] += enc_models_list[j][i]
        aggregation_end_time = time.time()
        print("[trace]", "AggregationTime", "duration", "COORD",
              aggregation_end_time - aggregation_start_time)

        ## decryption
        # Decrypt each summed parameter, average by total data amount, and
        # write it back into the local model.
        # NOTE(review): assumes model.parameters() supports indexing (PySyft
        # Plan behavior), unlike plain torch's generator — confirm.
        new_params = []
        decryption_start_time = time.time()
        with torch.no_grad():
            for i in range(len(dst_enc_model)):
                decrypt_para = dst_enc_model[i].get()
                new_para = decrypt_para.float_precision()
                new_para = new_para / int(total_data_amount)
                model.parameters()[i].set_(new_para)

        round_end_time = time.time()
        print("[trace]", "DecryptionTime", "duration", "COORD",
              round_end_time - decryption_start_time)
        print("[trace]", "RoundTime", "duration", "COORD",
              round_end_time - round_start_time)

        ## FedAvg
        #         nr_models = len(models)
        #         model_list = list(models.values())
        #         dst_model = model_list[0]
        #         nr_params = len(dst_model.parameters())
        #         with torch.no_grad():
        #             for i in range(1, nr_models):
        #                 src_model = model_list[i]
        #                 src_params = src_model.parameters()
        #                 dst_params = dst_model.parameters()
        #                 for i in range(nr_params):
        #                     dst_params[i].set_(src_params[i].data + dst_params[i].data)
        #             for i in range(nr_params):
        #                 dst_params[i].set_(dst_params[i].data * 1/total_data_amount)

        #         if epoch%5 == 0 or epoch == 49:
        #             evaluate_model_on_worker(
        #                 model_identifier="Federated model",
        #                 worker=testing,
        #                 dataset_key="mnist_testing",
        #                 model=model,
        #                 built_loss_fn=loss_fn,
        #                 nr_bins=10,
        #                 batch_size=64,
        #                 device=device,
        #                 print_target_hist=False,
        #             )

        # Drop pointer bookkeeping so the next round re-sends fresh copies.
        model.pointers = {}
        loss_fn.pointers = {}

        # decay learning rate
        learning_rate = max(0.98 * learning_rate, lr * 0.01)
Code example #9
0
import pandas as pd
import streamlit as st
import webbrowser

# Streamlit UI: search a PySyft grid and show matching tensors in a table.
hook = sy.TorchHook(torch)
# The local worker
me = hook.local_worker
me.is_client_worker = False
# The remote workers
bob = DataCentricFLClient(hook, "http://18.220.216.78:5001/")
#alice = DataCentricFLClient(hook, "ws://localhost:5006/")
# The crypto provider
sam = DataCentricFLClient(hook, "http://18.220.216.78:5001/")
kim = DataCentricFLClient(hook, "http://18.220.216.78:5001/")

grid = sy.PrivateGridNetwork(bob, sam, kim)

# Fixed NameError: streamlit is imported as `st`, so the bare name
# `streamlit` was unbound here and below.
query = st.text_input('Data Search Query')
data = grid.search(query)

# One row per worker: its id plus the first matching tensor's metadata.
# (Renamed `id` to avoid shadowing the builtin.)
nd = [[worker_id, val[0].description, (list(val[0].shape)), val[0].tags]
      for worker_id, val in data.items()]

df = pd.DataFrame(nd, columns=['Location', 'Description', 'Size', 'Tags'])
st.table(df)

values = df['Location'].tolist()


def genreate_url(id):
Code example #10
0
def main(data_path, participants, epochs, modelpath):
    """Federated MNIST training entry point.

    Connects to one websocket worker per participant, searches the grid for
    the #mnist data/target tensors, loads an existing model from `modelpath`
    (or creates a new one), trains for `epochs` rounds, evaluates on a local
    MNIST test set, and saves the model back to `modelpath`.

    NOTE(review): relies on module-level names (sy, torch, use_cuda,
    learning_rate, Net, train, test, epoch_total_size, datasets, transforms,
    optim, nn, Path) defined elsewhere in this file.
    """
    hook = sy.TorchHook(torch)
    kwargs_websocket = {"host": "localhost", "hook": hook}
    device = torch.device("cuda" if use_cuda else "cpu")

    # One websocket client per participant (comprehension replaces the
    # manual append loop; behavior unchanged).
    workers = [
        sy.workers.websocket_client.WebsocketClientWorker(
            id=participant.id, port=participant.port, **kwargs_websocket)
        for participant in participants
    ]
    grid = sy.PrivateGridNetwork(*workers)

    data = grid.search("#mnist", "#data")
    print(f"Search data: {len(data)}")

    target = grid.search("#mnist", "#target")
    print(f"Search target: {len(target)}")

    # Load an existing model from disk if present; otherwise start fresh.
    # (Dropped pointless f-prefixes from placeholder-free strings.)
    model_file = Path(modelpath)
    if model_file.is_file():
        model = torch.load(modelpath)
        print("model loaded from file")
        model.eval()
        model = model.to(device)
    else:
        print("new model created")
        model = Net().to(device)

    print(f"model: {model}")

    data = list(data.values())
    target = list(target.values())
    epoch_total = epoch_total_size(data)
    print(f"Total epochs: {epoch_total}")

    if use_cuda:
        model.cuda()
    optimizer = optim.Adadelta(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    # Local (plaintext) MNIST test set for evaluating the federated model.
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            data_path,
            train=False,
            transform=transforms.Compose(
                [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
            ),
        ),
        batch_size=1,
        shuffle=True,
    )

    for epoch in range(1, epochs + 1):
        m = train(epoch, model, data, target, optimizer, criterion)
        test(m, device, test_loader)

    torch.save(model, modelpath)
Code example #11
0
    print(
        '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))


if __name__ == "__main__":
    hook = sy.TorchHook(torch)
    kwargs_websocket = {"host": "localhost", "hook": hook}
    alice = WebsocketClientWorker(id="alice", port=8777, **kwargs_websocket)
    device = torch.device("cuda" if use_cuda else "cpu")

    # workers = [alice, bob, charlie]
    workers = [alice]
    grid = sy.PrivateGridNetwork(*workers)

    data = grid.search("#mnist", "#data")
    print(f"Search data: {len(data.keys())}")

    target = grid.search("#mnist", "#target")
    print(f"Search target: {len(target.keys())}")

    datasets_my = []
    for worker in data.keys():
        dataset = sy.BaseDataset(data[worker][0], target[worker][0])
        datasets_my.append(dataset)

    n_features = data['alice'][0].shape[1]
    n_targets = 1