def test_torch_tensor_simplify_generic(workers):
    """Inspect the nested tuple produced by simplifying a generic Tensor.

    A random 3x3x3 tensor is simplified through a worker created without a
    hook, and each layer of the resulting structure is checked in turn.
    """
    worker = VirtualWorker(None, id="non-torch")

    # Build the tensor and run it through the msgpack simplifier.
    tensor = Tensor(numpy.random.random((3, 3, 3)))
    simplified = msgpack.serde._simplify(worker, tensor)

    # Outer layer: a tuple whose first item selects the registered detailer.
    assert type(simplified) is tuple
    registered_detailer = msgpack.serde.msgpack_global_state.detailers[simplified[0]]
    assert registered_detailer == torch_serde._detail_torch_tensor

    # Inner layer: a tuple that starts with the tensor id.
    payload = simplified[1]
    assert type(payload) is tuple
    assert payload[0] == tensor.id

    # Tensor data layer: nested tuples carrying the size and flattened values.
    tensor_data = payload[1]
    assert type(tensor_data) is tuple
    fields = tensor_data[1]
    assert type(fields) is tuple
    assert fields[0][1] == tensor.size()
    assert fields[2][1] == tuple(tensor.flatten().tolist())
def __call__(self, protocol_function):
    """Wrap ``protocol_function`` in a ``Protocol`` and build it.

    Args:
        protocol_function: The decorated function. Its ``__name__`` becomes
            the protocol name and it is used as the ``forward_func``.

    Returns:
        The built ``Protocol`` instance.

    Raises:
        ValueError: If ``protocol.build()`` raises a ``TypeError``. The
            original ``TypeError`` is attached as the cause so the real
            failure stays visible in the traceback.
    """
    # create the roles present in decorator
    roles = {
        role_id: Role(worker=VirtualWorker(id=role_id, hook=sy.local_worker.hook))
        for role_id in self.role_names
    }

    # attach every declared state tensor to its role
    for role_id, state_tensors in self.states.items():
        for tensor in state_tensors:
            roles[role_id].register_state_tensor(tensor)

    protocol = Protocol(
        name=protocol_function.__name__,
        forward_func=protocol_function,
        roles=roles,
        id=sy.ID_PROVIDER.pop(),
        owner=sy.local_worker,
    )

    try:
        protocol.build()
    except TypeError as e:
        # Chain the original error instead of discarding it.
        raise ValueError(
            "Automatic build using @func2protocol failed!\nCheck that:\n"
            " - you have provided the correct number of shapes in args_shape\n"
            " - you have no simple numbers like int or float as args. If you do "
            "so, please consider using a tensor instead."
        ) from e

    return protocol
def test_send_msg():
    """Tests sending a message with a specific ID

    This is a simple test to ensure that the BaseWorker interface
    can properly send/receive a message containing a tensor.
    """
    # get pointer to local worker
    me = sy.torch.hook.local_worker

    # pending time to simulate latency (optional)
    me.message_pending_time = 0.1
    try:
        # create a new worker (to send the object to)
        worker_id = sy.ID_PROVIDER.pop()
        bob = VirtualWorker(sy.torch.hook, id=f"bob{worker_id}")

        # initialize the object and save its id
        obj = torch.Tensor([100, 100])
        obj_id = obj.id

        # Send data to bob, timing the call
        start_time = time()
        me.send_msg(ObjectMessage(obj), bob)
        elapsed_time = time() - start_time
    finally:
        # The local worker is taken from the shared hook, so always restore
        # the delay, even when the send fails, to avoid slowing other tests.
        me.message_pending_time = 0

    # ensure that object is now on bob's machine
    assert obj_id in bob._objects

    # ensure that object was sent 0.1 secs later
    assert elapsed_time > 0.1
def test_recv_msg():
    """Exercise ``recv_msg`` in two steps.

    Step 1 delivers a tensor to alice through ``recv_msg``.
    Step 2 uses ``recv_msg`` again to request that same tensor back.
    """
    # --- Step 1: deliver a tensor to alice ---
    alice = VirtualWorker(sy.torch.hook, id=f"alice{sy.ID_PROVIDER.pop()}")
    tensor = torch.Tensor([100, 100])

    # Serialize an object message and hand the bytes to alice.
    store_request = serde.serialize(ObjectMessage(tensor))
    alice.recv_msg(store_request)

    # The tensor must now be in alice's object store.
    assert tensor.id in alice.object_store._objects

    # --- Step 2: ask alice for the tensor ---
    fetch_request = serde.serialize(ObjectRequestMessage(tensor.id, None, ""))
    raw_response = alice.recv_msg(fetch_request)
    returned = sy.serde.deserialize(raw_response)

    # The reply is raw bytes ...
    assert type(raw_response) is bytes

    # ... and decodes to an object with the id of the tensor we stored.
    assert returned.id == tensor.id
def test_recv_msg():
    """Exercise ``recv_msg`` in two steps.

    Step 1 delivers a tensor to alice through ``recv_msg``.
    Step 2 uses ``recv_msg`` again to request that same tensor back.
    """
    # --- Step 1: deliver a tensor to alice ---
    alice = VirtualWorker(sy.torch.hook)
    tensor = torch.Tensor([100, 100])

    # Messages here are (type, payload) tuples; serialize and deliver one.
    store_request = serde.serialize((MSGTYPE.OBJ, tensor))
    alice.recv_msg(store_request)

    # The tensor must now be in alice's registry.
    assert tensor.id in alice._objects

    # --- Step 2: ask alice for the tensor ---
    fetch_request = serde.serialize((MSGTYPE.OBJ_REQ, tensor.id))
    raw_response = alice.recv_msg(fetch_request)
    returned = serde.deserialize(raw_response)

    # The reply is raw bytes ...
    assert type(raw_response) is bytes

    # ... and decodes to an object with the id of the tensor we stored.
    assert returned.id == tensor.id
def detail(worker: AbstractWorker, worker_tuple: tuple) -> Union["VirtualWorker", int, str]:
    """Rebuild a worker from its simplified tuple.

    The work is delegated to ``BaseWorker.detail``. When that call yields a
    bare ``int``, a new ``VirtualWorker`` carrying that id and ``worker``'s
    hook is returned; any other result is passed through unchanged.

    Args:
        worker: The worker performing the detailing; its ``hook`` is reused.
        worker_tuple: The simplified representation to detail.
    """
    detailed = BaseWorker.detail(worker, worker_tuple)
    if not isinstance(detailed, int):
        return detailed
    return VirtualWorker(id=detailed, hook=worker.hook)
def test_get_unknown_worker():
    """``get_worker`` raises for unknown ids but accepts worker instances."""
    hook = sy.TorchHook(torch)
    bob = VirtualWorker(hook, id="bob")
    charlie = VirtualWorker(hook, id="charlie")

    # An unknown string or integer id must fail when fail_hard is set.
    for unknown_id in ("the_unknown_worker", 1):
        with pytest.raises(WorkerNotFoundException):
            bob.get_worker(unknown_id, fail_hard=True)

    # Passing a VirtualWorker instance succeeds and registers it with bob.
    resolved = bob.get_worker(charlie)
    assert resolved.id == charlie.id
    assert charlie.id in bob._known_workers
def test_torch_tensor_serde_generic(workers):
    """Round-trip a generic Tensor through ``_simplify`` and ``_detail``.

    A random 100x100 tensor is simplified and detailed again with a worker
    created without a hook; size, dtype and contents must be preserved.
    """
    worker = VirtualWorker(None, id="non-torch")

    original = Tensor(numpy.random.random((100, 100)))

    # Simplify, then immediately detail the result again.
    restored = msgpack.serde._detail(worker, msgpack.serde._simplify(worker, original))

    # The round trip must not change the tensor.
    assert original.size() == restored.size()
    assert original.dtype == restored.dtype
    assert (original == restored).all()
def tests_worker_convenience_methods():
    """Check the object send/request helpers on workers.

    Part one: the local worker sends a tensor to alice with ``send_obj`` and
    fetches it back with ``request_obj``.
    Part two: bob stores and reads a tensor on himself with ``set_obj`` /
    ``get_obj``, and alice then requests that tensor directly from bob.
    """
    local_worker = sy.torch.hook.local_worker

    bob = VirtualWorker(sy.torch.hook, id=f"bob{sy.ID_PROVIDER.pop()}")
    alice = VirtualWorker(sy.torch.hook, id=f"alice{sy.ID_PROVIDER.pop()}")

    # Part one: local worker -> alice -> local worker.
    first = torch.Tensor([100, 100])
    local_worker.send_obj(first, alice)
    from_alice = local_worker.request_obj(first.id, alice)
    assert (from_alice == first).all()

    # Part two: bob stores the tensor on himself and reads it back ...
    second = torch.Tensor([200, 200])
    bob.set_obj(second)
    from_bob_locally = bob.get_obj(second.id)
    assert (from_bob_locally == second).all()

    # ... then alice requests the same tensor from bob.
    from_bob_via_alice = alice.request_obj(second.id, bob)
    assert (from_bob_via_alice == second).all()
def test_send_msg_using_tensor_api():
    """Check that the high-level tensor ``.send()`` delivers to a worker.

    A tensor is created, sent to bob with ``.send()``, and its id is then
    looked up in bob's object registry.
    """
    # Destination worker.
    bob = VirtualWorker(sy.torch.hook)

    # Remember the tensor's id before it is sent away.
    tensor = torch.Tensor([100, 100])
    tensor_id = tensor.id

    # Send to bob; the return value is kept referenced until the test ends.
    _ = tensor.send(bob)

    # The tensor must have arrived on bob.
    assert tensor_id in bob._objects
def test_send_msg():
    """Check that ``send_msg`` can deliver a tensor to another worker.

    The local worker sends an ``MSGTYPE.OBJ`` message holding a tensor to
    bob, and the tensor's id is then looked up in bob's object registry.
    """
    # Sender: the hook's local worker. Receiver: a fresh virtual worker.
    sender = sy.torch.hook.local_worker
    bob = VirtualWorker(sy.torch.hook)

    # Remember the tensor's id before sending it.
    tensor = torch.Tensor([100, 100])
    tensor_id = tensor.id

    sender.send_msg(MSGTYPE.OBJ, tensor, bob)

    # The tensor must now be registered on bob.
    assert tensor_id in bob._objects
def test_search():
    """Check tag-based search on a worker holding four tagged tensors."""
    bob = VirtualWorker(sy.torch.hook)

    tag_sets = [
        ("#fun", "#mnist"),
        ("#not_fun", "#cifar"),
        ("#fun", "#boston_housing"),
        ("#not_fun", "#boston_housing"),
    ]

    # Keep the returned pointers referenced until the test ends; presumably
    # dropping them could let the remote tensors be collected (TODO confirm).
    pointers = [
        torch.tensor([1, 2, 3, 4, 5])
        .tag(*tags)
        .describe("The images in the MNIST training dataset.")
        .send(bob)
        for tags in tag_sets
    ]

    # Single-tag queries.
    assert len(bob.search("#fun")) == 2
    assert len(bob.search("#mnist")) == 1
    assert len(bob.search("#cifar")) == 1
    assert len(bob.search("#not_fun")) == 2

    # Two tags passed as separate arguments.
    assert len(bob.search("#not_fun", "#boston_housing")) == 1
def test_search():
    """Check tag-based search on a worker holding four tagged tensors."""
    bob = VirtualWorker(sy.torch.hook, id=f"bob{sy.ID_PROVIDER.pop()}")

    tag_sets = [
        ("#fun", "#mnist"),
        ("#not_fun", "#cifar"),
        ("#fun", "#boston_housing"),
        ("#not_fun", "#boston_housing"),
    ]

    # Keep the returned pointers referenced until the test ends; presumably
    # dropping them could let the remote tensors be collected (TODO confirm).
    pointers = [
        torch.tensor([1, 2, 3, 4, 5])
        .tag(*tags)
        .describe("The images in the MNIST training dataset.")
        .send(bob)
        for tags in tag_sets
    ]

    # Single-tag queries.
    assert len(bob.search("#fun")) == 2
    assert len(bob.search("#mnist")) == 1
    assert len(bob.search("#cifar")) == 1
    assert len(bob.search("#not_fun")) == 2

    # Two tags passed together as one list.
    assert len(bob.search(["#not_fun", "#boston_housing"])) == 1
def __init__(self, torch, local_worker: BaseWorker = None, is_client: bool = True, verbose: bool = True):
    """Hook the given torch module and set up the local worker.

    The torch-related attributes are collected in a ``TorchAttributes``
    object that is published as ``syft.torch`` (and ``syft.framework``).
    If the module was already hooked, only ``self.local_worker`` is pointed
    at ``syft.local_worker`` and nothing else is done.

    Args:
        torch: The torch module to hook.
        local_worker: Worker to use as the local worker. When ``None``, a
            ``VirtualWorker`` with id ``"me"`` is created.
        is_client: Forwarded as ``is_client_worker`` to that ``VirtualWorker``.
        verbose: Not read inside this method.
    """
    # Remember the framework module and the (possibly missing) local worker.
    self.torch = torch
    self.framework = self.torch
    self.local_worker = local_worker

    # Hooking happens once per torch module: reuse the existing local worker
    # and stop here when it has been done before.
    if hasattr(torch, "torch_hooked"):
        logging.warning("Torch was already hooked... skipping hooking process")
        self.local_worker = syft.local_worker
        return

    torch.torch_hooked = True

    # Publish the torch attributes on the syft module.
    syft.torch = TorchAttributes(torch, self)
    syft.framework = syft.torch

    self.trace = Trace()

    # Hook some torch methods so tensors can be created directly at workers.
    self._hook_worker_methods()

    if self.local_worker is not None:
        self.local_worker.hook = self
    else:
        # Every hook needs a local worker to interface with other workers.
        # The worker interface keeps the torch-specific code here agnostic
        # of how workers communicate (peer-to-peer, sockets, local ports,
        # or all within the same process).
        self.local_worker = VirtualWorker(hook=self, is_client_worker=is_client, id="me")

    self.to_auto_overload = {}
    self.args_hook_for_overloaded_attr = {}

    self._hook_native_tensor(torch.Tensor, TorchTensor)

    # Pointers get the hooked tensor methods, but the command is sent.
    self._hook_pointer_tensor_methods(self.torch.Tensor)

    # AdditiveSharingTensor gets them applied to all shares where that makes
    # sense (otherwise the method is overwritten in the class itself).
    self._hook_additive_shared_tensor_methods()

    # Multi-pointers send the command to all child pointers.
    self._hook_multi_pointer_tensor_methods(self.torch.Tensor)

    # These types just forward the command to the next child (behaviour can
    # be changed in the SyftTensor class file). Order matches the original
    # call sequence.
    for tensor_type in (LoggingTensor, PaillierTensor, FixedPrecisionTensor, AutogradTensor):
        self._hook_syft_tensor_methods(tensor_type)

    # PrivateTensor and PlaceHolder have dedicated hooking routines.
    self._hook_private_tensor_methods(PrivateTensor)
    self._hook_syft_placeholder_methods(self.torch.Tensor, PlaceHolder)

    # Remaining forwarding tensor types, again in the original order.
    for tensor_type in (AdditiveSharingTensor, LargePrecisionTensor, HookedTensor):
        self._hook_syft_tensor_methods(tensor_type)

    # Built-in 'str' methods go onto String; the StringPointer hook must
    # strictly come after the String hook.
    self._hook_string_methods(owner=self.local_worker)
    self._hook_string_pointer_methods()

    # Tensor constructor, then Parameter methods (tensor chains in parameters).
    self._hook_tensor()
    self._hook_parameters()

    # torch functions (torch.add, torch.nn.functional, ...), torch.nn
    # (Linear, Convolution layers) and torch.optim (SGD, Adam, ...).
    self._hook_torch_module()
    self._hook_module()
    self._hook_optim()

    # Publish the local worker and hook on syft so that they can be found if
    # the hook is called several times.
    syft.local_worker = self.local_worker
    syft.hook = self
def create_sandbox(gbs, verbose=True, download_data=True):
    """Set up a ready-to-use sandbox in the caller's namespace.

    Hooks torch, creates six virtual workers (bob, theo, jason, alice, andy,
    jon), stores the hook, the workers and a ``VirtualGrid`` over them in
    ``gbs`` and, when requested, loads several scikit-learn datasets and
    spreads them across the workers.

    Args:
        gbs: Mapping to populate (typically the caller's ``globals()``). It
            must contain the torch module under ``"torch"`` or ``"th"``.
        verbose: When true, progress is printed step by step. The
            "Setting up Sandbox..." and "Done!" lines are always printed.
        download_data: When true, scikit-learn datasets are loaded and
            distributed to the workers.

    Raises:
        KeyError: If ``gbs`` contains neither ``"torch"`` nor ``"th"``.
    """
    try:
        torch = gbs["torch"]
    except KeyError:
        # Only a missing key means "try the alias"; anything else (including
        # KeyboardInterrupt) must propagate instead of being swallowed.
        torch = gbs["th"]

    global hook
    global bob
    global theo
    global alice
    global andy
    global jason
    global jon

    if download_data:  # pragma: no cover
        # NOTE(review): load_boston appears to be gone from recent
        # scikit-learn releases; confirm the pinned version supports it.
        from sklearn.datasets import load_boston
        from sklearn.datasets import load_breast_cancer
        from sklearn.datasets import load_digits
        from sklearn.datasets import load_diabetes
        from sklearn.datasets import load_iris
        from sklearn.datasets import load_wine
        from sklearn.datasets import load_linnerud

        def load_sklearn(func, *tags):
            """Load a dataset and return tagged, described (data, target) tensors."""
            dataset = func()
            description = dataset["DESCR"]
            # The words of the first description line are used as extra tags.
            title_words = description.split("\n")[0].lower().split(" ")
            data = (
                torch.tensor(dataset["data"])
                .float()
                .tag(*(list(tags) + ["#data"] + title_words))
                .describe(description)
            )
            target = (
                torch.tensor(dataset["target"])
                .float()
                .tag(*(list(tags) + ["#target"] + title_words))
                .describe(description)
            )
            return data, target

        def distribute_dataset(data, workers):
            """Split ``data`` along dim 0 into one batch per worker and send it."""
            batch_size = int(data.shape[0] / len(workers))
            n_batches = len(workers)
            for batch_i in range(n_batches - 1):
                batch = data[batch_i * batch_size : (batch_i + 1) * batch_size]
                batch.tags = data.tags
                batch.description = data.description
                ptr = batch.send(workers[batch_i])
                # keep the remote copy when the local pointer goes away
                ptr.child.garbage_collect_data = False

            # The last worker also receives the remainder of the division.
            batch = data[(n_batches - 1) * batch_size :]
            batch.tags = data.tags
            batch.description = data.description
            ptr = batch.send(workers[n_batches - 1])
            ptr.child.garbage_collect_data = False

    print("Setting up Sandbox...")

    if verbose:
        print("\t- Hooking PyTorch")
    hook = TorchHook(torch)

    if verbose:
        print("\t- Creating Virtual Workers:")
        print("\t\t- bob")
    bob = VirtualWorker(hook, id="bob")
    if verbose:
        print("\t\t- theo")
    theo = VirtualWorker(hook, id="theo")
    if verbose:
        print("\t\t- jason")
    jason = VirtualWorker(hook, id="jason")
    if verbose:
        print("\t\t- alice")
    alice = VirtualWorker(hook, id="alice")
    if verbose:
        print("\t\t- andy")
    andy = VirtualWorker(hook, id="andy")
    if verbose:
        print("\t\t- jon")
    jon = VirtualWorker(hook, id="jon")

    if verbose:
        print("\tStoring hook and workers as global variables...")
    gbs["hook"] = hook
    gbs["bob"] = bob
    gbs["theo"] = theo
    gbs["jason"] = jason
    gbs["alice"] = alice
    gbs["andy"] = andy
    gbs["jon"] = jon

    gbs["workers"] = [bob, theo, jason, alice, andy, jon]

    if download_data:  # pragma: no cover
        if verbose:
            print("\tLoading datasets from SciKit Learn...")
            print("\t\t- Boston Housing Dataset")
        boston = load_sklearn(load_boston, *["#boston", "#housing", "#boston_housing"])
        if verbose:
            print("\t\t- Diabetes Dataset")
        diabetes = load_sklearn(load_diabetes, *["#diabetes"])
        if verbose:
            print("\t\t- Breast Cancer Dataset")
        breast_cancer = load_sklearn(load_breast_cancer)
        if verbose:
            print("\t- Digits Dataset")
        digits = load_sklearn(load_digits)
        if verbose:
            print("\t\t- Iris Dataset")
        iris = load_sklearn(load_iris)
        if verbose:
            print("\t\t- Wine Dataset")
        wine = load_sklearn(load_wine)
        if verbose:
            print("\t\t- Linnerud Dataset")
        linnerud = load_sklearn(load_linnerud)

        workers = [bob, theo, jason, alice, andy, jon]

        if verbose:
            print("\tDistributing Datasets Amongst Workers...")
        # Each dataset is a (data, target) pair; send data first, then target.
        for data, target in (boston, diabetes, breast_cancer, digits, iris, wine, linnerud):
            distribute_dataset(data, workers)
            distribute_dataset(target, workers)

    if verbose:
        print("\tCollecting workers into a VirtualGrid...")
    _grid = VirtualGrid(*gbs["workers"])
    gbs["grid"] = _grid

    print("Done!")
def __init__(
    self,
    torch,
    local_worker: BaseWorker = None,
    is_client: bool = True,
    verbose: bool = False,
    seed=None,
):
    """Hook the given torch module and set up the local worker.

    The torch-related attributes are collected in a ``TorchAttributes``
    object that is published as ``syft.torch`` (and ``syft.framework``).
    If the module was already hooked, only ``self.local_worker`` is pointed
    at ``syft.local_worker`` and nothing else is done.

    Args:
        torch: The torch module to hook.
        local_worker: Worker to use as the local worker. When ``None``, a
            ``VirtualWorker`` with id ``"me"`` is created.
        is_client: Forwarded as ``is_client_worker`` to that ``VirtualWorker``.
        verbose: Stored on ``self.verbose`` and forwarded to that
            ``VirtualWorker``.
        seed: When not ``None``, passed to ``syft.ID_PROVIDER.seed``.
    """
    # Remember the framework module.
    self.torch = torch
    self.framework = self.torch

    if seed is not None:
        syft.ID_PROVIDER.seed(seed)
    self.verbose = verbose

    self.local_worker = local_worker

    # Hooking happens once per torch module: reuse the existing local worker
    # and stop here when it has been done before.
    if hasattr(torch, "torch_hooked"):
        logging.warning("Torch was already hooked... skipping hooking process")
        self.local_worker = syft.local_worker
        return

    torch.torch_hooked = True

    # Publish the torch attributes on the syft module.
    syft.torch = TorchAttributes(torch, self)
    syft.framework = syft.torch

    # In Syft there is a syft.framework value that can contain only one
    # framework. Ideally it should contain a list of supported frameworks.
    #
    # We do this because in Plans there is a method to reduce the number of
    # actions that are traced (and then sent). The actions that are not
    # returning a result, changing a placeholder, inplace or changing the
    # global state are eliminated from the traced list.
    if dependency_check.crypten_available:
        import crypten
        from syft.frameworks.crypten.crypten_attributes import CryptenAttributes

        syft.crypten = CryptenAttributes(crypten, self)

    # Hook some torch methods so tensors can be created directly at workers.
    self._hook_worker_methods()

    if self.local_worker is not None:
        self.local_worker.hook = self
    else:
        # Every hook needs a local worker to interface with other workers.
        # The worker interface keeps the torch-specific code here agnostic
        # of how workers communicate (peer-to-peer, sockets, local ports,
        # or all within the same process).
        self.local_worker = VirtualWorker(
            hook=self, is_client_worker=is_client, id="me", verbose=verbose
        )

    self._syft_workers = {self.local_worker}

    self.to_auto_overload = {}
    self.args_hook_for_overloaded_attr = {}

    self._hook_native_tensor(torch.Tensor, TorchTensor)

    if dependency_check.crypten_available:
        from syft.frameworks.crypten.hook.hook import crypten_to_auto_overload

        # Register each crypten class and hook its placeholder methods.
        for crypten_class, method_names in crypten_to_auto_overload.items():
            self.to_auto_overload[crypten_class] = method_names
            self._hook_syft_placeholder_methods(crypten_class, PlaceHolder)

    # Pointers get the hooked tensor methods, but the command is sent.
    self._hook_pointer_tensor_methods(self.torch.Tensor)

    # AdditiveSharingTensor gets them applied to all shares where that makes
    # sense (otherwise the method is overwritten in the class itself).
    self._hook_additive_shared_tensor_methods()

    # Multi-pointers send the command to all child pointers.
    self._hook_multi_pointer_tensor_methods(self.torch.Tensor)

    # These types just forward the command to the next child (behaviour can
    # be changed in the SyftTensor class file). Order matches the original
    # call sequence.
    for tensor_type in (LoggingTensor, PaillierTensor, FixedPrecisionTensor, AutogradTensor):
        self._hook_syft_tensor_methods(tensor_type)

    # PrivateTensor and PlaceHolder have dedicated hooking routines.
    self._hook_private_tensor_methods(PrivateTensor)
    self._hook_syft_placeholder_methods(self.torch.Tensor, PlaceHolder)

    # Remaining forwarding tensor types, again in the original order.
    for tensor_type in (AdditiveSharingTensor, HookedTensor):
        self._hook_syft_tensor_methods(tensor_type)

    # Built-in 'str' methods go onto String; the StringPointer hook must
    # strictly come after the String hook.
    self._hook_string_methods(owner=self.local_worker)
    self._hook_string_pointer_methods()

    # Tensor constructor, then Parameter methods (tensor chains in parameters).
    self._hook_tensor()
    self._hook_parameters()

    # torch functions (torch.add, torch.nn.functional, ...), torch.nn
    # (Linear, Convolution layers) and torch.optim (SGD, Adam, ...).
    self._hook_torch_module()
    self._hook_module()
    self._hook_optim()

    # Hook the Crypten module when it is available.
    if dependency_check.crypten_available:
        from syft.frameworks.crypten.hook.hook import hook_crypten, hook_crypten_module

        hook_crypten()
        hook_crypten_module()

    # Publish the local worker and hook on syft so that they can be found if
    # the hook is called several times.
    syft.local_worker = self.local_worker
    syft.hook = self
def main():
    """Train a model on MNIST federated over alice, bob and charlie.

    Depending on ``args.use_virtual`` the three workers are in-process
    virtual workers or websocket clients on localhost (ports 8777-8779).
    After every epoch the model is evaluated on the regular MNIST test set;
    with ``args.save_model`` the final weights go to ``mnist_cnn.pt``.
    """
    args = define_and_get_arguments()
    hook = sy.TorchHook(torch)

    if args.use_virtual:
        workers = [
            VirtualWorker(id=worker_id, hook=hook, verbose=args.verbose)
            for worker_id in ("alice", "bob", "charlie")
        ]
    else:
        shared_kwargs = {"host": "localhost", "hook": hook, "verbose": args.verbose}
        workers = [
            WebsocketClientWorker(id=worker_id, port=port, **shared_kwargs)
            for worker_id, port in (("alice", 8777), ("bob", 8778), ("charlie", 8779))
        ]

    use_cuda = args.cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    # Extra DataLoader options are only passed when running on CUDA.
    kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {}

    def build_transform():
        """Create the tensor conversion + normalization pipeline."""
        return transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
        )

    # Training data is split across the workers.
    train_set = datasets.MNIST(
        "../data", train=True, download=True, transform=build_transform()
    )
    federated_train_loader = sy.FederatedDataLoader(
        train_set.federate(tuple(workers)),
        batch_size=args.batch_size,
        shuffle=True,
        iter_per_worker=True,
        **kwargs,
    )

    # Test data is loaded with a plain torch DataLoader.
    test_set = datasets.MNIST("../data", train=False, transform=build_transform())
    test_loader = torch.utils.data.DataLoader(
        test_set,
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs,
    )

    model = Net().to(device)

    last_epoch = args.epochs
    for epoch in range(1, last_epoch + 1):
        logger.info("Starting epoch %s/%s", epoch, args.epochs)
        model = train(
            model, device, federated_train_loader, args.lr, args.federate_after_n_batches
        )
        test(model, device, test_loader)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
def main():
    """Train a model on MNIST federated over alice, bob and charlie.

    Depending on ``args.use_virtual`` the three workers are in-process
    virtual workers or websocket clients at fixed LAN addresses, all on
    port 10002. After every epoch the model is evaluated on the regular
    MNIST test set; with ``args.save_model`` the final weights go to
    ``mnist_cnn.pt``.
    """
    args = define_and_get_arguments()
    hook = sy.TorchHook(torch)

    if args.use_virtual:
        # Branch taken when using virtual (simulated) workers.
        workers = [
            VirtualWorker(id=worker_id, hook=hook, verbose=args.verbose)
            for worker_id in ("alice", "bob", "charlie")
        ]
    else:
        # Branch taken when using websocket workers.
        base_port = 10002
        worker_ids = ["alice", "bob", "charlie"]
        worker_ips = ["192.168.0.52", "192.168.0.53", "192.168.0.54"]
        workers = [
            WebsocketClientWorker(host=ip, port=base_port, id=worker_id, hook=hook)
            for worker_id, ip in zip(worker_ids, worker_ips)
        ]

    use_cuda = args.cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    # Extra DataLoader options are only passed when running on CUDA.
    kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {}

    def build_transform():
        """Create the tensor conversion + normalization pipeline."""
        return transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
        )

    # Training data is split across the workers.
    train_set = datasets.MNIST(
        "../data", train=True, download=True, transform=build_transform()
    )
    federated_train_loader = sy.FederatedDataLoader(
        train_set.federate(tuple(workers)),
        batch_size=args.batch_size,
        shuffle=True,
        iter_per_worker=True,
        **kwargs,
    )

    # Test data is loaded with a plain torch DataLoader.
    test_set = datasets.MNIST("../data", train=False, transform=build_transform())
    test_loader = torch.utils.data.DataLoader(
        test_set,
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs,
    )

    model = Net().to(device)

    last_epoch = args.epochs
    for epoch in range(1, last_epoch + 1):
        logger.info("Starting epoch %s/%s", epoch, args.epochs)
        model = train(
            model, device, federated_train_loader, args.lr, args.federate_after_n_batches
        )
        test(model, device, test_loader)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
def create_sandbox(gbs, verbose=True, download_data=True):  # noqa: C901
    """Set up a ready-to-use sandbox in the caller's namespace.

    Hooks torch, creates six virtual workers (bob, theo, jason, alice, andy,
    jon), stores the hook, the workers and a ``PrivateGridNetwork`` over
    them in ``gbs`` and, when requested, loads several datasets and spreads
    them across the workers.

    Args:
        gbs: Mapping to populate (typically the caller's ``globals()``). It
            must contain the torch module under ``"torch"`` or ``"th"``.
        verbose: When true, progress is printed step by step. The
            "Setting up Sandbox..." and "Done!" lines are always printed.
        download_data: When true, scikit-learn datasets (and, if
            ``tf_datasets_available``, fashion_mnist and cifar10) are loaded
            and distributed.

    Raises:
        DependencyError: If ``download_data`` is true and ``sklearn`` cannot
            be found.
    """
    try:
        torch = gbs["torch"]
    except KeyError:
        torch = gbs["th"]

    global hook, bob, theo, alice, andy, jason, jon

    if download_data and importlib.util.find_spec("sklearn") is None:
        raise DependencyError("sklearn", "scikit-learn")

    if download_data:  # pragma: no cover
        from sklearn.datasets import (
            load_boston,
            load_breast_cancer,
            load_digits,
            load_diabetes,
            load_iris,
            load_wine,
            load_linnerud,
        )

        def load_sklearn(func, *tags):
            """Load a dataset and return tagged, described (data, target) tensors."""
            dataset = func()
            description = dataset["DESCR"]
            # The words of the first description line are used as extra tags.
            title_words = description.split("\n")[0].lower().split(" ")

            data = torch.tensor(dataset["data"]).float()
            data = data.tag(*(list(tags) + ["#data"] + title_words))
            data = data.describe(description)

            target = torch.tensor(dataset["target"]).float()
            target = target.tag(*(list(tags) + ["#target"] + title_words))
            target = target.describe(description)

            return data, target

        def load_tf(func, *tags):
            """Load a train/test dataset pair and return truncated, tagged tensors."""
            # num_of_records is a configurable limit for the cifar10 and
            # fashion_mnist datasets, since they are huge and require a lot
            # of memory resources.
            num_of_records = 10000

            ((train_images, train_labels), (test_images, test_labels)) = func()
            data = np.concatenate([train_images, test_images])[0:num_of_records]
            target = np.concatenate([train_labels, test_labels])[0:num_of_records]

            # The first tag without its leading character is the description.
            description = tags[0][1:]
            data = torch.IntTensor(data).tag(*(list(tags) + ["#data"])).describe(description)
            target = (
                torch.IntTensor(target).tag(*(list(tags) + ["#target"])).describe(description)
            )
            return data, target

        def distribute_dataset(data, workers):
            """Split ``data`` along dim 0 into one batch per worker and send it."""
            batch_size = int(data.shape[0] / len(workers))
            last_index = len(workers) - 1
            for index, worker in enumerate(workers):
                start = index * batch_size
                if index == last_index:
                    # The last worker also receives the remainder.
                    batch = data[start:]
                else:
                    batch = data[start : start + batch_size]
                batch.tags = data.tags
                batch.description = data.description
                ptr = batch.send(worker)
                # keep the remote copy when the local pointer goes away
                ptr.child.garbage_collect_data = False

    print("Setting up Sandbox...")

    if verbose:
        print("\t- Hooking PyTorch")
    hook = TorchHook(torch)

    if verbose:
        print("\t- Creating Virtual Workers:")
        print("\t\t- bob")
    bob = VirtualWorker(hook, id="bob")
    if verbose:
        print("\t\t- theo")
    theo = VirtualWorker(hook, id="theo")
    if verbose:
        print("\t\t- jason")
    jason = VirtualWorker(hook, id="jason")
    if verbose:
        print("\t\t- alice")
    alice = VirtualWorker(hook, id="alice")
    if verbose:
        print("\t\t- andy")
    andy = VirtualWorker(hook, id="andy")
    if verbose:
        print("\t\t- jon")
    jon = VirtualWorker(hook, id="jon")

    if verbose:
        print("\tStoring hook and workers as global variables...")
    gbs.update(
        {
            "hook": hook,
            "bob": bob,
            "theo": theo,
            "jason": jason,
            "alice": alice,
            "andy": andy,
            "jon": jon,
        }
    )
    gbs["workers"] = [bob, theo, jason, alice, andy, jon]

    if download_data:  # pragma: no cover
        if verbose:
            print("\tLoading datasets from SciKit Learn...")
            print("\t\t- Boston Housing Dataset")
        boston = load_sklearn(load_boston, *["#boston", "#housing", "#boston_housing"])
        if verbose:
            print("\t\t- Diabetes Dataset")
        diabetes = load_sklearn(load_diabetes, *["#diabetes"])
        if verbose:
            print("\t\t- Breast Cancer Dataset")
        breast_cancer = load_sklearn(load_breast_cancer, *["#breast_cancer_dataset"])
        if verbose:
            print("\t- Digits Dataset")
        digits = load_sklearn(load_digits, *["#digits_dataset"])
        if verbose:
            print("\t\t- Iris Dataset")
        iris = load_sklearn(load_iris, *["#iris_dataset"])
        if verbose:
            print("\t\t- Wine Dataset")
        wine = load_sklearn(load_wine, *["#wine_dataset"])
        if verbose:
            print("\t\t- Linnerud Dataset")
        linnerud = load_sklearn(load_linnerud, *["#linnerrud_dataset"])

        if tf_datasets_available:
            if verbose:
                print("\tLoading datasets from TensorFlow datasets...")
                print("\t\t- fashion_mnist Dataset")
            fashion_mnist = load_tf(datasets.fashion_mnist.load_data, *["#fashion_mnist"])
            if verbose:
                print("\t\t- cifar10 Dataset")
            cifar10 = load_tf(datasets.cifar10.load_data, *["#cifar10"])

        workers = [bob, theo, jason, alice, andy, jon]

        if verbose:
            print("\tDistributing Datasets Amongst Workers...")
        # Each dataset is a (data, target) pair; send data first, then target.
        for data, target in (boston, diabetes, breast_cancer, digits, iris, wine, linnerud):
            distribute_dataset(data, workers)
            distribute_dataset(target, workers)

        if tf_datasets_available:
            for data, target in (fashion_mnist, cifar10):
                distribute_dataset(data, workers)
                distribute_dataset(target, workers)

    if verbose:
        print("\tCollecting workers into a VirtualGrid...")
    _grid = PrivateGridNetwork(*gbs["workers"])
    gbs["grid"] = _grid

    print("Done!")
def main():
    """Train ``Net`` with federated learning on three workers and evaluate it.

    Steps, in order:
      1. Parse arguments and hook torch with syft.
      2. Create the workers ``alice``, ``bob`` and ``charlie``: in-process
         ``VirtualWorker`` objects when ``args.use_virtual`` is set, otherwise
         ``WebsocketClientWorker`` connections to three fixed hosts.
      3. Load the data via ``process_data()`` and hold out 10% of it for testing.
      4. Federate the training portion across the workers, then run
         ``train``/``test`` once per epoch.
      5. Optionally save the model's ``state_dict`` to ``./Model/mnist_cnn.pt``.

    The train/test split is driven by a random generator seeded from
    ``args.seed``, so the same seed yields the same shuffle of indices.
    """
    # Local import: only needed to create the checkpoint directory below.
    import os

    args = define_and_get_arguments()
    hook = sy.TorchHook(torch)

    if args.use_virtual:
        # Virtual (simulated) workers.
        alice = VirtualWorker(id="alice", hook=hook, verbose=args.verbose)
        bob = VirtualWorker(id="bob", hook=hook, verbose=args.verbose)
        charlie = VirtualWorker(id="charlie", hook=hook, verbose=args.verbose)
    else:
        # Websocket workers: one host per worker, all on the same port.
        a_kwargs_websocket = {"host": "192.168.0.57", "hook": hook}
        b_kwargs_websocket = {"host": "192.168.0.58", "hook": hook}
        c_kwargs_websocket = {"host": "192.168.0.59", "hook": hook}

        baseport = 10002
        alice = WebsocketClientWorker(id="alice", port=baseport, **a_kwargs_websocket)
        bob = WebsocketClientWorker(id="bob", port=baseport, **b_kwargs_websocket)
        charlie = WebsocketClientWorker(id="charlie", port=baseport, **c_kwargs_websocket)

    # Collect the worker objects into a list.
    workers = [alice, bob, charlie]

    # Whether CUDA is both requested and available.
    use_cuda = args.cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {}

    # Seed torch, and derive a private NumPy generator from the same seed so the
    # train/test split is reproducible without touching NumPy's global RNG state.
    # The modulo keeps the value inside the range RandomState accepts.
    torch.manual_seed(args.seed)
    split_rng = np.random.RandomState(args.seed % (2 ** 32))

    labels_resampled_factorized, obs_resampled_with_noise_2 = process_data()

    # Fraction of the samples held out for testing.
    test_size = 0.1

    # Shuffle all sample indices, then cut off the first `split` as the test set.
    num_train = len(obs_resampled_with_noise_2)
    indices = list(range(num_train))
    split_rng.shuffle(indices)
    split = int(np.floor(test_size * num_train))
    train_idx, test_idx = indices[split:], indices[:split]

    federated_train_dataset = D.TensorDataset(
        torch.tensor(obs_resampled_with_noise_2[train_idx]),
        torch.tensor(labels_resampled_factorized[train_idx]),
    )

    federated_train_loader = sy.FederatedDataLoader(
        federated_train_dataset.federate(tuple(workers)),
        batch_size=args.batch_size,
        shuffle=True,
        iter_per_worker=True,
        **kwargs,
    )

    test_dataset = D.TensorDataset(
        torch.tensor(obs_resampled_with_noise_2[test_idx]),
        torch.tensor(labels_resampled_factorized[test_idx]),
    )

    test_loader = D.DataLoader(
        test_dataset,
        shuffle=True,
        batch_size=args.batch_size,
        num_workers=0,
        drop_last=True,
    )

    model = Net(input_features=1, output_dim=5).to(device)
    criterion = nn.NLLLoss()

    for epoch in range(1, args.epochs + 1):
        logger.info("Starting epoch %s/%s", epoch, args.epochs)
        model = train(
            model,
            device,
            federated_train_loader,
            args.lr,
            args.federate_after_n_batches,
            criterion=criterion,
        )
        test(model, test_loader, args.batch_size, criterion=criterion, train_on_gpu=use_cuda)

    if args.save_model:
        model_path = "./Model/mnist_cnn.pt"
        # Create the target directory first so a finished training run is not
        # lost to a missing-directory error at save time.
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        torch.save(model.state_dict(), model_path)
def test___init__():
    """Check VirtualWorker construction, data ownership on signup and worker lookup."""
    hook = sy.TorchHook(torch)
    tensor = torch.tensor([1, 2, 3, 4])

    def unique_id(prefix):
        # Append a freshly popped provider id to the given name prefix.
        return f"{prefix}{sy.ID_PROVIDER.pop()}"

    alice_id = unique_id("alice")
    alice = VirtualWorker(hook, id=alice_id)
    bob = VirtualWorker(hook, id=unique_id("bob"))
    charlie = VirtualWorker(hook, id=unique_id("charlie"))
    dawson = VirtualWorker(hook, id=unique_id("dawson"), data=[tensor])

    # Data handed over at construction time must be owned by the new worker.
    assert tensor.owner == dawson

    # alice must be resolvable both by her id string and by the worker object.
    for alice_ref in (alice_id, alice):
        assert bob.get_worker(alice_ref).id == alice.id
    assert bob.get_worker(charlie).id == charlie.id

    # No assertion on these two calls: the test only requires them to complete.
    bob.get_worker("the_unknown_worker")
    bob.add_worker(alice)
def __init__(self, tensorflow, local_worker: BaseWorker = None, is_client: bool = True):
    """Hook TensorFlow and register this hook and its local worker on ``syft``.

    Args:
        tensorflow: the module object to hook. A ``tf_hooked`` attribute is set
            on it the first time through; if it is already present, hooking is
            skipped and ``syft.local_worker`` is reused.
        local_worker: worker to use as the local worker. When ``None``, a
            ``VirtualWorker`` with id ``"me"`` is created.
        is_client: passed as ``is_client_worker`` to the ``VirtualWorker``
            created when ``local_worker`` is ``None``.
    """
    self.tensorflow = tensorflow
    self.framework = self.tensorflow

    # Expose the framework attributes and this hook through the syft package.
    syft.tensorflow = TensorFlowAttributes(tf, self)
    syft.framework = syft.tensorflow
    syft.tensorflow.hook = self
    syft.hook = self

    self.local_worker = local_worker

    # Guard: a module that already carries the marker is not hooked again.
    if hasattr(tensorflow, "tf_hooked"):
        logging.warning("TF was already hooked, skipping hooking process")
        self.local_worker = syft.local_worker
        return
    tensorflow.tf_hooked = True

    if self.local_worker is not None:
        self.local_worker.hook = self
    else:
        # Every TensorFlowHook instance needs a local worker to interface with
        # other workers. Going through the worker interface keeps the
        # TensorFlow-specific code here agnostic to how workers communicate
        # (peer-to-peer, sockets, local ports, or all within one process).
        self.local_worker = VirtualWorker(hook=self, is_client_worker=is_client, id="me")

    # Table of what to auto-overload: tf.math gets an explicit list, every
    # other entry is computed from the type itself.
    overload_table = {tf.math: ["add"]}
    for overload_type in (
        Tensor,
        tf.Variable,
        tf.keras.layers.Layer,
        tf.keras.models.Model,
        ResourceVariable,
    ):
        overload_table[overload_type] = self._which_methods_should_we_auto_overload(
            overload_type
        )
    self.to_auto_overload = overload_table

    self.args_hook_for_overloaded_attr = {}

    self._hook_native_tensor(Tensor, TensorFlowTensor)
    self._hook_variable(TensorFlowVariable)

    self._hook_keras_layers(tf.keras.layers.Layer, KerasLayer)
    self._hook_keras_model(tf.keras.models.Model, KerasModel)

    for pointer_target in (Tensor, tf.Variable, ResourceVariable, tf.math):
        self._hook_pointer_tensor_methods(pointer_target)

    self._hook_multi_pointer_tensor_methods(tf.math)

    for object_target in (tf.keras.layers.Layer, tf.keras.models.Model):
        self._hook_object_pointer_methods(object_target)

    self._hook_tensorflow_module()

    syft.local_worker = self.local_worker
    syft.hook = self

    # This must remain the final step; see the documentation of
    # _add_methods_to_eager_tensor for the reason.
    self._add_methods_to_eager_tensor()