def check_resulting_model(name: str, model: SyModule) -> float:
    """Download the trained parameters for *name* (version "1.0") from the
    local PyGrid domain, load them into *model*, and return the MNIST
    test-set accuracy as a plain float."""
    # Fetch the hosted model's trained parameters from the domain node.
    grid = ModelCentricFLClient(address=f"localhost:{DOMAIN_PORT}", secure=False)
    grid.connect()
    trained_params = grid.retrieve_model(name, "1.0")

    # Inference helper: fraction of correct predictions over the whole set.
    def test(test_loader: th.utils.data.DataLoader, model: SyModule) -> th.Tensor:
        model.eval()
        hits = []
        for data, target in test_loader:
            flat = data.view(-1, 28 * 28)  # flatten 28x28 images for the MLP
            logits = model(flat)
            _, pred = th.max(logits, 1)
            hits.append(th.sum(np.squeeze(pred.eq(target.data.view_as(pred)))))
        return sum(hits) / len(test_loader.dataset)

    set_params(model, trained_params)

    tfs = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
    )
    mnist_test = datasets.MNIST(get_root_data_path(), train=False, transform=tfs)
    test_loader = th.utils.data.DataLoader(
        mnist_test, batch_size=32, shuffle=True, pin_memory=True
    )
    return test(test_loader, model).item()
# Some dataset mirrors reject the default urllib User-Agent, so present a
# real browser UA for all subsequent urllib downloads.
real_user_agent = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/88.0.4324.192 Safari/537.36"
)
opener = urllib.request.build_opener()
opener.addheaders = [("User-agent", real_user_agent)]
urllib.request.install_opener(opener)

# Redirect torchvision at the S3 mirror; the original MNIST host is flaky.
# https://github.com/pytorch/vision/issues/3549
_MNIST_MIRROR = "https://ossci-datasets.s3.amazonaws.com/mnist/"
datasets.MNIST.resources = [
    (_MNIST_MIRROR + fname, md5)
    for fname, md5 in (
        ("train-images-idx3-ubyte.gz", "f68b3c2dcbeaaa9fbdd348bbdeb94873"),
        ("train-labels-idx1-ubyte.gz", "d53e105ee54ea40749a09fcbcd1e9432"),
        ("t10k-images-idx3-ubyte.gz", "9fb629c4189551a2d022fa330f9573f3"),
        ("t10k-labels-idx1-ubyte.gz", "ec29112dd5afa0611ce80d1b7f02629c"),
    )
]

# Pre-fetch both splits so later code finds them cached on disk.
datasets.MNIST(get_root_data_path(), train=True, download=True)
datasets.MNIST(get_root_data_path(), train=False, download=True)
def test_resnet_18_custom_blocks(client: sy.VirtualMachineClient) -> None:
    """Train a ResNet18 SyModule on CIFAR-10 through a syft Plan executed on
    *client*, periodically checking test accuracy.

    With ``dry_run`` enabled (as hard-coded below) only a single 2-sample
    batch is trained and evaluated, so the test exercises the plan machinery
    rather than real convergence.
    """
    cifar10_path = get_root_data_path()
    cifar10_path.mkdir(exist_ok=True, parents=True)
    # Per-channel (mean, std) — presumably the standard CIFAR-10 statistics.
    norm = (0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)
    cifar_train = datasets.CIFAR10(
        cifar10_path,
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(*norm),
        ]),
    )
    cifar_test = datasets.CIFAR10(
        (cifar10_path),
        train=False,
        transform=transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize(*norm)]),
    )
    train_batch = 64
    test_batch = 1000
    # dry_run shrinks batches to 2 and short-circuits the loops below.
    dry_run = True
    if dry_run:
        train_batch = 2
        test_batch = 2
    train_loader = th.utils.data.DataLoader(cifar_train, batch_size=train_batch, shuffle=True, pin_memory=True)
    test_loader = th.utils.data.DataLoader(cifar_test, batch_size=test_batch, shuffle=True, pin_memory=True)
    # input_size matches the (batch=2, C, H, W) shape used to trace the model.
    model = ResNet18(input_size=(2, 3, 32, 32))
    remote_torch = ROOT_CLIENT.torch
    # One real batch serves as the dummy input for tracing the plan.
    dummy_dl = sy.lib.python.List([next(iter(train_loader))])

    @make_plan
    def train(dl: sy.lib.python.List = dummy_dl, model: SyModule = model) -> TypeList:
        # One AdamW pass over every (x, y) pair in dl; returns the updated model.
        optimizer = remote_torch.optim.AdamW(model.parameters())
        for xy in dl:
            optimizer.zero_grad()
            x, y = xy[0], xy[1]
            out = model(x=x)[0]
            loss = remote_torch.nn.functional.cross_entropy(out, y)
            loss.backward()
            optimizer.step()
        return [model]

    def test(test_loader: th.utils.data.DataLoader, model: SyModule) -> float:
        # Local (non-plan) evaluation; NOTE: under dry_run only one batch is
        # scored but the divisor is still the full dataset size.
        correct = []
        model.eval()
        for data, target in test_loader:
            output = model(x=data)[0]
            _, pred = th.max(output, 1)
            correct.append(th.sum(np.squeeze(pred.eq(target.data))))
            if dry_run:
                break
        acc = sum(correct) / len(test_loader.dataset)
        return acc

    # Send the traced plan to the VM and run it batch-by-batch, pulling the
    # updated model back after each call.
    train_ptr = train.send(client)
    for i, (x, y) in enumerate(train_loader):
        dl = [[x, y]]
        res_ptr = train_ptr(dl=dl, model=model)
        (model, ) = res_ptr.get()
        # Evaluate every 10th iteration (or immediately when dry-running).
        if (i % 10 == 0 and i != 0) or dry_run:
            acc = test(test_loader, model)
            print(f"Iter: {i} Test accuracy: {acc:.2F}", flush=True)
        if i > 50 or dry_run:
            break
def train_with_hosted_training_plan(
    name: str,
    plan_inputs: OrderedDict,
    plan_output_params_idx: TypeList[int],
    plan_type: str = "list",
) -> None:
    """Act as an FL worker against a locally hosted PyGrid training plan.

    Repeatedly authenticates against the node, requests a cycle for model
    *name* (version "1.0"), trains on MNIST batches with the plan the node
    returns, and reports updated parameters — until the node rejects the
    worker without a timeout, which signals training is complete.

    Args:
        name: Hosted model name on the PyGrid node.
        plan_inputs: Template OrderedDict of plan inputs; "xs", "ys" and
            "params" are filled in per batch (order matters for the
            torchscript call path).
        plan_output_params_idx: Indices of the updated parameters within the
            plan's output.
        plan_type: "list" calls the plan with keywords; "torchscript" calls
            it positionally in the OrderedDict's order.
    """
    # PyGrid Node websocket address.
    grid_address = f"ws://localhost:{DOMAIN_PORT}"
    # Hosted model name/version.
    model_name = name
    model_version = "1.0"

    # TorchVision hotfix https://github.com/pytorch/vision/issues/3549
    datasets.MNIST.resources = [
        (
            "https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz",
            "f68b3c2dcbeaaa9fbdd348bbdeb94873",
        ),
        (
            "https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz",
            "d53e105ee54ea40749a09fcbcd1e9432",
        ),
        (
            "https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz",
            "9fb629c4189551a2d022fa330f9573f3",
        ),
        (
            "https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz",
            "ec29112dd5afa0611ce80d1b7f02629c",
        ),
    ]
    datasets.MNIST(get_root_data_path(), train=True, download=True)
    datasets.MNIST(get_root_data_path(), train=False, download=True)

    tfs = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
    )
    train_set = datasets.MNIST(
        get_root_data_path(), train=True, download=True, transform=tfs
    )

    cycles_log: TypeList = []
    status = {"ended": False}

    # Called when client is accepted into FL cycle
    def on_accepted(job: FLJob) -> None:
        print(f"Accepted into {job} cycle {len(cycles_log) + 1}.")

        cycle_params = job.client_config
        batch_size, max_updates = (
            cycle_params["batch_size"],
            cycle_params["max_updates"],
        )
        training_plan, model_params = job.plans["training_plan"], job.model
        # NOTE(review): losses/accuracies are never populated here; the log
        # records empty lists per cycle (preserved from original behavior).
        losses: TypeList = []
        accuracies: TypeList = []

        train_loader = th.utils.data.DataLoader(
            train_set, batch_size=batch_size, drop_last=True, shuffle=True
        )

        for batch_idx, (x, y) in enumerate(train_loader):
            x = x.view(-1, 28 * 28)  # flatten images for the MLP plan
            y = th.nn.functional.one_hot(y, 10)

            # Fill a copy of the template so the caller's OrderedDict is not
            # mutated on every batch/cycle (the original aliased it).
            inputs = plan_inputs.copy()
            inputs["xs"] = x
            inputs["ys"] = y
            inputs["params"] = [th.nn.Parameter(param) for param in model_params]

            if plan_type == "torchscript":
                res = training_plan(*inputs.values())
            else:
                res = training_plan(**inputs)
            model_params = [res[idx] for idx in plan_output_params_idx]

            if batch_idx >= max_updates - 1:
                break

        job.report(model_params)
        # Save losses/accuracies from cycle
        cycles_log.append((losses, accuracies))

    # Called when the client is rejected from cycle
    def on_rejected(job: FLJob, timeout: Optional[int] = None) -> None:
        if timeout is None:
            print(f"Rejected from {job} cycle without timeout, FL training complete.")
        else:
            print(f"Rejected from {job} cycle with timeout: {timeout}.")
        status["ended"] = True

    # Called when error occurred
    def on_error(job: FLJob, error: Exception) -> None:
        print(f"Error: {job} {error}")
        status["ended"] = True

    def create_client_and_run_cycle() -> None:
        client = FLClient(url=grid_address, auth_token=auth_token, secure=False)
        client.worker_id = client.grid_worker.authenticate(
            client.auth_token, model_name, model_version
        )["data"]["worker_id"]
        job = client.new_job(model_name, model_version)

        # Override plan type to use
        job.plan_type = plan_type

        # Set event handlers
        job.add_listener(job.EVENT_ACCEPTED, on_accepted)
        job.add_listener(job.EVENT_REJECTED, on_rejected)
        job.add_listener(job.EVENT_ERROR, on_error)

        # Shoot!
        job.start()

    while not status["ended"]:
        create_client_and_run_cycle()
        time.sleep(1)
import torchvision

# syft absolute
from syft.util import get_root_data_path

# Torchvision < 0.9.1 points at the defunct original MNIST host; redirect
# those releases to the S3 mirror instead.
# https://github.com/pytorch/vision/issues/3549
TORCHVISION_VERSION = version.parse(torchvision.__version__)
if TORCHVISION_VERSION < version.parse("0.9.1"):
    URL = "https://ossci-datasets.s3.amazonaws.com/mnist/"
    torchvision.datasets.MNIST.resources = [
        (f"{URL}{fname}", md5)
        for fname, md5 in (
            ("train-images-idx3-ubyte.gz", "f68b3c2dcbeaaa9fbdd348bbdeb94873"),
            ("train-labels-idx1-ubyte.gz", "d53e105ee54ea40749a09fcbcd1e9432"),
            ("t10k-images-idx3-ubyte.gz", "9fb629c4189551a2d022fa330f9573f3"),
            ("t10k-labels-idx1-ubyte.gz", "ec29112dd5afa0611ce80d1b7f02629c"),
        )
    ]

# Cache both splits on disk so later dataset constructions skip the network.
torchvision.datasets.MNIST(get_root_data_path(), train=True, download=True)
torchvision.datasets.MNIST(get_root_data_path(), train=False, download=True)