def initialize_training(parameters, learning_rate, local_dp): """ Initializes the model, optimizer and scheduler and shares the parameters with all the workers in the group. This should be sent from server to all nodes. Args: learning_rate: The learning rate for training. local_dp: bool whether to apply local_dp or not. Returns: Returns the device, model, optimizer and scheduler. """ # Determine the device to train on use_cuda = torch.cuda.is_available() device = torch.device("cuda" if use_cuda else "cpu") # Initialize model and send parameters of server to all workers model = Net().to(device) # initializing optimizer and scheduler optimizer = optim.SGD(parameters, lr=learning_rate, momentum=0.5) if local_dp: privacy_engine = PrivacyEngine(model, batch_size=64, sample_size=60000, alphas=range(2, 32), noise_multiplier=1.3, max_grad_norm=1.0, ) privacy_engine.attach(optimizer) # returns device, model, optimizer which will be needed in train and test return device, optimizer, model
def main():
    """Run N_RUNS (DP-)training runs of Inception3 and report mean/std accuracy.

    Relies on module-level config: N_RUNS, DEVICE, LR, DISABLE_DP, BATCH_SIZE,
    SIGMA, GRAD_NORM, SECURE_RNG, EPOCHS, train_loader, test_loader.
    """
    run_results = []
    for _ in range(N_RUNS):
        model = Inception3(num_classes=10).to(DEVICE)
        optimizer = optim.SGD(model.parameters(), lr=LR, momentum=0)
        if not DISABLE_DP:
            # Privacy accounting is based on the *training* set size.
            privacy_engine = PrivacyEngine(
                model,
                batch_size=BATCH_SIZE,
                sample_size=len(train_loader.dataset),
                alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
                noise_multiplier=SIGMA,
                max_grad_norm=GRAD_NORM,
                secure_rng=SECURE_RNG,
            )
            privacy_engine.attach(optimizer)
        for epoch in range(1, EPOCHS + 1):
            # BUG FIX: the original passed test_loader here, training on the
            # test set and contradicting the sample_size accounting above
            # (which uses len(train_loader.dataset)).
            train(model, DEVICE, train_loader, optimizer, epoch)
        run_results.append(test(model, DEVICE, test_loader))
    if len(run_results) > 1:
        print(
            "Accuracy averaged over {} runs: {:.2f}% ± {:.2f}%".format(
                len(run_results), np.mean(run_results) * 100, np.std(run_results) * 100
            )
        )
def __init__(self, device=None, jit=False):
    """Build a ResNet-18 benchmark harness with an Opacus PrivacyEngine attached.

    Args:
        device: torch device for the model and example tensors.
        jit: stored but not used here -- presumably consumed by the benchmark
            base class; confirm against super().
    """
    super().__init__()
    self.device = device
    self.jit = jit
    self.model = models.resnet18(num_classes=10)
    # Opacus cannot compute per-sample gradients through BatchNorm; swap BN
    # modules for DP-compatible replacements.
    self.model = convert_batchnorm_modules(self.model)
    self.model = self.model.to(device)
    # One batch of 64 CIFAR-shaped (3x32x32) inputs with random class targets.
    self.example_inputs = (
        torch.randn((64, 3, 32, 32), device=self.device),
    )
    self.example_target = torch.randint(0, 10, (64,), device=self.device)
    self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
    self.criterion = nn.CrossEntropyLoss()
    # This is supposed to equal the number of data points.
    # It is only to compute stats so dwai about the value.
    sample_size = 64 * 100
    clipping = {"clip_per_layer": False, "enable_stat": False}
    self.privacy_engine = PrivacyEngine(
        self.model,
        batch_size=64,
        sample_size=sample_size,
        alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
        noise_multiplier=1.0,
        max_grad_norm=1.0,
        secure_rng=False,
        **clipping,
    )
    self.privacy_engine.attach(self.optimizer)
def test_poisson_sampling(self):
    """Smoke test: train a linear model through a Poisson-sampled DataLoader
    with the PrivacyEngine attached, including batches that may be empty."""
    B = 1   # expected batch size (in expectation; sampling is Poisson)
    N = 10  # dataset size
    d = 10  # feature dimension
    dataset = [(i, torch.randn(d), torch.randn(d)) for i in range(N)]
    model = nn.Linear(d, d)
    optimizer = optim.SGD(model.parameters(), lr=0.1)
    engine = PrivacyEngine(
        model,
        sample_rate=B / N,
        target_epsilon=1.0,
        epochs=10,
        poisson=True,
        max_grad_norm=1,
        sample_size=N,
    )
    engine.attach(optimizer)
    # Seed the sampler's generator so the drawn batches are reproducible.
    generator = torch.Generator()
    generator.manual_seed(7)
    sampler = UniformWithReplacementSampler(
        num_samples=N, sample_rate=B / N, generator=generator
    )
    dataloader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler)
    # Sampler with seed=7 should generate [], [7], [], [], [9], [0], [], [], [1], [4]
    for (_, x, y) in dataloader:
        prediction = model(x)
        loss = torch.mean((prediction - y) ** 2)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
def test_privacy_engine_virtual_step_example(self):
    # IMPORTANT: When changing this code you also need to update
    # the docstring for opacus.privacy_engine.PrivacyEngine.virtual_step()
    model = nn.Linear(16, 2)
    dataloader = []
    batch_size = 64
    sample_size = 256
    sample_rate = batch_size / sample_size
    # Build 64 micro-batches of 4 samples each (64 * 4 = 256 samples total).
    for _ in range(64):
        data = torch.randn(4, 16)
        labels = torch.randint(0, 2, (4, ))
        dataloader.append((data, labels))
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.05)
    privacy_engine = PrivacyEngine(
        model,
        sample_rate=sample_rate,
        noise_multiplier=0.8,
        max_grad_norm=0.5,
    )
    privacy_engine.attach(optimizer)
    for i, (X, y) in enumerate(dataloader):
        logits = model(X)
        loss = criterion(logits, y)
        loss.backward()
        # Accumulate 16 micro-batches (one logical batch of 64) before stepping.
        if i % 16 == 15:
            optimizer.step()  # this will call privacy engine's step()
            optimizer.zero_grad()
        else:
            optimizer.virtual_step()  # this will call privacy engine's virtual_step()
def train_model(net, trainloader, trainset, device, dp):
    """Train `net` for 5 epochs on `trainloader`, optionally with DP.

    Args:
        net: model to train (updated in place).
        trainloader: DataLoader yielding (inputs, labels) batches.
        trainset: underlying dataset; unused here, kept for interface
            compatibility with existing callers.
        device: torch device for inputs/labels.
        dp: if truthy, attach an Opacus PrivacyEngine to the optimizer.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=.001, momentum=.9)
    if dp:  # idiomatic truthiness check instead of `dp == True`
        print('adding privacy engine')
        # if we are training with differential privacy, create the engine.
        # NOTE(review): the positional sample_size is len(trainloader), which
        # is the number of *batches*, not samples; Opacus expects the dataset
        # size. Left unchanged to preserve the original privacy accounting --
        # confirm and fix separately.
        privacy_engine = PrivacyEngine(
            net,
            4,                  # batch_size
            len(trainloader),   # sample_size (see note above)
            alphas=[1, 10, 100],
            noise_multiplier=1.3,
            max_grad_norm=1.0,
        )
        privacy_engine.attach(optimizer)
    for epoch in range(5):  # currently training for 5 epochs
        print(f'epoch: {epoch}')
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data[0].to(device), data[1].to(device)
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
def main(args):
    """Benchmark DP-SGD (Opacus) training time for one of several text models.

    Requires args.dpsgd (asserted below); times each epoch and optionally
    saves runtimes via utils.save_runtimes.
    """
    print(args)
    # This script only supports the DP-SGD path.
    assert args.dpsgd
    torch.backends.cudnn.benchmark = True
    mdict = model_dict.copy()
    mdict['lstm'] = LSTMNet
    train_data, train_labels = get_data(args)
    model = mdict[args.experiment](vocab_size=args.max_features,
                                   batch_size=args.batch_size).cuda()
    optimizer = optim.SGD(model.parameters(), lr=args.learning_rate, momentum=0)
    # Logistic regression uses BCE on a single output; everything else is
    # multi-class cross entropy.
    loss_function = nn.CrossEntropyLoss(
    ) if args.experiment != 'logreg' else nn.BCELoss()
    privacy_engine = PrivacyEngine(
        model,
        batch_size=args.batch_size,
        sample_size=len(train_data),
        alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
        noise_multiplier=args.sigma,
        max_grad_norm=args.max_per_sample_grad_norm,
    )
    privacy_engine.attach(optimizer)
    timings = []
    for epoch in range(1, args.epochs + 1):
        start = time.perf_counter()
        dataloader = data.dataloader(train_data, train_labels, args.batch_size)
        for batch_idx, (x, y) in enumerate(dataloader):
            x, y = x.cuda(non_blocking=True), y.cuda(non_blocking=True)
            model.zero_grad()
            outputs = model(x)
            loss = loss_function(outputs, y)
            loss.backward()
            optimizer.step()
        # Wait for all queued CUDA work so the timing is honest.
        torch.cuda.synchronize()
        duration = time.perf_counter() - start
        print("Time Taken for Epoch: ", duration)
        timings.append(duration)
        if args.dpsgd:
            epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent(
                args.delta)
            print(
                f"Train Epoch: {epoch} \t"
                # f"Loss: {np.mean(losses):.6f} "
                f"(ε = {epsilon:.2f}, δ = {args.delta}) for α = {best_alpha}")
        else:
            # NOTE(review): dead branch (args.dpsgd is asserted above). If it
            # were ever reached, `losses` is undefined here -> NameError.
            print(f"Train Epoch: {epoch} \t Loss: {np.mean(losses):.6f}")
    if not args.no_save:
        utils.save_runtimes(__file__.split('.')[0], args, timings)
    else:
        print('Not saving!')
    print('Done!')
def setUp_privacy_engine(self, batch_size):
    """Create a zero-noise, effectively-unclipped PrivacyEngine over the test
    fixture's model and attach it to the fixture's optimizer.

    noise_multiplier=0 with a huge clipping bound (999) makes DP a near no-op,
    so tests can compare against non-private training.
    """
    engine = PrivacyEngine(
        self.model,
        sample_rate=batch_size / self.DATA_SIZE,
        alphas=self.ALPHAS,
        noise_multiplier=0,
        max_grad_norm=999,
    )
    self.privacy_engine = engine
    engine.attach(self.optimizer)
def initialize_dp(model, optimizer, sample_rate, dp_sigma):
    """Create an Opacus PrivacyEngine for `model` and attach it to `optimizer`.

    The effective sample rate is scaled by N_ACCUMULATION_STEPS because, with
    gradient accumulation, each optimizer step covers several sampled batches.
    """
    effective_rate = sample_rate * N_ACCUMULATION_STEPS
    engine = PrivacyEngine(
        model,
        sample_rate=effective_rate,
        # Alternative accounting mode, kept for reference:
        # epochs=EPOCHS,
        # target_epsilon=EPSILON,
        target_delta=DELTA,
        noise_multiplier=dp_sigma,
        max_grad_norm=MAX_GRAD_NORM,
    )
    engine.attach(optimizer)
def test_privacy_engine_class_example(self):
    # IMPORTANT: When changing this code you also need to update
    # the docstring for opacus.privacy_engine.PrivacyEngine
    model = torch.nn.Linear(16, 32)  # An example model
    optimizer = torch.optim.SGD(model.parameters(), lr=0.05)
    privacy_engine = PrivacyEngine(
        model,
        sample_rate=0.01,
        noise_multiplier=1.3,
        max_grad_norm=1.0,
    )
    privacy_engine.attach(optimizer)  # That's it! Now it's business as usual.
def on_train_start(self) -> None:
    """Lightning hook: if DP is enabled, build a PrivacyEngine over this
    LightningModule and attach it to the configured optimizer."""
    if self.enable_dp:
        self.privacy_engine = PrivacyEngine(
            self,  # the LightningModule itself is the model
            sample_rate=self.sample_rate,
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=self.sigma,
            max_grad_norm=self.max_per_sample_grad_norm,
            secure_rng=self.secure_rng,
        )
        # NOTE(review): self.optimizers() returns a list when multiple
        # optimizers are configured; this assumes a single optimizer -- confirm.
        optimizer = self.optimizers()
        self.privacy_engine.attach(optimizer)
def test_model_validator(self):
    """
    Test that the privacy engine throws on attach
    if there are unsupported modules
    """
    # Construction succeeds; validation happens on attach(), which must raise
    # for a model containing modules Opacus cannot handle (resnet18 here).
    privacy_engine = PrivacyEngine(
        models.resnet18(),
        sample_rate=self.SAMPLE_RATE,
        alphas=self.ALPHAS,
        noise_multiplier=1.3,
        max_grad_norm=1,
    )
    with self.assertRaises(IncompatibleModuleException):
        privacy_engine.attach(self.private_optimizer)
def test_privacy_engine_to_example(self):
    # IMPORTANT: When changing this code you also need to update
    # the docstring for opacus.privacy_engine.PrivacyEngine.to()
    model = torch.nn.Linear(16, 32)  # An example model. Default device is CPU
    privacy_engine = PrivacyEngine(
        model,
        sample_rate=0.01,
        noise_multiplier=0.8,
        max_grad_norm=0.5,
    )
    device = "cpu"
    model.to(
        device
    )  # If we move the model to GPU, we should call the to() method of the privacy engine (next line)
    privacy_engine.to(device)
def setUpOptimizer(
    self, model: nn.Module, data_loader: DataLoader, privacy_engine: bool = False
):
    """Build an SGD optimizer for `model`, optionally wrapped by a PrivacyEngine.

    When `privacy_engine` is True, the engine's sample rate is derived from the
    loader's batch size and dataset length, and the engine is attached to the
    returned optimizer.
    """
    # sample parameter values
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
    optimizer.zero_grad()
    if privacy_engine:
        rate = data_loader.batch_size / len(data_loader.dataset)
        orders = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64))
        engine = PrivacyEngine(
            model,
            sample_rate=rate,
            alphas=orders,
            noise_multiplier=1.3,
            max_grad_norm=1,
        )
        engine.attach(optimizer)
    return optimizer
def check_condition_rdp_gp(args, radius, sample_rate, steps, alpha, delta, sigma, p1, p2):
    """Check the RDP-based group-privacy certification condition at `radius`.

    Args:
        args: namespace providing train_mode, sub_training_size, training_size.
        radius: certification radius; 0 is trivially certified.
        sample_rate: subsampling rate used in training.
        steps: number of noisy SGD steps to account for.
        alpha: Renyi order.
        delta: unused here (kept for interface compatibility).
        sigma: noise multiplier.
        p1, p2: top-1 and runner-up class probabilities.

    Returns:
        True if the certification condition holds, else False.

    NOTE(review): if args.train_mode matches neither branch below, `eps` is
    never assigned and the later use raises NameError.
    """
    if radius == 0:
        return True
    # Accumulated Renyi divergence at order alpha over `steps` steps.
    if args.train_mode == 'DP' or args.train_mode == 'Sub-DP-no-amp':
        rdp = PrivacyEngine._get_renyi_divergence(
            sample_rate=sample_rate, noise_multiplier=sigma, alphas=[alpha]) * steps
        eps = rdp.cpu().detach().numpy()[0]
    elif args.train_mode == 'Sub-DP':
        _, eps = rdp_amplify(alpha, args.sub_training_size,
                             args.training_size, sample_rate, sigma)
        eps *= steps
    # Group privacy: the Renyi order shrinks with the radius while epsilon
    # grows by 3^log2(radius).
    alpha = alpha / radius
    eps = 3**(np.log2(radius)) * eps
    if alpha <= 1:
        # RDP is undefined for orders <= 1 after the group-privacy reduction.
        return False
    val = np.e**(-eps) * p1**(alpha/(alpha-1)) - \
        (np.e**eps * p2)**((alpha-1)/alpha)
    # Simplified from `if val > 0: return True / else: return False`; bool()
    # keeps the return type a Python bool rather than numpy.bool_.
    return bool(val > 0)
def train_model(net, trainloader, trainset, device, dp):
    """Train `net` for 3 epochs via the module-level `train` helper, optionally
    with differential privacy.

    Args:
        net: model to train (updated in place).
        trainloader: DataLoader for training batches.
        trainset: underlying dataset; used only for DP sample-size accounting.
        device: torch device passed through to `train`.
        dp: if truthy, attach an Opacus PrivacyEngine to the optimizer.
    """
    optimizer = torch.optim.RMSprop(net.parameters(), lr=LR)
    # optimizer = torch.optim.SGD(net.parameters(), lr=.003, momentum=.9)
    if dp:  # idiomatic truthiness check instead of `dp == True`
        print('adding privacy engine')
        # if we are training with differential privacy, create the engine
        privacy_engine = PrivacyEngine(
            net,
            batch_size=VIRTUAL_BATCH_SIZE,
            sample_size=len(trainset),
            alphas=range(2, 32),
            noise_multiplier=NOISE_MULTIPLIER,
            max_grad_norm=MAX_GRAD_NORM,
        )
        privacy_engine.attach(optimizer)
    # Fixed: the original comment claimed "5 epochs" but the loop runs 3.
    # The unused local `criterion` (loss is built inside `train`) was removed.
    for epoch in range(3):
        print(f'epoch: {epoch}')
        train(net, trainloader, optimizer, epoch, device, dp)
def demo_basic(rank, weight, world_size, dp):
    """Minimal DDP demo: one forward/backward/step, optionally with DP.

    After the step, net1's weight is copied into `weight` (a shared CPU
    tensor) so the spawning process can compare results across ranks.
    """
    # Seeded with world_size (same on every rank) so all workers start from
    # identical weights.
    torch.manual_seed(world_size)
    batch_size = 32
    withdp = "with" + ("out " if not dp else "")
    print(
        f"Running basic DDP {withdp} differential privacy example on rank {rank}."
    )
    setup(rank, world_size)
    # create model and move it to GPU with id rank
    model = ToyModel().to(rank)
    if dp:
        # DPDDP: Opacus's differentially-private DDP wrapper.
        ddp_model = DPDDP(model)
        # noise_multiplier=0 and a huge clipping bound make the attached
        # engine effectively a no-op, so DP and non-DP runs stay comparable.
        engine = PrivacyEngine(
            ddp_model,
            batch_size=batch_size,
            sample_size=10 * batch_size,
            alphas=PRIVACY_ALPHAS,
            noise_multiplier=0,
            max_grad_norm=1e8,
        )
    else:
        ddp_model = DDP(model, device_ids=[rank])
    loss_fn = nn.MSELoss()
    optimizer = optim.SGD(ddp_model.parameters(), lr=1)
    if dp:
        engine.attach(optimizer)
    # if rank == 0:
    #     print(model.net1.weight)
    optimizer.zero_grad()
    labels = torch.randn(batch_size, 5).to(rank)
    outputs = ddp_model(torch.randn(batch_size, 10).to(rank))
    loss_fn(outputs, labels).backward()
    optimizer.step()
    # if rank == 0:
    #     print(model.net1.weight)
    weight.copy_(model.net1.weight.data.cpu())
    cleanup()
def setUp_init_model(
    self, private=False, state_dict=None, model=None, **privacy_engine_kwargs
):
    """Create (or reuse) a model plus SGD optimizer; optionally attach DP.

    When `private` is True and no engine kwargs are supplied, the fixture's
    default privacy parameters are used.

    Returns:
        (model, optimizer) tuple.
    """
    net = model or SampleConvNet()
    optimizer = torch.optim.SGD(net.parameters(), lr=self.LR, momentum=0)
    if state_dict:
        net.load_state_dict(state_dict)
    if private:
        if not privacy_engine_kwargs:
            privacy_engine_kwargs = self.privacy_default_params
        engine = PrivacyEngine(
            net,
            batch_size=self.BATCH_SIZE,
            sample_size=self.DATA_SIZE,
            alphas=self.ALPHAS,
            **privacy_engine_kwargs,
        )
        engine.attach(optimizer)
    return net, optimizer
def __init__(self, model, trainloader, testloader, sample_rate) -> None:
    """Wrap a model with its data loaders and a DP privacy engine.

    Note: the engine is constructed here but not attached to any optimizer;
    presumably attachment happens later during training -- confirm in the
    class's fit/train method.
    """
    super().__init__()
    self.model = model
    self.trainloader = trainloader
    self.testloader = testloader
    # Create a privacy engine which will add DP and keep track of the privacy budget.
    self.privacy_engine = PrivacyEngine(
        self.model,
        sample_rate=sample_rate,
        target_delta=PRIVACY_PARAMS["target_delta"],
        max_grad_norm=PRIVACY_PARAMS["max_grad_norm"],
        noise_multiplier=PRIVACY_PARAMS["noise_multiplier"],
    )
def setUpOptimizer(
    self, model: nn.Module, data_loader: DataLoader, privacy_engine: bool = False
):
    """Build an SGD optimizer for `model`, optionally with a PrivacyEngine
    attached (sample size taken from the loader's batch size and dataset)."""
    # sample parameter values
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
    optimizer.zero_grad()
    if privacy_engine:
        pe = PrivacyEngine(
            model,
            # pyre-fixme[6]: Expected `int` for 2nd param but got `Optional[int]`.
            batch_size=data_loader.batch_size,
            # pyre-fixme[6]: Expected `Sized` for 1st param but got
            #  `Dataset[typing.Any]`.
            sample_size=len(data_loader.dataset),
            # pyre-fixme[6]: `+` is not supported for operand types
            #  `List[float]` and `List[int]`.
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=1.3,
            max_grad_norm=1,
        )
        pe.attach(optimizer)
    return optimizer
def test_sampling_rate_less_than_one(self):
    """
    Tests that when the sampling rate in the privacy engine
    is more than 1.0 we raise a ValueError
    """
    # NOTE(review): the method name says "less_than_one" but the scenario
    # under test is a rate *above* 1.0 (1.5), per the docstring.
    self.SAMPLE_RATE = 1.5
    with self.assertRaises(ValueError):
        PrivacyEngine(
            SampleConvNet(),
            sample_rate=self.SAMPLE_RATE,
            alphas=self.ALPHAS,
            noise_multiplier=1.0,
            max_grad_norm=1.0,
        )
def test_sampling_rate_less_than_one(self):
    """
    Tests that a sampling rate derived as batch_size / sample_size that
    exceeds 1.0 makes the privacy engine raise a ValueError.
    """
    # NOTE(review): the original docstring said "less than 1.0", but rates
    # below 1.0 are the valid case; the sibling sample_rate-based test asserts
    # the error for a rate above 1.0. BATCH_SIZE=128 presumably exceeds
    # self.DATA_SIZE here -- confirm DATA_SIZE < 128.
    self.BATCH_SIZE = 128
    with self.assertRaises(ValueError):
        PrivacyEngine(
            SampleConvNet(),
            batch_size=self.BATCH_SIZE,
            sample_size=self.DATA_SIZE,
            alphas=self.ALPHAS,
            noise_multiplier=1.0,
            max_grad_norm=1.0,
        )
def demo_basic(rank, world_size, weight, dp, noise_multiplier=0, max_grad_norm=1e8):
    """DDP demo with most parameters frozen so the DP noise added to the one
    trainable tensor can be compared deterministically across implementations.

    Copies net1's weight into `weight` (shared CPU tensor) after one step.
    """
    # We don't want the 2 GPUs to work on the same examples/labels in parallel
    torch.manual_seed(rank)
    batch_size = 32
    withdp = "with" + ("out " if not dp else "")
    print(
        f"Running basic DDP {withdp} differential privacy example on rank {rank}."
    )
    device = setup_and_get_device(rank, world_size)
    # create model and move it to GPU with id rank
    model = ToyModel().to(device)
    print(f"Initial weight: {model.net1.weight.data}")
    # Freeze all the parameters except one, to ensure that the noise is the same
    # (the DDP hook does not browse the layers in the same order as the naive implementation)
    model.net1.bias.requires_grad = False
    model.net2.bias.requires_grad = False
    model.net2.weight.requires_grad = False
    if dp:
        ddp_model = DPDDP(model)
        engine = PrivacyEngine(
            ddp_model,
            batch_size=batch_size,
            sample_size=10 * batch_size,
            alphas=PRIVACY_ALPHAS,
            noise_multiplier=noise_multiplier,
            max_grad_norm=[max_grad_norm],
        )
        # Fix the engine's RNG so the injected noise is reproducible.
        engine.random_number_generator = engine._set_seed(0)
    else:
        ddp_model = DDP(model, device_ids=[device])
    loss_fn = nn.MSELoss()
    optimizer = optim.SGD(ddp_model.parameters(), lr=1)
    if dp:
        engine.attach(optimizer)
    optimizer.zero_grad()
    labels = torch.randn(batch_size, 5).to(device)
    outputs = ddp_model(torch.randn(batch_size, 10).to(device))
    loss_fn(outputs, labels).backward()
    optimizer.step()
    weight.copy_(model.net1.weight.data.cpu())
    cleanup()
def __generate_privacy_engine(self, name=None, **kwargs):
    """Build a PrivacyEngine for `self.model` selected by mechanism name.

    Args:
        name: None/falsy for no privacy, or 'DP' for Opacus DP-SGD.
        **kwargs: for 'DP', must provide virtual_batch_size, sample_size,
            noise_multiplier, and max_grad_norm.

    Returns:
        A PrivacyEngine instance, or None when `name` is falsy.

    Raises:
        ValueError: if `name` names an unsupported engine. (Narrowed from the
        original bare Exception; backward-compatible for callers catching
        Exception.)
    """
    if not name:  # Default is None
        return None
    if name != 'DP':
        raise ValueError(
            'Unsupported privacy engine name. Supported: Differential Privacy (DP)'
        )
    return PrivacyEngine(
        self.model,
        batch_size=kwargs['virtual_batch_size'],
        sample_size=kwargs['sample_size'],
        alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
        noise_multiplier=kwargs['noise_multiplier'],
        max_grad_norm=kwargs['max_grad_norm'],
    )
def client(cur_net, current_iter, current_server_rank_id, best_valid_loss, best_net_glob, server_flag):
    """One federated-learning client round: local (optionally DP) training,
    exchange of state dicts with the current server, and rotating-server
    bookkeeping.

    Relies on module-level state: args, local_train_loader, valid_loader,
    client_sockets, rank2idx, pub_key, loss_train, loss_valid.

    Returns:
        (cur_net, current_server_rank_id, best_valid_loss, best_net_glob,
        server_flag) updated for the next round.
    """
    # local train
    cur_net.train()
    optimizer = get_optimizer(args, cur_net)
    loss_func = nn.CrossEntropyLoss()
    if args.dp:
        # NOTE(review): sample_size gets len(local_train_loader), which is the
        # number of *batches*, not samples -- confirm this is intended for the
        # privacy accounting.
        privacy_engine = PrivacyEngine(cur_net, batch_size=args.bs,
                                       sample_size=len(local_train_loader),
                                       alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
                                       noise_multiplier=0.3, max_grad_norm=1.2,
                                       secure_rng=args.secure_rng)
        privacy_engine.attach(optimizer)
    current_state_dict, current_loss = normal_train(args, cur_net, optimizer, loss_func, local_train_loader, valid_loader)
    if args.dp:
        # Detach so the engine does not linger on the optimizer between rounds.
        privacy_engine.detach()
    # send the state_dict to current server
    if args.tphe:
        client_sockets[rank2idx[current_server_rank_id]].send(pickle.dumps([encrypt_torch_state_dict(pub_key, current_state_dict), current_loss]))
    else:
        client_sockets[rank2idx[current_server_rank_id]].send(pickle.dumps([current_state_dict, current_loss]))
    # recv the aggregated state dict from current server
    # NOTE(review): pickle.loads on data received from the network is unsafe
    # unless every peer is fully trusted.
    aggregated_state_dict = client_sockets[rank2idx[current_server_rank_id]].recv(int(args.buffer))
    aggregated_state_dict = pickle.loads(aggregated_state_dict)
    # parse aggregated state_dict
    parse_aggregated_state_dict(aggregated_state_dict, cur_net)
    # recv metadata
    metadata_list_pkl = client_sockets[rank2idx[current_server_rank_id]].recv(int(args.buffer))
    loss_avg, tmp_loss_valid, next_server_rank_id = pickle.loads(metadata_list_pkl)
    loss_train.append(loss_avg)
    loss_valid.append(tmp_loss_valid)
    print('Round{:3d}, Average loss {:.3f}'.format(current_iter, loss_avg))
    print('Round{:3d}, Validation loss {:.3f}'.format(current_iter, tmp_loss_valid))
    if tmp_loss_valid < best_valid_loss:
        best_valid_loss = tmp_loss_valid
        best_net_glob = copy.deepcopy(cur_net)
        print('SAVE BEST MODEL AT EPOCH {}'.format(current_iter))
    # update the metadata for server
    current_server_rank_id = next_server_rank_id
    if next_server_rank_id == args.rank:
        server_flag = True
    # NOTE(review): this ANSI-colored string literal was split across a mangled
    # line break in the original source; reconstructed with an explicit \n.
    print("\33[31m\33[1m Current server rank id {} \n\33[0m".format(current_server_rank_id))
    return cur_net, current_server_rank_id, best_valid_loss, best_net_glob, server_flag
def demo_ddp_hook(rank, world_size, weight, dp, noise_multiplier, max_grad_norm):
    """DDP demo using vanilla torch DDP with the Opacus engine attached, for
    comparison against the DPDDP variant.

    Most parameters are frozen; copies net1's weight into `weight` (shared
    CPU tensor) after one step.
    """
    torch.manual_seed(rank)
    batch_size = 32
    withdp = "with" + ("out " if not dp else "")
    print(
        f"Running DDP hook {withdp} differential privacy example on rank {rank}."
    )
    device = setup_and_get_device(rank, world_size, nonce=1)
    # create model and move it to GPU with id rank
    model = ToyModel().to(device)
    # Freeze everything except net1.weight so the noise comparison is clean.
    model.net1.bias.requires_grad = False
    model.net2.bias.requires_grad = False
    model.net2.weight.requires_grad = False
    ddp_model = DDP(model, device_ids=[device])
    if dp:
        engine = PrivacyEngine(
            ddp_model,
            batch_size=batch_size,
            sample_size=10 * batch_size,
            alphas=PRIVACY_ALPHAS,
            noise_multiplier=noise_multiplier,
            max_grad_norm=[max_grad_norm],
        )
        # Fix the engine's RNG so the injected noise is reproducible.
        engine.random_number_generator = engine._set_seed(0)
    loss_fn = nn.MSELoss()
    optimizer = optim.SGD(ddp_model.parameters(), lr=1)
    if dp:
        engine.attach(optimizer)
    optimizer.zero_grad()
    labels = torch.randn(batch_size, 5).to(device)
    outputs = ddp_model(torch.randn(batch_size, 10).to(device))
    loss_fn(outputs, labels).backward()
    optimizer.step()
    weight.copy_(model.net1.weight.data.cpu())
    del ddp_model
    cleanup()
def add_remove_ddp_hooks(rank, world_size, remaining_hooks, dp, noise_multiplier=0, max_grad_norm=1e8):
    """Attach then detach a PrivacyEngine on a DDP model, recording which
    parameter backward hooks exist in each state.

    Populates remaining_hooks["attached"] and remaining_hooks["detached"] with
    {parameter: hooks} for parameters that still carry backward hooks.
    Note: `dp` is accepted but never read in this function.
    """
    device = setup_and_get_device(rank, world_size, nonce=2)
    model = ToyModel().to(device)
    ddp_model = nn.parallel.DistributedDataParallel(model, device_ids=[device])
    engine = PrivacyEngine(
        ddp_model,
        batch_size=1,
        sample_size=10,
        alphas=PRIVACY_ALPHAS,
        noise_multiplier=noise_multiplier,
        max_grad_norm=[max_grad_norm],
    )
    optimizer = optim.SGD(ddp_model.parameters(), lr=1)
    engine.attach(optimizer)
    # Snapshot hooks while the engine is attached (per-sample grad hooks).
    remaining_hooks["attached"] = {
        p: p._backward_hooks for p in engine.module.parameters()
        if p._backward_hooks
    }
    engine.detach()
    # After detach, no parameter should retain backward hooks.
    remaining_hooks["detached"] = {
        p: p._backward_hooks for p in engine.module.parameters()
        if p._backward_hooks
    }
    cleanup()
def main():
    """MNIST DP-SGD example entry point.

    Parses CLI arguments, builds a Poisson-subsampled MNIST train loader
    (UniformWithReplacementSampler), trains with Opacus DP-SGD unless
    --disable-dp, and reports accuracy averaged over --n-runs runs.
    """
    # Training settings
    parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
    parser.add_argument(
        "-sr",
        "--sample-rate",
        type=float,
        default=0.001,
        metavar="SR",
        help="sample rate used for batch construction (default: 0.001)",
    )
    parser.add_argument(
        "--test-batch-size",
        type=int,
        default=1024,
        metavar="TB",
        help="input batch size for testing (default: 1024)",
    )
    # NOTE(review): help text says "default: 14" but the actual default is 10.
    parser.add_argument(
        "-n",
        "--epochs",
        type=int,
        default=10,
        metavar="N",
        help="number of epochs to train (default: 14)",
    )
    parser.add_argument(
        "-r",
        "--n-runs",
        type=int,
        default=1,
        metavar="R",
        help="number of runs to average on (default: 1)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=0.1,
        metavar="LR",
        help="learning rate (default: .1)",
    )
    parser.add_argument(
        "--sigma",
        type=float,
        default=1.0,
        metavar="S",
        help="Noise multiplier (default 1.0)",
    )
    parser.add_argument(
        "-c",
        "--max-per-sample-grad_norm",
        type=float,
        default=1.0,
        metavar="C",
        help="Clip per-sample gradients to this norm (default 1.0)",
    )
    parser.add_argument(
        "--delta",
        type=float,
        default=1e-5,
        metavar="D",
        help="Target delta (default: 1e-5)",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda",
        help="GPU ID for this process (default: 'cuda')",
    )
    parser.add_argument(
        "--save-model",
        action="store_true",
        default=False,
        help="Save the trained model (default: false)",
    )
    parser.add_argument(
        "--disable-dp",
        action="store_true",
        default=False,
        help="Disable privacy training and just train with vanilla SGD",
    )
    # NOTE(review): this help string was split across a mangled line break in
    # the original source; reconstructed as a single implicit concatenation.
    parser.add_argument(
        "--secure-rng",
        action="store_true",
        default=False,
        help="Enable Secure RNG to have trustworthy privacy guarantees. "
        "Comes at a performance cost",
    )
    parser.add_argument(
        "--data-root",
        type=str,
        default="../mnist",
        help="Where MNIST is/will be stored",
    )
    args = parser.parse_args()
    device = torch.device(args.device)
    kwargs = {"num_workers": 1, "pin_memory": True}
    if args.secure_rng:
        # torchcsprng supplies a cryptographically secure RNG for DP noise.
        try:
            import torchcsprng as prng
        except ImportError as e:
            msg = (
                "To use secure RNG, you must install the torchcsprng package! "
                "Check out the instructions here: https://github.com/pytorch/csprng#installation"
            )
            raise ImportError(msg) from e
        generator = prng.create_random_device_generator("/dev/urandom")
    else:
        generator = None
    train_dataset = datasets.MNIST(
        args.data_root,
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((MNIST_MEAN, ), (MNIST_STD, )),
        ]),
    )
    # Poisson subsampling: each element is included independently with
    # probability sample_rate, matching the DP accounting assumptions.
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        generator=generator,
        batch_sampler=UniformWithReplacementSampler(
            num_samples=len(train_dataset),
            sample_rate=args.sample_rate,
            generator=generator,
        ),
        **kwargs,
    )
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            args.data_root,
            train=False,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((MNIST_MEAN, ), (MNIST_STD, )),
            ]),
        ),
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs,
    )
    run_results = []
    for _ in range(args.n_runs):
        model = SampleConvNet().to(device)
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0)
        if not args.disable_dp:
            privacy_engine = PrivacyEngine(
                model,
                sample_rate=args.sample_rate,
                alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
                noise_multiplier=args.sigma,
                max_grad_norm=args.max_per_sample_grad_norm,
                secure_rng=args.secure_rng,
            )
            privacy_engine.attach(optimizer)
        for epoch in range(1, args.epochs + 1):
            train(args, model, device, train_loader, optimizer, epoch)
        run_results.append(test(args, model, device, test_loader))
    if len(run_results) > 1:
        print("Accuracy averaged over {} runs: {:.2f}% ± {:.2f}%".format(
            len(run_results),
            np.mean(run_results) * 100, np.std(run_results) * 100))
    # Repro string encodes the hyperparameters into saved-artifact filenames.
    repro_str = (
        f"{model.name()}_{args.lr}_{args.sigma}_"
        f"{args.max_per_sample_grad_norm}_{args.sample_rate}_{args.epochs}")
    torch.save(run_results, f"run_results_{repro_str}.pt")
    if args.save_model:
        torch.save(model.state_dict(), f"mnist_cnn_{repro_str}.pt")
# --- Model construction and (optionally DP) training setup ---
# NOTE(review): this fragment depends on names defined outside the visible
# span (y, args, embed_dim, embedders, device, sampler, dataloader_train,
# Net_embedder, get_priv_params, train) -- confirm against the full file.
num_classes = y.max() - y.min() + 1
hidden_dim = embed_dim // 2 if args.hidden_dim is None else args.hidden_dim
# All hidden layers share hidden_dim except a fixed 1024-wide final layer.
hidden_dims = [hidden_dim] * (args.num_layers - 1) + [1024]
model = Net_embedder(embedders, hidden_dims, num_classes)
print(model)
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(model.parameters(), args.lr)
if args.epsilon is not None:
    # DP training is enabled only when a target epsilon is given.
    max_epsilon, delta, sensitivity = get_priv_params(args.epsilon)
    privacy_engine = PrivacyEngine(model,
                                   batch_size=args.batch_size,
                                   sample_size=len(sampler),
                                   alphas=list(range(2, 32)),
                                   noise_multiplier=args.noise_multiplier,
                                   max_grad_norm=args.max_grad_norm,
                                   target_delta=delta)
    privacy_engine.attach(optimizer)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.num_epochs * len(dataloader_train))
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=10, factor=0.5)
# Training loop
best_loss = np.infty
best_model = None
for i in range(args.num_epochs):
    loss = train(model, dataloader_train, optimizer, criterion, device, scheduler=scheduler)
    # loss = train(model, dataloader_train, optimizer, criterion, device, scheduler=None)
    # scheduler.step(loss)
def main(dataset, augment=False, use_scattering=False, size=None,
         batch_size=2048, mini_batch_size=256, sample_batches=False,
         lr=1, optim="SGD", momentum=0.9, nesterov=False,
         noise_multiplier=1, max_grad_norm=0.1, epochs=100,
         input_norm=None, num_groups=None, bn_noise_multiplier=None,
         max_epsilon=None, logdir=None, early_stop=True, seed=0):
    """Train a (scattering-)CNN with DP-SGD, gradient accumulation, and RDP
    accounting; writes per-epoch metrics and a final JSON record.

    NOTE(review): the `optim` parameter shadows any imported `optim` module
    within this function's scope.
    """
    torch.manual_seed(seed)
    logger = Logger(logdir)
    device = get_device()
    train_data, test_data = get_data(dataset, augment=augment)
    if use_scattering:
        scattering, K, _ = get_scatter_transform(dataset)
        scattering.to(device)
    else:
        scattering = None
        K = 3 if len(train_data.data.shape) == 4 else 1
    bs = batch_size
    # Logical batch = n_acc_steps mini-batches (gradient accumulation).
    assert bs % mini_batch_size == 0
    n_acc_steps = bs // mini_batch_size
    # Batch accumulation and data augmentation with Poisson sampling isn't implemented
    if sample_batches:
        assert n_acc_steps == 1
        assert not augment
    train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=mini_batch_size, shuffle=True,
        num_workers=1, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(
        test_data, batch_size=mini_batch_size, shuffle=False,
        num_workers=1, pin_memory=True)
    rdp_norm = 0
    if input_norm == "BN":
        # compute noisy data statistics or load from disk if pre-computed
        save_dir = f"bn_stats/{dataset}"
        os.makedirs(save_dir, exist_ok=True)
        bn_stats, rdp_norm = scatter_normalization(
            train_loader,
            scattering,
            K,
            device,
            len(train_data),
            len(train_data),
            noise_multiplier=bn_noise_multiplier,
            orders=ORDERS,
            save_dir=save_dir)
        model = CNNS[dataset](K, input_norm="BN", bn_stats=bn_stats, size=size)
    else:
        model = CNNS[dataset](K, input_norm=input_norm,
                              num_groups=num_groups, size=size)
    model.to(device)
    if use_scattering and augment:
        # Apply the scattering transform on the fly as part of the model.
        model = nn.Sequential(scattering, model)
        train_loader = torch.utils.data.DataLoader(
            train_data, batch_size=mini_batch_size, shuffle=True,
            num_workers=1, pin_memory=True, drop_last=True)
    else:
        # pre-compute the scattering transform if necessery
        train_loader = get_scattered_loader(
            train_loader, scattering, device,
            drop_last=True, sample_batches=sample_batches)
        test_loader = get_scattered_loader(test_loader, scattering, device)
    print(f"model has {get_num_params(model)} parameters")
    if optim == "SGD":
        optimizer = torch.optim.SGD(model.parameters(), lr=lr,
                                    momentum=momentum, nesterov=nesterov)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    privacy_engine = PrivacyEngine(
        model,
        batch_size=bs,
        sample_size=len(train_data),
        alphas=ORDERS,
        noise_multiplier=noise_multiplier,
        max_grad_norm=max_grad_norm,
    )
    privacy_engine.attach(optimizer)
    best_acc = 0
    flat_count = 0
    results = dict(train_zeon=[], train_xent=[], test_zeon=[], test_xent=[],
                   epoch=[])
    for epoch in range(0, epochs):
        print(f"\nEpoch: {epoch}")
        train_loss, train_acc = train(model, train_loader, optimizer,
                                      n_acc_steps=n_acc_steps)
        test_loss, test_acc = test(model, test_loader)
        results['train_zeon'].append(train_acc)
        results['train_xent'].append(train_loss)
        results['test_zeon'].append(test_acc)
        results['test_xent'].append(test_loss)
        results['epoch'].append(epoch)
        if noise_multiplier > 0:
            # Total RDP = (noisy BN statistics, if any) + SGD steps so far.
            rdp_sgd = get_renyi_divergence(
                privacy_engine.sample_rate,
                privacy_engine.noise_multiplier) * privacy_engine.steps
            epsilon, _ = get_privacy_spent(rdp_norm + rdp_sgd)
            epsilon2, _ = get_privacy_spent(rdp_sgd)
            print(f"ε = {epsilon:.3f} (sgd only: ε = {epsilon2:.3f})")
            # Stop early once the privacy budget is exhausted.
            if max_epsilon is not None and epsilon >= max_epsilon:
                return
        else:
            epsilon = None
        logger.log_epoch(epoch, train_loss, train_acc, test_loss, test_acc,
                         epsilon)
        logger.log_scalar("epsilon/train", epsilon, epoch)
        # stop if we're not making progress
        if test_acc > best_acc:
            best_acc = test_acc
            flat_count = 0
        else:
            flat_count += 1
        if flat_count >= 20 and early_stop:
            print("plateau...")
            break
    # Write to file.
    record = {
        **results,
        **{
            'best_acc': best_acc,
            'seed': seed,
            'dataset': dataset
        }
    }
    record_path = os.path.join('.', 'record', f'{dataset}-{seed}.json')
    os.makedirs(os.path.dirname(record_path), exist_ok=True)
    with open(record_path, 'w') as f:
        json.dump(record, f, indent=4)
    import logging
    logging.warning(f'Wrote to file: {record_path}')