def client(cur_net, current_iter, current_server_rank_id, best_valid_loss,
           best_net_glob, server_flag):
    # local train
    cur_net.train()
    optimizer = get_optimizer(args, cur_net)
    loss_func = nn.CrossEntropyLoss()
    if args.dp:
        privacy_engine = PrivacyEngine(
            cur_net,
            batch_size=args.bs,
            sample_size=len(local_train_loader),
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=0.3,
            max_grad_norm=1.2,
            secure_rng=args.secure_rng)
        privacy_engine.attach(optimizer)
    current_state_dict, current_loss = normal_train(args, cur_net, optimizer,
                                                    loss_func,
                                                    local_train_loader,
                                                    valid_loader)
    if args.dp:
        privacy_engine.detach()

    # send the state_dict to the current server
    if args.tphe:
        client_sockets[rank2idx[current_server_rank_id]].send(
            pickle.dumps([encrypt_torch_state_dict(pub_key, current_state_dict),
                          current_loss]))
    else:
        client_sockets[rank2idx[current_server_rank_id]].send(
            pickle.dumps([current_state_dict, current_loss]))

    # receive the aggregated state_dict from the current server
    aggregated_state_dict = client_sockets[
        rank2idx[current_server_rank_id]].recv(int(args.buffer))
    aggregated_state_dict = pickle.loads(aggregated_state_dict)

    # parse the aggregated state_dict into the local model
    parse_aggregated_state_dict(aggregated_state_dict, cur_net)

    # receive metadata (average loss, validation loss, next server rank)
    metadata_list_pkl = client_sockets[
        rank2idx[current_server_rank_id]].recv(int(args.buffer))
    loss_avg, tmp_loss_valid, next_server_rank_id = pickle.loads(metadata_list_pkl)
    loss_train.append(loss_avg)
    loss_valid.append(tmp_loss_valid)
    print('Round{:3d}, Average loss {:.3f}'.format(current_iter, loss_avg))
    print('Round{:3d}, Validation loss {:.3f}'.format(current_iter, tmp_loss_valid))
    if tmp_loss_valid < best_valid_loss:
        best_valid_loss = tmp_loss_valid
        best_net_glob = copy.deepcopy(cur_net)
        print('SAVE BEST MODEL AT EPOCH {}'.format(current_iter))

    # update the server metadata for the next round
    current_server_rank_id = next_server_rank_id
    if next_server_rank_id == args.rank:
        server_flag = True
    print("\33[31m\33[1m Current server rank id {} \33[0m".format(current_server_rank_id))
    return cur_net, current_server_rank_id, best_valid_loss, best_net_glob, server_flag
def add_remove_ddp_hooks(rank, world_size, remaining_hooks, dp,
                         noise_multiplier=0, max_grad_norm=1e8):
    device = setup_and_get_device(rank, world_size, nonce=2)

    model = ToyModel().to(device)
    ddp_model = nn.parallel.DistributedDataParallel(model, device_ids=[device])

    engine = PrivacyEngine(
        ddp_model,
        batch_size=1,
        sample_size=10,
        alphas=PRIVACY_ALPHAS,
        noise_multiplier=noise_multiplier,
        max_grad_norm=[max_grad_norm],
    )

    optimizer = optim.SGD(ddp_model.parameters(), lr=1)
    engine.attach(optimizer)

    # record which parameters still carry backward hooks while the engine
    # is attached ...
    remaining_hooks["attached"] = {
        p: p._backward_hooks
        for p in engine.module.parameters()
        if p._backward_hooks
    }
    engine.detach()

    # ... and after it has been detached (this is expected to be empty)
    remaining_hooks["detached"] = {
        p: p._backward_hooks
        for p in engine.module.parameters()
        if p._backward_hooks
    }

    cleanup()
class CTGANSynthesizer(BaseSynthesizer):
    """Conditional Table GAN Synthesizer.

    This is the core class of the CTGAN project, where the different components
    are orchestrated together. For more details about the process, please check
    the [Modeling Tabular data using Conditional GAN](https://arxiv.org/abs/1907.00503)
    paper.

    Args:
        embedding_dim (int):
            Size of the random sample passed to the Generator. Defaults to 128.
        generator_dim (tuple or list of ints):
            Size of the output samples for each one of the Residuals. A Residual
            Layer will be created for each one of the values provided. Defaults
            to (256, 256).
        discriminator_dim (tuple or list of ints):
            Size of the output samples for each one of the Discriminator Layers.
            A Linear Layer will be created for each one of the values provided.
            Defaults to (256, 256).
        generator_lr (float):
            Learning rate for the generator. Defaults to 2e-4.
        generator_decay (float):
            Generator weight decay for the Adam Optimizer. Defaults to 1e-6.
        discriminator_lr (float):
            Learning rate for the discriminator. Defaults to 2e-4.
        discriminator_decay (float):
            Discriminator weight decay for the Adam Optimizer. Defaults to 1e-6.
        batch_size (int):
            Number of data samples to process in each step.
        discriminator_steps (int):
            Number of discriminator updates to do for each generator update.
            From the WGAN paper: https://arxiv.org/abs/1701.07875. The WGAN paper
            default is 5; the default used here is 1 to match the original CTGAN
            implementation.
        log_frequency (boolean):
            Whether to use log frequency of categorical levels in conditional
            sampling. Defaults to ``True``.
        verbose (boolean):
            Whether to have print statements for progress results. Defaults to
            ``False``.
        epochs (int):
            Number of training epochs. Defaults to 300.
    """

    def __init__(self, embedding_dim=128, generator_dim=(256, 256),
                 discriminator_dim=(256, 256), generator_lr=2e-4,
                 generator_decay=1e-6, discriminator_lr=2e-4,
                 discriminator_decay=0, pack=1, batch_size=500,
                 discriminator_steps=1, log_frequency=True, verbose=False,
                 epochs=300, epsilon=10, delta=1e-5, noise_multiplier=2,
                 max_grad_norm=1, dp=True):

        assert batch_size % 2 == 0

        self._embedding_dim = embedding_dim
        self._generator_dim = generator_dim
        self._discriminator_dim = discriminator_dim

        self._generator_lr = generator_lr
        self._generator_decay = generator_decay
        self._discriminator_lr = discriminator_lr
        self._discriminator_decay = discriminator_decay

        # added to the original CTGAN to support packed (PacGAN-style)
        # discriminator inputs
        self._pack = pack
        self._batch_size = batch_size
        self._discriminator_steps = discriminator_steps
        self._log_frequency = log_frequency
        self._verbose = verbose
        self._epochs = epochs
        self._epsilon = epsilon
        self._device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")

        self.trained_epochs = 0
        self.trained_epsilon = 0
        self._delta = delta
        self._noise_multiplier = noise_multiplier
        self.max_grad_norm = max_grad_norm
        self._dp = dp

        # monkey-patch Opacus so the custom grad sampler is used
        opacus.supported_layers_grad_samplers._create_or_extend_grad_sample = \
            _custom_create_or_extend_grad_sample

    @staticmethod
    def _gumbel_softmax(logits, tau=1, hard=False, eps=1e-10, dim=-1):
        """Deal with the instability of gumbel_softmax for older versions of torch.

        For more details about the issue:
        https://drive.google.com/file/d/1AA5wPfZ1kquaRtVruCd6BiYZGcDeNxyP/view?usp=sharing

        Args:
            logits:
                […, num_features] unnormalized log probabilities.
            tau:
                Non-negative scalar temperature.
            hard:
                If True, the returned samples will be discretized as one-hot
                vectors, but will be differentiated as if it is the soft sample
                in autograd.
            dim (int):
                A dimension along which softmax will be computed. Default: -1.

        Returns:
            Sampled tensor of same shape as logits from the Gumbel-Softmax
            distribution.
        """
        if version.parse(torch.__version__) < version.parse("1.2.0"):
            for i in range(10):
                transformed = functional.gumbel_softmax(logits, tau=tau,
                                                        hard=hard, eps=eps,
                                                        dim=dim)
                if not torch.isnan(transformed).any():
                    return transformed

            raise ValueError("gumbel_softmax returning NaN.")

        return functional.gumbel_softmax(logits, tau=tau, hard=hard, eps=eps,
                                         dim=dim)

    def _apply_activate(self, data):
        """Apply proper activation function to the output of the generator."""
        data_t = []
        st = 0
        for column_info in self._transformer.output_info_list:
            for span_info in column_info:
                if span_info.activation_fn == 'tanh':
                    ed = st + span_info.dim
                    data_t.append(torch.tanh(data[:, st:ed]))
                    st = ed
                elif span_info.activation_fn == 'softmax':
                    ed = st + span_info.dim
                    transformed = self._gumbel_softmax(data[:, st:ed], tau=0.2)
                    data_t.append(transformed)
                    st = ed
                else:
                    assert 0

        return torch.cat(data_t, dim=1)

    def _cond_loss(self, data, c, m):
        """Compute the cross entropy loss on the fixed discrete column."""
        loss = []
        st = 0
        st_c = 0
        for column_info in self._transformer.output_info_list:
            for span_info in column_info:
                if len(column_info) != 1 or span_info.activation_fn != "softmax":
                    # not a discrete column
                    st += span_info.dim
                else:
                    ed = st + span_info.dim
                    ed_c = st_c + span_info.dim
                    tmp = functional.cross_entropy(
                        data[:, st:ed],
                        torch.argmax(c[:, st_c:ed_c], dim=1),
                        reduction='none')
                    loss.append(tmp)
                    st = ed
                    st_c = ed_c

        loss = torch.stack(loss, dim=1)

        return (loss * m).sum() / data.size()[0]

    def _validate_discrete_columns(self, train_data, discrete_columns):
        """Check whether ``discrete_columns`` exists in ``train_data``.

        Args:
            train_data (numpy.ndarray or pandas.DataFrame):
                Training Data. It must be a 2-dimensional numpy array or a
                pandas.DataFrame.
            discrete_columns (list-like):
                List of discrete columns to be used to generate the Conditional
                Vector. If ``train_data`` is a Numpy array, this list should
                contain the integer indices of the columns. Otherwise, if it is
                a ``pandas.DataFrame``, this list should contain the column names.
        """
        if isinstance(train_data, pd.DataFrame):
            invalid_columns = set(discrete_columns) - set(train_data.columns)
        elif isinstance(train_data, np.ndarray):
            invalid_columns = []
            for column in discrete_columns:
                if column < 0 or column >= train_data.shape[1]:
                    invalid_columns.append(column)
        else:
            raise TypeError(
                '``train_data`` should be either pd.DataFrame or np.array.')

        if invalid_columns:
            raise ValueError('Invalid columns found: {}'.format(invalid_columns))

    def fit(self, train_data, discrete_columns=tuple(), epochs=None, epsilon=None):
        """Fit the CTGAN Synthesizer models to the training data.

        Args:
            train_data (numpy.ndarray or pandas.DataFrame):
                Training Data. It must be a 2-dimensional numpy array or a
                pandas.DataFrame.
            discrete_columns (list-like):
                List of discrete columns to be used to generate the Conditional
                Vector. If ``train_data`` is a Numpy array, this list should
                contain the integer indices of the columns. Otherwise, if it is
                a ``pandas.DataFrame``, this list should contain the column names.
        """
        self._validate_discrete_columns(train_data, discrete_columns)

        if epochs is None:
            epochs = self._epochs
        if epsilon is None:
            epsilon = self._epsilon
        if not self._dp:
            self.trained_epsilon = float("inf")

        self._transformer = DataTransformer()
        self._transformer.fit(train_data, discrete_columns)

        train_data = self._transformer.transform(train_data)

        self._data_sampler = DataSampler(train_data,
                                         self._transformer.output_info_list,
                                         self._log_frequency)

        data_dim = self._transformer.output_dimensions

        self._generator = Generator(
            self._embedding_dim + self._data_sampler.dim_cond_vec(),
            self._generator_dim, data_dim).to(self._device)

        self._discriminator = Discriminator(
            data_dim + self._data_sampler.dim_cond_vec(),
            self._discriminator_dim, self._pack).to(self._device)

        self._optimizerG = optim.Adam(self._generator.parameters(),
                                      lr=self._generator_lr, betas=(0.5, 0.9),
                                      weight_decay=self._generator_decay)

        self._optimizerD = optim.Adam(self._discriminator.parameters(),
                                      lr=self._discriminator_lr,
                                      betas=(0.5, 0.9),
                                      weight_decay=self._discriminator_decay)

        if self._dp:
            self._privacy_engine = PrivacyEngine(
                self._discriminator,
                self._batch_size / self._pack,
                len(train_data),
                alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
                noise_multiplier=self._noise_multiplier,
                max_grad_norm=self.max_grad_norm,
                clip_per_layer=True,
                loss_reduction="sum",
            )
            self._privacy_engine.attach(self._optimizerD)

        mean = torch.zeros(self._batch_size, self._embedding_dim,
                           device=self._device)
        std = mean + 1

        one = torch.tensor(1, dtype=torch.float).to(self._device)
        mone = one * -1

        steps_per_epoch = max(len(train_data) // self._batch_size, 1)
        for i in range(epochs):
            self.trained_epochs += 1

            if self._dp and self.trained_epsilon >= self._epsilon:
                print("Privacy budget of {:.2f} exhausted. Please specify a "
                      "higher one in fit() to train more, or disable "
                      "differential privacy.".format(self._epsilon))
                return

            for id_ in range(steps_per_epoch):
                for n in range(self._discriminator_steps):
                    fakez = torch.normal(mean=mean, std=std)

                    condvec = self._data_sampler.sample_condvec(self._batch_size)
                    if condvec is None:
                        c1, m1, col, opt = None, None, None, None
                        real = self._data_sampler.sample_data(
                            self._batch_size, col, opt)
                    else:
                        c1, m1, col, opt = condvec
                        c1 = torch.from_numpy(c1).to(self._device)
                        m1 = torch.from_numpy(m1).to(self._device)
                        fakez = torch.cat([fakez, c1], dim=1)

                        perm = np.arange(self._batch_size)
                        np.random.shuffle(perm)
                        real = self._data_sampler.sample_data(
                            self._batch_size, col[perm], opt[perm])
                        c2 = c1[perm]

                    fake = self._generator(fakez)
                    fakeact = self._apply_activate(fake)

                    real = torch.from_numpy(real.astype('float32')).to(self._device)

                    if c1 is not None:
                        fake_cat = torch.cat([fakeact, c1], dim=1)
                        real_cat = torch.cat([real, c2], dim=1)
                    else:
                        real_cat = real
                        fake_cat = fake

                    self._optimizerD.zero_grad()

                    y_fake = self._discriminator(fake_cat)
                    y_real = self._discriminator(real_cat)

                    if not self._dp:
                        pen = self._discriminator.calc_gradient_penalty(
                            real_cat, fake_cat, self._device)
                        pen.backward(retain_graph=True)

                    loss_d = -torch.mean(y_real) + torch.mean(y_fake)
                    loss_d.backward()
                    self._optimizerD.step()

                fakez = torch.normal(mean=mean, std=std)
                condvec = self._data_sampler.sample_condvec(self._batch_size)

                if condvec is None:
                    c1, m1, col, opt = None, None, None, None
                else:
                    c1, m1, col, opt = condvec
                    c1 = torch.from_numpy(c1).to(self._device)
                    m1 = torch.from_numpy(m1).to(self._device)
                    fakez = torch.cat([fakez, c1], dim=1)

                fake = self._generator(fakez)
                fakeact = self._apply_activate(fake)

                if c1 is not None:
                    y_fake = self._discriminator(torch.cat([fakeact, c1], dim=1))
                else:
                    y_fake = self._discriminator(fakeact)

                if condvec is None:
                    cross_entropy = 0
                else:
                    cross_entropy = self._cond_loss(fake, c1, m1)

                loss_g = -torch.mean(y_fake) + cross_entropy

                self._optimizerG.zero_grad()
                loss_g.backward()
                self._optimizerG.step()

                if self._dp:
                    # manually clear the per-sample gradients accumulated by Opacus
                    for p in self._discriminator.parameters():
                        if hasattr(p, "grad_sample"):
                            del p.grad_sample

                    self.trained_epsilon, best_alpha = \
                        self._optimizerD.privacy_engine.get_privacy_spent(self._delta)
                    if self.trained_epsilon >= epsilon:
                        print("Privacy budget of {:.2f} exhausted, training "
                              "halted. Best alpha: {:.2f}".format(epsilon, best_alpha))
                        return

            if self._verbose:
                print(f"Epoch {i+1}, epsilon {self.trained_epsilon: .2f}, "
                      f"Loss G: {loss_g.detach().cpu(): .4f}, "
                      f"Loss D: {loss_d.detach().cpu(): .4f}",
                      flush=True)

        if self._dp:
            self._privacy_engine.detach()

    def sample(self, n, condition_column=None, condition_value=None):
        """Sample data similar to the training data.

        Choosing a condition_column and condition_value will increase the
        probability of the discrete condition_value happening in the
        condition_column.

        Args:
            n (int):
                Number of rows to sample.
            condition_column (string):
                Name of a discrete column.
            condition_value (string):
                Name of the category in the condition_column which we wish to
                increase the probability of happening.

        Returns:
            numpy.ndarray or pandas.DataFrame
        """
        if condition_column is not None and condition_value is not None:
            condition_info = self._transformer.convert_column_name_value_to_id(
                condition_column, condition_value)
            global_condition_vec = self._data_sampler.generate_cond_from_condition_column_info(
                condition_info, self._batch_size)
        else:
            global_condition_vec = None

        steps = n // self._batch_size + 1
        data = []
        for i in range(steps):
            mean = torch.zeros(self._batch_size, self._embedding_dim)
            std = mean + 1
            fakez = torch.normal(mean=mean, std=std).to(self._device)

            if global_condition_vec is not None:
                condvec = global_condition_vec.copy()
            else:
                condvec = self._data_sampler.sample_original_condvec(
                    self._batch_size)

            if condvec is not None:
                c1 = torch.from_numpy(condvec).to(self._device)
                fakez = torch.cat([fakez, c1], dim=1)

            fake = self._generator(fakez)
            fakeact = self._apply_activate(fake)
            data.append(fakeact.detach().cpu().numpy())

        data = np.concatenate(data, axis=0)
        data = data[:n]

        return self._transformer.inverse_transform(data)

    def set_device(self, device):
        self._device = device
        if hasattr(self, '_generator'):
            self._generator.to(self._device)
        if hasattr(self, '_discriminator'):
            self._discriminator.to(self._device)
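# A minimal usage sketch for CTGANSynthesizer, under illustrative assumptions:
# a small pandas DataFrame with one discrete column, a reduced epoch count, and
# the DP budget left at its defaults. Column names and sizes are made up, not
# taken from the original project.
import pandas as pd

df = pd.DataFrame({
    'age': [23, 45, 31, 52] * 250,
    'job': ['admin', 'technician', 'services', 'retired'] * 250,
})

synthesizer = CTGANSynthesizer(epochs=10, dp=True)
synthesizer.fit(df, discrete_columns=['job'])

# sample() returns data in the original format, here a DataFrame shaped like df
samples = synthesizer.sample(100)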
    print('Round{:3d}, Average loss {:.3f}'.format(iter, loss_avg))
    loss_train.append(loss_avg)

    net_glob.eval()
    acc_valid, tmp_loss_valid = test_bank(net_glob, valid_loader, args)
    print('Round{:3d}, Validation loss {:.3f}'.format(iter, tmp_loss_valid))
    loss_valid.append(tmp_loss_valid)
    if tmp_loss_valid < best_valid_loss:
        best_valid_loss = tmp_loss_valid
        best_net_glob = copy.deepcopy(net_glob)
        print('SAVE BEST MODEL AT EPOCH {}'.format(iter))
    net_glob.train()

if args.dp:
    privacy_engine.detach()

torch.save(best_net_glob, save_prefix + '_best.pt')
torch.save(net_glob, save_prefix + '_final.pt')

# plot loss curve
plt.figure()
plt.plot(range(len(loss_train)), loss_train, 'r', label='train_loss')
plt.plot(range(len(loss_valid)), loss_valid, 'b', label='valid_loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.grid(True)
plt.legend(loc=0)
plt.savefig(save_prefix + '.png')

# testing
def RPC_train_test(data, model, parameters, device, log_interval, local_dp,
                   return_params, epoch, delta, if_test):
    """Train or evaluate the model on the given data.

    :param data: pandas DataFrame with the label in the first column and the
        flattened 28x28 pixel values in the remaining columns
    :param model: the torch model to train or evaluate
    :param parameters: the model parameters to optimize
    :param device: the device (cpu/gpu) to run on
    :param log_interval: how many batches to wait between log messages
    :param local_dp: whether to train with local differential privacy
    :param return_params: whether to return the trained parameters
    :param epoch: number of epochs to train
    :param delta: target delta for the privacy accountant
    :param if_test: if True, evaluate on the test split instead of training
    :return: dict with params, model and test accuracy when return_params is True
    """
    train = data
    train_batch_size = 64
    test_batch_size = 64

    X = (train.iloc[:, 1:].values).astype('float32')
    Y = train.iloc[:, 0].values
    print(X.shape)

    features_train, features_test, targets_train, targets_test = train_test_split(
        X, Y, test_size=0.2, random_state=42)

    X_train = torch.from_numpy(features_train / 255.0)
    X_test = torch.from_numpy(features_test / 255.0)
    Y_train = torch.from_numpy(targets_train).type(torch.LongTensor)
    Y_test = torch.from_numpy(targets_test).type(torch.LongTensor)

    train = torch.utils.data.TensorDataset(X_train, Y_train)
    test = torch.utils.data.TensorDataset(X_test, Y_test)

    train_loader = torch.utils.data.DataLoader(train,
                                               batch_size=train_batch_size,
                                               shuffle=False)
    test_loader = torch.utils.data.DataLoader(test,
                                              batch_size=test_batch_size,
                                              shuffle=False)

    # if input is train.pt
    # train_loader = data

    test_accuracy = 0
    if if_test:
        model.eval()
        test_loss = 0
        correct = 0

        with torch.no_grad():
            for data, target in test_loader:
                # Send the data and target to the device (cpu/gpu) the model is at
                data, target = data.to(device), target.to(device)
                # Run the model on the batch
                batch_size = data.shape[0]
                data = data.reshape(batch_size, 28, 28)
                data = data.unsqueeze(1)
                output = model(data)
                # Calculate the loss
                test_loss += F.nll_loss(output, target, reduction='sum').item()
                # Check whether the prediction was correct
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()

        test_loss /= len(test_loader.dataset)

        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'
              .format(test_loss, correct, len(test_loader.dataset),
                      100. * correct / len(test_loader.dataset)))
        test_accuracy = 100. * correct / len(test_loader.dataset)
    else:
        learning_rate = 0.01

        # initialize the optimizer and, if requested, the privacy engine
        optimizer = optim.SGD(parameters, lr=learning_rate, momentum=0.5)
        if local_dp:
            privacy_engine = PrivacyEngine(
                model,
                batch_size=64,
                sample_size=60000,
                alphas=range(2, 32),
                noise_multiplier=1.3,
                max_grad_norm=1.0,
            )
            privacy_engine.attach(optimizer)

        model.train()
        for epoch in range(1, epoch + 1):
            for batch_idx, (data, target) in enumerate(train_loader):
                data, target = data.to(device), target.to(device)
                optimizer.zero_grad()
                batch_size = data.shape[0]
                data = data.reshape(batch_size, 28, 28)
                data = data.unsqueeze(1)
                output = model(data)
                loss = F.nll_loss(output, target)
                loss.backward()
                optimizer.step()
                if batch_idx % log_interval == 0:
                    print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                        epoch, batch_idx * len(data),
                        len(train_loader.dataset),
                        100. * batch_idx / len(train_loader), loss.item()))

        if local_dp:
            epsilon, alpha = optimizer.privacy_engine.get_privacy_spent(delta)
            print("\nEpsilon {}, best alpha {}".format(epsilon, alpha))
            # detach the privacy engine from the optimizer;
            # attaching it multiple times leads to an error
            privacy_engine.detach()

    if return_params:
        # model.parameters() should match the `parameters` argument; return
        # all of them instead of only the first tensor
        return {
            'params': list(model.parameters()),
            'model': model,
            'test_accuracy': test_accuracy
        }

# trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
# trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
#                                           shuffle=True, num_workers=2)
#
# testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
# testloader = torch.utils.data.DataLoader(testset, batch_size=4,
#                                          shuffle=False, num_workers=2)
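# A minimal invocation sketch for RPC_train_test. Assumptions (not from the
# original file): the CSV has the label in column 0 followed by 784 pixel
# columns, and `MNISTConvNet` is a hypothetical model whose forward returns
# log-probabilities, as the F.nll_loss calls above expect.
import pandas as pd
import torch

df = pd.read_csv("mnist_train.csv")  # hypothetical path
model = MNISTConvNet()               # hypothetical model class
result = RPC_train_test(df, model, model.parameters(),
                        device=torch.device("cpu"), log_interval=10,
                        local_dp=True, return_params=True,
                        epoch=1, delta=1e-5, if_test=False)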
class LitSampleConvNetClassifier(pl.LightningModule):
    def __init__(
        self,
        lr: float = 0.1,
        enable_dp: bool = True,
        delta: float = 1e-5,
        sample_rate: float = 0.001,
        sigma: float = 1.0,
        max_per_sample_grad_norm: float = 1.0,
        secure_rng: bool = False,
    ):
        """A simple conv-net for classifying MNIST with differential privacy

        Args:
            lr: Learning rate
            enable_dp: Enables training with privacy guarantees using Opacus
                (if True), vanilla SGD otherwise
            delta: Target delta for which (eps, delta)-DP is computed
            sample_rate: Sample rate used for batch construction
            sigma: Noise multiplier
            max_per_sample_grad_norm: Clip per-sample gradients to this norm
            secure_rng: Use secure random number generator
        """
        super().__init__()

        # Hyper-parameters
        self.lr = lr
        self.enable_dp = enable_dp
        self.delta = delta
        self.sample_rate = sample_rate
        self.sigma = sigma
        self.max_per_sample_grad_norm = max_per_sample_grad_norm
        self.secure_rng = secure_rng

        # Parameters
        self.conv1 = nn.Conv2d(1, 16, 8, 2, padding=3)
        self.conv2 = nn.Conv2d(16, 32, 4, 2)
        self.fc1 = nn.Linear(32 * 4 * 4, 32)
        self.fc2 = nn.Linear(32, 10)

        # Privacy engine
        self.privacy_engine = None  # Created before training

        # Metrics
        self.test_accuracy = torchmetrics.Accuracy()

    def forward(self, x):
        # x of shape [B, 1, 28, 28]
        x = F.relu(self.conv1(x))   # -> [B, 16, 14, 14]
        x = F.max_pool2d(x, 2, 1)   # -> [B, 16, 13, 13]
        x = F.relu(self.conv2(x))   # -> [B, 32, 5, 5]
        x = F.max_pool2d(x, 2, 1)   # -> [B, 32, 4, 4]
        x = x.view(-1, 32 * 4 * 4)  # -> [B, 512]
        x = F.relu(self.fc1(x))     # -> [B, 32]
        x = self.fc2(x)             # -> [B, 10]
        return x

    def configure_optimizers(self):
        optimizer = optim.SGD(self.parameters(), lr=self.lr, momentum=0)
        return optimizer

    def training_step(self, batch, batch_idx):
        data, target = batch
        output = self(data)
        loss = F.cross_entropy(output, target)
        self.log("train_loss", loss, on_step=False, on_epoch=True, prog_bar=True)
        return loss

    def test_step(self, batch, batch_idx):
        data, target = batch
        output = self(data)
        loss = F.cross_entropy(output, target)
        self.test_accuracy(output, target)
        self.log("test_loss", loss, on_step=False, on_epoch=True, prog_bar=True)
        self.log("test_accuracy", self.test_accuracy, on_step=False, on_epoch=True)
        return loss

    # Adding differential privacy learning
    def on_train_start(self) -> None:
        if self.enable_dp:
            self.privacy_engine = PrivacyEngine(
                self,
                sample_rate=self.sample_rate,
                alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
                noise_multiplier=self.sigma,
                max_grad_norm=self.max_per_sample_grad_norm,
                secure_rng=self.secure_rng,
            )
            optimizer = self.optimizers()
            self.privacy_engine.attach(optimizer)

    def on_train_epoch_end(self):
        if self.enable_dp:
            # Privacy spent: (epsilon, delta) for the best alpha
            epsilon, best_alpha = self.privacy_engine.get_privacy_spent(self.delta)
            self.log("epsilon", epsilon, on_epoch=True, prog_bar=True)
            self.log("alpha", best_alpha, on_epoch=True, prog_bar=True)

    def on_train_end(self):
        if self.enable_dp:
            self.privacy_engine.detach()
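# A minimal training sketch for the Lightning module above; the MNIST
# normalization constants and batch size are illustrative, not from the
# original script. With the sample_rate-based Opacus API used above,
# sample_rate should equal batch_size / len(train_set) for the privacy
# accountant to be meaningful.
import pytorch_lightning as pl
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),  # standard MNIST stats
])
train_set = datasets.MNIST("./data", train=True, download=True,
                           transform=transform)
train_loader = DataLoader(train_set, batch_size=60)

model = LitSampleConvNetClassifier(enable_dp=True,
                                   sample_rate=60 / len(train_set))
trainer = pl.Trainer(max_epochs=1)
trainer.fit(model, train_loader)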
def server(cur_net, current_iter, current_server_rank_id, best_valid_loss,
           best_net_glob, server_flag):
    loss_locals = []
    w_state_dict_locals = []

    # local train
    cur_net.train()
    optimizer = get_optimizer(args, cur_net)
    loss_func = nn.CrossEntropyLoss()
    if args.dp:
        privacy_engine = PrivacyEngine(
            cur_net,
            batch_size=args.bs,
            sample_size=len(local_train_loader),
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=0.3,
            max_grad_norm=1.2,
            secure_rng=args.secure_rng)
        privacy_engine.attach(optimizer)
    current_state_dict, current_loss = normal_train(args, cur_net, optimizer,
                                                    loss_func,
                                                    local_train_loader,
                                                    valid_loader)
    if args.dp:
        privacy_engine.detach()
    loss_locals.append(current_loss)
    if args.tphe:
        w_state_dict_locals.append(
            encrypt_torch_state_dict(pub_key, current_state_dict))
    else:
        w_state_dict_locals.append(current_state_dict)

    # receive the local updates from the other clients
    loop = True
    while loop:
        # get the list of sockets which are ready to be read through select
        rList, wList, error_sockets = select.select(server_connection_list, [], [])
        for sockfd in rList:
            tmp_pkl_data = sockfd.recv(int(args.buffer))  # 760586945
            tmp_state_dict, tmp_loss = pickle.loads(tmp_pkl_data)
            w_state_dict_locals.append(tmp_state_dict)
            loss_locals.append(tmp_loss)
            if len(w_state_dict_locals) == args.num_users:
                loop = False
                break

    # aggregate the weight state_dicts
    aggregated_state_dict = state_dict_aggregation(w_state_dict_locals)

    # distribute the aggregated weight state_dict
    send_aggregated_weight_state_dict_to_all(aggregated_state_dict)

    # parse the aggregated state_dict into the local model
    parse_aggregated_state_dict(aggregated_state_dict, cur_net)

    loss_avg = sum(loss_locals) / len(loss_locals)
    print('Round{:3d}, Average loss {:.3f}'.format(current_iter, loss_avg))
    loss_train.append(loss_avg)

    cur_net.eval()
    acc_valid, tmp_loss_valid = test_bank(cur_net, valid_loader, args)
    print('Round{:3d}, Validation loss {:.3f}'.format(current_iter, tmp_loss_valid))
    loss_valid.append(tmp_loss_valid)
    if tmp_loss_valid < best_valid_loss:
        best_valid_loss = tmp_loss_valid
        best_net_glob = copy.deepcopy(cur_net)
        print('SAVE BEST MODEL AT EPOCH {}'.format(current_iter))

    # pick the server for the next epoch
    next_server_rank_id = random.randint(0, args.num_users - 1)

    # distribute the metadata
    send_metadata_to_all(loss_avg, tmp_loss_valid, next_server_rank_id)
    if next_server_rank_id != args.rank:
        server_flag = False
        current_server_rank_id = next_server_rank_id
    print("\33[31m\33[1m Current server rank id {} \33[0m".format(current_server_rank_id))
    return cur_net, current_server_rank_id, best_valid_loss, best_net_glob, server_flag
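# A hypothetical driver loop for the client/server roles above, derived only
# from the two functions' signatures and return values; it assumes the same
# globals (args, sockets, loaders) they already use, and that rank 0 starts as
# the server. The server role rotates to next_server_rank_id each round.
server_flag = (args.rank == 0)
current_server_rank_id = 0
best_valid_loss = float('inf')
best_net_glob = None
for current_iter in range(args.epochs):
    role = server if server_flag else client
    net_glob, current_server_rank_id, best_valid_loss, best_net_glob, server_flag = \
        role(net_glob, current_iter, current_server_rank_id,
             best_valid_loss, best_net_glob, server_flag)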
def train(self, data, categorical_columns=None, ordinal_columns=None,
          update_epsilon=None, verbose=False, mlflow=False):
    if update_epsilon:
        self.epsilon = update_epsilon

    if isinstance(data, pd.DataFrame):
        for col in data.columns:
            data[col] = pd.to_numeric(data[col], errors='ignore')
        self.pd_cols = data.columns
        self.pd_index = data.index
        data = data.to_numpy()
    elif not isinstance(data, np.ndarray):
        raise ValueError("Data must be a numpy array or pandas dataframe")

    dataset = TensorDataset(
        torch.from_numpy(data.astype('float32')).to(self.device))
    dataloader = DataLoader(dataset,
                            batch_size=self.batch_size,
                            shuffle=True,
                            drop_last=True)

    if not hasattr(self, "generator"):
        self.generator = Generator(self.latent_dim, data.shape[1],
                                   binary=self.binary).to(self.device)
    if not hasattr(self, "discriminator"):
        self.discriminator = Discriminator(data.shape[1]).to(self.device)

    self.optimizer_d = optim.Adam(self.discriminator.parameters(),
                                  lr=4e-4, betas=(0.5, 0.9))
    if hasattr(self, "state_dict"):
        self.optimizer_d.load_state_dict(self.state_dict)

    if not hasattr(self, "privacy_engine"):
        privacy_engine = PrivacyEngine(
            self.discriminator,
            batch_size=self.batch_size,
            sample_size=len(data),
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=3.5,
            max_grad_norm=1.0,
            clip_per_layer=True).to(self.device)
    else:
        privacy_engine = self.privacy_engine

    privacy_engine.attach(self.optimizer_d)

    if hasattr(self, "privacy_engine"):
        epsilon, best_alpha = self.optimizer_d.privacy_engine.get_privacy_spent(
            self.delta)
    else:
        epsilon = 0

    if not hasattr(self, "optimizer_g"):
        self.optimizer_g = optim.Adam(self.generator.parameters(), lr=1e-4)

    criterion = nn.BCELoss()

    for epoch in range(self.epochs):
        if self.epsilon < epsilon:
            break

        # the loop variable is named `batch` so it does not shadow the `data`
        # array, which is still needed below to derive delta
        for i, batch in enumerate(dataloader):
            self.discriminator.zero_grad()

            real_data = batch[0].to(self.device)

            # train with fake data
            noise = torch.randn(self.batch_size, self.latent_dim, 1, 1,
                                device=self.device)
            noise = noise.view(-1, self.latent_dim)
            fake_data = self.generator(noise)
            label_fake = torch.full((self.batch_size, 1), 0,
                                    dtype=torch.float, device=self.device)
            output = self.discriminator(fake_data.detach())
            loss_d_fake = criterion(output, label_fake)
            loss_d_fake.backward()
            self.optimizer_d.step()

            # train with real data
            label_true = torch.full((self.batch_size, 1), 1,
                                    dtype=torch.float, device=self.device)
            output = self.discriminator(real_data.float())
            loss_d_real = criterion(output, label_true)
            loss_d_real.backward()
            self.optimizer_d.step()

            loss_d = loss_d_real + loss_d_fake

            max_grad_norm = []
            for p in self.discriminator.parameters():
                param_norm = p.grad.data.norm(2).item()
                max_grad_norm.append(param_norm)
            privacy_engine.max_grad_norm = max_grad_norm

            # train generator
            self.generator.zero_grad()
            label_g = torch.full((self.batch_size, 1), 1,
                                 dtype=torch.float, device=self.device)
            output_g = self.discriminator(fake_data)
            loss_g = criterion(output_g, label_g)
            loss_g.backward()
            self.optimizer_g.step()

            # manually clear the per-sample gradients accumulated by Opacus
            for p in self.discriminator.parameters():
                if hasattr(p, "grad_sample"):
                    del p.grad_sample

            # autograd_grad_sample.clear_backprops(discriminator)

            if self.delta is None:
                self.delta = 1 / data.shape[0]

            eps, best_alpha = self.optimizer_d.privacy_engine.get_privacy_spent(
                self.delta)
            self.alpha = best_alpha

        if verbose:
            print('eps: {:f} \t alpha: {:f} \t G: {:f} \t D: {:f}'.format(
                eps, best_alpha,
                loss_g.detach().cpu(), loss_d.detach().cpu()))

        if mlflow:
            import mlflow
            mlflow.log_metric("loss_g", float(loss_g.detach().cpu()), step=epoch)
            mlflow.log_metric("loss_d", float(loss_d.detach().cpu()), step=epoch)
            mlflow.log_metric("epsilon", float(eps), step=epoch)

        if self.epsilon < eps:
            break

    privacy_engine.detach()
    self.state_dict = self.optimizer_d.state_dict()
    self.privacy_engine = privacy_engine