Example #1
def client(cur_net, current_iter, current_server_rank_id, best_valid_loss, best_net_glob, server_flag):
	# local train
	cur_net.train()
	optimizer = get_optimizer(args, cur_net)
	loss_func = nn.CrossEntropyLoss()
	if args.dp:
		privacy_engine = PrivacyEngine(cur_net, batch_size=args.bs, sample_size=len(local_train_loader),
										alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
										noise_multiplier=0.3, max_grad_norm=1.2, secure_rng=args.secure_rng)
		privacy_engine.attach(optimizer)
	current_state_dict, current_loss = normal_train(args, cur_net, optimizer, loss_func, local_train_loader, valid_loader)

	if args.dp:
		privacy_engine.detach()

	# send the state_dict to current server
	if args.tphe:
		client_sockets[rank2idx[current_server_rank_id]].send(pickle.dumps([encrypt_torch_state_dict(pub_key, current_state_dict), current_loss]))
	else:
		client_sockets[rank2idx[current_server_rank_id]].send(pickle.dumps([current_state_dict, current_loss]))

	# recv the aggregated state dict from current server
	aggregated_state_dict = client_sockets[rank2idx[current_server_rank_id]].recv(int(args.buffer))
	aggregated_state_dict = pickle.loads(aggregated_state_dict)

	# parse aggregated state_dict
	parse_aggregated_state_dict(aggregated_state_dict, cur_net)

	# recv metadata
	metadata_list_pkl = client_sockets[rank2idx[current_server_rank_id]].recv(int(args.buffer))
	loss_avg, tmp_loss_valid, next_server_rank_id = pickle.loads(metadata_list_pkl)
	loss_train.append(loss_avg)
	loss_valid.append(tmp_loss_valid)
	print('Round{:3d}, Average loss {:.3f}'.format(current_iter, loss_avg))
	print('Round{:3d}, Validation loss {:.3f}'.format(current_iter, tmp_loss_valid))
	if tmp_loss_valid < best_valid_loss:
		best_valid_loss = tmp_loss_valid
		best_net_glob = copy.deepcopy(cur_net)
		print('SAVE BEST MODEL AT EPOCH {}'.format(current_iter))

	# update the metadata for server
	current_server_rank_id = next_server_rank_id
	if next_server_rank_id == args.rank:
		server_flag = True

	print("\33[31m\33[1m Current server rank id {} \33[0m".format(current_server_rank_id))

	return cur_net, current_server_rank_id, best_valid_loss, best_net_glob, server_flag
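
Helpers such as get_optimizer, normal_train, the socket objects and the TPHE encryption above belong to the surrounding project and are not shown here. The DP-specific part of the local training step reduces to the Opacus 0.x attach/train/detach pattern; the following is a minimal, self-contained sketch of that pattern with a toy model and random data (an illustration, not the project's code):

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from opacus import PrivacyEngine  # Opacus 0.x API, as used throughout these examples

# Toy data and model stand in for local_train_loader / cur_net.
X, y = torch.randn(256, 20), torch.randint(0, 2, (256,))
loader = DataLoader(TensorDataset(X, y), batch_size=32)
model = nn.Sequential(nn.Linear(20, 16), nn.ReLU(), nn.Linear(16, 2))

optimizer = optim.SGD(model.parameters(), lr=0.05)
loss_func = nn.CrossEntropyLoss()

# Attach a privacy engine to the optimizer before the local epoch...
engine = PrivacyEngine(model, batch_size=32, sample_size=len(loader.dataset),
                       alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
                       noise_multiplier=0.3, max_grad_norm=1.2)
engine.attach(optimizer)

model.train()
for data, target in loader:
    optimizer.zero_grad()
    loss = loss_func(model(data), target)
    loss.backward()
    optimizer.step()  # per-sample gradients are clipped and noised here

# ...and detach it afterwards, exactly as client() does.
engine.detach()
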
Example #2
def add_remove_ddp_hooks(rank,
                         world_size,
                         remaining_hooks,
                         dp,
                         noise_multiplier=0,
                         max_grad_norm=1e8):
    device = setup_and_get_device(rank, world_size, nonce=2)

    model = ToyModel().to(device)
    ddp_model = nn.parallel.DistributedDataParallel(model, device_ids=[device])

    engine = PrivacyEngine(
        ddp_model,
        batch_size=1,
        sample_size=10,
        alphas=PRIVACY_ALPHAS,
        noise_multiplier=noise_multiplier,
        max_grad_norm=[max_grad_norm],
    )

    optimizer = optim.SGD(ddp_model.parameters(), lr=1)

    engine.attach(optimizer)

    remaining_hooks["attached"] = {
        p: p._backward_hooks
        for p in engine.module.parameters() if p._backward_hooks
    }
    engine.detach()

    remaining_hooks["detached"] = {
        p: p._backward_hooks
        for p in engine.module.parameters() if p._backward_hooks
    }

    cleanup()
Example #3
class CTGANSynthesizer(BaseSynthesizer):
    """Conditional Table GAN Synthesizer.

    This is the core class of the CTGAN project, where the different components
    are orchestrated together.
    For more details about the process, please check the [Modeling Tabular data using
    Conditional GAN](https://arxiv.org/abs/1907.00503) paper.
    Args:
        embedding_dim (int):
            Size of the random sample passed to the Generator. Defaults to 128.
        generator_dim (tuple or list of ints):
            Size of the output samples for each one of the Residuals. A Residual Layer
            will be created for each one of the values provided. Defaults to (256, 256).
        discriminator_dim (tuple or list of ints):
            Size of the output samples for each one of the Discriminator Layers. A Linear Layer
            will be created for each one of the values provided. Defaults to (256, 256).
        generator_lr (float):
            Learning rate for the generator. Defaults to 2e-4.
        generator_decay (float):
            Generator weight decay for the Adam Optimizer. Defaults to 1e-6.
        discriminator_lr (float):
            Learning rate for the discriminator. Defaults to 2e-4.
        discriminator_decay (float):
            Discriminator weight decay for the Adam Optimizer. Defaults to 0.
        pack (int):
            Number of samples packed together for each discriminator input. Defaults to 1.
        batch_size (int):
            Number of data samples to process in each step. Defaults to 500.
        discriminator_steps (int):
            Number of discriminator updates to do for each generator update.
            From the WGAN paper: https://arxiv.org/abs/1701.07875. The WGAN paper
            default is 5; the default used here is 1 to match the original CTGAN implementation.
        log_frequency (boolean):
            Whether to use log frequency of categorical levels in conditional
            sampling. Defaults to ``True``.
        verbose (boolean):
            Whether to print progress results. Defaults to ``False``.
        epochs (int):
            Number of training epochs. Defaults to 300.
        epsilon (float):
            Target privacy budget; training stops once the spent epsilon reaches it. Defaults to 10.
        delta (float):
            Target delta for the (epsilon, delta)-DP guarantee. Defaults to 1e-5.
        noise_multiplier (float):
            Noise multiplier passed to the Opacus PrivacyEngine. Defaults to 2.
        max_grad_norm (float):
            Per-sample gradient clipping norm passed to the PrivacyEngine. Defaults to 1.
        dp (boolean):
            Whether to train the discriminator with differential privacy. Defaults to ``True``.
    """
    def __init__(self,
                 embedding_dim=128,
                 generator_dim=(256, 256),
                 discriminator_dim=(256, 256),
                 generator_lr=2e-4,
                 generator_decay=1e-6,
                 discriminator_lr=2e-4,
                 discriminator_decay=0,
                 pack=1,
                 batch_size=500,
                 discriminator_steps=1,
                 log_frequency=True,
                 verbose=False,
                 epochs=300,
                 epsilon=10,
                 delta=1e-5,
                 noise_multiplier=2,
                 max_grad_norm=1,
                 dp=True):

        assert batch_size % 2 == 0

        self._embedding_dim = embedding_dim
        self._generator_dim = generator_dim
        self._discriminator_dim = discriminator_dim

        self._generator_lr = generator_lr
        self._generator_decay = generator_decay
        self._discriminator_lr = discriminator_lr
        self._discriminator_decay = discriminator_decay

        self._pack = pack  # number of samples packed per discriminator input; added on top of the original CTGAN
        self._batch_size = batch_size
        self._discriminator_steps = discriminator_steps
        self._log_frequency = log_frequency
        self._verbose = verbose
        self._epochs = epochs
        self._epsilon = epsilon
        self._device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        self.trained_epochs = 0
        self.trained_epsilon = 0
        self._delta = delta
        self._noise_multiplier = noise_multiplier
        self.max_grad_norm = max_grad_norm
        self._dp = dp
        opacus.supported_layers_grad_samplers._create_or_extend_grad_sample = _custom_create_or_extend_grad_sample

    @staticmethod
    def _gumbel_softmax(logits, tau=1, hard=False, eps=1e-10, dim=-1):
        """Deals with the instability of the gumbel_softmax for older versions of torch.

        For more details about the issue:
        https://drive.google.com/file/d/1AA5wPfZ1kquaRtVruCd6BiYZGcDeNxyP/view?usp=sharing
        Args:
            logits:
                […, num_features] unnormalized log probabilities
            tau:
                non-negative scalar temperature
            hard:
                if True, the returned samples will be discretized as one-hot vectors,
                but will be differentiated as if it is the soft sample in autograd
            dim (int):
                a dimension along which softmax will be computed. Default: -1.
        Returns:
            Sampled tensor of same shape as logits from the Gumbel-Softmax distribution.
        """
        if version.parse(torch.__version__) < version.parse("1.2.0"):
            for i in range(10):
                transformed = functional.gumbel_softmax(logits,
                                                        tau=tau,
                                                        hard=hard,
                                                        eps=eps,
                                                        dim=dim)
                if not torch.isnan(transformed).any():
                    return transformed
            raise ValueError("gumbel_softmax returning NaN.")

        return functional.gumbel_softmax(logits,
                                         tau=tau,
                                         hard=hard,
                                         eps=eps,
                                         dim=dim)

    def _apply_activate(self, data):
        """Apply proper activation function to the output of the generator."""
        data_t = []
        st = 0
        for column_info in self._transformer.output_info_list:
            for span_info in column_info:
                if span_info.activation_fn == 'tanh':
                    ed = st + span_info.dim
                    data_t.append(torch.tanh(data[:, st:ed]))
                    st = ed
                elif span_info.activation_fn == 'softmax':
                    ed = st + span_info.dim
                    transformed = self._gumbel_softmax(data[:, st:ed], tau=0.2)
                    data_t.append(transformed)
                    st = ed
                else:
                    assert 0, 'Unexpected activation function {}.'.format(span_info.activation_fn)

        return torch.cat(data_t, dim=1)

    def _cond_loss(self, data, c, m):
        """Compute the cross entropy loss on the fixed discrete column."""
        loss = []
        st = 0
        st_c = 0
        for column_info in self._transformer.output_info_list:
            for span_info in column_info:
                if len(column_info
                       ) != 1 or span_info.activation_fn != "softmax":
                    # not discrete column
                    st += span_info.dim
                else:
                    ed = st + span_info.dim
                    ed_c = st_c + span_info.dim
                    tmp = functional.cross_entropy(data[:, st:ed],
                                                   torch.argmax(c[:,
                                                                  st_c:ed_c],
                                                                dim=1),
                                                   reduction='none')
                    loss.append(tmp)
                    st = ed
                    st_c = ed_c

        loss = torch.stack(loss, dim=1)

        return (loss * m).sum() / data.size()[0]

    def _validate_discrete_columns(self, train_data, discrete_columns):
        """Check whether ``discrete_columns`` exists in ``train_data``.

        Args:
            train_data (numpy.ndarray or pandas.DataFrame):
                Training Data. It must be a 2-dimensional numpy array or a pandas.DataFrame.
            discrete_columns (list-like):
                List of discrete columns to be used to generate the Conditional
                Vector. If ``train_data`` is a Numpy array, this list should
                contain the integer indices of the columns. Otherwise, if it is
                a ``pandas.DataFrame``, this list should contain the column names.
        """
        if isinstance(train_data, pd.DataFrame):
            invalid_columns = set(discrete_columns) - set(train_data.columns)
        elif isinstance(train_data, np.ndarray):
            invalid_columns = []
            for column in discrete_columns:
                if column < 0 or column >= train_data.shape[1]:
                    invalid_columns.append(column)
        else:
            raise TypeError(
                '``train_data`` should be either pd.DataFrame or np.array.')

        if invalid_columns:
            raise ValueError(
                'Invalid columns found: {}'.format(invalid_columns))

    def fit(self,
            train_data,
            discrete_columns=tuple(),
            epochs=None,
            epsilon=None):
        """Fit the CTGAN Synthesizer models to the training data.

        Args:
            train_data (numpy.ndarray or pandas.DataFrame):
                Training Data. It must be a 2-dimensional numpy array or a pandas.DataFrame.
            discrete_columns (list-like):
                List of discrete columns to be used to generate the Conditional
                Vector. If ``train_data`` is a Numpy array, this list should
                contain the integer indices of the columns. Otherwise, if it is
                a ``pandas.DataFrame``, this list should contain the column names.
        """
        self._validate_discrete_columns(train_data, discrete_columns)

        if epochs is None:
            epochs = self._epochs
        if epsilon is None:
            epsilon = self._epsilon
        if not self._dp:
            self.trained_epsilon = float("inf")

        self._transformer = DataTransformer()
        self._transformer.fit(train_data, discrete_columns)

        train_data = self._transformer.transform(train_data)

        self._data_sampler = DataSampler(train_data,
                                         self._transformer.output_info_list,
                                         self._log_frequency)

        data_dim = self._transformer.output_dimensions

        self._generator = Generator(
            self._embedding_dim + self._data_sampler.dim_cond_vec(),
            self._generator_dim, data_dim).to(self._device)

        self._discriminator = Discriminator(
            data_dim + self._data_sampler.dim_cond_vec(),
            self._discriminator_dim, self._pack).to(self._device)

        self._optimizerG = optim.Adam(self._generator.parameters(),
                                      lr=self._generator_lr,
                                      betas=(0.5, 0.9),
                                      weight_decay=self._generator_decay)

        self._optimizerD = optim.Adam(self._discriminator.parameters(),
                                      lr=self._discriminator_lr,
                                      betas=(0.5, 0.9),
                                      weight_decay=self._discriminator_decay)

        if self._dp:
            self._privacy_engine = PrivacyEngine(
                self._discriminator,
                self._batch_size / self._pack,
                len(train_data),
                alphas=[1 + x / 10.0
                        for x in range(1, 100)] + list(range(12, 64)),
                noise_multiplier=self._noise_multiplier,
                max_grad_norm=self.max_grad_norm,
                clip_per_layer=True,
                loss_reduction="sum",
            )
            self._privacy_engine.attach(self._optimizerD)

        mean = torch.zeros(self._batch_size,
                           self._embedding_dim,
                           device=self._device)
        std = mean + 1
        one = torch.tensor(1, dtype=torch.float).to(self._device)
        mone = one * -1

        steps_per_epoch = max(len(train_data) // self._batch_size, 1)
        for i in range(epochs):
            self.trained_epochs += 1

            if self._dp:
                if self.trained_epsilon >= epsilon:
                    print(
                        "Privacy budget of {:.2f} exhausted. Specify a higher one in fit() to train further, or disable differential privacy."
                        .format(epsilon))
                    return

            for id_ in range(steps_per_epoch):

                for n in range(self._discriminator_steps):
                    fakez = torch.normal(mean=mean, std=std)

                    condvec = self._data_sampler.sample_condvec(
                        self._batch_size)
                    if condvec is None:
                        c1, m1, col, opt = None, None, None, None
                        real = self._data_sampler.sample_data(
                            self._batch_size, col, opt)
                    else:
                        c1, m1, col, opt = condvec
                        c1 = torch.from_numpy(c1).to(self._device)
                        m1 = torch.from_numpy(m1).to(self._device)
                        fakez = torch.cat([fakez, c1], dim=1)

                        perm = np.arange(self._batch_size)
                        np.random.shuffle(perm)
                        real = self._data_sampler.sample_data(
                            self._batch_size, col[perm], opt[perm])
                        c2 = c1[perm]

                    fake = self._generator(fakez)
                    fakeact = self._apply_activate(fake)

                    real = torch.from_numpy(real.astype('float32')).to(
                        self._device)

                    if c1 is not None:
                        fake_cat = torch.cat([fakeact, c1], dim=1)
                        real_cat = torch.cat([real, c2], dim=1)
                    else:
                        real_cat = real
                        fake_cat = fakeact

                    self._optimizerD.zero_grad()

                    y_fake = self._discriminator(fake_cat)
                    y_real = self._discriminator(real_cat)

                    if not self._dp:
                        pen = self._discriminator.calc_gradient_penalty(
                            real_cat, fake_cat, self._device)
                        pen.backward(retain_graph=True)
                    loss_d = -torch.mean(y_real) + torch.mean(y_fake)

                    loss_d.backward()
                    self._optimizerD.step()

                fakez = torch.normal(mean=mean, std=std)
                condvec = self._data_sampler.sample_condvec(self._batch_size)

                if condvec is None:
                    c1, m1, col, opt = None, None, None, None
                else:
                    c1, m1, col, opt = condvec
                    c1 = torch.from_numpy(c1).to(self._device)
                    m1 = torch.from_numpy(m1).to(self._device)
                    fakez = torch.cat([fakez, c1], dim=1)

                fake = self._generator(fakez)
                fakeact = self._apply_activate(fake)

                if c1 is not None:
                    y_fake = self._discriminator(
                        torch.cat([fakeact, c1], dim=1))
                else:
                    y_fake = self._discriminator(fakeact)

                if condvec is None:
                    cross_entropy = 0
                else:
                    cross_entropy = self._cond_loss(fake, c1, m1)

                loss_g = -torch.mean(y_fake) + cross_entropy

                self._optimizerG.zero_grad()
                loss_g.backward()
                self._optimizerG.step()

                if self._dp:
                    for p in self._discriminator.parameters():
                        if hasattr(p, "grad_sample"):
                            del p.grad_sample

                    self.trained_epsilon, best_alpha = self._optimizerD.privacy_engine.get_privacy_spent(
                        self._delta)
                    if self.trained_epsilon >= epsilon:
                        print(
                            "Privacy budget of {:.2f} exausthed, training halted. Best alpha: {:.2f}"
                            .format(epsilon, best_alpha))
                        return

            if self._verbose:
                print(
                    f"Epoch {i+1}, epslion {self.trained_epsilon: .2f}, Loss G: {loss_g.detach().cpu(): .4f}, "
                    f"Loss D: {loss_d.detach().cpu(): .4f}",
                    flush=True)

        if self._dp:
            self._privacy_engine.detach()

    def sample(self, n, condition_column=None, condition_value=None):
        """Sample data similar to the training data.

        Choosing a condition_column and condition_value will increase the probability of the
        discrete condition_value happening in the condition_column.
        Args:
            n (int):
                Number of rows to sample.
            condition_column (string):
                Name of a discrete column.
            condition_value (string):
                Name of the category in the condition_column which we wish to increase the
                probability of happening.
        Returns:
            numpy.ndarray or pandas.DataFrame
        """
        if condition_column is not None and condition_value is not None:
            condition_info = self._transformer.convert_column_name_value_to_id(
                condition_column, condition_value)
            global_condition_vec = self._data_sampler.generate_cond_from_condition_column_info(
                condition_info, self._batch_size)
        else:
            global_condition_vec = None

        steps = n // self._batch_size + 1
        data = []
        for i in range(steps):
            mean = torch.zeros(self._batch_size, self._embedding_dim)
            std = mean + 1
            fakez = torch.normal(mean=mean, std=std).to(self._device)

            if global_condition_vec is not None:
                condvec = global_condition_vec.copy()
            else:
                condvec = self._data_sampler.sample_original_condvec(
                    self._batch_size)

            if condvec is None:
                pass
            else:
                c1 = condvec
                c1 = torch.from_numpy(c1).to(self._device)
                fakez = torch.cat([fakez, c1], dim=1)

            fake = self._generator(fakez)
            fakeact = self._apply_activate(fake)
            data.append(fakeact.detach().cpu().numpy())

        data = np.concatenate(data, axis=0)
        data = data[:n]

        return self._transformer.inverse_transform(data)

    def set_device(self, device):
        self._device = device
        if hasattr(self, '_generator'):
            self._generator.to(self._device)
        if hasattr(self, '_discriminator'):
            self._discriminator.to(self._device)
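
A hedged usage sketch for the synthesizer above. The toy dataframe and column names are placeholders (a real run needs a realistically sized table), and the DP-related constructor arguments simply mirror the defaults defined in __init__:

import pandas as pd

# Hypothetical training table; 'workclass' and 'income' are the discrete columns.
df = pd.DataFrame({
    'age': [39, 50, 38, 53],
    'workclass': ['State-gov', 'Private', 'Private', 'Private'],
    'hours_per_week': [40, 13, 40, 40],
    'income': ['<=50K', '<=50K', '<=50K', '>50K'],
})

synth = CTGANSynthesizer(epochs=10, dp=True, epsilon=10, delta=1e-5, noise_multiplier=2)
synth.fit(df, discrete_columns=['workclass', 'income'])

# Unconditional sampling, then sampling biased towards a given category.
samples = synth.sample(1000)
high_income = synth.sample(1000, condition_column='income', condition_value='>50K')
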
Example #4
            print('Round{:3d}, Average loss {:.3f}'.format(iter, loss_avg))
            loss_train.append(loss_avg)

            net_glob.eval()
            acc_valid, tmp_loss_valid = test_bank(net_glob, valid_loader, args)
            print('Round{:3d}, Validation loss {:.3f}'.format(
                iter, tmp_loss_valid))
            loss_valid.append(tmp_loss_valid)
            if tmp_loss_valid < best_valid_loss:
                best_valid_loss = tmp_loss_valid
                best_net_glob = copy.deepcopy(net_glob)
                print('SAVE BEST MODEL AT EPOCH {}'.format(iter))
            net_glob.train()

    if args.dp:
        privacy_engine.detach()

    torch.save(best_net_glob, save_prefix + '_best.pt')
    torch.save(net_glob, save_prefix + '_final.pt')

    # plot loss curve
    plt.figure()
    plt.plot(range(len(loss_train)), loss_train, 'r', label='train_loss')
    plt.plot(range(len(loss_valid)), loss_valid, 'b', label='valid_loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.grid(True)
    plt.legend(loc=0)
    plt.savefig(save_prefix + '.png')

    # testing
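
The fragment ends at the '# testing' marker; the actual test code is not shown. Assuming it mirrors the validation step earlier in the loop (test_bank plus a test_loader analogous to valid_loader, both assumptions here), the elided step would look roughly like:

    # hedged sketch of the elided testing step, reusing the snippet's own helpers
    best_net_glob.eval()
    acc_test, loss_test = test_bank(best_net_glob, test_loader, args)  # test_loader is assumed to exist
    print('Best model -- test loss {:.3f}, test accuracy {:.2f}'.format(loss_test, acc_test))
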
Example #5
def RPC_train_test(data, model, parameters, device, log_interval, local_dp,
                   return_params, epoch, delta, if_test):
    """

    :param data:
    :param model:
    :param parameters:
    :param device:
    :param log_interval:
    :param local_dp:
    :param return_params:
    :param epoch:
    :param delta:
    :param if_test:
    :return:
    """
    train = data
    train_batch_size = 64
    test_batch_size = 64

    X = (train.iloc[:, 1:].values).astype('float32')
    Y = train.iloc[:, 0].values
    print(X.shape)
    features_train, features_test, targets_train, targets_test = train_test_split(
        X, Y, test_size=0.2, random_state=42)
    X_train = torch.from_numpy(features_train / 255.0)
    X_test = torch.from_numpy(features_test / 255.0)

    Y_train = torch.from_numpy(targets_train).type(torch.LongTensor)
    Y_test = torch.from_numpy(targets_test).type(torch.LongTensor)

    train = torch.utils.data.TensorDataset(X_train, Y_train)
    test = torch.utils.data.TensorDataset(X_test, Y_test)

    train_loader = torch.utils.data.DataLoader(train,
                                               batch_size=train_batch_size,
                                               shuffle=False)
    test_loader = torch.utils.data.DataLoader(test,
                                              batch_size=test_batch_size,
                                              shuffle=False)

    # if input is train.pt
    # train_loader = data

    test_accuracy = 0
    if if_test:
        model.eval()

        test_loss = 0
        correct = 0
        with torch.no_grad():
            for data, target in test_loader:
                # Send the local and target to the device (cpu/gpu) the model is at
                data, target = data.to(device), target.to(device)
                # Run the model on the local
                batch_size = data.shape[0]
                # print(batch_size)
                data = data.reshape(batch_size, 28, 28)
                data = data.unsqueeze(1)
                output = model(data)
                # Calculate the loss
                test_loss += F.nll_loss(output, target, reduction='sum').item()
                # Check whether prediction was correct
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()

            test_loss /= len(test_loader.dataset)

            print(
                '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'
                .format(test_loss, correct, len(test_loader.dataset),
                        100. * correct / len(test_loader.dataset)))
            test_accuracy = 100. * correct / len(test_loader.dataset)

    else:
        learning_rate = 0.01

        # initialize the optimizer; when local_dp is set, the PrivacyEngine is attached to it below
        optimizer = optim.SGD(parameters, lr=learning_rate, momentum=0.5)

        if local_dp:
            privacy_engine = PrivacyEngine(
                model,
                batch_size=64,
                sample_size=60000,
                alphas=range(2, 32),
                noise_multiplier=1.3,
                max_grad_norm=1.0,
            )
            privacy_engine.attach(optimizer)

        model.train()
        for epoch in range(1, epoch + 1):
            for batch_idx, (data, target) in enumerate(train_loader):
                data, target = data.to(device), target.to(device)
                optimizer.zero_grad()

                batch_size = data.shape[0]
                # print(batch_size)
                data = data.reshape(batch_size, 28, 28)
                data = data.unsqueeze(1)
                # print(data.shape)
                # print(data.type())
                # print(target.type())
                output = model(data)

                loss = F.nll_loss(output, target)
                loss.backward()
                optimizer.step()

                if batch_idx % log_interval == 0:
                    print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.
                          format(epoch, batch_idx * len(data),
                                 len(train_loader.dataset),
                                 100. * batch_idx / len(train_loader),
                                 loss.item()))

            if local_dp:
                epsilon, alpha = optimizer.privacy_engine.get_privacy_spent(
                    delta)
                print("\nEpsilon {}, best alpha {}".format(epsilon, alpha))

        if local_dp:
            # detach the privacy engine from the optimizer; attaching it again later would otherwise raise an error
            privacy_engine.detach()

    if return_params:
        # return the trained parameters (the same tensors as the ``parameters`` argument) along with the model
        return {
            'params': list(model.parameters()),
            'model': model,
            'test_accuracy': test_accuracy
        }
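
A hedged sketch of how RPC_train_test might be invoked, assuming an MNIST-style dataframe mnist_df whose first column is the label; the stand-in network below is an assumption, since the original vantage6 task supplies its own model:

import torch
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Placeholder model; any module accepting [B, 1, 28, 28] input and returning log-probabilities works here.
model = nn.Sequential(
    nn.Conv2d(1, 16, 5), nn.ReLU(), nn.MaxPool2d(2),
    nn.Flatten(), nn.Linear(16 * 12 * 12, 10), nn.LogSoftmax(dim=1)
).to(device)

result = RPC_train_test(
    data=mnist_df,                 # assumed pandas.DataFrame: label in column 0, pixels after it
    model=model,
    parameters=model.parameters(),
    device=device,
    log_interval=100,
    local_dp=True,                 # wraps the optimizer in an Opacus PrivacyEngine
    return_params=True,
    epoch=1,
    delta=1e-5,
    if_test=False,                 # False = train, True = evaluate
)
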


Example #6
# trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
# trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
#                                           shuffle=True, num_workers=2)
#
# testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
# testloader = torch.utils.data.DataLoader(testset, batch_size=4,
#                                           shuffle=False, num_workers=2)
class LitSampleConvNetClassifier(pl.LightningModule):
    def __init__(
        self,
        lr: float = 0.1,
        enable_dp: bool = True,
        delta: float = 1e-5,
        sample_rate: float = 0.001,
        sigma: float = 1.0,
        max_per_sample_grad_norm: float = 1.0,
        secure_rng: bool = False,
    ):
        """A simple conv-net for classifying MNIST with differential privacy

        Args:
            lr: Learning rate
            enable_dp: Enables training with privacy guarantees using Opacus (if True), vanilla SGD otherwise
            delta: Target delta for which (eps, delta)-DP is computed
            sample_rate: Sample rate used for batch construction
            sigma: Noise multiplier
            max_per_sample_grad_norm: Clip per-sample gradients to this norm
            secure_rng: Use secure random number generator
        """
        super().__init__()

        # Hyper-parameters
        self.lr = lr
        self.enable_dp = enable_dp
        self.delta = delta
        self.sample_rate = sample_rate
        self.sigma = sigma
        self.max_per_sample_grad_norm = max_per_sample_grad_norm
        self.secure_rng = secure_rng

        # Parameters
        self.conv1 = nn.Conv2d(1, 16, 8, 2, padding=3)
        self.conv2 = nn.Conv2d(16, 32, 4, 2)
        self.fc1 = nn.Linear(32 * 4 * 4, 32)
        self.fc2 = nn.Linear(32, 10)

        # Privacy engine
        self.privacy_engine = None  # Created before training

        # Metrics
        self.test_accuracy = torchmetrics.Accuracy()

    def forward(self, x):
        # x of shape [B, 1, 28, 28]
        x = F.relu(self.conv1(x))  # -> [B, 16, 14, 14]
        x = F.max_pool2d(x, 2, 1)  # -> [B, 16, 13, 13]
        x = F.relu(self.conv2(x))  # -> [B, 32, 5, 5]
        x = F.max_pool2d(x, 2, 1)  # -> [B, 32, 4, 4]
        x = x.view(-1, 32 * 4 * 4)  # -> [B, 512]
        x = F.relu(self.fc1(x))  # -> [B, 32]
        x = self.fc2(x)  # -> [B, 10]
        return x

    def configure_optimizers(self):
        optimizer = optim.SGD(self.parameters(), lr=self.lr, momentum=0)
        return optimizer

    def training_step(self, batch, batch_idx):
        data, target = batch
        output = self(data)
        loss = F.cross_entropy(output, target)
        self.log("train_loss",
                 loss,
                 on_step=False,
                 on_epoch=True,
                 prog_bar=True)
        return loss

    def test_step(self, batch, batch_idx):
        data, target = batch
        output = self(data)
        loss = F.cross_entropy(output, target)
        self.test_accuracy(output, target)
        self.log("test_loss",
                 loss,
                 on_step=False,
                 on_epoch=True,
                 prog_bar=True)
        self.log("test_accuracy",
                 self.test_accuracy,
                 on_step=False,
                 on_epoch=True)
        return loss

    # Adding differential privacy learning

    def on_train_start(self) -> None:
        if self.enable_dp:
            self.privacy_engine = PrivacyEngine(
                self,
                sample_rate=self.sample_rate,
                alphas=[1 + x / 10.0
                        for x in range(1, 100)] + list(range(12, 64)),
                noise_multiplier=self.sigma,
                max_grad_norm=self.max_per_sample_grad_norm,
                secure_rng=self.secure_rng,
            )

            optimizer = self.optimizers()
            self.privacy_engine.attach(optimizer)

    def on_train_epoch_end(self):
        if self.enable_dp:
            epsilon, best_alpha = self.privacy_engine.get_privacy_spent(
                self.delta)
            # Privacy spent: (epsilon, delta) for alpha
            self.log("epsilon", epsilon, on_epoch=True, prog_bar=True)
            self.log("alpha", best_alpha, on_epoch=True, prog_bar=True)

    def on_train_end(self):
        if self.enable_dp:
            self.privacy_engine.detach()
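
A hedged sketch of driving the Lightning module above. Because the PrivacyEngine is configured with a sample_rate, Opacus 0.x examples pair it with a UniformWithReplacementSampler for batch construction; the import path below reflects Opacus 0.x and may differ across versions, and the MNIST pipeline is illustrative rather than taken from the original example:

import pytorch_lightning as pl
import torch
from torchvision import datasets, transforms
from opacus.utils.uniform_sampler import UniformWithReplacementSampler

transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.1307,), (0.3081,))])
train_set = datasets.MNIST("./data", train=True, download=True, transform=transform)

# Batches are drawn with the same sample_rate the PrivacyEngine accounts for.
model = LitSampleConvNetClassifier(lr=0.1, enable_dp=True, sample_rate=0.004, sigma=1.0)
batch_sampler = UniformWithReplacementSampler(num_samples=len(train_set),
                                              sample_rate=model.sample_rate)
train_loader = torch.utils.data.DataLoader(train_set, batch_sampler=batch_sampler)

trainer = pl.Trainer(max_epochs=1)
trainer.fit(model, train_loader)
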
Example #7
def server(cur_net, current_iter, current_server_rank_id, best_valid_loss, best_net_glob, server_flag):
	loss_locals = []
	w_state_dict_locals = []

	# local train
	cur_net.train()
	optimizer = get_optimizer(args, cur_net)
	loss_func = nn.CrossEntropyLoss()
	if args.dp:
		privacy_engine = PrivacyEngine(cur_net, batch_size=args.bs, sample_size=len(local_train_loader),
										alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
										noise_multiplier=0.3, max_grad_norm=1.2, secure_rng=args.secure_rng)
		privacy_engine.attach(optimizer)
	current_state_dict, current_loss = normal_train(args, cur_net, optimizer, loss_func, local_train_loader, valid_loader)

	if args.dp:
		privacy_engine.detach()

	loss_locals.append(current_loss)
	if args.tphe:
		w_state_dict_locals.append(encrypt_torch_state_dict(pub_key, current_state_dict))
	else:
		w_state_dict_locals.append(current_state_dict)

	# receive from others
	loop = True
	while loop:
		# Get the list sockets which are ready to be read through select
		rList, wList, error_sockets = select.select(server_connection_list,[],[])
		for sockfd in rList:
			tmp_pkl_data = sockfd.recv(int(args.buffer)) # 760586945
			tmp_state_dict, tmp_loss = pickle.loads(tmp_pkl_data)
			w_state_dict_locals.append(tmp_state_dict)
			loss_locals.append(tmp_loss)
			if len(w_state_dict_locals) == args.num_users:
				loop = False
				break

	# aggregate weight state_dicts
	aggregated_state_dict = state_dict_aggregation(w_state_dict_locals)

	# distribute the aggregated weight state_dict
	send_aggregated_weight_state_dict_to_all(aggregated_state_dict)

	# parse aggregated state_dict
	parse_aggregated_state_dict(aggregated_state_dict, cur_net)

	loss_avg = sum(loss_locals) / len(loss_locals)
	print('Round{:3d}, Average loss {:.3f}'.format(current_iter, loss_avg))
	loss_train.append(loss_avg)
	cur_net.eval()
	acc_valid, tmp_loss_valid = test_bank(cur_net, valid_loader, args)
	print('Round{:3d}, Validation loss {:.3f}'.format(current_iter, tmp_loss_valid))
	loss_valid.append(tmp_loss_valid)
	if tmp_loss_valid < best_valid_loss:
		best_valid_loss = tmp_loss_valid
		best_net_glob = copy.deepcopy(cur_net)
		print('SAVE BEST MODEL AT EPOCH {}'.format(current_iter))

	# pick the server for next epoch
	next_server_rank_id = random.randint(0, args.num_users-1)

	# distribute metadata
	send_metadata_to_all(loss_avg, tmp_loss_valid, next_server_rank_id)

	if next_server_rank_id != args.rank:
		server_flag = False
		current_server_rank_id = next_server_rank_id

	print("\33[31m\33[1m Current server rank id {} \33[0m".format(current_server_rank_id))

	return cur_net, current_server_rank_id, best_valid_loss, best_net_glob, server_flag
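
state_dict_aggregation, send_aggregated_weight_state_dict_to_all and the rest of the networking helpers are defined elsewhere in this project. As an assumption about what the plaintext aggregation could look like, here is a minimal FedAvg-style sketch that averages the collected state_dicts entry-wise (the TPHE-encrypted case would need the same averaging on ciphertexts instead):

import copy

def state_dict_aggregation_sketch(w_state_dict_locals):
    """Average a list of plaintext state_dicts entry-wise (FedAvg-style sketch)."""
    aggregated = copy.deepcopy(w_state_dict_locals[0])
    for key in aggregated.keys():
        for other in w_state_dict_locals[1:]:
            aggregated[key] = aggregated[key] + other[key]
        aggregated[key] = aggregated[key] / len(w_state_dict_locals)
    return aggregated
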
Example #8
    def train(self,
              data,
              categorical_columns=None,
              ordinal_columns=None,
              update_epsilon=None,
              verbose=False,
              mlflow=False):
        if update_epsilon:
            self.epsilon = update_epsilon

        if isinstance(data, pd.DataFrame):
            for col in data.columns:
                data[col] = pd.to_numeric(data[col], errors='ignore')
            self.pd_cols = data.columns
            self.pd_index = data.index
            data = data.to_numpy()
        elif not isinstance(data, np.ndarray):
            raise ValueError("Data must be a numpy array or pandas dataframe")

        dataset = TensorDataset(
            torch.from_numpy(data.astype('float32')).to(self.device))
        dataloader = DataLoader(dataset,
                                batch_size=self.batch_size,
                                shuffle=True,
                                drop_last=True)

        if not hasattr(self, "generator"):
            self.generator = Generator(self.latent_dim,
                                       data.shape[1],
                                       binary=self.binary).to(self.device)
        if not hasattr(self, "discriminator"):
            self.discriminator = Discriminator(data.shape[1]).to(self.device)

        self.optimizer_d = optim.Adam(self.discriminator.parameters(),
                                      lr=4e-4,
                                      betas=(0.5, 0.9))
        if hasattr(self, "state_dict"):
            self.optimizer_d.load_state_dict(self.state_dict)

        if not hasattr(self, "privacy_engine"):
            privacy_engine = PrivacyEngine(
                self.discriminator,
                batch_size=self.batch_size,
                sample_size=len(data),
                alphas=[1 + x / 10.0
                        for x in range(1, 100)] + list(range(12, 64)),
                noise_multiplier=3.5,
                max_grad_norm=1.0,
                clip_per_layer=True).to(self.device)
        else:
            privacy_engine = self.privacy_engine

        privacy_engine.attach(self.optimizer_d)

        if hasattr(self, "privacy_engine"):
            epsilon, best_alpha = self.optimizer_d.privacy_engine.get_privacy_spent(
                self.delta)
        else:
            epsilon = 0

        if not hasattr(self, "optimizer_g"):
            self.optimizer_g = optim.Adam(self.generator.parameters(), lr=1e-4)

        criterion = nn.BCELoss()

        for epoch in range(self.epochs):

            if self.epsilon < epsilon:
                break

            for i, data in enumerate(dataloader):
                self.discriminator.zero_grad()

                real_data = data[0].to(self.device)

                # train with fake data
                noise = torch.randn(self.batch_size,
                                    self.latent_dim,
                                    1,
                                    1,
                                    device=self.device)
                noise = noise.view(-1, self.latent_dim)
                fake_data = self.generator(noise)
                label_fake = torch.full((self.batch_size, 1),
                                        0,
                                        dtype=torch.float,
                                        device=self.device)
                output = self.discriminator(fake_data.detach())
                loss_d_fake = criterion(output, label_fake)
                loss_d_fake.backward()
                self.optimizer_d.step()

                # train with real data
                label_true = torch.full((self.batch_size, 1),
                                        1,
                                        dtype=torch.float,
                                        device=self.device)
                output = self.discriminator(real_data.float())
                loss_d_real = criterion(output, label_true)
                loss_d_real.backward()
                self.optimizer_d.step()

                loss_d = loss_d_real + loss_d_fake

                max_grad_norm = []
                for p in self.discriminator.parameters():
                    param_norm = p.grad.data.norm(2).item()
                    max_grad_norm.append(param_norm)

                privacy_engine.max_grad_norm = max_grad_norm

                # train generator
                self.generator.zero_grad()
                label_g = torch.full((self.batch_size, 1),
                                     1,
                                     dtype=torch.float,
                                     device=self.device)
                output_g = self.discriminator(fake_data)
                loss_g = criterion(output_g, label_g)
                loss_g.backward()
                self.optimizer_g.step()

                # manually clear gradients
                for p in self.discriminator.parameters():
                    if hasattr(p, "grad_sample"):
                        del p.grad_sample
                # autograd_grad_sample.clear_backprops(discriminator)

                if self.delta is None:
                    # default delta to 1/N for a training set of N rows
                    self.delta = 1 / len(dataset)

                eps, best_alpha = self.optimizer_d.privacy_engine.get_privacy_spent(
                    self.delta)
                self.alpha = best_alpha

            if (verbose):
                print('eps: {:f} \t alpha: {:f} \t G: {:f} \t D: {:f}'.format(
                    eps, best_alpha,
                    loss_g.detach().cpu(),
                    loss_d.detach().cpu()))

            if (mlflow):
                import mlflow
                mlflow.log_metric("loss_g",
                                  float(loss_g.detach().cpu()),
                                  step=epoch)
                mlflow.log_metric("loss_d",
                                  float(loss_d.detach().cpu()),
                                  step=epoch)
                mlflow.log_metric("epsilon", float(eps), step=epoch)

            if self.epsilon < eps:
                break

        privacy_engine.detach()
        self.state_dict = self.optimizer_d.state_dict()
        self.privacy_engine = privacy_engine
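
During training, the spent budget is read back from the attached engine via optimizer.privacy_engine.get_privacy_spent(delta). For a rough pre-run estimate of the same quantity, Opacus 0.x also exposes an RDP accountant that can be queried offline; the sketch below assumes that module layout (opacus.privacy_analysis) and gives an approximation, not a reproduction of what the engine reports:

from opacus import privacy_analysis

def estimate_epsilon(sample_rate, noise_multiplier, steps, delta=1e-5):
    """Rough offline (epsilon, best_alpha) estimate for DP-SGD with the given settings."""
    orders = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64))
    rdp = privacy_analysis.compute_rdp(sample_rate, noise_multiplier, steps, orders)
    eps, best_alpha = privacy_analysis.get_privacy_spent(orders, rdp, delta)
    return eps, best_alpha

# e.g. batches of 64 rows out of 6400 (sample rate 0.01), sigma 3.5, 100 optimizer steps
print(estimate_epsilon(sample_rate=0.01, noise_multiplier=3.5, steps=100))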