    def __init__(
        self,
        model,
        optimizer,
        noise_multiplier,
        max_grad_norm,
        batch_size,
        dataset_size,
        target_delta,
        alphas,
    ):
        self.noise_multiplier = noise_multiplier
        self.max_grad_norm = max_grad_norm
        self.batch_size = batch_size
        self.dataset_size = dataset_size
        self.target_delta = target_delta
        self.alphas = alphas

        self._privacy_engine = opacus.PrivacyEngine(
            model,
            batch_size=self.batch_size,
            sample_size=self.dataset_size,
            alphas=self.alphas,
            noise_multiplier=self.noise_multiplier,
            max_grad_norm=self.max_grad_norm,
            target_delta=self.target_delta,
        )
        self._privacy_engine.attach(optimizer)
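
A minimal usage sketch for the constructor above, assuming the legacy Opacus 0.x API and a hypothetical wrapper class name DPTrainer (the class name is not shown in the snippet):

import torch.nn as nn
import torch.optim as optim

model = nn.Linear(10, 2)
optimizer = optim.SGD(model.parameters(), lr=0.05)

# DPTrainer is a placeholder for the class whose __init__ is listed above.
trainer = DPTrainer(
    model,
    optimizer,
    noise_multiplier=1.1,   # sigma of the Gaussian noise added to gradients
    max_grad_norm=1.0,      # per-sample gradient clipping bound
    batch_size=64,
    dataset_size=50_000,
    target_delta=1e-5,
    alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
)

# Once attached, optimizer.step() performs DP-SGD updates and the engine tracks
# the privacy spent, e.g. optimizer.privacy_engine.get_privacy_spent(1e-5).
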
Example #2
    def train(self,
              data,
              categorical_columns=None,
              ordinal_columns=None,
              update_epsilon=None):
        if update_epsilon:
            self.epsilon = update_epsilon

        self.transformer = DataTransformer()
        self.transformer.fit(data, discrete_columns=categorical_columns)
        train_data = self.transformer.transform(data)

        data_sampler = Sampler(train_data, self.transformer.output_info)

        data_dim = self.transformer.output_dimensions
        self.cond_generator = ConditionalGenerator(
            train_data, self.transformer.output_info, self.log_frequency)

        self.generator = Generator(
            self.embedding_dim + self.cond_generator.n_opt, self.gen_dim,
            data_dim).to(self.device)

        discriminator = Discriminator(data_dim + self.cond_generator.n_opt,
                                      self.dis_dim, self.loss,
                                      self.pack).to(self.device)

        optimizerG = optim.Adam(self.generator.parameters(),
                                lr=2e-4,
                                betas=(0.5, 0.9),
                                weight_decay=self.l2scale)
        optimizerD = optim.Adam(discriminator.parameters(),
                                lr=2e-4,
                                betas=(0.5, 0.9))

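        # Differential privacy: wrap the discriminator with the (pre-1.0) Opacus
        # PrivacyEngine.  Per-sample gradients are clipped layer-wise to
        # max_per_sample_grad_norm, Gaussian noise scaled by sigma is added, and
        # `alphas` are the Renyi orders used by the RDP accountant.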
        privacy_engine = opacus.PrivacyEngine(
            discriminator,
            batch_size=self.batch_size,
            sample_size=train_data.shape[0],
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=self.sigma,
            max_grad_norm=self.max_per_sample_grad_norm,
            clip_per_layer=True)

        if not self.disabled_dp:
            privacy_engine.attach(optimizerD)

        one = torch.tensor(1, dtype=torch.float).to(self.device)
        mone = one * -1

        REAL_LABEL = 1
        FAKE_LABEL = 0
        criterion = nn.BCELoss()

        assert self.batch_size % 2 == 0
        mean = torch.zeros(self.batch_size,
                           self.embedding_dim,
                           device=self.device)
        std = mean + 1

        steps_per_epoch = max(len(train_data) // self.batch_size, 1)
        for i in range(self.epochs):
            for id_ in range(steps_per_epoch):
                fakez = torch.normal(mean=mean, std=std)

                condvec = self.cond_generator.sample(self.batch_size)
                if condvec is None:
                    c1, m1, col, opt = None, None, None, None
                    real = data_sampler.sample(self.batch_size, col, opt)
                else:
                    c1, m1, col, opt = condvec
                    c1 = torch.from_numpy(c1).to(self.device)
                    m1 = torch.from_numpy(m1).to(self.device)
                    fakez = torch.cat([fakez, c1], dim=1)

                    perm = np.arange(self.batch_size)
                    np.random.shuffle(perm)
                    real = data_sampler.sample(self.batch_size, col[perm],
                                               opt[perm])
                    c2 = c1[perm]

                fake = self.generator(fakez)
                fakeact = self._apply_activate(fake)

                real = torch.from_numpy(real.astype('float32')).to(self.device)

                if c1 is not None:
                    fake_cat = torch.cat([fakeact, c1], dim=1)
                    real_cat = torch.cat([real, c2], dim=1)
                else:
                    real_cat = real
                    fake_cat = fakeact

                optimizerD.zero_grad()

                if self.loss == 'cross_entropy':
                    y_fake = discriminator(fake_cat)

                    #   print ('y_fake is {}'.format(y_fake))
                    label_fake = torch.full(
                        (int(self.batch_size / self.pack), ),
                        FAKE_LABEL,
                        dtype=torch.float,
                        device=self.device)

                    #    print ('label_fake is {}'.format(label_fake))

                    errD_fake = criterion(y_fake.squeeze(), label_fake)
                    errD_fake.backward()
                    optimizerD.step()

                    # train with real
                    label_true = torch.full(
                        (int(self.batch_size / self.pack), ),
                        REAL_LABEL,
                        dtype=torch.float,
                        device=self.device)
                    y_real = discriminator(real_cat)
                    errD_real = criterion(y_real.squeeze(), label_true)
                    errD_real.backward()
                    optimizerD.step()

                    loss_d = errD_real + errD_fake

                else:

                    y_fake = discriminator(fake_cat)
                    mean_fake = torch.mean(y_fake)
                    mean_fake.backward(one)

                    y_real = discriminator(real_cat)
                    mean_real = torch.mean(y_real)
                    mean_real.backward(mone)

                    optimizerD.step()

                    loss_d = -(mean_real - mean_fake)

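                # Diagnostic only: collect the L2 norm of each discriminator gradient
                # after the update; the list is not used further in this snippet.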
                max_grad_norm = []
                for p in discriminator.parameters():
                    param_norm = p.grad.data.norm(2).item()
                    max_grad_norm.append(param_norm)
                #pen = calc_gradient_penalty(discriminator, real_cat, fake_cat, self.device)

                #pen.backward(retain_graph=True)
                #loss_d.backward()
                #optimizerD.step()

                fakez = torch.normal(mean=mean, std=std)
                condvec = self.cond_generator.sample(self.batch_size)

                if condvec is None:
                    c1, m1, col, opt = None, None, None, None
                else:
                    c1, m1, col, opt = condvec
                    c1 = torch.from_numpy(c1).to(self.device)
                    m1 = torch.from_numpy(m1).to(self.device)
                    fakez = torch.cat([fakez, c1], dim=1)

                fake = self.generator(fakez)
                fakeact = self._apply_activate(fake)

                if c1 is not None:
                    y_fake = discriminator(torch.cat([fakeact, c1], dim=1))
                else:
                    y_fake = discriminator(fakeact)

                #if condvec is None:
                cross_entropy = 0
                #else:
                #    cross_entropy = self._cond_loss(fake, c1, m1)

                if self.loss == 'cross_entropy':
                    label_g = torch.full((int(self.batch_size / self.pack), ),
                                         REAL_LABEL,
                                         dtype=torch.float,
                                         device=self.device)
                    #label_g = torch.full(int(self.batch_size/self.pack,),1,device=self.device)
                    loss_g = criterion(y_fake.squeeze(), label_g)
                    loss_g = loss_g + cross_entropy
                else:
                    loss_g = -torch.mean(y_fake) + cross_entropy

                optimizerG.zero_grad()
                loss_g.backward()
                optimizerG.step()

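                # Privacy accounting: drop Opacus' per-sample gradient buffers, default
                # target_delta to 1/n, and query the accountant for the epsilon spent.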
                if not self.disabled_dp:
                    #if self.loss == 'cross_entropy':
                    #    autograd_grad_sample.clear_backprops(discriminator)
                    #else:
                    for p in discriminator.parameters():
                        if hasattr(p, "grad_sample"):
                            del p.grad_sample

                    if self.target_delta is None:
                        self.target_delta = 1 / train_data.shape[0]

                    epsilon, best_alpha = optimizerD.privacy_engine.get_privacy_spent(
                        self.target_delta)

                    self.epsilon_list.append(epsilon)
                    self.alpha_list.append(best_alpha)
                    #if self.verbose:

            if not self.disabled_dp:
                if self.epsilon < epsilon:
                    break
            self.loss_d_list.append(loss_d)
            self.loss_g_list.append(loss_g)
            if self.verbose:
                print("Epoch %d, Loss G: %.4f, Loss D: %.4f" %
                      (i + 1, loss_g.detach().cpu(), loss_d.detach().cpu()),
                      flush=True)
                if not self.disabled_dp:
                    print('epsilon is {e}, alpha is {a}'.format(e=epsilon,
                                                                a=best_alpha))

        return self.loss_d_list, self.loss_g_list, self.epsilon_list, self.alpha_list
Example #3
    def train(self,
              data,
              categorical_columns=None,
              ordinal_columns=None,
              update_epsilon=None):
        if update_epsilon:
            self.epsilon = update_epsilon

        for col in categorical_columns or []:
            if str(data[col].dtype).startswith('float'):
                raise ValueError(
                    "It looks like you are passing in a vector of continuous values "
                    f"to a categorical column at [{col}]. "
                    "Please discretize and pass in categorical columns with "
                    "unsigned integer or string category names.")

        self._transformer = DataTransformer(self.preprocessor_eps)
        self._transformer.fit(data, discrete_columns=categorical_columns)
        # for tinfo in self._transformer._column_transform_info_list:
        #    if tinfo.column_type == "continuous":
        #        raise ValueError("We don't support continuous values on this synthesizer.  Please discretize values.")

        train_data = self._transformer.transform(data)

        sampler_eps = 0.0

        if categorical_columns and self._category_epsilon_pct:
            sampler_eps = self.epsilon * self._category_epsilon_pct
            per_col_sampler_eps = sampler_eps / len(categorical_columns)
            self.epsilon = self.epsilon - sampler_eps
        else:
            per_col_sampler_eps = None

        self._data_sampler = DataSampler(
            train_data,
            self._transformer.output_info_list,
            self._log_frequency,
            per_column_epsilon=per_col_sampler_eps)

        spent = self._data_sampler.total_spent
        if (spent > sampler_eps and not np.isclose(spent, sampler_eps)):
            raise AssertionError(
                f"The data sampler used {spent} epsilon and was budgeted for {sampler_eps}"
            )

        data_dim = self._transformer.output_dimensions

        self._generator = Generator(
            self._embedding_dim + self._data_sampler.dim_cond_vec(),
            self._generator_dim, data_dim).to(self._device)

        discriminator = Discriminator(
            data_dim + self._data_sampler.dim_cond_vec(),
            self._discriminator_dim, self.loss, self.pac).to(self._device)

        optimizerG = optim.Adam(self._generator.parameters(),
                                lr=self._generator_lr,
                                betas=(0.5, 0.9),
                                weight_decay=self._generator_decay)
        optimizerD = optim.Adam(discriminator.parameters(),
                                lr=self._discriminator_lr,
                                betas=(0.5, 0.9),
                                weight_decay=self._discriminator_decay)

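        # As in the previous example: attach a (pre-1.0) Opacus PrivacyEngine so the
        # discriminator is trained with per-layer gradient clipping and Gaussian noise.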
        privacy_engine = opacus.PrivacyEngine(
            discriminator,
            batch_size=self._batch_size,
            sample_size=train_data.shape[0],
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=self.sigma,
            max_grad_norm=self.max_per_sample_grad_norm,
            clip_per_layer=True,
        )

        if not self.disabled_dp:
            privacy_engine.attach(optimizerD)

        one = torch.tensor(1, dtype=torch.float).to(self._device)
        mone = one * -1

        real_label = 1
        fake_label = 0
        criterion = nn.BCELoss()

        assert self._batch_size % 2 == 0
        mean = torch.zeros(self._batch_size,
                           self._embedding_dim,
                           device=self._device)
        std = mean + 1

        steps_per_epoch = max(len(train_data) // self._batch_size, 1)
        for i in range(self._epochs):
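            # Check the privacy budget at the start of each epoch: clear stale
            # per-sample gradients, default delta to 1/(n*sqrt(n)), and stop (or fail
            # on the first epoch) once the spent epsilon exceeds the requested budget.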
            if not self.disabled_dp:
                # if self.loss == 'cross_entropy':
                #    autograd_grad_sample.clear_backprops(discriminator)
                # else:
                for p in discriminator.parameters():
                    if hasattr(p, "grad_sample"):
                        del p.grad_sample

                if self.delta is None:
                    self.delta = 1 / (train_data.shape[0] *
                                      np.sqrt(train_data.shape[0]))

                epsilon, best_alpha = optimizerD.privacy_engine.get_privacy_spent(
                    self.delta)

                self.epsilon_list.append(epsilon)
                self.alpha_list.append(best_alpha)
                if self.epsilon < epsilon:
                    if self._epochs == 1:
                        raise ValueError(
                            "Inputted epsilon and sigma parameters are too small to"
                            " create a private dataset. Try increasing either parameter"
                            " and rerunning.")
                    else:
                        break

            for id_ in range(steps_per_epoch):
                fakez = torch.normal(mean=mean, std=std)

                condvec = self._data_sampler.sample_condvec(self._batch_size)
                if condvec is None:
                    c1, m1, col, opt = None, None, None, None
                    real = self._data_sampler.sample_data(
                        self._batch_size, col, opt)
                else:
                    c1, m1, col, opt = condvec
                    c1 = torch.from_numpy(c1).to(self._device)
                    m1 = torch.from_numpy(m1).to(self._device)
                    fakez = torch.cat([fakez, c1], dim=1)

                    perm = np.arange(self._batch_size)
                    np.random.shuffle(perm)
                    real = self._data_sampler.sample_data(
                        self._batch_size, col[perm], opt[perm])
                    c2 = c1[perm]

                fake = self._generator(fakez)
                fakeact = self._apply_activate(fake)

                real = torch.from_numpy(real.astype("float32")).to(
                    self._device)

                if c1 is not None:
                    fake_cat = torch.cat([fakeact, c1], dim=1)
                    real_cat = torch.cat([real, c2], dim=1)
                else:
                    real_cat = real
                    fake_cat = fakeact

                optimizerD.zero_grad()

                if self.loss == "cross_entropy":
                    y_fake = discriminator(fake_cat)

                    #   print ('y_fake is {}'.format(y_fake))
                    label_fake = torch.full(
                        (int(self._batch_size / self.pac), ),
                        fake_label,
                        dtype=torch.float,
                        device=self._device,
                    )

                    #    print ('label_fake is {}'.format(label_fake))

                    error_d_fake = criterion(y_fake.squeeze(), label_fake)
                    error_d_fake.backward()
                    optimizerD.step()

                    # train with real
                    label_true = torch.full(
                        (int(self._batch_size / self.pac), ),
                        real_label,
                        dtype=torch.float,
                        device=self._device,
                    )
                    y_real = discriminator(real_cat)
                    error_d_real = criterion(y_real.squeeze(), label_true)
                    error_d_real.backward()
                    optimizerD.step()

                    loss_d = error_d_real + error_d_fake

                else:

                    y_fake = discriminator(fake_cat)
                    mean_fake = torch.mean(y_fake)
                    mean_fake.backward(one)

                    y_real = discriminator(real_cat)
                    mean_real = torch.mean(y_real)
                    mean_real.backward(mone)

                    optimizerD.step()

                    loss_d = -(mean_real - mean_fake)

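                # Diagnostic only: per-parameter gradient norms (unused below).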
                max_grad_norm = []
                for p in discriminator.parameters():
                    param_norm = p.grad.data.norm(2).item()
                    max_grad_norm.append(param_norm)
                # pen = calc_gradient_penalty(discriminator, real_cat, fake_cat, self.device)

                # pen.backward(retain_graph=True)
                # loss_d.backward()
                # optimizer_d.step()

                fakez = torch.normal(mean=mean, std=std)
                condvec = self._data_sampler.sample_condvec(self._batch_size)

                if condvec is None:
                    c1, m1, col, opt = None, None, None, None
                else:
                    c1, m1, col, opt = condvec
                    c1 = torch.from_numpy(c1).to(self._device)
                    m1 = torch.from_numpy(m1).to(self._device)
                    fakez = torch.cat([fakez, c1], dim=1)

                fake = self._generator(fakez)
                fakeact = self._apply_activate(fake)

                if c1 is not None:
                    y_fake = discriminator(torch.cat([fakeact, c1], dim=1))
                else:
                    y_fake = discriminator(fakeact)

                # if condvec is None:
                cross_entropy = 0
                # else:
                #    cross_entropy = self._cond_loss(fake, c1, m1)

                if self.loss == "cross_entropy":
                    label_g = torch.full(
                        (int(self._batch_size / self.pac), ),
                        real_label,
                        dtype=torch.float,
                        device=self._device,
                    )
                    # label_g = torch.full(int(self.batch_size/self.pack,),1,device=self.device)
                    loss_g = criterion(y_fake.squeeze(), label_g)
                    loss_g = loss_g + cross_entropy
                else:
                    loss_g = -torch.mean(y_fake) + cross_entropy

                optimizerG.zero_grad()
                loss_g.backward()
                optimizerG.step()

            self.loss_d_list.append(loss_d)
            self.loss_g_list.append(loss_g)
            if self.verbose:
                print(
                    "Epoch %d, Loss G: %.4f, Loss D: %.4f" %
                    (i + 1, loss_g.detach().cpu(), loss_d.detach().cpu()),
                    flush=True,
                )
                if not self.disabled_dp:
                    print("epsilon is {e}, alpha is {a}".format(e=epsilon,
                                                                a=best_alpha))

        return self.loss_d_list, self.loss_g_list, self.epsilon_list, self.alpha_list
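
A rough usage sketch for the train() method above, assuming a hypothetical synthesizer class DPCTGAN that wraps it (the constructor arguments are illustrative, not taken from the listing) and a pandas DataFrame whose categorical columns are already discretized:

import pandas as pd

df = pd.DataFrame({
    "age_bucket": ["18-25", "26-35", "36-45", "26-35"] * 250,
    "income_bucket": ["low", "mid", "high", "mid"] * 250,
})

synth = DPCTGAN(epsilon=3.0)  # hypothetical wrapper exposing train()

# Float-typed columns are rejected for categorical_columns (see the check above).
loss_d, loss_g, eps_hist, alpha_hist = synth.train(
    df,
    categorical_columns=["age_bucket", "income_bucket"],
)

if eps_hist:
    print("epsilon spent:", eps_hist[-1], "best alpha:", alpha_hist[-1])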