Beispiel #1
0
    def train(self,
              data,
              categorical_columns=None,
              ordinal_columns=None,
              update_epsilon=None):
        if update_epsilon:
            self.epsilon = update_epsilon

        self.transformer = DataTransformer()
        self.transformer.fit(data, discrete_columns=categorical_columns)
        train_data = self.transformer.transform(data)

        data_sampler = Sampler(train_data, self.transformer.output_info)

        data_dim = self.transformer.output_dimensions
        self.cond_generator = ConditionalGenerator(
            train_data, self.transformer.output_info, self.log_frequency)

        self.generator = Generator(
            self.embedding_dim + self.cond_generator.n_opt, self.gen_dim,
            data_dim).to(self.device)

        discriminator = Discriminator(data_dim + self.cond_generator.n_opt,
                                      self.dis_dim, self.loss,
                                      self.pack).to(self.device)

        optimizer_g = optim.Adam(self.generator.parameters(),
                                 lr=2e-4,
                                 betas=(0.5, 0.9),
                                 weight_decay=self.l2scale)
        optimizer_d = optim.Adam(discriminator.parameters(),
                                 lr=2e-4,
                                 betas=(0.5, 0.9))

        privacy_engine = opacus.PrivacyEngine(
            discriminator,
            batch_size=self.batch_size,
            sample_size=train_data.shape[0],
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=self.sigma,
            max_grad_norm=self.max_per_sample_grad_norm,
            clip_per_layer=True,
        )

        if not self.disabled_dp:
            privacy_engine.attach(optimizer_d)

        one = torch.tensor(1, dtype=torch.float).to(self.device)
        mone = one * -1

        real_label = 1
        fake_label = 0
        criterion = nn.BCELoss()

        assert self.batch_size % 2 == 0
        mean = torch.zeros(self.batch_size,
                           self.embedding_dim,
                           device=self.device)
        std = mean + 1

        steps_per_epoch = len(train_data) // self.batch_size
        for i in range(self.epochs):
            for id_ in range(steps_per_epoch):
                fakez = torch.normal(mean=mean, std=std)

                condvec = self.cond_generator.sample(self.batch_size)
                if condvec is None:
                    c1, m1, col, opt = None, None, None, None
                    real = data_sampler.sample(self.batch_size, col, opt)
                else:
                    c1, m1, col, opt = condvec
                    c1 = torch.from_numpy(c1).to(self.device)
                    m1 = torch.from_numpy(m1).to(self.device)
                    fakez = torch.cat([fakez, c1], dim=1)

                    perm = np.arange(self.batch_size)
                    np.random.shuffle(perm)
                    real = data_sampler.sample(self.batch_size, col[perm],
                                               opt[perm])
                    c2 = c1[perm]

                fake = self.generator(fakez)
                fakeact = self._apply_activate(fake)

                real = torch.from_numpy(real.astype("float32")).to(self.device)

                if c1 is not None:
                    fake_cat = torch.cat([fakeact, c1], dim=1)
                    real_cat = torch.cat([real, c2], dim=1)
                else:
                    real_cat = real
                    fake_cat = fake

                optimizer_d.zero_grad()

                if self.loss == "cross_entropy":
                    y_fake = discriminator(fake_cat)

                    #   print ('y_fake is {}'.format(y_fake))
                    label_fake = torch.full(
                        (int(self.batch_size / self.pack), ),
                        fake_label,
                        dtype=torch.float,
                        device=self.device,
                    )

                    #    print ('label_fake is {}'.format(label_fake))

                    error_d_fake = criterion(y_fake, label_fake)
                    error_d_fake.backward()
                    optimizer_d.step()

                    # train with real
                    label_true = torch.full(
                        (int(self.batch_size / self.pack), ),
                        real_label,
                        dtype=torch.float,
                        device=self.device,
                    )
                    y_real = discriminator(real_cat)
                    error_d_real = criterion(y_real, label_true)
                    error_d_real.backward()
                    optimizer_d.step()

                    loss_d = error_d_real + error_d_fake

                else:

                    y_fake = discriminator(fake_cat)
                    mean_fake = torch.mean(y_fake)
                    mean_fake.backward(one)

                    y_real = discriminator(real_cat)
                    mean_real = torch.mean(y_real)
                    mean_real.backward(mone)

                    optimizer_d.step()

                    loss_d = -(mean_real - mean_fake)

                max_grad_norm = []
                for p in discriminator.parameters():
                    param_norm = p.grad.data.norm(2).item()
                    max_grad_norm.append(param_norm)
                # pen = calc_gradient_penalty(discriminator, real_cat, fake_cat, self.device)

                # pen.backward(retain_graph=True)
                # loss_d.backward()
                # optimizer_d.step()

                fakez = torch.normal(mean=mean, std=std)
                condvec = self.cond_generator.sample(self.batch_size)

                if condvec is None:
                    c1, m1, col, opt = None, None, None, None
                else:
                    c1, m1, col, opt = condvec
                    c1 = torch.from_numpy(c1).to(self.device)
                    m1 = torch.from_numpy(m1).to(self.device)
                    fakez = torch.cat([fakez, c1], dim=1)

                fake = self.generator(fakez)
                fakeact = self._apply_activate(fake)

                if c1 is not None:
                    y_fake = discriminator(torch.cat([fakeact, c1], dim=1))
                else:
                    y_fake = discriminator(fakeact)

                # if condvec is None:
                cross_entropy = 0
                # else:
                #    cross_entropy = self._cond_loss(fake, c1, m1)

                if self.loss == "cross_entropy":
                    label_g = torch.full(
                        (int(self.batch_size / self.pack), ),
                        real_label,
                        dtype=torch.float,
                        device=self.device,
                    )
                    # label_g = torch.full(int(self.batch_size/self.pack,),1,device=self.device)
                    loss_g = criterion(y_fake, label_g)
                    loss_g = loss_g + cross_entropy
                else:
                    loss_g = -torch.mean(y_fake) + cross_entropy

                optimizer_g.zero_grad()
                loss_g.backward()
                optimizer_g.step()

                if not self.disabled_dp:
                    # if self.loss == 'cross_entropy':
                    #    autograd_grad_sample.clear_backprops(discriminator)
                    # else:
                    for p in discriminator.parameters():
                        if hasattr(p, "grad_sample"):
                            del p.grad_sample

                    if self.target_delta is None:
                        self.target_delta = 1 / train_data.shape[0]

                    epsilon, best_alpha = optimizer_d.privacy_engine.get_privacy_spent(
                        self.target_delta)

                    self.epsilon_list.append(epsilon)
                    self.alpha_list.append(best_alpha)
                    # if self.verbose:

            if not self.disabled_dp:
                if self.epsilon < epsilon:
                    break
            self.loss_d_list.append(loss_d)
            self.loss_g_list.append(loss_g)
            if self.verbose:
                print(
                    "Epoch %d, Loss G: %.4f, Loss D: %.4f" %
                    (i + 1, loss_g.detach().cpu(), loss_d.detach().cpu()),
                    flush=True,
                )
                print("epsilon is {e}, alpha is {a}".format(e=epsilon,
                                                            a=best_alpha))

        return self.loss_d_list, self.loss_g_list, self.epsilon_list, self.alpha_list
Beispiel #2
0
    def fit(self, train_data, discrete_columns=tuple(), epochs=300, log_frequency=True):
        """Fit the CTGAN Synthesizer models to the training data.

        Args:
            train_data (numpy.ndarray or pandas.DataFrame):
                Training Data. It must be a 2-dimensional numpy array or a
                pandas.DataFrame.
            discrete_columns (list-like):
                List of discrete columns to be used to generate the Conditional
                Vector. If ``train_data`` is a Numpy array, this list should
                contain the integer indices of the columns. Otherwise, if it is
                a ``pandas.DataFrame``, this list should contain the column names.
            epochs (int):
                Number of training epochs. Defaults to 300.
            log_frequency (boolean):
                Whether to use log frequency of categorical levels in conditional
                sampling. Defaults to ``True``.
        """

        self.transformer = DataTransformer()
        self.transformer.fit(train_data, discrete_columns)
        train_data = self.transformer.transform(train_data)

        data_sampler = Sampler(train_data, self.transformer.output_info)

        data_dim = self.transformer.output_dimensions
        self.cond_generator = ConditionalGenerator(
            train_data, self.transformer.output_info, log_frequency
        )

        self.generator = Generator(
            self.embedding_dim + self.cond_generator.n_opt, self.gen_dim, data_dim
        ).to(self.device)

        discriminator = Discriminator(
            data_dim + self.cond_generator.n_opt, self.dis_dim
        ).to(self.device)

        optimizerG = optim.Adam(
            self.generator.parameters(),
            lr=2e-4,
            betas=(0.5, 0.9),
            weight_decay=self.l2scale,
        )
        optimizerD = optim.Adam(discriminator.parameters(), lr=2e-4, betas=(0.5, 0.9))

        assert self.batch_size % 2 == 0
        mean = torch.zeros(self.batch_size, self.embedding_dim, device=self.device)
        std = mean + 1

        train_losses = []
        early_stopping = EarlyStopping(patience=self.patience, verbose=False)

        steps_per_epoch = max(len(train_data) // self.batch_size, 1)
        for i in range(epochs):
            for id_ in range(steps_per_epoch):
                fakez = torch.normal(mean=mean, std=std)

                condvec = self.cond_generator.sample(self.batch_size)
                if condvec is None:
                    c1, m1, col, opt = None, None, None, None
                    real = data_sampler.sample(self.batch_size, col, opt)
                else:
                    c1, m1, col, opt = condvec
                    c1 = torch.from_numpy(c1).to(self.device)
                    m1 = torch.from_numpy(m1).to(self.device)
                    fakez = torch.cat([fakez, c1], dim=1)

                    perm = np.arange(self.batch_size)
                    np.random.shuffle(perm)
                    real = data_sampler.sample(self.batch_size, col[perm], opt[perm])
                    c2 = c1[perm]

                fake = self.generator(fakez)
                fakeact = self._apply_activate(fake)

                real = torch.from_numpy(real.astype("float32")).to(self.device)

                if c1 is not None:
                    fake_cat = torch.cat([fakeact, c1], dim=1)
                    real_cat = torch.cat([real, c2], dim=1)
                else:
                    real_cat = real
                    fake_cat = fake

                y_fake = discriminator(fake_cat)
                y_real = discriminator(real_cat)

                pen = discriminator.calc_gradient_penalty(
                    real_cat, fake_cat, self.device
                )
                loss_d = -(torch.mean(y_real) - torch.mean(y_fake))
                train_losses.append(loss_d.item())
                optimizerD.zero_grad()
                pen.backward(retain_graph=True)
                loss_d.backward()
                optimizerD.step()

                fakez = torch.normal(mean=mean, std=std)
                condvec = self.cond_generator.sample(self.batch_size)

                if condvec is None:
                    c1, m1, col, opt = None, None, None, None
                else:
                    c1, m1, col, opt = condvec
                    c1 = torch.from_numpy(c1).to(self.device)
                    m1 = torch.from_numpy(m1).to(self.device)
                    fakez = torch.cat([fakez, c1], dim=1)

                fake = self.generator(fakez)
                fakeact = self._apply_activate(fake)

                if c1 is not None:
                    y_fake = discriminator(torch.cat([fakeact, c1], dim=1))
                else:
                    y_fake = discriminator(fakeact)

                if condvec is None:
                    cross_entropy = 0
                else:
                    cross_entropy = self._cond_loss(fake, c1, m1)

                loss_g = -torch.mean(y_fake) + cross_entropy
                train_losses.append(loss_g.item())
                optimizerG.zero_grad()
                loss_g.backward()
                optimizerG.step()
            early_stopping(np.average(train_losses))
            if early_stopping.early_stop:
                print("GAN: Early stopping after epochs {}".format(i))
                break
            train_losses = []
Beispiel #3
0
    def train(self,
              data,
              categorical_columns=None,
              ordinal_columns=None,
              update_epsilon=None):
        if update_epsilon:
            self.epsilon = update_epsilon

        sample_per_teacher = self.sample_per_teacher if self.sample_per_teacher < len(
            data) else 1000
        self.num_teachers = int(len(data) / sample_per_teacher) + 1
        self.transformer = DataTransformer()
        self.transformer.fit(data, discrete_columns=categorical_columns)
        data = self.transformer.transform(data)
        data_partitions = np.array_split(data, self.num_teachers)

        data_dim = self.transformer.output_dimensions

        self.cond_generator = ConditionalGenerator(
            data, self.transformer.output_info, self.log_frequency)

        # create conditional generator for each teacher model
        cond_generator = [
            ConditionalGenerator(d, self.transformer.output_info,
                                 self.log_frequency) for d in data_partitions
        ]

        self.generator = Generator(
            self.embedding_dim + self.cond_generator.n_opt, self.gen_dim,
            data_dim).to(self.device)

        discriminator = Discriminator(data_dim + self.cond_generator.n_opt,
                                      self.dis_dim, self.loss,
                                      self.pack).to(self.device)

        student_disc = discriminator
        student_disc.apply(weights_init)

        teacher_disc = [discriminator for i in range(self.num_teachers)]
        for i in range(self.num_teachers):
            teacher_disc[i].apply(weights_init)

        optimizerG = optim.Adam(self.generator.parameters(),
                                lr=2e-4,
                                betas=(0.5, 0.9),
                                weight_decay=self.l2scale)
        optimizerS = optim.Adam(student_disc.parameters(),
                                lr=2e-4,
                                betas=(0.5, 0.9))
        optimizerT = [
            optim.Adam(teacher_disc[i].parameters(), lr=2e-4, betas=(0.5, 0.9))
            for i in range(self.num_teachers)
        ]

        noise_multiplier = self.noise_multiplier
        alphas = torch.tensor([0.0 for i in range(self.moments_order)],
                              device=self.device)
        l_list = 1 + torch.tensor(range(self.moments_order),
                                  device=self.device)
        eps = 0

        mean = torch.zeros(self.batch_size,
                           self.embedding_dim,
                           device=self.device)
        std = mean + 1

        REAL_LABEL = 1
        FAKE_LABEL = 0

        criterion = nn.BCELoss() if (self.loss
                                     == "cross_entropy") else self.WLoss

        if (self.verbose):
            print("using loss {} and regularization {}".format(
                self.loss, self.regularization))

        while eps < self.epsilon:
            # train teacher discriminators
            for t_2 in range(self.teacher_iters):
                for i in range(self.num_teachers):
                    partition_data = data_partitions[i]
                    data_sampler = Sampler(partition_data,
                                           self.transformer.output_info)
                    fakez = torch.normal(mean, std=std).to(self.device)

                    condvec = cond_generator[i].sample(self.batch_size)

                    if condvec is None:
                        c1, m1, col, opt = None, None, None, None
                        real = data_sampler.sample(self.batch_size, col, opt)
                    else:
                        c1, m1, col, opt = condvec
                        c1 = torch.from_numpy(c1).to(self.device)
                        m1 = torch.from_numpy(m1).to(self.device)
                        fakez = torch.cat([fakez, c1], dim=1)
                        perm = np.arange(self.batch_size)
                        np.random.shuffle(perm)
                        real = data_sampler.sample(self.batch_size, col[perm],
                                                   opt[perm])
                        c2 = c1[perm]

                    fake = self.generator(fakez)
                    fakeact = self._apply_activate(fake)

                    real = torch.from_numpy(real.astype('float32')).to(
                        self.device)

                    if c1 is not None:
                        fake_cat = torch.cat([fakeact, c1], dim=1)
                        real_cat = torch.cat([real, c2], dim=1)
                    else:
                        real_cat = real
                        fake_cat = fake

                    optimizerT[i].zero_grad()

                    y_all = torch.cat(
                        [teacher_disc[i](fake_cat), teacher_disc[i](real_cat)])
                    label_fake = torch.full(
                        (int(self.batch_size / self.pack), 1),
                        FAKE_LABEL,
                        dtype=torch.float,
                        device=self.device)
                    label_true = torch.full(
                        (int(self.batch_size / self.pack), 1),
                        REAL_LABEL,
                        dtype=torch.float,
                        device=self.device)
                    labels = torch.cat([label_fake, label_true])

                    errD = criterion(y_all, labels)
                    errD.backward()

                    if (self.regularization == 'dragan'):
                        pen = teacher_disc[i].dragan_penalty(
                            real_cat, device=self.device)
                        pen.backward(retain_graph=True)

                    optimizerT[i].step()

            # train student discriminator
            for t_3 in range(self.student_iters):
                data_sampler = Sampler(data, self.transformer.output_info)
                fakez = torch.normal(mean, std=std)

                condvec = self.cond_generator.sample(self.batch_size)

                if condvec is None:
                    c1, m1, col, opt = None, None, None, None
                    real = data_sampler.sample(self.batch_size, col, opt)
                else:
                    c1, m1, col, opt = condvec
                    c1 = torch.from_numpy(c1).to(self.device)
                    m1 = torch.from_numpy(m1).to(self.device)
                    fakez = torch.cat([fakez, c1], dim=1)

                    perm = np.arange(self.batch_size)
                    np.random.shuffle(perm)
                    real = data_sampler.sample(self.batch_size, col[perm],
                                               opt[perm])
                    c2 = c1[perm]

                fake = self.generator(fakez)
                fakeact = self._apply_activate(fake)

                if c1 is not None:
                    fake_cat = torch.cat([fakeact, c1], dim=1)
                else:
                    fake_cat = fake

                fake_data = fake_cat
                predictions, votes = pate(fake_data,
                                          teacher_disc,
                                          noise_multiplier,
                                          device=self.device)

                output = student_disc(fake_data.detach())

                # update moments accountant
                alphas = alphas + moments_acc(self.num_teachers,
                                              votes,
                                              noise_multiplier,
                                              l_list,
                                              device=self.device)

                loss_s = criterion(output, predictions.float().to(self.device))

                optimizerS.zero_grad()
                loss_s.backward()

                if (self.regularization == 'dragan'):
                    vals = torch.cat([predictions, fake_data], axis=1)
                    ordered = vals[vals[:, 0].sort()[1]]
                    data_list = torch.split(
                        ordered,
                        predictions.shape[0] - int(predictions.sum().item()))
                    synth_cat = torch.cat(data_list[1:], axis=0)[:, 1:]
                    pen = student_disc.dragan_penalty(synth_cat,
                                                      device=self.device)
                    pen.backward(retain_graph=True)

                optimizerS.step()

            # print ('iterator {i}, student discriminator loss is {j}'.format(i=t_3, j=loss_s))

            #train generator
            fakez = torch.normal(mean=mean, std=std)
            condvec = self.cond_generator.sample(self.batch_size)

            if condvec is None:
                c1, m1, col, opt = None, None, None, None
            else:
                c1, m1, col, opt = condvec
                c1 = torch.from_numpy(c1).to(self.device)
                m1 = torch.from_numpy(m1).to(self.device)
                fakez = torch.cat([fakez, c1], dim=1)

            fake = self.generator(fakez)
            fakeact = self._apply_activate(fake)

            if c1 is not None:
                y_fake = student_disc(torch.cat([fakeact, c1], dim=1))
            else:
                y_fake = student_disc(fakeact)

            if condvec is None:
                cross_entropy = 0
            else:
                cross_entropy = self._cond_loss(fake, c1, m1)

            if self.loss == 'cross_entropy':
                label_g = torch.full((int(self.batch_size / self.pack), 1),
                                     REAL_LABEL,
                                     dtype=torch.float,
                                     device=self.device)
                loss_g = criterion(y_fake, label_g.float())
                loss_g = loss_g + cross_entropy
            else:
                loss_g = -torch.mean(y_fake) + cross_entropy

            optimizerG.zero_grad()
            loss_g.backward()
            optimizerG.step()

            eps = min((alphas - math.log(self.delta)) / l_list)

            if (self.verbose):
                print('eps: {:f} \t G: {:f} \t D: {:f}'.format(
                    eps,
                    loss_g.detach().cpu(),
                    loss_s.detach().cpu()))