Example #1

MedGAN: pretrain an autoencoder on the transformed data, then train a GAN whose generator produces embeddings that the decoder maps back into data space.

# Encoder, Decoder, Generator, Discriminator, aeloss, GeneralTransformer and
# LegacySingleTableBaseline are defined elsewhere in the surrounding package.
import numpy as np
import torch
from torch.optim import Adam
from torch.utils.data import DataLoader, TensorDataset

class MedGAN(LegacySingleTableBaseline):
    """docstring for IdentitySynthesizer."""
    def __init__(
            self,
            embedding_dim=128,
            random_dim=128,
            generator_dims=(128, 128),  # 128 -> 128 -> 128
            discriminator_dims=(256, 128, 1),  # datadim * 2 -> 256 -> 128 -> 1
            compress_dims=(),  # datadim -> embedding_dim
            decompress_dims=(),  # embedding_dim -> datadim
            bn_decay=0.99,
            l2scale=0.001,
            pretrain_epoch=200,
            batch_size=1000,
            epochs=2000):

        self.embedding_dim = embedding_dim
        self.random_dim = random_dim
        self.generator_dims = generator_dims
        self.discriminator_dims = discriminator_dims

        self.compress_dims = compress_dims
        self.decompress_dims = decompress_dims
        self.bn_decay = bn_decay
        self.l2scale = l2scale

        self.pretrain_epoch = pretrain_epoch
        self.batch_size = batch_size
        self.epochs = epochs

        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        self.transformer = None

    def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
        self.transformer = GeneralTransformer()
        self.transformer.fit(data, categorical_columns, ordinal_columns)
        data = self.transformer.transform(data)
        dataset = TensorDataset(
            torch.from_numpy(data.astype('float32')).to(self.device))
        loader = DataLoader(dataset,
                            batch_size=self.batch_size,
                            shuffle=True,
                            drop_last=True)

        data_dim = self.transformer.output_dim
        encoder = Encoder(data_dim, self.compress_dims,
                          self.embedding_dim).to(self.device)
        self.decoder = Decoder(self.embedding_dim, self.decompress_dims,
                               data_dim).to(self.device)
        optimizerAE = Adam(list(encoder.parameters()) +
                           list(self.decoder.parameters()),
                           weight_decay=self.l2scale)

        # Phase 1: pretrain the autoencoder so the decoder learns to map
        # embeddings back into data space.
        for _ in range(self.pretrain_epoch):
            for batch in loader:
                optimizerAE.zero_grad()
                real = batch[0].to(self.device)
                emb = encoder(real)
                rec = self.decoder(emb, self.transformer.output_info)
                loss = aeloss(rec, real, self.transformer.output_info)
                loss.backward()
                optimizerAE.step()

        self.generator = Generator(self.random_dim, self.generator_dims,
                                   self.bn_decay).to(self.device)
        discriminator = Discriminator(data_dim,
                                      self.discriminator_dims).to(self.device)
        optimizerG = Adam(list(self.generator.parameters()) +
                          list(self.decoder.parameters()),
                          weight_decay=self.l2scale)
        optimizerD = Adam(discriminator.parameters(),
                          weight_decay=self.l2scale)

        mean = torch.zeros(self.batch_size,
                           self.random_dim,
                           device=self.device)
        std = mean + 1
        # Phase 2: adversarial training. The discriminator is updated on every
        # batch; the generator (and decoder) only during every n_d-th epoch.
        n_d = 2
        n_g = 1
        for i in range(self.epochs):
            for batch in loader:
                real = batch[0].to(self.device)
                noise = torch.normal(mean=mean, std=std)
                emb = self.generator(noise)
                fake = self.decoder(emb, self.transformer.output_info)

                optimizerD.zero_grad()
                y_real = discriminator(real)
                y_fake = discriminator(fake)
                # Discriminator loss: -[log D(real) + log(1 - D(fake))];
                # the 1e-4 terms guard against log(0).
                real_loss = -torch.log(y_real + 1e-4).mean()
                fake_loss = -torch.log(1.0 - y_fake + 1e-4).mean()
                loss_d = real_loss + fake_loss
                loss_d.backward()
                optimizerD.step()

                if i % n_d == 0:
                    for _ in range(n_g):
                        noise = torch.normal(mean=mean, std=std)
                        emb = self.generator(noise)
                        fake = self.decoder(emb, self.transformer.output_info)
                        optimizerG.zero_grad()
                        y_fake = discriminator(fake)
                        # Non-saturating generator loss: maximize log D(fake).
                        loss_g = -torch.log(y_fake + 1e-4).mean()
                        loss_g.backward()
                        optimizerG.step()

    def sample(self, n):
        self.generator.eval()
        self.decoder.eval()

        # Generate enough full batches to cover n rows; truncated to n below.
        steps = n // self.batch_size + 1
        data = []
        for _ in range(steps):
            mean = torch.zeros(self.batch_size, self.random_dim)
            std = mean + 1
            noise = torch.normal(mean=mean, std=std).to(self.device)
            emb = self.generator(noise)
            fake = self.decoder(emb, self.transformer.output_info)
            # Squash decoder outputs into (0, 1) before the inverse transform.
            fake = torch.sigmoid(fake)
            data.append(fake.detach().cpu().numpy())
        data = np.concatenate(data, axis=0)
        data = data[:n]
        return self.transformer.inverse_transform(data)
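
A minimal usage sketch for the class above; the toy array, the column indices and the reduced training budgets are illustrative assumptions, not part of the original example.

import numpy as np

train = np.random.randint(0, 2, size=(5000, 20))  # hypothetical binary toy table

synth = MedGAN(pretrain_epoch=10, epochs=50)  # small budgets for a quick smoke test
synth.fit(train, categorical_columns=(0, 3))  # column indices are made up for the demo
samples = synth.sample(1000)  # 1000 synthetic rows in the original schema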
Example #2

VEEGAN: train a generator together with a reconstructor that maps data back into the noise space, so the discriminator judges (sample, code) pairs jointly.

# Generator, Discriminator, Reconstructor, GeneralTransformer and
# BaseSynthesizer are defined elsewhere in the surrounding package.
import numpy as np
import torch
from torch.nn.functional import mse_loss
from torch.optim import Adam
from torch.utils.data import DataLoader, TensorDataset

class VEEGANSynthesizer(BaseSynthesizer):
    """VEEGANSynthesizer."""

    def __init__(
        self,
        embedding_dim=32,
        gen_dim=(128, 128),
        dis_dim=(128, ),
        rec_dim=(128, 128),
        l2scale=1e-6,
        batch_size=500,
        epochs=300
    ):

        self.embedding_dim = embedding_dim
        self.gen_dim = gen_dim
        self.dis_dim = dis_dim
        self.rec_dim = rec_dim

        self.l2scale = l2scale
        self.batch_size = batch_size
        self.epochs = epochs

        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    def fit(self, train_data, categorical_columns=tuple(), ordinal_columns=tuple()):
        self.transformer = GeneralTransformer(act='tanh')
        self.transformer.fit(train_data, categorical_columns, ordinal_columns)
        train_data = self.transformer.transform(train_data)
        dataset = TensorDataset(torch.from_numpy(train_data.astype('float32')).to(self.device))
        loader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True, drop_last=True)

        data_dim = self.transformer.output_dim
        self.generator = Generator(self.embedding_dim, self.gen_dim, data_dim).to(self.device)
        discriminator = Discriminator(self.embedding_dim + data_dim, self.dis_dim).to(self.device)
        reconstructor = Reconstructor(data_dim, self.rec_dim, self.embedding_dim).to(self.device)

        optimizer_params = dict(lr=1e-3, betas=(0.5, 0.9), weight_decay=self.l2scale)
        optimizerG = Adam(self.generator.parameters(), **optimizer_params)
        optimizerD = Adam(discriminator.parameters(), **optimizer_params)
        optimizerR = Adam(reconstructor.parameters(), **optimizer_params)

        mean = torch.zeros(self.batch_size, self.embedding_dim, device=self.device)
        std = mean + 1
        for _ in range(self.epochs):
            for batch in loader:
                real = batch[0].to(self.device)
                # Real pairs: data x joined with its reconstructed code R(x).
                realz = reconstructor(real)
                y_real = discriminator(torch.cat([real, realz], dim=1))

                # Fake pairs: generated sample G(z) joined with the code z that produced it.
                fakez = torch.normal(mean=mean, std=std)
                fake = self.generator(fakez, self.transformer.output_info)
                fakezrec = reconstructor(fake)
                y_fake = discriminator(torch.cat([fake, fakez], dim=1))

                # Discriminator loss: -[log D(real pair) + log(1 - D(fake pair))],
                # with sigmoid applied to the raw logits and 1e-4 guarding log(0).
                loss_d = (
                    -(torch.log(torch.sigmoid(y_real) + 1e-4).mean())
                    - (torch.log(1. - torch.sigmoid(y_fake) + 1e-4).mean())
                )

                # Generator and reconstructor share one objective: fool the
                # discriminator while keeping z recoverable from the fake sample.
                shared = -y_fake.mean() + mse_loss(fakezrec, fakez, reduction='mean')
                loss_g = shared / self.embedding_dim
                loss_r = shared / self.embedding_dim
                # Step the three networks in turn; retain_graph=True keeps the
                # shared computation graph alive for the later backward passes.
                optimizerD.zero_grad()
                loss_d.backward(retain_graph=True)
                optimizerD.step()
                optimizerG.zero_grad()
                loss_g.backward(retain_graph=True)
                optimizerG.step()
                optimizerR.zero_grad()
                loss_r.backward()
                optimizerR.step()

    def sample(self, n):
        self.generator.eval()

        output_info = self.transformer.output_info
        # Generate enough full batches to cover n rows; truncated to n below.
        steps = n // self.batch_size + 1
        data = []
        for _ in range(steps):
            mean = torch.zeros(self.batch_size, self.embedding_dim)
            std = mean + 1
            noise = torch.normal(mean=mean, std=std).to(self.device)
            fake = self.generator(noise, output_info)
            data.append(fake.detach().cpu().numpy())

        data = np.concatenate(data, axis=0)
        data = data[:n]
        return self.transformer.inverse_transform(data)
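
As with the MedGAN example, a minimal usage sketch; the toy data and the reduced epoch count are illustrative assumptions, not part of the original example.

import numpy as np

train = np.random.rand(5000, 10)  # hypothetical continuous toy table

synth = VEEGANSynthesizer(epochs=30)  # small budget for a quick smoke test
synth.fit(train)
samples = synth.sample(1000)  # 1000 synthetic rows in the original schema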