Example #1
    def main_step(self, model: Seq2Seq, discriminator: Discriminator,
                  input_batches: Dict[str, Batch], gtruth_batches: Dict[str, Batch],
                  adv_targets: Dict[str, Variable], sos_indices: Dict[str, int], teacher_forcing: bool):
        model.train()
        discriminator.eval()
        self.main_optimizer.zero_grad()
        results = dict()
        for key in input_batches:
            input_batch = input_batches[key]
            sos_index = sos_indices[key]
            gtruth_variable = None
            if teacher_forcing:
                gtruth_variable = gtruth_batches[key].variable
            results[key] = model.forward(input_batch.variable, input_batch.lengths, sos_index, gtruth_variable)

        main_loss_computer = MainLossCompute(self.vocabulary, self.use_cuda)
        adv_loss_computer = DiscriminatorLossCompute(discriminator)
        losses = dict()
        for key, result in results.items():
            main_loss = main_loss_computer.compute(result[1], gtruth_batches[key].variable)
            adv_loss = adv_loss_computer.compute(result[0], adv_targets[key])
            losses[key] = (main_loss/sum(gtruth_batches[key].lengths), adv_loss/adv_targets[key].size(0))
        loss = sum([sum(pair) for pair in losses.values()])
        loss.backward()
        nn.utils.clip_grad_norm(model.parameters(), 5)
        self.main_optimizer.step()
        
        losses_data = []
        for pair in losses.values():
            losses_data += [pair[0].data[0], pair[1].data[0]]
        return losses_data
Example #2
    def test_too_small_size(self):
        """
        Tests that building a discriminator with too small of a size raises a
        DiscriminatorSizeError
        """
        for size in range(0, 8):
            with self.assertRaises(DiscriminatorSizeError):
                Discriminator.build_model(in_size=size, in_channels=3)

            with self.assertRaises(DiscriminatorSizeError):
                Discriminator.build_model(in_size=size, in_channels=1)
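
A variant sketch, not part of the original test suite: the same check written with unittest's subTest, so a failure reports the offending size and channel count. It assumes the Discriminator.build_model API used above.

    def test_too_small_size_subtests(self):
        for size in range(0, 8):
            for channels in (1, 3):
                # subTest keeps iterating after a failure and records which case broke
                with self.subTest(size=size, channels=channels):
                    with self.assertRaises(DiscriminatorSizeError):
                        Discriminator.build_model(in_size=size, in_channels=channels)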
Example #3
def save_model(model: Seq2Seq, discriminator: Discriminator, main_optimizer,
               discriminator_optimizer, filename):
    model_state_dict = model.state_dict()
    for key in model_state_dict.keys():
        model_state_dict[key] = model_state_dict[key].cpu()
    discriminator_state_dict = discriminator.state_dict()
    for key in discriminator_state_dict.keys():
        discriminator_state_dict[key] = discriminator_state_dict[key].cpu()
    torch.save(
        {
            'model': model_state_dict,
            'encoder_n_layers': model.encoder_n_layers,
            'decoder_n_layers': model.decoder_n_layers,
            'rnn_size': model.rnn_size,
            'dropout': model.dropout,
            'output_size': model.output_size,
            'embedding_dim': model.embedding_dim,
            'bidirectional': model.bidirectional,
            'attention': model.use_attention,
            'max_length': model.max_length,
            'enable_embedding_training': model.enable_embedding_training,
            'discriminator': discriminator_state_dict,
            'discriminator_hidden_size': discriminator.hidden_size,
            'discriminator_n_layers': discriminator.n_layers,
            'main_optimizer': main_optimizer.state_dict(),
            'discriminator_optimizer': discriminator_optimizer.state_dict()
        }, filename)
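
A minimal counterpart sketch for restoring the checkpoint written by save_model above. It only loads the saved state dicts back into already constructed objects and assumes the same imports (torch, Seq2Seq, Discriminator); rebuilding the models from the saved hyperparameters is omitted because their constructors are not shown here.

def load_model(model: Seq2Seq, discriminator: Discriminator, main_optimizer,
               discriminator_optimizer, filename):
    # Load the checkpoint on CPU and restore each component's saved state
    state = torch.load(filename, map_location="cpu")
    model.load_state_dict(state['model'])
    discriminator.load_state_dict(state['discriminator'])
    main_optimizer.load_state_dict(state['main_optimizer'])
    discriminator_optimizer.load_state_dict(state['discriminator_optimizer'])
    return model, discriminator, main_optimizer, discriminator_optimizer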
Example #4
    def discriminator_step(self, model: Seq2Seq, discriminator: Discriminator,
                           input_batches: Dict[str, Batch], adv_targets: Dict[str, Variable]):
        discriminator.train()
        model.eval()
        self.discriminator_optimizer.zero_grad()
        adv_loss_computer = DiscriminatorLossCompute(discriminator)

        losses = []
        for key in input_batches:
            input_batch = input_batches[key]
            target = adv_targets[key]
            encoder_output, _ = model.encoder(input_batch.variable, input_batch.lengths)
            losses.append(adv_loss_computer.compute(encoder_output, target))

        discriminator_loss = sum(losses)
        discriminator_loss.backward()
        nn.utils.clip_grad_norm(discriminator.parameters(), 5)
        self.discriminator_optimizer.step()
        return discriminator_loss.data[0]
Example #5
 def train(self, model: Seq2Seq, discriminator: Discriminator,
           src_file_names: List[str], tgt_file_names: List[str],
           unsupervised_big_epochs: int, print_every: int, save_every: int,
           num_words_in_batch: int, max_length: int, teacher_forcing: bool,
           save_file: str="model", n_unsupervised_batches: int=None,
           enable_unsupervised_backtranslation: bool=False):
     if self.main_optimizer is None or self.discriminator_optimizer is None:
         logger.info("Initializing optimizers...")
         self.main_optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), 
                                          lr=self.main_lr, betas=self.main_betas)
         self.discriminator_optimizer = optim.RMSprop(discriminator.parameters(), lr=self.discriminator_lr)
     for big_epoch in range(unsupervised_big_epochs):
         src_batch_gen = BatchGenerator(src_file_names, num_words_in_batch, max_len=max_length,
                                        vocabulary=self.vocabulary, language="src",
                                        max_batch_count=n_unsupervised_batches)
         tgt_batch_gen = BatchGenerator(tgt_file_names, num_words_in_batch, max_len=max_length,
                                        vocabulary=self.vocabulary, language="tgt",
                                        max_batch_count=n_unsupervised_batches)
         logger.debug("Src batch:" + str(next(iter(src_batch_gen))))
         logger.debug("Tgt batch:" + str(next(iter(tgt_batch_gen))))
         timer = time.time()
         main_loss_total = 0
         discriminator_loss_total = 0
         epoch = 0
         for src_batch, tgt_batch in zip(src_batch_gen, tgt_batch_gen):
             model.train()
             discriminator_loss, losses = self.train_batch(model, discriminator, src_batch,
                                                           tgt_batch, teacher_forcing)
             main_loss = sum(losses)
             main_loss_total += main_loss
             discriminator_loss_total += discriminator_loss
             if epoch % save_every == 0 and epoch != 0:
                 save_model(model, discriminator, self.main_optimizer,
                            self.discriminator_optimizer, save_file + ".pt")
             if epoch % print_every == 0 and epoch != 0:
                 main_loss_avg = main_loss_total / print_every
                 discriminator_loss_avg = discriminator_loss_total / print_every
                 main_loss_total = 0
                 discriminator_loss_total = 0
                 diff = time.time() - timer
                 timer = time.time()
                 translator = Translator(model, self.vocabulary, self.use_cuda)
                 logger.debug("Auto: " + translator.translate_sentence("you can prepare your meals here .",
                                                                       "src", "src"))
                 logger.debug("Translated: " + translator.translate_sentence("you can prepare your meals here .",
                                                                             "src", "tgt"))
                 logger.info('%s big epoch, %s epoch, %s sec, %.4f main loss, '
                              '%.4f discriminator loss, current losses: %s' %
                              (big_epoch, epoch, diff, main_loss_avg, discriminator_loss_avg, losses))
             epoch += 1
         save_model(model, discriminator, self.main_optimizer,
                    self.discriminator_optimizer, save_file + ".pt")
         if enable_unsupervised_backtranslation:
             self.current_translation_model = Translator(model, self.vocabulary, self.use_cuda)
             model = copy.deepcopy(model)
Example #6
 def _make_discriminator_get_output_shapes(in_size: int,
                                           in_channels: int) -> list:
     """
     Makes a discriminator and returns a list of its layers' output shapes
     :param in_size: input size
     :param in_channels: input channels
     :return: a list of tuples for each layer's output shape
     """
     return [layer.output_shape for layer in
             Discriminator.build_model(in_size=in_size,
                                       in_channels=in_channels).layers]
Example #7
def init_optimizers(model: Seq2Seq,
                    discriminator: Discriminator,
                    discriminator_lr=0.0005,
                    main_lr=0.0003,
                    main_betas=(0.5, 0.999)):
    logging.info("Initializing optimizers...")
    main_optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                       model.parameters()),
                                lr=main_lr,
                                betas=main_betas)
    discriminator_optimizer = optim.RMSprop(discriminator.parameters(),
                                            lr=discriminator_lr)
    return main_optimizer, discriminator_optimizer
Example #8
def test():
    """Test Notebook API"""
    dataset = MelFromDisk(path="data/test")
    dataloader = torch.utils.data.DataLoader(dataset)
    loaders = OrderedDict({"train": dataloader})
    generator = Generator(80)
    discriminator = Discriminator()

    model = torch.nn.ModuleDict({
        "generator": generator,
        "discriminator": discriminator
    })
    optimizer = {
        "opt_g": torch.optim.Adam(generator.parameters()),
        "opt_d": torch.optim.Adam(discriminator.parameters()),
    }
    callbacks = {
        "loss_g":
        GeneratorLossCallback(),
        "loss_d":
        DiscriminatorLossCallback(),
        "o_g":
        dl.OptimizerCallback(metric_key="generator_loss",
                             optimizer_key="opt_g"),
        "o_d":
        dl.OptimizerCallback(metric_key="discriminator_loss",
                             optimizer_key="opt_d"),
    }
    runner = MelGANRunner()

    runner.train(
        model=model,
        loaders=loaders,
        optimizer=optimizer,
        callbacks=callbacks,
        check=True,
        main_metric="discriminator_loss",
    )
Example #9
    def __init__(self, configs):
        self.configs = configs

        wandb.init(project=self.configs['project_name'],
                   name=self.configs['experiment_name'],
                   sync_tensorboard=True)

        self.fake_pool_b2a = ImagePool(self.configs['pool_size'])
        self.fake_pool_a2b = ImagePool(self.configs['pool_size'])

        self.loss_gen_total_metrics = tf.keras.metrics.Mean(
            'loss_gen_total_metrics', dtype=tf.float32)
        self.loss_dis_total_metrics = tf.keras.metrics.Mean(
            'loss_dis_total_metrics', dtype=tf.float32)
        self.loss_cycle_a2b2a_metrics = tf.keras.metrics.Mean(
            'loss_cycle_a2b2a_metrics', dtype=tf.float32)
        self.loss_cycle_b2a2b_metrics = tf.keras.metrics.Mean(
            'loss_cycle_b2a2b_metrics', dtype=tf.float32)
        self.loss_gen_a2b_metrics = tf.keras.metrics.Mean(
            'loss_gen_a2b_metrics', dtype=tf.float32)
        self.loss_gen_b2a_metrics = tf.keras.metrics.Mean(
            'loss_gen_b2a_metrics', dtype=tf.float32)
        self.loss_dis_b_metrics = tf.keras.metrics.Mean('loss_dis_b_metrics',
                                                        dtype=tf.float32)
        self.loss_dis_a_metrics = tf.keras.metrics.Mean('loss_dis_a_metrics',
                                                        dtype=tf.float32)
        self.loss_id_b2a_metrics = tf.keras.metrics.Mean('loss_id_b2a_metrics',
                                                         dtype=tf.float32)
        self.loss_id_a2b_metrics = tf.keras.metrics.Mean('loss_id_a2b_metrics',
                                                         dtype=tf.float32)

        self.mse_loss = tf.keras.losses.MeanSquaredError()
        self.mae_loss = tf.keras.losses.MeanAbsoluteError()

        self.dataset = self.get_dataset()

        self.generator_a2b = Generator(
            input_size=self.configs['input_size'],
            n_res_blocks=self.configs['residual_blocks'])
        self.generator_b2a = Generator(
            input_size=self.configs['input_size'],
            n_res_blocks=self.configs['residual_blocks'])
        self.discriminator_a = Discriminator(
            input_size=self.configs['input_size'])
        self.discriminator_b = Discriminator(
            input_size=self.configs['input_size'])

        total_batches = count_batches(self.dataset)
        self.generator_lr_scheduler = LinearDecay(
            initial_learning_rate=self.configs['lr'],
            total_steps=self.configs['epochs'] * total_batches,
            step_decay=self.configs['decay_epochs'] * total_batches)
        self.discriminator_lr_scheduler = LinearDecay(
            initial_learning_rate=self.configs['lr'],
            total_steps=self.configs['epochs'] * total_batches,
            step_decay=self.configs['decay_epochs'] * total_batches)

        self.generator_optimizer = tf.keras.optimizers.Adam(
            self.generator_lr_scheduler, self.configs['adam_beta_1'])
        self.discriminator_optimizer = tf.keras.optimizers.Adam(
            self.discriminator_lr_scheduler, self.configs['adam_beta_1'])

        self.checkpoint, self.checkpoint_manager = self.make_checkpoints()
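
The LinearDecay schedule used above is referenced but not shown in this example. A plausible sketch follows, assuming it holds the rate at initial_learning_rate until step_decay and then decays linearly to zero at total_steps; the exact behaviour of the project's class may differ.

import tensorflow as tf

class LinearDecay(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, initial_learning_rate, total_steps, step_decay):
        super().__init__()
        self.initial_learning_rate = initial_learning_rate
        self.total_steps = total_steps
        self.step_decay = step_decay

    def __call__(self, step):
        step = tf.cast(step, tf.float32)
        # Fraction of the decay phase completed, clipped to [0, 1]
        # (assumes total_steps > step_decay)
        progress = (step - self.step_decay) / float(self.total_steps - self.step_decay)
        progress = tf.clip_by_value(progress, 0.0, 1.0)
        return self.initial_learning_rate * (1.0 - progress)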
Example #10
def train(
    max_int: int = 128,
    batch_size: int = 16,
    training_steps: int = 500,
    learning_rate: float = 0.001,
    print_output_every_n_steps: int = 10,
):
    """Trains the even GAN

    Args:
        max_int: The maximum integer our dataset goes to.  It is used to set the size of the binary
            lists
        batch_size: The number of examples in a training batch
        training_steps: The number of steps to train on.
        learning_rate: The learning rate for the generator and discriminator
        print_output_every_n_steps: The number of training steps before we print generated output

    Returns:
        generator: The trained generator model
        discriminator: The trained discriminator model
    """
    input_length = int(math.log(max_int, 2))

    # Models
    generator = Generator(input_length)
    discriminator = Discriminator(input_length)

    # Optimizers
    generator_optimizer = torch.optim.Adam(generator.parameters(), lr=learning_rate)
    discriminator_optimizer = torch.optim.Adam(discriminator.parameters(),
                                               lr=learning_rate)

    # loss
    loss = nn.BCELoss()
    gen_loss = []
    dis_loss = []

    for i in range(training_steps):
        # zero the gradients on each iteration
        generator_optimizer.zero_grad()

        # Create noisy input for generator
        # Need float type instead of int
        noise = torch.randint(0, 2, size=(batch_size, input_length)).float()
        generated_data = generator(noise)

        # Generate examples of even real data
        # true labels: [1,1,1,1,1,1,....] i.e all ones
        # true data: [[0,0,0,0,1,0,0],....] i.e binary code for even numbers
        true_labels, true_data = generate_even_data(max_int,
                                                    batch_size=batch_size)
        true_labels = torch.tensor(true_labels).float()
        true_data = torch.tensor(true_data).float()

        # Train the generator
        # We invert the labels here and don't train the discriminator because we want the generator
        # to make things the discriminator classifies as true.
        # true labels: [1,1,1,1,....]
        discriminator_out_gen_data = discriminator(generated_data)
        generator_loss = loss(discriminator_out_gen_data.squeeze(),
                              true_labels)
        gen_loss.append(generator_loss.item())
        generator_loss.backward()
        generator_optimizer.step()

        # Train the discriminator
        # Teach Discriminator to distinguish true data with true label i.e [1,1,1,1,....]
        discriminator_optimizer.zero_grad()
        discriminator_out_true_data = discriminator(true_data)
        discriminator_loss_true_data = loss(
            discriminator_out_true_data.squeeze(), true_labels)

        # Detach the generated data so this pass does not backpropagate into the generator
        discriminator_out_fake_data = discriminator(generated_data.detach())
        fake_labels = torch.zeros(batch_size)  # [0,0,0,.....]
        discriminator_loss_fake_data = loss(
            discriminator_out_fake_data.squeeze(), fake_labels)
        # total discriminator loss
        discriminator_loss = (discriminator_loss_true_data +
                              discriminator_loss_fake_data) / 2

        dis_loss.append(discriminator_loss.item())

        discriminator_loss.backward()
        discriminator_optimizer.step()
        if i % print_output_every_n_steps == 0:
            output = convert_float_matrix_to_int_list(generated_data)
            even_count = len(list(filter(lambda x: (x % 2 == 0), output)))
            print(
                f"steps: {i}, output: {output}, even count: {even_count}/16, Gen Loss: {np.round(generator_loss.item(),4)}, Dis Loss: {np.round(discriminator_loss.item(),4)}"
            )

    history = {}
    history['dis_loss'] = dis_loss
    history['gen_loss'] = gen_loss

    return generator, discriminator, history
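
A minimal sketch for inspecting the loss history returned by train(), assuming matplotlib is available; plot_history is an illustrative helper, not part of the original example.

import matplotlib.pyplot as plt

def plot_history(history):
    # history holds one loss value per training step for each model
    plt.plot(history['gen_loss'], label='generator')
    plt.plot(history['dis_loss'], label='discriminator')
    plt.xlabel('training step')
    plt.ylabel('BCE loss')
    plt.legend()
    plt.show()

# generator, discriminator, history = train()
# plot_history(history)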
Example #11
    help='number of cpu threads to use during batch generation')
opt = parser.parse_args()
print(opt)

info = 'test'

if torch.cuda.is_available() and not opt.cuda:
    print(
        "WARNING: You have a CUDA device, so you should probably run with --cuda"
    )

###### Definition of variables ######
# Networks
netG_A2B = Generator(opt.input_nc, opt.output_nc)
netG_B2A = Generator(opt.output_nc, opt.input_nc)
netD_A = Discriminator(opt.input_nc)
netD_B = Discriminator(opt.output_nc)

if opt.cuda:
    netG_A2B.cuda()
    netG_B2A.cuda()
    netD_A.cuda()
    netD_B.cuda()

netG_A2B.apply(weights_init_normal)
netG_B2A.apply(weights_init_normal)
netD_A.apply(weights_init_normal)
netD_B.apply(weights_init_normal)

# Losses
criterion_GAN = torch.nn.MSELoss()
Example #12
 def _make_discriminator(self) -> K.Model:
     """
     :return: Make a discriminator model for this instance
     """
     return Discriminator.build_model(in_size=self._size,
                                      in_channels=self._channels)
Example #13
print(len(train_dataset))
x, y = train_dataset[0]
print(x.shape, y.shape)

plt.imshow(ToPILImage()(x))
plt.show()
plt.imshow(ToPILImage()(y))
plt.show()

val_dataset = ValidationDataset(glob('./VOC2012/JPEGImages/*')[16000:17000], 4)
print(len(val_dataset))
x, y, y_res = val_dataset[0]
print(x.shape, y.shape, y_res.shape)

plt.imshow(ToPILImage()(x))
plt.show()
plt.imshow(ToPILImage()(y))
plt.show()
plt.imshow(ToPILImage()(y_res))
plt.show()

generator = Generator(scale=2)
x = torch.ones((1, 3, 44, 44))
y = generator(x)
print(x.shape, y.shape)

discriminator = Discriminator()
x = torch.ones((1, 3, 88, 88))
y = discriminator(x)
print(x.shape, y.shape)
Example #14
 def get_models(self):
     generator = Generator(self.config['scale']).to(self.device)
     discriminator = Discriminator().to(self.device)
     return generator, discriminator
Example #15
)

device = torch.device("cuda:0" if opt.cuda else "cpu")
ngpu = int(opt.ngpu)
nz = int(opt.nz)
ngf = int(opt.ngf)
ndf = int(opt.ndf)


generator = Generator(nz, nc, ngf, opt.imageSize, ngpu).to(device)
generator.apply(weights_init)
if opt.generator != "":
    generator.load_state_dict(torch.load(opt.generator))
print(generator)

discriminator = Discriminator(nc, ndf, opt.imageSize, ngpu).to(device)
discriminator.apply(weights_init)
if opt.discriminator != "":
    discriminator.load_state_dict(torch.load(opt.discriminator))
print(discriminator)

# setup optimizer
optimizerD = optim.Adam(
    discriminator.parameters(), lr=opt.lr_d, betas=(opt.beta1, 0.999)
)
optimizerG = optim.Adam(generator.parameters(), lr=opt.lr_g, betas=(opt.beta1, 0.999))

fixed_noise = (
    torch.from_numpy(truncated_noise_sample(batch_size=64, dim_z=nz, truncation=0.4))
    .view(64, nz, 1, 1)
    .to(device)