def main_step(self, model: Seq2Seq, discriminator: Discriminator,
              input_batches: Dict[str, Batch], gtruth_batches: Dict[str, Batch],
              adv_targets: Dict[str, Variable], sos_indices: Dict[str, int],
              teacher_forcing: bool):
    # One generator step: the seq2seq model is updated while the discriminator
    # stays frozen in eval mode.
    model.train()
    discriminator.eval()
    self.main_optimizer.zero_grad()

    results = dict()
    for key in input_batches:
        input_batch = input_batches[key]
        sos_index = sos_indices[key]
        gtruth_variable = None
        if teacher_forcing:
            gtruth_variable = gtruth_batches[key].variable
        results[key] = model.forward(input_batch.variable, input_batch.lengths,
                                     sos_index, gtruth_variable)

    main_loss_computer = MainLossCompute(self.vocabulary, self.use_cuda)
    adv_loss_computer = DiscriminatorLossCompute(discriminator)
    losses = dict()
    for key, result in results.items():
        # result[0] is the encoder output fed to the discriminator;
        # result[1] is the decoder output scored against the ground truth.
        main_loss = main_loss_computer.compute(result[1], gtruth_batches[key].variable)
        adv_loss = adv_loss_computer.compute(result[0], adv_targets[key])
        # Normalize: main loss per target token, adversarial loss per example.
        losses[key] = (main_loss / sum(gtruth_batches[key].lengths),
                       adv_loss / adv_targets[key].size(0))

    loss = sum(sum(pair) for pair in losses.values())
    loss.backward()
    nn.utils.clip_grad_norm(model.parameters(), 5)
    self.main_optimizer.step()

    losses_data = []
    for pair in losses.values():
        losses_data += [pair[0].data[0], pair[1].data[0]]
    return losses_data
def test_too_small_size(self):
    """
    Tests that building a discriminator with too small of a size
    raises a DiscriminatorSizeError
    """
    for size in range(0, 8):
        with self.assertRaises(DiscriminatorSizeError):
            Discriminator.build_model(in_size=size, in_channels=3)
        with self.assertRaises(DiscriminatorSizeError):
            Discriminator.build_model(in_size=size, in_channels=1)
def save_model(model: Seq2Seq, discriminator: Discriminator,
               main_optimizer, discriminator_optimizer, filename):
    # Move all parameters to CPU so the checkpoint loads on machines without CUDA.
    model_state_dict = model.state_dict()
    for key in model_state_dict.keys():
        model_state_dict[key] = model_state_dict[key].cpu()
    discriminator_state_dict = discriminator.state_dict()
    for key in discriminator_state_dict.keys():
        discriminator_state_dict[key] = discriminator_state_dict[key].cpu()
    torch.save({
        'model': model_state_dict,
        'encoder_n_layers': model.encoder_n_layers,
        'decoder_n_layers': model.decoder_n_layers,
        'rnn_size': model.rnn_size,
        'dropout': model.dropout,
        'output_size': model.output_size,
        'embedding_dim': model.embedding_dim,
        'bidirectional': model.bidirectional,
        'attention': model.use_attention,
        'max_length': model.max_length,
        'enable_embedding_training': model.enable_embedding_training,
        'discriminator': discriminator_state_dict,
        'discriminator_hidden_size': discriminator.hidden_size,
        'discriminator_n_layers': discriminator.n_layers,
        'main_optimizer': main_optimizer.state_dict(),
        'discriminator_optimizer': discriminator_optimizer.state_dict()
    }, filename)
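# save_model has no loading counterpart in this excerpt. A minimal sketch of
# one, assuming Seq2Seq and Discriminator accept the saved hyperparameters as
# keyword arguments (the constructor signatures here are assumptions):
def load_model(filename):
    state = torch.load(filename)
    model = Seq2Seq(rnn_size=state['rnn_size'],
                    encoder_n_layers=state['encoder_n_layers'],
                    decoder_n_layers=state['decoder_n_layers'],
                    dropout=state['dropout'],
                    output_size=state['output_size'],
                    embedding_dim=state['embedding_dim'],
                    bidirectional=state['bidirectional'],
                    use_attention=state['attention'],
                    max_length=state['max_length'],
                    enable_embedding_training=state['enable_embedding_training'])
    model.load_state_dict(state['model'])
    discriminator = Discriminator(hidden_size=state['discriminator_hidden_size'],
                                  n_layers=state['discriminator_n_layers'])
    discriminator.load_state_dict(state['discriminator'])
    return model, discriminator, state['main_optimizer'], state['discriminator_optimizer']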
def discriminator_step(self, model: Seq2Seq, discriminator: Discriminator,
                       input_batches: Dict[str, Batch],
                       adv_targets: Dict[str, Variable]):
    # One discriminator step: only the encoder is run, and only the
    # discriminator's parameters are updated.
    discriminator.train()
    model.eval()
    self.discriminator_optimizer.zero_grad()
    adv_loss_computer = DiscriminatorLossCompute(discriminator)
    losses = []
    for key in input_batches:
        input_batch = input_batches[key]
        target = adv_targets[key]
        encoder_output, _ = model.encoder(input_batch.variable, input_batch.lengths)
        losses.append(adv_loss_computer.compute(encoder_output, target))
    discriminator_loss = sum(losses)
    discriminator_loss.backward()
    nn.utils.clip_grad_norm(discriminator.parameters(), 5)
    self.discriminator_optimizer.step()
    return discriminator_loss.data[0]
def train(self, model: Seq2Seq, discriminator: Discriminator,
          src_file_names: List[str], tgt_file_names: List[str],
          unsupervised_big_epochs: int, print_every: int, save_every: int,
          num_words_in_batch: int, max_length: int, teacher_forcing: bool,
          save_file: str = "model", n_unsupervised_batches: int = None,
          enable_unsupervised_backtranslation: bool = False):
    if self.main_optimizer is None or self.discriminator_optimizer is None:
        logger.info("Initializing optimizers...")
        self.main_optimizer = optim.Adam(
            filter(lambda p: p.requires_grad, model.parameters()),
            lr=self.main_lr, betas=self.main_betas)
        self.discriminator_optimizer = optim.RMSprop(
            discriminator.parameters(), lr=self.discriminator_lr)
    for big_epoch in range(unsupervised_big_epochs):
        src_batch_gen = BatchGenerator(src_file_names, num_words_in_batch,
                                       max_len=max_length, vocabulary=self.vocabulary,
                                       language="src",
                                       max_batch_count=n_unsupervised_batches)
        tgt_batch_gen = BatchGenerator(tgt_file_names, num_words_in_batch,
                                       max_len=max_length, vocabulary=self.vocabulary,
                                       language="tgt",
                                       max_batch_count=n_unsupervised_batches)
        logger.debug("Src batch: " + str(next(iter(src_batch_gen))))
        logger.debug("Tgt batch: " + str(next(iter(tgt_batch_gen))))
        timer = time.time()
        main_loss_total = 0
        discriminator_loss_total = 0
        epoch = 0
        for src_batch, tgt_batch in zip(src_batch_gen, tgt_batch_gen):
            model.train()
            discriminator_loss, losses = self.train_batch(model, discriminator,
                                                          src_batch, tgt_batch,
                                                          teacher_forcing)
            main_loss = sum(losses)
            main_loss_total += main_loss
            discriminator_loss_total += discriminator_loss
            if epoch % save_every == 0 and epoch != 0:
                save_model(model, discriminator, self.main_optimizer,
                           self.discriminator_optimizer, save_file + ".pt")
            if epoch % print_every == 0 and epoch != 0:
                main_loss_avg = main_loss_total / print_every
                discriminator_loss_avg = discriminator_loss_total / print_every
                main_loss_total = 0
                discriminator_loss_total = 0
                diff = time.time() - timer
                timer = time.time()
                translator = Translator(model, self.vocabulary, self.use_cuda)
                logger.debug("Auto: " + translator.translate_sentence(
                    "you can prepare your meals here .", "src", "src"))
                logger.debug("Translated: " + translator.translate_sentence(
                    "you can prepare your meals here .", "src", "tgt"))
                logger.info('%s big epoch, %s epoch, %s sec, %.4f main loss, '
                            '%.4f discriminator loss, current losses: %s' %
                            (big_epoch, epoch, diff, main_loss_avg,
                             discriminator_loss_avg, losses))
            epoch += 1
        save_model(model, discriminator, self.main_optimizer,
                   self.discriminator_optimizer, save_file + ".pt")
        if enable_unsupervised_backtranslation:
            self.current_translation_model = Translator(model, self.vocabulary,
                                                        self.use_cuda)
            model = copy.deepcopy(model)
def _make_discriminator_get_output_shapes(in_size: int, in_channels: int) -> list:
    """
    Makes a discriminator and returns a list of its layers' output shapes

    :param in_size: input size
    :param in_channels: input channels
    :return: a list of tuples for each layer's output shape
    """
    return [layer.output_shape
            for layer in Discriminator.build_model(in_size=in_size,
                                                   in_channels=in_channels).layers]
def init_optimizers(model: Seq2Seq, discriminator: Discriminator,
                    discriminator_lr=0.0005, main_lr=0.0003,
                    main_betas=(0.5, 0.999)):
    logging.info("Initializing optimizers...")
    main_optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                                lr=main_lr, betas=main_betas)
    discriminator_optimizer = optim.RMSprop(discriminator.parameters(),
                                            lr=discriminator_lr)
    return main_optimizer, discriminator_optimizer
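# Hypothetical usage, assuming `model` and `discriminator` have already been
# constructed elsewhere; the optimizers produced here are the same objects
# that save_model above expects:
main_optimizer, discriminator_optimizer = init_optimizers(model, discriminator)
save_model(model, discriminator, main_optimizer, discriminator_optimizer, "model.pt")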
def test():
    """Test Notebook API"""
    dataset = MelFromDisk(path="data/test")
    dataloader = torch.utils.data.DataLoader(dataset)
    loaders = OrderedDict({"train": dataloader})
    generator = Generator(80)
    discriminator = Discriminator()
    model = torch.nn.ModuleDict({
        "generator": generator,
        "discriminator": discriminator,
    })
    optimizer = {
        "opt_g": torch.optim.Adam(generator.parameters()),
        "opt_d": torch.optim.Adam(discriminator.parameters()),
    }
    callbacks = {
        "loss_g": GeneratorLossCallback(),
        "loss_d": DiscriminatorLossCallback(),
        "o_g": dl.OptimizerCallback(metric_key="generator_loss",
                                    optimizer_key="opt_g"),
        "o_d": dl.OptimizerCallback(metric_key="discriminator_loss",
                                    optimizer_key="opt_d"),
    }
    runner = MelGANRunner()
    runner.train(
        model=model,
        loaders=loaders,
        optimizer=optimizer,
        callbacks=callbacks,
        check=True,
        main_metric="discriminator_loss",
    )
def __init__(self, configs):
    self.configs = configs
    wandb.init(project=self.configs['project_name'],
               name=self.configs['experiment_name'],
               sync_tensorboard=True)
    self.fake_pool_b2a = ImagePool(self.configs['pool_size'])
    self.fake_pool_a2b = ImagePool(self.configs['pool_size'])
    # Running means for every loss term, one metric per direction.
    self.loss_gen_total_metrics = tf.keras.metrics.Mean(
        'loss_gen_total_metrics', dtype=tf.float32)
    self.loss_dis_total_metrics = tf.keras.metrics.Mean(
        'loss_dis_total_metrics', dtype=tf.float32)
    self.loss_cycle_a2b2a_metrics = tf.keras.metrics.Mean(
        'loss_cycle_a2b2a_metrics', dtype=tf.float32)
    self.loss_cycle_b2a2b_metrics = tf.keras.metrics.Mean(
        'loss_cycle_b2a2b_metrics', dtype=tf.float32)
    self.loss_gen_a2b_metrics = tf.keras.metrics.Mean(
        'loss_gen_a2b_metrics', dtype=tf.float32)
    self.loss_gen_b2a_metrics = tf.keras.metrics.Mean(
        'loss_gen_b2a_metrics', dtype=tf.float32)
    self.loss_dis_b_metrics = tf.keras.metrics.Mean(
        'loss_dis_b_metrics', dtype=tf.float32)
    self.loss_dis_a_metrics = tf.keras.metrics.Mean(
        'loss_dis_a_metrics', dtype=tf.float32)
    self.loss_id_b2a_metrics = tf.keras.metrics.Mean(
        'loss_id_b2a_metrics', dtype=tf.float32)
    self.loss_id_a2b_metrics = tf.keras.metrics.Mean(
        'loss_id_a2b_metrics', dtype=tf.float32)
    self.mse_loss = tf.keras.losses.MeanSquaredError()
    self.mae_loss = tf.keras.losses.MeanAbsoluteError()
    self.dataset = self.get_dataset()
    self.generator_a2b = Generator(
        input_size=self.configs['input_size'],
        n_res_blocks=self.configs['residual_blocks'])
    self.generator_b2a = Generator(
        input_size=self.configs['input_size'],
        n_res_blocks=self.configs['residual_blocks'])
    self.discriminator_a = Discriminator(input_size=self.configs['input_size'])
    self.discriminator_b = Discriminator(input_size=self.configs['input_size'])
    total_batches = count_batches(self.dataset)
    # Generators and discriminators share the same decay schedule parameters.
    self.generator_lr_scheduler = LinearDecay(
        initial_learning_rate=self.configs['lr'],
        total_steps=self.configs['epochs'] * total_batches,
        step_decay=self.configs['decay_epochs'] * total_batches)
    self.discriminator_lr_scheduler = LinearDecay(
        initial_learning_rate=self.configs['lr'],
        total_steps=self.configs['epochs'] * total_batches,
        step_decay=self.configs['decay_epochs'] * total_batches)
    self.generator_optimizer = tf.keras.optimizers.Adam(
        self.generator_lr_scheduler, self.configs['adam_beta_1'])
    self.discriminator_optimizer = tf.keras.optimizers.Adam(
        self.discriminator_lr_scheduler, self.configs['adam_beta_1'])
    self.checkpoint, self.checkpoint_manager = self.make_checkpoints()
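# LinearDecay is used above but not defined in this excerpt. A common TF2
# CycleGAN formulation holds the learning rate constant for `step_decay` steps
# and then decays it linearly to zero at `total_steps`; a sketch under that
# assumption (not necessarily this project's exact implementation):
import tensorflow as tf

class LinearDecay(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, initial_learning_rate, total_steps, step_decay):
        super().__init__()
        self.initial_learning_rate = initial_learning_rate
        self.total_steps = total_steps
        self.step_decay = step_decay

    def __call__(self, step):
        step = tf.cast(step, tf.float32)
        decayed = self.initial_learning_rate * (
            1.0 - (step - self.step_decay) / (self.total_steps - self.step_decay))
        # Constant phase until step_decay, then a linear ramp down to zero.
        return tf.where(step < self.step_decay, self.initial_learning_rate, decayed)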
def train(
    max_int: int = 128,
    batch_size: int = 16,
    training_steps: int = 500,
    learning_rate: float = 0.001,
    print_output_every_n_steps: int = 10,
):
    """Trains the even GAN

    Args:
        max_int: The maximum integer our dataset goes to. It is used to set
            the size of the binary lists
        batch_size: The number of examples in a training batch
        training_steps: The number of steps to train on.
        learning_rate: The learning rate for the generator and discriminator
        print_output_every_n_steps: The number of training steps before we
            print generated output

    Returns:
        generator: The trained generator model
        discriminator: The trained discriminator model
        history: The generator and discriminator loss values per step
    """
    input_length = int(math.log(max_int, 2))

    # Models
    generator = Generator(input_length)
    discriminator = Discriminator(input_length)

    # Optimizers
    generator_optimizer = torch.optim.Adam(generator.parameters(), lr=learning_rate)
    discriminator_optimizer = torch.optim.Adam(discriminator.parameters(),
                                               lr=learning_rate)

    # Loss
    loss = nn.BCELoss()
    gen_loss = []
    dis_loss = []

    for i in range(training_steps):
        # Zero the gradients on each iteration
        generator_optimizer.zero_grad()

        # Create noisy input for the generator; needs float type instead of int
        noise = torch.randint(0, 2, size=(batch_size, input_length)).float()
        generated_data = generator(noise)

        # Generate examples of even real data
        # true labels: [1,1,1,...] i.e. all ones
        # true data: [[0,0,0,0,1,0,0], ...] i.e. binary codes for even numbers
        true_labels, true_data = generate_even_data(max_int, batch_size=batch_size)
        true_labels = torch.tensor(true_labels).float()
        true_data = torch.tensor(true_data).float()

        # Train the generator. We use the true labels and don't train the
        # discriminator here because we want the generator to make things the
        # discriminator classifies as true.
        discriminator_out_gen_data = discriminator(generated_data)
        generator_loss = loss(discriminator_out_gen_data.squeeze(), true_labels)
        gen_loss.append(generator_loss.item())
        generator_loss.backward()
        generator_optimizer.step()

        # Train the discriminator to distinguish true data (label 1)
        # from generated data (label 0).
        discriminator_optimizer.zero_grad()
        discriminator_out_true_data = discriminator(true_data)
        discriminator_loss_true_data = loss(discriminator_out_true_data.squeeze(),
                                            true_labels)

        # .detach() stops gradients from flowing back into the generator
        # during the discriminator update.
        discriminator_out_fake_data = discriminator(generated_data.detach())
        fake_labels = torch.zeros(batch_size)  # [0,0,0,...]
        discriminator_loss_fake_data = loss(discriminator_out_fake_data.squeeze(),
                                            fake_labels)

        # Total discriminator loss
        discriminator_loss = (discriminator_loss_true_data
                              + discriminator_loss_fake_data) / 2
        dis_loss.append(discriminator_loss.item())
        discriminator_loss.backward()
        discriminator_optimizer.step()

        if i % print_output_every_n_steps == 0:
            output = convert_float_matrix_to_int_list(generated_data)
            even_count = len(list(filter(lambda x: (x % 2 == 0), output)))
            print(
                f"steps: {i}, output: {output}, even count: {even_count}/{batch_size}, "
                f"Gen Loss: {np.round(generator_loss.item(), 4)}, "
                f"Dis Loss: {np.round(discriminator_loss.item(), 4)}"
            )

    history = {'dis_loss': dis_loss, 'gen_loss': gen_loss}
    return generator, discriminator, history
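# generate_even_data and convert_float_matrix_to_int_list are not defined in
# this excerpt. Minimal sketches, assuming the behavior described by the
# comments above (fixed-width binary encodings of even integers, and a 0.5
# threshold when decoding generator output):
import math
import numpy as np

def generate_even_data(max_int: int, batch_size: int = 16):
    input_length = int(math.log(max_int, 2))
    numbers = np.random.randint(0, max_int // 2, size=batch_size) * 2  # even only
    labels = [1] * batch_size
    data = [[int(bit) for bit in format(n, f"0{input_length}b")] for n in numbers]
    return labels, data

def convert_float_matrix_to_int_list(float_matrix, threshold: float = 0.5):
    # Threshold each row of generator outputs back into an integer.
    return [int("".join(str(int(value >= threshold)) for value in row), 2)
            for row in float_matrix]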
                    help='number of cpu threads to use during batch generation')
opt = parser.parse_args()
print(opt)

info = 'test'

if torch.cuda.is_available() and not opt.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

###### Definition of variables ######
# Networks
netG_A2B = Generator(opt.input_nc, opt.output_nc)
netG_B2A = Generator(opt.output_nc, opt.input_nc)
netD_A = Discriminator(opt.input_nc)
netD_B = Discriminator(opt.output_nc)

if opt.cuda:
    netG_A2B.cuda()
    netG_B2A.cuda()
    netD_A.cuda()
    netD_B.cuda()

netG_A2B.apply(weights_init_normal)
netG_B2A.apply(weights_init_normal)
netD_A.apply(weights_init_normal)
netD_B.apply(weights_init_normal)

# Losses
criterion_GAN = torch.nn.MSELoss()
def _make_discriminator(self) -> K.Model:
    """
    :return: a discriminator model built for this instance's size and channels
    """
    return Discriminator.build_model(in_size=self._size, in_channels=self._channels)
print(len(train_dataset))
x, y = train_dataset[0]
print(x.shape, y.shape)
plt.imshow(ToPILImage()(x))
plt.show()
plt.imshow(ToPILImage()(y))
plt.show()

val_dataset = ValidationDataset(glob('./VOC2012/JPEGImages/*')[16000:17000], 4)
print(len(val_dataset))
x, y, y_res = val_dataset[0]
print(x.shape, y.shape, y_res.shape)
plt.imshow(ToPILImage()(x))
plt.show()
plt.imshow(ToPILImage()(y))
plt.show()
plt.imshow(ToPILImage()(y_res))
plt.show()

# Smoke-test the models: the generator upscales 2x, so the discriminator
# receives 88x88 inputs for 44x44 generator inputs.
generator = Generator(scale=2)
x = torch.ones((1, 3, 44, 44))
y = generator(x)
print(x.shape, y.shape)

discriminator = Discriminator()
x = torch.ones((1, 3, 88, 88))
y = discriminator(x)
print(x.shape, y.shape)
def get_models(self):
    generator = Generator(self.config['scale']).to(self.device)
    discriminator = Discriminator().to(self.device)
    return generator, discriminator
)
device = torch.device("cuda:0" if opt.cuda else "cpu")
ngpu = int(opt.ngpu)
nz = int(opt.nz)
ngf = int(opt.ngf)
ndf = int(opt.ndf)

generator = Generator(nz, nc, ngf, opt.imageSize, ngpu).to(device)
generator.apply(weights_init)
if opt.generator != "":
    generator.load_state_dict(torch.load(opt.generator))
print(generator)

discriminator = Discriminator(nc, ndf, opt.imageSize, ngpu).to(device)
discriminator.apply(weights_init)
if opt.discriminator != "":
    discriminator.load_state_dict(torch.load(opt.discriminator))
print(discriminator)

# setup optimizer
optimizerD = optim.Adam(discriminator.parameters(), lr=opt.lr_d,
                        betas=(opt.beta1, 0.999))
optimizerG = optim.Adam(generator.parameters(), lr=opt.lr_g,
                        betas=(opt.beta1, 0.999))

fixed_noise = (
    torch.from_numpy(truncated_noise_sample(batch_size=64, dim_z=nz, truncation=0.4))
    .view(64, nz, 1, 1)
    .to(device)
)
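# weights_init is applied above but not defined in this excerpt. The canonical
# DCGAN initializer is sketched below, on the assumption that this script
# follows the standard recipe (N(0, 0.02) convolutions, N(1, 0.02) batch norms):
import torch.nn as nn

def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)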