Beispiel #1
0
    def run(self):
        """Create a new Discriminator and Generator sized to the input data and save both.

        Loads the JSON array stored at ``self.input_data_path``, uses its
        second and third dimensions as the model data shape, and writes
        ``discriminator.txt`` and ``generator.txt`` into
        ``self.output_model_folder`` (created if it does not exist).

        Returns:
            bool: always ``True``.
        """
        # Read the latent vectors; the context manager guarantees the handle
        # is closed even when JSON parsing fails.
        with open(self.input_data_path, "r") as latent_vector_file:
            latent_space_mols = np.array(json.load(latent_vector_file))

        # expecting tuple (set_size, dim_1, dim_2)
        shape = latent_space_mols.shape
        data_shape = (shape[1], shape[2])

        # create Discriminator
        D = Discriminator(data_shape)

        # save Discriminator; exist_ok avoids the check-then-create race
        os.makedirs(self.output_model_folder, exist_ok=True)
        discriminator_path = os.path.join(self.output_model_folder,
                                          'discriminator.txt')
        D.save(discriminator_path)

        # create Generator
        G = Generator(data_shape, latent_dim=shape[2])

        # save generator
        generator_path = os.path.join(self.output_model_folder,
                                      'generator.txt')
        G.save(generator_path)

        return True
Beispiel #2
0
 def test_sampler_cuda(self):
     """Check that the sampler yields a CUDA tensor when the generator is on the GPU.

     Writes random latent vectors to a temporary file, builds and saves the
     models with ``CreateModelRunner``, moves the loaded generator to CUDA
     and compares the tensor type of one real batch with the tensor type of
     a sampled batch of the same size.
     """
     with TemporaryDirectory() as tmpdirname:
         latent_path = tmpdirname + '/encoded_smiles.latent'
         latent = np.random.rand(64, 1, 512)
         os.makedirs(os.path.dirname(latent_path), exist_ok=True)
         with open(latent_path, 'w') as f:
             json.dump(latent.tolist(), f)
         C = CreateModelRunner(input_data_path=latent_path,
                               output_model_folder=tmpdirname)
         C.run()
         G = Generator.load(tmpdirname + '/generator.txt')
         # Close the handle before the temporary directory is cleaned up;
         # an open file can block directory removal on some platforms.
         with open(latent_path, "r") as json_smiles:
             latent_space_mols = np.array(json.load(json_smiles))
         testSampler = Sampler(G)
         latent_space_mols = latent_space_mols.reshape(
             latent_space_mols.shape[0], 512)
         T = torch.cuda.FloatTensor
         G.cuda()
         dataloader = torch.utils.data.DataLoader(
             LatentMolsDataset(latent_space_mols),
             shuffle=True,
             batch_size=64,
             drop_last=True)
         # Only the first batch is needed for the check.
         for real_mols in dataloader:
             real_mols = real_mols.type(T)
             fake_mols = testSampler.sample(real_mols.shape[0])
             # Compare the tensor type strings (e.g. 'torch.cuda.FloatTensor').
             # The Python classes are both torch.Tensor for CPU and CUDA
             # tensors, so comparing type(...) cannot catch a CPU result.
             self.assertEqual(real_mols.type(), fake_mols.type())
             break
Beispiel #3
0
def sample(generator_path, output_sampled_latent_file, sample_number=50000, message='samling the generator',
           decode_sampled=False, output_decoded_smiles_file=''):
    """Sample latent vectors from a saved generator and write them to a JSON file.

    Args:
        generator_path: path passed to ``Generator.load``.
        output_sampled_latent_file: path of the JSON file that receives the
            sampled latent vectors (as nested lists).
        sample_number: number of samples requested from the sampler.
        message: text printed when the function starts.
        decode_sampled: when true, ``decode`` is called on the written file
            after sampling.
        output_decoded_smiles_file: output path handed to ``decode``; only
            used when ``decode_sampled`` is true.
    """
    print(message)
    sys.stdout.flush()

    # load generator
    G = Generator.load(generator_path)
    G.eval()

    if torch.cuda.is_available():
        G.cuda()

    S = Sampler(generator=G)
    print('Sampling model')
    sys.stdout.flush()
    # torch.no_grad() only disables gradient tracking while its context is
    # entered; calling it as a bare statement has no effect.
    with torch.no_grad():
        latent = S.sample(sample_number)

    latent = latent.detach().cpu().numpy().tolist()

    with open(output_sampled_latent_file, 'w') as json_file:
        json.dump(latent, json_file)

    print('Sampling finished')
    sys.stdout.flush()
    # Drop the references to the samples and the model before decoding starts.
    del latent, json_file, G, S

    # decoding sampled mols
    if decode_sampled:
        print('Decoding sampled mols')
        sys.stdout.flush()
        decode(output_sampled_latent_file, output_decoded_smiles_file, message='Decoding mol. Call from sample script.')
    def CreateGenerator(self):
        """Build a Generator for ``self.data_shape`` and save it as ``generator.txt``.

        The latent dimension is taken from ``self.data_shape[1]``. The file is
        written into ``self.output_model_folder``, which is created when
        missing.
        """
        # create Generator
        G = Generator(self.data_shape, latent_dim=self.data_shape[1])

        # save generator; exist_ok makes the creation race-free compared to
        # checking os.path.exists first
        os.makedirs(self.output_model_folder, exist_ok=True)
        generator_path = os.path.join(self.output_model_folder, 'generator.txt')
        G.save(generator_path)
Beispiel #5
0
 def __init__(self, args):
     """Seed the RNGs, load the test data and the spot array, and create both networks.

     Args:
         args: configuration object; this method reads ``seed``, ``dataset``
             and ``len_seg`` from it and passes the whole object to
             ``Generator`` and ``Discriminator``.
     """
     self.args = args
     torch.manual_seed(self.args.seed)
     np.random.seed(self.args.seed)
     print('{} detection...'.format(args.dataset))
     white_noise = dp.DatasetReader(white_noise=self.args.dataset,
                                    data_path=data_path,
                                    len_seg=self.args.len_seg
                                    )
     # Convert with .to(..., copy=True) instead of wrapping the tensor in
     # torch.tensor(): copy-constructing from an existing tensor emits a
     # UserWarning. copy=True keeps the result independent of the numpy
     # buffer, as before.
     self.testset = torch.from_numpy(white_noise.dataset_).to(torch.float32, copy=True)
     self.spots = np.load('{}/spots.npy'.format(info_path))
     self.Generator = Generator(args)  # Generator
     self.Discriminator = Discriminator(args)  # Discriminator
Beispiel #6
0
    def __init__(self, output_smiles_file, input_model_path, sample_number):
        """Remember the run parameters, load the generator and choose the tensor type.

        Args:
            output_smiles_file: stored as ``self.output_smiles_file``.
            input_model_path: path handed to ``Generator.load``.
            sample_number: stored as ``self.sample_number``.
        """
        # init params
        self.input_model_path = input_model_path
        self.output_smiles_file = output_smiles_file
        self.sample_number = sample_number

        self.G = Generator.load(input_model_path)

        # Use the GPU and CUDA float tensors when CUDA is available,
        # otherwise stay on CPU float tensors.
        if torch.cuda.is_available():
            self.G.cuda()
            self.Tensor = torch.cuda.FloatTensor
        else:
            self.Tensor = torch.FloatTensor
def load_models(epoch, hparams, hidden_dim,
                weights_dir='C://Users/s157148/Documents/Github/TimeGAN/weights/ALL'):
    """Rebuild the five networks and restore their weights for one epoch.

    Args:
        epoch: checkpoint epoch to restore; must be divisible by 50.
        hparams: passed unchanged to every model constructor.
        hidden_dim: passed unchanged to every model constructor.
        weights_dir: directory holding one sub-folder per component
            (``embedder``, ``recovery``, ``supervisor``, ``generator``,
            ``discriminator``), each containing ``epoch_<epoch>``
            checkpoints. Defaults to the location that used to be
            hard-coded.

    Returns:
        A tuple ``(embedder, recovery, supervisor, generator,
        discriminator)``, or the string
        ``'Only insert epochs that are divisible by 50.'`` when ``epoch`` is
        not a multiple of 50.
    """
    # Reject unsupported epochs before importing the model modules.
    # NOTE(review): the string return is kept for backward compatibility;
    # callers that unpack five values will fail on it.
    if epoch % 50 != 0:
        return 'Only insert epochs that are divisible by 50.'

    from models.Discriminator import Discriminator
    from models.Recovery import Recovery
    from models.Generator import Generator
    from models.Embedder import Embedder
    from models.Supervisor import Supervisor

    def _restore(model, component):
        """Load the checkpoint of ``component`` into ``model`` and build it."""
        checkpoint = '{}/{}/epoch_{}'.format(weights_dir, component, epoch)
        model.load_weights(checkpoint).expect_partial()
        model.build([])
        return model

    # Only use when you want to load the models
    e_model_pre_trained = _restore(
        Embedder('logs/e_model_pre_train', hparams, hidden_dim,
                 dimensionality=11),
        'embedder')
    r_model_pre_trained = _restore(
        Recovery('logs/r_model_pre_train', hparams, hidden_dim,
                 dimensionality=11),
        'recovery')
    s_model_pre_trained = _restore(
        Supervisor('logs/s_model_pre_train', hparams, hidden_dim),
        'supervisor')
    g_model_pre_trained = _restore(
        Generator('logs/g_model_pre_train', hparams, hidden_dim),
        'generator')
    d_model_pre_trained = _restore(
        Discriminator('logs/d_model_pre_train', hparams, hidden_dim),
        'discriminator')

    return e_model_pre_trained, r_model_pre_trained, s_model_pre_trained, g_model_pre_trained, d_model_pre_trained
Beispiel #8
0
def gen_pretrain():
    """Pre-train the generator on the data found under ``data/gen_data``.

    Builds one loader per split returned by ``get_pn_data`` using the batch
    size and worker count from ``gen_config()``, then calls
    ``Generator.pretrain`` with both loaders.
    """
    print("start pre-training generator...")
    conf = gen_config()
    train_data, test_data = get_pn_data('data/gen_data')
    train_loader = DataLoader(train_data,
                              conf.batch_size,
                              shuffle=True,
                              num_workers=conf.num_workers,
                              collate_fn=collate_fn)
    # Wrap the second split here: feeding train_data to both loaders would
    # leave test_data unused and evaluate on the training samples.
    test_loader = DataLoader(test_data,
                             conf.batch_size,
                             shuffle=True,
                             num_workers=conf.num_workers,
                             collate_fn=collate_fn)
    Gen = Generator(conf)
    Gen.pretrain(train_loader, test_loader)
Beispiel #9
0
def gan_train():
    """Adversarially train the generator against the discriminator.

    Loads ``generator.pkl`` once, reloads ``discriminator.pkl`` at the start
    of every epoch, and for every training batch lets the generator propose
    a ``(high, low)`` pair, scores it with the discriminator and sends the
    baseline-corrected reward back into the generator step. Every
    ``conf.epoch_per_test`` epochs the discriminator is evaluated on the
    test loader, and the generator is saved to ``conf.model_name`` whenever
    that accuracy drops below the lowest value seen so far.
    """
    print("start gan training...")
    conf = gan_config()
    gen = Generator(conf)
    dis = Discriminator(conf)
    gen.trainModel.load('generator.pkl')
    train_data = get_pos_data('data/gan_data')
    test_data = get_neg_data('data/dis_data')
    train_loader = DataLoader(train_data,
                              conf.batch_size,
                              shuffle=True,
                              num_workers=conf.num_workers,
                              collate_fn=collate_fn,
                              drop_last=True)
    test_loader = DataLoader(test_data,
                             conf.batch_size,
                             shuffle=True,
                             num_workers=conf.num_workers,
                             collate_fn=collate_fn,
                             drop_last=True)

    avg_reward = 0
    # Lowest discriminator accuracy seen so far. It must live outside the
    # epoch loop; resetting it every epoch would make the comparison below
    # succeed for any accuracy under 1 and overwrite the saved generator
    # each time.
    worst_acc = 1
    for epoch in range(conf.n_epochs):
        dis.trainModel.load('discriminator.pkl')
        epoch_loss = 0
        epoch_reward = 0
        for data, _label in train_loader:
            # gen_step is a generator: it first yields the (high, low) pair
            # and then receives the reward through send().
            gen_step = gen.gen_step(data)
            high, low = next(gen_step)
            losses, reward = dis.dis_step(high, low)
            # Subtract the previous epoch's average as a baseline.
            reward = reward - avg_reward
            # NOTE(review): indexing .data[0] fails on 0-dim tensors in
            # current PyTorch; switch to losses.item() if dis_step returns a
            # scalar loss - confirm against dis_step.
            epoch_loss += losses.data[0]
            epoch_reward += reward
            gen_step.send(reward)
        # NOTE(review): the sums are divided by batch_size, not by the number
        # of batches - confirm this is the intended normalisation.
        avg_reward = epoch_reward / conf.batch_size

        print('Epoch{}/{}, Train_Loss={:.3f}'.format(
            epoch + 1, conf.n_epochs, epoch_loss / conf.batch_size))
        if epoch % conf.epoch_per_test == 0:
            true_y, pred_y = predict(dis.trainModel, test_loader)
            eval_acc = acc_metric(true_y, pred_y)
            if worst_acc > eval_acc:
                worst_acc = eval_acc
                gen.trainModel.save(conf.model_name)
                print('gan_valid_acc is {:.3f}'.format(worst_acc))
Beispiel #10
0
 def test_gradient_penalty_non_zero(self):
     """Check that the gradient penalty is non-zero on the first training step.

     Builds and saves fresh models from random latent vectors, moves them to
     CUDA, interpolates between one real batch and one sampled batch, and
     computes the penalty ``((||grad D(interpolates)||_2 - 1) ** 2).mean()``.
     """
     with TemporaryDirectory() as tmpdirname:
         latent_path = tmpdirname + '/encoded_smiles.latent'
         latent = np.random.rand(64, 1, 512)
         os.makedirs(os.path.dirname(latent_path), exist_ok=True)
         with open(latent_path, 'w') as f:
             json.dump(latent.tolist(), f)
         C = CreateModelRunner(input_data_path=latent_path,
                               output_model_folder=tmpdirname)
         C.run()
         D = Discriminator.load(tmpdirname + '/discriminator.txt')
         G = Generator.load(tmpdirname + '/generator.txt')
         # Close the handle before the temporary directory is cleaned up.
         with open(latent_path, "r") as json_smiles:
             latent_space_mols = np.array(json.load(json_smiles))
         testSampler = Sampler(G)
         latent_space_mols = latent_space_mols.reshape(
             latent_space_mols.shape[0], 512)
         T = torch.cuda.FloatTensor
         G.cuda()
         D.cuda()
         dataloader = torch.utils.data.DataLoader(
             LatentMolsDataset(latent_space_mols),
             shuffle=True,
             batch_size=64,
             drop_last=True)
         # Only the first batch is needed.
         for real_mols in dataloader:
             real_mols = real_mols.type(T)
             fake_mols = testSampler.sample(real_mols.shape[0])
             # One random mixing weight per sample, broadcast over features.
             alpha = T(np.random.random((real_mols.size(0), 1)))
             interpolates = (alpha * real_mols +
                             ((1 - alpha) * fake_mols)).requires_grad_(True)
             d_interpolates = D(interpolates)
             # Tensor of ones used as grad_outputs, so autograd returns the
             # gradient of each discriminator output w.r.t. its input.
             fake = T(real_mols.shape[0], 1).fill_(1.0)
             gradients = autograd.grad(
                 outputs=d_interpolates,
                 inputs=interpolates,
                 grad_outputs=fake,
                 create_graph=True,
                 retain_graph=True,
                 only_inputs=True,
             )[0]
             gradients = gradients.view(gradients.size(0), -1)
             gradient_penalty = ((gradients.norm(2, dim=1) - 1)**2).mean()
             self.assertNotEqual(gradient_penalty.item(), 0)
             break
Beispiel #11
0
 def __init__(self, args):
     """Seed the RNGs, echo the arguments, build the data loader and both networks.

     Args:
         args: configuration object; this method reads ``seed``, ``dataset``,
             ``data``, ``len_seg``, ``net_name`` and ``batch_size`` from it
             and passes the whole object to ``Generator`` and
             ``Discriminator``.
     """
     self.args = args
     torch.manual_seed(self.args.seed)
     np.random.seed(self.args.seed)

     # Echo every argument so the run configuration ends up in the log.
     print('> Training arguments:')
     for name in vars(args):
         print('>>> {}: {}'.format(name, getattr(args, name)))

     reader = dp.DatasetReader(
         white_noise=self.args.dataset,
         data_path=data_path,
         data_source=args.data,
         len_seg=self.args.len_seg,
     )
     # The reader is callable and returns a pair; only the first element is used.
     train_set, _ = reader(args.net_name)
     self.data_loader = DataLoader(
         dataset=train_set,
         batch_size=args.batch_size,
         shuffle=True,
     )

     self.Generator = Generator(args)  # Generator
     self.Discriminator = Discriminator(args)  # Discriminator
Beispiel #12
0
def main():
    """Create the generator, discriminator and encoder and train them on the image set.

    All three networks use ``z_dim=20`` and are initialised with
    ``weights_init``. Training runs for 1500 epochs with batches of 64 and
    the model name ``'Efficient_GAN'``.
    """
    G = Generator(z_dim=20)
    D = Discriminator(z_dim=20)
    E = Encoder(z_dim=20)
    for network in (G, D, E):
        network.apply(weights_init)

    image_paths = make_datapath_list(num=200)
    # Single-channel normalisation statistics handed to the transform.
    transform = ImageTransform((0.5,), (0.5,))
    train_dataset = GAN_Img_Dataset(file_list=image_paths, transform=transform)

    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=64,
        shuffle=True,
    )

    G_update, D_update, E_update = train_model(
        G,
        D,
        E,
        dataloader=train_dataloader,
        num_epochs=1500,
        save_model_name='Efficient_GAN',
    )
Beispiel #13
0
 def test_sampler_n(self):
     """Check that the sampler returns exactly the requested number of latent vectors."""
     with TemporaryDirectory() as tmpdirname:
         latent_path = tmpdirname + '/encoded_smiles.latent'
         random_latent = np.random.rand(64, 1, 512)
         os.makedirs(os.path.dirname(latent_path), exist_ok=True)
         with open(latent_path, 'w') as handle:
             json.dump(random_latent.tolist(), handle)

         # Build and save fresh models from the random latent vectors.
         runner = CreateModelRunner(input_data_path=latent_path,
                                    output_model_folder=tmpdirname)
         runner.run()

         generator = Generator.load(tmpdirname + '/generator.txt')
         generator.cuda()
         sampler = Sampler(generator)
         drawn = sampler.sample(256)
         self.assertEqual(
             drawn.shape[0], 256,
             "Sampler produced a different number of latent vectors than specified"
         )
Beispiel #14
0
 def test_generator_shape(self):
     """Check that the generator size does not depend on the number of input vectors.

     For several input set sizes the models are re-created and the total
     number of generator parameters is compared with a fixed reference.
     """
     expected_param_count = 1283968
     with TemporaryDirectory() as tmpdirname:
         latent_path = tmpdirname + '/encoded_smiles.latent'
         for set_size in [1, 64, 256, 1024]:
             random_latent = np.random.rand(set_size, 1, 512)
             os.makedirs(os.path.dirname(latent_path), exist_ok=True)
             with open(latent_path, 'w') as handle:
                 json.dump(random_latent.tolist(), handle)

             runner = CreateModelRunner(input_data_path=latent_path,
                                        output_model_folder=tmpdirname)
             runner.run()

             generator = Generator.load(tmpdirname + '/generator.txt')
             # Flatten every parameter tensor and join them to count entries.
             flat_params = torch.cat(
                 [param.view(-1) for param in generator.parameters()])
             self.assertEqual(flat_params.shape[0], expected_param_count,
                              "Network does not match expected size")
Beispiel #15
0
    def build_model(self):
        """Create the networks, their Adam optimizers and the loss modules.

        The generator, discriminator and transform block are moved to
        ``self.device`` and, when ``self.parallel`` is set, wrapped in
        ``nn.DataParallel`` over the GPU ids listed in ``self.gpus``
        (comma-separated). Both network structures are written to the log.
        """
        generator = Generator(chn=self.g_conv_dim, k_size=3, res_num=self.res_num)
        self.Generator = generator.to(self.device)
        discriminator = Discriminator(chn=self.d_conv_dim, k_size=3)
        self.Discriminator = discriminator.to(self.device)
        self.Transform = Transform_block().to(self.device)

        if self.parallel:
            print('use parallel...')
            print('gpuids ', self.gpus)
            device_ids = [int(gpu_id) for gpu_id in self.gpus.split(',')]
            for attr_name in ('Generator', 'Discriminator', 'Transform'):
                wrapped = nn.DataParallel(getattr(self, attr_name), device_ids=device_ids)
                setattr(self, attr_name, wrapped)

        # Optimizers only receive parameters that still require gradients.
        generator_params = (p for p in self.Generator.parameters() if p.requires_grad)
        self.g_optimizer = torch.optim.Adam(
            generator_params, self.g_lr, [self.beta1, self.beta2])
        discriminator_params = (p for p in self.Discriminator.parameters() if p.requires_grad)
        self.d_optimizer = torch.optim.Adam(
            discriminator_params, self.d_lr, [self.beta1, self.beta2])

        # Loss modules; note that L1_loss is the smooth L1 variant.
        self.MSE_loss = torch.nn.MSELoss()
        self.L1_loss = torch.nn.SmoothL1Loss()
        self.C_loss = torch.nn.BCEWithLogitsLoss()

        # print networks
        logging.info("Generator structure:")
        logging.info(self.Generator)
        logging.info("Discriminator structure:")
        logging.info(self.Discriminator)
Beispiel #16
0
def inference():
    """Run the generator on the PNG at ``FLAGS.test_img`` and save the result.

    The input image is decoded and scaled by 1/255, fed through the
    generator graph, converted back with ``convert_back`` and written next
    to the input with the suffix ``_SRGAN_MSE_70k_ac.png``. When
    ``FLAGS.load_gen`` is set, the generator variables are restored from the
    latest checkpoint under ``<FLAGS.pretrained_models>/srgan``.
    """
    # Graph nodes that read and normalise the low-resolution input image.
    lr_path = tf.convert_to_tensor(FLAGS.test_img, tf.string)
    lr_image = tf.image.decode_png(tf.read_file(lr_path), channels=3)
    lr_image = lr_image / 255

    # Generator graph producing the super-resolved image.
    generator = Generator()
    lr_placeholder = tf.placeholder(tf.float32, [None, None, None, 3])
    sr_output = generator.fit(lr_placeholder, train=True, reuse=False)

    # Saver restricted to the variables in the 'generator' scope.
    srgan_saver = None
    if FLAGS.load_gen:
        generator_variables = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope='generator')
        srgan_saver = tf.train.Saver(generator_variables)

    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    with tf.Session(config=session_config) as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        if FLAGS.load_gen:
            print('Loading pre-trained Generator...')
            checkpoint_dir = os.path.join(FLAGS.pretrained_models, 'srgan')
            srgan_saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir))
        # Evaluate the input image first, then feed it as a batch of one.
        lr_pixels = np.asarray(sess.run(lr_image))
        sr_batch = sess.run(sr_output, feed_dict={lr_placeholder: [lr_pixels]})

    converted_img = convert_back(sr_batch[0], LR=False)
    filename = FLAGS.test_img.replace('.png', '_SRGAN_MSE_70k_ac.png')
    import cv2
    # OpenCV expects BGR channel order when writing.
    cv2.imwrite(filename, cv2.cvtColor(converted_img, cv2.COLOR_RGB2BGR))
Beispiel #17
0
    def test_separate_optimizers(self):
        """Check that generator and discriminator get distinct optimizer instances of the same type.

        Separate instances ensure the two components train independently.
        """
        with TemporaryDirectory() as tmpdirname:
            latent_path = tmpdirname + '/encoded_smiles.latent'
            random_latent = np.random.rand(64, 1, 512)
            os.makedirs(os.path.dirname(latent_path), exist_ok=True)
            with open(latent_path, 'w') as handle:
                json.dump(random_latent.tolist(), handle)

            runner = CreateModelRunner(input_data_path=latent_path,
                                       output_model_folder=tmpdirname)
            runner.run()

            discriminator = Discriminator.load(tmpdirname + '/discriminator.txt')
            generator = Generator.load(tmpdirname + '/generator.txt')
            optimizer_G = torch.optim.Adam(generator.parameters())
            optimizer_D = torch.optim.Adam(discriminator.parameters())

            # must return the same type of object
            same_type = type(optimizer_G) == type(optimizer_D)
            self.assertTrue(same_type)
            # object identity MUST be different
            distinct_objects = optimizer_G is not optimizer_D
            self.assertTrue(distinct_objects)
Beispiel #18
0
    # np.save('./logs/ssenet_our.npy', feature_all)
    # np.save('./logs/label.npy', label_all)

    summary = np.array(summary).mean()
    print('[EVAL]', 'curr_acc: %0.3f' % summary)


if __name__ == '__main__':
    # Script entry point: build the dataset and loader, create the teacher,
    # student and generator networks, restore pretrained weights and run
    # inference with the generator and the student.
    psm_files = './low_pssms/*.npy'
    sse_dataset = SSEDataset(psm_files,
                             config.psm_fake_data_path_prefix,
                             config.sequence_data_path_prefix,
                             config.label_data_path_prefix)
    # NOTE(review): num_workers is taken from config.batch_size while the
    # loader batch size is fixed to 1 - confirm this is intended.
    sse_loader = DataLoader(
        sse_dataset,
        batch_size=1,
        num_workers=config.batch_size,
        collate_fn=sse_dataset.collate_fn,
        shuffle=False,
    )

    # Teacher and student share the same input width.
    teacher = SSENet(input_dim=config.embed_dim + config.profile_width)
    student = SSENet(input_dim=config.embed_dim + config.profile_width)
    generator = Generator()

    # try load pretrained model
    teacher, student, generator = try_get_pretrained(
        teacher, student, generator, scratch=False)
    inference(sse_loader, generator, student)
Beispiel #19
0
    def test_model_trains(self):
        """Run one training step per network and check that both sets of weights change.

        The discriminator step uses the plain Wasserstein loss without a
        gradient penalty: the test only cares that the weights move, not
        that the training is well-formed.
        """

        def flatten_params(model):
            # torch.cat copies the data, so the result is a snapshot that
            # later in-place optimizer updates cannot modify.
            return torch.cat(
                [param.detach().view(-1) for param in model.parameters()])

        with TemporaryDirectory() as tmpdirname:
            T = torch.cuda.FloatTensor
            latent_path = tmpdirname + '/encoded_smiles.latent'
            latent = np.random.rand(64, 1, 512)
            os.makedirs(os.path.dirname(latent_path), exist_ok=True)
            with open(latent_path, 'w') as f:
                json.dump(latent.tolist(), f)

            C = CreateModelRunner(input_data_path=latent_path,
                                  output_model_folder=tmpdirname)
            C.run()
            D = Discriminator.load(tmpdirname + '/discriminator.txt')
            G = Generator.load(tmpdirname + '/generator.txt')
            G.cuda()
            D.cuda()
            optimizer_G = torch.optim.Adam(G.parameters())
            optimizer_D = torch.optim.Adam(D.parameters())
            # Close the handle before the temporary directory is cleaned up.
            with open(latent_path, "r") as json_smiles:
                latent_space_mols = np.array(json.load(json_smiles))
            testSampler = Sampler(G)
            latent_space_mols = latent_space_mols.reshape(
                latent_space_mols.shape[0], 512)
            dataloader = torch.utils.data.DataLoader(
                LatentMolsDataset(latent_space_mols),
                shuffle=True,
                batch_size=64,
                drop_last=True)
            # Only the first batch is needed.
            for real_mols in dataloader:
                real_mols = real_mols.type(T)
                before_G_params = flatten_params(G)
                before_D_params = flatten_params(D)

                # Discriminator step.
                optimizer_D.zero_grad()
                fake_mols = testSampler.sample(real_mols.shape[0])
                real_validity = D(real_mols)
                fake_validity = D(fake_mols)
                d_loss = -torch.mean(real_validity) + torch.mean(fake_validity)
                d_loss.backward()
                optimizer_D.step()

                # Generator step.
                optimizer_G.zero_grad()
                fake_mols = testSampler.sample(real_mols.shape[0])
                fake_validity = D(fake_mols)
                g_loss = -torch.mean(fake_validity)
                g_loss.backward()
                optimizer_G.step()

                after_G_params = flatten_params(G)
                after_D_params = flatten_params(D)
                self.assertTrue(
                    torch.any(torch.ne(after_G_params, before_G_params)))
                self.assertTrue(
                    torch.any(torch.ne(after_D_params, before_D_params)))

                break
Beispiel #20
0
    feature_all = np.concatenate(feature_all, axis=0)
    # np.save('./logs/ssenet_real.npy', feature_all)

    # statistic
    summary_np = np.array(summary).mean()
    print('[EVAL]', 'curr_acc: %0.3f' % summary_np)


if __name__ == '__main__':
    # Script entry point: build the dataset and loader, create the network,
    # restore pretrained weights and evaluate with test_sse.
    psm_files = './low_pssms/*.npy'
    sse_dataset = SSEDataset(psm_files,
                             config.psm_fake_data_path_prefix,
                             config.sequence_data_path_prefix,
                             config.label_data_path_prefix)
    # NOTE(review): num_workers is taken from config.batch_size while the
    # loader batch size is fixed to 1 - confirm this is intended.
    sse_loader = DataLoader(
        sse_dataset,
        batch_size=1,
        num_workers=config.batch_size,
        collate_fn=sse_dataset.collate_fn,
        shuffle=False,
    )

    ssenet = SSENet(input_dim=config.embed_dim + config.profile_width)
    # NOTE(review): the generator is created but not passed to anything
    # below - confirm whether it is still needed.
    generator = Generator(pure_bert=True)

    # try load pretrained model
    ssenet = try_get_pretrained(ssenet, scratch=False)

    test_sse(sse_loader, ssenet)
def run(parameters,
        hparams,
        X_train,
        X_test,
        load=False,
        load_epochs=150,
        load_log_dir=""):

    # Network Parameters
    hidden_dim = parameters['hidden_dim']
    num_layers = parameters['num_layers']  # Still have to implement
    iterations = parameters['iterations']  # Test run to check for overfitting
    batch_size = parameters[
        'batch_size'] * mirrored_strategy.num_replicas_in_sync  # To scale the batch size according to the mirrored strategy
    module_name = parameters[
        'module_name']  # 'lstm' or 'GRU'' --> Still have to implement this
    z_dim = parameters['z_dim']
    lambda_val = 1  # Hyperparameter for ..
    eta = 1  # Hyperparameter for ..
    kappa = 1  # Hyperparameter for feature matching
    gamma = 1  # Hyperparameter for the gradient penalty in WGAN-GP

    if load:  # Write to already defined log directory?
        log_dir = load_log_dir
    else:  # Or create new log directory?
        # Define the TensorBoard such that we can visualize the results
        log_dir = 'logs/' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

    summary_writer_train = tf.summary.create_file_writer(log_dir + '/train')
    summary_writer_test = tf.summary.create_file_writer(log_dir + '/test')
    summary_writer_bottom = tf.summary.create_file_writer(log_dir + '/bottom')
    summary_writer_top = tf.summary.create_file_writer(log_dir + '/top')
    summary_writer_real_data = tf.summary.create_file_writer(log_dir +
                                                             '/real_data')
    summary_writer_fake_data = tf.summary.create_file_writer(log_dir +
                                                             '/fake_data')
    summary_writer_lower_bound = tf.summary.create_file_writer(log_dir +
                                                               '/lower_bound')

    if load:
        embedder_model, recovery_model, supervisor_model, generator_model, discriminator_model = load_models(
            load_epochs, hparams, hidden_dim)
    else:
        with mirrored_strategy.scope():
            # Create an instance of all neural networks models (All LSTM)
            embedder_model = Embedder('logs/embedder',
                                      hparams,
                                      hidden_dim,
                                      dimensionality=11)
            recovery_model = Recovery(
                'logs/recovery', hparams, hidden_dim,
                dimensionality=11)  # If used for EONIA rate only
            supervisor_model = Supervisor('logs/supervisor', hparams,
                                          hidden_dim)
            generator_model = Generator('logs/generator', hparams, hidden_dim)
            discriminator_model = Discriminator('logs/TimeGAN', hparams,
                                                hidden_dim)

    r_loss_train = tf.keras.metrics.Mean(name='r_loss_train')  # Step 1 metrics
    r_loss_test = tf.keras.metrics.Mean(name='r_loss_test')

    grad_embedder_ll = tf.keras.metrics.Mean(
        name='e_grad_lower_layer')  # Step 1 gradient
    grad_embedder_ul = tf.keras.metrics.Mean(name='e_grad_upper_layer')
    grad_recovery_ll = tf.keras.metrics.Mean(name='r_grad_lower_layer')
    grad_recovery_ul = tf.keras.metrics.Mean(name='r_grad_upper_layer')

    g_loss_s_train = tf.keras.metrics.Mean(
        name='g_loss_s_train')  # Step 2 metrics
    g_loss_s_test = tf.keras.metrics.Mean(name='g_loss_s_test')

    grad_supervisor_ll = tf.keras.metrics.Mean(
        name='s_grad_lower_layer')  # Step 2 gradients
    grad_supervisor_ul = tf.keras.metrics.Mean(name='s_grad_upper_layer')

    e_loss_T0 = tf.keras.metrics.Mean(
        name='e_loss_T0')  # Step 3 metrics (train)
    g_loss_s_embedder = tf.keras.metrics.Mean(name='g_loss_s_embedder')
    g_loss_s = tf.keras.metrics.Mean(name='g_loss_s')
    d_loss = tf.keras.metrics.Mean(name='d_loss')
    g_loss_u_e = tf.keras.metrics.Mean(name='g_loss_u_e')

    e_loss_T0_test = tf.keras.metrics.Mean(
        name='e_loss_T0_test')  # Step 3 metrics (test)
    g_loss_s_embedder_test = tf.keras.metrics.Mean(name='e_loss_T0_test')
    g_loss_s_test = tf.keras.metrics.Mean(name='g_loss_s_test')
    g_loss_u_e_test = tf.keras.metrics.Mean(name='g_loss_u_e_test')
    d_loss_test = tf.keras.metrics.Mean(name='d_loss_test')

    grad_discriminator_ll = tf.keras.metrics.Mean(
        name='d_grad_lower_layer')  # Step 3 gradients
    grad_discriminator_ul = tf.keras.metrics.Mean(name='d_grad_upper_layer')
    grad_generator_ll = tf.keras.metrics.Mean(name='g_grad_lower_layer')
    grad_generator_ul = tf.keras.metrics.Mean(name='g_grad_upper_layer')

    loss_object_accuracy = tf.keras.metrics.Accuracy()  # To calculate accuracy

    # Create the loss object, optimizer, and training function
    loss_object = tf.keras.losses.MeanSquaredError(
        reduction=tf.keras.losses.Reduction.NONE)  # Rename this to MSE
    loss_object_adversarial = tf.losses.BinaryCrossentropy(
        from_logits=True,
        reduction=tf.keras.losses.Reduction.NONE)  # More stable
    # from_logits = True because the last dense layers is linear and
    # does not have an activation -- could be differently specified

    # Activate the optimizer using the Mirrored Strategy approach
    with mirrored_strategy.scope():
        optimizer = tf.keras.optimizers.Adam(
            0.01
        )  # Possibly increase the learning rate to stir up the GAN training

    # Change the input dataset to be used by the mirrored strategy
    X_train = mirrored_strategy.experimental_distribute_dataset(X_train)
    X_test = mirrored_strategy.experimental_distribute_dataset(X_test)

    # Compute the loss according to the MirroredStrategy approach
    def compute_loss(real, regenerate):
        """Return the loss of `regenerate` against `real`, averaged globally.

        `loss_object` is configured without reduction, so its unreduced
        output is handed to `tf.nn.compute_average_loss` together with the
        global `batch_size` defined in the enclosing scope.
        """
        unreduced_loss = loss_object(real, regenerate)
        averaged_loss = tf.nn.compute_average_loss(
            unreduced_loss, global_batch_size=batch_size)
        return averaged_loss

    # 1. Start with embedder training (Optimal LSTM auto encoder network)
    @tf.function(input_signature=[
        tf.TensorSpec(shape=(None, 20, 11), dtype=tf.float64)
    ])
    def train_step_embedder(X_train):
        """Run one autoencoder (Embedder + Recovery) pre-training step.

        Args:
            X_train: batch of real sequences; the signature fixes it to
                shape (None, 20, 11) and dtype float64.

        Returns:
            The reconstruction loss of this batch as computed by
            `compute_loss`. The distributed wrapper reduces the value that
            `mirrored_strategy.run` hands back, so this step must return
            the loss instead of `None`.

        Side effects:
            Applies one optimizer update to the Embedder and Recovery
            variables and updates the four gradient-norm metrics.
        """
        with tf.GradientTape() as tape:
            # Apply Embedder to data and Recovery to predicted hidden states
            e_pred_train = embedder_model(X_train)
            r_pred_train = recovery_model(e_pred_train)

            # Compute the loss for the LSTM autoencoder using MirroredStrategy
            R_loss_train = compute_loss(X_train, r_pred_train)

        # Embedder variables come first, Recovery variables second; the
        # gradient indices below rely on this ordering.
        autoencoder_variables = (embedder_model.trainable_variables +
                                 recovery_model.trainable_variables)

        # Compute the gradients with respect to the Embedder and Recovery vars
        gradients = tape.gradient(R_loss_train, autoencoder_variables)

        # Apply the gradients to the Embedder and Recovery vars
        optimizer.apply_gradients(zip(gradients, autoencoder_variables))

        # Record the lower and upper layer gradient norms.
        # NOTE(review): the hard-coded indices assume a fixed variable
        # layout of both models -- confirm against the model definitions.
        grad_embedder_ll(tf.norm(gradients[1]))
        grad_embedder_ul(tf.norm(gradients[9]))
        grad_recovery_ll(tf.norm(gradients[12]))
        grad_recovery_ul(tf.norm(gradients[20]))

        return R_loss_train

    @tf.function()
    def distributed_train_step_embedder(X_train):
        """Run `train_step_embedder` on every replica and log the loss.

        The per-replica results returned by `mirrored_strategy.run` are
        sum-reduced and the reduced value is fed to the `r_loss_train`
        metric. Nothing is returned.
        """
        replica_results = mirrored_strategy.run(train_step_embedder,
                                                args=(X_train, ))
        summed_loss = mirrored_strategy.reduce(tf.distribute.ReduceOp.SUM,
                                               replica_results,
                                               axis=None)
        r_loss_train(summed_loss)

    @tf.function(input_signature=[
        tf.TensorSpec(shape=(None, 20, 11), dtype=tf.float64)
    ])
    def test_step_embedder(X_test):
        """Evaluate the autoencoder on one test batch.

        Embeds `X_test`, recovers it again and adds the resulting
        `compute_loss` value to the `r_loss_test` metric. No variables are
        updated and nothing is returned.
        """
        # Embedder -> Recovery round trip
        reconstruction = recovery_model(embedder_model(X_test))

        # Same loss definition as used for training (MirroredStrategy form)
        r_loss_test(compute_loss(X_test, reconstruction))

    # Initialize the number of minibatches
    nr_mb_train = 0
    # Train the embedder for the input data
    for epoch in range(load_epochs, load_epochs + 55):
        r_loss_train.reset_states()
        r_loss_test.reset_states()
        grad_embedder_ll.reset_states()
        grad_embedder_ul.reset_states()
        grad_recovery_ll.reset_states()
        grad_recovery_ul.reset_states()

        # Train over the complete train and test dataset
        for x_train in X_train:
            distributed_train_step_embedder(x_train)
            with summary_writer_bottom.as_default():
                tf.summary.scalar(
                    '1. Pre-training autoencoder/2. Gradient norm - embedder',
                    grad_embedder_ll.result(),
                    step=nr_mb_train)
                tf.summary.scalar(
                    '1. Pre-training autoencoder/2. Gradient norm - recovery',
                    grad_recovery_ll.result(),
                    step=nr_mb_train)

            with summary_writer_top.as_default():
                tf.summary.scalar(
                    '1. Pre-training autoencoder/2. Gradient norm - embedder',
                    grad_embedder_ul.result(),
                    step=nr_mb_train,
                    description=str(descr_auto_grads_embedder()))
                tf.summary.scalar(
                    '1. Pre-training autoencoder/2. Gradient norm - recovery',
                    grad_recovery_ul.result(),
                    step=nr_mb_train,
                    description=str(descr_auto_grads_recovery()))
            nr_mb_train += 1

        for x_test in X_test:
            test_step_embedder(x_test)

        with summary_writer_train.as_default():
            tf.summary.scalar('1. Pre-training autoencoder/1. Recovery loss',
                              r_loss_train.result(),
                              step=epoch)
            if epoch % 50 == 0:  # Only log trainable variables per 10 epochs
                add_hist(embedder_model.trainable_variables, epoch)
                add_hist(recovery_model.trainable_variables, epoch)

        with summary_writer_test.as_default():
            tf.summary.scalar('1. Pre-training autoencoder/1. Recovery loss',
                              r_loss_test.result(),
                              step=epoch,
                              description=str(descr_auto_loss()))

        # Log the progress to the user console in python
        template = 'Autoencoder training: Epoch {}, Loss: {}, Test Loss: {}'
        print(
            template.format(epoch + 1,
                            np.round(r_loss_train.result().numpy(), 5),
                            np.round(r_loss_test.result().numpy(), 5)))

    print('Finished Embedding Network Training')

    # 2. Continue w/ supervisor training on real data (same temporal relations)
    @tf.function(input_signature=[
        tf.TensorSpec(shape=(None, 20, 11), dtype=tf.float64)
    ])
    def train_step_supervised(X_train):
        """Run one Supervisor pre-training step on real data.

        Args:
            X_train: batch of real sequences; the signature fixes it to
                shape (None, 20, 11) and dtype float64.

        Returns:
            The supervised loss of this batch as computed by
            `compute_loss`. The distributed wrapper reduces the value that
            `mirrored_strategy.run` hands back, so this step must return
            the loss instead of `None`.

        Side effects:
            Applies one optimizer update to the Supervisor variables only
            (the Embedder is used but not trained here) and updates the
            two supervisor gradient-norm metrics.
        """
        with tf.GradientTape() as tape:
            # Apply Embedder to data and check temporal relations with supervisor
            e_pred_train = embedder_model(X_train)
            H_hat_supervise = supervisor_model(e_pred_train)

            # Both tensors are sliced from time index 1 onwards before the
            # loss is computed (MirroredStrategy form of the loss).
            G_loss_S_train = compute_loss(e_pred_train[:, 1:, :],
                                          H_hat_supervise[:, 1:, :])

        # Compute the gradients with respect to the Supervisor vars
        gradients = tape.gradient(G_loss_S_train,
                                  supervisor_model.trainable_variables)

        # Apply the gradients to the Supervisor vars
        optimizer.apply_gradients(
            zip(gradients, supervisor_model.trainable_variables))

        # Record the lower and upper layer gradient norms.
        # NOTE(review): the hard-coded indices assume a fixed variable
        # layout of the Supervisor -- confirm against the model definition.
        grad_supervisor_ll(tf.norm(gradients[1]))
        grad_supervisor_ul(tf.norm(gradients[6]))

        return G_loss_S_train

    @tf.function()
    def distributed_train_step_supervised(X_train):
        """Run `train_step_supervised` on every replica and log the loss.

        No `input_signature` is declared here, matching
        `distributed_train_step_embedder`: the argument is an element of
        the distributed dataset, which is a per-replica value rather than
        a plain tensor once more than one replica is in use, so a plain
        `TensorSpec` signature would reject it. The shape/dtype contract
        is enforced by the signature of `train_step_supervised`.

        The per-replica results returned by `mirrored_strategy.run` are
        sum-reduced and fed to the `g_loss_s_train` metric. Nothing is
        returned.
        """
        per_replica_losses = mirrored_strategy.run(train_step_supervised,
                                                   args=(X_train, ))
        G_loss_S_train = mirrored_strategy.reduce(tf.distribute.ReduceOp.SUM,
                                                  per_replica_losses,
                                                  axis=None)
        g_loss_s_train(G_loss_S_train)

    @tf.function(input_signature=[
        tf.TensorSpec(shape=(None, 20, 11), dtype=tf.float64)
    ])
    def test_step_supervised(X_test):
        """Evaluate the Supervisor on one test batch.

        Embeds `X_test`, runs the Supervisor on the embedding and feeds the
        unreduced `loss_object` output (both tensors sliced from time
        index 1 onwards) to the `g_loss_s_test` metric. No variables are
        updated and nothing is returned.
        """
        embedding = embedder_model(X_test)
        supervised = supervisor_model(embedding)
        supervised_loss = loss_object(embedding[:, 1:, :],
                                      supervised[:, 1:, :])
        g_loss_s_test(supervised_loss)

    # Initialize minibatch number
    nr_mb_train = 0
    for epoch in range(load_epochs, load_epochs + 5):
        g_loss_s_train.reset_states()
        g_loss_s_test.reset_states()
        grad_supervisor_ll.reset_states()
        grad_supervisor_ul.reset_states()

        for x_train in X_train:
            distributed_train_step_supervised(x_train)
            with summary_writer_bottom.as_default():
                tf.summary.scalar(
                    '2. Pre-training supervisor/2. Gradient norm - supervisor',
                    grad_supervisor_ll.result(),
                    step=nr_mb_train)

            with summary_writer_top.as_default():
                tf.summary.scalar(
                    '2. Pre-training supervisor/2. Gradient norm - supervisor',
                    grad_supervisor_ul.result(),
                    step=nr_mb_train,
                    description=str(descr_auto_grads_supervisor()))
            nr_mb_train += 1

        for x_test in X_test:
            test_step_supervised(x_test)

        with summary_writer_train.as_default():
            tf.summary.scalar('2. Pre-training supervisor/1. Supervised loss',
                              g_loss_s_train.result(),
                              step=epoch)
            if epoch % 10 == 0:  # Only log trainable variables per 10 epochs
                add_hist(supervisor_model.trainable_variables, epoch)

        with summary_writer_test.as_default():
            tf.summary.scalar('2. Pre-training supervisor/1. Supervised loss',
                              g_loss_s_test.result(),
                              step=epoch,
                              description=str(descr_supervisor_loss()))

        template = 'Epoch {}, Train Loss: {}, Test loss: {}'
        print(
            template.format(epoch + 1,
                            np.round(g_loss_s_train.result().numpy(), 8),
                            np.round(g_loss_s_test.result().numpy(), 8)))
    print('Finished training with Supervised loss only')

    # 3. Continue with joint training
    @tf.function(input_signature=[
        tf.TensorSpec(shape=(None, 20, 11), dtype=tf.float64),
        tf.TensorSpec(shape=(None, 20, hidden_dim), dtype=tf.float64),
        tf.TensorSpec(shape=(), dtype=tf.bool),
        tf.TensorSpec(shape=(), dtype=tf.bool)
    ])
    def train_step_jointly_generator(X_train,
                                     Z,
                                     graphing=False,
                                     wasserstein=False):
        """Run one generator update of the joint training phase.

        Args:
            X_train: batch of real sequences; the signature fixes it to
                shape (None, 20, 11) and dtype float64.
            Z: batch of generator inputs; the signature fixes it to shape
                (None, 20, hidden_dim) and dtype float64.
            graphing: scalar bool. When true, every model is only called
                once under a tape; no loss is computed, no gradients are
                applied and no metrics are updated.
            wasserstein: scalar bool. When true the adversarial term is
                `-mean(discriminator_model.predict(E_hat))`; otherwise it
                is `loss_object_adversarial` against all-ones targets on
                `discriminator_model(E_hat)`.

        Side effects (non-graphing branches only):
            Applies one optimizer update to the generator variables only,
            and updates `grad_generator_ll`, `grad_generator_ul`,
            `g_loss_u_e` and `g_loss_s`. Nothing is returned.
        """
        if graphing:  # Only used for creating the graph
            with tf.GradientTape() as tape:
                # We need these steps to make the graph in Tensorboard complete
                dummy1 = embedder_model(X_train)  # Real embedding
                dummy2 = generator_model(Z)  # Fake embedding
                dummy4 = recovery_model(tf.concat(
                    [dummy1, dummy2], axis=0))  # Recovery from embedding
                dummy3 = supervisor_model(tf.concat(
                    [dummy1, dummy2], axis=0))  # Supervisor on embedding
                dummy5 = discriminator_model(
                    tf.concat([dummy1, dummy2],
                              axis=0))  # Discriminator on embedding
        else:
            if wasserstein:
                with tf.GradientTape() as tape:
                    # H and x_tilde are computed but do not enter G_loss
                    H = embedder_model(X_train)
                    x_tilde = recovery_model(H)

                    # Apply Generator to Z and apply Supervisor on fake embedding
                    E_hat = generator_model(Z)
                    H_hat = supervisor_model(E_hat)
                    # recovery_hat is computed but does not enter G_loss
                    recovery_hat = recovery_model(E_hat)

                    # 1. Generator - Wasserstein adversarial loss: negative
                    # mean of the discriminator output on the fake embedding
                    Y_fake_e = discriminator_model.predict(E_hat)
                    G_loss_U_e = -tf.reduce_mean(Y_fake_e)

                    # 2. Generator - Supervised loss for fake embeddings
                    G_loss_S = loss_object(E_hat[:, 1:, :], H_hat[:, 1:, :])

                    # Sum and multiply supervisor loss by eta for equal
                    # contribution to generator loss function
                    G_loss = G_loss_U_e + eta * G_loss_S  #+ kappa * tf.add(G_loss_f1 , G_loss_f2)

                # Compute the gradients w.r.t. the generator model only
                # (supervisor variables are not part of this update)
                gradients_generator = tape.gradient(
                    G_loss, generator_model.trainable_variables)

                # Apply the gradients to the generator model
                optimizer.apply_gradients(
                    zip(gradients_generator,
                        generator_model.trainable_variables))

            else:
                with tf.GradientTape() as tape:
                    # H and x_tilde are computed but do not enter G_loss;
                    # the feature-matching terms that would use x_tilde are
                    # commented out below
                    H = embedder_model(X_train)
                    x_tilde = recovery_model(H)

                    # Apply Generator to Z and apply Supervisor on fake embedding
                    E_hat = generator_model(Z)
                    H_hat = supervisor_model(E_hat)
                    recovery_hat = recovery_model(E_hat)

                    # Compute the discriminator output (logits) on the fake embedding
                    Y_fake_e = discriminator_model(E_hat)
                    # 1. Generator - Adversarial loss - We want to trick Discriminator to give classification of 1
                    G_loss_U_e = loss_object_adversarial(
                        tf.ones_like(Y_fake_e), Y_fake_e)

                    # 2. Generator - Supervised loss for fake embeddings
                    G_loss_S = loss_object(E_hat[:, 1:, :], H_hat[:, 1:, :])

                    #if dummy1.shape[0] != recovery_hat.shape[0]:
                    #    recovery_hat = recovery_hat[0:dummy1.shape[0], :, :]

                    # # 3. Generator - Feature matching skewness and kurtosis
                    # G_loss_f1 = tf.math.pow(tf.reduce_mean(scipy.stats.skew(x_tilde, axis = 1)) -
                    #             tf.reduce_mean(scipy.stats.skew(recovery_hat, axis = 1)), 2)

                    # # 3. Generator - Feature matching skewness and kurtosis
                    # G_loss_f2 = tf.math.pow(tf.reduce_mean(scipy.stats.kurtosis(x_tilde, axis = 1)) -
                    #             tf.reduce_mean(scipy.stats.kurtosis(recovery_hat, axis = 1)), 2)

                    # Sum and multiply supervisor loss by eta for equal
                    # contribution to generator loss function
                    G_loss = G_loss_U_e + eta * G_loss_S  #+ kappa * tf.add(G_loss_f1 , G_loss_f2)

                # Compute the gradients w.r.t. the generator model only
                # (supervisor variables are not part of this update)
                gradients_generator = tape.gradient(
                    G_loss, generator_model.trainable_variables)

                # Apply the gradients to the generator model
                optimizer.apply_gradients(
                    zip(gradients_generator,
                        generator_model.trainable_variables))

            # Record the lower and upper layer gradient norms for the generator
            # NOTE(review): indices 1 and 9 assume a fixed variable layout
            # of the generator -- confirm against the model definition
            grad_generator_ll(tf.norm(gradients_generator[1]))
            grad_generator_ul(tf.norm(gradients_generator[9]))

            # Compute individual components of the generator loss
            g_loss_u_e(G_loss_U_e)
            g_loss_s(
                G_loss_S)  # Based on this we can set the eta value in G_loss_S

    @tf.function(input_signature=[
        tf.TensorSpec(shape=(None, 20, hidden_dim), dtype=tf.float64),
        tf.TensorSpec(shape=(), dtype=tf.bool)
    ])
    def test_step_jointly_generator(Z, wasserstein=False):
        """Evaluate the generator losses on one batch of generator inputs.

        Args:
            Z: batch of generator inputs; the signature fixes it to shape
                (None, 20, hidden_dim) and dtype float64.
            wasserstein: scalar bool selecting the adversarial term, either
                `-mean(discriminator_model.predict(...))` or
                `loss_object_adversarial` against all-ones targets.

        Side effects:
            Feeds the adversarial term to `g_loss_u_e_test` and the
            supervised term to `g_loss_s_test`. No variables are updated
            and nothing is returned.
        """
        fake_embedding = generator_model(Z)
        supervised_embedding = supervisor_model(fake_embedding)

        if wasserstein:
            fake_scores = discriminator_model.predict(fake_embedding)
            adversarial_loss = -tf.reduce_mean(fake_scores)
        else:
            fake_scores = discriminator_model(fake_embedding)
            all_real_targets = tf.ones_like(fake_scores)
            adversarial_loss = loss_object_adversarial(all_real_targets,
                                                       fake_scores)

        # Supervised term: both tensors sliced from time index 1 onwards
        supervised_loss = loss_object(fake_embedding[:, 1:, :],
                                      supervised_embedding[:, 1:, :])

        g_loss_u_e_test(adversarial_loss)
        g_loss_s_test(supervised_loss)

    @tf.function(input_signature=[
        tf.TensorSpec(shape=(None, 20, 11), dtype=tf.float64)
    ])
    def train_step_jointly_embedder(X_train):
        """Run one Embedder/Recovery update of the joint training phase.

        The optimized loss is the reconstruction loss plus `lambda_val`
        times the square root of the supervised loss on the real
        embedding.

        Side effects:
            Applies one optimizer update to the Embedder and Recovery
            variables, feeds the reconstruction loss to `e_loss_T0` and
            the supervised loss to `g_loss_s_embedder`. Nothing is
            returned.
        """
        with tf.GradientTape() as tape:
            # Real data -> embedding -> reconstruction
            embedding = embedder_model(X_train)
            reconstruction = recovery_model(embedding)
            reconstruction_loss = loss_object(X_train, reconstruction)

            # Supervised loss on the real embedding, both tensors sliced
            # from time index 1 onwards
            supervised_embedding = supervisor_model(embedding)
            supervised_loss = loss_object(embedding[:, 1:, :],
                                          supervised_embedding[:, 1:, :])

            # Combined objective for the embedder-recovery pair
            combined_loss = (reconstruction_loss +
                             lambda_val * tf.sqrt(supervised_loss))

        # Differentiate w.r.t. Embedder variables followed by Recovery variables
        embedder_gradients = tape.gradient(
            combined_loss, embedder_model.trainable_variables +
            recovery_model.trainable_variables)

        gradient_variable_pairs = zip(
            embedder_gradients, embedder_model.trainable_variables +
            recovery_model.trainable_variables)
        optimizer.apply_gradients(gradient_variable_pairs)

        # Track the two loss components separately
        e_loss_T0(reconstruction_loss)
        g_loss_s_embedder(supervised_loss)

    @tf.function(input_signature=[
        tf.TensorSpec(shape=(None, 20, 11), dtype=tf.float64)
    ])
    def test_step_jointly_embedder(X_test):
        """Evaluate the Embedder/Recovery losses on one test batch.

        Feeds the reconstruction loss to `e_loss_T0_test` and the
        supervised loss on the real embedding to `g_loss_s_embedder_test`.
        No variables are updated and nothing is returned.
        """
        embedding = embedder_model(X_test)
        reconstruction = recovery_model(embedding)
        reconstruction_loss = loss_object(X_test, reconstruction)

        # Supervised loss, both tensors sliced from time index 1 onwards
        supervised_embedding = supervisor_model(embedding)
        supervised_loss = loss_object(embedding[:, 1:, :],
                                      supervised_embedding[:, 1:, :])

        e_loss_T0_test(reconstruction_loss)
        g_loss_s_embedder_test(supervised_loss)

    @tf.function()
    def gradient_penalty(real, fake):
        """Compute the gradient penalty on random interpolates.

        Draws a uniform weight per element, interpolates between `real`
        and `fake`, differentiates `discriminator_model.predict` with
        respect to the interpolates and returns the mean squared deviation
        of the per-sample gradient norm from 1.

        Args:
            real: embedding of real data, expected to be of shape
                (batch, 20, hidden_dim).
            fake: embedding of generated data with the same shape and
                dtype as `real`.

        Returns:
            A scalar tensor in the dtype of `real`. If the computation
            cannot be built, a float16 zero is returned as a best-effort
            fallback and the reason is printed.
        """
        try:
            # Use the dynamic batch size. Under a traced signature with an
            # unknown batch dimension the static `real.shape[0]` is None,
            # which made the sampling below raise and the penalty silently
            # collapse to the zero fallback.
            batch = tf.shape(real)[0]
            # Sample in the dtype of the embeddings so that the
            # interpolation does not mix dtypes.
            alpha = tf.random.uniform(shape=[batch, 20, hidden_dim],
                                      minval=0.,
                                      maxval=1.,
                                      dtype=real.dtype)
            interpolates = real + alpha * (fake - real)
            with tf.GradientTape() as tape:
                # interpolates is not a variable, so it must be watched
                tape.watch(interpolates)
                probs = discriminator_model.predict(interpolates)

            gradients = tape.gradient(probs, interpolates)
            # Per-sample gradient norm over the time and feature axes
            slopes = tf.sqrt(
                tf.math.reduce_sum(tf.square(gradients), axis=[1, 2]))
            penalty = tf.reduce_mean((slopes - 1.)**2)
            return penalty
        except Exception as err:
            # Best-effort fallback kept from the original behaviour, but no
            # longer silent and no longer catching KeyboardInterrupt or
            # SystemExit.
            print('gradient_penalty: falling back to a zero penalty:', err)
            return tf.constant(0, dtype=tf.float16)

    @tf.function(input_signature=[
        tf.TensorSpec(shape=(None, 20, 11), dtype=tf.float64),
        tf.TensorSpec(shape=(None, 20, hidden_dim), dtype=tf.float64),
        tf.TensorSpec(shape=(), dtype=tf.float16),
        tf.TensorSpec(shape=(), dtype=tf.bool)
    ])
    def train_step_discriminator(X_train,
                                 Z,
                                 smoothing_factor=1,
                                 wasserstein=False):
        """Run one discriminator update of the joint training phase.

        Args:
            X_train: batch of real sequences; the signature fixes it to
                shape (None, 20, 11) and dtype float64.
            Z: batch of generator inputs; the signature fixes it to shape
                (None, 20, hidden_dim) and dtype float64.
            smoothing_factor: scalar float16 multiplied onto the all-ones
                targets of the real samples (non-Wasserstein branch only).
            wasserstein: scalar bool. When true the critic loss
                `mean(D(fake)) - mean(D(real))` plus `gamma` times the
                gradient penalty is minimized; otherwise the sum of the
                adversarial losses on real and fake embeddings.

        Side effects:
            Applies one optimizer update to the discriminator variables
            and updates `grad_discriminator_ll`, `grad_discriminator_ul`
            and `d_loss`. Nothing is returned.
        """
        if wasserstein:  # Use the Wasserstein Gradient penalty
            with tf.GradientTape() as tape:
                # Embeddings for real data and classifications from discriminator
                H = embedder_model(X_train)
                # Embeddings for fake data and classifications from discriminator
                E_hat = generator_model(Z)
                Y_real = discriminator_model.predict(H)
                Y_fake = discriminator_model.predict(E_hat)
                # The critic has to score real data high and fake data low.
                # This was previously mean(Y_real) - mean(Y_fake), which
                # pushed the fake scores up just like the generator loss
                # -mean(D(fake)) does, and was the negative of the loss
                # reported by test_step_discriminator.
                D_loss = tf.reduce_mean(Y_fake) - tf.reduce_mean(Y_real)
                # Cast the penalty to the dtype of the loss instead of a
                # hard-coded float16 so that the addition never mixes dtypes.
                D_loss += gamma * tf.cast(gradient_penalty(H, E_hat),
                                          D_loss.dtype)

            # Compute the gradients with respect to the discriminator model
            grad_d = tape.gradient(D_loss,
                                   discriminator_model.trainable_variables)

            # Apply the gradient to the discriminator model
            optimizer.apply_gradients(
                zip(
                    grad_d,  # Minimize the critic loss
                    discriminator_model.trainable_variables))

            # Record the lower and upper layer gradient norms and the loss
            # NOTE(review): indices 1 and 9 assume a fixed variable layout
            # of the discriminator -- confirm against the model definition
            grad_discriminator_ll(tf.norm(grad_d[1]))
            grad_discriminator_ul(tf.norm(grad_d[9]))
            d_loss(D_loss)

        else:  # Just normal Jensen-Shannon divergence
            with tf.GradientTape() as tape:
                # Embeddings for real data and classifications from discriminator
                H = embedder_model(X_train)
                # Embeddings for fake data and classifications from discriminator
                E_hat = generator_model(Z)
                Y_real = discriminator_model(
                    H)  # From logits instead of probs for numerical stability
                Y_fake_e = discriminator_model(E_hat)
                # Real targets are ones scaled by smoothing_factor, fake
                # targets are zeros
                D_loss_real = loss_object_adversarial(
                    tf.ones_like(Y_real) * smoothing_factor, Y_real)
                D_loss_fake_e = loss_object_adversarial(
                    tf.zeros_like(Y_fake_e), Y_fake_e)
                D_loss = D_loss_real + D_loss_fake_e

            # Compute the gradients with respect to the discriminator model
            grad_d = tape.gradient(D_loss,
                                   discriminator_model.trainable_variables)

            # Apply the gradient to the discriminator model
            optimizer.apply_gradients(
                zip(
                    grad_d,  # Minimize the Cross Entropy
                    discriminator_model.trainable_variables))

            # Record the lower and upper layer gradient norms and the loss
            grad_discriminator_ll(tf.norm(grad_d[1]))
            grad_discriminator_ul(tf.norm(grad_d[9]))
            d_loss(D_loss)

    @tf.function(input_signature=[
        tf.TensorSpec(shape=(None, 20, 11), dtype=tf.float64),
        tf.TensorSpec(shape=(None, 20, hidden_dim), dtype=tf.float64),
        tf.TensorSpec(shape=(), dtype=tf.bool)
    ])
    def test_step_discriminator(X_test, Z, wasserstein=False):
        """Evaluate the discriminator loss on one test batch.

        Args:
            X_test: batch of real sequences; the signature fixes it to
                shape (None, 20, 11) and dtype float64.
            Z: batch of generator inputs; the signature fixes it to shape
                (None, 20, hidden_dim) and dtype float64.
            wasserstein: scalar bool selecting either
                `mean(D(fake)) - mean(D(real))` plus `gamma` times the
                gradient penalty, or the sum of the adversarial losses on
                real (targets one) and fake (targets zero) embeddings.

        Side effects:
            Feeds the loss to the `d_loss_test` metric. No variables are
            updated and nothing is returned.
        """
        real_embedding = embedder_model(X_test)
        fake_embedding = generator_model(Z)

        if wasserstein:  # Wasserstein loss with gradient penalty
            real_scores = discriminator_model.predict(real_embedding)
            fake_scores = discriminator_model.predict(fake_embedding)
            D_loss_test = (tf.reduce_mean(fake_scores) -
                           tf.reduce_mean(real_scores))
            D_loss_test += gamma * gradient_penalty(real_embedding,
                                                    fake_embedding)
        else:
            # Logits rather than probabilities, for numerical stability
            real_scores = discriminator_model(real_embedding)
            fake_scores = discriminator_model(fake_embedding)
            real_part = loss_object_adversarial(tf.ones_like(real_scores),
                                                real_scores)
            fake_part = loss_object_adversarial(tf.zeros_like(fake_scores),
                                                fake_scores)
            D_loss_test = real_part + fake_part

        d_loss_test(D_loss_test)

    def evaluate_accuracy(X_test, Z):
        """Return the discriminator accuracy on real and on fake inputs.

        Args:
            X_test: batch of real sequences; it is embedded with
                `embedder_model` before being scored.
            Z: batch scored directly by `discriminator_model.predict`.
                NOTE(review): Z is not passed through `generator_model`
                here -- confirm that callers hand in generated embeddings.

        Returns:
            Tuple `(D_accuracy_real, D_accuracy_fake)`: the fraction of
            real inputs labelled 1 and the fraction of fake inputs
            labelled 0.

        Side effects:
            Resets and updates the shared `loss_object_accuracy` metric.
        """
        # Binarize the discriminator output at 0.5.
        # NOTE(review): this presumes `predict` returns probabilities
        # rather than logits -- verify against the model.
        Y_real_test = (discriminator_model.predict(
            embedder_model(X_test)).numpy() > 0.5) * 1
        Y_fake_test = (discriminator_model.predict(Z).numpy() > 0.5) * 1

        # The accuracy metric is stateful and accumulates over calls.
        # Reset it before each measurement so the real and fake accuracies
        # are independent of each other and of earlier evaluations.
        loss_object_accuracy.reset_states()
        D_accuracy_real = loss_object_accuracy(tf.ones_like(Y_real_test),
                                               Y_real_test).numpy()
        loss_object_accuracy.reset_states()
        D_accuracy_fake = loss_object_accuracy(tf.zeros_like(Y_fake_test),
                                               Y_fake_test).numpy()

        return D_accuracy_real, D_accuracy_fake

    # Helper counter for the already performed epochs
    already_done_epochs = epoch

    # Define the algorithm for training jointly
    print('Start joint training')
    nr_mb_train = 0  # Iterator for generator training
    o = -1  # Iterator for discriminator training
    tf.summary.trace_on(graph=False, profiler=True)  # Initialize the profiler
    for epoch in range(load_epochs, iterations + load_epochs):
        g_loss_u_e.reset_states()  # Reset the loss at every epoch
        g_loss_s.reset_states()
        e_loss_T0.reset_states()
        g_loss_s_embedder.reset_states()
        d_loss.reset_states()

        d_loss_test.reset_states()

        # This for loop is GENERATOR TRAINING
        # Create 1 generator and embedding training iters.
        if epoch == 0 and o == -1:
            # Train the generator and embedder sequentially
            for x_train in X_train:
                Z_mb = tf.cast(RandomGenerator(batch_size, [20, hidden_dim]),
                               tf.float32)
                train_step_jointly_generator(
                    x_train,
                    Z_mb,
                    graphing=tf.constant(True, dtype=tf.bool),
                    wasserstein=tf.constant(True, dtype=tf.bool))
                train_step_jointly_embedder(x_train)

                with summary_writer_bottom.as_default():
                    tf.summary.scalar(
                        '3. TimeGAN training - GAN/3. Gradient norm - generator',
                        grad_generator_ll.result(),
                        step=nr_mb_train)
                with summary_writer_top.as_default():
                    tf.summary.scalar(
                        '3. TimeGAN training - GAN/3. Gradient norm - generator',
                        grad_generator_ul.result(),
                        step=nr_mb_train,
                        description=str(descr_joint_grad_generator()))
                nr_mb_train += 1
            for x_test in X_test:
                Z_mb = RandomGenerator(batch_size, [20, hidden_dim])
                test_step_jointly_generator(Z_mb)
                test_step_jointly_embedder(x_test)

                with summary_writer_test.as_default(
                ):  # Get autoencoder loss for recovery and
                    # Log autoencoder + supervisor losses
                    tf.summary.scalar(
                        '3. TimeGAN training - Autoencoder/1. Recovery loss',
                        e_loss_T0_test.result(),
                        step=nr_mb_train)

                    tf.summary.scalar(
                        '3. TimeGAN training - Autoencoder/1. Supervised loss',
                        g_loss_s_embedder_test.result(),
                        step=nr_mb_train)

            o += 1
        else:
            # Train the generator and embedder sequentially
            for x_train in X_train:
                Z_mb = RandomGenerator(batch_size, [20, hidden_dim])
                train_step_jointly_generator(
                    x_train,
                    Z_mb,
                    graphing=tf.constant(False, dtype=tf.bool),
                    wasserstein=tf.constant(True, dtype=tf.bool))
                train_step_jointly_embedder(
                    x_train)  # Possibility to double the embedder training

                with summary_writer_bottom.as_default():
                    tf.summary.scalar(
                        '3. TimeGAN training - GAN/3. Gradient norm - generator',
                        grad_generator_ll.result(),
                        step=nr_mb_train)
                with summary_writer_top.as_default():
                    tf.summary.scalar(
                        '3. TimeGAN training - GAN/3. Gradient norm - generator',
                        grad_generator_ul.result(),
                        step=nr_mb_train,
                        description=str(descr_joint_grad_generator()))
                with summary_writer_train.as_default(
                ):  # Get autoencoder loss for recovery and
                    # Log autoencoder + supervisor losses
                    tf.summary.scalar(
                        '3. TimeGAN training - Autoencoder/1. Recovery loss',
                        e_loss_T0.result(),
                        step=nr_mb_train)

                    tf.summary.scalar(
                        '3. TimeGAN training - Autoencoder/1. Supervised loss',
                        g_loss_s_embedder.result(),
                        step=nr_mb_train)

                nr_mb_train += 1

            for x_test in X_test:
                Z_mb = RandomGenerator(batch_size, [20, hidden_dim])
                test_step_jointly_generator(Z_mb)
                test_step_jointly_embedder(x_test)

                with summary_writer_test.as_default(
                ):  # Get autoencoder loss for recovery and
                    # Log autoencoder + supervisor losses
                    tf.summary.scalar(
                        '3. TimeGAN training - Autoencoder/1. Recovery loss',
                        e_loss_T0_test.result(),
                        step=nr_mb_train)

                    tf.summary.scalar(
                        '3. TimeGAN training - Autoencoder/1. Supervised loss',
                        g_loss_s_embedder_test.result(),
                        step=nr_mb_train)

            print('Generator update')

        # This for loop is DISCRIMINATOR TRAINING - Train discriminator if too bad or at initialization (0.0)
        i = 0
        while i < 5:  # Train d to optimum (Jensen-Shannon divergence)
            for x_train in X_train:  # Train discriminator max 5 iterations or stop if optimal discriminator
                Z_mb = RandomGenerator(batch_size, [20, hidden_dim])
                train_step_discriminator(
                    x_train,
                    Z_mb,
                    smoothing_factor=tf.constant(1.0, dtype=tf.float16),
                    wasserstein=tf.constant(True, dtype=tf.bool))

                with summary_writer_top.as_default():
                    tf.summary.scalar(
                        '3. TimeGAN training - GAN/3. Gradient norm - discriminator',
                        grad_discriminator_ul.result(),
                        step=o,
                        description=str(descr_joint_grad_discriminator()))
                with summary_writer_bottom.as_default():
                    tf.summary.scalar(
                        '3. TimeGAN training - GAN/3. Gradient norm - discriminator',
                        grad_discriminator_ll.result(),
                        step=o)
                o += 1

            for x_test in X_test:
                Z_mb = RandomGenerator(batch_size, [20, hidden_dim])
                test_step_discriminator(x_test,
                                        Z_mb,
                                        wasserstein=tf.constant(False,
                                                                dtype=tf.bool))
            print('Discriminator update')
            i += 1
            # Use when using Wasserstein loss
            #if tf.math.abs(d_loss.result()) > 5 or o > current_o + 5: # Standard to do 5 discriminator iterations
            #        break # Breaks the while loop

            #if d_loss.result() < 0.15 or o > current_o + 5: # Use when using sigmoid cross-entropy loss
            #        break

        # Compute the test accuracy
        acc_real_array = np.array([])
        acc_fake_array = np.array([])

        for x_test in X_test:
            Z_mb = RandomGenerator(batch_size, [20, hidden_dim])
            acc_real, acc_fake = evaluate_accuracy(x_test, Z_mb)

            acc_real_array = np.append(acc_real_array, acc_real)
            acc_fake_array = np.append(acc_fake_array, acc_fake)

        with summary_writer_train.as_default():
            # Log GAN + supervisor losses
            tf.summary.scalar('3. TimeGAN training - GAN/1. Unsupervised loss',
                              d_loss.result(),
                              step=epoch,
                              description=str(descr_generator_loss_joint()))
            tf.summary.scalar('3. TimeGAN training - GAN/1. Supervised loss',
                              g_loss_s.result(),
                              step=epoch,
                              description=str(descr_supervisor_loss_joint()))

        #with summary_writer_lower_bound.as_default():
        #    tf.summary.scalar('3. TimeGAN training - GAN/1. Unsupervised loss',
        #                      -2*np.log(2), step=epoch) # Only use when sigmoid cross-entropy is enabled

        with summary_writer_test.as_default():
            # Log GAN + supervisor losses
            tf.summary.scalar('3. TimeGAN training - GAN/1. Unsupervised loss',
                              d_loss_test.result(),
                              step=epoch)

            tf.summary.scalar('3. TimeGAN training - GAN/1. Supervised loss',
                              g_loss_s_test.result(),
                              step=epoch)

        with summary_writer_real_data.as_default():
            tf.summary.scalar('3. TimeGAN training - GAN/2. Accuracy',
                              tf.reduce_mean(acc_real_array),
                              step=epoch)

        with summary_writer_fake_data.as_default():
            tf.summary.scalar('3. TimeGAN training - GAN/2. Accuracy',
                              tf.reduce_mean(acc_fake_array),
                              step=epoch,
                              description=str(descr_accuracy_joint()))

            # Only log the weights of the model per 10 epochs
            if epoch % 10 == 0:  # Add variables to histogram and distribution

                # Pre-trained models
                add_hist(embedder_model.trainable_variables,
                         epoch + already_done_epochs)
                add_hist(recovery_model.trainable_variables,
                         epoch + already_done_epochs)
                add_hist(supervisor_model.trainable_variables,
                         epoch + already_done_epochs)

                # Not pre-trained models
                add_hist(generator_model.trainable_variables, epoch)
                add_hist(discriminator_model.trainable_variables, epoch)

            if epoch % 50 == 0 and epoch != 0:  # It takes around an hour to do 10 epochs
                # Lastly save all models
                embedder_model.save_weights(
                    'C:/Users/s157148/Documents/GitHub/TimeGAN/weights/WGAN/embedder/epoch_'
                    + str(epoch))
                recovery_model.save_weights(
                    'C:/Users/s157148/Documents/GitHub/TimeGAN/weights/WGAN/recovery/epoch_'
                    + str(epoch))
                supervisor_model.save_weights(
                    'C:/Users/s157148/Documents/GitHub/TimeGAN/weights/WGAN/supervisor/epoch_'
                    + str(epoch))
                generator_model.save_weights(
                    'C:/Users/s157148/Documents/GitHub/TimeGAN/weights/WGAN/generator/epoch_'
                    + str(epoch))
                discriminator_model.save_weights(
                    'C:/Users/s157148/Documents/GitHub/TimeGAN/weights/WGAN/discriminator/epoch_'
                    + str(epoch))

                # Convert the model into interpretable simulations and Nearest-Neighbour comparisons
                figure = image_grid(1000, 20, 4, recovery_model,
                                    generator_model)

                figure.canvas.draw()
                w, h = figure.canvas.get_width_height()
                img = np.fromstring(figure.canvas.tostring_rgb(),
                                    dtype=np.uint8,
                                    sep='')
                img = img.reshape((1, h, w, 3))

                with summary_writer_train.as_default():
                    tensor = tf.constant(img)
                    tf.summary.image(
                        str("Simulations & nearest neighbour (green) after " +
                            str(epoch) + " training iterations"),
                        tensor,
                        step=epoch,
                        description=str(descr_images()))

            print('step: ' + str(epoch + 1) + ', g_loss_u_e: ' +
                  str(np.round(g_loss_u_e.result().numpy(), 8)) +
                  ', g_loss_s: ' +
                  str(np.round(g_loss_s.result().numpy(), 8)) +
                  ', g_loss_s_embedder: ' +
                  str(np.round(g_loss_s_embedder.result().numpy(), 8)) +
                  ', e_loss_t0: ' +
                  str(np.round(e_loss_T0.result().numpy(), 8)) + ', d_loss: ' +
                  str(np.round(d_loss.result().numpy(), 8)))
        tf.summary.trace_export(name="model_trace",
                                step=0,
                                profiler_outdir=log_dir)
    print('Finish joint training')
Beispiel #22
0
    def compile(self):
        """Build the TensorFlow 1.x graph for the GAN plus auxiliary classifier.

        Creates the input placeholders, wires a generator, a discriminator and
        a classifier together, defines their losses and Adam optimizers, and
        finally adds an inference-mode generator ("sampler"), an MAE metric and
        a ``tf.train.Saver``. Results are stored as attributes on ``self``.

        The method is idempotent: a second call only prints a message and
        returns, so the graph is never built twice.
        """

        if self.compiled:
            print('Model already compiled.')
            return
        self.compiled = True

        # Placeholders: X is a 32x32 single-channel input, Y the 32x32
        # two-channel target, labels a 10-way (presumably one-hot) class vector.
        self.X = tf.placeholder(tf.float32, shape=(None, 32, 32, 1), name='X')
        self.Y = tf.placeholder(tf.float32, shape=(None, 32, 32, 2), name='Y')
        self.labels = tf.placeholder(tf.float32, shape=(None, 10), name='labels')

        # Generator.
        generator = Generator(self.seed)

        # Discriminator.
        discriminator = Discriminator(self.seed)

        # Classifier.
        classifier = Classifier(self.seed)

        # The discriminator (and the classifier below) see the input concatenated
        # with either the real target or the generated output along the channel
        # axis. The second forward pass passes reuse_vars=True
        # (presumably so both passes share one set of weights).
        self.gen_out = generator.forward(self.X)
        disc_out_real = discriminator.forward(tf.concat([self.X, self.Y], 3))
        disc_out_fake = discriminator.forward(tf.concat([self.X, self.gen_out], 3), reuse_vars=True)

        # VAC-GAN classifier losses: softmax cross-entropy against the same
        # labels for both the real and the generated pair.
        classifier_real = classifier.forward(tf.concat([self.X, self.Y], 3))
        classfier_fake = classifier.forward(tf.concat([self.X, self.gen_out], 3), reuse_vars=True)
        classifier_l_real = tf.nn.softmax_cross_entropy_with_logits_v2(logits=classifier_real, labels=self.labels)
        classifier_l_fake = tf.nn.softmax_cross_entropy_with_logits_v2(logits=classfier_fake, labels=self.labels)
        self.classifier_loss_real = tf.reduce_mean(classifier_l_real)
        self.classifier_loss_fake = tf.reduce_mean(classifier_l_fake)
        self.classifier_loss = tf.reduce_mean(classifier_l_fake + classifier_l_real)

        # Generator loss: adversarial term (fakes labelled as real) + weighted
        # L1 reconstruction term + weighted classifier loss. Note that the
        # classifier term used here is the combined real+fake loss.
        self.gen_loss_gan = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_out_fake, labels=tf.ones_like(disc_out_fake)))
        self.gen_loss_l1 = tf.reduce_mean(tf.abs(self.Y - self.gen_out)) * self.l1_weight
        self.gen_loss = self.gen_loss_gan + self.gen_loss_l1 + self.VAC_weight * self.classifier_loss

        # Discriminator losses: fakes target 0, reals target
        # self.label_smoothing (one-sided label smoothing on the real labels).
        disc_l_fake = tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_out_fake, labels=tf.zeros_like(disc_out_fake))
        disc_l_real = tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_out_real, labels=tf.ones_like(disc_out_real)*self.label_smoothing)
        self.disc_loss_fake = tf.reduce_mean(disc_l_fake)
        self.disc_loss_real = tf.reduce_mean(disc_l_real)
        self.disc_loss = tf.reduce_mean(disc_l_fake + disc_l_real)

        # Global step.
        self.global_step = tf.Variable(0, name='global_step', trainable=False)

        # Learning rate: optional exponential decay driven by global_step,
        # floored at 1e-6; otherwise a constant.
        if self.learning_rate_decay:
            self.lr = tf.maximum(1e-6, tf.train.exponential_decay(
                learning_rate=self.learning_rate,
                global_step=self.global_step,
                decay_steps=self.learning_rate_decay_steps,
                decay_rate=self.learning_rate_decay_rate))
        else:
            self.lr = tf.constant(self.learning_rate)

        # Optimizers. The discriminator and classifier train at a tenth of the
        # generator's learning rate. Only the classifier optimizer is given
        # global_step, so the step counter (and hence the decay schedule)
        # advances once per classifier update.
        self.gen_optimizer = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.gen_loss, var_list=generator.variables)
        self.disc_optimizer = tf.train.AdamOptimizer(learning_rate=self.lr/10).minimize(self.disc_loss, var_list=discriminator.variables)
        self.classifier_optimizer = tf.train.AdamOptimizer(learning_rate=self.lr/10).minimize(self.classifier_loss, var_list=classifier.variables, global_step=self.global_step)

        # Sampler: a second Generator built with is_training=False and
        # reuse_vars=True (presumably sharing the trained weights) for inference.
        gen_sample = Generator(self.seed, is_training=False)
        self.sampler = gen_sample.forward(self.X, reuse_vars=True)

        # Mean absolute error between the target and the sampler output.
        self.MAE = tf.reduce_mean(tf.abs(self.Y - self.sampler))

        # Saver is created last, after every variable above has been defined.
        self.saver = tf.train.Saver()
Beispiel #23
0
def train(FLAGS):
    """Train a generator/discriminator pair with BCE loss on an image folder.

    Args:
        FLAGS: namespace of hyperparameters. The attributes read are
            ``p_every`` (print every N batches), ``s_every`` (checkpoint every
            N epochs), ``epochs``, ``dlr``/``glr`` (discriminator/generator
            learning rates), ``beta1``/``beta2`` (Adam betas), ``zsize``
            (latent vector size), ``batch_size``, ``resize_height``/
            ``resize_width``, ``dataset_path`` and ``dataset_type``.

    Raises:
        ValueError: if ``dataset_path`` is ``None`` and ``dataset_type`` is not
            one of the built-in datasets ("cars", "flowers", "dogs").

    Side effects:
        May run ``./datasets/dload.sh`` to download a built-in dataset, writes
        ``d-nm-<epoch>.pth`` / ``g-nm-<epoch>.pth`` checkpoints every
        ``s_every`` epochs and a ``fake_<epoch>.png`` sample grid every epoch,
        all in the current working directory.
    """
    # Define the hyperparameters
    p_every = FLAGS.p_every
    s_every = FLAGS.s_every
    epochs = FLAGS.epochs
    dlr = FLAGS.dlr
    glr = FLAGS.glr
    beta1 = FLAGS.beta1
    beta2 = FLAGS.beta2
    z_size = FLAGS.zsize
    batch_size = FLAGS.batch_size
    rh = FLAGS.resize_height
    rw = FLAGS.resize_width
    d_path = FLAGS.dataset_path
    d_type = FLAGS.dataset_type

    # Preprocessing Data: resize, convert to tensor, scale each channel to [-1, 1]
    transform = transforms.Compose([
        transforms.Resize((rh, rw)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    if d_path is None:
        # dataset type -> (directory whose presence means "already downloaded",
        #                  root folder handed to ImageFolder)
        default_datasets = {
            'cars': ('./datasets/cars_train', './datasets/cars_train/'),
            'flowers': ('./datasets/flowers/', './datasets/flowers/'),
            'dogs': ('./datasets/jpg', './datasets/jpg/'),
        }
        if d_type not in default_datasets:
            # Fail early with a clear message instead of passing None on
            # to ImageFolder.
            raise ValueError(
                'dataset_path is not set and dataset_type {!r} is not one of '
                '{}'.format(d_type, sorted(default_datasets)))
        marker_dir, d_path = default_datasets[d_type]
        if not os.path.exists(marker_dir):
            # d_type is restricted to the fixed keys above, so the command
            # string cannot contain arbitrary input.
            os.system('sh ./datasets/dload.sh ' + d_type)

    train_data = datasets.ImageFolder(d_path, transform=transform)
    trainloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

    # Define the D and G
    dis = Discriminator(64)
    gen = Generator()

    # Apply weight initialization
    dis.apply(init_weight)
    gen.apply(init_weight)

    # Define the loss function
    criterion = nn.BCELoss()

    # Optimizers
    d_opt = optim.Adam(dis.parameters(), lr=dlr, betas=(beta1, beta2))
    g_opt = optim.Adam(gen.parameters(), lr=glr, betas=(beta1, beta2))

    # Train loop
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # One [discriminator_loss_sum, generator_loss_sum] entry per epoch.
    train_losses = []

    dis.to(device)
    gen.to(device)

    # BCELoss needs floating-point targets; integer fill values would make
    # torch.full produce an integer tensor on current PyTorch versions.
    real_label = 1.0
    fake_label = 0.0

    for e in range(epochs):

        td_loss = 0.0
        tg_loss = 0.0

        for batch_i, (real_images, _) in enumerate(trainloader):

            real_images = real_images.to(device)

            # The last batch may be smaller than FLAGS.batch_size.
            n_real = real_images.size(0)

            #### Train the Discriminator ####

            d_opt.zero_grad()

            d_real = dis(real_images)

            label = torch.full((n_real, ),
                               real_label,
                               dtype=torch.float,
                               device=device)
            r_loss = criterion(d_real, label)
            r_loss.backward()

            z = torch.randn(n_real, z_size, 1, 1, device=device)

            fake_images = gen(z)

            label.fill_(fake_label)

            # detach() keeps the discriminator update from back-propagating
            # into the generator.
            d_fake = dis(fake_images.detach())

            f_loss = criterion(d_fake, label)
            f_loss.backward()

            d_loss = r_loss + f_loss

            d_opt.step()

            #### Train the Generator ####
            g_opt.zero_grad()

            # The generator is rewarded when the discriminator calls its
            # samples real.
            label.fill_(real_label)
            d_fake2 = dis(fake_images)

            g_loss = criterion(d_fake2, label)
            g_loss.backward()

            g_opt.step()

            # Accumulate the per-batch losses so the epoch record is not
            # stuck at zero.
            d_loss_value = d_loss.item()
            g_loss_value = g_loss.item()
            td_loss += d_loss_value
            tg_loss += g_loss_value

            if batch_i % p_every == 0:
                print ('Epoch [{:5d} / {:5d}] | d_loss: {:6.4f} | g_loss: {:6.4f}'. \
                        format(e+1, epochs, d_loss_value, g_loss_value))

        train_losses.append([td_loss, tg_loss])

        if e % s_every == 0:
            d_ckpt = {
                'model_state_dict': dis.state_dict(),
                'opt_state_dict': d_opt.state_dict()
            }

            g_ckpt = {
                'model_state_dict': gen.state_dict(),
                'opt_state_dict': g_opt.state_dict()
            }

            torch.save(d_ckpt, 'd-nm-{}.pth'.format(e))
            torch.save(g_ckpt, 'g-nm-{}.pth'.format(e))

        # Save the last batch of generated images of this epoch as a grid.
        utils.save_image(fake_images.detach(),
                         'fake_{}.png'.format(e),
                         normalize=True)

    print('[INFO] Training Completed successfully!')
Beispiel #24
0
def fix_model_state_dict(state_dict):
    """Return a copy of ``state_dict`` with a leading ``module.`` removed from keys.

    ``torch.nn.DataParallel`` stores parameters under ``module.<name>``; the
    returned ``OrderedDict`` can be loaded into the unwrapped model. Only one
    leading prefix is removed per key, entry order is preserved, and the
    values are passed through untouched.
    """
    prefix = 'module.'  # added by DataParallel
    return OrderedDict(
        (key[len(prefix):] if key.startswith(prefix) else key, value)
        for key, value in state_dict.items())


#torch.manual_seed(44)
# Restrict this process to the first two GPUs.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

device = "cuda" if torch.cuda.is_available() else "cpu"
G = Generator(z_dim=20)
D = Discriminator(z_dim=20)

# ------- load weights -------
# map_location lets checkpoints that hold CUDA tensors load on a CPU-only
# host. fix_model_state_dict strips the 'module.' prefix left by DataParallel.
G_load_weights = torch.load('./checkpoints/G_Efficient_GAN_1500.pth',
                            map_location=device)
G.load_state_dict(fix_model_state_dict(G_load_weights))

D_load_weights = torch.load('./checkpoints/D_Efficient_GAN_1500.pth',
                            map_location=device)
D.load_state_dict(fix_model_state_dict(D_load_weights))

G.to(device)
D.to(device)

# Use the GPUs in parallel (wrap only after the plain state dicts are loaded).
if device == 'cuda':
    G = torch.nn.DataParallel(G)
    D = torch.nn.DataParallel(D)
    print("parallel mode")
Beispiel #25
0
def main(args):
    """Train a CycleGAN (two generators, two discriminators) end to end.

    Args:
        args: parsed command-line namespace. The attributes read are
            ``gpu_ids``, ``model``, ``dataset``, ``loss``, ``epochs``,
            ``flag``, ``num_workers``, ``lr``, ``buffer_size``, ``resume``,
            ``batch_size``, ``lambda_cycle``, ``lambda_identity``,
            ``lambda_feature`` and ``attention``.

    Side effects:
        Sets ``CUDA_VISIBLE_DEVICES``, seeds the RNGs, and after every epoch
        writes a results pickle under ``RESULT_PATH`` and a resumable
        checkpoint under ``CHECKPOINT_PATH``. Every 10th epoch it also runs
        validation and stores an extra checkpoint whose file name contains
        the epoch number.
    """
    # Reference point for the "Current elapsed time" report printed per epoch.
    start = time.time()

    # Step0 ====================================================================
    # Set GPU ids
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_ids

    # Set the file name format
    FILE_NAME_FORMAT = "{0}_{1}_{2}_{3:d}{4}".format(args.model, args.dataset,
                                                     args.loss, args.epochs,
                                                     args.flag)
    # Set the results file path
    RESULT_FILE_NAME = FILE_NAME_FORMAT + '_results.pkl'
    RESULT_FILE_PATH = os.path.join(RESULT_PATH, RESULT_FILE_NAME)
    # Set the checkpoint file path
    CHECKPOINT_FILE_NAME = FILE_NAME_FORMAT + '.ckpt'
    CHECKPOINT_FILE_PATH = os.path.join(CHECKPOINT_PATH, CHECKPOINT_FILE_NAME)
    # NOTE: saving a separate "best" checkpoint is currently disabled; this
    # path is reserved for it.
    BEST_CHECKPOINT_FILE_NAME = FILE_NAME_FORMAT + '_best.ckpt'
    BEST_CHECKPOINT_FILE_PATH = os.path.join(CHECKPOINT_PATH,
                                             BEST_CHECKPOINT_FILE_NAME)
    # Set the random seed same for reproducibility
    random.seed(190811)
    torch.manual_seed(190811)
    torch.cuda.manual_seed_all(190811)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Step1 ====================================================================
    # Load dataset
    train_dataloader = CycleGAN_Dataloader(name=args.dataset,
                                           num_workers=args.num_workers)
    test_dataloader = CycleGAN_Dataloader(name=args.dataset,
                                          train=False,
                                          num_workers=args.num_workers)
    print('==> DataLoader ready.')

    # Step2 ====================================================================
    # Make the model: cityscapes uses 6 residual blocks, every other dataset 9.
    num_resblock = 6 if args.dataset == 'cityscapes' else 9
    A_generator = Generator(num_resblock=num_resblock)
    B_generator = Generator(num_resblock=num_resblock)
    A_discriminator = Discriminator()
    B_discriminator = Discriminator()

    # Check DataParallel available
    if torch.cuda.device_count() > 1:
        A_generator = nn.DataParallel(A_generator)
        B_generator = nn.DataParallel(B_generator)
        A_discriminator = nn.DataParallel(A_discriminator)
        B_discriminator = nn.DataParallel(B_discriminator)

    # Check CUDA available
    if torch.cuda.is_available():
        A_generator.cuda()
        B_generator.cuda()
        A_discriminator.cuda()
        B_discriminator.cuda()
    print('==> Model ready.')

    # Step3 ====================================================================
    # Set each loss function
    criterion_GAN = nn.MSELoss()
    criterion_cycle = nn.L1Loss()
    criterion_identity = nn.L1Loss()
    criterion_feature = nn.L1Loss()

    # Set each optimizer (one optimizer drives both networks of a kind)
    optimizer_G = optim.Adam(itertools.chain(A_generator.parameters(),
                                             B_generator.parameters()),
                             lr=args.lr,
                             betas=(0.5, 0.999))
    optimizer_D = optim.Adam(itertools.chain(A_discriminator.parameters(),
                                             B_discriminator.parameters()),
                             lr=args.lr,
                             betas=(0.5, 0.999))

    # Set learning rate scheduler
    def lambda_rule(epoch):
        """Keep the LR flat for the first half, then decay it linearly."""
        epoch_decay = args.epochs / 2
        lr_linear_scale = 1.0 - max(0, epoch + 1 - epoch_decay) \
                                / float(epoch_decay+ 1)
        return lr_linear_scale

    scheduler_G = lr_scheduler.LambdaLR(optimizer_G, lr_lambda=lambda_rule)
    scheduler_D = lr_scheduler.LambdaLR(optimizer_D, lr_lambda=lambda_rule)
    print('==> Criterion and optimizer ready.')

    # Step4 ====================================================================
    # Train and validate the model
    start_epoch = 0
    best_metric = float("inf")

    # Initialize the result lists
    train_loss_G = []
    train_loss_D_A = []
    train_loss_D_B = []

    # Set image buffer
    A_buffer = ImageBuffer(args.buffer_size)
    B_buffer = ImageBuffer(args.buffer_size)

    if args.resume:
        assert os.path.exists(CHECKPOINT_FILE_PATH), 'No checkpoint file!'
        checkpoint = torch.load(CHECKPOINT_FILE_PATH)
        A_generator.load_state_dict(checkpoint['A_generator_state_dict'])
        B_generator.load_state_dict(checkpoint['B_generator_state_dict'])
        A_discriminator.load_state_dict(
            checkpoint['A_discriminator_state_dict'])
        B_discriminator.load_state_dict(
            checkpoint['B_discriminator_state_dict'])
        optimizer_G.load_state_dict(checkpoint['optimizer_G_state_dict'])
        optimizer_D.load_state_dict(checkpoint['optimizer_D_state_dict'])
        scheduler_G.load_state_dict(checkpoint['scheduler_G_state_dict'])
        scheduler_D.load_state_dict(checkpoint['scheduler_D_state_dict'])
        start_epoch = checkpoint['epoch']
        train_loss_G = checkpoint['train_loss_G']
        train_loss_D_A = checkpoint['train_loss_D_A']
        train_loss_D_B = checkpoint['train_loss_D_B']
        best_metric = checkpoint['best_metric']

    def checkpoint_state(next_epoch):
        """Collect everything needed to resume training at ``next_epoch``."""
        return {
            'epoch': next_epoch,
            'A_generator_state_dict': A_generator.state_dict(),
            'B_generator_state_dict': B_generator.state_dict(),
            'A_discriminator_state_dict': A_discriminator.state_dict(),
            'B_discriminator_state_dict': B_discriminator.state_dict(),
            'optimizer_G_state_dict': optimizer_G.state_dict(),
            'optimizer_D_state_dict': optimizer_D.state_dict(),
            'scheduler_G_state_dict': scheduler_G.state_dict(),
            'scheduler_D_state_dict': scheduler_D.state_dict(),
            'train_loss_G': train_loss_G,
            'train_loss_D_A': train_loss_D_A,
            'train_loss_D_B': train_loss_D_B,
            'best_metric': best_metric,
        }

    # Save the training information
    result_data = {}
    result_data['model'] = args.model
    result_data['dataset'] = args.dataset
    result_data['loss'] = args.loss
    result_data['target_epoch'] = args.epochs
    result_data['batch_size'] = args.batch_size

    # Check the directory of the file path
    if not os.path.exists(os.path.dirname(RESULT_FILE_PATH)):
        os.makedirs(os.path.dirname(RESULT_FILE_PATH))
    if not os.path.exists(os.path.dirname(CHECKPOINT_FILE_PATH)):
        os.makedirs(os.path.dirname(CHECKPOINT_FILE_PATH))
    print('==> Train ready.')

    # Start right after the checkpoint epoch (0 on a fresh run).
    for epoch in range(start_epoch, args.epochs):
        print("\n[Epoch: {:3d}/{:3d}]".format(epoch + 1, args.epochs))
        epoch_time = time.time()
        #=======================================================================
        # train and validate the model
        tloss_G, tloss_D = train(
            train_dataloader, A_generator, B_generator, A_discriminator,
            B_discriminator, criterion_GAN, criterion_cycle,
            criterion_identity, optimizer_G, optimizer_D, A_buffer, B_buffer,
            args.loss, args.lambda_cycle, args.lambda_identity,
            criterion_feature, args.lambda_feature, args.attention)
        train_loss_G.append(tloss_G)
        train_loss_D_A.append(tloss_D['A'])
        train_loss_D_B.append(tloss_D['B'])

        is_tenth_epoch = (epoch + 1) % 10 == 0
        if is_tenth_epoch:
            val(test_dataloader, A_generator, B_generator, A_discriminator,
                B_discriminator, epoch + 1, FILE_NAME_FORMAT, args.attention)

        # Update the optimizer's learning rate; remember the rate that was
        # actually used this epoch for the report below.
        current_lr = optimizer_G.param_groups[0]['lr']
        scheduler_G.step()
        scheduler_D.step()
        #=======================================================================
        current = time.time()

        # Save the current result
        result_data['current_epoch'] = epoch
        result_data['train_loss_G'] = train_loss_G
        result_data['train_loss_D_A'] = train_loss_D_A
        result_data['train_loss_D_B'] = train_loss_D_B

        # Save result_data as pkl file
        with open(RESULT_FILE_PATH, 'wb') as pkl_file:
            pickle.dump(result_data,
                        pkl_file,
                        protocol=pickle.HIGHEST_PROTOCOL)

        # Save the current checkpoint (overwritten every epoch, used by resume)
        torch.save(checkpoint_state(epoch + 1), CHECKPOINT_FILE_PATH)

        if is_tenth_epoch:
            # Put the epoch number into the file name so the periodic
            # snapshots do not overwrite each other.
            CHECKPOINT_FILE_NAME_epoch = FILE_NAME_FORMAT + '_{0}.ckpt'.format(
                epoch + 1)
            CHECKPOINT_FILE_PATH_epoch = os.path.join(
                CHECKPOINT_PATH, FILE_NAME_FORMAT, CHECKPOINT_FILE_NAME_epoch)
            if not os.path.exists(os.path.dirname(CHECKPOINT_FILE_PATH_epoch)):
                os.makedirs(os.path.dirname(CHECKPOINT_FILE_PATH_epoch))
            torch.save(checkpoint_state(epoch + 1), CHECKPOINT_FILE_PATH_epoch)

        # Print the information on the console
        print("model                : {}".format(args.model))
        print("dataset              : {}".format(args.dataset))
        print("loss                 : {}".format(args.loss))
        print("batch_size           : {}".format(args.batch_size))
        print("current lrate        : {:f}".format(current_lr))
        print("G loss               : {:f}".format(tloss_G))
        print("D A/B loss           : {:f}/{:f}".format(
            tloss_D['A'], tloss_D['B']))
        print("epoch time           : {0:.3f} sec".format(current -
                                                          epoch_time))
        print("Current elapsed time : {0:.3f} sec".format(current - start))
    print('==> Train done.')

    print(' '.join(['Results have been saved at', RESULT_FILE_PATH]))
    print(' '.join(['Checkpoints have been saved at', CHECKPOINT_FILE_PATH]))
Beispiel #26
0
    def __init__(self,
                 input_data_path,
                 output_model_folder,
                 decode_mols_save_path='',
                 n_epochs=200,
                 starting_epoch=1,
                 batch_size=64,
                 lr=0.0002,
                 b1=0.5,
                 b2=0.999,
                 n_critic=5,
                 sample_interval=10,
                 save_interval=100,
                 sample_after_training=100,
                 message=""):
        """Load the latent data and saved models and set up the optimizers.

        Args:
            input_data_path: path of a JSON file holding the latent vectors;
                the loaded array is reshaped to ``(n_samples, 512)``.
            output_model_folder: folder the discriminator/generator files are
                loaded from.
            decode_mols_save_path: stored on the instance as given.
            n_epochs: stored on the instance as given.
            starting_epoch: ``1`` loads ``discriminator.txt`` /
                ``generator.txt``; any other value loads the files prefixed
                with ``starting_epoch - 1``.
            batch_size: batch size of the shuffled data loader.
            lr: learning rate of both Adam optimizers.
            b1: first Adam beta.
            b2: second Adam beta.
            n_critic: stored on the instance as given.
            sample_interval: stored on the instance as given.
            save_interval: stored on the instance as given.
            sample_after_training: stored on the instance as given.
            message: stored on the instance as given.
        """
        self.message = message

        # init params
        self.input_data_path = input_data_path
        self.output_model_folder = output_model_folder
        self.n_epochs = n_epochs
        self.starting_epoch = starting_epoch
        self.batch_size = batch_size
        self.lr = lr
        self.b1 = b1
        self.b2 = b2
        self.n_critic = n_critic
        self.sample_interval = sample_interval
        self.save_interval = save_interval
        self.sample_after_training = sample_after_training
        self.decode_mols_save_path = decode_mols_save_path

        # initialize dataloader
        # Read inside a context manager so the file handle is always closed.
        with open(self.input_data_path, "r") as json_smiles:
            latent_space_mols = np.array(json.load(json_smiles))
        latent_space_mols = latent_space_mols.reshape(
            latent_space_mols.shape[0], 512)

        self.dataloader = torch.utils.data.DataLoader(
            LatentMolsDataset(latent_space_mols),
            shuffle=True,
            batch_size=self.batch_size)

        # load discriminator
        discriminator_name = 'discriminator.txt' if self.starting_epoch == 1 else str(
            self.starting_epoch - 1) + '_discriminator.txt'
        discriminator_path = os.path.join(output_model_folder,
                                          discriminator_name)
        self.D = Discriminator.load(discriminator_path)

        # load generator
        generator_name = 'generator.txt' if self.starting_epoch == 1 else str(
            self.starting_epoch - 1) + '_generator.txt'
        generator_path = os.path.join(output_model_folder, generator_name)
        self.G = Generator.load(generator_path)

        # initialize sampler
        self.Sampler = Sampler(self.G)

        # initialize optimizer
        self.optimizer_G = torch.optim.Adam(self.G.parameters(),
                                            lr=self.lr,
                                            betas=(self.b1, self.b2))
        self.optimizer_D = torch.optim.Adam(self.D.parameters(),
                                            lr=self.lr,
                                            betas=(self.b1, self.b2))

        # Tensor: move both models to the GPU when one is available and
        # remember the matching tensor constructor.
        cuda = torch.cuda.is_available()
        if cuda:
            self.G.cuda()
            self.D.cuda()
        self.Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
Beispiel #27
0
def train_SRGAN():
    """Build the SRGAN training graph and run the adversarial training loop.

    The graph is assembled in this order: input batch, generator output,
    image summaries, discriminator applied to the HR batch and to the
    generator output, discriminator train op, generator train op (under a
    control dependency on the discriminator step), savers, and loss
    summaries. Graph construction order matters because the restore savers
    snapshot the variable collections at the point where they are created.

    Configuration is read from the module-level ``FLAGS`` object:
    ``batch_size``, ``image_size``, ``load_gen``, ``load_disc``,
    ``load_vgg``, ``vgg_loss_scale``, ``adversarial_loss_scale``,
    ``pretrained_models``, ``log_dir``, ``checkpoint_dir``, ``n_iter``,
    ``log_freq`` and ``ckpt_freq``.

    Side effects: prints progress, writes summaries to ``FLAGS.log_dir`` and
    checkpoints to ``FLAGS.checkpoint_dir``.

    Returns:
        None.
    """
    global_step = tf.train.get_or_create_global_step()

    # read input batch
    with tf.device('/cpu:0'):
        imgs_LR, imgs_HR = inputs2(False, FLAGS.batch_size)

    ##################################################
    #          GENERATOR - SR IMAGE created          #
    ##################################################
    generator = Generator()
    imgs_SR = generator.fit(imgs_LR, train=True, reuse=False)
    # variables for generator (SRResNet)
    # Collected here, before any optimizer is built, so only the variables
    # that exist at this point are handed to the restore saver.
    if FLAGS.load_gen and not FLAGS.load_disc:
        variables_to_restore_srgan = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope='generator')
        srgan_saver = tf.train.Saver(variables_to_restore_srgan)

    # Display the images in the tensorboard.
    tf.summary.image(
        'images_LR',
        tf.image.resize_images(
            imgs_LR, [FLAGS.image_size * 4, FLAGS.image_size * 4]))  # Bilinear
    tf.summary.image('images_HR', imgs_HR)
    tf.summary.image('images_SR', imgs_SR)

    ###########################################
    #          DISCRIMINATOR - train          #
    ###########################################
    discriminator = Discriminator()
    with tf.name_scope('discriminator_HR'):
        logit_HR, probab_HR = discriminator.fit(imgs_HR,
                                                train=True,
                                                reuse=False)
    with tf.name_scope('discriminator_SR'):
        logit_SR, probab_SR = discriminator.fit(imgs_SR,
                                                train=True,
                                                reuse=True)
    if FLAGS.load_gen and FLAGS.load_disc:
        # The discriminator scope used to be spelled 'dicriminator'.
        # tf.get_collection filters names with re.match, so a misspelled
        # scope silently selects nothing and the discriminator weights are
        # never restored.
        # NOTE(review): assumes Discriminator creates its variables under the
        # 'discriminator' variable scope -- confirm against the model class.
        variables_to_restore_srgan = (
            tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                              scope='generator') +
            tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                              scope='discriminator'))
        srgan_saver = tf.train.Saver(variables_to_restore_srgan)
    # NOTE(review): the probabilities (not the raw logits) are passed as
    # logit_HR / logit_SR -- confirm that adversarial_loss expects this.
    disc_loss = discriminator.adversarial_loss(logit_HR=probab_HR,
                                               logit_SR=probab_SR)
    global_step, disc_train_op = discriminator.train2(disc_loss, global_step)

    ###########################################
    #            GENERATOR - train            #
    ###########################################
    with tf.control_dependencies([disc_train_op, disc_loss
                                  ]):  # ensure that disc has done one step
        adversarial_loss = generator.adversarial_loss(probab_SR)
        if FLAGS.load_vgg:
            content_loss = generator.vgg_loss(imgs_HR, imgs_SR)
            content_loss_type = 'vgg'
            gen_loss = (FLAGS.vgg_loss_scale * content_loss +
                        FLAGS.adversarial_loss_scale * adversarial_loss)
        else:
            content_loss = generator.pixelwise_mse_loss(imgs_HR, imgs_SR)
            content_loss_type = 'mse'
            gen_loss = (content_loss +
                        FLAGS.adversarial_loss_scale * adversarial_loss)
        _, gen_train_op = generator.train2(gen_loss,
                                           global_step,
                                           gs_update=False)  # No update to gs

    # variables for VGG19
    if FLAGS.load_vgg:
        variables_to_restore_vgg = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope='vgg_19')
        vgg_saver = tf.train.Saver(variables_to_restore_vgg)
    # Saver over every variable in the graph, used for training checkpoints.
    saver = tf.train.Saver()

    ###########################################
    #               SUMMARIES                 #
    ###########################################
    # Moving average on loss
    exp_averager = tf.train.ExponentialMovingAverage(decay=0.99)
    losses_list = [disc_loss, content_loss, adversarial_loss, gen_loss]
    update_loss = exp_averager.apply(losses_list)
    disc_loss_avg, content_loss_avg, adversarial_loss_avg, gen_loss_avg = \
        [exp_averager.average(var) for var in losses_list]
    tf.summary.scalar('discriminator_loss', disc_loss_avg)
    tf.summary.scalar('gen_{0}_loss'.format(content_loss_type),
                      content_loss_avg)
    tf.summary.scalar('gen_adversarial_loss', adversarial_loss_avg)
    tf.summary.scalar('generator_loss', gen_loss_avg)

    # Merge all summary information.
    summary = tf.summary.merge_all()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        try:
            # Load pretrained model
            if FLAGS.load_gen and FLAGS.load_disc:
                print(
                    'Loading weights for SRGAN generator and discriminator...')
                srgan_saver.restore(
                    sess,
                    tf.train.latest_checkpoint(
                        os.path.join(FLAGS.pretrained_models, 'srgan')))
            elif FLAGS.load_gen:
                print('Loading weights for SRGAN generator...')
                srgan_saver.restore(
                    sess,
                    tf.train.latest_checkpoint(
                        os.path.join(FLAGS.pretrained_models, 'srresnet')))
            if FLAGS.load_vgg:
                print('Loading weights for VGG19..')
                vgg_saver.restore(
                    sess,
                    tf.train.latest_checkpoint(
                        os.path.join(FLAGS.pretrained_models, 'vgg19')))

            print('Starting training procedure...')
            start = time.time()
            for it in range(FLAGS.n_iter):
                # One sess.run executes the discriminator step, the generator
                # step, the moving-average update and the summaries together.
                gs, _, d_loss, _, g_loss, _, summ = sess.run([
                    global_step, update_loss, disc_loss_avg, disc_train_op,
                    gen_loss_avg, gen_train_op, summary
                ])
                if it % FLAGS.log_freq == 0 and it > 0:
                    # Throughput over the last log_freq iterations; the timer
                    # is reset after every report.
                    t = (time.time() - start)
                    print(
                        '{0} iter, gen_loss: {1}, disc_loss: {2}, img/sec: {3}'
                        .format(gs, g_loss, d_loss,
                                FLAGS.log_freq * FLAGS.batch_size / t))
                    summary_writer.add_summary(summ, gs)
                    summary_writer.flush()
                    start = time.time()
                if it % FLAGS.ckpt_freq == 0 and it > 0:
                    saver.save(sess, FLAGS.checkpoint_dir, global_step=gs)
        finally:
            # Stop and join the queue-runner threads even when restoring or a
            # training step raises, so they never outlive the session.
            coord.request_stop()
            coord.join(threads)
def fix_model_state_dict(state_dict):
    """Return a copy of ``state_dict`` without the DataParallel key prefix.

    A single leading ``'module.'`` is stripped from every key that has one;
    all other keys are copied unchanged. Values are not copied or modified,
    and the iteration order of ``state_dict`` is preserved.

    Args:
        state_dict: mapping from parameter name to value.

    Returns:
        A new ``OrderedDict`` with the cleaned-up keys.
    """
    prefix = 'module.'
    return OrderedDict(
        (key[len(prefix):] if key.startswith(prefix) else key, value)
        for key, value in state_dict.items()
    )


#torch.manual_seed(44)
# Restrict this process to the first two GPUs.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

device = "cuda" if torch.cuda.is_available() else "cpu"
G = Generator(z_dim=20, image_size=64)
D = Discriminator(z_dim=20, image_size=64)

# ------- load weights -------
# map_location='cpu' lets checkpoints that were saved from CUDA tensors load
# on a machine without a GPU (the `device == "cpu"` fallback above). The
# models are still on the CPU at this point and are moved to `device` below.
# fix_model_state_dict strips the 'module.' prefix so the keys match the bare
# (not yet DataParallel-wrapped) models.
G_load_weights = torch.load('./checkpoints/G_AnoGAN_300.pth',
                            map_location='cpu')
G.load_state_dict(fix_model_state_dict(G_load_weights))

D_load_weights = torch.load('./checkpoints/D_AnoGAN_300.pth',
                            map_location='cpu')
D.load_state_dict(fix_model_state_dict(D_load_weights))

G.to(device)
D.to(device)

# use GPU in parallel
if device == 'cuda':
    G = torch.nn.DataParallel(G)
    D = torch.nn.DataParallel(D)
    print("parallel mode")
Beispiel #29
0
from models.Discriminator import Discriminator


def save_sounds(path, sounds, sampling_rate):
    """Write every item of ``sounds`` to its own WAV file inside ``path``.

    Each item has its leading dimension squeezed, is moved to the CPU,
    detached and converted to a NumPy copy before being written. Files are
    named ``generated_sound_<index>_<digest>.wav``, where ``<digest>`` is the
    MD5 hex digest of the call's timestamp and is therefore shared by all
    files written by one call. Each target path is printed before writing.

    Args:
        path: directory the files are written to.
        sounds: iterable of tensors to save.
        sampling_rate: sample rate passed to ``soundfile.write``.
    """
    # A single timestamp is taken per call, so the digest is identical for
    # every file written below.
    stamp = hashlib.md5(str(time.time()).encode()).hexdigest()
    for index, tensor in enumerate(sounds):
        samples = tensor.squeeze(0).to('cpu').detach().numpy().copy()
        target = os.path.join(path, f"generated_sound_{index}_{stamp}.wav")
        print(target)
        soundfile.write(target, samples, sampling_rate, format="WAV")


# Generate one sample from a freshly constructed Generator and save it as WAV.
model = Generator()
latent_dim = 100
z = torch.rand(latent_dim, dtype=torch.float32)
# Inference only: no autograd graph is needed for this single forward pass.
with torch.no_grad():
    output = model.forward(z)
# Drop the two leading singleton dimensions and hand librosa a NumPy copy.
# NOTE(review): assumes the generator output is a mel spectrogram with two
# leading singleton dims -- confirm against the Generator definition.
output = output.squeeze(0).squeeze(0).detach().numpy().copy()
S = librosa.feature.inverse.mel_to_stft(output)
y = librosa.griffinlim(S)
print(y.shape)
# Create the target directory first so the script also runs on a fresh
# checkout where ./output does not exist yet.
os.makedirs('./output', exist_ok=True)
# NOTE(review): 22050 Hz presumably matches the rate the mel inversion
# assumes -- confirm it matches the training data.
soundfile.write('./output/result.wav', y, 22050, format="WAV")

# output_dir = "./output"
# if not os.path.exists(output_dir):
#     os.makedirs(output_dir)
# sampling_rate = 16000
# save_sounds("./output/", [output], sampling_rate)
Beispiel #30
0
def train_SRResNet():
    """Build the generator-only training graph and run the training loop.

    The generator is trained on the pixel-wise MSE loss between the HR batch
    and its output; no discriminator or adversarial loss is involved. PSNR,
    the raw MSE and an exponential moving average of the MSE are logged as
    summaries.

    Configuration is read from the module-level ``FLAGS`` object:
    ``batch_size``, ``load_gen``, ``load_vgg``, ``pretrained_models``,
    ``log_dir``, ``checkpoint_dir``, ``n_iter``, ``log_freq`` and
    ``ckpt_freq``.

    Side effects: prints progress, writes summaries to ``FLAGS.log_dir`` and
    checkpoints to ``FLAGS.checkpoint_dir``.

    Returns:
        None.
    """
    global_step = tf.train.get_or_create_global_step()

    with tf.device('/cpu:0'):
        imgs_LR, imgs_HR = inputs2(False, FLAGS.batch_size)

    # train generator (no adv. loss)
    generator = Generator()
    imgs_SR = generator.fit(imgs_LR, train=True, reuse=False)

    # Display the training images in the visualizer.
    tf.summary.image('images_LR', imgs_LR)
    tf.summary.image('images_HR', imgs_HR)
    tf.summary.image('images_SR', imgs_SR)

    # Restore
    # Collected before the optimizer is built, so only the variables that
    # exist at this point are handed to the restore saver.
    if FLAGS.load_gen:
        variables_to_restore_srgan = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope='generator')
        srgan_saver = tf.train.Saver(variables_to_restore_srgan)
    # Restore variables for VGG19 and SRResNet (pretrained model)
    # NOTE(review): the VGG loss below is commented out, so the 'vgg_19'
    # scope may hold no variables here -- confirm that load_vgg is usable in
    # this function.
    if FLAGS.load_vgg:
        variables_to_restore_vgg = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope='vgg_19')
        vgg_saver = tf.train.Saver(variables_to_restore_vgg)

    mse_loss = generator.pixelwise_mse_loss(imgs_HR, imgs_SR)
    psnr = generator.psnr(imgs_HR, imgs_SR)
    # vgg_loss = generator.vgg_loss(imgs_HR, imgs_SR)
    global_step, gen_train_op = generator.train2(mse_loss, global_step)

    ###########################################
    #               SUMMARIES                 #
    ###########################################
    # Moving average on loss
    exp_averager = tf.train.ExponentialMovingAverage(decay=0.99)
    update_loss = exp_averager.apply([mse_loss])
    gen_loss = exp_averager.average(mse_loss)
    tf.summary.scalar('PSNR', psnr)
    tf.summary.scalar('MSE', mse_loss)
    tf.summary.scalar('generator loss', gen_loss)

    # Saver over every variable in the graph, used for training checkpoints.
    saver = tf.train.Saver()
    # Merge all summary information.
    summary = tf.summary.merge_all()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        try:
            if FLAGS.load_gen:
                print('Loading pre-trained SRResNet (i.e. Generator)...')
                srgan_saver.restore(
                    sess,
                    tf.train.latest_checkpoint(
                        os.path.join(FLAGS.pretrained_models, 'srresnet')))
            if FLAGS.load_vgg:
                print('Loading pre-trained VGG 19...')
                vgg_saver.restore(
                    sess,
                    tf.train.latest_checkpoint(
                        os.path.join(FLAGS.pretrained_models, 'vgg19')))

            print('Starting training procedure...')
            start = time.time()
            for it in range(FLAGS.n_iter):
                gs, _, loss, _, summ = sess.run([
                    global_step, update_loss, gen_loss, gen_train_op, summary
                ])
                if it % FLAGS.log_freq == 0 and it > 0:
                    # `start` is never reset, so this is the average
                    # throughput since the beginning of training.
                    t = (time.time() - start)
                    print('{0} iter, loss: {1}, img/sec: {2}'.format(
                        gs, loss, it * FLAGS.batch_size / t))
                    print(t)
                    summary_writer.add_summary(summ, gs)
                    summary_writer.flush()
                if it % FLAGS.ckpt_freq == 0 and it > 0:
                    # NOTE(review): the hard-coded +70000 offset presumably
                    # continues the numbering of an earlier run -- confirm or
                    # turn it into a flag.
                    saver.save(sess,
                               FLAGS.checkpoint_dir,
                               global_step=gs + 70000)
        finally:
            # Stop and join the queue-runner threads even when restoring or a
            # training step raises, so they never outlive the session.
            coord.request_stop()
            coord.join(threads)