def run(self):
    """Create and persist an untrained Discriminator/Generator pair sized to the input data.

    Reads the JSON-encoded latent vectors at ``self.input_data_path``
    (expected shape (set_size, dim_1, dim_2)), builds both models from the
    per-sample shape, and saves them into ``self.output_model_folder``.

    Returns:
        bool: always True on success.
    """
    # BUG FIX: the file handle was previously opened and never closed;
    # use a context manager so it is released even if json.load raises.
    with open(self.input_data_path, "r") as latent_vector_file:
        latent_space_mols = np.array(json.load(latent_vector_file))
    # expecting tuple (set_size, dim_1, dim_2)
    shape = latent_space_mols.shape
    data_shape = (shape[1], shape[2])

    # create Discriminator
    D = Discriminator(data_shape)
    # save Discriminator (exist_ok avoids the check-then-create race)
    os.makedirs(self.output_model_folder, exist_ok=True)
    discriminator_path = os.path.join(self.output_model_folder, 'discriminator.txt')
    D.save(discriminator_path)

    # create Generator; its noise dimension matches the data's inner dim
    G = Generator(data_shape, latent_dim=shape[2])
    generator_path = os.path.join(self.output_model_folder, 'generator.txt')
    G.save(generator_path)
    return True
class DamageDetection:
    """Damage detection driver: scores each measurement spot with a trained
    GAN by blending generator reconstruction error and discriminator loss."""

    def __init__(self, args):
        # Seed both torch and numpy so the noise sampling in test() is reproducible.
        self.args = args
        torch.manual_seed(self.args.seed)
        np.random.seed(self.args.seed)
        print('{} detection...'.format(args.dataset))
        # Load the white-noise measurement dataset for the configured segment length.
        white_noise = dp.DatasetReader(white_noise=self.args.dataset,
                                       data_path=data_path,
                                       len_seg=self.args.len_seg
                                       )
        self.testset = torch.tensor(torch.from_numpy(white_noise.dataset_), dtype=torch.float32)
        # Measurement spot identifiers; one damage index is produced per spot.
        self.spots = np.load('{}/spots.npy'.format(info_path))
        self.Generator = Generator(args)  # Generator
        self.Discriminator = Discriminator(args)  # Discriminator

    def __call__(self, *args, **kwargs):
        # Calling the object directly runs the evaluation pass.
        self.test()

    def file_name(self):
        """Canonical file stem encoding the experiment's hyper-parameters."""
        return '{}_{}_{}_{}_{}_{}'.format(self.args.model_name,
                                          self.args.net_name,
                                          self.args.len_seg,
                                          self.args.optimizer,
                                          self.args.learning_rate,
                                          self.args.num_epoch
                                          )

    def test(self):
        """Score every spot and write the damage indices to a JSON report.

        For each spot: generate a batch from random noise, compare it with the
        spot's real data (MSE residual), and read the discriminator's score;
        the damage index is a beta-weighted blend of the two terms.
        """
        path_gen = '{}/models/{}_Gen.model'.format(save_path, self.file_name())
        path_dis = '{}/models/{}_Dis.model'.format(save_path, self.file_name())
        self.Generator.load_state_dict(torch.load(path_gen))  # Load Generator
        self.Discriminator.load_state_dict(torch.load(path_dis))  # Load Discriminator
        self.Generator.eval()
        self.Discriminator.eval()
        damage_indices = {}
        beta = 0.5  # weight between generator residual and discriminator term
        with torch.no_grad():
            for i, spot in enumerate(self.spots):
                damage_indices[spot] = {}
                # NOTE(review): 50 appears to be the generator's latent noise
                # dimension — confirm against the Generator definition.
                z = torch.randn(self.testset.shape[1], 50)
                data_gen = self.Generator(z)
                data_real = self.testset[i]
                res = ((data_gen - data_real) ** 2).mean()
                # Deviation of the discriminator's mean score from the "real" target 1.
                dis = self.Discriminator(data_gen).mean() - 1
                loss = beta * res.item() + (1 - beta) * np.abs(dis.item())
                damage_indices[spot]['Generate residual'] = res.item()
                damage_indices[spot]['Discriminate loss'] = np.abs(dis.item())
                damage_indices[spot]['Loss'] = loss
                print('[{}]\tGenerate residual: {:5f}\tDiscriminate loss: {:5f}\tLoss: {:5f}'.
                      format(spot, res.item(), np.abs(dis.item()), loss)
                      )
        damage_indices = json.dumps(damage_indices, indent=2)
        with open('{}/damage index/{}_{}.json'.format(save_path,
                                                      self.args.dataset,
                                                      self.file_name()
                                                      ), 'w') as f:
            f.write(damage_indices)
def CreateDiscriminator(self):
    """Build a Discriminator for the configured data shape and persist it to disk."""
    discriminator = Discriminator(self.data_shape)
    target_folder = self.output_model_folder
    if not os.path.exists(target_folder):
        os.makedirs(target_folder)
    discriminator.save(os.path.join(target_folder, 'discriminator.txt'))
def __init__(self, args):
    """Seed the RNGs, load the white-noise test set, and build the GAN components."""
    self.args = args
    torch.manual_seed(self.args.seed)
    np.random.seed(self.args.seed)
    print('{} detection...'.format(args.dataset))
    # Read the measurement dataset for the configured segment length.
    reader = dp.DatasetReader(white_noise=self.args.dataset,
                              data_path=data_path,
                              len_seg=self.args.len_seg)
    self.testset = torch.tensor(torch.from_numpy(reader.dataset_), dtype=torch.float32)
    # One damage index will be produced per measurement spot.
    self.spots = np.load('{}/spots.npy'.format(info_path))
    self.Generator = Generator(args)          # generator network
    self.Discriminator = Discriminator(args)  # discriminator network
def dis_pretrain():
    """Pre-train the discriminator on labelled positive/negative pairs.

    Loads the discriminator config and data splits, builds separate
    train/test loaders, and runs the discriminator's own pre-training loop.
    """
    print("start pre-training discriminator...")
    conf = dis_config()
    train_data, test_data = get_pn_data('data/dis_data')
    train_loader = DataLoader(train_data, conf.batch_size, shuffle=True,
                              num_workers=conf.num_workers, collate_fn=collate_fn)
    # BUG FIX: the test loader previously wrapped train_data, so the reported
    # "test" accuracy was really training accuracy. Use the held-out split.
    test_loader = DataLoader(test_data, conf.batch_size, shuffle=True,
                             num_workers=conf.num_workers, collate_fn=collate_fn)
    Dis = Discriminator(conf)
    Dis.pretrain(train_loader, test_loader)
def gan_train():
    """Adversarially train the generator against the discriminator.

    The generator proposes samples via ``gen_step`` (a coroutine: ``next``
    yields (high, low), ``send`` feeds back the reward); the discriminator
    scores them and its loss/reward drive the generator update. The
    generator checkpoint is saved whenever validation accuracy reaches a
    new low (a weaker discriminator implies a stronger generator).
    """
    print("start gan training...")
    conf = gan_config()
    gen = Generator(conf)
    dis = Discriminator(conf)
    gen.trainModel.load('generator.pkl')
    train_data = get_pos_data('data/gan_data')
    test_data = get_neg_data('data/dis_data')
    train_loader = DataLoader(train_data, conf.batch_size, shuffle=True,
                              num_workers=conf.num_workers, collate_fn=collate_fn,
                              drop_last=True)
    test_loader = DataLoader(test_data, conf.batch_size, shuffle=True,
                             num_workers=conf.num_workers, collate_fn=collate_fn,
                             drop_last=True)
    avg_reward = 0
    # BUG FIX: worst_acc was re-initialized to 1 inside the epoch loop, so the
    # comparison never carried across epochs and the model was saved on nearly
    # every test epoch. Track the running worst accuracy across the whole run.
    worst_acc = 1
    for epoch in range(conf.n_epochs):
        # NOTE(review): the discriminator weights are reloaded every epoch;
        # presumably discriminator.pkl is refreshed elsewhere — confirm.
        dis.trainModel.load('discriminator.pkl')
        epoch_loss = 0
        epoch_reward = 0
        for i, batch_data in enumerate(train_loader):
            data, label = batch_data
            gen_step = gen.gen_step(data)
            high, low = next(gen_step)
            losses, reward = dis.dis_step(high, low)
            reward = reward - avg_reward
            # BUG FIX: losses.data[0] is 0-dim indexing, removed in modern
            # PyTorch — use .item() to extract the scalar loss.
            epoch_loss += losses.item()
            epoch_reward += reward
            gen_step.send(reward)
        # NOTE(review): dividing epoch totals by batch_size (not the number of
        # batches) matches the original behavior — verify that is intended.
        avg_reward = epoch_reward / conf.batch_size
        print('Epoch{}/{}, Train_Loss={:.3f}'.format(
            epoch + 1, conf.n_epochs, epoch_loss / conf.batch_size))
        if epoch % conf.epoch_per_test == 0:
            true_y, pred_y = predict(dis.trainModel, test_loader)
            eval_acc = acc_metric(true_y, pred_y)
            if worst_acc > eval_acc:
                worst_acc = eval_acc
                gen.trainModel.save(conf.model_name)
            print('gan_valid_acc is {:.3f}'.format(worst_acc))
def __init__(self, args):
    """Seed the RNGs, echo the training arguments, and build the loader and GAN."""
    self.args = args
    torch.manual_seed(self.args.seed)
    np.random.seed(self.args.seed)
    # Echo every parsed argument for the training log.
    print('> Training arguments:')
    for arg in vars(args):
        print('>>> {}: {}'.format(arg, getattr(args, arg)))
    reader = dp.DatasetReader(white_noise=self.args.dataset,
                              data_path=data_path,
                              data_source=args.data,
                              len_seg=self.args.len_seg)
    dataset, _ = reader(args.net_name)
    self.data_loader = DataLoader(dataset=dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True)
    self.Generator = Generator(args)          # generator network
    self.Discriminator = Discriminator(args)  # discriminator network
def main():
    """Build the Efficient-GAN models, prepare the image data, and train."""
    z_dim = 20
    G = Generator(z_dim=z_dim)
    D = Discriminator(z_dim=z_dim)
    E = Encoder(z_dim=z_dim)
    # Apply the shared weight initialization to all three networks.
    for net in (G, D, E):
        net.apply(weights_init)
    train_img_list = make_datapath_list(num=200)
    mean, std = (0.5,), (0.5,)
    train_dataset = GAN_Img_Dataset(file_list=train_img_list,
                                    transform=ImageTransform(mean, std))
    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=64,
                                                   shuffle=True)
    G_update, D_update, E_update = train_model(G, D, E,
                                               dataloader=train_dataloader,
                                               num_epochs=1500,
                                               save_model_name='Efficient_GAN')
def build_model(self):
    """Instantiate the generator/discriminator/transform networks, wrap them
    for multi-GPU use when requested, and set up optimizers and losses."""
    self.Generator = Generator(chn=self.g_conv_dim, k_size=3,
                               res_num=self.res_num).to(self.device)
    self.Discriminator = Discriminator(chn=self.d_conv_dim,
                                       k_size=3).to(self.device)
    self.Transform = Transform_block().to(self.device)
    if self.parallel:
        print('use parallel...')
        print('gpuids ', self.gpus)
        device_ids = [int(i) for i in self.gpus.split(',')]
        self.Generator = nn.DataParallel(self.Generator, device_ids=device_ids)
        self.Discriminator = nn.DataParallel(self.Discriminator, device_ids=device_ids)
        self.Transform = nn.DataParallel(self.Transform, device_ids=device_ids)
    # Optimize only the trainable parameters of each network.
    self.g_optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, self.Generator.parameters()),
        self.g_lr, [self.beta1, self.beta2])
    self.d_optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, self.Discriminator.parameters()),
        self.d_lr, [self.beta1, self.beta2])
    # Losses used during training.
    self.MSE_loss = torch.nn.MSELoss()
    self.L1_loss = torch.nn.SmoothL1Loss()
    self.C_loss = torch.nn.BCEWithLogitsLoss()
    # Log the network structures for the experiment record.
    logging.info("Generator structure:")
    logging.info(self.Generator)
    logging.info("Discriminator structure:")
    logging.info(self.Discriminator)
def initialize_model(self, lr_schedular_options, model_type='unet', residual_blocks=9, layer_size=64):
    """Build the GAN (discriminator + selected generator), move the networks
    to the GPU when configured, and create optimizers plus LR schedulers.

    Args:
        lr_schedular_options: options forwarded to get_learning_schedule.
        model_type: one of 'unet', 'resnet', 'inception', 'unet2',
            'unet_large', 'unet_fusion'.
        residual_blocks: depth of the RESNET generator (resnet only).
        layer_size: base channel count (ngf) for the generator.

    Raises:
        Exception: if model_type is not a known generator type.
    """
    all_models = ['unet', 'resnet', 'inception', 'unet2', 'unet_large', 'unet_fusion']
    if model_type not in all_models:
        raise Exception('This model type is not available!')
    self.dis = Discriminator(image_size=self.image_size, leaky_relu=self.leaky_relu_threshold)
    if model_type == 'unet':
        self.gen = Generator_Unet(image_size=self.image_size, ngf=layer_size)
    elif model_type == 'resnet':
        self.gen = Generator_RESNET(residual_blocks=residual_blocks, ngf=layer_size)
    elif model_type == 'inception':
        self.gen = Generator_InceptionNet(ngf=layer_size)
    elif model_type == 'unet2':
        self.gen = Generator_Unet_2(image_size=self.image_size, ngf=layer_size)
    elif model_type == 'unet_large':
        self.gen = Generator_Unet_Large(image_size=self.image_size, ngf=layer_size)
    elif model_type == 'unet_fusion':
        self.gen = Generator_Unet_Fusion(image_size=self.image_size, ngf=layer_size)
    if self.device is not None:
        self.gen.cuda()
        self.dis.cuda()
    self.gen_optim = optim.Adam(self.gen.parameters(), lr=self.lr, betas=self.betas)
    self.dis_optim = optim.Adam(self.dis.parameters(), lr=self.lr, betas=self.betas)
    # BUG FIX: the schedulers were crossed — lr_schedule_dis wrapped the
    # generator's optimizer and lr_schedule_gen wrapped the discriminator's,
    # so LR decay was applied to the wrong network. Pair each schedule with
    # its own optimizer.
    self.lr_schedule_dis = self.get_learning_schedule(self.dis_optim, lr_schedular_options)
    self.lr_schedule_gen = self.get_learning_schedule(self.gen_optim, lr_schedular_options)
    self.model_type = model_type
    self.layer_size = layer_size
    self.residual_blocks = residual_blocks
    self.lr_policy = lr_schedular_options
    print('Model Initialized !\nGenerator Model Type : {} and Layer Size : {}'.format(model_type, layer_size))
    print('Model Parameters are:\nEpochs : {}\nLearning rate : {}\nLeaky Relu Threshold : {}\nLamda : {}\nBeta : {}'
          .format(self.epochs, self.lr, self.leaky_relu_threshold, self.lamda, self.betas))
def test_gradient_penalty_non_zero(self):
    """Verify that a non-zero WGAN-GP gradient penalty is computed on the
    first training step with freshly created (untrained) models."""
    with TemporaryDirectory() as tmpdirname:
        # Write a random latent file for CreateModelRunner to consume.
        latent = np.random.rand(64, 1, 512)
        os.makedirs(os.path.dirname(tmpdirname + '/encoded_smiles.latent'), exist_ok=True)
        with open(tmpdirname + '/encoded_smiles.latent', 'w') as f:
            json.dump(latent.tolist(), f)
        # Create and then reload the untrained discriminator/generator pair.
        C = CreateModelRunner(input_data_path=tmpdirname + '/encoded_smiles.latent',
                              output_model_folder=tmpdirname)
        C.run()
        D = Discriminator.load(tmpdirname + '/discriminator.txt')
        G = Generator.load(tmpdirname + '/generator.txt')
        json_smiles = open(tmpdirname + '/encoded_smiles.latent', "r")
        latent_space_mols = np.array(json.load(json_smiles))
        testSampler = Sampler(G)
        # Flatten (set_size, 1, 512) -> (set_size, 512) for the discriminator.
        latent_space_mols = latent_space_mols.reshape(
            latent_space_mols.shape[0], 512)
        T = torch.cuda.FloatTensor  # NOTE: this test requires a CUDA device
        G.cuda()
        D.cuda()
        dataloader = torch.utils.data.DataLoader(
            LatentMolsDataset(latent_space_mols), shuffle=True,
            batch_size=64, drop_last=True)
        for _, real_mols in enumerate(dataloader):
            real_mols = real_mols.type(T)
            fake_mols = testSampler.sample(real_mols.shape[0])
            # Random interpolation point between real and fake, as in WGAN-GP.
            alpha = T(np.random.random((real_mols.size(0), 1)))
            interpolates = (alpha * real_mols + ((1 - alpha) * fake_mols)).requires_grad_(True)
            d_interpolates = D(interpolates)
            fake = T(real_mols.shape[0], 1).fill_(1.0)
            # Gradient of D's output w.r.t. the interpolated inputs.
            gradients = autograd.grad(
                outputs=d_interpolates,
                inputs=interpolates,
                grad_outputs=fake,
                create_graph=True,
                retain_graph=True,
                only_inputs=True,
            )[0]
            gradients = gradients.view(gradients.size(0), -1)
            gradient_penalty = ((gradients.norm(2, dim=1) - 1)**2).mean()
            self.assertTrue(gradient_penalty.data != 0)
            # One batch is enough for this check.
            break
def load_models(epoch, hparams, hidden_dim):
    """Load the five pre-trained TimeGAN components saved at the given epoch.

    Checkpoints only exist for epochs divisible by 50; any other value
    returns an error message string instead of the models.
    """
    from models.Discriminator import Discriminator
    from models.Recovery import Recovery
    from models.Generator import Generator
    from models.Embedder import Embedder
    from models.Supervisor import Supervisor
    if epoch % 50 != 0:
        return 'Only insert epochs that are divisible by 50.'
    # Shared checkpoint location and per-epoch filename suffix.
    weight_root = 'C://Users/s157148/Documents/Github/TimeGAN/weights/ALL/'
    suffix = 'epoch_' + str(epoch)

    def restore(model, subdir):
        # Load the checkpointed weights and finalize the model graph.
        model.load_weights(weight_root + subdir + '/' + suffix).expect_partial()
        model.build([])
        return model

    e_model_pre_trained = restore(
        Embedder('logs/e_model_pre_train', hparams, hidden_dim, dimensionality=11), 'embedder')
    r_model_pre_trained = restore(
        Recovery('logs/r_model_pre_train', hparams, hidden_dim, dimensionality=11), 'recovery')
    s_model_pre_trained = restore(
        Supervisor('logs/s_model_pre_train', hparams, hidden_dim), 'supervisor')
    g_model_pre_trained = restore(
        Generator('logs/g_model_pre_train', hparams, hidden_dim), 'generator')
    d_model_pre_trained = restore(
        Discriminator('logs/d_model_pre_train', hparams, hidden_dim), 'discriminator')
    return (e_model_pre_trained, r_model_pre_trained, s_model_pre_trained,
            g_model_pre_trained, d_model_pre_trained)
def test_discriminator_shape(self):
    """The discriminator's size must be invariant to the number of input smiles."""
    expected_param_count = 394241
    with TemporaryDirectory() as tmpdirname:
        latent_path = tmpdirname + '/encoded_smiles.latent'
        for set_size in [1, 64, 256, 1024]:
            # Write a latent file of this set size and build models from it.
            latent = np.random.rand(set_size, 1, 512)
            os.makedirs(os.path.dirname(latent_path), exist_ok=True)
            with open(latent_path, 'w') as f:
                json.dump(latent.tolist(), f)
            runner = CreateModelRunner(input_data_path=latent_path,
                                       output_model_folder=tmpdirname)
            runner.run()
            D = Discriminator.load(tmpdirname + '/discriminator.txt')
            # Flatten every parameter tensor and count the total entries.
            flattened = torch.cat([param.view(-1) for param in D.parameters()])
            self.assertEqual(flattened.shape[0], expected_param_count,
                             "Network does not match expected size")
def test_separate_optimizers(self):
    """Generator and discriminator must each get their own optimizer instance
    (same type, distinct objects) so the two networks train independently."""
    with TemporaryDirectory() as tmpdirname:
        latent_path = tmpdirname + '/encoded_smiles.latent'
        latent = np.random.rand(64, 1, 512)
        os.makedirs(os.path.dirname(latent_path), exist_ok=True)
        with open(latent_path, 'w') as f:
            json.dump(latent.tolist(), f)
        runner = CreateModelRunner(input_data_path=latent_path,
                                   output_model_folder=tmpdirname)
        runner.run()
        D = Discriminator.load(tmpdirname + '/discriminator.txt')
        G = Generator.load(tmpdirname + '/generator.txt')
        optimizer_G = torch.optim.Adam(G.parameters())
        optimizer_D = torch.optim.Adam(D.parameters())
        # Same optimizer class...
        self.assertTrue(type(optimizer_G) == type(optimizer_D))
        # ...but two distinct objects.
        self.assertTrue(optimizer_G is not optimizer_D)
class BaseExperiment:
    """WGAN-style training harness: builds the data pipeline and the
    generator/discriminator pair, then adversarially trains them
    (5 critic updates per generator update)."""

    def __init__(self, args):
        """Seed RNGs, echo the arguments, load the dataset and build both networks."""
        self.args = args
        torch.manual_seed(self.args.seed)
        np.random.seed(self.args.seed)
        print('> Training arguments:')
        for arg in vars(args):
            print('>>> {}: {}'.format(arg, getattr(args, arg)))
        white_noise = dp.DatasetReader(white_noise=self.args.dataset,
                                       data_path=data_path,
                                       data_source=args.data,
                                       len_seg=self.args.len_seg)
        dataset, _ = white_noise(args.net_name)
        self.data_loader = DataLoader(dataset=dataset,
                                      batch_size=args.batch_size,
                                      shuffle=True)
        self.Generator = Generator(args)          # Generator network
        self.Discriminator = Discriminator(args)  # Discriminator network

    def select_optimizer(self, model):
        """Return the optimizer named by args.optimizer over model's trainable parameters.

        Raises:
            ValueError: if args.optimizer is not a recognized name.
        """
        params = filter(lambda p: p.requires_grad, model.parameters())
        name = self.args.optimizer
        if name == 'Adam':
            return optim.Adam(params, lr=self.args.learning_rate, betas=(0.5, 0.9))
        if name == 'RMS':
            return optim.RMSprop(params, lr=self.args.learning_rate)
        if name == 'SGD':
            return optim.SGD(params, lr=self.args.learning_rate, momentum=0.9)
        if name == 'Adagrad':
            return optim.Adagrad(params, lr=self.args.learning_rate)
        if name == 'Adadelta':
            return optim.Adadelta(params, lr=self.args.learning_rate)
        raise ValueError('Unknown optimizer: {}'.format(name))

    def weights_init(self, model):
        """Initialize Linear layers with the initializer named by args.initializer."""
        initializers = {
            'xavier_uniform_': nn.init.xavier_uniform_,
            # BUG FIX: this entry pointed at the deprecated nn.init.xavier_normal
            # (no trailing underscore) while every other entry used the
            # in-place form.
            'xavier_normal_': nn.init.xavier_normal_,
            'orthogonal_': nn.init.orthogonal_,
            'kaiming_normal_': nn.init.kaiming_normal_,
        }
        if isinstance(model, nn.Linear):
            initializer = initializers[self.args.initializer]
            initializer(model.weight)
            model.bias.data.fill_(0)

    def file_name(self):
        """Canonical file stem encoding the experiment's hyper-parameters."""
        return '{}_{}_{}_{}_{}_{}'.format(
            self.args.model_name, self.args.net_name, self.args.len_seg,
            self.args.optimizer, self.args.learning_rate, self.args.num_epoch)

    def gradient_penalty(self, x_real, x_fake, batch_size, beta=0.3):
        """WGAN-GP penalty: beta * E[(||grad D(x_interp)||_2 - 1)^2]."""
        x_real = x_real.detach()
        x_fake = x_fake.detach()
        alpha = torch.rand(batch_size, 1)
        alpha = alpha.expand_as(x_real)
        interpolates = alpha * x_real + ((1 - alpha) * x_fake)
        interpolates.requires_grad_()
        dis_interpolates = self.Discriminator(interpolates)
        gradients = autograd.grad(outputs=dis_interpolates,
                                  inputs=interpolates,
                                  grad_outputs=torch.ones_like(dis_interpolates),
                                  create_graph=True,
                                  retain_graph=True,
                                  only_inputs=True)[0]
        return ((gradients.norm(2, dim=1) - 1) ** 2).mean() * beta

    def train(self):
        """Adversarial training loop with 5 critic steps per generator step."""
        self.Generator.apply(self.weights_init)
        self.Discriminator.apply(self.weights_init)
        gen_optimizer = self.select_optimizer(self.Generator)
        # BUG FIX: the discriminator optimizer was previously built over the
        # *generator's* parameters, so the discriminator never trained.
        dis_optimizer = self.select_optimizer(self.Discriminator)
        criterion = nn.MSELoss()
        dis_losses, gen_losses = [0], [0]
        for epoch in range(self.args.num_epoch):
            t0 = time.time()
            for _, sample_batched in enumerate(self.data_loader):
                data_real = torch.tensor(sample_batched, dtype=torch.float32)
                batch_size = sample_batched.size(0)
                # --- Train Discriminator (critic): 5 steps per batch ---
                for _ in range(5):
                    pred_real = self.Discriminator(data_real)
                    loss_real = -pred_real.mean()
                    # Generate a detached fake batch for the critic update.
                    z = torch.randn(batch_size, self.args.dim_noise)
                    data_fake = self.Generator(z).detach()
                    pred_fake = self.Discriminator(data_fake)
                    loss_fake = pred_fake.mean()
                    # Gradient penalty only in WGAN mode.
                    if self.args.model_name == 'WGAN':
                        grad_penalty = self.gradient_penalty(
                            data_real, data_fake, batch_size)
                    else:
                        grad_penalty = 0
                    dis_loss = loss_real + loss_fake + grad_penalty
                    dis_optimizer.zero_grad()
                    dis_loss.backward()
                    dis_optimizer.step()
                # --- Train Generator: maximize log(D(G(z))) ---
                # BUG FIX: the generator update previously scored the *detached*
                # fake batch, so no gradient ever reached the generator and it
                # never learned. Re-sample through the live generator instead.
                z = torch.randn(batch_size, self.args.dim_noise)
                data_fake = self.Generator(z)
                pred_fake = self.Discriminator(data_fake)
                gen_loss = -pred_fake.mean()
                gen_optimizer.zero_grad()
                gen_loss.backward()
                gen_optimizer.step()
                data_fake = data_fake.detach()
                mse = criterion(data_fake, data_real)
            t1 = time.time()
            print('\033[1;31m[Epoch {:>4}]\033[0m '
                  '\033[1;31mD(x) = {:.5f}\033[0m '
                  '\033[1;32mD(G(z)) = {:.5f}\033[0m '
                  '\033[1;32mMSE = {:.5f}\033[0m '
                  'Time cost={:.2f}s'.format(epoch + 1, -loss_real, -gen_loss,
                                             mse, t1 - t0))
            dis_losses.append(dis_loss.item())
            gen_losses.append(-gen_loss.item())
        # Plot the last real/fake batch for a quick visual check.
        fig, ax = plt.subplots()
        ax.plot(data_real[0], label='real')
        ax.plot(data_fake[0], ls='--', lw=0.5, label='fake')
        ax.legend()
        plt.show()
def main():
    """End-to-end training entry point for the steganography GAN.

    Parses CLI options, prepares output directories and loggers, builds the
    datasets/loaders, constructs the hiding (H), reveal (R) and discriminator
    (D) networks with their optimizers and schedulers, then runs the
    train/validate loop and checkpoints every epoch.
    """
    ############### define global parameters ###############
    global opt, optimizerH, optimizerR, optimizerD, writer, logPath, schedulerH, schedulerR
    global val_loader, smallestLoss, mse_loss, gan_loss, pixel_loss, patch, criterion_GAN, criterion_pixelwise

    ################# parse the configuration options ###############
    opt = parser.parse_args()
    if torch.cuda.is_available() and not opt.cuda:
        print("WARNING: You have a CUDA device, "
              "so you should probably run with --cuda")
    cudnn.benchmark = True

    ############ create the dirs to save the result #############
    cur_time = time.strftime('%Y-%m-%d-%H_%M_%S', time.localtime())
    experiment_dir = opt.hostname + "_" + cur_time + opt.remark
    opt.outckpts += experiment_dir + "/checkPoints"
    opt.trainpics += experiment_dir + "/trainPics"
    opt.validationpics += experiment_dir + "/validationPics"
    opt.outlogs += experiment_dir + "/trainingLogs"
    opt.outcodes += experiment_dir + "/codes"
    opt.testPics += experiment_dir + "/testPics"
    if not os.path.exists(opt.outckpts):
        os.makedirs(opt.outckpts)
    if not os.path.exists(opt.trainpics):
        os.makedirs(opt.trainpics)
    if not os.path.exists(opt.validationpics):
        os.makedirs(opt.validationpics)
    if not os.path.exists(opt.outlogs):
        os.makedirs(opt.outlogs)
    if not os.path.exists(opt.outcodes):
        os.makedirs(opt.outcodes)
    # Test pictures dir only when a test run is requested.
    if (not os.path.exists(opt.testPics)) and opt.test != '':
        os.makedirs(opt.testPics)
    logPath = opt.outlogs + '/%s_%d_log.txt' % (opt.dataset, opt.batchSize)

    # Save the run's parameters to the log.
    print_log(str(opt), logPath)
    # Snapshot the code used for this experiment.
    save_current_codes(opt.outcodes)
    # tensorboardX writer
    writer = SummaryWriter(comment='**' + opt.hostname + "_" + opt.remark)

    ############## datasets ############################
    DATA_DIR_root = './datasets/'
    DATA_DIR = os.path.join(DATA_DIR_root, opt.datasets)
    traindir = os.path.join(DATA_DIR, 'train')
    valdir = os.path.join(DATA_DIR, 'val')
    secretdir = os.path.join(DATA_DIR_root, opt.secret)
    # NOTE(review): cover images are resized to (imageSize, 512) while secret
    # images are square (imageSize, imageSize) — confirm this is intended.
    train_dataset = MyImageFolder(
        traindir,
        transforms.Compose([
            transforms.Resize([opt.imageSize, 512]),
            transforms.ToTensor(),
        ]))
    val_dataset = MyImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize([opt.imageSize, 512]),
            transforms.ToTensor(),
        ]))
    secret_dataset = MyImageFolder(
        secretdir,
        transforms.Compose([
            transforms.Resize([opt.imageSize, opt.imageSize]),
            transforms.ToTensor(),
        ]))
    assert train_dataset
    assert val_dataset
    assert secret_dataset
    train_loader = DataLoader(train_dataset, batch_size=opt.batchSize,
                              shuffle=True, num_workers=int(opt.workers))
    secret_loader = DataLoader(secret_dataset, batch_size=opt.batchSize,
                               shuffle=False, num_workers=int(opt.workers))
    val_loader = DataLoader(val_dataset, batch_size=opt.batchSize,
                            shuffle=True, num_workers=int(opt.workers))

    ############## network architectures ############################
    # Hiding network: 6 input channels (cover + secret stacked), 3 outputs.
    Hnet = UnetGenerator(input_nc=6, output_nc=3, num_downs=opt.num_downs,
                         output_function=nn.Sigmoid)
    Hnet.cuda()
    Hnet.apply(weights_init)
    # Reveal network: recovers the secret image from the container.
    Rnet = RevealNet(output_function=nn.Sigmoid)
    Rnet.cuda()
    Rnet.apply(weights_init)
    # Discriminator variant is chosen by the normalization flag.
    if opt.Dnorm == "spectral":
        Dnet = Discriminator_SN(in_channels=3)
        Dnet.cuda()
    elif opt.Dnorm == "switch":
        Dnet = Discriminator_Switch(in_channels=3)
        Dnet.cuda()
    else:
        Dnet = Discriminator(in_channels=3)
        Dnet.cuda()
    # Dnet.apply(weights_init)
    # Calculate output of image discriminator (PatchGAN)
    patch = (1, opt.imageSize // 2 ** 4, opt.imageSize // 2 ** 4)

    # setup optimizer
    optimizerH = optim.Adam(Hnet.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
    schedulerH = ReduceLROnPlateau(optimizerH, mode='min', factor=0.2, patience=5, verbose=True)
    optimizerR = optim.Adam(Rnet.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
    schedulerR = ReduceLROnPlateau(optimizerR, mode='min', factor=0.2, patience=8, verbose=True)
    optimizerD = optim.Adam(Dnet.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
    schedulerD = ReduceLROnPlateau(optimizerD, mode='min', factor=0.2, patience=5, verbose=True)

    # Resume from earlier checkpoints when paths are given.
    if opt.Hnet != "":
        Hnet.load_state_dict(torch.load(opt.Hnet))
    # Wrap for multi-GPU training when more than one GPU is configured.
    if opt.ngpu > 1:
        Hnet = torch.nn.DataParallel(Hnet).cuda()
    print_network(Hnet)
    if opt.Rnet != '':
        Rnet.load_state_dict(torch.load(opt.Rnet))
    if opt.ngpu > 1:
        Rnet = torch.nn.DataParallel(Rnet).cuda()
    print_network(Rnet)
    if opt.Dnet != '':
        Dnet.load_state_dict(torch.load(opt.Dnet))
    if opt.ngpu > 1:
        Dnet = torch.nn.DataParallel(Dnet).cuda()
    print_network(Dnet)

    # define loss
    mse_loss = nn.MSELoss().cuda()
    criterion_GAN = nn.MSELoss().cuda()
    criterion_pixelwise = nn.L1Loss().cuda()
    smallestLoss = 10000
    print_log("training is beginning .......................................................", logPath)
    for epoch in range(opt.niter):
        ######################## train ##########################################
        train(train_loader, secret_loader, epoch, Hnet=Hnet, Rnet=Rnet, Dnet=Dnet)
        ####################### validation #####################################
        val_hloss, val_rloss, val_r_mseloss, val_r_consistloss, val_dloss, val_fakedloss, val_realdloss, val_Ganlosses, val_Pixellosses, val_sumloss = validation(val_loader, secret_loader, epoch, Hnet=Hnet, Rnet=Rnet, Dnet=Dnet)
        ####################### adjust learning rate ############################
        schedulerH.step(val_sumloss)
        schedulerR.step(val_rloss)
        schedulerD.step(val_dloss)
        # NOTE: best-only checkpointing (guarded by smallestLoss) was disabled;
        # every epoch is saved unconditionally below.
        # save the epoch model parameters
        torch.save(Hnet.state_dict(),
                   '%s/netH_epoch_%d,sumloss=%.6f,Hloss=%.6f.pth' % (
                       opt.outckpts, epoch, val_sumloss, val_hloss))
        torch.save(Rnet.state_dict(),
                   '%s/netR_epoch_%d,sumloss=%.6f,Rloss=%.6f.pth' % (
                       opt.outckpts, epoch, val_sumloss, val_rloss))
        torch.save(Dnet.state_dict(),
                   '%s/netD_epoch_%d,sumloss=%.6f,Dloss=%.6f.pth' % (
                       opt.outckpts, epoch, val_sumloss, val_dloss))
    writer.close()
class BaseExperiment:
    """DCGAN-style training harness: builds the data pipeline and the
    generator/discriminator pair, then adversarially trains them with
    the standard BCE real/fake objective."""

    def __init__(self, args):
        """Seed RNGs, echo the arguments, load the dataset and build both networks."""
        self.args = args
        torch.manual_seed(self.args.seed)
        np.random.seed(self.args.seed)
        print('> Training arguments:')
        for arg in vars(args):
            print('>>> {}: {}'.format(arg, getattr(args, arg)))
        white_noise = dp.DatasetReader(white_noise=self.args.dataset,
                                       data_path=data_path,
                                       data_source=args.data,
                                       len_seg=self.args.len_seg)
        dataset, _ = white_noise(args.net_name)
        self.data_loader = DataLoader(dataset=dataset,
                                      batch_size=args.batch_size,
                                      shuffle=False)
        self.Generator = Generator(args)          # Generator network
        self.Discriminator = Discriminator(args)  # Discriminator network

    def select_optimizer(self, model):
        """Return the optimizer named by args.optimizer over model's trainable parameters.

        Raises:
            ValueError: if args.optimizer is not a recognized name.
        """
        params = filter(lambda p: p.requires_grad, model.parameters())
        name = self.args.optimizer
        if name == 'Adam':
            return optim.Adam(params, lr=self.args.learning_rate, betas=(0.5, 0.9))
        if name == 'RMS':
            return optim.RMSprop(params, lr=self.args.learning_rate)
        if name == 'SGD':
            return optim.SGD(params, lr=self.args.learning_rate, momentum=0.9)
        if name == 'Adagrad':
            return optim.Adagrad(params, lr=self.args.learning_rate)
        if name == 'Adadelta':
            return optim.Adadelta(params, lr=self.args.learning_rate)
        raise ValueError('Unknown optimizer: {}'.format(name))

    @staticmethod
    def weights_init(m):
        """Custom weights initialization called on netG and netD (DCGAN scheme).

        :param m: a submodule; only Conv* and BatchNorm* layers are touched.
        """
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            nn.init.normal_(m.weight.data, 0.0, 0.02)
        elif classname.find('BatchNorm') != -1:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0)

    def file_name(self):
        """Canonical file stem encoding the experiment's hyper-parameters."""
        return '{}_{}_{}_{}_{}_{}'.format(self.args.model_name,
                                          self.args.net_name,
                                          self.args.len_seg,
                                          self.args.optimizer,
                                          self.args.learning_rate,
                                          self.args.num_epoch)

    def gradient_penalty(self, x_real, x_fake, batch_size, beta=0.3):
        """WGAN-GP penalty: beta * E[(||grad D(x_interp)||_2 - 1)^2]."""
        x_real = x_real.detach()
        x_fake = x_fake.detach()
        alpha = torch.rand(batch_size, 1)
        alpha = alpha.expand_as(x_real)
        interpolates = alpha * x_real + ((1 - alpha) * x_fake)
        interpolates.requires_grad_()
        dis_interpolates = self.Discriminator(interpolates)
        gradients = autograd.grad(outputs=dis_interpolates,
                                  inputs=interpolates,
                                  grad_outputs=torch.ones_like(dis_interpolates),
                                  create_graph=True,
                                  retain_graph=True,
                                  only_inputs=True)[0]
        return ((gradients.norm(2, dim=1) - 1) ** 2).mean() * beta

    def train(self):
        """DCGAN training loop (BCE loss on real/fake labels)."""
        # BUG FIX: weights_init must be *applied* recursively so the Conv /
        # BatchNorm submodules get initialized; calling it once on the
        # top-level module matched neither branch and did nothing.
        self.Generator.apply(self.weights_init)
        self.Discriminator.apply(self.weights_init)
        # BUG FIX: the discriminator optimizer was previously built over the
        # *generator's* parameters, so the discriminator never trained.
        optimizerD = self.select_optimizer(self.Discriminator)
        optimizerG = self.select_optimizer(self.Generator)
        criterion = nn.BCELoss()
        mse_criterion = nn.MSELoss()
        real_label = 1.
        fake_label = 0.
        G_losses = []
        D_losses = []
        for epoch in range(self.args.num_epoch):
            for i, sample_batched in enumerate(self.data_loader):
                # --- Train Discriminator: maximize log(D(x)) + log(1 - D(G(z))) ---
                self.Discriminator.zero_grad()
                data_real = torch.tensor(sample_batched, dtype=torch.float32)
                data_real = data_real.unsqueeze(2)
                batch_size = sample_batched.size(0)
                label = torch.full((batch_size, ), real_label, dtype=torch.float32)
                output = self.Discriminator(data_real)
                errD_real = criterion(output, label)
                errD_real.backward()
                D_x = output.mean().item()
                # Generate fake data; keep the attached graph for the G step.
                noise = torch.rand(batch_size, 100, 1, 1)
                fake = self.Generator(noise)
                label.fill_(fake_label)
                output = self.Discriminator(fake.detach())
                errD_fake = criterion(output, label)
                errD_fake.backward()
                D_G_z1 = output.mean().item()
                errD = errD_real + errD_fake
                # BUG FIX: optimizerD.zero_grad() was called here, *after* both
                # backward passes, which wiped the gradients right before the
                # step so the discriminator never updated.
                optimizerD.step()
                # --- Train Generator: maximize log(D(G(z))) ---
                self.Generator.zero_grad()
                label.fill_(real_label)
                # BUG FIX: the generator update previously scored the *detached*
                # fake batch, so no gradient ever reached the generator.
                output = self.Discriminator(fake)
                errG = criterion(output, label)
                errG.backward()
                D_G_z2 = output.mean().item()
                optimizerG.step()
                # Diagnostic MSE between fake and real (no gradients needed).
                f = fake.detach().squeeze(2)
                r = data_real.squeeze(2)
                mse = mse_criterion(f, r)
                if i % 50 == 0:
                    print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                          % (epoch, self.args.num_epoch, i, len(self.data_loader),
                             errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))
                    print(mse)
                # Save losses for plotting later.
                G_losses.append(errG.item())
                D_losses.append(errD.item())
        # Plot the last real/fake pair for a quick visual check.
        data_fake = f.numpy()
        data_real = r.numpy()
        fig, ax = plt.subplots()
        ax.plot(data_fake[0][1], label='fake')
        ax.plot(data_real[0][1], ls='--', lw=0.5, label='real')
        ax.legend()
        plt.show()
class Trainer(object):
    """Style-transfer GAN trainer.

    Alternates discriminator and generator updates based on an
    exponentially smoothed "discriminator success rate": D is trained
    while it is losing; once it wins often enough, G is trained instead.
    """

    def __init__(self, style_data_loader, content_data_loader, config):
        # Report log lives at <log_path>/<version>/<version>_report.log
        self.log_file = os.path.join(config.log_path, config.version, config.version + "_log.log")
        self.report_file = os.path.join(config.log_path, config.version, config.version + "_report.log")
        logging.basicConfig(filename=self.report_file,
                            format='[%(asctime)s-%(levelname)s:%(message)s]',
                            level=logging.DEBUG, filemode='w',
                            datefmt='%Y-%m-%d%I:%M:%S %p')
        self.Experiment_description = config.experiment_description
        logging.info("Experiment description: \n%s" % self.Experiment_description)
        # Data loaders
        self.style_data_loader = style_data_loader
        self.content_data_loader = content_data_loader
        # exact loss
        self.adv_loss = config.adv_loss
        logging.info("loss: %s" % self.adv_loss)
        # Model hyper-parameters
        self.imsize = config.imsize
        logging.info("image size: %d" % self.imsize)
        self.batch_size = config.batch_size
        logging.info("Batch size: %d" % self.batch_size)
        logging.info("Is shuffle: {}".format(config.is_shuffle))
        logging.info("Image center crop size: {}".format(config.center_crop))
        self.res_num = config.res_num
        logging.info("resblock number: %d" % self.res_num)
        self.g_conv_dim = config.g_conv_dim
        logging.info("generator convolution initial channel: %d" % self.g_conv_dim)
        self.d_conv_dim = config.d_conv_dim
        logging.info("discriminator convolution initial channel: %d" % self.d_conv_dim)
        self.parallel = config.parallel
        logging.info("Is multi-GPU parallel: %s" % str(self.parallel))
        self.gpus = config.gpus
        logging.info("GPU number: %s" % self.gpus)
        self.total_step = config.total_step
        logging.info("Total step: %d" % self.total_step)
        self.d_iters = config.d_iters
        self.g_iters = config.g_iters
        self.total_iters_ratio = config.total_iters_ratio
        self.num_workers = config.num_workers
        self.g_lr = config.g_lr
        logging.info("Generator learning rate: %f" % self.g_lr)
        self.d_lr = config.d_lr
        logging.info("Discriminator learning rate: %f" % self.d_lr)
        self.lr_decay = config.lr_decay
        logging.info("Learning rate decay: %f" % self.lr_decay)
        self.beta1 = config.beta1
        logging.info("Adam opitimizer beta1: %f" % self.beta1)
        self.beta2 = config.beta2
        logging.info("Adam opitimizer beta2: %f" % self.beta2)
        self.pretrained_model = config.pretrained_model
        self.use_pretrained_model = config.use_pretrained_model
        # BUG FIX: previously logged `pretrained_model` under this label.
        logging.info("Use pretrained model: %s" % str(self.use_pretrained_model))
        self.use_tensorboard = config.use_tensorboard
        logging.info("Use tensorboard: %s" % str(self.use_tensorboard))
        self.check_point_path = config.check_point_path
        self.sample_path = config.sample_path
        self.summary_path = config.summary_path
        self.validation_path = config.validation
        self.log_step = config.log_step
        self.sample_step = config.sample_step
        self.model_save_step = config.model_save_step
        # Per-scale weights for the multi-scale discriminator losses.
        self.prep_weights = [1.0, 1.0, 1.0, 1.0, 1.0]
        self.transform_loss_w = config.transform_loss_w
        logging.info("transform loss weight: %f" % self.transform_loss_w)
        self.feature_loss_w = config.feature_loss_w
        logging.info("feature loss weight: %f" % self.feature_loss_w)
        self.style_class = config.style_class
        self.real_prep_threshold = config.real_prep_threshold
        logging.info("real label threshold: %f" % self.real_prep_threshold)
        self.discr_success_rate = config.discr_success_rate
        logging.info("discriminator success rate: %f" % self.discr_success_rate)
        logging.info("Is conditional generating: %s" % str(config.condition_model))
        self.device = torch.device('cuda:%s' % config.default_GPU if torch.cuda.is_available() else 'cpu')
        print('build_model...')
        self.build_model()
        if self.use_tensorboard:
            self.build_tensorboard()
        # Start with trained model
        if self.use_pretrained_model:
            print('load_pretrained_model...')
            # BUG FIX: the checkpoint was announced but never actually loaded.
            self.load_pretrained_model()

    def train(self):
        """Main loop: D steps while `discr_success < win_rate`, else G steps."""
        # Data iterators (re-created whenever a loader is exhausted)
        style_iter = iter(self.style_data_loader)
        content_iter = iter(self.content_data_loader)
        step_per_epoch = len(self.style_data_loader)
        model_save_step = int(self.model_save_step)
        # Start with trained model
        if self.use_pretrained_model:
            start = self.pretrained_model + 1
        else:
            start = 0
        alternately_iter = 0
        self.d_iters = self.d_iters * self.total_iters_ratio
        max_alternately_iter = self.d_iters + self.total_iters_ratio * self.g_iters
        d_acc = 0
        real_acc = 0
        photo_acc = 0
        fake_acc = 0
        win_rate = self.discr_success_rate
        discr_success = self.discr_success_rate
        alpha = 0.05  # EMA smoothing factor for discr_success
        real_labels = []
        fake_labels = []
        # Per-scale target maps matching the multi-scale discriminator outputs.
        # NOTE(review): these spatial sizes are hard-coded for one imsize -- confirm.
        size = [[self.batch_size, 1, 760, 760], [self.batch_size, 1, 371, 371],
                [self.batch_size, 1, 83, 83], [self.batch_size, 1, 11, 11],
                [self.batch_size, 1, 6, 6]]
        for i in range(5):
            real_label = torch.ones(size[i], device=self.device)
            fake_label = torch.zeros(size[i], device=self.device)
            real_labels.append(real_label)
            fake_labels.append(fake_label)
        # Start time
        print('Start ====== training...')
        start_time = time.time()
        for step in range(start, self.total_step):
            self.Discriminator.train()
            self.Generator.train()
            try:
                content_images = next(content_iter)
                style_images = next(style_iter)
            except StopIteration:
                style_iter = iter(self.style_data_loader)
                content_iter = iter(self.content_data_loader)
                style_images = next(style_iter)
                content_images = next(content_iter)
            style_images = style_images.to(self.device)
            content_images = content_images.to(self.device)
            # ================== Train D ================== #
            if discr_success < win_rate:
                # Real style images should score positive at every scale.
                real_out = self.Discriminator(style_images)
                d_loss_real = 0
                real_acc = 0
                for i in range(len(real_out)):
                    temp = self.C_loss(real_out[i], real_labels[i]).mean()
                    real_acc += torch.gt(real_out[i], 0).type(torch.float).mean()
                    temp *= self.prep_weights[i]
                    d_loss_real += temp
                real_acc /= len(real_out)
                # Plain content photos count as negatives.
                d_loss_photo = 0
                photo_out = self.Discriminator(content_images)
                photo_acc = 0
                for i in range(len(photo_out)):
                    temp = self.C_loss(photo_out[i], fake_labels[i])
                    photo_acc += torch.lt(photo_out[i], 0).type(torch.float).mean()
                    temp *= self.prep_weights[i]
                    d_loss_photo += temp
                photo_acc /= len(photo_out)
                # Generated images are negatives too (generator frozen via detach).
                fake_image, _ = self.Generator(content_images)
                fake_out = self.Discriminator(fake_image.detach())
                d_loss_fake = 0
                fake_acc = 0
                for i in range(len(fake_out)):
                    temp = self.C_loss(fake_out[i], fake_labels[i]).mean()
                    fake_acc += torch.lt(fake_out[i], 0).type(torch.float).mean()
                    temp *= self.prep_weights[i]
                    d_loss_fake += temp
                fake_acc /= len(fake_out)
                d_acc = ((real_acc + photo_acc + fake_acc) / 3).item()
                discr_success = discr_success * (1. - alpha) + alpha * d_acc
                # Backward + Optimize
                d_loss = d_loss_real + d_loss_photo + d_loss_fake
                self.reset_grad()
                d_loss.backward()
                self.d_optimizer.step()
            else:
                # ================== Train G ================== #
                # BUG FIX: this forward pass was commented out, leaving
                # `real_feature` undefined (NameError on the first G step)
                # and `fake_image` stale from the previous D step.
                fake_image, real_feature = self.Generator(content_images)
                fake_feature = self.Generator(fake_image, get_feature=True)
                fake_out = self.Discriminator(fake_image)
                g_feature_loss = self.L1_loss(fake_feature, real_feature)
                g_transform_loss = self.MSE_loss(self.Transform(content_images), self.Transform(fake_image))
                g_loss_fake = 0
                g_acc = 0
                for i in range(len(fake_out)):
                    temp = self.C_loss(fake_out[i], real_labels[i]).mean()
                    g_acc += torch.gt(fake_out[i], 0).type(torch.float).mean()
                    temp *= self.prep_weights[i]
                    g_loss_fake += temp
                g_acc /= len(fake_out)
                g_loss_fake = g_loss_fake + g_feature_loss * self.feature_loss_w + \
                    g_transform_loss * self.transform_loss_w
                discr_success = discr_success * (1. - alpha) + alpha * (1.0 - g_acc)
                self.reset_grad()
                g_loss_fake.backward()
                self.g_optimizer.step()
            # Print out log info
            # NOTE(review): until both branches have run at least once, some of
            # d_loss_* / g_loss_fake may be undefined here -- confirm intent.
            if (step + 1) % self.log_step == 0:
                elapsed = time.time() - start_time
                elapsed = str(datetime.timedelta(seconds=elapsed))
                print("Elapsed [{}], G_step [{}/{}], D_step[{}/{}], d_out_real: {:.4f}, d_out_fake: {:.4f}, g_loss_fake: {:.4f}".
                      format(elapsed, step + 1, self.total_step, (step + 1), self.total_step,
                             d_loss_real.item(), d_loss_fake.item(), g_loss_fake.item()))
                if self.use_tensorboard:
                    self.writer.add_scalar('data/d_loss_real', d_loss_real.item(), (step + 1))
                    self.writer.add_scalar('data/d_loss_fake', d_loss_fake.item(), (step + 1))
                    self.writer.add_scalar('data/d_loss', d_loss.item(), (step + 1))
                    self.writer.add_scalar('data/g_loss', g_loss_fake.item(), (step + 1))
                    self.writer.add_scalar('data/g_feature_loss', g_feature_loss, (step + 1))
                    self.writer.add_scalar('data/g_transform_loss', g_transform_loss, (step + 1))
                    self.writer.add_scalar('acc/real_acc', real_acc.item(), (step + 1))
                    self.writer.add_scalar('acc/photo_acc', photo_acc.item(), (step + 1))
                    self.writer.add_scalar('acc/fake_acc', fake_acc.item(), (step + 1))
                    self.writer.add_scalar('acc/disc_acc', d_acc, (step + 1))
                    self.writer.add_scalar('acc/g_acc', g_acc, (step + 1))
                    self.writer.add_scalar("acc/discr_success", discr_success, (step + 1))
            # Sample images: [content|fake] stacked above [style|fake].
            if (step + 1) % self.sample_step == 0:
                print('Sample images {}_fake.png'.format(step + 1))
                fake_images, _ = self.Generator(content_images)
                saved_image1 = torch.cat([denorm(content_images), denorm(fake_images.data)], 3)
                saved_image2 = torch.cat([denorm(style_images), denorm(fake_images.data)], 3)
                sample_grid = torch.cat([saved_image1, saved_image2], 2)
                save_image(sample_grid, os.path.join(self.sample_path, '{}_fake.jpg'.format(step + 1)))
            if (step + 1) % model_save_step == 0:
                torch.save(self.Generator.state_dict(),
                           os.path.join(self.check_point_path, '{}_Generator.pth'.format(step + 1)))
                torch.save(self.Discriminator.state_dict(),
                           os.path.join(self.check_point_path, '{}_Discriminator.pth'.format(step + 1)))

    def build_model(self):
        """Instantiate G, D and the transform block, optionally wrap them in
        DataParallel, then build optimizers and loss functions."""
        self.Generator = Generator(chn=self.g_conv_dim, k_size=3, res_num=self.res_num).to(self.device)
        self.Discriminator = Discriminator(chn=self.d_conv_dim, k_size=3).to(self.device)
        self.Transform = Transform_block().to(self.device)
        if self.parallel:
            print('use parallel...')
            print('gpuids ', self.gpus)
            gpus = [int(i) for i in self.gpus.split(',')]
            self.Generator = nn.DataParallel(self.Generator, device_ids=gpus)
            self.Discriminator = nn.DataParallel(self.Discriminator, device_ids=gpus)
            self.Transform = nn.DataParallel(self.Transform, device_ids=gpus)
        # Only trainable parameters are handed to the optimizers.
        self.g_optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, self.Generator.parameters()),
                                            self.g_lr, [self.beta1, self.beta2])
        self.d_optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, self.Discriminator.parameters()),
                                            self.d_lr, [self.beta1, self.beta2])
        self.MSE_loss = torch.nn.MSELoss()
        self.L1_loss = torch.nn.SmoothL1Loss()
        self.C_loss = torch.nn.BCEWithLogitsLoss()
        # print networks
        logging.info("Generator structure:")
        logging.info(self.Generator)
        logging.info("Discriminator structure:")
        logging.info(self.Discriminator)

    def build_tensorboard(self):
        """Create a tensorboardX SummaryWriter under the summary path."""
        from tensorboardX import SummaryWriter
        self.writer = SummaryWriter(log_dir=self.summary_path)

    def load_pretrained_model(self):
        """Restore G/D weights saved at step ``self.pretrained_model``."""
        self.Generator.load_state_dict(torch.load(os.path.join(
            self.check_point_path, '{}_Generator.pth'.format(self.pretrained_model))))
        self.Discriminator.load_state_dict(torch.load(os.path.join(
            self.check_point_path, '{}_Discriminator.pth'.format(self.pretrained_model))))
        print('loaded trained models (step: {})..!'.format(self.pretrained_model))

    def reset_grad(self):
        """Zero the gradients held by both optimizers."""
        self.g_optimizer.zero_grad()
        self.d_optimizer.zero_grad()

    def save_sample(self, data_iter):
        """Dump one batch of real images for visual reference."""
        real_images, _ = next(data_iter)
        save_image(denorm(real_images), os.path.join(self.sample_path, 'real.png'))
class Model:
    """Conditional-GAN colorization harness: builds generator/discriminator,
    trains with BCE + L1 + VGG-feature losses, evaluates, and (de)serializes
    full checkpoints including hyper-parameters."""

    def __init__(self, base_path='', epochs=10, learning_rate=0.0002, image_size=256,
                 leaky_relu=0.2, betas=(0.5, 0.999), lamda=100, image_format='png'):
        self.image_size = image_size
        self.leaky_relu_threshold = leaky_relu
        self.epochs = epochs
        self.lr = learning_rate
        self.betas = betas
        self.lamda = lamda  # weight of the L1 / perceptual terms
        self.base_path = base_path
        self.image_format = image_format
        self.count = 1  # running index for saved test images
        self.gen = None
        self.dis = None
        self.gen_optim = None
        self.dis_optim = None
        self.model_type = None
        self.residual_blocks = 9
        self.layer_size = 64
        self.lr_policy = None
        self.lr_schedule_gen = None
        self.lr_schedule_dis = None
        self.device = self.get_device()
        self.create_folder_structure()

    def create_folder_structure(self):
        """Create the output folders (checkpoints, losses, training/test images)."""
        for folder in (self.base_path + '/checkpoints',
                       self.base_path + '/Loss_Checkpoints',
                       self.base_path + '/Training Images',
                       self.base_path + '/Test Images'):
            if not os.path.exists(folder):
                os.makedirs(folder)

    def get_device(self):
        """Return the CUDA device if available (printing memory stats), else None."""
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print('Using device:', device)
        if device.type == 'cuda':
            # BUG FIX: get_device_name(0) used to be called unconditionally,
            # which crashed on CPU-only machines.
            print(torch.cuda.get_device_name(0))
            print('Memory Usage -')
            print('Allocated:', round(torch.cuda.memory_allocated(0) / 1024 ** 3, 1), 'GB')
            # memory_reserved replaces the deprecated memory_cached.
            print('Cached: ', round(torch.cuda.memory_reserved(0) / 1024 ** 3, 1), 'GB')
            return device
        return None

    def initialize_model(self, lr_schedular_options, model_type='unet', residual_blocks=9, layer_size=64):
        """Build generator/discriminator, Adam optimizers, and LR schedulers.

        Raises if `model_type` is not one of the supported generators.
        """
        all_models = ['unet', 'resnet', 'inception', 'unet2', 'unet_large', 'unet_fusion']
        if model_type not in all_models:
            raise Exception('This model type is not available!')
        self.dis = Discriminator(image_size=self.image_size, leaky_relu=self.leaky_relu_threshold)
        if model_type == 'unet':
            self.gen = Generator_Unet(image_size=self.image_size, ngf=layer_size)
        elif model_type == 'resnet':
            self.gen = Generator_RESNET(residual_blocks=residual_blocks, ngf=layer_size)
        elif model_type == 'inception':
            self.gen = Generator_InceptionNet(ngf=layer_size)
        elif model_type == 'unet2':
            self.gen = Generator_Unet_2(image_size=self.image_size, ngf=layer_size)
        elif model_type == 'unet_large':
            self.gen = Generator_Unet_Large(image_size=self.image_size, ngf=layer_size)
        elif model_type == 'unet_fusion':
            self.gen = Generator_Unet_Fusion(image_size=self.image_size, ngf=layer_size)
        if self.device is not None:
            self.gen.cuda()
            self.dis.cuda()
        self.gen_optim = optim.Adam(self.gen.parameters(), lr=self.lr, betas=self.betas)
        self.dis_optim = optim.Adam(self.dis.parameters(), lr=self.lr, betas=self.betas)
        # BUG FIX: the two schedulers were attached to the wrong optimizers
        # (the "gen" schedule wrapped dis_optim and vice versa).
        self.lr_schedule_gen = self.get_learning_schedule(self.gen_optim, lr_schedular_options)
        self.lr_schedule_dis = self.get_learning_schedule(self.dis_optim, lr_schedular_options)
        self.model_type = model_type
        self.layer_size = layer_size
        self.residual_blocks = residual_blocks
        self.lr_policy = lr_schedular_options
        print('Model Initialized !\nGenerator Model Type : {} and Layer Size : {}'.format(model_type, layer_size))
        print('Model Parameters are:\nEpochs : {}\nLearning rate : {}\nLeaky Relu Threshold : {}\nLamda : {}\nBeta : {}'
              .format(self.epochs, self.lr, self.leaky_relu_threshold, self.lamda, self.betas))

    def train_model(self, trainloader, average_loss, eval=(False, None, None), save_model=(False, 25),
                    display_test_image=(False, None, 25)):
        """Adversarial training: BCE for D, BCE + lamda*L1 + lamda*VGG-feature for G.

        eval / save_model / display_test_image are (enabled, payload, period) tuples.
        """
        print('We will be using L1 loss with perpetual loss (L1)!')
        mean_loss = nn.BCELoss()
        l1_loss = nn.L1Loss()
        # NOTE(review): vgg16() is constructed with *random* weights here; a
        # perceptual loss normally requires pretrained weights -- confirm.
        vgg16 = models.vgg16()
        vgg16_conv = nn.Sequential(*list(vgg16.children())[:-3])
        self.gen.train()
        self.dis.train()
        batches = len(trainloader)
        print('Total number of batches in an epoch are : {}'.format(batches))
        sample_img_test = None
        if display_test_image[0]:
            sample_img_test, rgb_test_images = next(iter(display_test_image[1]))
            save_image((rgb_test_images[0].detach().cpu() + 1) / 2,
                       '{}/Training Images/real_img.{}'.format(self.base_path, self.image_format))
            if self.device is not None:
                sample_img_test = sample_img_test.cuda()
        for i in range(self.epochs):
            if eval[0] and (i % eval[2] == 0):
                self.evaluate_L1_loss_dataset(eval[1], train=False)
                self.evaluate_L1_loss_dataset(trainloader, train=True)
                self.gen.train()
            running_gen_loss = 0
            running_dis_loss = 0
            for gray_img, real_img in trainloader:
                batch_size = len(gray_img)
                zero_label = torch.zeros(batch_size)
                one_label = torch.ones(batch_size)
                if self.device is not None:
                    gray_img = gray_img.cuda()
                    real_img = real_img.cuda()
                    zero_label = zero_label.cuda()
                    one_label = one_label.cuda()
                # Discriminator loss
                self.dis_optim.zero_grad()
                fake_img = self.gen(gray_img)
                dis_real_loss = mean_loss(self.dis(real_img), one_label)
                dis_fake_loss = mean_loss(self.dis(fake_img), zero_label)
                total_dis_loss = dis_fake_loss + dis_real_loss
                total_dis_loss.backward()
                self.dis_optim.step()
                # Generator loss (fresh forward pass so G gets gradients)
                self.gen_optim.zero_grad()
                fake_img = self.gen(gray_img)
                gen_adv_loss = mean_loss(self.dis(fake_img), one_label)
                gen_l1_loss = l1_loss(fake_img.view(batch_size, -1), real_img.view(batch_size, -1))
                gen_pre_train = l1_loss(vgg16_conv(fake_img), vgg16_conv(real_img))
                total_gen_loss = gen_adv_loss + self.lamda * gen_l1_loss + self.lamda * gen_pre_train
                total_gen_loss.backward()
                self.gen_optim.step()
                running_dis_loss += total_dis_loss.item()
                running_gen_loss += total_gen_loss.item()
            running_dis_loss /= (batches * 1.0)
            running_gen_loss /= (batches * 1.0)
            print('Epoch : {}, Generator Loss : {} and Discriminator Loss : {}'.format(
                i + 1, running_gen_loss, running_dis_loss))
            if display_test_image[0] and i % display_test_image[2] == 0:
                self.gen.eval()
                out_result = self.gen(sample_img_test)
                out_result = out_result.detach().cpu()
                out_result = (out_result[0] + 1) / 2  # de-normalize from [-1,1] to [0,1]
                save_image(out_result, '{}/Training Images/epoch_{}.{}'.format(self.base_path, i, self.image_format))
                self.gen.train()
            save_tuple = ([running_gen_loss], [running_dis_loss])
            average_loss.add_loss(save_tuple)
            if save_model[0] and i % save_model[1] == 0:
                self.save_checkpoint('checkpoint_epoch_{}'.format(i), self.model_type)
                average_loss.save('checkpoint_avg_loss', save_index=0)
            self.lr_schedule_gen.step()
            self.lr_schedule_dis.step()
            for param_grp in self.dis_optim.param_groups:
                print('Learning rate after {} epochs is : {}'.format(i + 1, param_grp['lr']))
        self.save_checkpoint('checkpoint_train_final', self.model_type)
        average_loss.save('checkpoint_avg_loss_final', save_index=0)

    def get_learning_schedule(self, optimizer, option):
        """Return an LR scheduler per option['lr_policy']: linear | plateau | step | cosine."""
        if option['lr_policy'] == 'linear':
            def lambda_rule(epoch):
                # Flat for n_epochs, then linear decay over n_epoch_decay epochs.
                lr_l = 1.0 - max(0, epoch - option['n_epochs']) / float(option['n_epoch_decay'] + 1)
                return lr_l
            schedular = lr_schedular.LambdaLR(optimizer, lr_lambda=lambda_rule)
        elif option['lr_policy'] == 'plateau':
            schedular = lr_schedular.ReduceLROnPlateau(optimizer, mode='min', factor=0.2,
                                                       threshold=0.01, patience=5)
        elif option['lr_policy'] == 'step':
            schedular = lr_schedular.StepLR(optimizer, step_size=option['step_size'], gamma=0.1)
        elif option['lr_policy'] == 'cosine':
            schedular = lr_schedular.CosineAnnealingLR(optimizer, T_max=option['n_epochs'], eta_min=0)
        else:
            raise Exception('LR Policy not implemented!')
        return schedular

    def evaluate_model(self, loader, save_filename, no_of_images=1):
        """Generate `no_of_images` samples (batch size 1 assumed) and save
        fake / real / gray versions under 'Test Images'."""
        if self.gen is None or self.dis is None:
            raise Exception('Model has not been initialized and hence cannot be saved!')
        counter_images_generated = 0
        while counter_images_generated < no_of_images:
            gray, rgb = next(iter(loader))
            if self.device is not None:
                gray = gray.cuda()
            filename = '{}/Test Images/{}_{}.{}'.format(
                self.base_path, save_filename, self.count, self.image_format)
            real_filename = '{}/Test Images/{}_{}_real.{}'.format(
                self.base_path, save_filename, self.count, self.image_format)
            real_gray_filename = '{}/Test Images/{}_{}_real_gray.{}'.format(
                self.base_path, save_filename, self.count, self.image_format)
            self.count += 1
            self.gen.eval()
            out = self.gen(gray)
            out = out[0].detach().cpu()
            out = (out + 1) / 2
            save_image(out, filename)
            gray_img = gray[0].detach().cpu()
            save_image(gray_img, real_gray_filename)
            real_img = (rgb[0].detach().cpu() + 1) / 2
            save_image(real_img, real_filename)
            counter_images_generated += 1

    def evaluate_L1_loss_dataset(self, loader, train=False):
        """Return the generator's mean L1 loss over an entire loader."""
        if self.gen is None or self.dis is None:
            raise Exception('Model has not been initialized and hence cannot be evaluated!')
        loss_function = nn.L1Loss()
        self.gen.eval()
        total_loss = 0.0
        iterations = 0
        with torch.no_grad():  # inference only -- skip autograd bookkeeping
            for gray, real in loader:
                iterations += 1
                if self.device is not None:
                    gray = gray.cuda()
                    real = real.cuda()
                gen_out = self.gen(gray)
                iteration_loss = loss_function(gen_out, real)
                total_loss += iteration_loss.item()
        total_loss = total_loss / (iterations * 1.0)
        train_test = 'train' if train else 'test'
        print('Total L1 loss over {} set is : {}'.format(train_test, total_loss))
        return total_loss

    def change_params(self, epochs=None, learning_rate=None, leaky_relu=None, betas=None, lamda=None):
        """Update selected hyper-parameters; rebuild optimizers when lr/betas change."""
        if epochs is not None:
            self.epochs = epochs
            print('Changed the number of epochs to {}!'.format(self.epochs))
        if learning_rate is not None:
            self.lr = learning_rate
            print('Changed the learning rate to {}!'.format(self.lr))
        if leaky_relu is not None:
            self.leaky_relu_threshold = leaky_relu
            print('Changed the threshold for leaky relu to {}!'.format(self.leaky_relu_threshold))
        if betas is not None:
            self.betas = betas
            print('Changed the betas for Adams Optimizer!')
        if betas is not None or learning_rate is not None:
            self.gen_optim = optim.Adam(self.gen.parameters(), lr=self.lr, betas=self.betas)
            self.dis_optim = optim.Adam(self.dis.parameters(), lr=self.lr, betas=self.betas)
        if lamda is not None:
            self.lamda = lamda
            print('Lamda value has been changed to {}!'.format(self.lamda))

    def set_all_params(self, epochs, lr, leaky_thresh, lamda, beta):
        """Set all hyper-parameters at once and rebuild both optimizers."""
        self.epochs = epochs
        self.lr = lr
        self.leaky_relu_threshold = leaky_thresh
        self.lamda = lamda
        self.betas = beta
        self.gen_optim = optim.Adam(self.gen.parameters(), lr=self.lr, betas=self.betas)
        self.dis_optim = optim.Adam(self.dis.parameters(), lr=self.lr, betas=self.betas)
        print('Model Parameters are:\nEpochs : {}\nLearning rate : {}\nLeaky Relu Threshold : {}\nLamda : {}\nBeta : {}'
              .format(self.epochs, self.lr, self.leaky_relu_threshold, self.lamda, self.betas))

    def run_model_on_dataset(self, loader, save_folder, save_path=None):
        """Colorize every batch in `loader`, saving outputs as 1.png, 2.png, ..."""
        if self.gen is None or self.dis is None:
            raise Exception('Model has not been initialized and hence cannot be saved!')
        index = 1
        if save_path is None:
            save_path = self.base_path
        for gray, dummy in loader:
            if self.device is not None:
                gray = gray.cuda()
            filename = '{}/{}/{}.{}'.format(save_path, save_folder, index, self.image_format)
            index += 1
            self.gen.eval()
            out = self.gen(gray)
            out = out[0].detach().cpu()
            out = (out + 1) / 2
            save_image(out, filename)

    def save_checkpoint(self, filename, model_type='unet'):
        """Serialize model weights plus every hyper-parameter needed to restore."""
        if self.gen is None or self.dis is None:
            raise Exception('The model has not been initialized and hence cannot be saved !')
        filename = '{}/checkpoints/{}.pth'.format(self.base_path, filename)
        save_dict = {'model_type': model_type,
                     'dis_dict': self.dis.state_dict(),
                     'gen_dict': self.gen.state_dict(),
                     'lr': self.lr,
                     'epochs': self.epochs,
                     'betas': self.betas,
                     'image_size': self.image_size,
                     'leaky_relu_thresh': self.leaky_relu_threshold,
                     'lamda': self.lamda,
                     'base_path': self.base_path,
                     'count': self.count,
                     'image_format': self.image_format,
                     'device': self.device,
                     'residual_blocks': self.residual_blocks,
                     'layer_size': self.layer_size,
                     'lr_policy': self.lr_policy}
        torch.save(save_dict, filename)
        print('The model checkpoint has been saved !')

    def load_checkpoint(self, filename):
        """Restore a checkpoint written by save_checkpoint; re-initializes the model."""
        filename = '{}/checkpoints/{}.pth'.format(self.base_path, filename)
        if not pathlib.Path(filename).exists():
            raise Exception('This checkpoint does not exist!')
        self.gen = None
        self.dis = None
        save_dict = torch.load(filename)
        self.betas = save_dict['betas']
        self.image_size = save_dict['image_size']
        self.epochs = save_dict['epochs']
        self.leaky_relu_threshold = save_dict['leaky_relu_thresh']
        self.lamda = save_dict['lamda']
        self.lr = save_dict['lr']
        self.base_path = save_dict['base_path']
        self.count = save_dict['count']
        self.image_format = save_dict['image_format']
        self.device = save_dict['device']
        self.residual_blocks = save_dict['residual_blocks']
        self.layer_size = save_dict['layer_size']
        self.lr_policy = save_dict['lr_policy']
        device = self.get_device()
        if device != self.device:
            # A device mismatch between save and load time is unrecoverable here.
            if self.device is None:
                error_msg = 'The model was trained on CPU and will therefore be continued on CPU only!'
            else:
                error_msg = 'The model was trained on GPU and cannot be loaded on a CPU machine!'
            raise Exception(error_msg)
        self.initialize_model(model_type=save_dict['model_type'],
                              residual_blocks=self.residual_blocks,
                              layer_size=self.layer_size,
                              lr_schedular_options=self.lr_policy)
        self.gen.load_state_dict(save_dict['gen_dict'])
        self.dis.load_state_dict(save_dict['dis_dict'])
        print('The model checkpoint has been restored!')
class Pix2PixMain(object):
    """End-to-end pix2pix runner: __init__ builds models, data, optimizers and
    output folders; __call__ drives the train / validate / checkpoint loop."""

    def __init__(self):
        # ----------------------------------- global -----------------------------------
        np.random.seed(Settings.SEED)
        torch.manual_seed(Settings.SEED)
        random.seed(Settings.SEED)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(Settings.SEED)
            self.device = torch.device("cuda")
        else:
            self.device = torch.device("cpu")
        # ----------------------------------- model -----------------------------------
        self.generator = Generator(in_c=Settings.IN_CHANNEL,
                                   out_c=Settings.OUT_CHANNEL,
                                   ngf=Settings.NGF).to(self.device)
        self.generator.apply(self.generator.weights_init)
        self.discriminator = Discriminator(
            in_c=Settings.IN_CHANNEL,
            out_c=Settings.OUT_CHANNEL,
            ndf=Settings.NDF,
            n_layers=Settings.DISCRIMINATOR_LAYER).to(self.device)
        self.discriminator.apply(self.discriminator.weights_init)
        print("model init done")
        # ----------------------------------- data -----------------------------------
        train_transforms = transforms.Compose([
            transforms.Resize((Settings.INPUT_SIZE, Settings.INPUT_SIZE)),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ])
        data_prepare = get_dataloader(
            dataset_name=Settings.DATASET,
            batch_size=Settings.BATCH_SIZE,
            data_root=Settings.DATASET_ROOT,
            train_num_workers=Settings.TRAIN_NUM_WORKERS,
            transforms=train_transforms,
            val_num_workers=Settings.TEST_NUM_WORKERS)
        self.train_dataloader = data_prepare.train_dataloader
        self.test_dataloader = data_prepare.test_dataloader
        print("data init done.....")
        # ------------------------- optimizer and criterion -------------------------
        self.optimG = optim.Adam([{
            "params": self.generator.parameters()
        }], lr=Settings.G_LR, betas=Settings.G_BETAS)
        self.optimD = optim.Adam([{
            "params": self.discriminator.parameters()
        }], lr=Settings.D_LR, betas=Settings.D_BETAS)
        self.criterion_l1loss = nn.L1Loss()
        self.criterion_BCE = nn.BCELoss()
        print("optimizer and criterion init done.....")
        # ----------------------------------- recorder -----------------------------------
        # Per-iteration metric history; consumed by print/log/save modules.
        self.recorder = {
            "errD_fake": list(),
            "errD_real": list(),
            "errG_l1loss": list(),
            "errG_bce": list(),
            "errG": list(),
            "accD": list()
        }
        output_file = time.strftime(
            "{}_{}_%Y_%m_%d_%H_%M_%S".format("pix2pix", Settings.DATASET),
            time.localtime())
        self.output_root = os.path.join(Settings.OUTPUT_ROOT, output_file)
        os.makedirs(os.path.join(self.output_root, Settings.OUTPUT_MODEL_KEY))
        os.makedirs(os.path.join(self.output_root, Settings.OUTPUT_LOG_KEY))
        os.makedirs(os.path.join(self.output_root, Settings.OUTPUT_IMAGE_KEY))
        print("recorder init done.....")

    def __call__(self):
        """Run the full training schedule defined by Settings."""
        print_steps = max(
            1, int(len(self.train_dataloader) * Settings.PRINT_FREQUENT))
        eval_steps = max(
            1, int(len(self.train_dataloader) * Settings.EVAL_FREQUENT))
        batch_steps = max(1, int(Settings.EPOCHS * Settings.BATCH_FREQUENT))
        print("begin train.....")
        for epoch in range(1, Settings.EPOCHS + 1):
            for step, batch in enumerate(self.train_dataloader):
                # train
                self.train_module(batch)
                # print
                self.print_module(epoch, step, print_steps)
                if epoch % batch_steps == 0:
                    # val (also checkpoints models and logs)
                    self.val_module(epoch, step, eval_steps)
        self.log_save_module()

    def train_module(self, batch):
        """One optimization step: update D on real+fake pairs, then update G."""
        self.generator.train()
        self.discriminator.train()
        input_images = None
        target_images = None
        if Settings.DATASET == "edge2shoes":
            input_images = batch["edge_images"].to(self.device)
            target_images = batch["color_images"].to(self.device)
        elif Settings.DATASET == "Mogaoku":
            input_images = batch["edge_images"].to(self.device)
            target_images = batch["color_images"].to(self.device)
        else:
            # BUG FIX: the exception object was created but never raised, so an
            # unknown dataset fell through with input_images = None.
            raise KeyError("DataSet {} doesn't exist".format(Settings.DATASET))
        # Discriminator update
        self.optimD.zero_grad()
        true_image_d_pred = self.discriminator(input_images, target_images)
        true_images_label = torch.full(true_image_d_pred.shape,
                                       Settings.REAL_LABEL,
                                       dtype=torch.float32,
                                       device=self.device)
        errD_real_bce = self.criterion_BCE(true_image_d_pred, true_images_label)
        errD_real_bce.backward()
        fake_images = self.generator(input_images)
        fake_images_d_pred = self.discriminator(input_images, fake_images.detach())
        fake_images_label = torch.full(fake_images_d_pred.shape,
                                       Settings.FAKE_LABEL,
                                       dtype=torch.float32,
                                       device=self.device)
        errD_fake_bce = self.criterion_BCE(fake_images_d_pred, fake_images_label)
        errD_fake_bce.backward()
        self.optimD.step()
        real_image_pred_true_num = ((true_image_d_pred > 0.5) == true_images_label).sum().float()
        fake_image_pred_true_num = ((fake_images_d_pred > 0.5) == fake_images_label).sum().float()
        accD = (real_image_pred_true_num + fake_image_pred_true_num) / \
            (true_images_label.numel() + fake_images_label.numel())
        # Generator update
        self.optimG.zero_grad()
        fake_images_d_pred = self.discriminator(input_images, fake_images)
        true_images_label = torch.full(fake_images_d_pred.shape,
                                       Settings.REAL_LABEL,
                                       dtype=torch.float32,
                                       device=self.device)
        errG_bce = self.criterion_BCE(fake_images_d_pred, true_images_label)
        errG_l1loss = self.criterion_l1loss(fake_images, target_images)
        errG = errG_bce + errG_l1loss * Settings.L1_LOSS_LAMUDA
        errG.backward()
        self.optimG.step()
        # recorder
        self.recorder["errD_real"].append(errD_real_bce.item())
        self.recorder["errD_fake"].append(errD_fake_bce.item())
        self.recorder["errG_l1loss"].append(errG_l1loss.item())
        self.recorder["errG_bce"].append(errG_bce.item())
        self.recorder["errG"].append(errG.item())
        # BUG FIX: store a plain float, not a live tensor.
        self.recorder["accD"].append(accD.item())

    def val_module(self, epoch, step, eval_steps):
        """Every `eval_steps` steps: render input|target|prediction grids over
        the test set, then checkpoint models and flush logs."""
        def apply_dropout(m):
            if type(m) == nn.Dropout:
                m.train()
        if (step + 1) % eval_steps == 0:
            output_images = None
            output_count = 0
            self.generator.eval()
            self.discriminator.eval()
            # keep dropout active during eval (pix2pix-style stochastic eval)
            if Settings.USING_DROPOUT_DURING_EVAL:
                self.generator.apply(apply_dropout)
                self.discriminator.apply(apply_dropout)
            for eval_step, eval_batch in enumerate(self.test_dataloader):
                input_images = eval_batch["edge_images"].to(self.device)
                target_images = eval_batch["color_images"]
                pred_images = self.generator(input_images).detach().cpu()
                output_image = torch.cat(
                    [input_images.cpu(), target_images, pred_images], dim=3)
                if output_images is None:
                    output_images = output_image
                else:
                    output_images = torch.cat([output_images, output_image], dim=0)
                # Flush a grid once a quarter of the test loader is accumulated.
                if output_images.shape[0] == int(len(self.test_dataloader) / 4):
                    output_images = make_grid(
                        output_images,
                        padding=2,
                        normalize=True,
                        nrow=Settings.CONSTANT_FEATURE_DIS_LEN).numpy()
                    output_images = np.array(
                        np.transpose(output_images, (1, 2, 0)) * 255,
                        dtype=np.uint8)
                    output_images = Image.fromarray(output_images)
                    output_images.save(
                        os.path.join(
                            self.output_root, Settings.OUTPUT_IMAGE_KEY,
                            "epoch_{}_step_{}_count_{}.jpg".format(
                                epoch, step, output_count)))
                    output_count += 1
                    output_images = None
            self.model_save_module(epoch, step)
            self.log_save_module()

    def print_module(self, epoch, step, print_steps):
        """Periodically print the most recent value of every recorded metric."""
        if (step + 1) % print_steps == 0:
            print("[{}/{}]\t [{}/{}]\t ".format(epoch, Settings.EPOCHS, step + 1,
                                                len(self.train_dataloader)),
                  end=" ")
            for key in self.recorder:
                print("[{}:{}]\t".format(key, self.recorder[key][-1]), end=" ")
            print(" ")

    def model_save_module(self, epoch, step):
        """Checkpoint both networks, tagged with epoch and step."""
        torch.save(
            self.generator.state_dict(),
            os.path.join(
                self.output_root, Settings.OUTPUT_MODEL_KEY,
                "pix2pix_generator_epoch_{}_step_{}.pth".format(epoch, step)))
        torch.save(
            self.discriminator.state_dict(),
            os.path.join(
                self.output_root, Settings.OUTPUT_MODEL_KEY,
                "pix2pix_discriminator_epoch_{}_step_{}.pth".format(
                    epoch, step)))

    def log_save_module(self):
        """Write the metric history to log.txt and one curve plot per metric."""
        # save records
        with open(
                os.path.join(self.output_root, Settings.OUTPUT_LOG_KEY,
                             "log.txt"), "w") as f:
            for item_ in range(len(self.recorder["accD"])):
                for key in self.recorder:
                    f.write("{}:{}\t".format(key, self.recorder[key][item_]))
                f.write("\n")
        # save plots
        for key in self.recorder:
            plt.figure(figsize=(10, 5))
            plt.title("{} During Training".format(key))
            plt.plot(self.recorder[key], label=key)
            plt.xlabel("iterations")
            plt.ylabel("value")
            plt.legend()
            if "acc" in key:
                plt.yticks(np.arange(0, 1, 0.5))
            plt.savefig(
                os.path.join(self.output_root, Settings.OUTPUT_LOG_KEY,
                             "{}.jpg".format(key)))
            plt.close("all")

    def learning_rate_decay_module(self, epoch):
        """Multiply both optimizers' learning rates by 0.2 every LR_DECAY_EPOCHS."""
        if epoch % Settings.LR_DECAY_EPOCHS == 0:
            for param_group in self.optimD.param_groups:
                param_group["lr"] *= 0.2
            for param_group in self.optimG.param_groups:
                param_group["lr"] *= 0.2
def train(FLAGS):
    """Train a DCGAN (Discriminator + Generator) on an image-folder dataset.

    Args:
        FLAGS: parsed command-line flags. Fields read here: p_every /
            s_every / epochs (loop control), dlr / glr / beta1 / beta2
            (Adam hyperparameters), zsize (latent dimension), batch_size,
            resize_height / resize_width (input resizing), and
            dataset_path / dataset_type (data location or auto-download key).

    Side effects:
        Downloads the dataset when no path is given, and writes model
        checkpoints (`d-nm-<epoch>.pth`, `g-nm-<epoch>.pth`) and sample
        images (`fake_<epoch>.png`) into the working directory.
    """
    # Unpack the hyperparameters once so the loop body stays readable.
    p_every = FLAGS.p_every      # print progress every N batches
    s_every = FLAGS.s_every      # save checkpoints every N epochs
    epochs = FLAGS.epochs
    dlr = FLAGS.dlr              # discriminator learning rate
    glr = FLAGS.glr              # generator learning rate
    beta1 = FLAGS.beta1
    beta2 = FLAGS.beta2
    z_size = FLAGS.zsize
    batch_size = FLAGS.batch_size
    rh = FLAGS.resize_height
    rw = FLAGS.resize_width
    d_path = FLAGS.dataset_path
    d_type = FLAGS.dataset_type

    # Preprocessing: resize, tensorize and normalize images into [-1, 1].
    transform = transforms.Compose([
        transforms.Resize((rh, rw)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    # BUGFIX: compare against None with `is`, not `==`.
    if FLAGS.dataset_path is None:
        # No path supplied: download the requested dataset on first use.
        if d_type == "cars":
            if not os.path.exists('./datasets/cars_train'):
                os.system('sh ./datasets/dload.sh cars')
            d_path = './datasets/cars_train/'
        elif d_type == "flowers":
            if not os.path.exists('./datasets/flowers/'):
                os.system('sh ./datasets/dload.sh flowers')
            d_path = './datasets/flowers/'
        elif d_type == "dogs":
            if not os.path.exists('./datasets/jpg'):
                os.system('sh ./datasets/dload.sh dogs')
            d_path = './datasets/jpg/'

    train_data = datasets.ImageFolder(d_path, transform=transform)
    trainloader = DataLoader(train_data,
                             batch_size=batch_size,
                             shuffle=True)

    # Build the two networks and apply the custom weight initialization.
    dis = Discriminator(64)
    gen = Generator()
    dis.apply(init_weight)
    gen.apply(init_weight)

    # Binary cross-entropy on the discriminator's probability output.
    criterion = nn.BCELoss()

    d_opt = optim.Adam(dis.parameters(), lr=dlr, betas=(beta1, beta2))
    g_opt = optim.Adam(gen.parameters(), lr=glr, betas=(beta1, beta2))

    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    train_losses = []

    dis.to(device)
    gen.to(device)

    # BUGFIX: BCELoss requires float targets; torch.full with an int fill
    # value produces an integer tensor on recent PyTorch versions, which
    # raises at loss time.  Use float labels and an explicit float dtype.
    real_label = 1.
    fake_label = 0.

    for e in range(epochs):
        td_loss = 0.0  # per-epoch discriminator loss total
        tg_loss = 0.0  # per-epoch generator loss total

        for batch_i, (real_images, _) in enumerate(trainloader):
            real_images = real_images.to(device)
            batch_size = real_images.size(0)

            #### Train the Discriminator ####
            d_opt.zero_grad()

            # Real batch -> target 1.
            d_real = dis(real_images)
            label = torch.full((batch_size, ),
                               real_label,
                               dtype=torch.float,
                               device=device)
            r_loss = criterion(d_real, label)
            r_loss.backward()

            # Fake batch -> target 0.  detach() stops gradients flowing
            # into the generator during the discriminator step.
            z = torch.randn(batch_size, z_size, 1, 1, device=device)
            fake_images = gen(z)
            label.fill_(fake_label)
            d_fake = dis(fake_images.detach())
            f_loss = criterion(d_fake, label)
            f_loss.backward()

            d_loss = r_loss + f_loss
            d_opt.step()

            #### Train the Generator ####
            g_opt.zero_grad()
            label.fill_(real_label)  # G wants D to classify fakes as real
            d_fake2 = dis(fake_images)
            g_loss = criterion(d_fake2, label)
            g_loss.backward()
            g_opt.step()

            # BUGFIX: the per-epoch totals were never accumulated, so
            # train_losses only ever recorded zeros.
            td_loss += d_loss.item()
            tg_loss += g_loss.item()

            if batch_i % p_every == 0:
                print('Epoch [{:5d} / {:5d}] | d_loss: {:6.4f} | g_loss: {:6.4f}'.
                      format(e + 1, epochs, d_loss.item(), g_loss.item()))

        train_losses.append([td_loss, tg_loss])

        if e % s_every == 0:
            # Checkpoint both networks together with their optimizer state.
            d_ckpt = {
                'model_state_dict': dis.state_dict(),
                'opt_state_dict': d_opt.state_dict()
            }
            g_ckpt = {
                'model_state_dict': gen.state_dict(),
                'opt_state_dict': g_opt.state_dict()
            }
            torch.save(d_ckpt, 'd-nm-{}.pth'.format(e))
            torch.save(g_ckpt, 'g-nm-{}.pth'.format(e))
            # Also dump the most recent batch of generated samples.
            utils.save_image(fake_images.detach(),
                             'fake_{}.png'.format(e),
                             normalize=True)

    print('[INFO] Training Completed successfully!')
def compile(self):
    """Build the TensorFlow-1.x graph for this conditional VAC-GAN.

    Wires up input placeholders, the generator / discriminator / classifier
    forward passes, the auxiliary-classifier losses, the GAN + L1 generator
    loss, the discriminator loss (with one-sided label smoothing), the three
    Adam optimizers, a weight-sharing inference sampler, and a Saver.
    Idempotent: a second call is a no-op.
    """
    if self.compiled:
        print('Model already compiled.')
        return
    self.compiled = True
    # Placeholders.
    # X: single-channel input; Y: the two output channels the generator must
    # predict; labels: one-hot class labels (10 classes) for the auxiliary
    # classifier.
    self.X = tf.placeholder(tf.float32, shape=(None, 32, 32, 1), name='X')
    self.Y = tf.placeholder(tf.float32, shape=(None, 32, 32, 2), name='Y')
    self.labels = tf.placeholder(tf.float32, shape=(None, 10), name='labels')
    # Generator.
    generator = Generator(self.seed)
    # Discriminator.
    discriminator = Discriminator(self.seed)
    # Classifier.
    classifier = Classifier(self.seed)
    self.gen_out = generator.forward(self.X)
    # The discriminator sees (input, channels) pairs concatenated on the
    # channel axis; reuse_vars shares weights between real and fake branches.
    disc_out_real = discriminator.forward(tf.concat([self.X, self.Y], 3))
    disc_out_fake = discriminator.forward(tf.concat([self.X, self.gen_out], 3),
                                          reuse_vars=True)
    # VAC-GAN classifier losses.
    classifier_real = classifier.forward(tf.concat([self.X, self.Y], 3))
    # NOTE(review): "classfier_fake" is a pre-existing variable-name typo;
    # kept as-is because renaming is a code change.
    classfier_fake = classifier.forward(tf.concat([self.X, self.gen_out], 3),
                                        reuse_vars=True)
    classifier_l_real = tf.nn.softmax_cross_entropy_with_logits_v2(logits=classifier_real,
                                                                   labels=self.labels)
    classifier_l_fake = tf.nn.softmax_cross_entropy_with_logits_v2(logits=classfier_fake,
                                                                   labels=self.labels)
    self.classifier_loss_real = tf.reduce_mean(classifier_l_real)
    self.classifier_loss_fake = tf.reduce_mean(classifier_l_fake)
    self.classifier_loss = tf.reduce_mean(classifier_l_fake + classifier_l_real)
    # Generator loss: adversarial term + L1 reconstruction (scaled by
    # l1_weight) + auxiliary-classifier term (scaled by VAC_weight).
    self.gen_loss_gan = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_out_fake,
                                                                               labels=tf.ones_like(disc_out_fake)))
    self.gen_loss_l1 = tf.reduce_mean(tf.abs(self.Y - self.gen_out)) * self.l1_weight
    self.gen_loss = self.gen_loss_gan + self.gen_loss_l1 + self.VAC_weight * self.classifier_loss
    # Discriminator losses.  Real targets are scaled by label_smoothing
    # (one-sided label smoothing).
    disc_l_fake = tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_out_fake,
                                                          labels=tf.zeros_like(disc_out_fake))
    disc_l_real = tf.nn.sigmoid_cross_entropy_with_logits(logits=disc_out_real,
                                                          labels=tf.ones_like(disc_out_real)*self.label_smoothing)
    self.disc_loss_fake = tf.reduce_mean(disc_l_fake)
    self.disc_loss_real = tf.reduce_mean(disc_l_real)
    self.disc_loss = tf.reduce_mean(disc_l_fake + disc_l_real)
    # Global step (only the classifier optimizer below increments it).
    self.global_step = tf.Variable(0, name='global_step', trainable=False)
    # Learning rate: optionally exponentially decayed, floored at 1e-6.
    if self.learning_rate_decay:
        self.lr = tf.maximum(1e-6, tf.train.exponential_decay(
            learning_rate=self.learning_rate,
            global_step=self.global_step,
            decay_steps=self.learning_rate_decay_steps,
            decay_rate=self.learning_rate_decay_rate))
    else:
        self.lr = tf.constant(self.learning_rate)
    # Optimizers.  Discriminator and classifier use a 10x smaller learning
    # rate than the generator.
    self.gen_optimizer = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.gen_loss,
                                                                                var_list=generator.variables)
    self.disc_optimizer = tf.train.AdamOptimizer(learning_rate=self.lr/10).minimize(self.disc_loss,
                                                                                    var_list=discriminator.variables)
    self.classifier_optimizer = tf.train.AdamOptimizer(learning_rate=self.lr/10).minimize(self.classifier_loss,
                                                                                          var_list=classifier.variables,
                                                                                          global_step=self.global_step)
    # Sampler: a second generator in inference mode that reuses the trained
    # generator variables.
    gen_sample = Generator(self.seed, is_training=False)
    self.sampler = gen_sample.forward(self.X, reuse_vars=True)
    self.MAE = tf.reduce_mean(tf.abs(self.Y - self.sampler))
    self.saver = tf.train.Saver()
def test_model_trains(self):
    """Run one WGAN training step end-to-end and assert that weights change.

    Builds the generator/discriminator via CreateModelRunner from a random
    latent file in a temporary directory, performs one optimizer step for D
    and one for G, and compares flattened parameter vectors before/after.
    Requires CUDA.
    """
    with TemporaryDirectory() as tmpdirname:
        T = torch.cuda.FloatTensor
        latent = np.random.rand(64, 1, 512)
        latent_path = tmpdirname + '/encoded_smiles.latent'
        os.makedirs(os.path.dirname(latent_path), exist_ok=True)
        with open(latent_path, 'w') as f:
            json.dump(latent.tolist(), f)
        C = CreateModelRunner(input_data_path=latent_path,
                              output_model_folder=tmpdirname)
        C.run()
        D = Discriminator.load(tmpdirname + '/discriminator.txt')
        G = Generator.load(tmpdirname + '/generator.txt')
        G.cuda()
        D.cuda()
        optimizer_G = torch.optim.Adam(G.parameters())
        optimizer_D = torch.optim.Adam(D.parameters())
        # BUGFIX: the latent file was previously opened without ever being
        # closed; read it inside a context manager instead.
        with open(latent_path, "r") as json_smiles:
            latent_space_mols = np.array(json.load(json_smiles))
        testSampler = Sampler(G)
        latent_space_mols = latent_space_mols.reshape(
            latent_space_mols.shape[0], 512)
        dataloader = torch.utils.data.DataLoader(
            LatentMolsDataset(latent_space_mols),
            shuffle=True,
            batch_size=64,
            drop_last=True)
        for _, real_mols in enumerate(dataloader):
            real_mols = real_mols.type(T)

            # Snapshot every parameter as one flat vector per model.
            before_G_params = []
            before_D_params = []
            for param in G.parameters():
                before_G_params.append(param.view(-1))
            before_G_params = torch.cat(before_G_params)
            for param in D.parameters():
                before_D_params.append(param.view(-1))
            before_D_params = torch.cat(before_D_params)

            # One discriminator step with the plain WGAN critic loss.
            # It is not relevant to compute gradient penalty: the test is
            # only interested in whether the weights change (training),
            # not in giving proper training.
            optimizer_D.zero_grad()
            fake_mols = testSampler.sample(real_mols.shape[0])
            real_validity = D(real_mols)
            fake_validity = D(fake_mols)
            d_loss = -torch.mean(real_validity) + torch.mean(fake_validity)
            d_loss.backward()
            optimizer_D.step()

            # One generator step.
            optimizer_G.zero_grad()
            fake_mols = testSampler.sample(real_mols.shape[0])
            fake_validity = D(fake_mols)
            g_loss = -torch.mean(fake_validity)
            g_loss.backward()
            optimizer_G.step()

            # Re-snapshot and require at least one weight to differ in
            # each model.
            after_G_params = []
            after_D_params = []
            for param in G.parameters():
                after_G_params.append(param.view(-1))
            after_G_params = torch.cat(after_G_params)
            for param in D.parameters():
                after_D_params.append(param.view(-1))
            after_D_params = torch.cat(after_D_params)

            self.assertTrue(
                torch.any(torch.ne(after_G_params, before_G_params)))
            self.assertTrue(
                torch.any(torch.ne(after_D_params, before_D_params)))
            break  # a single batch is sufficient for this check
def run(parameters, hparams, X_train, X_test, load=False, load_epochs=150, load_log_dir=""): # Network Parameters hidden_dim = parameters['hidden_dim'] num_layers = parameters['num_layers'] # Still have to implement iterations = parameters['iterations'] # Test run to check for overfitting batch_size = parameters[ 'batch_size'] * mirrored_strategy.num_replicas_in_sync # To scale the batch size according to the mirrored strategy module_name = parameters[ 'module_name'] # 'lstm' or 'GRU'' --> Still have to implement this z_dim = parameters['z_dim'] lambda_val = 1 # Hyperparameter for .. eta = 1 # Hyperparameter for .. kappa = 1 # Hyperparameter for feature matching gamma = 1 # Hyperparameter for the gradient penalty in WGAN-GP if load: # Write to already defined log directory? log_dir = load_log_dir else: # Or create new log directory? # Define the TensorBoard such that we can visualize the results log_dir = 'logs/' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") summary_writer_train = tf.summary.create_file_writer(log_dir + '/train') summary_writer_test = tf.summary.create_file_writer(log_dir + '/test') summary_writer_bottom = tf.summary.create_file_writer(log_dir + '/bottom') summary_writer_top = tf.summary.create_file_writer(log_dir + '/top') summary_writer_real_data = tf.summary.create_file_writer(log_dir + '/real_data') summary_writer_fake_data = tf.summary.create_file_writer(log_dir + '/fake_data') summary_writer_lower_bound = tf.summary.create_file_writer(log_dir + '/lower_bound') if load: embedder_model, recovery_model, supervisor_model, generator_model, discriminator_model = load_models( load_epochs, hparams, hidden_dim) else: with mirrored_strategy.scope(): # Create an instance of all neural networks models (All LSTM) embedder_model = Embedder('logs/embedder', hparams, hidden_dim, dimensionality=11) recovery_model = Recovery( 'logs/recovery', hparams, hidden_dim, dimensionality=11) # If used for EONIA rate only supervisor_model = 
Supervisor('logs/supervisor', hparams, hidden_dim) generator_model = Generator('logs/generator', hparams, hidden_dim) discriminator_model = Discriminator('logs/TimeGAN', hparams, hidden_dim) r_loss_train = tf.keras.metrics.Mean(name='r_loss_train') # Step 1 metrics r_loss_test = tf.keras.metrics.Mean(name='r_loss_test') grad_embedder_ll = tf.keras.metrics.Mean( name='e_grad_lower_layer') # Step 1 gradient grad_embedder_ul = tf.keras.metrics.Mean(name='e_grad_upper_layer') grad_recovery_ll = tf.keras.metrics.Mean(name='r_grad_lower_layer') grad_recovery_ul = tf.keras.metrics.Mean(name='r_grad_upper_layer') g_loss_s_train = tf.keras.metrics.Mean( name='g_loss_s_train') # Step 2 metrics g_loss_s_test = tf.keras.metrics.Mean(name='g_loss_s_test') grad_supervisor_ll = tf.keras.metrics.Mean( name='s_grad_lower_layer') # Step 2 gradients grad_supervisor_ul = tf.keras.metrics.Mean(name='s_grad_upper_layer') e_loss_T0 = tf.keras.metrics.Mean( name='e_loss_T0') # Step 3 metrics (train) g_loss_s_embedder = tf.keras.metrics.Mean(name='g_loss_s_embedder') g_loss_s = tf.keras.metrics.Mean(name='g_loss_s') d_loss = tf.keras.metrics.Mean(name='d_loss') g_loss_u_e = tf.keras.metrics.Mean(name='g_loss_u_e') e_loss_T0_test = tf.keras.metrics.Mean( name='e_loss_T0_test') # Step 3 metrics (test) g_loss_s_embedder_test = tf.keras.metrics.Mean(name='e_loss_T0_test') g_loss_s_test = tf.keras.metrics.Mean(name='g_loss_s_test') g_loss_u_e_test = tf.keras.metrics.Mean(name='g_loss_u_e_test') d_loss_test = tf.keras.metrics.Mean(name='d_loss_test') grad_discriminator_ll = tf.keras.metrics.Mean( name='d_grad_lower_layer') # Step 3 gradients grad_discriminator_ul = tf.keras.metrics.Mean(name='d_grad_upper_layer') grad_generator_ll = tf.keras.metrics.Mean(name='g_grad_lower_layer') grad_generator_ul = tf.keras.metrics.Mean(name='g_grad_upper_layer') loss_object_accuracy = tf.keras.metrics.Accuracy() # To calculate accuracy # Create the loss object, optimizer, and training function loss_object = 
tf.keras.losses.MeanSquaredError( reduction=tf.keras.losses.Reduction.NONE) # Rename this to MSE loss_object_adversarial = tf.losses.BinaryCrossentropy( from_logits=True, reduction=tf.keras.losses.Reduction.NONE) # More stable # from_logits = True because the last dense layers is linear and # does not have an activation -- could be differently specified # Activate the optimizer using the Mirrored Strategy approach with mirrored_strategy.scope(): optimizer = tf.keras.optimizers.Adam( 0.01 ) # Possibly increase the learning rate to stir up the GAN training # Change the input dataset to be used by the mirrored strategy X_train = mirrored_strategy.experimental_distribute_dataset(X_train) X_test = mirrored_strategy.experimental_distribute_dataset(X_test) # Compute the loss according to the MirroredStrategy approach def compute_loss(real, regenerate): per_example_loss = loss_object(real, regenerate) return tf.nn.compute_average_loss(per_example_loss, global_batch_size=batch_size) # 1. Start with embedder training (Optimal LSTM auto encoder network) @tf.function(input_signature=[ tf.TensorSpec(shape=(None, 20, 11), dtype=tf.float64) ]) def train_step_embedder(X_train): with tf.GradientTape() as tape: # Apply Embedder to data and Recovery to predicted hidden states e_pred_train = embedder_model(X_train) r_pred_train = recovery_model(e_pred_train) # Compute loss for LSTM autoencoder #R_loss_train = loss_object(X_train, r_pred_train) # Compute the loss for the LSTM autoencoder using MirroredStrategy R_loss_train = compute_loss(X_train, r_pred_train) # Compute the gradients with respect to the Embedder and Recovery vars gradients = tape.gradient( R_loss_train, embedder_model.trainable_variables + recovery_model.trainable_variables) # Apply the gradients to the Embedder and Recovery vars optimizer.apply_gradients( zip( gradients, # Always minimization function embedder_model.trainable_variables + recovery_model.trainable_variables)) # Record the lower and upper layer gradients 
+ the MSE for the autoencoder grad_embedder_ll(tf.norm(gradients[1])) grad_embedder_ul(tf.norm(gradients[9])) grad_recovery_ll(tf.norm(gradients[12])) grad_recovery_ul(tf.norm(gradients[20])) #r_loss_train(R_loss_train) @tf.function() def distributed_train_step_embedder(X_train): per_replica_losses = mirrored_strategy.run(train_step_embedder, args=(X_train, )) R_loss_train = mirrored_strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None) r_loss_train(R_loss_train) @tf.function(input_signature=[ tf.TensorSpec(shape=(None, 20, 11), dtype=tf.float64) ]) def test_step_embedder(X_test): # Apply the Embedder to data and Recovery to predicted hidden states e_pred_test = embedder_model(X_test) r_pred_test = recovery_model(e_pred_test) # Compute the loss function for the LSTM autoencoder #R_loss_test = loss_object(X_test, r_pred_test) # Compute the loss function for the LSTM autoencoder using MirroredStrategy R_loss_test = compute_loss(X_test, r_pred_test) r_loss_test(R_loss_test) # Initialize the number of minibatches nr_mb_train = 0 # Train the embedder for the input data for epoch in range(load_epochs, load_epochs + 55): r_loss_train.reset_states() r_loss_test.reset_states() grad_embedder_ll.reset_states() grad_embedder_ul.reset_states() grad_recovery_ll.reset_states() grad_recovery_ul.reset_states() # Train over the complete train and test dataset for x_train in X_train: distributed_train_step_embedder(x_train) with summary_writer_bottom.as_default(): tf.summary.scalar( '1. Pre-training autoencoder/2. Gradient norm - embedder', grad_embedder_ll.result(), step=nr_mb_train) tf.summary.scalar( '1. Pre-training autoencoder/2. Gradient norm - recovery', grad_recovery_ll.result(), step=nr_mb_train) with summary_writer_top.as_default(): tf.summary.scalar( '1. Pre-training autoencoder/2. Gradient norm - embedder', grad_embedder_ul.result(), step=nr_mb_train, description=str(descr_auto_grads_embedder())) tf.summary.scalar( '1. Pre-training autoencoder/2. 
Gradient norm - recovery', grad_recovery_ul.result(), step=nr_mb_train, description=str(descr_auto_grads_recovery())) nr_mb_train += 1 for x_test in X_test: test_step_embedder(x_test) with summary_writer_train.as_default(): tf.summary.scalar('1. Pre-training autoencoder/1. Recovery loss', r_loss_train.result(), step=epoch) if epoch % 50 == 0: # Only log trainable variables per 10 epochs add_hist(embedder_model.trainable_variables, epoch) add_hist(recovery_model.trainable_variables, epoch) with summary_writer_test.as_default(): tf.summary.scalar('1. Pre-training autoencoder/1. Recovery loss', r_loss_test.result(), step=epoch, description=str(descr_auto_loss())) # Log the progress to the user console in python template = 'Autoencoder training: Epoch {}, Loss: {}, Test Loss: {}' print( template.format(epoch + 1, np.round(r_loss_train.result().numpy(), 5), np.round(r_loss_test.result().numpy(), 5))) print('Finished Embedding Network Training') # 2. Continue w/ supervisor training on real data (same temporal relations) @tf.function(input_signature=[ tf.TensorSpec(shape=(None, 20, 11), dtype=tf.float64) ]) def train_step_supervised(X_train): with tf.GradientTape() as tape: # Apply Embedder to data and check temporal relations with supervisor e_pred_train = embedder_model(X_train) H_hat_supervise = supervisor_model(e_pred_train) # Compute squared loss for real embedding and supervised embedding #G_loss_S_train = loss_object(e_pred_train[:, 1:, :], # H_hat_supervise[:, 1:, :]) #tf.debugging.assert_non_negative(G_loss_S_train) # Compute the Supervisor model loss for the MirroredStrategy approach G_loss_S_train = compute_loss(e_pred_train[:, 1:, :], H_hat_supervise[:, 1:, :]) # Compute the gradients with respect to the Embedder and Recovery vars gradients = tape.gradient(G_loss_S_train, supervisor_model.trainable_variables) # Apply the gradients to the Embedder and Recovery vars optimizer.apply_gradients( zip( gradients, # Always minimization 
supervisor_model.trainable_variables)) # Record the lower and upper layer gradients + the MSE for the supervisor grad_supervisor_ll(tf.norm(gradients[1])) grad_supervisor_ul(tf.norm(gradients[6])) # g_loss_s_train(G_loss_S_train) @tf.function(input_signature=[ tf.TensorSpec(shape=(None, 20, 11), dtype=tf.float64) ]) def distributed_train_step_supervised(X_train): per_replica_losses = mirrored_strategy.run(train_step_supervised, args=(X_train, )) G_loss_S_train = mirrored_strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None) g_loss_s_train(G_loss_S_train) @tf.function(input_signature=[ tf.TensorSpec(shape=(None, 20, 11), dtype=tf.float64) ]) def test_step_supervised(X_test): e_pred_test = embedder_model(X_test) H_hat_supervise_test = supervisor_model(e_pred_test) G_loss_S_test = loss_object(e_pred_test[:, 1:, :], H_hat_supervise_test[:, 1:, :]) g_loss_s_test(G_loss_S_test) # Initialize minibatch number nr_mb_train = 0 for epoch in range(load_epochs, load_epochs + 5): g_loss_s_train.reset_states() g_loss_s_test.reset_states() grad_supervisor_ll.reset_states() grad_supervisor_ul.reset_states() for x_train in X_train: distributed_train_step_supervised(x_train) with summary_writer_bottom.as_default(): tf.summary.scalar( '2. Pre-training supervisor/2. Gradient norm - supervisor', grad_supervisor_ll.result(), step=nr_mb_train) with summary_writer_top.as_default(): tf.summary.scalar( '2. Pre-training supervisor/2. Gradient norm - supervisor', grad_supervisor_ul.result(), step=nr_mb_train, description=str(descr_auto_grads_supervisor())) nr_mb_train += 1 for x_test in X_test: test_step_supervised(x_test) with summary_writer_train.as_default(): tf.summary.scalar('2. Pre-training supervisor/1. Supervised loss', g_loss_s_train.result(), step=epoch) if epoch % 10 == 0: # Only log trainable variables per 10 epochs add_hist(supervisor_model.trainable_variables, epoch) with summary_writer_test.as_default(): tf.summary.scalar('2. Pre-training supervisor/1. 
Supervised loss', g_loss_s_test.result(), step=epoch, description=str(descr_supervisor_loss())) template = 'Epoch {}, Train Loss: {}, Test loss: {}' print( template.format(epoch + 1, np.round(g_loss_s_train.result().numpy(), 8), np.round(g_loss_s_test.result().numpy(), 8))) print('Finished training with Supervised loss only') # 3. Continue with joint training @tf.function(input_signature=[ tf.TensorSpec(shape=(None, 20, 11), dtype=tf.float64), tf.TensorSpec(shape=(None, 20, hidden_dim), dtype=tf.float64), tf.TensorSpec(shape=(), dtype=tf.bool), tf.TensorSpec(shape=(), dtype=tf.bool) ]) def train_step_jointly_generator(X_train, Z, graphing=False, wasserstein=False): if graphing: # Only used for creating the graph with tf.GradientTape() as tape: # We need these steps to make the graph in Tensorboard complete dummy1 = embedder_model(X_train) # Real embedding dummy2 = generator_model(Z) # Fake embedding dummy4 = recovery_model(tf.concat( [dummy1, dummy2], axis=0)) # Recovery from embedding dummy3 = supervisor_model(tf.concat( [dummy1, dummy2], axis=0)) # Supervisor on embedding dummy5 = discriminator_model( tf.concat([dummy1, dummy2], axis=0)) # Discriminator on embedding else: if wasserstein: with tf.GradientTape() as tape: H = embedder_model(X_train) x_tilde = recovery_model(H) # Apply Generator to Z and apply Supervisor on fake embedding E_hat = generator_model(Z) H_hat = supervisor_model(E_hat) recovery_hat = recovery_model(E_hat) Y_fake_e = discriminator_model.predict(E_hat) G_loss_U_e = -tf.reduce_mean(Y_fake_e) # 2. Generator - Supervised loss for fake embeddings G_loss_S = loss_object(E_hat[:, 1:, :], H_hat[:, 1:, :]) # Sum and multiply supervisor loss by eta for equal # contribution to generator loss function G_loss = G_loss_U_e + eta * G_loss_S #+ kappa * tf.add(G_loss_f1 , G_loss_f2) # Compute the gradients w.r.t. 
generator and supervisor model gradients_generator = tape.gradient( G_loss, generator_model.trainable_variables) # Apply the gradients to the generator model optimizer.apply_gradients( zip(gradients_generator, generator_model.trainable_variables)) else: with tf.GradientTape() as tape: H = embedder_model(X_train) x_tilde = recovery_model(H) # Apply Generator to Z and apply Supervisor on fake embedding E_hat = generator_model(Z) H_hat = supervisor_model(E_hat) recovery_hat = recovery_model(E_hat) # Compute real and fake probabilities using Discriminator model Y_fake_e = discriminator_model(E_hat) # 1. Generator - Adversarial loss - We want to trick Discriminator to give classification of 1 G_loss_U_e = loss_object_adversarial( tf.ones_like(Y_fake_e), Y_fake_e) # 2. Generator - Supervised loss for fake embeddings G_loss_S = loss_object(E_hat[:, 1:, :], H_hat[:, 1:, :]) #if dummy1.shape[0] != recovery_hat.shape[0]: # recovery_hat = recovery_hat[0:dummy1.shape[0], :, :] # # 3. Generator - Feature matching skewness and kurtosis # G_loss_f1 = tf.math.pow(tf.reduce_mean(scipy.stats.skew(x_tilde, axis = 1)) - # tf.reduce_mean(scipy.stats.skew(recovery_hat, axis = 1)), 2) # # 3. Generator - Feature matching skewness and kurtosis # G_loss_f2 = tf.math.pow(tf.reduce_mean(scipy.stats.kurtosis(x_tilde, axis = 1)) - # tf.reduce_mean(scipy.stats.kurtosis(recovery_hat, axis = 1)), 2) # Sum and multiply supervisor loss by eta for equal # contribution to generator loss function G_loss = G_loss_U_e + eta * G_loss_S #+ kappa * tf.add(G_loss_f1 , G_loss_f2) # Compute the gradients w.r.t. 
generator and supervisor model gradients_generator = tape.gradient( G_loss, generator_model.trainable_variables) # Apply the gradients to the generator model optimizer.apply_gradients( zip(gradients_generator, generator_model.trainable_variables)) # Record the lower and upper layer gradients + the MSE for the generator grad_generator_ll(tf.norm(gradients_generator[1])) grad_generator_ul(tf.norm(gradients_generator[9])) # Compute individual components of the generator loss g_loss_u_e(G_loss_U_e) g_loss_s( G_loss_S) # Based on this we can set the eta value in G_loss_S @tf.function(input_signature=[ tf.TensorSpec(shape=(None, 20, hidden_dim), dtype=tf.float64), tf.TensorSpec(shape=(), dtype=tf.bool) ]) def test_step_jointly_generator(Z, wasserstein=False): E_hat = generator_model(Z) H_hat = supervisor_model(E_hat) if wasserstein: Y_fake_e = discriminator_model.predict(E_hat) G_loss_U_e_test = -tf.reduce_mean(Y_fake_e) else: Y_fake_e = discriminator_model(E_hat) G_loss_U_e_test = loss_object_adversarial(tf.ones_like(Y_fake_e), Y_fake_e) G_loss_S_test = loss_object(E_hat[:, 1:, :], H_hat[:, 1:, :]) g_loss_u_e_test(G_loss_U_e_test) g_loss_s_test(G_loss_S_test) @tf.function(input_signature=[ tf.TensorSpec(shape=(None, 20, 11), dtype=tf.float64) ]) def train_step_jointly_embedder(X_train): with tf.GradientTape() as tape: # Apply Embedder to data and recover the data from the embedding space H = embedder_model(X_train) X_tilde = recovery_model(H) # Compute the loss function for the embedder-recovery model r_loss_train = loss_object(X_train, X_tilde) # Include the supervision loss but only for 10 % H_hat_supervise = supervisor_model(H) G_loss_S_embedder = loss_object(H[:, 1:, :], H_hat_supervise[:, 1:, :]) # Combine the two losses E_loss = r_loss_train + lambda_val * tf.sqrt(G_loss_S_embedder) # Compute the gradients with respect to the embedder-recovery model gradients_embedder = tape.gradient( E_loss, embedder_model.trainable_variables + recovery_model.trainable_variables) 
optimizer.apply_gradients( zip( gradients_embedder, embedder_model.trainable_variables + recovery_model.trainable_variables)) # Compute the embedding-recovery loss and supervisor loss e_loss_T0(r_loss_train) g_loss_s_embedder(G_loss_S_embedder) @tf.function(input_signature=[ tf.TensorSpec(shape=(None, 20, 11), dtype=tf.float64) ]) def test_step_jointly_embedder(X_test): H = embedder_model(X_test) X_tilde = recovery_model(H) E_loss_T0_test = loss_object(X_test, X_tilde) H_hat_supervise = supervisor_model(H) G_loss_S_embedder_test = loss_object(H[:, 1:, :], H_hat_supervise[:, 1:, :]) e_loss_T0_test(E_loss_T0_test) g_loss_s_embedder_test(G_loss_S_embedder_test) @tf.function() def gradient_penalty(real, fake): try: alpha = tf.random.uniform(shape=[real.shape[0], 20, hidden_dim], minval=0., maxval=1., dtype=tf.float64) interpolates = real + alpha * (fake - real) with tf.GradientTape() as tape: tape.watch(interpolates) probs = discriminator_model.predict(interpolates) gradients = tape.gradient(probs, interpolates) slopes = tf.sqrt( tf.math.reduce_sum(tf.square(gradients), axis=[1, 2])) gradient_penalty = tf.reduce_mean((slopes - 1.)**2) return gradient_penalty except: return tf.constant(0, dtype=tf.float16) @tf.function(input_signature=[ tf.TensorSpec(shape=(None, 20, 11), dtype=tf.float64), tf.TensorSpec(shape=(None, 20, hidden_dim), dtype=tf.float64), tf.TensorSpec(shape=(), dtype=tf.float16), tf.TensorSpec(shape=(), dtype=tf.bool) ]) def train_step_discriminator(X_train, Z, smoothing_factor=1, wasserstein=False): if wasserstein: # Use the Wasserstein Gradient penalty with tf.GradientTape() as tape: # Embeddings for real data and classifications from discriminator H = embedder_model(X_train) # Embeddings for fake data and classifications from discriminator E_hat = generator_model(Z) Y_real = discriminator_model.predict(H) Y_fake = discriminator_model.predict(E_hat) D_loss = tf.reduce_mean(Y_real) - tf.reduce_mean(Y_fake) D_loss += gamma * tf.cast(gradient_penalty(H, 
E_hat), tf.float16) # Compute the gradients with respect to the discriminator model grad_d = tape.gradient(D_loss, discriminator_model.trainable_variables) # Apply the gradient to the discriminator model optimizer.apply_gradients( zip( grad_d, # Minimize the Cross Entropy discriminator_model.trainable_variables)) # Record the lower and upper layer gradients + the MSE for the discriminator grad_discriminator_ll(tf.norm(grad_d[1])) grad_discriminator_ul(tf.norm(grad_d[9])) d_loss(D_loss) else: # Just normal Jensen-Shannon divergence with tf.GradientTape() as tape: # Embeddings for real data and classifications from discriminator H = embedder_model(X_train) # Embeddings for fake data and classifications from discriminator E_hat = generator_model(Z) Y_real = discriminator_model( H) # From logits instead of probs for numerical stability Y_fake_e = discriminator_model(E_hat) D_loss_real = loss_object_adversarial( tf.ones_like(Y_real) * smoothing_factor, Y_real) D_loss_fake_e = loss_object_adversarial( tf.zeros_like(Y_fake_e), Y_fake_e) D_loss = D_loss_real + D_loss_fake_e # Compute the gradients with respect to the discriminator model grad_d = tape.gradient(D_loss, discriminator_model.trainable_variables) # Apply the gradient to the discriminator model optimizer.apply_gradients( zip( grad_d, # Minimize the Cross Entropy discriminator_model.trainable_variables)) # Record the lower and upper layer gradients + the MSE for the discriminator grad_discriminator_ll(tf.norm(grad_d[1])) grad_discriminator_ul(tf.norm(grad_d[9])) d_loss(D_loss) @tf.function(input_signature=[ tf.TensorSpec(shape=(None, 20, 11), dtype=tf.float64), tf.TensorSpec(shape=(None, 20, hidden_dim), dtype=tf.float64), tf.TensorSpec(shape=(), dtype=tf.bool) ]) def test_step_discriminator(X_test, Z, wasserstein=False): H = embedder_model(X_test) E_hat = generator_model(Z) if wasserstein: # Use the Wasserstein Gradient penalty Y_real = discriminator_model.predict(H) Y_fake = discriminator_model.predict(E_hat) 
D_loss_test = tf.reduce_mean(Y_fake) - tf.reduce_mean(Y_real) D_loss_test += gamma * gradient_penalty(H, E_hat) else: Y_real = discriminator_model( H) # From logits instead of probs for numerical stability Y_fake_e = discriminator_model(E_hat) D_loss_real = loss_object_adversarial(tf.ones_like(Y_real), Y_real) D_loss_fake_e = loss_object_adversarial(tf.zeros_like(Y_fake_e), Y_fake_e) D_loss_test = D_loss_real + D_loss_fake_e d_loss_test(D_loss_test) def evaluate_accuracy(X_test, Z): Y_real_test = (discriminator_model.predict( embedder_model(X_test)).numpy() > 0.5) * 1 Y_fake_test = (discriminator_model.predict(Z).numpy() > 0.5) * 1 # Compute the loss D_accuracy_real = loss_object_accuracy(tf.ones_like(Y_real_test), Y_real_test).numpy() D_accuracy_fake = loss_object_accuracy(tf.zeros_like(Y_fake_test), Y_fake_test).numpy() return D_accuracy_real, D_accuracy_fake # Helper counter for the already performed epochs already_done_epochs = epoch # Define the algorithm for training jointly print('Start joint training') nr_mb_train = 0 # Iterator for generator training o = -1 # Iterator for discriminator training tf.summary.trace_on(graph=False, profiler=True) # Initialize the profiler for epoch in range(load_epochs, iterations + load_epochs): g_loss_u_e.reset_states() # Reset the loss at every epoch g_loss_s.reset_states() e_loss_T0.reset_states() g_loss_s_embedder.reset_states() d_loss.reset_states() d_loss_test.reset_states() # This for loop is GENERATOR TRAINING # Create 1 generator and embedding training iters. if epoch == 0 and o == -1: # Train the generator and embedder sequentially for x_train in X_train: Z_mb = tf.cast(RandomGenerator(batch_size, [20, hidden_dim]), tf.float32) train_step_jointly_generator( x_train, Z_mb, graphing=tf.constant(True, dtype=tf.bool), wasserstein=tf.constant(True, dtype=tf.bool)) train_step_jointly_embedder(x_train) with summary_writer_bottom.as_default(): tf.summary.scalar( '3. TimeGAN training - GAN/3. 
Gradient norm - generator', grad_generator_ll.result(), step=nr_mb_train) with summary_writer_top.as_default(): tf.summary.scalar( '3. TimeGAN training - GAN/3. Gradient norm - generator', grad_generator_ul.result(), step=nr_mb_train, description=str(descr_joint_grad_generator())) nr_mb_train += 1 for x_test in X_test: Z_mb = RandomGenerator(batch_size, [20, hidden_dim]) test_step_jointly_generator(Z_mb) test_step_jointly_embedder(x_test) with summary_writer_test.as_default( ): # Get autoencoder loss for recovery and # Log autoencoder + supervisor losses tf.summary.scalar( '3. TimeGAN training - Autoencoder/1. Recovery loss', e_loss_T0_test.result(), step=nr_mb_train) tf.summary.scalar( '3. TimeGAN training - Autoencoder/1. Supervised loss', g_loss_s_embedder_test.result(), step=nr_mb_train) o += 1 else: # Train the generator and embedder sequentially for x_train in X_train: Z_mb = RandomGenerator(batch_size, [20, hidden_dim]) train_step_jointly_generator( x_train, Z_mb, graphing=tf.constant(False, dtype=tf.bool), wasserstein=tf.constant(True, dtype=tf.bool)) train_step_jointly_embedder( x_train) # Possibility to double the embedder training with summary_writer_bottom.as_default(): tf.summary.scalar( '3. TimeGAN training - GAN/3. Gradient norm - generator', grad_generator_ll.result(), step=nr_mb_train) with summary_writer_top.as_default(): tf.summary.scalar( '3. TimeGAN training - GAN/3. Gradient norm - generator', grad_generator_ul.result(), step=nr_mb_train, description=str(descr_joint_grad_generator())) with summary_writer_train.as_default( ): # Get autoencoder loss for recovery and # Log autoencoder + supervisor losses tf.summary.scalar( '3. TimeGAN training - Autoencoder/1. Recovery loss', e_loss_T0.result(), step=nr_mb_train) tf.summary.scalar( '3. TimeGAN training - Autoencoder/1. 
Supervised loss', g_loss_s_embedder.result(), step=nr_mb_train) nr_mb_train += 1 for x_test in X_test: Z_mb = RandomGenerator(batch_size, [20, hidden_dim]) test_step_jointly_generator(Z_mb) test_step_jointly_embedder(x_test) with summary_writer_test.as_default( ): # Get autoencoder loss for recovery and # Log autoencoder + supervisor losses tf.summary.scalar( '3. TimeGAN training - Autoencoder/1. Recovery loss', e_loss_T0_test.result(), step=nr_mb_train) tf.summary.scalar( '3. TimeGAN training - Autoencoder/1. Supervised loss', g_loss_s_embedder_test.result(), step=nr_mb_train) print('Generator update') # This for loop is DISCRIMINATOR TRAINING - Train discriminator if too bad or at initialization (0.0) i = 0 while i < 5: # Train d to optimum (Jensen-Shannon divergence) for x_train in X_train: # Train discriminator max 5 iterations or stop if optimal discriminator Z_mb = RandomGenerator(batch_size, [20, hidden_dim]) train_step_discriminator( x_train, Z_mb, smoothing_factor=tf.constant(1.0, dtype=tf.float16), wasserstein=tf.constant(True, dtype=tf.bool)) with summary_writer_top.as_default(): tf.summary.scalar( '3. TimeGAN training - GAN/3. Gradient norm - discriminator', grad_discriminator_ul.result(), step=o, description=str(descr_joint_grad_discriminator())) with summary_writer_bottom.as_default(): tf.summary.scalar( '3. TimeGAN training - GAN/3. 
Gradient norm - discriminator', grad_discriminator_ll.result(), step=o) o += 1 for x_test in X_test: Z_mb = RandomGenerator(batch_size, [20, hidden_dim]) test_step_discriminator(x_test, Z_mb, wasserstein=tf.constant(False, dtype=tf.bool)) print('Discriminator update') i += 1 # Use when using Wasserstein loss #if tf.math.abs(d_loss.result()) > 5 or o > current_o + 5: # Standard to do 5 discriminator iterations # break # Breaks the while loop #if d_loss.result() < 0.15 or o > current_o + 5: # Use when using sigmoid cross-entropy loss # break # Compute the test accuracy acc_real_array = np.array([]) acc_fake_array = np.array([]) for x_test in X_test: Z_mb = RandomGenerator(batch_size, [20, hidden_dim]) acc_real, acc_fake = evaluate_accuracy(x_test, Z_mb) acc_real_array = np.append(acc_real_array, acc_real) acc_fake_array = np.append(acc_fake_array, acc_fake) with summary_writer_train.as_default(): # Log GAN + supervisor losses tf.summary.scalar('3. TimeGAN training - GAN/1. Unsupervised loss', d_loss.result(), step=epoch, description=str(descr_generator_loss_joint())) tf.summary.scalar('3. TimeGAN training - GAN/1. Supervised loss', g_loss_s.result(), step=epoch, description=str(descr_supervisor_loss_joint())) #with summary_writer_lower_bound.as_default(): # tf.summary.scalar('3. TimeGAN training - GAN/1. Unsupervised loss', # -2*np.log(2), step=epoch) # Only use when sigmoid cross-entropy is enabled with summary_writer_test.as_default(): # Log GAN + supervisor losses tf.summary.scalar('3. TimeGAN training - GAN/1. Unsupervised loss', d_loss_test.result(), step=epoch) tf.summary.scalar('3. TimeGAN training - GAN/1. Supervised loss', g_loss_s_test.result(), step=epoch) with summary_writer_real_data.as_default(): tf.summary.scalar('3. TimeGAN training - GAN/2. Accuracy', tf.reduce_mean(acc_real_array), step=epoch) with summary_writer_fake_data.as_default(): tf.summary.scalar('3. TimeGAN training - GAN/2. 
Accuracy', tf.reduce_mean(acc_fake_array), step=epoch, description=str(descr_accuracy_joint())) # Only log the weights of the model per 10 epochs if epoch % 10 == 0: # Add variables to histogram and distribution # Pre-trained models add_hist(embedder_model.trainable_variables, epoch + already_done_epochs) add_hist(recovery_model.trainable_variables, epoch + already_done_epochs) add_hist(supervisor_model.trainable_variables, epoch + already_done_epochs) # Not pre-trained models add_hist(generator_model.trainable_variables, epoch) add_hist(discriminator_model.trainable_variables, epoch) if epoch % 50 == 0 and epoch != 0: # It takes around an hour to do 10 epochs # Lastly save all models embedder_model.save_weights( 'C:/Users/s157148/Documents/GitHub/TimeGAN/weights/WGAN/embedder/epoch_' + str(epoch)) recovery_model.save_weights( 'C:/Users/s157148/Documents/GitHub/TimeGAN/weights/WGAN/recovery/epoch_' + str(epoch)) supervisor_model.save_weights( 'C:/Users/s157148/Documents/GitHub/TimeGAN/weights/WGAN/supervisor/epoch_' + str(epoch)) generator_model.save_weights( 'C:/Users/s157148/Documents/GitHub/TimeGAN/weights/WGAN/generator/epoch_' + str(epoch)) discriminator_model.save_weights( 'C:/Users/s157148/Documents/GitHub/TimeGAN/weights/WGAN/discriminator/epoch_' + str(epoch)) # Convert the model into interpretable simulations and Nearest-Neighbour comparisons figure = image_grid(1000, 20, 4, recovery_model, generator_model) figure.canvas.draw() w, h = figure.canvas.get_width_height() img = np.fromstring(figure.canvas.tostring_rgb(), dtype=np.uint8, sep='') img = img.reshape((1, h, w, 3)) with summary_writer_train.as_default(): tensor = tf.constant(img) tf.summary.image( str("Simulations & nearest neighbour (green) after " + str(epoch) + " training iterations"), tensor, step=epoch, description=str(descr_images())) print('step: ' + str(epoch + 1) + ', g_loss_u_e: ' + str(np.round(g_loss_u_e.result().numpy(), 8)) + ', g_loss_s: ' + str(np.round(g_loss_s.result().numpy(), 8)) 
+ ', g_loss_s_embedder: ' + str(np.round(g_loss_s_embedder.result().numpy(), 8)) + ', e_loss_t0: ' + str(np.round(e_loss_T0.result().numpy(), 8)) + ', d_loss: ' + str(np.round(d_loss.result().numpy(), 8))) tf.summary.trace_export(name="model_trace", step=0, profiler_outdir=log_dir) print('Finish joint training')
def __init__(self):
    """Set up a pix2pix training run: RNG seeding, generator/discriminator,
    dataloaders, optimizers/criteria and the output/recorder bookkeeping.

    All hyper-parameters come from the global ``Settings`` object.
    """
    # -----------------------------------
    # global
    # -----------------------------------
    # Seed every RNG source (numpy / torch / python / CUDA) for reproducibility.
    np.random.seed(Settings.SEED)
    torch.manual_seed(Settings.SEED)
    random.seed(Settings.SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(Settings.SEED)
        self.device = torch.device("cuda")
    else:
        self.device = torch.device("cpu")
    # -----------------------------------
    # model
    # -----------------------------------
    # Build both networks on the selected device and apply their own
    # weight-initialization hooks.
    self.generator = Generator(in_c=Settings.IN_CHANNEL,
                               out_c=Settings.OUT_CHANNEL,
                               ngf=Settings.NGF).to(self.device)
    self.generator.apply(self.generator.weights_init)
    self.discriminator = Discriminator(
        in_c=Settings.IN_CHANNEL,
        out_c=Settings.OUT_CHANNEL,
        ndf=Settings.NDF,
        n_layers=Settings.DISCRIMINATOR_LAYER).to(self.device)
    self.discriminator.apply(self.discriminator.weights_init)
    print("model init done")
    # -----------------------------------
    # data
    # -----------------------------------
    # Resize then normalize each channel to [-1, 1] (mean 0.5, std 0.5).
    # NOTE(review): presumably matches a tanh output in the generator — confirm.
    train_transforms = transforms.Compose([
        transforms.Resize((Settings.INPUT_SIZE, Settings.INPUT_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    data_prepare = get_dataloader(
        dataset_name=Settings.DATASET,
        batch_size=Settings.BATCH_SIZE,
        data_root=Settings.DATASET_ROOT,
        train_num_workers=Settings.TRAIN_NUM_WORKERS,
        transforms=train_transforms,
        val_num_workers=Settings.TEST_NUM_WORKERS)
    self.train_dataloader = data_prepare.train_dataloader
    self.test_dataloader = data_prepare.test_dataloader
    print("data init done.....")
    # -----------------------------------
    # optimizer and criterion
    # -----------------------------------
    # Separate Adam optimizers for G and D, each with their own LR/betas.
    self.optimG = optim.Adam([{
        "params": self.generator.parameters()
    }],
                             lr=Settings.G_LR,
                             betas=Settings.G_BETAS)
    self.optimD = optim.Adam([{
        "params": self.discriminator.parameters()
    }],
                             lr=Settings.D_LR,
                             betas=Settings.D_BETAS)
    # L1 term pulls generated images toward ground truth; BCE drives the
    # adversarial (real/fake) objective.
    self.criterion_l1loss = nn.L1Loss()
    self.criterion_BCE = nn.BCELoss()
    print("optimizer and criterion init done.....")
    # -----------------------------------
    # recorder
    # -----------------------------------
    # Loss/accuracy histories appended to during training and serialized later.
    self.recorder = {
        "errD_fake": list(),
        "errD_real": list(),
        "errG_l1loss": list(),
        "errG_bce": list(),
        "errG": list(),
        "accD": list()
    }
    # Timestamped run directory: <OUTPUT_ROOT>/pix2pix_<dataset>_<timestamp>,
    # with model/log/image sub-directories created up front.
    output_file = time.strftime(
        "{}_{}_%Y_%m_%d_%H_%M_%S".format("pix2pix", Settings.DATASET),
        time.localtime())
    self.output_root = os.path.join(Settings.OUTPUT_ROOT, output_file)
    os.makedirs(os.path.join(self.output_root, Settings.OUTPUT_MODEL_KEY))
    os.makedirs(os.path.join(self.output_root, Settings.OUTPUT_LOG_KEY))
    os.makedirs(os.path.join(self.output_root, Settings.OUTPUT_IMAGE_KEY))
    print("recorder init done.....")
def train_SRGAN():
    """Build and run the SRGAN training graph (TensorFlow 1.x, queue-based).

    Pipeline:
      * read (LR, HR) image batches from the input queue on the CPU,
      * the generator upscales LR -> SR,
      * the discriminator scores HR (real) vs SR (fake),
      * one discriminator step, then one generator step on
        content loss (VGG feature loss or pixel-wise MSE) + adversarial loss.

    Pretrained weights are restored according to ``FLAGS.load_gen`` /
    ``FLAGS.load_disc`` / ``FLAGS.load_vgg``; checkpoints and TensorBoard
    summaries are written periodically.
    """
    global_step = tf.train.get_or_create_global_step()

    # Read the input batch on the CPU so the GPU stays free for the model.
    with tf.device('/cpu:0'):
        imgs_LR, imgs_HR = inputs2(False, FLAGS.batch_size)

    ##################################################
    #        GENERATOR - SR IMAGE created            #
    ##################################################
    generator = Generator()
    imgs_SR = generator.fit(imgs_LR, train=True, reuse=False)

    # Variables for generator (SRResNet) only — used when resuming from a
    # generator-only pretraining checkpoint.
    if FLAGS.load_gen and not FLAGS.load_disc:
        variables_to_restore_srgan = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope='generator')
        srgan_saver = tf.train.Saver(variables_to_restore_srgan)

    # Display the images in the tensorboard (LR is upsampled 4x for viewing).
    tf.summary.image(
        'images_LR',
        tf.image.resize_images(
            imgs_LR, [FLAGS.image_size * 4, FLAGS.image_size * 4]))  # Bilinear
    tf.summary.image('images_HR', imgs_HR)
    tf.summary.image('images_SR', imgs_SR)

    ###########################################
    #       DISCRIMINATOR - train             #
    ###########################################
    discriminator = Discriminator()
    with tf.name_scope('discriminator_HR'):
        logit_HR, probab_HR = discriminator.fit(imgs_HR, train=True,
                                                reuse=False)
    with tf.name_scope('discriminator_SR'):
        logit_SR, probab_SR = discriminator.fit(imgs_SR, train=True,
                                                reuse=True)

    if FLAGS.load_gen and FLAGS.load_disc:
        # BUGFIX: the scope was misspelled 'dicriminator', so resuming a full
        # SRGAN checkpoint silently restored zero discriminator variables
        # (tf.get_collection returns [] for a non-matching scope).
        variables_to_restore_srgan = \
            tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                              scope='generator') + \
            tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                              scope='discriminator')
        srgan_saver = tf.train.Saver(variables_to_restore_srgan)

    # NOTE(review): probabilities are passed into parameters named logit_* —
    # confirm Discriminator.adversarial_loss really expects probabilities.
    disc_loss = discriminator.adversarial_loss(logit_HR=probab_HR,
                                               logit_SR=probab_SR)
    global_step, disc_train_op = discriminator.train2(disc_loss, global_step)

    ###########################################
    #       GENERATOR - train                 #
    ###########################################
    with tf.control_dependencies([disc_train_op, disc_loss
                                  ]):  # ensure that disc has done one step
        adversarial_loss = generator.adversarial_loss(probab_SR)
        if FLAGS.load_vgg:
            # Perceptual (VGG feature-space) content loss.
            content_loss = generator.vgg_loss(imgs_HR, imgs_SR)
            content_loss_type = 'vgg'
            gen_loss = FLAGS.vgg_loss_scale * content_loss \
                + FLAGS.adversarial_loss_scale * adversarial_loss
        else:
            # Plain pixel-wise MSE content loss.
            content_loss = generator.pixelwise_mse_loss(imgs_HR, imgs_SR)
            content_loss_type = 'mse'
            gen_loss = content_loss \
                + FLAGS.adversarial_loss_scale * adversarial_loss
        # gs_update=False: the discriminator step already advanced global_step.
        _, gen_train_op = generator.train2(gen_loss, global_step,
                                           gs_update=False)  # No update to gs

    # Variables for VGG19 (frozen feature extractor for the perceptual loss).
    if FLAGS.load_vgg:
        variables_to_restore_vgg = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope='vgg_19')
        vgg_saver = tf.train.Saver(variables_to_restore_vgg)

    saver = tf.train.Saver()

    ###########################################
    #       SUMMARIES                         #
    ###########################################
    # Exponential moving average on the losses so the curves are readable.
    exp_averager = tf.train.ExponentialMovingAverage(decay=0.99)
    losses_list = [disc_loss, content_loss, adversarial_loss, gen_loss]
    update_loss = exp_averager.apply(losses_list)
    disc_loss_avg, content_loss_avg, adversarial_loss_avg, gen_loss_avg = \
        [exp_averager.average(var) for var in losses_list]

    tf.summary.scalar('discriminator_loss', disc_loss_avg)
    tf.summary.scalar('gen_{0}_loss'.format(content_loss_type),
                      content_loss_avg)
    tf.summary.scalar('gen_adversarial_loss', adversarial_loss_avg)
    tf.summary.scalar('generator_loss', gen_loss_avg)

    # Merge all summary information.
    summary = tf.summary.merge_all()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        # Load pretrained model(s) on top of the fresh initialization.
        if FLAGS.load_gen and FLAGS.load_disc:
            print('Loading weights for SRGAN generator and discriminator...')
            srgan_saver.restore(
                sess,
                tf.train.latest_checkpoint(
                    os.path.join(FLAGS.pretrained_models, 'srgan')))
        elif FLAGS.load_gen:
            print('Loading weights for SRGAN generator...')
            srgan_saver.restore(
                sess,
                tf.train.latest_checkpoint(
                    os.path.join(FLAGS.pretrained_models, 'srresnet')))
        if FLAGS.load_vgg:
            print('Loading weights for VGG19..')
            vgg_saver.restore(
                sess,
                tf.train.latest_checkpoint(
                    os.path.join(FLAGS.pretrained_models, 'vgg19')))

        print('Starting training procedure...')
        start = time.time()
        for it in range(FLAGS.n_iter):
            # One combined step: both train ops plus the averaged losses and
            # the merged summary.
            gs, _, d_loss, _, g_loss, _, summ = sess.run([
                global_step, update_loss, disc_loss_avg, disc_train_op,
                gen_loss_avg, gen_train_op, summary
            ])
            if it % FLAGS.log_freq == 0 and it > 0:
                t = (time.time() - start)
                print('{0} iter, gen_loss: {1}, disc_loss: {2}, img/sec: {3}'.
                      format(gs, g_loss, d_loss,
                             FLAGS.log_freq * FLAGS.batch_size / t))
                summary_writer.add_summary(summ, gs)
                summary_writer.flush()
                start = time.time()  # reset the throughput timer
            if it % FLAGS.ckpt_freq == 0 and it > 0:
                saver.save(sess, FLAGS.checkpoint_dir, global_step=gs)
        coord.request_stop()
        coord.join(threads)
def __init__(self, input_data_path, output_model_folder, decode_mols_save_path='', n_epochs=200, starting_epoch=1, batch_size=64, lr=0.0002, b1=0.5, b2=0.999, n_critic=5, sample_interval=10, save_interval=100, sample_after_training=100, message=""): self.message = message # init params self.input_data_path = input_data_path self.output_model_folder = output_model_folder self.n_epochs = n_epochs self.starting_epoch = starting_epoch self.batch_size = batch_size self.lr = lr self.b1 = b1 self.b2 = b2 self.n_critic = n_critic self.sample_interval = sample_interval self.save_interval = save_interval self.sample_after_training = sample_after_training self.decode_mols_save_path = decode_mols_save_path # initialize dataloader json_smiles = open(self.input_data_path, "r") latent_space_mols = np.array(json.load(json_smiles)) latent_space_mols = latent_space_mols.reshape( latent_space_mols.shape[0], 512) self.dataloader = torch.utils.data.DataLoader( LatentMolsDataset(latent_space_mols), shuffle=True, batch_size=self.batch_size) # load discriminator discriminator_name = 'discriminator.txt' if self.starting_epoch == 1 else str( self.starting_epoch - 1) + '_discriminator.txt' discriminator_path = os.path.join(output_model_folder, discriminator_name) self.D = Discriminator.load(discriminator_path) # load generator generator_name = 'generator.txt' if self.starting_epoch == 1 else str( self.starting_epoch - 1) + '_generator.txt' generator_path = os.path.join(output_model_folder, generator_name) self.G = Generator.load(generator_path) # initialize sampler self.Sampler = Sampler(self.G) # initialize optimizer self.optimizer_G = torch.optim.Adam(self.G.parameters(), lr=self.lr, betas=(self.b1, self.b2)) self.optimizer_D = torch.optim.Adam(self.D.parameters(), lr=self.lr, betas=(self.b1, self.b2)) # Tensor cuda = True if torch.cuda.is_available() else False if cuda: self.G.cuda() self.D.cuda() self.Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
def main(args):
    """Train a CycleGAN: generators A<->B plus one discriminator per domain.

    Handles seeding, data loading, model/optimizer/scheduler construction,
    optional resume from the latest checkpoint, the epoch loop (delegating the
    actual optimization to ``train`` and periodic visualization to ``val``),
    and result/checkpoint serialization.

    Args:
        args: parsed CLI namespace (model, dataset, loss, epochs, batch_size,
            lr, gpu_ids, num_workers, buffer_size, lambda_cycle,
            lambda_identity, lambda_feature, attention, flag, resume).
    """
    # Step0 ====================================================================
    # Set GPU ids
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_ids

    # Set the file name format shared by all result/checkpoint files.
    FILE_NAME_FORMAT = "{0}_{1}_{2}_{3:d}{4}".format(args.model, args.dataset,
                                                     args.loss, args.epochs,
                                                     args.flag)
    # Set the results file path
    RESULT_FILE_NAME = FILE_NAME_FORMAT + '_results.pkl'
    RESULT_FILE_PATH = os.path.join(RESULT_PATH, RESULT_FILE_NAME)
    # Set the checkpoint file paths (rolling latest + best)
    CHECKPOINT_FILE_NAME = FILE_NAME_FORMAT + '.ckpt'
    CHECKPOINT_FILE_PATH = os.path.join(CHECKPOINT_PATH, CHECKPOINT_FILE_NAME)
    BEST_CHECKPOINT_FILE_NAME = FILE_NAME_FORMAT + '_best.ckpt'
    BEST_CHECKPOINT_FILE_PATH = os.path.join(CHECKPOINT_PATH,
                                             BEST_CHECKPOINT_FILE_NAME)

    # Set the random seed same for reproducibility
    random.seed(190811)
    torch.manual_seed(190811)
    torch.cuda.manual_seed_all(190811)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Step1 ====================================================================
    # Load dataset
    train_dataloader = CycleGAN_Dataloader(name=args.dataset,
                                           num_workers=args.num_workers)
    test_dataloader = CycleGAN_Dataloader(name=args.dataset,
                                          train=False,
                                          num_workers=args.num_workers)
    print('==> DataLoader ready.')

    # Step2 ====================================================================
    # Make the model. Only the generator depth differed between the two
    # original branches, so the duplication is collapsed here:
    # cityscapes uses 6 residual blocks, every other dataset uses 9.
    num_resblock = 6 if args.dataset == 'cityscapes' else 9
    A_generator = Generator(num_resblock=num_resblock)
    B_generator = Generator(num_resblock=num_resblock)
    A_discriminator = Discriminator()
    B_discriminator = Discriminator()

    # Check DataParallel available
    if torch.cuda.device_count() > 1:
        A_generator = nn.DataParallel(A_generator)
        B_generator = nn.DataParallel(B_generator)
        A_discriminator = nn.DataParallel(A_discriminator)
        B_discriminator = nn.DataParallel(B_discriminator)

    # Check CUDA available
    if torch.cuda.is_available():
        A_generator.cuda()
        B_generator.cuda()
        A_discriminator.cuda()
        B_discriminator.cuda()
    print('==> Model ready.')

    # Step3 ====================================================================
    # Set each loss function (MSE adversarial term -> LSGAN-style objective).
    criterion_GAN = nn.MSELoss()
    criterion_cycle = nn.L1Loss()
    criterion_identity = nn.L1Loss()
    criterion_feature = nn.L1Loss()

    # One optimizer for both generators, one for both discriminators.
    optimizer_G = optim.Adam(itertools.chain(A_generator.parameters(),
                                             B_generator.parameters()),
                             lr=args.lr,
                             betas=(0.5, 0.999))
    optimizer_D = optim.Adam(itertools.chain(A_discriminator.parameters(),
                                             B_discriminator.parameters()),
                             lr=args.lr,
                             betas=(0.5, 0.999))

    # LR schedule: constant for the first half of training, then a linear
    # decay toward zero over the second half.
    def lambda_rule(epoch):
        epoch_decay = args.epochs / 2
        lr_linear_scale = 1.0 - max(0, epoch + 1 - epoch_decay) \
                                / float(epoch_decay + 1)
        return lr_linear_scale

    scheduler_G = lr_scheduler.LambdaLR(optimizer_G, lr_lambda=lambda_rule)
    scheduler_D = lr_scheduler.LambdaLR(optimizer_D, lr_lambda=lambda_rule)
    print('==> Criterion and optimizer ready.')

    # Step4 ====================================================================
    # Train and validate the model
    start_epoch = 0
    best_metric = float("inf")
    # Initialize the result lists
    train_loss_G = []
    train_loss_D_A = []
    train_loss_D_B = []

    # Image buffers: history of generated images used for discriminator updates.
    A_buffer = ImageBuffer(args.buffer_size)
    B_buffer = ImageBuffer(args.buffer_size)

    if args.resume:
        assert os.path.exists(CHECKPOINT_FILE_PATH), 'No checkpoint file!'
        checkpoint = torch.load(CHECKPOINT_FILE_PATH)
        A_generator.load_state_dict(checkpoint['A_generator_state_dict'])
        B_generator.load_state_dict(checkpoint['B_generator_state_dict'])
        A_discriminator.load_state_dict(
            checkpoint['A_discriminator_state_dict'])
        B_discriminator.load_state_dict(
            checkpoint['B_discriminator_state_dict'])
        optimizer_G.load_state_dict(checkpoint['optimizer_G_state_dict'])
        optimizer_D.load_state_dict(checkpoint['optimizer_D_state_dict'])
        scheduler_G.load_state_dict(checkpoint['scheduler_G_state_dict'])
        scheduler_D.load_state_dict(checkpoint['scheduler_D_state_dict'])
        start_epoch = checkpoint['epoch']
        train_loss_G = checkpoint['train_loss_G']
        train_loss_D_A = checkpoint['train_loss_D_A']
        train_loss_D_B = checkpoint['train_loss_D_B']
        best_metric = checkpoint['best_metric']

    # Save the training information
    result_data = {}
    result_data['model'] = args.model
    result_data['dataset'] = args.dataset
    result_data['loss'] = args.loss
    result_data['target_epoch'] = args.epochs
    result_data['batch_size'] = args.batch_size

    # Check the directory of the file path
    if not os.path.exists(os.path.dirname(RESULT_FILE_PATH)):
        os.makedirs(os.path.dirname(RESULT_FILE_PATH))
    if not os.path.exists(os.path.dirname(CHECKPOINT_FILE_PATH)):
        os.makedirs(os.path.dirname(CHECKPOINT_FILE_PATH))
    print('==> Train ready.')

    # BUGFIX: `start` was read by the per-epoch elapsed-time report below but
    # never assigned anywhere in this function (NameError at runtime unless an
    # unseen module-level global happened to exist). Anchor it here so
    # "Current elapsed time" measures time since training started.
    start = time.time()

    # Start after the checkpoint epoch when resuming.
    for epoch in range(start_epoch, args.epochs):
        print("\n[Epoch: {:3d}/{:3d}]".format(epoch + 1, args.epochs))
        epoch_time = time.time()
        #=======================================================================
        # train and validate the model
        tloss_G, tloss_D = train(
            train_dataloader, A_generator, B_generator, A_discriminator,
            B_discriminator, criterion_GAN, criterion_cycle,
            criterion_identity, optimizer_G, optimizer_D, A_buffer, B_buffer,
            args.loss, args.lambda_cycle, args.lambda_identity,
            criterion_feature, args.lambda_feature, args.attention)
        train_loss_G.append(tloss_G)
        train_loss_D_A.append(tloss_D['A'])
        train_loss_D_B.append(tloss_D['B'])

        if (epoch + 1) % 10 == 0:
            val(test_dataloader, A_generator, B_generator, A_discriminator,
                B_discriminator, epoch + 1, FILE_NAME_FORMAT, args.attention)

        # Record the LR used this epoch, then advance the schedulers.
        current_lr = optimizer_G.param_groups[0]['lr']
        scheduler_G.step()
        scheduler_D.step()
        #=======================================================================
        current = time.time()

        # Save the current result
        result_data['current_epoch'] = epoch
        result_data['train_loss_G'] = train_loss_G
        result_data['train_loss_D_A'] = train_loss_D_A
        result_data['train_loss_D_B'] = train_loss_D_B

        # Save result_data as pkl file
        with open(RESULT_FILE_PATH, 'wb') as pkl_file:
            pickle.dump(result_data, pkl_file,
                        protocol=pickle.HIGHEST_PROTOCOL)

        # NOTE: best-checkpoint saving (keyed on train_loss_G vs best_metric)
        # was disabled in the original; BEST_CHECKPOINT_FILE_PATH is kept for
        # when it is re-enabled.

        # Build the checkpoint state once and reuse it for both the rolling
        # "latest" file and the periodic per-epoch snapshot (the two save
        # sites previously duplicated this dict verbatim).
        state = {
            'epoch': epoch + 1,
            'A_generator_state_dict': A_generator.state_dict(),
            'B_generator_state_dict': B_generator.state_dict(),
            'A_discriminator_state_dict': A_discriminator.state_dict(),
            'B_discriminator_state_dict': B_discriminator.state_dict(),
            'optimizer_G_state_dict': optimizer_G.state_dict(),
            'optimizer_D_state_dict': optimizer_D.state_dict(),
            'scheduler_G_state_dict': scheduler_G.state_dict(),
            'scheduler_D_state_dict': scheduler_D.state_dict(),
            'train_loss_G': train_loss_G,
            'train_loss_D_A': train_loss_D_A,
            'train_loss_D_B': train_loss_D_B,
            'best_metric': best_metric,
        }
        # Save the current checkpoint
        torch.save(state, CHECKPOINT_FILE_PATH)

        if (epoch + 1) % 10 == 0:
            # BUGFIX: the snapshot name template '_{0}.ckpt' was never
            # formatted, so every "per-epoch" snapshot overwrote one literal
            # '..._{0}.ckpt' file. Substitute the epoch number.
            CHECKPOINT_FILE_NAME_epoch = \
                FILE_NAME_FORMAT + '_{0}.ckpt'.format(epoch + 1)
            CHECKPOINT_FILE_PATH_epoch = os.path.join(
                CHECKPOINT_PATH, FILE_NAME_FORMAT, CHECKPOINT_FILE_NAME_epoch)
            if not os.path.exists(
                    os.path.dirname(CHECKPOINT_FILE_PATH_epoch)):
                os.makedirs(os.path.dirname(CHECKPOINT_FILE_PATH_epoch))
            torch.save(state, CHECKPOINT_FILE_PATH_epoch)

        # Print the information on the console
        print("model                : {}".format(args.model))
        print("dataset              : {}".format(args.dataset))
        print("loss                 : {}".format(args.loss))
        print("batch_size           : {}".format(args.batch_size))
        print("current lrate        : {:f}".format(current_lr))
        print("G loss               : {:f}".format(tloss_G))
        print("D A/B loss           : {:f}/{:f}".format(
            tloss_D['A'], tloss_D['B']))
        print("epoch time           : {0:.3f} sec".format(current -
                                                          epoch_time))
        print("Current elapsed time : {0:.3f} sec".format(current - start))

    print('==> Train done.')
    print(' '.join(['Results have been saved at', RESULT_FILE_PATH]))
    print(' '.join(['Checkpoints have been saved at', CHECKPOINT_FILE_PATH]))
def fix_model_state_dict(state_dict):
    """Strip the 'module.' prefix that ``nn.DataParallel`` prepends to every
    parameter name, so the weights load into a plain (non-parallel) model.

    Args:
        state_dict: mapping of parameter name -> tensor (as returned by
            ``torch.load`` on a checkpoint).

    Returns:
        OrderedDict with the same values under un-prefixed keys; keys without
        the prefix are passed through unchanged.
    """
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k
        if name.startswith('module.'):
            name = name[7:]  # remove 'module.' of dataparallel
        new_state_dict[name] = v
    return new_state_dict


#torch.manual_seed(44)

# ---- script: load a trained AnoGAN generator/discriminator pair ----
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
device = "cuda" if torch.cuda.is_available() else "cpu"

G = Generator(z_dim=20, image_size=64)
D = Discriminator(z_dim=20, image_size=64)

'''-------load weights-------'''
# The checkpoints may have been saved from a DataParallel-wrapped model,
# hence the key fix-up before load_state_dict.
# NOTE(review): torch.load without map_location assumes the saving device is
# available — confirm if these checkpoints must also load on CPU-only hosts.
G_load_weights = torch.load('./checkpoints/G_AnoGAN_300.pth')
G.load_state_dict(fix_model_state_dict(G_load_weights))

D_load_weights = torch.load('./checkpoints/D_AnoGAN_300.pth')
D.load_state_dict(fix_model_state_dict(D_load_weights))

G.to(device)
D.to(device)

"""use GPU in parallel"""
if device == 'cuda':
    G = torch.nn.DataParallel(G)
    D = torch.nn.DataParallel(D)
    print("parallel mode")
def visualize_test_images(ckpt_list):
    """Run validation/visualization for each checkpoint file in ``ckpt_list``.

    The hyper-parameters (model, dataset, loss, attention flag) are recovered
    by parsing the checkpoint file name, which is assumed to follow the
    '<model>_<dataset>_<loss>_..._<flag>.ckpt' convention used at save time.
    A failure on one checkpoint is printed and the loop continues with the
    next one (deliberate best-effort behavior).

    Args:
        ckpt_list: iterable of checkpoint file names under CHECKPOINT_PATH.
    """
    #===========================================================================
    for ckpt_name in ckpt_list:
        try:
            # Step0 ============================================================
            # Parsing the hyper-parameters from the file name.
            FILE_NAME_FORMAT = ckpt_name.split('.')[0]
            parsing_list = ckpt_name.split('.')[0].split('_')
            # Setting constants
            model_name = parsing_list[0]
            dataset_name = parsing_list[1]
            loss_type = parsing_list[2]
            flag = parsing_list[-1]
            if 'attention' in flag:
                attention = True
            else:
                attention = False

            # Step1 ============================================================
            # Load dataset (test split only)
            test_dataloader = CycleGAN_Dataloader(name=dataset_name,
                                                  train=False,
                                                  num_workers=8)
            print('==> DataLoader ready.')

            # Step2 ============================================================
            # Make the model (must mirror the architecture used at training
            # time: cityscapes -> 6 residual blocks, otherwise 9).
            if dataset_name == 'cityscapes':
                A_generator = Generator(num_resblock=6)
                B_generator = Generator(num_resblock=6)
                A_discriminator = Discriminator()
                B_discriminator = Discriminator()
            else:
                A_generator = Generator(num_resblock=9)
                B_generator = Generator(num_resblock=9)
                A_discriminator = Discriminator()
                B_discriminator = Discriminator()

            # Check DataParallel available
            if torch.cuda.device_count() > 1:
                A_generator = nn.DataParallel(A_generator)
                B_generator = nn.DataParallel(B_generator)
                A_discriminator = nn.DataParallel(A_discriminator)
                B_discriminator = nn.DataParallel(B_discriminator)

            # Check CUDA available
            if torch.cuda.is_available():
                A_generator.cuda()
                B_generator.cuda()
                A_discriminator.cuda()
                B_discriminator.cuda()
            print('==> Model ready.')

            # Step3 ============================================================
            # Test the model: restore all four networks from the checkpoint
            # and delegate the actual visualization to ``val``.
            checkpoint = torch.load(os.path.join(CHECKPOINT_PATH, ckpt_name))
            A_generator.load_state_dict(checkpoint['A_generator_state_dict'])
            B_generator.load_state_dict(checkpoint['B_generator_state_dict'])
            A_discriminator.load_state_dict(
                checkpoint['A_discriminator_state_dict'])
            B_discriminator.load_state_dict(
                checkpoint['B_discriminator_state_dict'])
            train_epoch = checkpoint['epoch']

            val(test_dataloader, A_generator, B_generator, A_discriminator,
                B_discriminator, train_epoch, FILE_NAME_FORMAT, attention)
            #-------------------------------------------------------------------
            # Print the result on the console
            print("model                : {}".format(model_name))
            print("dataset              : {}".format(dataset_name))
            print("loss                 : {}".format(loss_type))
            print('-'*50)
        except Exception as e:
            # Best-effort: report the failure and move on to the next checkpoint.
            print(e)
    print('==> Visualize test images done.')