def train(model_config, train_config):
    mode = 'train'
    dataset = ShakespeareModern(train_shakespeare_path, test_shakespeare_path,
                                train_modern_path, test_modern_path, mode=mode)
    dataloader = DataLoader(dataset, batch_size=train_config['batch_size'], shuffle=False)
    print(dataset.domain_A_max_len)

    shakespeare_disc = Discriminator(model_config['embedding_size'], model_config['hidden_dim'],
                                     len(dataset.vocab), batch_size=train_config['batch_size']).cuda()
    shakespeare_disc.train()
    if train_config['continue_train']:
        shakespeare_disc.load_state_dict(torch.load(train_config['model_path']))

    criterion = nn.BCELoss().cuda()
    optimizer = torch.optim.Adam(shakespeare_disc.parameters(), lr=train_config['base_lr'],
                                 weight_decay=1e-5)

    real_label = torch.ones((train_config['batch_size'], 1)).cuda()
    fake_label = torch.zeros((train_config['batch_size'], 1)).cuda()

    for epoch in range(train_config['num_epochs']):
        for idx, (s, s_addn_feats, m, m_addn_feats) in tqdm(enumerate(dataloader)):
            s = s.transpose(0, 1)
            m = m.transpose(0, 1)

            # Shakespearean batch -> real label
            s = Variable(s).cuda()
            s_output = shakespeare_disc(s, s_addn_feats)
            s_loss = criterion(s_output, real_label)
            s_loss = 100 * s_loss
            optimizer.zero_grad()
            s_loss.backward()
            optimizer.step()
            shakespeare_disc.hidden = shakespeare_disc.init_hidden()

            # Modern batch -> fake label
            m = Variable(m).cuda()
            m_output = shakespeare_disc(m, m_addn_feats)
            m_loss = criterion(m_output, fake_label)
            m_loss = 100 * m_loss
            optimizer.zero_grad()
            m_loss.backward()
            optimizer.step()
            shakespeare_disc.hidden = shakespeare_disc.init_hidden()

            if idx % 100 == 0:
                print('\tepoch [{}/{}], iter: {}, s_loss: {:.4f}, m_loss: {:.4f}, preds: s: {}, {}, m: {}, {}'
                      .format(epoch + 1, train_config['num_epochs'], idx,
                              s_loss.item(), m_loss.item(),
                              s_output.item(), round(s_output.item()),
                              m_output.item(), round(m_output.item())))

        print('\tepoch [{}/{}]'.format(epoch + 1, train_config['num_epochs']))
        torch.save(shakespeare_disc.state_dict(), './shakespeare_disc.pth')
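# A hypothetical invocation of train(); the config values below are illustrative
# placeholders, not the repo's actual settings. batch_size=1 matches the per-sample
# .item() calls in the logging above.
if __name__ == '__main__':
    example_model_config = {'embedding_size': 300, 'hidden_dim': 256}
    example_train_config = {'batch_size': 1, 'base_lr': 1e-3, 'num_epochs': 20,
                            'continue_train': False, 'model_path': './shakespeare_disc.pth'}
    train(example_model_config, example_train_config)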
## results_dir:
optimizer_str = 'adam_lr%s_wd%s' % (args.lr, args.wd)
loss_str = ''
results_dir = os.path.join('cp_finetune_results', args.dataset, args.task,
                           args.base_model_str, '%s_%s' % (optimizer_str, loss_str))
img_dir = os.path.join(results_dir, 'img')
pth_dir = os.path.join(results_dir, 'pth')
create_dir(img_dir), create_dir(pth_dir)

# Optimizers
optimizer_G = torch.optim.Adam(netG.parameters(), lr=args.lr, weight_decay=args.wd,
                               betas=(0.5, 0.999))  # lr=1e-3
optimizer_D = torch.optim.Adam(netD.parameters(), lr=args.lr, weight_decay=args.wd,
                               betas=(0.5, 0.999))  # lr=1e-3

# LR schedulers:
lr_scheduler_G = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer_G, args.epochs)
lr_scheduler_D = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer_D, args.epochs)

# load trained models:
if args.resume:
    last_epoch, loss_G_lst, loss_G_perceptual_lst, loss_G_GAN_lst, loss_D_lst, best_FID = load_ckpt_finetune(
        netG, netD, optimizer_G, optimizer_D, lr_scheduler_G, lr_scheduler_D,
        path=os.path.join(results_dir, 'pth', 'latest.pth'))
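# create_dir is assumed to be a small utility along these lines (a sketch; the repo's
# actual helper may differ):
def create_dir(path):
    # Create the directory (including parents) if it does not already exist.
    if not os.path.exists(path):
        os.makedirs(path)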
else:
    netG = Generator1(opts).to(opts.device)
    netD = Discriminator1(opts).to(opts.device)

# seen reconstructor
netRS = Reconstructor(opts).to(opts.device)
# unseen reconstructor
netRU = Reconstructor(opts).to(opts.device)

if opts.optimizer == "ADAM":
    optimizerF = optim.Adam
else:
    optimizerF = optim.RMSprop

# train setup
optimizerD = optimizerF(netD.parameters(), lr=opts.lr)
optimizerG = optimizerF(netG.parameters(), lr=opts.lr)
optimizerRS = optimizerF(netRS.parameters(), lr=opts.r_lr)
optimizerRU = optimizerF(netRU.parameters(), lr=opts.r_lr)

cls_criterion = nn.NLLLoss().to(opts.device)
mse_criterion = nn.MSELoss().to(opts.device)

noise = torch.FloatTensor(opts.batch_size, opts.nz).to(opts.device)
input_res = torch.FloatTensor(opts.batch_size, opts.f_dim).to(opts.device)
input_att = torch.FloatTensor(opts.batch_size, opts.atts_dim).to(opts.device)
input_label = torch.LongTensor(opts.batch_size).to(opts.device)

# training and test
seenclasses = data.seenclasses.to(opts.device)
unseenclasses = data.unseenclasses.to(opts.device)
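# A hedged sketch of how the pre-allocated buffers above are commonly refreshed each
# iteration (sample_seen_batch is an assumed sampler name, not necessarily this repo's
# API; the call pattern is illustrative only):
feats, labels, atts = sample_seen_batch(opts.batch_size)
input_res.copy_(feats)      # visual features for the current batch
input_att.copy_(atts)       # class attribute / semantic vectors
input_label.copy_(labels)   # integer class labels
noise.normal_(0, 1)         # fresh Gaussian noise for the generator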
fp_dataset_test = FloorplanGraphDataset(opt.data_path,
                                        transforms.Normalize(mean=[0.5], std=[0.5]),
                                        target_set=opt.target_set, split='eval')
fp_loader_test = torch.utils.data.DataLoader(fp_dataset_test,
                                             batch_size=8,
                                             shuffle=True,
                                             num_workers=opt.n_cpu,
                                             collate_fn=floorplan_collate_fn,
                                             pin_memory=False)

# Optimizers
optimizer_G = torch.optim.Adam(generator.parameters(), lr=opt.g_lr, betas=(opt.b1, opt.b2))
optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=opt.d_lr, betas=(opt.b1, opt.b2))

Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

# ----------
#  Training
# ----------
batches_done = 0
for epoch in range(opt.n_epochs):
    for i, batch in enumerate(fp_loader):
        # Unpack batch
        mks, nds, eds, nd_to_sample, ed_to_sample = batch
        indices = nd_to_sample, ed_to_sample
        # Adversarial ground truths
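        # A hedged sketch of one common way to build the real/fake targets from the
        # Tensor alias above (the names and the batch-size computation are assumptions,
        # not necessarily the original continuation):
        batch_size = int(nd_to_sample.max()) + 1
        valid = Tensor(batch_size, 1).fill_(1.0)
        fake = Tensor(batch_size, 1).fill_(0.0)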
    model.trans.load_state_dict(torch.load(sys.argv[2]))
    model.atmos.load_state_dict(torch.load(sys.argv[3]))
    '''
    for param in model.trans.parameters():
        param.requires_grad = False
    for param in model.atmos.parameters():
        param.requires_grad = False
    '''
except Exception as e:
    try:
        model.load_state_dict(torch.load(sys.argv[2]))
    except Exception as e:
        print("No weights. Training from scratch.")

if MODE == 'GAN':
    model_d = Discriminator().to(device)
    optimizer_d = torch.optim.Adam(model_d.parameters(), lr=learning_rate)
    try:
        model_d.load_state_dict(torch.load(sys.argv[3]))
        if opt['parallel']:
            model_d = nn.DataParallel(model_d)
    except Exception as e:
        print("No weights. Training from scratch discrim.")
else:
    print('MODE INCORRECT : TRANS or ATMOS or FAST or DUAL or GAN')
    exit()

# Wrap in DataParallel for multi-GPU use
if opt['parallel']:
    model = nn.DataParallel(model)

# Set default early stop, if not defined
def main():
    parser = argparse.ArgumentParser(description='Train Cartoon avatar GAN models')
    parser.add_argument('--crop_size', default=64, type=int, help='Training images crop size')
    parser.add_argument('--num_epochs', default=50, type=int, help='Train epoch number')
    parser.add_argument('--data_root', default='data/cartoon', help='Root directory for dataset')
    parser.add_argument('--worker', default=2, type=int, help='Number of workers for dataloader')
    parser.add_argument('--batch_size', default=16, type=int, help='Batch size during training')
    parser.add_argument('--channels', default=3, type=int, help='Number of channels in the training images')
    parser.add_argument('--nz', default=100, type=int, help='Size of generator input')
    parser.add_argument('--ngf', default=64, type=int, help='Size of feature maps in generator')
    parser.add_argument('--ndf', default=64, type=int, help='Size of feature maps in discriminator')
    parser.add_argument('--lr', default=0.0002, type=float, help='Learning rate for optimizer')
    parser.add_argument('--beta1', default=0.5, type=float, help='Beta1 hyperparam for Adam optimizers')
    parser.add_argument('--beta2', default=0.999, type=float, help='Beta2 hyperparam for Adam optimizers')
    parser.add_argument('--ngpu', default=1, type=int, help='Number of GPUs, use 0 for CPU mode')
    parser.add_argument('--latent_vector_num', default=8, type=int,
                        help='Number of latent vectors used for visualization; 8 means 8 images are shown during training')
    opt = parser.parse_args()

    dataroot = opt.data_root
    workers = opt.worker
    batch_size = opt.batch_size
    image_size = opt.crop_size
    nc = opt.channels
    nz = opt.nz
    ngf = opt.ngf
    ndf = opt.ndf
    num_epochs = opt.num_epochs
    lr = opt.lr
    beta1 = opt.beta1
    beta2 = opt.beta2
    ngpu = opt.ngpu
    latent_vector_num = opt.latent_vector_num

    # Create the dataset
    dataset = dset.ImageFolder(root=dataroot,
                               transform=transforms.Compose([
                                   transforms.Resize(image_size),
                                   transforms.CenterCrop(image_size),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                               ]))
    # Create the dataloader
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                             shuffle=True, num_workers=workers)

    # Decide which device we want to run on
    device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")

    # Create the generator
    netG = Generator(ngpu, nz, ngf, nc).to(device)
    # Create the Discriminator
    netD = Discriminator(ngpu, nc, ndf).to(device)

    # Handle multi-gpu if desired
    if (device.type == 'cuda') and (ngpu > 1):
        netG = nn.DataParallel(netG, list(range(ngpu)))
        netD = nn.DataParallel(netD, list(range(ngpu)))

    # Apply the weights_init function to randomly initialize all weights
    # to mean=0, stdev=0.02.
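    # weights_init is assumed to be the usual DCGAN-style initializer implied by the
    # comment above (a sketch; the repo's own helper may differ):
    def weights_init(m):
        classname = m.__class__.__name__
        if classname.find('Conv') != -1:
            nn.init.normal_(m.weight.data, 0.0, 0.02)
        elif classname.find('BatchNorm') != -1:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
            nn.init.constant_(m.bias.data, 0)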
    netG.apply(weights_init)
    netD.apply(weights_init)

    # Setup Adam optimizers for both G and D
    optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
    optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))

    # Print models
    print(netG)
    print(netD)

    # Initialize BCELoss function
    criterion = nn.BCELoss()

    # Create batch of latent vectors that we will use to visualize
    fixed_noise = torch.randn(latent_vector_num, nz, 1, 1, device=device)

    # Real and fake labels during training
    real_label = 1
    fake_label = 0

    # Lists to keep track of progress
    img_list = []
    G_losses = []
    D_losses = []
    iters = 0

    print("Starting Training ...")
    for epoch in range(num_epochs):
        for i, data in enumerate(dataloader, 0):
            ############################
            # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
            ############################
            ## Train with all-real batch
            netD.zero_grad()
            # Format batch
            real_cpu = data[0].to(device)
            b_size = real_cpu.size(0)
            label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
            # Forward pass real batch through D
            output = netD(real_cpu).view(-1)
            # Calculate loss on all-real batch
            errD_real = criterion(output, label)
            # Calculate gradients for D in backward pass
            errD_real.backward()
            D_x = output.mean().item()

            ## Train with all-fake batch
            # Generate batch of latent vectors
            noise = torch.randn(b_size, nz, 1, 1, device=device)
            # Generate fake image batch with G
            fake = netG(noise)
            label.fill_(fake_label)
            # Classify all fake batch with D
            output = netD(fake.detach()).view(-1)
            # Calculate D's loss on the all-fake batch
            errD_fake = criterion(output, label)
            # Calculate the gradients for this batch
            errD_fake.backward()
            D_G_z1 = output.mean().item()
            # Add the gradients from the all-real and all-fake batches
            errD = errD_real + errD_fake
            # Update D
            optimizerD.step()

            ############################
            # (2) Update G network: maximize log(D(G(z)))
            ############################
            netG.zero_grad()
            label.fill_(real_label)  # fake labels are real for generator cost
            # Since we just updated D, perform another forward pass of all-fake batch through D
            output = netD(fake).view(-1)
            # Calculate G's loss based on this output
            errG = criterion(output, label)
            # Calculate gradients for G
            errG.backward()
            D_G_z2 = output.mean().item()
            # Update G
            optimizerG.step()

            # Output training stats
            if i % 50 == 0:
                # Save model data
                torch.save(netG.state_dict(), 'pretrained_model/netG_epoch_%d.pth' % (iters))
                torch.save(netD.state_dict(), 'pretrained_model/netD_epoch_%d.pth' % (iters))
                # Print training stats
                print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                      % (epoch, num_epochs, i, len(dataloader),
                         errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))

            # Save Losses for plotting later
            G_losses.append(errG.item())
            D_losses.append(errD.item())

            # Check how the generator is doing by saving G's output on fixed_noise
            if (iters % 650 == 0) or ((epoch == num_epochs - 1) and (i == len(dataloader) - 1)):
                with torch.no_grad():
                    fake = netG(fixed_noise).detach().cpu()
                img_list.append(vutils.make_grid(fake, padding=2, normalize=True))

            iters += 1

    # Display and save samples GIF
    fig = plt.figure(figsize=(8, 8))
    plt.axis("off")
    ims = [[plt.imshow(np.transpose(i, (1, 2, 0)), animated=True)] for i in img_list]
    ani = animation.ArtistAnimation(fig, ims, interval=1000, repeat_delay=1000, blit=True)
    ani.save('output/samples.gif', writer='imagemagick', fps=100)
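    # The G_losses / D_losses histories are collected "for plotting later"; a minimal
    # sketch of that plot (the output path is an assumption):
    plt.figure(figsize=(10, 5))
    plt.title("Generator and Discriminator Loss During Training")
    plt.plot(G_losses, label="G")
    plt.plot(D_losses, label="D")
    plt.xlabel("iterations")
    plt.ylabel("loss")
    plt.legend()
    plt.savefig('output/losses.png')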
netG = Generator(args.input_nc, args.output_nc, dim_lst=np.load(dim_lst_path), quant=quant).cuda()
# D:
netD = Discriminator(args.input_nc).cuda()

## results_dir:
optimizer_str = 'adam_lr%s_wd%s' % (args.lr, args.wd)
loss_str = 'beta%s_%s' % (args.beta, args.lc)
results_dir = os.path.join('finetune_results', args.dataset, args.task,
                           args.base_model_str, '%s_%s' % (optimizer_str, loss_str))
img_dir = os.path.join(results_dir, 'img')
pth_dir = os.path.join(results_dir, 'pth')
create_dir(img_dir), create_dir(pth_dir)

# Optimizers
optimizer_G = torch.optim.Adam(netG.parameters(), lr=args.lr, weight_decay=args.wd,
                               betas=(0.5, 0.999))  # lr=1e-3
optimizer_D = torch.optim.Adam(netD.parameters(), lr=args.lr, weight_decay=args.wd,
                               betas=(0.5, 0.999))  # lr=1e-3

# LR schedulers:
lr_scheduler_G = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer_G, args.epochs)
lr_scheduler_D = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer_D, args.epochs)

# load trained models:
if args.resume:
    last_epoch, loss_G_lst, loss_G_perceptual_lst, loss_G_GAN_lst, loss_D_lst, best_FID = load_ckpt_finetune(
        netG, netD, optimizer_G, optimizer_D, lr_scheduler_G, lr_scheduler_D,
        path=os.path.join(results_dir, 'pth', 'latest.pth'))
    start_epoch = last_epoch + 1
else: