def _discriminator_step(disc, criterion, optimizer, batch, addn_feats, is_real):
    """Run one optimisation step of ``disc`` on a batch from a single domain.

    Args:
        disc: discriminator module being trained.
        criterion: loss applied to the discriminator output and its target.
        optimizer: optimizer holding ``disc``'s parameters.
        batch: batch tensor as yielded by the dataloader; dims 0 and 1 are
            swapped and the result is moved to the GPU before the forward pass.
        addn_feats: additional features, forwarded to ``disc`` unchanged.
        is_real: ``True`` to train against an all-ones target, ``False`` for
            an all-zeros target.

    Returns:
        Tuple ``(loss, output)``: the scaled loss tensor and the raw
        discriminator output for this batch.
    """
    batch = batch.transpose(0, 1).cuda()
    output = disc(batch, addn_feats)

    # Build the target from the actual output so that a final, smaller batch
    # still gets a target of matching shape, dtype and device.
    target = torch.ones_like(output) if is_real else torch.zeros_like(output)

    # The loss is scaled by a constant factor of 100 before back-propagation.
    loss = 100 * criterion(output, target)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Reset the recurrent state so it does not carry over into the next batch.
    disc.hidden = disc.init_hidden()
    return loss, output


def train(model_config, train_config):
    """Train the Shakespeare-vs-modern discriminator and checkpoint it every epoch.

    For each batch the discriminator is updated twice: once on the
    Shakespeare sample (target 1) and once on the modern sample (target 0).
    The state dict is written to ``./shakespeare_disc.pth`` after every epoch.

    Args:
        model_config: mapping providing ``'embedding_size'`` and ``'hidden_dim'``.
        train_config: mapping providing ``'batch_size'``, ``'continue_train'``,
            ``'model_path'`` (read only when ``'continue_train'`` is truthy),
            ``'base_lr'`` and ``'num_epochs'``.
    """
    mode = 'train'

    dataset = ShakespeareModern(train_shakespeare_path, test_shakespeare_path, train_modern_path, test_modern_path, mode=mode)
    dataloader = DataLoader(dataset, batch_size=train_config['batch_size'], shuffle=False)
    print(dataset.domain_A_max_len)
    shakespeare_disc = Discriminator(model_config['embedding_size'], model_config['hidden_dim'], len(dataset.vocab), batch_size=train_config['batch_size']).cuda()
    shakespeare_disc.train()

    if train_config['continue_train']:
        # NOTE(review): weights are resumed from train_config['model_path'] but
        # always saved to './shakespeare_disc.pth' below -- confirm intended.
        shakespeare_disc.load_state_dict(torch.load(train_config['model_path']))

    criterion = nn.BCELoss().cuda()
    optimizer = torch.optim.Adam(shakespeare_disc.parameters(), lr=train_config['base_lr'],
                                 weight_decay=1e-5)

    for epoch in range(train_config['num_epochs']):
        # Wrap the dataloader (not the enumerate object) so tqdm can see its length.
        for idx, (s, s_addn_feats, m, m_addn_feats) in enumerate(tqdm(dataloader)):
            s_loss, s_output = _discriminator_step(
                shakespeare_disc, criterion, optimizer, s, s_addn_feats, is_real=True)
            m_loss, m_output = _discriminator_step(
                shakespeare_disc, criterion, optimizer, m, m_addn_feats, is_real=False)

            if idx % 100 == 0:
                # mean() equals the single prediction when the batch holds one
                # element and keeps logging from raising on larger batches.
                s_pred = s_output.mean().item()
                m_pred = m_output.mean().item()
                print('\tepoch [{}/{}], iter: {}, s_loss: {:.4f}, m_loss: {:.4f}, preds: s: {}, {}, m: {}, {}'
                    .format(epoch+1, train_config['num_epochs'], idx, s_loss.item(), m_loss.item(), s_pred, round(s_pred), m_pred, round(m_pred)))

        print('\tepoch [{}/{}]'.format(epoch+1, train_config['num_epochs']))

        torch.save(shakespeare_disc.state_dict(), './shakespeare_disc.pth')
Ejemplo n.º 2
0
## results_dir:
# The run directory encodes the optimizer settings; the loss tag is empty here.
loss_str = ''
optimizer_str = 'adam_lr%s_wd%s' % (args.lr, args.wd)
results_dir = os.path.join(
    'cp_finetune_results',
    args.dataset,
    args.task,
    args.base_model_str,
    '%s_%s' % (optimizer_str, loss_str),
)
# Sub-directories for images and checkpoints, created right away.
img_dir, pth_dir = (os.path.join(results_dir, leaf) for leaf in ('img', 'pth'))
create_dir(img_dir)
create_dir(pth_dir)

# Optimizers
# Generator and discriminator share identical Adam settings.
optimizer_G, optimizer_D = (
    torch.optim.Adam(net.parameters(),
                     lr=args.lr,
                     weight_decay=args.wd,
                     betas=(0.5, 0.999))  # lr=1e-3
    for net in (netG, netD)
)

# LR schedulers:
# One cosine-annealing schedule per optimizer, with args.epochs as T_max.
lr_scheduler_G, lr_scheduler_D = (
    torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epochs)
    for optimizer in (optimizer_G, optimizer_D)
)

# load trained models:
if args.resume:
    last_epoch, loss_G_lst, loss_G_perceptual_lst, loss_G_GAN_lst, loss_D_lst, best_FID = load_ckpt_finetune(
        netG,
        netD,
Ejemplo n.º 3
0
else:
    netG = Generator1(opts).to(opts.device)
    netD = Discriminator1(opts).to(opts.device)

# seen reconstructor
netRS = Reconstructor(opts).to(opts.device)
# unseen reconstructor
netRU = Reconstructor(opts).to(opts.device)

# Choose the optimizer class once: Adam when requested, RMSprop for any other value.
# NOTE(review): the name `optimzerF` is misspelled; it is kept because code
# outside this excerpt may refer to it.
if opts.optimizer == "ADAM":
    optimzerF = optim.Adam
else:
    optimzerF = optim.RMSprop

#train setup
# The GAN pair uses opts.lr; both reconstructors use their own rate opts.r_lr.
optimizerD = optimzerF(netD.parameters(), lr=opts.lr)
optimizerG = optimzerF(netG.parameters(), lr=opts.lr)
optimizerRS = optimzerF(netRS.parameters(), lr=opts.r_lr)
optimizerRU = optimzerF(netRU.parameters(), lr=opts.r_lr)

cls_criterion = nn.NLLLoss().to(opts.device)
mse_criterion = nn.MSELoss().to(opts.device)

# Uninitialised buffers allocated directly on the target device. This replaces
# the legacy torch.FloatTensor(...)/torch.LongTensor(...) constructors, which
# allocate on the CPU first and then copy. Contents are undefined until written.
noise = torch.empty(opts.batch_size, opts.nz, dtype=torch.float, device=opts.device)
input_res = torch.empty(opts.batch_size, opts.f_dim, dtype=torch.float, device=opts.device)
input_att = torch.empty(opts.batch_size, opts.atts_dim, dtype=torch.float, device=opts.device)
input_label = torch.empty(opts.batch_size, dtype=torch.long, device=opts.device)


# training and test
seenclasses = data.seenclasses.to(opts.device)
unseenclasses = data.unseenclasses.to(opts.device)
Ejemplo n.º 4
0
# Dataset for the 'eval' split, normalised with mean 0.5 / std 0.5.
fp_dataset_test = FloorplanGraphDataset(
    opt.data_path,
    transforms.Normalize(mean=[0.5], std=[0.5]),
    target_set=opt.target_set,
    split='eval',
)
# Loader over that dataset: fixed batch size of 8, shuffled, custom collate.
fp_loader_test = torch.utils.data.DataLoader(
    fp_dataset_test,
    batch_size=8,
    shuffle=True,
    num_workers=opt.n_cpu,
    collate_fn=floorplan_collate_fn,
    pin_memory=False,
)

# Optimizers
# Same Adam betas for both networks; each network has its own learning rate.
optimizer_G, optimizer_D = (
    torch.optim.Adam(net.parameters(), lr=rate, betas=(opt.b1, opt.b2))
    for net, rate in ((generator, opt.g_lr), (discriminator, opt.d_lr))
)

# Float tensor type matching the device that is available.
if torch.cuda.is_available():
    Tensor = torch.cuda.FloatTensor
else:
    Tensor = torch.FloatTensor

# ----------
#  Training
# ----------
batches_done = 0
for epoch in range(opt.n_epochs):
    for i, batch in enumerate(fp_loader):
        # Unpack batch
        mks, nds, eds, nd_to_sample, ed_to_sample = batch
        indices = nd_to_sample, ed_to_sample
        # Adversarial ground truths
Ejemplo n.º 5
0
        model.trans.load_state_dict(torch.load(sys.argv[2]))
        model.atmos.load_state_dict(torch.load(sys.argv[3]))
        '''
        for param in model.trans.parameters():
            param.requires_grad = False
        for param in model.atmos.parameters():
            param.requires_grad = False
        '''
    except Exception as e:
        try:
            model.load_state_dict(torch.load(sys.argv[2]))
        except Exception as e:
            print("No weights. Training from scratch.")
    if MODE == 'GAN':
        model_d = Discriminator().to(device)
        optimizer_d = torch.optim.Adam(model_d.parameters(), lr=learning_rate)
        try:
            model_d.load_state_dict(torch.load(sys.argv[3]))
            if opt['parallel']:
                model_d = nn.DataParallel(model_d)
        except Exception as e:
            print("No weights. Training from scratch discrim.")
else:
    print('MODE INCORRECT : TRANS or ATMOS or FAST or DUAL or GAN')
    exit()

# Multi-GPU: wrap the model in nn.DataParallel when the 'parallel' option is set;
# otherwise it is left unchanged.
model = nn.DataParallel(model) if opt['parallel'] else model

# Set default early stop, if not defined
Ejemplo n.º 6
0
def _parse_args():
    """Define the trainer's command-line options and parse them from ``sys.argv``."""
    parser = argparse.ArgumentParser(
        description='Train Cartoon avatar Gan models')
    parser.add_argument('--crop_size',
                        default=64,
                        type=int,
                        help='Training images crop size')
    parser.add_argument('--num_epochs',
                        default=50,
                        type=int,
                        help='Train epoch number')
    parser.add_argument('--data_root',
                        default='data/cartoon',
                        help='Root directory for dataset')
    parser.add_argument('--worker',
                        default=2,
                        type=int,
                        help='Number of workers for dataloader')
    parser.add_argument('--batch_size',
                        default=16,
                        type=int,
                        help='Batch size during training')
    parser.add_argument('--channels',
                        default=3,
                        type=int,
                        help='Number of channels in the training images')
    parser.add_argument('--nz',
                        default=100,
                        type=int,
                        help='Size of generator input')
    parser.add_argument('--ngf',
                        default=64,
                        type=int,
                        help='Size of feature maps in generator')
    parser.add_argument('--ndf',
                        default=64,
                        type=int,
                        help='Size of feature maps in descriminator')
    parser.add_argument('--lr',
                        default=0.0002,
                        type=float,
                        help='Learning rate for optimizer')
    parser.add_argument('--beta1',
                        default=0.5,
                        type=float,
                        help='Beta1 hyperparam for Adam optimizers')
    parser.add_argument('--beta2',
                        default=0.999,
                        type=float,
                        help='Beta2 hyperparam for Adam optimizers')
    parser.add_argument('--ngpu',
                        default=1,
                        type=int,
                        help='Number of GPUs , use 0 for CPU mode')
    parser.add_argument(
        '--latent_vector_num',
        default=8,
        type=int,
        help=
        'latent vectors that we will use to visualize , 8 means that it will visualize 8 images during training'
    )
    return parser.parse_args()


def main():
    """Train the cartoon-avatar GAN from the command line.

    Parses the CLI options, builds an ``ImageFolder`` dataset and loader,
    creates the generator and discriminator, and alternates discriminator
    and generator updates with a BCE loss. Both networks are checkpointed
    into ``pretrained_model/`` every 50 batches of each epoch, and samples
    generated from a fixed noise batch are collected and written to
    ``output/samples.gif`` at the end.
    """
    import os  # function-scope import: only used to create the output directories

    opt = _parse_args()

    dataroot = opt.data_root
    workers = opt.worker
    batch_size = opt.batch_size
    image_size = opt.crop_size
    nc = opt.channels
    nz = opt.nz
    ngf = opt.ngf
    ndf = opt.ndf
    num_epochs = opt.num_epochs
    lr = opt.lr
    beta1 = opt.beta1
    beta2 = opt.beta2
    ngpu = opt.ngpu
    latent_vector_num = opt.latent_vector_num

    # Checkpoints and the sample GIF are written to these relative directories;
    # create them up front so the first save cannot fail on a fresh checkout.
    os.makedirs('pretrained_model', exist_ok=True)
    os.makedirs('output', exist_ok=True)

    # Create the dataset
    dataset = dset.ImageFolder(root=dataroot,
                               transform=transforms.Compose([
                                   transforms.Resize(image_size),
                                   transforms.CenterCrop(image_size),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5),
                                                        (0.5, 0.5, 0.5)),
                               ]))
    # Create the dataloader
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=workers)

    # Decide which device we want to run on
    device = torch.device("cuda:0" if (
        torch.cuda.is_available() and ngpu > 0) else "cpu")

    # Create the generator
    netG = Generator(ngpu, nz, ngf, nc).to(device)
    # Create the Discriminator
    netD = Discriminator(ngpu, nc, ndf).to(device)

    # Handle multi-gpu if desired
    if (device.type == 'cuda') and (ngpu > 1):
        netG = nn.DataParallel(netG, list(range(ngpu)))
        netD = nn.DataParallel(netD, list(range(ngpu)))

    # Apply weights_init to every sub-module of both networks
    # (the distribution used is defined by weights_init itself).
    netG.apply(weights_init)
    netD.apply(weights_init)

    # Setup Adam optimizers for both G and D.
    # --beta2 is honoured here; it used to be parsed but replaced by a
    # hard-coded 0.999 (which is still the default value).
    optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, beta2))
    optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, beta2))

    # Print models
    print(netG)
    print(netD)

    # Initialize BCELoss function
    criterion = nn.BCELoss()

    # Create batch of latent vectors that we will use to visualize
    fixed_noise = torch.randn(latent_vector_num, nz, 1, 1, device=device)

    #real and fake labels during training
    real_label = 1
    fake_label = 0

    # Lists to keep track of progress
    img_list = []
    G_losses = []
    D_losses = []
    iters = 0

    print("Starting Training ...")

    for epoch in range(num_epochs):
        for i, data in enumerate(dataloader, 0):

            ############################
            # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
            ###########################
            ## Train with all-real batch
            netD.zero_grad()
            # Format batch
            real_cpu = data[0].to(device)
            b_size = real_cpu.size(0)
            # Explicit float dtype: with an integer fill value torch.full
            # infers an integer tensor, which BCELoss rejects as a target.
            label = torch.full((b_size, ),
                               real_label,
                               dtype=torch.float,
                               device=device)
            # Forward pass real batch through D
            output = netD(real_cpu).view(-1)
            # Calculate loss on all-real batch
            errD_real = criterion(output, label)
            # Calculate gradients for D in backward pass
            errD_real.backward()
            D_x = output.mean().item()

            ## Train with all-fake batch
            # Generate batch of latent vectors
            noise = torch.randn(b_size, nz, 1, 1, device=device)
            # Generate fake image batch with G
            fake = netG(noise)
            label.fill_(fake_label)
            # Classify all fake batch with D; detach so no gradient reaches G here
            output = netD(fake.detach()).view(-1)
            # Calculate D's loss on the all-fake batch
            errD_fake = criterion(output, label)
            # Calculate the gradients for this batch
            errD_fake.backward()
            D_G_z1 = output.mean().item()
            # Total D loss (for reporting); the gradients were already
            # accumulated by the two backward calls above.
            errD = errD_real + errD_fake
            # Update D
            optimizerD.step()

            ############################
            # (2) Update G network: maximize log(D(G(z)))
            ###########################
            netG.zero_grad()
            label.fill_(real_label)
            # fake labels are real for generator cost
            # Since we just updated D, perform another forward pass of all-fake batch through D
            output = netD(fake).view(-1)
            # Calculate G's loss based on this output
            errG = criterion(output, label)
            # Calculate gradients for G
            errG.backward()
            D_G_z2 = output.mean().item()
            # Update G
            optimizerG.step()

            # Output training stats
            if i % 50 == 0:
                # Save model data.
                # NOTE: despite "epoch" in the file name, the number is the
                # global iteration counter `iters`.
                torch.save(netG.state_dict(),
                           'pretrained_model/netG_epoch_%d.pth' % (iters))
                torch.save(netD.state_dict(),
                           'pretrained_model/netD_epoch_%d.pth' % (iters))
                # Print training stats
                print(
                    '[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                    % (epoch, num_epochs, i, len(dataloader), errD.item(),
                       errG.item(), D_x, D_G_z1, D_G_z2))

            # Save Losses for plotting later
            G_losses.append(errG.item())
            D_losses.append(errD.item())

            # Check how the generator is doing by saving G's output on fixed_noise
            # (every 650 iterations and on the very last batch of training)
            if (iters % 650 == 0) or ((epoch == num_epochs - 1) and
                                      (i == len(dataloader) - 1)):
                with torch.no_grad():
                    fake = netG(fixed_noise).detach().cpu()
                img_list.append(
                    vutils.make_grid(fake, padding=2, normalize=True))

            iters += 1

    # Display and Save samples GIF
    fig = plt.figure(figsize=(8, 8))
    plt.axis("off")
    # One animation frame per collected grid; transpose CHW -> HWC for imshow.
    ims = [[plt.imshow(np.transpose(img, (1, 2, 0)), animated=True)]
           for img in img_list]
    ani = animation.ArtistAnimation(fig,
                                    ims,
                                    interval=1000,
                                    repeat_delay=1000,
                                    blit=True)
    ani.save('output/samples.gif', writer='imagemagick', fps=100)
Ejemplo n.º 7
0
# G: built from the per-layer dimension list loaded from dim_lst_path.
netG = Generator(
    args.input_nc,
    args.output_nc,
    dim_lst=np.load(dim_lst_path),
    quant=quant,
).cuda()
# D:
netD = Discriminator(args.input_nc).cuda()

## results_dir:
# The run directory name encodes both the optimizer and the loss settings.
optimizer_str = 'adam_lr%s_wd%s' % (args.lr, args.wd)
loss_str = 'beta%s_%s' % (args.beta, args.lc)
results_dir = os.path.join(
    'finetune_results',
    args.dataset,
    args.task,
    args.base_model_str,
    '%s_%s' % (optimizer_str, loss_str),
)
# Sub-directories for images and checkpoints, created right away.
img_dir, pth_dir = (os.path.join(results_dir, leaf) for leaf in ('img', 'pth'))
create_dir(img_dir)
create_dir(pth_dir)

# Optimizers
# Generator and discriminator share identical Adam settings.
optimizer_G, optimizer_D = (
    torch.optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.wd, betas=(0.5, 0.999))  # lr=1e-3
    for net in (netG, netD)
)

# LR schedulers:
# One cosine-annealing schedule per optimizer, with args.epochs as T_max.
lr_scheduler_G, lr_scheduler_D = (
    torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epochs)
    for optimizer in (optimizer_G, optimizer_D)
)

# load trained models:
if args.resume:
    last_epoch, loss_G_lst, loss_G_perceptual_lst, loss_G_GAN_lst, loss_D_lst, best_FID = load_ckpt_finetune(
        netG, netD, 
        optimizer_G, optimizer_D, 
        lr_scheduler_G, lr_scheduler_D, 
        path=os.path.join(results_dir, 'pth', 'latest.pth')
    )
    start_epoch = last_epoch + 1
else: