def main():
    """CLI entry point: blend (object, background, mask) triples with a pretrained GP-GAN.

    Loads either the supervised encoder-decoder generator or the unsupervised
    DCGAN generator, then runs Gaussian-Poisson blending over either a
    ``;``-separated list file or a single triple given on the command line,
    saving each result as a PNG.
    """
    args = _parse_args()

    print('Input arguments:')
    for key, value in vars(args).items():
        print('\t{}: {}'.format(key, value))
    print('')

    G = _load_generator(args)

    # Init image list: one (obj, bg, mask) path triple per entry.
    if args.list_path:
        print('Load images from {} ...'.format(args.list_path))
        with open(args.list_path) as f:
            test_list = [line.strip().split(';') for line in f]
        print('\t {} images in total ...\n'.format(len(test_list)))
    else:
        test_list = [(args.src_image, args.dst_image, args.mask_image)]

    if not args.blended_image:
        # No explicit output file: results go into a folder, one file per triple.
        if not os.path.isdir(args.result_folder):
            os.makedirs(args.result_folder)
        print('Result will save to {} ...\n'.format(args.result_folder))

    total_size = len(test_list)
    for idx in range(total_size):
        print('Processing {}/{} ...'.format(idx + 1, total_size))

        # Load images as float; the mask is cast to the same dtype as the object.
        obj = img_as_float(imread(test_list[idx][0]))
        bg = img_as_float(imread(test_list[idx][1]))
        mask = imread(test_list[idx][2]).astype(obj.dtype)

        blended_im = gp_gan(obj, bg, mask, G, args.image_size, args.gpu,
                            color_weight=args.color_weight, sigma=args.sigma,
                            gradient_kernel=args.gradient_kernel,
                            smooth_sigma=args.smooth_sigma,
                            supervised=args.supervised,
                            nz=args.nz, n_iteration=args.n_iteration)

        if args.blended_image:
            imsave(args.blended_image, blended_im)
        else:
            imsave('{}/obj_{}_bg_{}_mask_{}.png'.format(
                args.result_folder, basename(test_list[idx][0]),
                basename(test_list[idx][1]), basename(test_list[idx][2])),
                blended_im)


def _parse_args():
    """Build and parse the command-line arguments for GP-GAN blending."""
    parser = argparse.ArgumentParser(description='Gaussian-Poisson GAN for high-resolution image blending')
    parser.add_argument('--nef', type=int, default=64, help='# of base filters in encoder')
    parser.add_argument('--ngf', type=int, default=64, help='# of base filters in decoder or G')
    parser.add_argument('--nc', type=int, default=3, help='# of output channels in decoder or G')
    parser.add_argument('--nBottleneck', type=int, default=4000, help='# of output channels in encoder')
    parser.add_argument('--ndf', type=int, default=64, help='# of base filters in D')
    parser.add_argument('--image_size', type=int, default=64, help='The height / width of the input image to network')
    parser.add_argument('--color_weight', type=float, default=1, help='Color weight')
    parser.add_argument('--sigma', type=float, default=0.5, help='Sigma for gaussian smooth of Gaussian-Poisson Equation')
    parser.add_argument('--gradient_kernel', type=str, default='normal', help='Kernel type for calc gradient')
    parser.add_argument('--smooth_sigma', type=float, default=1, help='Sigma for gaussian smooth of Laplacian pyramid')
    # NOTE: only the literal string 'True' enables supervised mode; any other
    # value (including 'true') selects the unsupervised model.
    parser.add_argument('--supervised', type=lambda x: x == 'True', default=True,
                        help='Use unsupervised Blending GAN if False')
    parser.add_argument('--nz', type=int, default=100, help='Size of the latent z vector')
    parser.add_argument('--n_iteration', type=int, default=1000, help='# of iterations for optimizing z')
    parser.add_argument('--gpu', type=int, default=0, help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--g_path', default='models/blending_gan.npz', help='Path for pretrained Blending GAN model')
    parser.add_argument('--unsupervised_path', default='models/unsupervised_blending_gan.npz',
                        help='Path for pretrained unsupervised Blending GAN model')
    parser.add_argument('--list_path', default='',
                        help='File for input list in csv format: obj_path;bg_path;mask_path in each line')
    parser.add_argument('--result_folder', default='blending_result', help='Name for folder storing results')
    parser.add_argument('--src_image', default='', help='Path for source image')
    parser.add_argument('--dst_image', default='', help='Path for destination image')
    parser.add_argument('--mask_image', default='', help='Path for mask image')
    parser.add_argument('--blended_image', default='', help='Where to save blended image')
    return parser.parse_args()


def _load_generator(args):
    """Instantiate the generator, load its pretrained weights, and move it to the GPU if requested."""
    if args.supervised:
        G = EncoderDecoder(args.nef, args.ngf, args.nc, args.nBottleneck, image_size=args.image_size)
        print('Load pretrained Blending GAN model from {} ...'.format(args.g_path))
        serializers.load_npz(args.g_path, G)
    else:
        G = DCGAN_G(args.image_size, args.nc, args.ngf)
        print('Load pretrained unsupervised Blending GAN model from {} ...'.format(args.unsupervised_path))
        serializers.load_npz(args.unsupervised_path, G)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()  # Make a specified GPU current
        G.to_gpu()  # Copy the model to the GPU
    return G
def main():
    """CLI entry point: composite a rendered car onto its camera image and harmonize with GP-GAN.

    Reads the rendered car image and the matching original camera frame,
    builds a mask and naive composite, writes the composite to
    ``composites/<car_type>/``, then harmonizes it with the Blending GAN and
    writes the result to ``GPGAN_output/<car_type>/``.
    """
    parser = argparse.ArgumentParser(description='Gaussian-Poisson GAN for high-resolution image blending')
    parser.add_argument('--nef', type=int, default=64, help='# of base filters in encoder')
    parser.add_argument('--ngf', type=int, default=64, help='# of base filters in decoder or G')
    parser.add_argument('--nc', type=int, default=3, help='# of output channels in decoder or G')
    parser.add_argument('--nBottleneck', type=int, default=4000, help='# of output channels in encoder')
    parser.add_argument('--ndf', type=int, default=64, help='# of base filters in D')
    parser.add_argument('--image_size', type=int, default=64, help='The height / width of the input image to network')
    parser.add_argument('--color_weight', type=float, default=0.2, help='Color weight')
    parser.add_argument('--sigma', type=float, default=0.5, help='Sigma for gaussian smooth of Gaussian-Poisson Equation')
    parser.add_argument('--gradient_kernel', type=str, default='normal', help='Kernel type for calc gradient')
    parser.add_argument('--smooth_sigma', type=float, default=1, help='Sigma for gaussian smooth of Laplacian pyramid')
    # NOTE: only the literal string 'True' enables supervised mode.
    parser.add_argument('--supervised', type=lambda x: x == 'True', default=True,
                        help='Use unsupervised Blending GAN if False')
    parser.add_argument('--nz', type=int, default=200, help='Size of the latent z vector')
    parser.add_argument('--n_iteration', type=int, default=1500, help='# of iterations for optimizing z')
    parser.add_argument('--gpu', type=int, default=0, help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--g_path', default='../models/blending_gan.npz', help='Path for pretrained Blending GAN model')
    parser.add_argument('--unsupervised_path', default='../models/unsupervised_blending_gan.npz',
                        help='Path for pretrained unsupervised Blending GAN model')
    parser.add_argument('--list_path', default='',
                        help='File for input list in csv format: obj_path;bg_path;mask_path in each line')
    parser.add_argument('--result_folder', default='blending_result', help='Name for folder storing results')
    parser.add_argument('--src_image', default='', help='Path for source image')
    parser.add_argument('--dst_image', default='', help='Path for destination image')
    parser.add_argument('--mask_image', default='', help='Path for mask image')
    parser.add_argument('--blended_image', default='', help='Where to save blended image')
    parser.add_argument('--car_type', default='rangerover', help='specify the car type')
    args = parser.parse_args()

    for key, value in vars(args).items():
        print('\t{}: {}'.format(key, value))

    # Init CNN model
    if args.supervised:
        G = EncoderDecoder(args.nef, args.ngf, args.nc, args.nBottleneck, image_size=args.image_size)
        print('Load pretrained Blending GAN model from {} ...'.format(args.g_path))
        serializers.load_npz(args.g_path, G)
    else:
        chainer.config.use_cudnn = 'never'
        G = DCGAN_G(args.image_size, args.nc, args.ngf)
        print('Load pretrained unsupervised Blending GAN model from {} ...'.format(args.unsupervised_path))
        serializers.load_npz(args.unsupervised_path, G)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()  # Make a specified GPU current
        G.to_gpu()  # Copy the model to the GPU

    # Load car image based on name that was given by blender script.
    # cv2.imread returns None on failure instead of raising, so fail fast here
    # rather than crashing later inside create_mask/create_composite.
    car_image = cv2.imread(args.src_image)
    if car_image is None:
        raise IOError('Failed to read source (car) image: {}'.format(args.src_image))

    # NOTE(review): assumes src_image has exactly the form <dir>/<subdir>/<name>;
    # the third '/'-separated component is reused as the output file name.
    # Verify against the blender script that produces these paths.
    image_name = args.src_image.split('/')[2]

    # Load background image based on name that was given by blender script.
    cam_im_path = 'original/' + image_name
    camera_image = cv2.imread(cam_im_path)
    if camera_image is None:
        raise IOError('Failed to read background (camera) image: {}'.format(cam_im_path))

    # Create mask and naive composite, and save the composite for inspection.
    mask = create_mask(car_image)
    composite = create_composite(car_image, mask, camera_image)
    cv2.imwrite('composites/' + args.car_type + '/' + image_name, composite)

    # Harmonize the composite with GP-GAN and save the result.
    GPGAN_result = harmonize(car_image, camera_image, mask, G, args.image_size, args.gpu,
                             args.color_weight, args.sigma, args.gradient_kernel,
                             args.smooth_sigma, args.supervised, args.nz, args.n_iteration)
    cv2.imwrite('GPGAN_output/' + args.car_type + '/' + image_name, GPGAN_result)
def main():
    """Train the supervised Blending GAN (encoder-decoder G vs. critic D) with a Chainer Trainer.

    Parses CLI arguments, builds G and D, sets up optimizers, loads the
    blending dataset split into train/val folders, wires trainer extensions
    (snapshots, logging, plots, periodic sample dumps), optionally resumes
    from a snapshot, and runs the training loop.
    """
    parser = argparse.ArgumentParser(description='Train Blending GAN')
    parser.add_argument('--nef', type=int, default=64, help='# of base filters in encoder')
    parser.add_argument('--ngf', type=int, default=64, help='# of base filters in decoder')
    parser.add_argument('--nc', type=int, default=3, help='# of output channels in decoder')
    parser.add_argument('--nBottleneck', type=int, default=4000, help='# of output channels in encoder')
    parser.add_argument('--ndf', type=int, default=64, help='# of base filters in D')
    parser.add_argument('--lr_d', type=float, default=0.0002, help='Learning rate for Critic, default=0.0002')
    parser.add_argument('--lr_g', type=float, default=0.002, help='Learning rate for Generator, default=0.002')
    parser.add_argument('--beta1', type=float, default=0.5, help='Beta for Adam, default=0.5')
    parser.add_argument('--l2_weight', type=float, default=0.999, help='Weight for l2 loss, default=0.999')
    parser.add_argument('--gpu', type=int, default=0, help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--n_epoch', type=int, default=25, help='# of epochs to train for')
    parser.add_argument('--data_root', help='Path to dataset')
    parser.add_argument('--load_size', type=int, default=64, help='Scale image to load_size')
    parser.add_argument(
        '--image_size', type=int, default=64,
        help='The height / width of the input image to network')
    parser.add_argument('--ratio', type=float, default=0.5, help='Ratio for center square size v.s. image_size')
    parser.add_argument('--val_ratio', type=float, default=0.05, help='Ratio for validation set v.s. data set')
    parser.add_argument('--d_iters', type=int, default=5, help='# of D iters per each G iter')
    parser.add_argument('--clamp_lower', type=float, default=-0.01, help='Lower bound for clipping')
    parser.add_argument('--clamp_upper', type=float, default=0.01, help='Upper bound for clipping')
    parser.add_argument('--experiment', default='encoder_decoder_blending_result', help='Where to store samples and models')
    parser.add_argument('--test_folder', default='samples', help='Where to store test results')
    parser.add_argument('--workers', type=int, default=10, help='# of data loading workers')
    parser.add_argument('--batch_size', type=int, default=64, help='Input batch size')
    parser.add_argument('--test_size', type=int, default=64, help='Batch size for testing')
    parser.add_argument('--train_samples', type=int, default=150000, help='# of training examples')
    parser.add_argument('--test_samples', type=int, default=256, help='# of testing examples')
    parser.add_argument('--manual_seed', type=int, default=5, help='Manul seed')
    parser.add_argument('--resume', default='', help='Resume the training from snapshot')
    parser.add_argument('--snapshot_interval', type=int, default=1, help='Interval of snapshot (epochs)')
    parser.add_argument('--print_interval', type=int, default=1, help='Interval of printing log to console (iteration)')
    parser.add_argument('--plot_interval', type=int, default=10, help='Interval of plot (iteration)')
    args = parser.parse_args()

    # NOTE(review): only Python's `random` module is seeded here; numpy/cupy
    # RNGs (if the dataset or updater uses them) are not — confirm whether
    # full reproducibility is expected.
    random.seed(args.manual_seed)

    print('Input arguments:')
    for key, value in vars(args).items():
        print('\t{}: {}'.format(key, value))
    print('')

    # Set up G & D
    print('Create & Init models ...')
    print('\tInit G network ...')
    G = EncoderDecoder(args.nef, args.ngf, args.nc, args.nBottleneck, image_size=args.image_size,
                       conv_init=init_conv, bn_init=init_bn)
    print('\tInit D network ...')
    D = DCGAN_D(args.image_size, args.ndf, conv_init=init_conv, bn_init=init_bn)
    if args.gpu >= 0:
        print('\tCopy models to gpu {} ...'.format(args.gpu))
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        G.to_gpu()  # Copy the model to the GPU
        D.to_gpu()
    print('Init models done ...\n')

    # Setup an optimizer for each model (separate learning rates for G and D).
    optimizer_d = make_optimizer(D, args.lr_d, args.beta1)
    optimizer_g = make_optimizer(G, args.lr_g, args.beta1)

    ########################################################################################################################
    # Setup dataset & iterator.
    # Each subfolder of data_root is one image folder; the first val_ratio
    # fraction (sorted order) becomes validation, the rest training.
    print('Load images from {} ...'.format(args.data_root))
    folders = sorted([
        folder for folder in os.listdir(args.data_root)
        if os.path.isdir(os.path.join(args.data_root, folder))
    ])
    val_end = int(args.val_ratio * len(folders))
    print('\t{} folders in total, {} val folders ...'.format(
        len(folders), val_end))
    trainset = BlendingDataset(args.train_samples, folders[val_end:], args.data_root, args.ratio, args.load_size, args.image_size)
    valset = BlendingDataset(args.test_samples, folders[:val_end], args.data_root, args.ratio, args.load_size, args.image_size)
    print('\tTrainset contains {} image files'.format(len(trainset)))
    print('\tValset contains {} image files'.format(len(valset)))
    print('')

    train_iter = chainer.iterators.MultiprocessIterator(
        trainset, args.batch_size, n_processes=args.workers,
        n_prefetch=args.workers)

    ########################################################################################################################
    # Set up a trainer: the custom updater runs the adversarial G/D steps.
    updater = EncoderDecoderBlendingUpdater(models=(G, D), args=args, iterator=train_iter,
                                            optimizer={
                                                'main': optimizer_g,
                                                'D': optimizer_d
                                            },
                                            device=args.gpu)
    trainer = training.Trainer(updater, (args.n_epoch, 'epoch'), out=args.experiment)

    # Snapshot: save full trainer state plus standalone G and D weights every
    # snapshot_interval epochs.
    snapshot_interval = (args.snapshot_interval, 'epoch')
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(G, 'g_epoch_{.updater.epoch}.npz'),
                   trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(D, 'd_epoch_{.updater.epoch}.npz'),
                   trigger=snapshot_interval)

    # Display: periodic console logging and computational-graph dump.
    print_interval = (args.print_interval, 'iteration')
    trainer.extend(extensions.LogReport(trigger=print_interval))
    trainer.extend(extensions.PrintReport(
        ['iteration', 'main/loss', 'D/loss', 'main/l2_loss']), trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=args.print_interval))
    trainer.extend(extensions.dump_graph('D/loss', out_name='TrainGraph.dot'))

    # Plot: loss curves for G, D, and the l2 reconstruction term.
    plot_interval = (args.plot_interval, 'iteration')
    trainer.extend(extensions.PlotReport(['main/loss'], 'iteration', file_name='loss.png', trigger=plot_interval),
                   trigger=plot_interval)
    trainer.extend(extensions.PlotReport(['D/loss'], 'iteration', file_name='d_loss.png', trigger=plot_interval),
                   trigger=plot_interval)
    trainer.extend(extensions.PlotReport(['main/l2_loss'], 'iteration', file_name='l2_loss.png', trigger=plot_interval),
                   trigger=plot_interval)

    # Eval: dump generator samples for fixed train/val batches at each plot interval.
    path = os.path.join(args.experiment, args.test_folder)
    if not os.path.isdir(path):
        os.makedirs(path)
    print('Saving samples to {} ...\n'.format(path))

    # volatile='on' marks these inputs as inference-only (no backprop graph);
    # this is pre-Chainer-v2 API.
    train_batch = [trainset[idx][0] for idx in range(args.test_size)]
    train_v = Variable(chainer.dataset.concat_examples(train_batch, args.gpu), volatile='on')
    trainer.extend(sampler(G, path, train_v, 'fake_samples_train_{}.png'), trigger=plot_interval)

    val_batch = [valset[idx][0] for idx in range(args.test_size)]
    val_v = Variable(chainer.dataset.concat_examples(val_batch, args.gpu), volatile='on')
    trainer.extend(sampler(G, path, val_v, 'fake_samples_val_{}.png'), trigger=plot_interval)

    if args.resume:
        # Resume from a snapshot (restores trainer, updater, and model state).
        print('Resume from {} ... \n'.format(args.resume))
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    print('Training start ...\n')
    trainer.run()