Example #1
def main():
    parser = argparse.ArgumentParser(description='Gaussian-Poisson GAN for high-resolution image blending')
    parser.add_argument('--nef', type=int, default=64, help='# of base filters in encoder')
    parser.add_argument('--ngf', type=int, default=64, help='# of base filters in decoder or G')
    parser.add_argument('--nc',  type=int, default=3,  help='# of output channels in decoder or G')
    parser.add_argument('--nBottleneck',  type=int, default=4000, help='# of output channels in encoder')
    parser.add_argument('--ndf', type=int, default=64, help='# of base filters in D')

    parser.add_argument('--image_size', type=int, default=64, help='The height / width of the input image to network')
    
    parser.add_argument('--color_weight', type=float, default=1, help='Color weight')
    parser.add_argument('--sigma', type=float, default=0.5, help='Sigma for gaussian smooth of Gaussian-Poisson Equation')
    parser.add_argument('--gradient_kernel', type=str, default='normal', help='Kernel type for calc gradient')
    parser.add_argument('--smooth_sigma', type=float, default=1, help='Sigma for gaussian smooth of Laplacian pyramid')
    
    parser.add_argument('--supervised', type=lambda x: x == 'True', default=True, help='Use unsupervised Blending GAN if False')
    parser.add_argument('--nz',  type=int, default=100, help='Size of the latent z vector')
    parser.add_argument('--n_iteration', type=int, default=1000, help='# of iterations for optimizing z')
    
    parser.add_argument('--gpu', type=int, default=0, help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--g_path', default='models/blending_gan.npz', help='Path for pretrained Blending GAN model')
    parser.add_argument('--unsupervised_path', default='models/unsupervised_blending_gan.npz', help='Path for pretrained unsupervised Blending GAN model')
    parser.add_argument('--list_path', default='', help='File for input list in csv format: obj_path;bg_path;mask_path in each line')
    parser.add_argument('--result_folder', default='blending_result', help='Name for folder storing results')

    parser.add_argument('--src_image', default='', help='Path for source image')
    parser.add_argument('--dst_image', default='', help='Path for destination image')
    parser.add_argument('--mask_image', default='', help='Path for mask image')
    parser.add_argument('--blended_image', default='', help='Where to save blended image')

    args = parser.parse_args()

    print('Input arguments:')
    for key, value in vars(args).items():
        print('\t{}: {}'.format(key, value))
    print('')

    # Init CNN model
    if args.supervised:
        G = EncoderDecoder(args.nef, args.ngf, args.nc, args.nBottleneck, image_size=args.image_size)
        print('Load pretrained Blending GAN model from {} ...'.format(args.g_path))
        serializers.load_npz(args.g_path, G)
    else:
        G = DCGAN_G(args.image_size, args.nc, args.ngf)
        print('Load pretrained unsupervised Blending GAN model from {} ...'.format(args.unsupervised_path))
        serializers.load_npz(args.unsupervised_path, G)
    
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()  # Make a specified GPU current
        G.to_gpu()                       # Copy the model to the GPU
    
    # Init image list
    if args.list_path:
        print('Load images from {} ...'.format(args.list_path))
        with open(args.list_path) as f:
            test_list = [line.strip().split(';') for line in f]
        print('\t {} images in total ...\n'.format(len(test_list)))
    else:
        test_list = [(args.src_image, args.dst_image, args.mask_image)]
    
    if not args.blended_image:
        # Init result folder
        if not os.path.isdir(args.result_folder):
            os.makedirs(args.result_folder)
        print('Results will be saved to {} ...\n'.format(args.result_folder))

    total_size = len(test_list)
    for idx in range(total_size):
        print('Processing {}/{} ...'.format(idx+1, total_size))
        
        # load image
        obj = img_as_float(imread(test_list[idx][0]))
        bg  = img_as_float(imread(test_list[idx][1]))
        mask = imread(test_list[idx][2]).astype(obj.dtype)

        blended_im = gp_gan(obj, bg, mask, G, args.image_size, args.gpu, color_weight=args.color_weight, sigma=args.sigma,
                                gradient_kernel=args.gradient_kernel, smooth_sigma=args.smooth_sigma, supervised=args.supervised,
                                nz=args.nz, n_iteration=args.n_iteration)

        if args.blended_image:
            imsave(args.blended_image, blended_im)
        else:
            imsave('{}/obj_{}_bg_{}_mask_{}.png'.format(args.result_folder,
                                                        basename(test_list[idx][0]),
                                                        basename(test_list[idx][1]),
                                                        basename(test_list[idx][2])),
                   blended_im)
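
This example shows only the main() function. To run it as a standalone script it also needs imports and an entry point; the sketch below is a minimal assumption based on the module layout of the GP-GAN reference implementation (model.py providing EncoderDecoder/DCGAN_G, gp_gan.py providing gp_gan), so adjust the module names to your own project.

import argparse
import os
from os.path import basename

from chainer import cuda, serializers
from skimage import img_as_float
from skimage.io import imread, imsave

# Assumed local modules (names follow the GP-GAN reference repository layout)
from model import EncoderDecoder, DCGAN_G
from gp_gan import gp_gan


if __name__ == '__main__':
    main()

A typical single-image invocation (the script name is assumed) would then be:

python run_gp_gan.py --src_image src.jpg --dst_image dst.jpg --mask_image mask.png --blended_image result.png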
Example #2
def main():
    parser = argparse.ArgumentParser(description='Gaussian-Poisson GAN for high-resolution image blending')
   
    parser.add_argument('--nef', type=int, default=64, help='# of base filters in encoder')
    parser.add_argument('--ngf', type=int, default=64, help='# of base filters in decoder or G')
    parser.add_argument('--nc', type=int, default=3, help='# of output channels in decoder or G')
    parser.add_argument('--nBottleneck', type=int, default=4000, help='# of output channels in encoder')
    parser.add_argument('--ndf', type=int, default=64, help='# of base filters in D')

    parser.add_argument('--image_size', type=int, default=64, help='The height / width of the input image to network')

    parser.add_argument('--color_weight', type=float, default=0.2, help='Color weight')
    parser.add_argument('--sigma', type=float, default=0.5,
                        help='Sigma for gaussian smooth of Gaussian-Poisson Equation')
    parser.add_argument('--gradient_kernel', type=str, default='normal', help='Kernel type for calc gradient')
    parser.add_argument('--smooth_sigma', type=float, default=1, help='Sigma for gaussian smooth of Laplacian pyramid')

    parser.add_argument('--supervised', type=lambda x: x == 'True', default=True,
                        help='Use unsupervised Blending GAN if False')
    parser.add_argument('--nz', type=int, default=200, help='Size of the latent z vector')
    parser.add_argument('--n_iteration', type=int, default=1500, help='# of iterations for optimizing z')

    parser.add_argument('--gpu', type=int, default=0, help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--g_path', default='../models/blending_gan.npz', help='Path for pretrained Blending GAN model')
    parser.add_argument('--unsupervised_path', default='../models/unsupervised_blending_gan.npz',
                        help='Path for pretrained unsupervised Blending GAN model')
    parser.add_argument('--list_path', default='',
                        help='File for input list in csv format: obj_path;bg_path;mask_path in each line')
    parser.add_argument('--result_folder', default='blending_result', help='Name for folder storing results')

    parser.add_argument('--src_image', default='', help='Path for source image')
    parser.add_argument('--dst_image', default='', help='Path for destination image')
    parser.add_argument('--mask_image', default='', help='Path for mask image')
    parser.add_argument('--blended_image', default='', help='Where to save blended image')

    parser.add_argument('--car_type', default='rangerover', help='specify the car type')

    args = parser.parse_args()


    for key, value in vars(args).items():
        print('\t{}: {}'.format(key, value))

    # Init CNN model
    if args.supervised:
        G = EncoderDecoder(args.nef, args.ngf, args.nc, args.nBottleneck, image_size=args.image_size)
        print('Load pretrained Blending GAN model from {} ...'.format(args.g_path))
        serializers.load_npz(args.g_path, G)
    else:
        chainer.config.use_cudnn = 'never'
        G = DCGAN_G(args.image_size, args.nc, args.ngf)
        print('Load pretrained unsupervised Blending GAN model from {} ...'.format(args.unsupervised_path))
        serializers.load_npz(args.unsupervised_path, G)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()  # Make a specified GPU current
        G.to_gpu()  # Copy the model to the GPU

    # Load the rendered car image named by the Blender script
    car_image = cv2.imread(args.src_image)

    # Load the matching background image named by the Blender script
    cam_im_path = 'original/' + args.src_image.split('/')[2]
    camera_image = cv2.imread(cam_im_path)

    # Create the object mask from the rendered car image
    mask = create_mask(car_image)

    # Create the copy-paste composite and save it
    composite = create_composite(car_image, mask, camera_image)
    cv2.imwrite('composites/' + args.car_type + '/' + args.src_image.split('/')[2], composite)

    # Harmonize the composite with GP-GAN
    GPGAN_result = harmonize(car_image, camera_image, mask, G, args.image_size, args.gpu,
                             args.color_weight, args.sigma, args.gradient_kernel, args.smooth_sigma,
                             args.supervised, args.nz, args.n_iteration)

    # Save the result
    cv2.imwrite('GPGAN_output/' + args.car_type + '/' + args.src_image.split('/')[2], GPGAN_result)
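
The helpers create_mask, create_composite, and harmonize are defined elsewhere in this project and are not shown here; harmonize presumably wraps the gp_gan call from Example #1 with the same arguments. The sketch below is a hypothetical version of the two compositing helpers, assuming the rendered car sits on a near-black background; it is not the author's actual code.

import cv2
import numpy as np


def create_mask(car_image, threshold=10):
    # Hypothetical: mark every pixel brighter than the threshold as foreground (the car).
    gray = cv2.cvtColor(car_image, cv2.COLOR_BGR2GRAY)
    return (gray > threshold).astype(np.uint8)


def create_composite(car_image, mask, camera_image):
    # Hypothetical: paste the masked car pixels onto the background image
    # (both images are assumed to have the same height and width).
    mask3 = np.repeat(mask[:, :, np.newaxis], 3, axis=2)
    return np.where(mask3 == 1, car_image, camera_image)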
Example #3
def main():
    parser = argparse.ArgumentParser(description='Train Blending GAN')
    parser.add_argument('--nef',
                        type=int,
                        default=64,
                        help='# of base filters in encoder')
    parser.add_argument('--ngf',
                        type=int,
                        default=64,
                        help='# of base filters in decoder')
    parser.add_argument('--nc',
                        type=int,
                        default=3,
                        help='# of output channels in decoder')
    parser.add_argument('--nBottleneck',
                        type=int,
                        default=4000,
                        help='# of output channels in encoder')
    parser.add_argument('--ndf',
                        type=int,
                        default=64,
                        help='# of base filters in D')

    parser.add_argument('--lr_d',
                        type=float,
                        default=0.0002,
                        help='Learning rate for Critic, default=0.0002')
    parser.add_argument('--lr_g',
                        type=float,
                        default=0.002,
                        help='Learning rate for Generator, default=0.002')
    parser.add_argument('--beta1',
                        type=float,
                        default=0.5,
                        help='Beta for Adam, default=0.5')
    parser.add_argument('--l2_weight',
                        type=float,
                        default=0.999,
                        help='Weight for l2 loss, default=0.999')

    parser.add_argument('--gpu',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--n_epoch',
                        type=int,
                        default=25,
                        help='# of epochs to train for')

    parser.add_argument('--data_root', help='Path to dataset')
    parser.add_argument('--load_size',
                        type=int,
                        default=64,
                        help='Scale image to load_size')
    parser.add_argument(
        '--image_size',
        type=int,
        default=64,
        help='The height / width of the input image to network')
    parser.add_argument('--ratio',
                        type=float,
                        default=0.5,
                        help='Ratio for center square size v.s. image_size')
    parser.add_argument('--val_ratio',
                        type=float,
                        default=0.05,
                        help='Ratio for validation set v.s. data set')

    parser.add_argument('--d_iters',
                        type=int,
                        default=5,
                        help='# of D iters per each G iter')
    parser.add_argument('--clamp_lower',
                        type=float,
                        default=-0.01,
                        help='Lower bound for clipping')
    parser.add_argument('--clamp_upper',
                        type=float,
                        default=0.01,
                        help='Upper bound for clipping')

    parser.add_argument('--experiment',
                        default='encoder_decoder_blending_result',
                        help='Where to store samples and models')
    parser.add_argument('--test_folder',
                        default='samples',
                        help='Where to store test results')
    parser.add_argument('--workers',
                        type=int,
                        default=10,
                        help='# of data loading workers')
    parser.add_argument('--batch_size',
                        type=int,
                        default=64,
                        help='Input batch size')
    parser.add_argument('--test_size',
                        type=int,
                        default=64,
                        help='Batch size for testing')

    parser.add_argument('--train_samples',
                        type=int,
                        default=150000,
                        help='# of training examples')
    parser.add_argument('--test_samples',
                        type=int,
                        default=256,
                        help='# of testing examples')

    parser.add_argument('--manual_seed',
                        type=int,
                        default=5,
                        help='Manual seed')

    parser.add_argument('--resume',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--snapshot_interval',
                        type=int,
                        default=1,
                        help='Interval of snapshot (epochs)')
    parser.add_argument('--print_interval',
                        type=int,
                        default=1,
                        help='Interval of printing log to console (iteration)')
    parser.add_argument('--plot_interval',
                        type=int,
                        default=10,
                        help='Interval of plot (iteration)')
    args = parser.parse_args()

    random.seed(args.manual_seed)

    print('Input arguments:')
    for key, value in vars(args).items():
        print('\t{}: {}'.format(key, value))
    print('')

    # Set up G & D
    print('Create & Init models ...')
    print('\tInit G network ...')
    G = EncoderDecoder(args.nef,
                       args.ngf,
                       args.nc,
                       args.nBottleneck,
                       image_size=args.image_size,
                       conv_init=init_conv,
                       bn_init=init_bn)
    print('\tInit D network ...')
    D = DCGAN_D(args.image_size,
                args.ndf,
                conv_init=init_conv,
                bn_init=init_bn)
    if args.gpu >= 0:
        print('\tCopy models to gpu {} ...'.format(args.gpu))
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        G.to_gpu()  # Copy the model to the GPU
        D.to_gpu()
    print('Init models done ...\n')
    # Setup an optimizer
    optimizer_d = make_optimizer(D, args.lr_d, args.beta1)
    optimizer_g = make_optimizer(G, args.lr_g, args.beta1)

    ########################################################################################################################
    # Setup dataset & iterator
    print('Load images from {} ...'.format(args.data_root))
    folders = sorted([
        folder for folder in os.listdir(args.data_root)
        if os.path.isdir(os.path.join(args.data_root, folder))
    ])
    val_end = int(args.val_ratio * len(folders))
    print('\t{} folders in total, {} val folders ...'.format(
        len(folders), val_end))
    trainset = BlendingDataset(args.train_samples, folders[val_end:],
                               args.data_root, args.ratio, args.load_size,
                               args.image_size)
    valset = BlendingDataset(args.test_samples, folders[:val_end],
                             args.data_root, args.ratio, args.load_size,
                             args.image_size)
    print('\tTrainset contains {} image files'.format(len(trainset)))
    print('\tValset contains {} image files'.format(len(valset)))
    print('')
    train_iter = chainer.iterators.MultiprocessIterator(
        trainset,
        args.batch_size,
        n_processes=args.workers,
        n_prefetch=args.workers)
    ########################################################################################################################

    # Set up a trainer
    updater = EncoderDecoderBlendingUpdater(models=(G, D),
                                            args=args,
                                            iterator=train_iter,
                                            optimizer={
                                                'main': optimizer_g,
                                                'D': optimizer_d
                                            },
                                            device=args.gpu)
    trainer = training.Trainer(updater, (args.n_epoch, 'epoch'),
                               out=args.experiment)

    # Snapshot
    snapshot_interval = (args.snapshot_interval, 'epoch')
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(G,
                                              'g_epoch_{.updater.epoch}.npz'),
                   trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(D,
                                              'd_epoch_{.updater.epoch}.npz'),
                   trigger=snapshot_interval)

    # Display
    print_interval = (args.print_interval, 'iteration')
    trainer.extend(extensions.LogReport(trigger=print_interval))
    trainer.extend(extensions.PrintReport(
        ['iteration', 'main/loss', 'D/loss', 'main/l2_loss']),
                   trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=args.print_interval))

    trainer.extend(extensions.dump_graph('D/loss', out_name='TrainGraph.dot'))

    # Plot
    plot_interval = (args.plot_interval, 'iteration')

    trainer.extend(extensions.PlotReport(['main/loss'],
                                         'iteration',
                                         file_name='loss.png',
                                         trigger=plot_interval),
                   trigger=plot_interval)
    trainer.extend(extensions.PlotReport(['D/loss'],
                                         'iteration',
                                         file_name='d_loss.png',
                                         trigger=plot_interval),
                   trigger=plot_interval)
    trainer.extend(extensions.PlotReport(['main/l2_loss'],
                                         'iteration',
                                         file_name='l2_loss.png',
                                         trigger=plot_interval),
                   trigger=plot_interval)

    # Eval
    path = os.path.join(args.experiment, args.test_folder)
    if not os.path.isdir(path):
        os.makedirs(path)
    print('Saving samples to {} ...\n'.format(path))

    train_batch = [trainset[idx][0] for idx in range(args.test_size)]
    train_v = Variable(chainer.dataset.concat_examples(train_batch, args.gpu),
                       volatile='on')
    trainer.extend(sampler(G, path, train_v, 'fake_samples_train_{}.png'),
                   trigger=plot_interval)

    val_batch = [valset[idx][0] for idx in range(args.test_size)]
    val_v = Variable(chainer.dataset.concat_examples(val_batch, args.gpu),
                     volatile='on')
    trainer.extend(sampler(G, path, val_v, 'fake_samples_val_{}.png'),
                   trigger=plot_interval)

    if args.resume:
        # Resume from a snapshot
        print('Resume from {} ... \n'.format(args.resume))
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    print('Training start ...\n')
    trainer.run()
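
make_optimizer, init_conv, init_bn, and sampler come from elsewhere in the training code and are not part of this snippet. A minimal sketch of make_optimizer consistent with the lr_d/lr_g and beta1 arguments above (an Adam optimizer) could look like the following; the WGAN-style weight clipping implied by clamp_lower/clamp_upper is presumably applied inside the updater rather than here.

import chainer


def make_optimizer(model, alpha, beta1):
    # Adam with the learning rate (alpha) and beta1 taken from the command-line arguments.
    optimizer = chainer.optimizers.Adam(alpha=alpha, beta1=beta1)
    optimizer.setup(model)
    return optimizer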