def main(argv): # load image and turn into grayscale img = rgb2gray(imread('media/newyork.png')) # generate data inputs, outputs = generate_data_from_image( img, input_mask, output_mask, 220000) # split data into training, test, and validation sets inputs = split(inputs, [100000, 200000], 1) outputs = split(outputs, [100000, 200000], 1) data_train = inputs[0], outputs[0] data_test = inputs[1], outputs[1] data_valid = inputs[2], outputs[2] # compute normalizing transformation pre = WhiteningPreconditioner(*data_train) # intialize model model = MCGSM( dim_in=data_train[0].shape[0], dim_out=data_train[1].shape[0], num_components=8, num_scales=4, num_features=32) # fit parameters model.initialize(*pre(*data_train)) model.train(*chain(pre(*data_train), pre(*data_valid)), parameters={ 'verbosity': 1, 'max_iter': 1000, 'threshold': 1e-7, 'val_iter': 5, 'val_look_ahead': 10, 'num_grad': 20, }) # evaluate model print 'Average log-likelihood: {0:.4f} [bit/px]'.format( -model.evaluate(data_test[0], data_test[1], pre)) # synthesize a new image img_sample = sample_image(img, model, input_mask, output_mask, pre) imwrite('newyork_sample.png', img_sample, cmap='gray', vmin=min(img), vmax=max(img)) # save model with open('image_model.pck', 'wb') as handle: dump({ 'model': model, 'input_mask': input_mask, 'output_mask': output_mask}, handle, 1) return 0
def main(argv):
    """Train a two-stage image model (luminance, then color) and sample from it.

    Fits one model to the luminance channel and a second conditional model
    to all three YCbCr channels, synthesizes a new image channel-wise, and
    pickles both models with their neighborhood masks.

    @param argv: command-line arguments (unused)
    @return: 0 on success
    """
    # load image and convert to YCbCr
    # (NOTE(review): original comment said "grayscale", but rgb2ycc yields a
    # luminance/chrominance representation)
    img = rgb2ycc(imread('media/newyork.png'))

    # generate masks for grayscale and color model, respectively
    input_mask0, output_mask0 = generate_masks(7, 1)
    input_mask1, output_mask1 = generate_masks([5, 7, 7], 1, [1, 0, 0])

    # train model: intensity model on channel 0, color model on all channels
    model0, pre0 = train_model(img[:, :, 0], input_mask0, output_mask0)
    model1, pre1 = train_model(img, input_mask1, output_mask1)

    # synthesize a new image
    img_sample = img.copy()

    # sample intensities first
    img_sample[:, :, 0] = sample_image(
        img_sample[:, :, 0], model0, input_mask0, output_mask0, pre0)

    # sample color conditioned on the sampled intensities
    img_sample = sample_image(
        img_sample, model1, input_mask1, output_mask1, pre1)

    # convert back to RGB and enforce constraints
    img_sample = ycc2rgb(img_sample)

    imwrite('newyork_sample.png', img_sample, vmin=0, vmax=255)

    # save both models and their masks (pickle protocol 1)
    with open('image_model.pck', 'wb') as handle:
        dump({
            'model0': model0,
            'model1': model1,
            'input_mask0': input_mask0,
            'input_mask1': input_mask1,
            'output_mask0': output_mask0,
            'output_mask1': output_mask1}, handle, 1)

    return 0
def main(argv):
    """Fit separate luminance and color models to an image and synthesize
    a new sample, saving both models afterwards.

    @param argv: command-line arguments (unused)
    @return: 0 on success
    """
    # read source image and move it into luminance/chrominance space
    img = rgb2ycc(imread('media/newyork.png'))

    # causal neighborhood masks: one pair for the grayscale model,
    # one pair for the color model
    input_mask0, output_mask0 = generate_masks(7, 1)
    input_mask1, output_mask1 = generate_masks([5, 7, 7], 1, [1, 0, 0])

    # intensity model is fit on the luminance channel only,
    # the color model on all three channels
    model0, pre0 = train_model(img[:, :, 0], input_mask0, output_mask0)
    model1, pre1 = train_model(img, input_mask1, output_mask1)

    # synthesis happens in two passes over a copy of the original
    img_sample = img.copy()

    # pass 1: draw new luminance values
    img_sample[:, :, 0] = sample_image(
        img_sample[:, :, 0], model0, input_mask0, output_mask0, pre0)

    # pass 2: draw color conditioned on the sampled luminance
    img_sample = sample_image(
        img_sample, model1, input_mask1, output_mask1, pre1)

    # back to RGB; clip to the valid 8-bit range when writing
    img_sample = ycc2rgb(img_sample)
    imwrite('newyork_sample.png', img_sample, vmin=0, vmax=255)

    # persist both models together with their masks (pickle protocol 1)
    model_dict = {
        'model0': model0,
        'model1': model1,
        'input_mask0': input_mask0,
        'input_mask1': input_mask1,
        'output_mask0': output_mask0,
        'output_mask1': output_mask1}
    with open('image_model.pck', 'wb') as handle:
        dump(model_dict, handle, 1)

    return 0
def main(argv): # load image and turn into grayscale img = rgb2gray(imread('media/newyork.png')) # generate data inputs, outputs = generate_data_from_image(img, input_mask, output_mask, 220000) # split data into training, test, and validation sets inputs = split(inputs, [100000, 200000], 1) outputs = split(outputs, [100000, 200000], 1) data_train = inputs[0], outputs[0] data_test = inputs[1], outputs[1] data_valid = inputs[2], outputs[2] # compute normalizing transformation pre = WhiteningPreconditioner(*data_train) # intialize model model = MCGSM(dim_in=data_train[0].shape[0], dim_out=data_train[1].shape[0], num_components=8, num_scales=4, num_features=32) # fit parameters model.initialize(*pre(*data_train)) model.train(*chain(pre(*data_train), pre(*data_valid)), parameters={ 'verbosity': 1, 'max_iter': 1000, 'threshold': 1e-7, 'val_iter': 5, 'val_look_ahead': 10, 'num_grad': 20, }) # evaluate model print 'Average log-likelihood: {0:.4f} [bit/px]'.format( -model.evaluate(data_test[0], data_test[1], pre)) # synthesize a new image img_sample = sample_image(img, model, input_mask, output_mask, pre) imwrite('newyork_sample.png', img_sample, cmap='gray', vmin=min(img), vmax=max(img)) # save model with open('image_model.pck', 'wb') as handle: dump( { 'model': model, 'input_mask': input_mask, 'output_mask': output_mask }, handle, 1) return 0
def main(argv):
    """Sample an image from a trained recurrent image model.

    Loads a model from an experiment file and synthesizes an image, either
    from plain white noise (when --data is not given) or from noise whose
    marginal distribution is matched to a reference dataset; the result is
    written to --output.

    @param argv: command-line arguments
    @return: 0 on success
    """
    parser = ArgumentParser(argv[0], description=__doc__)
    parser.add_argument('model', type=str)
    parser.add_argument('--num_rows', '-r', type=int, default=256)
    parser.add_argument('--num_cols', '-c', type=int, default=256)
    parser.add_argument('--data', '-d', type=str, default=None)
    parser.add_argument('--log', '-L', type=int, default=0)
    parser.add_argument('--output', '-o', type=str, default='sample.png')
    parser.add_argument('--margin', '-M', type=int, default=8)

    args = parser.parse_args(argv[1:])

    # load trained model from the experiment file
    model = Experiment(args.model)['model']

    if isinstance(model, PatchRIDE):
        # patch-based models generate a fixed-size image directly
        img = model.sample()[0]
        imwrite(args.output, imformat(img, vmin=0, vmax=255, symmetric=False))
    else:
        if args.data is None:
            # initialize image with white noise
            img_init = randn(1, args.num_rows + args.margin * 2,
                args.num_cols + args.margin * 2,
                sum(model.num_channels)) / 10.

            img = model.sample(img_init)

            if args.log:
                # linearize and gamma-correct
                img = power(exp(img), .45)

            if args.margin > 0:
                # crop the border that only served as sampling context
                img = img[:, args.margin:-args.margin, args.margin:-args.margin]

            if img.shape[-1] == 3:
                # clip to the valid 8-bit range and save as RGB
                img[img > 255.] = 255.
                img[img < 0.] = 0.
                imwrite(args.output, asarray(img[0, :, :, :], dtype='uint8'))
            else:
                imwrite(args.output, imformat(img[0, :, :, 0], perc=99))
        else:
            # value range depends on whether data is an image or a MAT file
            if args.data.lower()[-4:] in ['.gif', '.png', '.jpg', 'jpeg']:
                data = imread(args.data)[None]
                vmin, vmax = 0, 255
            else:
                data = loadmat(args.data)['data']
                vmin = percentile(data, 0.02)
                vmax = percentile(data, 98.)

            if data.ndim < 4:
                # add a trailing channel dimension
                data = data[:, :, :, None]

            # number of channels to initialize depends on the model class
            if isinstance(model, MultiscaleRIDE):
                num_channels = 1
            elif isinstance(model, ColorRIDE):
                num_channels = 3
            else:
                num_channels = model.num_channels

            # NOTE(review): rows are padded by one margin but columns by two;
            # this matches the asymmetric crop further below -- confirm intended
            num_pixels = (args.num_rows + args.margin) * (args.num_cols + args.margin * 2)

            # initialize image with white noise (but correct marginal distribution)
            img_init = []
            for c in range(num_channels):
                # draw pixel values at random from the reference data
                indices = randint(data.size // num_channels, size=num_pixels)
                img_init.append(
                    asarray(data[:, :, :, c].ravel()[indices], dtype=float).reshape(
                        1, args.num_rows + args.margin,
                        args.num_cols + args.margin * 2, 1))
            img_init = concatenate(img_init, 3)

            # clamp the initialization to the data's value range
            img_init[img_init < vmin] = vmin
            img_init[img_init > vmax] = vmax

            if isinstance(model, MultiscaleRIDE) or isinstance(
                    model, ColorRIDE):
                # sample in the model's transformed representation
                data = model._transform(data)
                # NOTE(review): idx is only used by the commented-out
                # per-image bounds below; the randint call still advances
                # the RNG state, so it cannot be removed safely
                idx = randint(data.shape[0])
                img = model.sample(
                    img_init,
                    # min_values=data[idx].min(1).min(0),
                    # max_values=data[idx].max(1).max(0))
                    min_values=data.min(2).min(1).min(0),
                    max_values=data.max(2).max(1).max(0))
            else:
                # img_init[:] = img_init.mean()
                img = model.sample(img_init,
                    min_values=percentile(data, .1),
                    max_values=percentile(data, 99.8))
                    # min_values=percentile(data, 1.),
                    # max_values=percentile(data, 96.))

            if args.log:
                # linearize and gamma-correct
                img = power(exp(img), .45)
                vmin = power(exp(vmin), .45)
                vmax = power(exp(vmax), .45)

            # best-effort dump of the raw sample; failures are ignored
            try:
                savez(args.output.split('.')[0] + '.npz', sample=img)
            except:
                pass

            if args.margin > 0:
                # crop margin: top rows only, but both sides of the columns
                img = img[:, args.margin:, args.margin:-args.margin]

            if num_channels == 1:
                imwrite(
                    args.output,
                    imformat(img[0, :, :, 0], vmin=vmin, vmax=vmax,
                             symmetric=False))
            else:
                imwrite(
                    args.output,
                    imformat(img[0], vmin=vmin, vmax=vmax, symmetric=False))

    return 0
def main(argv):
    """Train a recurrent image model (RIDE variant) on image patches.

    Parses a large set of training hyperparameters, loads and optionally
    augments the data, builds or restores a model, then trains it over a
    schedule of increasing patch sizes with optional finetuning and
    validation-based early stopping.  Results are saved to an experiment
    file after every schedule step.

    @param argv: command-line arguments
    @return: 0 on success, 1 on invalid validation-set size
    """
    parser = ArgumentParser(argv[0], description=__doc__)
    parser.add_argument('--patch_size', '-p', type=int, default=[8, 10, 12, 14, 16, 18, 20, 22], nargs='+')
    parser.add_argument('--row_multiplier', '-R', type=int, default=[1], nargs='+',
        help='Can be used to train on elongated patches.')
    parser.add_argument('--col_multiplier', '-C', type=int, default=[1], nargs='+',
        help='Can be used to train on elongated patches.')
    parser.add_argument('--num_patches', '-P', type=int, default=None,
        help='If given, subsample training data.')
    parser.add_argument('--num_valid', '-V', type=int, default=0,
        help='Number of training images used for validation error based early stopping.')
    parser.add_argument('--finetune', '-F', type=int, default=[1], nargs='+',
        help='Indicate iterations in which to finetune MCGSM with L-BFGS.')
    parser.add_argument('--learning_rate', '-l', type=float, default=[1., .5, .1, .05, .01, 0.005, 0.001, 0.0005], nargs='+')
    parser.add_argument('--momentum', '-m', type=float, default=[.9], nargs='+')
    parser.add_argument('--batch_size', '-B', type=int, default=[50], nargs='+')
    parser.add_argument('--nb_size', '-b', type=int, default=5,
        help='Size of the causal neighborhood of pixels.')
    parser.add_argument('--num_hiddens', '-n', type=int, default=64)
    parser.add_argument('--num_components', '-c', type=int, default=32)
    parser.add_argument('--num_scales', '-s', type=int, default=4)
    parser.add_argument('--num_features', '-f', type=int, default=32)
    parser.add_argument('--num_epochs', '-e', type=int, default=[1], nargs='+')
    parser.add_argument('--precondition', '-Q', type=int, default=1)
    parser.add_argument('--method', '-M', type=str, default=['SGD'], nargs='+')
    parser.add_argument('--data', '-d', type=str, default='data/deadleaves_train.mat')
    parser.add_argument('--noise', '-N', type=float, default=None,
        help='Standard deviation of Gaussian noise added to data before training (as fraction of data standard deviation).')
    parser.add_argument('--model', '-I',
        type=str, default='',
        help='Start with this model as initialization. Other flags will be ignored.')
    parser.add_argument('--add_layer', '-a', type=int, default=[0], nargs='+')
    parser.add_argument('--train_top_layer', '-T', type=int, default=[0], nargs='+')
    parser.add_argument('--train_means', '-S', type=int, default=[0], nargs='+')
    parser.add_argument('--mode', '-q', type=str, default='CPU', choices=['CPU', 'GPU'])
    parser.add_argument('--device', '-D', type=int, default=0)
    parser.add_argument('--augment', '-A', type=int, default=1,
        help='Increase training set size by transforming data.')
    parser.add_argument('--overlap', '-O', type=int, default=[1], nargs='+')
    parser.add_argument('--output', '-o', type=str, default='results/deadleaves/')
    parser.add_argument('--patch_model', type=int, default=0,
        help='Train a patch-based model instead of a stochastic process.')
    parser.add_argument('--extended', '-X', type=int, default=0,
        help='Use extended version of spatial LSTM.')
    parser.add_argument('--multiscale', '-Y', type=int, default=0,
        help='Apply recurrent image model to multiscale representation of images.')
    parser.add_argument('--color', '-Z', type=int, default=0,
        help='Use separate models to model color and grayscale values.')

    args = parser.parse_args(argv[1:])

    experiment = Experiment()

    # select compute backend
    if args.mode.upper() == 'GPU':
        caffe.set_mode_gpu()
        caffe.set_device(args.device)
    else:
        caffe.set_mode_cpu()

    # load data
    if args.data.lower()[-4:] in ['.gif', '.png', '.jpg', 'jpeg']:
        data = imread(args.data)[None]
        # dequantize integer pixel values with uniform noise
        data += rand(*data.shape)
    else:
        data = loadmat(args.data)['data']

    # augment by horizontal and (optionally) vertical mirroring
    if args.augment > 0:
        data = vstack([data, data[:, :, ::-1]])
    if args.augment > 1:
        data = vstack([data, data[:, ::-1, :]])

    if args.noise is not None:
        # add noise as a means for regularization
        # NOTE(review): the help text says noise is a *fraction* of the data
        # standard deviation, which would suggest std * args.noise; the code
        # divides instead -- confirm which is intended
        data += randn(*data.shape) * (std(data, ddof=1) / args.noise)

    if args.num_valid > 0:
        if args.num_valid >= data.shape[0]:
            print 'Cannot use {0} for validation, there are only {1} training images.'.format(
                args.num_valid,
                data.shape[0])
            return 1

        # select subset for validation
        idx = random_select(args.num_valid, data.shape[0])
        data_valid = data[idx]
        data = asarray([image for i, image in enumerate(data) if i not in idx])

        print '{0} training images'.format(data.shape[0])
        print '{0} validation images'.format(data_valid.shape[0])

        # cut validation images into non-overlapping patches
        patches_valid = []
        patch_size = min([64, data.shape[1], data.shape[2]])
        for i in range(0, data_valid.shape[1] - patch_size + 1, patch_size):
            for j in range(0, data_valid.shape[2] - patch_size + 1, patch_size):
                patches_valid.append(data_valid[:, i:i + patch_size, j:j + patch_size])
        patches_valid = vstack(patches_valid)

    if args.model:
        # load pretrained model
        results = Experiment(args.model)
        model = results['model']
        loss = [results['loss']]

        # optionally wrap the pretrained model in a patch-based model
        if args.patch_model and not isinstance(model, PatchRIDE):
            model = PatchRIDE(
                model=model,
                num_rows=args.patch_size[0],
                num_cols=args.patch_size[0])
    else:
        # create recurrent image model
        if args.patch_model:
            model = PatchRIDE(
                num_rows=args.patch_size[0],
                num_cols=args.patch_size[0],
                num_channels=data.shape[-1] if data.ndim > 3 else 1,
                num_hiddens=args.num_hiddens,
                nb_size=args.nb_size,
                num_components=args.num_components,
                num_scales=args.num_scales,
                num_features=args.num_features,
                model_class=ColorRIDE if args.color else RIDE)

            # unsupported flag combinations for the patch model
            if args.extended:
                print 'Extended patch model not supported.'
                return 0
            if args.multiscale:
                print 'Multiscale patch model not supported.'
                return 0
        else:
            if args.multiscale:
                if data.ndim > 3 and data.shape[-1] > 1:
                    print 'Multiscale color model not supported.'
                    return 0
                model = MultiscaleRIDE(
                    num_hiddens=args.num_hiddens,
                    nb_size=args.nb_size,
                    num_components=args.num_components,
                    num_scales=args.num_scales,
                    num_features=args.num_features,
                    extended=args.extended > 0)
            elif args.color:
                if data.ndim < 4 or data.shape[-1] != 3:
                    print 'These images don\'t look like RGB images.'
                    return 0
                model = ColorRIDE(
                    num_hiddens=args.num_hiddens,
                    nb_size=args.nb_size,
                    num_components=args.num_components,
                    num_scales=args.num_scales,
                    num_features=args.num_features,
                    extended=args.extended > 0)
            else:
                model = RIDE(
                    num_channels=data.shape[-1] if data.ndim > 3 else 1,
                    num_hiddens=args.num_hiddens,
                    nb_size=args.nb_size,
                    num_components=args.num_components,
                    num_scales=args.num_scales,
                    num_features=args.num_features,
                    extended=args.extended > 0)
        loss = []

    # compute initial performance
    loss_valid = []
    if args.num_valid > 0:
        print 'Computing validation loss...'
        loss_valid.append(model.evaluate(patches_valid))
        # keep a copy for rollback if validation performance degrades
        model_copy = deepcopy(model)

    # training schedule: one step per configured patch size; list-valued
    # hyperparameters are cycled with k % len(...)
    for k, patch_size in enumerate(args.patch_size):
        if args.multiscale:
            # the multiscale transform halves the resolution
            patch_size *= 2

        if k < len(args.add_layer):
            for _ in range(args.add_layer[k]):
                # add spatial LSTM to the network
                model.add_layer()

        # extract patches of given patch size
        patches = []
        row_size = patch_size * args.row_multiplier[k % len(args.row_multiplier)]
        col_size = patch_size * args.col_multiplier[k % len(args.col_multiplier)]

        if isinstance(model, PatchRIDE):
            model.num_rows = row_size
            model.num_cols = col_size

        # NOTE(review): Python 2 integer division is relied upon here for
        # the range() step; under Python 3 this would produce a float
        for i in range(0, data.shape[1] - row_size + 1, row_size / args.overlap[k % len(args.overlap)]):
            for j in range(0, data.shape[2] - col_size + 1, col_size / args.overlap[k % len(args.overlap)]):
                patches.append(data[:, i:i + row_size, j:j + col_size])
        patches = vstack(patches)

        # randomize order of patches
        if args.num_patches is not None and args.num_patches < len(patches):
            patches = patches[random_select(args.num_patches, len(patches))]
        else:
            patches = patches[permutation(len(patches))]

        # determine batch size
        if args.method[k % len(args.method)].upper() == 'SFO':
            num_batches = int(max([25, sqrt(patches.shape[0]) / 5.]))
            batch_size = patches.shape[0] // num_batches
        else:
            batch_size = args.batch_size[k % len(args.batch_size)]

        if batch_size < 1:
            raise RuntimeError('Too little data.')

        print 'Patch size: {0}x{1}'.format(row_size, col_size)
        print 'Number of patches: {0}'.format(patches.shape[0])
        print 'Batch size: {0}'.format(batch_size)

        # train recurrent image model
        print 'Training...'
        loss.append(
            model.train(patches,
                batch_size=batch_size,
                method=args.method[k % len(args.method)],
                num_epochs=args.num_epochs[k % len(args.num_epochs)],
                learning_rate=args.learning_rate[k % len(args.learning_rate)],
                momentum=args.momentum[k % len(args.momentum)],
                precondition=args.precondition > 0,
                train_top_layer=args.train_top_layer[k % len(args.train_top_layer)] > 0,
                train_means=args.train_means[k % len(args.train_means)] > 0))

        if args.finetune[k % len(args.finetune)]:
            # polish the MCGSM parameters with L-BFGS
            print 'Finetuning...'
            model.finetune(patches, num_samples_train=1000000, max_iter=500)

        if args.num_valid > 0:
            print 'Computing validation loss...'
            loss_valid.append(model.evaluate(patches_valid))

            # early stopping: roll back to the previous model on degradation
            if loss_valid[-1] > loss_valid[-2]:
                print 'Performance got worse. Stopping optimization.'
                model = model_copy
                break

            print 'Copying model...'
            model_copy = deepcopy(model)

        # checkpoint the experiment after every schedule step
        experiment['batch_size'] = batch_size
        experiment['args'] = args
        experiment['model'] = model
        experiment['loss_valid'] = loss_valid
        experiment['loss'] = hstack(loss) if len(loss) > 0 else []
        experiment.save(os.path.join(args.output, 'rim.{0}.{1}.xpck'))

    return 0
def main(argv):
    """Draw a sample image from a trained recurrent image model.

    The model is read from an experiment file.  Sampling starts either from
    white noise (no --data) or from noise with data-matched marginals; the
    synthesized image is written to --output.

    @param argv: command-line arguments
    @return: 0 on success
    """
    parser = ArgumentParser(argv[0], description=__doc__)
    parser.add_argument('model', type=str)
    parser.add_argument('--num_rows', '-r', type=int, default=256)
    parser.add_argument('--num_cols', '-c', type=int, default=256)
    parser.add_argument('--data', '-d', type=str, default=None)
    parser.add_argument('--log', '-L', type=int, default=0)
    parser.add_argument('--output', '-o', type=str, default='sample.png')
    parser.add_argument('--margin', '-M', type=int, default=8)

    args = parser.parse_args(argv[1:])

    # restore the trained model
    model = Experiment(args.model)['model']

    if isinstance(model, PatchRIDE):
        # patch models generate fixed-size images without an initialization
        img = model.sample()[0]
        imwrite(args.output, imformat(img, vmin=0, vmax=255, symmetric=False))
    else:
        if args.data is None:
            # initialize image with white noise
            img_init = randn(1, args.num_rows + args.margin * 2,
                args.num_cols + args.margin * 2,
                sum(model.num_channels)) / 10.

            img = model.sample(img_init)

            if args.log:
                # linearize and gamma-correct
                img = power(exp(img), .45)

            if args.margin > 0:
                # drop the border used only as sampling context
                img = img[:, args.margin:-args.margin, args.margin:-args.margin]

            if img.shape[-1] == 3:
                # clip to 8-bit range and store as RGB
                img[img > 255.] = 255.
                img[img < 0.] = 0.
                imwrite(args.output, asarray(img[0, :, :, :], dtype='uint8'))
            else:
                imwrite(args.output, imformat(img[0, :, :, 0], perc=99))
        else:
            # the valid value range depends on the data source
            if args.data.lower()[-4:] in ['.gif', '.png', '.jpg', 'jpeg']:
                data = imread(args.data)[None]
                vmin, vmax = 0, 255
            else:
                data = loadmat(args.data)['data']
                vmin = percentile(data, 0.02)
                vmax = percentile(data, 98.)

            if data.ndim < 4:
                # append a channel axis
                data = data[:, :, :, None]

            # channel count to initialize depends on the model class
            if isinstance(model, MultiscaleRIDE):
                num_channels = 1
            elif isinstance(model, ColorRIDE):
                num_channels = 3
            else:
                num_channels = model.num_channels

            # NOTE(review): rows get one margin, columns two; consistent with
            # the asymmetric crop below -- verify this asymmetry is intended
            num_pixels = (args.num_rows + args.margin) * (args.num_cols + args.margin * 2)

            # initialize image with white noise (but correct marginal distribution)
            img_init = []
            for c in range(num_channels):
                # bootstrap pixel values from the reference data
                indices = randint(data.size // num_channels, size=num_pixels)
                img_init.append(
                    asarray(data[:, :, :, c].ravel()[indices], dtype=float).reshape(
                        1, args.num_rows + args.margin,
                        args.num_cols + args.margin * 2, 1))
            img_init = concatenate(img_init, 3)

            # clamp initialization into the data range
            img_init[img_init < vmin] = vmin
            img_init[img_init > vmax] = vmax

            if isinstance(model, MultiscaleRIDE) or isinstance(model, ColorRIDE):
                # work in the model's transformed representation
                data = model._transform(data)
                # NOTE(review): idx feeds only the commented-out bounds below;
                # the randint call still advances the RNG, so keep it
                idx = randint(data.shape[0])
                img = model.sample(img_init,
                    # min_values=data[idx].min(1).min(0),
                    # max_values=data[idx].max(1).max(0))
                    min_values=data.min(2).min(1).min(0),
                    max_values=data.max(2).max(1).max(0))
            else:
                # img_init[:] = img_init.mean()
                img = model.sample(img_init,
                    min_values=percentile(data, .1),
                    max_values=percentile(data, 99.8))
                    # min_values=percentile(data, 1.),
                    # max_values=percentile(data, 96.))

            if args.log:
                # linearize and gamma-correct
                img = power(exp(img), .45)
                vmin = power(exp(vmin), .45)
                vmax = power(exp(vmax), .45)

            # best-effort raw dump of the sample; any failure is ignored
            try:
                savez(args.output.split('.')[0] + '.npz', sample=img)
            except:
                pass

            if args.margin > 0:
                # crop: top rows only, both column sides
                img = img[:, args.margin:, args.margin:-args.margin]

            if num_channels == 1:
                imwrite(args.output,
                    imformat(img[0, :, :, 0], vmin=vmin, vmax=vmax, symmetric=False))
            else:
                imwrite(args.output,
                    imformat(img[0], vmin=vmin, vmax=vmax, symmetric=False))

    return 0
def main(argv):
    """Train a recurrent image model over a schedule of patch sizes.

    Loads and optionally augments training images, builds a RIDE-family
    model (or restores a pretrained one), trains it step-by-step on growing
    patches with optional L-BFGS finetuning and validation-based early
    stopping, and checkpoints the experiment after every step.

    @param argv: command-line arguments
    @return: 0 on success, 1 on invalid validation-set size
    """
    parser = ArgumentParser(argv[0], description=__doc__)
    parser.add_argument('--patch_size', '-p', type=int, default=[8, 10, 12, 14, 16, 18, 20, 22], nargs='+')
    parser.add_argument('--row_multiplier', '-R', type=int, default=[1], nargs='+',
        help='Can be used to train on elongated patches.')
    parser.add_argument('--col_multiplier', '-C', type=int, default=[1], nargs='+',
        help='Can be used to train on elongated patches.')
    parser.add_argument('--num_patches', '-P', type=int, default=None,
        help='If given, subsample training data.')
    parser.add_argument(
        '--num_valid', '-V', type=int, default=0,
        help='Number of training images used for validation error based early stopping.'
    )
    parser.add_argument(
        '--finetune', '-F', type=int, default=[1], nargs='+',
        help='Indicate iterations in which to finetune MCGSM with L-BFGS.')
    parser.add_argument('--learning_rate', '-l', type=float, default=[1., .5, .1, .05, .01, 0.005, 0.001, 0.0005], nargs='+')
    parser.add_argument('--momentum', '-m', type=float, default=[.9], nargs='+')
    parser.add_argument('--batch_size', '-B', type=int, default=[50], nargs='+')
    parser.add_argument('--nb_size', '-b', type=int, default=5,
        help='Size of the causal neighborhood of pixels.')
    parser.add_argument('--num_hiddens', '-n', type=int, default=64)
    parser.add_argument('--num_components', '-c', type=int, default=32)
    parser.add_argument('--num_scales', '-s', type=int, default=4)
    parser.add_argument('--num_features', '-f', type=int, default=32)
    parser.add_argument('--num_epochs', '-e', type=int, default=[1], nargs='+')
    parser.add_argument('--precondition', '-Q', type=int, default=1)
    parser.add_argument('--method', '-M', type=str, default=['SGD'], nargs='+')
    parser.add_argument('--data', '-d', type=str, default='data/deadleaves_train.mat')
    parser.add_argument(
        '--noise', '-N', type=float, default=None,
        help='Standard deviation of Gaussian noise added to data before training (as fraction of data standard deviation).'
    )
    parser.add_argument(
        '--model', '-I', type=str, default='',
        help='Start with this model as initialization. Other flags will be ignored.'
    )
    parser.add_argument('--add_layer', '-a', type=int, default=[0], nargs='+')
    parser.add_argument('--train_top_layer', '-T', type=int, default=[0], nargs='+')
    parser.add_argument('--train_means', '-S', type=int, default=[0], nargs='+')
    parser.add_argument('--mode', '-q', type=str, default='CPU', choices=['CPU', 'GPU'])
    parser.add_argument('--device', '-D', type=int, default=0)
    parser.add_argument(
        '--augment', '-A', type=int, default=1,
        help='Increase training set size by transforming data.')
    parser.add_argument('--overlap', '-O', type=int, default=[1], nargs='+')
    parser.add_argument('--output', '-o', type=str, default='results/deadleaves/')
    parser.add_argument(
        '--patch_model', type=int, default=0,
        help='Train a patch-based model instead of a stochastic process.')
    parser.add_argument('--extended', '-X', type=int, default=0,
        help='Use extended version of spatial LSTM.')
    parser.add_argument(
        '--multiscale', '-Y', type=int, default=0,
        help='Apply recurrent image model to multiscale representation of images.')
    parser.add_argument(
        '--color', '-Z', type=int, default=0,
        help='Use separate models to model color and grayscale values.')

    args = parser.parse_args(argv[1:])

    experiment = Experiment()

    # choose compute backend
    if args.mode.upper() == 'GPU':
        caffe.set_mode_gpu()
        caffe.set_device(args.device)
    else:
        caffe.set_mode_cpu()

    # load data
    if args.data.lower()[-4:] in ['.gif', '.png', '.jpg', 'jpeg']:
        data = imread(args.data)[None]
        # dequantize integer pixels with uniform noise
        data += rand(*data.shape)
    else:
        data = loadmat(args.data)['data']

    # mirror augmentation: horizontal always when enabled, vertical if > 1
    if args.augment > 0:
        data = vstack([data, data[:, :, ::-1]])
    if args.augment > 1:
        data = vstack([data, data[:, ::-1, :]])

    if args.noise is not None:
        # add noise as a means for regularization
        # NOTE(review): the help text describes noise as a *fraction* of the
        # data standard deviation (suggesting std * args.noise), but the code
        # divides by args.noise -- confirm intended semantics
        data += randn(*data.shape) * (std(data, ddof=1) / args.noise)

    if args.num_valid > 0:
        if args.num_valid >= data.shape[0]:
            print 'Cannot use {0} for validation, there are only {1} training images.'.format(
                args.num_valid,
                data.shape[0])
            return 1

        # select subset for validation
        idx = random_select(args.num_valid, data.shape[0])
        data_valid = data[idx]
        data = asarray([image for i, image in enumerate(data) if i not in idx])

        print '{0} training images'.format(data.shape[0])
        print '{0} validation images'.format(data_valid.shape[0])

        # tile the validation images into non-overlapping patches
        patches_valid = []
        patch_size = min([64, data.shape[1], data.shape[2]])
        for i in range(0, data_valid.shape[1] - patch_size + 1, patch_size):
            for j in range(0, data_valid.shape[2] - patch_size + 1, patch_size):
                patches_valid.append(data_valid[:, i:i + patch_size, j:j + patch_size])
        patches_valid = vstack(patches_valid)

    if args.model:
        # load pretrained model
        results = Experiment(args.model)
        model = results['model']
        loss = [results['loss']]

        # optionally wrap it in a patch-based model
        if args.patch_model and not isinstance(model, PatchRIDE):
            model = PatchRIDE(model=model,
                num_rows=args.patch_size[0],
                num_cols=args.patch_size[0])
    else:
        # create recurrent image model
        if args.patch_model:
            model = PatchRIDE(
                num_rows=args.patch_size[0],
                num_cols=args.patch_size[0],
                num_channels=data.shape[-1] if data.ndim > 3 else 1,
                num_hiddens=args.num_hiddens,
                nb_size=args.nb_size,
                num_components=args.num_components,
                num_scales=args.num_scales,
                num_features=args.num_features,
                model_class=ColorRIDE if args.color else RIDE)

            # reject unsupported flag combinations
            if args.extended:
                print 'Extended patch model not supported.'
                return 0
            if args.multiscale:
                print 'Multiscale patch model not supported.'
                return 0
        else:
            if args.multiscale:
                if data.ndim > 3 and data.shape[-1] > 1:
                    print 'Multiscale color model not supported.'
                    return 0
                model = MultiscaleRIDE(num_hiddens=args.num_hiddens,
                    nb_size=args.nb_size,
                    num_components=args.num_components,
                    num_scales=args.num_scales,
                    num_features=args.num_features,
                    extended=args.extended > 0)
            elif args.color:
                if data.ndim < 4 or data.shape[-1] != 3:
                    print 'These images don\'t look like RGB images.'
                    return 0
                model = ColorRIDE(num_hiddens=args.num_hiddens,
                    nb_size=args.nb_size,
                    num_components=args.num_components,
                    num_scales=args.num_scales,
                    num_features=args.num_features,
                    extended=args.extended > 0)
            else:
                model = RIDE(
                    num_channels=data.shape[-1] if data.ndim > 3 else 1,
                    num_hiddens=args.num_hiddens,
                    nb_size=args.nb_size,
                    num_components=args.num_components,
                    num_scales=args.num_scales,
                    num_features=args.num_features,
                    extended=args.extended > 0)
        loss = []

    # compute initial performance
    loss_valid = []
    if args.num_valid > 0:
        print 'Computing validation loss...'
        loss_valid.append(model.evaluate(patches_valid))
        # snapshot for rollback if validation loss degrades
        model_copy = deepcopy(model)

    # one training step per configured patch size; list-valued options are
    # cycled with k % len(...)
    for k, patch_size in enumerate(args.patch_size):
        if args.multiscale:
            # the multiscale transform halves resolution
            patch_size *= 2

        if k < len(args.add_layer):
            for _ in range(args.add_layer[k]):
                # add spatial LSTM to the network
                model.add_layer()

        # extract patches of given patch size
        patches = []
        row_size = patch_size * args.row_multiplier[k % len(args.row_multiplier)]
        col_size = patch_size * args.col_multiplier[k % len(args.col_multiplier)]

        if isinstance(model, PatchRIDE):
            model.num_rows = row_size
            model.num_cols = col_size

        # NOTE(review): relies on Python 2 integer division for range() step
        for i in range(0, data.shape[1] - row_size + 1, row_size / args.overlap[k % len(args.overlap)]):
            for j in range(0, data.shape[2] - col_size + 1, col_size / args.overlap[k % len(args.overlap)]):
                patches.append(data[:, i:i + row_size, j:j + col_size])
        patches = vstack(patches)

        # randomize order of patches
        if args.num_patches is not None and args.num_patches < len(patches):
            patches = patches[random_select(args.num_patches, len(patches))]
        else:
            patches = patches[permutation(len(patches))]

        # determine batch size
        if args.method[k % len(args.method)].upper() == 'SFO':
            num_batches = int(max([25, sqrt(patches.shape[0]) / 5.]))
            batch_size = patches.shape[0] // num_batches
        else:
            batch_size = args.batch_size[k % len(args.batch_size)]

        if batch_size < 1:
            raise RuntimeError('Too little data.')

        print 'Patch size: {0}x{1}'.format(row_size, col_size)
        print 'Number of patches: {0}'.format(patches.shape[0])
        print 'Batch size: {0}'.format(batch_size)

        # train recurrent image model
        print 'Training...'
        loss.append(
            model.train(
                patches,
                batch_size=batch_size,
                method=args.method[k % len(args.method)],
                num_epochs=args.num_epochs[k % len(args.num_epochs)],
                learning_rate=args.learning_rate[k % len(args.learning_rate)],
                momentum=args.momentum[k % len(args.momentum)],
                precondition=args.precondition > 0,
                train_top_layer=args.train_top_layer[k % len(args.train_top_layer)] > 0,
                train_means=args.train_means[k % len(args.train_means)] > 0))

        if args.finetune[k % len(args.finetune)]:
            # polish MCGSM parameters with L-BFGS
            print 'Finetuning...'
            model.finetune(patches, num_samples_train=1000000, max_iter=500)

        if args.num_valid > 0:
            print 'Computing validation loss...'
            loss_valid.append(model.evaluate(patches_valid))

            # early stopping with rollback to the last good model
            if loss_valid[-1] > loss_valid[-2]:
                print 'Performance got worse. Stopping optimization.'
                model = model_copy
                break

            print 'Copying model...'
            model_copy = deepcopy(model)

        # checkpoint after every step
        experiment['batch_size'] = batch_size
        experiment['args'] = args
        experiment['model'] = model
        experiment['loss_valid'] = loss_valid
        experiment['loss'] = hstack(loss) if len(loss) > 0 else []
        experiment.save(os.path.join(args.output, 'rim.{0}.{1}.xpck'))

    return 0
def main(argv):
    """Inpaint a missing square region of an image with a trained model.

    Removes a central square from the input image, fills it via ancestral
    sampling (best of several candidates by log-likelihood), then refines
    the fill over many epochs either by Metropolis-Hastings sampling of
    overlapping patches ('SAMPLE') or by gradient steps ('MAP').  Progress
    images and the final state are written to --output.

    @param argv: command-line arguments
    @return: 0 on success
    """
    parser = ArgumentParser(argv[0], description=__doc__)
    parser.add_argument('image', type=str)
    parser.add_argument('model', type=str)
    parser.add_argument('--init', '-I', type=str, default=None)
    parser.add_argument('--index', '-x', type=int, default=0,
        help='Determines which image is used when whole dataset is given instead of image.')
    parser.add_argument('--fill_region', '-f', type=int, default=71)
    parser.add_argument('--outer_patch_size', '-p', type=int, default=19)
    parser.add_argument('--inner_patch_size', '-i', type=int, default=5)
    parser.add_argument('--stride', '-s', type=int, default=3)
    parser.add_argument('--candidates', '-C', type=int, default=5,
        help='The best initialization is taken out of this many initializations.')
    parser.add_argument('--num_epochs', '-e', type=int, default=1000)
    parser.add_argument('--method', '-m', type=str, default='SAMPLE', choices=['SAMPLE', 'MAP'])
    parser.add_argument('--step_width', '-l', type=float, default=100.)
    parser.add_argument('--output', '-o', type=str, default='results/inpainting/')
    parser.add_argument('--flip', '-F', type=int, default=0,
        help='If > 0, assume horizontal symmetry. If > 1, assume vertical symmetry.')

    args = parser.parse_args(argv[1:])

    ### DATA

    # load image
    if args.image.lower()[-4:] in ['.gif', '.png', '.jpg', 'jpeg']:
        image = imread(args.image)[None]
        vmin, vmax = 0, 255
    else:
        # a dataset was given; pick one image by index
        image = loadmat(args.image)['data'][[args.index]]
        vmin, vmax = image.min(), image.max()

    if image.ndim < 4:
        # add a trailing channel dimension
        image = image[:, :, :, None]
    image = asarray(image, dtype=float)

    imwrite(os.path.join(args.output, 'original.png'),
        imformat(image[0, :, :, 0], vmin=vmin, vmax=vmax, symmetric=False))

    # remove center portion and replace it with uniform noise
    i_start = (image.shape[1] - args.fill_region) // 2
    j_start = (image.shape[2] - args.fill_region) // 2
    image[0, i_start:i_start + args.fill_region, j_start:j_start + args.fill_region, 0] = \
        vmin + rand(args.fill_region, args.fill_region) * (vmax - vmin)

    imwrite(os.path.join(args.output, 'start.png'),
        imformat(image[0, :, :, 0], vmin=vmin, vmax=vmax, symmetric=False))

    ### MODEL

    # load model
    model = Experiment(args.model)['model']
    model.verbosity = False

    # use different models for sampling and likelihoods because of SLSTM caching
    model_copy = deepcopy(model)

    # create mask indicating pixels to replace: an N x N inner window
    # centered inside the M x M outer patch
    M = args.outer_patch_size
    N = args.inner_patch_size
    m = (M - N) // 2
    n = M - N - m
    patch_mask = zeros([M, M], dtype=bool)
    patch_mask[m:-n, m:-n] = True

    if args.init is None:
        candidates = []
        logliks = []
        for _ in range(args.candidates):
            # replace missing pixels by ancestral sampling; the region is
            # extended by M on the causal sides to give the model context
            patch = image[:, i_start - M:i_start + args.fill_region, j_start - M:j_start + args.fill_region + M]
            sample_mask = zeros([patch.shape[1], patch.shape[2]], dtype=bool)
            sample_mask[M:, M:-M] = True
            image[:, i_start - M:i_start + args.fill_region, j_start - M:j_start + args.fill_region + M] = \
                model.sample(patch, mask=sample_mask, min_values=vmin, max_values=vmax)
            candidates.append(image.copy())
            logliks.append(model.loglikelihood(image).sum())

        # keep the candidate with the highest log-likelihood
        image = candidates[argmax(logliks)]

        imwrite(os.path.join(args.output, 'fillin.0.png'),
            imformat(image[0, :, :, 0], vmin=vmin, vmax=vmax, symmetric=False))

        start_epoch = 0
    else:
        # resume from a previously saved state
        init = load(args.init)
        image = init['image']
        start_epoch = init['epoch']

    ### INPAINTING

    # NOTE(review): if start_epoch >= num_epochs the loop body never runs and
    # the final savez below would raise NameError on `epoch` -- confirm
    try:
        for epoch in range(start_epoch, args.num_epochs):
            print epoch

            # random flips exploit assumed image symmetries so the causal
            # model can propagate information in both directions
            h_flipped = False
            if args.flip > 0 and rand() < .5:
                print 'Horizontal flip.'
                # flip image horizontally
                image = image[:, :, ::-1]
                j_start = image.shape[2] - j_start - args.fill_region
                h_flipped = True

            v_flipped = False
            # NOTE(review): help text says vertical symmetry applies "If > 1",
            # but this condition tests `> 0` like the horizontal case -- verify
            if args.flip > 0 and rand() < .5:
                print 'Vertical flip.'
                # flip image vertically
                image = image[:, ::-1, :]
                i_start = image.shape[1] - i_start - args.fill_region
                v_flipped = True

            # sweep overlapping M x M patches across the fill region;
            # `patch` is a view, so writes update `image` in place
            for i in range(i_start - m, i_start - m + args.fill_region - N + 1, args.stride):
                for j in range(j_start - m, j_start - m + args.fill_region - N + 1, args.stride):
                    patch = image[:, i:i + M, j:j + M]

                    if args.method == 'SAMPLE':
                        # proposal
                        patch_pr, logq_pr = model.sample(patch.copy(),
                            mask=patch_mask,
                            min_values=vmin,
                            max_values=vmax,
                            return_loglik=True)

                        # conditional log-density of the current state
                        logq = model_copy._logq(patch, patch_mask)

                        # joint log-densities
                        logp = model_copy.loglikelihood(patch).sum()
                        logp_pr = model_copy.loglikelihood(patch_pr).sum()

                        # Metropolis-Hastings acceptance test
                        if rand() < exp(logp_pr - logp - logq_pr + logq):
                            # accept proposal
                            patch[:] = patch_pr
                    else:
                        # gradient step on the masked pixels (MAP mode)
                        grad = model.gradient(patch)[1]
                        patch[:, patch_mask] += grad[:, patch_mask] * args.step_width

            # flip back so the saved image has the original orientation
            if h_flipped:
                image = image[:, :, ::-1]
                j_start = image.shape[2] - j_start - args.fill_region
            if v_flipped:
                image = image[:, ::-1, :]
                i_start = image.shape[1] - i_start - args.fill_region

            imwrite(os.path.join(args.output, 'fillin.{0}.png'.format(epoch + 1)),
                imformat(image[0, :, :, 0], vmin=vmin, vmax=vmax, symmetric=False))
    except KeyboardInterrupt:
        # allow a manual stop; the current state is still saved below
        pass

    savez(os.path.join(args.output, 'final.npz'), image=image, epoch=epoch)

    return 0