def main(argv):
	experiment = Experiment()

	# load and preprocess data samples
	data = load('./data/vanhateren4x4.npz')['data']
	data = preprocess(data)

	# train mixture of Gaussian scale mixtures
	mixture = MoGSM(data.shape[0], 8, 4)
	mixture.train(data, num_epochs=100)

	# split data
	batches = mixture.split(data)

	# Gaussianize data
	for k in range(len(mixture)):
		batches[k] = RadialGaussianization(mixture[k], symmetric=False)(batches[k])

	# store results
	experiment.results['mixture'] = mixture
	experiment.results['batches'] = batches
	experiment.save('results/experiment01/experiment01a.{0}.{1}.xpck')

	return 0
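# The `preprocess` helper used above is defined elsewhere in the codebase.
# Judging by the comment "load data, log-transform and center data" in the
# GSM script further down, a minimal sketch might look like this; the offset
# and the shuffling behavior are assumptions, not the actual implementation:
from numpy import log, mean
from numpy.random import permutation

def preprocess(data, shuffle=True):
	data = log(data + 1.)                  # log-transform; offset avoids log(0)
	data = data - mean(data, 1)[:, None]   # center each pixel dimension
	if shuffle:
		# randomize order of data points
		data = data[:, permutation(data.shape[1])]
	return data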
def main(argv):
	preconditioners = []
	models = []

	for i in range(63):
		files = glob('results/BSDS300/snapshots/mcgsm_{0}_128.*.xpck'.format(i))
		files.sort(key=os.path.getmtime)

		if len(files) == 0:
			print 'Could not find snapshot for model {0}.'.format(i)
			files = glob('results/BSDS300/snapshots/mcgsm_{0}_64.*.xpck'.format(i))

		filepath = files[-1]

		print 'Using {0}.'.format(filepath)

		experiment = Experiment(filepath)

		preconditioners.append(experiment['preconditioners'][i])
		models.append(experiment['models'][i])

	experiment = Experiment()
	experiment['models'] = models
	experiment['preconditioners'] = preconditioners
	experiment.save('results/BSDS300/mcgsm_128_merged.xpck', overwrite=True)

	return 0
def main(argv):
	if len(argv) < 2:
		print 'Usage:', argv[0], '<experiment>', '[data_points]'
		return 0

	experiment = Experiment()

	# range of data points evaluated
	if len(argv) < 3:
		fr, to = 0, 1000
	else:
		if '-' in argv[2]:
			fr, to = argv[2].split('-')
			fr, to = int(fr), int(to)
		else:
			fr, to = 0, int(argv[2])

	indices = range(fr, to)

	# load experiment with trained model
	results = Experiment(argv[1])

	# generate test data
	data = load('data/vanhateren.{0}.0.npz'.format(results['parameters'][0]))['data']
	data = preprocess(data, shuffle=False)

	# compute importance weights estimating likelihoods
	ais_weights = results['model'].loglikelihood(data[:, indices],
		num_samples=NUM_AIS_SAMPLES,
		sampling_method=('ais', {'num_steps': NUM_AIS_STEPS}),
		return_all=True)

	# average log-likelihood in [bit/pixel]
	loglik = mean(logmeanexp(ais_weights, 0)) / log(2.) / data.shape[0]
	sem = std(logmeanexp(ais_weights, 0), ddof=1) / log(2.) / data.shape[0] / sqrt(ais_weights.shape[1])

	# store results
	experiment['indices'] = indices
	experiment['ais_weights'] = ais_weights
	experiment['loglik'] = loglik
	experiment['sem'] = sem
	experiment['fixed'] = True

	experiment.save(argv[1][:-4] + '{0}-{1}.xpck'.format(fr, to))

	return 0
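# `logmeanexp` is not shown in this file. For averaging AIS importance
# weights as above, a numerically stable sketch would be (an assumption,
# not the original helper):
from numpy import exp, log

def logmeanexp(x, ax=None):
	# log(mean(exp(x))) along axis `ax`, shifted by the maximum for
	# numerical stability; the result keeps a singleton axis, which is
	# harmless under the subsequent mean()
	x_max = x.max(ax, keepdims=True)
	return x_max + log(exp(x - x_max).mean(ax, keepdims=True))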
def main(argv):
	experiment = Experiment()

	# load and preprocess data
	data = load('./data/vanhateren8x8.npz')['data']
	data = preprocess(data)

	# train a mixture of Gaussian scale mixtures
	mixture = MoGSM(data.shape[0], 8, 4)
	mixture.train(data[:, :100000], num_epochs=100)

	# compute average log-loss on held-out data
	avglogloss = mixture.evaluate(data[:, 100000:])

	# store results
	experiment.results['mixture'] = mixture
	experiment.results['avglogloss'] = avglogloss
	experiment.save('results/experiment01/experiment01b.{0}.{1}.xpck')

	return 0
def main(argv):
	parser = ArgumentParser(argv[0], description=__doc__)
	parser.add_argument('--patch_size', '-p', type=int, default=[8, 10, 12, 14, 16, 18, 20, 22], nargs='+')
	parser.add_argument('--row_multiplier', '-R', type=int, default=[1], nargs='+',
		help='Can be used to train on elongated patches.')
	parser.add_argument('--col_multiplier', '-C', type=int, default=[1], nargs='+',
		help='Can be used to train on elongated patches.')
	parser.add_argument('--num_patches', '-P', type=int, default=None,
		help='If given, subsample training data.')
	parser.add_argument('--num_valid', '-V', type=int, default=0,
		help='Number of training images used for validation error based early stopping.')
	parser.add_argument('--finetune', '-F', type=int, default=[1], nargs='+',
		help='Indicate iterations in which to finetune MCGSM with L-BFGS.')
	parser.add_argument('--learning_rate', '-l', type=float, default=[1., .5, .1, .05, .01, 0.005, 0.001, 0.0005], nargs='+')
	parser.add_argument('--momentum', '-m', type=float, default=[.9], nargs='+')
	parser.add_argument('--batch_size', '-B', type=int, default=[50], nargs='+')
	parser.add_argument('--nb_size', '-b', type=int, default=5,
		help='Size of the causal neighborhood of pixels.')
	parser.add_argument('--num_hiddens', '-n', type=int, default=64)
	parser.add_argument('--num_components', '-c', type=int, default=32)
	parser.add_argument('--num_scales', '-s', type=int, default=4)
	parser.add_argument('--num_features', '-f', type=int, default=32)
	parser.add_argument('--num_epochs', '-e', type=int, default=[1], nargs='+')
	parser.add_argument('--precondition', '-Q', type=int, default=1)
	parser.add_argument('--method', '-M', type=str, default=['SGD'], nargs='+')
	parser.add_argument('--data', '-d', type=str, default='data/deadleaves_train.mat')
	parser.add_argument('--noise', '-N', type=float, default=None,
		help='Standard deviation of Gaussian noise added to data before training (as fraction of data standard deviation).')
	parser.add_argument('--model', '-I', type=str, default='',
		help='Start with this model as initialization. Other flags will be ignored.')
	parser.add_argument('--add_layer', '-a', type=int, default=[0], nargs='+')
	parser.add_argument('--train_top_layer', '-T', type=int, default=[0], nargs='+')
	parser.add_argument('--train_means', '-S', type=int, default=[0], nargs='+')
	parser.add_argument('--mode', '-q', type=str, default='CPU', choices=['CPU', 'GPU'])
	parser.add_argument('--device', '-D', type=int, default=0)
	parser.add_argument('--augment', '-A', type=int, default=1,
		help='Increase training set size by transforming data.')
	parser.add_argument('--overlap', '-O', type=int, default=[1], nargs='+')
	parser.add_argument('--output', '-o', type=str, default='results/deadleaves/')
	parser.add_argument('--patch_model', type=int, default=0,
		help='Train a patch-based model instead of a stochastic process.')
	parser.add_argument('--extended', '-X', type=int, default=0,
		help='Use extended version of spatial LSTM.')
	parser.add_argument('--multiscale', '-Y', type=int, default=0,
		help='Apply recurrent image model to multiscale representation of images.')
	parser.add_argument('--color', '-Z', type=int, default=0,
		help='Use separate models to model color and grayscale values.')

	args = parser.parse_args(argv[1:])

	experiment = Experiment()

	if args.mode.upper() == 'GPU':
		caffe.set_mode_gpu()
		caffe.set_device(args.device)
	else:
		caffe.set_mode_cpu()

	# load data
	if args.data.lower()[-4:] in ['.gif', '.png', '.jpg', 'jpeg']:
		data = imread(args.data)[None]
		data += rand(*data.shape)
	else:
		data = loadmat(args.data)['data']

	if args.augment > 0:
		data = vstack([data, data[:, :, ::-1]])
	if args.augment > 1:
		data = vstack([data, data[:, ::-1, :]])

	if args.noise is not None:
		# add noise as a means of regularization (noise std is given as a
		# fraction of the data standard deviation)
		data += randn(*data.shape) * (std(data, ddof=1) * args.noise)

	if args.num_valid > 0:
		if args.num_valid >= data.shape[0]:
			print 'Cannot use {0} for validation, there are only {1} training images.'.format(
				args.num_valid, data.shape[0])
			return 1

		# select subset for validation
		idx = random_select(args.num_valid, data.shape[0])
		data_valid = data[idx]
		data = asarray([image for i, image in enumerate(data) if i not in idx])

		print '{0} training images'.format(data.shape[0])
		print '{0} validation images'.format(data_valid.shape[0])

		patches_valid = []
		patch_size = min([64, data.shape[1], data.shape[2]])
		for i in range(0, data_valid.shape[1] - patch_size + 1, patch_size):
			for j in range(0, data_valid.shape[2] - patch_size + 1, patch_size):
				patches_valid.append(data_valid[:, i:i + patch_size, j:j + patch_size])
		patches_valid = vstack(patches_valid)

	if args.model:
		# load pretrained model
		results = Experiment(args.model)
		model = results['model']
		loss = [results['loss']]

		if args.patch_model and not isinstance(model, PatchRIDE):
			model = PatchRIDE(
				model=model,
				num_rows=args.patch_size[0],
				num_cols=args.patch_size[0])
	else:
		# create recurrent image model
		if args.patch_model:
			model = PatchRIDE(
				num_rows=args.patch_size[0],
				num_cols=args.patch_size[0],
				num_channels=data.shape[-1] if data.ndim > 3 else 1,
				num_hiddens=args.num_hiddens,
				nb_size=args.nb_size,
				num_components=args.num_components,
				num_scales=args.num_scales,
				num_features=args.num_features,
				model_class=ColorRIDE if args.color else RIDE)

			if args.extended:
				print 'Extended patch model not supported.'
				return 0
			if args.multiscale:
				print 'Multiscale patch model not supported.'
				return 0
		else:
			if args.multiscale:
				if data.ndim > 3 and data.shape[-1] > 1:
					print 'Multiscale color model not supported.'
					return 0
				model = MultiscaleRIDE(
					num_hiddens=args.num_hiddens,
					nb_size=args.nb_size,
					num_components=args.num_components,
					num_scales=args.num_scales,
					num_features=args.num_features,
					extended=args.extended > 0)
			elif args.color:
				if data.ndim < 4 or data.shape[-1] != 3:
					print 'These images don\'t look like RGB images.'
					return 0
				model = ColorRIDE(
					num_hiddens=args.num_hiddens,
					nb_size=args.nb_size,
					num_components=args.num_components,
					num_scales=args.num_scales,
					num_features=args.num_features,
					extended=args.extended > 0)
			else:
				model = RIDE(
					num_channels=data.shape[-1] if data.ndim > 3 else 1,
					num_hiddens=args.num_hiddens,
					nb_size=args.nb_size,
					num_components=args.num_components,
					num_scales=args.num_scales,
					num_features=args.num_features,
					extended=args.extended > 0)

		loss = []

	# compute initial performance
	loss_valid = []
	if args.num_valid > 0:
		print 'Computing validation loss...'
		loss_valid.append(model.evaluate(patches_valid))
		model_copy = deepcopy(model)

	for k, patch_size in enumerate(args.patch_size):
		if args.multiscale:
			patch_size *= 2

		if k < len(args.add_layer):
			for _ in range(args.add_layer[k]):
				# add spatial LSTM to the network
				model.add_layer()

		# extract patches of given patch size
		patches = []
		row_size = patch_size * args.row_multiplier[k % len(args.row_multiplier)]
		col_size = patch_size * args.col_multiplier[k % len(args.col_multiplier)]

		if isinstance(model, PatchRIDE):
			model.num_rows = row_size
			model.num_cols = col_size

		for i in range(0, data.shape[1] - row_size + 1, row_size / args.overlap[k % len(args.overlap)]):
			for j in range(0, data.shape[2] - col_size + 1, col_size / args.overlap[k % len(args.overlap)]):
				patches.append(data[:, i:i + row_size, j:j + col_size])
		patches = vstack(patches)

		# randomize order of patches
		if args.num_patches is not None and args.num_patches < len(patches):
			patches = patches[random_select(args.num_patches, len(patches))]
		else:
			patches = patches[permutation(len(patches))]

		# determine batch size
		if args.method[k % len(args.method)].upper() == 'SFO':
			num_batches = int(max([25, sqrt(patches.shape[0]) / 5.]))
			batch_size = patches.shape[0] // num_batches
		else:
			batch_size = args.batch_size[k % len(args.batch_size)]

		if batch_size < 1:
			raise RuntimeError('Too little data.')

		print 'Patch size: {0}x{1}'.format(row_size, col_size)
		print 'Number of patches: {0}'.format(patches.shape[0])
		print 'Batch size: {0}'.format(batch_size)

		# train recurrent image model
		print 'Training...'
		loss.append(
			model.train(patches,
				batch_size=batch_size,
				method=args.method[k % len(args.method)],
				num_epochs=args.num_epochs[k % len(args.num_epochs)],
				learning_rate=args.learning_rate[k % len(args.learning_rate)],
				momentum=args.momentum[k % len(args.momentum)],
				precondition=args.precondition > 0,
				train_top_layer=args.train_top_layer[k % len(args.train_top_layer)] > 0,
				train_means=args.train_means[k % len(args.train_means)] > 0))

		if args.finetune[k % len(args.finetune)]:
			print 'Finetuning...'
			model.finetune(patches, num_samples_train=1000000, max_iter=500)

		if args.num_valid > 0:
			print 'Computing validation loss...'
			loss_valid.append(model.evaluate(patches_valid))

			if loss_valid[-1] > loss_valid[-2]:
				print 'Performance got worse. Stopping optimization.'
				model = model_copy
				break

			print 'Copying model...'
			model_copy = deepcopy(model)

	experiment['batch_size'] = batch_size
	experiment['args'] = args
	experiment['model'] = model
	experiment['loss_valid'] = loss_valid
	experiment['loss'] = hstack(loss) if len(loss) > 0 else []
	experiment.save(os.path.join(args.output, 'rim.{0}.{1}.xpck'))

	return 0
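# Hypothetical invocation of the script above (the script name is an
# assumption; the flags are the ones defined by its parser):
#
#   python train_deadleaves.py -d data/deadleaves_train.mat \
#       -p 8 12 16 -l 1. .5 .1 -e 2 -V 10 -o results/deadleaves/
#
# Each position in -p corresponds to one training iteration; shorter flag
# lists such as -l and -e are cycled via the modulo indexing in the loop.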
def main(argv):
	parser = ArgumentParser(argv[0], description=__doc__)
	parser.add_argument('--data', '-d', type=str, default='data/BSDS300_8x8.mat')
	parser.add_argument('--num_train', '-N', type=int, default=1000000)
	parser.add_argument('--num_valid', '-V', type=int, default=200000)
	parser.add_argument('--num_components', '-n', type=int, default=128)
	parser.add_argument('--num_scales', '-s', type=int, default=4)
	parser.add_argument('--num_features', '-f', type=int, default=48)
	parser.add_argument('--train_means', '-M', type=int, default=0)
	parser.add_argument('--indices', '-I', type=int, default=[], nargs='+')
	parser.add_argument('--initialize', '-i', type=str, default=None)
	parser.add_argument('--verbosity', '-v', type=int, default=1)
	parser.add_argument('--max_iter', '-m', type=int, default=2000)

	args = parser.parse_args(argv[1:])

	experiment = Experiment()

	data_train = loadmat(args.data)['patches_train']
	data_valid = loadmat(args.data)['patches_valid']

	if args.initialize:
		results = Experiment(args.initialize)
		models = results['models']
		preconditioners = results['preconditioners']
	else:
		models = [None] * data_train.shape[1]
		preconditioners = [None] * data_train.shape[1]

	def preprocess(data, i, N):
		if N > 0 and N < data.shape[0]:
			# select subset of data
			idx = random_select(N, data.shape[0])
			return data[idx, :i].T, data[idx, i][None, :]
		return data.T[:i], data.T[[i]]

	for i in range(data_train.shape[1]):
		if args.indices and i not in args.indices:
			# skip this one
			continue

		print 'Training model {0}/{1}...'.format(i + 1, data_train.shape[1])

		inputs_train, outputs_train = preprocess(data_train, i, args.num_train)
		inputs_valid, outputs_valid = preprocess(data_valid, i, args.num_valid)

		if i > 0:
			if preconditioners[i] is None:
				preconditioners[i] = WhiteningPreconditioner(inputs_train, outputs_train)

			inputs_train, outputs_train = preconditioners[i](inputs_train, outputs_train)
			inputs_valid, outputs_valid = preconditioners[i](inputs_valid, outputs_valid)

			if models[i] is None:
				models[i] = MCGSM(
					dim_in=i,
					dim_out=1,
					num_components=args.num_components,
					num_features=args.num_features,
					num_scales=args.num_scales)

			models[i].train(
				inputs_train, outputs_train,
				inputs_valid, outputs_valid,
				parameters={
					'verbosity': 1,
					'max_iter': args.max_iter,
					'train_means': args.train_means > 0})
		else:
			preconditioners[i] = None

			if models[i] is None:
				models[i] = MoGSM(dim=1, num_components=4, num_scales=8)

			models[i].train(
				outputs_train, outputs_valid,
				parameters={
					'verbosity': 1,
					'threshold': -1.,
					'train_means': 1,
					'max_iter': 100})

		experiment['args'] = args
		experiment['models'] = models
		experiment['preconditioners'] = preconditioners
		experiment.save('results/BSDS300/snapshots/mcgsm_{0}_{1}.{{0}}.{{1}}.xpck'.format(i, args.num_components))

	if not args.indices:
		experiment['args'] = args
		experiment['models'] = models
		experiment['preconditioners'] = preconditioners
		experiment.save('results/BSDS300/mcgsm.{0}.{1}.xpck')

	return 0
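# `random_select` is used above for subsampling. A minimal sketch consistent
# with its usage (k distinct indices out of n) could be the following; this
# is an assumption, not the actual helper:
from numpy.random import permutation

def random_select(k, n):
	# pick k distinct indices from range(n) uniformly at random
	return permutation(n)[:k]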
def main(argv):
	parser = ArgumentParser(argv[0], description=__doc__)
	parser.add_argument('--num_components', '-c', type=int, default=20)
	parser.add_argument('--max_epochs', '-E', type=int, default=4)
	parser.add_argument('--max_iter_tr', '-m', type=int, default=5,
		help='Number of steps in the inner loop of the trust-region method.')
	parser.add_argument('--output', '-o', type=str, default='results/mnist/')

	args = parser.parse_args(argv[1:])

	# create directories if necessary
	if not os.path.exists(args.output):
		os.makedirs(args.output)

	experiment = Experiment()

	data = load('data/mnist.npz')['train']
	data = data[:, permutation(data.shape[1])]
	data = asarray(data, dtype=float) / 255.
	data = asarray(rand(*data.shape) < data, dtype=float, order='F')

	def callback(model):
		if model.num_updates * args.max_iter_tr % 25:
			return

		callback.num_updates.append(model.num_updates)
		callback.lower_bound.append(model.lower_bound(data))

		print callback.lower_bound[-1]

		p = []
		for k in range(len(model)):
			p.append(model[k].alpha / (model[k].alpha + model[k].beta))
		p = hstack(p)

		imsave(os.path.join(args.output, 'mnist.{0}.png').format(callback.counter),
			stitch(p.T.reshape(-1, 28, 28), num_rows=4),
			cmap='gray', vmin=0., vmax=1.)

		callback.counter += 1

	callback.counter = 0
	callback.num_updates = []
	callback.lower_bound = []

	os.system('rm -f {0}'.format(os.path.join(args.output, 'mnist.*.png')))

	try:
		model = MoBernoulli(dim=784, num_components=args.num_components)
		model.train(data,
			batch_size=200,
			max_epochs=args.max_epochs,
			max_iter_tr=args.max_iter_tr,
			tau=100.,
			callback=callback)
	except KeyboardInterrupt:
		pass

	experiment['args'] = args
	experiment['model'] = model
	experiment['num_updates'] = callback.num_updates
	experiment['lower_bound'] = callback.lower_bound
	experiment.save(os.path.join(args.output, 'mnist.{0}.{1}.{{0}}.{{1}}.xpck').format(
		args.num_components, args.max_iter_tr))

	os.system('ffmpeg -r 25 -i {1} -vcodec mjpeg -sameq {0}'.format(
		os.path.join(args.output, 'mnist.{0}.{1}.avi'.format(args.num_components, args.max_iter_tr)),
		os.path.join(args.output, 'mnist.%d.png')))

	return 0
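# `stitch` tiles the Bernoulli means into one image for `imsave` above. A
# minimal sketch consistent with that usage (assumed, not the original):
from numpy import ceil, sqrt, zeros

def stitch(images, num_rows=None):
	# tile a stack of equally sized images (N, h, w) into one grid image
	n, h, w = images.shape
	if num_rows is None:
		num_rows = int(ceil(sqrt(n)))
	num_cols = int(ceil(n / float(num_rows)))
	grid = zeros([num_rows * h, num_cols * w])
	for k in range(n):
		i, j = k // num_cols, k % num_cols
		grid[i * h:(i + 1) * h, j * w:(j + 1) * w] = images[k]
	return grid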
def main(argv):
	seterr(over='raise', divide='raise', invalid='raise')

	try:
		if int(os.environ['OMP_NUM_THREADS']) > 1 or int(os.environ['MKL_NUM_THREADS']) > 1:
			print 'It seems that parallelization is turned on. This will skew the results. To turn it off:'
			print '\texport OMP_NUM_THREADS=1'
			print '\texport MKL_NUM_THREADS=1'
	except:
		print 'Parallelization of BLAS might be turned on. This could skew results.'

	experiment = Experiment(seed=42)

	if os.path.exists('results/toyexample/toyexample.xpck'):
		results = Experiment('results/toyexample/toyexample.xpck')
		ica = results['ica']
	else:
		# toy model
		ica = ISA(1, 3)
		ica.initialize(method='exponpow')
		ica.A = 1. + randn(1, 3) / 5.

		experiment['ica'] = ica
		experiment.save('results/toyexample/toyexample.xpck')

	# generate visible and corresponding hidden states
	Y = ica.sample_prior(NUM_SAMPLES)
	X = dot(ica.A, Y)

	# energy of posterior samples should be around this value
	energy = mean(ica.prior_energy(Y))

	for method in sampling_methods:
		# disable output and parallelization
		Distribution.VERBOSITY = 0
		mapp.max_processes = 1

		# measure time required by transition operator
		start = time()

		# initial hidden states
		Y = dot(pinv(ica.A), X)

		# increase number of steps to reduce overhead
		ica.sample_posterior(X, method=(method['method'],
			dict(method['parameters'],
				Y=Y,
				num_steps=method['parameters']['num_steps'] * NUM_STEPS_MULTIPLIER)))

		# time required per transition operator application
		duration = (time() - start) / NUM_STEPS_MULTIPLIER

		# enable output and parallelization
		Distribution.VERBOSITY = 2
		mapp.max_processes = 2

		energies = [mean(ica.prior_energy(Y))]

		# Markov chain
		for i in range(int(NUM_SECONDS / duration + 1.)):
			Y = ica.sample_posterior(X,
				method=(method['method'], dict(method['parameters'], Y=Y)))
			energies.append(mean(ica.prior_energy(Y)))

		plot(arange(len(energies)) * duration, energies, '-',
			color=method['color'],
			line_width=1.2,
			pgf_options=['forget plot'],
			comment=str(method['parameters']))

	plot([-2, NUM_SECONDS + 2], energy, 'k--', line_width=1.2)

	xlabel('time in seconds')
	ylabel('average energy')
	title('toy example')

	gca().width = 7
	gca().height = 7
	gca().xmin = -1
	gca().xmax = NUM_SECONDS

	savefig('results/toyexample/toyexample_trace.tex')

	return 0
def main(argv):
	parser = ArgumentParser(argv[0], description=__doc__)
	parser.add_argument('--data', '-d', type=str, default='data/BSDS300_8x8.mat')
	parser.add_argument('--nb_size', '-b', type=int, default=5,
		help='Size of the causal neighborhood of pixels.')
	parser.add_argument('--num_train', '-N', type=int, default=1000000)
	parser.add_argument('--num_valid', '-V', type=int, default=200000)
	parser.add_argument('--num_hiddens', '-n', type=int, default=64)
	parser.add_argument('--num_components', '-c', type=int, default=32)
	parser.add_argument('--num_scales', '-s', type=int, default=4)
	parser.add_argument('--num_features', '-f', type=int, default=32)
	parser.add_argument('--add_layer', '-a', type=int, default=[0], nargs='+')
	parser.add_argument('--learning_rate', '-l', type=float, nargs='+',
		default=[.5, .1, .05, .01, .005, .001, 0.0005])
	parser.add_argument('--batch_size', '-B', type=int, nargs='+', default=[50])
	parser.add_argument('--num_epochs', '-e', type=int, default=[1], nargs='+')
	parser.add_argument('--finetune', '-F', type=int, default=[1], nargs='+',
		help='Indicate iterations in which to finetune MCGSM with L-BFGS.')
	parser.add_argument('--precondition', '-Q', type=int, default=1)
	parser.add_argument('--output', '-o', type=str, default='results/BSDS300/')

	args = parser.parse_args(argv[1:])

	experiment = Experiment()

	print 'Loading data...'

	data_train = loadmat(args.data)['patches_train']
	data_valid = loadmat(args.data)['patches_valid']

	# reconstruct patches
	data_train = hstack([data_train, -sum(data_train, 1)[:, None]])
	data_valid = hstack([data_valid, -sum(data_valid, 1)[:, None]])

	patch_size = int(sqrt(data_train.shape[1]) + .5)

	data_train = data_train.reshape(-1, patch_size, patch_size)
	data_valid = data_valid.reshape(-1, patch_size, patch_size)

	print 'Creating model...'

	model = PatchRIDE(
		num_rows=8,
		num_cols=8,
		model_class=RIDE_BSDS300,  # ensures the bottom-right pixel will be ignored
		nb_size=args.nb_size,
		num_hiddens=args.num_hiddens,
		num_components=args.num_components,
		num_scales=args.num_scales,
		num_features=args.num_features)

	print 'Evaluating...'

	loss = []
	loss_valid = []
	loss_valid.append(model.evaluate(data_valid))

	for i, learning_rate in enumerate(args.learning_rate):
		print 'Training...'

		if i < len(args.add_layer):
			for _ in range(args.add_layer[i]):
				# add spatial LSTM to the network
				model.add_layer()

		# randomize patch order
		data_train = data_train[permutation(data_train.shape[0])]

		# store current parameters
		model_copy = deepcopy(model)

		# train
		loss.append(
			model.train(data_train,
				learning_rate=learning_rate,
				precondition=args.precondition > 0,
				batch_size=args.batch_size[i % len(args.batch_size)],
				num_epochs=args.num_epochs[i % len(args.num_epochs)]))

		print 'Evaluating...'

		# evaluate model
		loss_valid.append(model.evaluate(data_valid))

		if loss_valid[-1] > loss_valid[-2]:
			# restore previous parameters
			model = model_copy
			print 'Performance got worse... Stopping optimization.'
			break

		# fine-tune
		if args.finetune[i % len(args.finetune)]:
			print 'Finetuning...'

			# store current parameters
			model_copy = deepcopy(model)

			model.finetune(data_train, num_samples_train=1000000, max_iter=500)

			print 'Evaluating...'

			loss_valid.append(model.evaluate(data_valid))

			if loss_valid[-1] > loss_valid[-2]:
				print 'Performance got worse... Restoring parameters.'
				model = model_copy
				loss_valid[-1] = loss_valid[-2]

	experiment['args'] = args
	experiment['loss'] = loss
	experiment['loss_valid'] = loss_valid
	experiment['model'] = model
	experiment.save(os.path.join(args.output, 'patchrim.{0}.{1}.xpck'))

	return 0
def main(argv):
	experiment = Experiment()

	parser = ArgumentParser(argv[0], description=__doc__)
	parser.add_argument('--data', '-d', type=str, default='data/vanhateren_deq2_train.mat')
	parser.add_argument('--num_data', '-N', type=int, default=1000000)
	parser.add_argument('--num_valid', '-V', type=int, default=200000)
	parser.add_argument('--input_size', '-i', type=int, default=9)
	parser.add_argument('--max_iter', '-I', type=int, default=3000)
	parser.add_argument('--num_components', '-c', type=int, default=128)
	parser.add_argument('--num_features', '-f', type=int, default=48)
	parser.add_argument('--num_scales', '-s', type=int, default=4)
	parser.add_argument('--verbosity', '-v', type=int, default=1)
	parser.add_argument('--output', '-o', type=str, default='results/vanhateren_deq2/mcgsm.{0}.{1}.xpck')

	args = parser.parse_args(argv[1:])

	### DATA HANDLING

	if args.verbosity > 0:
		print 'Loading data...'

	# load data
	images = loadmat(args.data)['data']

	# define causal neighborhood
	input_mask, output_mask = generate_masks(input_size=args.input_size, output_size=1)

	# extract causal neighborhoods
	num_samples = int((args.num_data + args.num_valid) / images.shape[0] + .9)

	def extract(image):
		return generate_data_from_image(image, input_mask, output_mask, num_samples)

	inputs, outputs = zip(*mapp(extract, images))
	inputs, outputs = hstack(inputs), hstack(outputs)

	inputs_train = inputs[:, :args.num_data]
	outputs_train = outputs[:, :args.num_data]
	inputs_valid = inputs[:, args.num_data:]
	outputs_valid = outputs[:, args.num_data:]

	if inputs_valid.size < 100:
		print 'Not enough data for validation.'
		inputs_valid = None
		outputs_valid = None

	### MODEL TRAINING

	if args.verbosity > 0:
		print 'Preconditioning...'

	preconditioner = WhiteningPreconditioner(inputs_train, outputs_train)

	inputs_train, outputs_train = preconditioner(inputs_train, outputs_train)
	if inputs_valid is not None:
		inputs_valid, outputs_valid = preconditioner(inputs_valid, outputs_valid)

	# free memory
	del inputs
	del outputs

	if args.verbosity > 0:
		print 'Training model...'

	model = MCGSM(
		dim_in=inputs_train.shape[0],
		dim_out=outputs_train.shape[0],
		num_components=args.num_components,
		num_features=args.num_features,
		num_scales=args.num_scales)

	def callback(i, mcgsm):
		experiment['args'] = args
		experiment['model'] = mcgsm
		experiment['preconditioner'] = preconditioner
		experiment['input_mask'] = input_mask
		experiment['output_mask'] = output_mask
		experiment.save(args.output)

	model.train(
		inputs_train, outputs_train,
		inputs_valid, outputs_valid,
		parameters={
			'verbosity': args.verbosity,
			'cb_iter': 500,
			'callback': callback,
			'max_iter': args.max_iter})

	### SAVE RESULTS

	experiment['args'] = args
	experiment['model'] = model
	experiment['preconditioner'] = preconditioner
	experiment['input_mask'] = input_mask
	experiment['output_mask'] = output_mask
	experiment.save(args.output)

	return 0
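# For illustration, the causal-neighborhood masks produced by `generate_masks`
# above can be pictured as follows; the exact window layout used by the
# library is an assumption:
from numpy import zeros

def causal_masks(input_size):
	# boolean masks over an (input_size + 1) x (2 * input_size + 1) window;
	# the input mask covers pixels above and to the left of the output pixel
	rows, cols = input_size + 1, 2 * input_size + 1
	input_mask = zeros([rows, cols], dtype=bool)
	output_mask = zeros([rows, cols], dtype=bool)
	input_mask[:-1] = True               # all rows above the output pixel
	input_mask[-1, :input_size] = True   # same row, left of the output pixel
	output_mask[-1, input_size] = True   # the pixel being predicted
	return input_mask, output_mask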
def main(argv):
	if len(argv) < 2:
		print 'Usage:', argv[0], '<param_id>'
		print
		print '  {0:>3} {1:>7} {2:>5} {3:>5} {4:>5}'.format('ID', 'PS', 'NS', 'TI', 'DC')

		for id, params in enumerate(parameters):
			print '  {0:>3} {1:>7} {2:>5} {3:>5} {4:>5}'.format(id, *params)

		print
		print '  ID = parameter set'
		print '  PS = patch size'
		print '  NS = number of scales'
		print '  TI = number of training iterations'
		print '  DC = model DC component separately'

		return 0

	# start experiment
	experiment = Experiment(server='10.38.138.150')

	# hyperparameters
	patch_size, num_scales, max_iter, separate_dc = parameters[int(argv[1])]

	### DATA PREPROCESSING

	# load data, log-transform and center data
	data = load('data/vanhateren.{0}.1.npz'.format(patch_size))['data']
	data = data[:, :100000]
	data = preprocess(data)

	### MODEL DEFINITION AND TRAINING

	if separate_dc:
		# discrete cosine transform and symmetric whitening transform
		dct = LinearTransform(dim=int(sqrt(data.shape[0])), basis='DCT')
		wt = WhiteningTransform(dct(data)[1:], symmetric=True)

		model = StackedModel(dct, ConcatModel(
			MoGaussian(20),
			StackedModel(wt, GSM(data.shape[0] - 1, num_scales))))
	else:
		# symmetric whitening transform
		wt = WhiteningTransform(data, symmetric=True)
		model = StackedModel(wt, GSM(data.shape[0], num_scales))

	### MODEL TRAINING AND EVALUATION

	model.train(data, max_iter=max_iter, tol=1e-7)

	# load and preprocess test data
	data = load('data/vanhateren.{0}.0.npz'.format(patch_size))['data']
	data = preprocess(data, shuffle=False)

	# log-likelihood in [bit/pixel]
	logliks = model.loglikelihood(data) / log(2.) / data.shape[0]
	loglik = mean(logliks)
	sem = std(logliks, ddof=1) / sqrt(logliks.shape[1])

	print 'log-likelihood: {0:.4f} +- {1:.4f} [bit/pixel]'.format(loglik, sem)

	experiment['logliks'] = logliks
	experiment['loglik'] = loglik
	experiment['sem'] = sem
	experiment.save('results/vanhateren/gsm.{0}.{{0}}.{{1}}.xpck'.format(argv[1]))

	return 0
def main(argv):
	if len(argv) < 2:
		print 'Usage:', argv[0], '<param_id>', '[experiment]'
		print
		print '  {0:>3} {1:>7} {2:>5} {3:>5} {4:>5} {5:>5} {6:>5}'.format(
			'ID', 'PS', 'OC', 'TI', 'FI', 'LP', 'SC')

		for id, params in enumerate(parameters):
			print '  {0:>3} {1:>7} {2:>5} {3:>5} {4:>5} {5:>5} {6:>5}'.format(id, *params)

		print
		print '  ID = parameter set'
		print '  PS = patch size'
		print '  OC = overcompleteness'
		print '  TI = number of training iterations'
		print '  FI = number of fine-tuning iterations'
		print '  LP = optimize marginal distributions'
		print '  SC = initialize with sparse coding'

		return 0

	seterr(invalid='raise', over='raise', divide='raise')

	# start experiment
	experiment = Experiment()

	# hyperparameters
	patch_size, \
	overcompleteness, \
	max_iter, \
	max_iter_ft, \
	train_prior, \
	sparse_coding = parameters[int(argv[1])]

	### DATA PREPROCESSING

	# load data, log-transform and center data
	data = load('data/vanhateren.{0}.1.npz'.format(patch_size))['data']
	data = data[:, :100000]
	data = preprocess(data)

	# discrete cosine transform and whitening transform
	dct = LinearTransform(dim=int(sqrt(data.shape[0])), basis='DCT')
	wt = WhiteningTransform(dct(data)[1:], symmetric=True)

	### MODEL DEFINITION

	isa = ISA(
		num_visibles=data.shape[0] - 1,
		num_hiddens=data.shape[0] * overcompleteness - 1,
		ssize=1)

	# model DC component with a mixture of Gaussians
	model = StackedModel(dct, ConcatModel(MoGaussian(20), StackedModel(wt, isa)))

	### MODEL TRAINING

	# variables to store in results
	experiment['model'] = model
	experiment['parameters'] = parameters[int(argv[1])]

	def callback(phase, isa, iteration):
		"""
		Saves intermediate results every few iterations.
		"""
		if not iteration % 5:
			# whitened filters
			A = dot(dct.A[1:].T, isa.A)

			patch_size = int(sqrt(A.shape[0]) + 0.5)

			# save intermediate results
			experiment.save('results/vanhateren.{0}/results.{1}.{2}.xpck'.format(argv[1], phase, iteration))

			# visualize basis
			imsave('results/vanhateren.{0}/basis.{1}.{2:0>3}.png'.format(argv[1], phase, iteration),
				stitch(imformat(A.T.reshape(-1, patch_size, patch_size))))

	if len(argv) > 2:
		# initialize model with trained model
		results = Experiment(argv[2])
		model = results['model']
		isa = model.model[1].model
		dct = model.transforms[0]

		experiment['model'] = model
	else:
		# enable regularization of marginals
		for gsm in isa.subspaces:
			gsm.gamma = 1e-3
			gsm.alpha = 2.
			gsm.beta = 1.

		# train mixture of Gaussians on DC component
		model.train(data, 0, max_iter=100)

		# initialize filters and marginals
		model.initialize(data, 1)
		model.initialize(model=1, method='laplace')

		experiment.progress(10)

		if sparse_coding:
			# initialize with sparse coding
			if patch_size == '16x16':
				model.train(data, 1, method=('of', {
						'max_iter': max_iter,
						'noise_var': 0.05,
						'var_goal': 1.,
						'beta': 10.,
						'step_width': 0.01,
						'sigma': 0.3,
					}),
					callback=lambda isa, iteration: callback(0, isa, iteration))
			else:
				model.train(data, 1, method=('of', {
						'max_iter': max_iter,
						'noise_var': 0.1,
						'var_goal': 1.,
						'beta': 10.,
						'step_width': 0.01,
						'sigma': 0.5,
					}),
					callback=lambda isa, iteration: callback(0, isa, iteration))
			isa.orthogonalize()
		else:
			if patch_size == '16x16':
				# prevents out-of-memory
				mapp.max_processes = 1

			# train model using a subset of the data
			model.train(data[:, :20000], 1,
				max_iter=max_iter,
				train_prior=train_prior,
				persistent=True,
				init_sampling_steps=5,
				method=('sgd', {'momentum': 0.8}),
				callback=lambda isa, iteration: callback(0, isa, iteration),
				sampling_method=('gibbs', {'num_steps': 1}))

	experiment.progress(50)

	if patch_size == '16x16':
		# prevents out-of-memory
		mapp.max_processes = 1

	# disable regularization
	for gsm in isa.subspaces:
		gsm.gamma = 0.

	# fine-tune model using all the data
	model.train(data, 1,
		max_iter=max_iter_ft,
		train_prior=train_prior,
		train_subspaces=False,
		persistent=True,
		init_sampling_steps=10 if not len(argv) > 2 and (sparse_coding or not train_prior) else 50,
		method=('lbfgs', {'max_fun': 50}),
		callback=lambda isa, iteration: callback(1, isa, iteration),
		sampling_method=('gibbs', {'num_steps': 2}))

	experiment.save('results/vanhateren/vanhateren.{0}.{{0}}.{{1}}.xpck'.format(argv[1]))

	return 0
def main(argv):
	seterr(over='raise', divide='raise', invalid='raise')

	try:
		if int(os.environ['OMP_NUM_THREADS']) > 1 or int(os.environ['MKL_NUM_THREADS']) > 1:
			print 'It seems that parallelization is turned on. This will skew the results. To turn it off:'
			print '\texport OMP_NUM_THREADS=1'
			print '\texport MKL_NUM_THREADS=1'
	except:
		print 'Parallelization of BLAS might be turned on. This could skew results.'

	experiment = Experiment(seed=42)

	if os.path.exists('results/toyexample/toyexample.xpck'):
		results = Experiment('results/toyexample/toyexample.xpck')
		ica = results['ica']
	else:
		# toy model
		ica = ISA(1, 3)
		ica.initialize(method='exponpow')
		ica.A = 1. + randn(1, 3) / 5.

		experiment['ica'] = ica
		experiment.save('results/toyexample/toyexample.xpck')

	Y_ = ica.sample_prior(NUM_AUTOCORR)
	X_ = dot(ica.A, Y_)

	for method in sampling_methods:
		# disable output and parallelization
		Distribution.VERBOSITY = 0
		mapp.max_processes = 1

		Y = ica.sample_prior(NUM_SAMPLES)
		X = dot(ica.A, Y)

		# measure time required by transition operator
		start = time()

		# increase number of steps to reduce overhead
		ica.sample_posterior(X, method=(method['method'],
			dict(method['parameters'],
				Y=Y,
				num_steps=method['parameters']['num_steps'] * NUM_STEPS_MULTIPLIER)))

		# time required per transition operator application
		duration = (time() - start) / NUM_STEPS_MULTIPLIER

		# number of MCMC steps to run for this method
		num_mcmc_steps = int(NUM_SECONDS_RUN / duration + 1.)
		num_autocorr_steps = int(NUM_SECONDS_VIS / duration + 1.)

		# enable output and parallelization
		Distribution.VERBOSITY = 2
		mapp.max_processes = 2

		# posterior samples
		Y = [Y_]

		# Markov chain
		for i in range(num_mcmc_steps):
			Y.append(ica.sample_posterior(X_,
				method=(method['method'], dict(method['parameters'], Y=Y[-1]))))

		ac = []

		for j in range(NUM_AUTOCORR):
			# collect samples belonging to one posterior distribution
			S = hstack([Y[k][:, [j]] for k in range(num_mcmc_steps)])

			# compute autocorrelation for j-th posterior
			ac.append(autocorr(S, num_autocorr_steps))

		# average and plot autocorrelation functions
		plot(arange(num_autocorr_steps) * duration, mean(ac, 0), '-',
			color=method['color'],
			line_width=1.2,
			comment=str(method['parameters']))

	xlabel('time in seconds')
	ylabel('autocorrelation')
	title('toy example')

	gca().width = 7
	gca().height = 7
	gca().xmin = -1
	gca().xmax = NUM_SECONDS_VIS

	savefig('results/toyexample/toyexample_autocorr2.tex')

	return 0
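# `autocorr` is defined elsewhere. A sketch matching its usage above, where
# each row of S holds one coordinate of the chain over time (an assumption,
# not the original helper):
from numpy import mean, zeros

def autocorr(S, max_lag):
	# average normalized autocorrelation of the rows of S up to max_lag
	S = S - mean(S, 1)[:, None]
	ac = zeros(max_lag)
	for lag in range(max_lag):
		# covariance between the chain and itself shifted by `lag` steps
		ac[lag] = mean(mean(S[:, :S.shape[1] - lag] * S[:, lag:], 1))
	return ac / ac[0]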