Example #1
def main(argv):
    experiment = Experiment()

    # load and preprocess data samples
    data = load('./data/vanhateren4x4.npz')['data']
    data = preprocess(data)

    # train mixture of Gaussian scale mixtures
    mixture = MoGSM(data.shape[0], 8, 4)
    mixture.train(data, num_epochs=100)

    # split data
    batches = mixture.split(data)

    # Gaussianize data
    for k in range(len(mixture)):
        batches[k] = RadialGaussianization(mixture[k],
                                           symmetric=False)(batches[k])

    # store results
    experiment.results['mixture'] = mixture
    experiment.results['batches'] = batches
    experiment.save('results/experiment01/experiment01a.{0}.{1}.xpck')

    return 0
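The `.xpck` files written by `experiment.save(...)` can be read back by passing a path to `Experiment`, as the later examples do. A minimal sketch of reloading the results above, assuming the same `Experiment` class is importable and that the `{0}.{1}` placeholders were expanded at save time:

import os
from glob import glob

# pick the most recently saved result file (hypothetical path pattern)
files = glob('results/experiment01/experiment01a.*.xpck')
files.sort(key=os.path.getmtime)

results = Experiment(files[-1])
mixture = results['mixture']
batches = results['batches']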
Example #2
def main(argv):
	preconditioners = []
	models = []

	for i in range(63):
		files = glob('results/BSDS300/snapshots/mcgsm_{0}_128.*.xpck'.format(i))
		files.sort(key=os.path.getmtime)

		if len(files) == 0:
			print 'Could not find snapshot for model {0}, falling back to 64 components.'.format(i)
			files = glob('results/BSDS300/snapshots/mcgsm_{0}_64.*.xpck'.format(i))

		filepath = files[-1]

		print 'Using {0}.'.format(filepath)

		experiment = Experiment(filepath)

		preconditioners.append(experiment['preconditioners'][i])
		models.append(experiment['models'][i])

	experiment = Experiment()
	experiment['models'] = models
	experiment['preconditioners'] = preconditioners
	experiment.save('results/BSDS300/mcgsm_128_merged.xpck', overwrite=True)

	return 0
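The save paths here and in the other examples contain `{0}.{1}` placeholders; `Experiment.save` presumably expands them to make filenames unique (this merged file has none, hence `overwrite=True`). A hypothetical sketch of that convention, not the real class's scheme:

import os
import time

# hypothetical stand-in for how the '{0}.{1}' placeholders might be
# expanded; the real Experiment.save may fill them differently
def unique_path(template):
    return template.format(time.strftime('%Y%m%d%H%M%S'), os.getpid())

print unique_path('results/experiment01/experiment01a.{0}.{1}.xpck')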
Example #3
def main(argv):
	if len(argv) < 2:
		print 'Usage:', argv[0], '<experiment>', '[data_points]'
		return 0

	experiment = Experiment()

	# range of data points evaluated
	if len(argv) < 3:
		fr, to = 0, 1000
	else:
		if '-' in argv[2]:
			fr, to = argv[2].split('-')
			fr, to = int(fr), int(to)
		else:
			fr, to = 0, int(argv[2])

	indices = range(fr, to)

	# load experiment with trained model
	results = Experiment(argv[1])

	# generate test data
	data = load('data/vanhateren.{0}.0.npz'.format(results['parameters'][0]))['data']
	data = preprocess(data, shuffle=False)

	# compute importance weights estimating likelihoods
	ais_weights = results['model'].loglikelihood(data[:, indices],
		num_samples=NUM_AIS_SAMPLES, sampling_method=('ais', {'num_steps': NUM_AIS_STEPS}), return_all=True)

	# average log-likelihood in [bit/pixel]
	loglik = mean(logmeanexp(ais_weights, 0)) / log(2.) / data.shape[0]
	sem = std(logmeanexp(ais_weights, 0), ddof=1) / log(2.) / data.shape[0] / sqrt(ais_weights.shape[1])

	# save results
	experiment['indices'] = indices
	experiment['ais_weights'] = ais_weights
	experiment['loglik'] = loglik
	experiment['sem'] = sem
	experiment['fixed'] = True
	experiment.save(argv[1][:-4] + '{0}-{1}.xpck'.format(fr, to))

	return 0
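The AIS importance weights are averaged in log-space with a `logmeanexp` helper; a minimal, numerically stable sketch of such a function (the helper actually used by the script may differ):

from numpy import exp, log

def logmeanexp(x, axis=None):
    # numerically stable log(mean(exp(x))): subtracting the maximum
    # before exponentiating avoids overflow for large log-weights
    x_max = x.max(axis=axis, keepdims=True)
    return (x_max + log(exp(x - x_max).mean(axis=axis, keepdims=True))).squeeze()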
Example #4
def main(argv):
	experiment = Experiment()

	# load and preprocess data
	data = load('./data/vanhateren8x8.npz')['data']
	data = preprocess(data)

	# train a mixture of Gaussian scale mixtures
	mixture = MoGSM(data.shape[0], 8, 4)
	mixture.train(data[:, :100000], num_epochs=100)

	# compute training error
	avglogloss = mixture.evaluate(data[:, 100000:])

	# store results
	experiment.results['mixture'] = mixture
	experiment.results['avglogloss'] = avglogloss
	experiment.save('results/experiment01/experiment01b.{0}.{1}.xpck')

	return 0
Example #5
def main(argv):
	parser = ArgumentParser(argv[0], description=__doc__)
	parser.add_argument('--patch_size',      '-p', type=int,   default=[8, 10, 12, 14, 16, 18, 20, 22], nargs='+')
	parser.add_argument('--row_multiplier',  '-R', type=int, default=[1], nargs='+',
		help='Can be used to train on elongated patches.')
	parser.add_argument('--col_multiplier',  '-C', type=int, default=[1], nargs='+',
		help='Can be used to train on elongated patches.')
	parser.add_argument('--num_patches',     '-P', type=int,   default=None,
		help='If given, subsample training data.')
	parser.add_argument('--num_valid',       '-V', type=int,   default=0,
		help='Number of training images used for validation error based early stopping.')
	parser.add_argument('--finetune',        '-F', type=int,   default=[1], nargs='+',
		help='Indicate iterations in which to finetune MCGSM with L-BFGS.')
	parser.add_argument('--learning_rate',   '-l', type=float, default=[1., .5, .1, .05, .01, 0.005, 0.001, 0.0005], nargs='+')
	parser.add_argument('--momentum',        '-m', type=float, default=[.9], nargs='+')
	parser.add_argument('--batch_size',      '-B', type=int,   default=[50], nargs='+')
	parser.add_argument('--nb_size',         '-b', type=int,   default=5,
		help='Size of the causal neighborhood of pixels.')
	parser.add_argument('--num_hiddens',     '-n', type=int,   default=64)
	parser.add_argument('--num_components',  '-c', type=int,   default=32)
	parser.add_argument('--num_scales',      '-s', type=int,   default=4)
	parser.add_argument('--num_features',    '-f', type=int,   default=32)
	parser.add_argument('--num_epochs',      '-e', type=int,   default=[1], nargs='+')
	parser.add_argument('--precondition',    '-Q', type=int,   default=1)
	parser.add_argument('--method',          '-M', type=str,   default=['SGD'], nargs='+')
	parser.add_argument('--data',            '-d', type=str,   default='data/deadleaves_train.mat')
	parser.add_argument('--noise',           '-N', type=float, default=None,
		help='Standard deviation of Gaussian noise added to data before training (as fraction of data standard deviation).')
	parser.add_argument('--model',           '-I', type=str,   default='',
		help='Start with this model as initialization. Other flags will be ignored.')
	parser.add_argument('--add_layer',       '-a', type=int,   default=[0], nargs='+')
	parser.add_argument('--train_top_layer', '-T', type=int,   default=[0], nargs='+')
	parser.add_argument('--train_means',     '-S', type=int,   default=[0], nargs='+')
	parser.add_argument('--mode',            '-q', type=str,   default='CPU', choices=['CPU', 'GPU'])
	parser.add_argument('--device',          '-D', type=int,   default=0)
	parser.add_argument('--augment',         '-A', type=int,   default=1,
		help='Increase training set size by transforming data.')
	parser.add_argument('--overlap',         '-O', type=int,   default=[1], nargs='+')
	parser.add_argument('--output',          '-o', type=str,   default='results/deadleaves/')
	parser.add_argument('--patch_model',           type=int,   default=0,
		help='Train a patch-based model instead of a stochastic process.')
	parser.add_argument('--extended',        '-X', type=int,   default=0,
		help='Use extended version of spatial LSTM.')
	parser.add_argument('--multiscale',      '-Y', type=int,   default=0,
		help='Apply recurrent image model to multiscale representation of images.')
	parser.add_argument('--color',           '-Z', type=int,   default=0,
		help='Use separate models to model color and grayscale values.')

	args = parser.parse_args(argv[1:])

	experiment = Experiment()

	if args.mode.upper() == 'GPU':
		caffe.set_mode_gpu()
		caffe.set_device(args.device)
	else:
		caffe.set_mode_cpu()

	# load data
	if args.data.lower()[-4:] in ['.gif', '.png', '.jpg', 'jpeg']:
		data = imread(args.data)[None]
		data += rand(*data.shape)
	else:
		data = loadmat(args.data)['data']

	if args.augment > 0:
		data = vstack([data, data[:, :, ::-1]])
	if args.augment > 1:
		data = vstack([data, data[:, ::-1, :]])

	if args.noise is not None:
		# add noise as a means for regularization
		data += randn(*data.shape) * (std(data, ddof=1) / args.noise)

	if args.num_valid > 0:
		if args.num_valid >= data.shape[0]:
			print 'Cannot use {0} for validation, there are only {1} training images.'.format(
					args.num_valid, data.shape[0])
			return 1

		# select subset for validation
		idx = random_select(args.num_valid, data.shape[0])
		data_valid = data[idx]
		data = asarray([image for i, image in enumerate(data) if i not in idx])

		print '{0} training images'.format(data.shape[0])
		print '{0} validation images'.format(data_valid.shape[0])

		patches_valid = []
		patch_size = min([64, data.shape[1], data.shape[2]])
		for i in range(0, data_valid.shape[1] - patch_size + 1, patch_size):
			for j in range(0, data_valid.shape[2] - patch_size + 1, patch_size):
				patches_valid.append(data_valid[:, i:i + patch_size, j:j + patch_size])
		patches_valid = vstack(patches_valid)

	if args.model:
		# load pretrained model
		results = Experiment(args.model)
		model = results['model']
		loss = [results['loss']]

		if args.patch_model and not isinstance(model, PatchRIDE):
			model = PatchRIDE(
				model=model,
				num_rows=args.patch_size[0],
				num_cols=args.patch_size[0])

	else:
		# create recurrent image model
		if args.patch_model:
			model = PatchRIDE(
				num_rows=args.patch_size[0],
				num_cols=args.patch_size[0],
				num_channels=data.shape[-1] if data.ndim > 3 else 1,
				num_hiddens=args.num_hiddens,
				nb_size=args.nb_size,
				num_components=args.num_components,
				num_scales=args.num_scales,
				num_features=args.num_features,
				model_class=ColorRIDE if args.color else RIDE)

			if args.extended:
				print 'Extended patch model not supported.'
				return 0

			if args.multiscale:
				print 'Multiscale patch model not supported.'
				return 0

		else:
			if args.multiscale:
				if data.ndim > 3 and data.shape[-1] > 1:
					print 'Multiscale color model not supported.'
					return 0

				model = MultiscaleRIDE(
					num_hiddens=args.num_hiddens,
					nb_size=args.nb_size,
					num_components=args.num_components,
					num_scales=args.num_scales,
					num_features=args.num_features,
					extended=args.extended > 0)

			elif args.color:
				if data.ndim < 4 or data.shape[-1] != 3:
					print 'These images don\'t look like RGB images.'
					return 0
				model = ColorRIDE(
					num_hiddens=args.num_hiddens,
					nb_size=args.nb_size,
					num_components=args.num_components,
					num_scales=args.num_scales,
					num_features=args.num_features,
					extended=args.extended > 0)

			else:
				model = RIDE(
					num_channels=data.shape[-1] if data.ndim > 3 else 1,
					num_hiddens=args.num_hiddens,
					nb_size=args.nb_size,
					num_components=args.num_components,
					num_scales=args.num_scales,
					num_features=args.num_features,
					extended=args.extended > 0)

		loss = []

	# compute initial performance
	loss_valid = []

	if args.num_valid > 0:
		print 'Computing validation loss...'
		loss_valid.append(model.evaluate(patches_valid))
		model_copy = deepcopy(model)

	for k, patch_size in enumerate(args.patch_size):
		if args.multiscale:
			patch_size *= 2

		if k < len(args.add_layer):
			for _ in range(args.add_layer[k]):
				# add spatial LSTM to the network
				model.add_layer()

		# extract patches of given patch size
		patches = []

		row_size = patch_size * args.row_multiplier[k % len(args.row_multiplier)]
		col_size = patch_size * args.col_multiplier[k % len(args.col_multiplier)]

		if isinstance(model, PatchRIDE):
			model.num_rows = row_size
			model.num_cols = col_size

		for i in range(0, data.shape[1] - row_size + 1, row_size / args.overlap[k % len(args.overlap)]):
			for j in range(0, data.shape[2] - col_size + 1, col_size / args.overlap[k % len(args.overlap)]):
				patches.append(data[:, i:i + row_size, j:j + col_size])
		patches = vstack(patches)

		# randomize order of patches
		if args.num_patches is not None and args.num_patches < len(patches):
			patches = patches[random_select(args.num_patches, len(patches))]
		else:
			patches = patches[permutation(len(patches))]

		# determine batch size
		if args.method[k % len(args.method)].upper() == 'SFO':
			num_batches = int(max([25, sqrt(patches.shape[0]) / 5.]))
			batch_size = patches.shape[0] // num_batches
		else:
			batch_size = args.batch_size[k % len(args.batch_size)]

		if batch_size < 1:
			raise RuntimeError('Too little data.')

		print 'Patch size: {0}x{1}'.format(row_size, col_size)
		print 'Number of patches: {0}'.format(patches.shape[0])
		print 'Batch size: {0}'.format(batch_size)

		# train recurrent image model
		print 'Training...'
		loss.append(
			model.train(patches,
				batch_size=batch_size,
				method=args.method[k % len(args.method)],
				num_epochs=args.num_epochs[k % len(args.num_epochs)],
				learning_rate=args.learning_rate[k % len(args.learning_rate)],
				momentum=args.momentum[k % len(args.momentum)],
				precondition=args.precondition > 0,
				train_top_layer=args.train_top_layer[k % len(args.train_top_layer)] > 0,
				train_means=args.train_means[k % len(args.train_means)] > 0))

		if args.finetune[k % len(args.finetune)]:
			print 'Finetuning...'
			model.finetune(patches, num_samples_train=1000000, max_iter=500)

		if args.num_valid > 0:
			print 'Computing validation loss...'
			loss_valid.append(model.evaluate(patches_valid))

			if loss_valid[-1] > loss_valid[-2]:
				print 'Performance got worse. Stopping optimization.'
				model = model_copy
				break

			print 'Copying model...'

			model_copy = deepcopy(model)

		experiment['batch_size'] = batch_size
		experiment['args'] = args
		experiment['model'] = model
		experiment['loss_valid'] = loss_valid
		experiment['loss'] = hstack(loss) if len(loss) > 0 else []
		experiment.save(os.path.join(args.output, 'rim.{0}.{1}.xpck'))

	return 0
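Most list-valued flags above are read as `args.x[k % len(args.x)]`: a one-element list acts as a constant across training stages, while longer lists are cycled. The pattern in isolation:

# cyclic indexing of per-stage hyperparameters, as used above
learning_rate = [1., .5, .1]
for k in range(5):
    print k, learning_rate[k % len(learning_rate)]
# prints 1.0, 0.5, 0.1, then wraps around to 1.0, 0.5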
Example #6
def main(argv):
	parser = ArgumentParser(argv[0], description=__doc__)
	parser.add_argument('--data',           '-d', type=str, default='data/BSDS300_8x8.mat')
	parser.add_argument('--num_train',      '-N', type=int, default=1000000)
	parser.add_argument('--num_valid',      '-V', type=int, default=200000)
	parser.add_argument('--num_components', '-n', type=int, default=128)
	parser.add_argument('--num_scales',     '-s', type=int, default=4)
	parser.add_argument('--num_features',   '-f', type=int, default=48)
	parser.add_argument('--train_means',    '-M', type=int, default=0)
	parser.add_argument('--indices',        '-I', type=int, default=[], nargs='+')
	parser.add_argument('--initialize',     '-i', type=str, default=None)
	parser.add_argument('--verbosity',      '-v', type=int, default=1)
	parser.add_argument('--max_iter',       '-m', type=int, default=2000)

	args = parser.parse_args(argv[1:])

	experiment = Experiment()

	data_train = loadmat(args.data)['patches_train']
	data_valid = loadmat(args.data)['patches_valid']

	if args.initialize:
		results = Experiment(args.initialize)
		models = results['models']
		preconditioners = results['preconditioners']
	else:
		models = [None] * data_train.shape[1]
		preconditioners = [None] * data_train.shape[1]

	def preprocess(data, i, N):
		if N > 0 and N < data.shape[0]:
			# select subset of data
			idx = random_select(N, data.shape[0])
			return data[idx, :i].T, data[idx, i][None, :]
		return data.T[:i], data.T[[i]]

	for i in range(data_train.shape[1]):
		if args.indices and i not in args.indices:
			# skip this one
			continue

		print 'Training model {0}/{1}...'.format(i + 1, data_train.shape[1])

		inputs_train, outputs_train = preprocess(data_train, i, args.num_train)
		inputs_valid, outputs_valid = preprocess(data_valid, i, args.num_valid)

		if i > 0:
			if preconditioners[i] is None:
				preconditioners[i] = WhiteningPreconditioner(inputs_train, outputs_train)

			inputs_train, outputs_train = preconditioners[i](inputs_train, outputs_train)
			inputs_valid, outputs_valid = preconditioners[i](inputs_valid, outputs_valid)

			if models[i] is None:
				models[i] = MCGSM(
					dim_in=i,
					dim_out=1,
					num_components=args.num_components,
					num_features=args.num_features,
					num_scales=args.num_scales)
			models[i].train(
				inputs_train, outputs_train,
				inputs_valid, outputs_valid,
				parameters={
					'verbosity': 1,
					'max_iter': args.max_iter,
					'train_means': args.train_means > 0})
		else:
			preconditioners[i] = None

			if models[i] is None:
				models[i] = MoGSM(
					dim=1,
					num_components=4,
					num_scales=8)
			models[i].train(
				outputs_train,
				outputs_valid,
				parameters={
					'verbosity': 1,
					'threshold': -1.,
					'train_means': 1,
					'max_iter': 100})

		experiment['args'] = args
		experiment['models'] = models
		experiment['preconditioners'] = preconditioners
		experiment.save('results/BSDS300/snapshots/mcgsm_{0}_{1}.{{0}}.{{1}}.xpck'.format(i, args.num_components))

	if not args.indices:
		experiment['args'] = args
		experiment['models'] = models
		experiment['preconditioners'] = preconditioners
		experiment.save('results/BSDS300/mcgsm.{0}.{1}.xpck')

	return 0
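Each model `i` above predicts pixel `i` from the pixels preceding it, so the log-likelihood of a whole patch factorizes over pixels via the chain rule. A hedged sketch of combining the trained chain at evaluation time; the `loglikelihood` and `logjacobian` methods are assumptions about the model and preconditioner API:

from numpy import zeros

def patch_loglikelihood(data, models, preconditioners):
    # chain rule: log p(patch) = sum_i log p(pixel_i | pixel_0, ..., pixel_i-1)
    loglik = zeros(data.shape[0])
    for i in range(data.shape[1]):
        inputs, outputs = data.T[:i], data.T[[i]]
        if i == 0:
            loglik += models[i].loglikelihood(outputs).ravel()
            continue
        # account for the preconditioner's change of variables
        loglik += preconditioners[i].logjacobian(inputs, outputs).ravel()
        inputs, outputs = preconditioners[i](inputs, outputs)
        loglik += models[i].loglikelihood(inputs, outputs).ravel()
    return loglik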
Example #7
def main(argv):
	parser = ArgumentParser(argv[0], description=__doc__)
	parser.add_argument('--num_components', '-c', type=int, default=20)
	parser.add_argument('--max_epochs',     '-E', type=int, default=4)
	parser.add_argument('--max_iter_tr',    '-m', type=int, default=5,
		help='Number of steps in the inner loop of the trust-region method.')
	parser.add_argument('--output',         '-o', type=str, default='results/mnist/')

	args = parser.parse_args(argv[1:])

	# create directories if necessary
	if not os.path.exists(args.output):
		os.makedirs(args.output)

	experiment = Experiment()

	data = load('data/mnist.npz')['train']
	data = data[:, permutation(data.shape[1])]
	data = asarray(data, dtype=float) / 255.
	data = asarray(rand(*data.shape) < data, dtype=float, order='F')

	def callback(model):
		if model.num_updates * args.max_iter_tr % 25:
			return

		callback.num_updates.append(model.num_updates)
		callback.lower_bound.append(model.lower_bound(data))

		print callback.lower_bound[-1]

		p = []
		for k in range(len(model)):
			p.append(model[k].alpha / (model[k].alpha + model[k].beta))
		p = hstack(p)

		imsave(os.path.join(args.output, 'mnist.{0}.png').format(callback.counter),
			stitch(p.T.reshape(-1, 28, 28), num_rows=4), cmap='gray', vmin=0., vmax=1.)

		callback.counter += 1

	callback.counter = 0
	callback.num_updates = []
	callback.lower_bound = []

	os.system('rm -f {0}'.format(os.path.join(args.output, 'mnist.*.png')))

	try:
		model = MoBernoulli(dim=784, num_components=args.num_components)
		model.train(data,
			batch_size=200,
			max_epochs=args.max_epochs,
			max_iter_tr=args.max_iter_tr,
			tau=100.,
			callback=callback)
	except KeyboardInterrupt:
		pass

	experiment['args'] = args
	experiment['model'] = model
	experiment['num_updates'] = callback.num_updates
	experiment['lower_bound'] = callback.lower_bound
	experiment.save(os.path.join(args.output, 'mnist.{0}.{1}.{{0}}.{{1}}.xpck').format(
		args.num_components, args.max_iter_tr))

	os.system('ffmpeg -r 25 -i {1} -vcodec mjpeg -sameq {0}'.format(
		os.path.join(
			args.output,
			'mnist.{0}.{1}.avi'.format(args.num_components, args.max_iter_tr)),
		os.path.join(args.output, 'mnist.%d.png')))

	return 0
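The callback above stores its state (`counter`, `num_updates`, `lower_bound`) as attributes on the function object itself, a lightweight alternative to defining a class. The pattern in isolation:

# function attributes as a lightweight way to give a callback state
def tick():
    tick.counter += 1
    tick.history.append(tick.counter)

tick.counter = 0
tick.history = []

tick()
tick()
print tick.history  # [1, 2]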
Example #8
def main(argv):
    seterr(over='raise', divide='raise', invalid='raise')

    try:
        if int(os.environ['OMP_NUM_THREADS']) > 1 or int(
                os.environ['MKL_NUM_THREADS']) > 1:
            print 'It seems that parallelization is turned on. This will skew the results. To turn it off:'
            print '\texport OMP_NUM_THREADS=1'
            print '\texport MKL_NUM_THREADS=1'
    except (KeyError, ValueError):
        print 'Parallelization of BLAS might be turned on. This could skew results.'

    experiment = Experiment(seed=42)

    if os.path.exists('results/toyexample/toyexample.xpck'):
        results = Experiment('results/toyexample/toyexample.xpck')
        ica = results['ica']
    else:
        # toy model
        ica = ISA(1, 3)
        ica.initialize(method='exponpow')
        ica.A = 1. + randn(1, 3) / 5.

        experiment['ica'] = ica
        experiment.save('results/toyexample/toyexample.xpck')

    # generate visible and corresponding hidden states
    Y = ica.sample_prior(NUM_SAMPLES)
    X = dot(ica.A, Y)

    # energy of posterior samples should be around this value
    energy = mean(ica.prior_energy(Y))

    for method in sampling_methods:
        # disable output and parallelization
        Distribution.VERBOSITY = 0
        mapp.max_processes = 1

        # measure time required by transition operator
        start = time()

        # initial hidden states
        Y = dot(pinv(ica.A), X)

        # increase number of steps to reduce overhead
        ica.sample_posterior(
            X,
            method=(method['method'],
                    dict(method['parameters'],
                         Y=Y,
                         num_steps=method['parameters']['num_steps'] *
                         NUM_STEPS_MULTIPLIER)))

        # time required per transition operator application
        duration = (time() - start) / NUM_STEPS_MULTIPLIER

        # enable output and parallelization
        Distribution.VERBOSITY = 2
        mapp.max_processes = 2

        energies = [mean(ica.prior_energy(Y))]

        # Markov chain
        for i in range(int(NUM_SECONDS / duration + 1.)):
            Y = ica.sample_posterior(X,
                                     method=(method['method'],
                                             dict(method['parameters'], Y=Y)))
            energies.append(mean(ica.prior_energy(Y)))

        plot(arange(len(energies)) * duration,
             energies,
             '-',
             color=method['color'],
             line_width=1.2,
             pgf_options=['forget plot'],
             comment=str(method['parameters']))

    plot([-2, NUM_SECONDS + 2], energy, 'k--', line_width=1.2)

    xlabel('time in seconds')
    ylabel('average energy')
    title('toy example')

    gca().width = 7
    gca().height = 7
    gca().xmin = -1
    gca().xmax = NUM_SECONDS

    savefig('results/toyexample/toyexample_trace.tex')

    return 0
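The check above can only warn, because by the time it runs, numpy's BLAS may already have spawned its threads. To actually run single-threaded, the variables have to be set before numpy is first imported:

import os

# must happen before numpy (and its BLAS) is imported anywhere in the
# process; afterwards the thread pool is already initialized
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['MKL_NUM_THREADS'] = '1'

import numpy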
Example #9
def main(argv):
	parser = ArgumentParser(argv[0], description=__doc__)
	parser.add_argument('--data',           '-d', type=str, default='data/BSDS300_8x8.mat')
	parser.add_argument('--nb_size',        '-b', type=int, default=5,
		help='Size of the causal neighborhood of pixels.')
	parser.add_argument('--num_train',      '-N', type=int, default=1000000)
	parser.add_argument('--num_valid',      '-V', type=int, default=200000)
	parser.add_argument('--num_hiddens',    '-n', type=int, default=64)
	parser.add_argument('--num_components', '-c', type=int, default=32)
	parser.add_argument('--num_scales',     '-s', type=int, default=4)
	parser.add_argument('--num_features',   '-f', type=int, default=32)
	parser.add_argument('--add_layer',      '-a', type=int,   default=[0], nargs='+')
	parser.add_argument('--learning_rate',  '-l', type=float, nargs='+', default=[.5, .1, .05, .01, .005, .001, 0.0005])
	parser.add_argument('--batch_size',     '-B', type=int, nargs='+', default=[50])
	parser.add_argument('--num_epochs',     '-e', type=int, default=[1], nargs='+')
	parser.add_argument('--finetune',       '-F', type=int, default=[1], nargs='+',
		help='Indicate iterations in which to finetune MCGSM with L-BFGS.')
	parser.add_argument('--precondition',   '-Q', type=int, default=1)
	parser.add_argument('--output',         '-o', type=str, default='results/BSDS300/')

	args = parser.parse_args(argv[1:])

	experiment = Experiment()

	print 'Loading data...'

	data_train = loadmat(args.data)['patches_train']
	data_valid = loadmat(args.data)['patches_valid']

	# reconstruct patches
	data_train = hstack([data_train, -sum(data_train, 1)[:, None]])
	data_valid = hstack([data_valid, -sum(data_valid, 1)[:, None]])
	patch_size = int(sqrt(data_train.shape[1]) + .5)
	data_train = data_train.reshape(-1, patch_size, patch_size)
	data_valid = data_valid.reshape(-1, patch_size, patch_size)

	print 'Creating model...'

	model = PatchRIDE(
		num_rows=8,
		num_cols=8,
		model_class=RIDE_BSDS300, # ensures the bottom-right pixel will be ignored
		nb_size=args.nb_size,
		num_hiddens=args.num_hiddens,
		num_components=args.num_components,
		num_scales=args.num_scales,
		num_features=args.num_features)

	print 'Evaluating...'

	loss = []
	loss_valid = []
	loss_valid.append(model.evaluate(data_valid))

	for i, learning_rate in enumerate(args.learning_rate):
		print 'Training...'

		if i < len(args.add_layer):
			for _ in range(args.add_layer[i]):
				# add spatial LSTM to the network
				model.add_layer()

		# randomize patch order
		data_train = data_train[permutation(data_train.shape[0])]

		# store current parameters
		model_copy = deepcopy(model)

		# train
		loss.append(
			model.train(data_train,
				learning_rate=learning_rate,
				precondition=args.precondition > 0,
				batch_size=args.batch_size[i % len(args.batch_size)],
				num_epochs=args.num_epochs[i % len(args.num_epochs)]))

		print 'Evaluating...'

		# evaluate model
		loss_valid.append(model.evaluate(data_valid))

		if loss_valid[-1] > loss_valid[-2]:
			# restore previous parameters
			model = model_copy

			print 'Performance got worse... Stopping optimization.'
			break

		# fine-tune
		if args.finetune[i % len(args.finetune)]:
			print 'Finetuning...'

			# store current parameters
			model_copy = deepcopy(model)

			model.finetune(data_train, num_samples_train=1000000, max_iter=500)

			print 'Evaluating...'

			loss_valid.append(model.evaluate(data_valid))

			if loss_valid[-1] > loss_valid[-2]:
				print 'Performance got worse... Restoring parameters.'

				model = model_copy
				loss_valid[-1] = loss_valid[-2]

		experiment['args'] = args
		experiment['loss'] = loss
		experiment['loss_valid'] = loss_valid
		experiment['model'] = model
		experiment.save(os.path.join(args.output, 'patchrim.{0}.{1}.xpck'))

	return 0
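The `hstack([data_train, -sum(data_train, 1)[:, None]])` step above reconstructs the missing 64th pixel of each patch, which is valid if the stored patches sum to zero, so the dropped pixel equals minus the sum of the other 63. A small numeric check of that assumption:

from numpy import allclose, random

patch = random.randn(64)
patch -= patch.mean()    # make the patch sum to zero
stored = patch[:63]      # what the data file would keep
assert allclose(-stored.sum(), patch[63])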
Example #10
def main(argv):
    experiment = Experiment()

    parser = ArgumentParser(argv[0], description=__doc__)
    parser.add_argument('--data',
                        '-d',
                        type=str,
                        default='data/vanhateren_deq2_train.mat')
    parser.add_argument('--num_data', '-N', type=int, default=1000000)
    parser.add_argument('--num_valid', '-V', type=int, default=200000)
    parser.add_argument('--input_size', '-i', type=int, default=9)
    parser.add_argument('--max_iter', '-I', type=int, default=3000)
    parser.add_argument('--num_components', '-c', type=int, default=128)
    parser.add_argument('--num_features', '-f', type=int, default=48)
    parser.add_argument('--num_scales', '-s', type=int, default=4)
    parser.add_argument('--verbosity', '-v', type=int, default=1)
    parser.add_argument('--output',
                        '-o',
                        type=str,
                        default='results/vanhateren_deq2/mcgsm.{0}.{1}.xpck')

    args = parser.parse_args(argv[1:])

    ### DATA HANDLING

    if args.verbosity > 0:
        print 'Loading data...'

    # load data
    images = loadmat(args.data)['data']

    # define causal neighborhood
    input_mask, output_mask = generate_masks(input_size=args.input_size,
                                             output_size=1)

    # extract causal neighborhoods
    num_samples = int((args.num_data + args.num_valid) / images.shape[0] + .9)

    def extract(image):
        return generate_data_from_image(image, input_mask, output_mask,
                                        num_samples)

    inputs, outputs = zip(*mapp(extract, images))
    inputs, outputs = hstack(inputs), hstack(outputs)

    inputs_train = inputs[:, :args.num_data]
    outputs_train = outputs[:, :args.num_data]
    inputs_valid = inputs[:, args.num_data:]
    outputs_valid = outputs[:, args.num_data:]

    if inputs_valid.size < 100:
        print 'Not enough data for validation.'
        inputs_valid = None
        outputs_valid = None

    ### MODEL TRAINING

    if args.verbosity > 0:
        print 'Preconditioning...'

    preconditioner = WhiteningPreconditioner(inputs_train, outputs_train)

    inputs_train, outputs_train = preconditioner(inputs_train, outputs_train)
    if inputs_valid is not None:
        inputs_valid, outputs_valid = preconditioner(inputs_valid,
                                                     outputs_valid)

    # free memory
    del inputs
    del outputs

    if args.verbosity > 0:
        print 'Training model...'

    model = MCGSM(dim_in=inputs_train.shape[0],
                  dim_out=outputs_train.shape[0],
                  num_components=args.num_components,
                  num_features=args.num_features,
                  num_scales=args.num_scales)

    def callback(i, mcgsm):
        experiment['args'] = args
        experiment['model'] = mcgsm
        experiment['preconditioner'] = preconditioner
        experiment['input_mask'] = input_mask
        experiment['output_mask'] = output_mask
        experiment.save(args.output)

    model.train(inputs_train,
                outputs_train,
                inputs_valid,
                outputs_valid,
                parameters={
                    'verbosity': args.verbosity,
                    'cb_iter': 500,
                    'callback': callback,
                    'max_iter': args.max_iter
                })

    ### SAVE RESULTS

    experiment['args'] = args
    experiment['model'] = model
    experiment['preconditioner'] = preconditioner
    experiment['input_mask'] = input_mask
    experiment['output_mask'] = output_mask
    experiment.save(args.output)

    return 0
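`generate_masks(input_size=args.input_size, output_size=1)` defines the causal neighborhood: which pixels around the target serve as inputs. A hypothetical sketch of what such boolean masks could look like for a 2-pixel neighborhood (the layout produced by the real function may differ):

from numpy import zeros

size = 2  # hypothetical neighborhood radius
input_mask = zeros((size + 1, 2 * size + 1), dtype=bool)
input_mask[:size, :] = True      # rows above the target pixel
input_mask[size, :size] = True   # same row, left of the target

output_mask = zeros((size + 1, 2 * size + 1), dtype=bool)
output_mask[size, size] = True   # the target pixel itself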
Example #11
def main(argv):
	if len(argv) < 2:
		print 'Usage:', argv[0], '<param_id>'
		print
		print '  {0:>3} {1:>7} {2:>5} {3:>5} {4:>5}'.format(
			'ID', 'PS', 'NS', 'TI', 'DC')

		for id, params in enumerate(parameters):
			print '  {0:>3} {1:>7} {2:>5} {3:>5} {4:>5}'.format(id, *params)

		print
		print '  ID = parameter set'
		print '  PS = patch size'
		print '  NS = number of scales'
		print '  TI = number of training iterations'
		print '  DC = model DC component separately'

		return 0

	# start experiment
	experiment = Experiment(server='10.38.138.150')

	# hyperparameters
	patch_size, num_scales, max_iter, separate_dc = parameters[int(argv[1])]



	### DATA PREPROCESSING

	# load data, log-transform and center data
	data = load('data/vanhateren.{0}.1.npz'.format(patch_size))['data']
	data = data[:, :100000]
	data = preprocess(data)



	### MODEL DEFINITION AND TRAINING

	if separate_dc:
		# discrete cosine transform and symmetric whitening transform
		dct = LinearTransform(dim=int(sqrt(data.shape[0])), basis='DCT')
		wt = WhiteningTransform(dct(data)[1:], symmetric=True)

		model = StackedModel(dct, ConcatModel(
			MoGaussian(20), 
			StackedModel(wt, GSM(data.shape[0] - 1, num_scales))))

	else:
		# symmetric whitening transform
		wt = WhiteningTransform(data, symmetric=True)
		model = StackedModel(wt, GSM(data.shape[0], num_scales))



	### MODEL TRAINING AND EVALUATION

	model.train(data, max_iter=max_iter, tol=1e-7)

	# load and preprocess test data
	data = load('data/vanhateren.{0}.0.npz'.format(patch_size))['data']
	data = preprocess(data, shuffle=False)

	# log-likelihood in [bit/pixel]
	logliks = model.loglikelihood(data) / log(2.) / data.shape[0]
	loglik = mean(logliks)
	sem = std(logliks, ddof=1) / sqrt(logliks.shape[1])

	print 'log-likelihood: {0:.4f} +- {1:.4f} [bit/pixel]'.format(loglik, sem)

	experiment['logliks'] = logliks
	experiment['loglik'] = loglik
	experiment['sem'] = sem
	experiment.save('results/vanhateren/gsm.{0}.{{0}}.{{1}}.xpck'.format(argv[1]))

	return 0
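The conversion to [bit/pixel] above divides a log-likelihood in nats by `log(2)` (nats to bits) and by the patch dimensionality (per patch to per pixel). A worked example with a made-up value:

from numpy import log

loglik_nats = -230.0    # made-up log-likelihood of one 16x16 patch, in nats
dim = 16 * 16

print '{0:.4f} [bit/pixel]'.format(loglik_nats / log(2.) / dim)  # about -1.2962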
Example #12
def main(argv):
	if len(argv) < 2:
		print 'Usage:', argv[0], '<param_id>', '[experiment]'
		print
		print '  {0:>3} {1:>7} {2:>5} {3:>5} {4:>5} {5:>5} {6:>5}'.format(
			'ID', 'PS', 'OC', 'TI', 'FI', 'LP', 'SC')

		for id, params in enumerate(parameters):
			print '  {0:>3} {1:>7} {2:>5} {3:>5} {4:>5} {5:>5} {6:>5}'.format(id, *params)

		print
		print '  ID = parameter set'
		print '  PS = patch size'
		print '  OC = overcompleteness'
		print '  TI = number of training iterations'
		print '  FI = number of fine-tuning iterations'
		print '  LP = optimize marginal distributions'
		print '  SC = initialize with sparse coding'

		return 0

	seterr(invalid='raise', over='raise', divide='raise')

	# start experiment
	experiment = Experiment()

	# hyperparameters
	patch_size, \
	overcompleteness, \
	max_iter, \
	max_iter_ft, \
	train_prior, \
	sparse_coding = parameters[int(argv[1])]


	
	### DATA PREPROCESSING

	# load data, log-transform and center data
	data = load('data/vanhateren.{0}.1.npz'.format(patch_size))['data']
	data = data[:, :100000]
	data = preprocess(data)

	# discrete cosine transform and whitening transform
	dct = LinearTransform(dim=int(sqrt(data.shape[0])), basis='DCT')
	wt = WhiteningTransform(dct(data)[1:], symmetric=True)


	### MODEL DEFINITION

	isa = ISA(num_visibles=data.shape[0] - 1,
	          num_hiddens=data.shape[0] * overcompleteness - 1, ssize=1)

	# model DC component with a mixture of Gaussians
	model = StackedModel(dct,
		ConcatModel(MoGaussian(20), StackedModel(wt, isa)))



	### MODEL TRAINING

	# variables to store in results
	experiment['model'] = model
	experiment['parameters'] = parameters[int(argv[1])]



	def callback(phase, isa, iteration):
		"""
		Saves intermediate results every few iterations.
		"""

		if not iteration % 5:
			# whitened filters
			A = dot(dct.A[1:].T, isa.A)

			patch_size = int(sqrt(A.shape[0]) + 0.5)

			# save intermediate results
			experiment.save('results/vanhateren.{0}/results.{1}.{2}.xpck'.format(argv[1], phase, iteration))

			# visualize basis
			imsave('results/vanhateren.{0}/basis.{1}.{2:0>3}.png'.format(argv[1], phase, iteration),
				stitch(imformat(A.T.reshape(-1, patch_size, patch_size))))



	if len(argv) > 2:
		# initialize model with trained model
		results = Experiment(argv[2])
		model = results['model']

		isa = model.model[1].model
		dct = model.transforms[0]

		experiment['model'] = model

	else:
		# enable regularization of marginals
		for gsm in isa.subspaces:
			gsm.gamma = 1e-3
			gsm.alpha = 2.
			gsm.beta = 1.

		# train mixture of Gaussians on DC component
		model.train(data, 0, max_iter=100)

		# initialize filters and marginals
		model.initialize(data, 1)
		model.initialize(model=1, method='laplace')

		experiment.progress(10)

		if sparse_coding:
			# initialize with sparse coding
			if patch_size == '16x16':
				model.train(data, 1,
					method=('of', {
						'max_iter': max_iter,
						'noise_var': 0.05,
						'var_goal': 1.,
						'beta': 10.,
						'step_width': 0.01,
						'sigma': 0.3,
						}),
					callback=lambda isa, iteration: callback(0, isa, iteration))
			else:
				model.train(data, 1,
					method=('of', {
						'max_iter': max_iter,
						'noise_var': 0.1,
						'var_goal': 1.,
						'beta': 10.,
						'step_width': 0.01,
						'sigma': 0.5,
						}),
					callback=lambda isa, iteration: callback(0, isa, iteration))
			isa.orthogonalize()

		else:
			if patch_size == '16x16':
				# prevents out-of-memory
				mapp.max_processes = 1

			# train model using a subset of the data
			model.train(data[:, :20000], 1,
				max_iter=max_iter,
				train_prior=train_prior,
				persistent=True,
				init_sampling_steps=5,
				method=('sgd', {'momentum': 0.8}),
				callback=lambda isa, iteration: callback(0, isa, iteration),
				sampling_method=('gibbs', {'num_steps': 1}))

	experiment.progress(50)

	if patch_size == '16x16':
		# prevents out-of-memory
		mapp.max_processes = 1

	# disable regularization
	for gsm in isa.subspaces:
		gsm.gamma = 0.

	# fine-tune model using all the data
	model.train(data, 1,
		max_iter=max_iter_ft,
		train_prior=train_prior,
		train_subspaces=False,
		persistent=True,
		init_sampling_steps=10 if not len(argv) > 2 and (sparse_coding or not train_prior) else 50,
		method=('lbfgs', {'max_fun': 50}),
		callback=lambda isa, iteration: callback(1, isa, iteration),
		sampling_method=('gibbs', {'num_steps': 2}))

	experiment.save('results/vanhateren/vanhateren.{0}.{{0}}.{{1}}.xpck'.format(argv[1]))

	return 0
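The model above sends the first DCT coefficient of each patch, its DC component, to a `MoGaussian`, and the remaining whitened coefficients to the ISA model. Since the first DCT basis function is constant, its coefficient is proportional to the patch mean; a simplified pixel-space analogue of the split:

from numpy import random

patches = random.randn(256, 1000)    # 16x16 patches as columns
dc = patches.mean(0)[None, :]        # patch means (the DC component)
ac = patches - dc                    # zero-mean remainder, modeled by the ISA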
Example #13
def main(argv):
	seterr(over='raise', divide='raise', invalid='raise')

	try:
		if int(os.environ['OMP_NUM_THREADS']) > 1 or int(os.environ['MKL_NUM_THREADS']) > 1:
			print 'It seems that parallelization is turned on. This will skew the results. To turn it off:'
			print '\texport OMP_NUM_THREADS=1'
			print '\texport MKL_NUM_THREADS=1'
	except (KeyError, ValueError):
		print 'Parallelization of BLAS might be turned on. This could skew results.'

	experiment = Experiment(seed=42)

	if os.path.exists('results/toyexample/toyexample.xpck'):
		results = Experiment('results/toyexample/toyexample.xpck')
		ica = results['ica']
	else:
		# toy model
		ica = ISA(1, 3)
		ica.initialize(method='exponpow')
		ica.A = 1. + randn(1, 3) / 5.

		experiment['ica'] = ica
		experiment.save('results/toyexample/toyexample.xpck')

	Y_ = ica.sample_prior(NUM_AUTOCORR)
	X_ = dot(ica.A, Y_)

	for method in sampling_methods:
		# disable output and parallelization
		Distribution.VERBOSITY = 0
		mapp.max_processes = 1

		Y = ica.sample_prior(NUM_SAMPLES)
		X = dot(ica.A, Y)

		# measure time required by transition operator
		start = time()

		# increase number of steps to reduce overhead
		ica.sample_posterior(X, method=(method['method'], dict(method['parameters'],
			Y=Y, num_steps=method['parameters']['num_steps'] * NUM_STEPS_MULTIPLIER)))

		# time required per transition operator application
		duration = (time() - start) / NUM_STEPS_MULTIPLIER

		# number of mcmc steps to run for this method
		num_mcmc_steps = int(NUM_SECONDS_RUN / duration + 1.)
		num_autocorr_steps = int(NUM_SECONDS_VIS / duration + 1.)

		# enable output and parallelization
		Distribution.VERBOSITY = 2
		mapp.max_processes = 2

		# posterior samples
		Y = [Y_]

		# Markov chain
		for i in range(num_mcmc_steps):
			Y.append(ica.sample_posterior(X_, 
				method=(method['method'], dict(method['parameters'], Y=Y[-1]))))

		ac = []

		for j in range(NUM_AUTOCORR):
			# collect samples belonging to one posterior distribution
			S = hstack([Y[k][:, [j]] for k in range(num_mcmc_steps)])

			# compute autocorrelation for j-th posterior
			ac.append(autocorr(S, num_autocorr_steps))

		# average and plot autocorrelation functions
		plot(arange(num_autocorr_steps) * duration, mean(ac, 0), '-', 
			color=method['color'],
			line_width=1.2,
			comment=str(method['parameters']))

	xlabel('time in seconds')
	ylabel('autocorrelation')
	title('toy example')

	gca().width = 7
	gca().height = 7
	gca().xmin = -1
	gca().xmax = NUM_SECONDS_VIS

	savefig('results/toyexample/toyexample_autocorr2.tex')

	return 0
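The plots rely on an `autocorr` helper to summarize how quickly each Markov chain decorrelates; a minimal sketch of such an estimate, assuming the rows of `S` hold the chains (the script's own implementation may normalize differently):

from numpy import array

def autocorr(S, max_lag):
    # empirical autocorrelation, one value per lag in [0, max_lag);
    # rows of S are chains, columns are successive MCMC samples
    S = S - S.mean(1)[:, None]
    var = (S * S).mean()
    return array([(S[:, :S.shape[1] - lag] * S[:, lag:]).mean() / var
                  for lag in range(max_lag)])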