def test_patchmcgsm_stationary(self):
    """Training with `stationary` should tie parameters across interior pixels."""
    xmask = ones([2, 2], dtype='bool')
    ymask = zeros([2, 2], dtype='bool')
    xmask[-1, -1] = False
    ymask[-1, -1] = True

    model = PatchMCGSM(3, 3, xmask, ymask, model=MCGSM(sum(xmask), 1, 2, 2))

    data = randn(4, 10000)

    model.initialize(data)
    model.train(data, parameters={
        'verbosity': 0,
        'max_iter': 10,
        'stationary': True,
        # fixed typo: was 'treshold', which would be silently ignored;
        # the rest of the file consistently uses 'threshold'
        'threshold': 1e-4
    })

    # interior pixels share predictors; boundary pixels do not
    self.assertTrue(all(model[0, 2].predictors == model[0, 1].predictors))
    self.assertFalse(all(model[1, 0].predictors == model[0, 1].predictors))
    self.assertTrue(all(model[1, 2].weights == model[1, 1].weights))
    self.assertTrue(all(model[1, 2].features == model[1, 1].features))
    self.assertTrue(all(model[1, 2].scales == model[1, 1].scales))
    self.assertTrue(all(model[1, 2].priors == model[1, 1].priors))

    # same check with automatically generated masks
    xmask, ymask = generate_masks(3)

    model = PatchMCGSM(3, 3, xmask, ymask, model=MCGSM(sum(xmask), 1, 2, 2))

    data = randn(4, 10000)

    model.initialize(data)
    model.train(data, parameters={
        'verbosity': 0,
        'max_iter': 10,
        'stationary': True,
        'threshold': 1e-4
    })

    self.assertTrue(all(model[0, 2].weights == model[0, 1].weights))
    self.assertTrue(all(model[2, 0].features == model[1, 0].features))
    self.assertTrue(all(model[2, 2].scales == model[1, 2].scales))
def test_patchmcgsm(self):
    # Purpose: check mask handling, per-pixel model dimensionality, custom
    # pixel orderings, and reconstruction of masks/order via constructors.
    xmask = ones([8, 8], dtype='bool')
    ymask = zeros([8, 8], dtype='bool')
    xmask[-1, -1] = False
    ymask[-1, -1] = True

    model = PatchMCGSM(8, 8, xmask, ymask, model=MCGSM(sum(xmask), 1))

    self.assertLess(max(abs(model.input_mask() - xmask)), 1e-8)
    self.assertLess(max(abs(model.output_mask() - ymask)), 1e-8)

    for i in range(8):
        for j in range(8):
            # pixel (i, j) is conditioned on all pixels preceding it
            self.assertEqual(model[i, j].dim_in, (i + 1) * (j + 1) - 1)
            self.assertTrue(isinstance(model[i, j], MCGSM))

    # random pixel ordering
    rows, cols = 7, 5
    order = [(i // cols, i % cols) for i in permutation(rows * cols)]

    model = PatchMCGSM(rows, cols, xmask, ymask, order, MCGSM(sum(xmask), 1))

    self.assertLess(max(abs(model.input_mask() - xmask)), 1e-8)
    self.assertLess(max(abs(model.output_mask() - ymask)), 1e-8)

    for i in range(rows):
        for j in range(cols):
            # per-pixel input mask size must match the submodel's input dim
            self.assertEqual(
                model.input_mask(i, j).sum(), model[i, j].dim_in)

    # test constructors
    model0 = PatchMCGSM(rows, cols, max_pcs=3)
    model1 = PatchMCGSM(rows, cols, model0.input_mask(), model0.output_mask(), model0.order)

    self.assertLess(max(abs(model0.input_mask() - model1.input_mask())), 1e-8)
    self.assertLess(max(abs(model0.output_mask() - model1.output_mask())), 1e-8)
    self.assertLess(
        max(abs(asarray(model0.order) - asarray(model1.order))), 1e-8)

    # test computation of input masks
    model = PatchMCGSM(rows, cols, order, max_pcs=3)

    # NOTE(review): the loop is seeded with `model0.order[0]` but iterates
    # `model.order[1:]` -- presumably both orderings coincide in their first
    # element here; verify this is intentional.
    i, j = model0.order[0]
    input_mask = model.input_mask(i, j)

    for i, j in model.order[1:]:
        # each successive pixel's input mask should grow by exactly one pixel
        self.assertEqual(sum(model.input_mask(i, j) - input_mask), 1)
        input_mask = model.input_mask(i, j)
def test_conditional_loglikelihood(self):
    # Compare the exact log-likelihood with a Monte Carlo estimate obtained
    # by averaging conditional log-likelihoods over labels sampled from the prior.
    mcgsm = MCGSM(3, 1, 2, 1, 4)
    mcgsm.linear_features = randn(mcgsm.num_components, mcgsm.dim_in) / 5.
    mcgsm.means = randn(mcgsm.dim_out, mcgsm.num_components) / 5.

    M = 100  # number of data points
    inputs = randn(mcgsm.dim_in, M)
    outputs = mcgsm.sample(inputs)

    loglik0 = mcgsm.loglikelihood(inputs, outputs)
    loglik1 = []

    N = 1000  # number of Monte Carlo samples

    # estimate log-likelihood via sampling
    for _ in range(N):
        labels = mcgsm.sample_prior(inputs)
        loglik1.append(mcgsm.loglikelihood(inputs, outputs, labels))

    loglik1 = vstack(loglik1)

    # absolute deviation and standard deviation of the estimator
    d = abs(logmeanexp(loglik1, 0) - loglik0).ravel()
    s = std(loglik1, 0, ddof=1).ravel()

    for i in range(M):
        # estimate should lie within 6 standard errors of the exact value
        self.assertLess(d[i], 6. * s[i] / sqrt(N))
def test_fill_in_image(self):
    """fill_in_image should reject invalid preconditioners; fill_in_image_map should run."""
    xmask = asarray([[1, 1, 1], [1, 0, 0], [0, 0, 0]], dtype='bool')
    ymask = asarray([[0, 0, 0], [0, 1, 0], [0, 0, 0]], dtype='bool')
    fmask = rand(10, 10) > .9
    # keep the image boundary fixed
    fmask[0] = False
    fmask[:, 0] = False
    fmask[-1] = False
    fmask[:, -1] = False
    img = randn(10, 10)

    model = MCGSM(4, 1)

    # this should raise an exception (10. is not a valid preconditioner);
    # fixed: arguments were previously wrapped in a single tuple, which made
    # assertRaises pass fill_in_image one tuple argument and succeed for the
    # wrong reason (arity mismatch)
    self.assertRaises(TypeError, fill_in_image,
        img, model, xmask, ymask, fmask, 10.)

    # this should raise no exception
    wt = WhiteningPreconditioner(randn(4, 1000), randn(1, 1000))
    fill_in_image_map(img, model, xmask, ymask, fmask, wt,
        num_iter=1, patch_size=20)
def train_model(img, input_mask, output_mask):
    """
    Fit an MCGSM to pixel neighborhoods extracted from the given image.

    Returns the trained model together with the whitening preconditioner
    used to normalize the data.
    """
    # generate data
    ins, outs = generate_data_from_image(img, input_mask, output_mask, 120000)

    # first 100k samples for training, remainder for validation
    data_train = ins[:, :100000], outs[:, :100000]
    data_valid = ins[:, 100000:], outs[:, 100000:]

    # compute normalizing transformation
    preconditioner = WhiteningPreconditioner(*data_train)

    # intialize model
    model = MCGSM(
        dim_in=data_train[0].shape[0],
        dim_out=data_train[1].shape[0],
        num_components=8,
        num_scales=4,
        num_features=30)

    # fit parameters on whitened data with early stopping on the validation set
    model.initialize(*preconditioner(*data_train))
    model.train(
        *chain(preconditioner(*data_train), preconditioner(*data_valid)),
        parameters={
            'verbosity': 1,
            'max_iter': 1000,
            'threshold': 1e-7,
            'val_iter': 5,
            'val_look_ahead': 10,
            'num_grad': 20,
        })

    return model, preconditioner
def test_train(self):
    # Purpose: training with max_iter=0 and failed training attempts must not
    # alter parameters; a callback must be invoked every cb_iter iterations.
    mcgsm = MCGSM(8, 3, 4, 2, 20)

    # snapshot parameters before training
    priors = mcgsm.priors
    scales = mcgsm.scales
    weights = mcgsm.weights
    features = mcgsm.features
    predictor = mcgsm.predictors[0]

    # zero iterations: should be a no-op
    mcgsm.train(
        randn(mcgsm.dim_in, 20000),
        randn(mcgsm.dim_out, 20000),
        parameters={
            'verbosity': 0,
            'max_iter': 0,
        })

    # this should raise errors (mismatched input/output dimensions)
    self.assertRaises(RuntimeError, mcgsm.train,
        randn(mcgsm.dim_in - 1, 2000), randn(1, 2000))
    self.assertRaises(RuntimeError, mcgsm.train,
        randn(mcgsm.dim_in - 1, 2000), randn(2000))
    self.assertRaises(RuntimeError, mcgsm.train,
        randn(mcgsm.dim_in - 1, 2000), randn(mcgsm.dim_out, 2000),
        randn(mcgsm.dim_in - 1, 1000), randn(mcgsm.dim_out, 1000))

    # parameters should not have changed
    self.assertLess(max(abs(mcgsm.priors - priors)), 1e-20)
    self.assertLess(max(abs(mcgsm.scales - scales)), 1e-20)
    self.assertLess(max(abs(mcgsm.weights - weights)), 1e-20)
    self.assertLess(max(abs(mcgsm.features - features)), 1e-20)
    self.assertLess(max(abs(mcgsm.predictors[0] - predictor)), 1e-20)

    count = []  # records iteration numbers at which the callback fires

    def callback(i, mcgsm):
        count.append(i)
        return

    max_iter = 10
    cb_iter = 2

    # make sure training doesn't throw any errors
    mcgsm.train(
        randn(mcgsm.dim_in, 10000),
        randn(mcgsm.dim_out, 10000),
        parameters={
            'verbosity': 0,
            'max_iter': max_iter,
            'threshold': 0.,
            'batch_size': 1999,
            'callback': callback,
            'cb_iter': cb_iter,
        })

    # test callback
    # NOTE(review): this comparison assumes Python 2, where range() returns
    # a list; under Python 3 it would always be False.
    self.assertTrue(range(cb_iter, max_iter + 1, cb_iter) == count)
def test_basics(self):
    # Purpose: verify hyperparameters, parameter-array shapes, and output
    # shapes of a freshly constructed MCGSM.
    dim_in = 10
    dim_out = 3
    num_components = 7
    num_scales = 5
    num_features = 50
    num_samples = 100

    # create model
    mcgsm = MCGSM(dim_in, dim_out, num_components, num_scales, num_features)

    # generate output
    input = randn(dim_in, num_samples)
    output = mcgsm.sample(input)
    loglik = mcgsm.loglikelihood(input, output)
    post = mcgsm.posterior(input, output)
    samples = mcgsm.sample_posterior(input, output)

    # check hyperparameters
    self.assertEqual(mcgsm.dim_in, dim_in)
    self.assertEqual(mcgsm.dim_out, dim_out)
    self.assertEqual(mcgsm.num_components, num_components)
    self.assertEqual(mcgsm.num_scales, num_scales)
    self.assertEqual(mcgsm.num_features, num_features)

    # check parameters
    self.assertEqual(mcgsm.priors.shape[0], num_components)
    self.assertEqual(mcgsm.priors.shape[1], num_scales)
    self.assertEqual(mcgsm.scales.shape[0], num_components)
    self.assertEqual(mcgsm.scales.shape[1], num_scales)
    self.assertEqual(mcgsm.weights.shape[0], num_components)
    self.assertEqual(mcgsm.weights.shape[1], num_features)
    self.assertEqual(mcgsm.features.shape[0], dim_in)
    self.assertEqual(mcgsm.features.shape[1], num_features)
    self.assertEqual(len(mcgsm.cholesky_factors), num_components)
    self.assertEqual(len(mcgsm.predictors), num_components)
    self.assertEqual(mcgsm.cholesky_factors[0].shape[0], dim_out)
    self.assertEqual(mcgsm.cholesky_factors[0].shape[1], dim_out)
    self.assertEqual(mcgsm.predictors[0].shape[0], dim_out)
    self.assertEqual(mcgsm.predictors[0].shape[1], dim_in)
    self.assertEqual(mcgsm.linear_features.shape[0], num_components)
    self.assertEqual(mcgsm.linear_features.shape[1], dim_in)
    self.assertEqual(mcgsm.means.shape[0], dim_out)
    self.assertEqual(mcgsm.means.shape[1], num_components)

    # check dimensionality of output
    self.assertEqual(output.shape[0], dim_out)
    self.assertEqual(output.shape[1], num_samples)
    self.assertEqual(loglik.shape[0], 1)
    self.assertEqual(loglik.shape[1], num_samples)
    self.assertEqual(post.shape[0], num_components)
    self.assertEqual(post.shape[1], num_samples)
    # posterior samples are component labels in [0, num_components)
    self.assertLess(max(samples), mcgsm.num_components)
    self.assertGreaterEqual(min(samples), 0)
    self.assertEqual(samples.shape[0], 1)
    self.assertEqual(samples.shape[1], num_samples)
def test_sample(self):
    """Samples from a trivial one-component model should be standard normal."""
    # collapse the model to a unit-variance Gaussian with a zero predictor
    model = MCGSM(1, 1, 1, 1, 1)
    model.scales = [[0.]]
    model.predictors = [[0.]]

    draws = model.sample(zeros([1, 10000])).flatten()

    # Kolmogorov-Smirnov test against N(0, 1)
    p_value = kstest(draws, lambda x: norm.cdf(x, scale=1.))[1]

    # make sure Gaussian random number generation works
    self.assertTrue(p_value > 0.0001)
def test_evaluate(self):
    """Cross-check `evaluate` against loglikelihood plus the Jacobian correction."""
    model = MCGSM(5, 3, 4, 2, 10)

    ins = randn(model.dim_in, 100)
    outs = model.sample(ins)

    pre = WhiteningPreconditioner(ins, outs)

    # evaluate() reports negative average log-likelihood in bits per output dim
    ll_direct = -model.evaluate(ins, outs, pre)

    # reconstruct the same quantity manually: whitened log-likelihood plus
    # log-Jacobian of the preconditioner, converted to bits per dimension
    ll_manual = (model.loglikelihood(*pre(ins, outs)).mean()
        + pre.logjacobian(ins, outs).mean()) / log(2.) / model.dim_out

    self.assertAlmostEqual(ll_direct, ll_manual, 8)
def add_layer(self):
    """
    Add another spatial LSTM to the network and reinitialize MCGSM.
    """
    self.num_layers = self.num_layers + 1

    # the MCGSM's input distribution changes with the new layer,
    # so it has to be reinitialized from scratch (same hyperparameters)
    self.mcgsm = MCGSM(
        dim_in=self.num_hiddens,
        dim_out=self.num_channels,
        num_components=self.mcgsm.num_components,
        num_scales=self.mcgsm.num_scales,
        num_features=self.mcgsm.num_features)

    # add slot for another layer
    self.slstm.append(None)
def robust_linear_regression(x, y, num_scales=3, max_iter=1000):
    """
    Performs linear regression with Gaussian scale mixture residuals.

    $$y = ax + b + \\varepsilon,$$

    where $\\varepsilon$ is assumed to be Gaussian scale mixture distributed.

    @type x: array_like
    @param x: list of one-dimensional inputs

    @type y: array_like
    @param y: list of one-dimensional outputs

    @type num_scales: int
    @param num_scales: number of Gaussian scale mixture components

    @type max_iter: int
    @param max_iter: number of optimization steps in parameter search

    @rtype: tuple
    @return: slope and y-intercept
    """
    x = asarray(x).reshape(1, -1)
    y = asarray(y).reshape(1, -1)

    # preprocess inputs: standardize for better conditioning
    m = mean(x)
    s = std(x)
    x = (x - m) / s

    # preprocess outputs using simple (least-squares) linear regression,
    # then work with the residuals
    C = cov(x, y)
    a = C[0, 1] / C[0, 0]
    b = mean(y) - a * mean(x)
    y = y - (a * x + b)

    # robust linear regression of the residuals via a 1D MCGSM
    model = MCGSM(dim_in=1, dim_out=1, num_components=1, num_scales=num_scales, num_features=0)
    model.initialize(x, y)
    model.train(x, y, parameters={'train_means': True, 'max_iter': max_iter})

    # undo the standardization to express slope/intercept in original units
    a = (a + float(model.predictors[0])) / s
    b = (b + float(model.means)) - a * m

    return a, b
def test_sample_video(self):
    """Sampling a video should leave the initial frame unchanged."""
    # 3x3x2 spatio-temporal neighborhood: full previous frame plus the
    # causal part of the current frame
    input_mask = dstack([
        asarray([[1, 1, 1], [1, 1, 1], [1, 1, 1]], dtype='bool'),
        asarray([[1, 1, 1], [1, 0, 0], [0, 0, 0]], dtype='bool')])
    output_mask = dstack([
        asarray([[0, 0, 0], [0, 0, 0], [0, 0, 0]], dtype='bool'),
        asarray([[0, 0, 0], [0, 1, 0], [0, 0, 0]], dtype='bool')])

    model = MCGSM(13, 1)

    video = randn(64, 64, 5)
    sampled = sample_video(video, model, input_mask, output_mask)

    # the first frame should be untouched
    self.assertLess(max(abs(video[:, :, 0] - sampled[:, :, 0])), 1e-10)
def test_sample_conditionally(self):
    """Two-stage (label-conditioned) sampling should match direct sampling."""
    model = MCGSM(3, 2, 2, 2, 4)

    # make sure there are differences between components
    model.weights = -log(rand(*model.weights.shape)) * 10.
    model.scales = square(model.scales * 3.)

    inputs = randn(model.dim_in, 100000)

    # sample directly
    direct = model.sample(inputs)

    # sample indirectly: draw mixture labels first, then outputs given labels
    labels = model.sample_prior(inputs)
    staged = model.sample(inputs, labels)

    # both sampling paths should yield the same distribution
    p_value = ks_2samp(direct.ravel(), staged.ravel())[1]
    self.assertGreater(p_value, 1e-5)
def test_data_gradient(self):
    # Compare analytic gradients of the log-likelihood w.r.t. inputs and
    # outputs with central finite differences (including dim_in == 0).
    for dim_in in [5, 0]:
        mcgsm = MCGSM(dim_in, 3, 4, 5, 10)

        # randomize the Cholesky factors so covariances are non-trivial
        cholesky_factors = []
        for k in range(mcgsm.num_components):
            cholesky_factors.append(
                cholesky(cov(randn(mcgsm.dim_out, mcgsm.dim_out**2))))
        mcgsm.cholesky_factors = cholesky_factors

        inputs = randn(mcgsm.dim_in, 100)
        outputs = ones_like(mcgsm.sample(inputs))

        # compute density gradient and loglikelihood
        dx, dy, ll = mcgsm._data_gradient(inputs, outputs)

        # log-likelihood returned by _data_gradient must match the usual one
        self.assertLess(
            max(abs(ll - mcgsm.loglikelihood(inputs, outputs))), 1e-8)

        h = 1e-5  # finite-difference step size

        dx_ = zeros_like(dx)
        dy_ = zeros_like(dy)

        # central differences w.r.t. each input dimension
        for i in range(mcgsm.dim_in):
            inputs_p = inputs.copy()
            inputs_m = inputs.copy()
            inputs_p[i] += h
            inputs_m[i] -= h
            dx_[i] = (mcgsm.loglikelihood(inputs_p, outputs)
                - mcgsm.loglikelihood(inputs_m, outputs)) / (2. * h)

        # central differences w.r.t. each output dimension
        for i in range(mcgsm.dim_out):
            outputs_p = outputs.copy()
            outputs_m = outputs.copy()
            outputs_p[i] += h
            outputs_m[i] -= h
            dy_[i] = (mcgsm.loglikelihood(inputs, outputs_p)
                - mcgsm.loglikelihood(inputs, outputs_m)) / (2. * h)

        self.assertLess(max(abs(dy_ - dy)), 1e-8)
        if mcgsm.dim_in > 0:
            self.assertLess(max(abs(dx_ - dx)), 1e-8)
def __init__(self, num_channels=1, num_hiddens=10, num_components=4,
        num_scales=4, num_features=16, num_layers=1, nb_size=3,
        nonlinearity='TanH', verbosity=1, extended=False,
        input_mask=None, output_mask=None):
    """Construct the model: spatial LSTM layers feeding an MCGSM."""
    self.verbosity = verbosity
    self.num_channels = num_channels
    self.num_hiddens = num_hiddens
    self.num_layers = num_layers
    self.nonlinearity = nonlinearity
    self.extended = extended

    self.input_mask, self.output_mask = generate_masks([nb_size] * num_channels)

    # fixed: `if input_mask:` raises ValueError for multi-element numpy
    # arrays (ambiguous truth value); test against None explicitly, as the
    # sibling constructor in this file already does
    if input_mask is not None:
        self.input_mask = input_mask
    if output_mask is not None:
        self.output_mask = output_mask
        self.num_channels = sum(self.output_mask)

    self.slstm = [None] * num_layers
    # fixed: use self.num_channels so a custom output mask is reflected in
    # the MCGSM's output dimensionality (consistent with the sibling ctor)
    self.mcgsm = MCGSM(
        dim_in=self.num_hiddens,
        dim_out=self.num_channels,
        num_components=num_components,
        num_scales=num_scales,
        num_features=num_features)

    self.preconditioner = None

    # see PatchRIDE
    self._indicators = False
def test_sample_image(self):
    """sample_image should only replace masked pixels and reject bad preconditioners."""
    xmask = asarray([[1, 1], [1, 0]], dtype='bool')
    ymask = asarray([[0, 0], [0, 1]], dtype='bool')

    img_init = asarray([[1., 2.], [3., 4.]])

    model = MCGSM(3, 1)
    img_sample = sample_image(img_init, model, xmask, ymask)

    # only the bottom-right pixel should have been replaced
    self.assertLess(max(abs((img_init - img_sample).ravel()[:3])), 1e-10)

    # test using preconditioner
    wt = WhiteningPreconditioner(randn(3, 1000), randn(1, 1000))
    sample_image(img_init, model, xmask, ymask, wt)

    # test what happens if invalid preconditioner is given;
    # fixed: arguments were previously wrapped in a single tuple, which made
    # assertRaises call sample_image with one tuple argument -- a TypeError
    # for the wrong reason (arity), so the check was vacuous
    self.assertRaises(TypeError, sample_image,
        img_init, model, xmask, ymask, 10.)
    self.assertRaises(TypeError, sample_image,
        img_init, model, xmask, ymask, model)
def test_patchmcgsm_train(self):
    """Training a small PatchMCGSM should converge."""
    xmask = ones([2, 2], dtype='bool')
    ymask = zeros([2, 2], dtype='bool')
    xmask[-1, -1] = False
    ymask[-1, -1] = True

    model = PatchMCGSM(2, 2, xmask, ymask, model=MCGSM(sum(xmask), 1, 1, 1))

    data = randn(4, 10000)

    model.initialize(data)
    converged = model.train(data, parameters={
        'verbosity': 0,
        'max_iter': 200,
        # fixed typo: was 'treshold', which would be silently ignored;
        # the rest of the file consistently uses 'threshold'
        'threshold': 1e-4
    })

    self.assertTrue(converged)
def test_pickle(self):
    """Parameters must survive a pickle round-trip."""
    mcgsm0 = MCGSM(11, 2, 4, 7, 21)
    mcgsm0.linear_features = randn(mcgsm0.num_components, mcgsm0.dim_in)
    mcgsm0.means = randn(mcgsm0.dim_out, mcgsm0.num_components)

    tmp_file = mkstemp()[1]

    # store model
    # fixed: open pickle files in binary mode; text mode corrupts pickles
    # on platforms with newline translation and fails under Python 3
    with open(tmp_file, 'wb') as handle:
        dump({'mcgsm': mcgsm0}, handle)

    # load model
    with open(tmp_file, 'rb') as handle:
        mcgsm1 = load(handle)['mcgsm']

    # make sure parameters haven't changed
    self.assertEqual(mcgsm0.dim_in, mcgsm1.dim_in)
    self.assertEqual(mcgsm0.dim_out, mcgsm1.dim_out)
    self.assertEqual(mcgsm0.num_components, mcgsm1.num_components)
    self.assertEqual(mcgsm0.num_scales, mcgsm1.num_scales)
    self.assertEqual(mcgsm0.num_features, mcgsm1.num_features)

    self.assertLess(max(abs(mcgsm0.scales - mcgsm1.scales)), 1e-20)
    self.assertLess(max(abs(mcgsm0.weights - mcgsm1.weights)), 1e-20)
    self.assertLess(max(abs(mcgsm0.features - mcgsm1.features)), 1e-20)
    self.assertLess(
        max(abs(mcgsm0.linear_features - mcgsm1.linear_features)), 1e-20)
    self.assertLess(max(abs(mcgsm0.means - mcgsm1.means)), 1e-20)

    for chol0, chol1 in zip(mcgsm0.cholesky_factors, mcgsm1.cholesky_factors):
        self.assertLess(max(abs(chol0 - chol1)), 1e-20)

    for pred0, pred1 in zip(mcgsm0.predictors, mcgsm1.predictors):
        self.assertLess(max(abs(pred0 - pred1)), 1e-20)
def main(argv):
    # Train an MCGSM on pixel neighborhoods of a grayscale image, evaluate
    # it on held-out data, synthesize a sample image, and pickle the model.
    # NOTE(review): `input_mask` and `output_mask` are not defined locally --
    # presumably module-level globals; verify against the rest of the script.

    # load image and turn into grayscale
    img = rgb2gray(imread('media/newyork.png'))

    # generate data
    inputs, outputs = generate_data_from_image(img, input_mask, output_mask, 220000)

    # split data into training, test, and validation sets
    inputs = split(inputs, [100000, 200000], 1)
    outputs = split(outputs, [100000, 200000], 1)

    data_train = inputs[0], outputs[0]
    data_test = inputs[1], outputs[1]
    data_valid = inputs[2], outputs[2]

    # compute normalizing transformation
    pre = WhiteningPreconditioner(*data_train)

    # initialize model
    model = MCGSM(
        dim_in=data_train[0].shape[0],
        dim_out=data_train[1].shape[0],
        num_components=8,
        num_scales=4,
        num_features=32)

    # fit parameters (early stopping on the validation set)
    model.initialize(*pre(*data_train))
    model.train(*chain(pre(*data_train), pre(*data_valid)), parameters={
        'verbosity': 1,
        'max_iter': 1000,
        'threshold': 1e-7,
        'val_iter': 5,
        'val_look_ahead': 10,
        'num_grad': 20,
    })

    # evaluate model
    print 'Average log-likelihood: {0:.4f} [bit/px]'.format(
        -model.evaluate(data_test[0], data_test[1], pre))

    # synthesize a new image
    img_sample = sample_image(img, model, input_mask, output_mask, pre)

    imwrite('newyork_sample.png', img_sample,
        cmap='gray', vmin=min(img), vmax=max(img))

    # save model
    with open('image_model.pck', 'wb') as handle:
        dump({
            'model': model,
            'input_mask': input_mask,
            'output_mask': output_mask
        }, handle, 1)

    return 0
def main(argv):
    # Train an MCGSM on causal neighborhoods extracted from image data,
    # periodically saving snapshots via a training callback.
    experiment = Experiment()

    parser = ArgumentParser(argv[0], description=__doc__)
    parser.add_argument('--data', '-d', type=str, default='data/vanhateren_deq2_train.mat')
    parser.add_argument('--num_data', '-N', type=int, default=1000000)
    parser.add_argument('--num_valid', '-V', type=int, default=200000)
    parser.add_argument('--input_size', '-i', type=int, default=9)
    parser.add_argument('--max_iter', '-I', type=int, default=3000)
    parser.add_argument('--num_components', '-c', type=int, default=128)
    parser.add_argument('--num_features', '-f', type=int, default=48)
    parser.add_argument('--num_scales', '-s', type=int, default=4)
    parser.add_argument('--verbosity', '-v', type=int, default=1)
    parser.add_argument('--output', '-o', type=str, default='results/vanhateren_deq2/mcgsm.{0}.{1}.xpck')

    args = parser.parse_args(argv[1:])

    ### DATA HANDLING

    if args.verbosity > 0:
        print 'Loading data...'

    # load data
    images = loadmat(args.data)['data']

    # define causal neighborhood
    input_mask, output_mask = generate_masks(input_size=args.input_size, output_size=1)

    # extract causal neighborhoods
    # (+.9 rounds up so enough samples are drawn per image)
    num_samples = int((args.num_data + args.num_valid) / images.shape[0] + .9)

    def extract(image):
        return generate_data_from_image(image, input_mask, output_mask, num_samples)

    inputs, outputs = zip(*mapp(extract, images))
    inputs, outputs = hstack(inputs), hstack(outputs)

    inputs_train = inputs[:, :args.num_data]
    outputs_train = outputs[:, :args.num_data]
    inputs_valid = inputs[:, args.num_data:]
    outputs_valid = outputs[:, args.num_data:]

    if inputs_valid.size < 100:
        # train without early stopping if validation set is too small
        print 'Not enough data for validation.'
        inputs_valid = None
        outputs_valid = None

    ### MODEL TRAINING

    if args.verbosity > 0:
        print 'Preconditioning...'

    # remove correlations
    preconditioner = WhiteningPreconditioner(inputs_train, outputs_train)
    inputs_train, outputs_train = preconditioner(inputs_train, outputs_train)
    if inputs_valid is not None:
        inputs_valid, outputs_valid = preconditioner(inputs_valid, outputs_valid)

    # free memory
    del inputs
    del outputs

    if args.verbosity > 0:
        print 'Training model...'

    model = MCGSM(
        dim_in=inputs_train.shape[0],
        dim_out=outputs_train.shape[0],
        num_components=args.num_components,
        num_features=args.num_features,
        num_scales=args.num_scales)

    # saves intermediate results every `cb_iter` iterations
    def callback(i, mcgsm):
        experiment['args'] = args
        experiment['model'] = mcgsm
        experiment['preconditioner'] = preconditioner
        experiment['input_mask'] = input_mask
        experiment['output_mask'] = output_mask
        experiment.save(args.output)

    model.train(
        inputs_train, outputs_train,
        inputs_valid, outputs_valid,
        parameters={
            'verbosity': args.verbosity,
            'cb_iter': 500,
            'callback': callback,
            'max_iter': args.max_iter
        })

    ### SAVE RESULTS

    experiment['args'] = args
    experiment['model'] = model
    experiment['preconditioner'] = preconditioner
    experiment['input_mask'] = input_mask
    experiment['output_mask'] = output_mask
    experiment.save(args.output)

    return 0
def main(argv):
    # Train one conditional model per pixel of image patches: an MCGSM for
    # pixels with a causal context (i > 0) and a MoGSM for the first pixel.
    parser = ArgumentParser(argv[0], description=__doc__)
    parser.add_argument('--data', '-d', type=str, default='data/BSDS300_8x8.mat')
    parser.add_argument('--num_train', '-N', type=int, default=1000000)
    parser.add_argument('--num_valid', '-V', type=int, default=200000)
    parser.add_argument('--num_components', '-n', type=int, default=128)
    parser.add_argument('--num_scales', '-s', type=int, default=4)
    parser.add_argument('--num_features', '-f', type=int, default=48)
    parser.add_argument('--train_means', '-M', type=int, default=0)
    parser.add_argument('--indices', '-I', type=int, default=[], nargs='+')
    parser.add_argument('--initialize', '-i', type=str, default=None)
    parser.add_argument('--verbosity', '-v', type=int, default=1)
    parser.add_argument('--max_iter', '-m', type=int, default=2000)

    args = parser.parse_args(argv[1:])

    experiment = Experiment()

    data_train = loadmat(args.data)['patches_train']
    data_valid = loadmat(args.data)['patches_valid']

    # optionally resume from previously saved models
    if args.initialize:
        results = Experiment(args.initialize)
        models = results['models']
        preconditioners = results['preconditioners']
    else:
        models = [None] * data_train.shape[1]
        preconditioners = [None] * data_train.shape[1]

    # inputs are the first i pixels of each patch, the output is pixel i;
    # optionally subsamples N patches
    def preprocess(data, i, N):
        if N > 0 and N < data.shape[0]:
            # select subset of data
            idx = random_select(N, data.shape[0])
            return data[idx, :i].T, data[idx, i][None, :]
        return data.T[:i], data.T[[i]]

    for i in range(data_train.shape[1]):
        if args.indices and i not in args.indices:
            # skip this one
            continue

        print 'Training model {0}/{1}...'.format(i + 1, data_train.shape[1])

        inputs_train, outputs_train = preprocess(data_train, i, args.num_train)
        inputs_valid, outputs_valid = preprocess(data_valid, i, args.num_valid)

        if i > 0:
            # pixels with a causal context are modeled with an MCGSM
            if preconditioners[i] is None:
                preconditioners[i] = WhiteningPreconditioner(
                    inputs_train, outputs_train)

            inputs_train, outputs_train = preconditioners[i](inputs_train, outputs_train)
            inputs_valid, outputs_valid = preconditioners[i](inputs_valid, outputs_valid)

            if models[i] is None:
                models[i] = MCGSM(
                    dim_in=i,
                    dim_out=1,
                    num_components=args.num_components,
                    num_features=args.num_features,
                    num_scales=args.num_scales)

            models[i].train(
                inputs_train, outputs_train,
                inputs_valid, outputs_valid,
                parameters={
                    'verbosity': 1,
                    'max_iter': args.max_iter,
                    'train_means': args.train_means > 0
                })
        else:
            # the first pixel has no context; model it with a MoGSM
            preconditioners[i] = None

            if models[i] is None:
                models[i] = MoGSM(dim=1, num_components=4, num_scales=8)

            models[i].train(
                outputs_train,
                outputs_valid,
                parameters={
                    'verbosity': 1,
                    'threshold': -1.,
                    'train_means': 1,
                    'max_iter': 100
                })

        # snapshot after every pixel model
        experiment['args'] = args
        experiment['models'] = models
        experiment['preconditioners'] = preconditioners
        experiment.save(
            'results/BSDS300/snapshots/mcgsm_{0}_{1}.{{0}}.{{1}}.xpck'.format(
                i, args.num_components))

    if not args.indices:
        # save final set of models only when all pixels were trained
        experiment['args'] = args
        experiment['models'] = models
        experiment['preconditioners'] = preconditioners
        experiment.save('results/BSDS300/mcgsm.{0}.{1}.xpck')

    return 0
def test_gradient(self):
    # Purpose: check analytic parameter gradients against finite differences
    # for each trainable parameter, with and without regularization.
    mcgsm = MCGSM(5, 2, 2, 4, 10)

    # randomize Cholesky factors and optional parameters
    cholesky_factors = []
    for k in range(mcgsm.num_components):
        cholesky_factors.append(
            cholesky(cov(randn(mcgsm.dim_out, mcgsm.dim_out**2))))
    mcgsm.cholesky_factors = cholesky_factors
    mcgsm.linear_features = randn(mcgsm.num_components, mcgsm.dim_in) / 5.
    mcgsm.means = randn(mcgsm.dim_out, mcgsm.num_components) / 5.

    # all parameters at once
    err = mcgsm._check_gradient(
        randn(mcgsm.dim_in, 1000),
        randn(mcgsm.dim_out, 1000), 1e-5)
    self.assertLess(err, 1e-8)

    # without regularization: check each parameter group in isolation
    for param in [
        'priors', 'scales', 'weights', 'features', 'chol', 'pred',
        'linear_features', 'means'
    ]:
        err = mcgsm._check_gradient(
            randn(mcgsm.dim_in, 1000),
            randn(mcgsm.dim_out, 1000),
            1e-5,
            parameters={
                'train_prior': param == 'priors',
                'train_scales': param == 'scales',
                'train_weights': param == 'weights',
                'train_features': param == 'features',
                'train_cholesky_factors': param == 'chol',
                'train_predictors': param == 'pred',
                'train_linear_features': param == 'linear_features',
                'train_means': param == 'means',
            })
        self.assertLess(err, 1e-8)

    # with regularization: both norms, each parameter group in isolation
    for norm in ['L1', 'L2']:
        for param in [
            'priors', 'scales', 'weights', 'features', 'chol', 'pred',
            'linear_features', 'means'
        ]:
            err = mcgsm._check_gradient(
                randn(mcgsm.dim_in, 1000),
                randn(mcgsm.dim_out, 1000),
                1e-7,
                parameters={
                    'train_prior': param == 'priors',
                    'train_scales': param == 'scales',
                    'train_weights': param == 'weights',
                    'train_features': param == 'features',
                    'train_cholesky_factors': param == 'chol',
                    'train_predictors': param == 'pred',
                    'train_linear_features': param == 'linear_features',
                    'train_means': param == 'means',
                    'regularize_features': {
                        'strength': 0.4,
                        'norm': norm
                    },
                    'regularize_predictors': {
                        'strength': 0.5,
                        'norm': norm
                    },
                    'regularize_weights': {
                        'strength': 0.7,
                        'norm': norm
                    },
                    'regularize_linear_features': {
                        'strength': 0.3,
                        'norm': norm
                    },
                    'regularize_means': {
                        'strength': 0.6,
                        'norm': norm
                    },
                })
            self.assertLess(err, 1e-6)
parser.add_argument('--repetitions', '-r', type=int, default=2) args = parser.parse_args(sys.argv[1:]) ### print socket.gethostname() print datetime.now() print args print ### data = randn(args.dim_in, args.num_data), randn(args.dim_out, args.num_data) model = MCGSM(dim_in=args.dim_in, dim_out=args.dim_out, num_components=12, num_features=40, num_scales=6) ### print 'model.loglikelihood' t = time() for r in range(args.repetitions): model.loglikelihood(*data) print '{0:12.8f} seconds'.format((time() - t) / float(args.repetitions)) print ### print 'model._check_performance' for batch_size in [1000, 2000, 5000]: t = model._check_performance(*data,
def test_mogsm(self):
    """An MCGSM with zero inputs should behave like a mixture of GSMs."""
    mcgsm = MCGSM(
        dim_in=0,
        dim_out=3,
        num_components=2,
        num_scales=2,
        num_features=0)

    p0 = 0.3
    p1 = 0.7
    N = 20000
    m0 = array([[2], [0], [0]])
    m1 = array([[0], [2], [1]])
    C0 = cov(randn(mcgsm.dim_out, mcgsm.dim_out**2))
    C1 = cov(randn(mcgsm.dim_out, mcgsm.dim_out**2))

    input = zeros([0, N])
    # mixture of two Gaussians, rescaled to create a scale mixture
    output = hstack([
        dot(cholesky(C0), randn(mcgsm.dim_out, int(round(p0 * N)))) + m0,
        dot(cholesky(C1), randn(mcgsm.dim_out, int(round(p1 * N)))) + m1
    ]) * (rand(1, N) + 0.5)

    mcgsm.train(input, output, parameters={
        'verbosity': 0,
        'max_iter': 10,
        'train_means': True
    })

    mogsm = MoGSM(3, 2, 2)

    # translate parameters from MCGSM to MoGSM
    mogsm.priors = sum(exp(mcgsm.priors), 1) / sum(exp(mcgsm.priors))

    for k in range(mogsm.num_components):
        mogsm[k].mean = mcgsm.means[:, k]
        mogsm[k].covariance = inv(
            dot(mcgsm.cholesky_factors[k], mcgsm.cholesky_factors[k].T))
        mogsm[k].scales = exp(mcgsm.scales[k, :])
        mogsm[k].priors = exp(mcgsm.priors[k, :]) / sum(exp(mcgsm.priors[k, :]))

    self.assertAlmostEqual(mcgsm.evaluate(input, output), mogsm.evaluate(output), 5)

    mogsm_samples = mogsm.sample(N)
    mcgsm_samples = mcgsm.sample(input)

    # generated samples should have the same distribution;
    # fixed: ks_2samp returns a (statistic, pvalue) tuple -- comparing the
    # tuple itself against 0.0001 was always true in Python 2, making the
    # assertions vacuous; compare the p-value (index [1]) as done elsewhere
    for i in range(mogsm.dim):
        self.assertTrue(
            ks_2samp(mogsm_samples[i], mcgsm_samples[0])[1] > 0.0001)
        self.assertTrue(
            ks_2samp(mogsm_samples[i], mcgsm_samples[1])[1] > 0.0001)
        self.assertTrue(
            ks_2samp(mogsm_samples[i], mcgsm_samples[2])[1] > 0.0001)

    posterior = mcgsm.posterior(input, mcgsm_samples)

    # average posterior should correspond to prior
    for k in range(mogsm.num_components):
        self.assertLess(abs(1 - mean(posterior[k]) / mogsm.priors[k]), 0.1)
def __init__(self, num_channels=1, num_hiddens=10, num_components=8,
        num_scales=4, num_features=16, num_layers=1, nb_size=5,
        nonlinearity='TanH', verbosity=1, extended=False,
        input_mask=None, output_mask=None):
    """
    @type num_channels: C{int}
    @param num_channels: dimensionality of each pixel

    @type num_hiddens: C{int}
    @param num_hiddens: number of LSTM units in each spatial LSTM layer

    @type num_components: C{int}
    @param num_components: number of mixture components used by the MCGSM

    @type num_scales: C{int}
    @param num_scales: number of scales used by the MCGSM

    @type num_features: C{int}
    @param num_features: number of quadratic features used by the MCGSM

    @type num_layers: C{int}
    @param num_layers: number of layers of spatial LSTM units

    @type nb_size: C{int}
    @param nb_size: controls the neighborhood of pixels read from an image

    @type nonlinearity: C{str}
    @param nonlinearity: nonlinearity used by spatial LSTM (e.g., TanH, ReLU)

    @type verbosity: C{int}
    @param verbosity: controls how much information is printed during training, etc.

    @type extended: C{bool}
    @param extended: use previous memory states as additional inputs to LSTM (more parameters)

    @type input_mask: C{ndarray}
    @param input_mask: Boolean mask used to define custom input neighborhood of pixels

    @type output_mask: C{ndarray}
    @param output_mask: determines the position of the output pixel relative to the neighborhood
    """
    self.verbosity = verbosity

    self.num_channels = num_channels
    self.num_hiddens = num_hiddens
    self.num_layers = num_layers
    self.nonlinearity = nonlinearity
    self.extended = extended

    # default neighborhood masks; possibly overridden by explicit masks below
    self.input_mask, self.output_mask = generate_masks([nb_size] * num_channels)

    if input_mask is not None:
        self.input_mask = input_mask
    if output_mask is not None:
        self.output_mask = output_mask
        # a custom output mask redefines the number of output channels
        self.num_channels = sum(self.output_mask)

    # one slot per spatial LSTM layer, filled in lazily
    self.slstm = [None] * num_layers
    self.mcgsm = MCGSM(
        dim_in=self.num_hiddens,
        dim_out=self.num_channels,
        num_components=num_components,
        num_scales=num_scales,
        num_features=num_features)

    self.preconditioner = None