Example #1
1
	def test_mogsm(self):
		"""
		An MCGSM with zero input dimensions should be equivalent to a mixture
		of GSMs (MoGSM): after translating the trained MCGSM's parameters into
		a MoGSM, likelihoods, samples, and posteriors of both models must agree.
		"""
		mcgsm = MCGSM(
			dim_in=0,
			dim_out=3,
			num_components=2,
			num_scales=2,
			num_features=0)

		# ground-truth data: mixture of two Gaussians with random covariances,
		# multiplied elementwise by random scales
		p0 = 0.3
		p1 = 0.7
		N = 20000
		m0 = array([[2], [0], [0]])
		m1 = array([[0], [2], [1]])
		C0 = cov(randn(mcgsm.dim_out, mcgsm.dim_out**2))
		C1 = cov(randn(mcgsm.dim_out, mcgsm.dim_out**2))
		input = zeros([0, N])
		# int(round(...)): randn expects integer dimensions (round returns a
		# float in Python 2)
		output = hstack([
			dot(cholesky(C0), randn(mcgsm.dim_out, int(round(p0 * N)))) + m0,
			dot(cholesky(C1), randn(mcgsm.dim_out, int(round(p1 * N)))) + m1]) * (rand(1, N) + 0.5)

		mcgsm.train(input, output, parameters={
			'verbosity': 0,
			'max_iter': 10,
			'train_means': True})

		mogsm = MoGSM(3, 2, 2)

		# translate parameters from MCGSM to MoGSM
		mogsm.priors = sum(exp(mcgsm.priors), 1) / sum(exp(mcgsm.priors))

		for k in range(mogsm.num_components):
			mogsm[k].mean = mcgsm.means[:, k]
			mogsm[k].covariance = inv(dot(mcgsm.cholesky_factors[k], mcgsm.cholesky_factors[k].T))
			mogsm[k].scales = exp(mcgsm.scales[k, :])
			mogsm[k].priors = exp(mcgsm.priors[k, :]) / sum(exp(mcgsm.priors[k, :]))

		self.assertAlmostEqual(mcgsm.evaluate(input, output), mogsm.evaluate(output), 5)

		mogsm_samples = mogsm.sample(N)
		mcgsm_samples = mcgsm.sample(input)

		# samples of corresponding dimensions should follow the same
		# distribution; compare the p-value of a two-sample KS test
		# (ks_2samp returns a (statistic, p-value) tuple — comparing the
		# tuple itself against a float always succeeded in Python 2, making
		# the original assertions vacuous)
		for i in range(mogsm.dim):
			self.assertGreater(ks_2samp(mogsm_samples[i], mcgsm_samples[i])[1], 0.0001)

		posterior = mcgsm.posterior(input, mcgsm_samples)

		# average posterior should correspond to prior
		for k in range(mogsm.num_components):
			self.assertLess(abs(1 - mean(posterior[k]) / mogsm.priors[k]), 0.1)
Example #2
0
	def test_pickle(self):
		"""
		Mixture models should survive a pickle round-trip with all of their
		parameters unchanged.
		"""
		models = [
			Mixture(dim=5),
			MoGSM(dim=3, num_components=4, num_scales=7)]

		# give the generic mixture some components to serialize
		for _ in range(3):
			models[0].add_component(GSM(models[0].dim, 7))

		for model0 in models:
			tmp_file = mkstemp()[1]

			# store model
			with open(tmp_file, 'w') as handle:
				dump({'model': model0}, handle)

			# load model
			with open(tmp_file) as handle:
				model1 = load(handle)['model']

			# make sure parameters haven't changed
			self.assertEqual(model0.dim, model1.dim)
			self.assertEqual(model0.num_components, model1.num_components)

			for k in range(model0.num_components):
				# compare stored against loaded parameters (the original
				# compared model0's scales with themselves, which made that
				# check a no-op)
				self.assertLess(max(abs(model0[k].scales - model1[k].scales)), 1e-10)
				self.assertLess(max(abs(model0[k].priors - model1[k].priors)), 1e-10)
				self.assertLess(max(abs(model0[k].mean - model1[k].mean)), 1e-10)
				self.assertLess(max(abs(model0[k].covariance - model1[k].covariance)), 1e-10)
Example #3
0
	def test_basics(self):
		"""
		Sampling from a MoGSM with well-separated, low-variance components
		should reproduce the component priors.
		"""
		num_samples = 1000

		model = MoGSM(1, 4, 1)

		# unnormalized priors 1, 2, 3, 4, then normalize to sum to one
		model.priors = arange(model.num_components) + 1.
		model.priors = model.priors / sum(model.priors)

		# place component j at mean j with a very large precision scale
		for j in range(model.num_components):
			model[j].mean = [[j]]
			model[j].scales = [[1000.]]

		# rounding each sample to the nearest integer recovers its component
		samples = asarray(model.sample(num_samples) + .5, dtype=int)

		for j in range(model.num_components):
			prior = model.priors.ravel()[j]
			count = sum(samples == j)
			prob = binom.cdf(count, num_samples, prior)

			# the observed count should be neither improbably small
			# nor improbably large under the binomial distribution
			self.assertGreater(prob, 1e-5)
			self.assertGreater(1. - prob, 1e-5)
Example #4
0
def main(argv):
	parser = ArgumentParser(argv[0], description=__doc__)
	parser.add_argument('--data_train', '-d', type=str, default='data/BSDS300_train.mat')
	parser.add_argument('--data_test', '-t', type=str, default='data/BSDS300_test.mat')
	parser.add_argument('--patch_size', '-p', type=int, default=8)

	args = parser.parse_args(argv[1:])

	A = eye(args.patch_size) - 1. / args.patch_size**2
	A[-1] = 1. / args.patch_size**2

	logjacobian = slogdet(A)[1]

	data_train = loadmat(args.data_train)['data']
	data_test = loadmat(args.data_test)['data']

	dc_train = dc_component(data_train, args.patch_size)
	dc_test = dc_component(data_test, args.patch_size)

	h_train, bins = histogram(dc_train, 60, density=True)
	h_test, bins = histogram(dc_test, bins, density=False)

	model = MoGSM(dim=1, num_components=16, num_scales=4)
	model.train(dc_train, parameters={'max_iter': 100})

	figure(sans_serif=True)
	t = linspace(0, 1, 100)
	hist(dc_train.ravel(), 100, density=True)
	plot(t, exp(model.loglikelihood(t[None]).ravel()), 'k', line_width=2)
	axis(width=5, height=5)
	savefig('dc_fit.tex')

	loglik = mean(model.loglikelihood(dc_test))

	print 'Add these two numbers to your results:'
	print 'Log-likelihood (MoGSM): {0:.4f} [nat]'.format(loglik)
	print 'Log-likelihood (histogram): {0:.4f} [nat]'.format(sum(h_test * log(h_train)) / sum(h_test))
	print 'Log-Jacobian: {0:.4f} [nat]'.format(logjacobian)

	return 0
Example #5
0
def main(argv):
    parser = ArgumentParser(argv[0], description=__doc__)
    parser.add_argument('--data_train',
                        '-d',
                        type=str,
                        default='data/BSDS300_train.mat')
    parser.add_argument('--data_test',
                        '-t',
                        type=str,
                        default='data/BSDS300_test.mat')
    parser.add_argument('--patch_size', '-p', type=int, default=8)

    args = parser.parse_args(argv[1:])

    A = eye(args.patch_size) - 1. / args.patch_size**2
    A[-1] = 1. / args.patch_size**2

    logjacobian = slogdet(A)[1]

    data_train = loadmat(args.data_train)['data']
    data_test = loadmat(args.data_test)['data']

    dc_train = dc_component(data_train, args.patch_size)
    dc_test = dc_component(data_test, args.patch_size)

    h_train, bins = histogram(dc_train, 60, density=True)
    h_test, bins = histogram(dc_test, bins, density=False)

    model = MoGSM(dim=1, num_components=16, num_scales=4)
    model.train(dc_train, parameters={'max_iter': 100})

    figure(sans_serif=True)
    t = linspace(0, 1, 100)
    hist(dc_train.ravel(), 100, density=True)
    plot(t, exp(model.loglikelihood(t[None]).ravel()), 'k', line_width=2)
    axis(width=5, height=5)
    savefig('dc_fit.tex')

    loglik = mean(model.loglikelihood(dc_test))

    print 'Add these two numbers to your results:'
    print 'Log-likelihood (MoGSM): {0:.4f} [nat]'.format(loglik)
    print 'Log-likelihood (histogram): {0:.4f} [nat]'.format(
        sum(h_test * log(h_train)) / sum(h_test))
    print 'Log-Jacobian: {0:.4f} [nat]'.format(logjacobian)

    return 0
Example #6
0
    def test_mogsm(self):
        """
        An MCGSM with zero input dimensions is equivalent to a mixture of
        GSMs: likelihoods, samples and posteriors of both models must agree
        once the MCGSM's parameters are translated into a MoGSM.
        """
        mcgsm = MCGSM(dim_in=0,
                      dim_out=3,
                      num_components=2,
                      num_scales=2,
                      num_features=0)

        # ground truth: mixture of two Gaussians with random covariances,
        # multiplied elementwise by random scales
        p0 = 0.3
        p1 = 0.7
        N = 20000
        m0 = array([[2], [0], [0]])
        m1 = array([[0], [2], [1]])
        C0 = cov(randn(mcgsm.dim_out, mcgsm.dim_out**2))
        C1 = cov(randn(mcgsm.dim_out, mcgsm.dim_out**2))
        input = zeros([0, N])
        # int(round(...)): randn expects integer dimensions
        output = hstack([
            dot(cholesky(C0), randn(mcgsm.dim_out, int(round(p0 * N)))) + m0,
            dot(cholesky(C1), randn(mcgsm.dim_out, int(round(p1 * N)))) + m1
        ]) * (rand(1, N) + 0.5)

        mcgsm.train(input,
                    output,
                    parameters={
                        'verbosity': 0,
                        'max_iter': 10,
                        'train_means': True
                    })

        mogsm = MoGSM(3, 2, 2)

        # translate parameters from MCGSM to MoGSM
        mogsm.priors = sum(exp(mcgsm.priors), 1) / sum(exp(mcgsm.priors))

        for k in range(mogsm.num_components):
            mogsm[k].mean = mcgsm.means[:, k]
            mogsm[k].covariance = inv(
                dot(mcgsm.cholesky_factors[k], mcgsm.cholesky_factors[k].T))
            mogsm[k].scales = exp(mcgsm.scales[k, :])
            mogsm[k].priors = exp(mcgsm.priors[k, :]) / sum(
                exp(mcgsm.priors[k, :]))

        self.assertAlmostEqual(mcgsm.evaluate(input, output),
                               mogsm.evaluate(output), 5)

        mogsm_samples = mogsm.sample(N)
        mcgsm_samples = mcgsm.sample(input)

        # samples of corresponding dimensions should follow the same
        # distribution; compare the p-value of a two-sample KS test
        # (ks_2samp returns a (statistic, p-value) tuple — comparing the
        # tuple itself against a float always succeeded in Python 2,
        # making the original assertions vacuous)
        for i in range(mogsm.dim):
            self.assertGreater(
                ks_2samp(mogsm_samples[i], mcgsm_samples[i])[1], 0.0001)

        posterior = mcgsm.posterior(input, mcgsm_samples)

        # average posterior should correspond to prior
        for k in range(mogsm.num_components):
            self.assertLess(abs(1 - mean(posterior[k]) / mogsm.priors[k]), 0.1)
Example #7
0
def main(argv):
    """
    Train one conditional model per pixel on BSDS300 image patches.

    Pixel 0 has no causal neighborhood and is modeled with an unconditional
    MoGSM; every later pixel i is modeled with an MCGSM conditioned on pixels
    0..i-1, after whitening inputs and outputs. A snapshot of all models is
    saved after each pixel, and training can be resumed from a previous
    experiment via --initialize.
    """
    parser = ArgumentParser(argv[0], description=__doc__)
    parser.add_argument('--data',
                        '-d',
                        type=str,
                        default='data/BSDS300_8x8.mat')
    parser.add_argument('--num_train', '-N', type=int, default=1000000)
    parser.add_argument('--num_valid', '-V', type=int, default=200000)
    parser.add_argument('--num_components', '-n', type=int, default=128)
    parser.add_argument('--num_scales', '-s', type=int, default=4)
    parser.add_argument('--num_features', '-f', type=int, default=48)
    parser.add_argument('--train_means', '-M', type=int, default=0)
    parser.add_argument('--indices', '-I', type=int, default=[], nargs='+')
    parser.add_argument('--initialize', '-i', type=str, default=None)
    parser.add_argument('--verbosity', '-v', type=int, default=1)
    parser.add_argument('--max_iter', '-m', type=int, default=2000)

    args = parser.parse_args(argv[1:])

    experiment = Experiment()

    # one row per patch, one column per pixel (see preprocess below)
    data_train = loadmat(args.data)['patches_train']
    data_valid = loadmat(args.data)['patches_valid']

    if args.initialize:
        # resume from a previous experiment's models and preconditioners
        results = Experiment(args.initialize)
        models = results['models']
        preconditioners = results['preconditioners']
    else:
        # one model/preconditioner slot per pixel
        models = [None] * data_train.shape[1]
        preconditioners = [None] * data_train.shape[1]

    def preprocess(data, i, N):
        # Split patches into inputs (pixels before i) and outputs (pixel i),
        # optionally subsampling N patches; returns column-major arrays of
        # shape (i, M) and (1, M).
        if N > 0 and N < data.shape[0]:
            # select subset of data
            idx = random_select(N, data.shape[0])
            return data[idx, :i].T, data[idx, i][None, :]
        return data.T[:i], data.T[[i]]

    for i in range(data_train.shape[1]):
        if args.indices and i not in args.indices:
            # skip this one
            continue

        print 'Training model {0}/{1}...'.format(i + 1, data_train.shape[1])

        inputs_train, outputs_train = preprocess(data_train, i, args.num_train)
        inputs_valid, outputs_valid = preprocess(data_valid, i, args.num_valid)

        if i > 0:
            # whitening transform is estimated on the training data only
            if preconditioners[i] is None:
                preconditioners[i] = WhiteningPreconditioner(
                    inputs_train, outputs_train)

            inputs_train, outputs_train = preconditioners[i](inputs_train,
                                                             outputs_train)
            inputs_valid, outputs_valid = preconditioners[i](inputs_valid,
                                                             outputs_valid)

            if models[i] is None:
                models[i] = MCGSM(dim_in=i,
                                  dim_out=1,
                                  num_components=args.num_components,
                                  num_features=args.num_features,
                                  num_scales=args.num_scales)
            # validation data presumably enables early stopping — confirm
            # against MCGSM.train's documentation
            models[i].train(inputs_train,
                            outputs_train,
                            inputs_valid,
                            outputs_valid,
                            parameters={
                                'verbosity': 1,
                                'max_iter': args.max_iter,
                                'train_means': args.train_means > 0
                            })
        else:
            # first pixel has no inputs; model it with an unconditional MoGSM
            preconditioners[i] = None

            if models[i] is None:
                models[i] = MoGSM(dim=1, num_components=4, num_scales=8)
            models[i].train(outputs_train,
                            outputs_valid,
                            parameters={
                                'verbosity': 1,
                                'threshold': -1.,
                                'train_means': 1,
                                'max_iter': 100
                            })

        # snapshot progress after every pixel; the escaped {{0}}.{{1}}
        # placeholders are left for Experiment.save to fill in
        experiment['args'] = args
        experiment['models'] = models
        experiment['preconditioners'] = preconditioners
        experiment.save(
            'results/BSDS300/snapshots/mcgsm_{0}_{1}.{{0}}.{{1}}.xpck'.format(
                i, args.num_components))

    if not args.indices:
        # final result — only written when all pixels were trained in this run
        experiment['args'] = args
        experiment['models'] = models
        experiment['preconditioners'] = preconditioners
        experiment.save('results/BSDS300/mcgsm.{0}.{1}.xpck')

    return 0