def test_evaluate(self):
    isa1 = ISA(2)
    isa1.A = eye(2)

    for gsm in isa1.subspaces:
        gsm.scales[:] = 1.

    # equivalent overcomplete model
    isa2 = ISA(2, 4)
    isa2.A[:, :2] = isa1.A / sqrt(2.)
    isa2.A[:, 2:] = isa1.A / sqrt(2.)

    for gsm in isa2.subspaces:
        gsm.scales[:] = 1.

    data = isa1.sample(100)

    # the results should not depend on the parameters
    ll1 = isa1.evaluate(data)
    ll2 = isa2.evaluate(data,
        num_samples=2,
        sampling_method=('ais', {'num_steps': 2}))

    self.assertTrue(abs(ll1 - ll2) < 1e-5)
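# A minimal sketch (numpy only) of why the two models above are equivalent:
# with all scales set to 1 the prior is Gaussian, so the model depends on A
# only through the covariance A A^T, and splitting each column of A into two
# copies scaled by 1/sqrt(2) leaves that covariance unchanged.
from numpy import allclose, dot, eye, hstack, sqrt

A1 = eye(2)
A2 = hstack([A1, A1]) / sqrt(2.)

assert allclose(dot(A1, A1.T), dot(A2, A2.T))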
def test_train_subspaces(self):
    isa = ISA(4, 4, 2)
    isa.initialize(method='laplace')

    samples = isa.sample_prior(10000)

    isa = ISA(4, 4, 1)
    isa.initialize(method='laplace')

    # two passes give the merging heuristic a chance to recover both
    # two-dimensional subspaces from the four one-dimensional ones
    isa.train_subspaces(samples, max_merge=5)
    isa.train_subspaces(samples, max_merge=5)

    self.assertTrue(len(isa.subspaces) == 2)
def main(argv):
    seterr(over='raise', divide='raise', invalid='raise')

    # OICA with Student's t-distribution marginals
    ica = ISA(1, 2, ssize=1, num_scales=20)
    ica.A[:] = [0.7, 1.1]

    # fit marginals to exponential power distribution
    ica.initialize(method='exponpow')

    # prior landscape
    xmin, xmax = -35, 35
    s = meshgrid(linspace(xmin, xmax, IMG_SIZE), linspace(xmin, xmax, IMG_SIZE))
    S = vstack([s[0].flatten(), s[1].flatten()])
    E = ica.prior_energy(S).reshape(*s[0].shape)[::-1]

    # nullspace
    W = pinv(ica.A)
    V = pinv(ica.nullspace_basis())

    x = -8.  # alternatively, 18.
    s_fr = (W * x + V * Z_FROM).flatten()
    s_to = (W * x + V * Z_TO).flatten()

    # sample nullspace
    Z = ica.sample_nullspace(zeros([1, NUM_SAMPLES]) + x,
        method=('gibbs', {'num_steps': MCMC_STEPS})).flatten()

    figure()
    imshow(-E, cmap='shadows', dpi=DPI, vmin=-7.0, vmax=-2.0,
        limits=[xmin, xmax, xmin, xmax])
    plot([s_fr[0], s_to[0]], [s_fr[1], s_to[1]], line_width=3., color='cyan')
    arrow(0, 0, W[0, 0] * x, W[1, 0] * x, line_width=1.5)
    text(5.3, 5., '$A^+$')
    axis('origin')
    xtick([])
    ytick([])
    xlabel('$s_1$')
    ylabel('$s_2$')
    savefig('results/prior.tex')
    draw()

    figure()
    h = hist(Z, NUM_BINS, density=True, color='cyan', opacity=0.8, line_width=0.)
    h.const_plot = False
    axis('origin')
    axis([Z_FROM, Z_TO, 0., 0.14])
    xlabel('$z$')
    ylabel('$p(z \mid x)$')
    xtick([])
    ytick([])
    savefig('results/nullspace.tex')
    draw()

    return 0
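# A quick numpy check of the decomposition used above (V is computed via SVD
# here rather than the model's nullspace_basis): s = W x + V z satisfies
# A s = x for every z, so varying z moves s only within the posterior.
import numpy as np

A = np.array([[0.7, 1.1]])
W = np.linalg.pinv(A)
V = np.linalg.svd(A)[2][1:].T  # basis of the nullspace of A

x, z = -8., 3.
s = W * x + V * z

assert np.allclose(np.dot(A, s), x)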
def test_compute_map(self):
    isa = ISA(2, 4)

    X = isa.sample(100)
    M = isa.compute_map(X, tol=1E-4, maxiter=1000)
    Y = isa.sample_posterior(X)

    # the MAP estimate should have no higher energy than any posterior sample
    self.assertTrue(all(isa.prior_energy(M) <= isa.prior_energy(Y)))
def test_prior_energy(self):
    step_size = 1E-5

    model = ISA(3, 7, 1)

    for gsm in model.subspaces:
        gsm.initialize('student')

    # samples and true gradient
    X = model.sample_prior(100)
    G = model.prior_energy_gradient(X)

    # numerical gradient
    N = zeros(G.shape)
    for i in range(N.shape[0]):
        d = zeros(X.shape)
        d[i] = step_size
        N[i] = (model.prior_energy(X + d) - model.prior_energy(X - d)) / (2. * step_size)

    # test consistency of energy and gradient
    self.assertTrue(all(abs(G - N) < 1E-5))
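# The same check as a standalone sketch for any per-sample energy function
# (illustrative; `energy` and `numerical_gradient` are not part of the
# library): central differences approximate the i-th partial derivative as
# (E(x + h e_i) - E(x - h e_i)) / (2 h).
import numpy as np

def numerical_gradient(energy, X, h=1e-5):
    N = np.zeros_like(X)
    for i in range(X.shape[0]):
        d = np.zeros_like(X)
        d[i] = h

        # perturb the i-th coordinate of every sample (column) at once
        N[i] = (energy(X + d) - energy(X - d)) / (2. * h)
    return N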
def test_logjacobian(self):
    isa = ISA(4, 4, 2)

    # standard normal distribution
    gauss = GSM(4, 1)
    gauss.scales[0] = 1.

    # generate test data
    samples = isa.sample(100)

    sg = SubspaceGaussianization(isa)

    # after Gaussianization, samples should be Gaussian distributed
    loglik_isa = isa.loglikelihood(samples)
    loglik_gauss = gauss.loglikelihood(sg(samples)) + sg.logjacobian(samples)

    dist = abs(loglik_isa - loglik_gauss)

    self.assertTrue(all(dist < 1E-6))

    ###

    # test ICA
    isa = ISA(3, 3, 1)

    # standard normal distribution
    gauss = GSM(3, 1)
    gauss.scales[0] = 1.

    # generate test data
    samples = isa.sample(100)

    sg = SubspaceGaussianization(isa)

    # after Gaussianization, samples should be Gaussian distributed
    loglik_isa = isa.loglikelihood(samples)
    loglik_gauss = gauss.loglikelihood(sg(samples)) + sg.logjacobian(samples)

    dist = abs(loglik_isa - loglik_gauss)

    self.assertTrue(all(dist < 1E-6))
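# The identity being tested is the change-of-variables formula: if g is the
# Gaussianization and the transformed samples g(x) are standard normal, then
#
#     log p_isa(x) = log p_gauss(g(x)) + log |det dg(x)/dx|,
#
# which is why the ISA log-likelihood has to match the Gaussian log-likelihood
# of the transformed samples plus the log-Jacobian of the transform.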
def test_inverse(self):
    """
    Make sure inverse Gaussianization is inverse to Gaussianization.
    """

    # complete model
    isa = ISA(20, 20, 2)

    # generate sample data
    samples = isa.sample(100)

    sg = SubspaceGaussianization(isa)

    # apply what should be the identity
    samples_rec = sg.inverse(sg(samples))

    # distance between samples and reconstructed samples
    dist = sqrt(sum(square(samples - samples_rec), 0))

    self.assertTrue(all(dist < 1E-6))

    ###

    # overcomplete model
    isa = ISA(3, 6, 3)

    # generate sample data
    samples = isa.sample(100)
    samples = vstack([samples, isa.sample_nullspace(samples)])

    sg = SubspaceGaussianization(isa)

    # apply what should be the identity
    samples_rec = sg.inverse(sg(samples))

    # distance between samples and reconstructed samples
    dist = sqrt(sum(square(samples - samples_rec), 0))

    self.assertTrue(all(dist < 1E-6))
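# In the overcomplete case the hidden representation splits into an image
# part, pinv(A) x, and a nullspace part, which is why nullspace samples are
# stacked onto the visible samples above. A sketch with numpy's SVD (this is
# not the library's nullspace_basis, just an illustration):
import numpy as np

A = np.random.randn(3, 6)
V = np.linalg.svd(A)[2][3:]  # rows spanning the nullspace of A

# every nullspace direction is invisible to A
assert np.allclose(np.dot(A, V.T), 0.)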
def main(argv):
    seterr(over='raise', divide='raise', invalid='raise')

    try:
        if int(os.environ['OMP_NUM_THREADS']) > 1 or int(os.environ['MKL_NUM_THREADS']) > 1:
            print 'It seems that parallelization is turned on. This will skew the results. To turn it off:'
            print '\texport OMP_NUM_THREADS=1'
            print '\texport MKL_NUM_THREADS=1'
    except:
        print 'Parallelization of BLAS might be turned on. This could skew results.'

    experiment = Experiment(seed=42)

    if os.path.exists('results/toyexample/toyexample.xpck'):
        results = Experiment('results/toyexample/toyexample.xpck')
        ica = results['ica']
    else:
        # toy model
        ica = ISA(1, 3)
        ica.initialize(method='exponpow')
        ica.A = 1. + randn(1, 3) / 5.

        experiment['ica'] = ica
        experiment.save('results/toyexample/toyexample.xpck')

    # generate visible and corresponding hidden states
    Y = ica.sample_prior(NUM_SAMPLES)
    X = dot(ica.A, Y)

    # energy of posterior samples should be around this value
    energy = mean(ica.prior_energy(Y))

    for method in sampling_methods:
        # disable output and parallelization
        Distribution.VERBOSITY = 0
        mapp.max_processes = 1

        # measure time required by transition operator
        start = time()

        # initial hidden states
        Y = dot(pinv(ica.A), X)

        # increase number of steps to reduce overhead
        ica.sample_posterior(X, method=(method['method'],
            dict(method['parameters'],
                Y=Y,
                num_steps=method['parameters']['num_steps'] * NUM_STEPS_MULTIPLIER)))

        # time required per transition operator application
        duration = (time() - start) / NUM_STEPS_MULTIPLIER

        # enable output and parallelization
        Distribution.VERBOSITY = 2
        mapp.max_processes = 2

        energies = [mean(ica.prior_energy(Y))]

        # Markov chain
        for i in range(int(NUM_SECONDS / duration + 1.)):
            Y = ica.sample_posterior(X,
                method=(method['method'], dict(method['parameters'], Y=Y)))
            energies.append(mean(ica.prior_energy(Y)))

        plot(arange(len(energies)) * duration, energies, '-',
            color=method['color'],
            line_width=1.2,
            pgf_options=['forget plot'],
            comment=str(method['parameters']))

    plot([-2, NUM_SECONDS + 2], energy, 'k--', line_width=1.2)

    xlabel('time in seconds')
    ylabel('average energy')
    title('toy example')

    gca().width = 7
    gca().height = 7
    gca().xmin = -1
    gca().xmax = NUM_SECONDS

    savefig('results/toyexample/toyexample_trace.tex')

    return 0
def test_train(self):
    isa = ISA(2, 2)

    data = isa.sample_prior(1000)

    # make sure training doesn't throw any exceptions
    isa.train_sgd(data, max_iter=1)
    isa.train_lbfgs(data, max_fun=1)

    isa = ISA(2, 2, noise=True)

    data = isa.sample_prior(1000)

    # make sure training doesn't throw any exceptions
    isa.train_sgd(data, max_iter=1, weight_decay=0.01)
    isa.train_analytic(data, max_iter=1, weight_decay=0.01)
    isa.train_lbfgs(data, max_fun=1, weight_decay=0.01)
def main(argv):
    seterr(over='raise', divide='raise', invalid='raise')

    try:
        if int(os.environ['OMP_NUM_THREADS']) > 1 or int(os.environ['MKL_NUM_THREADS']) > 1:
            print 'It seems that parallelization is turned on. This will skew the results. To turn it off:'
            print '\texport OMP_NUM_THREADS=1'
            print '\texport MKL_NUM_THREADS=1'
    except:
        print 'Parallelization of BLAS might be turned on. This could skew results.'

    experiment = Experiment(seed=42)

    if os.path.exists('results/toyexample/toyexample.xpck'):
        results = Experiment('results/toyexample/toyexample.xpck')
        ica = results['ica']
    else:
        # toy model
        ica = ISA(1, 3)
        ica.initialize(method='exponpow')
        ica.A = 1. + randn(1, 3) / 5.

        experiment['ica'] = ica
        experiment.save('results/toyexample/toyexample.xpck')

    Y_ = ica.sample_prior(NUM_AUTOCORR)
    X_ = dot(ica.A, Y_)

    for method in sampling_methods:
        # disable output and parallelization
        Distribution.VERBOSITY = 0
        mapp.max_processes = 1

        Y = ica.sample_prior(NUM_SAMPLES)
        X = dot(ica.A, Y)

        # measure time required by transition operator
        start = time()

        # increase number of steps to reduce overhead
        ica.sample_posterior(X, method=(method['method'],
            dict(method['parameters'],
                Y=Y,
                num_steps=method['parameters']['num_steps'] * NUM_STEPS_MULTIPLIER)))

        # time required per transition operator application
        duration = (time() - start) / NUM_STEPS_MULTIPLIER

        # number of mcmc steps to run for this method
        num_mcmc_steps = int(NUM_SECONDS_RUN / duration + 1.)
        num_autocorr_steps = int(NUM_SECONDS_VIS / duration + 1.)

        # enable output and parallelization
        Distribution.VERBOSITY = 2
        mapp.max_processes = 2

        # posterior samples
        Y = [Y_]

        # Markov chain
        for i in range(num_mcmc_steps):
            Y.append(ica.sample_posterior(X_,
                method=(method['method'], dict(method['parameters'], Y=Y[-1]))))

        ac = []

        for j in range(NUM_AUTOCORR):
            # collect samples belonging to one posterior distribution
            S = hstack([Y[k][:, [j]] for k in range(num_mcmc_steps)])

            # compute autocorrelation for j-th posterior; append rather than
            # overwrite, so all posteriors contribute to the average below
            ac.append(autocorr(S, num_autocorr_steps))

        # average and plot autocorrelation functions
        plot(arange(num_autocorr_steps) * duration, mean(ac, 0), '-',
            color=method['color'],
            line_width=1.2,
            comment=str(method['parameters']))

    xlabel('time in seconds')
    ylabel('autocorrelation')
    title('toy example')

    gca().width = 7
    gca().height = 7
    gca().xmin = -1
    gca().xmax = NUM_SECONDS_VIS

    savefig('results/toyexample/toyexample_autocorr2.tex')

    return 0
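# `autocorr` is assumed to estimate a normalized autocorrelation function,
# averaged over chains; a minimal sketch of such an estimator (illustrative,
# not necessarily the project's implementation; assumes max_lag is smaller
# than the number of time steps):
import numpy as np

def autocorr(S, max_lag):
    # one chain per row of S, columns ordered by time
    S = S - S.mean(1, keepdims=True)
    var = (S * S).mean()

    return np.array([
        (S[:, :S.shape[1] - lag] * S[:, lag:]).mean() / var
        for lag in range(max_lag)])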
def main(argv):
    if len(argv) < 2:
        print 'Usage:', argv[0], '<param_id>', '[experiment]'
        print
        print '  {0:>3} {1:>7} {2:>5} {3:>5} {4:>5} {5:>5} {6:>5}'.format(
            'ID', 'PS', 'OC', 'TI', 'FI', 'LP', 'SC')

        for id, params in enumerate(parameters):
            print '  {0:>3} {1:>7} {2:>5} {3:>5} {4:>5} {5:>5} {6:>5}'.format(id, *params)

        print
        print '  ID = parameter set'
        print '  PS = patch size'
        print '  OC = overcompleteness'
        print '  TI = number of training iterations'
        print '  FI = number of fine-tuning iterations'
        print '  LP = optimize marginal distributions'
        print '  SC = initialize with sparse coding'

        return 0

    seterr(invalid='raise', over='raise', divide='raise')

    # start experiment
    experiment = Experiment()

    # hyperparameters
    patch_size, \
    overcompleteness, \
    max_iter, \
    max_iter_ft, \
    train_prior, \
    sparse_coding = parameters[int(argv[1])]

    ### DATA PREPROCESSING

    # load data, log-transform and center data
    data = load('data/vanhateren.{0}.1.npz'.format(patch_size))['data']
    data = data[:, :100000]
    data = preprocess(data)

    # discrete cosine transform and whitening transform
    dct = LinearTransform(dim=int(sqrt(data.shape[0])), basis='DCT')
    wt = WhiteningTransform(dct(data)[1:], symmetric=True)

    ### MODEL DEFINITION

    isa = ISA(
        num_visibles=data.shape[0] - 1,
        num_hiddens=data.shape[0] * overcompleteness - 1,
        ssize=1)

    # model DC component with a mixture of Gaussians
    model = StackedModel(dct, ConcatModel(MoGaussian(20), StackedModel(wt, isa)))

    ### MODEL TRAINING

    # variables to store in results
    experiment['model'] = model
    experiment['parameters'] = parameters[int(argv[1])]

    def callback(phase, isa, iteration):
        """
        Saves intermediate results every few iterations.
        """

        if not iteration % 5:
            # whitened filters
            A = dot(dct.A[1:].T, isa.A)

            patch_size = int(sqrt(A.shape[0]) + 0.5)

            # save intermediate results
            experiment.save('results/vanhateren.{0}/results.{1}.{2}.xpck'.format(
                argv[1], phase, iteration))

            # visualize basis
            imsave('results/vanhateren.{0}/basis.{1}.{2:0>3}.png'.format(
                argv[1], phase, iteration),
                stitch(imformat(A.T.reshape(-1, patch_size, patch_size))))

    if len(argv) > 2:
        # initialize model with trained model
        results = Experiment(argv[2])

        model = results['model']
        isa = model.model[1].model
        dct = model.transforms[0]

        experiment['model'] = model
    else:
        # enable regularization of marginals
        for gsm in isa.subspaces:
            gsm.gamma = 1e-3
            gsm.alpha = 2.
            gsm.beta = 1.
    # train mixture of Gaussians on DC component
    model.train(data, 0, max_iter=100)

    # initialize filters and marginals
    model.initialize(data, 1)
    model.initialize(model=1, method='laplace')

    experiment.progress(10)

    if sparse_coding:
        # initialize with sparse coding
        if patch_size == '16x16':
            model.train(data, 1, method=('of', {
                    'max_iter': max_iter,
                    'noise_var': 0.05,
                    'var_goal': 1.,
                    'beta': 10.,
                    'step_width': 0.01,
                    'sigma': 0.3,
                }),
                callback=lambda isa, iteration: callback(0, isa, iteration))
        else:
            model.train(data, 1, method=('of', {
                    'max_iter': max_iter,
                    'noise_var': 0.1,
                    'var_goal': 1.,
                    'beta': 10.,
                    'step_width': 0.01,
                    'sigma': 0.5,
                }),
                callback=lambda isa, iteration: callback(0, isa, iteration))

        isa.orthogonalize()
    else:
        if patch_size == '16x16':
            # prevents out-of-memory
            mapp.max_processes = 1

        # train model using a subset of the data
        model.train(data[:, :20000], 1,
            max_iter=max_iter,
            train_prior=train_prior,
            persistent=True,
            init_sampling_steps=5,
            method=('sgd', {'momentum': 0.8}),
            callback=lambda isa, iteration: callback(0, isa, iteration),
            sampling_method=('gibbs', {'num_steps': 1}))

    experiment.progress(50)

    if patch_size == '16x16':
        # prevents out-of-memory
        mapp.max_processes = 1

    # disable regularization
    for gsm in isa.subspaces:
        gsm.gamma = 0.

    # fine-tune model using all the data
    model.train(data, 1,
        max_iter=max_iter_ft,
        train_prior=train_prior,
        train_subspaces=False,
        persistent=True,
        init_sampling_steps=10 if not len(argv) > 2 and (sparse_coding or not train_prior) else 50,
        method=('lbfgs', {'max_fun': 50}),
        callback=lambda isa, iteration: callback(1, isa, iteration),
        sampling_method=('gibbs', {'num_steps': 2}))

    experiment.save('results/vanhateren/vanhateren.{0}.{{0}}.{{1}}.xpck'.format(argv[1]))

    return 0
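# A finished or intermediate run can be reloaded the same way the toy example
# scripts do it; the path below is only an example of what the callback saves,
# and the unpacking mirrors the initialization branch above:
#
#     results = Experiment('results/vanhateren.0/results.1.10.xpck')
#     model = results['model']
#     isa = model.model[1].model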