def test_1(self):
    kernel = GaussianKernel(sigma=10)
    X = reshape(arange(9.0), (3, 3))
    K_chol, I, R, W = incomplete_cholesky(X, kernel, eta=0.8, power=2)
    K = kernel.kernel(X)

    self.assertEqual(len(I), 2)
    self.assertEqual(I[0], 0)
    self.assertEqual(I[1], 2)

    self.assertEqual(shape(K_chol), (len(I), len(I)))
    for i in range(len(I)):
        self.assertEqual(K_chol[i, i], K[I[i], I[i]])

    self.assertEqual(shape(R), (len(I), len(X)))
    self.assertAlmostEqual(R[0, 0], 1.000000000000000)
    self.assertAlmostEqual(R[0, 1], 0.763379494336853)
    self.assertAlmostEqual(R[0, 2], 0.339595525644939)
    self.assertAlmostEqual(R[1, 0], 0)
    self.assertAlmostEqual(R[1, 1], 0.535992421608228)
    self.assertAlmostEqual(R[1, 2], 0.940571570355992)

    self.assertEqual(shape(W), (len(I), len(X)))
    self.assertAlmostEqual(W[0, 0], 1.000000000000000)
    self.assertAlmostEqual(W[0, 1], 0.569858199525808)
    self.assertAlmostEqual(W[0, 2], 0)
    self.assertAlmostEqual(W[1, 0], 0)
    self.assertAlmostEqual(W[1, 1], 0.569858199525808)
    self.assertAlmostEqual(W[1, 2], 1)
def log_pdf(self, thetas):
    assert(len(shape(thetas)) == 2)
    assert(shape(thetas)[1] == self.dimension)

    result = zeros(len(thetas))
    for i in range(len(thetas)):
        labels = BinaryLabels(self.y)
        feats_train = RealFeatures(self.X.T)

        # ARD: set theta, which is in log-scale, as kernel weights
        kernel = GaussianARDKernel(10, 1)
        kernel.set_weights(exp(thetas[i]))

        mean = ZeroMean()
        likelihood = LogitLikelihood()
        inference = LaplacianInferenceMethod(kernel, feats_train, mean,
                                             labels, likelihood)

        # fix kernel scaling for now
        inference.set_scale(exp(0))

        if self.ridge is not None:
            log_ml_estimate = inference.get_marginal_likelihood_estimate(
                self.n_importance, self.ridge)
        else:
            log_ml_estimate = inference.get_marginal_likelihood_estimate(
                self.n_importance)

        # prior is also in log-domain, so no exp of theta
        log_prior = self.prior.log_pdf(thetas[i].reshape(1, len(thetas[i])))
        result[i] = log_ml_estimate + log_prior

    return result
def test_2(self):
    kernel = GaussianKernel(sigma=2)
    X = reshape(arange(9.0), (3, 3))
    K_chol, I, R, W = incomplete_cholesky(X, kernel, eta=0.999)
    K = kernel.kernel(X)

    self.assertEqual(len(I), 2)
    self.assertEqual(I[0], 0)
    self.assertEqual(I[1], 2)

    self.assertEqual(shape(K_chol), (len(I), len(I)))
    for i in range(len(I)):
        self.assertEqual(K_chol[i, i], K[I[i], I[i]])

    self.assertEqual(shape(R), (len(I), len(X)))
    self.assertAlmostEqual(R[0, 0], 1.000000000000000)
    self.assertAlmostEqual(R[0, 1], 0.034218118311666)
    self.assertAlmostEqual(R[0, 2], 0.000001370959086)
    self.assertAlmostEqual(R[1, 0], 0)
    self.assertAlmostEqual(R[1, 1], 0.034218071400058)
    self.assertAlmostEqual(R[1, 2], 0.999999999999060)

    self.assertEqual(shape(W), (len(I), len(X)))
    self.assertAlmostEqual(W[0, 0], 1.000000000000000)
    self.assertAlmostEqual(W[0, 1], 0.034218071400090)
    self.assertAlmostEqual(W[0, 2], 0)
    self.assertAlmostEqual(W[1, 0], 0)
    self.assertAlmostEqual(W[1, 1], 0.034218071400090)
    self.assertAlmostEqual(W[1, 2], 1)
def __init__(self, mu=asarray([0, 0]), Sigma=eye(2), is_cholesky=False):
    DensityFunction.__init__(self, len(Sigma))

    assert(len(shape(mu)) == 1)
    assert(max(shape(Sigma)) == len(mu))

    self.mu = mu
    if is_cholesky:
        self.L = Sigma
    else:
        assert(shape(Sigma)[0] == shape(Sigma)[1])
        self.L = cholesky(Sigma)
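# A minimal usage sketch for the constructor above (an illustration, not part
# of the original file): by default Sigma is factorised internally; with
# is_cholesky=True a precomputed lower Cholesky factor is passed directly.
Sigma_example = eye(2) * 2.0
g_full = Gaussian(mu=zeros(2), Sigma=Sigma_example)  # factorised internally
g_chol = Gaussian(mu=zeros(2), Sigma=cholesky(Sigma_example), is_cholesky=True)
# both objects represent the same N(0, 2I) density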
def test_3(self):
    kernel = GaussianKernel(sigma=10)
    X = randn(3000, 10)
    K_chol, I, R, W = incomplete_cholesky(X, kernel, eta=0.001)
    K = kernel.kernel(X)

    self.assertEqual(shape(K_chol), (len(I), len(I)))
    self.assertEqual(shape(R), (len(I), len(X)))
    self.assertEqual(shape(W), (len(I), len(X)))

    self.assertLessEqual(norm(K - R.T.dot(R)), 1)
    self.assertLessEqual(norm(K - W.T.dot(K_chol.dot(W))), 1)
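# The property these tests exercise is the low-rank approximation produced by
# the incomplete Cholesky factorisation: with pivot set I, the full kernel
# matrix is approximated both by R.T.dot(R) and by W.T.dot(K_chol).dot(W).
# A minimal standalone sketch of that check, assuming the same
# incomplete_cholesky and GaussianKernel interfaces used in the tests above:
def check_incomplete_cholesky(X, kernel, eta=0.001, tol=1.0):
    K_chol, I, R, W = incomplete_cholesky(X, kernel, eta)
    K = kernel.kernel(X)
    assert norm(K - R.T.dot(R)) <= tol
    assert norm(K - W.T.dot(K_chol.dot(W))) <= tol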
def __init__(self, gp, newton_step=1.0, newton_epsilon=1e-5,
             newton_max_iterations=20, newton_start=None):
    """
    gp                    - underlying Gaussian process
    newton_step           - starting step size; if the objective function is
                            not increased after a step, the step is discarded
                            and the step size is halved
    newton_epsilon        - epsilon to terminate optimisation
    newton_max_iterations - maximum number of steps
    newton_start          - optional starting point, useful if the mode has to
                            be found multiple times for slightly varying data
    """
    dim = len(gp.K)

    assert newton_step > 0
    assert newton_epsilon > 0
    assert newton_max_iterations > 0
    if newton_start is not None:
        assert len(shape(newton_start)) == 1
        assert len(newton_start) == dim

    self.gp = gp
    self.newton_step = newton_step
    self.newton_epsilon = newton_epsilon
    self.newton_max_iterations = newton_max_iterations
    self.newton_start = newton_start
def set_hyperparameters(self, theta):
    assert(len(shape(theta)) == 1)
    assert(len(theta) == 2)

    self.kernel.sigma = theta[0]
    self.kernel.scale = theta[1]
def compute_constants(self, y):
    """
    Precomputes constants of the log density of the proposal distribution,
    which is Gaussian as p(x|y) ~ N(mu, R) where
    mu = y - a
    a = 0
    R = gamma^2 I + nu^2 * M H M^T
    M = 2 [\\nabla_x k(x, z_i)]|_{x=y}

    Returns (mu, L_R), where L_R is the lower Cholesky factor of R
    """
    assert(len(shape(y)) == 1)

    # M = 2 [\\nabla_x k(x, z_i)]|_{x=y}
    if self.Z is None:
        R = self.gamma ** 2 * eye(len(y))
    else:
        M = 2 * self.kernel.gradient(y, self.Z)

        # R = gamma^2 I + nu^2 * M^T H M (M stacks gradients row-wise here)
        H = Kernel.centring_matrix(len(self.Z))
        R = self.gamma ** 2 * eye(len(y)) + self.nu2 * M.T.dot(H.dot(M))

    L_R = cholesky(R)

    return y.copy(), L_R
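# A self-contained numpy sketch (an illustration, not the original code) of the
# covariance construction above for the Gaussian kernel
# k(x, z) = exp(-0.5 ||x - z||^2 / sigma^2), where the rows of G correspond to
# \nabla_x k(x, z_i)|_{x=y} as returned by kernel.gradient(y, Z); gamma, nu2
# and sigma stand in for the corresponding instance attributes.
import numpy as np

def proposal_covariance_sketch(y, Z, gamma=0.1, nu2=1.0, sigma=1.0):
    n = len(Z)
    k = np.exp(-0.5 * np.sum((Z - y) ** 2, axis=1) / sigma ** 2)
    G = k[:, np.newaxis] * (Z - y) / sigma ** 2  # rows are kernel gradients at y
    M = 2 * G
    H = np.eye(n) - np.ones((n, n)) / n          # centring matrix
    return gamma ** 2 * np.eye(len(y)) + nu2 * M.T.dot(H.dot(M))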
def showAverageQValues(p, v, nEpisodes, path):
    '''Colorplot for the average Q-values'''
    lrAgent = loadEpisodeVar(p, v, 0, path, 'lrAgent')
    ny, nx = shape(lrAgent.x)
    avgValues = zeros((ny, nx))
    for e in xrange(nEpisodes):
        if e != 0:
            lrAgent = loadEpisodeVar(p, v, e, path, 'lrAgent')
        avgValues += lrAgent.x
    # pdb.set_trace()
    avgValues = avgValues / nEpisodes

    figure()
    title('Average Q-values over the trials - p=' + str(p) + ' v=' + str(v))
    dlbd = lrAgent.lbd[1] - lrAgent.lbd[0]
    last = lrAgent.lbd[-1] + dlbd
    # watch out with this limit "last" (pcolor doesn't show the last column)
    X = r_[lrAgent.lbd, last]
    Y = arange(ny + 1)
    Z = avgValues
    pcolor(X, Y, Z)
    colorbar()
    axis([lrAgent.lbd[0], last, 0, ny + 1])
    xlabel('Learning rates')
    ylabel('Trials')
def construct_proposal(self, y): """ Returns the proposal distribution at point y given the current history """ assert(len(shape(y))==1) mu, L_R = self.compute_constants(y) return Gaussian(mu, L_R, is_cholesky=True)
def __init__(self, X, y, n_importance, prior, ridge=None):
    Distribution.__init__(self, dimension=shape(X)[1])

    self.n_importance = n_importance
    self.prior = prior
    self.ridge = ridge
    self.X = X
    self.y = y
def load_ozone_data():
    folder = OzonePosterior.get_data_folder()

    y = loadmat(folder + "y.mat")["y"][:, 0]
    assert(len(shape(y)) == 1)

    A = loadmat(folder + "A.mat")["A"]

    return y, A
def construct_proposal(self, y): """ parameters: y - 1D array with a current_sample_object point """ # ensure this in every implementation assert(len(shape(y)) == 1) raise NotImplementedError()
def log_pdf_multiple_points(self, X):
    assert(len(shape(X)) == 2)
    assert(shape(X)[1] == self.dimension)

    log_determinant_part = -sum(log(diag(self.L)))

    quadratic_parts = zeros(len(X))
    for i in range(len(X)):
        x = X[i] - self.mu

        # solve y = K^(-1) x = L^(-T) L^(-1) x
        y = solve_triangular(self.L, x.T, lower=True)
        y = solve_triangular(self.L.T, y, lower=False)

        quadratic_parts[i] = -0.5 * x.dot(y)

    const_part = -0.5 * len(self.L) * log(2 * pi)

    return const_part + log_determinant_part + quadratic_parts
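# A hedged sanity check for log_pdf_multiple_points (a hypothetical helper,
# not in the original file): since Sigma = L L^T, the triangular-solve
# evaluation above should agree with scipy's reference density.
from numpy import allclose
from numpy.random import randn
from scipy.stats import multivariate_normal

def check_gaussian_log_pdf(gaussian, n=10):
    X = randn(n, len(gaussian.mu))
    expected = multivariate_normal.logpdf(X, mean=gaussian.mu,
                                          cov=gaussian.L.dot(gaussian.L.T))
    assert allclose(gaussian.log_pdf_multiple_points(X), expected)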
def construct_proposal(self, y): """ parameters: y - 1D array with a current_sample_object point """ # ensure this in every implementation assert (len(shape(y)) == 1) raise NotImplementedError()
def log_pdf(self, X):
    logging.debug("Entering")
    assert(shape(X)[0] == 1)

    result = self.log_likelihood(2 ** X[0, 0], 2 ** X[0, 1])
    if self.prior is not None:
        result += self.prior.log_pdf(X)

    logging.debug("Leaving")
    return result
def construct_proposal(self, y):
    assert(len(shape(y)) == 1)

    m = MixtureDistribution(self.distribution.dimension, self.num_eigen)
    m.mixing_proportion = Discrete((self.eigvalues + 1) /
                                   (sum(self.eigvalues) + self.num_eigen))
    # print "current mixing proportion: ", m.mixing_proportion.omega
    for ii in range(self.num_eigen):
        L = sqrt(self.dwscale[ii] * self.eigvalues[ii]) * \
            reshape(self.eigvectors[:, ii], (self.distribution.dimension, 1))
        m.components[ii] = Gaussian(y, L, is_cholesky=True, ell=1)
    # Z = m.sample(1000).samples
    # Visualise.plot_data(Z)
    return m
def __init__(self, X, y, n_importance, prior, ridge=None):
    Distribution.__init__(self, dimension=shape(X)[1])

    self.n_importance = n_importance
    self.prior = prior
    self.ridge = ridge
    self.X = X
    self.y = y

    # compute alphabet sizes based on the number of unique elements for each
    # covariate in the categorical input data represented as reals
    self.alphabet_sizes = array([len(set(X[:, i])) for i in range(X.shape[1])],
                                dtype=int32)
def kernel(self, X, Y=None):
    """
    Computes the standard Gaussian kernel k(x,y)=exp(-0.5* ||x-y||**2 / sigma**2)

    X - 2d array, samples on right hand side
    Y - 2d array, samples on left hand side, can be None in which case
        it is replaced by X
    """
    assert(len(shape(X)) == 2)
    if Y is not None:
        assert(len(shape(Y)) == 2)

    # if X=Y, use more efficient pdist call which exploits symmetry
    if Y is None:
        sq_dists = squareform(pdist(X, 'sqeuclidean'))
    else:
        sq_dists = cdist(X, Y, 'sqeuclidean')

    K = exp(-0.5 * sq_dists / self.sigma ** 2)
    return K
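# A minimal usage sketch for the kernel above (illustrative, not part of the
# original file): the symmetric pdist path and the general cdist path agree
# when Y equals X, and k(x, x) = 1 on the diagonal.
from numpy import allclose, arange, diag, ones, reshape

kernel = GaussianKernel(sigma=2.0)
X = reshape(arange(9.0), (3, 3))
assert allclose(kernel.kernel(X), kernel.kernel(X, X))
assert allclose(diag(kernel.kernel(X)), ones(3))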
def assert_file_matrix(self, filename, M):
    try:
        with open(filename):
            m = loadtxt(filename)

            # python loads vectors as 1d-arrays, but we want 2d-col-vectors
            if len(shape(m)) == 1:
                m = reshape(m, (len(m), 1))

            self.assertEqual(M.shape, m.shape)
            self.assertLessEqual(norm(m - M), 1e-5)
            return True
    except IOError:
        return False
def showQValues2(lrAgent, spFactor=1):
    '''Colorplot for the QValues'''
    figure()
    title('Q-values over the trials')
    dlbd = lrAgent.lbd[1] - lrAgent.lbd[0]
    ny, nx = shape(lrAgent.x)
    last = lrAgent.lbd[-1] + dlbd
    # watch out with this limit "last" (pcolor doesn't show the last column)
    X = r_[lrAgent.lbd, last]
    Y = arange(ny / spFactor + 1)
    sample = array([i % spFactor == 0 for i in xrange(ny)])
    Z = lrAgent.x[sample]
    pcolor(X, Y, Z)
    colorbar()
    axis([lrAgent.lbd[0], last, 0, ny / spFactor + 1])
    xlabel('Learning rates')
    ylabel('Trials')
def construct_proposal(self, y): """ proposal is a mixture of normals, centred at y and with covariance gamma^2 I + nu^2 MHaa'HM', where a are the eigenvectors of centred kernel matrix Kc=HKH """ assert len(shape(y)) == 1 m = MixtureDistribution(self.distribution.dimension, self.num_eigen) m.mixing_proportion = Discrete((self.eigvalues + 1) / (sum(self.eigvalues) + self.num_eigen)) # print "current mixing proportion: ", m.mixing_proportion.omega M = 2 * self.kernel.gradient(y, self.Z) H = Kernel.centring_matrix(len(self.Z)) for ii in range(self.num_eigen): Sigma = self.gamma ** 2 * eye(len(y)) + self.nu2 * (M.T).dot( H.dot(outer(self.eigvectors[:, ii], self.eigvectors[:, ii]).dot(H.dot(M))) ) m.components[ii] = Gaussian(y, Sigma) return m
def construct_proposal(self, y): """ proposal is a mixture of normals, centred at y and with covariance gamma^2 I + nu^2 MHaa'HM', where a are the eigenvectors of centred kernel matrix Kc=HKH """ assert (len(shape(y)) == 1) m = MixtureDistribution(self.distribution.dimension, self.num_eigen) m.mixing_proportion = Discrete( (self.eigvalues + 1) / (sum(self.eigvalues) + self.num_eigen)) # print "current mixing proportion: ", m.mixing_proportion.omega M = 2 * self.kernel.gradient(y, self.Z) H = Kernel.centring_matrix(len(self.Z)) for ii in range(self.num_eigen): Sigma = self.gamma ** 2 * eye(len(y)) + \ self.nu2 * (M.T).dot(H.dot(outer(self.eigvectors[:, ii], self.eigvectors[:, ii]).dot(H.dot(M)))) m.components[ii] = Gaussian(y, Sigma) return m
def showQValues(p, v, epis, path):
    '''Colorplot for the QValues'''
    lrAgent = loadEpisodeVar(p, v, epis, path, 'lrAgent')
    figure()
    title('Q-values over the trials')
    dlbd = lrAgent.lbd[1] - lrAgent.lbd[0]
    ny, nx = shape(lrAgent.x)
    last = lrAgent.lbd[-1] + dlbd
    # watch out with this limit "last" (pcolor doesn't show the last column)
    X = r_[lrAgent.lbd, last]
    Y = arange(ny + 1)
    Z = lrAgent.x
    pcolor(X, Y, Z)
    colorbar()
    axis([lrAgent.lbd[0], last, 0, ny + 1])
    xlabel('Learning rates')
    ylabel('Trials')
def test_testAverage3(self):
    # Yet more tests of average!
    a = arange(6)
    b = arange(6) * 3
    r1, w1 = average([[a, b], [b, a]], axis=1, returned=1)
    assert_equal(shape(r1), shape(w1))
    assert_equal(r1.shape, w1.shape)
    r2, w2 = average(ones((2, 2, 3)), axis=0, weights=[3, 1], returned=1)
    assert_equal(shape(w2), shape(r2))
    r2, w2 = average(ones((2, 2, 3)), returned=1)
    assert_equal(shape(w2), shape(r2))
    r2, w2 = average(ones((2, 2, 3)), weights=ones((2, 2, 3)), returned=1)
    assert_equal(shape(w2), shape(r2))
    a2d = array([[1, 2], [0, 4]], float)
    a2dm = masked_array(a2d, [[False, False], [True, False]])
    a2da = average(a2d, axis=0)
    assert_equal(a2da, [0.5, 3.0])
    a2dma = average(a2dm, axis=0)
    assert_equal(a2dma, [1.0, 3.0])
    a2dma = average(a2dm, axis=None)
    assert_equal(a2dma, 7. / 3.)
    a2dma = average(a2dm, axis=1)
    assert_equal(a2dma, [1.5, 4.0])
def show3dQValues2(lrAgent):
    '''3d Colorplot for the QValues'''
    fig = figure()
    ny, nx = shape(lrAgent.x)
    ax = fig.gca(projection='3d')
    X = lrAgent.lbd
    Y = np.arange(ny)[::-1]
    X, Y = np.meshgrid(X, Y)
    Z = lrAgent.x
    xlabel('x')
    ylabel('y')
    title('Q-values over the trials')
    surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.jet,
                           linewidth=0, antialiased=False)
    ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
    fig.colorbar(surf, shrink=0.5, aspect=5)
experiment_dir_base = str(sys.argv[1])
n = int(str(sys.argv[2]))

# loop over parameters here
experiment_dir = experiment_dir_base + \
    str(os.path.abspath(sys.argv[0])).split(os.sep)[-1].split(".")[0] + os.sep
print "running experiments", n, "times at base", experiment_dir

# load data
data, labels = GPData.get_glass_data()

# normalise and whiten dataset
data -= mean(data, 0)
L = cholesky(cov(data.T))
data = solve_triangular(L, data.T, lower=True).T
dim = shape(data)[1]

# prior on theta and posterior target estimate
theta_prior = Gaussian(mu=0 * ones(dim), Sigma=eye(dim) * 5)
distribution = PseudoMarginalHyperparameterDistribution(data, labels,
                                                        n_importance=100,
                                                        prior=theta_prior,
                                                        ridge=1e-3)

sigma = 23.0
print "using sigma", sigma
kernel = GaussianKernel(sigma=sigma)

for i in range(n):
    mcmc_samplers = []
def precompute(self):
    # collect lines for Graphlab graph definition file for full rank case
    graphlab_lines = GraphlabLines(output_filename=self.output_filename)

    # compute all non-symmetric kernels for incoming messages at a node
    print "precomputing (non-symmetric) kernels for incoming messages at a node"
    graphlab_lines.lines.append("# non-observed nodes")
    for node in self.graph:
        added_node = False
        for in_message in self.graph[node]:
            for out_message in self.graph[node]:
                if in_message == out_message:
                    continue

                # don't add nodes which have no kernels, and only do once if they have
                if not added_node:
                    graphlab_lines.new_non_observed_node(node)
                    added_node = True

                edge_in_message = (node, in_message)
                edge_out_message = (out_message, node)

                lhs = self.data[edge_in_message][0]
                rhs = self.data[edge_out_message][1]
                lhs = reshape(lhs, (len(lhs), 1))
                rhs = reshape(rhs, (len(rhs), 1))
                K = self.kernel.kernel(lhs, rhs)
                graphlab_lines.add_non_observed_node(node, out_message,
                                                     in_message, K)

    print "precomputing kernel (vectors) at observed nodes"
    graphlab_lines.lines.append(os.linesep + "# observed nodes")
    for node, observation in self.observations.items():
        graphlab_lines.new_observed_node(node)

        for out_message in self.graph[node]:
            edge = (out_message, node)
            lhs = self.data[edge][1]
            lhs = reshape(lhs, (len(lhs), 1))
            rhs = [[observation]]
            K = self.kernel.kernel(lhs, rhs)
            graphlab_lines.add_observed_node(node, out_message, K)

    # now precompute systems for inference
    print "precomputing systems for messages from observed nodes"
    graphlab_lines.lines.append(os.linesep + "# edges with observed targets")
    for node, observation in self.observations.items():
        for out_message in self.graph[node]:
            edge = (out_message, node)
            graphlab_lines.new_edge_observed_target(node, out_message)

            data_source = self.data[edge][0]
            data_source = reshape(data_source, (len(data_source), 1))
            data_target = self.data[edge][1]
            data_target = reshape(data_target, (len(data_target), 1))

            Ks = self.kernel.kernel(data_source)
            Kt = self.kernel.kernel(data_target)
            Ls = cholesky(Ks + eye(shape(Ks)[0]) * self.reg_lambda)
            Lt = cholesky(Kt + eye(shape(Kt)[0]) * self.reg_lambda)

            graphlab_lines.add_edge(node, out_message, "L_s", Ls)
            graphlab_lines.add_edge(node, out_message, "L_t", Lt)

    print "precomputing systems for messages from non-observed nodes"
    graphlab_lines.lines.append(os.linesep + "# edges with non-observed targets")
    for edge in self.edges:
        # exclude edges which involve observed nodes
        is_edge_target_observed = len(Set(self.observations.keys())
                                      .intersection(Set(edge))) > 0
        if not is_edge_target_observed:
            graphlab_lines.new_edge_observed_target(edge[1], edge[0])

            data_source = self.data[edge][0]
            data_source = reshape(data_source, (len(data_source), 1))
            Ks = self.kernel.kernel(data_source)
            Ls = cholesky(Ks + eye(shape(Ks)[0]) * self.reg_lambda)
            graphlab_lines.add_edge(edge[1], edge[0], "L_s", Ls)

    # write graph definition file to disc
    graphlab_lines.flush()
mpl.rcParams['text.usetex'] = True
mpl.rcParams['text.latex.unicode'] = True

sampler_names_short = ["SM", "AM-FS", "AM-LS", "KAMH-LS"]
sampler_names = ["StandardMetropolis", "AdaptiveMetropolis",
                 "AdaptiveMetropolisLearnScale", "KameleonWindowLearnScale"]
colours = ['blue', 'red', 'magenta', 'green']

ii = 0
for sampler_name in sampler_names:
    filename = directory + sampler_name + "_mmds.bin"
    f = open(filename, "r")
    upto, mmds, mean_dist = load(f)
    trials = shape(mean_dist)[1]

    figure(1)
    if which_plot == "mean":
        stds = std(mean_dist, 1) / sqrt(trials)
        means = mean(mean_dist, 1)
    if which_plot == "mmd":
        stds = std(mmds, 1) / sqrt(trials)
        means = mean(mmds, 1)

    zscore = 1.28
    yerr = zscore * stds

    if highlight == "SM":
        condition = sampler_name == "StandardMetropolis"
    elif highlight == "AM":
        condition = sampler_name == "AdaptiveMetropolis" or \
                    sampler_name == "AdaptiveMetropolisLearnScale"
    elif highlight == "KAMH":
        condition = sampler_name == "KameleonWindowLearnScale"
samples_long = loadtxt("/nfs/home2/dino/kamh-results/StandardMetropolis_PseudoMarginalHyperparameterDistribution_merged_samples.txt")
samples_long = samples_long[:10000]

# f_long = open("/nfs/home2/dino/kamh-results/long_experiment_output.bin")
# experiment_long = load(f_long)
# f_long.close()
# thin_long = 100
# mcmc_chain_long = experiment_long.mcmc_chain
# burnin = mcmc_chain_long.mcmc_params.burnin
# indices_long = range(burnin, mcmc_chain_long.iteration, thin_long)
# samples_long = mcmc_chain_long.samples[indices_long]

mu_long = mean(samples_long, 0)
print 'using this many samples for the long chain: ', shape(samples_long)[0]

how_many_chains = 20
stats_granularity = 10

path_above = "/nfs/home2/dino/git/kameleon-mcmc/main/gp/scripts/glass_gaussian_ard/"
# path_above = "/nfs/data3/ucabhst/kameleon_experiments/glass_ard/"
path_below = "output/experiment_output.bin"

# sampler_names = ["KameleonWindowLearnScale", "AdaptiveMetropolisLearnScale", "AdaptiveMetropolis"]
sampler_names = ["StandardMetropolis"]
path_temp = "_PseudoMarginalHyperparameterDistribution_#/"

for sampler_name in sampler_names:
    mean_dist = zeros((stats_granularity, how_many_chains))
    mmds = zeros((stats_granularity, how_many_chains))
# load data
data, labels = GPData.get_glass_data()

# throw away some data
n = 250
seed(1)
idx = permutation(len(data))
idx = idx[:n]
data = data[idx]
labels = labels[idx]

# normalise and whiten dataset
data -= mean(data, 0)
L = cholesky(cov(data.T))
data = solve_triangular(L, data.T, lower=True).T
dim = shape(data)[1]

# prior on theta and posterior target estimate
theta_prior = Gaussian(mu=0 * ones(dim), Sigma=eye(dim) * 5)
target = PseudoMarginalHyperparameterDistribution(data, labels,
                                                  n_importance=100,
                                                  prior=theta_prior,
                                                  ridge=1e-3)

# create sampler
burnin = 10000
num_iterations = burnin + 300000
kernel = GaussianKernel(sigma=23.0)
sampler = KameleonWindowLearnScale(target, kernel, stop_adapt=burnin)
# sampler = AdaptiveMetropolisLearnScale(target)
# sampler = StandardMetropolis(target)
# store index sets at source and target of every edge
index_sets = {}

print "precomputing systems for messages from observed nodes"
graphlab_lines.lines.append(os.linesep + "# edges with observed targets")
for node, observation in observations.items():
    for out_message in graph[node]:
        edge = (out_message, node)
        graphlab_lines.new_edge_observed_target(node, out_message)

        data_source = data[edge][0]
        data_target = data[edge][1]

        Ks_chol, Is, Rs, Ws = incomplete_cholesky(data_source, kernel, eta)
        Kt_chol, It, Rt, Wt = incomplete_cholesky(data_target, kernel, eta)

        Qs, Rs, Ps = qr(Ws.dot(Ws.T) + Ks_chol +
                        eye(shape(Ks_chol)[0]) * reg_lambda, pivoting=True)
        Qt, Rt, Pt = qr(Wt.dot(Wt.T) + Kt_chol +
                        eye(shape(Kt_chol)[0]) * reg_lambda, pivoting=True)

        savetxt(graphlab_lines.add_edge(node, out_message, "Q_s"), Qs)
        savetxt(graphlab_lines.add_edge(node, out_message, "R_s"), Rs)
        savetxt(graphlab_lines.add_edge(node, out_message, "P_s"), Ps)
        savetxt(graphlab_lines.add_edge(node, out_message, "Q_t"), Qt)
        savetxt(graphlab_lines.add_edge(node, out_message, "R_t"), Rt)
        savetxt(graphlab_lines.add_edge(node, out_message, "P_t"), Pt)
        savetxt(graphlab_lines.add_edge(node, out_message, "W"), Ws.dot(Wt.T))

print "precomputing systems for messages from non-observed nodes"
graphlab_lines.lines.append(os.linesep + "# edges with non-observed targets")
for edge in edges:
def init(self, start):
    assert(len(shape(start)) == 1)

    self.current_sample_object = Sample(start)
    start_2d = reshape(start, (1, len(start)))
    self.log_lik_current = self.distribution.log_pdf(start_2d)
def construct_proposal(self, y):
    assert(len(shape(y)) == 1)

    return Gaussian(mu=y, Sigma=self.globalscale * self.cov_est,
                    is_cholesky=False)