def get_trace_from_ws_and_Ks(eps, Ky, Kx, ws=None):
    Gy = center_K(Ky)
    Gx = center_K(Kx)
    N = len(Kx)
    #print 'ws', ws
    if ws is None:
        ws = np.ones(N)
    # pdb.set_trace()
    ans = np.trace(np.dot(np.dot(np.diag(ws), Gy),
                          np.linalg.inv(np.dot(np.diag(ws), Gx + float(N) * eps * np.eye(N)))))
    return ans
def KL_two_gaussians(params):
    d = np.shape(params)[0] - 1
    mu = params[0:d, 0]
    toSigma = params[0:d, 1:d+1]
    intSigma = toSigma - np.diag(np.diag(toSigma)) + np.diag(np.exp(np.diag(toSigma)))
    Sigma = intSigma - np.tril(intSigma) + np.transpose(np.triu(intSigma))
    muPrior = np.zeros(d)
    sigmaPrior = np.identity(d)
    #print Sigma
    #print np.linalg.det(Sigma)
    # Note: 0.5 (rather than 1/2) avoids integer division under Python 2.
    return 0.5 * (np.log(np.linalg.det(Sigma) / np.linalg.det(sigmaPrior)) - d
                  + np.trace(np.dot(np.linalg.inv(Sigma), sigmaPrior))
                  + np.dot(np.transpose(mu - muPrior), np.dot(np.linalg.inv(Sigma), mu - muPrior)))
def test_make_diagonal():
    def fun(D):
        return to_scalar(np.make_diagonal(D, axis1=-1, axis2=-2))

    D = np.random.randn(4)
    A = np.make_diagonal(D, axis1=-1, axis2=-2)
    assert np.allclose(np.diag(A), D)
    check_grads(fun, D)

    D = np.random.randn(3, 4)
    A = np.make_diagonal(D, axis1=-1, axis2=-2)
    assert all([np.allclose(np.diag(A[i]), D[i]) for i in range(3)])
    check_grads(fun, D)
def predict(params, xstar, with_noise=False, FITC=False):
    """Returns the predictive mean and covariance at locations xstar,
    of the latent function value f (without observation noise)."""
    mean, cov_params, noise_scale, x0, y0 = unpack_gp_params(params)
    cov_f_f = cov_func(cov_params, xstar, xstar)
    cov_y_f = cov_func(cov_params, x0, xstar)
    cov_y_y = cov_func(cov_params, x0, x0) + noise_scale * np.eye(len(y0))
    pred_mean = mean + np.dot(solve(cov_y_y, cov_y_f).T, y0 - mean)
    pred_cov = cov_f_f - np.dot(solve(cov_y_y, cov_y_f).T, cov_y_f)
    if FITC:
        pred_cov = np.diag(np.diag(pred_cov))
    if with_noise:
        pred_cov = pred_cov + noise_scale * np.eye(len(xstar))
    return pred_mean, pred_cov
def loglikelihood(self, g, beta, mu_ivp, alpha, pi, priors):
    logprobs = []
    for i, ifx in enumerate(self._ifix):
        # get the log-probability for each mixture component
        ll = 0.
        zM = self._forward(g, beta, mu_ivp[i], ifx)
        for q, yq in enumerate(self.Y_train_):
            ll += norm.logpdf(yq, zM[..., q], scale=1 / np.sqrt(alpha)).sum()
        logprobs.append(ll + np.log(pi[i]))
    logprobs = np.array(logprobs)

    lpmax = max(logprobs)
    loglik = lpmax + np.log(np.exp(logprobs - lpmax).sum())

    Cg = self.latentforces[0].kernel(self.ttc[:, None])
    Cg[np.diag_indices_from(Cg)] += 1e-5
    Lg = np.linalg.cholesky(Cg)
    logprior = -0.5 * g.dot(cho_solve((Lg, True), g)) - \
        np.log(np.diag(Lg)).sum() - \
        Lg.shape[0] / 2 * np.log(2 * np.pi)

    for vn, x in zip(['beta'], beta):
        try:
            prior_logpdf = priors[vn]
            logprior += prior_logpdf(x)
        except KeyError:
            pass

    return loglik + logprior
def plot_single_gp(ax, params, layer, unit, plot_xs):
    ax.cla()
    rs = npr.RandomState(0)

    deep_map = create_deep_map(params)
    gp_details = deep_map[layer][unit]
    gp_params = pack_gp_params(gp_details)

    pred_mean, pred_cov = predict_layer_funcs[layer][unit](gp_params, plot_xs, with_noise=False, FITC=False)
    x0 = deep_map[layer][unit]['x0']
    y0 = deep_map[layer][unit]['y0']
    noise_scale = deep_map[layer][unit]['noise_scale']

    marg_std = np.sqrt(np.diag(pred_cov))
    if n_samples_to_plot > 19:
        ax.plot(plot_xs, pred_mean, 'b')
        ax.fill(np.concatenate([plot_xs, plot_xs[::-1]]),
                np.concatenate([pred_mean - 1.96 * marg_std,
                                (pred_mean + 1.96 * marg_std)[::-1]]),
                alpha=.15, fc='Blue', ec='None')

    # Show samples from posterior.
    sampled_funcs = rs.multivariate_normal(pred_mean, pred_cov * (random), size=n_samples_to_plot)
    ax.plot(plot_xs, sampled_funcs.T)
    ax.plot(x0, y0, 'ro')
    #ax.errorbar(x0, y0, yerr=noise_scale, fmt='o')
    ax.set_xticks([])
    ax.set_yticks([])
def entropy(self, params):
    mu, sig = self.get_params(params)
    a = self.zlen * np.log(2 * np.pi) + self.zlen
    b = 2 * np.sum(np.log(np.diag(sig)))
    return 0.5 * (a + b)
def test_exact_log_det_vectorized():
    D = 10
    N = 7
    rs = npr.RandomState(0)
    mats = []
    exact_logdets = []
    for i in range(N):
        # Build N different functions, each multiplying against a different matrix.
        cur_mat = np.eye(D) - 0.1 * np.diag(rs.rand(D))
        mats.append(cur_mat)
        cur_func = lambda v: np.dot(cur_mat, v)
        exact_logdets.append(exact_log_det_non_vectorized(cur_func, D))

    def mvp_vec(v):
        """Vectorized version takes in N vectors of length D, and multiplies
        each v against the corresponding matrix in the list."""
        assert v.shape == (N, D), v.shape
        mvps = []
        for i in range(N):
            print("i:", i, v[i])
            mvps.append(np.dot(mats[i], v[i]))
        return fast_array_from_list(mvps)

    vec_logdets = exact_log_det(mvp_vec, D, N)
    assert np.all(vec_logdets == exact_logdets), \
        "vectorized: {} non-vectorized: {}".format(vec_logdets, exact_logdets)
def callback(params):
    print("Log likelihood {}".format(-objective(params)))
    plt.cla()

    # Show posterior marginals.
    plot_xs = np.reshape(np.linspace(-7, 7, 300), (300, 1))
    pred_mean, pred_cov = predict(params, X, y, plot_xs)
    marg_std = np.sqrt(np.diag(pred_cov))
    ax.plot(plot_xs, pred_mean, 'b')
    ax.fill(np.concatenate([plot_xs, plot_xs[::-1]]),
            np.concatenate([pred_mean - 1.96 * marg_std,
                            (pred_mean + 1.96 * marg_std)[::-1]]),
            alpha=.15, fc='Blue', ec='None')

    # Show samples from posterior.
    rs = npr.RandomState(0)
    sampled_funcs = rs.multivariate_normal(pred_mean, pred_cov, size=10)
    ax.plot(plot_xs, sampled_funcs.T)

    ax.plot(X, y, 'kx')
    ax.set_ylim([-1.5, 1.5])
    ax.set_xticks([])
    ax.set_yticks([])
    plt.draw()
    plt.pause(1.0 / 60.0)
def test_multidim_array(self, s, tol):
    """Tests that arguments which are multidimensional arrays are
    properly evaluated and differentiated in QNodes."""
    multidim_array = np.reshape(b, s)

    def circuit(w):
        qml.RX(w[np.unravel_index(0, s)], wires=0)  # b[0]
        qml.RX(w[np.unravel_index(1, s)], wires=1)  # b[1]
        qml.RX(w[np.unravel_index(2, s)], wires=2)  # ...
        qml.RX(w[np.unravel_index(3, s)], wires=3)
        qml.RX(w[np.unravel_index(4, s)], wires=4)
        qml.RX(w[np.unravel_index(5, s)], wires=5)
        qml.RX(w[np.unravel_index(6, s)], wires=6)
        qml.RX(w[np.unravel_index(7, s)], wires=7)
        return tuple(qml.expval(qml.PauliZ(idx)) for idx in range(len(b)))

    dev = qml.device('default.qubit', wires=8)
    circuit = qml.QNode(circuit, dev)

    # circuit evaluations
    circuit_output = circuit(multidim_array)
    expected_output = np.cos(b)
    assert np.allclose(circuit_output, expected_output, atol=tol, rtol=0)

    # circuit jacobians
    circuit_jacobian = circuit.jacobian([multidim_array])
    expected_jacobian = -np.diag(np.sin(b))
    assert np.allclose(circuit_jacobian, expected_jacobian, atol=tol, rtol=0)
def neg_likelihood(self, theta):
    sn2, sp2, log_lscale, w = self.split_theta(theta)
    scaled_x = scale_x(log_lscale, self.train_x)
    Phi = self.nn.predict(w, scaled_x)
    Phi_y = np.dot(Phi, self.train_y.T)
    A = np.dot(Phi, Phi.T) + self.m * sn2 / sp2 * np.eye(self.m)
    LA = np.linalg.cholesky(A)
    logDetA = 2 * np.log(np.diag(LA)).sum()
    datafit = (np.dot(self.train_y, self.train_y.T) - np.dot(Phi_y.T, chol_inv(LA, Phi_y))) / sn2
    neg_likelihood = 0.5 * (datafit + logDetA + self.num_train * np.log(2 * np.pi * sn2)
                            - self.m * np.log(self.m * sn2 / sp2))
    neg_likelihood = neg_likelihood.sum()
    if np.isnan(neg_likelihood):
        neg_likelihood = np.inf

    w_nobias = self.nn.w_nobias(w, self.dim)
    l1_reg = self.l1 * np.sum(np.abs(w_nobias))
    l2_reg = self.l2 * np.dot(w_nobias, w_nobias.T)
    neg_likelihood += l1_reg + l2_reg

    if neg_likelihood < self.loss:
        self.loss = neg_likelihood
        self.theta = np.copy(theta)
        self.A = A.copy()
        self.LA = LA.copy()

    return neg_likelihood
def MBAM_jac_RHS(
    self, V: "2N dimensional initial conditions vector"
) -> "RHS of the geodesic equation":
    N = int(np.size(V) / 2)
    θ = V[:N]
    dθ = V[N:]
    g = self.g(θ)
    ret = np.c_[np.zeros((N, N)), np.diag(N * [1])]
    if np.linalg.matrix_rank(g) == N:
        ret1 = np.c_[np.zeros((N, N)),
                     -2 * np.einsum('a,bi,cab->ci', dθ, np.diag(N * [1]), self.Γ2(θ))]
        ret = np.r_[ret, ret1]
    else:
        return np.array(2 * N * [np.nan])
    return ret
def test_multidim_array(self):
    "Tests that arguments which are multidimensional arrays are properly evaluated and differentiated in QNodes."
    self.logTestName()

    for s in b_shapes:
        multidim_array = np.reshape(b, s)

        def circuit(w):
            qml.RX(w[np.unravel_index(0, s)], 0)  # b[0]
            qml.RX(w[np.unravel_index(1, s)], 1)  # b[1]
            qml.RX(w[np.unravel_index(2, s)], 2)  # ...
            qml.RX(w[np.unravel_index(3, s)], 3)
            qml.RX(w[np.unravel_index(4, s)], 4)
            qml.RX(w[np.unravel_index(5, s)], 5)
            qml.RX(w[np.unravel_index(6, s)], 6)
            qml.RX(w[np.unravel_index(7, s)], 7)
            return tuple(qml.expval.PauliZ(idx) for idx in range(len(b)))

        circuit = qml.QNode(circuit, self.dev8)

        # circuit evaluations
        circuit_output = circuit(multidim_array)
        expected_output = np.cos(b)
        self.assertAllAlmostEqual(circuit_output, expected_output, delta=self.tol)

        # circuit jacobians
        circuit_jacobian = circuit.jacobian(multidim_array)
        expected_jacobian = -np.diag(np.sin(b))
        self.assertAllAlmostEqual(circuit_jacobian, expected_jacobian, delta=self.tol)
def plot_full_gp(ax, params, plot_xs):
    ax.cla()
    rs = npr.RandomState(0)
    sampled_means_and_covs = [sample_mean_cov_from_deep_gp(params, plot_xs)
                              for i in range(n_samples)]
    sampled_means, sampled_covs = zip(*sampled_means_and_covs)
    avg_pred_mean = np.mean(sampled_means, axis=0)
    avg_pred_cov = np.mean(sampled_covs, axis=0)
    marg_std = np.sqrt(np.diag(avg_pred_cov))
    if n_samples > 1:
        ax.fill(np.concatenate([plot_xs, plot_xs[::-1]]),
                np.concatenate([avg_pred_mean - 1.96 * marg_std,
                                (avg_pred_mean + 1.96 * marg_std)[::-1]]),
                alpha=.15, fc='Blue', ec='None')
    ax.plot(plot_xs, avg_pred_mean, 'b')
    sampled_funcs = np.array([rs.multivariate_normal(mean, cov * (random))
                              for mean, cov in sampled_means_and_covs])
    ax.plot(plot_xs, sampled_funcs.T)
    ax.plot(X, y, 'kx')
    #ax.set_ylim([-1.5, 1.5])
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title("Full GP, X to Y")
def likelihood(self, hyper):
    #print self.params, self.bound
    if self.noise_e is not None and self.noise_fix_e is False:
        sigma_n_e = hyper[2]
    else:
        sigma_n_e = 0.
    theta_e = hyper[self.id_theta_e]
    self.theta_e = theta_e

    K = self.RBF(theta_e, self.Xe) + np.eye(self.Ne) * sigma_n_e
    self.K = K
    L = np.linalg.cholesky(K + np.eye(self.Ne) * self.stab)
    self.L = L

    alpha1_ = np.linalg.solve(self.L.T, np.linalg.solve(self.L, self.mc))
    alpha2_ = np.linalg.solve(self.L.T, np.linalg.solve(self.L, self.ye))
    rho = np.matmul(self.mc.T, alpha2_) / np.matmul(self.mc.T, alpha1_)
    self.rho = rho

    alpha = np.linalg.solve(L.T, np.linalg.solve(L, (self.ye - rho * self.mc)))
    self.alpha = alpha
    NLML = np.sum(np.log(np.diag(L))) + 0.5 * np.matmul((self.ye - rho * self.mc).T, alpha) \
        + 0.5 * np.log(2. * np.pi) * self.Ne
    self.NLML = NLML
    return NLML
def mu_grad_old(self, Xnew):
    """Old mu_grad using actual math... who needs that when you have autograd."""
    alpha = solve(self.L.T, solve(self.L, self.Y * self.Ystd + self.Ymean))
    Knew_N, _ = self.K(self.lengthscale, (Xnew - self.Xmean) / self.Xstd, self.X)
    normalX = (self.X * self.Xstd) + self.Xmean
    return np.diag((-1 / (self.lengthscale * self.Xstd)**2) *
                   np.dot(np.tile(Xnew.T, self.n) - normalX.T,
                          np.multiply(Knew_N.T, alpha)))
def posterior_field_general(prior_embedding_y, k_xx, k_yy, epsil, delta):
    """
    Obtain the posterior weights involved in Kernel Bayes' Rule.

    The posterior refers to the posterior distribution of y given x.

    Parameters
    ----------
    prior_embedding_y : numpy.ndarray
        The prior kernel embedding on y evaluated at the training outputs (n,)
    k_xx : numpy.ndarray
        The gram matrix on the observed input variables (n, n)
    k_yy : numpy.ndarray
        The gram matrix on the observed output variables (n, n)
    epsil : float
        The regularisation parameter for the prior
    delta : float
        The regularisation parameter for the likelihood

    Returns
    -------
    numpy.ndarray
        The posterior field ready to be conditioned on arbitrary input x values
        and queried at arbitrary output y values (n, n)
    """
    # [Data Size] scalar
    n, = prior_embedding_y.shape

    # [Identity] (n, n)
    identity = np.eye(n)

    # [Prior Effect] (n, n)
    d = np.diag(np.dot(_pinv(k_yy + (epsil**2) * identity), prior_embedding_y))

    # [Posterior Weights] (n, n)
    return np.dot(_pinv(np.dot(d, k_xx) + ((delta**2) / n) * identity), d)
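# Minimal usage sketch for posterior_field_general (an assumption, not from the
# original code): it presumes this module's _pinv helper is available, and uses
# hypothetical Gaussian-kernel gram matrices and a uniform prior embedding purely
# for illustration.
import numpy as np

x_demo = np.linspace(0., 1., 20)[:, None]
y_demo = np.sin(2 * np.pi * x_demo)

def _demo_gram(a, b, length_scale=0.2):
    # Gaussian (RBF) gram matrix between two sets of 1-D points.
    sq = (a - b.T) ** 2
    return np.exp(-0.5 * sq / length_scale ** 2)

k_xx_demo = _demo_gram(x_demo, x_demo)
k_yy_demo = _demo_gram(y_demo, y_demo)
prior_embedding_demo = k_yy_demo.mean(axis=1)   # embedding of a uniform prior over training outputs

weights_demo = posterior_field_general(prior_embedding_demo, k_xx_demo, k_yy_demo,
                                       epsil=0.1, delta=0.1)
print(weights_demo.shape)                        # (20, 20)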
def cost(usv):
    delta = .5
    u = usv[0]
    s = usv[1]
    vt = usv[2]
    X = np.dot(np.dot(u, np.diag(s)), vt)
    return np.sum(np.sqrt((X - A)**2 + delta**2) - delta)
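# Usage sketch (an assumption, not part of the original code): cost closes over a
# global matrix A; here we define a hypothetical one and check that the exact SVD
# factors reconstruct A, so the pseudo-Huber loss is numerically zero.
import numpy as np

A = np.random.randn(6, 4)                       # hypothetical data matrix the cost compares against
u_demo, s_demo, vt_demo = np.linalg.svd(A, full_matrices=False)
print(cost((u_demo, s_demo, vt_demo)))          # ~0: exact factors reproduce A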
def __init__(self):
    # set our drift dynamics
    self.f_drift = f_trivial
    self.g_ctrl = g_mono
    self.u = u_step
    self.h = h_single

    # set our graph
    n_elements = 10
    n_regions = int(np.floor(n_elements / 2))
    self.G = nx.random_regular_graph(4, n_elements)
    self.L = nx.linalg.laplacian_matrix(self.G).todense()
    self.D = np.array(nx.linalg.incidence_matrix(self.G).todense())
    self.D = np.diag(np.ones(shape=(n_elements,)))

    # for each of our elements, assign them to a brain region
    self.e_to_r = np.random.randint(0, n_regions, size=n_elements)

    # do our disease layer
    n_symp = 2
    #self.Xi = np.random.randint(0, 1, size=(n_regions, n_symp))
    self.Xi = Xi_1
    self.P = self.L

    self.x_state = np.random.uniform(size=(1000, 1))

    self.n_regions = n_regions
    self.n_symp = n_symp
    self.n_elements = n_elements
def callback(params):
    print("Log likelihood {}".format(-objective(params)))
    plt.cla()
    print(params)

    # Show posterior marginals.
    plot_xs = np.reshape(np.linspace(-7, 7, 300), (300, 1))
    pred_mean, pred_cov = predict(params, X, y, plot_xs)
    marg_std = np.sqrt(np.diag(pred_cov))
    ax.plot(plot_xs, pred_mean, 'b')
    ax.fill(np.concatenate([plot_xs, plot_xs[::-1]]),
            np.concatenate([pred_mean - 1.96 * marg_std,
                            (pred_mean + 1.96 * marg_std)[::-1]]),
            alpha=.15, fc='Blue', ec='None')

    # Show samples from posterior.
    rs = npr.RandomState(0)
    sampled_funcs = rs.multivariate_normal(pred_mean, pred_cov, size=10)
    ax.plot(plot_xs, sampled_funcs.T)

    ax.plot(X, y, 'kx')
    ax.set_ylim([-1.5, 1.5])
    ax.set_xticks([])
    ax.set_yticks([])
    plt.draw()
    plt.pause(1.0 / 60.0)
def marginal_likelihood(self, X, W=None, Psi=None):
    '''
    function: marginal_likelihood
    Description: Compute the marginal likelihood for given data X.
    Inputs:
        X   - (np.array) Data matrix. Shape (N, D).
        W   - (np.array) Factor loading matrix. Shape (K, D).
        Psi - (np.array) Output covariance matrix. Shape (D, D). Positive, diagonal.
    Outputs:
        ml  - (np.array) Array of shape (N,) where ml[n] is the marginal log
              likelihood of data case X[n, :].
    '''
    #print 'ML'
    N, D = X.shape
    if W is None:
        W = self.W
    if Psi is None:
        Psi = self.Psi

    #ml = np.zeros(N)
    cov_mat = np.dot(W.T, W) + Psi
    inv_cov_mat = np.linalg.inv(cov_mat)
    #norm_term = (1/(np.sqrt(np.power(2*np.pi,D)*np.linalg.det(cov_mat))))
    sgn, logdet = np.linalg.slogdet(2 * np.pi * cov_mat)
    norm_term = -0.5 * logdet * sgn
    second_term = -0.5 * np.diag(np.dot(np.dot(X, inv_cov_mat), X.T))
    ml1 = norm_term + second_term
    #ml2 = autograd.scipy.stats.multivariate_normal.logpdf(X, np.zeros(D), cov_mat)
    #print norm_term, second_term
    return ml1
def run(backend=SUPPORTED_BACKENDS[0], quiet=True):
    dimension = 3
    num_samples = 200
    num_components = 2
    samples = np.random.randn(num_samples, dimension) @ np.diag([3, 2, 1])
    samples -= samples.mean(axis=0)

    cost, egrad, ehess = create_cost_egrad_ehess(backend, samples, num_components)
    manifold = Stiefel(dimension, num_components)
    problem = pymanopt.Problem(manifold, cost, egrad=egrad, ehess=ehess)
    if quiet:
        problem.verbosity = 0

    solver = TrustRegions()
    # from pymanopt.solvers import ConjugateGradient
    # solver = ConjugateGradient()
    estimated_span_matrix = solver.solve(problem)

    if quiet:
        return

    estimated_projector = estimated_span_matrix @ estimated_span_matrix.T

    eigenvalues, eigenvectors = np.linalg.eig(samples.T @ samples)
    indices = np.argsort(eigenvalues)[::-1][:num_components]
    span_matrix = eigenvectors[:, indices]
    projector = span_matrix @ span_matrix.T

    print(
        "Frobenius norm error between estimated and closed-form projection "
        "matrix:",
        np.linalg.norm(projector - estimated_projector))
def make_positive_definite(m, tol=None):
    '''
    Computes a matrix close to the original matrix m that is positive definite.
    This function is just a transcript of R's make.positive.definite function.
    m (2d array): A matrix that is not necessarily psd.
    tol (float): A tolerance level controlling how "different" the psd matrix
        can be from the original matrix.
    ---------------------------------------------------------------
    returns (2d array): A psd matrix
    '''
    d = m.shape[0]
    if m.shape[1] != d:
        raise RuntimeError("Input matrix is not square!")
    eigvalues, eigvect = eigh(m)

    # Sort the eigenvalues
    idx = eigvalues.argsort()[::-1]
    eigvalues = eigvalues[idx]
    eigvect = eigvect[:, idx]

    if tol is None:
        tol = d * np.max(np.abs(eigvalues)) * sys.float_info.epsilon
    delta = 2 * tol
    tau = np.maximum(0, delta - eigvalues)
    dm = multi_dot([eigvect, np.diag(tau), eigvect.T])

    return m + dm
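# Usage sketch (an assumption, not from the original code): it presumes the module
# already imports eigh, multi_dot, and sys as used above. The matrix below is a
# made-up symmetric matrix with one negative eigenvalue.
import numpy as np

m_demo = np.array([[1.0,  0.9,  0.9],
                   [0.9,  1.0, -0.9],
                   [0.9, -0.9,  1.0]])
print(np.linalg.eigvalsh(m_demo))                       # eigenvalues 1.9, 1.9, -0.8
m_pd = make_positive_definite(m_demo)
print(np.linalg.eigvalsh(m_pd).min())                   # >= 0 (up to round-off): repaired matrix is psd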
def test_approx_log_det_vectorized():
    D = 10
    N = 7
    rs = npr.RandomState(0)
    rs2 = npr.RandomState(0)
    mats = []
    alds = []
    for i in range(N):
        cur_mat = np.eye(D) - 0.1 * np.diag(rs.rand(D))
        mats.append(cur_mat)
        cur_func = lambda v: np.dot(cur_mat, v.T)
        alds.append(approx_log_det_non_vectorized(cur_func, D, rs=rs2))
    alds = np.array(alds)

    def mvp_vec(v):
        """Vectorized version takes in N vectors of length D, and multiplies
        each v against the corresponding matrix in the list.
        Takes in an N x D matrix, returns an N x D matrix."""
        assert v.shape == (N, D), v.shape
        mvps = []
        for i in range(N):
            mvps.append(np.dot(mats[i], v[i]))
        retval = fast_array_from_list(mvps)
        assert retval.shape == (N, D), retval.shape
        return fast_array_from_list(mvps)

    vec_logdets = approx_log_det(mvp_vec, D, N, rs=npr.RandomState(0))
    assert np.all(vec_logdets - alds < 0.0001), \
        "vectorized: {} non-vectorized: {}, diff: {}".format(vec_logdets, alds, vec_logdets - alds)
def hmc_sample(params, rs, num_samples, callback=None):
    """Generate samples from HMC with the given parameters and return them,
    along with an unbiased estimate of the lower bound."""
    # Unpack parameters
    mean = parser.get(params, 'mean')
    stddevs = np.exp(parser.get(params, 'log_stddev'))
    hmc_stepsize = np.exp(parser.get(params, 'hmc_log_stepsize'))
    mass_mat = parser.get(params, 'mass_mat')
    v_A = parser.get(params, 'v_A')
    v_B = parser.get(params, 'v_B')
    v_cov = np.exp(parser.get(params, 'v_log_cov'))
    rev_A = parser.get(params, 'rev_A')
    rev_B = parser.get(params, 'rev_B')
    rev_cov = np.exp(parser.get(params, 'rev_log_cov'))

    # Create the initial sample and combine its log-likelihood and its entropy.
    init_zs = mean + rs.randn(num_samples, D) * stddevs
    init_ll = loglik_func(init_zs)
    init_log_prob_mvn = build_logprob_mvn(mean, np.diag(stddevs**2), pseudo_inv=False)
    init_ent = init_log_prob_mvn(init_zs)
    init_L_est = init_ll - init_ent

    samples, lower_bound_est = run_hmc(init_zs, loglik_func, loglik_func_grad,
                                       hmc_stepsize, mass_mat,
                                       v_A, v_B, v_cov,
                                       rev_A, rev_B, rev_cov,
                                       num_steps, leap_steps, rs, callback)
    return samples, lower_bound_est + init_L_est
def plot_single_gp(ax, params, layer, unit, plot_xs):
    ax.cla()
    rs = npr.RandomState(0)

    deep_map = create_deep_map(params)
    gp_details = deep_map[layer][unit]
    gp_params = pack_gp_params(gp_details)

    pred_mean, pred_cov = predict_layer_funcs[layer][unit](gp_params, plot_xs, with_noise=False, FITC=False)
    x0 = deep_map[layer][unit]['x0']
    y0 = deep_map[layer][unit]['y0']
    noise_scale = deep_map[layer][unit]['noise_scale']

    marg_std = np.sqrt(np.diag(pred_cov))
    if n_samples_to_plot > 19:
        ax.plot(plot_xs, pred_mean, 'b')
        ax.fill(np.concatenate([plot_xs, plot_xs[::-1]]),
                np.concatenate([pred_mean - 1.96 * marg_std,
                                (pred_mean + 1.96 * marg_std)[::-1]]),
                alpha=.15, fc='Blue', ec='None')

    # Show samples from posterior.
    sampled_funcs = rs.multivariate_normal(pred_mean, pred_cov * (random), size=n_samples_to_plot)
    ax.plot(plot_xs, sampled_funcs.T)
    ax.plot(x0, y0, 'ro')
    #ax.errorbar(x0, y0, yerr=noise_scale, fmt='o')
    ax.set_xticks([])
    ax.set_yticks([])
def run(backend=SUPPORTED_BACKENDS[0], quiet=True):
    num_rows = 10
    rank = 3
    matrix = np.random.normal(size=(num_rows, num_rows))
    matrix = 0.5 * (matrix + matrix.T)

    # Solve the problem with pymanopt.
    manifold = Oblique(rank, num_rows)
    cost, euclidean_gradient, euclidean_hessian = create_cost_and_derivates(
        manifold, matrix, backend
    )
    problem = pymanopt.Problem(
        manifold,
        cost,
        euclidean_gradient=euclidean_gradient,
        euclidean_hessian=euclidean_hessian,
    )

    optimizer = TrustRegions(verbosity=2 * int(not quiet))
    X = optimizer.run(problem).point

    if quiet:
        return

    C = X.T @ X
    print("Diagonal elements:", np.diag(C))
    print("Eigenvalues:", np.sort(np.linalg.eig(C)[0].real)[::-1])
def callback0(params, timer=None):
    global Nfeval, prev_norm, opt_params, opt_test_err
    if Nfeval % 1 == 0:
        al, bl = params
        L = bl * bl * np.exp(-L0 / al / al / 2) + 1e-6 * EYEN
        if nystr:
            alpha = EYEN - eig_vec_K @ np.linalg.inv(
                eig_vec_K.T @ L @ eig_vec_K / N2 + np.diag(1 / eig_val_K)) @ eig_vec_K.T @ L / N2
            alpha = alpha @ W_nystr @ Y
        else:
            LWL_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN)
            alpha = LWL_inv @ L @ W @ Y
            # L_W_inv = chol_inv(W*N2+L_inv)
        test_L = bl * bl * np.exp(-test_L0 / al / al / 2)
        pred_mean = test_L @ alpha
        if timer:
            return
        test_err = ((pred_mean - test_Y) ** 2).mean()
        # ((pred_mean-test_Y)**2/np.diag(pred_cov)).mean()+(np.log(np.diag(pred_cov))).mean()
        norm = alpha.T @ L @ alpha

    Nfeval += 1
    if prev_norm is not None:
        if norm[0, 0] / prev_norm >= 3:
            if opt_params is None:
                opt_test_err = test_err
                opt_params = params
            print(True, opt_params, opt_test_err, prev_norm)
            raise Exception

    if prev_norm is None or norm[0, 0] <= prev_norm:
        prev_norm = norm[0, 0]
    opt_test_err = test_err
    opt_params = params
    print('params, test_err, norm: ', opt_params, opt_test_err, prev_norm)
def ExpectedImprovement(self, X_star):
    # Normalize data
    X_star = (X_star - self.Xmean) / self.Xstd
    X = self.X
    y = self.y
    L = self.L

    theta = self.hyp[:-1]

    psi = self.kernel(X_star, X, theta)
    alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L, y))
    pred_u_star = np.matmul(psi, alpha)

    beta = np.linalg.solve(np.transpose(L), np.linalg.solve(L, psi.T))
    var_u_star = self.kernel(X_star, X_star, theta) - np.matmul(psi, beta)
    var_u_star = np.abs(np.diag(var_u_star))[:, None]

    # Expected Improvement
    # from https://people.orie.cornell.edu/pfrazier/Presentations/2011.11.INFORMS.Tutorial.pdf
    best = np.min(y)
    delta = -(pred_u_star - best)
    deltap = -(pred_u_star - best)
    deltap[delta < 0] = 0
    Z = delta / np.sqrt(var_u_star)

    EI_acq = deltap - np.abs(deltap) * norm.cdf(-Z) + np.sqrt(var_u_star) * norm.pdf(Z)

    if not isinstance(EI_acq, np.ndarray):
        EI_acq = EI_acq._value

    return EI_acq
def get_causal_effect(params, do_A, w):
    "to be called within experiment function."
    al, bl = params
    L = bl * bl * np.exp(-L0 / al / al / 2) + 1e-6 * EYEN
    if nystr:
        alpha = EYEN - eig_vec_K @ np.linalg.inv(
            eig_vec_K.T @ L @ eig_vec_K / N2 + np.diag(1 / eig_val_K / N2)) @ eig_vec_K.T @ L / N2
        alpha = alpha @ W_nystr @ Y * N2
    else:
        LWL_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN)
        alpha = LWL_inv @ L @ W @ Y
        # L_W_inv = chol_inv(W*N2+L_inv)

    EYhat_do_A = []
    for a in do_A:
        a = np.repeat(a, [w.shape[0]]).reshape(-1, 1)
        w = w.reshape(-1, 1)
        aw = np.concatenate([a, w], axis=-1)
        ate_L0 = _sqdist(aw, X)
        ate_L = bl * bl * np.exp(-ate_L0 / al / al / 2)
        h_out = ate_L @ alpha

        mean_h = np.mean(h_out).reshape(-1, 1)
        EYhat_do_A.append(mean_h)
        print('a = {}, beta_a = {}'.format(np.mean(a), mean_h))

    return np.concatenate(EYhat_do_A)
def compute_Lagrangian_gradient(self):
    Φ = self.compute_Φ()
    [new_W, W_λ] = eig_solver(Φ, db['q'], mode='smallest')
    gradient = Φ.dot(db['W']) - db['W'].dot(np.diag(W_λ))

    print('Gradient :\n')
    print(gradient)
    return gradient
def get_dF_js_idM(self, M, N, M_tangent_bundle_sub, N_tangent_bundle, selectedpoints, dim=None):
    if dim is None:
        dim = self.dim
    q = self.q
    affinity_matrix = M.geom.affinity_matrix
    nsel = len(selectedpoints)
    dF = np.zeros((nsel, dim, q))
    for i in range(nsel):
        pt = selectedpoints[i]
        neighborspt = affinity_matrix[selectedpoints[i]].indices
        deltap0 = M.data[neighborspt, :] - M.data[pt, :]
        deltaq0 = N.data[neighborspt, :] - N.data[pt, :]
        projected_M = np.matmul(M_tangent_bundle_sub.tangent_bases[i, :, :].transpose(),
                                deltap0.transpose()).transpose()
        # projected_rescaled_M = np.matmul(np.diag(M_tangent_bundle_sub.rmetric.Gsvals[selectedpoints[i]]),
        #                                  projected_M.transpose())
        projected_rescaled_M = projected_M.transpose()
        b = np.linalg.pinv(projected_rescaled_M)
        a = np.zeros((len(neighborspt), q))
        rescaled_basis = np.matmul(N_tangent_bundle.tangent_bases[selectedpoints[i], :, :][:, :],
                                   np.diag(N.geom.rmetric.Gsvals[selectedpoints[i]]))
        projected_N = np.dot(rescaled_basis.transpose(), deltaq0.transpose())
        projected_N_expanded = np.matmul(N_tangent_bundle.tangent_bases[selectedpoints[i], :, :][:, :],
                                         projected_N)
        a = projected_N_expanded
        dF[i, :, :][:, :] = np.matmul(a, b).transpose()
    return dF
def eqn19sum_numerical(paramslin, x, z, pij, run_time, gamma, alpha):
    params = util.unlinearise_params(paramslin, verbose=0)
    precomp = precompute(z, gamma, alpha, [0, run_time])
    kzzinv = precomp.Lzzinv.T @ precomp.Lzzinv
    kzzinv_m = kzzinv @ params.m
    eqn19sum = 0
    kzzinv_S_kzzinv = kzzinv @ params.L @ params.L.T @ kzzinv
    mutilde_list = []
    for i in range(1, x.shape[1]):
        taus = np.array([x[0, i] - x[0, :i]])
        kxx = kdiag(taus, gamma)
        kxz = k(taus, z, gamma, alpha)
        mutilde = (kxz @ kzzinv_m).flatten()
        mutilde_list += mutilde.tolist()
        sigmatilde = np.sqrt(np.diag(kxx - kxz @ kzzinv @ kxz.T + kxz @ kzzinv_S_kzzinv @ kxz.T))
        assert mutilde.ndim == 1
        assert sigmatilde.ndim == 1
        for j in range(len(mutilde)):
            mui = mutilde[j]
            sigmai = sigmatilde[j]
            interp_f = np.linspace(mui - 6 * sigmai, mui + 6 * sigmai, 4096)
            delta = interp_f[1] - interp_f[0]
            e_log_f2 = multivariate_normal(mui, sigmai**2).pdf(interp_f) * np.log(interp_f ** 2) * delta
            eqn19sum += pij[i, j + 1] * np.sum(e_log_f2)
    return eqn19sum
def ExpectedImprovement(self, X_star):
    # Normalize data
    X_star = (X_star - self.Xmean) / self.Xstd
    X = self.X
    y = self.y
    L = self.L

    theta = self.hyp[:-1]

    psi = self.kernel(X_star, X, theta)
    alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L, y))
    pred_u_star = np.matmul(psi, alpha)

    beta = np.linalg.solve(np.transpose(L), np.linalg.solve(L, psi.T))
    var_u_star = self.kernel(X_star, X_star, theta) - np.matmul(psi, beta)
    var_u_star = np.abs(np.diag(var_u_star))[:, None]

    # Expected Improvement
    best = np.min(y)
    Z = (best - pred_u_star) / var_u_star
    EI_acq = (best - pred_u_star) * norm.cdf(Z) + var_u_star * norm.pdf(Z)

    return EI_acq
def set_lnpdf(model="baseball", dset="boston"):
    if model == "baseball":
        return lambda x: np.squeeze(baseball.lnpdf_flat(x, 0)), baseball.D, model
    if model == "frisk":
        lnpdf, unpack, num_params, frisk_df, param_names = \
            frisk.make_model_funs(crime=2., precinct_type=1)
        return lnpdf, num_params, model
    if model == "normal":
        D, r = 10, 2
        mu0 = np.zeros(D)
        C_true = np.random.randn(D, r) * 2.
        v_true = np.random.randn(D)
        Sigma_true = np.dot(C_true, C_true.T) + np.diag(np.exp(v_true))
        print(Sigma_true)
        lnpdf = lambda x: misc.make_fixed_cov_mvn_logpdf(Sigma_true)(x, mean=mu0)
        return lnpdf, D, model
    if model == "bnn":
        (Xtrain, Ytrain), (Xtest, Ytest) = \
            uci.load_dataset(dset, split_seed=0)
        lnpdf, predict, loglike, parser, (std_X, ustd_X), (std_Y, ustd_Y) = \
            nn.make_nn_regression_funs(Xtrain[:100], Ytrain[:100],
                                       layer_sizes=None, obs_variance=None)
        lnpdf_vec = lambda ths: np.array([lnpdf(th) for th in np.atleast_2d(ths)])
        return lnpdf_vec, parser.N, "-".join([model, dset])
def likelihood(self, hyp):
    X_L = self.X_L
    y_L = self.y_L
    X_H = self.X_H
    y_H = self.y_H

    y = np.vstack((y_L, y_H))

    NL = y_L.shape[0]
    NH = y_H.shape[0]
    N = y.shape[0]

    rho = hyp[-3]
    sigma_n_L = np.exp(hyp[-2])
    sigma_n_H = np.exp(hyp[-1])
    theta_L = hyp[self.idx_theta_L]
    theta_H = hyp[self.idx_theta_H]

    K_LL = self.kernel(X_L, X_L, theta_L) + np.eye(NL) * sigma_n_L
    K_LH = rho * self.kernel(X_L, X_H, theta_L)
    K_HH = rho**2 * self.kernel(X_H, X_H, theta_L) + \
        self.kernel(X_H, X_H, theta_H) + np.eye(NH) * sigma_n_H
    K = np.vstack((np.hstack((K_LL, K_LH)),
                   np.hstack((K_LH.T, K_HH))))
    L = np.linalg.cholesky(K + np.eye(N) * self.jitter)
    self.L = L

    alpha = np.linalg.solve(np.transpose(L), np.linalg.solve(L, y))
    NLML = 0.5 * np.matmul(np.transpose(y), alpha) + \
        np.sum(np.log(np.diag(L))) + 0.5 * np.log(2. * np.pi) * N
    return NLML[0, 0]
def mvt_logpdf(x, mu, Li, df):
    dim = Li.shape[0]
    Ki = np.dot(Li.T, Li)

    # determinant is just the product of the diagonal elements of the Cholesky factor
    logdet = 2 * log(1. / np.diag(Li)).sum()
    lpdf_const = (gammaln((df + dim) / 2)
                  - (gammaln(df / 2)
                     + (log(df) + log(np.pi)) * dim * 0.5
                     + logdet * 0.5))

    x = np.atleast_2d(x)
    if x.shape[1] != mu.size:
        x = x.T
    assert x.shape[1] == mu.size or x.shape[0] == mu.size

    d = (x - mu.reshape((1, mu.size))).T
    Ki_d_scal = np.dot(Ki, d) / df                   # vector
    d_Ki_d_scal_1 = diag_dot(d.T, Ki_d_scal) + 1.    # scalar

    res_pdf = (lpdf_const - 0.5 * (df + dim) * np.log(d_Ki_d_scal_1)).flatten()
    if res_pdf.size == 1:
        res_pdf = float(res_pdf)
    return res_pdf
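# Usage sketch (an assumption, not from the original code): it presumes log, gammaln,
# and diag_dot are imported in this module as the function above requires. Li is
# taken to be the inverse Cholesky factor of the shape matrix, so that Li.T @ Li is
# the precision. For a very large df the Student-t log-density should be close to
# the corresponding Gaussian one.
import numpy as np
from scipy import stats

mu_demo = np.array([0.5, -1.0])
Sigma_demo = np.array([[2.0, 0.3],
                       [0.3, 1.0]])
Li_demo = np.linalg.inv(np.linalg.cholesky(Sigma_demo))   # Li.T @ Li == inv(Sigma_demo)
x_demo = np.array([0.0, 0.0])

print(mvt_logpdf(x_demo, mu_demo, Li_demo, df=1e6))
print(stats.multivariate_normal(mu_demo, Sigma_demo).logpdf(x_demo))   # should nearly agree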
def transition_matrix(self):
    if self._transition_matrix is not None:
        return self._transition_matrix

    As, rs, ps = self.Ps, self.rs, self.ps

    # Fill in the transition matrix one block at a time
    K_total = self.total_num_states
    P = np.zeros((K_total, K_total))
    starts = np.concatenate(([0], np.cumsum(rs)[:-1]))
    ends = np.cumsum(rs)
    for (i, j), Aij in np.ndenumerate(As):
        block = P[starts[i]:ends[i], starts[j]:ends[j]]

        # Diagonal blocks (stay in sub-state or advance to next sub-state)
        if i == j:
            for k in range(rs[i]):
                # p(z_{t+1} = (.,i+k) | z_t = (.,i)) = (1-p)^k p for 0 <= k <= r - i
                block += (1 - ps[i])**k * ps[i] * np.diag(np.ones(rs[i] - k), k=k)

        # Off-diagonal blocks (exit to a new super state)
        else:
            # p(z_{t+1} = (j,1) | z_t = (k,i)) = (1-p_k)^{r_k-i+1} * A[k, j]
            block[:, 0] = (1 - ps[i])**np.arange(rs[i], 0, -1) * Aij

    assert np.allclose(P.sum(1), 1)
    assert (0 <= P).all() and (P <= 1.).all()

    # Cache the transition matrix
    self._transition_matrix = P
    return P
def location_mixture_logpdf(samps, locations, location_weights, distr_at_origin,
                            contr_var=False, variant=1):
    # lpdfs = zeroprop.logpdf()
    diff = samps - locations[:, np.newaxis, :]
    lpdfs = distr_at_origin.logpdf(diff.reshape([np.prod(diff.shape[:2]),
                                                 diff.shape[-1]])).reshape(diff.shape[:2])
    logprop_weights = log(location_weights / location_weights.sum())[:, np.newaxis]
    if not contr_var:
        return logsumexp(lpdfs + logprop_weights, 0)
        #time_m1 = np.hstack([time0[:, :-1], time0[:, -1:]])
    else:
        time0 = lpdfs + logprop_weights + log(len(location_weights))
        if variant == 1:
            time1 = np.hstack([time0[:, 1:], time0[:, :1]])
            cov = np.mean(time0**2 - time0 * time1)
            var = np.mean((time0 - time1)**2)
            lpdfs = lpdfs - cov / var * (time0 - time1)
            return logsumexp(lpdfs - log(len(location_weights)), 0)
        elif variant == 2:
            cvar = (time0[:, :, np.newaxis]
                    - np.dstack([np.hstack([time0[:, 1:], time0[:, :1]]),
                                 np.hstack([time0[:, -1:], time0[:, :-1]])]))

            ## self-covariance matrix of control variates
            K_cvar = np.diag(np.mean(cvar**2, (0, 1)))
            # add off-diagonal
            K_cvar = K_cvar + (1. - np.eye(2)) * np.mean(cvar[:, :, 0] * cvar[:, :, 1])

            ## covariance of control variates with random variable
            cov = np.mean(time0[:, :, np.newaxis] * cvar, 0).mean(0)

            optimal_comb = np.linalg.inv(K_cvar) @ cov
            lpdfs = lpdfs - cvar @ optimal_comb
            return logsumexp(lpdfs - log(len(location_weights)), 0)
def cost(X):
    mu = 0.132
    global D2
    global V1
    global V2
    global Cor1
    global Cor2
    global k_
    coup = (np.linalg.norm(Cor1.T @ V1[:, 0:k_] - Cor2.T @ V2[:, 0:k_] @ X, 'fro'))**2
    res = (X.T @ np.diag(D2[0:k_]) @ X)**2
    diag_res = np.diagonal(res, offset=0, axis1=-1, axis2=-2)
    diag_res = np.sum(diag_res)
    sumres = np.sum(res)
    val = sumres - diag_res
    #val = np.linalg.norm(X.T @ diag2 @ X - diag2, 'fro') ** 2
    #print(coup)
    res = val + mu * coup
    return res
def natural_predict_forward_temps(J, J11, J12, h):
    J, J11, J12 = -2 * J, -2 * J11, -J12
    L = np.linalg.cholesky(J + J11)
    v = solve_triangular(L, h)
    lognorm = 1. / 2 * np.dot(v, v) - np.sum(np.log(np.diag(L)))
    v2 = solve_triangular(L, v, trans='T')
    temp = solve_triangular(L, J12)
    return L, v, v2, temp, h, lognorm
def regularized_persudo_inverse_(self, mat, reg=1e-5):
    """
    Use SVD to compute a regularized pseudo-inverse, perturbing the singular
    values to preserve positive-definiteness.
    """
    u, s, v = np.linalg.svd(mat)
    s[s < 0] = 0.0   # truncate negative values...
    diag_s_inv = np.zeros((v.shape[0], u.shape[1]))
    diag_s_inv[0:len(s), 0:len(s)] = np.diag(1. / (s + reg))
    return v.dot(diag_s_inv).dot(u.T)
def my_spectral_clustering(sim_mat, n_clusters=2):
    N = sim_mat.shape[0]
    sim_mat = sim_mat - np.diag(np.diag(sim_mat))
    t1 = 1. / np.sqrt(np.sum(sim_mat, axis=1))
    t2 = np.dot(t1.reshape(N, 1), t1.reshape(1, N))
    lap_mat = np.eye(N) - sim_mat * t2
    eig_val, eig_vec = np.linalg.eig(lap_mat)
    idx = eig_val.argsort()
    eig_val = eig_val[idx]
    eig_vec = np.real(eig_vec[:, idx])
    t3 = np.diag(np.sqrt(1. / np.sum(eig_vec[:, 0:n_clusters]**2, axis=1)))
    embd = np.dot(t3, eig_vec[:, 0:n_clusters])
    clf = KMeans(n_clusters=n_clusters, n_jobs=-1)
    label_pred = clf.fit_predict(embd)
    return label_pred
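# Usage sketch (an assumption, not from the original code): it presumes scikit-learn's
# KMeans is imported as above (note that the n_jobs argument was removed from KMeans
# in scikit-learn 0.25+, so it may need dropping there). Two well-separated blobs are
# clustered through their Gaussian similarity matrix.
import numpy as np

rng = np.random.RandomState(0)
pts = np.vstack([rng.randn(20, 2), rng.randn(20, 2) + 8.0])    # two separated blobs
sq_dists = ((pts[:, None, :] - pts[None, :, :]) ** 2).sum(-1)
sim_demo = np.exp(-sq_dists / 2.0)

labels_demo = my_spectral_clustering(sim_demo, n_clusters=2)
print(labels_demo[:20], labels_demo[20:])    # the two halves should receive different labels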
def sample_invwishart(S, nu):
    n = S.shape[0]
    chol = np.linalg.cholesky(S)

    if (nu <= 81 + n) and (nu == np.round(nu)):
        x = npr.randn(nu, n)
    else:
        x = np.diag(np.sqrt(np.atleast_1d(chi2.rvs(nu - np.arange(n)))))
        x[np.triu_indices_from(x, 1)] = npr.randn(n * (n - 1) // 2)
    R = np.linalg.qr(x, 'r')
    T = solve_triangular(R.T, chol.T, lower=True).T
    return np.dot(T, T.T)
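# Quick sanity check (an assumption, not from the original code): it presumes npr,
# chi2, and solve_triangular are the module's numpy.random, scipy.stats.chi2, and
# scipy.linalg.solve_triangular imports. The inverse-Wishart mean is S / (nu - n - 1),
# which a Monte Carlo average of draws should approach.
import numpy as np

S_demo = np.array([[2.0, 0.5],
                   [0.5, 1.0]])
nu_demo = 10
draws = np.stack([sample_invwishart(S_demo, nu_demo) for _ in range(5000)])
print(draws.mean(axis=0))                              # roughly S / (nu - n - 1) = S / 7
print(S_demo / (nu_demo - S_demo.shape[0] - 1))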
def callback(params, t, g):
    print("Iteration {} lower bound {}".format(t, -objective(params, t)))

    plt.cla()
    target_distribution = lambda x: np.exp(log_posterior(x, t))
    plot_isocontours(ax, target_distribution)

    mean, log_std = unpack_params(params)
    variational_contour = lambda x: mvn.pdf(x, mean, np.diag(np.exp(2 * log_std)))
    plot_isocontours(ax, variational_contour)
    plt.draw()
    plt.pause(1.0 / 30.0)
def expectation(params, y, X, eps, N, u):
    # For each sample of theta, calculate the likelihood.
    # The likelihood has participants; for each participant we have N particles.
    # With L samples, n participants, and N particles per participant and sample,
    # we have L*n*N particles.

    # get the first column to be mu
    d = np.shape(X)[-1] + 1
    mu = params[0:d, 0]
    toSigma = params[0:d, 1:d+1]
    intSigma = toSigma - np.diag(np.diag(toSigma)) + np.diag(np.exp(np.diag(toSigma)))
    Sigma = intSigma - np.tril(intSigma) + np.transpose(np.triu(intSigma))
    print(mu)
    print(Sigma)
    n = X.shape[0]
    E = 0
    # iterate over the number of samples of theta
    for j in range(np.shape(eps)[0]):
        beta = mu + np.dot(Sigma, eps[j, :])
        # this log likelihood will iterate over both the participants and the particles
        E += log_likelihood(beta, y, X, u[j*(n*N):(j+1)*(n*N)])
    return E / len(beta)
def condition_on(mu, sigma, A, y, sigma_obs):
    temp1 = np.dot(A, sigma)
    sigma_pred = np.dot(temp1, A.T) + sigma_obs
    L = np.linalg.cholesky(sigma_pred)
    v = solve_triangular(L, y - np.dot(A, mu))
    ll = -1. / 2 * np.dot(v, v) - np.sum(np.log(np.diag(L))) \
        - y.shape[0] / 2. * np.log(2 * np.pi)
    mu_cond = mu + np.dot(temp1.T, solve_triangular(L, v, 'T'))
    temp2 = solve_triangular(L, temp1)
    sigma_cond = sigma - np.dot(temp2.T, temp2)
    return (mu_cond, sigma_cond), ll
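# Usage sketch for the Gaussian conditioning step (an assumption, not from the
# original code): it presumes the module's solve_triangular is a lower-triangular
# wrapper around scipy.linalg.solve_triangular, as the calls above imply. A prior
# N(0, I) over two coordinates is conditioned on a noisy observation of the first.
import numpy as np

mu_demo = np.zeros(2)
sigma_demo = np.eye(2)
A_demo = np.array([[1.0, 0.0]])       # observe only the first coordinate
y_demo = np.array([1.0])
sigma_obs_demo = np.array([[0.5]])

(mu_cond, sigma_cond), ll = condition_on(mu_demo, sigma_demo, A_demo, y_demo, sigma_obs_demo)
print(mu_cond)        # approximately [2/3, 0]
print(sigma_cond)     # first diagonal entry shrinks to 1/3, second stays 1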
def unpack_params(params):
    """Unpacks parameter vector into the proportions, means and covariances
    of each mixture component.  The covariance matrices are parametrized by
    their Cholesky decompositions."""
    log_proportions = parser.get(params, 'log proportions')
    normalized_log_proportions = log_proportions - logsumexp(log_proportions)

    means = parser.get(params, 'means')

    lower_tris = np.tril(parser.get(params, 'lower triangles'), k=-1)
    diag_chols = np.exp(parser.get(params, 'log diagonals'))
    chols = []
    for lower_tri, diag in zip(lower_tris, diag_chols):
        chols.append(np.expand_dims(lower_tri + np.diag(diag), 0))
    chols = np.concatenate(chols, axis=0)

    return normalized_log_proportions, means, chols
def evaluate_prior(all_params):  # clean up code so we don't compute matrices twice
    all_layer_params = unpack_all_params(all_params)
    log_prior = 0
    deep_map = create_deep_map(all_params)
    for layer, layer_map in deep_map.items():
        for unit, gp_map in layer_map.items():
            cov_y_y = covariance_function(gp_map["cov_params"], gp_map["x0"], gp_map["x0"]) \
                + gp_map["noise_scale"] * np.eye(len(gp_map["y0"]))
            log_prior += mvn.logpdf(gp_map["y0"], np.ones(len(cov_y_y)) * gp_map["mean"],
                                    cov_y_y + np.diag(np.diag(cov_y_y)) * 0)  # CHANGE
            ##log_prior += mvn.logpdf(gp_map['y0'], np.ones(len(cov_y_y)) * gp_map['mean'],
            ##                        cov_y_y + np.eye(len(cov_y_y)) * tuning_param)
            ###log_prior += mvn.logpdf(gp_map['y0'], np.ones(len(cov_y_y)) * gp_map['mean'],
            ###                        np.diag(np.diag(cov_y_y)) * 10)
    return log_prior
def natural_predict(J, h, J11, J12, J22, logZ):
    # convert from natural parameters to the usual J definitions
    J, J11, J12, J22 = -2 * J, -2 * J11, -J12, -2 * J22

    L = np.linalg.cholesky(J + J11)
    v = solve_triangular(L, h)
    lognorm = 1. / 2 * np.dot(v, v) - np.sum(np.log(np.diag(L)))

    h_predict = -np.dot(J12.T, solve_triangular(L, v, trans='T'))

    temp = solve_triangular(L, J12)
    J_predict = J22 - np.dot(temp.T, temp)

    assert np.all(np.linalg.eigvals(J_predict) > 0)

    return (-1. / 2 * J_predict, h_predict), lognorm + logZ
def test_lognorm_grads():
    npr.seed(0)
    n = 3

    L = np.linalg.cholesky(rand_psd(n))
    v = npr.randn(n)

    foo = lambda L, v: 1. / 2 * np.dot(v, v) - np.sum(np.log(np.diag(L)))
    ans = foo(L, v)

    a = grad(foo, 0)(L, v)
    b = lognorm_grad_arg0(1., ans, L, v)
    check(a, b)

    a = grad(foo, 1)(L, v)
    b = lognorm_grad_arg1(1., ans, L, v)
    check(a, b)
def plot_gp(ax, X, y, pred_mean, pred_cov, plot_xs):
    ax.cla()
    marg_std = np.sqrt(np.diag(pred_cov))
    ax.plot(plot_xs, pred_mean, 'b')
    ax.fill(np.concatenate([plot_xs, plot_xs[::-1]]),
            np.concatenate([pred_mean - 1.96 * marg_std,
                            (pred_mean + 1.96 * marg_std)[::-1]]),
            alpha=.15, fc='Blue', ec='None')

    # Show samples from posterior.
    rs = npr.RandomState(0)
    sampled_funcs = rs.multivariate_normal(pred_mean, pred_cov, size=10)
    ax.plot(plot_xs, sampled_funcs.T)
    ax.plot(X, y, 'kx')
    ax.set_ylim([-1.5, 1.5])
    ax.set_xticks([])
    ax.set_yticks([])
def plot_single_gp(ax, x0, y0, pred_mean, pred_cov, plot_xs):
    ax.cla()
    marg_std = np.sqrt(np.diag(pred_cov))
    if n_samples > 1:
        ax.plot(plot_xs, pred_mean, 'b')
        ax.fill(np.concatenate([plot_xs, plot_xs[::-1]]),
                np.concatenate([pred_mean - 1.96 * marg_std,
                                (pred_mean + 1.96 * marg_std)[::-1]]),
                alpha=.15, fc='Blue', ec='None')

    # Show samples from posterior.
    rs = npr.RandomState(0)
    sampled_funcs = rs.multivariate_normal(pred_mean, pred_cov * (random), size=n_samples)
    ax.plot(plot_xs, sampled_funcs.T)
    ax.plot(x0, y0, 'ro')
    ax.set_xticks([])
    ax.set_yticks([])
def __init__(self, mu, K, Ki=None, logdet_K=None, L=None):
    mu = np.atleast_1d(mu).flatten()
    K = np.atleast_2d(K)
    assert np.prod(mu.shape) == K.shape[0]
    assert K.shape[0] == K.shape[1]

    self.mu = mu
    self.K = K
    (val, vec) = np.linalg.eigh(K)
    idx = np.arange(mu.size - 1, -1, -1)
    (self.eigval, self.eigvec) = (np.diag(val[idx]), vec[:, idx])
    self.eig = self.eigvec.dot(np.sqrt(self.eigval))

    self.dim = K.shape[0]
    #(self.Ki, self.logdet) = (np.linalg.inv(K), np.linalg.slogdet(K)[1])
    (self.Ki, self.L, self.Li, self.logdet) = pdinv(K)
    self.lpdf_const = -0.5 * float(self.dim * np.log(2 * np.pi) + self.logdet)
def loss(W_vect, X, T):
    log_prior = -L2_reg * np.dot(W_vect, W_vect)

    # compute distribution for each input
    params = predict_distribution(W_vect, X)
    means = params[:, :8]
    var_prior = -np.sum(params[:, -8:] * params[:, -8:])
    variances = np.exp(params[:, -8:])   # axis-aligned variances
    ll = 0.
    for i in range(T.shape[0]):
        ll = ll + np.sum(
            gmm_util.mog_loglike(T[i],
                                 means=means[i, :, None].T,
                                 icovs=np.array([np.diag(1. / variances[i])]),
                                 dets=np.array([1.]),
                                 pis=np.array([1.])))
    return -log_prior - ll - var_prior
def fit_gaussian_draw(X, J, seed=28, reg=1e-7, eig_pow=1.0):
    """
    Fit a multivariate normal to the data X (n x d) and draw J points from the fit.
    - reg: regularizer to use with the covariance matrix
    - eig_pow: raise eigenvalues of the covariance matrix to this power to construct
        a new covariance matrix before drawing samples. Useful to shrink the spread
        of the variance.
    """
    with NumpySeedContext(seed=seed):
        d = X.shape[1]
        mean_x = np.mean(X, 0)
        cov_x = np.cov(X.T)
        if d == 1:
            cov_x = np.array([[cov_x]])
        [evals, evecs] = np.linalg.eig(cov_x)
        evals = np.maximum(0, np.real(evals))
        assert np.all(np.isfinite(evals))
        evecs = np.real(evecs)
        shrunk_cov = evecs.dot(np.diag(evals**eig_pow)).dot(evecs.T) + reg * np.eye(d)
        V = np.random.multivariate_normal(mean_x, shrunk_cov, J)
    return V
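# Usage sketch (an assumption, not from the original code): it presumes the module's
# NumpySeedContext helper is available. Hypothetical data: 500 points from a
# correlated 2-D Gaussian, refit and resampled.
import numpy as np

rng_demo = np.random.RandomState(0)
X_demo = rng_demo.multivariate_normal([1.0, -2.0], [[2.0, 0.8], [0.8, 1.0]], size=500)

V_demo = fit_gaussian_draw(X_demo, J=200, seed=3)
print(V_demo.shape)              # (200, 2)
print(V_demo.mean(axis=0))       # close to the empirical mean of X_demo
print(np.cov(V_demo.T))          # close to the empirical covariance of X_demo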
def plot_deep_gp(ax, params, plot_xs):
    ax.cla()
    rs = npr.RandomState(0)
    sampled_means_and_covs = [sample_mean_cov_from_deep_gp(params, plot_xs)
                              for i in range(n_samples)]
    sampled_means, sampled_covs = zip(*sampled_means_and_covs)
    avg_pred_mean = np.mean(sampled_means, axis=0)
    avg_pred_cov = np.mean(sampled_covs, axis=0)
    marg_std = np.sqrt(np.diag(avg_pred_cov))
    if n_samples > 1:
        ax.fill(np.concatenate([plot_xs, plot_xs[::-1]]),
                np.concatenate([avg_pred_mean - 1.96 * marg_std,
                                (avg_pred_mean + 1.96 * marg_std)[::-1]]),
                alpha=.15, fc='Blue', ec='None')
    ax.plot(plot_xs, avg_pred_mean, 'b')
    sampled_funcs = np.array([rs.multivariate_normal(mean, cov * (random))
                              for mean, cov in sampled_means_and_covs])
    ax.plot(plot_xs, sampled_funcs.T)
    ax.plot(X, y, 'kx')
    #ax.set_ylim([-1.5, 1.5])
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title("Full Deep GP, inputs to outputs")
def callback(params, t, g):
    paramis = [0, -6.32795237, -0.69221531, -0.24707744]
    print("Log likelihood {}".format(-objective(params, t)))
    plt.cla()

    # Show posterior marginals.
    plot_xs = np.reshape(np.linspace(-7, 7, 300), (300, 1))
    mu, log_std = params[:D], params[D:2*D]
    pred_mean, pred_cov = predict(paramis, pseudo, mu, plot_xs)
    marg_std = np.sqrt(np.diag(pred_cov))
    ax.plot(plot_xs, pred_mean, 'b')
    ax.fill(np.concatenate([plot_xs, plot_xs[::-1]]),
            np.concatenate([pred_mean - 1.96 * marg_std,
                            (pred_mean + 1.96 * marg_std)[::-1]]),
            alpha=.15, fc='Blue', ec='None')
    plt.pause(1.0 / 30.0)
    ax.plot(X, y, 'kx')
    ax.set_ylim([-1.5, 1.5])
    ax.set_xticks([])
    ax.set_yticks([])
    plt.draw()
    plt.pause(1.0 / 60.0)
    print("Iteration {} lower bound {}".format(t, -objective(params, t)))
def plot_train_fits(W, axarr):
    params = pred_fun(W, train_images)
    means = params[:, :8]
    variances = np.exp(params[:, -8:])   # axis-aligned variances

    # plot 5 random data points and 4 random marginals
    idx = np.sort(np.random.permutation(train_images.shape[0])[:axarr.shape[0]])
    dims = np.sort(np.random.permutation(samps.shape[-1])[:axarr.shape[1]])
    for r, i in enumerate(idx):
        for c, d in enumerate(dims):
            axarr[r, c].cla()
            n, bins, patches = axarr[r, c].hist(samps[i, :, d], bins=20, normed=True)
            axarr[r, c].plot([means[i, d], means[i, d]], [0, n.max()])
            thgrid = np.linspace(min(bins[0], means[i, d]),
                                 max(bins[-1], means[i, d]), 50)
            axarr[r, c].plot(thgrid,
                             np.exp(gu.mog_logmarglike(thgrid,
                                                       means=np.array([means[i]]),
                                                       covs=np.array([np.diag(variances[i])]),
                                                       pis=np.array([1.]),
                                                       ind=d)))
            axarr[r, c].set_title("Idx = %d, dim = %d" % (i, d))
    plt.draw()
def fun(x):
    return to_scalar(np.diag(x))

d_fun = lambda x: to_scalar(grad(fun)(x))