def test_inv_3d():
    fun = lambda x: np.linalg.inv(x)

    D = 4
    mat = npr.randn(D, D, D) + 5 * np.eye(D)
    check_grads(fun)(mat)

    mat = npr.randn(D, D, D, D) + 5 * np.eye(D)
    check_grads(fun)(mat)
def test_rts_backward_step_grad():
    npr.seed(0)
    n = 5

    Jns = rand_psd(n) + 10*np.eye(n)
    hns = npr.randn(n)
    mun = npr.randn(n)

    Jnp = rand_psd(n)
    hnp = npr.randn(n)

    Jf = rand_psd(n) + 10*np.eye(n)
    hf = npr.randn(n)

    bigJ = rand_psd(2*n)
    J11, J12, J22 = bigJ[:n, :n], bigJ[:n, n:], bigJ[n:, n:]

    next_smooth = Jns, hns, mun
    next_pred = Jnp, hnp
    filtered = Jf, hf
    pair_param = J11, J12, J22, 0.

    dotter = g_Js, g_hs, (g_Ex, g_ExxT, g_ExnxT) = \
        npr.randn(n, n), npr.randn(n), (npr.randn(n), npr.randn(n, n), npr.randn(n, n))

    # this function wraps natural_rts_backward_step to take care of factors of 2
    def fun(next_smooth, next_pred, filtered, pair_param):
        (Jns, hns, mun), (Jnp, hnp), (Jf, hf) = next_smooth, next_pred, filtered
        next_smooth, next_pred, filtered = \
            (-1./2*Jns, hns, mun), (-1./2*Jnp, hnp), (-1./2*Jf, hf)

        J11, J12, J22, logZ_pair = pair_param
        pair_param = -1./2*J11, -J12, -1./2*J22, logZ_pair

        neghalfJs, hs, (Ex, ExxT, ExnxT) = natural_rts_backward_step(
            next_smooth, next_pred, filtered, pair_param)
        Js = -2*neghalfJs

        return Js, hs, (Ex, ExxT, ExnxT)

    # ans
    Js, hs, (Ex, ExxT, ExnxT) = fun(next_smooth, next_pred, filtered, pair_param)

    def gfun(next_smooth, next_pred, filtered):
        vals = fun(next_smooth, next_pred, filtered, pair_param)
        assert shape(vals) == shape(dotter)
        return contract(dotter, vals)

    g1 = grad(lambda x: gfun(*x))((next_smooth, next_pred, filtered))
    g2 = rts_backward_step_grad(
        g_Js, g_hs, g_Ex, g_ExxT, g_ExnxT,
        next_smooth, next_pred, filtered, pair_param,
        Js, hs, (Ex, ExxT, ExnxT))

    assert allclose(g1, g2)
def exp(r):
    """Matrix exponential onto the special orthogonal group SO(3):
    converts a Rodrigues 3-vector r into a 3x3 rotation matrix R."""
    theta = np.linalg.norm(r)
    if theta == 0:
        return np.eye(3)
    K = hat(r / theta)
    # Compute with Rodrigues' formula
    return np.eye(3) + np.sin(theta) * K + \
        (1 - np.cos(theta)) * np.dot(K, K)
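# `hat` above is assumed to build the 3x3 skew-symmetric (cross-product) matrix of
# a 3-vector; a minimal sketch of that helper, in case it is not defined nearby:
def hat(v):
    """Skew-symmetric matrix K such that np.dot(K, w) == np.cross(v, w)."""
    return np.array([[0., -v[2], v[1]],
                     [v[2], 0., -v[0]],
                     [-v[1], v[0], 0.]])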
def make_prior_natparam(n, random=False, scaling=1.):
    if random:
        raise NotImplementedError

    nu, S, mu, kappa = n+1., 2.*scaling*(n+1)*np.eye(n), np.zeros(n), 1./(2.*scaling*n)
    # M, K = np.zeros((n,n)), 1./(2.*scaling*n)*np.eye(n)
    M, K = np.eye(n), 1./(2.*scaling*n)*np.eye(n)

    init_state_prior_natparam = niw.standard_to_natural(nu, S, mu, kappa)
    dynamics_prior_natparam = mniw.standard_to_natural(nu, S, M, K)

    return init_state_prior_natparam, dynamics_prior_natparam
def conditional(x, y, xstar):
    """Returns the predictive mean and covariance at locations xstar,
    of the latent function value f (without observation noise).

    Assumes a zero-mean prior; y is the observed function value at x."""
    cov_f_f = RBF(xstar, xstar)
    cov_y_f = RBF(x, xstar)
    cov_y_y = RBF(x, x) + (noise_scale + tol) * np.eye(len(y))
    pred_mean = np.dot(solve(cov_y_y, cov_y_f).T, y)
    pred_cov = cov_f_f - np.dot(solve(cov_y_y, cov_y_f).T, cov_y_f) \
        + tol * np.eye(len(xstar))
    return pred_mean, pred_cov
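# RBF, noise_scale and tol are module-level globals here. A minimal sketch of a
# squared-exponential kernel consistent with the calls above (an assumption: unit
# variance, fixed lengthscale, and 2-D inputs of shape (n_points, n_dims)):
def RBF(xa, xb, lengthscale=1.0):
    # pairwise squared distances between rows of xa and xb
    diffs = xa[:, None, :] - xb[None, :, :]
    sqdists = np.sum(diffs**2, axis=-1)
    return np.exp(-0.5 * sqdists / lengthscale**2)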
def evaluate_prior(all_params):  # clean up code so we don't compute matrices twice
    all_layer_params = unpack_all_params(all_params)
    log_prior = 0
    for layer in xrange(n_layers):
        layer_params = all_layer_params[layer]
        layer_gp_params = unpack_layer_params[layer](layer_params)
        for dim in xrange(dimensions[layer+1]):
            gp_params = layer_gp_params[dim]
            mean, cov_params, noise_scale, x0, y0 = unpack_gp_params_all[layer][dim](gp_params)
            cov_y_y = covariance_function(cov_params, x0, x0) + noise_scale * np.eye(len(y0))
            log_prior += mvn.logpdf(y0, np.ones(len(cov_y_y)) * mean,
                                    cov_y_y + np.eye(len(cov_y_y)) * 10)
    return log_prior
def test_inv_3d():
    fun = lambda x: to_scalar(np.linalg.inv(x))
    d_fun = lambda x: to_scalar(grad(fun)(x))

    D = 4
    mat = npr.randn(D, D, D) + 5 * np.eye(D)
    check_grads(fun, mat)
    check_grads(d_fun, mat)

    mat = npr.randn(D, D, D, D) + 5 * np.eye(D)
    check_grads(fun, mat)
    check_grads(d_fun, mat)
def get_KMM_ineq_constraints(num_train, B_max, eps):
    G_gt_0 = -np.eye(num_train)
    h_gt_0 = np.zeros(num_train)
    G_lt_B_max = np.eye(num_train)
    h_lt_B_max = np.ones(num_train) * B_max
    G_B_sum_lt = np.ones(num_train, dtype=float)
    h_B_sum_lt = (1 + eps) * float(num_train) * np.ones(1)
    G_B_sum_gt = -np.ones(num_train, dtype=float)
    h_B_sum_gt = -(1 - eps) * float(num_train) * np.ones(1)
    G = np.vstack((G_gt_0, G_lt_B_max, G_B_sum_lt, G_B_sum_gt))
    h = np.hstack((h_gt_0, h_lt_B_max, h_B_sum_lt, h_B_sum_gt))
    return G, h
def loss(weights):
    mu1 = parser.get(weights, 'mu1')
    mu2 = parser.get(weights, 'mu2')
    sig1 = parser.get(weights, 'sig1') * np.eye(mu1.size)
    sig2 = parser.get(weights, 'sig2') * np.eye(mu1.size)
    # KL(N(mu1, sig1) || N(mu2, sig2)); equivalent quadratic term:
    # np.dot(np.dot(np.transpose(mu2 - mu1), np.linalg.inv(sig2)), mu2 - mu1)
    return 0.5 * (np.log(np.linalg.det(sig2) / np.linalg.det(sig1))
                  - mu1.size
                  + np.trace(np.dot(np.linalg.inv(sig2), sig1))
                  + np.dot(np.dot(mu2 - mu1, np.linalg.inv(sig2)),
                           np.transpose(mu2 - mu1)))
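# A numerically safer variant of the same Gaussian KL (a sketch with a hypothetical
# name, not the author's code): np.linalg.det can under/overflow for larger
# covariances, so the log-determinant ratio and quadratic term can use slogdet and
# solve instead of det and inv.
def gaussian_kl(mu1, sig1, mu2, sig2):
    d = mu1.size
    _, logdet1 = np.linalg.slogdet(sig1)
    _, logdet2 = np.linalg.slogdet(sig2)
    diff = mu2 - mu1
    return 0.5 * (logdet2 - logdet1 - d
                  + np.trace(np.linalg.solve(sig2, sig1))
                  + np.dot(diff, np.linalg.solve(sig2, diff)))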
def expectedstats_standard(nu, S, M, K, fudge=1e-8):
    m = M.shape[0]
    E_Sigmainv = nu*symmetrize(np.linalg.inv(S)) + fudge*np.eye(S.shape[0])
    E_Sigmainv_A = nu*np.linalg.solve(S, M)
    E_AT_Sigmainv_A = m*K + nu*symmetrize(np.dot(M.T, np.linalg.solve(S, M))) \
        + fudge*np.eye(K.shape[0])
    E_logdetSigmainv = digamma((nu-np.arange(m))/2.).sum() \
        + m*np.log(2) - np.linalg.slogdet(S)[1]

    assert is_posdef(E_Sigmainv)
    assert is_posdef(E_AT_Sigmainv_A)

    return make_tuple(
        -1./2*E_AT_Sigmainv_A, E_Sigmainv_A.T, -1./2*E_Sigmainv, 1./2*E_logdetSigmainv)
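# symmetrize and is_posdef are small helpers assumed to live elsewhere in this
# module; minimal sketches consistent with how they are used above:
def symmetrize(A):
    return (A + A.T) / 2.

def is_posdef(A):
    return np.all(np.linalg.eigvalsh(symmetrize(A)) > 0)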
def predict(params, xstar, with_noise=False, FITC=False):
    """Returns the predictive mean and covariance at locations xstar,
    of the latent function value f (without observation noise)."""
    mean, cov_params, noise_scale, x0, y0 = unpack_gp_params(params)
    cov_f_f = cov_func(cov_params, xstar, xstar)
    cov_y_f = cov_func(cov_params, x0, xstar)
    cov_y_y = cov_func(cov_params, x0, x0) + noise_scale * np.eye(len(y0))
    pred_mean = mean + np.dot(solve(cov_y_y, cov_y_f).T, y0 - mean)
    pred_cov = cov_f_f - np.dot(solve(cov_y_y, cov_y_f).T, cov_y_f)
    if FITC:
        pred_cov = np.diag(np.diag(pred_cov))
    if with_noise:
        pred_cov = pred_cov + noise_scale * np.eye(len(xstar))
    return pred_mean, pred_cov
def test_rts_backward_step():
    npr.seed(0)
    n = 3

    Jns = rand_psd(n)
    hns = npr.randn(n)
    mun = npr.randn(n)

    Jnp = rand_psd(n)
    hnp = npr.randn(n)

    Jf = rand_psd(n) + 10*np.eye(n)
    hf = npr.randn(n)

    bigJ = rand_psd(2*n)
    J11, J12, J22 = -1./2*bigJ[:n, :n], -bigJ[:n, n:], -1./2*bigJ[n:, n:]

    next_smooth = -1./2*Jns, hns, mun
    next_pred = -1./2*Jnp, hnp
    filtered = -1./2*Jf, hf
    pair_param = J11, J12, J22, 0.

    Js1, hs1, (mu1, ExxT1, ExnxT1) = natural_rts_backward_step(
        next_smooth, next_pred, filtered, pair_param)
    Js2, hs2, (mu2, ExxT2, ExnxT2) = rts_backward_step(
        next_smooth, next_pred, filtered, pair_param)

    assert np.allclose(Js1, Js2)
    assert np.allclose(hs1, hs2)
    assert np.allclose(mu1, mu2)
    assert np.allclose(ExxT1, ExxT2)
    assert np.allclose(ExnxT1, ExnxT2)
def _hmc_log_probability(self, L, b, A, W):
    """
    Compute the log probability as a function of L.
    This allows us to take the gradients wrt L using autograd.

    :param L:
    :param A:
    :return:
    """
    assert self.B == 1
    import autograd.numpy as anp

    # Compute pairwise distance
    L1 = anp.reshape(L, (self.N, 1, self.dim))
    L2 = anp.reshape(L, (1, self.N, self.dim))

    # Mu = a * anp.sqrt(anp.sum((L1-L2)**2, axis=2)) + b
    Mu = -anp.sum((L1-L2)**2, axis=2) + b

    Aoff = A * (1 - anp.eye(self.N))
    X = (W - Mu[:, :, None]) * Aoff[:, :, None]

    # Get the covariance and precision
    Sig = self.cov.sigma[0, 0]
    Lmb = 1. / Sig

    lp = anp.sum(-0.5 * X**2 * Lmb)

    # Log prior of L under spherical Gaussian prior
    lp += -0.5 * anp.sum(L * L / self.eta)

    # Log prior of mu0 under standard Gaussian prior
    lp += -0.5 * b ** 2

    return lp
def tmp_cost_func_dxx(x, u, t, aux):
    hessian = np.zeros((2*self.n_dims_, 2*self.n_dims_))
    hessian[0:self.n_dims_, 0:self.n_dims_] = 2 * self.weight_array[t]
    if t > self.T_ - 1:
        hessian[self.n_dims_:, self.n_dims_:] = \
            2 * np.eye(self.n_dims_) * self.R_ * self.Q_vel_ratio_
    return hessian
def PyLQR_TrajCtrl_TrackingTest():
    n_pnts = 200
    x_coord = np.linspace(0.0, 2*np.pi, n_pnts)
    y_coord = np.sin(x_coord)
    # concatenate to have trajectory
    ref_traj = np.array([x_coord, y_coord]).T
    weight_mats = [np.eye(ref_traj.shape[1]) * 100]

    # draw reference trajectory
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.hold(True)
    ax.plot(ref_traj[:, 0], ref_traj[:, 1], '.-k', linewidth=3.5)
    ax.plot([ref_traj[0, 0]], [ref_traj[0, 1]], '*k', markersize=16)

    lqr_traj_ctrl = PyLQR_TrajCtrl(use_autograd=True)
    lqr_traj_ctrl.build_ilqr_tracking_solver(ref_traj, weight_mats)

    n_queries = 5
    for i in range(n_queries):
        # start from a perturbed point
        x0 = ref_traj[0, :] + np.random.rand(2) * 2 - 1
        syn_traj = lqr_traj_ctrl.synthesize_trajectory(x0)
        # plot it
        ax.plot(syn_traj[:, 0], syn_traj[:, 1], linewidth=3.5)

    plt.show()
    return
def location_mixture_logpdf(samps, locations, location_weights, distr_at_origin,
                            contr_var=False, variant=1):
    # lpdfs = zeroprop.logpdf()
    diff = samps - locations[:, np.newaxis, :]
    lpdfs = distr_at_origin.logpdf(
        diff.reshape([np.prod(diff.shape[:2]), diff.shape[-1]])).reshape(diff.shape[:2])
    logprop_weights = log(location_weights / location_weights.sum())[:, np.newaxis]
    if not contr_var:
        return logsumexp(lpdfs + logprop_weights, 0)
    else:
        # time_m1 = np.hstack([time0[:,:-1],time0[:,-1:]])
        time0 = lpdfs + logprop_weights + log(len(location_weights))
        if variant == 1:
            time1 = np.hstack([time0[:, 1:], time0[:, :1]])
            cov = np.mean(time0**2 - time0*time1)
            var = np.mean((time0 - time1)**2)
            lpdfs = lpdfs - cov/var * (time0 - time1)
            return logsumexp(lpdfs - log(len(location_weights)), 0)
        elif variant == 2:
            cvar = (time0[:, :, np.newaxis]
                    - np.dstack([np.hstack([time0[:, 1:], time0[:, :1]]),
                                 np.hstack([time0[:, -1:], time0[:, :-1]])]))
            # self-covariance matrix of control variates
            K_cvar = np.diag(np.mean(cvar**2, (0, 1)))
            # add off diagonal
            K_cvar = K_cvar + (1. - np.eye(2)) * np.mean(cvar[:, :, 0]*cvar[:, :, 1])
            # covariance of control variates with random variable
            cov = np.mean(time0[:, :, np.newaxis] * cvar, 0).mean(0)

            optimal_comb = np.linalg.inv(K_cvar) @ cov
            lpdfs = lpdfs - cvar @ optimal_comb
            return logsumexp(lpdfs - log(len(location_weights)), 0)
def setUp(self):
    self.X = None
    self.cost = lambda X: np.exp(np.sum(X**2))

    m = self.m = 10
    n = self.n = 15

    Y = self.Y = rnd.randn(m, n)
    A = self.A = rnd.randn(m, n)

    # Calculate correct cost and grad...
    self.correct_cost = np.exp(np.sum(Y ** 2))
    self.correct_grad = correct_grad = 2 * Y * np.exp(np.sum(Y ** 2))

    # ... and hess
    # First form hessian tensor H (4th order)
    Y1 = Y.reshape(m, n, 1, 1)
    Y2 = Y.reshape(1, 1, m, n)

    # Create an m x n x m x n array with diag[i,j,k,l] == 1 iff
    # (i == k and j == l), this is a 'diagonal' tensor.
    diag = np.eye(m * n).reshape(m, n, m, n)

    H = np.exp(np.sum(Y ** 2)) * (4 * Y1 * Y2 + 2 * diag)

    # Then 'right multiply' H by A
    Atensor = A.reshape(1, 1, m, n)
    self.correct_hess = np.sum(H * Atensor, axis=(2, 3))

    self.backend = AutogradBackend()
def setUp(self):
    self.X = None
    self.cost = lambda X: np.exp(np.sum(X**2))

    n = self.n = 15

    Y = self.Y = rnd.randn(1, n)
    A = self.A = rnd.randn(1, n)

    # Calculate correct cost and grad...
    self.correct_cost = np.exp(np.sum(Y ** 2))
    self.correct_grad = correct_grad = 2 * Y * np.exp(np.sum(Y ** 2))

    # ... and hess
    # First form hessian matrix H
    # Convert Y and A into matrices (row vectors)
    Ymat = np.matrix(Y)
    Amat = np.matrix(A)

    diag = np.eye(n)

    H = np.exp(np.sum(Y ** 2)) * (4 * Ymat.T.dot(Ymat) + 2 * diag)

    # Then 'left multiply' H by A
    self.correct_hess = np.array(Amat.dot(H))

    self.backend = AutogradBackend()
def setUp(self):
    self.X = None
    self.cost = lambda X: np.exp(np.sum(X**2))

    n1 = self.n1 = 3
    n2 = self.n2 = 4
    n3 = self.n3 = 5

    Y = self.Y = rnd.randn(n1, n2, n3)
    A = self.A = rnd.randn(n1, n2, n3)

    # Calculate correct cost and grad...
    self.correct_cost = np.exp(np.sum(Y ** 2))
    self.correct_grad = correct_grad = 2 * Y * np.exp(np.sum(Y ** 2))

    # ... and hess
    # First form hessian tensor H (6th order)
    Y1 = Y.reshape(n1, n2, n3, 1, 1, 1)
    Y2 = Y.reshape(1, 1, 1, n1, n2, n3)

    # Create an n1 x n2 x n3 x n1 x n2 x n3 diagonal tensor
    diag = np.eye(n1 * n2 * n3).reshape(n1, n2, n3, n1, n2, n3)

    H = np.exp(np.sum(Y ** 2)) * (4 * Y1 * Y2 + 2 * diag)

    # Then 'right multiply' H by A
    Atensor = A.reshape(1, 1, 1, n1, n2, n3)
    self.correct_hess = np.sum(H * Atensor, axis=(3, 4, 5))

    self.backend = AutogradBackend()
def log_marginal_likelihood(params, data):
    cluster_lls = []
    for log_proportion, mean, chol in zip(*unpack_params(params)):
        cov = np.dot(chol.T, chol) + 0.000001 * np.eye(D)
        cluster_log_likelihood = log_proportion + mvn.logpdf(data, mean, cov)
        cluster_lls.append(np.expand_dims(cluster_log_likelihood, axis=0))
    cluster_lls = np.concatenate(cluster_lls, axis=0)
    return np.sum(logsumexp(cluster_lls, axis=0))
def test_eigvalh_upper_broadcasting():
    def fun(x):
        w, v = np.linalg.eigh(x, 'U')
        return tuple((w, v))

    D = 6
    mat = npr.randn(2, 3, D, D) + 10 * np.eye(D)[None, None, ...]
    hmat = broadcast_dot_transpose(mat, mat)
    check_symmetric_matrix_grads(fun)(hmat)
def test_solve_arg2():
    D = 6
    A = npr.randn(D, D) + 1.0 * np.eye(D)
    B = npr.randn(D, D - 1)

    def fun(b):
        return to_scalar(np.linalg.solve(A, b))
    d_fun = lambda x: to_scalar(grad(fun)(x))

    check_grads(fun, B)
    check_grads(d_fun, B)
def test_solve_arg1_1d():
    D = 8
    A = npr.randn(D, D) + 10.0 * np.eye(D)
    B = npr.randn(D)

    def fun(a):
        return to_scalar(np.linalg.solve(a, B))
    d_fun = lambda x: to_scalar(grad(fun)(x))

    check_grads(fun, A)
    check_grads(d_fun, A)
def test_inv():
    def fun(x):
        return to_scalar(np.linalg.inv(x))
    d_fun = lambda x: to_scalar(grad(fun)(x))

    D = 8
    mat = npr.randn(D, D)
    mat = np.dot(mat, mat) + 1.0 * np.eye(D)

    check_grads(fun, mat)
    check_grads(d_fun, mat)
def test_solve_arg1_3d_3d():
    D = 4
    A = npr.randn(D+1, D, D) + 5*np.eye(D)
    B = npr.randn(D+1, D, D+2)
    fun = lambda A: to_scalar(np.linalg.solve(A, B))
    d_fun = lambda A: to_scalar(grad(fun)(A))
    check_grads(fun, A)
    check_grads(d_fun, A)
def evaluate_prior(all_params):  # clean up code so we don't compute matrices twice
    layer_params, x0, y0 = unpack_all_params(all_params)
    log_prior = 0
    for layer in xrange(n_layers):
        mean, cov_params, noise_scale = unpack_kernel_params(layer_params[layer])
        cov_y_y = covariance_function(cov_params, x0[layer], x0[layer]) \
            + noise_scale * np.eye(len(y0[layer]))
        log_prior += mvn.logpdf(y0[layer], np.ones(len(cov_y_y)) * mean,
                                cov_y_y + np.eye(len(cov_y_y)) * 10)
    return log_prior
def test_solve_triangular_arg2_2d():
    D = 6
    A = npr.randn(D, D) + 10*np.eye(D)
    trans_options = ['T', 'N', 'C', 0, 1, 2]
    lower_options = [True, False]
    for trans, lower in itertools.product(trans_options, lower_options):
        def fun(B):
            return to_scalar(spla.solve_triangular(A, B, trans=trans, lower=lower))
        yield check_grads, fun, npr.randn(D, D-1)
def sample_from_mvn(mu, sigma, rs=npr.RandomState(0), FITC=False):
    if FITC:
        # if not np.allclose(sigma, np.diag(np.diag(sigma))):
        #     print("NOT DIAGONAL")
        # return np.dot(np.linalg.cholesky(sigma+1e-6*np.eye(len(sigma))), rs.randn(len(sigma))) + mu if random == 1 else mu
        return np.dot(np.sqrt(sigma + 1e-6), rs.randn(len(sigma))) + mu if random == 1 else mu
        # return np.dot(np.linalg.cholesky(sigma+1e-6*np.eye(len(sigma))), rs.randn(len(sigma))) + mu if random == 1 else mu
    else:
        return np.dot(np.linalg.cholesky(sigma + 1e-6*np.eye(len(sigma))),
                      rs.randn(len(sigma))) + mu if random == 1 else mu
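# Note: the function above depends on a module-level `random` flag. A minimal,
# self-contained sketch of the non-FITC branch for illustration only (hypothetical
# name, not part of the original code):
def sample_from_mvn_simple(mu, sigma, rs=npr.RandomState(0)):
    # jitter the covariance so the Cholesky factorization is numerically stable
    chol = np.linalg.cholesky(sigma + 1e-6 * np.eye(len(sigma)))
    return np.dot(chol, rs.randn(len(sigma))) + mu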
def predict(params, x, y, xstar):
    """Returns the predictive mean and covariance at locations xstar,
    of the latent function value f (without observation noise)."""
    mean, cov_params, noise_scale = unpack_kernel_params(params)
    cov_f_f = cov_func(cov_params, xstar, xstar)
    cov_y_f = cov_func(cov_params, x, xstar)
    cov_y_y = cov_func(cov_params, x, x) + noise_scale * np.eye(len(y))
    pred_mean = mean + np.dot(solve(cov_y_y, cov_y_f).T, y - mean)
    pred_cov = cov_f_f - np.dot(solve(cov_y_y, cov_y_f).T, cov_y_f)
    return pred_mean, pred_cov
def test_eigvalh_upper_broadcasting():
    def fun(x):
        w, v = np.linalg.eigh(x, 'U')
        return to_scalar(w) + to_scalar(v)
    d_fun = lambda x: to_scalar(grad(fun)(x))

    D = 6
    mat = npr.randn(2, 3, D, D) + 10 * np.eye(D)[None, None, ...]
    hmat = broadcast_dot_transpose(mat, mat)
    check_symmetric_matrix_grads(fun, hmat)
    check_symmetric_matrix_grads(d_fun, hmat)
def svrg(fun, x0, fun_and_jac, pieces, stepsize, iter_per_epoch, max_epochs=100, bounds=None, callback=None, rgen=np.random, quasinewton=True, init_epoch_svrg=False, xtol=1e-6): x0 = np.array(x0) if quasinewton is not True and quasinewton is not False: init_H = quasinewton quasinewton = True else: init_H = None if callback is None: callback = lambda *a, **kw: None if bounds is None: bounds = [(None, None) for _ in x0] lower, upper = zip(*bounds) lower = [-float('inf') if l is None else l for l in lower] upper = [float('inf') if u is None else u for u in upper] def truncate(x): return np.maximum(np.minimum(x, upper), lower) def update_Hess(H, new_x, prev_x, new_g, prev_g): if np.allclose(new_x, prev_x): return H s = new_x - prev_x y = new_g - prev_g sy = np.dot(s, y) Bs = np.linalg.solve(H, s) y_Bs = y - Bs if np.abs(np.dot( s, y_Bs)) < 1e-8 * np.linalg.norm(s) * np.linalg.norm(y_Bs): # skip SR1 update return H Hy = np.dot(H, y) s_Hy = s - Hy H = H + np.outer(s_Hy, s_Hy) / np.dot(s_Hy, y) return H I = np.eye(len(x0)) finished = False x = x0 nit = 0 history = {k: [] for k in ('x', 'f', 'jac')} for epoch in itertools.count(): if epoch > 0: prev_w = w prev_gbar = gbar else: prev_w, prev_gbar = None, None w = x if epoch > 0 or init_epoch_svrg is True: fbar, gbar = fun_and_jac(w, None) logger.info("SVRG pivot, {0}".format({ "w": list(w), "fbar": fbar, "gbar": list(gbar) })) #callback(w, fbar, epoch) for k, v in (('x', w), ('f', fbar), ('jac', gbar)): history[k].append(v) elif init_epoch_svrg is False: gbar = None else: fbar, gbar = init_epoch_svrg if quasinewton: if prev_gbar is not None: H = update_Hess(H, w, prev_w, gbar, prev_gbar) else: assert epoch == 0 or (epoch == 1 and not init_epoch_svrg) if init_H is not None: H = init_H else: f_w, g_w = fun_and_jac(w, 0) if epoch == 0: u = truncate(w - g_w) else: u = prev_w f_u, g_u = fun_and_jac(u, 0) s, y = (u - w), (g_u - g_w) H = np.abs(np.dot(s, y) / np.dot(y, y)) * I H_eigvals, H_eigvecs = scipy.linalg.eigh(H) Habs = np.einsum("k,ik,jk->ij", np.abs(H_eigvals), H_eigvecs, H_eigvecs) Babs = scipy.linalg.pinvh(Habs) if epoch > 0 and xtol >= 0 and np.allclose(w, prev_w, xtol, xtol): success = True message = "|x[k]-x[k-1]|~=0" break if epoch >= max_epochs: success = False message = "Maximum number of iterations reached" break for k in range(iter_per_epoch): i = rgen.randint(pieces) f_x, g_x = fun_and_jac(x, i) if gbar is not None: f_w, g_w = fun_and_jac(w, i) g = (g_x - g_w) + gbar f = (f_x - f_w) + fbar else: assert epoch == 0 and init_epoch_svrg is False g = g_x f = f_x callback(x, f, nit) if not quasinewton: xnext = truncate(x - stepsize * g) else: xnext = x - stepsize * np.dot(Habs, g) if not np.allclose(xnext, truncate(xnext)): model_fun = lambda y: np.dot( g, y - x) + .5 / stepsize * np.dot( y - x, np.dot(Babs, y - x)) model_grad = autograd.grad(model_fun) xnext = scipy.optimize.minimize(model_fun, x, jac=model_grad, bounds=bounds).x xnext = truncate(xnext) x = xnext nit += 1 history = {k: np.array(v) for k, v in history.items()} res = scipy.optimize.OptimizeResult({ 'success': success, 'message': message, 'nit': nit, 'nepoch': epoch, 'history': history, 'x': x, 'fun': fbar, 'jac': gbar }) if quasinewton: res['hess_inv'] = H return res
def fit_linear_regression(Xs, ys, weights=None, mu0=0, sigmasq0=1, nu0=1, Psi0=1, fit_intercept=True): """ Fit a linear regression y_i ~ N(Wx_i + b, diag(S)) for W, b, S. :param Xs: array or list of arrays :param ys: array or list of arrays :param fit_intercept: if False drop b """ Xs = Xs if isinstance(Xs, (list, tuple)) else [Xs] ys = ys if isinstance(ys, (list, tuple)) else [ys] assert len(Xs) == len(ys) D = Xs[0].shape[1] P = ys[0].shape[1] assert all([X.shape[1] == D for X in Xs]) assert all([y.shape[1] == P for y in ys]) assert all([X.shape[0] == y.shape[0] for X, y in zip(Xs, ys)]) mu0 = mu0 * np.zeros((P, D)) sigmasq0 = sigmasq0 * np.eye(D) # Make sure the weights are the weights if weights is not None: weights = weights if isinstance(weights, (list, tuple)) else [weights] else: weights = [np.ones(X.shape[0]) for X in Xs] # Add weak prior on intercept if fit_intercept: mu0 = np.column_stack((mu0, np.zeros(P))) sigmasq0 = block_diag(sigmasq0, np.eye(1)) # Compute the posterior J = np.linalg.inv(sigmasq0) h = np.dot(J, mu0.T) for X, y, weight in zip(Xs, ys, weights): X = np.column_stack((X, np.ones(X.shape[0]))) if fit_intercept else X J += np.dot(X.T * weight, X) h += np.dot(X.T * weight, y) # Solve for the MAP estimate W = np.linalg.solve(J, h).T if fit_intercept: W, b = W[:, :-1], W[:, -1] else: b = 0 # Compute the residual and the posterior variance nu = nu0 Psi = Psi0 * np.eye(P) for X, y, weight in zip(Xs, ys, weights): yhat = np.dot(X, W.T) + b resid = y - yhat nu += np.sum(weight) tmp1 = np.einsum('t,ti,tj->ij', weight, resid, resid) tmp2 = np.sum(weight[:, None, None] * resid[:, :, None] * resid[:, None, :], axis=0) assert np.allclose(tmp1, tmp2) Psi += tmp1 # Get MAP estimate of posterior covariance Sigma = Psi / (nu + P + 1) if fit_intercept: return W, b, Sigma else: return W, Sigma
#print(loss_all(Ys, output.x.reshape(100,D), Us,b))
# Reminder: check the output of minimize. see that it terminates
#output.status == 0

# %%
# test with num user (M) = 1
y = [
    list(range(25)),
    list(range(25, 50)),
    list(range(50, 75)),
    list(range(75, 100))
]
Y = get_Y(y, 100)
Ys = [Y]
Us = [np.eye(2)]
b = 0

# -- at each corner of X axis
X_correct = np.concatenate((
    np.concatenate((np.random.uniform(1 - 0.01, 1 + 0.01, (25, 1)),
                    np.random.uniform(0 - 0.01, 0 + 0.01, (25, 1))), axis=1),
    np.concatenate((np.random.uniform(0 - 0.01, 0 + 0.01, (25, 1)),
                    np.random.uniform(1 - 0.01, 1 + 0.01, (25, 1))), axis=1),
    np.concatenate((np.random.uniform(-1 - 0.01, -1 + 0.01, (25, 1)),
                    np.random.uniform(0 - 0.01, 0 + 0.01, (25, 1))), axis=1),
    np.concatenate((np.random.uniform(0 - 0.01, 0 + 0.01, (25, 1)),
                    np.random.uniform(-1 - 0.01, -1 + 0.01, (25, 1))), axis=1),
"forest green", "pastel purple", "salmon", "dark brown"] colors = sns.xkcd_palette(color_names) from ssm import HMM from ssm.util import find_permutation, one_hot T = 2000 # number of time bins D = 25 # number of observed neurons K = 5 # number of states # Make an LDS with somewhat interesting dynamics parameters true_hmm = HMM(K, D, observations="poisson") P = np.eye(K) + 0.2 * np.diag(np.ones(K-1), k=1) + 1e-5 * np.ones((K,K)) P[-1,0] = 0.2 true_hmm.transitions.log_Ps = np.log(P) log_lambdas = np.log(0.01 * np.ones((K, D))) for k in range(K): log_lambdas[k,k*K:(k+1)*K] = np.log(0.2) true_hmm.observations.log_lambdas = log_lambdas z, y = true_hmm.sample(T) z_test, y_test = true_hmm.sample(T) T_plot=500 plt.figure() plt.subplot(211) plt.imshow(z[None,:], aspect="auto")
def setUp(self): np.seterr(all='raise') def f(x): return (np.exp(np.sum(x[0]**2)) + np.exp(np.sum(x[1]**2)) + np.exp(np.sum(x[2]**2))) self.cost = f n1 = self.n1 = 3 n2 = self.n2 = 4 n3 = self.n3 = 5 n4 = self.n4 = 6 n5 = self.n5 = 7 n6 = self.n6 = 8 self.y = y = (rnd.randn(n1), rnd.randn(n2, n3), rnd.randn(n4, n5, n6)) self.a = a = (rnd.randn(n1), rnd.randn(n2, n3), rnd.randn(n4, n5, n6)) self.correct_cost = f(y) # CALCULATE CORRECT GRAD g1 = 2 * y[0] * np.exp(np.sum(y[0]**2)) g2 = 2 * y[1] * np.exp(np.sum(y[1]**2)) g3 = 2 * y[2] * np.exp(np.sum(y[2]**2)) self.correct_grad = (g1, g2, g3) # CALCULATE CORRECT HESS # 1. VECTOR Ymat = np.matrix(y[0]) Amat = np.matrix(a[0]) diag = np.eye(n1) H = np.exp(np.sum(y[0]**2)) * (4 * Ymat.T.dot(Ymat) + 2 * diag) # Then 'left multiply' H by A h1 = np.array(Amat.dot(H)).flatten() # 2. MATRIX # First form hessian tensor H (4th order) Y1 = y[1].reshape(n2, n3, 1, 1) Y2 = y[1].reshape(1, 1, n2, n3) # Create an m x n x m x n array with diag[i,j,k,l] == 1 iff # (i == k and j == l), this is a 'diagonal' tensor. diag = np.eye(n2 * n3).reshape(n2, n3, n2, n3) H = np.exp(np.sum(y[1]**2)) * (4 * Y1 * Y2 + 2 * diag) # Then 'right multiply' H by A Atensor = a[1].reshape(1, 1, n2, n3) h2 = np.sum(H * Atensor, axis=(2, 3)) # 3. Tensor3 # First form hessian tensor H (6th order) Y1 = y[2].reshape(n4, n5, n6, 1, 1, 1) Y2 = y[2].reshape(1, 1, 1, n4, n5, n6) # Create an n1 x n2 x n3 x n1 x n2 x n3 diagonal tensor diag = np.eye(n4 * n5 * n6).reshape(n4, n5, n6, n4, n5, n6) H = np.exp(np.sum(y[2]**2)) * (4 * Y1 * Y2 + 2 * diag) # Then 'right multiply' H by A Atensor = a[2].reshape(1, 1, 1, n4, n5, n6) h3 = np.sum(H * Atensor, axis=(3, 4, 5)) self.correct_hess = (h1, h2, h3) self.backend = AutogradBackend()
def init_param(self):
    init_param_dict = dict(mu=np.zeros(self.dim),
                           Sigma=10 * np.eye(self.dim))
    return self._pattern.flatten(init_param_dict)
import os
import autograd.numpy as np
from autograd import value_and_grad
from scipy.optimize import minimize
from util import (get_median_inter_mnist, Kernel, load_data, ROOT_PATH, _sqdist,
                  nystrom_decomp, remove_outliers, chol_inv)
from scipy.sparse import csr_matrix
import random
import time

Nfeval = 1
seed = 527
np.random.seed(seed)
random.seed(seed)
JITTER = 1e-7
nystr_M = 300
EYE_nystr = np.eye(nystr_M)
__sparse_fmt = csr_matrix
opt_params = None
prev_norm = None
opt_test_err = None


def experiment(sname, seed, nystr=True):

    def LMO_err(params, M=2, verbal=False):
        global Nfeval
        params = np.exp(params)
        al, bl = params[:-1], params[-1]  # params[:int(n_params/2)], params[int(n_params/2):] # [np.exp(e) for e in params]
        if train.x.shape[1] < 5:
            train_L = bl**2 * np.exp(-train_L0 / al**2 / 2) + 1e-4 * EYEN
        else:
    # approximate posterior with mean-field gaussian
    variational_params = variational_inference(Sigma_W=Sigma_W,
                                               y_train=y_train,
                                               x_train=x_train,
                                               S=S,
                                               max_iteration=max_iteration,
                                               step_size=step_size,
                                               verbose=verbose)

    # sample from the variational posterior
    var_means = variational_params[:D]
    var_variance = np.diag(np.exp(variational_params[D:])**2)

    return var_variance, var_means


ys = np.array([[1.]])
xs = np.array([[-20]])
N = 1
D = 1
Sigma_W = np.eye(D)

print(
    variational_bernoulli_regression(Sigma_W=Sigma_W,
                                     x_train=xs,
                                     y_train=ys,
                                     S=4000,
                                     max_iteration=2000,
                                     step_size=1e-1,
                                     verbose=True))
def param_func(param, matrix):
    return param * anp.eye(matrix.shape[0])
def generate(cls, Ds=[2, 3], size=1):
    if np.any(np.array(Ds) == 1):
        assert 0, 'Can\'t have an empty dim'
    params = (np.zeros(Ds), [np.eye(D) for D in Ds])
    samples = cls.sample(params=params, size=size)
    return samples if size > 1 else cls.unpackSingleSample(samples)
def init(self):
    # mu, sigma
    return np.array([5., 5.]), 1.e-4 * np.eye(2)
def train(self, InpsAndTargsFunc, algorithm, monitor_training=0, **kwargs): '''Use this method to train the RNN using one of several training algorithms! Inputs: InpsAndTargsFunc: This should be a FUNCTION that randomly produces a training input and a target function Those should have individual inputs/targets as columns and be the first two outputs of this function. Should also take a 'dt' argument. algorithm: Which training algorithm would you like to use? Options are: 'full-FORCE': as seen in DePasquale 2017 'grad': gradient-based training using autograd (adam optimizer) monitor_training: Collect useful statistics and show at the end **kwargs: use to pass things to the InpsAndTargsFunc function Outputs: Nothing explicitly, but the weights of self.rnn_par are optimized to map the inputs to the targets ''' kwargs['algorithm'] = algorithm if algorithm == 'full-FORCE': '''Use this to train the network according to the full-FORCE algorithm, described in DePasquale 2017 This function uses a recursive least-squares algorithm to optimize the network. Note that after each batch, the function shows an example output as well as recurrent unit activity. Parameters: In self.p, the parameters starting with ff_ control this function. *****NOTE***** The function InpsAndTargsFunc must have a third output of "hints" for training. If you don't want to use hints, replace with a vector of zeros (Nx1) ''' ####################### TEMPORARY ####################### TEMPORARY thetas = [] ####################### TEMPORARY ####################### TEMPORARY # First, initialize some parameters p = self.p self.initialize_act() N = p['network_size'] self.rnn_par['rec_weights'] = np.zeros((N, N)) self.rnn_par['out_weights'] = np.zeros( (self.rnn_par['out_weights'].shape)) # Need to initialize a target-generating network, used for computing error: # First, take some example inputs, targets, and hints to get the right shape D_inps_and_targs = InpsAndTargsFunc(dt=p['dt'], **kwargs) D_num_inputs = D_inps_and_targs['inps'].shape[1] D_num_targs = D_inps_and_targs['targs'].shape[1] D_num_hints = D_inps_and_targs['hints'].shape[1] D_num_total_inps = D_num_inputs + D_num_targs + D_num_hints # Then instantiate the network and pull out some relevant weights DRNN = RNN(hyperparameters=self.p, num_inputs=D_num_total_inps, num_outputs=1) w_targ = np.transpose( DRNN.rnn_par['inp_weights'][D_num_inputs:(D_num_inputs + D_num_targs), :]) w_hint = np.transpose( DRNN.rnn_par['inp_weights'][(D_num_inputs + D_num_targs):D_num_total_inps, :]) Jd = np.transpose(DRNN.rnn_par['rec_weights']) ################### Monitor training with these variables: J_err_ratio = [] J_err_mag = [] J_norm = [] w_err_ratio = [] w_err_mag = [] w_norm = [] ################### # Let the networks settle from the initial conditions print('Initializing', end="") for i in range(p['ff_init_trials']): print('.', end="") inps_and_targs = InpsAndTargsFunc(dt=p['dt'], **kwargs) inp = inps_and_targs['inps'] targ = inps_and_targs['targs'] hints = inps_and_targs['hints'] D_total_inp = np.hstack((inp, targ, hints)) DRNN.run(D_total_inp) self.run(inp) print('') # Now begin training print('Training network...') # Initialize the inverse correlation matrix P = np.eye(N) / p['ff_alpha'] for batch in range(p['ff_num_batches']): print( 'Batch %g of %g, %g trials: ' % (batch + 1, p['ff_num_batches'], p['ff_trials_per_batch']), end="") for trial in range(p['ff_trials_per_batch']): if np.mod(trial, 50) == 0: print('') print('.', end="") # Create input, target, and hints. 
Combine for the driven network inps_and_targs = InpsAndTargsFunc( dt=p['dt'], **kwargs) # Get relevant time series inp = inps_and_targs['inps'] targ = inps_and_targs['targs'] hints = inps_and_targs['hints'] D_total_inp = np.hstack((inp, targ, hints)) # For recording: dx = [] # Driven network activity x = [] # RNN activity z = [] # RNN output for t in range(len(inp)): # Run both RNNs forward and get the activity. Record activity for potential plotting dx_t = DRNN.run(D_total_inp[t:(t + 1), :], record_flag=1)[1][:, 0:5] z_t, x_t = self.run(inp[t:(t + 1), :], record_flag=1) dx.append(np.squeeze(np.tanh(dx_t) + np.arange(5) * 2)) z.append(np.squeeze(z_t)) x.append( np.squeeze( np.tanh(x_t[:, 0:5]) + np.arange(5) * 2)) if npr.rand() < (1 / p['ff_steps_per_update']): # Extract relevant values r = np.transpose(np.tanh(self.act)) rd = np.transpose(np.tanh(DRNN.act)) J = np.transpose(self.rnn_par['rec_weights']) w = np.transpose(self.rnn_par['out_weights']) # Compute errors J_err = (np.dot(J, r) - np.dot(Jd, rd) - np.dot(w_targ, targ[t:(t + 1), :].T) - np.dot(w_hint, hints[t:(t + 1), :].T)) w_err = np.dot(w, r) - targ[t:(t + 1), :].T # Compute the gain (k) and running estimate of the inverse correlation matrix Pr = np.dot(P, r) k = np.transpose(Pr) / ( 1 + np.dot(np.transpose(r), Pr)) P = P - np.dot(Pr, k) # Update weights w = w - np.dot(w_err, k) J = J - np.dot(J_err, k) self.rnn_par['rec_weights'] = np.transpose(J) self.rnn_par['out_weights'] = np.transpose(w) if monitor_training == 1: J_err_plus = ( np.dot(J, r) - np.dot(Jd, rd) - np.dot(w_targ, targ[t:(t + 1), :].T) - np.dot(w_hint, hints[t:(t + 1), :].T)) J_err_ratio = np.hstack( (J_err_ratio, np.squeeze(np.mean(J_err_plus / J_err)))) J_err_mag = np.hstack( (J_err_mag, np.squeeze(np.linalg.norm(J_err)))) J_norm = np.hstack( (J_norm, np.squeeze(np.linalg.norm(J)))) w_err_plus = np.dot(w, r) - targ[t:(t + 1), :].T w_err_ratio = np.hstack( (w_err_ratio, np.squeeze(w_err_plus / w_err))) w_err_mag = np.hstack( (w_err_mag, np.squeeze(np.linalg.norm(w_err)))) w_norm = np.hstack( (w_norm, np.squeeze(np.linalg.norm(w)))) ####################### TEMPORARY ####################### TEMPORARY thetas.append(inps_and_targs['theta']) ####################### TEMPORARY ####################### TEMPORARY ########## Batch callback print('') # New line after each batch # Convert lists to arrays dx = np.array(dx) x = np.array(x) z = np.array(z) if batch == 0: # Set up plots training_fig = plt.figure() ax_unit = training_fig.add_subplot(2, 1, 1) ax_out = training_fig.add_subplot(2, 1, 2) tvec = np.expand_dims(np.arange(0, len(inp)) * p['dt'], axis=1) # Create output and target lines lines_targ_out = plt.Line2D(np.repeat(tvec, targ.shape[1], axis=1).T, targ.T, linestyle='--', color='r') lines_out = plt.Line2D(np.repeat(tvec, targ.shape[1], axis=1).T, z.T, color='b') ax_out.add_line(lines_targ_out) ax_out.add_line(lines_out) # Create recurrent unit and DRNN target lines lines_targ_unit = {} lines_unit = {} for i in range(5): lines_targ_unit['%g' % i] = plt.Line2D(tvec, dx[:, i], linestyle='--', color='r') lines_unit['%g' % i] = plt.Line2D(tvec, x[:, i], color='b') ax_unit.add_line(lines_targ_unit['%g' % i]) ax_unit.add_line(lines_unit['%g' % i]) # Set up the axes ax_out.set_xlim([0, p['dt'] * len(inp)]) ax_unit.set_xlim([0, p['dt'] * len(inp)]) ax_out.set_ylim([-1.2, 1.2]) ax_unit.set_ylim([-2, 10]) ax_out.set_title('Output') ax_unit.set_title('Recurrent units, batch %g' % (batch + 1)) # Labels ax_out.set_xlabel('Time (s)') ax_out.legend([lines_targ_out, lines_out], 
['Target', 'RNN'], loc=1) else: # Update the plot tvec = np.expand_dims(np.arange(0, len(inp)) * p['dt'], axis=1) ax_out.set_xlim([0, p['dt'] * len(inp)]) ax_unit.set_xlim([0, p['dt'] * len(inp)]) ax_unit.set_title('Recurrent units, batch %g' % (batch + 1)) lines_targ_out.set_xdata( np.repeat(tvec, targ.shape[1], axis=1).T) lines_targ_out.set_ydata(targ.T) lines_out.set_xdata( np.repeat(tvec, targ.shape[1], axis=1).T) lines_out.set_ydata(z.T) for i in range(5): lines_targ_unit['%g' % i].set_xdata(tvec) lines_targ_unit['%g' % i].set_ydata(dx[:, i]) lines_unit['%g' % i].set_xdata(tvec) lines_unit['%g' % i].set_ydata(x[:, i]) training_fig.canvas.draw() if monitor_training == 1: # Now for some visualization to see how things went: stats_fig = plt.figure(figsize=(8, 10)) plt.subplot(3, 2, 1) plt.title('Recurrent learning error ratio') plt.plot(J_err_ratio) plt.subplot(3, 2, 3) plt.title('Recurrent error magnitude') plt.plot(J_err_mag) plt.subplot(3, 2, 5) plt.title('Recurrent weights norm') plt.plot(J_norm) plt.subplot(3, 2, 2) plt.plot(w_err_ratio) plt.title('Output learning error ratio') plt.subplot(3, 2, 4) plt.plot(w_err_mag) plt.title('Output error magnitude') plt.subplot(3, 2, 6) plt.plot(w_norm) plt.title('Output weights norm') stats_fig.canvas.draw() print('Done training!') ####################### TEMPORARY ####################### TEMPORARY return thetas ####################### TEMPORARY ####################### TEMPORARY elif algorithm == 'grad': ''' Use this setting to train with a gradient-based optimization (in this case, the adam optimizer). Parameters: In self.p, the parameters starting with grad_ control this function. ''' # First, define the training loss function def training_loss(x, iteration, myparams=kwargs, showplot=0): error = 0 self.rnn_par = x batch_size = self.p['grad_batch_size'] for i in range(batch_size): self.initialize_act() inps_and_targs = InpsAndTargsFunc(dt=self.p['dt'], **myparams) inputs = inps_and_targs['inps'] target = inps_and_targs['targs'] targ_idx = inps_and_targs['targ_idx'] outputs = self.run(inputs)[0] error += np.sum( (outputs[targ_idx, :] - target[targ_idx, :])** 2) / target[targ_idx, 0].size error = error / batch_size if showplot: fig = plt.gcf() fig.add_subplot(1, 2, 2) plt.cla() plt.plot(target, 'r--') plt.plot(outputs, 'b') fig.canvas.draw() return error # Function from David Sussillo that allows for better monitoring and interfacing def myadam(grad, init_params, callback=None, num_iters=100, step_sizes=0.001, b1=0.9, b2=0.999, eps=10**-8, gnorm_max=np.inf, last_m=None, last_v=None, last_i=0, lossfun=[], printstuff=0): """Adam as described in http://arxiv.org/pdf/1412.6980.pdf. It's basically RMSprop with momentum and some correction terms.""" flattened_grad, unflatten, x = autograd.util.flatten_func( grad, init_params) if type(step_sizes) == float or type(step_sizes) == int: step_sizes = step_sizes * np.ones(num_iters) else: assert len(step_sizes) == num_iters m = np.zeros(len(x)) if last_m is None else last_m v = np.zeros(len(x)) if last_v is None else last_v for i in range(num_iters): g = flattened_grad(x, i) gnorm = np.linalg.norm(g) if gnorm > gnorm_max: if printstuff: print(" Gradient norm was: %0.4f" % gnorm) g = g * gnorm_max / gnorm gnorm = np.linalg.norm(g) if printstuff: print(" Gradient norm: %0.4f" % gnorm) print(" Step size: %0.4f" % step_sizes[i]) if callback: callback(unflatten(x), i, unflatten(g), lossfun=lossfun) m = (1 - b1) * g + b1 * m # First moment estimate. v = (1 - b2) * (g**2) + b2 * v # Second moment estimate. 
mhat = m / (1 - b1**(i + last_i + 1)) # Bias correction. vhat = v / (1 - b2**(i + last_i + 1)) x = x - step_sizes[i] * mhat / (np.sqrt(vhat) + eps) return unflatten(x), (m, v, i + last_i) def callback(weights, iteration, gradient, total_loss=[], lossfun=[]): loss = (lossfun(weights, 0, showplot=1)) total_loss.append(loss) if iteration > 0: fig = plt.gcf() fig.add_subplot(1, 2, 1) plt.semilogy([iteration - 1, iteration], [total_loss[-2], total_loss[-1]], 'b-') fig.canvas.draw() plt.title('Iteration %d' % (iteration + 1)) plt.ylabel('Training loss') plt.xlabel('Iteration') def make_step_sizes(): init_stepsize = self.p['grad_init_stepsize'] decay_factor = self.p['grad_stepsize_decay'] num_iters = self.p['grad_num_iters'] step_sizes = init_stepsize * decay_factor**np.ones( (num_iters)) #np.arange(num_iters) return step_sizes plt.figure() x = self.rnn_par loss_grad = grad(training_loss) x_fin = myadam(loss_grad, x, callback=callback, num_iters=self.p['grad_num_iters'], step_sizes=make_step_sizes(), lossfun=training_loss, gnorm_max=self.p['grad_norm_clip'])[0]
def empty(self, valid):
    if valid:
        return np.eye(self.__size) * (self.__diag_lb + 1)
    else:
        return np.empty((self.__size, self.__size))
def experiment(sname, seed, datasize, nystr=False, args=None): def LMO_err(params, M=10): al, bl = np.exp(params) L = bl * bl * np.exp(-L0 / al / al / 2) + 1e-6 * EYEN if nystr: tmp_mat = L @ eig_vec_K C = L - tmp_mat @ np.linalg.inv(eig_vec_K.T @ tmp_mat / N2 + inv_eig_val_K) @ tmp_mat.T / N2 c = C @ W_nystr_Y else: LWL_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN) C = L @ LWL_inv @ L / N2 c = C @ W @ Y * N2 # TODO: next c_y = c - Y lmo_err = 0 N = 0 for ii in range(1): permutation = np.random.permutation(X.shape[0]) for i in range(0, X.shape[0], M): indices = permutation[i:i + M] K_i = W[np.ix_(indices, indices)] * N2 C_i = C[np.ix_(indices, indices)] c_y_i = c_y[indices] b_y = np.linalg.inv(np.eye(M) - C_i @ K_i) @ c_y_i lmo_err += b_y.T @ K_i @ b_y N += 1 return lmo_err[0, 0] / N / M ** 2 def callback0(params, timer=None): global Nfeval, prev_norm, opt_params, opt_test_err if Nfeval % 1 == 0: al, bl = params L = bl * bl * np.exp(-L0 / al / al / 2) + 1e-6 * EYEN if nystr: alpha = EYEN - eig_vec_K @ np.linalg.inv( eig_vec_K.T @ L @ eig_vec_K / N2 + np.diag(1 / eig_val_K)) @ eig_vec_K.T @ L / N2 alpha = alpha @ W_nystr @ Y else: LWL_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN) alpha = LWL_inv @ L @ W @ Y # L_W_inv = chol_inv(W*N2+L_inv) test_L = bl * bl * np.exp(-test_L0 / al / al / 2) pred_mean = test_L @ alpha if timer: return test_err = ((pred_mean - test_Y) ** 2).mean() # ((pred_mean-test_Y)**2/np.diag(pred_cov)).mean()+(np.log(np.diag(pred_cov))).mean() norm = alpha.T @ L @ alpha Nfeval += 1 if prev_norm is not None: if norm[0, 0] / prev_norm >= 3: if opt_params is None: opt_test_err = test_err opt_params = params print(True, opt_params, opt_test_err, prev_norm) raise Exception if prev_norm is None or norm[0, 0] <= prev_norm: prev_norm = norm[0, 0] opt_test_err = test_err opt_params = params print('params,test_err, norm: ', opt_params, opt_test_err, prev_norm) def get_causal_effect(params, do_A, w): "to be called within experiment function." 
al, bl = params L = bl * bl * np.exp(-L0 / al / al / 2) + 1e-6 * EYEN if nystr: alpha = EYEN - eig_vec_K @ np.linalg.inv( eig_vec_K.T @ L @ eig_vec_K / N2 + np.diag(1 / eig_val_K / N2)) @ eig_vec_K.T @ L / N2 alpha = alpha @ W_nystr @ Y * N2 else: LWL_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN) alpha = LWL_inv @ L @ W @ Y # L_W_inv = chol_inv(W*N2+L_inv) EYhat_do_A = [] for a in do_A: a = np.repeat(a, [w.shape[0]]).reshape(-1, 1) w = w.reshape(-1, 1) aw = np.concatenate([a, w], axis=-1) ate_L0 = _sqdist(aw, X) ate_L = bl * bl * np.exp(-ate_L0 / al / al / 2) h_out = ate_L @ alpha mean_h = np.mean(h_out).reshape(-1, 1) EYhat_do_A.append(mean_h) print('a = {}, beta_a = {}'.format(np.mean(a), mean_h)) return np.concatenate(EYhat_do_A) # train,dev,test = load_data(ROOT_PATH+'/data/zoo/{}_{}.npz'.format(sname,datasize)) # X = np.vstack((train.x,dev.x)) # Y = np.vstack((train.y,dev.y)) # Z = np.vstack((train.z,dev.z)) # test_X = test.x # test_Y = test.g t1 = time.time() train, dev, test = load_data(ROOT_PATH + "/data/zoo/" + sname + '/main_orig.npz') # train, dev, test = train[:300], dev[:100], test[:100] t2 = time.time() print('t2 - t1 = ', t2 - t1) Y = np.concatenate((train.y, dev.y), axis=0).reshape(-1, 1) # test_Y = test.y AZ_train, AW_train = bundle_az_aw(train.a, train.z, train.w) AZ_test, AW_test = bundle_az_aw(test.a, test.z, test.w) AZ_dev, AW_dev = bundle_az_aw(dev.a, dev.z, test.w) X, Z = np.concatenate((AW_train, AW_dev), axis=0), np.concatenate((AZ_train, AZ_dev), axis=0) test_X, test_Y = AW_test, test.y.reshape(-1, 1) # TODO: is test.g just test.y? t3 = time.time() print('t3 - t2', t3-t2) EYEN = np.eye(X.shape[0]) ak0, ak1 = get_median_inter_mnist(Z[:, 0:1]), get_median_inter_mnist(Z[:, 1:2]) N2 = X.shape[0] ** 2 W0 = _sqdist(Z, None) print('av kernel indicator: ', args.av_kernel) W = np.exp(-W0 / ak0 / ak0 / 2) / N2 if not args.av_kernel \ else (np.exp(-W0 / ak0 / ak0 / 2) + np.exp(-W0 / ak0 / ak0 / 200) + np.exp(-W0 / ak0 / ak0 * 50)) / 3 / N2 del W0 L0, test_L0 = _sqdist(X, None), _sqdist(test_X, X) t4 = time.time() print('t4 - t3', t4-t3) # measure time # callback0(np.random.randn(2)/10,True) # np.save(ROOT_PATH + "/MMR_IVs/results/zoo/" + sname + '/LMO_errs_{}_nystr_{}_time.npy'.format(seed,train.x.shape[0]),time.time()-t0) # return params0 = np.random.randn(2) / 10 bounds = None # [[0.01,10],[0.01,5]] if nystr: for _ in range(seed + 1): random_indices = np.sort(np.random.choice(range(W.shape[0]), nystr_M, replace=False)) # decomposing n^2W eig_val_K, eig_vec_K = nystrom_decomp(W * N2, random_indices) inv_eig_val_K = np.diag(1 / eig_val_K * N2) W_nystr = eig_vec_K @ np.diag(eig_val_K) @ eig_vec_K.T / N2 # checked, this is the same as W_V W_nystr_Y = W_nystr @ Y t5 = time.time() print('t5 - t4', t5-t4) obj_grad = value_and_grad(lambda params: LMO_err(params)) # try: res = minimize(obj_grad, x0=params0, bounds=bounds, method='L-BFGS-B', jac=True, options={'maxiter': 5000}, callback=callback0) # res stands for results (not residuals!). 
# except Exception as e: # print(e) PATH = ROOT_PATH + "/MMR_IVs/results/zoo/" + sname + "/" assert opt_params is not None params = opt_params do_A = np.load(ROOT_PATH + "/data/zoo/" + sname + '/do_A_orig.npz')['do_A'] EY_do_A_gt = np.load(ROOT_PATH + "/data/zoo/" + sname + '/do_A_orig.npz')['gt_EY_do_A'] w_sample = train.w EYhat_do_A = get_causal_effect(params=params, do_A=do_A, w=w_sample) plt.figure() plt.plot([i + 1 for i in range(20)], EYhat_do_A) plt.xlabel('A') plt.ylabel('EYdoA-est') plt.savefig( os.path.join(PATH, str(date.today()), 'causal_effect_estimates_nystr_{}'.format(AW_train.shape[0]) + '.png')) plt.close() print('ground truth ate: ', EY_do_A_gt) visualise_ATEs(EY_do_A_gt, EYhat_do_A, x_name='E[Y|do(A)] - gt', y_name='beta_A', save_loc=os.path.join(PATH, str(date.today())) + '/', save_name='ate_{}_nystr.png'.format(AW_train.shape[0])) causal_effect_mean_abs_err = np.mean(np.abs(EY_do_A_gt - EYhat_do_A)) causal_effect_mae_file = open(os.path.join(PATH, str(date.today()), "ate_mae_{}_nystrom.txt".format(AW_train.shape[0])), "a") causal_effect_mae_file.write("mae_: {}\n".format(causal_effect_mean_abs_err)) causal_effect_mae_file.close() os.makedirs(PATH, exist_ok=True) np.save(os.path.join(PATH, str(date.today()), 'LMO_errs_{}_nystr_{}.npy'.format(seed, AW_train.shape[0])), [opt_params, prev_norm, opt_test_err])
def experiment(sname, seed, nystr=True): def LMO_err(params, M=2, verbal=False): global Nfeval params = np.exp(params) al, bl = params[:-1], params[ -1] # params[:int(n_params/2)], params[int(n_params/2):] # [np.exp(e) for e in params] if train.x.shape[1] < 5: train_L = bl**2 * np.exp(-train_L0 / al**2 / 2) + 1e-4 * EYEN else: train_L, dev_L = 0, 0 for i in range(len(al)): train_L += train_L0[i] / al[i]**2 train_L = bl * bl * np.exp(-train_L / 2) + 1e-4 * EYEN tmp_mat = train_L @ eig_vec_K C = train_L - tmp_mat @ np.linalg.inv(eig_vec_K.T @ tmp_mat / N2 + inv_eig_val) @ tmp_mat.T / N2 c = C @ W_nystr_Y * N2 c_y = c - train.y lmo_err = 0 N = 0 for ii in range(1): permutation = np.random.permutation(train.x.shape[0]) for i in range(0, train.x.shape[0], M): indices = permutation[i:i + M] K_i = train_W[np.ix_(indices, indices)] * N2 C_i = C[np.ix_(indices, indices)] c_y_i = c_y[indices] b_y = np.linalg.inv(np.eye(M) - C_i @ K_i) @ c_y_i lmo_err += b_y.T @ K_i @ b_y N += 1 return lmo_err[0, 0] / M**2 def callback0(params): global Nfeval, prev_norm, opt_params, opt_test_err if Nfeval % 1 == 0: params = np.exp(params) print('params:', params) al, bl = params[:-1], params[-1] if train.x.shape[1] < 5: train_L = bl**2 * np.exp(-train_L0 / al**2 / 2) + 1e-4 * EYEN test_L = bl**2 * np.exp(-test_L0 / al**2 / 2) else: train_L, test_L = 0, 0 for i in range(len(al)): train_L += train_L0[i] / al[i]**2 test_L += test_L0[i] / al[i]**2 train_L = bl * bl * np.exp(-train_L / 2) + 1e-4 * EYEN test_L = bl * bl * np.exp(-test_L / 2) if nystr: tmp_mat = eig_vec_K.T @ train_L alpha = EYEN - eig_vec_K @ np.linalg.inv( tmp_mat @ eig_vec_K / N2 + inv_eig_val) @ tmp_mat / N2 alpha = alpha @ W_nystr_Y * N2 else: LWL_inv = chol_inv(train_L @ train_W @ train_L + train_L / N2 + JITTER * EYEN) alpha = LWL_inv @ train_L @ train_W @ train.y pred_mean = test_L @ alpha test_err = ((pred_mean - test.g)**2).mean() norm = alpha.T @ train_L @ alpha Nfeval += 1 if prev_norm is not None: if norm[0, 0] / prev_norm >= 3: if opt_test_err is None: opt_test_err = test_err opt_params = params print(True, opt_params, opt_test_err, prev_norm, norm[0, 0]) raise Exception if prev_norm is None or norm[0, 0] <= prev_norm: prev_norm = norm[0, 0] opt_test_err = test_err opt_params = params print(True, opt_params, opt_test_err, prev_norm, norm[0, 0]) train, dev, test = load_data(ROOT_PATH + '/data/' + sname + '/main.npz') del dev # avoid same indices when run on the cluster for _ in range(seed + 1): random_indices = np.sort( np.random.choice(range(train.x.shape[0]), nystr_M, replace=False)) EYEN = np.eye(train.x.shape[0]) N2 = train.x.shape[0]**2 # precompute to save time on parallized computation if train.z.shape[1] < 5: ak = get_median_inter_mnist(train.z) else: ak = np.load(ROOT_PATH + '/mnist_precomp/{}_ak.npy'.format(sname)) train_W = np.load(ROOT_PATH + '/mnist_precomp/{}_train_K0.npy'.format(sname)) train_W = (np.exp(-train_W / ak / ak / 2) + np.exp( -train_W / ak / ak / 200) + np.exp(-train_W / ak / ak * 50)) / 3 / N2 if train.x.shape[1] < 5: train_L0 = _sqdist(train.x, None) test_L0 = _sqdist(test.x, train.x) else: L0s = np.load(ROOT_PATH + '/mnist_precomp/{}_Ls.npz'.format(sname)) train_L0 = L0s['train_L0'] # dev_L0 = L0s['dev_L0'] test_L0 = L0s['test_L0'] del L0s if train.x.shape[1] < 5: params0 = np.random.randn(2) * 0.1 else: params0 = np.random.randn(len(train_L0) + 1) * 0.1 bounds = None eig_val_K, eig_vec_K = nystrom_decomp(train_W * N2, random_indices) W_nystr_Y = eig_vec_K @ np.diag(eig_val_K) @ eig_vec_K.T @ train.y / N2 
inv_eig_val = np.diag(1 / eig_val_K / N2) obj_grad = value_and_grad(lambda params: LMO_err(params)) res = minimize(obj_grad, x0=params0, bounds=bounds, method='L-BFGS-B', jac=True, options={ 'maxiter': 5000, 'disp': True, 'ftol': 0 }, callback=callback0) PATH = ROOT_PATH + "/MMR_IVs/results/" + sname + "/" os.makedirs(PATH, exist_ok=True) np.save(PATH + 'LMO_errs_{}_nystr.npy'.format(seed), [opt_params, prev_norm, opt_test_err])
def __init__(self, env, nb_steps, init_state, init_action_sigma=1., policy_kl_bound=0.1, param_nominal_kl_bound=100., param_regularizer_kl_bound=1., policy_kl_stepwise=False, activation=None, slew_rate=False, action_penalty=None, nominal_variance=1e-8): # logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.DEBUG) self.env = env # expose necessary functions self.env_dyn = self.env.unwrapped.dynamics self.env_noise = self.env.unwrapped.sigma self.env_cost = self.env.unwrapped.cost self.env_init = init_state self.ulim = self.env.action_space.high self.dm_state = self.env.observation_space.shape[0] self.dm_act = self.env.action_space.shape[0] self.dm_param = self.dm_state * (self.dm_act + self.dm_state + 1) self.nb_steps = nb_steps # use slew rate penalty or not self.env.unwrapped.slew_rate = slew_rate if action_penalty is not None: self.env.unwrapped.uw = action_penalty * np.ones((self.dm_act, )) self.policy_kl_stepwise = policy_kl_stepwise if self.policy_kl_stepwise: self.policy_kl_bound = policy_kl_bound * np.ones((self.nb_steps, )) self.alpha = 1e8 * np.ones((self.nb_steps, )) else: self.policy_kl_bound = policy_kl_bound * np.ones((1, )) self.alpha = 1e8 * np.ones((1, )) self.param_nominal_kl_bound = param_nominal_kl_bound * np.ones((1, )) self.beta = 1e16 * np.ones((1, )) self.param_regularizer_kl_bound = param_regularizer_kl_bound * np.ones( (1, )) self.eta = 1e16 * np.ones((1, )) # create state distribution and initialize first time step self.xdist = Gaussian(self.dm_state, self.nb_steps + 1) self.xdist.mu[..., 0], self.xdist.sigma[..., 0] = self.env_init self.udist = Gaussian(self.dm_act, self.nb_steps) self.xudist = Gaussian(self.dm_state + self.dm_act, self.nb_steps + 1) self.vfunc = QuadraticStateValue(self.dm_state, self.nb_steps + 1) self.qfunc = QuadraticStateActionValue(self.dm_state, self.dm_act, self.nb_steps) # We assume process noise over dynamics is known self.noise = np.zeros((self.dm_state, self.dm_state, self.nb_steps)) for t in range(self.nb_steps): self.noise[..., t] = self.env_noise self.param = MatrixNormalParameters(self.dm_state, self.dm_act, self.nb_steps) self.nominal = MatrixNormalParameters(self.dm_state, self.dm_act, self.nb_steps) # LQG dynamics from autograd import jacobian input = tuple([np.zeros((self.dm_state, )), np.zeros((self.dm_act, ))]) A = jacobian(self.env.unwrapped.dynamics, 0)(*input) B = jacobian(self.env.unwrapped.dynamics, 1)(*input) c = self.env.unwrapped.dynamics(*input) tmp = np.hstack((A, B, c[:, None])) for t in range(self.nb_steps): self.nominal.mu[..., t] = np.reshape(tmp, self.dm_param, order='F') self.nominal.sigma[..., t] = nominal_variance * np.eye(self.dm_param) self.ctl = LinearGaussianControl(self.dm_state, self.dm_act, self.nb_steps, init_action_sigma) # activation of cost function in shape of sigmoid if activation is None: self.weighting = np.ones((self.nb_steps + 1, )) elif "mult" and "shift" in activation: t = np.linspace(0, self.nb_steps, self.nb_steps + 1) self.weighting = 1. / (1. + np.exp(-activation['mult'] * (t - activation['shift']))) elif "discount" in activation: self.weighting = np.ones((self.nb_steps + 1, )) gamma = activation["discount"] * np.ones((self.nb_steps, )) self.weighting[1:] = np.cumprod(gamma) else: raise NotImplementedError self.cost = AnalyticalQuadraticCost(self.env_cost, self.dm_state, self.dm_act, self.nb_steps + 1) self.data = {}
def test_slogdet_3d():
    fun = lambda x: np.sum(np.linalg.slogdet(x)[1])
    mat = np.concatenate(
        [(rand_psd(5) + 5 * np.eye(5))[None, ...] for _ in range(3)])
    check_grads(fun)(mat)
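# rand_psd is a small test helper assumed to be defined alongside these tests;
# a minimal sketch consistent with its use above (random positive semidefinite matrix):
def rand_psd(D):
    mat = npr.randn(D, D)
    return np.dot(mat, mat.T)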
                    np.random.normal(.4, .1, [50])]),
    np.concatenate([np.random.normal(.3, .1, [50]),
                    np.random.normal(.9, .1, [50])]),
]).T
labels = ['a'] * 50 + ['b'] * 50

categories = np.unique(labels)
idx_map = {
    category: idx
    for category, idx in zip(categories, range(len(categories)))
}
labels_indexed = [idx_map[label] for label in labels]
one_hot_targets = np.eye(len(categories))[labels_indexed]

hps = {
    'lr': .001,        # <-- learning rate
    'wr': [.01, .3],   # <-- weight range
    'num_hidden_nodes': 4,
    'hidden_activation': lambda x: x,
    'channel_activation': lambda x: x,
    'output_activation': softmax,
}

params = build_params(
    inputs.shape[1],  # <-- num features
    hps['num_hidden_nodes'],
    categories,
    weight_range=hps['wr'])
def heisenberg_expand(self, U, num_wires): """Expand the given local Heisenberg-picture array into a full-system one. Args: U (array[float]): array to expand (expected to be of the dimension ``1+2*self.num_wires``) num_wires (int): total number of wires in the quantum circuit. If zero, return ``U`` as is. Returns: array[float]: expanded array, dimension ``1+2*num_wires`` """ U_dim = len(U) nw = len(self.wires) if U.ndim > 2: raise ValueError('Only order-1 and order-2 arrays supported.') if U_dim != 1 + 2 * nw: raise ValueError( '{}: Heisenberg matrix is the wrong size {}.'.format( self.name, U_dim)) if num_wires == 0 or list(self.wires) == list(range(num_wires)): # no expansion necessary (U is a full-system matrix in the correct order) return U if num_wires < len(self.wires): raise ValueError( '{}: Number of wires {} is too small to fit Heisenberg matrix'. format(self.name, num_wires)) # expand U into the I, x_0, p_0, x_1, p_1, ... basis dim = 1 + num_wires * 2 def loc(w): "Returns the slice denoting the location of (x_w, p_w) in the basis." ind = 2 * w + 1 return slice(ind, ind + 2) if U.ndim == 1: W = np.zeros(dim) W[0] = U[0] for k, w in enumerate(self.wires): W[loc(w)] = U[loc(k)] elif U.ndim == 2: if isinstance(self, Expectation): W = np.zeros((dim, dim)) else: W = np.eye(dim) W[0, 0] = U[0, 0] for k1, w1 in enumerate(self.wires): s1 = loc(k1) d1 = loc(w1) # first column W[d1, 0] = U[s1, 0] # first row (for gates, the first row is always (1, 0, 0, ...), but not for observables!) W[0, d1] = U[0, s1] for k2, w2 in enumerate(self.wires): W[d1, loc(w2)] = U[s1, loc( k2)] # block k1, k2 in U goes to w1, w2 in W. return W
# lqr task
env = gym.make('LQR-TO-v0')
env._max_episode_steps = 100000

dm_state = env.observation_space.shape[0]
dm_act = env.action_space.shape[0]

horizon, nb_steps = 25, 100

state = np.zeros((dm_state, nb_steps + 1))
action = np.zeros((dm_act, nb_steps))

init_action = LinearGaussianControl(dm_state, dm_act, horizon, 5.)

state[:, 0] = env.reset()
for t in range(nb_steps):
    solver = MBGPS(env, init_state=tuple([state[:, t], 1e-16 * np.eye(dm_state)]),
                   init_action_sigma=5., nb_steps=horizon, kl_bound=1.)
    trace = solver.run(nb_iter=25, verbose=False)

    _nominal_action = solver.udist.mu

    action[:, t] = _nominal_action[:, 0]
    state[:, t + 1], _, _, _ = env.step(action[:, t])

    print('Time Step:', t, 'Cost:', trace[-1])


import matplotlib.pyplot as plt

plt.figure()
def test_solve_arg1_3d_3d():
    D = 4
    A = npr.randn(D + 1, D, D) + 5 * np.eye(D)
    B = npr.randn(D + 1, D, D + 2)
    fun = lambda A: np.linalg.solve(A, B)
    check_grads(fun)(A)
def cov(self, value):
    self._sqrt_cov = np.linalg.cholesky(value + self.reg * np.eye(self.dm_obs))
#!/usr/bin/env python
# coding: utf-8

import numpy as np
import autograd.numpy as anp  # Thinly-wrapped numpy

from .pyerrors import derived_observable

### This code block is directly taken from the current master branch of autograd
### and remains only until the new version is released on PyPi
from functools import partial
from autograd.extend import defvjp

_dot = partial(anp.einsum, '...ij,...jk->...ik')

# batched diag
_diag = lambda a: anp.eye(a.shape[-1]) * a


# batched diagonal, similar to matrix_diag in tensorflow
def _matrix_diag(a):
    reps = anp.array(a.shape)
    reps[:-1] = 1
    reps[-1] = a.shape[-1]
    newshape = list(a.shape) + [a.shape[-1]]
    return _diag(anp.tile(a, reps).reshape(newshape))


# https://arxiv.org/pdf/1701.00392.pdf Eq(4.77)
# Note the formula from Sec3.1 in https://people.maths.ox.ac.uk/gilesm/files/NA-08-01.pdf is incomplete
def grad_eig(ans, x):
    """Gradient of a general square (complex valued) matrix"""
    e, u = ans  # eigenvalues as 1d array, eigenvectors in columns
    n = e.shape[-1]

    def vjp(g):
import autograd.scipy.special as special
import autograd.numpy as np
import wh, wp
import util
import gtilde
from scipy.integrate import quad
from scipy.stats import multivariate_normal
from autograd import primitive
import matplotlib.pyplot as plt

__nugget_scalar = 1e-7
__nugget = lambda n: __nugget_scalar * np.eye(n)
__verify = False
__euler_mascheroni = 0.57721566490153286060651209008240243104215933593992

from scipy.misc import logsumexp


def psi(x, gamma, alpha, trange):
    tmin, tmax = trange
    y = x
    d = x.shape[0]
    exp1 = ((np.pi * alpha / 4) ** (d/2)) * (gamma**2) * np.exp(-wh.sqdist(x, None) / (4*alpha))
    xbar = 0.5 * (x.reshape(x.shape[0], x.shape[1], 1) + y.reshape(y.shape[0], 1, y.shape[1]))
    d = special.erf((xbar - tmin) / np.sqrt(alpha)) - special.erf((xbar - tmax) / np.sqrt(alpha))
    prodd = np.prod(d, axis=0)
    rval = exp1 * prodd
    rval = 0.5 * (rval + rval.T)
    rval += 2 * gamma * __nugget_scalar
    rval += np.eye(rval.shape[0]) * __nugget_scalar ** 2
    return rval
L2_reg = 1.0
D = 784
init_init_stddev_scale = 0.00001
init_langevin_stepsize = 0.01
init_langevin_noise_size = 0.00001
prior_relax = 0.001

# train_mnist_model()   # Comment after running once.

with open('mnist_models.pkl') as f:
    trained_weights, all_mean, all_cov = pickle.load(f)

# Regularize all_cov
all_cov = all_cov + prior_relax * np.eye(D)

N_weights, predict_fun, loss_fun, frac_err, nn_like = make_nn_funs(layer_sizes, L2_reg)

prior_func = build_logprob_mvn(all_mean, all_cov)

def nn_likelihood(images, labels):
    prior = prior_func(images)
    likelihood = nn_like(trained_weights, images, labels)
    return prior + likelihood

gen_labels = one_hot(np.array([i % 10 for i in range(num_samples)]), 10)

labeled_likelihood = lambda images: nn_likelihood(images, gen_labels)

init_mean = all_mean
    log_topics_KV = np.log(topics_KV)
    log_topics_KVm1 = log_topics_KV[:, :-1]
    log_topics_KVm1 = log_topics_KVm1 - log_topics_KV[:, -1][:, np.newaxis]
    return log_topics_KVm1 + np.log1p(-V * min_eps)


def to_safe_common_arr(topics_KV, min_eps=MIN_EPS):
    ''' Force provided topics_KV array to be numerically safe.

    Returns
    -------
    topics_KV : 2D array, size K x V
        minimum value of each row is min_eps
        each row will sum to 1.0 (+/- min_eps)
    '''
    K, V = topics_KV.shape
    topics_KV = topics_KV.copy()
    for rep in range(2):
        topics_KV /= topics_KV.sum(axis=1)[:, np.newaxis]
        np.maximum(topics_KV, min_eps, out=topics_KV)
    return topics_KV


if __name__ == '__main__':
    topics_KV = np.eye(3) + np.ones((3, 3))
    topics_KV /= topics_KV.sum(axis=1)[:, np.newaxis]
    print('------ before')
    print(topics_KV)
    print('------ after')
    print(to_common_arr(to_diffable_arr(topics_KV)))
def make_psd(mat):
    return np.dot(mat.T, mat) + np.eye(mat.shape[0])
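# A quick sanity check for the helper above (a sketch, not part of the original
# code): the Gram term mat.T @ mat is symmetric PSD and adding the identity shifts
# every eigenvalue up by one, so a Cholesky factorization should always succeed.
if __name__ == '__main__':
    A = np.random.randn(5, 5)
    np.linalg.cholesky(make_psd(A))  # raises LinAlgError if not positive definite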
def _solve_n_slack_qp(self, constraints, n_samples): C = self.C joint_features = [c[1] for sample in constraints for c in sample] losses = [c[2] for sample in constraints for c in sample] joint_feature_matrix = np.vstack(joint_features).astype(np.float) n_constraints = len(joint_features) P = cvxopt.matrix(np.dot(joint_feature_matrix, joint_feature_matrix.T)) # q contains loss from margin-rescaling q = cvxopt.matrix(-np.array(losses, dtype=np.float)) # constraints are a bit tricky. first, all alpha must be >zero idy = np.identity(n_constraints) tmp1 = np.zeros(n_constraints) # box constraint: sum of all alpha for one example must be <= C blocks = np.zeros((n_samples, n_constraints)) first = 0 for i, sample in enumerate(constraints): blocks[i, first:first + len(sample)] = 1 first += len(sample) # positivity constraints: if self.negativity_constraint is None: # empty constraints zero_constr = np.zeros(0) joint_features_constr = np.zeros((0, n_constraints)) else: joint_features_constr = joint_feature_matrix.T[ self.negativity_constraint] zero_constr = np.zeros(len(self.negativity_constraint)) # put together G = cvxopt.sparse( cvxopt.matrix(np.vstack((-idy, blocks, joint_features_constr)))) tmp2 = np.ones(n_samples) * C h = cvxopt.matrix(np.hstack((tmp1, tmp2, zero_constr))) # solve QP model cvxopt.solvers.options['feastol'] = 1e-5 try: solution = cvxopt.solvers.qp(P, q, G, h) except ValueError: solution = {'status': 'error'} if solution['status'] != "optimal": print("regularizing QP!") P = cvxopt.matrix( np.dot(joint_feature_matrix, joint_feature_matrix.T) + 1e-8 * np.eye(joint_feature_matrix.shape[0])) print("P {}".format(P)) solution = cvxopt.solvers.qp(P, q, G, h) if solution['status'] != "optimal": raise ValueError("QP solver failed. Try regularizing your QP.") # Lagrange multipliers a = np.ravel(solution['x']) self.prune_constraints(constraints, a) self.old_solution = solution # Support vectors have non zero lagrange multipliers sv = a > self.inactive_threshold * C box = np.dot(blocks, a) if self.verbose > 1: print("%d support vectors out of %d points" % (np.sum(sv), n_constraints)) # calculate per example box constraint: print("Box constraints at C: %d" % np.sum(1 - box / C < 1e-3)) print("dual objective: %f" % -solution['primal objective']) self.w = np.dot(a, joint_feature_matrix) return -solution['primal objective']
# Define experimental constants.
CHI_E = -5.65e-4                 # GHz
CHI_E_2 = 7.3e-7
KAPPA = 2.09e-6                  # GHz
MAX_AMP_C = 2 * anp.pi * 2e-4    # GHz
MAX_AMP_T = 2 * anp.pi * 3e-3    # GHz

# Define the system.
CAVITY_STATE_COUNT = 3
CAVITY_ANNIHILATE = get_annihilation_operator(CAVITY_STATE_COUNT)
CAVITY_CREATE = get_creation_operator(CAVITY_STATE_COUNT)
CAVITY_NUMBER = anp.matmul(CAVITY_CREATE, CAVITY_ANNIHILATE)
CAVITY_QUADRATURE = matmuls(CAVITY_CREATE, CAVITY_CREATE,
                            CAVITY_ANNIHILATE, CAVITY_ANNIHILATE)
CAVITY_I = anp.eye(CAVITY_STATE_COUNT)
CAVITY_VACUUM = anp.zeros((CAVITY_STATE_COUNT, 1))
CAVITY_ZERO = anp.copy(CAVITY_VACUUM)
CAVITY_ZERO[0][0] = 1.
CAVITY_ONE = anp.copy(CAVITY_VACUUM)
CAVITY_ONE[1][0] = 1.

TRANSMON_STATE_COUNT = 2
TRANSMON_VACUUM = anp.zeros((TRANSMON_STATE_COUNT, 1))
TRANSMON_G = anp.copy(TRANSMON_VACUUM)
TRANSMON_G[0][0] = 1.
TRANSMON_G_DAGGER = conjugate_transpose(TRANSMON_G)
TRANSMON_E = anp.copy(TRANSMON_VACUUM)
TRANSMON_E[1][0] = 1.
TRANSMON_E_DAGGER = conjugate_transpose(TRANSMON_E)
TRANSMON_I = anp.eye(TRANSMON_STATE_COUNT)
c_p_0 = param.c_p_0 * np.ones(Nyz_dofs * (mesh.Nr - 1))

# Concatenate variables
y_0 = np.concatenate([V_0, I_0, c_n_0, c_p_0])

# Make matrices for DAE system ------------------------------------------------
# NOTE: variables arranged as V, I, c_n, c_p, with c_n ordered by radial
# coordinate, then y,z coordinate (labelling as decided by dolfin)
# NOTE: should probably work with sparse matrices

# Total number of dofs
param.N_dofs = 2 * Nyz_dofs + 2 * Nyz_dofs * (mesh.Nr - 1)  # Add to param

# Mass matrix
M = np.zeros([param.N_dofs, param.N_dofs])
M[2 * Nyz_dofs:, 2 * Nyz_dofs:] = np.eye(2 * Nyz_dofs * (mesh.Nr - 1))

# Linear part
A = np.zeros([param.N_dofs, param.N_dofs])
A[0:Nyz_dofs, 0:Nyz_dofs] = K
A[0:Nyz_dofs, Nyz_dofs:2 * Nyz_dofs] = param.alpha * np.eye(Nyz_dofs)
A[Nyz_dofs:2 * Nyz_dofs, 0:Nyz_dofs] = np.eye(Nyz_dofs)

# Load vector (RHS)
b = np.zeros(param.N_dofs)
b[0:Nyz_dofs] = -(load_tab_n + load_tab_p)
# Remaining entries depend on time and are computed during solve


def update_load(t, y, mesh, param):