def _cumulative_hazard(self, params, T, *Xs):
    alpha_params = params[self._LOOKUP_SLICE["alpha_"]]
    alpha_ = np.exp(np.dot(Xs[0], alpha_params))
    beta_params = params[self._LOOKUP_SLICE["beta_"]]
    beta_ = np.exp(np.dot(Xs[1], beta_params))
    return np.log1p((T / alpha_) ** beta_)
def _get_responsibilities(self, pi, g, beta, mu_ivp, alpha):
    """ Gets the posterior responsibilities for each component of the mixture. """
    # Use None as the "not yet filled" sentinel; comparing ndarrays to [] is fragile.
    probs = [None] * len(self.N_data)
    for i, ifx in enumerate(self._ifix):
        zM = self._forward(g, beta, mu_ivp[i], ifx)
        for q, yq in enumerate(self.Y_train_):
            logprob = norm.logpdf(
                yq, zM[self.data_inds[q], :, q],
                scale=1 / np.sqrt(alpha))
            # sum over the dimension component
            logprob = logprob.sum(-1)
            if probs[q] is None:
                probs[q] = logprob
            else:
                probs[q] = np.column_stack((probs[q], logprob))

    probs = [lp - pi for lp in probs]

    # subtract the maximum for exponential normalization
    probs = [p - np.atleast_1d(p.max(axis=-1))[:, None] for p in probs]
    probs = [np.exp(p) / np.exp(p).sum(-1)[:, None] for p in probs]

    return probs
def setUp(self):
    self.X = None
    self.cost = lambda X: np.exp(np.sum(X**2))

    n1 = self.n1 = 3
    n2 = self.n2 = 4
    n3 = self.n3 = 5

    Y = self.Y = rnd.randn(n1, n2, n3)
    A = self.A = rnd.randn(n1, n2, n3)

    # Calculate correct cost and grad...
    self.correct_cost = np.exp(np.sum(Y ** 2))
    self.correct_grad = correct_grad = 2 * Y * np.exp(np.sum(Y ** 2))

    # ... and hess
    # First form hessian tensor H (6th order)
    Y1 = Y.reshape(n1, n2, n3, 1, 1, 1)
    Y2 = Y.reshape(1, 1, 1, n1, n2, n3)

    # Create an n1 x n2 x n3 x n1 x n2 x n3 diagonal tensor
    diag = np.eye(n1 * n2 * n3).reshape(n1, n2, n3, n1, n2, n3)

    H = np.exp(np.sum(Y ** 2)) * (4 * Y1 * Y2 + 2 * diag)

    # Then 'right multiply' H by A
    Atensor = A.reshape(1, 1, 1, n1, n2, n3)

    self.correct_hess = np.sum(H * Atensor, axis=(3, 4, 5))

    self.backend = AutogradBackend()
def ackley(x):
    a, b, c = 20.0, -0.2, 2.0*np.pi
    len_recip = 1.0/len(x)
    sum_sqrs = sum(x*x)
    sum_cos = sum(np.cos(c*x))
    return (-a * np.exp(b*np.sqrt(len_recip*sum_sqrs)) -
            np.exp(len_recip*sum_cos) + a + np.e)
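# Minimal usage sketch (illustrative, not from the original source): the Ackley
# function attains its global minimum of 0 at the origin, so evaluating it at a
# zero vector should give ~0 up to floating-point error.
assert abs(ackley(np.zeros(4))) < 1e-12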
def setUp(self):
    self.X = None
    self.cost = lambda X: np.exp(np.sum(X**2))

    m = self.m = 10
    n = self.n = 15

    Y = self.Y = rnd.randn(m, n)
    A = self.A = rnd.randn(m, n)

    # Calculate correct cost and grad...
    self.correct_cost = np.exp(np.sum(Y ** 2))
    self.correct_grad = correct_grad = 2 * Y * np.exp(np.sum(Y ** 2))

    # ... and hess
    # First form hessian tensor H (4th order)
    Y1 = Y.reshape(m, n, 1, 1)
    Y2 = Y.reshape(1, 1, m, n)

    # Create an m x n x m x n array with diag[i,j,k,l] == 1 iff
    # (i == k and j == l), this is a 'diagonal' tensor.
    diag = np.eye(m * n).reshape(m, n, m, n)

    H = np.exp(np.sum(Y ** 2)) * (4 * Y1 * Y2 + 2 * diag)

    # Then 'right multiply' H by A
    Atensor = A.reshape(1, 1, m, n)

    self.correct_hess = np.sum(H * Atensor, axis=(2, 3))

    self.backend = AutogradBackend()
def setUp(self):
    self.X = None
    self.cost = lambda X: np.exp(np.sum(X**2))

    n = self.n = 15

    Y = self.Y = rnd.randn(1, n)
    A = self.A = rnd.randn(1, n)

    # Calculate correct cost and grad...
    self.correct_cost = np.exp(np.sum(Y ** 2))
    self.correct_grad = correct_grad = 2 * Y * np.exp(np.sum(Y ** 2))

    # ... and hess
    # First form hessian matrix H
    # Convert Y and A into matrices (row vectors)
    Ymat = np.matrix(Y)
    Amat = np.matrix(A)

    diag = np.eye(n)

    H = np.exp(np.sum(Y ** 2)) * (4 * Ymat.T.dot(Ymat) + 2 * diag)

    # Then 'left multiply' H by A
    self.correct_hess = np.array(Amat.dot(H))

    self.backend = AutogradBackend()
def callback(params, t, g):
    # log_weights = params[:10] - logsumexp(params[:10])
    print("Iteration {} lower bound {}".format(t, -objective(params, t)))
    # print(np.exp(log_weights))

    # mean = params[0]
    # log_std = params[1]
    # norm_flow_params = params[2]
    # print(len(params[2][0][0]))
    # print('u', params[2][0])
    print('u0', params[2][0][0])
    print('u1', params[2][1][0])
    print('w0', params[2][0][1])
    print('w1', params[2][1][1])
    print('b0', params[2][0][2])
    print('b1', params[2][1][2])
    # print('b', params[2][2])

    plt.cla()
    target_distribution = lambda x: np.exp(log_density(x))
    var_distribution = lambda x: np.exp(variational_log_density(params, x))
    plot_isocontours(ax, target_distribution)
    plot_isocontours(ax, var_distribution, cmap=plt.cm.bone)
    ax.set_autoscale_on(False)

    # rs = npr.RandomState(0)
    # samples = variational_sampler(params, num_plotting_samples, rs)
    # plt.plot(samples[:, 0], samples[:, 1], 'x')

    plt.draw()
    plt.pause(1.0/30.0)
def scalar_log_lik(theta_1, theta_2, x):
    arg = (x - theta_1)
    lik1 = 1.0 / np.sqrt(2 * SIGMA_x ** 2 * np.pi) * np.exp(- np.dot(arg, arg) / (2 * SIGMA_x ** 2))
    arg = (x - theta_1 - theta_2)
    lik2 = 1.0 / np.sqrt(2 * SIGMA_x ** 2 * np.pi) * np.exp(- np.dot(arg, arg) / (2 * SIGMA_x ** 2))
    return np.log(0.5 * lik1 + 0.5 * lik2)
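# A numerically safer variant (a sketch that assumes the same module-level
# SIGMA_x constant): work in log space and combine the two equally weighted
# components with np.logaddexp instead of summing densities directly.
def scalar_log_lik_stable(theta_1, theta_2, x):
    def log_norm(arg):
        return -0.5 * np.log(2 * np.pi * SIGMA_x ** 2) - np.dot(arg, arg) / (2 * SIGMA_x ** 2)
    return np.log(0.5) + np.logaddexp(log_norm(x - theta_1),
                                      log_norm(x - theta_1 - theta_2))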
def test_multi_scalar(self):
    """ Tests functions with multiple scalar outputs """

    def f1(x):
        # one scalar input, two scalar outputs
        return x**3, np.exp(3*x)

    df = jacobian(f1)(0.5)
    self.assertAlmostEqual(3*0.5**2, df[0])
    self.assertAlmostEqual(3*np.exp(3*0.5), df[1])

    def f2(params):
        # one list, one numpy array input
        x, y = params[0]
        A = params[1]
        return np.sum(A**2) + np.cos(x) + np.exp(0.5*y)

    df = jacobian(f2)
    A = np.array([[1.0, 2.0], [3.0, 4.0]])
    params = [[0.5, np.pi], A]
    diff = df(params)
    self.assertAlmostEqual(diff[0][0], -np.sin(0.5))
    self.assertAlmostEqual(diff[0][1], 0.5*np.exp(0.5*np.pi))
    self.assertTrue(np.linalg.norm(2*A - diff[1]) < 1e-10)
def log_density(x):
    x_, y_ = x[:, 0], x[:, 1]
    sigma_density = norm.logpdf(y_, 0, 1.35)
    mu_density = norm.logpdf(x_, -0.5, np.exp(y_))
    sigma_density2 = norm.logpdf(y_, 0.1, 1.35)
    mu_density2 = norm.logpdf(x_, 0.5, np.exp(y_))
    return np.logaddexp(sigma_density + mu_density, sigma_density2 + mu_density2)
def normalizing_flows(z_0, norm_flow_params):
    '''
    z_0: [n_samples, D]
    u: [D,1]
    w: [D,1]
    b: [1]
    '''
    current_z = z_0
    all_zs = []
    all_zs.append(z_0)
    for params_k in norm_flow_params:
        u = params_k[0]
        w = params_k[1]
        b = params_k[2]

        # Appendix equations
        m_x = -1. + np.log(1. + np.exp(np.dot(w.T, u)))
        u_k = u + (-1. + np.log(1. + np.exp(np.dot(w.T, u))) - np.dot(w.T, u)) * (w / np.linalg.norm(w))
        # u_k = u  # [D,1]

        term1 = np.tanh(np.dot(current_z, w) + b)  # [n_samples, D]
        term1 = np.dot(term1, u_k.T)  # [n_samples, D]
        current_z = current_z + term1
        all_zs.append(current_z)

    return current_z, all_zs
def predict(self, x):
    if self.prob_func_ == "sigmoid":
        prob = (1.0 / (1.0 + np.exp(-np.dot(x, self.coef_) - self.intercept_)))[:, np.newaxis]
        prob = np.concatenate((1.0 - prob, prob), axis=1)
    else:  # self.prob_func_ == "softmax"
        prob = np.exp(np.dot(x, self.coef_.T) + self.intercept_)
        prob /= np.sum(prob, axis=1)[:, np.newaxis]
    return np.array([self.classes_[i] for i in np.argmax(prob, axis=1)])
def gradient_check():
    params = np.array([2, 2])

    h = np.array([1e-5, 0])
    print((np.exp(log_variational(params + h, 0.5)) - np.exp(log_variational(params, 0.5))) / h[0])

    h = np.array([0, 1e-5])
    print((np.exp(log_variational(params + h, 0.5)) - np.exp(log_variational(params, 0.5))) / h[1])

    print(gradient_log_variational(params, 0.5, 0))
    print(gradient_log_variational(params, 0.5, 1))
def log_density(x, t):
    mu, log_sigma = x[:, 0], x[:, 1]
    sigma_density = norm.logpdf(log_sigma, 0, 1.35)
    mu_density = norm.logpdf(mu, -0.5, np.exp(log_sigma))
    sigma_density2 = norm.logpdf(log_sigma, 0.1, 1.35)
    mu_density2 = norm.logpdf(mu, 0.5, np.exp(log_sigma))
    return np.logaddexp(sigma_density + mu_density, sigma_density2 + mu_density2)
def softmax(xs):
    """ Softmax of the input vector; outputs are non-negative and sum to 1. """
    # Shift by the max for numerical stability; this does not change the result.
    shifted = xs - np.max(xs)
    n = np.exp(shifted)
    d = np.sum(np.exp(shifted))
    return n/d
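# Minimal usage sketch (values are illustrative): softmax outputs sum to 1 and
# are invariant to adding the same constant to every input.
_scores = np.array([1.0, 2.0, 3.0])
assert np.isclose(np.sum(softmax(_scores)), 1.0)
assert np.allclose(softmax(_scores), softmax(_scores + 100.0))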
def callback(params, t, g):
    # log_weights = params[:10] - logsumexp(params[:10])
    print("Iteration {} lower bound {}".format(t, -objective(params, t)))
    # print(np.exp(log_weights))

    mean = params[0]
    log_std = params[1]
    print('mean', mean)
    print('std', np.exp(log_std))

    # print('u0', params[2][0][0])
    # print('u1', params[2][1][0])
    # print('w0', params[2][0][1])
    # print('w1', params[2][1][1])
    # print('b0', params[2][0][2])
    # print('b1', params[2][1][2])

    # x_inverse =

    plt.cla()
    target_distribution = lambda x: np.exp(log_density(x))
    var_distribution = lambda x: np.exp(variational_log_density(params, x))
    plot_isocontours(ax, target_distribution)
    plot_isocontours(ax, var_distribution, cmap=plt.cm.bone)
    ax.set_autoscale_on(False)

    # Plot the z0 density
    var_distribution0 = lambda x: np.exp(diag_gaussian_log_density(x, mean, log_std))
    plot_isocontours(ax, var_distribution0)

    for transform in params[2]:
        xlimits = [-6, 6]
        w = transform[1]
        b = transform[2]
        x = np.linspace(*xlimits, num=101)
        plt.plot(x, (-w[0]*x - b)/w[1], '-')
        u = transform[0]
        plt.plot(x, (-u[0]*x)/u[1], '-')

    # Plot variational samples
    samples = variational_sampler(params)
    plt.plot(samples[:, 0], samples[:, 1], 'x')

    # # Plot q0 variational samples
    # rs = npr.RandomState(0)
    # samples = sample_diag_gaussian(mean, log_std, n_samples, rs)
    # plt.plot(samples[:, 0], samples[:, 1], 'x')

    plt.draw()
    plt.pause(1.0/30.0)
def sample(self, n_samples=2000, observed_states=None, random_state=None):
    """Generate random samples from the model.

    Parameters
    ----------
    n_samples : int
        Number of samples to generate.
    observed_states : array
        If provided, states are not sampled.
    random_state : RandomState or an int seed
        A random number generator instance. If None is given, the
        object's random_state is used.

    Returns
    -------
    samples : array_like, length (``n_samples``)
        List of samples
    states : array_like, shape (``n_samples``)
        List of hidden states (accounting for tied states by giving
        them the same index)
    """
    if random_state is None:
        random_state = self.random_state
    random_state = check_random_state(random_state)

    samples = np.zeros(n_samples)
    # Integer dtype so that states can index into the transition matrix.
    states = np.zeros(n_samples, dtype=int)

    if observed_states is None:
        startprob_pdf = np.exp(np.copy(self._log_startprob))
        startdist = stats.rv_discrete(name='custm',
                                      values=(np.arange(startprob_pdf.shape[0]),
                                              startprob_pdf),
                                      seed=random_state)
        states[0] = startdist.rvs(size=1)[0]

        transmat_pdf = np.exp(np.copy(self._log_transmat))
        transmat_cdf = np.cumsum(transmat_pdf, 1)

        nrand = random_state.rand(n_samples)
        for idx in range(1, n_samples):
            newstate = (transmat_cdf[states[idx-1]] > nrand[idx-1]).argmax()
            states[idx] = newstate
    else:
        states = observed_states

    mu = np.copy(self._mu_)
    precision = np.copy(self._precision_)
    for idx in range(n_samples):
        mean_ = self._mu_[states[idx]]
        var_ = np.sqrt(1 / precision[states[idx]])
        samples[idx] = norm.rvs(loc=mean_, scale=var_, size=1,
                                random_state=random_state)
    states = self._process_sequence(states)
    return samples, states
def logloss(ys, ys_hat, ws=None):
    # print 'ws', ws.shape, 'ys', ys.shape, 'xs', xs.shape, 'B', B.shape
    if ws is None:
        return np.sum(np.log(1 + np.exp(-ys * ys_hat))) / float(len(ys))  #+ (0.5 * reg * np.dot(B, B)) #/ float(len(ys))
    else:
        try:
            return np.sum(ws * np.log(1 + np.exp(-ys * ys_hat))) / float(len(ys))  #+ (0.5 * reg * np.dot(B, B)) #/ float(len(ys))
        except:
            pdb.set_trace()
def _log_1m_sf(self, params, T, *Xs):
    alpha_params = params[self._LOOKUP_SLICE["alpha_"]]
    log_alpha_ = np.dot(Xs[0], alpha_params)
    alpha_ = np.exp(log_alpha_)
    beta_params = params[self._LOOKUP_SLICE["beta_"]]
    log_beta_ = np.dot(Xs[1], beta_params)
    beta_ = np.exp(log_beta_)
    return -np.log1p((T / alpha_) ** -beta_)
def devec_ackley(x):
    a, b, c = 20.0, -0.2, 2.0*np.pi
    len_recip = 1.0/len(x)
    sum_sqrs, sum_cos = 0.0, 0.0
    for i in x:
        sum_cos += np.cos(c*i)
        sum_sqrs += i*i
    return (-a * np.exp(b*np.sqrt(len_recip*sum_sqrs)) -
            np.exp(len_recip*sum_cos) + a + np.e)
def softmax_grads(Ks, beta, i, j):
    """ Return the gradient of the i-th element of the softmax weighting
    with respect to the j-th element of Ks. """
    if j == i:
        num = beta*np.exp(Ks[i]*beta) * (np.sum(np.exp(Ks*beta)) - np.exp(Ks[i]*beta))
    else:
        num = -beta*np.exp(Ks[i]*beta + Ks[j]*beta)
    den1 = np.sum(np.exp(Ks*beta))
    return num / (den1 * den1)
def beta_grads(Ks, beta, i):
    Karr = np.array(Ks)
    anum = Ks[i] * np.exp(Ks[i] * beta)
    aden = np.sum(np.exp(beta * Karr))
    a = anum / aden
    bnum = np.exp(Ks[i] * beta) * (np.sum(np.multiply(Karr, np.exp(Karr * beta))))
    bden = aden * aden
    b = bnum / bden
    return a - b
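# Finite-difference sanity check for softmax_grads and beta_grads above; the
# reference weighting function, test values, and tolerance are assumptions,
# not part of the original module.
def _weighting(Ks, beta):
    e = np.exp(beta * Ks)
    return e / np.sum(e)

_Ks, _beta, _h = np.array([0.3, -1.2, 0.8]), 1.7, 1e-6
for _i in range(len(_Ks)):
    # gradient of the i-th weight w.r.t. beta
    _fd = (_weighting(_Ks, _beta + _h)[_i] - _weighting(_Ks, _beta)[_i]) / _h
    assert abs(_fd - beta_grads(_Ks, _beta, _i)) < 1e-4
    for _j in range(len(_Ks)):
        # gradient of the i-th weight w.r.t. Ks[j]
        _Kp = _Ks.copy()
        _Kp[_j] += _h
        _fd = (_weighting(_Kp, _beta)[_i] - _weighting(_Ks, _beta)[_i]) / _h
        assert abs(_fd - softmax_grads(_Ks, _beta, _i, _j)) < 1e-4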
def _log_hazard(self, params, T, *Xs):
    alpha_params = params[self._LOOKUP_SLICE["alpha_"]]
    log_alpha_ = np.dot(Xs[0], alpha_params)
    alpha_ = np.exp(log_alpha_)
    beta_params = params[self._LOOKUP_SLICE["beta_"]]
    log_beta_ = np.dot(Xs[1], beta_params)
    beta_ = np.exp(log_beta_)
    return (log_beta_ - log_alpha_
            + np.expm1(log_beta_) * (np.log(T) - log_alpha_)
            - np.log1p((T / alpha_) ** beta_))
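# Standalone numeric check (test values are arbitrary, not from the source):
# the expression returned by _log_hazard matches the log of the textbook
# log-logistic hazard h(t) = (beta/alpha) * (t/alpha)**(beta-1) / (1 + (t/alpha)**beta),
# using the identity np.expm1(log_beta) == beta - 1.
def _check_log_logistic_log_hazard(alpha=2.0, beta=1.5, t=3.0):
    log_alpha, log_beta = np.log(alpha), np.log(beta)
    lhs = (log_beta - log_alpha
           + np.expm1(log_beta) * (np.log(t) - log_alpha)
           - np.log1p((t / alpha) ** beta))
    rhs = np.log((beta / alpha) * (t / alpha) ** (beta - 1) / (1 + (t / alpha) ** beta))
    assert np.isclose(lhs, rhs)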
def compute_modfeat(self, w):
    mod_feat = self.conv_data_fea.copy()
    mod_dem = np.ones(mod_feat.shape)
    ## need to change to accommodate non-binary features
    ws = np.array([w[k*self.F:(k+1)*self.F] for k in range(self.K)])
    w_tiled = np.array([ws for i in range(mod_feat.shape[0])])
    mod_feat = mod_feat * w_tiled
    mod_dem = mod_dem * w_tiled
    mod_feat = np.sum(np.exp(mod_feat), axis=2)
    mod_dem = np.sum(np.exp(mod_dem), axis=2)
    return mod_feat / mod_dem
def log_density(x):
    '''
    x: [n_samples, D]
    return: [n_samples]
    '''
    x_, y_ = x[:, 0], x[:, 1]
    sigma_density = norm.logpdf(y_, 0, 1.35)
    mu_density = norm.logpdf(x_, -2.2, np.exp(y_))
    sigma_density2 = norm.logpdf(y_, 0.1, 1.35)
    mu_density2 = norm.logpdf(x_, 2.2, np.exp(y_))
    return np.logaddexp(sigma_density + mu_density, sigma_density2 + mu_density2)
def callback(params, t, g):
    print("Iteration {} lower bound {}".format(t, -objective(params, t)))

    plt.cla()
    target_distribution = lambda x: np.exp(log_posterior(x, t))
    plot_isocontours(ax, target_distribution)

    mean, log_std = unpack_params(params)
    variational_contour = lambda x: mvn.pdf(x, mean, np.diag(np.exp(2*log_std)))
    plot_isocontours(ax, variational_contour)
    plt.draw()
    plt.pause(1.0/30.0)
def potassium_dynamics(V, n, t):
    # Use resting potential of zero
    V_ref = V + 60

    # Compute the alpha and beta as a function of V
    an1 = 0.01 * (V_ref + 55.) / (1 - np.exp(-(V_ref + 55.) / 10.))
    bn1 = 0.125 * np.exp(-(V_ref + 65.) / 80.)

    # Compute the channel state updates
    dndt = an1 * (1.0 - n) - bn1 * n

    return dndt
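# Illustrative forward-Euler integration (the holding voltage, step size, and
# initial condition are assumptions): at a fixed V the gating variable n
# relaxes toward its steady state a_n / (a_n + b_n).
V_hold, n_t, dt_ms = -40.0, 0.0, 0.01
for _ in range(int(50.0 / dt_ms)):  # simulate 50 ms
    n_t = n_t + dt_ms * potassium_dynamics(V_hold, n_t, 0.0)
print('steady-state K+ activation at V = %.0f mV: %.3f' % (V_hold, n_t))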
def PyLQR_TrajCtrl_GeneralTest():
    # build RBF basis
    rbf_basis = np.array([
        [-1.0, -1.0],
        [-1.0, 1.0],
        [1.0, -1.0],
        [1.0, 1.0]
    ])

    gamma = 1
    T = 100
    R = 1e-5

    # rbf_funcs = [lambda x, u, t, aux: np.exp(-gamma*np.linalg.norm(x[0:2]-basis)**2) + .01*np.linalg.norm(u)**2 for basis in rbf_basis]
    rbf_funcs = [
        lambda x, u, t, aux: -np.exp(-gamma*np.linalg.norm(x[0:2]-rbf_basis[0])**2) + R*np.linalg.norm(u)**2,
        lambda x, u, t, aux: -np.exp(-gamma*np.linalg.norm(x[0:2]-rbf_basis[1])**2) + R*np.linalg.norm(u)**2,
        lambda x, u, t, aux: -np.exp(-gamma*np.linalg.norm(x[0:2]-rbf_basis[2])**2) + R*np.linalg.norm(u)**2,
        lambda x, u, t, aux: -np.exp(-gamma*np.linalg.norm(x[0:2]-rbf_basis[3])**2) + R*np.linalg.norm(u)**2
    ]

    weights = np.array([.75, .5, .25, 1.])
    weights = weights / (np.sum(weights) + 1e-6)

    cost_func = lambda x, u, t, aux: np.sum(weights * np.array([basis_func(x, u, t, aux) for basis_func in rbf_funcs]))

    lqr_traj_ctrl = PyLQR_TrajCtrl(use_autograd=True)
    lqr_traj_ctrl.build_ilqr_general_solver(cost_func, n_dims=rbf_basis.shape[1], T=T)

    n_eval_pnts = 50
    coords = np.linspace(-2.5, 2.5, n_eval_pnts)
    xv, yv = np.meshgrid(coords, coords)

    z = [[cost_func(np.array([xv[i, j], yv[i, j]]), np.zeros(2), None, None)
          for j in range(yv.shape[1])] for i in range(len(xv))]

    fig = plt.figure()
    ax = fig.add_subplot(111)
    # ax.hold(True) was removed in recent Matplotlib; axes retain artists by default.
    ax.contour(xv, yv, z)

    n_queries = 5
    u_array = np.random.rand(2, T-1).T * 2 - 1

    for i in range(n_queries):
        # start from a perturbed point
        x0 = np.random.rand(2) * 4 - 2
        syn_traj = lqr_traj_ctrl.synthesize_trajectory(x0, u_array)

        # plot it
        ax.plot([x0[0]], [x0[1]], 'k*', markersize=12.0)
        ax.plot(syn_traj[:, 0], syn_traj[:, 1], linewidth=3.5)

    plt.show()
    return
def do_transform(self, item):
    if type(item) is not tuple:
        item = (item,)

    def printer(*args):
        if self.__verbosity__ > 0:
            print(*args)

    printer('Attempting to get {}'.format(item))

    A_t, B_t = self.operators
    A_bar_t, B_bar_t = self.operators_bar
    A_1, A_2, A_3, B = self.__op_cache__.operators
    A_1_bar, A_2_bar, A_3_bar, B_bar = self.__op_cache__.operators_bar

    exp_kappa_int = np.exp(self.__kappa_int__).reshape((len(self.__kappa_int__), 1))
    exp_kappa_bdy = np.exp(self.__kappa_bdy__).reshape((len(self.__kappa_bdy__), 1))
    grad_kappa_x = self.__grad_kappa_x__.reshape((len(self.__grad_kappa_x__), 1))
    grad_kappa_y = self.__grad_kappa_y__.reshape((len(self.__grad_kappa_y__), 1))

    all_things = [()]

    printer(exp_kappa_int.shape, exp_kappa_bdy.shape, grad_kappa_x.shape, grad_kappa_y.shape)

    # first explode out the objects required
    for i in item:
        if i == A_t:
            all_things = sum([[a + (A_1,), a + (A_2,), a + (A_3,)] for a in all_things], [])
        elif i == A_bar_t:
            all_things = sum([[a + (A_1_bar,), a + (A_2_bar,), a + (A_3_bar,)] for a in all_things], [])
        elif i == B_t:
            all_things = [a + (B,) for a in all_things]
        elif i == B_bar_t:
            all_things = [a + (B_bar,) for a in all_things]
        else:
            all_things = [a + (i,) for a in all_things]

    printer('Mapped {} to {}'.format(item, all_things))

    def __ret(x, y, fun_args=None):
        return self.calc_result(
            x, y, fun_args, all_things,
            exp_kappa_int, exp_kappa_bdy,
            grad_kappa_x, grad_kappa_y
        )

    return __ret
def f2(params):
    # mixed inputs
    x = params[0]  # float
    A = params[1]  # 2d array
    B = params[2]  # 1d array
    return np.exp(B**2)/x * A
def sigmoid(z):
    return 1/(1 + np.exp(-z))
def log_likelihoods(self, data, input, mask, tag, x):
    mus = self.forward(x, input, tag)
    etas = np.exp(self.inv_etas)
    lls = -0.5 * np.log(2 * np.pi * etas) - 0.5 * (data[:, None, :] - mus)**2 / etas
    return np.sum(lls * mask[:, None, :], axis=2)
def self_weighted_logit(x):
    return 1.0 / (1.0 + np.exp(-np.dot(x, x)))
def sig(x):
    return 1 / (1 + np.exp(-1 * x))

n = kerns[0].shape[0]
def sigmoid(x):
    return 1. / (1. + np.exp(-x))
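# Aside (not part of the original source): for large negative x, np.exp(-x)
# overflows and emits a RuntimeWarning, even though the returned value is still
# the correct limit 0.0; scipy.special.expit is a warning-free equivalent.
from scipy.special import expit
assert np.isclose(sigmoid(3.0), expit(3.0))
assert np.isclose(sigmoid(-3.0), expit(-3.0))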
def mixture_of_gaussian_em(data, Q, init_params=None, weights=None, num_iters=100):
    """
    Use expectation-maximization (EM) to compute the maximum likelihood
    estimate of the parameters of a Gaussian mixture model.

    The datapoints x_i are assumed to come from the following model:

        z_i ~ Cate(pi)
        x_i | z_i ~ N(mu_{z_i}, Sigma_{z_i})

    The parameters are {pi_q, mu_q, Sigma_q} for q = 1...Q.

    Assume:
    - data x_i are vectors in R^M
    - covariance is diagonal S_q = diag([S_{q1}, .., S_{qm}])
    """
    N, M = data.shape  ### concatenate all marks; N = # of spikes, M = # of mark dim

    if init_params is not None:
        pi, mus, inv_sigmas = init_params
        assert pi.shape == (Q, )
        assert np.all(pi >= 0) and np.allclose(pi.sum(), 1)
        assert mus.shape == (M, Q)
        assert inv_sigmas.shape == (M, Q)
    else:
        pi = np.ones(Q) / Q
        mus = npr.randn(M, Q)
        inv_sigmas = -2 + npr.randn(M, Q)

    if weights is not None:
        assert weights.shape == (N, ) and np.all(weights >= 0)
    else:
        weights = np.ones(N)

    for itr in range(num_iters):
        ## E-step:
        ## output: number of spikes by number of mixture
        ## attribute spikes to each Q element
        sigmas = np.exp(inv_sigmas)
        responsibilities = np.zeros((N, Q))
        responsibilities += np.log(pi)
        for q in range(Q):
            # Add the per-component Gaussian log-likelihood to the log prior
            # (plain assignment here would discard np.log(pi) added above).
            responsibilities[:, q] += np.sum(
                -0.5 * (data - mus[None, :, q])**2 / sigmas[None, :, q]
                - 0.5 * np.log(2 * np.pi * sigmas[None, :, q]),
                axis=1)  # norm.logpdf(...)
        responsibilities -= logsumexp(responsibilities, axis=1, keepdims=True)
        responsibilities = np.exp(responsibilities)

        ## M-step:
        ## take in responsibilities (output of e-step)
        ## compute MLE of Gaussian parameters
        ## mean/std is weighted mean/std of mix
        for q in range(Q):
            pi[q] = np.average(responsibilities[:, q])
            mus[:, q] = np.average(data, weights=responsibilities[:, q] * weights, axis=0)
            sqerr = (data - mus[None, :, q])**2
            inv_sigmas[:, q] = np.log(1e-8 + np.average(
                sqerr, weights=responsibilities[:, q] * weights, axis=0))

    return mus, inv_sigmas, pi
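# Hypothetical usage sketch (toy data and expected recovery are illustrative;
# relies on this module's npr and logsumexp imports): fit a two-component
# mixture to well-separated 1-D data.
_toy = np.concatenate([npr.randn(200, 1) * 0.3 - 3.0,
                       npr.randn(200, 1) * 0.3 + 3.0])
_mus, _inv_sigmas, _pi = mixture_of_gaussian_em(_toy, Q=2, num_iters=50)
print(np.sort(_mus[0]), _pi)  # component means should land near -3 and +3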
def logreg_loss(x, D, z, lbda):
    res = - x * np.dot(D, z)
    return np.mean(np.log1p(np.exp(res))) + .5 * lbda * np.sum(z ** 2)
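# A numerically safer variant (a sketch, not from the original source):
# np.log1p(np.exp(res)) overflows once res is large, while np.logaddexp(0., res)
# evaluates log(1 + exp(res)) stably for any res.
def logreg_loss_stable(x, D, z, lbda):
    res = - x * np.dot(D, z)
    return np.mean(np.logaddexp(0., res)) + .5 * lbda * np.sum(z ** 2)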
def test_logsumexp5():
    combo_check(autograd.scipy.misc.logsumexp, [0])(
        [R(2, 3, 4)], b=[np.exp(R(2, 3, 4))],
        axis=[None, 0, 1], keepdims=[True, False])
def G(self, t):
    ''' Separation of variables: t-dependent '''
    return np.exp(-self.ll * t)
def NegELBO(param, prior, X, S, Ncon, G, M, K):
    """
    Parameters
    ----------
    param: length (2M + 2M + MG + 2G + GNK + GDK + GDK + GK + GK)
        variational parameters, including:
        1) tau_a1: len(M), first parameter of q(alpha_m)
        2) tau_a2: len(M), second parameter of q(alpha_m)
        3) tau_b1: len(M), first parameter of q(beta_m)
        4) tau_b2: len(M), second parameter of q(beta_m)
        5) phi: shape(M, G), phi[m,:] is the parameter vector of q(c_m)
        6) tau_v1: len(G), first parameter of q(nu_g)
        7) tau_v2: len(G), second parameter of q(nu_g)
        8) mu_w: shape(G, D, K), mu_w[g,d,k] is the mean parameter of q(W^g_{dk})
        9) sigma_w: shape(G, D, K), sigma_w[g,d,k] is the std parameter of q(W^g_{dk})
        10) mu_b: shape(G, K), mu_b[g,k] is the mean parameter of q(b^g_k)
        11) sigma_b: shape(G, K), sigma_b[g,k] is the std parameter of q(b^g_k)
    prior: dictionary
        the naming of keys follows those in param, {'tau_a1': val1, ...}
    X: shape(N, D)
        each row represents a sample and each column represents a feature
    S: shape(n_con, 4)
        each row represents an observed constraint
        (expert_id, sample1_id, sample2_id, constraint_type), where
        1) expert_id: varies between [0, M-1]
        2) sample1 id: varies between [0, N-1]
        3) sample2 id: varies between [0, N-1]
        4) constraint_type: 1 means must-link and 0 means cannot-link
    Ncon: shape(M, 1)
        number of constraints provided by each expert
    G: int
        number of local consensus in the posterior truncated Dirichlet Process
    M: int
        number of experts
    K: int
        maximal number of clusters among different solutions; due to the use of
        discriminative clustering, some local solutions might have empty clusters

    Returns
    -------
    float
        the negative evidence lower bound (-ELBO)
    """
    eps = 1e-12

    # get sample size and feature size
    [N, D] = np.shape(X)

    # unpack the input parameter vector
    [tau_a1, tau_a2, tau_b1, tau_b2, phi, tau_v1, tau_v2, mu_w, sigma_w,
     mu_b, sigma_b] = unpackParam(param, N, D, G, M, K)

    # compute eta given mu_w and mu_b
    eta = np.zeros((0, K))
    for g in np.arange(G):
        t1 = np.exp(np.dot(X, mu_w[g]) + mu_b[g])
        t2 = np.transpose(np.tile(np.sum(t1, axis=1), (K, 1)))
        eta = np.vstack((eta, t1 / t2))
    eta = np.reshape(eta, (G, N, K))

    # compute the expectation terms to be used later
    E_log_Alpha = digamma(tau_a1) - digamma(tau_a1 + tau_a2)  # len(M)
    E_log_OneMinusAlpha = digamma(tau_a2) - digamma(tau_a1 + tau_a2)  # len(M)
    E_log_Beta = digamma(tau_b1) - digamma(tau_b1 + tau_b2)  # len(M)
    E_log_OneMinusBeta = digamma(tau_b2) - digamma(tau_b1 + tau_b2)  # len(M)
    E_log_Nu = digamma(tau_v1) - digamma(tau_v1 + tau_v2)  # len(G)
    E_log_OneMinusNu = digamma(tau_v2) - digamma(tau_v1 + tau_v2)  # len(G)
    E_C = phi  # shape(M, G)
    E_W = mu_w  # shape(G, D, K)
    E_WMinusMuSqd = sigma_w**2 + (mu_w - prior['mu_w'])**2  # shape(G, D, K)
    E_BMinusMuSqd = sigma_b**2 + (mu_b - prior['mu_b'])**2  # shape(G, K)
    E_ExpB = np.exp(mu_b + 0.5 * sigma_b**2)  # shape(G, K)

    E_logP_Alpha = (prior['tau_a1']-1) * E_log_Alpha + \
        (prior['tau_a2']-1) * E_log_OneMinusAlpha - \
        gammaln(prior['tau_a1']+eps) - \
        gammaln(prior['tau_a2']+eps) + \
        gammaln(prior['tau_a1']+prior['tau_a2']+eps)
    E_logP_Beta = (prior['tau_b1']-1) * E_log_Beta + \
        (prior['tau_b2']-1) * E_log_OneMinusBeta - \
        gammaln(prior['tau_b1']+eps) - \
        gammaln(prior['tau_b2']+eps) + \
        gammaln(prior['tau_b1']+prior['tau_b2']+eps)
    E_logQ_Alpha = (tau_a1-1)*E_log_Alpha + (tau_a2-1)*E_log_OneMinusAlpha - \
        gammaln(tau_a1 + eps) - gammaln(tau_a2 + eps) + \
        gammaln(tau_a1+tau_a2 + eps)
    E_logQ_Beta = (tau_b1-1)*E_log_Beta + (tau_b2-1)*E_log_OneMinusBeta - \
        gammaln(tau_b1 + eps) - gammaln(tau_b2 + eps) + \
        gammaln(tau_b1+tau_b2 + eps)
    E_logQ_C = np.sum(phi * np.log(phi + eps), axis=1)

    eta_N_GK = np.reshape(np.transpose(eta, (1, 0, 2)), (N, G * K))

    # compute three terms and then add them up
    L_1, L_2, L_3 = [0., 0., 0.]

    # the first term and part of the second term
    for m in np.arange(M):
        idx_S = range(sum(Ncon[:m]), sum(Ncon[:m]) + Ncon[m])
        tp_con = S[idx_S, 3]
        phi_rep = np.reshape(np.transpose(np.tile(phi[m], (K, 1))), G * K)
        E_A = np.dot(eta_N_GK, np.transpose(eta_N_GK * phi_rep))
        E_A_use = E_A[S[idx_S, 1], S[idx_S, 2]]
        tp_Asum = np.sum(E_A_use)
        tp_AdotS = np.sum(E_A_use * tp_con)
        L_1 = L_1 + Ncon[m]*E_log_Beta[m] + np.sum(tp_con)*\
            (E_log_OneMinusBeta[m]-E_log_Beta[m]) + \
            tp_AdotS * (E_log_Alpha[m] + E_log_Beta[m] - \
            E_log_OneMinusAlpha[m] - E_log_OneMinusBeta[m]) + \
            tp_Asum * (E_log_OneMinusAlpha[m] - E_log_Beta[m])
        fg = lambda g: phi[m, g] * np.sum(E_log_OneMinusNu[0:g - 1])
        # list comprehension so np.sum sees concrete values under Python 3
        L_2 = L_2 + E_logP_Alpha[m] + E_logP_Beta[m] + \
            np.dot(phi[m], E_log_Nu) + np.sum([fg(g) for g in np.arange(G)])

    # the second term
    for g in np.arange(G):
        tp_Nug = (prior['gamma']-1)*E_log_OneMinusNu[g] + \
            np.log(prior['gamma']+eps)
        t1 = np.dot(X, mu_w[g])
        t2 = 0.5 * np.dot(X**2, sigma_w[g]**2)
        t3 = np.sum(eta[g], axis=1)
        t_mat_i = logsumexp(np.add(mu_b[g] + 0.5 * sigma_b[g]**2, t1 + t2), axis=1)
        tp_Zg = np.sum(eta[g] * np.add(t1, mu_b[g])) - np.dot(t3, t_mat_i)
        t5 = -np.log(np.sqrt(2*np.pi)*prior['sigma_w']) - \
            0.5/(prior['sigma_w']**2) * (sigma_w[g]**2 + \
            (mu_w[g]-prior['mu_w'])**2)
        tp_Wg = np.sum(t5)
        t6 = -np.log(np.sqrt(2*np.pi)*prior['sigma_b']+eps) - \
            0.5/(prior['sigma_b']**2) * (sigma_b[g]**2 + \
            (mu_b[g]-prior['mu_b'])**2)
        tp_bg = np.sum(t6)
        L_2 = L_2 + tp_Nug + tp_Zg + tp_Wg + tp_bg

    # the third term
    L_3 = np.sum(E_logQ_Alpha + E_logQ_Beta + E_logQ_C)
    for g in np.arange(G):
        tp_Nug3 = (tau_v1[g]-1)*E_log_Nu[g] + (tau_v2[g]-1)*E_log_OneMinusNu[g] - \
            np.log(gamma(tau_v1[g])+eps) - np.log(gamma(tau_v2[g])+eps) + \
            np.log(gamma(tau_v1[g]+tau_v2[g])+eps)
        tp_Zg3 = np.sum(eta[g] * np.log(eta[g] + eps))
        tp_Wg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_w[g] + eps) - 0.5)
        tp_bg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_b[g] + eps) - 0.5)
        L_3 = L_3 + tp_Nug3 + tp_Zg3 + tp_Wg3 + tp_bg3

    # Note the third term should have a minus sign before it
    ELBO = L_1 + L_2 - L_3
    #ELBO = L_1 + L_2

    return -ELBO
def ELBO_terms(param, prior, X, S, Ncon, G, M, K):
    eps = 1e-12

    # get sample size and feature size
    [N, D] = np.shape(X)

    # unpack the input parameter vector
    [tau_a1, tau_a2, tau_b1, tau_b2, phi, tau_v1, tau_v2, mu_w, sigma_w,
     mu_b, sigma_b] = unpackParam(param, N, D, G, M, K)

    # compute eta given mu_w and mu_b
    eta = np.zeros((0, K))
    for g in np.arange(G):
        t1 = np.exp(np.dot(X, mu_w[g]) + mu_b[g])
        t2 = np.transpose(np.tile(np.sum(t1, axis=1), (K, 1)))
        eta = np.vstack((eta, t1 / t2))
    eta = np.reshape(eta, (G, N, K))

    # compute the expectation terms to be used later
    E_log_Alpha = digamma(tau_a1) - digamma(tau_a1 + tau_a2)  # len(M)
    E_log_OneMinusAlpha = digamma(tau_a2) - digamma(tau_a1 + tau_a2)  # len(M)
    E_log_Beta = digamma(tau_b1) - digamma(tau_b1 + tau_b2)  # len(M)
    E_log_OneMinusBeta = digamma(tau_b2) - digamma(tau_b1 + tau_b2)  # len(M)
    E_log_Nu = digamma(tau_v1) - digamma(tau_v1 + tau_v2)  # len(G)
    E_log_OneMinusNu = digamma(tau_v2) - digamma(tau_v1 + tau_v2)  # len(G)
    E_C = phi  # shape(M, G)
    E_W = mu_w  # shape(G, D, K)
    E_WMinusMuSqd = sigma_w**2 + (mu_w - prior['mu_w'])**2  # shape(G, D, K)
    E_BMinusMuSqd = sigma_b**2 + (mu_b - prior['mu_b'])**2  # shape(G, K)
    E_ExpB = np.exp(mu_b + 0.5 * sigma_b**2)  # shape(G, K)

    E_logP_Alpha = (prior['tau_a1']-1) * E_log_Alpha + \
        (prior['tau_a2']-1) * E_log_OneMinusAlpha - \
        gammaln(prior['tau_a1']+eps) - \
        gammaln(prior['tau_a2']+eps) + \
        gammaln(prior['tau_a1']+prior['tau_a2']+eps)
    E_logP_Beta = (prior['tau_b1']-1) * E_log_Beta + \
        (prior['tau_b2']-1) * E_log_OneMinusBeta - \
        gammaln(prior['tau_b1']+eps) - \
        gammaln(prior['tau_b2']+eps) + \
        gammaln(prior['tau_b1']+prior['tau_b2']+eps)
    E_logQ_Alpha = (tau_a1-1)*E_log_Alpha + (tau_a2-1)*E_log_OneMinusAlpha - \
        gammaln(tau_a1 + eps) - gammaln(tau_a2 + eps) + \
        gammaln(tau_a1+tau_a2 + eps)
    E_logQ_Beta = (tau_b1-1)*E_log_Beta + (tau_b2-1)*E_log_OneMinusBeta - \
        gammaln(tau_b1 + eps) - gammaln(tau_b2 + eps) + \
        gammaln(tau_b1+tau_b2 + eps)
    E_logQ_C = np.sum(phi * np.log(phi + eps), axis=1)

    eta_N_GK = np.reshape(np.transpose(eta, (1, 0, 2)), (N, G * K))

    # compute three terms and then add them up
    L_1, L_2, L_3 = [0., 0., 0.]
    # the first term and part of the second term
    for m in np.arange(M):
        idx_S = range(sum(Ncon[:m]), sum(Ncon[:m]) + Ncon[m])
        tp_con = S[idx_S, 3]
        phi_rep = np.reshape(np.transpose(np.tile(phi[m], (K, 1))), G * K)
        E_A = np.dot(eta_N_GK, np.transpose(eta_N_GK * phi_rep))
        E_A_use = E_A[S[idx_S, 1], S[idx_S, 2]]
        tp_Asum = np.sum(E_A_use)
        tp_AdotS = np.sum(E_A_use * tp_con)
        L_1 = L_1 + Ncon[m]*E_log_Beta[m] + np.sum(tp_con)*\
            (E_log_OneMinusBeta[m]-E_log_Beta[m]) + \
            tp_AdotS * (E_log_Alpha[m] + E_log_Beta[m] - \
            E_log_OneMinusAlpha[m] - E_log_OneMinusBeta[m]) + \
            tp_Asum * (E_log_OneMinusAlpha[m] - E_log_Beta[m])
        fg = lambda g: phi[m, g] * np.sum(E_log_OneMinusNu[0:g - 1])
        # list comprehension so np.sum sees concrete values under Python 3
        L_2 = L_2 + E_logP_Alpha[m] + E_logP_Beta[m] + \
            np.dot(phi[m], E_log_Nu) + np.sum([fg(g) for g in np.arange(G)])

    # the second term
    for g in np.arange(G):
        tp_Nug = (prior['gamma']-1)*E_log_OneMinusNu[g] + \
            np.log(prior['gamma']+eps)
        t1 = np.dot(X, mu_w[g])
        t2 = 0.5 * np.dot(X**2, sigma_w[g]**2)
        t3 = np.sum(eta[g], axis=1)
        t_mat_i = logsumexp(np.add(mu_b[g] + 0.5 * sigma_b[g]**2, t1 + t2), axis=1)
        tp_Zg = np.sum(eta[g] * np.add(t1, mu_b[g])) - np.dot(t3, t_mat_i)
        t5 = -np.log(np.sqrt(2*np.pi)*prior['sigma_w']) - \
            0.5/(prior['sigma_w']**2) * (sigma_w[g]**2 + \
            (mu_w[g]-prior['mu_w'])**2)
        tp_Wg = np.sum(t5)
        t6 = -np.log(np.sqrt(2*np.pi)*prior['sigma_b']+eps) - \
            0.5/(prior['sigma_b']**2) * (sigma_b[g]**2 + \
            (mu_b[g]-prior['mu_b'])**2)
        tp_bg = np.sum(t6)
        L_2 = L_2 + tp_Nug + tp_Zg + tp_Wg + tp_bg

    # the third term
    L_3 = np.sum(E_logQ_Alpha + E_logQ_Beta + E_logQ_C)
    for g in np.arange(G):
        tp_Nug3 = (tau_v1[g]-1)*E_log_Nu[g] + (tau_v2[g]-1)*E_log_OneMinusNu[g] - \
            np.log(gamma(tau_v1[g])+eps) - np.log(gamma(tau_v2[g])+eps) + \
            np.log(gamma(tau_v1[g]+tau_v2[g])+eps)
        tp_Zg3 = np.sum(eta[g] * np.log(eta[g] + eps))
        tp_Wg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_w[g] + eps) - 0.5)
        tp_bg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_b[g] + eps) - 0.5)
        L_3 = L_3 + tp_Nug3 + tp_Zg3 + tp_Wg3 + tp_bg3

    return (L_1, L_2, L_3)
def _evaluate(self, x, out, *args, **kwargs):
    part1 = -1. * self.a * anp.exp(-1. * self.b * anp.sqrt((1. / self.n_var) * anp.sum(x * x, axis=1)))
    part2 = -1. * anp.exp((1. / self.n_var) * anp.sum(anp.cos(self.c * x), axis=1))
    out["F"] = part1 + part2 + self.a + anp.exp(1)
def rbf_covariance(cov_params, x, xp):
    output_scale = np.exp(cov_params[0])
    lengthscales = np.exp(cov_params[1:])
    diffs = np.expand_dims(x / lengthscales, 1) \
          - np.expand_dims(xp / lengthscales, 0)
    return output_scale * np.exp(-0.5 * np.sum(diffs**2, axis=2))
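# Minimal usage sketch (toy hyperparameters and inputs are illustrative): the
# resulting kernel matrix is symmetric and its diagonal equals the output
# scale exp(cov_params[0]).
_params = np.array([0.1, -0.5])  # [log output scale, log lengthscale]
_x = np.linspace(0.0, 1.0, 5).reshape(-1, 1)
_K = rbf_covariance(_params, _x, _x)
assert np.allclose(_K, _K.T)
assert np.allclose(np.diag(_K), np.exp(_params[0]))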
def g_analytic(x, gamma=2, g0=10):
    return g0 * np.exp(-gamma * x)
def classical(p):
    "Classical node, requires autograd.numpy functions."
    return anp.exp(anp.sum(quantum(p[0], anp.log(p[1]))))
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
def grad_analytic(x, D, lbda, step, n_iter):
    n, p = D.shape
    z = gradient_descent(x, D, lbda, step, n_iter)
    return -np.dot(D, z) / (1. + np.exp(x * np.dot(D, z))) / n
def f(x):
    return np.sum([
        np.log(np.exp(-y_train[i] * np.dot(scale_parameter * A_train[i], x)) + 1)
        for i in range(len(A_train))
    ])
def softmax(X, axis=0):
    return np.exp(X - logsumexp(X, axis=axis, keepdims=True))
def test_logsumexp3():
    combo_check(autograd.scipy.misc.logsumexp, [0], modes=['fwd', 'rev'])(
        [R(4)], b=[np.exp(R(4))], axis=[None, 0], keepdims=[True, False])
def softmax(x):
    """Compute softmax values for each set of scores in x."""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()
def fun1(B, Bdims):
    if Bdims:
        Bdims = list(range(len(Bdims)))
    return np.einsum(np.exp(B**2), Bdims, np.transpose(B), Bdims[::-1], [])
def build_toy_dataset(n_data=80, noise_std=0.1, D=1):
    rs = npr.RandomState(0)
    # Integer division so np.linspace receives an int sample count.
    inputs = np.concatenate([np.linspace(0, 3, num=n_data // 2),
                             np.linspace(6, 8, num=n_data // 2)])
    targets = np.cos(inputs) + rs.randn(n_data) * noise_std
    inputs = (inputs - 4.0) / 2.0
    inputs = inputs.reshape((len(inputs), D))
    targets = targets.reshape((len(targets), D)) / 2.0
    return inputs, targets


if __name__ == '__main__':

    # Specify inference problem by its unnormalized log-posterior.
    rbf = lambda x: np.exp(-x**2)
    relu = lambda x: np.maximum(x, 0.0)

    # Implement a 3-hidden layer neural network.
    num_weights, predictions, logprob = \
        make_nn_funs(layer_sizes=[1, 20, 20, 1], nonlinearity=rbf)

    inputs, targets = build_toy_dataset()
    objective = lambda weights, t: -logprob(weights, inputs, targets)

    # Set up figure.
    fig = plt.figure(figsize=(12, 8), facecolor='white')
    ax = fig.add_subplot(111, frameon=False)
    plt.show(block=False)

    def callback(params, t, g):
def fun0(B, Bdims):
    return einsum2.einsum2(np.exp(B**2), Bdims, np.transpose(B), Bdims[::-1], [])
def sample(self, z, x, input=None, tag=None):
    T = z.shape[0]
    z = np.zeros_like(z, dtype=int) if self.single_subspace else z
    mus = self.forward(x, input, tag)
    etas = np.exp(self.inv_etas)
    return mus[np.arange(T), z, :] + np.sqrt(etas[z]) * npr.randn(T, self.N)
def test_exp():
    fun = lambda x: 3.0 * np.exp(x)
    d_fun = grad(fun)
    check_grads(fun, npr.randn())
    check_grads(d_fun, npr.randn())
from autograd import grad

task_params = {'target_name': 'measured log solubility in mols per litre',
               'data_file': 'delaney.csv'}
N_train = 800
N_val = 20
N_test = 20

model_params = dict(fp_length=50,    # Usually neural fps need far fewer dimensions than morgan.
                    fp_depth=4,      # The depth of the network equals the fingerprint radius.
                    conv_width=20,   # Only the neural fps need this parameter.
                    h1_size=100,     # Size of hidden layer of network on top of fps.
                    L2_reg=np.exp(-2))
train_params = dict(num_iters=100,
                    batch_size=100,
                    init_scale=np.exp(-4),
                    step_size=np.exp(-6))

# Define the architecture of the network that sits on top of the fingerprints.
vanilla_net_params = dict(
    layer_sizes=[model_params['fp_length'], model_params['h1_size']],  # One hidden layer.
    normalize=True,
    L2_reg=model_params['L2_reg'],
    nll_func=rmse)


def train_nn(pred_fun,
def main():
    #====== Setup =======
    n_iters, n_samples = 2500, 500
    init_vals = np.random.randn(n_samples, 1)
    allsamps = []
    logprob = bimodal_logprob

    #====== Tests =======
    t = dt.datetime.now()
    print('running 1d tests ...')
    samps = langevin(logprob, copy(init_vals), num_iters=n_iters,
                     num_samples=n_samples, step_size=0.05)
    print('done langevin in', dt.datetime.now() - t, '\n')
    allsamps.append(samps)

    samps = MALA(logprob, copy(init_vals), num_iters=n_iters,
                 num_samples=n_samples, step_size=0.05)
    print('done MALA in', dt.datetime.now() - t, '\n')
    allsamps.append(samps)

    samps = RK_langevin(logprob, copy(init_vals), num_iters=n_iters,
                        num_samples=n_samples, step_size=0.01)
    print('done langevin_RK in', dt.datetime.now() - t, '\n')
    allsamps.append(samps)

    t = dt.datetime.now()
    samps = RWMH(logprob, copy(init_vals), num_iters=n_iters,
                 num_samples=n_samples, sigma=0.5)
    print('done RW MH in', dt.datetime.now() - t, '\n')
    allsamps.append(samps)

    t = dt.datetime.now()
    samps = HMC(logprob, copy(init_vals), num_iters=n_iters // 5,
                num_samples=n_samples, step_size=0.05, num_leap_iters=5)
    print('done HMC in', dt.datetime.now() - t, '\n')
    allsamps.append(samps)

    #====== Plotting =======
    lims = [-5, 5]
    names = ['langevin', 'MALA', 'langevin_RK', 'RW MH', 'HMC']
    f, axes = plt.subplots(len(names), sharex=True)
    for i, (name, samps) in enumerate(zip(names, allsamps)):
        sns.distplot(samps, bins=1000, kde=False, ax=axes[i])
        axb = axes[i].twinx()
        axb.scatter(samps, np.ones(len(samps)), alpha=0.1, marker='x', color='red')
        axb.set_yticks([])
        zs = np.linspace(*lims, num=250)
        axes[i].twinx().plot(zs, np.exp(bimodal_logprob(zs)), color='orange')
        axes[i].set_xlim(*lims)
        title = name
        axes[i].set_title(title)

    plt.show()
def diag_gaussian_log_density(x, mu, log_std):
    return np.sum(norm.logpdf(x, mu, np.exp(log_std)), axis=-1)
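# Consistency sketch (test values are illustrative): a diagonal Gaussian's
# log-density equals that of a full multivariate normal whose covariance is
# diag(exp(2 * log_std)).
from scipy.stats import multivariate_normal
_x = np.array([0.3, -1.2])
_mu = np.array([0.0, 0.5])
_log_std = np.array([-0.1, 0.4])
assert np.isclose(diag_gaussian_log_density(_x, _mu, _log_std),
                  multivariate_normal.logpdf(_x, _mu, np.diag(np.exp(2 * _log_std))))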
def values(self, param, pos):
    b0 = param[0]
    pos = pos.T
    v = np.exp(-b0 * (pos[0])**2) * np.exp(-b0 * (pos[1])**2)
    return v