def gen_prior(K_chol, sig2_omega, sig2_mu):
    th = np.zeros(parser.N)
    N = parser.idxs_and_shapes['mus'][1][0]
    parser.set(th, 'betas', K_chol.dot(npr.randn(len(lam0), K)).T)
    parser.set(th, 'omegas', np.sqrt(sig2_omega) * npr.randn(N, K))
    parser.set(th, 'mus', np.sqrt(sig2_mu) * npr.randn(N))
    return th
def adam_minimax(grad_both, init_params_max, init_params_min, callback=None, num_iters=100,
                 step_size_max=0.001, step_size_min=0.001, b1=0.9, b2=0.999, eps=10**-8):
    """Adam modified to do minimax optimization, for instance to help with
    training generative adversarial networks."""

    x_max, unflatten_max = flatten(init_params_max)
    x_min, unflatten_min = flatten(init_params_min)

    m_max = np.zeros(len(x_max))
    v_max = np.zeros(len(x_max))
    m_min = np.zeros(len(x_min))
    v_min = np.zeros(len(x_min))

    for i in range(num_iters):
        g_max_uf, g_min_uf = grad_both(unflatten_max(x_max),
                                       unflatten_min(x_min), i)
        g_max, _ = flatten(g_max_uf)
        g_min, _ = flatten(g_min_uf)

        if callback:
            callback(unflatten_max(x_max), unflatten_min(x_min), i,
                     unflatten_max(g_max), unflatten_min(g_min))

        m_max = (1 - b1) * g_max      + b1 * m_max  # First moment estimate.
        v_max = (1 - b2) * (g_max**2) + b2 * v_max  # Second moment estimate.
        mhat_max = m_max / (1 - b1**(i + 1))        # Bias correction.
        vhat_max = v_max / (1 - b2**(i + 1))
        x_max = x_max + step_size_max * mhat_max / (np.sqrt(vhat_max) + eps)

        m_min = (1 - b1) * g_min      + b1 * m_min  # First moment estimate.
        v_min = (1 - b2) * (g_min**2) + b2 * v_min  # Second moment estimate.
        mhat_min = m_min / (1 - b1**(i + 1))        # Bias correction.
        vhat_min = v_min / (1 - b2**(i + 1))
        x_min = x_min - step_size_min * mhat_min / (np.sqrt(vhat_min) + eps)

    return unflatten_max(x_max), unflatten_min(x_min)
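# Hypothetical usage sketch (not from the original source): run adam_minimax on a toy
# concave-convex saddle objective. The payoff and player names below are illustrative
# assumptions, chosen so simultaneous ascent/descent should converge.
import autograd.numpy as np
from autograd import grad

def payoff(u, v):
    # Concave in the max-player u, convex in the min-player v; equilibrium at u = v = 0.
    return -np.sum(u**2) + np.sum(v**2) + np.dot(u, v)

grad_both = lambda u, v, i: (grad(payoff, 0)(u, v), grad(payoff, 1)(u, v))
u_star, v_star = adam_minimax(grad_both, np.ones(2), -np.ones(2), num_iters=2000)
# Both players should end up close to the origin.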
def initParam(prior, X, N, D, G, M, K, dir_param, prng):
    """ initialize variational parameters with prior parameters """
    [tpM, tpG, lb, ub] = [np.ones(M), np.ones(G), 10., 10.]
    tpR = prng.rand(2*M)
    [tau_a1, tau_a2, tau_b1, tau_b2, tau_v1, tau_v2] = \
        [lb + (ub-lb)*tpR[0:M], tpM,
         lb + (ub-lb)*tpR[M:2*M], tpM,
         tpG, tpG]
    mu_w = prng.randn(G, D, K) / np.sqrt(D)
    sigma_w = np.ones(G*D*K) * 1e-3
    mu_b = prng.randn(G, K) / np.sqrt(D)
    sigma_b = np.ones(G*K) * 1e-3
    phi = np.reshape(prng.dirichlet(np.ones(G)*dir_param, M), M*G)
    mu_w = np.reshape(mu_w, G*D*K)
    mu_b = np.reshape(mu_b, G*K)
    param_init = np.concatenate((tau_a1, tau_a2, tau_b1, tau_b2, phi, tau_v1,
                                 tau_v2, mu_w, sigma_w, mu_b, sigma_b))
    return param_init
def l2_norm(x, y):
    if norm_for_l2:
        xn = x / _np.sqrt((x * x).sum())
        yn = y / _np.sqrt((y * y).sum())
    else:
        xn, yn = x, y
    return ((xn - yn) ** 2).sum()
def scalar_log_lik(theta_1, theta_2, x):
    arg = (x - theta_1)
    lik1 = 1.0 / np.sqrt(2 * SIGMA_x ** 2 * np.pi) * np.exp(- np.dot(arg, arg) / (2 * SIGMA_x ** 2))
    arg = (x - theta_1 - theta_2)
    lik2 = 1.0 / np.sqrt(2 * SIGMA_x ** 2 * np.pi) * np.exp(- np.dot(arg, arg) / (2 * SIGMA_x ** 2))
    return np.log(0.5 * lik1 + 0.5 * lik2)
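# Hypothetical sanity check (not in the original source): the two-component mixture
# log-density above should agree with scipy.stats.norm. SIGMA_x is a module-level
# constant in the original; it is set here only so the check is self-contained.
import autograd.numpy as np
from scipy.stats import norm as sp_norm

SIGMA_x = 2.0  # assumed global used by scalar_log_lik
x, t1, t2 = 1.3, 0.4, 1.0
ref = np.log(0.5 * sp_norm.pdf(x, loc=t1, scale=SIGMA_x) +
             0.5 * sp_norm.pdf(x, loc=t1 + t2, scale=SIGMA_x))
assert np.isclose(scalar_log_lik(t1, t2, x), ref)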
def update(self, network):
    for i, layer in enumerate(network.parametric_layers):
        for n in layer.parameters.keys():
            grad = layer.parameters.grad[n]
            self.accu[i][n] = self.rho * self.accu[i][n] + (1.0 - self.rho) * grad ** 2
            step = grad * np.sqrt(self.d_accu[i][n] + self.eps) / np.sqrt(self.accu[i][n] + self.eps)
            layer.parameters.step(n, -step * self.lr)
            self.d_accu[i][n] = self.rho * self.d_accu[i][n] + (1.0 - self.rho) * step ** 2
def callback(weights, iter):
    if iter % 10 == 0:
        print "max of weights", np.max(np.abs(weights))
        train_preds = undo_norm(pred_fun(weights, train_smiles))
        cur_loss = loss_fun(weights, train_smiles, train_targets)
        training_curve.append(cur_loss)
        print "Iteration", iter, "loss", cur_loss, "train RMSE", \
            np.sqrt(np.mean((train_preds - train_raw_targets)**2)),
        if validation_smiles is not None:
            validation_preds = undo_norm(pred_fun(weights, validation_smiles))
            print "Validation RMSE", iter, ":", \
                np.sqrt(np.mean((validation_preds - validation_raw_targets) ** 2)),
def dKdu(u, v):
    """
    compute the grads of a given K w.r.t. u
    you can just switch order of args to compute it for v
    """
    anorm = np.sqrt(np.sum(u*u))
    bnorm = np.sqrt(np.sum(v*v))
    den2 = (anorm * bnorm) + 1e-20
    a = v / den2
    b = u / np.sum(np.square(u))
    c = cosine_sim(u, v)
    return a - b*c
def cosine_sim(a_t, b_t):
    """
    Computes the cosine similarity of vectors a and b.
    Specifically \frac{u \cdot v}{||u|| \cdot ||v||}.
    """
    # numerator is the inner product
    num = np.dot(a_t, b_t)
    # denominator is the product of the norms
    anorm = np.sqrt(np.sum(a_t*a_t))
    bnorm = np.sqrt(np.sum(b_t*b_t))
    den2 = (anorm * bnorm) + 1e-5
    return num / den2
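# Hypothetical sanity check (not part of the original source): dKdu should match a
# central finite-difference estimate of cosine_sim with respect to u, up to the small
# stabilizing constants the two functions use.
import autograd.numpy as np

u = np.array([1.0, -2.0, 0.5])
v = np.array([0.3, 0.8, -1.1])
h = 1e-6
num_grad = np.array([(cosine_sim(u + h * e, v) - cosine_sim(u - h * e, v)) / (2 * h)
                     for e in np.eye(len(u))])
assert np.allclose(dKdu(u, v), num_grad, atol=1e-4)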
def adadelta(paramvec, loss, batches, epochs=1, rho=0.95, epsilon=1e-6, callback=None):
    sum_gsq = np.zeros_like(paramvec)
    sum_usq = np.zeros_like(paramvec)
    vals = []

    for epoch in range(epochs):
        permuted_batches = [batches[i] for i in npr.permutation(len(batches))]
        for im, angle in permuted_batches:
            val, g = vgrad(loss)(paramvec, im, angle)
            sum_gsq = rho*sum_gsq + (1.-rho)*g**2
            ud = -np.sqrt(sum_usq + epsilon) / np.sqrt(sum_gsq + epsilon) * g
            sum_usq = rho*sum_usq + (1.-rho)*ud**2
            paramvec = paramvec + ud
            vals.append(val)
        if callback: callback(epoch, paramvec, vals, permuted_batches)

    return paramvec
def cost(usv):
    delta = .5
    u = usv[0]
    s = usv[1]
    vt = usv[2]
    X = np.dot(np.dot(u, np.diag(s)), vt)
    return np.sum(np.sqrt((X - A)**2 + delta**2) - delta)
def plot_single_gp(ax, params, layer, unit, plot_xs):
    ax.cla()
    rs = npr.RandomState(0)

    deep_map = create_deep_map(params)
    gp_details = deep_map[layer][unit]
    gp_params = pack_gp_params(gp_details)

    pred_mean, pred_cov = predict_layer_funcs[layer][unit](gp_params, plot_xs,
                                                           with_noise=False, FITC=False)
    x0 = deep_map[layer][unit]['x0']
    y0 = deep_map[layer][unit]['y0']
    noise_scale = deep_map[layer][unit]['noise_scale']

    marg_std = np.sqrt(np.diag(pred_cov))
    if n_samples_to_plot > 19:
        ax.plot(plot_xs, pred_mean, 'b')
        ax.fill(np.concatenate([plot_xs, plot_xs[::-1]]),
                np.concatenate([pred_mean - 1.96 * marg_std,
                                (pred_mean + 1.96 * marg_std)[::-1]]),
                alpha=.15, fc='Blue', ec='None')

    # Show samples from posterior.
    sampled_funcs = rs.multivariate_normal(pred_mean, pred_cov*(random), size=n_samples_to_plot)
    ax.plot(plot_xs, sampled_funcs.T)
    ax.plot(x0, y0, 'ro')
    #ax.errorbar(x0, y0, yerr=noise_scale, fmt='o')
    ax.set_xticks([])
    ax.set_yticks([])
def loglikelihood(self, g, beta, mu_ivp, alpha, pi, priors):
    logprobs = []
    for i, ifx in enumerate(self._ifix):
        # get the log-probability for each mixture component
        ll = 0.
        zM = self._forward(g, beta, mu_ivp[i], ifx)
        for q, yq in enumerate(self.Y_train_):
            ll += norm.logpdf(yq, zM[..., q], scale=1/np.sqrt(alpha)).sum()
        logprobs.append(ll + np.log(pi[i]))
    logprobs = np.array(logprobs)

    lpmax = max(logprobs)
    loglik = lpmax + np.log(np.exp(logprobs - lpmax).sum())

    Cg = self.latentforces[0].kernel(self.ttc[:, None])
    Cg[np.diag_indices_from(Cg)] += 1e-5
    Lg = np.linalg.cholesky(Cg)
    logprior = -0.5 * g.dot(cho_solve((Lg, True), g)) - \
        np.log(np.diag(Lg)).sum() - \
        Lg.shape[0] / 2 * np.log(2 * np.pi)

    for vn, x in zip(['beta'], beta):
        try:
            prior_logpdf = priors[vn]
            logprior += prior_logpdf(x)
        except KeyError:
            pass

    return loglik + logprior
def _get_responsibilities(self, pi, g, beta, mu_ivp, alpha):
    """ Gets the posterior responsibilities for each comp. of the mixture.
    """
    probs = [[]]*len(self.N_data)
    for i, ifx in enumerate(self._ifix):
        zM = self._forward(g, beta, mu_ivp[i], ifx)

        for q, yq in enumerate(self.Y_train_):
            logprob = norm.logpdf(yq, zM[self.data_inds[q], :, q],
                                  scale=1/np.sqrt(alpha))
            # sum over the dimension component
            logprob = logprob.sum(-1)

            if probs[q] == []:
                probs[q] = logprob
            else:
                probs[q] = np.column_stack((probs[q], logprob))

    probs = [lp - pi for lp in probs]

    # subtract the maximum for exponential normalization
    probs = [p - np.atleast_1d(p.max(axis=-1))[:, None] for p in probs]
    probs = [np.exp(p) / np.exp(p).sum(-1)[:, None] for p in probs]

    return probs
def predict_percentile(self, X, ancillary_X=None, p=0.5):
    """
    Returns the median lifetimes for the individuals, by default. If the survival
    curve of an individual does not cross ``p``, then the result is infinity.
    http://stats.stackexchange.com/questions/102986/percentile-loss-functions

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the
        same order as the training data.
    ancillary_X: numpy array or DataFrame, optional
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the
        same order as the training data.
    p: float, optional (default=0.5)
        the percentile, must be between 0 and 1.

    Returns
    -------
    percentiles: DataFrame

    See Also
    --------
    predict_median
    """
    exp_mu_, sigma_ = self._prep_inputs_for_prediction_and_return_scores(X, ancillary_X)
    return pd.DataFrame(exp_mu_ * np.exp(np.sqrt(2) * sigma_ * erfinv(2 * p - 1)),
                        index=_get_index(X))
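# Hypothetical numerical check (not from the source): the closed form above is the
# log-normal quantile function, since Phi^{-1}(p) = sqrt(2) * erfinv(2p - 1). The
# parameter values below are arbitrary.
import numpy as np
from scipy.special import erfinv
from scipy.stats import lognorm

exp_mu_, sigma_, p = 2.0, 0.7, 0.25
closed_form = exp_mu_ * np.exp(np.sqrt(2) * sigma_ * erfinv(2 * p - 1))
assert np.isclose(closed_form, lognorm.ppf(p, s=sigma_, scale=exp_mu_))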
def adam(grad, x, batch_id=None, num_batches=None, callback=None, num_iters=100,
         step_size=0.001, b1=0.9, b2=0.999, eps=10**-8):
    """Adam as described in http://arxiv.org/pdf/1412.6980.pdf.
    It's basically RMSprop with momentum and some correction terms."""
    m = np.zeros(len(x))
    v = np.zeros(len(x))
    if batch_id is not None:
        scale_factor = (2**(num_batches - batch_id)) / (2**(num_batches - 1))
    else:
        scale_factor = 1
    for i in range(num_iters):
        g = grad(x, scale_factor)
        if callback: callback(x, i, g)
        m = (1 - b1) * g      + b1 * m  # First moment estimate.
        v = (1 - b2) * (g**2) + b2 * v  # Second moment estimate.
        mhat = m / (1 - b1**(i + 1))    # Bias correction.
        vhat = v / (1 - b2**(i + 1))
        x -= step_size * mhat / (np.sqrt(vhat) + eps)
    return x
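# Hypothetical usage sketch (assumed, not from the source): minimize a simple quadratic
# with the Adam variant above; note its gradient callback receives (x, scale_factor)
# rather than (x, iteration). The loss and constants here are illustrative.
import autograd.numpy as np
from autograd import grad as agrad

loss = lambda x: np.sum((x - 3.0)**2)
gradient = lambda x, scale_factor: scale_factor * agrad(loss)(x)
x_opt = adam(gradient, np.zeros(5), num_iters=500, step_size=0.1)
# Each coordinate of x_opt should end up close to 3.0.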
def test_div():
    fun = lambda x, y: x / y
    make_gap_from_zero = lambda x: np.sqrt(x**2 + 0.5)
    for arg1, arg2 in arg_pairs():
        arg1 = make_gap_from_zero(arg1)
        arg2 = make_gap_from_zero(arg2)
        check_grads(fun)(arg1, arg2)
def opt_traj(func, fdict, T, opt_method='SGD', init=None,
             learning_rate=0.1, seed=100, momentum=False, noise_level=0.0):
    # do optimization and return the trajectory
    params = {'x': 0.0, 'y': 0.0}
    domain = fdict['domain']
    optimum = fdict['optimum']
    loss_and_grad = value_and_grad(func)
    #quick_grad_check(func, params)
    params = init_params(params, domain, init, seed)
    check_grads(func, params)
    opt_server = Parameter_Server(opt_method, momentum)
    opt_server.init_gradient_storage(params)

    x_traj = []
    y_traj = []
    f_traj = []

    print 'optimising function using %s...' % opt_method
    for t in xrange(T):
        (func_value, func_grad) = loss_and_grad(params)
        x_traj.append(params['x'])
        y_traj.append(params['y'])
        f_traj.append(func_value)
        func_grad = inject_noise(func_grad, noise_level)
        if opt_method == 'SGD':
            norm = np.sqrt(func_grad['x'] ** 2 + func_grad['y'] ** 2)
            if norm >= 2.0:
                func_grad['x'] /= norm / 2
                func_grad['y'] /= norm / 2
        params = opt_server.update(params, func_grad, learning_rate)

    return np.array(x_traj), np.array(y_traj), np.array(f_traj)
def multivariate_t_rvs(self, m, S, random_state=None):
    '''generate random variables of multivariate t distribution

    Parameters
    ----------
    m : array_like
        mean of random variable, length determines dimension of random variable
    S : array_like
        square array of covariance matrix
    df : int or float
        degrees of freedom
    n : int
        number of observations, return random array will be (n, len(m))
    random_state : int
        seed

    Returns
    -------
    rvs : ndarray, (n, len(m))
        each row is an independent draw of a multivariate t distributed random variable
    '''
    np.random.rand(9)
    m = np.asarray(m)
    d = self.n_features
    df = self.degree_freedom
    n = 1
    if df == np.inf:
        x = 1.
    else:
        x = random_state.chisquare(df, n) / df
    np.random.rand(90)
    z = random_state.multivariate_normal(np.zeros(d), S, (n,))
    return m + z / np.sqrt(x)[:, None]
def ackley(x):
    a, b, c = 20.0, -0.2, 2.0*np.pi
    len_recip = 1.0/len(x)
    sum_sqrs = sum(x*x)
    sum_cos = sum(np.cos(c*x))
    return (-a * np.exp(b*np.sqrt(len_recip*sum_sqrs)) -
            np.exp(len_recip*sum_cos) + a + np.e)
def log_prior_density(theta):
    alpha = 2
    beta = 0.5
    mu = np.log(alpha/beta)
    sigma = np.log(np.sqrt(alpha/(beta**2)))
    params = np.array([mu, sigma])
    return log_variational(params, theta)
def callback(params):
    print("Log likelihood {}".format(-objective(params)))
    plt.cla()
    print(params)

    # Show posterior marginals.
    plot_xs = np.reshape(np.linspace(-7, 7, 300), (300, 1))
    pred_mean, pred_cov = predict(params, X, y, plot_xs)
    marg_std = np.sqrt(np.diag(pred_cov))
    ax.plot(plot_xs, pred_mean, 'b')
    ax.fill(np.concatenate([plot_xs, plot_xs[::-1]]),
            np.concatenate([pred_mean - 1.96 * marg_std,
                            (pred_mean + 1.96 * marg_std)[::-1]]),
            alpha=.15, fc='Blue', ec='None')

    # Show samples from posterior.
    rs = npr.RandomState(0)
    sampled_funcs = rs.multivariate_normal(pred_mean, pred_cov, size=10)
    ax.plot(plot_xs, sampled_funcs.T)
    ax.plot(X, y, 'kx')
    ax.set_ylim([-1.5, 1.5])
    ax.set_xticks([])
    ax.set_yticks([])
    plt.draw()
    plt.pause(1.0/60.0)
def __init__(self, mu, var):
    self.norm_const = -0.5*np.log(2*np.pi)
    self.mu = np.atleast_1d(mu).flatten()
    self.var = np.atleast_1d(var).flatten()
    self.dim = np.prod(self.var.shape)
    assert(self.mu.shape == self.var.shape)
    self.std = np.sqrt(var)
    self.logstd = np.log(self.std)
def test_mod():
    fun = lambda x, y: x % y
    make_gap_from_zero = lambda x: np.sqrt(x**2 + 0.5)
    for arg1, arg2 in arg_pairs():
        if arg1 is not arg2:  # Gradient undefined at x == y
            arg1 = make_gap_from_zero(arg1)
            arg2 = make_gap_from_zero(arg2)
            check_grads(fun)(arg1, arg2)
def sample(self, n_samples=2000, observed_states=None, random_state=None):
    """Generate random samples from the model.

    Parameters
    ----------
    n : int
        Number of samples to generate.
    observed_states : array
        If provided, states are not sampled.
    random_state : RandomState or an int seed
        A random number generator instance. If None is given, the
        object's random_state is used.

    Returns
    -------
    samples : array_like, length (``n_samples``)
        List of samples
    states : array_like, shape (``n_samples``)
        List of hidden states (accounting for tied states by giving
        them the same index)
    """
    if random_state is None:
        random_state = self.random_state
    random_state = check_random_state(random_state)

    samples = np.zeros(n_samples)
    states = np.zeros(n_samples, dtype=int)  # integer states so they can index arrays below

    if observed_states is None:
        startprob_pdf = np.exp(np.copy(self._log_startprob))
        startdist = stats.rv_discrete(name='custm',
                                      values=(np.arange(startprob_pdf.shape[0]), startprob_pdf),
                                      seed=random_state)
        states[0] = startdist.rvs(size=1)[0]

        transmat_pdf = np.exp(np.copy(self._log_transmat))
        transmat_cdf = np.cumsum(transmat_pdf, 1)

        nrand = random_state.rand(n_samples)
        for idx in range(1, n_samples):
            newstate = (transmat_cdf[states[idx - 1]] > nrand[idx - 1]).argmax()
            states[idx] = newstate
    else:
        states = observed_states

    mu = np.copy(self._mu_)
    precision = np.copy(self._precision_)
    for idx in range(n_samples):
        mean_ = self._mu_[states[idx]]
        var_ = np.sqrt(1 / precision[states[idx]])
        samples[idx] = norm.rvs(loc=mean_, scale=var_, size=1,
                                random_state=random_state)
    states = self._process_sequence(states)
    return samples, states
def rmsprop(grad, x, callback=None, num_iters=100, step_size=0.1, gamma=0.9, eps=10**-8):
    """Root mean squared prop: See Adagrad paper for details."""
    avg_sq_grad = np.ones(len(x))
    for i in range(num_iters):
        g = grad(x, i)
        if callback: callback(x, i, g)
        avg_sq_grad = avg_sq_grad * gamma + g**2 * (1 - gamma)
        x -= step_size * g / (np.sqrt(avg_sq_grad) + eps)
    return x
def sample_from_mvn(mu, sigma, rs=npr.RandomState(0), FITC=False):
    if FITC:
        #if not np.allclose(sigma, np.diag(np.diag(sigma))):
        #    print("NOT DIAGONAL")
        #    return np.dot(np.linalg.cholesky(sigma+1e-6*np.eye(len(sigma))), rs.randn(len(sigma)))+mu if random == 1 else mu
        return np.dot(np.sqrt(sigma+1e-6), rs.randn(len(sigma)))+mu if random == 1 else mu
        #return np.dot(np.linalg.cholesky(sigma+1e-6*np.eye(len(sigma))), rs.randn(len(sigma)))+mu if random == 1 else mu
    else:
        return np.dot(np.linalg.cholesky(sigma+1e-6*np.eye(len(sigma))), rs.randn(len(sigma)))+mu if random == 1 else mu
def natural_sample(J, h, num_samples=None, rng=rng):
    sample_shape = (num_samples,) + h.shape if num_samples else h.shape
    J = -2*J
    if J.ndim == 1:
        return h / J + rng.normal(size=sample_shape) / np.sqrt(J)
    else:
        L = np.linalg.cholesky(J)
        noise = solve_triangular(L, rng.normal(size=sample_shape).T, trans='T')
        return solve_posdef_from_cholesky(L, h.T).T + noise.T
def get_error_and_ll(w, v_prior, X, y, K, location, scale):
    v_noise = np.exp(parser.get(w, 'log_v_noise')[0, 0]) * scale**2
    q = get_parameters_q(w, v_prior)
    samples_q = draw_samples(q, K)
    outputs = predict(samples_q, X) * scale + location
    log_factor = -0.5 * np.log(2 * math.pi * v_noise) - \
        0.5 * (np.tile(y, (1, K)) - np.array(outputs))**2 / v_noise
    ll = np.mean(logsumexp(log_factor - np.log(K), 1))
    error = np.sqrt(np.mean((y - np.mean(outputs, 1, keepdims=True))**2))
    return error, ll
def devec_ackley(x):
    a, b, c = 20.0, -0.2, 2.0*np.pi
    len_recip = 1.0/len(x)
    sum_sqrs, sum_cos = 0.0, 0.0
    for i in x:
        sum_cos += np.cos(c*i)
        sum_sqrs += i*i
    return (-a * np.exp(b*np.sqrt(len_recip*sum_sqrs)) -
            np.exp(len_recip*sum_cos) + a + np.e)
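# Hypothetical consistency check (not in the original source), assuming both Ackley
# variants above are importable in one session: they should agree on the same input,
# and the global minimum value at the origin is 0.
import autograd.numpy as np

x = np.array([0.3, -1.2, 2.5])
assert np.isclose(ackley(x), devec_ackley(x))
assert np.isclose(ackley(np.zeros(4)), 0.0, atol=1e-12)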
def stepSize(iteration, sPrev, gradient, eta=1.0):
    sCur = 0.1 * (gradient**2) + 0.9 * sPrev
    step = eta * np.power(iteration, -0.5 + 1e-16) / (1. + np.sqrt(sCur))
    return step, sCur
def external_R(
        self,
        θ: "Model parameters",
        dθ: "derivatives of model parameters"
) -> "Extrinsic curvature radius":
    v2 = self.external_v2(θ, dθ)
    a2 = self.external_a2(θ, dθ)
    return v2 / np.sqrt(a2)
def each_manifold_analysis_D1(sD1, kappa, n_t, eps=1e-8, t_vec=None): ''' This function computes the manifold capacity a_Mfull, the manifold radius R_M, and manifold dimension D_M with margin kappa using n_t randomly sampled vectors for a single manifold defined by a set of points sD1. Args: sD1: 2D array of shape (D+1, m) where m is number of manifold points kappa: Margin size (scalar) n_t: Number of randomly sampled vectors to use eps: Minimal distance (default 1e-8) t_vec: Optional 2D array of shape (D+1, m) containing sampled t vectors to use in evaluation Returns: a_Mfull: Calculated capacity (scalar) R_M: Calculated radius (scalar) D_M: Calculated dimension (scalar) ''' # Get the dimensionality and number of manifold points D1, m = sD1.shape # D+1 dimensional data D = D1 - 1 # Sample n_t vectors from a D+1 dimensional standard normal distribution unless a set is given if t_vec is None: t_vec = np.random.randn(D1, n_t) # Find the corresponding manifold point for each random vector ss, gg = maxproj(t_vec, sD1) # Compute V, S~ for each random vector s_all = np.empty((D1, n_t)) f_all = np.zeros(n_t) for i in range(n_t): # Get the t vector to use (keeping dimensions) t = np.expand_dims(t_vec[:, i], axis=1) if gg[i] + kappa < 0: # For this case, a solution with V = T is allowed by the constraints, so we don't need to # find it numerically v_f = t s_f = ss[:, i].reshape(-1, 1) else: # Get the solution for this t vector v_f, _, _, alpha, vminustsqk = minimize_vt_sq(t, sD1, kappa=kappa) f_all[i] = vminustsqk # If the solution vector is within eps of t, set them equal (interior point) if np.linalg.norm(v_f - t) < eps: v_f = t s_f = ss[:, i].reshape(-1, 1) else: # Otherwise, compute S~ from the solution scale = np.sum(alpha) s_f = (t - v_f) / scale # Store the calculated values s_all[:, i] = s_f[:, 0] # Compute the capacity from eq. 16, 17 in 2018 PRX paper. max_ts = np.maximum(np.sum(t_vec * s_all, axis=0) + kappa, np.zeros(n_t)) s_sum = np.sum(np.square(s_all), axis=0) lamb = np.asarray( [max_ts[i] / s_sum[i] if s_sum[i] > 0 else 0 for i in range(n_t)]) slam = np.square(lamb) * s_sum a_Mfull = 1 / np.mean(slam) # Compute R_M from eq. 28 of the 2018 PRX paper ds0 = s_all - s_all.mean(axis=1, keepdims=True) ds = ds0[0:-1, :] / s_all[-1, :] ds_sq_sum = np.sum(np.square(ds), axis=0) R_M = np.sqrt(np.mean(ds_sq_sum)) # Compute D_M from eq. 29 of the 2018 PRX paper t_norms = np.sum(np.square(t_vec[0:D, :]), axis=0, keepdims=True) t_hat_vec = t_vec[0:D, :] / np.sqrt(t_norms) s_norms = np.sum(np.square(s_all[0:D, :]), axis=0, keepdims=True) s_hat_vec = s_all[0:D, :] / np.sqrt(s_norms + 1e-12) ts_dot = np.sum(t_hat_vec * s_hat_vec, axis=0) D_M = D * np.square(np.mean(ts_dot)) return a_Mfull, R_M, D_M
def metric_tetrad(self, g) -> "Finds tetrad and inverse tetrad of g":
    v, e0 = self.metric_eigenproblem(g)
    e = np.einsum('ia,ab->ib', e0, np.diag(np.sqrt(v)))
    einv = np.einsum('ia,ab->ib', e0, np.diag(1 / np.sqrt(v)))
    return e, einv
def fun_FA(centers, maxK, max_iter, n_repeats, s_all=None, verbose=False, conjugate_gradient=True): ''' Extracts the low rank structure from the data given by centers Args: centers: 2D array of shape (N, P) where N is the ambient dimension and P is the number of centers maxK: Maximum rank to consider max_iter: Maximum number of iterations for the solver n_repeats: Number of repetitions to find the most stable solution at each iteration of K s: (Optional) iterable containing (P, 1) random normal vectors Returns: norm_coeff: Ratio of center norms before and after optimzation norm_coeff_vec: Mean ratio of center norms before and after optimization Proj: P-1 basis vectors V1_mat: Solution for each value of K res_coeff: Cost function after optimization for each K res_coeff0: Correlation before optimization ''' N, P = centers.shape # Configure the solver opts = {'max_iter': max_iter, 'gtol': 1e-6, 'xtol': 1e-6, 'ftol': 1e-8} # Subtract the global mean mean = np.mean(centers.T, axis=0, keepdims=True) Xb = centers.T - mean xbnorm = np.sqrt(np.square(Xb).sum(axis=1, keepdims=True)) # Gram-Schmidt into a P-1 dimensional basis q, r = qr(Xb.T, mode='economic') X = np.matmul(Xb, q[:, 0:P - 1]) # Sore the (P, P-1) dimensional data before extracting the low rank structure X0 = X.copy() xnorm = np.sqrt(np.square(X0).sum(axis=1, keepdims=True)) # Calculate the correlations C0 = np.matmul(X0, X0.T) / np.matmul(xnorm, xnorm.T) res_coeff0 = (np.sum(np.abs(C0)) - P) * 1 / (P * (P - 1)) # Storage for the results V1_mat = [] C0_mat = [] norm_coeff = [] norm_coeff_vec = [] res_coeff = [] # Compute the optimal low rank structure for rank 1 to maxK V1 = None for i in range(1, maxK + 1): best_stability = 0 for j in range(1, n_repeats + 1): # Sample a random normal vector unless one is supplied if s_all is not None and len(s_all) >= i: s = s_all[i * j - 1] else: s = np.random.randn(P, 1) # Create initial V. 
sX = np.matmul(s.T, X) if V1 is None: V0 = sX else: V0 = np.concatenate([sX, V1.T], axis=0) V0, _ = qr(V0.T, mode='economic') # (P-1, i) # Compute the optimal V for this i V1tmp, output = CGmanopt( V0, partial(square_corrcoeff_full_cost, grad=False), X, **opts) # Compute the cost cost_after, _ = square_corrcoeff_full_cost(V1tmp, X, grad=False) # Verify that the solution is orthogonal within tolerance assert np.linalg.norm(np.matmul(V1tmp.T, V1tmp) - np.identity(i), ord='fro') < 1e-10 # Extract low rank structure X0 = X - np.matmul(np.matmul(X, V1tmp), V1tmp.T) # Compute stability of solution denom = np.sqrt(np.sum(np.square(X), axis=1)) stability = min(np.sqrt(np.sum(np.square(X0), axis=1)) / denom) # Store the solution if it has the best stability if stability > best_stability: best_stability = stability best_V1 = V1tmp if n_repeats > 1 and verbose: print(j, 'cost=', cost_after, 'stability=', stability) # Use the best solution V1 = best_V1 # Extract the low rank structure XV1 = np.matmul(X, V1) X0 = X - np.matmul(XV1, V1.T) # Compute the current (normalized) cost xnorm = np.sqrt(np.square(X0).sum(axis=1, keepdims=True)) C0 = np.matmul(X0, X0.T) / np.matmul(xnorm, xnorm.T) current_cost = (np.sum(np.abs(C0)) - P) * 1 / (P * (P - 1)) if verbose: print('K=', i, 'mean=', current_cost) # Store the results V1_mat.append(V1) C0_mat.append(C0) norm_coeff.append((xnorm / xbnorm)[:, 0]) norm_coeff_vec.append(np.mean(xnorm / xbnorm)) res_coeff.append(current_cost) # Break the loop if there's been no reduction in cost for 3 consecutive iterations if (i > 4 and res_coeff[i - 1] > res_coeff[i - 2] and res_coeff[i - 2] > res_coeff[i - 3] and res_coeff[i - 3] > res_coeff[i - 4]): if verbose: print("Optimal K0 found") break return norm_coeff, norm_coeff_vec, q[:, 0:P - 1], V1_mat, res_coeff, res_coeff0
def NegELBO(param, prior, X, S, Ncon, G, M, K): """ Parameters ---------- param: length (2M + 2M + MG + 2G + GNK + GDK + GDK + GK + GK) variational parameters, including: 1) tau_a1: len(M), first parameter of q(alpha_m) 2) tau_a2: len(M), second parameter of q(alpha_m) 3) tau_b1: len(M), first parameter of q(beta_m) 4) tau_b2: len(M), second parameter of q(beta_m) 5) phi: shape(M, G), phi[m,:] is the paramter vector of q(c_m) 6) tau_v1: len(G), first parameter of q(nu_g) 7) tau_v2: len(G), second parameter of q(nu_g) 8) mu_w: shape(G, D, K), mu_w[g,d,k] is the mean parameter of q(W^g_{dk}) 9) sigma_w: shape(G, D, K), sigma_w[g,d,k] is the std parameter of q(W^g_{dk}) 10) mu_b: shape(G, K), mu_b[g,k] is the mean parameter of q(b^g_k) 11) sigma_b: shape(G, K), sigma_b[g,k] is the std parameter of q(b^g_k) prior: dictionary the naming of keys follow those in param {'tau_a1':val1, ...} X: shape(N, D) each row represents a sample and each column represents a feature S: shape(n_con, 4) each row represents a observed constrain (expert_id, sample1_id, sample2_id, constraint_type), where 1) expert_id: varies between [0, M-1] 2) sample1 id: varies between [0, N-1] 3) sample2 id: varies between [0, N-1] 4) constraint_type: 1 means must-link and 0 means cannot-link Ncon: shape(M, 1) number of constraints provided by each expert G: int number of local consensus in the posterior truncated Dirichlet Process M: int number of experts K: int maximal number of clusters among different solutions, due to the use of discriminative clustering, some local solution might have empty clusters Returns ------- """ eps = 1e-12 # get sample size and feature size [N, D] = np.shape(X) # unpack the input parameter vector [tau_a1, tau_a2, tau_b1, tau_b2, phi, tau_v1, tau_v2, mu_w, sigma_w,\ mu_b, sigma_b] = unpackParam(param, N, D, G, M, K) # compute eta given mu_w and mu_b eta = np.zeros((0, K)) for g in np.arange(G): t1 = np.exp(np.dot(X, mu_w[g]) + mu_b[g]) t2 = np.transpose(np.tile(np.sum(t1, axis=1), (K, 1))) eta = np.vstack((eta, t1 / t2)) eta = np.reshape(eta, (G, N, K)) # compute the expectation terms to be used later E_log_Alpha = digamma(tau_a1) - digamma(tau_a1 + tau_a2) # len(M) E_log_OneMinusAlpha = digamma(tau_a2) - digamma(tau_a1 + tau_a2) # len(M) E_log_Beta = digamma(tau_b1) - digamma(tau_b1 + tau_b2) # len(M) E_log_OneMinusBeta = digamma(tau_b2) - digamma(tau_b1 + tau_b2) # len(M) E_log_Nu = digamma(tau_v1) - digamma(tau_v1 + tau_v2) # len(G) E_log_OneMinusNu = digamma(tau_v2) - digamma(tau_v1 + tau_v2) # len(G) E_C = phi # shape(M, G) E_W = mu_w # shape(G, D, K) E_WMinusMuSqd = sigma_w**2 + (mu_w - prior['mu_w'])**2 # shape(G, D, K) E_BMinusMuSqd = sigma_b**2 + (mu_b - prior['mu_b'])**2 # shape(G, K) E_ExpB = np.exp(mu_b + 0.5 * sigma_b**2) # shape(G, K) E_logP_Alpha = (prior['tau_a1']-1) * E_log_Alpha + \ (prior['tau_a2']-1) * E_log_OneMinusAlpha - \ gammaln(prior['tau_a1']+eps) - \ gammaln(prior['tau_a2']+eps) + \ gammaln(prior['tau_a1']+prior['tau_a2']+eps) E_logP_Beta = (prior['tau_b1']-1) * E_log_Beta + \ (prior['tau_b2']-1) * E_log_OneMinusBeta - \ gammaln(prior['tau_b1']+eps) - \ gammaln(prior['tau_b2']+eps) + \ gammaln(prior['tau_b1']+prior['tau_b2']+eps) E_logQ_Alpha = (tau_a1-1)*E_log_Alpha + (tau_a2-1)*E_log_OneMinusAlpha - \ gammaln(tau_a1 + eps) - gammaln(tau_a2 + eps) + \ gammaln(tau_a1+tau_a2 + eps) E_logQ_Beta = (tau_b1-1)*E_log_Beta + (tau_b2-1)*E_log_OneMinusBeta - \ gammaln(tau_b1 + eps) - gammaln(tau_b2 + eps) + \ gammaln(tau_b1+tau_b2 + eps) E_logQ_C = np.sum(phi * np.log(phi + eps), 
axis=1) eta_N_GK = np.reshape(np.transpose(eta, (1, 0, 2)), (N, G * K)) # compute three terms and then add them up L_1, L_2, L_3 = [0., 0., 0.] # the first term and part of the second term for m in np.arange(M): idx_S = range(sum(Ncon[:m]), sum(Ncon[:m]) + Ncon[m]) tp_con = S[idx_S, 3] phi_rep = np.reshape(np.transpose(np.tile(phi[m], (K, 1))), G * K) E_A = np.dot(eta_N_GK, np.transpose(eta_N_GK * phi_rep)) E_A_use = E_A[S[idx_S, 1], S[idx_S, 2]] tp_Asum = np.sum(E_A_use) tp_AdotS = np.sum(E_A_use * tp_con) L_1 = L_1 + Ncon[m]*E_log_Beta[m] + np.sum(tp_con)*\ (E_log_OneMinusBeta[m]-E_log_Beta[m]) + \ tp_AdotS * (E_log_Alpha[m] + E_log_Beta[m] - \ E_log_OneMinusAlpha[m] - E_log_OneMinusBeta[m]) + \ tp_Asum * (E_log_OneMinusAlpha[m] - E_log_Beta[m]) fg = lambda g: phi[m, g] * np.sum(E_log_OneMinusNu[0:g - 1]) L_2 = L_2 + E_logP_Alpha[m] + E_logP_Beta[m] + \ np.dot(phi[m],E_log_Nu) + np.sum(map(fg, np.arange(G))) # the second term for g in np.arange(G): tp_Nug = (prior['gamma']-1)*E_log_OneMinusNu[g] + \ np.log(prior['gamma']+eps) t1 = np.dot(X, mu_w[g]) t2 = 0.5 * np.dot(X**2, sigma_w[g]**2) t3 = np.sum(eta[g], axis=1) t_mat_i = logsumexp(np.add(mu_b[g] + 0.5 * sigma_b[g]**2, t1 + t2), axis=1) tp_Zg = np.sum(eta[g] * np.add(t1, mu_b[g])) - np.dot(t3, t_mat_i) t5 = -np.log(np.sqrt(2*np.pi)*prior['sigma_w']) - \ 0.5/(prior['sigma_w']**2) * (sigma_w[g]**2 + \ (mu_w[g]-prior['mu_w'])**2) tp_Wg = np.sum(t5) t6 = -np.log(np.sqrt(2*np.pi)*prior['sigma_b']+eps) - \ 0.5/(prior['sigma_b']**2) * (sigma_b[g]**2 + \ (mu_b[g]-prior['mu_b'])**2) tp_bg = np.sum(t6) L_2 = L_2 + tp_Nug + tp_Zg + tp_Wg + tp_bg # the third term L_3 = np.sum(E_logQ_Alpha + E_logQ_Beta + E_logQ_C) for g in np.arange(G): tp_Nug3 = (tau_v1[g]-1)*E_log_Nu[g]+(tau_v2[g]-1)*E_log_OneMinusNu[g] -\ np.log(gamma(tau_v1[g])+eps) - np.log(gamma(tau_v2[g])+eps) + \ np.log(gamma(tau_v1[g]+tau_v2[g])+eps) tp_Zg3 = np.sum(eta[g] * np.log(eta[g] + eps)) tp_Wg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_w[g] + eps) - 0.5) tp_bg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_b[g] + eps) - 0.5) L_3 = L_3 + tp_Nug3 + tp_Zg3 + tp_Wg3 + tp_bg3 # Note the third term should have a minus sign before it ELBO = L_1 + L_2 - L_3 #ELBO = L_1 + L_2 return -ELBO
def cost(tensor, home, appliance, time):
    pred = np.einsum('Hh, hAt, tT ->HAT', home, appliance, time)
    mask = ~np.isnan(tensor)
    error = (pred - tensor)[mask].flatten()
    return np.sqrt((error**2).mean())
def sample(self, z, x, input=None, tag=None):
    T = z.shape[0]
    z = np.zeros_like(z, dtype=int) if self.single_subspace else z
    mus = self.forward(x, input, tag)
    etas = np.exp(self.inv_etas)
    return mus[np.arange(T), z, :] + np.sqrt(etas[z]) * npr.randn(T, self.N)
gpcsd_gen.temporal_cov_list[1].params['ell']['value'] = elltM_true
gpcsd_gen.temporal_cov_list[1].params['sigma2']['value'] = sig2tM_true

# %% Generate CSD and sample at interior electrode positions for comparing to tCSD
csd = gpcsd_gen.sample_prior(2 * ntrials)
csd_interior_electrodes = np.zeros((nx - 2, nt, 2 * ntrials))
for trial in range(2 * ntrials):
    csdinterp = scipy.interpolate.RectBivariateSpline(z, t, csd[:, :, trial])
    csd_interior_electrodes[:, :, trial] = csdinterp(xshort, t)

# %% Pass through forward model, add white noise
lfp = np.zeros((nx, nt, 2 * ntrials))
for trial in range(2 * ntrials):
    lfp[:, :, trial] = fwd_model_1d(csd[:, :, trial], z, x, R_true)
lfp = lfp + np.random.normal(0, np.sqrt(sig2n_true), size=(nx, nt, 2 * ntrials))
lfp = normalize(lfp)

# %% Visualize one trial
plt.figure()
plt.subplot(121)
plt.imshow(csd[:, :, 0], vmin=-1, vmax=1, cmap='bwr', aspect='auto')
plt.title('CSD')
plt.xlabel('Time')
plt.ylabel('depth')
plt.colorbar()
plt.subplot(122)
plt.imshow(lfp[:, :, 0], cmap='bwr', aspect='auto')
plt.title('LFP')
plt.xlabel('Time')
plt.colorbar()
def lognorm(self, ws):
    return np.exp(-0.5*(np.log(ws) - self.norm_mean)**2/self.norm_sig**2) / \
        np.sqrt(2*np.pi) / self.norm_sig / ws
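# Hypothetical check (not from the source): the expression above is the log-normal pdf,
# so it should match scipy.stats.lognorm with s=norm_sig and scale=exp(norm_mean).
# The SimpleNamespace stands in for the method's owning object, which is an assumption.
from types import SimpleNamespace
import numpy as np
from scipy.stats import lognorm as sp_lognorm

self_like = SimpleNamespace(norm_mean=0.2, norm_sig=0.8)
ws = np.array([0.5, 1.0, 2.0])
assert np.allclose(lognorm(self_like, ws),
                   sp_lognorm.pdf(ws, s=0.8, scale=np.exp(0.2)))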
def mat_cosine_dist(X, Y):
    prod = np.diagonal(np.dot(X, Y.T), offset=0, axis1=-1, axis2=-2)
    len1 = np.sqrt(np.diagonal(np.dot(X, X.T), offset=0, axis1=-1, axis2=-2))
    len2 = np.sqrt(np.diagonal(np.dot(Y, Y.T), offset=0, axis1=-1, axis2=-2))
    return np.divide(np.divide(prod, len1), len2)
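# Hypothetical check (not in the source): mat_cosine_dist returns the row-wise cosine
# similarity of paired rows, so it should match an explicit per-row computation.
import numpy as np

X = np.random.randn(4, 3)
Y = np.random.randn(4, 3)
manual = np.array([x @ y / (np.linalg.norm(x) * np.linalg.norm(y))
                   for x, y in zip(X, Y)])
assert np.allclose(mat_cosine_dist(X, Y), manual)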
def fit(self, batch_size, epochs=500, learning_rate=0.0001): """STEP 1: Set up what the optimization routine will be""" """Just to streamline with GVI code, re-name variables""" self.M = min(batch_size, self.n) Y = self.Y X = self.X """Create objective & take gradient""" objective = self.create_objective() objective_gradient = grad(objective) params = self.params """STEP 2: Sample from X, Y and perform ADAM steps""" """STEP 2.1: These are just the ADAM optimizer default settings""" m1 = 0 m2 = 0 beta1 = 0.9 beta2 = 0.999 epsilon = 1e-8 t = 0 """STEP 2.2: Loop over #epochs and take step for each subsample""" for epoch in range(epochs): """STEP 2.2.1: For each epoch, shuffle the data""" permutation = np.random.choice(range(Y.shape[0]), Y.shape[0], replace=False) """HERE: Should add a print statement here to monitor algorithm!""" if epoch % 100 == 0: print("epoch #", epoch, "/", epochs) #print("sigma2", np.exp(-q_params[3])) """STEP 2.2.2: Process M data points together and take one step""" for i in range(0, int(self.n / self.M)): """Get the next M observations (or less if we would run out of observations otherwise)""" end = min(self.n, (i + 1) * self.M) indices = permutation[(i * self.M):end] """ADAM step for this batch""" t += 1 if X is not None: if False: print("Y", Y[indices]) print( "X*coefs", np.matmul(X[indices, :], np.array([1.0, -2.0, 0.5, 4.0, -3.5]))) print("X*params", np.matmul(X[indices, :], params[:-1])) grad_params = objective_gradient(params, self.parser, Y[indices], X[indices, :]) else: grad_params = objective_gradient(params, self.parser, Y[indices], X_=None) # print(grad_params) # print("before:", params) m1 = beta1 * m1 + (1 - beta1) * grad_params m2 = beta2 * m2 + (1 - beta2) * grad_params**2 m1_hat = m1 / (1 - beta1**t) m2_hat = m2 / (1 - beta2**t) params -= learning_rate * m1_hat / (np.sqrt(m2_hat) + epsilon) # print("after", params) self.params = params
def ELBO_terms(param, prior, X, S, Ncon, G, M, K): eps = 1e-12 # get sample size and feature size [N, D] = np.shape(X) # unpack the input parameter vector [tau_a1, tau_a2, tau_b1, tau_b2, phi, tau_v1, tau_v2, mu_w, sigma_w,\ mu_b, sigma_b] = unpackParam(param, N, D, G, M, K) # compute eta given mu_w and mu_b eta = np.zeros((0, K)) for g in np.arange(G): t1 = np.exp(np.dot(X, mu_w[g]) + mu_b[g]) t2 = np.transpose(np.tile(np.sum(t1, axis=1), (K, 1))) eta = np.vstack((eta, t1 / t2)) eta = np.reshape(eta, (G, N, K)) # compute the expectation terms to be used later E_log_Alpha = digamma(tau_a1) - digamma(tau_a1 + tau_a2) # len(M) E_log_OneMinusAlpha = digamma(tau_a2) - digamma(tau_a1 + tau_a2) # len(M) E_log_Beta = digamma(tau_b1) - digamma(tau_b1 + tau_b2) # len(M) E_log_OneMinusBeta = digamma(tau_b2) - digamma(tau_b1 + tau_b2) # len(M) E_log_Nu = digamma(tau_v1) - digamma(tau_v1 + tau_v2) # len(G) E_log_OneMinusNu = digamma(tau_v2) - digamma(tau_v1 + tau_v2) # len(G) E_C = phi # shape(M, G) E_W = mu_w # shape(G, D, K) E_WMinusMuSqd = sigma_w**2 + (mu_w - prior['mu_w'])**2 # shape(G, D, K) E_BMinusMuSqd = sigma_b**2 + (mu_b - prior['mu_b'])**2 # shape(G, K) E_ExpB = np.exp(mu_b + 0.5 * sigma_b**2) # shape(G, K) E_logP_Alpha = (prior['tau_a1']-1) * E_log_Alpha + \ (prior['tau_a2']-1) * E_log_OneMinusAlpha - \ gammaln(prior['tau_a1']+eps) - \ gammaln(prior['tau_a2']+eps) + \ gammaln(prior['tau_a1']+prior['tau_a2']+eps) E_logP_Beta = (prior['tau_b1']-1) * E_log_Beta + \ (prior['tau_b2']-1) * E_log_OneMinusBeta - \ gammaln(prior['tau_b1']+eps) - \ gammaln(prior['tau_b2']+eps) + \ gammaln(prior['tau_b1']+prior['tau_b2']+eps) E_logQ_Alpha = (tau_a1-1)*E_log_Alpha + (tau_a2-1)*E_log_OneMinusAlpha - \ gammaln(tau_a1 + eps) - gammaln(tau_a2 + eps) + \ gammaln(tau_a1+tau_a2 + eps) E_logQ_Beta = (tau_b1-1)*E_log_Beta + (tau_b2-1)*E_log_OneMinusBeta - \ gammaln(tau_b1 + eps) - gammaln(tau_b2 + eps) + \ gammaln(tau_b1+tau_b2 + eps) E_logQ_C = np.sum(phi * np.log(phi + eps), axis=1) eta_N_GK = np.reshape(np.transpose(eta, (1, 0, 2)), (N, G * K)) # compute three terms and then add them up L_1, L_2, L_3 = [0., 0., 0.] 
# the first term and part of the second term for m in np.arange(M): idx_S = range(sum(Ncon[:m]), sum(Ncon[:m]) + Ncon[m]) tp_con = S[idx_S, 3] phi_rep = np.reshape(np.transpose(np.tile(phi[m], (K, 1))), G * K) E_A = np.dot(eta_N_GK, np.transpose(eta_N_GK * phi_rep)) E_A_use = E_A[S[idx_S, 1], S[idx_S, 2]] tp_Asum = np.sum(E_A_use) tp_AdotS = np.sum(E_A_use * tp_con) L_1 = L_1 + Ncon[m]*E_log_Beta[m] + np.sum(tp_con)*\ (E_log_OneMinusBeta[m]-E_log_Beta[m]) + \ tp_AdotS * (E_log_Alpha[m] + E_log_Beta[m] - \ E_log_OneMinusAlpha[m] - E_log_OneMinusBeta[m]) + \ tp_Asum * (E_log_OneMinusAlpha[m] - E_log_Beta[m]) fg = lambda g: phi[m, g] * np.sum(E_log_OneMinusNu[0:g - 1]) L_2 = L_2 + E_logP_Alpha[m] + E_logP_Beta[m] + \ np.dot(phi[m],E_log_Nu) + np.sum(map(fg, np.arange(G))) # the second term for g in np.arange(G): tp_Nug = (prior['gamma']-1)*E_log_OneMinusNu[g] + \ np.log(prior['gamma']+eps) t1 = np.dot(X, mu_w[g]) t2 = 0.5 * np.dot(X**2, sigma_w[g]**2) t3 = np.sum(eta[g], axis=1) t_mat_i = logsumexp(np.add(mu_b[g] + 0.5 * sigma_b[g]**2, t1 + t2), axis=1) tp_Zg = np.sum(eta[g] * np.add(t1, mu_b[g])) - np.dot(t3, t_mat_i) t5 = -np.log(np.sqrt(2*np.pi)*prior['sigma_w']) - \ 0.5/(prior['sigma_w']**2) * (sigma_w[g]**2 + \ (mu_w[g]-prior['mu_w'])**2) tp_Wg = np.sum(t5) t6 = -np.log(np.sqrt(2*np.pi)*prior['sigma_b']+eps) - \ 0.5/(prior['sigma_b']**2) * (sigma_b[g]**2 + \ (mu_b[g]-prior['mu_b'])**2) tp_bg = np.sum(t6) L_2 = L_2 + tp_Nug + tp_Zg + tp_Wg + tp_bg # the third term L_3 = np.sum(E_logQ_Alpha + E_logQ_Beta + E_logQ_C) for g in np.arange(G): tp_Nug3 = (tau_v1[g]-1)*E_log_Nu[g]+(tau_v2[g]-1)*E_log_OneMinusNu[g] -\ np.log(gamma(tau_v1[g])+eps) - np.log(gamma(tau_v2[g])+eps) + \ np.log(gamma(tau_v1[g]+tau_v2[g])+eps) tp_Zg3 = np.sum(eta[g] * np.log(eta[g] + eps)) tp_Wg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_w[g] + eps) - 0.5) tp_bg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_b[g] + eps) - 0.5) L_3 = L_3 + tp_Nug3 + tp_Zg3 + tp_Wg3 + tp_bg3 return (L_1, L_2, L_3)
def embedded_ω(self, H, P, g, dθ) -> "Extrinsic frequency":
    v2 = self.embedded_velocity2(g, dθ)
    a2 = self.embedded_acceleration2(H, P, dθ)
    return np.sqrt(np.abs(v2 * a2))
def func(y, t, arg1, arg2): return -np.sqrt(t) - y + arg1 - np.mean((y + arg2)**2)
def test_unary():
    grad_test(lambda x: ti.sqrt(x), lambda x: np.sqrt(x))
    grad_test(lambda x: ti.exp(x), lambda x: np.exp(x))
    grad_test(lambda x: ti.log(x), lambda x: np.log(x))
from ubvi.autograd import logsumexp


def logp(x):
    return (-np.log(1 + x**2) - np.log(np.pi)).flatten()


np.random.seed(1)

N_runs = 20
N = 30
d = 1
diag = True
n_samples = 2000
n_logfg_samples = 10000
adam_learning_rate = lambda itr: 10. / np.sqrt(itr + 1)
adam_num_iters = 10000
n_init = 10000
init_inflation = 16
lmb = lambda itr: 1. / np.sqrt(1 + itr)

gauss = Gaussian(d, diag)
adam = lambda x0, obj, grd: ubvi_adam(x0, obj, grd, adam_learning_rate,
                                      adam_num_iters, callback=gauss.print_perf)

if not os.path.exists('results/'):
    os.mkdir('results')
import sys
sys.path.append('../../src/')
import autograd.numpy as np
from dOTDmodel import dOTDModel
from gendata import GenData
from rhs import rhs_3D


if __name__ == '__main__':

    notd = 3       # Number of dOTD modes to be learned
    npts = 10      # Number of training points
    rhs = rhs_3D   # Right-hand side in governing equations

    ### Generate long trajectory
    mu = 0.1
    sqmu = np.sqrt(mu)
    z0 = np.array([sqmu*np.cos(1), sqmu*np.sin(1), mu+1e-3])
    ndim = z0.shape[0]
    u0 = np.array([[-0.84, -0.40, -0.36],
                   [ 0.54, -0.63, -0.55],
                   [ 0.00, -0.65,  0.75]])
    dt = 0.01
    tf = 50
    gen = GenData(z0, u0, tf, dt, rhs)
    t, Z, U = gen.trajectory()

    ### Generate training, validation, and testing sets
    kwargs = dict(rec=True, n_neighbors=7)
    ind_trn = np.where((t >= 20) & (t < 20+2*np.pi))[0]
    a = np.floor(len(ind_trn)/(npts-1))
    ind_trn = ind_trn[::int(a)]
def external_ωv(
        self,
        θ: "Model parameters",
        dθ: "derivatives of model parameters"
) -> "Extrinsic normalized frequency":
    v2 = self.external_v2(θ, dθ)
    a2 = self.external_a2(θ, dθ)
    return np.sqrt(np.abs(a2))
def gaussian(x, loc=None, scale=None):
    ''' N(x; loc, scale) '''
    y = (x - loc) / scale
    return np.exp(-0.5 * y**2) / np.sqrt(2. * np.pi) / scale
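# Hypothetical check (assumption, not from the source): gaussian() should reproduce
# scipy.stats.norm.pdf for matching loc and scale.
import numpy as np
from scipy.stats import norm as sp_norm

x = np.linspace(-3.0, 3.0, 7)
assert np.allclose(gaussian(x, loc=0.5, scale=2.0), sp_norm.pdf(x, loc=0.5, scale=2.0))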
[initial_mean, initial_log_sigma])

# Optimize
print("-> Optimizing variational parameters...")
print("-> Initial ELBO: {}".format(ELBO(initial_variational_params)))

vparams = initial_variational_params
for epoch in range(num_epochs):
    lr = next(sched)

    ### Adam optimizer
    g = gradient(vparams)
    m = beta1 * m + (1 - beta1) * g
    v = beta2 * v + (1 - beta2) * (g**2)
    # Correcting biased terms
    mhat = m / (1 - beta1**(epoch + 1))
    vhat = v / (1 - beta2**(epoch + 1))
    # Update step
    vparams -= lr * mhat / (np.sqrt(vhat) + epsilon)

    ### Logging and sampling from posterior
    print("Epoch {} -> ELBO: {}".format(epoch, ELBO(vparams)))

    # Sample from posterior
    num_posterior_samples = 10
    mu, log_sigma = vparams[:num_weights], vparams[num_weights:]
    posterior_samples = mu + np.exp(log_sigma) * np.random.randn(
        num_posterior_samples, num_weights)
    plot_inputs = np.linspace(-8, 8, num=400)
    outputs = forward(posterior_samples, np.expand_dims(plot_inputs, 1))

    # Plot
    plt.cla()
    ax.plot(inputs.ravel(), targets.ravel(), 'bx')
    ax.plot(plot_inputs, outputs[:, :, 0].T)
def percentile(self, p): return np.exp(self.mu_ + np.sqrt(2 * self.sigma_**2) * erfinv(1 - 2 * p))
def molecular_pursuit( target, code_coefs, basis_size=4, rtol=0.01, # Dependence on this parameter is sensitive rcond=1e-3, # lax colinearity condition for approximation cutoff=100.0, # higher gains than this are unlikely to be stable pitched=True, match_rtol=0.01, # stop search early if good enough verbose=0, **molecule_args): """ In the molecular matching pursuit problem we approximate a target correlation profile with a codebook of correlation molecules by maximising inner product. There are various distinctions between this and the atomic case. * we need to preserve the identity of the molecules * we don't need to precondition the tau term since tau and w are now coupled * so we only have 2 coefs * no bias term * tedious to get a basis dictionary that has unit gain * despite this we normalise and do a true matching pursuit * ... """ n_pts = target.size t = np.arange(n_pts) scale = max(np.sqrt(np.sum(np.square(target))), 1e-8) init_scale = scale deviance = scale # print("scale", scale) residual = target gain_rate = np.zeros((2, basis_size)) gain_rate[1] = 1 molecule_idx = -np.ones(basis_size, dtype=int) basis_eval = np.zeros((n_pts, basis_size)) for i in range(basis_size): # print("-----", i) # progress = (i-1) / (basis_size-2) rate, idx = choose_molecule( residual, code_coefs, t=t, pitched=pitched, verbose=verbose, match_rtol=match_rtol, **molecule_args, ) molecule_idx[i] = idx # print('prm', mag, w, tau, phi) gain_rate[:, i] = rate # molecule_eval = decaycos_eval(t, 1, w, tau, phi).ravel() # residual = target - molecule_eval basis_eval[:, i] = molecular_eval( t, *molecular_scale(code_coefs[idx], 1.0, rate)) gain_scale, sum_resid, rank, s = lstsq(basis_eval[:, :i + 1], target.reshape(-1, 1), rcond=rcond) # print(basis_eval[:, :i + 1]) # print("gain scale\n", gain_scale.ravel()) # print("gain rate\n", gain_rate[:, :i + 1]) if ((np.max(np.abs(gain_rate[0, :i + 1] * gain_scale.ravel())) > cutoff) and verbose >= 14): # exploding results indicate colinear molecules; warnings.warn( 'exploding solution at step {}\n' 'try raising `rcond`:\n' '{}->{}'.format( i, gain_scale.ravel(), gain_rate[0, :i + 1].ravel() * gain_scale.ravel())) # Now what? else: gain_rate[0, :i + 1] *= gain_scale.ravel() basis_eval[:, :i + 1] *= gain_scale.reshape((1, -1)) curr_approx = np.sum(basis_eval[:, :i + 1], axis=1) new_residual = target - curr_approx # plt.figure() # plt.plot(target, label='target') # plt.plot(curr_approx, label='hat') # plt.plot(residual, label='oldres') # plt.plot(new_residual, label='newres') # plt.plot(basis_eval[:, i]*mag, label='new_molecular') # plt.legend() # plt.show() # import pdb; pdb.set_trace() residual = new_residual new_deviance = np.sqrt(np.sum(np.square(new_residual))) # print("deviance", deviance, "/", scale) if deviance - new_deviance < rtol * scale: # we didn't improve the match so we won't next step either if verbose >= 17: print("failed to improve", new_deviance, "-", deviance, "<", rtol, "*", scale) break deviance = new_deviance scale /= new_deviance loss = deviance / scale return gain_rate[:, :i + 1], molecule_idx[:i + 1], loss, init_scale
def _evaluate(self, x, out, *args, **kwargs):
    part1 = -1. * self.a * anp.exp(-1. * self.b * anp.sqrt((1. / self.n_var) * anp.sum(x * x, axis=1)))
    part2 = -1. * anp.exp((1. / self.n_var) * anp.sum(anp.cos(self.c * x), axis=1))
    out["F"] = part1 + part2 + self.a + anp.exp(1)
def choose_molecule_pitch_opt(target, code_coef, maxiter=5, t=None, lr=0.01, low_pitch=0.5**0.5, high_pitch=2.0**0.5, n_starts=65, trace=False, pdb=False, verbose=0, norm_method='analytic', **molecule_args): """ choose pitch for one molecule and return inner product at that pitch """ if t is None: t = np.arange(target.size) rates = np.exp( np.linspace(np.log(low_pitch), np.log(high_pitch), n_starts + 2, endpoint=True)[1:-1]) max_step = (high_pitch - low_pitch) / n_starts def multi_objective(rates): """ normalised inner product for each rate """ molecules = [molecular_scale( code_coef, 1, rate, ) for rate in rates] normecules = np.array([ molecular_eval_norm(t, *molecule, norm_method=norm_method, verbose=verbose) for molecule in molecules ]) if not np.all(np.isfinite(normecules)) and verbose >= 1: exploded = np.isfinite(normecules.sum(1)) warnings.warn( "{} normed molecules {} exploded with\n{} at rates\n{}".format( np.sum(exploded), normecules.shape, code_coef, rates[exploded])) obj = np.array([np.dot(normecule, target) for normecule in normecules]) return np.nan_to_num(obj) grad = elementwise_grad(multi_objective) # f, axarr = plt.subplots(2, 1) if trace: trace_list = [] for step_i in range(maxiter): # gradient ascent jac = grad(rates) if not np.all(np.isfinite(jac)) and verbose >= 1: warnings.warn( "jac exploded {}\nfor coefs \n{}\nat rate {}\nwith obj {}". format(jac, code_coef, rates, multi_objective(rates))) jac = np.nan_to_num(jac) step = np.clip(lr * jac, -max_step, max_step) if pdb: print(step_i, "jac", np.sqrt((jac**2).mean()), "step", np.sqrt((step**2).mean())) from IPython.core.debugger import set_trace set_trace() # val = multi_objective(rates) # best = np.argmax(val) # stepsize = jac[best] # print('stepsize', stepsize) # axarr[0].quiver( # rates, # X # val, # Y # step, # U # np.zeros_like(step), # V # np.full_like(step, step_i/(maxiter-1)), # C # cmap="magma", # angles='xy', # label="step {}".format(step_i)) # axarr[1].scatter( # rates, # X # val, # Y # cmap="magma", # label="step {}".format(step_i)) rates = rates + step # gradient ascent step rates = np.clip(rates, low_pitch, high_pitch) if trace: trace_list.append((multi_objective(rates), rates, jac, step)) if verbose >= 21: max_goodness = np.amax(multi_objective(rates)) print( "max_goodness at ", step_i, ) if not np.isfinite(max_goodness): from IPython.core.debugger import set_trace set_trace() if trace: return trace_list goodnesses = multi_objective(rates) best_idx = np.argmax(goodnesses) if verbose >= 11: print( "choose_molecule_pitch_opt", best_idx, rates[best_idx], "@", goodnesses[best_idx], ) return rates[best_idx], goodnesses[best_idx]
def cost(tensor, home, appliance, day, hour):
    pred = np.einsum('Hr, Ar, Dr, ATr ->HADT', home, appliance, day, hour)
    mask = ~np.isnan(tensor)
    error = (pred - tensor)[mask].flatten()
    return np.sqrt((error**2).mean())
def test_sqrt():
    fun = lambda x: 3.0 * np.sqrt(x)
    d_fun = grad(fun)
    check_grads(fun, 10.0*npr.rand())
    check_grads(d_fun, 10.0*npr.rand())
def embedded_radius(self, H, P, g, dθ) -> "Extrinsic curvature radius":
    v2 = self.embedded_velocity2(g, dθ)
    a2 = self.embedded_acceleration2(H, P, dθ)
    return v2 / np.sqrt(a2)
def fun(x): return np.sqrt(x)
def test_typical_dist(self):
    np_testing.assert_almost_equal(self.manifold.typical_dist,
                                   np.sqrt(self.n * self.k))