def _ll(self, m, p, a, xn, xln, **kwargs):
    """Computation of the log likelihood.

    Dimensions
    ----------
    m : n_unique x n_features
    p : n_unique x n_features x n_features
    a : n_unique x n_lags (shared_alpha=F)
        OR 1 x n_lags (shared_alpha=T)
    xn : N x n_features
    xln : N x n_features x n_lags
    """
    samples = xn.shape[0]
    xn = xn.reshape(samples, 1, self.n_features)
    m = m.reshape(1, self.n_unique, self.n_features)
    det = np.linalg.det(np.linalg.inv(p))
    det = det.reshape(1, self.n_unique)

    lagged = np.dot(xln, a.T)            # NFU
    lagged = np.swapaxes(lagged, 1, 2)   # NUF
    xm = xn - (lagged + m)
    tem = np.einsum('NUF,UFX,NUX->NU', xm, p, xm)

    res = (-self.n_features / 2.0) * np.log(2 * np.pi) - 0.5 * tem - 0.5 * np.log(det)
    return res
def ns_loss_a(Wsub):
    h = Wsub[0, :N]
    vwo = Wsub[1, N:]
    vwi_negs = Wsub[2:, N:]
    vwo_h = npa.dot(vwo, h)
    vwi_negs_h = npa.dot(vwi_negs, h)
    return -npa.log(siga(vwo_h)) - npa.sum(npa.log(siga(-vwi_negs_h)))
def log_prior_density(theta):
    alpha = 2
    beta = 0.5
    mu = np.log(alpha / beta)
    sigma = np.log(np.sqrt(alpha / (beta**2)))
    params = np.array([mu, sigma])
    return log_variational(params, theta)
def normalizing_flows(z_0, norm_flow_params):
    '''
    z_0: [n_samples, D]
    u: [D,1]
    w: [D,1]
    b: [1]
    '''
    current_z = z_0
    all_zs = [z_0]
    for params_k in norm_flow_params:
        u, w, b = params_k[0], params_k[1], params_k[2]

        # Appendix equations: reparameterise u so that w^T u_k = m(w^T u) >= -1,
        # with m(x) = -1 + softplus(x); note the normalisation by ||w||^2.
        m_wu = -1. + np.log(1. + np.exp(np.dot(w.T, u)))
        u_k = u + (m_wu - np.dot(w.T, u)) * (w / np.linalg.norm(w)**2)

        term1 = np.tanh(np.dot(current_z, w) + b)  # [n_samples, 1]
        term1 = np.dot(term1, u_k.T)               # [n_samples, D]
        current_z = current_z + term1
        all_zs.append(current_z)
    return current_z, all_zs
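# A minimal self-contained check of the reparameterisation above (my assumption about the
# intended normalisation): with u_k defined this way, w^T u_k = m(w^T u) >= -1, which is
# the invertibility condition for a planar flow.
import numpy as np

rng = np.random.default_rng(0)
D = 3
u = rng.normal(size=(D, 1))
w = rng.normal(size=(D, 1))
m_wu = -1. + np.log(1. + np.exp(np.dot(w.T, u)))
u_k = u + (m_wu - np.dot(w.T, u)) * (w / np.linalg.norm(w)**2)
assert np.allclose(np.dot(w.T, u_k), m_wu)
assert np.all(m_wu >= -1.)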
def gamma_logpdf(x, shape, rate):
    if np.any(x <= 0):
        return -np.inf
    return (np.log(rate) * shape
            - sp.special.gammaln(shape)
            + np.log(x) * (shape - 1)
            - rate * x)
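# Quick sanity check against scipy's reference implementation (assumes the `sp` alias above
# is scipy): a Gamma(shape, rate) log-density equals scipy.stats.gamma with scale = 1/rate.
import numpy as np
import scipy.special
from scipy import stats

x, shape, rate = 2.3, 3.0, 0.5
ours = (np.log(rate) * shape - scipy.special.gammaln(shape)
        + np.log(x) * (shape - 1) - rate * x)
assert np.isclose(ours, stats.gamma.logpdf(x, a=shape, scale=1.0 / rate))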
def loglikelihood(self, g, beta, mu_ivp, alpha, pi, priors):
    logprobs = []
    for i, ifx in enumerate(self._ifix):
        # get the log probability for each mixture component
        ll = 0.
        zM = self._forward(g, beta, mu_ivp[i], ifx)
        for q, yq in enumerate(self.Y_train_):
            ll += norm.logpdf(yq, zM[..., q], scale=1 / np.sqrt(alpha)).sum()
        logprobs.append(ll + np.log(pi[i]))
    logprobs = np.array(logprobs)

    lpmax = max(logprobs)
    loglik = lpmax + np.log(np.exp(logprobs - lpmax).sum())

    Cg = self.latentforces[0].kernel(self.ttc[:, None])
    Cg[np.diag_indices_from(Cg)] += 1e-5
    Lg = np.linalg.cholesky(Cg)
    logprior = -0.5 * g.dot(cho_solve((Lg, True), g)) - \
        np.log(np.diag(Lg)).sum() - \
        Lg.shape[0] / 2 * np.log(2 * np.pi)

    for vn, x in zip(['beta'], beta):
        try:
            prior_logpdf = priors[vn]
            logprior += prior_logpdf(x)
        except KeyError:
            pass

    return loglik + logprior
def predict_cumulative_hazard(self, X, times=None, ancillary_X=None):
    """
    Return the cumulative hazard rate of subjects in X at time points.

    Parameters
    ----------
    X: numpy array or DataFrame
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the
        same order as the training data.
    times: iterable, optional
        an iterable of increasing times to predict the cumulative hazard at.
        Default is the set of all durations (observed and unobserved). Uses
        a linear interpolation if points in time are not in the index.
    ancillary_X: numpy array or DataFrame, optional
        a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
        can be in any order. If a numpy array, columns must be in the
        same order as the training data.

    Returns
    -------
    cumulative_hazard_ : DataFrame
        the cumulative hazard of individuals over the timeline
    """
    import numpy as np

    times = coalesce(times, self.timeline, np.unique(self.durations))
    exp_mu_, sigma_ = self._prep_inputs_for_prediction_and_return_scores(X, ancillary_X)
    mu_ = np.log(exp_mu_)
    Z = np.subtract.outer(np.log(times), mu_) / sigma_
    return pd.DataFrame(-logsf(Z), columns=_get_index(X), index=times)
def gradient_log_recognition(params, theta, i):
    alpha = params[0]
    beta = params[1]
    if i == 0:
        return np.log(theta) - special.polygamma(0, alpha) + special.polygamma(0, alpha + beta)
    if i == 1:
        return np.log(1 - theta) - special.polygamma(0, beta) + special.polygamma(0, alpha + beta)
def mvt_logpdf(x, mu, Li, df):
    dim = Li.shape[0]
    Ki = np.dot(Li.T, Li)

    # determinant is just the product of the diagonal elements of the Cholesky factor
    logdet = 2 * log(1. / np.diag(Li)).sum()
    lpdf_const = (gammaln((df + dim) / 2)
                  - (gammaln(df / 2)
                     + (log(df) + log(np.pi)) * dim * 0.5
                     + logdet * 0.5))

    x = np.atleast_2d(x)
    if x.shape[1] != mu.size:
        x = x.T
    assert (x.shape[1] == mu.size or x.shape[0] == mu.size)
    d = (x - mu.reshape((1, mu.size))).T

    Ki_d_scal = np.dot(Ki, d) / df                 # vector
    d_Ki_d_scal_1 = diag_dot(d.T, Ki_d_scal) + 1.  # scalar

    res_pdf = (lpdf_const - 0.5 * (df + dim) * np.log(d_Ki_d_scal_1)).flatten()
    if res_pdf.size == 1:
        res_pdf = float(res_pdf)  # np.float was removed from NumPy; use the builtin
    return res_pdf
def _hmc_log_probability(self, L, mu_0, mu_self, A):
    """
    Compute the log probability as a function of L.
    This allows us to take the gradients wrt L using autograd.

    :param L:
    :param A:
    :return:
    """
    import autograd.numpy as anp

    # Compute pairwise distance
    L1 = anp.reshape(L, (self.N, 1, self.dim))
    L2 = anp.reshape(L, (1, self.N, self.dim))
    D = -anp.sum((L1 - L2)**2, axis=2)

    # Compute the logit probability
    logit_P = D + mu_0 + mu_self * np.eye(self.N)

    # Take the logistic of the negative distance
    P = 1.0 / (1 + anp.exp(-logit_P))

    # Compute the log likelihood
    ll = anp.sum(A * anp.log(P) + (1 - A) * anp.log(1 - P))

    # Log prior of L under spherical Gaussian prior
    lp = -0.5 * anp.sum(L * L / self.sigma)

    # Log priors of mu_0 and mu_self under standard Gaussian priors
    lp += -0.5 * mu_0**2
    lp += -0.5 * mu_self**2

    return ll + lp
def location_mixture_logpdf(samps, locations, location_weights, distr_at_origin,
                            contr_var=False, variant=1):
    # lpdfs = zeroprop.logpdf()
    diff = samps - locations[:, np.newaxis, :]
    lpdfs = distr_at_origin.logpdf(
        diff.reshape([np.prod(diff.shape[:2]), diff.shape[-1]])).reshape(diff.shape[:2])
    logprop_weights = log(location_weights / location_weights.sum())[:, np.newaxis]
    if not contr_var:
        return logsumexp(lpdfs + logprop_weights, 0)
    # time_m1 = np.hstack([time0[:,:-1],time0[:,-1:]])
    else:
        time0 = lpdfs + logprop_weights + log(len(location_weights))
        if variant == 1:
            time1 = np.hstack([time0[:, 1:], time0[:, :1]])
            cov = np.mean(time0**2 - time0 * time1)
            var = np.mean((time0 - time1)**2)
            lpdfs = lpdfs - cov / var * (time0 - time1)
            return logsumexp(lpdfs - log(len(location_weights)), 0)
        elif variant == 2:
            cvar = (time0[:, :, np.newaxis]
                    - np.dstack([np.hstack([time0[:, 1:], time0[:, :1]]),
                                 np.hstack([time0[:, -1:], time0[:, :-1]])]))

            # self-covariance matrix of control variates
            K_cvar = np.diag(np.mean(cvar**2, (0, 1)))
            # add off-diagonal
            K_cvar = K_cvar + (1. - np.eye(2)) * np.mean(cvar[:, :, 0] * cvar[:, :, 1])

            # covariance of control variates with random variable
            cov = np.mean(time0[:, :, np.newaxis] * cvar, 0).mean(0)

            optimal_comb = np.linalg.inv(K_cvar) @ cov
            lpdfs = lpdfs - cvar @ optimal_comb
            return logsumexp(lpdfs - log(len(location_weights)), 0)
def initialize(deep_map, X, num_pseudo_params):
    smart_map = {}
    for layer, layer_map in deep_map.iteritems():
        smart_map[layer] = {}
        for unit, gp_map in layer_map.iteritems():
            smart_map[layer][unit] = {}
            cov_params = gp_map['cov_params']
            lengthscales = cov_params[1:]
            if layer == 0:
                pairs = itertools.combinations(X, 2)
                dists = np.array([np.abs(p1 - p2) for p1, p2 in pairs])
                smart_lengthscales = np.array(
                    [np.log(np.median(dists[:, i])) for i in xrange(len(lengthscales))])
                kmeans = KMeans(n_clusters=num_pseudo_params, init='k-means++')
                fit = kmeans.fit(X)
                smart_x0 = fit.cluster_centers_
                #inds = npr.choice(len(X), num_pseudo_params, replace = False)
                #smart_x0 = np.array(X)[inds,:]
                smart_y0 = np.ndarray.flatten(smart_x0)
                #smart_y0 = np.array(y)[inds]
                smart_noise_scale = np.log(np.var(smart_y0))
            else:
                smart_x0 = gp_map['x0']
                smart_y0 = np.ndarray.flatten(smart_x0[:, 0])
                smart_lengthscales = np.array([np.log(1) for i in xrange(len(lengthscales))])
                smart_noise_scale = np.log(np.var(smart_y0))
            gp_map['cov_params'] = np.append(cov_params[0], smart_lengthscales)
            gp_map['x0'] = smart_x0
            gp_map['y0'] = smart_y0
            #gp_map['noise_scale'] = smart_noise_scale
            smart_map[layer][unit] = gp_map
    smart_params = pack_deep_params(smart_map)
    return smart_params
def gradient_log_variational(params, theta, i):
    mu = params[0]
    sigma = params[1]
    x = theta
    if i == 0:
        return (np.log(x) - mu) / (sigma**2)
    else:
        return (mu - np.log(x))**2 / (sigma**3) - 1 / sigma
def gradient_log_variational(params, theta, i):
    a = params[0]
    b = params[1]
    x = theta
    if i == 0:
        return -b * (x**(a - 1)) * ((1 - x**a)**(b - 2)) * (a * np.log(x) * (b * (x**a) - 1) + x**a - 1)
    else:
        return a * (x**(a - 1)) * ((1 - x**a)**(b - 1)) * (b * np.log(1 - x**a) + 1)
def __init__(self, mu, var):
    self.norm_const = -0.5 * np.log(2 * np.pi)
    self.mu = np.atleast_1d(mu).flatten()
    self.var = np.atleast_1d(var).flatten()
    self.dim = np.prod(self.var.shape)
    assert (self.mu.shape == self.var.shape)
    self.std = np.sqrt(var)
    self.logstd = np.log(self.std)
def log_wishart_prior(p, wishart_gamma, wishart_m, sum_qs, Qdiags, icf):
    n = p + wishart_m + 1
    k = icf.shape[0]

    out = 0
    for ik in range(k):
        frobenius = sqsum(Qdiags[ik, :]) + sqsum(icf[ik, p:])
        out = out + 0.5 * wishart_gamma * wishart_gamma * frobenius - wishart_m * sum_qs[ik]

    C = n * p * (np.log(wishart_gamma) - 0.5 * np.log(2)) - log_gamma_distrib(0.5 * n, p)
    return out - k * C
def _do_viterbi_pass(self, framelogprob):
    # Based on hmmlearn's _BaseHMM
    safe_startmat = self.startprob_ + np.finfo(float).eps
    safe_transmat = self.transmat_ + np.finfo(float).eps
    n_samples, n_components = framelogprob.shape
    state_sequence, logprob = _hmmc._viterbi(
        n_samples, n_components,
        np.log(safe_startmat),
        np.log(safe_transmat),
        framelogprob)
    return logprob, state_sequence
def init():
    offset = 2.0
    #if optimum[0] < np.inf:
    #    xmin = min(results['ADAM'][0][0], optimum[0]) - offset
    #    xmax = max(results['ADAM'][0][0], optimum[0]) + offset
    #else:
    xmin = domain[0, 0]
    xmax = domain[0, 1]
    #if optimum[1] < np.inf:
    #    ymin = min(results['ADAM'][1][0], optimum[1]) - offset
    #    ymax = max(results['ADAM'][1][0], optimum[1]) + offset
    #else:
    ymin = domain[1, 0]
    ymax = domain[1, 1]

    x = np.arange(xmin, xmax, 0.01)
    y = np.arange(ymin, ymax, 0.01)
    X, Y = np.meshgrid(x, y)
    Z = np.zeros(np.shape(Y))
    for a, _ in np.ndenumerate(Y):
        Z[a] = func(X[a], Y[a])

    level = fdict['level']
    if level is None:
        level = np.linspace(Z.min(), Z.max(), 20)
    else:
        if level[0] == 'normal':
            level = np.linspace(Z.min(), Z.max(), level[1])
        if level[0] == 'log':
            # np.logspace expects base-10 exponents, so use log10 here
            level = np.logspace(np.log10(Z.min()), np.log10(Z.max()), level[1])

    CF = ax[0].contour(X, Y, Z, levels=level)
    #plt.colorbar(CF, orientation='horizontal', format='%.2f')
    ax[0].grid()
    ax[0].plot(results['ADAM'][0][0], results['ADAM'][1][0], 'h',
               markersize=15, color='0.75')
    if optimum[0] < np.inf and optimum[1] < np.inf:
        ax[0].plot(optimum[0], optimum[1], '*', markersize=40,
                   markeredgewidth=2, alpha=0.5, color='0.75')
    ax[0].legend(loc='upper center', ncol=3, bbox_to_anchor=(0.5, 1.15))
    ax[1].plot(0, results['ADAM'][2][0], 'o')
    ax[1].axis([0, T, -0.5, max_err + 0.5])
    ax[1].set_xlabel('num. iteration')
    ax[1].set_ylabel('loss')

    line1.set_data([], [])
    line2.set_data([], [])
    line3.set_data([], [])
    line4.set_data([], [])
    line5.set_data([], [])
    err1.set_data([], [])
    err2.set_data([], [])
    err3.set_data([], [])
    err4.set_data([], [])
    err5.set_data([], [])

    return line1, line2, line3, line4, line5, \
        err1, err2, err3, err4, err5,
def logloss(ys, ys_hat, ws=None):
    #print 'ws', ws.shape, 'ys', ys.shape, 'xs', xs.shape, 'B', B.shape
    if ws is None:
        return np.sum(np.log(1 + np.exp(-ys * ys_hat))) / float(len(ys))
        #+ (0.5 * reg * np.dot(B, B))  #/ float(len(ys))
    else:
        try:
            return np.sum(ws * np.log(1 + np.exp(-ys * ys_hat))) / float(len(ys))
            #+ (0.5 * reg * np.dot(B, B))  #/ float(len(ys))
        except:
            pdb.set_trace()
def get_error_and_ll(w, v_prior, X, y, K, location, scale):
    v_noise = np.exp(parser.get(w, 'log_v_noise')[0, 0]) * scale**2
    q = get_parameters_q(w, v_prior)
    samples_q = draw_samples(q, K)
    outputs = predict(samples_q, X) * scale + location
    log_factor = -0.5 * np.log(2 * math.pi * v_noise) \
        - 0.5 * (np.tile(y, (1, K)) - np.array(outputs))**2 / v_noise
    ll = np.mean(logsumexp(log_factor - np.log(K), 1))
    error = np.sqrt(np.mean((y - np.mean(outputs, 1, keepdims=True))**2))
    return error, ll
def eval_log_properly(self, x):
    det = np.linalg.det(self.Sigma)
    const = (self.size / 2.0) * np.log(2 * np.pi)
    const = -0.5 * np.log(det) - const
    prec = np.linalg.inv(self.Sigma)
    t = np.subtract(x, self.Mu)
    v = np.dot(np.transpose(t), prec)
    v = -0.5 * np.dot(v, t)
    return const + v
def log_abc_kernel(x, n, k, e):
    '''
    @summary: kernel density, we use a normal kernel here
    @param x: simulator output, used as the mean of the kernel density
    @param k: observed data (summary statistic)
    @param e: bandwidth of the kernel density
    '''
    Sx = x
    Sy = k
    return -np.log(e) - np.log(2 * np.pi) / 2 - (Sy - Sx)**2 / (2 * (e**2))
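# Sanity check: this Gaussian ABC kernel is just a normal log-density with mean Sx and
# standard deviation e, so it should agree with scipy's norm.logpdf.
import numpy as np
from scipy.stats import norm

Sx, Sy, e = 1.2, 0.9, 0.3
ours = -np.log(e) - np.log(2 * np.pi) / 2 - (Sy - Sx)**2 / (2 * (e**2))
assert np.isclose(ours, norm.logpdf(Sy, loc=Sx, scale=e))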
def log_wishart_prior(p, wishart_gamma, wishart_m, icf):
    n = p + wishart_m + 1
    k = icf.shape[0]

    out = 0
    for ik in range(k):
        sum_qs = icf[ik, :p].sum()
        frobenius = sqsum(np.exp(icf[ik, :p])) + sqsum(icf[ik, p:])
        out = out + 0.5 * wishart_gamma * wishart_gamma * frobenius - wishart_m * sum_qs

    C = n * p * (np.log(wishart_gamma) - 0.5 * np.log(2)) - log_gamma_distrib(0.5 * n, p)
    return out - k * C
def KL_via_sampling(params, U):
    theta = generate_lognormal(params, U)
    alpha = 2
    beta = 0.5
    muPrior = np.log(alpha / beta)
    sigmaPrior = np.log(np.sqrt(alpha / (beta**2)))
    paramsPrior = np.array([muPrior, sigmaPrior])
    E = np.log(lognormal_pdf(theta, params) / lognormal_pdf(theta, paramsPrior))
    E = np.mean(E)
    return E
def logit(p):
    """Takes rows of R^{D+1} vectors on the simplex and outputs R^D logit values.

    Input:
        p: N x D+1 non-negative matrix such that each row sums to 1
    Output:
        x: N x D matrix of real values such that the softmax of x yields p

    Note: this is the inverse transformation of logistic
    """
    x = np.log(p) - np.log(p[:, -1, np.newaxis])
    #x -= x[:,-1,np.newaxis]
    return x[:, :-1]
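# Round-trip check (a small sketch of the inverse map): appending a zero logit for the
# dropped last column and applying softmax should recover the original simplex rows.
import numpy as np

p = np.array([[0.2, 0.3, 0.5],
              [0.1, 0.1, 0.8]])
x = np.log(p) - np.log(p[:, -1, np.newaxis])
x = x[:, :-1]
z = np.hstack([x, np.zeros((x.shape[0], 1))])
softmax = np.exp(z - z.max(axis=1, keepdims=True))
softmax /= softmax.sum(axis=1, keepdims=True)
np.testing.assert_allclose(softmax, p)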
def _do_backward_pass(self, framelogprob):
    # Based on hmmlearn's _BaseHMM
    safe_startmat = self.startprob_ + np.finfo(float).eps
    safe_transmat = self.transmat_ + np.finfo(float).eps
    n_samples, n_components = framelogprob.shape
    bwdlattice = np.zeros((n_samples, n_components))
    _hmmc._backward(n_samples, n_components,
                    np.log(safe_startmat),
                    np.log(safe_transmat),
                    framelogprob, bwdlattice)
    return bwdlattice
def abc_log_likelihood(theta, j, v, i):
    N = len(v)
    x, std = simulator(theta, v)
    log_kernels = log_abc_kernel(x, i, std)
    if len(log_kernels) > 1:
        log_kernels_max = log_kernels.max()
        ll = np.log(np.sum(np.exp(log_kernels - log_kernels_max))) + log_kernels_max
        ll = np.log(1. / N) + ll
    else:
        ll = log_kernels
    return ll
def _log_hazard(self, params, T, *Xs):
    mu_params = params[self._LOOKUP_SLICE["mu_"]]
    mu_ = np.dot(Xs[0], mu_params)

    sigma_params = params[self._LOOKUP_SLICE["sigma_"]]
    log_sigma_ = np.dot(Xs[1], sigma_params)
    sigma_ = np.exp(log_sigma_)

    Z = (np.log(T) - mu_) / sigma_
    return norm.logpdf(Z) - log_sigma_ - np.log(T) - logsf(Z)
def log_abc_kernel(x, std):
    '''
    @summary: kernel density, we use a normal kernel here
    @param x: simulator output, used as the mean of the kernel density
    @param std: standard deviation of the simulator output; the bandwidth is std/sqrt(M)
    '''
    e = std / np.sqrt(M)
    #e = 1
    Sx = x
    # Sy (the observed summary statistic) and M are expected to be defined at module level
    return -np.log(e) - np.log(2 * np.pi) / 2 - (Sy - Sx)**2 / (2 * (e**2))
def condition_on(mu, sigma, A, y, sigma_obs):
    temp1 = np.dot(A, sigma)
    sigma_pred = np.dot(temp1, A.T) + sigma_obs
    L = np.linalg.cholesky(sigma_pred)
    v = solve_triangular(L, y - np.dot(A, mu))
    ll = -1. / 2 * np.dot(v, v) - np.sum(np.log(np.diag(L))) \
        - y.shape[0] / 2. * np.log(2 * np.pi)
    mu_cond = mu + np.dot(temp1.T, solve_triangular(L, v, 'T'))

    temp2 = solve_triangular(L, temp1)
    sigma_cond = sigma - np.dot(temp2.T, temp2)

    return (mu_cond, sigma_cond), ll
def z_lp(self, z):
    return vi.exp_log_likelihood(
        z, self.a, self.a**2,
        np.log(self.pi), np.log(1 - self.pi),
        self.sigma_a, self.sigma_eps, self.x, self.alpha)
def cb(self, t, on='R', alpha_ci=0.05, bound='two-sided'):
    r"""
    Confidence bounds of the ``on`` function at the ``alpha_ci`` level of
    significance. Can be the upper, lower, or two-sided confidence by
    changing the value of ``bound``.

    Parameters
    ----------
    t : array like or scalar
        The values of the random variables at which the confidence bounds
        will be calculated
    on : ('sf', 'ff', 'Hf'), optional
        The function on which the confidence bound will be calculated.
    bound : ('two-sided', 'upper', 'lower'), str, optional
        Compute either the two-sided, upper or lower confidence bound(s).
        Defaults to two-sided.
    alpha_ci : scalar, optional
        The level of significance at which the bound will be computed.

    Returns
    -------
    cb : scalar or numpy array
        The value(s) of the upper, lower, or both confidence bound(s) of the
        selected function at t
    """
    if self.method != 'MLE':
        raise Exception('Only MLE has confidence bounds')

    hess_inv = np.copy(self.hess_inv)

    pvars = hess_inv[np.triu_indices(hess_inv.shape[0])]
    old_err_state = np.seterr(all='ignore')

    if hasattr(self.dist, 'R_cb'):
        def R_cb(x):
            return self.dist.R_cb(x - self.gamma, *self.params, hess_inv,
                                  alpha_ci=alpha_ci, bound=bound)
    else:
        def R_cb(x):
            def sf_func(params):
                return self.dist.sf(x - self.gamma, *params)

            jac = np.atleast_2d(jacobian(sf_func)(np.array(self.params)))

            # Second-Order Taylor Series Expansion of Variance
            var_R = []
            for i, j in enumerate(jac):
                j = np.atleast_2d(j).T * j
                j = j[np.triu_indices(j.shape[0])]
                var_R.append(np.sum(j * pvars))

            # First-Order Taylor Series Expansion of Variance
            # var_R = (jac**2 * np.diag(hess_inv)).sum(axis=1).T

            R_hat = self.sf(x)
            if bound == 'two-sided':
                diff = (z(alpha_ci / 2)
                        * np.sqrt(np.array(var_R))
                        * np.array([1., -1.]).reshape(2, 1))
            elif bound == 'upper':
                diff = z(alpha_ci) * np.sqrt(np.array(var_R))
            else:
                diff = -z(alpha_ci) * np.sqrt(np.array(var_R))

            exponent = diff / (R_hat * (1 - R_hat))
            R_cb = R_hat / (R_hat + (1 - R_hat) * np.exp(exponent))
            return R_cb.T

    # Default cb is R
    cb = R_cb(t)

    if (on == 'ff') or (on == 'F'):
        cb = 1. - cb

    elif on == 'Hf':
        cb = -np.log(cb)

    elif on == 'hf':
        def cb_hf(x):
            out = []
            for v in x:
                out.append(jacobian(lambda x: -np.log(R_cb(x)))(v))
            return np.concatenate(out)
        cb = cb_hf(t)

    elif on == 'df':
        def cb_df(x):
            out = []
            for v in x:
                # df = hf * sf: differentiate the cumulative hazard, then scale by sf(v)
                out.append(jacobian(lambda x: -np.log(R_cb(x)))(v) * self.sf(v))
            return np.concatenate(out)
        cb = cb_df(t)

    np.seterr(**old_err_state)

    return cb
def log_loss(y, y_pred):
    return -(y * np.log(y_pred) + (1. - y) * np.log(1. - y_pred))
def prior2man(theta_2):
    return np.log(1.0 / (SIGMA_2 * np.sqrt(2 * np.pi))
                  * np.exp(-theta_2**2 / (2 * SIGMA_2**2)))
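# Cross-check of the hand-written Gaussian log-density above against scipy's norm.logpdf
# (SIGMA_2 is a module-level constant in the source; the value below is a placeholder).
import numpy as np
from scipy.stats import norm

SIGMA_2 = 2.0
theta_2 = 0.4
ours = np.log(1.0 / (SIGMA_2 * np.sqrt(2 * np.pi)) * np.exp(-theta_2**2 / (2 * SIGMA_2**2)))
assert np.isclose(ours, norm.logpdf(theta_2, 0, SIGMA_2))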
D = 2   # number of accumulation dimensions
K = 3   # number of discrete states
M = 2   # number of input dimensions
N = 10  # number of observations
bin_size = 0.01
latent_acc = LatentAccumulation(N, K, D, M=M,
                                transitions="race",
                                emissions="poisson",
                                emission_kwargs={"bin_size": bin_size})

# set params
betas = 0.075 * np.ones((D,))
sigmas = np.log(1e-3) * np.ones((D,))
latent_acc.dynamics.params = (betas, sigmas, latent_acc.dynamics.params[2])
latent_acc.emissions.Cs[0] = 4 * npr.randn(N, D) + npr.choice([-15, 15], (N, D))
latent_acc.emissions.ds[0] = 40 + 4.0 * npr.randn(N)

# Sample state trajectories
T = 100           # number of time bins
trial_time = 1.0  # trial length in seconds
dt = 0.01         # bin size in seconds
N_samples = 100

# input statistics
total_rate = 40   # the sum of the right and left Poisson process rates is 40

us = []
def standardToNat(cls, pi):
    n = np.log(pi)
    return (n,)
def one_sided_exp(w_F):
    DeltaF = -(logsumexp(-w_F) - np.log(len(w_F)))
    return DeltaF
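# Sanity check: the one-sided EXP free-energy estimate is -log of the sample mean of
# exp(-w_F); the logsumexp form above is the numerically stable way to compute it.
import numpy as np
from scipy.special import logsumexp

w_F = np.array([1.0, 1.3, 0.7, 1.1])
DeltaF = -(logsumexp(-w_F) - np.log(len(w_F)))
assert np.isclose(DeltaF, -np.log(np.mean(np.exp(-w_F))))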
def m(x):
    return -1 + np.log(1 + np.exp(x))
def mpp_x_transform(self, x, gamma=0):
    return np.log(x - gamma)
def logp(z):
    first = logq0(z)
    second = np.log(
        np.abs(1 + np.dot(u_test, logdet_jac(w_test, z, b_test))))
    return first - second
def logloss(actual, predicted):
    predicted = np.clip(predicted, EPS, 1 - EPS)
    loss = -np.sum(actual * np.log(predicted))
    return loss / float(actual.shape[0])
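# Worked example: with one-hot targets, this cross-entropy reduces to the mean negative
# log-probability assigned to the true class (EPS = 1e-15 is a typical value for the
# module-level constant assumed by the function above).
import numpy as np

EPS = 1e-15
actual = np.array([[1., 0., 0.],
                   [0., 1., 0.]])
predicted = np.clip(np.array([[0.7, 0.2, 0.1],
                              [0.2, 0.6, 0.2]]), EPS, 1 - EPS)
loss = -np.sum(actual * np.log(predicted)) / float(actual.shape[0])
assert np.isclose(loss, -(np.log(0.7) + np.log(0.6)) / 2)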
def prior2(theta_2):
    return np.log(norm.pdf(theta_2, 0, SIGMA_2))
def test_forward_pass(T=1000, K=3):
    log_pi0, log_Ps, ll = make_parameters(T, K)
    a1 = forward_pass_np(log_pi0, log_Ps, ll)
    a2 = np.zeros((T, K))
    forward_pass(-np.log(K) * np.ones(K), log_Ps, ll, a2)
    assert np.allclose(a1, a2)
def ClosestNote(period):
    n = 12 * np.log(48000.0 / (period * 440.0)) / np.log(2) + 57
    n = int(round(n))
    return n
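# Quick check of the pitch-to-note mapping (my reading of the constants: a 48 kHz sample
# rate with A4 = 440 Hz mapped to note number 57). A period of 48000/440 samples is an A4.
import numpy as np

period = 48000.0 / 440.0
n = 12 * np.log(48000.0 / (period * 440.0)) / np.log(2) + 57
assert int(round(n)) == 57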
def classical(p):
    "Classical node, requires autograd.numpy functions."
    return anp.exp(anp.sum(quantum(p[0], anp.log(p[1]))))
def median_(self):
    return self.lambda_ * (np.log(2)**(1.0 / self.rho_))
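# Sanity check, assuming lambda_ and rho_ are a Weibull scale and shape (as the naming
# suggests): the Weibull survival function evaluated at this median should equal one half.
import numpy as np

lambda_, rho_ = 2.0, 1.5
median = lambda_ * (np.log(2)**(1.0 / rho_))
assert np.isclose(np.exp(-(median / lambda_)**rho_), 0.5)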
def logsumexp(X):
    max_X = np.max(X, axis=-1)[..., np.newaxis]
    return max_X + np.log(np.sum(np.exp(X - max_X), axis=-1)[..., np.newaxis])
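# Quick check against scipy's reference implementation: the max-shifted form above should
# match scipy.special.logsumexp with keepdims=True along the last axis.
import numpy as np
from scipy.special import logsumexp as sp_logsumexp

X = np.random.default_rng(0).normal(size=(4, 5))
max_X = np.max(X, axis=-1)[..., np.newaxis]
ours = max_X + np.log(np.sum(np.exp(X - max_X), axis=-1)[..., np.newaxis])
np.testing.assert_allclose(ours, sp_logsumexp(X, axis=-1, keepdims=True))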
def log_den(self, X):
    b = self.b
    unden = -np.sum(0.5 * b * X**2 + X - np.log(1.0 + b * X), 1)
    return unden
def cb_df(x):
    out = []
    for v in x:
        # df = hf * sf: differentiate the cumulative hazard -log(R), then scale by sf(v)
        out.append(jacobian(lambda x: -np.log(R_cb(x)))(v) * self.sf(v))
    return np.concatenate(out)
def log_den(self, X):
    b = self.b
    w = self.w
    unden = np.sum(b * (-X + old_div((np.cos(w * X) - 1), w))
                   + np.log(b * (1 + np.sin(w * X))), 1)
    return unden
def fit(self, X, B, T, W=None, show_progress=True):
    """Fits the model.

    :param X: numpy matrix of shape :math:`k \\cdot n`
    :param B: numpy vector of shape :math:`n`
    :param T: numpy vector of shape :math:`n`
    :param W: (optional) numpy vector of shape :math:`n`
    """
    if W is None:
        W = numpy.ones(len(X))
    X, B, T, W = (Z if type(Z) == numpy.ndarray else numpy.array(Z)
                  for Z in (X, B, T, W))
    keep_indexes = (T > 0) & (B >= 0) & (B <= 1) & (W >= 0)
    if sum(keep_indexes) < X.shape[0]:
        n_removed = X.shape[0] - sum(keep_indexes)
        warnings.warn(
            "Warning! Removed %d/%d entries from inputs where "
            "T <= 0 or B not 0/1 or W < 0" % (n_removed, len(X))
        )
        X, B, T, W = (Z[keep_indexes] for Z in (X, B, T, W))
    n_features = X.shape[1]

    # scipy.optimize and emcee force the parameters to be a vector:
    # (log k, log p, log sigma_alpha, log sigma_beta,
    #  a, b, alpha_1...alpha_k, beta_1...beta_k)
    # Generalized Gamma is a bit sensitive to the starting point!
    x0 = numpy.zeros(6 + 2 * n_features)
    x0[0] = +1 if self._fix_k is None else log(self._fix_k)
    x0[1] = -1 if self._fix_p is None else log(self._fix_p)
    args = (X, B, T, W, self._fix_k, self._fix_p,
            self._hierarchical, self._flavor)

    # Set up progressbar and callback
    bar = progressbar.ProgressBar(
        widgets=[
            progressbar.Variable("loss", width=15, precision=9),
            " ",
            progressbar.BouncingBar(),
            " ",
            progressbar.Counter(width=6),
            " [",
            progressbar.Timer(),
            "]",
        ]
    )

    def callback(LL, value_history=[]):
        value_history.append(LL)
        bar.update(len(value_history), loss=LL)

    # Define objective and use automatic differentiation
    def f(x):
        callback_ = callback if show_progress else None
        return -generalized_gamma_loss(x, *args, callback=callback_)

    jac = autograd.grad(lambda x: -generalized_gamma_loss(x, *args))

    # Find the maximum a posteriori of the distribution
    res = scipy.optimize.minimize(f, x0, jac=jac, method="SLSQP",
                                  options={"maxiter": 9999})
    if not res.success:
        raise Exception("Optimization failed with message: %s" % res.message)
    result = {"map": res.x}

    # TODO: should not use fixed k/p as search parameters
    if self._fix_k:
        result["map"][0] = log(self._fix_k)
    if self._fix_p:
        result["map"][1] = log(self._fix_p)

    # Make sure we're in a local minimum
    gradient = jac(result["map"])
    gradient_norm = numpy.dot(gradient, gradient)
    if gradient_norm >= 1e-2 * len(X):
        warnings.warn("Might not have found a local minimum! "
                      "Norm of gradient is %f" % gradient_norm)

    # Let's sample from the posterior to compute uncertainties
    if self._ci:
        (dim,) = res.x.shape
        n_walkers = 5 * dim
        sampler = emcee.EnsembleSampler(
            nwalkers=n_walkers,
            ndim=dim,
            log_prob_fn=generalized_gamma_loss,
            args=args,
        )
        mcmc_initial_noise = 1e-3
        p0 = [result["map"] + mcmc_initial_noise * numpy.random.randn(dim)
              for i in range(n_walkers)]
        n_burnin = 100
        n_steps = int(numpy.ceil(2000.0 / n_walkers))  # emcee expects an integer iteration count
        n_iterations = n_burnin + n_steps

        bar = progressbar.ProgressBar(
            max_value=n_iterations,
            widgets=[
                progressbar.Percentage(),
                " ",
                progressbar.Bar(),
                " %d walkers [" % n_walkers,
                progressbar.AdaptiveETA(),
                "]",
            ],
        )
        for i, _ in enumerate(sampler.sample(p0, iterations=n_iterations)):
            bar.update(i + 1)
        result["samples"] = sampler.chain[:, n_burnin:, :].reshape((-1, dim)).T
        if self._fix_k:
            result["samples"][0, :] = log(self._fix_k)
        if self._fix_p:
            result["samples"][1, :] = log(self._fix_p)

    self.params = {
        k: {
            "k": exp(data[0]),
            "p": exp(data[1]),
            "a": data[4],
            "b": data[5],
            "alpha": data[6: 6 + n_features].T,
            "beta": data[6 + n_features: 6 + 2 * n_features].T,
        }
        for k, data in result.items()
    }
def _parameter_initialiser(self, x, c=None, n=None, t=None, offset=False):
    # Need an offset mpp function here
    norm_mod = para.Normal.fit(np.log(x), c=c, n=n, t=t, how='MLE')
    mu, sigma = norm_mod.params
    return mu, sigma
def a_lp(self, a, a2):
    return vi.exp_log_likelihood(
        self.z, a, a2,
        np.log(self.pi), np.log(1 - self.pi),
        self.sigma_a, self.sigma_eps, self.x, self.alpha)
def test_unary():
    grad_test(lambda x: ti.sqrt(x), lambda x: np.sqrt(x))
    grad_test(lambda x: ti.exp(x), lambda x: np.exp(x))
    grad_test(lambda x: ti.log(x), lambda x: np.log(x))
def to_unconstrained(x):
    return np.log(x) - np.log(1. - x)
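# Round-trip check: to_unconstrained is the logit map (0, 1) -> R, so applying the
# sigmoid to its output recovers the original values.
import numpy as np

x = np.array([0.1, 0.5, 0.9])
y = np.log(x) - np.log(1. - x)
assert np.allclose(1. / (1. + np.exp(-y)), x)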
def log_df(self, x, mu, sigma):
    return np.log(self.df(x, mu, sigma))
def binary_crossentropy(actual, predicted):
    predicted = np.clip(predicted, EPS, 1 - EPS)
    return np.mean(-np.sum(actual * np.log(predicted)
                           + (1 - actual) * np.log(1 - predicted)))
def dp_sgld(data, logl, logprior, T, start, L, b, eta0, dp_budget, model):
    epsilon, delta = dp_budget
    N = data.shape[0]
    l = start.shape[0]
    sample = np.copy(start)
    samples = np.empty([int(T * N / b), l])
    priorgrad = grad(logprior)
    C = 128 * N * T * L**2 / (b * epsilon**2) * np.log(
        5 * N * T / (2 * b * delta)) * np.log(2 / delta)  # LD constant
    if model == 'logistic':
        likegrad = grad(logl)
        for i in range(int(T * N / b)):
            eta = max(1 / (i + 1), eta0)
            indx = np.random.choice(N, b)
            z = np.sqrt(max(eta**2 * C, eta)) * rng.randn(l)
            sample += eta * ((N / b) * likegrad(sample, data[indx]) + priorgrad(sample)) + z
            sample = clip(sample, L - 1, 0)
            if sum(np.isnan(sample)) == 0:
                samples[i] = sample
            else:
                print 'Nans'
                return samples[:i]
            #if i % 100 == 0: print(i)
    elif model == 'multi':
        likeJ = jacobian(logl)
        #likeg = grad(logl)
        for i in range(int(T * N / b)):
            eta = eta0
            indx = rng.choice(N, b)
            z = np.sqrt(max(eta**2 * C, eta)) * rng.randn(l)
            J = likeJ(sample, data[indx])
            tot_g = np.zeros(l)
            #for value in data[indx]:
            #    tot_g += clip(likeg(sample, value), L)
            #sample = sample + eta*((N/b)*tot_g + priorgrad(sample)) - z
            sample += eta * ((N / b) * np.sum(clip(J, L, 1), axis=0) + priorgrad(sample)) - z
            if sum(np.isnan(sample)) == 0:
                samples[i] = sample
            else:
                print 'Nans'
                raise Error()
                return samples[:i]
            if i % 10 == 0: print(i)
    elif model[:-1] == 'mvn_mix':
        k = int(model[-1])
        #likeJ = jacobian(logl)
        likeg = grad(logl)
        for i in range(int(T * N / b)):
            eta = eta0
            indx = rng.choice(N, b)
            z = np.sqrt(max(eta**2 * C, eta)) * rng.randn(l)
            #J = likeJ(sample, k, data[indx])
            tot_g = np.zeros(l)
            for value in data[indx]:
                tot_g += clip(likeg(sample, k, value), L)
            sample = sample + eta * ((N / b) * tot_g + priorgrad(sample, k)) - z
            #sample += eta*((N/b)*np.sum(clip(J, L, 1), axis=0) + priorgrad(sample)) - z
            if sum(np.isnan(sample)) == 0:
                samples[i] = sample
            else:
                print 'Nans'
                return samples[:i]
            #if i % 10 == 0: print(i)
    return samples
def logq0(z):
    '''Start with a standard Gaussian.'''
    D = z.shape[0]
    return -D / 2 * np.log(2 * np.pi) - 0.5 * np.sum(z**2, axis=0)
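# Quick check against scipy: for a single vector z, logq0 should match the log-density of
# a standard multivariate normal.
import numpy as np
from scipy.stats import multivariate_normal

z = np.array([0.3, -1.2, 0.5])
D = z.shape[0]
ours = -D / 2 * np.log(2 * np.pi) - 0.5 * np.sum(z**2, axis=0)
assert np.isclose(ours, multivariate_normal.logpdf(z, mean=np.zeros(D)))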
def _mom(self, x):
    norm_mod = para.Normal.fit(np.log(x), how='MOM')
    mu, sigma = norm_mod.params
    return mu, sigma