def mvt_logpdf(x, mu, Li, df):
    dim = Li.shape[0]
    Ki = np.dot(Li.T, Li)
    # determinant is just multiplication of diagonal elements of cholesky
    logdet = 2 * log(1. / np.diag(Li)).sum()
    lpdf_const = (gammaln((df + dim) / 2)
                  - (gammaln(df / 2)
                     + (log(df) + log(np.pi)) * dim * 0.5
                     + logdet * 0.5))

    x = np.atleast_2d(x)
    if x.shape[1] != mu.size:
        x = x.T
    assert(x.shape[1] == mu.size or x.shape[0] == mu.size)

    d = (x - mu.reshape((1, mu.size))).T
    Ki_d_scal = np.dot(Ki, d) / df                  # vector
    d_Ki_d_scal_1 = diag_dot(d.T, Ki_d_scal) + 1.   # scalar

    res_pdf = (lpdf_const
               - 0.5 * (df + dim) * np.log(d_Ki_d_scal_1)).flatten()
    if res_pdf.size == 1:
        res_pdf = float(res_pdf)
    return res_pdf
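# Usage sketch for mvt_logpdf (an illustration, not from the source; it assumes
# the module-level `log` alias and `diag_dot` helper used above are in scope).
# Note that Li is the *inverse* of the lower Cholesky factor of the scale
# matrix K, so that Ki = Li.T @ Li recovers K^{-1}.
K_example = np.array([[2.0, 0.3],
                      [0.3, 1.0]])
Li_example = np.linalg.inv(np.linalg.cholesky(K_example))
lp = mvt_logpdf(np.array([0.1, -0.2]), np.zeros(2), Li_example, df=4.0)
# With SciPy >= 1.6 this should agree with
# stats.multivariate_t(loc=np.zeros(2), shape=K_example, df=4.0).logpdf([0.1, -0.2]).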
def testDirichletCategorical(self):
    def log_joint(p, x, alpha):
        log_prior = np.sum((alpha - 1) * np.log(p))
        log_prior += -special.gammaln(alpha).sum() + special.gammaln(alpha.sum())
        # TODO(mhoffman): We should make it possible to only use one-hot
        # when necessary.
        one_hot_x = one_hot(x, alpha.shape[0])
        log_likelihood = np.sum(np.dot(one_hot_x, np.log(p)))
        return log_prior + log_likelihood

    vocab_size = 5
    n_examples = 11
    alpha = 1.3 * np.ones(vocab_size)
    p = np.random.gamma(alpha, 1.)
    p /= p.sum(-1, keepdims=True)
    x = np.random.choice(np.arange(vocab_size), n_examples, p=p)

    conditional, marginalized_value = (
        _condition_and_marginalize(log_joint, 0, SupportTypes.SIMPLEX,
                                   p, x, alpha))

    new_alpha = alpha + np.histogram(x, np.arange(vocab_size + 1))[0]
    correct_marginalized_value = (
        -special.gammaln(alpha).sum() + special.gammaln(alpha.sum()) +
        special.gammaln(new_alpha).sum() - special.gammaln(new_alpha.sum()))
    self.assertAlmostEqual(correct_marginalized_value, marginalized_value)

    self.assertTrue(np.allclose(new_alpha, conditional.alpha))
def log_beta_function(x):
    """
    Log multivariate beta function:
    \sum_{i=1}^{N} ln(\Gamma(x_{i})) - ln(\Gamma(\sum_{i=1}^{N} x_{i}))
    """
    return agnp.sum(agscipy.gammaln(x + agnp.finfo(agnp.float32).eps)) \
        - agscipy.gammaln(agnp.sum(x + agnp.finfo(agnp.float32).eps))
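# Quick sanity check (a sketch; assumes plain SciPy is importable alongside the
# autograd aliases above): for two arguments the multivariate log-beta reduces
# to the ordinary log-beta function, so up to the float32-eps jitter added
# above, log_beta_function should agree with scipy.special.betaln.
from scipy.special import betaln
assert abs(log_beta_function(agnp.array([2.0, 3.5])) - betaln(2.0, 3.5)) < 1e-5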
def testGammaPoisson(self):
    def log_joint(x, y, a, b):
        log_prior = log_probs.gamma_gen_log_prob(x, a, b)
        log_likelihood = np.sum(-special.gammaln(y + 1) + y * np.log(x) - x)
        return log_prior + log_likelihood

    n_examples = 10
    a = 2.3
    b = 3.
    x = np.random.gamma(a, 1. / b)
    y = np.random.poisson(x, n_examples)

    conditional, marginalized_value = (
        _condition_and_marginalize(log_joint, 0, SupportTypes.NONNEGATIVE,
                                   x, y, a, b))

    new_a = a + y.sum()
    new_b = b + n_examples
    correct_marginalized_value = (
        a * np.log(b) - special.gammaln(a) -
        new_a * np.log(new_b) + special.gammaln(new_a) -
        special.gammaln(y + 1).sum())
    self.assertAlmostEqual(correct_marginalized_value, marginalized_value)

    self.assertEqual(new_a, conditional.args[0])
    self.assertAlmostEqual(new_b, 1. / conditional.args[2])
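# Why new_a and new_b above take that form (a brief derivation, not from the
# source): the Gamma(a, b) prior is conjugate to the Poisson likelihood, so
#     p(x | y) ∝ x^(a-1) e^(-b x) * prod_i x^(y_i) e^(-x)
#              = x^(a + sum_i y_i - 1) e^(-(b + n) x),
# i.e. the conditional is Gamma(a + sum(y), b + n), and the marginal is the
# ratio of normalizers, b^a / Gamma(a) * Gamma(new_a) / new_b^new_a, times the
# -gammaln(y + 1) terms carried over from the Poisson pmf.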
def testGammaGamma(self):
    def log_joint(x, y, a, b):
        log_prior = log_probs.gamma_gen_log_prob(x, a, b)
        log_likelihood = np.sum(log_probs.gamma_gen_log_prob(y, a, a * x))
        return log_prior + log_likelihood

    n_examples = 10
    a = 2.3
    b = 3.
    x = np.random.gamma(a, 1. / b)
    y = np.random.gamma(a, 1. / x, n_examples)

    conditional, marginalized_value = (
        _condition_and_marginalize(log_joint, 0, SupportTypes.NONNEGATIVE,
                                   x, y, a, b))

    new_a = a + a * n_examples
    new_b = b + a * y.sum()
    correct_marginalized_value = (
        a * np.log(b) - special.gammaln(a) -
        new_a * np.log(new_b) + special.gammaln(new_a) +
        np.sum((a - 1) * np.log(y) - special.gammaln(a) + a * np.log(a)))
    self.assertAlmostEqual(correct_marginalized_value, marginalized_value)

    self.assertAlmostEqual(new_a, conditional.args[0])
    self.assertAlmostEqual(new_b, 1. / conditional.args[2])
def log_multivariate_t(X, mu, Sigma, df):
    p = Sigma.shape[0]
    dots = np.sum((X - mu) * (np.dot(np.linalg.inv(Sigma), (X - mu).T).T),
                  axis=1)
    return -0.5 * (df + p) * np.log(1. + dots / df) \
        + gammaln(0.5 * (df + p)) - gammaln(0.5 * df) \
        - 0.5 * p * np.log(df * np.pi) \
        - 0.5 * np.linalg.slogdet(Sigma)[1]
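# A slightly more stable variant (a sketch under the same imports; the name
# log_multivariate_t_chol is hypothetical): replacing the explicit inverse
# with a Cholesky solve avoids forming Sigma^{-1} and reuses the factor for
# the log-determinant, since log|Sigma| = 2 * sum(log(diag(L))).
def log_multivariate_t_chol(X, mu, Sigma, df):
    p = Sigma.shape[0]
    L = np.linalg.cholesky(Sigma)
    # solve L z = (X - mu)^T, so sum(z**2, axis=0) gives the Mahalanobis terms
    z = np.linalg.solve(L, (X - mu).T)
    dots = np.sum(z**2, axis=0)
    return -0.5 * (df + p) * np.log(1. + dots / df) \
        + gammaln(0.5 * (df + p)) - gammaln(0.5 * df) \
        - 0.5 * p * np.log(df * np.pi) \
        - np.sum(np.log(np.diag(L)))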
def beta_lnpdf(y, alpha, beta):
    """ y is either N x 1 or N x D; alpha and beta are length-N vectors """
    num = np.log(y) * (alpha - 1)[:, None] + np.log(1 - y) * (beta - 1)[:, None]
    denom = gammaln(alpha) + gammaln(beta) - gammaln(alpha + beta)
    return num - denom[:, None]
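# Sanity-check sketch (assumes scipy.stats is importable; the alias sp_stats
# is illustrative): with matching shapes, beta_lnpdf should reproduce
# scipy.stats.beta.logpdf evaluated row-wise.
from scipy import stats as sp_stats
y_chk = np.array([[0.2, 0.6], [0.7, 0.1]])
a_chk = np.array([2.0, 3.0])
b_chk = np.array([4.0, 1.5])
assert np.allclose(beta_lnpdf(y_chk, a_chk, b_chk),
                   sp_stats.beta.logpdf(y_chk, a_chk[:, None], b_chk[:, None]))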
def log_joint(p, x, alpha):
    log_prior = np.sum((alpha - 1) * np.log(p))
    log_prior += -special.gammaln(alpha).sum() + special.gammaln(alpha.sum())
    # TODO(mhoffman): We should make it possible to only use one-hot
    # when necessary.
    one_hot_x = one_hot(x, alpha.shape[0])
    log_likelihood = np.sum(np.dot(one_hot_x, np.log(p)))
    return log_prior + log_likelihood
def log_likelihoods(self, data, input, mask, tag, x):
    N, etas, nus = self.N, np.exp(self.inv_etas), np.exp(self.inv_nus)
    mus = self.forward(x, input, tag)
    resid = data[:, None, :] - mus
    z = resid / etas
    return -0.5 * (nus + N) * np.log(1.0 + (resid * z).sum(axis=2) / nus) + \
        gammaln((nus + N) / 2.0) - gammaln(nus / 2.0) \
        - N / 2.0 * np.log(nus) - N / 2.0 * np.log(np.pi) \
        - 0.5 * np.sum(np.log(etas), axis=1)
def log_p(x, z):
    x = np.atleast_1d(x)
    z = np.atleast_1d(z)
    lp = -gammaln(a0) + a0 * np.log(b0) \
        + (a0 - 1) * np.log(z) - b0 * z
    ll = np.sum(-gammaln(x[:, None] + 1) - z[None, :]
                + x[:, None] * np.log(z[None, :]), axis=0)
    return lp + ll
def log_likelihoods(self, data, input, mask, tag):
    D, mus, sigmas, nus = \
        self.D, self.mus, np.exp(self.inv_sigmas), np.exp(self.inv_nus)
    # mask = np.ones_like(data, dtype=bool) if mask is None else mask
    resid = data[:, None, :] - mus
    z = resid / sigmas
    return -0.5 * (nus + D) * np.log(1.0 + (resid * z).sum(axis=2) / nus) + \
        gammaln((nus + D) / 2.0) - gammaln(nus / 2.0) \
        - D / 2.0 * np.log(nus) - D / 2.0 * np.log(np.pi) \
        - 0.5 * np.sum(np.log(sigmas), axis=1)
def beta_logpdf(params, theta, to_scalar=True):
    alpha = np.exp(params["log_alpha"])
    beta = np.exp(params["log_beta"])
    logp = (gammaln(alpha + beta) - gammaln(alpha) - gammaln(beta)
            + (alpha - 1) * np.log(theta) + (beta - 1) * np.log(1 - theta))
    if to_scalar:
        return np.sum(logp)
    else:
        return logp
def log_likelihoods(self, data, input, mask, tag):
    D = self.D
    mus = self._compute_mus(data, input, mask, tag)
    sigmas = self._compute_sigmas(data, input, mask, tag)
    nus = np.exp(self.inv_nus)

    resid = data[:, None, :] - mus
    z = resid / sigmas
    return -0.5 * (nus + D) * np.log(1.0 + (resid * z).sum(axis=2) / nus) + \
        gammaln((nus + D) / 2.0) - gammaln(nus / 2.0) \
        - D / 2.0 * np.log(nus) - D / 2.0 * np.log(np.pi) \
        - 0.5 * np.sum(np.log(sigmas), axis=-1)
def log_like(var_par, draw, value, k):
    l = int(len(var_par) / 2)
    mu, cov = var_par[:l], np.exp(var_par[l:])
    samples = draw * cov + mu
    pi = softmax(samples[:k])
    d = len(value)
    thetas = softmax(samples[k:].reshape([k, d]), axis=1)
    n = np.sum(value)
    logps = np.log(pi) + np.dot(np.log(thetas), value) \
        + gammaln(n + 1) - np.sum(gammaln(value + 1))
    return logsumexp(logps)
def elbo(params):
    """ELBO computation."""
    (lambda_pi, lambda_phi, lambda_m,
     lambda_beta, lambda_nu, lambda_w) = params
    e3 = e2 = h2 = 0
    e1 = -log_beta_function(alpha_o) \
        + agnp.dot((alpha_o - agnp.ones(K)), dirichlet_expectation(lambda_pi))
    h1 = log_beta_function(lambda_pi) \
        - agnp.dot((lambda_pi - agnp.ones(K)), dirichlet_expectation(lambda_pi))
    logdet = agnp.log(agnp.array(
        [agnp.linalg.det(lambda_w[k, :, :]) for k in range(K)]))
    logDeltak = agscipy.psi(lambda_nu / 2.) \
        + agscipy.psi((lambda_nu - 1.) / 2.) + 2. * agnp.log(2.) + logdet
    for n in range(N):
        e2 += agnp.dot(lambda_phi[n, :], dirichlet_expectation(lambda_pi))
        h2 += -agnp.dot(lambda_phi[n, :], log_(lambda_phi[n, :]))
        product = agnp.array([
            agnp.dot(agnp.dot(xn[n, :] - lambda_m[k, :], lambda_w[k, :, :]),
                     (xn[n, :] - lambda_m[k, :]).T) for k in range(K)
        ])
        e3 += 1. / 2 * agnp.dot(
            lambda_phi[n, :],
            (logDeltak - 2. * agnp.log(2 * agnp.pi)
             - lambda_nu * product - 2. / lambda_beta).T)
    product = agnp.array([
        agnp.dot(agnp.dot(lambda_m[k, :] - m_o, lambda_w[k, :, :]),
                 (lambda_m[k, :] - m_o).T) for k in range(K)
    ])
    traces = agnp.array([
        agnp.trace(agnp.dot(agnp.linalg.inv(w_o), lambda_w[k, :, :]))
        for k in range(K)
    ])
    h4 = agnp.sum(1. + agnp.log(2. * agnp.pi)
                  - 1. / 2 * (agnp.log(lambda_beta) + logdet))
    logB = lambda_nu / 2. * logdet + lambda_nu * agnp.log(2.) \
        + 1. / 2 * agnp.log(agnp.pi) \
        + agscipy.gammaln(lambda_nu / 2.) \
        + agscipy.gammaln((lambda_nu - 1) / 2.)
    h5 = agnp.sum(logB - (lambda_nu - 3.) / 2. * logDeltak + lambda_nu)
    e4 = agnp.sum(1. / 2 * (agnp.log(beta_o) + logDeltak
                            - 2 * agnp.log(2. * agnp.pi)
                            - beta_o * lambda_nu * product
                            - 2. * beta_o / lambda_beta))
    logB = nu_o / 2. * agnp.log(agnp.linalg.det(w_o)) + nu_o * agnp.log(2.) \
        + 1. / 2 * agnp.log(agnp.pi) \
        + agscipy.gammaln(nu_o / 2.) + agscipy.gammaln((nu_o - 1) / 2.)
    e5 = agnp.sum(-logB + (nu_o - 3.) / 2. * logDeltak
                  - lambda_nu / 2. * traces)
    return e1 + e2 + e3 + e4 + e5 + h1 + h2 + h4 + h5
def EPtaulambda(self, tau_mu, tau_sigma, tau_a_prior, lambda_a_prior,
                lambda_b_prior, lambda_a_hat, lambda_b_hat):
    """E[ln p(\tau | \lambda)] + E[ln p(\lambda)]"""
    etau_given_lambda = -gammaln(tau_a_prior) \
        - tau_a_prior * (np.log(lambda_b_hat) - psi(lambda_a_hat)) \
        + (-tau_a_prior - 1.) * tau_mu \
        - np.exp(-tau_mu + 0.5 * tau_sigma**2) * (lambda_a_hat / lambda_b_hat)
    elambda = -gammaln(lambda_a_prior) \
        - 2 * lambda_a_prior * np.log(lambda_b_prior) \
        + (-lambda_a_prior - 1.) * (np.log(lambda_b_hat) - psi(lambda_a_hat)) \
        - (1. / lambda_b_prior**2) * (lambda_a_hat / lambda_b_hat)
    return np.sum(etau_given_lambda) + np.sum(elambda)
def nll(params):
    # N.B. the likelihood must be expressed as a sum of log-gamma
    # factors in order to avoid overflow / underflow for large
    # numbers of successes or failures in an observation.
    return -np.sum(
        gammaln(np.sum(params))
        - gammaln(np.sum(params) + np.sum(observations, axis=1))
        + np.sum(gammaln(observations + params) - gammaln(params), axis=1))
def logp(zw, K, x, alphaz):
    assert isinstance(K, np.ndarray), "K is assumed to be a Numpy array"
    N = x.shape[0]
    D = x.shape[1]
    L = K.shape[0]
    num_z = N * np.sum(K)
    z = zw[:num_z]
    w = zw[num_z:]
    num_w = w.shape[0]

    log_prior = 0.
    log_likelihood = 0.

    # Prior for weights
    log_prior += np.sum(w_shp * np.log(w_rte) + (w_shp - 1.) * np.log(w)
                        - w_rte * w - sp.gammaln(w_shp))

    # Prior for top layer
    log_prior += np.sum(z_shp * np.log(z_rte)
                        + (z_shp - 1.) * np.log(z[-N * K[-1]:])
                        - z_rte * z[-N * K[-1]:] - sp.gammaln(z_shp))

    # Likelihood
    z1 = z[:N * K[0]].reshape((N, K[0]))
    w0 = w[:D * K[0]].reshape((K[0], D))
    z1_sum_w0 = np.dot(z1, w0)
    log_likelihood = np.sum(x * np.log(z1_sum_w0) - z1_sum_w0
                            - sp.gammaln(x + 1))

    if L > 1:
        # Layer 1
        z2 = z[N * K[0]:N * (K[0] + K[1])].reshape((N, K[1]))
        w1 = w[D * K[0]:D * K[0] + K[0] * K[1]].reshape((K[1], K[0]))
        z2_sum_w1 = np.dot(z2, w1)
        aux = alphaz / z2_sum_w1
        log_prior += np.sum(alphaz * np.log(aux) + (alphaz - 1.) * np.log(z1)
                            - aux * z1 - sp.gammaln(alphaz))

        # Layer 2
        z3 = z[N * (K[0] + K[1]):N * (K[0] + K[1] + K[2])].reshape((N, K[2]))
        w2 = w[D * K[0] + K[0] * K[1]:
               D * K[0] + K[0] * K[1] + K[1] * K[2]].reshape((K[2], K[1]))
        z3_sum_w2 = np.dot(z3, w2)
        aux = alphaz / z3_sum_w2
        log_prior += np.sum(alphaz * np.log(aux) + (alphaz - 1.) * np.log(z2)
                            - aux * z2 - sp.gammaln(alphaz))

    return log_prior + log_likelihood
def multivariate_studentst_logpdf(data, mus, Sigmas, nus, Ls=None):
    """
    Compute the log probability density of a multivariate Student's t
    distribution. This will broadcast as long as data, mus, Sigmas, and nus
    have the same (or at least broadcast-compatible) leading dimensions.

    Parameters
    ----------
    data : array_like (..., D)
        The points at which to evaluate the log density

    mus : array_like (..., D)
        The mean(s) of the t distribution(s)

    Sigmas : array_like (..., D, D)
        The covariances(s) of the t distribution(s)

    nus : array_like (...,)
        The degrees of freedom of the t distribution(s)

    Ls : array_like (..., D, D)
        Optionally pass in the Cholesky decomposition of Sigmas

    Returns
    -------
    lps : array_like (...,)
        Log probabilities under the multivariate Student's t distribution(s).
    """
    # Check inputs
    D = data.shape[-1]
    assert mus.shape[-1] == D
    assert Sigmas.shape[-2] == Sigmas.shape[-1] == D
    if Ls is not None:
        assert Ls.shape[-2] == Ls.shape[-1] == D
    else:
        Ls = np.linalg.cholesky(Sigmas)                              # (..., D, D)

    # Quadratic term
    q = batch_mahalanobis(Ls, data - mus) / nus                      # (...,)
    lp = -0.5 * (nus + D) * np.log1p(q)                              # (...,)

    # Normalizer
    lp = lp + gammaln(0.5 * (nus + D)) - gammaln(0.5 * nus)          # (...,)
    lp = lp - 0.5 * D * np.log(np.pi) - 0.5 * D * np.log(nus)        # (...,)
    L_diag = np.reshape(Ls, Ls.shape[:-2] + (-1,))[..., ::D + 1]     # (..., D)
    half_log_det = np.sum(np.log(abs(L_diag)), axis=-1)              # (...,)
    lp = lp - half_log_det

    return lp
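# Usage sketch (the array shapes below are illustrative, and this assumes the
# batch_mahalanobis helper used above broadcasts its leading dimensions):
# evaluate K multivariate t densities at T data points in one call.
T_pts, K_comp, D_dim = 100, 3, 2
data = np.random.randn(T_pts, 1, D_dim)            # (T, 1, D)
mus = np.random.randn(K_comp, D_dim)               # (K, D)
Sigmas = np.tile(np.eye(D_dim), (K_comp, 1, 1))    # (K, D, D)
nus = np.full(K_comp, 5.0)                         # (K,)
lps = multivariate_studentst_logpdf(data, mus, Sigmas, nus)   # (T, K)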
def prep_opt(y_train, N, coeffs):
    summedy_mat = np.sum(y_train, axis=0)
    summedy = np.reshape(summedy_mat, [np.size(summedy_mat), -1])
    a1 = np.reshape([np.repeat(coeffs.T[1], N)], [np.size(summedy), -1])
    a0 = np.reshape([np.repeat(coeffs.T[0], N)], [np.size(summedy), -1])
    a1y = np.multiply(a1, summedy)
    a0y = np.multiply(a0, summedy)
    consts = np.sum(gammaln(y_train + scale)) \
        - D * n_neurons * N * gammaln(scale) \
        - np.sum(coeffs.T[0] * (D * scale * N)) \
        - np.sum(a0y) - np.sum(summedy * np.log(scale))
    return summedy, a1y, a0y, a1, consts
def nll_GLM_GanmorCalciumAR1(w, X, Y, hyperparams, nlfun, S=10):
    """
    Negative log-likelihood for a GLM with the Ganmor AR(1) mixture model
    for calcium imaging data.

    Input:
        w:  [D x 1] vector of GLM regression weights
        X:  [T x D] design matrix
        Y:  [T x 1] calcium fluorescence observations
        hyperparams: [3 x 1] model hyperparameters: tau, alpha, Gaussian variance
        nlfun: [func] function handle for nonlinearity
        S:  [scalar] number of spikes to marginalize over

    Output: negative log-likelihood, gradient, and Hessian
    """
    # unpack hyperparams
    tau, alpha, sig2 = hyperparams

    # compute AR(1) diffs
    taudecay = np.exp(-1.0 / tau)  # decay factor for one time bin
    Y = np.pad(Y, (1, 0))          # pad Y by a time bin
    Ydff = (Y[1:] - taudecay * Y[:-1]) / alpha

    # compute grid of spike counts
    ygrid = np.arange(0, S + 1)

    # Gaussian log-likelihood terms
    log_gauss_grid = -0.5 * (Ydff[:, None] - ygrid[None, :])**2 \
        / (sig2 / alpha**2) - 0.5 * np.log(2.0 * np.pi * sig2)

    Xproj = X @ w
    poissConst = gammaln(ygrid + 1)

    # compute neglogli, gradient, and Hessian
    f, logf, df, ddf = nlfun(Xproj)
    logPcounts = logf[:, None] * ygrid[None, :] - f[:, None] \
        - poissConst[None, :]

    # compute log-likelihood for each time bin
    logjoint = log_gauss_grid + logPcounts
    logli = logsumexp(logjoint, axis=1)  # log likelihood for each time bin
    negL = -np.sum(logli)                # negative log likelihood

    # gradient
    dLpoiss = (df / f)[:, None] * ygrid[None, :] - df[:, None]  # deriv of Poisson log likelihood
    gwts = np.sum(np.exp(logjoint - logli[:, None]) * dLpoiss, axis=1)  # gradient weights
    gradient = -X.T @ gwts

    # Hessian
    ddLpoiss = (ddf / f - (df / f)**2)[:, None] * ygrid[None, :] - ddf[:, None]
    ddL = (ddLpoiss + dLpoiss**2)
    hwts = np.sum(np.exp(logjoint - logli[:, None]) * ddL, axis=1) - gwts**2  # hessian weights
    H = -X.T @ (X * hwts[:, None])

    return negL, gradient, H
def generalized_gamma_loss(x, X, B, T, W, fix_k, fix_p,
                           hierarchical, flavor, callback=None):
    # parameters for this distribution are p, k, lambd
    k = exp(x[0]) if fix_k is None else fix_k
    p = exp(x[1]) if fix_p is None else fix_p
    log_sigma_alpha = x[2]
    log_sigma_beta = x[3]
    a = x[4]
    b = x[5]
    n_features = int((len(x) - 6) / 2)
    alpha = x[6:6 + n_features]
    beta = x[6 + n_features:6 + 2 * n_features]
    # lambd = exp(X.alpha + a); X has shape N x n_features, alpha is n_features x 1
    lambd = exp(dot(X, alpha) + a)

    # PDF: p * lambd^(k*p) / gamma(k) * T^(k*p - 1) * exp(-(T*lambd)^p)
    log_pdf = log(p) + (k * p) * log(lambd) - gammaln(k) \
        + (k * p - 1) * log(T) - (T * lambd)**p
    cdf = gammainc(k, (T * lambd)**p)

    if flavor == 'logistic':  # log-likelihood with sigmoid
        c = expit(dot(X, beta) + b)  # fit one beta per feature
        LL_observed = log(c) + log_pdf
        LL_censored = log((1 - c) + c * (1 - cdf))
    elif flavor == 'linear':  # L2 loss, linear
        c = dot(X, beta) + b
        LL_observed = -(1 - c)**2 + log_pdf
        LL_censored = -(c * cdf)**2

    LL_data = sum(W * B * LL_observed + W * (1 - B) * LL_censored, 0) \
        - n_features * log_sigma_alpha
def log_likelihoods(self, data, input, mask, tag):
    assert data.dtype == int
    lambdas = np.exp(self.log_lambdas)
    mask = np.ones_like(data, dtype=bool) if mask is None else mask
    lls = -gammaln(data[:, None, :] + 1) - lambdas \
        + data[:, None, :] * np.log(lambdas)
    assert lls.shape == (data.shape[0], self.K, self.D)
    return np.sum(lls * mask[:, None, :], axis=2)
def poisson_logpdf(data, lambdas, mask=None):
    """
    Compute the log probability density of a Poisson distribution.
    This will broadcast as long as data and lambdas have the same
    (or at least compatible) leading dimensions.

    Parameters
    ----------
    data : array_like (..., D)
        The points at which to evaluate the log density

    lambdas : array_like (..., D)
        The rates of the Poisson distribution(s)

    mask : array_like (..., D) bool
        Optional mask indicating which entries in the data are observed

    Returns
    -------
    lps : array_like (...,)
        Log probabilities under the Poisson distribution(s).
    """
    D = data.shape[-1]
    assert data.dtype in (int, np.int8, np.int16, np.int32, np.int64)
    assert lambdas.shape[-1] == D

    # Check mask
    mask = mask if mask is not None else np.ones_like(data, dtype=bool)
    assert mask.shape == data.shape

    # Compute log pdf
    lls = -gammaln(data + 1) - lambdas + data * np.log(lambdas)
    return np.sum(lls * mask, axis=-1)
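# Sanity-check sketch (assumes scipy.stats is importable; sp_stats is an
# illustrative alias): summing scipy.stats.poisson.logpmf over the last axis
# should reproduce poisson_logpdf with no mask.
from scipy import stats as sp_stats
data_chk = np.random.poisson(3.0, size=(5, 4))
lambdas_chk = np.full((5, 4), 3.0)
assert np.allclose(poisson_logpdf(data_chk, lambdas_chk),
                   sp_stats.poisson.logpmf(data_chk, lambdas_chk).sum(-1))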
def pred_like(test_data, var_par, n_samples, k):
    # Method for predictive likelihood
    N_test, d = test_data.shape
    l = int(len(var_par) / 2)
    gln_test_n = gammaln(test_data.sum(axis=1) + 1)
    gln_test_values = np.sum(gammaln(test_data + 1), axis=1)
    mu, cov = var_par[:l], np.exp(var_par[l:])
    like_matrix = np.empty([N_test, n_samples])
    samples = draw_samples(var_par, n_samples)
    for s, sample in enumerate(samples):
        pi = softmax(sample[:k])
        thetas = softmax(sample[k:].reshape([k, d]), axis=1)
        logps = (np.log(pi) + np.dot(test_data, np.log(thetas.T))).T \
            + gln_test_n - gln_test_values
        like_matrix[:, s] = logsumexp2(np.stack(logps)[:, :N_test], axis=0)
    return np.mean(logsumexp2(like_matrix, axis=1) - np.log(n_samples))
def log_likelihoods(self, data, input, mask, tag, x):
    assert data.dtype == int
    lambdas = self.mean(self.forward(x, input, tag))
    mask = np.ones_like(data, dtype=bool) if mask is None else mask
    lls = -gammaln(data[:, None, :] + 1) - lambdas \
        + data[:, None, :] * np.log(lambdas)
    return np.sum(lls * mask[:, None, :], axis=2)
def testConditionAndMarginalizeBeta(self):
    def log_joint(x, a, b):
        return np.sum((a - 1) * np.log(x) + (b - 1) * np.log1p(-x))

    a = np.random.gamma(1., 1., [3, 4])
    b = np.random.gamma(1., 1., 4)
    x = np.random.gamma(1., 1., [3, 4])
    conditional, marginalized_value = (
        _condition_and_marginalize(log_joint, 0, SupportTypes.UNIT_INTERVAL,
                                   x, a, b))

    correct_marginalized_value = (special.gammaln(a) + special.gammaln(b) -
                                  special.gammaln(a + b)).sum()
    self.assertAlmostEqual(correct_marginalized_value, marginalized_value)

    self.assertTrue(np.allclose(a, conditional.args[0]))
    self.assertTrue(np.allclose(b, conditional.args[1]))
def testConditionAndMarginalizeDirichlet(self):
    def log_joint(x, alpha):
        return np.sum((alpha - 1) * np.log(x))

    alpha = np.random.gamma(1., 1., [3, 4])
    x = np.random.gamma(alpha, 1.)
    x /= x.sum(-1, keepdims=True)
    conditional, marginalized_value = (
        _condition_and_marginalize(log_joint, 0, SupportTypes.SIMPLEX,
                                   x, alpha))

    correct_marginalized_value = (special.gammaln(alpha).sum() -
                                  special.gammaln(np.sum(alpha, 1)).sum())
    self.assertAlmostEqual(correct_marginalized_value, marginalized_value)

    for i in range(alpha.shape[0]):
        self.assertTrue(np.allclose(alpha[i], conditional[i].item(0).alpha))
def _negative_log_likelihood(log_params, frequency, avg_monetary_value,
                             weights, penalizer_coef):
    warnings.simplefilter(action="ignore", category=FutureWarning)

    params = np.exp(log_params)
    p, q, v = params

    x = frequency
    m = avg_monetary_value

    negative_log_likelihood_values = (
        gammaln(p * x + q)
        - gammaln(p * x)
        - gammaln(q)
        + q * np.log(v)
        + (p * x - 1) * np.log(m)
        + (p * x) * np.log(x)
        - (p * x + q) * np.log(x * m + v)
    ) * weights
    penalizer_term = penalizer_coef * sum(params**2)
    return -negative_log_likelihood_values.sum() / weights.sum() \
        + penalizer_term
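# Minimal fitting sketch (hypothetical data, not from the source; assumes
# scipy.optimize and the `warnings` import used above are available): the
# model is parameterized in log space, so optimizing over all of R^3 keeps
# p, q, v strictly positive.
from scipy.optimize import minimize
freq = np.array([2., 3., 5.])
mval = np.array([10., 12., 8.])
w = np.ones_like(freq)
res = minimize(_negative_log_likelihood, x0=np.zeros(3),
               args=(freq, mval, w, 0.0), method="Nelder-Mead")
p_hat, q_hat, v_hat = np.exp(res.x)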
def nll(params):
    a, b = params
    # N.B. the likelihood must be expressed as a sum of log-gamma
    # factors in order to avoid overflow / underflow for large
    # numbers of successes or failures in an observation.
    return -np.sum(
        gammaln(successes + failures + 1)
        + gammaln(successes + a)
        + gammaln(failures + b)
        + gammaln(a + b)
        - gammaln(successes + 1)
        - gammaln(failures + 1)
        - gammaln(successes + failures + a + b)
        - gammaln(a)
        - gammaln(b))
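# Where this expression comes from (a brief derivation, not from the source):
# integrating a Binomial(successes + failures, theta) likelihood against a
# Beta(a, b) prior on theta gives the beta-binomial marginal
#     C(s + f, s) * B(s + a, f + b) / B(a, b),
# and expanding the binomial coefficient and both beta functions into gamma
# functions yields exactly the nine gammaln terms summed above.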
def independent_studentst_logpdf(data, mus, sigmasqs, nus, mask=None):
    """
    Compute the log probability density of a Student's t distribution with
    a diagonal scale matrix. This will broadcast as long as data, mus,
    sigmasqs, and nus have the same (or at least compatible) leading
    dimensions.

    Parameters
    ----------
    data : array_like (..., D)
        The points at which to evaluate the log density

    mus : array_like (..., D)
        The mean(s) of the Student's t distribution(s)

    sigmasqs : array_like (..., D)
        The diagonal variances(s) of the Student's t distribution(s)

    nus : array_like (..., D)
        The degrees of freedom of the Student's t distribution(s)

    mask : array_like (..., D) bool
        Optional mask indicating which entries in the data are observed

    Returns
    -------
    lps : array_like (...,)
        Log probabilities under the Student's t distribution(s).
    """
    D = data.shape[-1]
    assert mus.shape[-1] == D
    assert sigmasqs.shape[-1] == D
    assert nus.shape[-1] == D

    # Check mask
    mask = mask if mask is not None else np.ones_like(data, dtype=bool)
    assert mask.shape == data.shape

    normalizer = gammaln(0.5 * (nus + 1)) - gammaln(0.5 * nus)
    normalizer = normalizer - 0.5 * (np.log(np.pi) + np.log(nus)
                                     + np.log(sigmasqs))
    ll = normalizer - 0.5 * (nus + 1) \
        * np.log(1.0 + (data - mus)**2 / (sigmasqs * nus))
    return np.sum(ll * mask, axis=-1)
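# Sanity-check sketch (assumes scipy.stats is importable; sp_stats is an
# illustrative alias): each coordinate is a shifted and scaled Student's t,
# so the density should match scipy's univariate t summed over dimensions.
from scipy import stats as sp_stats
data_chk = np.random.randn(6, 3)
mus_chk = np.zeros(3)
sigmasqs_chk = 2.0 * np.ones(3)
nus_chk = 4.0 * np.ones(3)
assert np.allclose(
    independent_studentst_logpdf(data_chk, mus_chk, sigmasqs_chk, nus_chk),
    sp_stats.t.logpdf(data_chk, nus_chk, loc=mus_chk,
                      scale=np.sqrt(sigmasqs_chk)).sum(-1))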
def __init__(self, mu, K, df, Ki=None, logdet_K=None, L=None):
    mu = np.atleast_1d(mu).flatten()
    K = np.atleast_2d(K)
    assert(np.prod(mu.shape) == K.shape[0])
    assert(K.shape[0] == K.shape[1])
    self.mu = mu
    self.K = K
    self.df = df
    self._freeze_chi2 = stats.chi2(df)
    self.dim = K.shape[0]
    self._df_dim = self.df + self.dim
    # (self.Ki, self.logdet) = (np.linalg.inv(K), np.linalg.slogdet(K)[1])
    (self.Ki, self.L, self.Li, self.logdet) = pdinv(K)
    self.lpdf_const = float(gammaln((self.df + self.dim) / 2)
                            - (gammaln(self.df / 2)
                               + (log(self.df) + log(np.pi)) * self.dim * 0.5
                               + self.logdet * 0.5))
def logZ(natparam):
    alpha = natparam + 1
    return np.sum(np.sum(gammaln(alpha), -1) - gammaln(np.sum(alpha, -1)))
def negbin_loglike(r, p, x):
    # the negative binomial log likelihood we want to maximize
    return gammaln(r + x) - gammaln(r) - gammaln(x + 1) \
        + x * np.log(p) + r * np.log(1 - p)
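# Usage sketch with autograd (assumes this snippet's `np` is autograd.numpy,
# matching the gammaln import; the data below are hypothetical): for fixed r
# the MLE of p has the closed form sum(x) / (sum(x) + n * r), so a few hundred
# steps of gradient ascent on the summed log likelihood should approach it.
from autograd import grad
x_obs = np.array([3., 1., 4., 2.])
r_fixed = 2.0
g = grad(lambda p: np.sum(negbin_loglike(r_fixed, p, x_obs)))
p = 0.5
for _ in range(500):
    p = p + 1e-3 * g(p)   # simple fixed-step gradient ascent
# p is now close to x_obs.sum() / (x_obs.sum() + len(x_obs) * r_fixed) = 10/18.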
def logZ(natparam):
    alpha = natparam + 1
    return gammaln(alpha).sum() - gammaln(alpha.sum())
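# Worked check (a sketch): logZ is the Dirichlet log-normalizer evaluated at
# the natural parameter eta = alpha - 1, i.e.
#     log B(alpha) = sum_i ln Gamma(alpha_i) - ln Gamma(sum_i alpha_i).
# For the uniform Dirichlet alpha = (1, 1, 1), this gives
# 3 * gammaln(1) - gammaln(3) = -log 2.
assert np.isclose(logZ(np.zeros(3)), -np.log(2.))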