def fit_maxlike(x, r_guess):
    # follows Wikipedia's section on negative binomial max likelihood
    assert np.var(x) > np.mean(x), "Likelihood-maximizing parameters don't exist!"
    loglike = lambda r, p: np.sum(negbin_loglike(r, p, x))
    p = lambda r: np.sum(x) / np.sum(r + x)
    rprime = lambda r: grad(loglike)(r, p(r))
    r = newton(rprime, r_guess)
    return r, p(r)
def log_likelihood(weights, inputs, targets):
    logprobs = outputs(weights, inputs)
    loglik = 0.0
    num_time_steps, num_examples, _ = inputs.shape
    for t in range(num_time_steps):
        loglik += np.sum(logprobs[t] * targets[t])
    return loglik / (num_time_steps * num_examples)
def log_marginal_likelihood(params, data):
    cluster_lls = []
    for log_proportion, mean, chol in zip(*unpack_params(params)):
        cov = np.dot(chol.T, chol) + 0.000001 * np.eye(D)
        cluster_log_likelihood = log_proportion + mvn.logpdf(data, mean, cov)
        cluster_lls.append(np.expand_dims(cluster_log_likelihood, axis=0))
    cluster_lls = np.concatenate(cluster_lls, axis=0)
    return np.sum(logsumexp(cluster_lls, axis=0))
def quick_grad_check(fun, arg0, extra_args=(), kwargs={}, verbose=True,
                     eps=EPS, rtol=RTOL, atol=ATOL, rs=None):
    """Checks the gradient of a function (w.r.t. its first arg) in a random direction."""
    if verbose:
        print("Checking gradient of {0} at {1}".format(fun, arg0))

    if rs is None:
        rs = np.random.RandomState()

    random_dir = rs.standard_normal(np.shape(arg0))
    random_dir = random_dir / np.sqrt(np.sum(random_dir * random_dir))
    unary_fun = lambda x: fun(arg0 + x * random_dir, *extra_args, **kwargs)
    numeric_grad = unary_nd(unary_fun, 0.0, eps=eps)
    analytic_grad = np.sum(grad(fun)(arg0, *extra_args, **kwargs) * random_dir)

    assert np.allclose(numeric_grad, analytic_grad, rtol=rtol, atol=atol), \
        "Check failed! nd={0}, ad={1}".format(numeric_grad, analytic_grad)

    if verbose:
        print("Gradient projection OK (numeric grad: {0}, analytic grad: {1})".format(
            numeric_grad, analytic_grad))
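A minimal usage sketch for the check above, assuming the package mirrors autograd's layout so that `quick_grad_check`, `grad`, and the numpy wrapper are importable; the test function and input below are hypothetical.

# Hypothetical usage of quick_grad_check (assumes autograd-style imports).
import autogradwithbay.numpy as np

def test_fun(x):
    return np.sum(np.sin(x) * x**2)   # smooth, scalar-valued test function

x0 = np.array([0.1, -0.3, 0.5, 1.2, -0.7])
quick_grad_check(test_fun, x0)        # raises AssertionError if the directional check fails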
def logsumexp(x):
    """Numerically stable log(sum(exp(x))), also available as scipy.special.logsumexp."""
    max_x = np.max(x)
    return max_x + np.log(np.sum(np.exp(x - max_x)))
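A small illustration of why the maximum is subtracted: the naive form overflows for large inputs, while the shifted form stays finite (values below are for demonstration only).

# Demonstration only: naive log(sum(exp(x))) overflows for large x.
x = np.array([1000.0, 1000.0])
# np.log(np.sum(np.exp(x)))  -> inf, since exp(1000) overflows
print(logsumexp(x))           # -> 1000.6931..., i.e. 1000 + log(2)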
def gaussian_entropy(log_std):
    # Entropy of a D-dimensional diagonal Gaussian with log standard deviations log_std.
    return 0.5 * D * (1.0 + np.log(2 * np.pi)) + np.sum(log_std)
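A quick sanity check of this closed form against `scipy.stats.multivariate_normal.entropy`, with a hypothetical `D` and `log_std` (in the surrounding example these come from the variational parameters): the entropy of a diagonal Gaussian is 0.5*D*(1 + log 2π) plus the sum of the log standard deviations.

# Sanity-check sketch (hypothetical D and log_std; plain numpy/scipy).
import numpy as onp
from scipy.stats import multivariate_normal

D_demo = 3
log_std_demo = onp.log(onp.array([0.5, 1.0, 2.0]))
closed_form = 0.5 * D_demo * (1.0 + onp.log(2 * onp.pi)) + onp.sum(log_std_demo)
reference = multivariate_normal(mean=onp.zeros(D_demo),
                                cov=onp.diag(onp.exp(2 * log_std_demo))).entropy()
assert onp.allclose(closed_form, reference)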
def objective(params):
    gp_params, latents = unpack_params(params)
    gp_likelihood = sum([log_marginal_likelihood(gp_params[i], latents, data[:, i])
                         for i in range(data_dimension)])
    latent_prior_likelihood = np.sum(norm.logpdf(latents))
    return -gp_likelihood - latent_prior_likelihood
if __name__ == "__main__":
    # generate data
    npr.seed(0)
    data = negbin_sample(r=5, p=0.5, size=1000)

    # fit likelihood-extremizing parameters
    r, p = fit_maxlike(data, r_guess=1)

    # report fit
    print('Fit parameters:')
    print('r={r}, p={p}'.format(r=r, p=p))

    print('Check that we are at a local stationary point:')
    loglike = lambda r, p: np.sum(negbin_loglike(r, p, data))
    grad_both = multigrad(loglike, argnums=[0, 1])
    print(grad_both(r, p))

    import matplotlib.pyplot as plt
    xm = data.max()
    plt.figure()
    plt.hist(data, bins=np.arange(xm + 1) - 0.5, density=True, label='normed data counts')
    plt.xlim(0, xm)
    plt.plot(np.arange(xm), np.exp(negbin_loglike(r, p, np.arange(xm))), label='maxlike fit')
    plt.xlabel('k')
    plt.ylabel('p(k)')
    plt.legend(loc='best')
    plt.show()
def logprob(weights, inputs, targets):
    log_prior = np.sum(norm.logpdf(weights, 0, weight_scale))
    preds = predictions(weights, inputs)
    log_lik = np.sum(norm.logpdf(preds, targets, noise_scale))
    return log_prior + log_lik
def training_loss(weights):
    # Training loss is the negative log-likelihood of the training labels.
    preds = logistic_predictions(weights, inputs)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    return -np.sum(np.log(label_probabilities))
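A short training sketch using the loss above; `grad`, `logistic_predictions`, `inputs`, and `targets` are assumed to come from the surrounding example, and the step size and iteration count are arbitrary.

# Sketch: plain gradient descent on training_loss (hyperparameters are arbitrary).
training_gradient_fun = grad(training_loss)
weights = np.zeros(inputs.shape[1])
for i in range(100):
    weights = weights - 0.01 * training_gradient_fun(weights)
print("Final training loss:", training_loss(weights))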
def logsumexp(X, axis, keepdims=False):
    max_X = np.max(X)
    return max_X + np.log(np.sum(np.exp(X - max_X), axis=axis, keepdims=keepdims))
def logsumexp(X, axis=1):
    max_X = np.max(X)
    return max_X + np.log(np.sum(np.exp(X - max_X), axis=axis, keepdims=True))
from __future__ import absolute_import
import scipy.stats

import autogradwithbay.numpy as np
from autogradwithbay.scipy.special import digamma
from autogradwithbay.core import primitive

rvs    = primitive(scipy.stats.dirichlet.rvs)
pdf    = primitive(scipy.stats.dirichlet.pdf)
logpdf = primitive(scipy.stats.dirichlet.logpdf)

logpdf.defgrad(lambda ans, x, alpha: lambda g: g * (alpha - 1) / x, argnum=0)
logpdf.defgrad(lambda ans, x, alpha: lambda g:
               g * (digamma(np.sum(alpha)) - digamma(alpha) + np.log(x)), argnum=1)

# Same as log pdf, but multiplied by the pdf (ans).
pdf.defgrad(lambda ans, x, alpha: lambda g: g * ans * (alpha - 1) / x, argnum=0)
pdf.defgrad(lambda ans, x, alpha: lambda g:
            g * ans * (digamma(np.sum(alpha)) - digamma(alpha) + np.log(x)), argnum=1)
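A finite-difference sanity check of the `argnum=1` rule above, written against plain SciPy with hypothetical `x` and `alpha` values; the closed-form gradient of the log-density with respect to each concentration parameter is ψ(Σα) − ψ(α_i) + log x_i.

# Finite-difference check of d/d(alpha_0) logpdf(x, alpha) (hypothetical values).
import numpy as onp
import scipy.special

x_val     = onp.array([0.2, 0.3, 0.5])
alpha_val = onp.array([1.5, 2.0, 3.0])
eps       = 1e-6
bump      = onp.array([eps, 0.0, 0.0])

numeric = (scipy.stats.dirichlet.logpdf(x_val, alpha_val + bump) -
           scipy.stats.dirichlet.logpdf(x_val, alpha_val - bump)) / (2 * eps)
closed  = (scipy.special.digamma(alpha_val.sum())
           - scipy.special.digamma(alpha_val[0]) + onp.log(x_val[0]))
assert onp.allclose(numeric, closed, atol=1e-4)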
gamma        = primitive(scipy.special.gamma)
gammaln      = primitive(scipy.special.gammaln)
gammasgn     = primitive(scipy.special.gammasgn)
rgamma       = primitive(scipy.special.rgamma)
multigammaln = primitive(scipy.special.multigammaln)
# The rules below also attach to the polygamma/psi/digamma primitives.
polygamma    = primitive(scipy.special.polygamma)
psi          = primitive(scipy.special.psi)
digamma      = primitive(scipy.special.digamma)

gammasgn.defgrad_is_zero()
polygamma.defgrad_is_zero(argnums=(0,))
polygamma.defgrad(lambda ans, n, x: lambda g: g * polygamma(n + 1, x), argnum=1)
psi.defgrad(lambda ans, x: lambda g: g * polygamma(1, x))
digamma.defgrad(lambda ans, x: lambda g: g * polygamma(1, x))
gamma.defgrad(lambda ans, x: lambda g: g * ans * psi(x))
gammaln.defgrad(lambda ans, x: lambda g: g * psi(x))
rgamma.defgrad(lambda ans, x: lambda g: g * psi(x) / -gamma(x))
multigammaln.defgrad(lambda ans, a, d: lambda g:
                     g * np.sum(digamma(np.expand_dims(a, -1) - np.arange(d) / 2.), -1))
multigammaln.defgrad_is_zero(argnums=(1,))

### Bessel functions ###

j0 = primitive(scipy.special.j0)
y0 = primitive(scipy.special.y0)
j1 = primitive(scipy.special.j1)
y1 = primitive(scipy.special.y1)
jn = primitive(scipy.special.jn)
yn = primitive(scipy.special.yn)

j0.defgrad(lambda ans, x: lambda g: -g * j1(x))
y0.defgrad(lambda ans, x: lambda g: -g * y1(x))
j1.defgrad(lambda ans, x: lambda g: g * (j0(x) - jn(2, x)) / 2.0)
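A quick numerical check of the first Bessel rule above, d/dx J0(x) = −J1(x), at an arbitrary point (demonstration only).

# Demonstration only: central difference for d/dx J0(x) versus -J1(x).
import numpy as onp
import scipy.special as sp

x0, eps = 1.3, 1e-6
numeric = (sp.j0(x0 + eps) - sp.j0(x0 - eps)) / (2 * eps)
assert onp.isclose(numeric, -sp.j1(x0), atol=1e-8)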
def to_scalar(x):
    if isinstance(x, list) or isinstance(x, ListNode) or \
       isinstance(x, tuple) or isinstance(x, TupleNode):
        return sum([to_scalar(item) for item in x])
    return np.sum(np.real(np.sin(x)))
def example_func(y):
    z = y**2
    lse = logsumexp(z)
    return np.sum(lse)
def sum_output(*args, **kwargs):
    return np.sum(fun(*args, **kwargs))
def loss(W_vect, X, T):
    log_prior = -L2_reg * np.dot(W_vect, W_vect)
    log_lik = np.sum(predictions(W_vect, X) * T)
    return -log_prior - log_lik
def rbf_covariance(kernel_params, x, xp):
    output_scale = np.exp(kernel_params[0])
    lengthscales = np.exp(kernel_params[1:])
    diffs = np.expand_dims(x / lengthscales, 1) \
          - np.expand_dims(xp / lengthscales, 0)
    return output_scale * np.exp(-0.5 * np.sum(diffs**2, axis=2))
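A shape-only usage sketch with hypothetical data: `kernel_params` holds the log output scale followed by one log lengthscale per input dimension, and the result is an n-by-m kernel matrix.

# Shape sketch (hypothetical data; all log hyperparameters set to 0.0, i.e. scale 1).
import numpy as onp
x_demo  = onp.random.RandomState(0).randn(5, 2)    # n = 5 points, d = 2 dims
xp_demo = onp.random.RandomState(1).randn(3, 2)    # m = 3 points
kernel_params_demo = onp.zeros(1 + 2)              # [log output scale] + d log lengthscales
K = rbf_covariance(kernel_params_demo, x_demo, xp_demo)
assert K.shape == (5, 3)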
def logprob(weights, inputs, targets):
    log_prior = -L2_reg * np.sum(weights**2, axis=1)
    preds = predictions(weights, inputs)
    log_lik = -np.sum((preds - targets)**2, axis=1)[:, 0] / noise_variance
    return log_prior + log_lik