def _single_gmm():
  """Returns a mixture of gaussian applicant distributions."""
  return distributions.Mixture(
      components=[
          ApplicantDistribution(
              features=distributions.Gaussian(mean=mean, std=0.5),
              group_membership=distributions.Constant(group),
              will_default=distributions.Bernoulli(p=default_likelihoods[0])),
          ApplicantDistribution(
              features=distributions.Gaussian(
                  mean=np.array(mean) + np.array(intercluster_vec), std=0.5),
              group_membership=distributions.Constant(group),
              will_default=distributions.Bernoulli(p=default_likelihoods[1]))
      ],
      weights=[0.3, 0.7])
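A hypothetical usage sketch (not from the original module): _single_gmm closes over mean, intercluster_vec, group, and default_likelihoods from its enclosing scope, and Mixture is assumed to expose the same sample(rng) interface exercised by the Gaussian test below; the bindings shown here are illustrative only.

# Illustrative bindings for the enclosing-scope variables (assumed values).
mean = [0.0, 0.0]
intercluster_vec = [1.0, 1.0]
group = [1, 0]
default_likelihoods = [0.2, 0.6]

# Draw one applicant; assumes Mixture.sample(rng) mirrors Gaussian.sample(rng).
rng = np.random.RandomState(seed=0)
applicant = _single_gmm().sample(rng)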
def test_gaussian_has_right_mean_std(self):
  my_distribution = distributions.Gaussian(mean=[0, 0, 1], std=0.1)
  rng = np.random.RandomState(seed=100)
  samples = [my_distribution.sample(rng) for _ in range(1000)]
  self.assertLess(
      np.linalg.norm(np.mean(samples, 0) - np.array([0, 0, 1])), 0.1)
  self.assertLess(
      np.linalg.norm(np.std(samples, 0) - np.array([0.1, 0.1, 0.1])), 0.1)
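A hypothetical companion test (not in the original file), assuming distributions.Bernoulli accepts p= and exposes the same sample(rng) interface as Gaussian above; the 0.05 tolerance comfortably covers the sampling noise of 1000 draws.

def test_bernoulli_has_right_mean(self):
  # Assumed API: Bernoulli(p=...) with sample(rng), mirroring the Gaussian test.
  my_distribution = distributions.Bernoulli(p=0.25)
  rng = np.random.RandomState(seed=100)
  samples = [my_distribution.sample(rng) for _ in range(1000)]
  # Standard error of the sample mean is about 0.014 here, so 0.05 is a loose bound.
  self.assertLess(abs(np.mean(samples) - 0.25), 0.05)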
def p_x_fn(self, z_above: nd.NDArray, weight: nd.NDArray,
           bias: nd.NDArray = None) -> distributions.BaseDistribution:
    # z_above: [n_samples, batch_size, size_above]
    # weight: [size_above, data_size]
    if self.data_distribution == 'gaussian':
        params = nd.dot(z_above, weight) + bias
        variance = nd.ones_like(params)
        return distributions.Gaussian(params, variance)
    elif self.data_distribution == 'bernoulli':
        params = nd.dot(z_above, weight) + bias
        return distributions.Bernoulli(logits=params)
    elif self.data_distribution == 'poisson':
        # minimum intercept is 0.01
        return distributions.Poisson(
            0.01 + nd.dot(z_above, util.softplus(weight)))
    else:
        raise ValueError('Incompatible data distribution: %s' %
                         self.data_distribution)
def __init__(self, sdim=None, udim=None, weights=None, sigma=None,
             filename=None, *args, **kwargs):
    if filename is not None:
        self.load(filename)
        self.compile()
        return
    self.sdim = sdim
    self.udim = udim
    self.dist = distributions.Gaussian(sigma=sigma)
    self.weights = (self.random_weights()
                    if weights is None else BlockyVector(weights))
    self.compile()
    if monitor:
        monitor_vals.append(monitor(A, obs_distr))
        Tracer()()

    return seq, A, obs_distr, ll_test, monitor_vals


if __name__ == '__main__':
    X = np.loadtxt('EMGaussian.data')
    Xtest = np.loadtxt('EMGaussian.test')
    K = 4

    # Run simple EM (no HMM)
    iterations = 40
    assignments, centers, _ = kmeans.kmeans_best_of_n(X, K, n_trials=5)
    new_centers = [distributions.Gaussian(c.mean, np.eye(2))
                   for c in centers]
    tau, obs_distr, pi, gmm_ll_train, gmm_ll_test = \
        em.em(X, new_centers, assignments, n_iter=iterations, Xtest=Xtest)

    # example with fixed parameters
    A = 1. / 6 * np.ones((K, K))
    A[np.diag(np.ones(K)) == 1] = 0.5

    lalpha, lbeta = alpha_beta(Xtest, pi, A, obs_distr)
    log_p = smoothing(lalpha, lbeta)
    p = np.exp(log_p)

    def plot_traj(p):
        plt.figure()
        ind = np.arange(100)
from functools import partial

from jax import grad, jit, random
import jax.numpy as jnp
import numpy as onp
import json_tricks as json

import utils
import stein
import kernels
import distributions
import models
import config as cfg

key = random.PRNGKey(0)

# Poorly conditioned Gaussian
d = 50
variances = jnp.logspace(-5, 0, num=d)
target = distributions.Gaussian(jnp.zeros(d), variances)
proposal = distributions.Gaussian(jnp.zeros(d), jnp.ones(d))


@partial(jit, static_argnums=1)
def get_sd(samples, fun):
    """Compute SD(samples, p) given witness function fun"""
    return stein.stein_discrepancy(samples, target.logpdf, fun)


def kl_gradient(x):
    """Optimal witness function."""
    return grad(lambda x: target.logpdf(x) - proposal.logpdf(x))(x)


print("Computing theoretically optimal Stein discrepancy...")
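A hypothetical sanity check (not in the original script): with the standard Langevin-Stein operator, the Stein discrepancy evaluated at the kl_gradient witness equals the Fisher divergence E_q ||grad log p - grad log q||^2, which has a closed form for these zero-mean diagonal Gaussians and can serve as a reference value.

def fisher_divergence_diag(var_q, var_p):
    # E_{x ~ N(0, diag(var_q))} || x/var_q - x/var_p ||^2 for zero-mean diagonal Gaussians.
    return jnp.sum(var_q * (1.0 / var_q - 1.0 / var_p) ** 2)

print(fisher_divergence_diag(jnp.ones(d), variances))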
import distributions
import gen_data
import hmm
import hsmm

import numpy as np
import matplotlib.pyplot as plt
import sys

if __name__ == '__main__':
    if sys.argv[1] == 'HMM':
        # HMM
        K = 2
        pi = np.array([0.3, 0.7])
        A = np.array([[0.1, 0.9],
                      [0.2, 0.8]])
        obs_distr = [
            distributions.Gaussian(np.array([3., 0.]),
                                   np.array([[2., 1.], [1., 4.]])),
            distributions.Gaussian(np.array([-2., 3.]),
                                   np.array([[3., -1.], [-1., 2.]]))
        ]

        seq, X = gen_data.gen_hmm(pi, A, obs_distr, 10000)
        seq_test, Xtest = gen_data.gen_hmm(pi, A, obs_distr, 1000)

        init_pi = np.ones(K) / K
        init_obs_distr = [
            distributions.Gaussian(np.array([1., 0.]), np.eye(2)),
            distributions.Gaussian(np.array([0., 1.]), np.eye(2))
        ]

        # HMM
        print('HMM - batch EM')
Xtest = np.loadtxt('EMGaussian.test')
K = 4
iterations = 40

assignments, centers, _ = kmeans.kmeans_best_of_n(X, K, n_trials=5)

for k in range(K):
    centers[k].sigma2 = 1.  # Isotropic

tau, obs_distr, pi, ll_train_iso, ll_test_iso = \
    em(X, centers, assignments, n_iter=iterations, Xtest=Xtest)

plot_em(X, tau, obs_distr, contours=True)
plt.title('EM with covariance matrices proportional to identity')

# General
new_centers = [distributions.Gaussian(c.mean, c.sigma2 * np.eye(2))
               for c in centers]
tau, obs_distr, pi, ll_train_gen, ll_test_gen = \
    em(X, new_centers, assignments, n_iter=iterations, Xtest=Xtest)

plot_em(X, tau, obs_distr, contours=True)
plt.title('EM with general covariance matrices')

# log-likelihood plot
plt.figure()
plt.plot(ll_train_iso, label='isotropic, training')
plt.plot(ll_test_iso, label='isotropic, test')
plt.plot(ll_train_gen, label='general, training')
plt.plot(ll_test_gen, label='general, test')
plt.xlabel('iterations')
plt.ylabel('log-likelihood')
plt.title('Comparison of learning curves')
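A hypothetical wrap-up (the excerpt is truncated, so equivalent calls may already appear later in the original file): the curves above are given labels but no legend is drawn and nothing is rendered when running the script interactively.

plt.legend(loc='lower right')
plt.show()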