def transition(self, state, params, data, rng):
    """Draw a new state by resampling ``p``, ``s`` and ``mu`` in turn.

    ``var`` and ``omega`` are copied from *state* unchanged.

    :param state: previous state; its ``s``, ``mu``, ``var`` and ``omega``
        attributes are read.
    :param params: mapping with the keys ``'alpha'``, ``'beta'``, ``'mu0'``
        and ``'sigma0'``.
    :param data: observations; indexed by position and by boolean mask.
    :param rng: random source; its ``beta`` and ``normal`` methods are used
        and it is forwarded to ``helpers.discrete_sample`` so that every draw
        comes from the same generator.
    :return: a freshly built ``State``.
    """
    s = State()

    # Mixing proportion: Beta draw updated with the previous label counts.
    s.p = rng.beta(params['alpha'] + sum(state.s == 1),
                   params['beta'] + sum(state.s == 0))

    # NOTE(review): s.p is drawn with the count of label 1 on the alpha side,
    # yet it is used below as the weight of component 0 -- confirm the
    # intended labelling convention before changing it.
    weights = (s.p, 1 - s.p)

    n = len(data)
    s.s = empty(n)
    for i in range(n):
        p_cluster = empty(2)
        for j in range(2):
            lh = stats.norm(state.mu[j], sqrt(state.var[j])).pdf(data[i])
            p_cluster[j] = lh * weights[j]
        # Pass rng explicitly; without it the draw would not come from the
        # generator supplied by the caller.
        s.s[i] = helpers.discrete_sample(p_cluster, rng=rng)

    # Component means: normal draw combining the (mu0, sigma0) prior with the
    # points currently assigned to each component.
    s.mu = empty(2)
    for j in range(2):
        members = s.s == j
        n_j = sum(members)
        prec_prime = 1 / params['sigma0'] ** 2 + n_j / state.var[j]
        mu_prime = (params['mu0'] / params['sigma0'] ** 2
                    + sum(data[members]) / state.var[j])
        mu_prime /= prec_prime
        s.mu[j] = rng.normal(mu_prime, sqrt(1 / prec_prime))
        assert not isnan(s.mu[j])

    s.omega = state.omega
    s.var = state.var
    return s
def sample_c(self, i, c, params, data, dp_alpha, beta, rng, debug=False):
    """Sample a cluster id for point *i* given all other assignments.

    Every cluster that still has members after removing point *i* is scored
    with ``log(member count)`` plus a ``betaln``-based log-likelihood of
    ``data[i]``; a brand-new cluster is scored with ``log(dp_alpha)`` plus the
    same likelihood under the bare ``beta`` hyperparameter. One index is then
    drawn from these log-scores.

    :param i: index of the point being reassigned.
    :param c: array of current cluster ids, one per point.
    :param params: unused here.
    :param data: observations; rows are compared against ``True``/``False``,
        so they are presumably binary (0/1) features -- confirm with caller.
    :param dp_alpha: weight given to opening a new cluster.
    :param beta: hyperparameter added to both the "true" and "false" counts.
    :param rng: random source forwarded to ``helpers.discrete_sample``.
    :param debug: when true, also return diagnostic values.
    :return: the sampled cluster id, or, when *debug* is set, the tuple
        ``(id, (p_conv, alpha_set, beta_set, x, count_set))`` where the three
        ``*_set`` lists hold one entry per existing cluster.
    """
    c_diff = delete(c, i)
    cluster_ids = unique(c_diff)
    n_clusters = len(cluster_ids)
    p = zeros(n_clusters + 1)
    x = data[i].astype(int)

    alpha_set = []
    beta_set = []
    count_set = []

    for j, cluster_id in enumerate(cluster_ids):
        count = sum(c_diff == cluster_id)

        # Members of this cluster, excluding the point being resampled.
        c_in = (c == cluster_id)
        c_in[i] = False
        members = data[c_in]

        # Use per-cluster names: rebinding ``beta`` here would leak one
        # cluster's counts into every later cluster and the new-cluster term.
        alpha_j = beta + sum(members == True, 0)
        beta_j = beta + sum(members == False, 0)

        lh = sum(betaln(alpha_j + x, beta_j + (1 - x)) - betaln(alpha_j, beta_j))
        p[j] = lh + log(count)

        alpha_set.append(alpha_j)
        beta_set.append(beta_j)
        count_set.append(count)

    # Last slot: open a new cluster.
    lh = sum(betaln(beta + x, beta - x + 1) - betaln(beta, beta))
    p[-1] = log(dp_alpha) + lh

    if debug:
        p_conv = exp(p)
        p_conv /= sum(p_conv)

    idx = helpers.discrete_sample(p, rng=rng, log_mode=True)
    if idx == len(p) - 1:
        # ``unique`` returns sorted ids, so the last one is the largest.
        # With no other points there is no existing id to increment.
        c_return = cluster_ids[-1] + 1 if n_clusters else 0
    else:
        c_return = cluster_ids[idx]

    if debug:
        return c_return, (p_conv, alpha_set, beta_set, x, count_set)
    return c_return
def load(self, params, rng):
    """
    Draws a cluster label for every point and generates the data set

    Expected parameter keys:

    n_points
        Number of points in the dataset
    clusters
        A list of clusters of type :py:class:`Cluster`
    weights
        A list of mixing weights for each cluster in *clusters*

    The drawn labels are stored on ``self.c`` and the cluster list on
    ``self.clusters``; the generated data array is returned.
    """
    try:
        total = params['n_points']
        components = params['clusters']
        mix = asarray(params['weights'])
        self.clusters = components
    except KeyError as error:
        raise ParameterException("Required finite mixture parameter not passed in: %r" % error)

    # All clusters are taken to share the dimensionality of the first one.
    width = components[0].dim
    self.c = helpers.discrete_sample(mix, total, rng)

    points = empty((total, width))
    for label, component in enumerate(components):
        mask = self.c == label
        # Ask each cluster for exactly as many points as carry its label.
        points[mask] = component.sample_points(int(sum(mask)), rng)

    return points
def transition(self, state, params, data, rng):
    """Draw a new state by resampling ``p``, ``s`` and ``mu`` in turn.

    ``var`` and ``omega`` are copied from *state* unchanged.

    :param state: previous state; its ``s``, ``mu``, ``var`` and ``omega``
        attributes are read.
    :param params: mapping with the keys ``'alpha'``, ``'beta'``, ``'mu0'``
        and ``'sigma0'``.
    :param data: observations; indexed by position and by boolean mask.
    :param rng: random source; its ``beta`` and ``normal`` methods are used
        and it is forwarded to ``helpers.discrete_sample`` so that every draw
        comes from the same generator.
    :return: a freshly built ``State``.
    """
    s = State()

    # Mixing proportion: Beta draw updated with the previous label counts.
    s.p = rng.beta(params['alpha'] + sum(state.s == 1),
                   params['beta'] + sum(state.s == 0))

    # NOTE(review): s.p is drawn with the count of label 1 on the alpha side,
    # yet it is used below as the weight of component 0 -- confirm the
    # intended labelling convention before changing it.
    weights = (s.p, 1 - s.p)

    n = len(data)
    s.s = empty(n)
    for i in range(n):
        p_cluster = empty(2)
        for j in range(2):
            lh = stats.norm(state.mu[j], sqrt(state.var[j])).pdf(data[i])
            p_cluster[j] = lh * weights[j]
        # Pass rng explicitly; without it the draw would not come from the
        # generator supplied by the caller.
        s.s[i] = helpers.discrete_sample(p_cluster, rng=rng)

    # Component means: normal draw combining the (mu0, sigma0) prior with the
    # points currently assigned to each component.
    s.mu = empty(2)
    for j in range(2):
        members = s.s == j
        n_j = sum(members)
        prec_prime = 1 / params['sigma0'] ** 2 + n_j / state.var[j]
        mu_prime = (params['mu0'] / params['sigma0'] ** 2
                    + sum(data[members]) / state.var[j])
        mu_prime /= prec_prime
        s.mu[j] = rng.normal(mu_prime, sqrt(1 / prec_prime))
        assert not isnan(s.mu[j])

    s.omega = state.omega
    s.var = state.var
    return s
def test_discrete_sample():
    """Check empirical frequencies produced by ``helpers.discrete_sample``.

    Four cases are drawn from the same seeded generator, in this order:
    plain weights, log-weights with ``log_mode=True``, a very high
    temperature (expected to flatten the distribution) and a very low
    temperature (expected to concentrate on the largest weight).
    """
    from helpers import discrete_sample

    w = asarray([3, 6, 2], 'd')
    # Sample counts must be integers; a float such as 1e5 may be rejected
    # when it is used as an array size.
    n_samples = 100000
    delta = .05
    rng = random.RandomState(0)

    def bin_samples(samples):
        # float() keeps this a true division on every Python version.
        return bincount(samples, minlength=len(w)) / float(len(samples))

    def check(samples, expected):
        # Compare each empirical frequency with its target, within delta.
        for observed, target in zip(bin_samples(samples), expected):
            assert_almost_equal(observed, target, delta=delta)

    r = w / sum(w)

    check(discrete_sample(w, n_samples, rng=rng), r)
    check(discrete_sample(log(w), n_samples, rng=rng, log_mode=True), r)

    # High temperature: close to uniform.
    uniform = repeat(1.0 / len(w), len(w))
    check(discrete_sample(w, n_samples, rng, temperature=10000), uniform)

    # Low temperature: all mass on the largest weight.
    peaked = zeros_like(w)
    peaked[argmax(w)] = 1
    check(discrete_sample(w, n_samples, rng, temperature=.01), peaked)
def sample_alpha(self, state, params, n, rng):
    """Pick a new ``alpha`` from a fixed grid of candidate values.

    Each of 1000 evenly spaced candidates in [0.1, 10] is scored with a gamma
    log-prior (``params['alpha_shape']``, ``params['alpha_scale']``) plus a
    log-likelihood term that depends on the number of distinct ids in
    ``state.c`` and on *n*. One candidate is then drawn according to those
    log-scores.

    :param state: object whose ``c`` attribute holds the cluster ids.
    :param params: mapping with ``'alpha_shape'`` and ``'alpha_scale'``.
    :param n: count entering the ``gammaln(alpha + n)`` term.
    :param rng: random source forwarded to ``helpers.discrete_sample``.
    :return: the selected grid value.
    """
    k = len(unique(state.c))
    candidates = linspace(.1, 10, 1000)

    log_prior = stats.gamma.logpdf(candidates,
                                   params['alpha_shape'],
                                   scale=params['alpha_scale'])
    log_lh = gammaln(candidates) + k * log(candidates) - gammaln(candidates + n)
    log_score = log_prior + log_lh

    pick = helpers.discrete_sample(log_score, rng=rng, log_mode=True)
    return candidates[pick]
def sample_latent(self, params, data_params, rng):
    """Draw ``alpha`` and a sequential cluster assignment for ``n`` points.

    ``alpha`` is a gamma draw using ``params['alpha_shape']`` and
    ``params['alpha_scale']``. Point 0 is placed in cluster 0; each later
    point joins an existing cluster with weight equal to that cluster's
    current size, or opens a new cluster with weight ``alpha``.

    :param params: mapping with ``'alpha_shape'`` and ``'alpha_scale'``.
    :param data_params: mapping with ``'n'``, the number of points.
    :param rng: random source; ``gamma`` is called on it and it is forwarded
        to ``helpers.discrete_sample``.
    :return: a ``State`` with ``alpha`` and the integer id array ``c`` set.
    """
    s = State()
    s.alpha = rng.gamma(params['alpha_shape'], scale=params['alpha_scale'])

    n = data_params['n']
    c = zeros(n, int)
    for i in range(1, n):
        # Sizes must be counted over the points assigned so far; counting
        # inside the unique id list would give 1 for every cluster.
        cluster_ids, counts = unique(c[:i], return_counts=True)
        p = zeros(len(cluster_ids) + 1)
        p[:-1] = counts
        p[-1] = s.alpha
        # Ids are handed out consecutively from 0, so the sampled index is the
        # cluster id and the final index is the next unused id.
        c[i] = helpers.discrete_sample(p, rng=rng)

    s.c = c
    return s