Ejemplo n.º 1
0
    def transition(self, state, params, data, rng):
        """Resample ``p``, the assignments ``s`` and the two means ``mu``.

        ``omega`` and ``var`` are copied from ``state`` unchanged.

        :param state: current state; ``s``, ``mu``, ``var`` and ``omega``
            are read from it.
        :param params: mapping providing ``'alpha'``, ``'beta'``, ``'mu0'``
            and ``'sigma0'``.
        :param data: observations; indexed by position and by boolean mask.
        :param rng: random source; ``beta`` and ``normal`` are called on it
            and it is forwarded to ``helpers.discrete_sample``.
        :returns: a freshly built ``State``.
        """
        s = State()
        # NOTE(review): the first shape parameter is updated with the count
        # of s == 1, yet s.p is used below as the weight of component 0 --
        # confirm that this orientation is intended.
        s.p = rng.beta(params['alpha'] + sum(state.s == 1),
                       params['beta'] + sum(state.s == 0))
        n = len(data)
        s.s = empty(n)
        weights = (s.p, 1 - s.p)
        for i in range(n):
            p_cluster = empty(2)
            for j in range(2):
                lh = stats.norm(state.mu[j], sqrt(state.var[j])).pdf(data[i])
                p_cluster[j] = lh * weights[j]
            # Forward rng so the assignment draw uses the caller's random
            # stream, like every other sampling step in this method.
            s.s[i] = helpers.discrete_sample(p_cluster, rng=rng)

        s.mu = empty(2)
        for j in range(2):
            n_j = sum(s.s == j)
            # The precision is both the normaliser of the mean and the
            # inverse variance of the draw, so compute it once.
            prec_prime = 1 / params['sigma0'] ** 2 + n_j / state.var[j]
            mu_prime = (params['mu0'] / params['sigma0'] ** 2
                        + sum(data[s.s == j]) / state.var[j]) / prec_prime
            s.mu[j] = rng.normal(mu_prime, sqrt(1 / prec_prime))
            assert not isnan(s.mu[j])
        s.omega = state.omega
        s.var = state.var

        return s
Ejemplo n.º 2
0
 def sample_c(self, i, c, params, data, dp_alpha, beta, rng, debug=False):
     """Sample a cluster label for point ``i`` given all other labels.

     Each existing cluster is scored with ``log(count)`` plus a
     ``betaln``-based log-likelihood of row ``i``; a new cluster is scored
     with ``log(dp_alpha)`` plus the same likelihood under the bare
     ``beta``/``beta`` prior. One index is then drawn in log mode.

     :param i: index of the point being reassigned.
     :param c: array of current labels, one per row of ``data``.
     :param params: unused here.
     :param data: array whose rows are compared against ``True``/``False``.
     :param dp_alpha: weight of opening a new cluster.
     :param beta: prior value used for both arguments of ``betaln``.
     :param rng: random source forwarded to ``helpers.discrete_sample``.
     :param debug: when true, also return diagnostic values.
     :returns: the sampled label, or ``(label, (p_conv, alpha_set,
         beta_set, x, count_set))`` when ``debug`` is true.
     """
     c_diff = delete(c, i)
     cluster_ids = unique(c_diff)
     n_clusters = len(cluster_ids)
     p = zeros(n_clusters + 1)
     x = data[i].astype(int)
     # NOTE(review): these three lists are returned in debug mode but are
     # never filled in this function.
     alpha_set = []
     beta_set = []
     count_set = []
     for j, cluster_id in enumerate(cluster_ids):
         count = sum(c_diff == cluster_id)
         prior = log(count)
         c_in = (c == cluster_id)
         c_in[i] = False  # leave point i out of its own cluster
         members = data[c_in]
         # Keep the posterior in local names: rebinding ``beta`` here would
         # leak one cluster's counts into the next cluster and into the
         # new-cluster term below.
         alpha_post = beta + sum(members == True, 0)  # noqa: E712 (elementwise)
         beta_post = beta + sum(members == False, 0)  # noqa: E712 (elementwise)
         lh = sum(betaln(alpha_post + x, beta_post + (1 - x))
                  - betaln(alpha_post, beta_post))
         p[j] = lh + prior
     prior = log(dp_alpha)
     lh = sum(betaln(beta + x, beta - x + 1) - betaln(beta, beta))
     p[-1] = prior + lh
     if debug:
         p_conv = exp(p)
         p_conv /= sum(p_conv)
     idx = helpers.discrete_sample(p, rng=rng, log_mode=True)
     if idx == len(p) - 1:
         # With no other points there is no existing label to increment.
         c_return = cluster_ids[-1] + 1 if n_clusters else 0
     else:
         c_return = cluster_ids[idx]
     if debug:
         return c_return, (p_conv, alpha_set, beta_set, x, count_set)
     else:
         return c_return
Ejemplo n.º 3
0
    def load(self, params, rng):
        """
        Draw cluster assignments and the matching data points.

        Assignments are stored on ``self.c`` and the cluster list on
        ``self.clusters``; the generated points are returned.

        Expected parameter keys:

        n_points
         Number of points in the dataset

        clusters
         A list of clusters of type :py:class:`Cluster`

        weights
         A list of mixing weights for each cluster in *clusters*

        Raises ``ParameterException`` when one of the keys is missing.
        """
        required = ('n_points', 'clusters', 'weights')
        try:
            # Look the keys up in declaration order so the first missing one
            # is the one that gets reported.
            n_points, clusters, weights = [params[key] for key in required]
            weights = asarray(weights)
            self.clusters = clusters
        except KeyError as missing:
            raise ParameterException("Required finite mixture parameter not passed in: %r" % missing)
        n_dims = clusters[0].dim
        self.c = helpers.discrete_sample(weights, n_points, rng)
        points = empty((n_points, n_dims))
        for label, cluster in enumerate(clusters):
            members = self.c == label
            # Fill the rows assigned to this cluster with its own samples.
            points[members] = cluster.sample_points(int(sum(members)), rng)
        return points
Ejemplo n.º 4
0
    def transition(self, state, params, data, rng):
        """Resample ``p``, the assignments ``s`` and the two means ``mu``.

        ``omega`` and ``var`` are copied from ``state`` unchanged.

        :param state: current state; ``s``, ``mu``, ``var`` and ``omega``
            are read from it.
        :param params: mapping providing ``'alpha'``, ``'beta'``, ``'mu0'``
            and ``'sigma0'``.
        :param data: observations; indexed by position and by boolean mask.
        :param rng: random source; ``beta`` and ``normal`` are called on it
            and it is forwarded to ``helpers.discrete_sample``.
        :returns: a freshly built ``State``.
        """
        s = State()
        # NOTE(review): the first shape parameter is updated with the count
        # of s == 1, yet s.p is used below as the weight of component 0 --
        # confirm that this orientation is intended.
        s.p = rng.beta(params['alpha'] + sum(state.s == 1),
                       params['beta'] + sum(state.s == 0))
        n = len(data)
        s.s = empty(n)
        weights = (s.p, 1 - s.p)
        for i in range(n):
            p_cluster = empty(2)
            for j in range(2):
                lh = stats.norm(state.mu[j], sqrt(state.var[j])).pdf(data[i])
                p_cluster[j] = lh * weights[j]
            # Forward rng so the assignment draw uses the caller's random
            # stream, like every other sampling step in this method.
            s.s[i] = helpers.discrete_sample(p_cluster, rng=rng)

        s.mu = empty(2)
        for j in range(2):
            n_j = sum(s.s == j)
            # The precision is both the normaliser of the mean and the
            # inverse variance of the draw, so compute it once.
            prec_prime = 1 / params['sigma0']**2 + n_j / state.var[j]
            mu_prime = (params['mu0'] / params['sigma0']**2
                        + sum(data[s.s == j]) / state.var[j]) / prec_prime
            s.mu[j] = rng.normal(mu_prime, sqrt(1 / prec_prime))
            assert not isnan(s.mu[j])
        s.omega = state.omega
        s.var = state.var

        return s
Ejemplo n.º 5
0
 def sample_c(self, i, c, params, data, dp_alpha, beta, rng, debug=False):
     """Sample a cluster label for point ``i`` given all other labels.

     Each existing cluster is scored with ``log(count)`` plus a
     ``betaln``-based log-likelihood of row ``i``; a new cluster is scored
     with ``log(dp_alpha)`` plus the same likelihood under the bare
     ``beta``/``beta`` prior. One index is then drawn in log mode.

     :param i: index of the point being reassigned.
     :param c: array of current labels, one per row of ``data``.
     :param params: unused here.
     :param data: array whose rows are compared against ``True``/``False``.
     :param dp_alpha: weight of opening a new cluster.
     :param beta: prior value used for both arguments of ``betaln``.
     :param rng: random source forwarded to ``helpers.discrete_sample``.
     :param debug: when true, also return diagnostic values.
     :returns: the sampled label, or ``(label, (p_conv, alpha_set,
         beta_set, x, count_set))`` when ``debug`` is true.
     """
     c_diff = delete(c, i)
     cluster_ids = unique(c_diff)
     n_clusters = len(cluster_ids)
     p = zeros(n_clusters + 1)
     x = data[i].astype(int)
     # NOTE(review): these three lists are returned in debug mode but are
     # never filled in this function.
     alpha_set = []
     beta_set = []
     count_set = []
     for j, cluster_id in enumerate(cluster_ids):
         count = sum(c_diff == cluster_id)
         prior = log(count)
         c_in = (c == cluster_id)
         c_in[i] = False  # leave point i out of its own cluster
         members = data[c_in]
         # Keep the posterior in local names: rebinding ``beta`` here would
         # leak one cluster's counts into the next cluster and into the
         # new-cluster term below.
         alpha_post = beta + sum(members == True, 0)  # noqa: E712 (elementwise)
         beta_post = beta + sum(members == False, 0)  # noqa: E712 (elementwise)
         lh = sum(betaln(alpha_post + x, beta_post + (1 - x))
                  - betaln(alpha_post, beta_post))
         p[j] = lh + prior
     prior = log(dp_alpha)
     lh = sum(betaln(beta + x, beta - x + 1) - betaln(beta, beta))
     p[-1] = prior + lh
     if debug:
         p_conv = exp(p)
         p_conv /= sum(p_conv)
     idx = helpers.discrete_sample(p, rng=rng, log_mode=True)
     if idx == len(p) - 1:
         # With no other points there is no existing label to increment.
         c_return = cluster_ids[-1] + 1 if n_clusters else 0
     else:
         c_return = cluster_ids[idx]
     if debug:
         return c_return, (p_conv, alpha_set, beta_set, x, count_set)
     else:
         return c_return
Ejemplo n.º 6
0
def test_discrete_sample():
    """Check empirical frequencies of ``discrete_sample`` in four modes.

    Covers plain weights, log-weights, a very high temperature (expected
    frequencies are uniform) and a very low temperature (expected
    frequencies put everything on the largest weight).
    """
    from helpers import discrete_sample

    w = asarray([3, 6, 2], 'd')
    delta = .05

    def bin_samples(samples):
        return bincount(samples, minlength=len(w)) / len(samples)

    def check(samples, expected):
        # Compare the observed frequency of each of the three outcomes
        # against its expected value.
        observed = bin_samples(samples)
        for k in range(3):
            assert_almost_equal(observed[k], expected[k], delta=delta)

    normalised = w / sum(w)
    rng = random.RandomState(0)

    # Plain weights.
    check(discrete_sample(w, 1e5, rng=rng), normalised)

    # Log-weights.
    check(discrete_sample(log(w), 1e5, rng=rng, log_mode=True), normalised)

    # High temperature.
    hot = discrete_sample(w, 1e5, rng, temperature=10000)
    check(hot, repeat(1 / len(w), len(w)))

    # Low temperature.
    cold = discrete_sample(w, 1e5, rng, temperature=.01)
    peaked = zeros_like(w)
    peaked[argmax(w)] = 1
    check(cold, peaked)
Ejemplo n.º 7
0
def test_discrete_sample():
    """Check empirical frequencies of ``discrete_sample`` in four modes.

    Covers plain weights, log-weights, a very high temperature (expected
    frequencies are uniform) and a very low temperature (expected
    frequencies put everything on the largest weight).
    """
    from helpers import discrete_sample

    w = asarray([3, 6, 2], 'd')
    delta = .05

    def bin_samples(samples):
        return bincount(samples, minlength=len(w)) / len(samples)

    def check(samples, expected):
        # Compare the observed frequency of each of the three outcomes
        # against its expected value.
        observed = bin_samples(samples)
        for k in range(3):
            assert_almost_equal(observed[k], expected[k], delta=delta)

    normalised = w / sum(w)
    rng = random.RandomState(0)

    # Plain weights.
    check(discrete_sample(w, 1e5, rng=rng), normalised)

    # Log-weights.
    check(discrete_sample(log(w), 1e5, rng=rng, log_mode=True), normalised)

    # High temperature.
    hot = discrete_sample(w, 1e5, rng, temperature=10000)
    check(hot, repeat(1 / len(w), len(w)))

    # Low temperature.
    cold = discrete_sample(w, 1e5, rng, temperature=.01)
    peaked = zeros_like(w)
    peaked[argmax(w)] = 1
    check(cold, peaked)
Ejemplo n.º 8
0
    def sample_alpha(self, state, params, n, rng):
        """Sample ``alpha`` from a log-score evaluated on a fixed grid.

        The score at each grid value is a gamma log-density (shape
        ``params['alpha_shape']``, scale ``params['alpha_scale']``) plus
        ``gammaln(alpha) + k * log(alpha) - gammaln(alpha + n)``, where ``k``
        is the number of distinct labels in ``state.c``. One grid point is
        drawn in log mode and returned.
        """
        k = len(unique(state.c))
        candidates = linspace(.1, 10, 1000)

        log_prior = stats.gamma.logpdf(candidates, params['alpha_shape'], scale=params['alpha_scale'])
        log_lh = gammaln(candidates) + k * log(candidates) - gammaln(candidates + n)

        chosen = helpers.discrete_sample(log_prior + log_lh, rng=rng, log_mode=True)
        return candidates[chosen]
Ejemplo n.º 9
0
    def sample_alpha(self, state, params, n, rng):
        """Sample ``alpha`` from a log-score evaluated on a fixed grid.

        The score at each grid value is a gamma log-density (shape
        ``params['alpha_shape']``, scale ``params['alpha_scale']``) plus
        ``gammaln(alpha) + k * log(alpha) - gammaln(alpha + n)``, where ``k``
        is the number of distinct labels in ``state.c``. One grid point is
        drawn in log mode and returned.
        """
        k = len(unique(state.c))
        candidates = linspace(.1, 10, 1000)

        log_prior = stats.gamma.logpdf(candidates,
                                       params['alpha_shape'],
                                       scale=params['alpha_scale'])
        log_lh = (gammaln(candidates) + k * log(candidates)
                  - gammaln(candidates + n))

        chosen = helpers.discrete_sample(log_prior + log_lh, rng=rng,
                                         log_mode=True)
        return candidates[chosen]
Ejemplo n.º 10
0
 def sample_latent(self, params, data_params, rng):
     """Draw ``alpha`` and a sequential label assignment for ``n`` points.

     ``alpha`` is drawn from a gamma distribution (shape
     ``params['alpha_shape']``, scale ``params['alpha_scale']``). Point 0
     keeps label 0; each later point picks an existing label with weight
     equal to the number of earlier points carrying it, or a new label with
     weight ``alpha``.

     :param params: mapping providing ``'alpha_shape'`` and ``'alpha_scale'``.
     :param data_params: mapping providing ``'n'``, the number of points.
     :param rng: random source; ``gamma`` is called on it and it is
         forwarded to ``helpers.discrete_sample``.
     :returns: a ``State`` with ``alpha`` and the label array ``c`` set.
     """
     s = State()
     s.alpha = rng.gamma(params['alpha_shape'], scale=params['alpha_scale'])
     n = data_params['n']
     c = zeros(n, int)
     for i in range(1, n):
         c_before = c[:i]
         cluster_ids = unique(c_before)
         p = zeros(len(cluster_ids) + 1)
         for j, cluster_id in enumerate(cluster_ids):
             # Weight by cluster size: count members among the earlier
             # points, not among the unique ids (which is always 1).
             p[j] = sum(c_before == cluster_id)
         p[-1] = s.alpha
         # Labels are handed out as 0, 1, 2, ... so the sampled index is
         # itself the label (the last index is the next unused one).
         c[i] = helpers.discrete_sample(p, rng=rng)
     s.c = c
     return s
Ejemplo n.º 11
0
 def sample_latent(self, params, data_params, rng):
     """Draw ``alpha`` and a sequential label assignment for ``n`` points.

     ``alpha`` is drawn from a gamma distribution (shape
     ``params['alpha_shape']``, scale ``params['alpha_scale']``). Point 0
     keeps label 0; each later point picks an existing label with weight
     equal to the number of earlier points carrying it, or a new label with
     weight ``alpha``.

     :param params: mapping providing ``'alpha_shape'`` and ``'alpha_scale'``.
     :param data_params: mapping providing ``'n'``, the number of points.
     :param rng: random source; ``gamma`` is called on it and it is
         forwarded to ``helpers.discrete_sample``.
     :returns: a ``State`` with ``alpha`` and the label array ``c`` set.
     """
     s = State()
     s.alpha = rng.gamma(params['alpha_shape'], scale=params['alpha_scale'])
     n = data_params['n']
     c = zeros(n, int)
     for i in range(1, n):
         c_before = c[:i]
         cluster_ids = unique(c_before)
         p = zeros(len(cluster_ids) + 1)
         for j, cluster_id in enumerate(cluster_ids):
             # Weight by cluster size: count members among the earlier
             # points, not among the unique ids (which is always 1).
             p[j] = sum(c_before == cluster_id)
         p[-1] = s.alpha
         # Labels are handed out as 0, 1, 2, ... so the sampled index is
         # itself the label (the last index is the next unused one).
         c[i] = helpers.discrete_sample(p, rng=rng)
     s.c = c
     return s