Ejemplo n.º 1
0
def update_params_ExponentialNode(node, X, rand_gen, gamma_prior):
    r"""
    Resample the parameter of an Exponential node from its Gamma posterior.

    The prior over the rate parameter is a Gamma

    p(\lambda) = Gamma(\alpha_0=a_0, \beta_0=b_0)

    and the posterior used here is again a Gamma

    p(\lambda|X) = Gamma(\alpha_n=a_n, \beta_n=b_n)

    with a_n = a_0 + N and b_n = b_0 + sum(X), see [1].

    [1] - https://en.wikipedia.org/wiki/Conjugate_prior

    node: object whose attribute ``l`` is overwritten with the drawn value
    X: observations; must support ``len()`` and ``.sum()``
    rand_gen: random generator forwarded to ``sample_parametric_node``
    gamma_prior: ``PriorGamma`` instance providing ``a_0`` and ``b_0``
    """

    assert isinstance(gamma_prior, PriorGamma)

    #
    # posterior hyper-parameters: the count goes into a_n, the sum into b_n
    n_obs = len(X)
    total = X.sum()
    a_post = gamma_prior.a_0 + n_obs
    b_post = gamma_prior.b_0 + total

    #
    # one draw from the posterior Gamma
    posterior = Gamma(a_post, b_post)
    draw = sample_parametric_node(posterior, 1, None, rand_gen)

    #
    # store the first (only) sampled value on the node
    node.l = draw[0]
Ejemplo n.º 2
0
 def update_params(generator, ll_node, prior, n=1000000):
     """
     Sample ``n`` values from ``generator`` and use them to run a posterior
     parameter update on ``ll_node``.

     Both the sampling step and the update step get their own
     ``RandomState(1234)``. The parameters of both nodes are printed for
     comparison.

     Returns the pair ``(generator, ll_node)``.
     """
     # the node is made to own every generated row, on feature 0
     ll_node.row_ids = [*range(n)]
     ll_node.scope = [0]

     sampling_rng = RandomState(1234)
     drawn = sample_parametric_node(generator, n, sampling_rng)
     data = drawn.reshape(-1, 1)

     update_rng = RandomState(1234)
     update_parametric_parameters_posterior(ll_node, data, update_rng, prior)

     print("expected", generator.params, "found", ll_node.params)
     return generator, ll_node
Ejemplo n.º 3
0
    def assert_correct_node_sampling_continuous(self, node, samples, plot):
        """
        Check with Kolmogorov-Smirnov tests that both ``samples`` and samples
        freshly drawn from ``node`` are compatible with the node's
        distribution.

        node: parametric node under test; its ``scope`` is set to ``[0]``
        samples: externally generated samples to validate
        plot: if truthy, show the node's likelihood curve over a normalised
              histogram of ``samples`` (imports matplotlib lazily)

        Fails the test case if either KS p-value is below 0.05.
        """
        node.scope = [0]
        rand_gen = np.random.RandomState(1234)
        samples_gen = sample_parametric_node(node, 1000000, rand_gen)

        if plot:
            import matplotlib.pyplot as plt
            fig, ax = plt.subplots(1, 1)

            x = np.linspace(np.min(samples), np.max(samples), 1000)
            ax.plot(x, likelihood(node, x.reshape(-1, 1)), 'r-', lw=2, alpha=0.6,
                    label=node.__class__.__name__ + ' pdf')
            # `density=True` is the current spelling of the removed `normed=True`
            ax.hist(samples, density=True, histtype='stepfilled', alpha=0.7, bins=1000)
            ax.legend(loc='best', frameon=False)
            plt.show()

        scipy_obj, params = get_scipy_obj_params(node)

        def reference_cdf(x):
            # CDF of the scipy distribution matching the node
            return scipy_obj.cdf(x, **params)

        # H_0 dist are identical
        test_outside_samples = kstest(samples, reference_cdf)
        # reject H_0 (dist are identical) if p < 0.05
        # we pass the test if they are identical, pass if p >= 0.05
        self.assertGreaterEqual(test_outside_samples.pvalue, 0.05)

        test_generated_samples = kstest(samples_gen, reference_cdf)
        # reject H_0 (dist are identical) if p < 0.05
        # we pass the test if they are identical, pass if p >= 0.05
        self.assertGreaterEqual(test_generated_samples.pvalue, 0.05)
Ejemplo n.º 4
0
def update_params_GaussianNode2(node, X, rand_gen, nig_prior):
    r"""
    Resample the mean and standard deviation of a Gaussian node.

    The prior over parameters is a Normal - Inverse - Gamma(NIG)


    [1] - Murphy K., Conjugate Bayesian analysis of the Gaussian distribution(2007)
          https://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf
          https://en.wikipedia.org/wiki/Conjugate_prior
          http://thaines.com/content/misc/gaussian_conjugate_prior_cheat_sheet.pdf
          ** http://homepages.math.uic.edu/~rgmartin/Teaching/Stat591/Bayes/Notes/591_gibbs.pdf
          ** https://people.eecs.berkeley.edu/~jordan/courses/260-spring10/lectures/lecture5.pdf

    Return a sample for the node.params drawn from the posterior distribution
    which for conjugacy is still a NIG

    p(\mu, \sigma ^ 2, | X) = NIG(m_n, V_n, a_n, b_n)

    see[1]

    node: object whose ``mean`` and ``stdev`` attributes are overwritten
    X: observations; passed to ``len``, ``np.mean`` and ``np.var``
    rand_gen: random generator forwarded to ``sample_parametric_node``
    nig_prior: ``PriorNormalInverseGamma`` providing ``m_0``, ``V_0``,
               ``a_0`` and ``b_0``
    """

    assert isinstance(nig_prior, PriorNormalInverseGamma), nig_prior

    n = len(X)
    X_hat = np.mean(X)

    mean = (nig_prior.V_0 * nig_prior.m_0 + n * X_hat) / (nig_prior.V_0 + n)

    v = nig_prior.V_0 + n

    a = nig_prior.a_0 + n / 2

    b = nig_prior.b_0 + (n / 2) * (np.var(X) +
                                   (v / (v + n)) * np.power(X_hat - mean, 2))

    # first draw the precision, then invert it to get the variance
    inv_sigma2_sam = sample_parametric_node(Gamma(a, b), 1, rand_gen)

    sigma2_sam = 1 / inv_sigma2_sam

    # The conditional variance of the mean is sigma^2 / v. The sibling update
    # functions in this file hand Gaussian a standard deviation (``std_n``),
    # so the square root is taken here instead of passing the variance.
    # NOTE(review): confirm Gaussian's second argument is a stdev.
    std_mu = np.sqrt(sigma2_sam / v)
    mu_sam = sample_parametric_node(Gaussian(mean, std_mu), 1,
                                    rand_gen)

    # updating params
    node.mean = mu_sam[0]
    node.stdev = np.sqrt(sigma2_sam)[0]
Ejemplo n.º 5
0
def draw_params_gamma_prior(gamma_prior, defaults, rand_gen):
    """
    Draw a ``beta`` value from the Gamma prior and pair it with the default
    ``alpha``.

    gamma_prior: object providing ``a_0`` and ``b_0``
    defaults: mapping with an ``'alpha'`` entry that is passed through as is
    rand_gen: random generator forwarded to ``sample_parametric_node``

    Returns a dict with keys ``'alpha'`` and ``'beta'``.
    """
    prior_node = Gamma(gamma_prior.a_0, gamma_prior.b_0)
    draw = sample_parametric_node(prior_node, 1, rand_gen)

    #
    # only beta is sampled; alpha is taken from the defaults
    return {'alpha': defaults['alpha'], 'beta': draw[0]}
Ejemplo n.º 6
0
def update_params_LogNormalFixVarNode(node, X, rand_gen, normal_prior):
    r"""
    Resample the mean of a LogNormal node whose precision is kept fixed.

    The prior over \mu is a Normal distribution

    p(\mu) = Normal(mu_0, tau_0)

    with mean mu_0 and precision(inverse variance) tau_0

    see[1]

    [1] - https://en.wikipedia.org/wiki/Conjugate_prior

    Return a sample for the node.params drawn from the posterior distribution
    which for conjugacy is still a Normal

    p(\mu, | X) = Normal(mu_n, tau_n)

    see[1]

    node: object providing ``precision``; its ``mean`` is overwritten
    X: observations; passed to ``len`` and ``np.log``
    rand_gen: random generator forwarded to ``sample_parametric_node``
    normal_prior: ``PriorNormal`` providing ``mu_0`` and ``tau_0``
    """

    assert isinstance(normal_prior, PriorNormal)

    N = len(X)

    #
    # if N is 0, then it would be like sampling from the prior
    tau_n = normal_prior.tau_0 + N * node.precision

    #
    # the sufficient statistic is the sum of the log-observations
    log_sum_x = np.log(X).sum() if N > 0 else 0
    mu_n = (log_sum_x * node.precision +
            normal_prior.tau_0 * normal_prior.mu_0) / tau_n

    #
    # sampling
    # TODO, optimize it with numba
    # the posterior precision is turned into a standard deviation for Gaussian
    std_n = 1.0 / np.sqrt(tau_n)

    mu_sam = sample_parametric_node(Gaussian(mu_n, std_n), 1, rand_gen)

    #
    # updating params (only mean)
    node.mean = mu_sam[0]
Ejemplo n.º 7
0
def update_params_GammaFixAlphaNode(node, X, rand_gen, gamma_prior):
    r"""
    Resample ``beta`` of a Gamma node whose ``alpha`` is kept fixed.

    The prior over \beta is again a Gamma distribution

    p(\beta) = Gamma(a_0, b_0)

    with shape \alpha_0 = a_0 and rate \beta_0 = b_0

    see[1], eq. (52 - 54), considering the inverse of the scale, the rate \frac{1}{\beta}
        and [2]

    [1] - Fink, D. A Compendium of Conjugate Priors(1997)
          https://www.johndcook.com/CompendiumOfConjugatePriors.pdf
    [2] - https://en.wikipedia.org/wiki/Conjugate_prior

    The posterior, from which one value is drawn, is still a Gamma

    p(\beta, | X) = Gamma(a_n, b_n)

    with a_n = a_0 + N * alpha and b_n = b_0 + sum(X), see[1, 2]

    node: object providing ``alpha``; its ``beta`` is overwritten
    X: observations; must support ``len()`` and ``.sum()``
    rand_gen: random generator forwarded to ``sample_parametric_node``
    gamma_prior: ``PriorGamma`` instance providing ``a_0`` and ``b_0``
    """

    assert isinstance(gamma_prior, PriorGamma)

    #
    # with no observations both updates reduce to the prior values
    n_obs = len(X)
    a_post = gamma_prior.a_0 + n_obs * node.alpha
    b_post = gamma_prior.b_0 + X.sum()

    #
    # one draw from the posterior
    # TODO, optimize it with numba
    posterior = Gamma(a_post, b_post)
    draw = sample_parametric_node(posterior, 1, None, rand_gen)

    #
    # only beta is updated
    node.beta = draw[0]
Ejemplo n.º 8
0
def draw_params_gaussian_prior(nig_prior, rand_gen):
    """
    Draw a ``mean`` / ``stdev`` pair from the prior described by ``nig_prior``.

    A variance is drawn first from ``scipy.stats.invgamma`` with shape
    ``a_0`` and multiplied by ``b_0``; the mean is then drawn from a Gaussian
    centred on ``m_0`` with standard deviation ``sqrt(variance * V_0)``.

    nig_prior: object providing ``m_0``, ``V_0``, ``a_0`` and ``b_0``
    rand_gen: random generator used for both draws

    Returns a dict with keys ``'mean'`` and ``'stdev'``.
    """
    # unit-scale inverse-gamma draw, rescaled by b_0 afterwards
    unit_draw = scipy.stats.invgamma.rvs(a=nig_prior.a_0,
                                         size=1,
                                         random_state=rand_gen)
    variance = unit_draw * nig_prior.b_0

    # the spread of the mean depends on the variance just drawn
    mean_std = np.sqrt(variance * nig_prior.V_0)
    mean_draw = sample_parametric_node(Gaussian(nig_prior.m_0, mean_std), 1,
                                       rand_gen)

    return {'mean': mean_draw[0], 'stdev': np.sqrt(variance)[0]}
Ejemplo n.º 9
0
    def assert_correct_node_sampling_discrete(self, node, samples, plot):
        """
        Check with chi-square tests that both ``samples`` and samples freshly
        drawn from ``node`` are compatible with the node's likelihood.

        node: parametric node under test; its ``scope`` is set to ``[0]``
        samples: externally generated samples to validate
        plot: not used by this method

        Fails the test case if either chi-square p-value is below 0.05.
        """
        node.scope = [0]
        rand_gen = np.random.RandomState(1234)
        samples_gen = sample_parametric_node(node, 1000000, rand_gen)

        fvals, fobs = np.unique(samples, return_counts=True)
        # expected counts = likelihood of each observed value * sample size
        fexp = (likelihood(node, fvals.reshape(-1, 1)) * samples.shape[0])[:, 0]

        # H_0 data comes from same dist
        test_outside_samples = chisquare(fobs, fexp)
        # reject H_0 (data comes from dist) if p < 0.05
        # we pass the test if they come from the dist, pass if p >= 0.05
        self.assertGreaterEqual(test_outside_samples.pvalue, 0.05)

        fvals, fobs = np.unique(samples_gen, return_counts=True)
        # the observed counts now come from samples_gen, so the expected
        # counts must be scaled by its size, not by the size of `samples`
        fexp = (likelihood(node, fvals.reshape(-1, 1)) * samples_gen.shape[0])[:, 0]

        test_generated_samples = chisquare(fobs, fexp)
        # reject H_0 (data comes from dist) if p < 0.05
        # we pass the test if they come from the dist, pass if p >= 0.05
        self.assertGreaterEqual(test_generated_samples.pvalue, 0.05)
Ejemplo n.º 10
0
    def _sample_instances(node, row_ids):
        """
        Recursively pass ``row_ids`` down the structure rooted at ``node`` and
        write samples for them at the leaves.

        Product nodes forward all rows to every child. Sum nodes split the
        rows among their children at random according to ``node.weights`` and
        record the split sizes in ``node.edge_counts``. Leaf nodes write
        sampled values into ``X`` for their scope.

        Relies on names from the enclosing scope: ``rand_gen``, ``X``, and,
        depending on the flags ``return_Zs`` / ``return_partition``, ``Z``,
        ``Z_id_map`` and ``P``.
        """
        if len(row_ids) == 0:
            return
        node.row_ids = row_ids

        if isinstance(node, Product):
            for child in node.children:
                _sample_instances(child, row_ids)
            return

        if isinstance(node, Sum):
            # one row per instance, one column per weight
            log_weights = np.zeros((len(row_ids), len(node.weights)))
            for k, _ in enumerate(node.children):
                log_weights[:, k] = np.log(node.weights[k])

            # Gumbel-max trick: argmax of (log weight + Gumbel noise)
            # picks one branch per instance
            gumbel_noise = rand_gen.gumbel(loc=0,
                                           scale=1,
                                           size=log_weights.shape)
            chosen_branch = np.argmax(log_weights + gumbel_noise, axis=1)

            for k, child in enumerate(node.children):
                child_rows = row_ids[chosen_branch == k]
                node.edge_counts[k] = len(child_rows)
                _sample_instances(child, child_rows)

                if return_Zs:
                    Z[child_rows, Z_id_map[node.id]] = k

        if isinstance(node, Leaf):
            #
            # sample from leaf
            leaf_draws = sample_parametric_node(node,
                                                n_samples=len(row_ids),
                                                rand_gen=rand_gen)
            X[row_ids, node.scope] = leaf_draws
            if return_partition:
                P[row_ids, node.scope] = node.id

            return
Ejemplo n.º 11
0
def update_params_PoissonNode(node, X, rand_gen, gamma_prior):
    r"""
    Resample the mean of a Poisson node from its Gamma posterior.

    The prior over \lambda is a Gamma distribution

    p(\lambda) = Gamma(a_0, b_0)

    see[1]

    [1] - https://en.wikipedia.org/wiki/Conjugate_prior

    The posterior, from which one value is drawn, is still a Gamma

    p(\lambda, | X) = Gamma(a_n, b_n)

    with a_n = a_0 + sum(X) and b_n = b_0 + N, see[1]

    NOTE(review): adding the count N to b_0 is the update for a rate
    parameter in [1]; confirm that Gamma's second argument is a rate.

    node: object whose ``mean`` attribute is overwritten with the draw
    X: observations; must support ``len()`` and ``.sum()``
    rand_gen: random generator forwarded to ``sample_parametric_node``
    gamma_prior: ``PriorGamma`` instance providing ``a_0`` and ``b_0``
    """

    assert isinstance(gamma_prior, PriorGamma)

    #
    # with no observations both updates reduce to the prior values
    n_obs = len(X)
    total = X.sum()
    a_post = gamma_prior.a_0 + total
    b_post = gamma_prior.b_0 + n_obs

    #
    # one draw from the posterior
    # TODO, optimize it with numba
    posterior = Gamma(a_post, b_post)
    draw = sample_parametric_node(posterior, 1, None, rand_gen)

    #
    # store the first (only) sampled value on the node
    node.mean = draw[0]
Ejemplo n.º 12
0
def update_params_GaussianNode(node, X, rand_gen, nig_prior):
    r"""
    Resample the mean and standard deviation of a Gaussian node.

    The prior over parameters is a Normal - Inverse - Gamma(NIG)

    p(\mu, \sigma ^ 2) = NIG(m_0, V_0, a_0, b_0) =
                     = N(\mu | m_0, \sigma ^ {2}V_0)IG(\sigma ^ {2} | a_0, b_0)

    see[1], eq. 190 - 191

    [1] - Murphy K., Conjugate Bayesian analysis of the Gaussian distribution(2007)
          https://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf

    The posterior, from which one (variance, mean) pair is drawn, is still
    a NIG

    p(\mu, \sigma ^ 2, | X) = NIG(m_n, V_n, a_n, b_n)

    see[1]

    node: object whose ``mean`` and ``stdev`` attributes are overwritten
    X: observations; must support ``len()``, ``.sum()``, subtraction of a
       scalar and ``np.dot`` with itself
    rand_gen: random generator used for both draws
    nig_prior: ``PriorNormalInverseGamma`` providing ``m_0``, ``V_0``,
               ``a_0`` and ``b_0``
    """

    assert isinstance(nig_prior, PriorNormalInverseGamma), nig_prior

    N = len(X)

    # eq (197)
    prior_precision = 1.0 / nig_prior.V_0
    V_n = 1 / (prior_precision + N)

    # eq (198), written with the sum instead of the average so that an
    # empty X does not produce nans and the result falls back to the prior
    sum_x = X.sum()
    avg_x = sum_x / N if N else 0
    m_n = (prior_precision * nig_prior.m_0 + sum_x) * V_n

    # eq (199)
    a_n = nig_prior.a_0 + N / 2
    centered = X - avg_x
    mean_shift = avg_x - nig_prior.m_0
    scatter = np.dot(centered, centered)
    shrinkage = (N * prior_precision * mean_shift * mean_shift) * V_n
    b_n = nig_prior.b_0 + (scatter + shrinkage) / 2

    #
    # sampling
    # first sample the variance from IG, then the mean from a N
    # see eq (191)
    # TODO, optimize it with numba
    unit_draw = scipy.stats.invgamma.rvs(a=a_n, size=1, random_state=rand_gen)
    # the unit-scale draw is rescaled by b_n afterwards
    variance = unit_draw * b_n

    mean_std = np.sqrt(variance * V_n)
    mean_draw = sample_parametric_node(Gaussian(m_n, mean_std), 1, None, rand_gen)

    #
    # updating params
    node.mean = mean_draw[0]
    node.stdev = np.sqrt(variance)[0]
Ejemplo n.º 13
0
def draw_params_poisson_prior(gamma_prior, rand_gen):
    """
    Draw a Poisson ``mean`` from the Gamma prior.

    gamma_prior: object providing ``a_0`` and ``b_0``
    rand_gen: random generator forwarded to ``sample_parametric_node``

    Returns a dict whose ``'mean'`` entry is the result of
    ``sample_parametric_node`` for one sample, returned without indexing.
    """
    prior_node = Gamma(gamma_prior.a_0, gamma_prior.b_0)
    draw = sample_parametric_node(prior_node, 1, rand_gen)
    return {'mean': draw}
Ejemplo n.º 14
0
def draw_params_exponential_prior(gamma_prior, rand_gen):
    """
    Draw the Exponential parameter ``l`` from the Gamma prior.

    gamma_prior: object providing ``a_0`` and ``b_0``
    rand_gen: random generator forwarded to ``sample_parametric_node``

    Returns a dict whose ``'l'`` entry is the result of
    ``sample_parametric_node`` for one sample, returned without indexing.
    """
    prior_node = Gamma(gamma_prior.a_0, gamma_prior.b_0)
    draw = sample_parametric_node(prior_node, 1, rand_gen)
    return {'l': draw}
Ejemplo n.º 15
0
 def test_sample_categorical_dict(self):
     """
     Draw 10 samples from a two-entry ``CategoricalDictionary`` node and
     print them; no assertion is made on the result.
     """
     rng = np.random.RandomState(1234)
     probabilities = {3: 0.3, 5: 0.7}
     categorical = CategoricalDictionary(p=probabilities, scope=0)
     drawn = sample_parametric_node(categorical, 10, None, rng)
     print(drawn)