예제 #1
0
def extend():
    """Demonstrate plugging a custom Pareto leaf into SPN likelihood inference.

    Defines a ``Pareto`` leaf type, hands a likelihood function for it to
    ``add_node_likelihood``, builds a two-component weighted mixture and
    prints its log-likelihood for the single observation 1.5.
    """
    import numpy as np
    from spn.structure.leaves.parametric.Parametric import Leaf
    from spn.algorithms.Inference import add_node_likelihood, log_likelihood

    class Pareto(Leaf):
        """Leaf node that stores one Pareto parameter ``a``."""

        def __init__(self, a, scope=None):
            Leaf.__init__(self, scope=scope)
            self.a = a

    def pareto_likelihood(node, data=None, dtype=np.float64):
        """Return a (rows, 1) array holding ``pareto.pdf`` of the node's scope columns."""
        from scipy.stats import pareto

        densities = np.ones((data.shape[0], 1), dtype=dtype)
        # Slice assignment keeps the requested dtype and the (rows, 1) shape.
        densities[:] = pareto.pdf(data[:, node.scope], node.a)
        return densities

    add_node_likelihood(Pareto, pareto_likelihood)

    mixture = 0.3 * Pareto(2.0, scope=0) + 0.7 * Pareto(3.0, scope=0)
    observation = np.array([1.5]).reshape(-1, 1)

    print("pareto", log_likelihood(mixture, observation))
예제 #2
0
    def test_hierarchical_sum_multiple_dimension(self):
        """Check a two-level sum hierarchy over leaves scoped on columns 0 and 1.

        First with ``identity_ll`` registered for ``Leaf``, then with
        ``multiply_ll`` and a distinct ``multiplier`` set on every leaf.
        """
        # Stage 1: plain hierarchy; the expected value handed to assert_correct
        # is the product of columns 0 and 1.
        add_node_likelihood(Leaf, identity_ll)
        spn = 0.3 * (
            0.2 * Leaf(scope=[0, 1]) + 0.8 * Leaf(scope=[0, 1])
        ) + 0.7 * (
            0.4 * Leaf(scope=[0, 1]) + 0.6 * Leaf(scope=[0, 1])
        )
        samples = np.random.rand(10, 3)
        self.assert_correct(spn, samples, samples[:, 0] * samples[:, 1])

        # Stage 2: each leaf gets its own multiplier so the four leaves
        # contribute differently to the expected value.
        add_node_likelihood(Leaf, multiply_ll)
        spn = 0.3 * (
            0.2 * Leaf(scope=[0, 1]) + 0.8 * Leaf(scope=[0, 1])
        ) + 0.7 * (
            0.4 * Leaf(scope=[0, 1]) + 0.6 * Leaf(scope=[0, 1])
        )

        multipliers = {(0, 0): 2, (0, 1): 3, (1, 0): 4, (1, 1): 5}
        for (outer, inner), factor in multipliers.items():
            spn.children[outer].children[inner].multiplier = factor

        samples = np.random.rand(10, 3)
        joint = samples[:, 0] * samples[:, 1]
        expected = 0.3 * (0.2 * 2 * joint + 0.8 * 3 * joint) + 0.7 * (0.4 * 4 * joint + 0.6 * 5 * joint)
        self.assert_correct(spn, samples, expected)
예제 #3
0
    def test_sum_one_dimension(self):
        """Check sum nodes over single-scope leaves, including the unnormalized-weights failure."""
        add_node_likelihood(Leaf, identity_ll)

        # Basic mixtures on a one-column dataset: the expected output is the data itself.
        for first_weight, second_weight in ((0.5, 0.5), (0.1, 0.9)):
            spn = first_weight * Leaf(scope=0) + second_weight * Leaf(scope=0)
            samples = np.random.rand(10, 1)
            self.assert_correct(spn, samples, samples)

        # A wider dataset may be passed; only the scoped column (0) should matter.
        # This is important for inner nodes.
        spn = 0.1 * Leaf(scope=0) + 0.9 * Leaf(scope=0)
        samples = np.random.rand(10, 3)
        expected = (0.1 * samples[:, 0] + 0.9 * samples[:, 0]).reshape(-1, 1)
        self.assert_correct(spn, samples, expected)

        # Weights that are not normalized must trip an AssertionError in both entry points.
        spn = 0.1 * Leaf(scope=0) + 0.9 * Leaf(scope=0)
        spn.weights[1] = 0.2
        samples = np.random.rand(10, 3)
        with self.assertRaises(AssertionError):
            likelihood(spn, samples)
        with self.assertRaises(AssertionError):
            log_likelihood(spn, samples)

        # Labelled "test the log space" in the original; the inputs match the scope
        # check above (presumably assert_correct covers the log path - confirm).
        spn = 0.1 * Leaf(scope=0) + 0.9 * Leaf(scope=0)
        samples = np.random.rand(10, 3)
        expected = (0.1 * samples[:, 0] + 0.9 * samples[:, 0]).reshape(-1, 1)
        self.assert_correct(spn, samples, expected)
예제 #4
0
    def test_sample_spn_weights(self):
        """Chi-square check of sampled sum weights against edge counts plus prior mass."""
        n = 100000
        left = 0.1 * leaf(0.0001) + 0.9 * leaf(0.99)
        right = 0.3 * leaf(0.5) + 0.7 * leaf(0.1)
        spn = 0.1 * left + 0.9 * right
        add_node_likelihood(Leaf, node_fixed_ll)
        rand_gen = RandomState(12345)
        data = rand_gen.rand(n, 2)
        sample_induced_trees(spn, data, rand_gen)

        sample_spn_weights(spn, rand_gen, omega_uninf_prior=0)

        # For each child sum: (node, index of its edge in the root, its two prior weights).
        cases = (
            (left, 0, (0.1, 0.9)),
            (right, 1, (0.3, 0.7)),
        )
        for child, edge_index, (prior_first, prior_second) in cases:
            # Instances that did not go through this child are spread using its prior weights.
            unseen = n - spn.edge_counts[edge_index]
            expected_obs = np.array(child.edge_counts)
            expected_obs[0] += prior_first * unseen
            expected_obs[1] += prior_second * unseen
            observed = np.array(child.weights) * n
            self.assertGreaterEqual(chisquare(observed, expected_obs).pvalue, 0.05)
예제 #5
0
    def test_ll_matrix(self):
        """Verify per-node values written into ``lls_matrix`` by likelihood/log_likelihood.

        Builds a hand-made SPN, computes the expected value of every node with
        plain NumPy arithmetic, and checks that the column of the likelihood
        matrix indexed by each node's ``id`` matches. Also checks that the
        likelihood matrix equals ``exp`` of the log-likelihood matrix.
        """
        add_node_likelihood(Leaf, sum_and_multiplier_ll)

        node_1_1_1_1 = leaf(2, 1)
        node_1_1_1_2 = leaf(2, 2)
        node_1_1_1 = 0.7 * node_1_1_1_1 + 0.3 * node_1_1_1_2
        node_1_1_2 = leaf([0, 1], 3)
        node_1_1 = node_1_1_1 * node_1_1_2
        node_1_2_1_1_1 = leaf(0, 5)
        node_1_2_1_1_2 = leaf(1, 4)
        node_1_2_1_1 = node_1_2_1_1_1 * node_1_2_1_1_2
        node_1_2_1_2 = leaf([0, 1], 6)
        node_1_2_1 = 0.1 * node_1_2_1_1 + 0.9 * node_1_2_1_2
        node_1_2_2 = leaf(2, 3)
        node_1_2 = node_1_2_1 * node_1_2_2
        spn = 0.4 * node_1_1 + 0.6 * node_1_2

        assign_ids(spn)

        max_id = max(node.id for node in get_nodes_by_type(spn))

        data = np.random.rand(10, 10)

        # Expected value of every node, mirroring the structure built above.
        node_1_1_1_1_r = data[:, 2] * 1
        node_1_1_1_2_r = data[:, 2] * 2
        node_1_1_1_r = 0.7 * node_1_1_1_1_r + 0.3 * node_1_1_1_2_r
        node_1_1_2_r = 3 * (data[:, 0] + data[:, 1])
        node_1_1_r = node_1_1_1_r * node_1_1_2_r
        node_1_2_1_1_1_r = data[:, 0] * 5
        node_1_2_1_1_2_r = data[:, 1] * 4
        node_1_2_1_1_r = node_1_2_1_1_1_r * node_1_2_1_1_2_r
        node_1_2_1_2_r = 6 * (data[:, 0] + data[:, 1])
        node_1_2_1_r = 0.1 * node_1_2_1_1_r + 0.9 * node_1_2_1_2_r
        node_1_2_2_r = data[:, 2] * 3
        node_1_2_r = node_1_2_1_r * node_1_2_2_r
        spn_r = 0.4 * node_1_1_r + 0.6 * node_1_2_r

        self.assert_correct(spn, data, spn_r)

        lls = np.zeros((data.shape[0], max_id + 1))
        likelihood(spn, data, lls_matrix=lls)
        llls = np.zeros((data.shape[0], max_id + 1))
        log_likelihood(spn, data, lls_matrix=llls)

        # np.alltrue was removed in NumPy 2.0; np.all is the supported equivalent.
        self.assertTrue(np.all(np.isclose(lls, np.exp(llls))))

        expected_by_node = (
            (spn_r, spn),
            (node_1_2_r, node_1_2),
            (node_1_2_2_r, node_1_2_2),
            (node_1_2_1_r, node_1_2_1),
            (node_1_2_1_2_r, node_1_2_1_2),
            (node_1_2_1_1_r, node_1_2_1_1),
            (node_1_2_1_1_2_r, node_1_2_1_1_2),
            (node_1_2_1_1_1_r, node_1_2_1_1_1),
            (node_1_1_r, node_1_1),
            (node_1_1_2_r, node_1_1_2),
            (node_1_1_1_r, node_1_1_1),
            (node_1_1_1_2_r, node_1_1_1_2),
            (node_1_1_1_1_r, node_1_1_1_1),
        )
        for expected, node in expected_by_node:
            self.assertTrue(np.all(np.isclose(expected, lls[:, node.id])))
예제 #6
0
    def test_sum_multiple_dimension(self):
        """Check an evenly weighted sum of two leaves scoped on columns 0 and 1."""
        add_node_likelihood(Leaf, identity_ll)

        # Basic computation in multiple dimensions.
        spn = 0.5 * Leaf(scope=[0, 1]) + 0.5 * Leaf(scope=[0, 1])
        samples = np.random.rand(10, 2)
        # Result is not inspected; the call is kept exactly as in the original.
        _ = likelihood(spn, samples)
        expected = samples[:, 0] * samples[:, 1]
        self.assert_correct(spn, samples, expected)
예제 #7
0
    def test_prod_multiple_dimension(self):
        """Check a product of two leaves scoped on columns (0, 1) and (2, 3)."""
        add_node_likelihood(Leaf, sums_ll)

        # Basic computation in multiple dimensions.
        spn = Leaf(scope=[0, 1]) * Leaf(scope=[2, 3])
        samples = np.random.rand(10, 4)
        first_pair = samples[:, 0] + samples[:, 1]
        second_pair = samples[:, 2] + samples[:, 3]
        self.assert_correct(spn, samples, first_pair * second_pair)
예제 #8
0
    def test_induced_trees(self):
        """Run the induced-tree checks on two SPNs, then chi-square test sampled edge counts."""
        add_node_likelihood(Leaf, constant_equal_ll)

        n = 100000

        spn = 0.1 * (
            (0.1 * (Leaf(0) * Leaf(1)) + 0.9 * (Leaf(0) * Leaf(1))) * Leaf(2)
        ) + 0.9 * (
            (0.8 * (Leaf(0) * Leaf(1)) + 0.2 * (Leaf(0) * Leaf(1))) * Leaf(2)
        )

        self.check_induced_tree(spn, n)
        self.check_counts(spn, n)

        # So far we have tested that the induced trees produce a proper map and that
        # sampling and the lls are fine. We still need to check that the right counts
        # are passed down and that samples and probabilities are computed correctly.

        spn = 0.3 * (
            0.0001 * (Leaf(0) * Leaf(1)) + 0.9999 * (Leaf(0) * Leaf(1))
        ) + 0.7 * (
            0.1 * (0.3 * (Leaf(0) * Leaf(1)) + 0.7 * (Leaf(0) * Leaf(1)))
            + 0.9 * (0.4 * (Leaf(0) * Leaf(1)) + 0.6 * (Leaf(0) * Leaf(1)))
        )

        self.check_induced_tree(spn, n)
        self.check_counts(spn, n)

        left = 0.1 * leaf(0.0001) + 0.9 * leaf(0.99)
        right = 0.1 * leaf(0.5) + 0.9 * leaf(0.1)
        spn = 0.1 * left + 0.9 * right
        add_node_likelihood(Leaf, node_fixed_ll)
        rand_gen = RandomState(12345)
        data = rand_gen.rand(n, 2)
        sample_induced_trees(spn, data, rand_gen)
        self.check_counts(spn, n)

        # Root edges: expected frequencies are the normalized weighted branch values.
        left_mass = 0.1 * (0.1 * 0.0001 + 0.9 * 0.99)
        right_mass = 0.9 * (0.1 * 0.5 + 0.9 * 0.1)
        expected_freq = np.array([left_mass, right_mass]) / (left_mass + right_mass)
        expected_obs = expected_freq * n
        self.assertGreaterEqual(chisquare(spn.edge_counts, expected_obs).pvalue, 0.05)

        # Child sums: same check, scaled by the count that reached each child.
        child_cases = (
            (left, 0, (0.1 * 0.0001, 0.9 * 0.99)),
            (right, 1, (0.1 * 0.5, 0.9 * 0.1)),
        )
        for child, edge_index, terms in child_cases:
            expected_freq = np.array([terms[0], terms[1]]) / (terms[0] + terms[1])
            expected_obs = expected_freq * spn.edge_counts[edge_index]
            self.assertGreaterEqual(chisquare(child.edge_counts, expected_obs).pvalue, 0.05)
예제 #9
0
    def test_type(self):
        """Check that ``likelihood`` returns an array of the requested dtype."""
        add_node_likelihood(Leaf, identity_ll)

        spn = 0.5 * Leaf(scope=[0, 1]) + 0.5 * (Leaf(scope=0) * Leaf(scope=1))
        data = np.random.rand(10, 4)

        result = likelihood(spn, data, dtype=np.float32)
        self.assertEqual(result.dtype, np.float32)

        # np.float128 is not defined on every platform (e.g. Windows builds), so use
        # np.longdouble; it is the same type wherever np.float128 exists.
        result = likelihood(spn, data, dtype=np.longdouble)
        self.assertEqual(result.dtype, np.longdouble)
예제 #10
0
    def test_prod_one_dimension(self):
        """Check a product of two single-scope leaves on exact-width and wider data."""
        add_node_likelihood(Leaf, identity_ll)

        # 2 columns: basic product. 3 columns: the extra column must be ignored (scopes respected).
        for n_columns in (2, 3):
            spn = Leaf(scope=0) * Leaf(scope=1)
            samples = np.random.rand(10, n_columns)
            expected = samples[:, 0] * samples[:, 1]
            self.assert_correct(spn, samples, expected)
예제 #11
0
    def test_handmade_multidim(self):
        """Check a hand-built SPN mixing products and multi-column leaves against NumPy arithmetic."""
        add_node_likelihood(Leaf, sum_and_multiplier_ll)

        spn = 0.3 * (
            (0.9 * (leaf(0, 1) * leaf(1, 2)) + 0.1 * (leaf(0, 3) * leaf(1, 4))) * leaf(2, 5)
        ) + 0.7 * (
            0.6 * leaf([0, 1, 2], 6) + 0.4 * leaf([0, 1, 2], 7)
        )
        data = np.random.rand(10, 10)

        # Left branch: mixture of two column products, multiplied by 5 * column 2.
        inner_mix = 0.9 * (data[:, 0] * 2 * data[:, 1]) + 0.1 * (3 * data[:, 0] * 4 * data[:, 1])
        left_branch = inner_mix * 5 * data[:, 2]

        # Right branch: mixture of two scaled sums over columns 0..2.
        column_sum = data[:, 0] + data[:, 1] + data[:, 2]
        right_branch = 0.6 * 6 * column_sum + 0.4 * 7 * column_sum

        expected = 0.3 * left_branch + 0.7 * right_branch

        self.assert_correct(spn, data, expected)
예제 #12
0
    def test_hierarchical_prod_multiple_dimension(self):
        """Check a product of products over four two-column leaves under two leaf likelihoods."""
        # Stage 1: with identity_ll the expected value is the row-wise product of all 8 columns.
        add_node_likelihood(Leaf, identity_ll)
        spn = (Leaf(scope=[0, 1]) * Leaf(scope=[2, 3])) * (Leaf(scope=[4, 5]) * Leaf(scope=[6, 7]))
        samples = np.random.rand(10, 8)
        self.assert_correct(spn, samples, np.prod(samples, axis=1))

        # Stage 2: with sums_ll the expected value is the product of the four pairwise column sums.
        add_node_likelihood(Leaf, sums_ll)
        spn = (Leaf(scope=[0, 1]) * Leaf(scope=[2, 3])) * (Leaf(scope=[4, 5]) * Leaf(scope=[6, 7]))
        samples = np.random.rand(10, 10)
        sum_01 = samples[:, 0] + samples[:, 1]
        sum_23 = samples[:, 2] + samples[:, 3]
        sum_45 = samples[:, 4] + samples[:, 5]
        sum_67 = samples[:, 6] + samples[:, 7]
        expected = sum_01 * sum_23 * sum_45 * sum_67
        self.assert_correct(spn, samples, expected)
예제 #13
0
    def test_hierarchical_sum_one_dimension(self):
        """Check a two-level sum hierarchy over single-scope leaves, with and without multipliers."""
        # Stage 1: basic hierarchy on a wider dataset; only column 0 is in scope.
        add_node_likelihood(Leaf, identity_ll)
        spn = 0.3 * (0.2 * Leaf(scope=0) + 0.8 * Leaf(scope=0)) + 0.7 * (0.4 * Leaf(scope=0) + 0.6 * Leaf(scope=0))
        samples = np.random.rand(10, 3)
        self.assert_correct(spn, samples, samples[:, 0])

        # Stage 2: a distinct multiplier per leaf makes every leaf contribute differently.
        add_node_likelihood(Leaf, multiply_ll)
        spn = 0.3 * (0.2 * Leaf(scope=0) + 0.8 * Leaf(scope=0)) + 0.7 * (0.4 * Leaf(scope=0) + 0.6 * Leaf(scope=0))
        multipliers = {(0, 0): 2, (0, 1): 3, (1, 0): 4, (1, 1): 5}
        for (outer, inner), factor in multipliers.items():
            spn.children[outer].children[inner].multiplier = factor

        samples = np.random.rand(10, 3)
        column = samples[:, 0]
        expected = 0.3 * (0.2 * 2 * column + 0.8 * 3 * column) + 0.7 * (0.4 * 4 * column + 0.6 * 5 * column)
        self.assert_correct(spn, samples, expected)
예제 #14
0
def add_conditional_inference_support():
    """Register likelihood and MPE handlers for the three conditional leaf types."""
    conditional_leaves = (
        Conditional_Gaussian,
        Conditional_Poisson,
        Conditional_Bernoulli,
    )

    for leaf_type in conditional_leaves:
        add_node_likelihood(leaf_type, conditional_likelihood)

    for leaf_type in conditional_leaves:
        add_node_mpe_likelihood(leaf_type, conditional_mpe_log_likelihood)
예제 #15
0
def add_piecewise_inference_support():
    """Register the likelihood and MPE handlers for ``PiecewiseLinear`` leaves."""
    leaf_type = PiecewiseLinear
    add_node_likelihood(leaf_type, piecewise_log_likelihood)
    add_node_mpe_likelihood(leaf_type, piecewise_mpe_likelihood)
예제 #16
0
def add_cltree_inference_support():
    """Register ``cltree_log_likelihood`` as the log-likelihood handler for ``CLTree`` nodes."""
    add_node_likelihood(
        CLTree,
        log_lambda_func=cltree_log_likelihood,
    )
예제 #17
0
def add_histogram_inference_support():
    """Register ``histogram_log_likelihood`` as the log-likelihood handler for ``Histogram`` leaves."""
    add_node_likelihood(
        Histogram,
        log_lambda_func=histogram_log_likelihood,
    )
예제 #18
0
def add_parametric_inference_range_support():
    """Register ``categorical_likelihood_range`` as the likelihood handler for ``Categorical`` leaves."""
    handler = categorical_likelihood_range
    add_node_likelihood(Categorical, handler)
예제 #19
0
def add_parametric_inference_support():
    """Register the shared parametric likelihood and MPE handlers for the parametric leaf types."""
    likelihood_types = (
        Gaussian,
        Gamma,
        LogNormal,
        Poisson,
        Bernoulli,
        Categorical,
        NegativeBinomial,
        Hypergeometric,
        Geometric,
        Exponential,
        Uniform,
        CategoricalDictionary,
    )
    # NOTE(review): Uniform and CategoricalDictionary get no MPE handler here,
    # exactly as in the original - confirm that the omission is intentional.
    mpe_types = (
        Gaussian,
        Gamma,
        LogNormal,
        Poisson,
        Bernoulli,
        Categorical,
        NegativeBinomial,
        Hypergeometric,
        Geometric,
        Exponential,
    )

    for leaf_type in likelihood_types:
        add_node_likelihood(leaf_type, parametric_likelihood)

    for leaf_type in mpe_types:
        add_node_mpe_likelihood(leaf_type, parametric_mpe_log_likelihood)
예제 #20
0
def add_utility_inference_support():
    """Register ``histogram_likelihood`` as the likelihood handler for ``Utility`` nodes."""
    handler = histogram_likelihood
    add_node_likelihood(Utility, handler)
예제 #21
0
def add_parametric_inference_support():
    """Register a dedicated likelihood handler for each parametric leaf type."""
    # (leaf type, handler) pairs, registered in this order.
    handlers = (
        (Gaussian, continuous_likelihood),
        (Gamma, gamma_likelihood),
        (LogNormal, lognormal_likelihood),
        (Poisson, discrete_likelihood),
        (Bernoulli, bernoulli_likelihood),
        (Categorical, categorical_likelihood),
        (Geometric, geometric_likelihood),
        (Exponential, exponential_likelihood),
        (Uniform, uniform_likelihood),
        (CategoricalDictionary, categorical_dictionary_likelihood),
    )
    for leaf_type, handler in handlers:
        add_node_likelihood(leaf_type, handler)
예제 #22
0
def add_cltree_inference_support():
    """Register ``cltree_likelihood`` as the likelihood handler for ``CLTree`` nodes."""
    handler = cltree_likelihood
    add_node_likelihood(CLTree, handler)
예제 #23
0
def add_static_inference_range_support():
    """Register ``static_likelihood_range`` as the likelihood handler for ``StaticNumeric`` leaves."""
    handler = static_likelihood_range
    add_node_likelihood(StaticNumeric, handler)
예제 #24
0
def add_parametric_inference_support():
    """Register likelihood and/or log-likelihood handlers for each parametric leaf type.

    Each entry lists the keyword arguments forwarded to ``add_node_likelihood``
    for that leaf type; entries are processed in the order written.
    """
    # NOTE(review): Hypergeometric is wired to the continuous log-likelihood
    # handler, exactly as in the original - confirm that this is intended.
    registrations = (
        (MultivariateGaussian, {"lambda_func": continuous_multivariate_likelihood}),
        (Gaussian, {"lambda_func": continuous_likelihood,
                    "log_lambda_func": continuous_log_likelihood}),
        (Hypergeometric, {"log_lambda_func": continuous_log_likelihood}),
        (Gamma, {"log_lambda_func": gamma_log_likelihood}),
        (LogNormal, {"log_lambda_func": continuous_log_likelihood}),
        (Poisson, {"log_lambda_func": discrete_log_likelihood}),
        (Bernoulli, {"lambda_func": discrete_likelihood,
                     "log_lambda_func": discrete_log_likelihood}),
        (Categorical, {"log_lambda_func": categorical_log_likelihood}),
        (Geometric, {"log_lambda_func": discrete_log_likelihood}),
        (Exponential, {"log_lambda_func": continuous_log_likelihood}),
        (Uniform, {"log_lambda_func": uniform_log_likelihood}),
        (CategoricalDictionary, {"log_lambda_func": categorical_dictionary_log_likelihood}),
    )
    for leaf_type, handler_kwargs in registrations:
        add_node_likelihood(leaf_type, **handler_kwargs)
예제 #25
0
def add_histogram_inference_support():
    """Register the likelihood and MPE handlers for ``Histogram`` leaves."""
    leaf_type = Histogram
    add_node_likelihood(leaf_type, histogram_likelihood)
    add_node_mpe_likelihood(leaf_type, histogram_mpe_log_likelihood)
예제 #26
0
def add_piecewise_inference_range_support():
    """Register ``piecewise_likelihood_range`` as the likelihood handler for ``PiecewiseLinear`` leaves."""
    handler = piecewise_likelihood_range
    add_node_likelihood(PiecewiseLinear, handler)
예제 #27
0
def add_parametric_inference_support():
    """Register the parametric likelihood and MPE handlers for every entry of ``SUPPORTED_PARAM_FORMS``."""
    likelihood_handler = parametric_log_likelihood
    mpe_handler = parametric_mpe_log_likelihood
    # Both handlers are registered for one form before moving to the next.
    for param_form in SUPPORTED_PARAM_FORMS:
        add_node_likelihood(param_form, likelihood_handler)
        add_node_mpe_likelihood(param_form, mpe_handler)
예제 #28
0
def add_piecewise_inference_support():
    """Register ``piecewise_log_likelihood`` as the log-likelihood handler for ``PiecewiseLinear`` leaves."""
    add_node_likelihood(
        PiecewiseLinear,
        log_lambda_func=piecewise_log_likelihood,
    )
예제 #29
0
def add_parametric_inference_range_support():
    """Register ``categorical_log_likelihood_range`` as the log-likelihood handler for ``Categorical`` leaves."""
    add_node_likelihood(
        Categorical,
        log_lambda_func=categorical_log_likelihood_range,
    )
예제 #30
0
def add_conditional_inference_support():
    """Register likelihood handlers for ``SupervisedOr`` and ``SupervisedLeaf`` nodes."""
    # NOTE(review): the same callable is passed in both handler positions for
    # SupervisedOr - confirm against add_node_likelihood's signature that this is intended.
    supervised_or_handler = conditional_supervised_likelihood
    add_node_likelihood(SupervisedOr, supervised_or_handler, supervised_or_handler)

    add_node_likelihood(SupervisedLeaf, supervised_leaf_likelihood)