Example #1
    def test_bcpp(self):
        D = Gaussian(mean=1.0, stdev=1.0, scope=[0])
        E = Gaussian(mean=2.0, stdev=2.0, scope=[1])
        F = Gaussian(mean=3.0, stdev=3.0, scope=[0])
        G = Gaussian(mean=4.0, stdev=4.0, scope=[1])

        B = D * E
        C = F * G

        A = 0.3 * B + 0.7 * C

        spn_cc_eval_func = get_cpp_function(A)

        np.random.seed(17)
        data = np.random.normal(10, 0.01, size=200000).tolist() + \
               np.random.normal(30, 10, size=200000).tolist()
        data = np.array(data).reshape((-1, 2))

        py_ll = log_likelihood(A, data)

        c_ll = spn_cc_eval_func(data)

        for i in range(py_ll.shape[0]):
            self.assertAlmostEqual(py_ll[i, 0], c_ll[i, 0])
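A minimal sketch (not from the original tests) of the operator-based construction used throughout these examples, assuming SPFlow's standard module layout (spn.structure.leaves.parametric.Parametric, spn.algorithms.Inference): w1 * n1 + w2 * n2 builds a Sum node with weights [w1, w2], and n1 * n2 builds a Product node.

import numpy as np
from spn.structure.leaves.parametric.Parametric import Gaussian
from spn.algorithms.Inference import log_likelihood

# A two-component Gaussian mixture over a single variable.
mix = 0.3 * Gaussian(mean=0.0, stdev=1.0, scope=0) + \
      0.7 * Gaussian(mean=5.0, stdev=2.0, scope=0)

data = np.array([[0.0], [5.0]])
ll = log_likelihood(mix, data)  # one log-likelihood per row, shape (2, 1)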
Example #2
def create_spflow_spn(n_feats):
    gaussians1 = []
    gaussians2 = []
    for i in range(n_feats):
        g1 = Gaussian(np.random.randn(), np.random.rand(), scope=i)
        g2 = Gaussian(np.random.randn(), np.random.rand(), scope=i)
        gaussians1.append(g1)
        gaussians2.append(g2)

    prods1 = []
    prods2 = []
    for i in range(0, n_feats, 2):
        p1 = Product([gaussians1[i], gaussians1[i + 1]])
        p2 = Product([gaussians2[i], gaussians2[i + 1]])
        prods1.append(p1)
        prods2.append(p2)

    sums = []
    for i in range(n_feats // 2):
        s = Sum(weights=[0.5, 0.5], children=[prods1[i], prods2[i]])
        sums.append(s)

    spflow_spn = Product(sums)
    assign_ids(spflow_spn)
    rebuild_scopes_bottom_up(spflow_spn)
    return spflow_spn
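A hedged usage sketch for create_spflow_spn, assuming SPFlow's log_likelihood and the same imports the builder itself relies on (Gaussian, Product, Sum, assign_ids, rebuild_scopes_bottom_up):

import numpy as np
from spn.algorithms.Inference import log_likelihood

spn = create_spflow_spn(4)      # 4 features -> 2 sum nodes under the root product
data = np.random.randn(8, 4)    # one row per instance, one column per feature
ll = log_likelihood(spn, data)  # log-likelihoods, shape (8, 1)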
Example #3
    def test_sum(self):
        spn = 0.5 * Gaussian(0.0, 1.0, scope=0) + 0.5 * Gaussian(2.0, 1.0, scope=0)

        spn_text = "(0.5*(Gaussian(V0|mean=0.0;stdev=1.0)) + 0.5*(Gaussian(V0|mean=2.0;stdev=1.0)))"

        self.assertEqual(spn_to_str_equation(spn), spn_text)
Example #4
def create_spflow_spn(n_feats, ctype=Gaussian):
    children1 = []
    children2 = []
    for i in range(n_feats):
        if ctype == Gaussian:
            c1 = Gaussian(np.random.randn(), np.random.rand(), scope=i)
            c2 = Gaussian(np.random.randn(), np.random.rand(), scope=i)
        else:
            #c1 = Bernoulli(p=1.0, scope=i)
            #c2 = Bernoulli(p=1.0, scope=i)
            c1 = Bernoulli(p=np.random.rand(), scope=i)
            c2 = Bernoulli(p=np.random.rand(), scope=i)

        children1.append(c1)
        children2.append(c2)

    prods1 = []
    prods2 = []
    for i in range(0, n_feats, 2):
        p1 = Product([children1[i], children1[i + 1]])
        p2 = Product([children2[i], children2[i + 1]])
        prods1.append(p1)
        prods2.append(p2)

    sums = []
    for i in range(n_feats // 2):
        s = Sum(weights=[0.5, 0.5], children=[prods1[i], prods2[i]])
        sums.append(s)

    spflow_spn = Product(sums)
    assign_ids(spflow_spn)
    rebuild_scopes_bottom_up(spflow_spn)
    return spflow_spn
Example #5
def test_gaussian_leaf_serialization(tmpdir):
    """Tests the binary serialization of two SPFlow Gaussian leaf nodes
    by round-tripping and comparing the parameters before and after serialization
    & deserialization"""
    g1 = Gaussian(mean=0.5, stdev=1, scope=0)
    g2 = Gaussian(mean=0.125, stdev=0.25, scope=1)
    p = Product(children=[g1, g2])

    binary_file = os.path.join(tmpdir, "test.bin")
    print(f"Test binary file: {binary_file}")

    model = SPNModel(p, "float32", "test")
    query = JointProbability(model)

    BinarySerializer(binary_file).serialize_to_file(query)

    deserialized = BinaryDeserializer(binary_file).deserialize_from_file()

    assert (isinstance(deserialized, JointProbability))
    assert (isinstance(deserialized.graph, SPNModel))
    assert (deserialized.graph.featureType == model.featureType)
    assert (deserialized.graph.name == model.name)

    deserialized = deserialized.graph.root

    assert isinstance(deserialized, Product)
    assert (len(deserialized.children) == 2)
    gaussian1 = deserialized.children[0]
    gaussian2 = deserialized.children[1]
    assert (g1.scope == gaussian1.scope)
    assert (g1.mean == gaussian1.mean)
    assert (g1.stdev == gaussian1.stdev)
    assert (g2.scope == gaussian2.scope)
    assert (g2.mean == gaussian2.mean)
    assert (g2.stdev == gaussian2.stdev)
Example #6
    def test_spn(self):
        spn = 0.4 * (Gaussian(0.0, 1.0, scope=0) * Gaussian(2.0, 3.0, scope=1)) + \
              0.6 * (Gaussian(4.0, 5.0, scope=0) * Gaussian(6.0, 7.0, scope=1))

        spn_text = "(0.4*((Gaussian(V0|mean=0.0;stdev=1.0) * Gaussian(V1|mean=2.0;stdev=3.0))) + " + \
                   "0.6*((Gaussian(V0|mean=4.0;stdev=5.0) * Gaussian(V1|mean=6.0;stdev=7.0))))"

        self.assertEqual(spn_to_str_equation(spn), spn_text)
Example #7
    def test_multiple_sum(self):
        spn = 0.6 * (0.4 * Gaussian(0.0, 1.0, scope=0) + 0.6 * Gaussian(2.0, 1.0, scope=0)) + \
              0.4 * Gaussian(2.0, 1.0, scope=0)

        spn_text = "(0.6*((0.4*(Gaussian(V0|mean=0.0;stdev=1.0)) + 0.6*(Gaussian(V0|mean=2.0;stdev=1.0)))) + 0.4*(Gaussian(V0|mean=2.0;stdev=1.0)))"

        print(spn_to_str_equation(spn))

        self.assertEqual(spn_to_str_equation(spn), spn_text)
Example #8
    def test_compression_internal_nodes(self):
        C1 = Gaussian(mean=1, stdev=0, scope=0)
        C2 = Gaussian(mean=1, stdev=1, scope=1)
        C3 = Gaussian(mean=1, stdev=0, scope=0)
        C4 = Gaussian(mean=1, stdev=1, scope=1)

        R = 0.4 * (C1 * C2) + 0.6 * (C3 * C4)

        Compress(R)
        self.assertTrue(*is_valid(R))

        self.assertEqual(id(R.children[0]), id(R.children[1]))
        self.assertEqual(id(R.children[0].children[0]), id(C1))
        self.assertEqual(id(R.children[0].children[1]), id(C2))
Example #9
    def test_induced_trees(self):
        spn = 0.5 * (Gaussian(mean=10, stdev=1, scope=0) * Categorical(p=[1.0, 0], scope=1)) + \
              0.5 * (Gaussian(mean=50, stdev=1, scope=0) * Categorical(p=[0, 1.0], scope=1))

        data = np.zeros((2, 2))

        data[1, 1] = 1

        data[:, 0] = np.nan

        mpevals = mpe(spn, data)

        self.assertAlmostEqual(mpevals[0, 0], 10)
        self.assertAlmostEqual(mpevals[1, 0], 50)
Example #10
    def test_eval_parametric(self):
        data = np.array([1, 1, 1, 1, 1, 1, 1], dtype=np.float32).reshape((1, 7))

        spn = (Gaussian(mean=1.0, stdev=1.0, scope=[0]) *
               Exponential(l=1.0, scope=[1]) *
               Gamma(alpha=1.0, beta=1.0, scope=[2]) *
               LogNormal(mean=1.0, stdev=1.0, scope=[3]) *
               Poisson(mean=1.0, scope=[4]) * Bernoulli(p=0.6, scope=[5]) *
               Categorical(p=[0.1, 0.2, 0.7], scope=[6]))

        ll = log_likelihood(spn, data)

        tf_ll = eval_tf(spn, data)

        self.assertTrue(np.all(np.isclose(ll, tf_ll)))

        spn_copy = Copy(spn)

        tf_graph, data_placeholder, variable_dict = spn_to_tf_graph(
            spn_copy, data, 1)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            tf_graph_to_spn(variable_dict)

        str_val = spn_to_str_equation(spn)
        str_val2 = spn_to_str_equation(spn_copy)

        self.assertEqual(str_val, str_val2)
Example #11
    def test_induced_trees(self):
        spn = 0.5 * (Gaussian(mean=10, stdev=1e-9, scope=0) * Categorical(p=[1.0, 0], scope=1)) + \
              0.5 * (Gaussian(mean=50, stdev=1e-9, scope=0) * Categorical(p=[0, 1.0], scope=1))

        rand_gen = np.random.RandomState(17)

        data = np.zeros((2, 2))

        data[1, 1] = 1

        data[:, 0] = np.nan

        samples = sample_instances(spn, data, rand_gen)

        self.assertAlmostEqual(samples[0, 0], 10)
        self.assertAlmostEqual(samples[1, 0], 50)
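A row of all np.nan is sampled unconditionally; a minimal sketch under the same assumed imports (Gaussian and Categorical from spn.structure.leaves.parametric.Parametric, sample_instances from spn.algorithms.Sampling):

import numpy as np
from spn.structure.leaves.parametric.Parametric import Categorical, Gaussian
from spn.algorithms.Sampling import sample_instances

spn = 0.5 * (Gaussian(mean=10, stdev=1, scope=0) * Categorical(p=[1.0, 0.0], scope=1)) + \
      0.5 * (Gaussian(mean=50, stdev=1, scope=0) * Categorical(p=[0.0, 1.0], scope=1))

rand_gen = np.random.RandomState(17)
query = np.full((5, 2), np.nan)                   # NaN marks a variable to sample
samples = sample_instances(spn, query, rand_gen)  # returns the completed rows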
Example #12
    def test_compression_leaves_deeper(self):
        C1 = Gaussian(mean=1, stdev=0, scope=0)
        C2 = Gaussian(mean=1, stdev=1, scope=1)
        C3 = Gaussian(mean=1, stdev=0, scope=0)
        C4 = Gaussian(mean=2, stdev=0, scope=1)

        R = 0.4 * (C1 * C2) + 0.6 * (C3 * C4)

        Compress(R)
        self.assertTrue(*is_valid(R))

        self.assertNotEqual(id(R.children[0]), id(R.children[1]))
        self.assertEqual(id(R.children[0].children[0]), id(C1))
        self.assertEqual(id(R.children[0].children[1]), id(C2))
        self.assertEqual(id(R.children[1].children[0]), id(C1))
        self.assertEqual(id(R.children[1].children[1]), id(C4))
Example #13
    def test_torch_vs_tf_time(self):
        # Create sample data
        from sklearn.datasets import make_blobs
        import tensorflow as tf
        from time import time

        X, y = make_blobs(n_samples=10,
                          centers=3,
                          n_features=2,
                          random_state=0)
        X = X.astype(np.float32)

        # SPFlow implementation
        g00 = Gaussian(mean=0.0, stdev=1.0, scope=0)
        g10 = Gaussian(mean=1.0, stdev=2.0, scope=1)
        g01 = Gaussian(mean=3.0, stdev=2.0, scope=0)
        g11 = Gaussian(mean=5.0, stdev=1.0, scope=1)
        p0 = Product(children=[g00, g10])
        p1 = Product(children=[g01, g11])
        s = Sum(weights=[0.2, 0.8], children=[p0, p1])
        assign_ids(s)
        rebuild_scopes_bottom_up(s)

        # Convert
        tf_spn, data_placeholder, variable_dict = spn_to_tf_graph(s, data=X)
        torch_spn = SumNode.from_spn(s)

        # Optimizer
        lr = 0.001
        tf_optim = tf.train.AdamOptimizer(lr)
        torch_optim = optim.Adam(torch_spn.parameters(), lr)

        t0 = time()
        epochs = 10
        optimize_tf_graph(tf_spn,
                          variable_dict,
                          data_placeholder,
                          X,
                          epochs=epochs,
                          optimizer=tf_optim)
        t1 = time()
        optimize_torch(torch_spn, X, epochs=epochs, optimizer=torch_optim)
        t2 = time()

        print("Tensorflow took: ", t1 - t0)
        print("PyTorch took: ", t2 - t1)
def test_cpu_gaussian():
    # Construct a minimal SPN using ten Gaussian leaves.
    g1 = Gaussian(mean=0.5, stdev=1, scope=0)
    g2 = Gaussian(mean=0.125, stdev=0.25, scope=1)
    g3 = Gaussian(mean=0.345, stdev=0.24, scope=2)
    g4 = Gaussian(mean=0.456, stdev=0.1, scope=3)
    g5 = Gaussian(mean=0.94, stdev=0.48, scope=4)
    g6 = Gaussian(mean=0.56, stdev=0.42, scope=5)
    g7 = Gaussian(mean=0.76, stdev=0.14, scope=6)
    g8 = Gaussian(mean=0.32, stdev=0.8, scope=7)
    g9 = Gaussian(mean=0.58, stdev=0.9, scope=8)
    g10 = Gaussian(mean=0.14, stdev=0.2, scope=9)
    p = Product(children=[g1, g2, g3, g4, g5, g6, g7, g8, g9, g10])

    # Randomly sample input values from the ten Gaussian (normal) distributions.
    inputs = np.column_stack(
        (np.random.normal(0.5, 1, 30), np.random.normal(0.125, 0.25, 30),
         np.random.normal(0.345, 0.24, 30), np.random.normal(0.456, 0.1, 30),
         np.random.normal(0.94, 0.48, 30), np.random.normal(0.56, 0.42, 30),
         np.random.normal(0.76, 0.14, 30), np.random.normal(0.32, 0.8, 30),
         np.random.normal(0.58, 0.9, 30),
         np.random.normal(0.14, 0.2, 30))).astype("float64")

    # Insert some NaN in random places into the input data.
    inputs.ravel()[np.random.choice(inputs.size, 10, replace=False)] = np.nan

    if not CPUCompiler.isVectorizationSupported():
        print("Test not supported by the compiler installation")
        return 0

    # Execute the compiled Kernel.
    results = CPUCompiler(computeInLogSpace=False,
                          vectorize=False).log_likelihood(p,
                                                          inputs,
                                                          supportMarginal=True,
                                                          batchSize=10)

    # Compute the reference results using the inference from SPFlow.
    reference = log_likelihood(p, inputs)
    reference = reference.reshape(30)

    # Check the computed results against the reference; fall back to comparing
    # in linear space if the log-space results are not close.
    assert np.all(np.isclose(results, reference)) or np.all(
        np.isclose(np.exp(results), np.exp(reference)))
Example #15
    def test_gaussian_spn_ll(self):
        root = 0.3 * (Gaussian(mean=0, stdev=1, scope=0) * Gaussian(mean=1, stdev=1, scope=1)) + \
               0.7 * (Gaussian(mean=2, stdev=1, scope=0) * Gaussian(mean=3, stdev=1, scope=1))

        sympyecc = spn_to_sympy(root)
        logsympyecc = spn_to_sympy(root, log=True)

        sym_l = float(sympyecc.evalf(subs={"x0": 0, "x1": 0}))
        sym_ll = float(logsympyecc.evalf(subs={"x0": 0, "x1": 0}))

        data = np.array([0, 0], dtype=np.float64).reshape(-1, 2)

        self.assertTrue(
            np.all(np.isclose(np.log(sym_l), log_likelihood(root, data))))
        self.assertTrue(
            np.all(np.isclose(sym_ll, log_likelihood(root, data))))
Example #16
def test_vector_slp_mini():
    g0 = Gaussian(mean=0.13, stdev=0.5, scope=0)
    g1 = Gaussian(mean=0.14, stdev=0.25, scope=2)
    g2 = Gaussian(mean=0.11, stdev=1.0, scope=3)
    g3 = Gaussian(mean=0.12, stdev=0.75, scope=1)

    spn = Sum(children=[g0, g1, g2, g3], weights=[0.2, 0.4, 0.1, 0.3])

    # Randomly sample input values from Gaussian (normal) distributions.
    num_samples = 100
    inputs = np.column_stack(
        (np.random.normal(loc=0.5, scale=1, size=num_samples),
         np.random.normal(loc=0.125, scale=0.25, size=num_samples),
         np.random.normal(loc=0.345, scale=0.24, size=num_samples),
         np.random.normal(loc=0.456, scale=0.1,
                          size=num_samples))).astype("float64")

    # Compute the reference results using the inference from SPFlow.
    reference = log_likelihood(spn, inputs)
    reference = reference.reshape(num_samples)

    # Compile the kernel with batch size 1 to enable SLP vectorization.
    compiler = CPUCompiler(vectorize=True,
                           computeInLogSpace=True,
                           vectorLibrary="LIBMVEC")
    kernel = compiler.compile_ll(spn=spn, batchSize=1, supportMarginal=False)

    # Execute the compiled Kernel.
    time_sum = 0
    for i in range(len(reference)):
        # Check the computation results against the reference
        start = time.time()
        result = compiler.execute(kernel, inputs=np.array([inputs[i]]))
        time_sum = time_sum + time.time() - start
        print(
            f"evaluation #{i}: result: {result[0]:16.8f}, reference: {reference[i]:16.8f}",
            end='\r')
        if not np.isclose(result, reference[i]):
            print(
                f"\nevaluation #{i} failed: result: {result[0]:16.8f}, reference: {reference[i]:16.8f}"
            )
            raise AssertionError()
    print(f"\nExecution of {len(reference)} samples took {time_sum} seconds.")
Example #17
    def test_compression_leaves(self):
        C1 = Gaussian(mean=1, stdev=0, scope=0)
        C2 = Gaussian(mean=1, stdev=0, scope=0)

        A = 0.7 * C1 + 0.3 * C2

        Compress(A)

        self.assertTrue(*is_valid(A))
        self.assertEqual(id(A.children[0]), id(A.children[1]))

        C1 = Gaussian(mean=1, stdev=0, scope=0)
        C2 = Gaussian(mean=1, stdev=0, scope=1)

        B = C1 * C2

        Compress(B)
        self.assertTrue(*is_valid(B))

        self.assertNotEqual(id(B.children[0]), id(B.children[1]))
Example #18
def get_gender_spn():
    from spn.structure.leaves.parametric.Parametric import Categorical, Gaussian

    spn1 = Categorical(p=[0.0, 1.0], scope=[0]) * Categorical(p=[0.2, 0.8],
                                                              scope=[1])
    spn2 = Categorical(p=[1.0, 0.0], scope=[0]) * Categorical(p=[0.7, 0.3],
                                                              scope=[1])
    spn3 = 0.4 * spn1 + 0.6 * spn2
    spn = spn3 * Gaussian(mean=20, stdev=3, scope=[2])

    spn.scope = sorted(spn.scope)
    return spn
Example #19
def test_cuda_gaussian():

    # Construct a minimal SPN using ten Gaussian leaves.
    g1 = Gaussian(mean=0.5, stdev=1, scope=0)
    g2 = Gaussian(mean=0.125, stdev=0.25, scope=1)
    g3 = Gaussian(mean=0.345, stdev=0.24, scope=2)
    g4 = Gaussian(mean=0.456, stdev=0.1, scope=3)
    g5 = Gaussian(mean=0.94, stdev=0.48, scope=4)
    g6 = Gaussian(mean=0.56, stdev=0.42, scope=5)
    g7 = Gaussian(mean=0.76, stdev=0.14, scope=6)
    g8 = Gaussian(mean=0.32, stdev=0.8, scope=7)
    g9 = Gaussian(mean=0.58, stdev=0.9, scope=8)
    g10 = Gaussian(mean=0.14, stdev=0.2, scope=9)
    p = Product(children=[g1, g2, g3, g4, g5, g6, g7, g8, g9, g10])

    # Randomly sample input values from the ten Gaussian (normal) distributions.
    inputs = np.column_stack(
        (np.random.normal(0.5, 1, 30), np.random.normal(0.125, 0.25, 30),
         np.random.normal(0.345, 0.24, 30), np.random.normal(0.456, 0.1, 30),
         np.random.normal(0.94, 0.48, 30), np.random.normal(0.56, 0.42, 30),
         np.random.normal(0.76, 0.14, 30), np.random.normal(0.32, 0.8, 30),
         np.random.normal(0.58, 0.9, 30),
         np.random.normal(0.14, 0.2, 30))).astype("float32")
    if not CUDACompiler.isAvailable():
        print("Test not supported by the compiler installation")
        return 0

    # Execute the compiled Kernel.
    results = CUDACompiler().log_likelihood(p, inputs, supportMarginal=False)

    # Compute the reference results using the inference from SPFlow.
    reference = log_likelihood(p, inputs)
    reference = reference.reshape(30)

    # Check the computed results against the reference; fall back to comparing
    # in linear space if the log-space results are not close.
    assert np.all(np.isclose(results, reference)) or np.all(
        np.isclose(np.exp(results), np.exp(reference)))
Example #20
    def test_equal_to_tf(self):
        # SPFlow implementation
        g00 = Gaussian(mean=0.0, stdev=1.0, scope=0)
        g10 = Gaussian(mean=1.0, stdev=2.0, scope=1)
        g01 = Gaussian(mean=3.0, stdev=2.0, scope=0)
        g11 = Gaussian(mean=5.0, stdev=1.0, scope=1)
        p0 = Product(children=[g00, g10])
        p1 = Product(children=[g01, g11])
        s = Sum(weights=[0.2, 0.8], children=[p0, p1])

        assign_ids(s)
        rebuild_scopes_bottom_up(s)

        # Test for 100 random samples
        data = np.random.randn(100, 2)

        # LL from SPN
        ll = log_likelihood(s, data)

        # PyTorch implementation
        g00 = GaussianNode(mean=0.0, std=1.0, scope=0)
        g10 = GaussianNode(mean=1.0, std=2.0, scope=1)
        g01 = GaussianNode(mean=3.0, std=2.0, scope=0)
        g11 = GaussianNode(mean=5.0, std=1.0, scope=1)
        p0 = ProductNode(children=[g00, g10])
        p1 = ProductNode(children=[g01, g11])
        rootnode = SumNode(weights=[0.2, 0.8], children=[p0, p1])

        datatensor = torch.Tensor(data)
        # LL from pytorch
        ll_torch = rootnode(datatensor)

        # Assert equality
        self.assertTrue(
            np.isclose(np.array(ll).squeeze(),
                       ll_torch.detach().numpy(),
                       atol=DELTA).all())
Example #21
def update_params_LogNormalFixVarNode(node, X, rand_gen, normal_prior):
    """
    The prior over \mu is a Normal distribution

    p(\mu) = Normal(mu_0, tau_0)

    with mean mu_0 and precision(inverse variance) tau_0

    see[1]

    [1] - https: // en.wikipedia.org / wiki / Conjugate_prior

    Return a sample for the node.params drawn from the posterior distribution
    which for conjugacy is still a Normal

    p(\mu, | X) = Normal(mu_n, tau_n)

    see[1]
    """

    assert isinstance(normal_prior, PriorNormal)

    N = len(X)

    #
    # if N is 0, then it would be like sampling from the prior
    tau_n = normal_prior.tau_0 + N * node.precision

    #
    # x = X[node.row_ids, node.scope]
    log_sum_x = np.log(X).sum() if N > 0 else 0
    mu_n = (log_sum_x * node.precision +
            normal_prior.tau_0 * normal_prior.mu_0) / tau_n
    sum_x = X.sum()
    # mu_n = (sum_x * node.precision + node.tau_0 * node.mu_0) / tau_n

    #
    # sampling
    # TODO, optimize it with numba
    std_n = 1.0 / np.sqrt(tau_n)
    # print('STDN', std_n, tau_n, mu_n, log_sum_x)

    mu_sam = sample_parametric_node(Gaussian(mu_n, std_n), 1, rand_gen)
    # print('STDN', std_n, tau_n, mu_n, sum_x, np.log(mu_sam), mu_sam)
    #
    # updating params (only mean)
    node.mean = mu_sam[0]
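For reference, the posterior computed above, with the fixed leaf precision tau = node.precision and the data entering on the log scale (the leaf is a log-normal with fixed variance):

    tau_n = tau_0 + N * tau
    mu_n  = (tau * sum_i log(x_i) + tau_0 * mu_0) / tau_n
    mu | X ~ Normal(mu_n, 1 / sqrt(tau_n))   # i.e. posterior precision tau_n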
Example #22
def update_params_GaussianNode2(node, X, rand_gen, nig_prior):
    """
    The prior over parameters is a Normal - Inverse - Gamma(NIG)


    [1] - Murphy K., Conjugate Bayesian analysis of the Gaussian distribution(2007)
          https: // www.cs.ubc.ca / ~murphyk / Papers / bayesGauss.pdf
          https://en.wikipedia.org/wiki/Conjugate_prior
          http://thaines.com/content/misc/gaussian_conjugate_prior_cheat_sheet.pdf
          ** http://homepages.math.uic.edu/~rgmartin/Teaching/Stat591/Bayes/Notes/591_gibbs.pdf
          ** https://people.eecs.berkeley.edu/~jordan/courses/260-spring10/lectures/lecture5.pdf

    Return a sample for the node.params drawn from the posterior distribution
    which for conjugacy is still a NIG

    p(\mu, \sigma ^ 2, | X) = NIG(m_n, V_n, a_n, b_n)

    see[1]
    """

    assert isinstance(nig_prior, PriorNormalInverseGamma), nig_prior

    n = len(X)
    X_hat = np.mean(X)

    mean = (nig_prior.V_0 * nig_prior.m_0 + n * X_hat) / (nig_prior.V_0 + n)

    v = nig_prior.V_0 + n

    a = nig_prior.a_0 + n / 2

    b = nig_prior.b_0 + (n / 2) * (np.var(X) +
                                   (v / (v + n)) * np.power(X_hat - mean, 2))

    inv_sigma2_sam = sample_parametric_node(Gamma(a, b), 1, rand_gen)

    sigma2_sam = 1 / inv_sigma2_sam

    # The posterior stdev of the mean is sqrt(sigma^2 / v); the variance itself
    # must not be passed as the Gaussian's stdev parameter.
    mu_sam = sample_parametric_node(Gaussian(mean, np.sqrt(sigma2_sam / v)[0]),
                                    1, rand_gen)

    # updating params
    node.mean = mu_sam[0]
    # node.stdev = np.sqrt(node.variance)
    node.stdev = np.sqrt(sigma2_sam)[0]
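In the notation of the code above (with V_0 acting as a pseudo-count of prior observations), the posterior it samples from is:

    mean = (V_0 * m_0 + n * X_hat) / (V_0 + n)
    v    = V_0 + n
    a    = a_0 + n / 2
    b    = b_0 + (n / 2) * (Var(X) + (v / (v + n)) * (X_hat - mean)^2)

    1 / sigma^2  ~ Gamma(a, b)                # precision sampled first
    mu | sigma^2 ~ Normal(mean, sigma^2 / v)  # variance sigma^2 / v, stdev sqrt(sigma^2 / v)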
Example #23
    def test_spn_to_torch(self):
        # SPFlow implementation
        n0 = Gaussian(mean=0.0, stdev=1.0, scope=0)
        n1 = Categorical(p=[0.1, 0.3, 0.6], scope=1)
        n2 = Sum(weights=[0.4, 0.6], children=[n0, n1])  # one weight per child
        n3 = Product(children=[n0, n1])

        torch_n0 = GaussianNode.from_spn(n0)
        torch_n1 = CategoricalNode.from_spn(n1)
        torch_n2 = SumNode.from_spn(n2)
        torch_n3 = ProductNode.from_spn(n3)

        self.assertEqual(torch_n0.mean, n0.mean)
        self.assertEqual(torch_n0.std, n0.stdev)
        self.assertTrue(
            np.isclose(torch_n1.p.detach().numpy(), n1.p, atol=DELTA).all())
        self.assertTrue(
            np.isclose(torch_n2.weights.detach().numpy(),
                       n2.weights,
                       atol=DELTA).all())
Example #24
    def test_clustering(self):
        np.random.seed(0)

        centers = [[10, 10], [-10, -10], [10, -10]]
        center_stdev = 0.7
        X, labels_true = make_blobs(n_samples=1000000,
                                    centers=centers,
                                    cluster_std=center_stdev)

        initial_cluster_centers = [[1, 1], [0, 0], [1, 0]]
        g0x = Gaussian(mean=initial_cluster_centers[0][0], stdev=1.0, scope=0)
        g0y = Gaussian(mean=initial_cluster_centers[0][1], stdev=1.0, scope=1)
        g1x = Gaussian(mean=initial_cluster_centers[1][0], stdev=1.0, scope=0)
        g1y = Gaussian(mean=initial_cluster_centers[1][1], stdev=1.0, scope=1)
        g2x = Gaussian(mean=initial_cluster_centers[2][0], stdev=1.0, scope=0)
        g2y = Gaussian(mean=initial_cluster_centers[2][1], stdev=1.0, scope=1)

        spn = 0.6 * (0.5 * (g0x * g0y) + 0.5 * (g1x * g1y)) + 0.4 * (g2x * g2y)

        EM_optimization(spn, X, iterations=5)

        cluster_centers2 = [[g0x.mean, g0y.mean], [g1x.mean, g1y.mean],
                            [g2x.mean, g2y.mean]]

        print("\ntrue centers", centers)
        print("initial ctrs", initial_cluster_centers)
        print("final   ctrs", cluster_centers2)

        for i, cluster_location in enumerate(centers):
            self.assertAlmostEqual(cluster_location[0], cluster_centers2[i][0],
                                   2)
            self.assertAlmostEqual(cluster_location[1], cluster_centers2[i][1],
                                   2)

        for n in get_nodes_by_type(spn, Gaussian):
            self.assertAlmostEqual(n.stdev, center_stdev, 2)
Example #25
        node.p = node.p / psum
        node.p = node.p.tolist()

    elif isinstance(node, CategoricalDictionary):
        if node.p is not None:
            node.p.clear()
        v, c = np.unique(data, return_counts=True)
        p = c / c.sum()
        node.p = dict(zip(v, p))

    else:
        raise Exception("Unknown parametric " + str(type(node)))


if __name__ == '__main__':
    node = Gaussian(np.inf, np.inf)
    data = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
    update_parametric_parameters_mle(node, data)
    assert np.isclose(node.mean, np.mean(data))
    assert np.isclose(node.stdev, np.std(data))

    node = Gamma(np.inf, np.inf)
    data = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
    update_parametric_parameters_mle(node, data)
    assert np.isclose(node.alpha / node.beta, np.mean(data)), node.alpha

    node = LogNormal(np.inf, np.inf)
    data = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
    update_parametric_parameters_mle(node, data)
    assert np.isclose(node.mean, np.log(data).mean(), atol=0.00001)
    assert np.isclose(node.stdev, np.log(data).std(), atol=0.00001)
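The assertions in the __main__ block above match the standard maximum-likelihood identities for these leaves:

    Gaussian:   mean = (1/N) * sum_i x_i,   stdev = sqrt((1/N) * sum_i (x_i - mean)^2)
    Gamma:      alpha / beta = (1/N) * sum_i x_i   (the fitted mean equals the sample mean)
    LogNormal:  mean = mean(log x),         stdev = std(log x)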
Example #26
def update_params_GaussianNode(node, X, rand_gen, nig_prior):
    """
    The prior over parameters is a Normal - Inverse - Gamma(NIG)

    p(\mu, \sigma ^ 2) = NIG(m_0, V_0, a_0, b_0) =
                     = N(\mu | m_0, \sigma ^ {2}V_0)IG(\sigma ^ {2} | a_0, b_0)

    see[1], eq. 190 - 191

    [1] - Murphy K., Conjugate Bayesian analysis of the Gaussian distribution(2007)
          https: // www.cs.ubc.ca / ~murphyk / Papers / bayesGauss.pdf

    Return a sample for the node.params drawn from the posterior distribution
    which for conjugacy is still a NIG

    p(\mu, \sigma ^ 2, | X) = NIG(m_n, V_n, a_n, b_n)

    see[1]
    """

    assert isinstance(nig_prior, PriorNormalInverseGamma), nig_prior

    N = len(X)

    # N = len(node.row_ids)

    # eq (197)
    inv_V_0 = 1.0 / nig_prior.V_0
    inv_V_n = inv_V_0 + N
    V_n = 1 / inv_V_n

    # eq (198), just switching from avg to sum to prevent nans in numpy
    # when there are no instances assigned, it should be like sampling from the prior
    # x = X[node.row_ids, node.scope]
    # avg_x = x.mean()
    # m_n = (inv_V_0 * node.m_0 + N * avg_x) * V_n
    sum_x = X.sum()
    avg_x = sum_x / N if N else 0
    m_n = (inv_V_0 * nig_prior.m_0 + sum_x) * V_n

    # eq (199)
    # inv_V_n = 1.0 / V_n
    a_n = nig_prior.a_0 + N / 2
    # mu_n_hat = - m_n * m_n * inv_V_na
    # b_n = node.b_0 + (node.m_0 * node.m_0 * inv_V_0 +
    #                   np.dot(x, x) - m_n * m_n * inv_V_n
    #                   # (x * x - mu_n_hat).sum()
    #                   ) / 2
    b_n = (
        nig_prior.b_0
        + (np.dot(X - avg_x, X - avg_x) + (N * inv_V_0 * (avg_x - nig_prior.m_0) * (avg_x - nig_prior.m_0)) * V_n) / 2
    )

    #
    # sampling
    # first sample the variance from IG, then the mean from a N
    # see eq (191) and
    # TODO, optimize it with numba
    sigma2_sam = scipy.stats.invgamma.rvs(
        a=a_n,
        size=1,
        # scale=1.0 / b_n,
        random_state=rand_gen,
    )
    sigma2_sam = sigma2_sam * b_n
    std_n = np.sqrt(sigma2_sam * V_n)
    mu_sam = sample_parametric_node(Gaussian(m_n, std_n), 1, None, rand_gen)
    # logger.info('sigm', sigma2_sam, 'std_n', std_n, 'v_n', V_n, mu_sam, m_n)

    #
    # updating params
    node.mean = mu_sam[0]
    # node.stdev = np.sqrt(node.variance)
    node.stdev = np.sqrt(sigma2_sam)[0]
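As implemented above (a rearrangement of eqs. 197-199 in [1]):

    1 / V_n = 1 / V_0 + N
    m_n     = V_n * (m_0 / V_0 + sum_i x_i)
    a_n     = a_0 + N / 2
    b_n     = b_0 + (sum_i (x_i - x_bar)^2 + N * (1 / V_0) * (x_bar - m_0)^2 * V_n) / 2

    sigma^2      ~ InvGamma(a_n, scale=b_n)          # variance sampled first
    mu | sigma^2 ~ Normal(m_n, sqrt(sigma^2 * V_n))  # then the mean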
Example #27
# # Sum Layer
# s1 = Sum([0.5, 0.5], [p11, p21])
# s2 = Sum([0.5, 0.5], [p12, p22])
# s3 = Sum([0.5, 0.5], [p13, p23])

# # Root Node
# root = Product([s1, s2, s3])

# assign_ids(root)
# rebuild_scopes_bottom_up(root)

# # Plot
# plot_spn(root, "spn-B.png")

g1 = Gaussian(1, 1, scope=3)
g2 = Gaussian(1, 1, scope=5)
g3 = Gaussian(1, 1, scope=1)
g4 = Gaussian(1, 1, scope=4)
g5 = Gaussian(1, 1, scope=2)
g6 = Gaussian(1, 1, scope=6)

p11 = Product([g1, g2])
p12 = Product([g1, g2])
p13 = Product([g1, g2])
p21 = Product([g3, g4])
p22 = Product([g3, g4])
p23 = Product([g3, g4])
p31 = Product([g5, g6])
p32 = Product([g5, g6])
p33 = Product([g5, g6])
Example #28
import matplotlib.pyplot as plt

if __name__ == "__main__":

    show_plots = False

    add_parametric_inference_support()
    add_parametric_text_support()

    x_range = np.array([-10, 10])
    #
    # testing MPE inference for the univariate distributions

    #
    # gaussian
    gaussian = Gaussian(mean=0.5, stdev=2, scope=[0])

    pdf_x, pdf_y = approximate_density(gaussian, x_range)
    fig, ax = plt.subplots(1, 1)
    ax.plot(pdf_x, pdf_y, label="gaussian")
    plt.axvline(x=gaussian.mode, color='r')
    if show_plots:
        plt.show()

    #
    # gamma, alpha=1, beta=5
    gamma = Gamma(alpha=1, beta=5, scope=[0])

    pdf_x, pdf_y = approximate_density(gamma, x_range)
    fig, ax = plt.subplots(1, 1)
    ax.plot(pdf_x, pdf_y, label="gamma")
Example #29
    def test_prod(self):
        spn = Gaussian(0.0, 1.0, scope=0) * Gaussian(2.0, 1.0, scope=1)

        spn_text = "(Gaussian(V0|mean=0.0;stdev=1.0) * Gaussian(V1|mean=2.0;stdev=1.0))"

        self.assertEqual(spn_to_str_equation(spn), spn_text)
Example #30
def test_vector_slp_tree():
    g0 = Gaussian(mean=0.11, stdev=1, scope=0)
    g1 = Gaussian(mean=0.12, stdev=0.75, scope=1)
    g2 = Gaussian(mean=0.13, stdev=0.5, scope=2)
    g3 = Gaussian(mean=0.14, stdev=0.25, scope=3)
    g4 = Gaussian(mean=0.15, stdev=1, scope=4)
    g5 = Gaussian(mean=0.16, stdev=0.25, scope=5)
    g6 = Gaussian(mean=0.17, stdev=0.5, scope=6)
    g7 = Gaussian(mean=0.18, stdev=0.75, scope=7)
    g8 = Gaussian(mean=0.19, stdev=1, scope=8)

    p0 = Product(children=[g0, g1, g2, g4])
    p1 = Product(children=[g3, g4, g4, g5])
    p2 = Product(children=[g6, g4, g7, g8])
    p3 = Product(children=[g8, g6, g4, g2])

    s0 = Sum(children=[g0, g1, g2, p0], weights=[0.25, 0.25, 0.25, 0.25])
    s1 = Sum(children=[g3, g4, g5, p1], weights=[0.25, 0.25, 0.25, 0.25])
    s2 = Sum(children=[g6, g7, g8, p2], weights=[0.25, 0.25, 0.25, 0.25])
    s3 = Sum(children=[g0, g4, g8, p3], weights=[0.25, 0.25, 0.25, 0.25])

    spn = Product(children=[s0, s1, s2, s3])

    # Randomly sample input values from Gaussian (normal) distributions.
    num_samples = 100
    inputs = np.column_stack(
        (np.random.normal(loc=0.5, scale=1, size=num_samples),
         np.random.normal(loc=0.125, scale=0.25, size=num_samples),
         np.random.normal(loc=0.345, scale=0.24, size=num_samples),
         np.random.normal(loc=0.456, scale=0.1, size=num_samples),
         np.random.normal(loc=0.94, scale=0.48, size=num_samples),
         np.random.normal(loc=0.56, scale=0.42, size=num_samples),
         np.random.normal(loc=0.76, scale=0.14, size=num_samples),
         np.random.normal(loc=0.32, scale=0.58, size=num_samples),
         np.random.normal(loc=0.58, scale=0.219, size=num_samples),
         np.random.normal(loc=0.14, scale=0.52, size=num_samples),
         np.random.normal(loc=0.24, scale=0.42, size=num_samples),
         np.random.normal(loc=0.34, scale=0.1, size=num_samples),
         np.random.normal(loc=0.44, scale=0.9, size=num_samples),
         np.random.normal(loc=0.54, scale=0.7, size=num_samples),
         np.random.normal(loc=0.64, scale=0.5, size=num_samples),
         np.random.normal(loc=0.74, scale=0.4,
                          size=num_samples))).astype("float64")

    # Compute the reference results using the inference from SPFlow.
    reference = log_likelihood(spn, inputs)
    reference = reference.reshape(num_samples)

    # Compile the kernel with batch size 1 to enable SLP vectorization.
    compiler = CPUCompiler(vectorize=True,
                           computeInLogSpace=True,
                           vectorLibrary="LIBMVEC")
    kernel = compiler.compile_ll(spn=spn, batchSize=1, supportMarginal=False)

    # Execute the compiled Kernel.
    time_sum = 0
    for i in range(len(reference)):
        # Check the computation results against the reference
        start = time.time()
        result = compiler.execute(kernel, inputs=np.array([inputs[i]]))
        time_sum = time_sum + time.time() - start
        print(
            f"evaluation #{i}: result: {result[0]:16.8f}, reference: {reference[i]:16.8f}",
            end='\r')
        if not np.isclose(result, reference[i]):
            print(
                f"\nevaluation #{i} failed: result: {result[0]:16.8f}, reference: {reference[i]:16.8f}"
            )
            raise AssertionError()
    print(f"\nExecution of {len(reference)} samples took {time_sum} seconds.")