def test_categorical_leaf_serialization(tmpdir):
    """Round-trip a product of two Categorical leaves through the binary format.

    The query is serialized to a file below ``tmpdir``, read back, and the
    query type, the model metadata and every leaf probability are compared
    with the values that were written.
    """
    leaves = [
        Categorical(p=[0.35, 0.55, 0.1], scope=1),
        Categorical(p=[0.25, 0.625, 0.125], scope=2),
    ]
    root = Product(children=list(leaves))

    binary_file = os.path.join(tmpdir, "test.bin")
    print(f"Test binary file: {binary_file}")

    model = SPNModel(root, "uint8", "test")
    BinarySerializer(binary_file).serialize_to_file(JointProbability(model))

    restored_query = BinaryDeserializer(binary_file).deserialize_from_file()

    # The wrapper objects and their metadata must survive the round trip.
    assert isinstance(restored_query, JointProbability)
    assert isinstance(restored_query.graph, SPNModel)
    assert restored_query.graph.featureType == model.featureType
    assert restored_query.graph.name == model.name

    restored_root = restored_query.graph.root

    assert isinstance(restored_root, Product)
    assert len(restored_root.children) == 2
    # Compare the leaf parameters pairwise, in child order.
    for written, restored in zip(leaves, restored_root.children):
        assert len(written.p) == len(restored.p)
        for index, probability in enumerate(written.p):
            assert probability == restored.p[index]
Beispiel #2
0
 def test_conditional_probability(self):
     """Check conditional_probability against hand-computed values.

     Uses the same SPN as the entropy test.
     """
     # The toy data set only serves to populate the domains of ds_context.
     train_data = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [2.0, 0.0, 1.0]])
     ds_context = Context(meta_types=[MetaType.DISCRETE] * 3)
     ds_context.add_domains(train_data)
     ds_context.parametric_type = [Categorical] * 3

     def evidence_branches():
         # Fresh copy of the sub-mixture over variables 1 and 2; both halves
         # of the SPN get their own node objects.
         first = Categorical(p=[7 / 34, 27 / 34], scope=1) * Categorical(p=[1.0, 0.0], scope=2)
         second = Categorical(p=[21 / 22, 1 / 22], scope=1) * Categorical(p=[0.0, 1.0], scope=2)
         return 0.34 * first + 0.66 * second

     upper = Categorical(p=[0.25, 0.75, 0.0], scope=0) * evidence_branches()
     lower = Categorical(p=[0.0, 0.0, 1.0], scope=0) * evidence_branches()
     spn = 0.64 * upper + 0.36 * lower

     # The same instance is queried for two different target variables.
     x_instance = np.array([[1, 1, 0]], dtype=float)
     for feature, expected in ((2, 0.9), (0, 0.48)):
         self.assertAlmostEqual(conditional_probability(spn, feature, x_instance)[0][0], expected)

     x_instance = np.array([[2, 1, 0]], dtype=float)
     self.assertAlmostEqual(conditional_probability(spn, 0, x_instance)[0][0], 0.36)
Beispiel #3
0
 def test_we_score(self):
     """Check weight_of_evidence on a hand-built SPN.

     Per the original notes, the training data and the SPN come from this
     process over three variables: x1 is the sum of two Bernoulli(0.6)
     draws, x2 is Bernoulli(0.3), and x3 is Bernoulli(0.1) when x2 == 1
     and Bernoulli(0.9) otherwise.
     """
     # The toy data set only serves to populate the domains of ds_context.
     train_data = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [2.0, 0.0, 1.0]])
     ds_context = Context(meta_types=[MetaType.DISCRETE] * 3)
     ds_context.add_domains(train_data)
     ds_context.parametric_type = [Categorical] * 3

     def evidence_branches():
         # Fresh copy of the sub-mixture over variables 1 and 2.
         first = Categorical(p=[7 / 34, 27 / 34], scope=1) * Categorical(p=[1.0, 0.0], scope=2)
         second = Categorical(p=[21 / 22, 1 / 22], scope=1) * Categorical(p=[0.0, 1.0], scope=2)
         return 0.34 * first + 0.66 * second

     upper = Categorical(p=[0.25, 0.75, 0.0], scope=0) * evidence_branches()
     lower = Categorical(p=[0.0, 0.0, 1.0], scope=0) * evidence_branches()
     spn = 0.64 * upper + 0.36 * lower

     n = 40000
     y_index = 0
     x_instance = np.array([[1, 1, 0]], dtype=float)
     scores = weight_of_evidence(spn, y_index, x_instance, n, ds_context.domains[y_index].shape[0])
     expected = np.array([[np.nan, 0, 0]])

     # NaN entries are dropped on both sides before the element-wise comparison.
     scores = scores[~np.isnan(scores)]
     expected = expected[~np.isnan(expected)]
     self.assertTrue((scores == expected).all())
def get_gender_spn():
    """Build a small SPN over two categorical variables and one Gaussian.

    Variables 0 and 1 are modelled by a two-component mixture of products
    of Categorical leaves; variable 2 is an independent Gaussian factor.
    The root scope is sorted before the SPN is returned.
    """
    from spn.structure.leaves.parametric.Parametric import Categorical, Gaussian

    first_branch = Categorical(p=[0.0, 1.0], scope=[0]) * Categorical(p=[0.2, 0.8], scope=[1])
    second_branch = Categorical(p=[1.0, 0.0], scope=[0]) * Categorical(p=[0.7, 0.3], scope=[1])
    mixture = 0.4 * first_branch + 0.6 * second_branch

    root = mixture * Gaussian(mean=20, stdev=3, scope=[2])
    root.scope = sorted(root.scope)
    return root
Beispiel #5
0
    def test_induced_trees(self):
        """MPE must fill the missing Gaussian value from the branch selected by column 1."""
        low_branch = Gaussian(mean=10, stdev=1, scope=0) * Categorical(p=[1.0, 0], scope=1)
        high_branch = Gaussian(mean=50, stdev=1, scope=0) * Categorical(p=[0, 1.0], scope=1)
        spn = 0.5 * low_branch + 0.5 * high_branch

        # Column 0 is missing (NaN) in both rows; column 1 holds 0 and then 1.
        data = np.array([[np.nan, 0.0], [np.nan, 1.0]])

        mpevals = mpe(spn, data)

        for row, expected_mean in enumerate((10, 50)):
            self.assertAlmostEqual(mpevals[row, 0], expected_mean)
Beispiel #6
0
def create_SPN2():
    """Build, id-assign and validate an example SPN over variables 0, 1 and 2.

    The structure is assembled from explicit Sum and Product nodes. Ids are
    assigned and scopes rebuilt bottom-up before validity is asserted.
    """
    from spn.algorithms.Validity import is_valid
    from spn.structure.Base import Sum, Product
    from spn.structure.Base import assign_ids
    from spn.structure.Base import rebuild_scopes_bottom_up
    from spn.structure.leaves.parametric.Parametric import Categorical

    # Left half: leaf on variable 0 times a mixture over variables 1 and 2.
    mixture_children = [
        Product(children=[
            Categorical(p=[0.3, 0.7], scope=1),
            Categorical(p=[0.4, 0.6], scope=2),
        ]),
        Product(children=[
            Categorical(p=[0.5, 0.5], scope=1),
            Categorical(p=[0.6, 0.4], scope=2),
        ]),
    ]
    inner_sum = Sum(weights=[0.3, 0.7], children=mixture_children)
    left = Product(children=[Categorical(p=[0.2, 0.8], scope=0), inner_sum])

    # Right half: nested product of three independent leaves.
    pair = Product(children=[
        Categorical(p=[0.2, 0.8], scope=0),
        Categorical(p=[0.3, 0.7], scope=1),
    ])
    right = Product(children=[pair, Categorical(p=[0.4, 0.6], scope=2)])

    root = Sum(weights=[0.4, 0.6], children=[left, right])

    assign_ids(root)
    rebuild_scopes_bottom_up(root)

    valid, message = is_valid(root)
    assert valid, message

    return root
Beispiel #7
0
    def test_induced_trees(self):
        """Sampling must fill the missing Gaussian value from the branch selected by column 1."""
        # The tiny standard deviation makes every sample land on the mean.
        low_branch = Gaussian(mean=10, stdev=0.000000001, scope=0) * Categorical(p=[1.0, 0], scope=1)
        high_branch = Gaussian(mean=50, stdev=0.000000001, scope=0) * Categorical(p=[0, 1.0], scope=1)
        spn = 0.5 * low_branch + 0.5 * high_branch

        rand_gen = np.random.RandomState(17)

        # Column 0 is missing (NaN) in both rows; column 1 holds 0 and then 1.
        data = np.array([[np.nan, 0.0], [np.nan, 1.0]])

        samples = sample_instances(spn, data, rand_gen)

        for row, expected_mean in enumerate((10, 50)):
            self.assertAlmostEqual(samples[row, 0], expected_mean)
Beispiel #8
0
    def test_eval_parametric(self):
        """Compare SPFlow and TensorFlow log-likelihoods for a product of parametric leaves.

        Afterwards a copy of the SPN is converted to a TF graph and back, and
        its string equation must equal that of the original SPN.
        """
        data = np.ones((1, 7), dtype=np.float32)

        spn = (
            Gaussian(mean=1.0, stdev=1.0, scope=[0])
            * Exponential(l=1.0, scope=[1])
            * Gamma(alpha=1.0, beta=1.0, scope=[2])
            * LogNormal(mean=1.0, stdev=1.0, scope=[3])
            * Poisson(mean=1.0, scope=[4])
            * Bernoulli(p=0.6, scope=[5])
            * Categorical(p=[0.1, 0.2, 0.7], scope=[6])
        )

        reference_ll = log_likelihood(spn, data)
        tensorflow_ll = eval_tf(spn, data)
        self.assertTrue(np.all(np.isclose(reference_ll, tensorflow_ll)))

        spn_copy = Copy(spn)
        _, _, variable_dict = spn_to_tf_graph(spn_copy, data, 1)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            tf_graph_to_spn(variable_dict)

        # The round trip through TensorFlow must leave the equation unchanged.
        self.assertEqual(spn_to_str_equation(spn), spn_to_str_equation(spn_copy))
Beispiel #9
0
    def create_leaf(data, ds_context, scope):
        """Create a leaf node for the single variable ``scope[0]``.

        REAL variables get an identity leaf when ``identity_numeric`` is set,
        otherwise a piecewise leaf. DISCRETE variables get a Categorical leaf
        whose probabilities are the relative frequencies of the first data
        column, optionally smoothed by ``prior_weight``.
        ``identity_numeric`` and ``prior_weight`` come from the enclosing scope.

        Raises:
            Exception: if the variable's meta type is neither REAL nor DISCRETE.
        """
        idx = scope[0]
        meta_type = ds_context.meta_types[idx]

        if meta_type == MetaType.REAL:
            if identity_numeric:
                return create_identity_leaf(data, scope)

            # A prior weight of exactly zero is passed on as None.
            leaf_prior = None if prior_weight == 0. else prior_weight
            return create_piecewise_leaf(data, ds_context, scope, prior_weight=leaf_prior)

        if meta_type == MetaType.DISCRETE:
            unique, counts = np.unique(data[:, 0], return_counts=True)

            # Place every observed count at the index given by its (integer) value.
            sorted_counts = np.zeros(len(ds_context.domains[idx]), dtype=np.float64)
            for value, count in zip(unique, counts):
                sorted_counts[int(value)] = count

            p = sorted_counts / data.shape[0]

            # Regularize by adding the prior weight to every entry, then renormalize.
            if prior_weight > 0.:
                p += prior_weight
            p = p / np.sum(p)

            return Categorical(p, scope)

        raise Exception("Method learn_mspn_for_aqp(...) cannot create leaf for " + str(meta_type))
Beispiel #10
0
def getSpn1():
    """Return an example SPN over variables 0, 1 and 2 built with the operator DSL."""
    head = Categorical(p=[0.2, 0.8], scope=0)
    inner_mixture = (
        0.3 * (Categorical(p=[0.3, 0.7], scope=1) * Categorical(p=[0.4, 0.6], scope=2))
        + 0.7 * (Categorical(p=[0.5, 0.5], scope=1) * Categorical(p=[0.6, 0.4], scope=2))
    )
    left = head * inner_mixture

    # Fully factorized component over all three variables.
    right = (
        Categorical(p=[0.2, 0.8], scope=0)
        * Categorical(p=[0.3, 0.7], scope=1)
        * Categorical(p=[0.4, 0.6], scope=2)
    )

    return 0.4 * left + 0.6 * right
Beispiel #11
0
 def test_mutual_info(self):
     """Check mutual_information against hand-computed entropies.

     Uses the same SPN as the entropy test.
     """
     # The toy data set only serves to populate the domains of ds_context.
     train_data = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [2.0, 0.0, 1.0]])
     ds_context = Context(meta_types=[MetaType.DISCRETE] * 3)
     ds_context.add_domains(train_data)
     ds_context.parametric_type = [Categorical] * 3

     def evidence_branches():
         # Fresh copy of the sub-mixture over variables 1 and 2.
         first = Categorical(p=[7 / 34, 27 / 34], scope=1) * Categorical(p=[1.0, 0.0], scope=2)
         second = Categorical(p=[21 / 22, 1 / 22], scope=1) * Categorical(p=[0.0, 1.0], scope=2)
         return 0.34 * first + 0.66 * second

     upper = Categorical(p=[0.25, 0.75, 0.0], scope=0) * evidence_branches()
     lower = Categorical(p=[0.0, 0.0, 1.0], scope=0) * evidence_branches()
     spn = 0.64 * upper + 0.36 * lower

     def binary_entropy(q):
         return -q * np.log(q) - (1 - q) * np.log(1 - q)

     # Reference value: I(X2; X3) = H(X2) + H(X3) - H(X2, X3).
     p2 = 0.3
     p3 = 0.66
     h_x2 = binary_entropy(p2)
     h_x3 = binary_entropy(p3)
     h_x2x3 = -(p2 * np.log(p2) + (1 - p2) * np.log(1 - p2) + 0.9 * np.log(0.9) + 0.1 * np.log(0.1))
     mi_x2x3 = h_x2 + h_x3 - h_x2x3
     self.assertAlmostEqual(mi_x2x3, mutual_information(spn, ds_context, {1}, {2}))

     mi_x1x2 = 0
     self.assertAlmostEqual(mi_x1x2, mutual_information(spn, ds_context, {1}, {0}))

     # Mutual information must be symmetric in its two variable sets.
     self.assertAlmostEqual(
         mutual_information(spn, ds_context, {2}, {1}),
         mutual_information(spn, ds_context, {1}, {2}),
     )
     self.assertAlmostEqual(
         mutual_information(spn, ds_context, {0, 2}, {1}),
         mutual_information(spn, ds_context, {1}, {0, 2}),
     )

     # The remaining pairing is expected to be zero.
     self.assertAlmostEqual(0, mutual_information(spn, ds_context, {2, 1}, {0}))
Beispiel #12
0
 def test_conditional_mutual_info(self):
     """Check conditional_mutual_information against hand-computed entropies.

     Uses the same SPN as the entropy test.
     """
     # The toy data set only serves to populate the domains of ds_context.
     train_data = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [2.0, 0.0, 1.0]])
     ds_context = Context(meta_types=[MetaType.DISCRETE] * 3)
     ds_context.add_domains(train_data)
     ds_context.parametric_type = [Categorical] * 3

     def evidence_branches():
         # Fresh copy of the sub-mixture over variables 1 and 2.
         first = Categorical(p=[7 / 34, 27 / 34], scope=1) * Categorical(p=[1.0, 0.0], scope=2)
         second = Categorical(p=[21 / 22, 1 / 22], scope=1) * Categorical(p=[0.0, 1.0], scope=2)
         return 0.34 * first + 0.66 * second

     upper = Categorical(p=[0.25, 0.75, 0.0], scope=0) * evidence_branches()
     lower = Categorical(p=[0.0, 0.0, 1.0], scope=0) * evidence_branches()
     spn = 0.64 * upper + 0.36 * lower

     # Hand-computed marginal and joint entropies used by all three checks.
     p2 = 0.3
     p3 = 0.66
     h_x1 = -(0.16 * np.log(0.16) + 0.36 * np.log(0.36) + 0.48 * np.log(0.48))
     h_x2 = -p2 * np.log(p2) - (1 - p2) * np.log(1 - p2)
     h_x3 = -p3 * np.log(p3) - (1 - p3) * np.log(1 - p3)
     h_x2x1 = -(0.7 * np.log(0.7) + 0.3 * np.log(0.3)) + h_x1
     h_x3x1 = -(0.66 * np.log(0.66) + 0.34 * np.log(0.34)) + h_x1
     h_x2x3 = -(p2 * np.log(p2) + (1 - p2) * np.log(1 - p2) + 0.9 * np.log(0.9) + 0.1 * np.log(0.1))
     h_x2x3x1 = h_x1 + h_x2x3
     # Joint entropies do not depend on the order of their variables.
     h_x1x3 = h_x3x1
     h_x1x2x3 = h_x2x3x1

     # I(X2; X3 | X1)
     cmi_x2x3_x1 = h_x2x1 + h_x3x1 - h_x2x3x1 - h_x1
     self.assertAlmostEqual(cmi_x2x3_x1, conditional_mutual_information(spn, ds_context, {1}, {2}, {0}))

     # I(X1; X2 | X3)
     cmi_x1x2_x3 = h_x1x3 + h_x2x3 - h_x1x2x3 - h_x3
     self.assertAlmostEqual(cmi_x1x2_x3, conditional_mutual_information(spn, ds_context, {1}, {0}, {2}))

     # I(X1; X3 | X2)
     cmi_x1x3_x2 = h_x2x1 + h_x2x3 - h_x1x2x3 - h_x2
     self.assertAlmostEqual(cmi_x1x3_x2, conditional_mutual_information(spn, ds_context, {2}, {0}, {1}))
Beispiel #13
0
def create_SPN():
    """Return an example SPN over variables 0, 1 and 2 built with the operator DSL."""
    from spn.structure.leaves.parametric.Parametric import Categorical

    head = Categorical(p=[0.2, 0.8], scope=0)
    inner_mixture = (
        0.3 * (Categorical(p=[0.3, 0.7], scope=1) * Categorical(p=[0.4, 0.6], scope=2))
        + 0.7 * (Categorical(p=[0.5, 0.5], scope=1) * Categorical(p=[0.6, 0.4], scope=2))
    )
    left = head * inner_mixture

    # Fully factorized component over all three variables.
    right = (
        Categorical(p=[0.2, 0.8], scope=0)
        * Categorical(p=[0.3, 0.7], scope=1)
        * Categorical(p=[0.4, 0.6], scope=2)
    )

    spn = 0.4 * left + 0.6 * right
    return spn
Beispiel #14
0
def mix_categorical(weighted_nodes):
    """Collapse a weighted set of Categorical leaves into one Categorical leaf.

    Args:
        weighted_nodes: sequence of ``(weight, node)`` pairs. The weights must
            sum to 1 (within floating-point tolerance) and every node must be
            a Categorical.

    Returns:
        A Categorical whose probabilities are the weighted sum of the input
        probabilities. Its length and scope are taken from the first node.

    Raises:
        AssertionError: if the weights do not sum to 1 or a node is not a
            Categorical.
    """
    total_weight = sum(weight for (weight, _) in weighted_nodes)
    # Compare with a tolerance: e.g. ten weights of 0.1 sum to
    # 0.9999999999999999, which an exact ``== 1`` check would reject.
    assert np.isclose(total_weight, 1.0), "weights must sum to 1, got %s" % total_weight

    first_node = weighted_nodes[0][1]
    p = np.zeros(len(first_node.p))
    scope = first_node.scope

    for (weight, node) in weighted_nodes:
        assert isinstance(node, Categorical)
        for i in range(len(p)):
            p[i] += weight * node.p[i]

    return Categorical(p=p, scope=scope)
Beispiel #15
0
def test_cuda_categorical():
    """Compare CUDA-compiled log-likelihoods of a product of Categorical leaves with SPFlow.

    Returns 0 early, without checking anything, when the CUDA compiler is
    not available.
    """
    # Minimal SPN: one three-valued Categorical leaf per feature 0..5.
    leaf_probabilities = [
        [0.35, 0.55, 0.1],
        [0.25, 0.625, 0.125],
        [0.5, 0.2, 0.3],
        [0.6, 0.15, 0.25],
        [0.7, 0.11, 0.19],
        [0.8, 0.14, 0.06],
    ]
    leaves = [Categorical(p=probs, scope=feature) for feature, probs in enumerate(leaf_probabilities)]
    spn = Product(children=leaves)

    # One column of 30 random category indices per feature.
    columns = tuple(np.random.randint(3, size=30) for _ in range(6))
    inputs = np.column_stack(columns).astype("float64")

    if not CUDACompiler.isAvailable():
        print("Test not supported by the compiler installation")
        return 0

    # Run the compiled kernel and SPFlow's own inference on the same inputs.
    results = CUDACompiler().log_likelihood(spn, inputs, supportMarginal=False)
    reference = log_likelihood(spn, inputs).reshape(30)

    # Fall back to a comparison in normal space if the log values are not close.
    if not np.all(np.isclose(results, reference)):
        assert np.all(np.isclose(np.exp(results), np.exp(reference)))
Beispiel #16
0
def create_SPN():
    """Return a validated example SPN over variables 0, 1 and 2.

    Raises:
        AssertionError: if the validity check reports the SPN as invalid; the
            assertion message is the message returned by ``is_valid``.
    """
    from spn.algorithms.Validity import is_valid

    from spn.structure.leaves.parametric.Parametric import Categorical

    spn = 0.4 * (Categorical(p=[0.2, 0.8], scope=0) * \
                 (0.3 * (Categorical(p=[0.3, 0.7], scope=1) * Categorical(p=[0.4, 0.6], scope=2)) + \
                  0.7 * (Categorical(p=[0.5, 0.5], scope=1) * Categorical(p=[0.6, 0.4], scope=2)))) \
          + 0.6 * (Categorical(p=[0.2, 0.8], scope=0) * \
                   Categorical(p=[0.3, 0.7], scope=1) * \
                   Categorical(p=[0.4, 0.6], scope=2))

    # is_valid returns a (flag, message) pair; asserting on the pair itself
    # would always pass because a non-empty tuple is truthy.
    valid, message = is_valid(spn)
    assert valid, message

    return spn
def get_credit_spn():
    """Build an example SPN over four categorical variables (scopes 0-3).

    Variables 1-3 are modelled by a two-component mixture; variable 0 is an
    independent Categorical factor. The root scope is sorted before the SPN
    is returned.
    """
    from spn.structure.Base import Product
    from spn.structure.leaves.parametric.Parametric import Categorical

    # First mixture component: leaf on variable 1 times a mixture over 2 and 3.
    branch_a = Categorical(p=[0.0, 1.0], scope=[2]) * Categorical(p=[0.5, 0.5], scope=[3])
    branch_b = Categorical(p=[1.0, 0.0], scope=[2]) * Categorical(p=[0.1, 0.9], scope=[3])
    inner_mixture = 0.3 * branch_a + 0.7 * branch_b
    first_component = Categorical(p=[0.0, 1.0], scope=[1]) * inner_mixture

    # Second mixture component: an explicit product whose scope is set by hand.
    second_component = Product([
        Categorical(p=[1.0, 0.0], scope=[1]),
        Categorical(p=[0.0, 1.0], scope=[2]),
        Categorical(p=[1.0, 0.0], scope=[3]),
    ])
    second_component.scope = [1, 2, 3]

    mixture = 0.8 * first_component + 0.2 * second_component
    root = mixture * Categorical(p=[0.2, 0.8], scope=[0])

    root.scope = sorted(root.scope)
    return root
Beispiel #18
0
    def __init__(self):
        """Build the example SPN over variables 0, 1 and 2 and store it in ``self.spn``."""
        # Left half: leaf on variable 0 times a mixture over variables 1 and 2.
        mixture_children = [
            Product(children=[
                Categorical(p=[0.3, 0.7], scope=1),
                Categorical(p=[0.4, 0.6], scope=2),
            ]),
            Product(children=[
                Categorical(p=[0.5, 0.5], scope=1),
                Categorical(p=[0.6, 0.4], scope=2),
            ]),
        ]
        inner_sum = Sum(weights=[0.3, 0.7], children=mixture_children)
        left = Product(children=[Categorical(p=[0.2, 0.8], scope=0), inner_sum])

        # Right half: nested product of three independent leaves.
        pair = Product(children=[
            Categorical(p=[0.2, 0.8], scope=0),
            Categorical(p=[0.3, 0.7], scope=1),
        ])
        right = Product(children=[pair, Categorical(p=[0.4, 0.6], scope=2)])

        self.spn = Sum(weights=[0.4, 0.6], children=[left, right])

        assign_ids(self.spn)
        rebuild_scopes_bottom_up(self.spn)
Beispiel #19
0
    def test_spn_to_torch(self):
        """Convert SPFlow nodes to their torch counterparts and compare parameters."""
        # SPFlow side.
        gaussian = Gaussian(mean=0.0, stdev=1.0, scope=0)
        categorical = Categorical(p=[0.1, 0.3, 0.6])
        # NOTE(review): four weights for two children; only the weight values
        # are compared below. Confirm this mismatch is intended.
        sum_node = Sum(weights=[0.1, 0.2, 0.3, 0.4], children=[gaussian, categorical])
        product_node = Product(children=[gaussian, categorical])

        # Torch side.
        torch_gaussian = GaussianNode.from_spn(gaussian)
        torch_categorical = CategoricalNode.from_spn(categorical)
        torch_sum = SumNode.from_spn(sum_node)
        # The product conversion is only exercised; its result is not inspected.
        ProductNode.from_spn(product_node)

        self.assertEqual(torch_gaussian.mean, gaussian.mean)
        self.assertEqual(torch_gaussian.std, gaussian.stdev)

        converted_p = torch_categorical.p.detach().numpy()
        self.assertTrue(np.isclose(converted_p, categorical.p, atol=DELTA).all())

        converted_weights = torch_sum.weights.detach().numpy()
        self.assertTrue(np.isclose(converted_weights, sum_node.weights, atol=DELTA).all())
def test_cpu_categorical():
    """Compare CPU-compiled log-likelihoods of a product of Categorical leaves with SPFlow.

    Ten input entries are replaced by NaN. Returns 0 early, without checking
    anything, when the compiler reports that vectorization is not supported.
    """
    # Minimal SPN: one three-valued Categorical leaf per feature 0..5.
    leaf_probabilities = [
        [0.35, 0.55, 0.1],
        [0.25, 0.625, 0.125],
        [0.5, 0.2, 0.3],
        [0.6, 0.15, 0.25],
        [0.7, 0.11, 0.19],
        [0.8, 0.14, 0.06],
    ]
    leaves = [Categorical(p=probs, scope=feature) for feature, probs in enumerate(leaf_probabilities)]
    spn = Product(children=leaves)

    # One column of 30 random category indices per feature.
    columns = tuple(np.random.randint(3, size=30) for _ in range(6))
    inputs = np.column_stack(columns).astype("float64")

    # Overwrite ten distinct, randomly chosen entries with NaN.
    nan_positions = np.random.choice(inputs.size, 10, replace=False)
    inputs.ravel()[nan_positions] = np.nan

    if not CPUCompiler.isVectorizationSupported():
        print("Test not supported by the compiler installation")
        return 0

    # Run the compiled kernel and SPFlow's own inference on the same inputs.
    compiler = CPUCompiler(computeInLogSpace=False, vectorize=False)
    results = compiler.log_likelihood(spn, inputs, batchSize=10)
    reference = log_likelihood(spn, inputs).reshape(30)

    # Fall back to a comparison in normal space if the log values are not close.
    if not np.all(np.isclose(results, reference)):
        assert np.all(np.isclose(np.exp(results), np.exp(reference)))
Beispiel #21
0
def test_vector_slp_escaping_users():
    """Compare a vectorized, compiled kernel with SPFlow on a mixed-leaf SPN.

    The SPN combines Gaussian (scopes 0-11), Histogram (scopes 12-19) and
    Categorical (scopes 20-27) leaves. The nodes ``p2`` and ``s2`` are each
    used as a child by more than one parent (presumably these are the
    "escaping users" of the test name; confirm). The kernel is compiled for
    batch size 1 and executed once per sample; any result that is not close
    to SPFlow's log-likelihood raises an AssertionError.
    """
    # Gaussian leaves.
    g0 = Gaussian(mean=0.00, stdev=1, scope=0)
    g1 = Gaussian(mean=0.01, stdev=0.75, scope=1)
    g2 = Gaussian(mean=0.02, stdev=0.5, scope=2)
    g3 = Gaussian(mean=0.03, stdev=0.25, scope=3)
    g4 = Gaussian(mean=0.04, stdev=1, scope=4)
    g5 = Gaussian(mean=0.05, stdev=0.25, scope=5)
    g6 = Gaussian(mean=0.06, stdev=0.5, scope=6)
    g7 = Gaussian(mean=0.07, stdev=0.75, scope=7)
    g8 = Gaussian(mean=0.08, stdev=1, scope=8)
    g9 = Gaussian(mean=0.09, stdev=0.75, scope=9)
    g10 = Gaussian(mean=0.10, stdev=1, scope=10)
    g11 = Gaussian(mean=0.11, stdev=1, scope=11)

    # Histogram leaves.
    h0 = Histogram([0., 1., 2.], [0.1, 0.9], [1, 1], scope=12)
    h1 = Histogram([0., 1., 2.], [0.2, 0.8], [1, 1], scope=13)
    h2 = Histogram([0., 1., 2.], [0.3, 0.7], [1, 1], scope=14)
    h3 = Histogram([0., 1., 2.], [0.4, 0.6], [1, 1], scope=15)
    h4 = Histogram([0., 1., 2.], [0.5, 0.5], [1, 1], scope=16)
    h5 = Histogram([0., 1., 2.], [0.6, 0.4], [1, 1], scope=17)
    h6 = Histogram([0., 1., 2.], [0.7, 0.3], [1, 1], scope=18)
    h7 = Histogram([0., 1., 2.], [0.8, 0.2], [1, 1], scope=19)

    # Categorical leaves with three categories each.
    c0 = Categorical(p=[0.1, 0.1, 0.8], scope=20)
    c1 = Categorical(p=[0.2, 0.2, 0.6], scope=21)
    c2 = Categorical(p=[0.3, 0.3, 0.4], scope=22)
    c3 = Categorical(p=[0.4, 0.4, 0.2], scope=23)
    c4 = Categorical(p=[0.5, 0.4, 0.1], scope=24)
    c5 = Categorical(p=[0.6, 0.3, 0.1], scope=25)
    c6 = Categorical(p=[0.7, 0.2, 0.1], scope=26)
    c7 = Categorical(p=[0.8, 0.1, 0.1], scope=27)

    # Three layers of equally weighted sums, four chains side by side.
    s0 = Sum(children=[g8, h4], weights=[0.5, 0.5])
    s1 = Sum(children=[g9, h5], weights=[0.5, 0.5])
    s2 = Sum(children=[g10, c6], weights=[0.5, 0.5])
    s3 = Sum(children=[g11, h7], weights=[0.5, 0.5])

    s4 = Sum(children=[s0, c4], weights=[0.5, 0.5])
    s5 = Sum(children=[s1, c5], weights=[0.5, 0.5])
    s6 = Sum(children=[s2, g6], weights=[0.5, 0.5])
    s7 = Sum(children=[s3, c7], weights=[0.5, 0.5])

    s8 = Sum(children=[s4, g4], weights=[0.5, 0.5])
    s9 = Sum(children=[s5, g5], weights=[0.5, 0.5])
    s10 = Sum(children=[s6, h6], weights=[0.5, 0.5])
    s11 = Sum(children=[s7, g7], weights=[0.5, 0.5])

    # Three layers of products stacked on top of the sum chains.
    p0 = Product(children=[h0, s8])
    p1 = Product(children=[c1, s9])
    p2 = Product(children=[c2, s10])
    p3 = Product(children=[g3, s11])

    p4 = Product(children=[p0, g0])
    p5 = Product(children=[p1, g1])
    p6 = Product(children=[p2, h2])
    p7 = Product(children=[p3, c3])

    p8 = Product(children=[p4, c0])
    p9 = Product(children=[p5, h1])
    p10 = Product(children=[p6, g2])
    p11 = Product(children=[p7, h3])

    s12 = Sum(children=[p8, p9], weights=[0.5, 0.5])
    s13 = Sum(children=[p10, p11], weights=[0.5, 0.5])

    # p2 (also a child of p6) and s2 (also a child of s6) are reused here.
    s14 = Sum(children=[s12, p2], weights=[0.5, 0.5])
    s15 = Sum(children=[s13, s2], weights=[0.5, 0.5])

    spn = Product(children=[s14, s15])

    # Randomly sample input values: normal draws for the Gaussian columns,
    # random integers for the histogram and categorical columns.
    num_samples = 100
    inputs = np.column_stack((
        # gaussian
        np.random.normal(loc=0.5, scale=1, size=num_samples),
        np.random.normal(loc=0.125, scale=0.25, size=num_samples),
        np.random.normal(loc=0.345, scale=0.24, size=num_samples),
        np.random.normal(loc=0.456, scale=0.1, size=num_samples),
        np.random.normal(loc=0.94, scale=0.48, size=num_samples),
        np.random.normal(loc=0.56, scale=0.42, size=num_samples),
        np.random.normal(loc=0.76, scale=0.14, size=num_samples),
        np.random.normal(loc=0.32, scale=0.58, size=num_samples),
        np.random.normal(loc=0.58, scale=0.219, size=num_samples),
        np.random.normal(loc=0.14, scale=0.52, size=num_samples),
        np.random.normal(loc=0.24, scale=0.42, size=num_samples),
        np.random.normal(loc=0.34, scale=0.1, size=num_samples),
        # histogram
        np.random.randint(low=0, high=2, size=num_samples),
        np.random.randint(low=0, high=2, size=num_samples),
        np.random.randint(low=0, high=2, size=num_samples),
        np.random.randint(low=0, high=2, size=num_samples),
        np.random.randint(low=0, high=2, size=num_samples),
        np.random.randint(low=0, high=2, size=num_samples),
        np.random.randint(low=0, high=2, size=num_samples),
        np.random.randint(low=0, high=2, size=num_samples),
        # categorical
        np.random.randint(low=0, high=3, size=num_samples),
        np.random.randint(low=0, high=3, size=num_samples),
        np.random.randint(low=0, high=3, size=num_samples),
        np.random.randint(low=0, high=3, size=num_samples),
        np.random.randint(low=0, high=3, size=num_samples),
        np.random.randint(low=0, high=3, size=num_samples),
        np.random.randint(low=0, high=3, size=num_samples),
        np.random.randint(low=0, high=3, size=num_samples))).astype("float64")

    # Compute the reference results using the inference from SPFlow.
    reference = log_likelihood(spn, inputs)
    reference = reference.reshape(num_samples)

    # Compile the kernel with batch size 1 to enable SLP vectorization.
    compiler = CPUCompiler(vectorize=True,
                           computeInLogSpace=True,
                           vectorLibrary="LIBMVEC")
    kernel = compiler.compile_ll(spn=spn, batchSize=1, supportMarginal=False)

    # Execute the compiled Kernel, one sample per call.
    time_sum = 0  # accumulated wall-clock time spent inside compiler.execute
    for i in range(len(reference)):
        # Check the computation results against the reference
        start = time.time()
        result = compiler.execute(kernel, inputs=np.array([inputs[i]]))
        time_sum = time_sum + time.time() - start
        # Progress line; end='\r' makes successive lines overwrite each other.
        print(
            f"evaluation #{i}: result: {result[0]:16.8f}, reference: {reference[i]:16.8f}",
            end='\r')
        if not np.isclose(result, reference[i]):
            print(
                f"\nevaluation #{i} failed: result: {result[0]:16.8f}, reference: {reference[i]:16.8f}"
            )
            raise AssertionError()
    print(f"\nExecution of {len(reference)} samples took {time_sum} seconds.")
Beispiel #22
0
    y = np.array(y).reshape(-1, )
    z = np.random.choice([1, 2], int(1e4), replace=True, p=[0.4, 0.6])
    df = pd.DataFrame(dict(zip(['X', 'Y', 'Z'], [x, y, z]))).astype(str)
    df, vd, pars = fn.transform_dataset(df)
    spn = spn_handler.load_or_create_spn(df,
                                         vd,
                                         pars,
                                         'mini_example',
                                         0.4,
                                         0.5,
                                         nrows=None,
                                         seed=1,
                                         force_create=True,
                                         clustering='km_rule_clustering')
    spn = spn.children[1]
    manspn = ( 0.3 * (Categorical(p=[0.9, 0.1], scope=0) * Categorical(p=[0.55, 0.4, 0.05], scope=1))
               + 0.7 * (Categorical(p=[0., 1.], scope=0) * Categorical(p=[0.1, 0.2, 0.7], scope=1)) ) \
            * (Categorical(p=[0.4, 0.6], scope=2))
    # plot leaves from example
    p = [[0.9, 0.1], [0.4, 0.55, 0.05], [0., 1.], [0.1, 0.2, 0.7], [0.4, 0.6]]
    y = 2
    size = (2.88 * y, y)
    fig, axes = plt.subplots(1, 4, sharey=True, squeeze=True, figsize=size)
    for i, var in enumerate(['X', 'Y', 'X', 'Y']):
        currp = p[i]
        ax = axes[i]
        # if i in [1,2]:
        #     d = df[var].value_counts(sort=False).divide(len(df))
        # if i in [3,4]:

        ticks = list(range(len(currp)))
Beispiel #23
0
    #
    # geometric
    geometric = Geometric(p=.025, scope=[0])

    pdf_x, pdf_y = approximate_density(geometric, x_range)
    fig, ax = plt.subplots(1, 1)
    ax.plot(pdf_x, pdf_y, label="geometric")
    print('Geometric Mode:', geometric.mode)
    plt.axvline(x=geometric.mode, color='r')
    if show_plots:
        plt.show()

    #
    # categorical
    categorical = Categorical(p=[0.1, 0.05, 0.3, 0.05, 0.2, 0.2, 0.1],
                              scope=[0])

    pdf_x, pdf_y = approximate_density(categorical, np.arange(categorical.k))
    fig, ax = plt.subplots(1, 1)
    ax.plot(pdf_x, pdf_y, label="categorical")
    print('Categorical Mode:', categorical.mode)
    plt.axvline(x=categorical.mode, color='r')
    if show_plots:
        plt.show()

    #
    # exponential
    exponential = Exponential(l=5, scope=[0])

    pdf_x, pdf_y = approximate_density(exponential, x_range)
    fig, ax = plt.subplots(1, 1)
Beispiel #24
0
"""
=================================
Domain Specific Language for SPNs
=================================

We start by creating an SPN. Using a Domain-Specific Language (DSL), we can
quickly create an SPN of categorical leaf nodes like this:
"""

from spn.structure.leaves.parametric.Parametric import Categorical
from spn.io.Graphics import draw_spn

import matplotlib.pyplot as plt

# Weighted sum of two components over variables 0, 1 and 2:
#   * weight 0.4: a leaf on variable 0 times a mixture (weights 0.3 / 0.7)
#     of products over variables 1 and 2;
#   * weight 0.6: a plain product of one leaf per variable.
spn = 0.4 * (
    Categorical(p=[0.2, 0.8], scope=0) *
    (0.3 *
     (Categorical(p=[0.3, 0.7], scope=1) * Categorical(p=[0.4, 0.6], scope=2))
     + 0.7 *
     (Categorical(p=[0.5, 0.5], scope=1) * Categorical(p=[0.6, 0.4], scope=2)))
) + 0.6 * (Categorical(p=[0.2, 0.8], scope=0) * Categorical(
    p=[0.3, 0.7], scope=1) * Categorical(p=[0.4, 0.6], scope=2))

# Draw the structure that was just built.
ax = draw_spn(spn)
Beispiel #25
0
from spn.structure.leaves.parametric.Parametric import Categorical

from spn.structure.Base import Sum, Product

from spn.structure.Base import assign_ids, rebuild_scopes_bottom_up

# Build the SPN from explicit nodes. p0 and p1 are the two components of the
# mixture s1 over variables 1 and 2.
p0 = Product(children=[
    Categorical(p=[0.3, 0.7], scope=1),
    Categorical(p=[0.4, 0.6], scope=2)
])
p1 = Product(children=[
    Categorical(p=[0.5, 0.5], scope=1),
    Categorical(p=[0.6, 0.4], scope=2)
])
s1 = Sum(weights=[0.3, 0.7], children=[p0, p1])
# p2 combines a leaf on variable 0 with the mixture s1.
p2 = Product(children=[Categorical(p=[0.2, 0.8], scope=0), s1])
# p3 and p4 form a nested product of one leaf per variable.
p3 = Product(children=[
    Categorical(p=[0.2, 0.8], scope=0),
    Categorical(p=[0.3, 0.7], scope=1)
])
p4 = Product(children=[p3, Categorical(p=[0.4, 0.6], scope=2)])
# Root: weighted sum of the two products.
spn = Sum(weights=[0.4, 0.6], children=[p2, p4])

# The nodes above were created without ids or inner-node scopes; set both
# on the finished structure.
assign_ids(spn)
rebuild_scopes_bottom_up(spn)
import numpy as np
# A single instance with one value per variable (shape (1, 3)).
test_data = np.array([1.0, 0.0, 1.0]).reshape(-1, 3)
from spn.algorithms.Inference import log_likelihood

# Print the log-likelihood of the instance and its exponential.
ll = log_likelihood(spn, test_data)
print(ll, np.exp(ll))
Beispiel #26
0
 def test_entropy(self):
     """Compare entropy() on a hand-built SPN with closed-form entropies."""
     # How the training data and the SPN came about (kept for reference only;
     # ``N``, the number of samples, is not defined in this test):
     #
     #   # number of RVs
     #   M = 3
     #   # table of probabilities
     #   p1 = 0.6
     #   p2 = 0.3
     #   p31 = 0.1
     #   p32 = 0.9
     #   # generate x1 and x2
     #   x1 = np.random.binomial(1, p1, size=N) + np.random.binomial(1, p1, size=N)
     #   x2 = np.random.binomial(1, p2, size=N)
     #   x3 = np.zeros(N)
     #   # generate x3
     #   for i in range(N):
     #       if x2[i] == 1:
     #           x3[i] = np.random.binomial(1, p31, size=1)
     #       else:
     #           x3[i] = np.random.binomial(1, p32, size=1)
     #   # form a matrix, rows are instances and columns are RVs
     #   train_data = np.concatenate((x1, x2, x3)).reshape((M, N)).transpose()

     # This tiny data set is only used to derive the domains of ds_context.
     train_data = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [2.0, 0.0, 1.0]])
     ds_context = Context(meta_types=[MetaType.DISCRETE] * 3)
     ds_context.add_domains(train_data)
     ds_context.parametric_type = [Categorical] * 3

     def make_x2_x3_mixture():
         # Returns freshly constructed nodes on every call, so each root
         # branch owns its own copy of the sub-SPN over scopes 1 and 2.
         return (
             0.34 * (Categorical(p=[7 / 34, 27 / 34], scope=1) * Categorical(p=[1.0, 0.0], scope=2))
             + 0.66 * (Categorical(p=[21 / 22, 1 / 22], scope=1) * Categorical(p=[0.0, 1.0], scope=2))
         )

     first_branch = Categorical(p=[0.25, 0.75, 0.0], scope=0) * make_x2_x3_mixture()
     second_branch = Categorical(p=[0.0, 0.0, 1.0], scope=0) * make_x2_x3_mixture()
     spn = 0.64 * first_branch + 0.36 * second_branch

     # Closed-form ("real") entropies.
     p2 = 0.3
     h_x2 = -p2 * np.log(p2) - (1 - p2) * np.log(1 - p2)
     h_x2x3 = -(p2 * np.log(p2) + (1 - p2) * np.log(1 - p2) + 0.9 * np.log(0.9) + 0.1 * np.log(0.1))
     h_x1 = -(0.16 * np.log(0.16) + 0.36 * np.log(0.36) + 0.48 * np.log(0.48))
     h_x2x1 = -(0.7 * np.log(0.7) + 0.3 * np.log(0.3)) + h_x1
     h_x3x1 = -(0.66 * np.log(0.66) + 0.34 * np.log(0.34)) + h_x1
     h_x2x3x1 = h_x1 + h_x2x3

     # (expected entropy, set of RV indices), checked in this order.
     expectations = [
         (h_x2, {1}),
         (h_x2x3, {1, 2}),
         (h_x1, {0}),
         (h_x2x1, {1, 0}),
         (h_x3x1, {2, 0}),
         (h_x2x3x1, {1, 2, 0}),
     ]
     for expected, rvs in expectations:
         self.assertAlmostEqual(expected, entropy(spn, ds_context, rvs))

     # The result must not depend on how the RV set is written down.
     for one_way, other_way in (({0, 2}, {2, 0}), ({1, 2}, {2, 1})):
         self.assertAlmostEqual(entropy(spn, ds_context, one_way), entropy(spn, ds_context, other_way))
 def _deserialize_categorical(self, node, node_map):
     """Rebuild a Categorical leaf from the serialized ``node``.

     Probabilities and scope are read from ``node.categorical``; the id of the
     serialized node is copied onto the new leaf. ``node_map`` is accepted but
     not used here.
     """
     payload = node.categorical
     leaf = Categorical(p=payload.probabilities, scope=payload.scope)
     leaf.id = node.id
     return leaf
Beispiel #28
0
    node = Gaussian(np.inf, np.inf)
    data = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
    update_parametric_parameters_mle(node, data)
    assert np.isclose(node.mean, np.mean(data))
    assert np.isclose(node.stdev, np.std(data))

    node = Gamma(np.inf, np.inf)
    data = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
    update_parametric_parameters_mle(node, data)
    assert np.isclose(node.alpha / node.beta, np.mean(data)), node.alpha

    node = LogNormal(np.inf, np.inf)
    data = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
    update_parametric_parameters_mle(node, data)
    assert np.isclose(node.mean, np.log(data).mean(), atol=0.00001)
    assert np.isclose(node.stdev, np.log(data).std(), atol=0.00001)

    node = Poisson(np.inf)
    data = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
    update_parametric_parameters_mle(node, data)
    assert np.isclose(node.mean, np.mean(data))

    node = Categorical(np.array([1, 1, 1, 1, 1, 1]) / 6)
    data = np.array([0, 0, 1, 3, 5]).reshape(-1, 1)
    update_parametric_parameters_mle(node, data)
    assert np.isclose(node.p[0], 2 / 5)
    assert np.isclose(node.p[1], 1 / 5)
    assert np.isclose(node.p[2], 0)
    assert np.isclose(node.p[3], 1 / 5)
    assert np.isclose(node.p[4], 0)
 from spn.structure.leaves.parametric.Parametric import Categorical
 from spn.structure.leaves.parametric.SamplingRange import sample_categorical_node
 from spn.structure.leaves.parametric.InferenceRange import categorical_likelihood_range
 
 from spn.structure.Base import Context
 from spn.structure.StatisticalTypes import MetaType
 from spn.experiments.AQP.Ranges import NominalRange, NumericRange
 
 from spn.algorithms import SamplingRange
 
 
 
 # Seeded random state (seed 100).
 rand_gen = np.random.RandomState(100)

 # Create SPN leaves: two categorical nodes on scope [0] with mirrored
 # probabilities.
 node1 = Categorical(p=[0.9, 0.1], scope=[0])
 node2 = Categorical(p=[0.1, 0.9], scope=[0])

 # Piecewise-linear leaf on scope [1] with its peak at x = 1. The y values
 # are divided by the trapezoidal area so the curve integrates to one.
 x = np.array([0.,  1.,  2.,  3., 4.])
 y = np.array([0., 10., 0., 0., 0.])
 auc = np.trapz(y, x)
 y = y / auc
 node3 = PiecewiseLinear(x_range=x, y_range=y, bin_repr_points=x[1:-1], scope=[1])

 # Second density with its peak at x = 3, normalised the same way.
 x = np.array([0.,  1.,  2.,  3., 4.])
 y = np.array([0., 0., 0., 10., 0.])
 auc = np.trapz(y, x)
 y = y / auc
Beispiel #30
0
    
    pass



    
  

if __name__ == '__main__':
    from spn.structure.Base import Sum, Product, Leaf
    from spn.structure.leaves.parametric.Parametric import Categorical

    # Two products over scopes [2] and [3], mixed with weights 0.3 / 0.7.
    product_a = Categorical(p=[0.0, 1.0], scope=[2]) * Categorical(p=[0.5, 0.5], scope=[3])
    product_b = Categorical(p=[1.0, 0.0], scope=[2]) * Categorical(p=[0.1, 0.9], scope=[3])
    mixture_23 = 0.3 * product_a + 0.7 * product_b

    # Prepend a leaf on scope [1] to the mixture.
    branch_dsl = Categorical(p=[0.0, 1.0], scope=[1]) * mixture_23

    # Alternative branch built through the Product constructor; its scope is
    # assigned by hand right after construction.
    branch_ctor = Product([
        Categorical(p=[1.0, 0.0], scope=[1]),
        Categorical(p=[0.0, 1.0], scope=[2]),
        Categorical(p=[1.0, 0.0], scope=[3]),
    ])
    branch_ctor.scope = [1, 2, 3]

    # Mix both branches (0.8 / 0.2) and attach a leaf on scope [0].
    mixed = 0.8 * branch_dsl + 0.2 * branch_ctor
    spn = mixed * Categorical(p=[0.2, 0.8], scope=[0])

    #spn_util.plot_spn(spn, "rule_spn.pdf")

    extract_rules(spn)