def test_categorical_leaf_serialization(tmpdir):
    """Tests the binary serialization of two SPFlow Categorical leaf nodes
    by round-tripping and comparing the parameters before and after
    serialization & deserialization."""
    c1 = Categorical(p=[0.35, 0.55, 0.1], scope=1)
    c2 = Categorical(p=[0.25, 0.625, 0.125], scope=2)
    p = Product(children=[c1, c2])

    binary_file = os.path.join(tmpdir, "test.bin")
    print(f"Test binary file: {binary_file}")

    model = SPNModel(p, "uint8", "test")
    query = JointProbability(model)

    BinarySerializer(binary_file).serialize_to_file(query)
    deserialized = BinaryDeserializer(binary_file).deserialize_from_file()

    assert isinstance(deserialized, JointProbability)
    assert isinstance(deserialized.graph, SPNModel)
    assert deserialized.graph.featureType == model.featureType
    assert deserialized.graph.name == model.name

    deserialized = deserialized.graph.root
    assert isinstance(deserialized, Product)
    assert len(deserialized.children) == 2
    assert len(c1.p) == len(deserialized.children[0].p)
    for i, p in enumerate(c1.p):
        assert p == deserialized.children[0].p[i]
    assert len(c2.p) == len(deserialized.children[1].p)
    for i, p in enumerate(c2.p):
        assert p == deserialized.children[1].p[i]
def test_conditional_probability(self):
    # test if conditional probability is correct
    # same spn as in entropy test

    # only for generating the ds_context
    train_data = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [2.0, 0.0, 1.0]])

    # spn
    ds_context = Context(meta_types=[MetaType.DISCRETE] * 3)
    ds_context.add_domains(train_data)
    ds_context.parametric_type = [Categorical] * 3
    spn = 0.64 * (
        Categorical(p=[0.25, 0.75, 0.0], scope=0)
        * (
            0.34 * (Categorical(p=[7 / 34, 27 / 34], scope=1) * Categorical(p=[1.0, 0.0], scope=2))
            + 0.66 * (Categorical(p=[21 / 22, 1 / 22], scope=1) * Categorical(p=[0.0, 1.0], scope=2))
        )
    ) + 0.36 * (
        Categorical(p=[0.0, 0.0, 1.0], scope=0)
        * (
            0.34 * (Categorical(p=[7 / 34, 27 / 34], scope=1) * Categorical(p=[1.0, 0.0], scope=2))
            + 0.66 * (Categorical(p=[21 / 22, 1 / 22], scope=1) * Categorical(p=[0.0, 1.0], scope=2))
        )
    )

    # tests
    x_instance = np.array([1, 1, 0], dtype=float).reshape(1, -1)
    self.assertAlmostEqual(conditional_probability(spn, 2, x_instance)[0][0], 0.9)
    self.assertAlmostEqual(conditional_probability(spn, 0, x_instance)[0][0], 0.48)

    x_instance = np.array([2, 1, 0], dtype=float).reshape(1, -1)
    self.assertAlmostEqual(conditional_probability(spn, 0, x_instance)[0][0], 0.36)
def test_we_score(self):
    # test if we_score is correct
    """
    # How the training data and the SPN were generated:
    # number of RVs
    M = 3
    # table of probabilities
    p1 = 0.6
    p2 = 0.3
    p31 = 0.1
    p32 = 0.9
    # generate x1 and x2
    x1 = np.random.binomial(1, p1, size=N) + np.random.binomial(1, p1, size=N)
    x2 = np.random.binomial(1, p2, size=N)
    x3 = np.zeros(N)
    # generate x3
    for i in range(N):
        if x2[i] == 1:
            x3[i] = np.random.binomial(1, p31, size=1)
        else:
            x3[i] = np.random.binomial(1, p32, size=1)
    # form a matrix, rows are instances and columns are RVs
    train_data = np.concatenate((x1, x2, x3)).reshape((M, N)).transpose()
    """

    # only for generating the ds_context
    train_data = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [2.0, 0.0, 1.0]])

    # spn
    ds_context = Context(meta_types=[MetaType.DISCRETE] * 3)
    ds_context.add_domains(train_data)
    ds_context.parametric_type = [Categorical] * 3
    spn = 0.64 * (
        Categorical(p=[0.25, 0.75, 0.0], scope=0)
        * (
            0.34 * (Categorical(p=[7 / 34, 27 / 34], scope=1) * Categorical(p=[1.0, 0.0], scope=2))
            + 0.66 * (Categorical(p=[21 / 22, 1 / 22], scope=1) * Categorical(p=[0.0, 1.0], scope=2))
        )
    ) + 0.36 * (
        Categorical(p=[0.0, 0.0, 1.0], scope=0)
        * (
            0.34 * (Categorical(p=[7 / 34, 27 / 34], scope=1) * Categorical(p=[1.0, 0.0], scope=2))
            + 0.66 * (Categorical(p=[21 / 22, 1 / 22], scope=1) * Categorical(p=[0.0, 1.0], scope=2))
        )
    )

    # test
    n = 40000
    x_instance = np.array([1, 1, 0], dtype=float).reshape(1, -1)
    y_index = 0
    we = weight_of_evidence(spn, 0, x_instance, n, ds_context.domains[y_index].shape[0])
    we_true = np.array([[np.nan, 0, 0]])
    we = we[~np.isnan(we)]
    we_true = we_true[~np.isnan(we_true)]
    self.assertTrue((we == we_true).all())
def get_gender_spn():
    from spn.structure.leaves.parametric.Parametric import Categorical, Gaussian

    spn1 = Categorical(p=[0.0, 1.0], scope=[0]) * Categorical(p=[0.2, 0.8], scope=[1])
    spn2 = Categorical(p=[1.0, 0.0], scope=[0]) * Categorical(p=[0.7, 0.3], scope=[1])
    spn3 = 0.4 * spn1 + 0.6 * spn2
    spn = spn3 * Gaussian(mean=20, stdev=3, scope=[2])
    spn.scope = sorted(spn.scope)
    return spn
def test_induced_trees(self):
    spn = 0.5 * (Gaussian(mean=10, stdev=1, scope=0) * Categorical(p=[1.0, 0], scope=1)) \
        + 0.5 * (Gaussian(mean=50, stdev=1, scope=0) * Categorical(p=[0, 1.0], scope=1))

    data = np.zeros((2, 2))
    data[1, 1] = 1
    data[:, 0] = np.nan

    mpevals = mpe(spn, data)

    self.assertAlmostEqual(mpevals[0, 0], 10)
    self.assertAlmostEqual(mpevals[1, 0], 50)
def create_SPN2():
    from spn.structure.Base import Sum, Product, assign_ids, rebuild_scopes_bottom_up
    from spn.algorithms.Validity import is_valid
    from spn.structure.leaves.parametric.Parametric import Categorical

    p0 = Product(children=[
        Categorical(p=[0.3, 0.7], scope=1),
        Categorical(p=[0.4, 0.6], scope=2)
    ])
    p1 = Product(children=[
        Categorical(p=[0.5, 0.5], scope=1),
        Categorical(p=[0.6, 0.4], scope=2)
    ])
    s1 = Sum(weights=[0.3, 0.7], children=[p0, p1])
    p2 = Product(children=[Categorical(p=[0.2, 0.8], scope=0), s1])
    p3 = Product(children=[
        Categorical(p=[0.2, 0.8], scope=0),
        Categorical(p=[0.3, 0.7], scope=1)
    ])
    p4 = Product(children=[p3, Categorical(p=[0.4, 0.6], scope=2)])
    spn = Sum(weights=[0.4, 0.6], children=[p2, p4])

    assign_ids(spn)
    rebuild_scopes_bottom_up(spn)

    val, msg = is_valid(spn)
    assert val, msg

    return spn
def test_induced_trees(self):
    spn = 0.5 * (Gaussian(mean=10, stdev=0.000000001, scope=0) * Categorical(p=[1.0, 0], scope=1)) \
        + 0.5 * (Gaussian(mean=50, stdev=0.000000001, scope=0) * Categorical(p=[0, 1.0], scope=1))

    rand_gen = np.random.RandomState(17)

    data = np.zeros((2, 2))
    data[1, 1] = 1
    data[:, 0] = np.nan

    samples = sample_instances(spn, data, rand_gen)

    self.assertAlmostEqual(samples[0, 0], 10)
    self.assertAlmostEqual(samples[1, 0], 50)
def test_eval_parametric(self):
    data = np.array([1, 1, 1, 1, 1, 1, 1], dtype=np.float32).reshape((1, 7))

    spn = (Gaussian(mean=1.0, stdev=1.0, scope=[0])
           * Exponential(l=1.0, scope=[1])
           * Gamma(alpha=1.0, beta=1.0, scope=[2])
           * LogNormal(mean=1.0, stdev=1.0, scope=[3])
           * Poisson(mean=1.0, scope=[4])
           * Bernoulli(p=0.6, scope=[5])
           * Categorical(p=[0.1, 0.2, 0.7], scope=[6]))

    ll = log_likelihood(spn, data)
    tf_ll = eval_tf(spn, data)
    self.assertTrue(np.all(np.isclose(ll, tf_ll)))

    spn_copy = Copy(spn)
    tf_graph, data_placeholder, variable_dict = spn_to_tf_graph(spn_copy, data, 1)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        tf_graph_to_spn(variable_dict)

    str_val = spn_to_str_equation(spn)
    str_val2 = spn_to_str_equation(spn_copy)
    self.assertEqual(str_val, str_val2)
def create_leaf(data, ds_context, scope):
    idx = scope[0]
    meta_type = ds_context.meta_types[idx]

    if meta_type == MetaType.REAL:
        if identity_numeric:
            return create_identity_leaf(data, scope)
        if prior_weight == 0.:
            return create_piecewise_leaf(data, ds_context, scope, prior_weight=None)
        else:
            return create_piecewise_leaf(data, ds_context, scope, prior_weight=prior_weight)

    elif meta_type == MetaType.DISCRETE:
        unique, counts = np.unique(data[:, 0], return_counts=True)
        sorted_counts = np.zeros(len(ds_context.domains[idx]), dtype=np.float64)
        for i, x in enumerate(unique):
            sorted_counts[int(x)] = counts[i]
        p = sorted_counts / data.shape[0]

        # Do regularization
        if prior_weight > 0.:
            p += prior_weight
            p = p / np.sum(p)

        return Categorical(p, scope)

    else:
        raise Exception("Method learn_mspn_for_aqp(...) cannot create leaf for " + str(meta_type))
def getSpn1():
    spn = 0.4 * (Categorical(p=[0.2, 0.8], scope=0)
                 * (0.3 * (Categorical(p=[0.3, 0.7], scope=1) * Categorical(p=[0.4, 0.6], scope=2))
                    + 0.7 * (Categorical(p=[0.5, 0.5], scope=1) * Categorical(p=[0.6, 0.4], scope=2)))) \
        + 0.6 * (Categorical(p=[0.2, 0.8], scope=0)
                 * Categorical(p=[0.3, 0.7], scope=1)
                 * Categorical(p=[0.4, 0.6], scope=2))
    return spn
def test_mutual_info(self):
    # test if mutual info is correct
    # same spn as in entropy test

    # only for generating the ds_context
    train_data = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [2.0, 0.0, 1.0]])

    # spn
    ds_context = Context(meta_types=[MetaType.DISCRETE] * 3)
    ds_context.add_domains(train_data)
    ds_context.parametric_type = [Categorical] * 3
    spn = 0.64 * (
        Categorical(p=[0.25, 0.75, 0.0], scope=0)
        * (
            0.34 * (Categorical(p=[7 / 34, 27 / 34], scope=1) * Categorical(p=[1.0, 0.0], scope=2))
            + 0.66 * (Categorical(p=[21 / 22, 1 / 22], scope=1) * Categorical(p=[0.0, 1.0], scope=2))
        )
    ) + 0.36 * (
        Categorical(p=[0.0, 0.0, 1.0], scope=0)
        * (
            0.34 * (Categorical(p=[7 / 34, 27 / 34], scope=1) * Categorical(p=[1.0, 0.0], scope=2))
            + 0.66 * (Categorical(p=[21 / 22, 1 / 22], scope=1) * Categorical(p=[0.0, 1.0], scope=2))
        )
    )

    # real mutual info
    p2 = 0.3
    p3 = 0.66
    h_x2 = -p2 * np.log(p2) - (1 - p2) * np.log(1 - p2)
    h_x3 = -p3 * np.log(p3) - (1 - p3) * np.log(1 - p3)
    h_x2x3 = -(p2 * np.log(p2) + (1 - p2) * np.log(1 - p2) + 0.9 * np.log(0.9) + 0.1 * np.log(0.1))
    mi_x2x3 = h_x2 + h_x3 - h_x2x3
    self.assertAlmostEqual(mi_x2x3, mutual_information(spn, ds_context, {1}, {2}))

    mi_x1x2 = 0
    self.assertAlmostEqual(mi_x1x2, mutual_information(spn, ds_context, {1}, {0}))

    # test symmetry
    self.assertAlmostEqual(
        mutual_information(spn, ds_context, {2}, {1}),
        mutual_information(spn, ds_context, {1}, {2})
    )
    self.assertAlmostEqual(
        mutual_information(spn, ds_context, {0, 2}, {1}),
        mutual_information(spn, ds_context, {1}, {0, 2})
    )

    # rest 0
    self.assertAlmostEqual(0, mutual_information(spn, ds_context, {2, 1}, {0}))
def test_conditional_mutual_info(self):
    # test if conditional mutual info is correct
    # same spn as in entropy test

    # only for generating the ds_context
    train_data = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [2.0, 0.0, 1.0]])

    # spn
    ds_context = Context(meta_types=[MetaType.DISCRETE] * 3)
    ds_context.add_domains(train_data)
    ds_context.parametric_type = [Categorical] * 3
    spn = 0.64 * (
        Categorical(p=[0.25, 0.75, 0.0], scope=0)
        * (
            0.34 * (Categorical(p=[7 / 34, 27 / 34], scope=1) * Categorical(p=[1.0, 0.0], scope=2))
            + 0.66 * (Categorical(p=[21 / 22, 1 / 22], scope=1) * Categorical(p=[0.0, 1.0], scope=2))
        )
    ) + 0.36 * (
        Categorical(p=[0.0, 0.0, 1.0], scope=0)
        * (
            0.34 * (Categorical(p=[7 / 34, 27 / 34], scope=1) * Categorical(p=[1.0, 0.0], scope=2))
            + 0.66 * (Categorical(p=[21 / 22, 1 / 22], scope=1) * Categorical(p=[0.0, 1.0], scope=2))
        )
    )

    # real mutual info
    p2 = 0.3
    p3 = 0.66
    h_x1 = -(0.16 * np.log(0.16) + 0.36 * np.log(0.36) + 0.48 * np.log(0.48))
    h_x2x1 = -(0.7 * np.log(0.7) + 0.3 * np.log(0.3)) + h_x1
    h_x3x1 = -(0.66 * np.log(0.66) + 0.34 * np.log(0.34)) + h_x1
    h_x2x3 = -(p2 * np.log(p2) + (1 - p2) * np.log(1 - p2) + 0.9 * np.log(0.9) + 0.1 * np.log(0.1))
    h_x2x3x1 = h_x1 + h_x2x3

    cmi_x2x3_x1 = h_x2x1 + h_x3x1 - h_x2x3x1 - h_x1
    self.assertAlmostEqual(cmi_x2x3_x1, conditional_mutual_information(spn, ds_context, {1}, {2}, {0}))

    h_x1x3 = h_x3x1
    h_x1x2x3 = h_x2x3x1
    h_x3 = -p3 * np.log(p3) - (1 - p3) * np.log(1 - p3)
    cmi_x1x2_x3 = h_x1x3 + h_x2x3 - h_x1x2x3 - h_x3
    self.assertAlmostEqual(cmi_x1x2_x3, conditional_mutual_information(spn, ds_context, {1}, {0}, {2}))

    h_x1x2x3 = h_x2x3x1
    h_x2 = -p2 * np.log(p2) - (1 - p2) * np.log(1 - p2)
    cmi_x1x3_x2 = h_x2x1 + h_x2x3 - h_x1x2x3 - h_x2
    self.assertAlmostEqual(cmi_x1x3_x2, conditional_mutual_information(spn, ds_context, {2}, {0}, {1}))
def create_SPN():
    from spn.structure.leaves.parametric.Parametric import Categorical

    spn = 0.4 * (Categorical(p=[0.2, 0.8], scope=0)
                 * (0.3 * (Categorical(p=[0.3, 0.7], scope=1) * Categorical(p=[0.4, 0.6], scope=2))
                    + 0.7 * (Categorical(p=[0.5, 0.5], scope=1) * Categorical(p=[0.6, 0.4], scope=2)))) \
        + 0.6 * (Categorical(p=[0.2, 0.8], scope=0)
                 * Categorical(p=[0.3, 0.7], scope=1)
                 * Categorical(p=[0.4, 0.6], scope=2))
    return spn
def mix_categorical(weighted_nodes):
    assert sum([weight for (weight, node) in weighted_nodes]) == 1

    p = np.zeros(len(weighted_nodes[0][1].p))
    scope = weighted_nodes[0][1].scope

    for (weight, node) in weighted_nodes:
        assert isinstance(node, Categorical)
        for i in range(len(p)):
            p[i] += weight * node.p[i]

    return Categorical(p=p, scope=scope)
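# A minimal usage sketch for mix_categorical (an illustrative assumption, not part
# of the original file): mix two Categorical leaves over the same scope with
# weights 0.25 and 0.75; the result is the weighted average of their probability
# vectors.
from spn.structure.leaves.parametric.Parametric import Categorical

a = Categorical(p=[0.2, 0.8], scope=[0])
b = Categorical(p=[0.6, 0.4], scope=[0])
mixed = mix_categorical([(0.25, a), (0.75, b)])
print(mixed.p)  # expected: [0.25*0.2 + 0.75*0.6, 0.25*0.8 + 0.75*0.4] = [0.5, 0.5]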
def test_cuda_categorical():
    # Construct a minimal SPN
    c1 = Categorical(p=[0.35, 0.55, 0.1], scope=0)
    c2 = Categorical(p=[0.25, 0.625, 0.125], scope=1)
    c3 = Categorical(p=[0.5, 0.2, 0.3], scope=2)
    c4 = Categorical(p=[0.6, 0.15, 0.25], scope=3)
    c5 = Categorical(p=[0.7, 0.11, 0.19], scope=4)
    c6 = Categorical(p=[0.8, 0.14, 0.06], scope=5)
    p = Product(children=[c1, c2, c3, c4, c5, c6])

    # Randomly sample input values.
    inputs = np.column_stack((
        np.random.randint(3, size=30),
        np.random.randint(3, size=30),
        np.random.randint(3, size=30),
        np.random.randint(3, size=30),
        np.random.randint(3, size=30),
        np.random.randint(3, size=30),
    )).astype("float64")

    if not CUDACompiler.isAvailable():
        print("Test not supported by the compiler installation")
        return 0

    # Execute the compiled Kernel.
    results = CUDACompiler().log_likelihood(p, inputs, supportMarginal=False)

    # Compute the reference results using the inference from SPFlow.
    reference = log_likelihood(p, inputs)
    reference = reference.reshape(30)

    # Check the computation results against the reference.
    # Check in normal space if log-results are not very close to each other.
    assert np.all(np.isclose(results, reference)) or np.all(
        np.isclose(np.exp(results), np.exp(reference)))
def create_SPN():
    from spn.algorithms.Validity import is_valid
    from spn.structure.leaves.parametric.Parametric import Categorical

    spn = 0.4 * (Categorical(p=[0.2, 0.8], scope=0)
                 * (0.3 * (Categorical(p=[0.3, 0.7], scope=1) * Categorical(p=[0.4, 0.6], scope=2))
                    + 0.7 * (Categorical(p=[0.5, 0.5], scope=1) * Categorical(p=[0.6, 0.4], scope=2)))) \
        + 0.6 * (Categorical(p=[0.2, 0.8], scope=0)
                 * Categorical(p=[0.3, 0.7], scope=1)
                 * Categorical(p=[0.4, 0.6], scope=2))

    # is_valid returns a (bool, message) tuple, so check the flag explicitly.
    val, msg = is_valid(spn)
    assert val, msg
    return spn
def get_credit_spn():
    from spn.structure.Base import Product
    from spn.structure.leaves.parametric.Parametric import Categorical

    spn1 = Categorical(p=[0.0, 1.0], scope=[2]) * Categorical(p=[0.5, 0.5], scope=[3])
    spn2 = Categorical(p=[1.0, 0.0], scope=[2]) * Categorical(p=[0.1, 0.9], scope=[3])
    spn3 = 0.3 * spn1 + 0.7 * spn2
    spn4 = Categorical(p=[0.0, 1.0], scope=[1]) * spn3
    spn6 = Product([
        Categorical(p=[1.0, 0.0], scope=[1]),
        Categorical(p=[0.0, 1.0], scope=[2]),
        Categorical(p=[1.0, 0.0], scope=[3])
    ])
    spn6.scope = [1, 2, 3]
    spn7 = 0.8 * spn4 + 0.2 * spn6
    spn = spn7 * Categorical(p=[0.2, 0.8], scope=[0])
    spn.scope = sorted(spn.scope)
    return spn
def __init__(self):
    p0 = Product(children=[
        Categorical(p=[0.3, 0.7], scope=1),
        Categorical(p=[0.4, 0.6], scope=2)
    ])
    p1 = Product(children=[
        Categorical(p=[0.5, 0.5], scope=1),
        Categorical(p=[0.6, 0.4], scope=2)
    ])
    s1 = Sum(weights=[0.3, 0.7], children=[p0, p1])
    p2 = Product(children=[Categorical(p=[0.2, 0.8], scope=0), s1])
    p3 = Product(children=[
        Categorical(p=[0.2, 0.8], scope=0),
        Categorical(p=[0.3, 0.7], scope=1)
    ])
    p4 = Product(children=[p3, Categorical(p=[0.4, 0.6], scope=2)])
    self.spn = Sum(weights=[0.4, 0.6], children=[p2, p4])

    assign_ids(self.spn)
    rebuild_scopes_bottom_up(self.spn)
def test_spn_to_torch(self):
    # SPFlow implementation
    n0 = Gaussian(mean=0.0, stdev=1.0, scope=0)
    n1 = Categorical(p=[0.1, 0.3, 0.6])
    n2 = Sum(weights=[0.1, 0.2, 0.3, 0.4], children=[n0, n1])
    n3 = Product(children=[n0, n1])

    torch_n0 = GaussianNode.from_spn(n0)
    torch_n1 = CategoricalNode.from_spn(n1)
    torch_n2 = SumNode.from_spn(n2)
    torch_n3 = ProductNode.from_spn(n3)

    self.assertEqual(torch_n0.mean, n0.mean)
    self.assertEqual(torch_n0.std, n0.stdev)
    self.assertTrue(
        np.isclose(torch_n1.p.detach().numpy(), n1.p, atol=DELTA).all())
    self.assertTrue(
        np.isclose(torch_n2.weights.detach().numpy(), n2.weights, atol=DELTA).all())
def test_cpu_categorical():
    # Construct a minimal SPN
    c1 = Categorical(p=[0.35, 0.55, 0.1], scope=0)
    c2 = Categorical(p=[0.25, 0.625, 0.125], scope=1)
    c3 = Categorical(p=[0.5, 0.2, 0.3], scope=2)
    c4 = Categorical(p=[0.6, 0.15, 0.25], scope=3)
    c5 = Categorical(p=[0.7, 0.11, 0.19], scope=4)
    c6 = Categorical(p=[0.8, 0.14, 0.06], scope=5)
    p = Product(children=[c1, c2, c3, c4, c5, c6])

    # Randomly sample input values.
    inputs = np.column_stack((
        np.random.randint(3, size=30),
        np.random.randint(3, size=30),
        np.random.randint(3, size=30),
        np.random.randint(3, size=30),
        np.random.randint(3, size=30),
        np.random.randint(3, size=30),
    )).astype("float64")

    # Insert some NaN in random places into the input data.
    inputs.ravel()[np.random.choice(inputs.size, 10, replace=False)] = np.nan

    if not CPUCompiler.isVectorizationSupported():
        print("Test not supported by the compiler installation")
        return 0

    # Execute the compiled Kernel.
    results = CPUCompiler(computeInLogSpace=False, vectorize=False).log_likelihood(p, inputs, batchSize=10)

    # Compute the reference results using the inference from SPFlow.
    reference = log_likelihood(p, inputs)
    reference = reference.reshape(30)

    # Check the computation results against the reference.
    # Check in normal space if log-results are not very close to each other.
    assert np.all(np.isclose(results, reference)) or np.all(
        np.isclose(np.exp(results), np.exp(reference)))
def test_vector_slp_escaping_users():
    g0 = Gaussian(mean=0.00, stdev=1, scope=0)
    g1 = Gaussian(mean=0.01, stdev=0.75, scope=1)
    g2 = Gaussian(mean=0.02, stdev=0.5, scope=2)
    g3 = Gaussian(mean=0.03, stdev=0.25, scope=3)
    g4 = Gaussian(mean=0.04, stdev=1, scope=4)
    g5 = Gaussian(mean=0.05, stdev=0.25, scope=5)
    g6 = Gaussian(mean=0.06, stdev=0.5, scope=6)
    g7 = Gaussian(mean=0.07, stdev=0.75, scope=7)
    g8 = Gaussian(mean=0.08, stdev=1, scope=8)
    g9 = Gaussian(mean=0.09, stdev=0.75, scope=9)
    g10 = Gaussian(mean=0.10, stdev=1, scope=10)
    g11 = Gaussian(mean=0.11, stdev=1, scope=11)

    h0 = Histogram([0., 1., 2.], [0.1, 0.9], [1, 1], scope=12)
    h1 = Histogram([0., 1., 2.], [0.2, 0.8], [1, 1], scope=13)
    h2 = Histogram([0., 1., 2.], [0.3, 0.7], [1, 1], scope=14)
    h3 = Histogram([0., 1., 2.], [0.4, 0.6], [1, 1], scope=15)
    h4 = Histogram([0., 1., 2.], [0.5, 0.5], [1, 1], scope=16)
    h5 = Histogram([0., 1., 2.], [0.6, 0.4], [1, 1], scope=17)
    h6 = Histogram([0., 1., 2.], [0.7, 0.3], [1, 1], scope=18)
    h7 = Histogram([0., 1., 2.], [0.8, 0.2], [1, 1], scope=19)

    c0 = Categorical(p=[0.1, 0.1, 0.8], scope=20)
    c1 = Categorical(p=[0.2, 0.2, 0.6], scope=21)
    c2 = Categorical(p=[0.3, 0.3, 0.4], scope=22)
    c3 = Categorical(p=[0.4, 0.4, 0.2], scope=23)
    c4 = Categorical(p=[0.5, 0.4, 0.1], scope=24)
    c5 = Categorical(p=[0.6, 0.3, 0.1], scope=25)
    c6 = Categorical(p=[0.7, 0.2, 0.1], scope=26)
    c7 = Categorical(p=[0.8, 0.1, 0.1], scope=27)

    s0 = Sum(children=[g8, h4], weights=[0.5, 0.5])
    s1 = Sum(children=[g9, h5], weights=[0.5, 0.5])
    s2 = Sum(children=[g10, c6], weights=[0.5, 0.5])
    s3 = Sum(children=[g11, h7], weights=[0.5, 0.5])
    s4 = Sum(children=[s0, c4], weights=[0.5, 0.5])
    s5 = Sum(children=[s1, c5], weights=[0.5, 0.5])
    s6 = Sum(children=[s2, g6], weights=[0.5, 0.5])
    s7 = Sum(children=[s3, c7], weights=[0.5, 0.5])
    s8 = Sum(children=[s4, g4], weights=[0.5, 0.5])
    s9 = Sum(children=[s5, g5], weights=[0.5, 0.5])
    s10 = Sum(children=[s6, h6], weights=[0.5, 0.5])
    s11 = Sum(children=[s7, g7], weights=[0.5, 0.5])

    p0 = Product(children=[h0, s8])
    p1 = Product(children=[c1, s9])
    p2 = Product(children=[c2, s10])
    p3 = Product(children=[g3, s11])
    p4 = Product(children=[p0, g0])
    p5 = Product(children=[p1, g1])
    p6 = Product(children=[p2, h2])
    p7 = Product(children=[p3, c3])
    p8 = Product(children=[p4, c0])
    p9 = Product(children=[p5, h1])
    p10 = Product(children=[p6, g2])
    p11 = Product(children=[p7, h3])

    s12 = Sum(children=[p8, p9], weights=[0.5, 0.5])
    s13 = Sum(children=[p10, p11], weights=[0.5, 0.5])
    s14 = Sum(children=[s12, p2], weights=[0.5, 0.5])
    s15 = Sum(children=[s13, s2], weights=[0.5, 0.5])
    spn = Product(children=[s14, s15])

    # Randomly sample input values from Gaussian (normal) distributions.
    num_samples = 100
    inputs = np.column_stack((
        # gaussian
        np.random.normal(loc=0.5, scale=1, size=num_samples),
        np.random.normal(loc=0.125, scale=0.25, size=num_samples),
        np.random.normal(loc=0.345, scale=0.24, size=num_samples),
        np.random.normal(loc=0.456, scale=0.1, size=num_samples),
        np.random.normal(loc=0.94, scale=0.48, size=num_samples),
        np.random.normal(loc=0.56, scale=0.42, size=num_samples),
        np.random.normal(loc=0.76, scale=0.14, size=num_samples),
        np.random.normal(loc=0.32, scale=0.58, size=num_samples),
        np.random.normal(loc=0.58, scale=0.219, size=num_samples),
        np.random.normal(loc=0.14, scale=0.52, size=num_samples),
        np.random.normal(loc=0.24, scale=0.42, size=num_samples),
        np.random.normal(loc=0.34, scale=0.1, size=num_samples),
        # histogram
        np.random.randint(low=0, high=2, size=num_samples),
        np.random.randint(low=0, high=2, size=num_samples),
        np.random.randint(low=0, high=2, size=num_samples),
        np.random.randint(low=0, high=2, size=num_samples),
        np.random.randint(low=0, high=2, size=num_samples),
        np.random.randint(low=0, high=2, size=num_samples),
        np.random.randint(low=0, high=2, size=num_samples),
        np.random.randint(low=0, high=2, size=num_samples),
        # categorical
        np.random.randint(low=0, high=3, size=num_samples),
        np.random.randint(low=0, high=3, size=num_samples),
        np.random.randint(low=0, high=3, size=num_samples),
        np.random.randint(low=0, high=3, size=num_samples),
        np.random.randint(low=0, high=3, size=num_samples),
        np.random.randint(low=0, high=3, size=num_samples),
        np.random.randint(low=0, high=3, size=num_samples),
        np.random.randint(low=0, high=3, size=num_samples))).astype("float64")

    # Compute the reference results using the inference from SPFlow.
    reference = log_likelihood(spn, inputs)
    reference = reference.reshape(num_samples)

    # Compile the kernel with batch size 1 to enable SLP vectorization.
    compiler = CPUCompiler(vectorize=True, computeInLogSpace=True, vectorLibrary="LIBMVEC")
    kernel = compiler.compile_ll(spn=spn, batchSize=1, supportMarginal=False)

    # Execute the compiled Kernel.
    time_sum = 0
    for i in range(len(reference)):
        # Check the computation results against the reference.
        start = time.time()
        result = compiler.execute(kernel, inputs=np.array([inputs[i]]))
        time_sum = time_sum + time.time() - start
        print(f"evaluation #{i}: result: {result[0]:16.8f}, reference: {reference[i]:16.8f}", end='\r')
        if not np.isclose(result, reference[i]):
            print(f"\nevaluation #{i} failed: result: {result[0]:16.8f}, reference: {reference[i]:16.8f}")
            raise AssertionError()
    print(f"\nExecution of {len(reference)} samples took {time_sum} seconds.")
y = np.array(y).reshape(-1, )
z = np.random.choice([1, 2], int(1e4), replace=True, p=[0.4, 0.6])
df = pd.DataFrame(dict(zip(['X', 'Y', 'Z'], [x, y, z]))).astype(str)
df, vd, pars = fn.transform_dataset(df)
spn = spn_handler.load_or_create_spn(df, vd, pars, 'mini_example', 0.4, 0.5, nrows=None, seed=1,
                                     force_create=True, clustering='km_rule_clustering')
spn = spn.children[1]
manspn = (
    0.3 * (Categorical(p=[0.9, 0.1], scope=0) * Categorical(p=[0.55, 0.4, 0.05], scope=1))
    + 0.7 * (Categorical(p=[0., 1.], scope=0) * Categorical(p=[0.1, 0.2, 0.7], scope=1))
) * (Categorical(p=[0.4, 0.6], scope=2))

# plot leaves from example
p = [[0.9, 0.1], [0.4, 0.55, 0.05], [0., 1.], [0.1, 0.2, 0.7], [0.4, 0.6]]
y = 2
size = (2.88 * y, y)
fig, axes = plt.subplots(1, 4, sharey=True, squeeze=True, figsize=size)
for i, var in enumerate(['X', 'Y', 'X', 'Y']):
    currp = p[i]
    ax = axes[i]
    # if i in [1, 2]:
    #     d = df[var].value_counts(sort=False).divide(len(df))
    # if i in [3, 4]:
    ticks = list(range(len(currp)))
# # geometric
geometric = Geometric(p=.025, scope=[0])
pdf_x, pdf_y = approximate_density(geometric, x_range)
fig, ax = plt.subplots(1, 1)
ax.plot(pdf_x, pdf_y, label="geometric")
print('Geometric Mode:', geometric.mode)
plt.axvline(x=geometric.mode, color='r')
if show_plots:
    plt.show()

# # categorical
categorical = Categorical(p=[0.1, 0.05, 0.3, 0.05, 0.2, 0.2, 0.1], scope=[0])
pdf_x, pdf_y = approximate_density(categorical, np.arange(categorical.k))
fig, ax = plt.subplots(1, 1)
ax.plot(pdf_x, pdf_y, label="categorical")
print('Categorical Mode:', categorical.mode)
plt.axvline(x=categorical.mode, color='r')
if show_plots:
    plt.show()

# # exponential
exponential = Exponential(l=5, scope=[0])
pdf_x, pdf_y = approximate_density(exponential, x_range)
fig, ax = plt.subplots(1, 1)
""" ================================= Domain Specific Language for SPNs ================================= We start by creating an SPN. Using a Domain-Specific Language (DSL), we can quickly create an SPN of categorical leave nodes like this: """ from spn.structure.leaves.parametric.Parametric import Categorical from spn.io.Graphics import draw_spn import matplotlib.pyplot as plt spn = 0.4 * ( Categorical(p=[0.2, 0.8], scope=0) * (0.3 * (Categorical(p=[0.3, 0.7], scope=1) * Categorical(p=[0.4, 0.6], scope=2)) + 0.7 * (Categorical(p=[0.5, 0.5], scope=1) * Categorical(p=[0.6, 0.4], scope=2))) ) + 0.6 * (Categorical(p=[0.2, 0.8], scope=0) * Categorical( p=[0.3, 0.7], scope=1) * Categorical(p=[0.4, 0.6], scope=2)) ax = draw_spn(spn)
from spn.structure.leaves.parametric.Parametric import Categorical
from spn.structure.Base import Sum, Product
from spn.structure.Base import assign_ids, rebuild_scopes_bottom_up

p0 = Product(children=[
    Categorical(p=[0.3, 0.7], scope=1),
    Categorical(p=[0.4, 0.6], scope=2)
])
p1 = Product(children=[
    Categorical(p=[0.5, 0.5], scope=1),
    Categorical(p=[0.6, 0.4], scope=2)
])
s1 = Sum(weights=[0.3, 0.7], children=[p0, p1])
p2 = Product(children=[Categorical(p=[0.2, 0.8], scope=0), s1])
p3 = Product(children=[
    Categorical(p=[0.2, 0.8], scope=0),
    Categorical(p=[0.3, 0.7], scope=1)
])
p4 = Product(children=[p3, Categorical(p=[0.4, 0.6], scope=2)])
spn = Sum(weights=[0.4, 0.6], children=[p2, p4])

assign_ids(spn)
rebuild_scopes_bottom_up(spn)

import numpy as np

test_data = np.array([1.0, 0.0, 1.0]).reshape(-1, 3)

from spn.algorithms.Inference import log_likelihood

ll = log_likelihood(spn, test_data)
print(ll, np.exp(ll))
def test_entropy(self):
    # test if entropy is correct
    """
    # How the training data and the SPN were generated:
    # number of RVs
    M = 3
    # table of probabilities
    p1 = 0.6
    p2 = 0.3
    p31 = 0.1
    p32 = 0.9
    # generate x1 and x2
    x1 = np.random.binomial(1, p1, size=N) + np.random.binomial(1, p1, size=N)
    x2 = np.random.binomial(1, p2, size=N)
    x3 = np.zeros(N)
    # generate x3
    for i in range(N):
        if x2[i] == 1:
            x3[i] = np.random.binomial(1, p31, size=1)
        else:
            x3[i] = np.random.binomial(1, p32, size=1)
    # form a matrix, rows are instances and columns are RVs
    train_data = np.concatenate((x1, x2, x3)).reshape((M, N)).transpose()
    """

    # only for generating the ds_context
    train_data = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [2.0, 0.0, 1.0]])

    # spn
    ds_context = Context(meta_types=[MetaType.DISCRETE] * 3)
    ds_context.add_domains(train_data)
    ds_context.parametric_type = [Categorical] * 3
    spn = 0.64 * (
        Categorical(p=[0.25, 0.75, 0.0], scope=0)
        * (
            0.34 * (Categorical(p=[7 / 34, 27 / 34], scope=1) * Categorical(p=[1.0, 0.0], scope=2))
            + 0.66 * (Categorical(p=[21 / 22, 1 / 22], scope=1) * Categorical(p=[0.0, 1.0], scope=2))
        )
    ) + 0.36 * (
        Categorical(p=[0.0, 0.0, 1.0], scope=0)
        * (
            0.34 * (Categorical(p=[7 / 34, 27 / 34], scope=1) * Categorical(p=[1.0, 0.0], scope=2))
            + 0.66 * (Categorical(p=[21 / 22, 1 / 22], scope=1) * Categorical(p=[0.0, 1.0], scope=2))
        )
    )

    # real entropy
    p2 = 0.3
    h_x2 = -p2 * np.log(p2) - (1 - p2) * np.log(1 - p2)
    self.assertAlmostEqual(h_x2, entropy(spn, ds_context, {1}))

    h_x2x3 = -(p2 * np.log(p2) + (1 - p2) * np.log(1 - p2) + 0.9 * np.log(0.9) + 0.1 * np.log(0.1))
    self.assertAlmostEqual(h_x2x3, entropy(spn, ds_context, {1, 2}))

    h_x1 = -(0.16 * np.log(0.16) + 0.36 * np.log(0.36) + 0.48 * np.log(0.48))
    self.assertAlmostEqual(h_x1, entropy(spn, ds_context, {0}))

    h_x2x1 = -(0.7 * np.log(0.7) + 0.3 * np.log(0.3)) + h_x1
    self.assertAlmostEqual(h_x2x1, entropy(spn, ds_context, {1, 0}))

    h_x3x1 = -(0.66 * np.log(0.66) + 0.34 * np.log(0.34)) + h_x1
    self.assertAlmostEqual(h_x3x1, entropy(spn, ds_context, {2, 0}))

    h_x2x3x1 = h_x1 + h_x2x3
    self.assertAlmostEqual(h_x2x3x1, entropy(spn, ds_context, {1, 2, 0}))

    # test symmetry
    self.assertAlmostEqual(entropy(spn, ds_context, {0, 2}), entropy(spn, ds_context, {2, 0}))
    self.assertAlmostEqual(entropy(spn, ds_context, {1, 2}), entropy(spn, ds_context, {2, 1}))
def _deserialize_categorical(self, node, node_map):
    probabilities = node.categorical.probabilities
    cat = Categorical(p=probabilities, scope=node.categorical.scope)
    cat.id = node.id
    return cat
node = Gaussian(np.inf, np.inf)
data = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
update_parametric_parameters_mle(node, data)
assert np.isclose(node.mean, np.mean(data))
assert np.isclose(node.stdev, np.std(data))

node = Gamma(np.inf, np.inf)
data = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
update_parametric_parameters_mle(node, data)
assert np.isclose(node.alpha / node.beta, np.mean(data)), node.alpha

node = LogNormal(np.inf, np.inf)
data = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
update_parametric_parameters_mle(node, data)
assert np.isclose(node.mean, np.log(data).mean(), atol=0.00001)
assert np.isclose(node.stdev, np.log(data).std(), atol=0.00001)

node = Poisson(np.inf)
data = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
update_parametric_parameters_mle(node, data)
assert np.isclose(node.mean, np.mean(data))

node = Categorical(np.array([1, 1, 1, 1, 1, 1]) / 6)
data = np.array([0, 0, 1, 3, 5]).reshape(-1, 1)
update_parametric_parameters_mle(node, data)
assert np.isclose(node.p[0], 2 / 5)
assert np.isclose(node.p[1], 1 / 5)
assert np.isclose(node.p[2], 0)
assert np.isclose(node.p[3], 1 / 5)
assert np.isclose(node.p[4], 0)
from spn.structure.leaves.parametric.Parametric import Categorical
from spn.structure.leaves.parametric.SamplingRange import sample_categorical_node
from spn.structure.leaves.parametric.InferenceRange import categorical_likelihood_range
from spn.structure.Base import Context
from spn.structure.StatisticalTypes import MetaType
from spn.experiments.AQP.Ranges import NominalRange, NumericRange
from spn.algorithms import SamplingRange

rand_gen = np.random.RandomState(100)

# Create SPN
node1 = Categorical(p=[0.9, 0.1], scope=[0])
node2 = Categorical(p=[0.1, 0.9], scope=[0])

x = [0., 1., 2., 3., 4.]
y = [0., 10., 0., 0., 0.]
x, y = np.array(x), np.array(y)
auc = np.trapz(y, x)
y = y / auc
node3 = PiecewiseLinear(x_range=x, y_range=y, bin_repr_points=x[1:-1], scope=[1])

x = [0., 1., 2., 3., 4.]
y = [0., 0., 0., 10., 0.]
x, y = np.array(x), np.array(y)
auc = np.trapz(y, x)
y = y / auc
    pass


if __name__ == '__main__':
    from spn.structure.Base import Sum, Product, Leaf
    from spn.structure.leaves.parametric.Parametric import Categorical

    spn1 = Categorical(p=[0.0, 1.0], scope=[2]) * Categorical(p=[0.5, 0.5], scope=[3])
    spn2 = Categorical(p=[1.0, 0.0], scope=[2]) * Categorical(p=[0.1, 0.9], scope=[3])
    spn3 = 0.3 * spn1 + 0.7 * spn2
    spn4 = Categorical(p=[0.0, 1.0], scope=[1]) * spn3
    spn6 = Product([Categorical(p=[1.0, 0.0], scope=[1]),
                    Categorical(p=[0.0, 1.0], scope=[2]),
                    Categorical(p=[1.0, 0.0], scope=[3])])
    spn6.scope = [1, 2, 3]
    spn7 = 0.8 * spn4 + 0.2 * spn6
    spn = spn7 * Categorical(p=[0.2, 0.8], scope=[0])

    # spn_util.plot_spn(spn, "rule_spn.pdf")
    extract_rules(spn)