def create_spflow_spn(n_feats, ctype=Gaussian):
    # Builds a random SPN over n_feats features (assumed even): two leaves per
    # feature, products over feature pairs, and a 50/50 mixture per pair.
    children1 = []
    children2 = []
    for i in range(n_feats):
        if ctype == Gaussian:
            c1 = Gaussian(np.random.randn(), np.random.rand(), scope=i)
            c2 = Gaussian(np.random.randn(), np.random.rand(), scope=i)
        else:
            c1 = Bernoulli(p=np.random.rand(), scope=i)
            c2 = Bernoulli(p=np.random.rand(), scope=i)
        children1.append(c1)
        children2.append(c2)

    prods1 = []
    prods2 = []
    for i in range(0, n_feats, 2):
        p1 = Product([children1[i], children1[i + 1]])
        p2 = Product([children2[i], children2[i + 1]])
        prods1.append(p1)
        prods2.append(p2)

    sums = []
    for i in range(n_feats // 2):
        s = Sum(weights=[0.5, 0.5], children=[prods1[i], prods2[i]])
        sums.append(s)

    spflow_spn = Product(sums)
    assign_ids(spflow_spn)
    rebuild_scopes_bottom_up(spflow_spn)
    return spflow_spn

def create_spflow_spn(n_feats):
    gaussians1 = []
    gaussians2 = []
    for i in range(n_feats):
        g1 = Gaussian(np.random.randn(), np.random.rand(), scope=i)
        g2 = Gaussian(np.random.randn(), np.random.rand(), scope=i)
        gaussians1.append(g1)
        gaussians2.append(g2)

    prods1 = []
    prods2 = []
    for i in range(0, n_feats, 2):
        p1 = Product([gaussians1[i], gaussians1[i + 1]])
        p2 = Product([gaussians2[i], gaussians2[i + 1]])
        prods1.append(p1)
        prods2.append(p2)

    sums = []
    for i in range(n_feats // 2):
        s = Sum(weights=[0.5, 0.5], children=[prods1[i], prods2[i]])
        sums.append(s)

    spflow_spn = Product(sums)
    assign_ids(spflow_spn)
    rebuild_scopes_bottom_up(spflow_spn)
    return spflow_spn

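# A minimal usage sketch for the factory above, assuming SPFlow is installed;
# the import paths below follow SPFlow's standard module layout.
import numpy as np
from spn.structure.Base import Sum, Product, assign_ids, rebuild_scopes_bottom_up
from spn.structure.leaves.parametric.Parametric import Gaussian
from spn.algorithms.Inference import log_likelihood

spn = create_spflow_spn(4)                  # 4 features, Gaussian leaves
data = np.random.randn(10, 4)
print(log_likelihood(spn, data))            # (10, 1) array of log-densities
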
def marginalize(node, scope):
    # scope is the set of variables to sum out; nodes whose scope is fully
    # contained in it are removed
    assert isinstance(scope, set), "scope must be a set"

    def marg_recursive(node):
        node_scope = set(node.scope)
        if node_scope.issubset(scope):
            return None

        if isinstance(node, Leaf):
            if len(node.scope) > 1:
                raise Exception('Leaf Node with |scope| > 1')
            return node

        newNode = node.__class__()

        # a sum node gets copied with all its children, or gets removed completely
        if isinstance(node, Sum):
            newNode.weights.extend(node.weights)

        for c in node.children:
            newChild = marg_recursive(c)
            if newChild is None:
                continue
            newNode.children.append(newChild)
        return newNode

    newNode = marg_recursive(node)
    rebuild_scopes_bottom_up(newNode)
    newNode = prune(newNode)
    valid, err = is_valid(newNode)  # is_valid returns (bool, msg); unpack it
    assert valid, err
    assign_ids(newNode)  # assign ids on the new root, not the original one
    return newNode

def create_SPN2():
    from spn.structure.Base import assign_ids
    from spn.structure.Base import rebuild_scopes_bottom_up
    from spn.algorithms.Validity import is_valid
    from spn.structure.leaves.parametric.Parametric import Categorical
    from spn.structure.Base import Sum, Product

    p0 = Product(children=[
        Categorical(p=[0.3, 0.7], scope=1),
        Categorical(p=[0.4, 0.6], scope=2)
    ])
    p1 = Product(children=[
        Categorical(p=[0.5, 0.5], scope=1),
        Categorical(p=[0.6, 0.4], scope=2)
    ])
    s1 = Sum(weights=[0.3, 0.7], children=[p0, p1])

    p2 = Product(children=[Categorical(p=[0.2, 0.8], scope=0), s1])
    p3 = Product(children=[
        Categorical(p=[0.2, 0.8], scope=0),
        Categorical(p=[0.3, 0.7], scope=1)
    ])
    p4 = Product(children=[p3, Categorical(p=[0.4, 0.6], scope=2)])

    spn = Sum(weights=[0.4, 0.6], children=[p2, p4])

    assign_ids(spn)
    rebuild_scopes_bottom_up(spn)

    val, msg = is_valid(spn)
    assert val, msg

    return spn

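# Usage sketch for create_SPN2, following SPFlow's standard inference API:
# evaluate the hand-built categorical SPN on one complete row over scopes 0..2.
import numpy as np
from spn.algorithms.Inference import log_likelihood

spn = create_SPN2()
test_data = np.array([1.0, 0.0, 1.0]).reshape(-1, 3)
print(log_likelihood(spn, test_data))
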
def create_disj(data, scope, assignments, alpha):
    unq_data, counts = np.unique(data, axis=0, return_counts=True)
    probs = np.zeros(assignments.shape[0])
    for i in range(assignments.shape[0]):
        index = np.where(np.all(assignments[i] == unq_data, axis=1))[0]
        if len(index):
            probs[i] = counts[index[0]]
    # Laplace smoothing so that unseen assignments keep nonzero probability
    probs = (probs + alpha) / (probs + alpha).sum()

    # indicator leaves, shared across all products
    indicators = {
        var: [Bernoulli(scope=[var], p=0),
              Bernoulli(scope=[var], p=1)]
        for var in scope
    }

    prods = []
    for i in range(assignments.shape[0]):
        children = []
        for j in range(assignments.shape[1]):
            children.append(indicators[scope[j]][assignments[i, j]])
        prods.append(Product(children=children))

    if len(prods) > 1:
        disj = Sum(children=prods, weights=probs)
    else:
        disj = prods[0]

    assign_ids(disj)
    rebuild_scopes_bottom_up(disj)
    return disj

def test_spn_to_str_and_back(self):
    self.check_obj_and_reconstruction(
        Categorical(p=[0.1, 0.2, 0.7], scope=0))

    self.check_obj_and_reconstruction(Gaussian(mean=0, stdev=10, scope=0))
    self.check_obj_and_reconstruction(Gaussian(mean=1.2, stdev=1.5, scope=0))
    self.check_obj_and_reconstruction(Gaussian(mean=-1.2, stdev=1, scope=0))

    gamma = Gamma(alpha=1, beta=2, scope=0)
    lnorm = LogNormal(mean=1, stdev=2, scope=0)
    self.check_obj_and_reconstruction(gamma)
    self.check_obj_and_reconstruction(lnorm)

    root = Sum(children=[gamma, lnorm], weights=[0.2, 0.8])
    assign_ids(root)
    rebuild_scopes_bottom_up(root)
    self.check_obj_and_reconstruction(root)

    root = 0.3 * (Gaussian(mean=0, stdev=1, scope=0) *
                  Gaussian(mean=1, stdev=1, scope=1)) + \
           0.7 * (Gaussian(mean=2, stdev=1, scope=0) *
                  Gaussian(mean=3, stdev=1, scope=1))
    self.check_obj_and_reconstruction(root)

def test_ll_matrix(self):
    add_node_likelihood(Leaf, sum_and_multiplier_ll)

    node_1_1_1_1 = leaf(2, 1)
    node_1_1_1_2 = leaf(2, 2)
    node_1_1_1 = 0.7 * node_1_1_1_1 + 0.3 * node_1_1_1_2
    node_1_1_2 = leaf([0, 1], 3)
    node_1_1 = node_1_1_1 * node_1_1_2
    node_1_2_1_1_1 = leaf(0, 5)
    node_1_2_1_1_2 = leaf(1, 4)
    node_1_2_1_1 = node_1_2_1_1_1 * node_1_2_1_1_2
    node_1_2_1_2 = leaf([0, 1], 6)
    node_1_2_1 = 0.1 * node_1_2_1_1 + 0.9 * node_1_2_1_2
    node_1_2_2 = leaf(2, 3)
    node_1_2 = node_1_2_1 * node_1_2_2
    spn = 0.4 * node_1_1 + 0.6 * node_1_2

    assign_ids(spn)
    max_id = max([n.id for n in get_nodes_by_type(spn)])

    data = np.random.rand(10, 10)

    # reference values computed directly from the data
    node_1_1_1_1_r = data[:, 2] * 1
    node_1_1_1_2_r = data[:, 2] * 2
    node_1_1_1_r = 0.7 * node_1_1_1_1_r + 0.3 * node_1_1_1_2_r
    node_1_1_2_r = 3 * (data[:, 0] + data[:, 1])
    node_1_1_r = node_1_1_1_r * node_1_1_2_r
    node_1_2_1_1_1_r = data[:, 0] * 5
    node_1_2_1_1_2_r = data[:, 1] * 4
    node_1_2_1_1_r = node_1_2_1_1_1_r * node_1_2_1_1_2_r
    node_1_2_1_2_r = 6 * (data[:, 0] + data[:, 1])
    node_1_2_1_r = 0.1 * node_1_2_1_1_r + 0.9 * node_1_2_1_2_r
    node_1_2_2_r = data[:, 2] * 3
    node_1_2_r = node_1_2_1_r * node_1_2_2_r
    spn_r = 0.4 * node_1_1_r + 0.6 * node_1_2_r

    self.assert_correct(spn, data, spn_r)

    lls = np.zeros((data.shape[0], max_id + 1))
    likelihood(spn, data, lls_matrix=lls)
    llls = np.zeros((data.shape[0], max_id + 1))
    log_likelihood(spn, data, lls_matrix=llls)

    # np.alltrue is deprecated; np.all is equivalent
    self.assertTrue(np.all(np.isclose(lls, np.exp(llls))))

    self.assertTrue(np.all(np.isclose(spn_r, lls[:, spn.id])))
    self.assertTrue(np.all(np.isclose(node_1_2_r, lls[:, node_1_2.id])))
    self.assertTrue(np.all(np.isclose(node_1_2_2_r, lls[:, node_1_2_2.id])))
    self.assertTrue(np.all(np.isclose(node_1_2_1_r, lls[:, node_1_2_1.id])))
    self.assertTrue(np.all(np.isclose(node_1_2_1_2_r, lls[:, node_1_2_1_2.id])))
    self.assertTrue(np.all(np.isclose(node_1_2_1_1_r, lls[:, node_1_2_1_1.id])))
    self.assertTrue(np.all(np.isclose(node_1_2_1_1_2_r, lls[:, node_1_2_1_1_2.id])))
    self.assertTrue(np.all(np.isclose(node_1_2_1_1_1_r, lls[:, node_1_2_1_1_1.id])))
    self.assertTrue(np.all(np.isclose(node_1_1_r, lls[:, node_1_1.id])))
    self.assertTrue(np.all(np.isclose(node_1_1_2_r, lls[:, node_1_1_2.id])))
    self.assertTrue(np.all(np.isclose(node_1_1_1_r, lls[:, node_1_1_1.id])))
    self.assertTrue(np.all(np.isclose(node_1_1_1_2_r, lls[:, node_1_1_1_2.id])))
    self.assertTrue(np.all(np.isclose(node_1_1_1_1_r, lls[:, node_1_1_1_1.id])))

def marginalize(node, keep, light=False):
    # keep must be a set of features that you want to keep
    keep = set(keep)

    def marg_recursive(node):
        new_node_scope = keep.intersection(set(node.scope))
        if len(new_node_scope) == 0:
            # we are summing out this node
            return None

        if isinstance(node, Leaf):
            if len(node.scope) > 1:
                raise Exception("Leaf Node with |scope| > 1")
            if light:
                return node
            return copy.deepcopy(node)

        newNode = node.__class__()
        newNode.cardinality = node.cardinality

        if isinstance(node, Sum):
            newNode.weights.extend(node.weights)
            if not light:
                newNode.cluster_centers.extend(node.cluster_centers)
        if isinstance(node, Product):
            if hasattr(node, 'binary_bloom_filters'):
                newNode.binary_bloom_filters = node.binary_bloom_filters

        for c in node.children:
            new_c = marg_recursive(c)
            if new_c is None:
                continue
            newNode.children.append(new_c)

        newNode.scope.extend(new_node_scope)
        return newNode

    newNode = marg_recursive(node)
    if not light:
        assign_ids(newNode)
    newNode = Prune(newNode, light=light)
    valid, err = is_valid(newNode, light=light)
    assert valid, err
    return newNode

def test_sum(self):
    spn = Product()
    for s in range(7):
        spn.children.append(Leaf(scope=s))

    assign_ids(spn)
    rebuild_scopes_bottom_up(spn)

    new_spn = SPN_Reshape(spn, 2)
    print(spn)

def generate_code(spn_id, spn, meta_types, floating_data_type):
    """
    Generates inference code for an SPN.

    :param spn_id: id under which the generated C++ method is registered
    :param spn: root node of an SPN
    :param meta_types: meta type (DISCRETE or REAL) of each feature
    :param floating_data_type: data type floating numbers are represented in
        in the generated C++ code
    :return: the generated method and its registration code as strings
    """
    # make sure we have ids
    assign_ids(spn)

    # fill method body according to SPN structure
    method_body = generate_method_body(spn, spn, floating_data_type, 0)

    # build parameters used in generated c++ function
    method_params = []
    passed_params = []
    for i, meta_type in enumerate(meta_types):  # avoid shadowing builtin `type`
        if meta_type == MetaType.DISCRETE:
            method_params += [
                f'vector <int> possibleValues{i}', f'int nullValueIdx{i}'
            ]
            passed_params += [
                f'py::arg("possibleValues{i}")', f'py::arg("nullValueIdx{i}")'
            ]
        elif meta_type == MetaType.REAL:
            method_params += [
                f'bool inverse{i}', f'bool leftMinusInf{i}',
                f'float leftCondition{i}', f'bool rightMinusInf{i}',
                f'float rightCondition{i}', f'bool leftIncluded{i}',
                f'bool rightIncluded{i}', f'float nullValue{i}'
            ]
            passed_params += [
                f'py::arg("inverse{i}")', f'py::arg("leftMinusInf{i}")',
                f'py::arg("leftCondition{i}")', f'py::arg("rightMinusInf{i}")',
                f'py::arg("rightCondition{i}")', f'py::arg("leftIncluded{i}")',
                f'py::arg("rightIncluded{i}")', f'py::arg("nullValue{i}")'
            ]

    value_dictionary = {
        'spn_id': spn_id,
        'method_body': method_body,
        'method_params': ', '.join(method_params),
        'node_count': get_number_of_nodes(spn),
        'passed_params': ', '.join(passed_params),
        'floating_data_type': floating_data_type
    }
    generated_method = replace_template(TemplatePath.METHOD_MASTER,
                                        value_dictionary, 0)
    registrate_method = replace_template(TemplatePath.REGISTRATION_MASTER,
                                         value_dictionary, 0)

    return generated_method, registrate_method

def create_conj(data, scope, alpha):
    # Laplace-smoothed conjunction encoding the assignment of the first row:
    # p is close to 1 where data[0][k] == 1 and close to 0 otherwise
    conj = Product(children=[
        Bernoulli(scope=[scope[k]],
                  p=(data[0][k] * data.shape[0] + alpha) /
                    (data.shape[0] + 2 * alpha)) for k in range(len(scope))
    ])
    assign_ids(conj)
    rebuild_scopes_bottom_up(conj)
    return conj

def SPN_Reshape(node, max_children=2):
    v, err = is_valid(node)
    assert v, err
    nodes = get_nodes_by_type(node, (Product, Sum))

    while len(nodes) > 0:
        n = nodes.pop()

        if len(n.children) <= max_children:
            continue

        # node has more than max_children children: create a binary hierarchy
        new_children = []
        new_weights = []
        for i in range(0, len(n.children), max_children):
            children = n.children[i:i + max_children]

            if len(children) > 1:
                if isinstance(n, Product):
                    newChild = Product()
                    for c in children:
                        newChild.scope.extend(c.scope)
                    newChild.children.extend(children)
                    new_children.append(newChild)
                else:  # Sum
                    weights = n.weights[i:i + max_children]
                    branch_weight = sum(weights)
                    new_weights.append(branch_weight)

                    newChild = Sum()
                    newChild.scope.extend(children[0].scope)
                    newChild.children.extend(children)
                    newChild.weights.extend(
                        [w / branch_weight for w in weights])
                    # force the child weights to sum exactly to 1
                    newChild.weights[0] = 1.0 - sum(newChild.weights[1:])
                    new_children.append(newChild)
            else:
                # a leftover single child is kept as-is, with the leftover weight
                new_children.extend(children)
                if isinstance(n, Sum):
                    new_weights.append(1.0 - sum(new_weights))

        n.children = new_children
        if isinstance(n, Sum):
            n.weights = new_weights
        nodes.append(n)  # re-check this node until it fits max_children

    assign_ids(node)
    v, err = is_valid(node)
    assert v, err
    return node

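# A minimal sketch of what SPN_Reshape does, assuming SPFlow's base structures
# (the generic Leaf stands in for any concrete leaf type): a product over five
# leaves is rewritten into a hierarchy where every inner node has at most two
# children, without changing the modeled distribution.
from spn.structure.Base import Product, Leaf, assign_ids, rebuild_scopes_bottom_up

wide = Product(children=[Leaf(scope=s) for s in range(5)])
assign_ids(wide)
rebuild_scopes_bottom_up(wide)
binary = SPN_Reshape(wide, max_children=2)
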
def learn_classifier(data, ds_context, spn_learn_wrapper, label_idx, **kwargs):
    spn = Sum()
    for label, count in zip(*np.unique(data[:, label_idx], return_counts=True)):
        branch = spn_learn_wrapper(data[data[:, label_idx] == label, :],
                                   ds_context, **kwargs)
        spn.children.append(branch)
        spn.weights.append(count / data.shape[0])

    spn.scope.extend(branch.scope)
    assign_ids(spn)

    valid, err = is_valid(spn)
    assert valid, "invalid spn: " + err

    return spn

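# Usage sketch following the standard SPFlow pattern: one branch is learned per
# class label of column 0, weighted by class frequency. The Context and
# learn_parametric imports are SPFlow's; the training data here is synthetic.
import numpy as np
from spn.structure.Base import Context
from spn.structure.leaves.parametric.Parametric import Categorical, Gaussian
from spn.algorithms.LearningWrappers import learn_parametric

train_data = np.c_[np.random.randint(2, size=100),   # label in column 0
                   np.random.randn(100, 2)].astype(np.float64)
ctx = Context(parametric_types=[Categorical, Gaussian, Gaussian]).add_domains(train_data)
spn_classification = learn_classifier(train_data, ctx, learn_parametric, 0)
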
def condition(spn, evidence):
    scope = set(
        [i for i in range(len(spn.scope)) if not np.isnan(evidence)[0][i]])
    node_conditions = {
        type(leaf): leaf_condition
        for leaf in get_nodes_by_type(spn, Leaf)
    }
    node_conditions.update({Sum: sum_condition, Product: prod_condition})

    new_root, val = eval_spn_bottom_up(spn,
                                       node_conditions,
                                       input_vals=evidence,
                                       scope=scope)
    assign_ids(new_root)
    return Prune(new_root)

def create_naive_fact(data, scope, alpha):
    """
    Returns a naive factorization of the data: a product of independent
    Bernoulli marginals. The Laplace correction alpha is optional, but without
    it zero counts yield zero probabilities and degenerate log-likelihoods.
    """
    probs = (np.sum(data, axis=0) + alpha) / (data.shape[0] + 2 * alpha)
    naive_fact = Product(children=[
        Bernoulli(p=probs[k], scope=[scope[k]]) for k in range(len(scope))
    ])
    assign_ids(naive_fact)
    rebuild_scopes_bottom_up(naive_fact)
    return naive_fact

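# Quick sketch with synthetic binary data, as the Bernoulli leaves imply:
# every variable in the scope becomes an independent, Laplace-smoothed marginal.
import numpy as np

data = np.random.randint(2, size=(50, 3))
nf = create_naive_fact(data, scope=[0, 1, 2], alpha=1.0)
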
def test_correct_parameters(self):
    node_1_2_2 = Leaf(0)
    node_1_2_1 = Leaf(1)
    node_1_1 = Leaf([0, 1])
    node_1_2 = node_1_2_1 * node_1_2_2
    spn = 0.1 * node_1_1 + 0.9 * node_1_2

    # ids have not been assigned properly yet, so mpe must complain
    node_1_2.id = 0

    rand_gen = RandomState(1234)
    with self.assertRaises(AssertionError):
        mpe(spn, rand_gen.rand(10, 3))

    # assign correct ids, then corrupt one: mpe must complain again
    assign_ids(spn)
    node_1_2_2.id += 1

    with self.assertRaises(AssertionError):
        mpe(spn, rand_gen.rand(10, 3))

def test_torch_vs_tf_time(self):
    # Create sample data
    from sklearn.datasets import make_blobs  # samples_generator module was removed in newer sklearn
    import tensorflow as tf
    from time import time

    X, y = make_blobs(n_samples=10, centers=3, n_features=2, random_state=0)
    X = X.astype(np.float32)

    # SPFlow implementation
    g00 = Gaussian(mean=0.0, stdev=1.0, scope=0)
    g10 = Gaussian(mean=1.0, stdev=2.0, scope=1)
    g01 = Gaussian(mean=3.0, stdev=2.0, scope=0)
    g11 = Gaussian(mean=5.0, stdev=1.0, scope=1)
    p0 = Product(children=[g00, g10])
    p1 = Product(children=[g01, g11])
    s = Sum(weights=[0.2, 0.8], children=[p0, p1])
    assign_ids(s)
    rebuild_scopes_bottom_up(s)

    # Convert
    tf_spn, data_placeholder, variable_dict = spn_to_tf_graph(s, data=X)
    torch_spn = SumNode.from_spn(s)

    # Optimizer
    lr = 0.001
    tf_optim = tf.train.AdamOptimizer(lr)
    torch_optim = optim.Adam(torch_spn.parameters(), lr)

    t0 = time()
    epochs = 10
    optimize_tf_graph(tf_spn,
                      variable_dict,
                      data_placeholder,
                      X,
                      epochs=epochs,
                      optimizer=tf_optim)
    t1 = time()
    optimize_torch(torch_spn, X, epochs=epochs, optimizer=torch_optim)
    t2 = time()

    print("Tensorflow took: ", t1 - t0)
    print("PyTorch took: ", t2 - t1)

def Prune(node):
    v, err = is_valid(node)
    assert v, err

    nodes = get_nodes_by_type(node, (Product, Sum, Max))

    while len(nodes) > 0:
        n = nodes.pop()

        n_type = type(n)
        is_sum = n_type == Sum

        i = 0
        while i < len(n.children):
            c = n.children[i]

            # if a child has only one child itself, we can remove it
            # and link directly to the grandchild
            if not (isinstance(c, Leaf) or isinstance(c, Max)) and \
                    len(c.children) == 1:
                n.children[i] = c.children[0]
                continue

            # a child of the same type is merged into its parent
            if n_type == type(c):
                del n.children[i]
                n.children.extend(c.children)

                if is_sum:
                    w = n.weights[i]
                    del n.weights[i]
                    n.weights.extend([cw * w for cw in c.weights])

                continue

            i += 1
        if is_sum and i > 0:
            n.weights[0] = 1.0 - sum(n.weights[1:])

    if isinstance(node, (Product, Sum)) and len(node.children) == 1:
        node = node.children[0]

    assign_ids(node)
    v, err = is_valid(node)
    assert v, err
    return node

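# A sketch of the effect, using SPFlow's base structures with the generic Leaf
# standing in for any concrete leaf type: a sum nested under a sum collapses
# into one sum whose weights are the products of the weights along the path.
from spn.structure.Base import Sum, Leaf, assign_ids, rebuild_scopes_bottom_up

inner = Sum(weights=[0.5, 0.5], children=[Leaf(scope=0), Leaf(scope=0)])
outer = Sum(weights=[0.4, 0.6], children=[inner, Leaf(scope=0)])
assign_ids(outer)
rebuild_scopes_bottom_up(outer)
pruned = Prune(outer)   # a single sum with weights [0.6, 0.2, 0.2]
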
def Prune(node, contract_single_parents=True, ds_context=None):
    v, err = is_valid(node)
    assert v, err

    nodes = get_nodes_by_type(node, (Product, Sum))

    while len(nodes) > 0:
        n = nodes.pop()

        n_type = type(n)
        is_sum = n_type == Sum

        i = 0
        while i < len(n.children):
            c = n.children[i]

            # if a child has only one child itself, we can remove it
            # and link directly to the grandchild
            if contract_single_parents and not isinstance(c, Leaf) and len(
                    c.children) == 1:
                n.children[i] = c.children[0]
                continue

            # a child of the same type is merged into its parent
            if n_type == type(c):
                del n.children[i]
                n.children.extend(c.children)

                if is_sum:
                    w = n.weights[i]
                    del n.weights[i]
                    n.weights.extend([cw * w for cw in c.weights])

                continue

            i += 1
        if is_sum and i > 0:
            n.weights[0] = 1.0 - sum(n.weights[1:])

    if contract_single_parents and isinstance(node, (Product, Sum)) and len(
            node.children) == 1:
        node = node.children[0]

    assign_ids(node)
    v, err = is_valid(node)
    assert v, err
    return node

def Compress(node):
    all_parents = get_parents(node)

    cache = {}
    for n in get_topological_order(node):
        params = (n.parameters, tuple(sorted(n.scope)))

        cached_node = cache.get(params, None)
        if cached_node is None:
            cache[params] = n
        else:
            # rewire every parent of the duplicate to the cached node
            for parent, pos in all_parents[n]:
                parent.children[pos] = cached_node

    assign_ids(node)
    val, msg = is_valid(node)
    assert val, msg
    return node

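# Sketch of the deduplication, assuming the node classes expose the
# `parameters` property that Compress relies on: two leaves with identical
# parameters and scope are merged into one shared node, shrinking the graph
# without changing its distribution.
from spn.structure.Base import Sum, assign_ids, rebuild_scopes_bottom_up
from spn.structure.leaves.parametric.Parametric import Gaussian

a = Gaussian(mean=0.0, stdev=1.0, scope=0)
b = Gaussian(mean=0.0, stdev=1.0, scope=0)   # same parameters as `a`
root = Sum(weights=[0.3, 0.7], children=[a, b])
assign_ids(root)
rebuild_scopes_bottom_up(root)
root = Compress(root)   # both children now reference the same leaf object
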
def complete_layers(layer_nodes, current_node_type=Sum, depth=None):
    # all leaves should be at the same depth
    root_layer = False
    if depth is None:
        root_layer = True
        depth = get_depth(layer_nodes[0])

    if depth == 2:
        return

    children_layer = []
    if current_node_type == Sum:
        for i in range(len(layer_nodes)):
            n = layer_nodes[i]
            assert isinstance(n, Sum)
            for j in range(len(n.children)):
                c = n.children[j]
                if not isinstance(c, Product):
                    n.children[j] = Product([c])
            children_layer.extend(n.children)
        children_layer_type = Product
    elif current_node_type == Product:
        for i in range(len(layer_nodes)):
            n = layer_nodes[i]
            assert isinstance(n, Product)
            for j in range(len(n.children)):
                c = n.children[j]
                if not isinstance(c, Sum):
                    n.children[j] = Sum([1.0], [c])
            children_layer.extend(n.children)
        children_layer_type = Sum
    else:
        raise Exception('unsupported node type: ' + str(current_node_type))

    complete_layers(children_layer,
                    current_node_type=children_layer_type,
                    depth=depth - 1)

    if root_layer:
        rebuild_scopes_bottom_up(layer_nodes[0])
        assign_ids(layer_nodes[0])

def __init__(self):
    p0 = Product(children=[
        Categorical(p=[0.3, 0.7], scope=1),
        Categorical(p=[0.4, 0.6], scope=2)
    ])
    p1 = Product(children=[
        Categorical(p=[0.5, 0.5], scope=1),
        Categorical(p=[0.6, 0.4], scope=2)
    ])
    s1 = Sum(weights=[0.3, 0.7], children=[p0, p1])

    p2 = Product(children=[Categorical(p=[0.2, 0.8], scope=0), s1])
    p3 = Product(children=[
        Categorical(p=[0.2, 0.8], scope=0),
        Categorical(p=[0.3, 0.7], scope=1)
    ])
    p4 = Product(children=[p3, Categorical(p=[0.4, 0.6], scope=2)])

    self.spn = Sum(weights=[0.4, 0.6], children=[p2, p4])

    assign_ids(self.spn)
    rebuild_scopes_bottom_up(self.spn)

def marginalize(node, keep):
    # keep must be a set of features that you want to keep
    keep = set(keep)

    def marg_recursive(node):
        new_node_scope = keep.intersection(set(node.scope))

        if len(new_node_scope) == 0:
            # we are summing out this node
            return None

        if isinstance(node, Leaf):
            if len(node.scope) > 1:
                raise Exception('Leaf Node with |scope| > 1')
            return deepcopy(node)

        newNode = node.__class__()

        if isinstance(node, Sum):
            newNode.weights.extend(node.weights)

        for c in node.children:
            new_c = marg_recursive(c)
            if new_c is None:
                continue
            newNode.children.append(new_c)

        newNode.scope.extend(new_node_scope)
        return newNode

    newNode = marg_recursive(node)
    assign_ids(newNode)
    newNode = Prune(newNode)
    valid, err = is_valid(newNode)
    assert valid, err
    return newNode

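# Usage sketch: with an SPN over variables {0, 1, 2} (e.g. the one returned by
# create_SPN2 above), keep variables 1 and 2 and sum variable 0 out.
spn = create_SPN2()
spn_marg = marginalize(spn, keep=[1, 2])
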
def test_equal_to_tf(self):
    # SPFlow implementation
    g00 = Gaussian(mean=0.0, stdev=1.0, scope=0)
    g10 = Gaussian(mean=1.0, stdev=2.0, scope=1)
    g01 = Gaussian(mean=3.0, stdev=2.0, scope=0)
    g11 = Gaussian(mean=5.0, stdev=1.0, scope=1)
    p0 = Product(children=[g00, g10])
    p1 = Product(children=[g01, g11])
    s = Sum(weights=[0.2, 0.8], children=[p0, p1])

    assign_ids(s)
    rebuild_scopes_bottom_up(s)

    # Test for 100 random samples
    data = np.random.randn(100, 2)

    # LL from SPN
    ll = log_likelihood(s, data)

    # PyTorch implementation
    g00 = GaussianNode(mean=0.0, std=1.0, scope=0)
    g10 = GaussianNode(mean=1.0, std=2.0, scope=1)
    g01 = GaussianNode(mean=3.0, std=2.0, scope=0)
    g11 = GaussianNode(mean=5.0, std=1.0, scope=1)
    p0 = ProductNode(children=[g00, g10])
    p1 = ProductNode(children=[g01, g11])
    rootnode = SumNode(weights=[0.2, 0.8], children=[p0, p1])

    datatensor = torch.Tensor(data)
    # LL from pytorch
    ll_torch = rootnode(datatensor)

    # Assert equality
    self.assertTrue(
        np.isclose(np.array(ll).squeeze(),
                   ll_torch.detach().numpy(),
                   atol=DELTA).all())

def _serialize_model(self, model):
    msg = spflow_capnp.Model.new_message()
    assert is_valid(model.root), "SPN invalid before serialization"

    # Assign (new) IDs to the nodes.
    # Keep track of already assigned IDs, so the IDs are
    # unique for the whole file.
    assign_ids(model.root, self.assignedIDs)

    # Rebuild scopes bottom-up
    rebuild_scopes_bottom_up(model.root)

    msg.rootNode = model.root.id
    msg.numFeatures = len(model.root.scope)
    msg.featureType = model.featureType

    scope = msg.init("scope", len(model.root.scope))
    for i, v in enumerate(model.root.scope):
        scope[i] = self._unwrap_value(v)

    name = ""
    if model.name is not None:
        name = model.name
    msg.name = name

    numNodes = get_number_of_nodes(model.root)
    nodes = msg.init("nodes", numNodes)
    nodeList = ListHandler(nodes)
    self._serialize_graph([model.root], nodeList)
    return msg

def Prune(node, check_cluster_centers=False):
    """
    Prunes an SPN. Ensures that each node has at least one child and that the
    types of a node and its children differ. Adapts weights and, optionally,
    cluster centers accordingly.
    :param node: root of the SPN to prune
    :return: the pruned SPN root
    """
    # v, err = is_valid(node)
    # assert v, err

    nodes = get_nodes_by_type(node, (Product, Sum))

    while len(nodes) > 0:
        n = nodes.pop()

        n_type = type(n)
        is_sum = n_type == Sum

        i = 0
        while i < len(n.children):
            c = n.children[i]

            # if a child has only one child itself, we can remove it and link
            # directly to the grandchild; in this case, no bloom filters can
            # be lost since we do not split
            if not isinstance(c, Leaf) and len(c.children) == 1:
                n.children[i] = c.children[0]
                continue

            # if the child has the same type as this node, merge it
            if n_type == type(c):
                if is_sum:
                    # cluster centers learned?
                    if len(n.cluster_centers) > 0:
                        old_len = len(n.cluster_centers)
                        len_child_cluster = len(c.cluster_centers)
                        del n.cluster_centers[i]
                        n.cluster_centers.extend(c.cluster_centers)

                        if check_cluster_centers:
                            assert old_len - 1 + len_child_cluster == len(
                                n.cluster_centers
                            ), "cluster_center length mismatch, nodes %s %s" % (n, c)

                del n.children[i]
                n.children.extend(c.children)

                if is_sum:
                    w = n.weights[i]
                    del n.weights[i]
                    n.weights.extend([cw * w for cw in c.weights])

                continue

            i += 1
        if is_sum and i > 0:
            n.weights[0] = 1.0 - sum(n.weights[1:])

    if isinstance(node, (Product, Sum)) and len(node.children) == 1:
        node = node.children[0]

    assign_ids(node)
    v, err = is_valid(node, check_cluster_centers=check_cluster_centers)
    assert v, err
    return node

def learn_structure(
        dataset,
        ds_context,
        split_rows,
        split_cols,
        create_leaf,
        next_operation=get_next_operation(),
        initial_scope=None,
        data_slicer=default_slicer,
):
    assert dataset is not None
    assert ds_context is not None
    assert split_rows is not None
    assert split_cols is not None
    assert create_leaf is not None
    assert next_operation is not None

    root = Product()
    root.children.append(None)

    if initial_scope is None:
        initial_scope = list(range(dataset.shape[1]))
        num_conditional_cols = None
    elif len(initial_scope) < dataset.shape[1]:
        num_conditional_cols = dataset.shape[1] - len(initial_scope)
    else:
        num_conditional_cols = None
        # the scope cannot reference more columns than the dataset has
        assert len(initial_scope) <= dataset.shape[1], \
            "check initial scope: %s" % initial_scope

    tasks = deque()
    tasks.append((dataset, root, 0, initial_scope, False, False))

    while tasks:
        local_data, parent, children_pos, scope, no_clusters, no_independencies = tasks.popleft()

        operation, op_params = next_operation(
            local_data,
            scope,
            create_leaf,
            no_clusters=no_clusters,
            no_independencies=no_independencies,
            is_first=(parent is root),
        )

        logging.debug("OP: {} on slice {} (remaining tasks {})".format(
            operation, local_data.shape, len(tasks)))

        if operation == Operation.REMOVE_UNINFORMATIVE_FEATURES:
            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            rest_scope = set(range(len(scope)))
            for col in op_params:
                rest_scope.remove(col)
                node.children.append(None)
                tasks.append((
                    data_slicer(local_data, [col], num_conditional_cols),
                    node,
                    len(node.children) - 1,
                    [scope[col]],
                    True,
                    True,
                ))

            next_final = False
            if len(rest_scope) == 0:
                continue
            elif len(rest_scope) == 1:
                next_final = True

            node.children.append(None)
            c_pos = len(node.children) - 1

            rest_cols = list(rest_scope)
            rest_scope = [scope[col] for col in rest_scope]

            tasks.append((
                data_slicer(local_data, rest_cols, num_conditional_cols),
                node,
                c_pos,
                rest_scope,
                next_final,
                next_final,
            ))
            continue

        elif operation == Operation.SPLIT_ROWS:
            split_start_t = perf_counter()
            data_slices = split_rows(local_data, ds_context, scope)
            split_end_t = perf_counter()
            logging.debug("\t\tfound {} row clusters (in {:.5f} secs)".format(
                len(data_slices), split_end_t - split_start_t))

            if len(data_slices) == 1:
                tasks.append(
                    (local_data, parent, children_pos, scope, True, False))
                continue

            node = Sum()
            node.scope.extend(scope)
            parent.children[children_pos] = node
            # assert parent.scope == node.scope

            for data_slice, scope_slice, proportion in data_slices:
                assert isinstance(scope_slice, list), "slice must be a list"

                node.children.append(None)
                node.weights.append(proportion)
                tasks.append((data_slice, node, len(node.children) - 1, scope,
                              False, False))
            continue

        elif operation == Operation.SPLIT_COLUMNS:
            split_start_t = perf_counter()
            data_slices = split_cols(local_data, ds_context, scope)
            split_end_t = perf_counter()
            logging.debug("\t\tfound {} col clusters (in {:.5f} secs)".format(
                len(data_slices), split_end_t - split_start_t))

            if len(data_slices) == 1:
                tasks.append(
                    (local_data, parent, children_pos, scope, False, True))
                assert np.shape(data_slices[0][0]) == np.shape(local_data)
                assert data_slices[0][1] == scope
                continue

            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            for data_slice, scope_slice, _ in data_slices:
                assert isinstance(scope_slice, list), "slice must be a list"

                node.children.append(None)
                tasks.append((data_slice, node, len(node.children) - 1,
                              scope_slice, False, False))
            continue

        elif operation == Operation.NAIVE_FACTORIZATION:
            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            local_tasks = []
            local_children_params = []
            split_start_t = perf_counter()
            for col in range(len(scope)):
                node.children.append(None)
                local_tasks.append(len(node.children) - 1)
                child_data_slice = data_slicer(local_data, [col],
                                               num_conditional_cols)
                local_children_params.append(
                    (child_data_slice, ds_context, [scope[col]]))

            # `pool` is a module-level worker pool (e.g. multiprocessing.Pool);
            # the serial fallback would be:
            # result_nodes = [create_leaf(*l) for l in local_children_params]
            result_nodes = pool.starmap(create_leaf, local_children_params)

            for child_pos, child in zip(local_tasks, result_nodes):
                node.children[child_pos] = child

            split_end_t = perf_counter()
            logging.debug(
                "\t\tnaive factorization {} columns (in {:.5f} secs)".format(
                    len(scope), split_end_t - split_start_t))

            continue

        elif operation == Operation.CREATE_LEAF:
            leaf_start_t = perf_counter()
            node = create_leaf(local_data, ds_context, scope)
            parent.children[children_pos] = node
            leaf_end_t = perf_counter()
            logging.debug(
                "\t\t created leaf {} for scope={} (in {:.5f} secs)".format(
                    node.__class__.__name__, scope, leaf_end_t - leaf_start_t))

        else:
            raise Exception("Invalid operation: " + operation)

    node = root.children[0]
    assign_ids(node)

    valid, err = is_valid(node)
    assert valid, "invalid spn: " + err

    node = Prune(node)

    valid, err = is_valid(node)
    assert valid, "invalid spn: " + err

    return node

def spn_for_evidence(spn,
                     evidence_ranges,
                     node_likelihood=None,
                     distribution_update_ranges=None):
    from spn.structure.Base import Sum, Product, Leaf, assign_ids
    from spn.algorithms.TransformStructure import Prune
    from spn.algorithms.Validity import is_valid
    from copy import deepcopy

    def spn_for_evidence_recursive(node):
        if isinstance(node, Leaf):
            if len(node.scope) > 1:
                raise Exception("Leaf Node with |scope| > 1")

            if evidence_ranges[node.scope[0]] is not None:
                t_node = type(node)
                if t_node in node_likelihood:
                    ranges = np.array([evidence_ranges])
                    prob = node_likelihood[t_node](
                        node, ranges, node_likelihood=node_likelihood)[0][0]
                    if prob == 0:
                        newNode = deepcopy(node)
                    else:
                        newNode = deepcopy(node)
                        distribution_update_ranges[t_node](
                            newNode, evidence_ranges[node.scope[0]])
                else:
                    raise Exception(
                        'No log-likelihood method specified for node type: ' +
                        str(type(node)))
            else:
                prob = 1
                newNode = deepcopy(node)

            return prob, newNode

        newNode = node.__class__()
        newNode.scope = node.scope

        if isinstance(node, Sum):
            new_weights = []
            new_childs = []
            for i, c in enumerate(node.children):
                prob, new_child = spn_for_evidence_recursive(c)
                new_prob = prob * node.weights[i]
                if new_prob > 0:
                    new_weights.append(new_prob)
                    new_childs.append(new_child)

            new_weights = np.array(new_weights)
            newNode.weights = new_weights / np.sum(new_weights)
            newNode.children = new_childs
            return np.sum(new_weights), newNode

        elif isinstance(node, Product):
            new_childs = []
            new_prob = 1.
            for i, c in enumerate(node.children):
                prob, new_child = spn_for_evidence_recursive(c)
                new_prob *= prob
                new_childs.append(new_child)

            newNode.children = new_childs
            return new_prob, newNode

    prob, newNode = spn_for_evidence_recursive(spn)
    assign_ids(newNode)
    newNode = Prune(newNode)
    valid, err = is_valid(newNode)
    assert valid, err
    return prob, newNode

def train_spn(window_size=3,
              min_instances_slice=10000,
              features=None,
              number_of_classes=3):
    if features is None:
        features = [20, 120]

    add_parametric_inference_support()
    add_parametric_text_support()

    data = get_data_in_window(window_size=window_size,
                              features=features,
                              three_classes=number_of_classes == 3)

    sss = sk.model_selection.StratifiedShuffleSplit(test_size=0.2,
                                                    train_size=0.8,
                                                    random_state=42)
    for train_index, test_index in sss.split(
            data[:, 0:window_size * window_size * len(features)],
            data[:, (window_size * window_size * len(features)) +
                 (int(window_size * window_size / 2))]):
        X_train, X_test = data[train_index], data[test_index]

        context_list = list()
        parametric_list = list()
        number_of_features = len(features)
        for _ in range(number_of_features * window_size * window_size):
            context_list.append(MetaType.REAL)
            parametric_list.append(Gaussian)

        for _ in range(window_size * window_size):
            context_list.append(MetaType.DISCRETE)
            parametric_list.append(Categorical)

        ds_context = Context(meta_types=context_list)
        ds_context.add_domains(data)
        ds_context.parametric_types = parametric_list

        spn = load_spn(window_size, features, min_instances_slice,
                       number_of_classes)
        if spn is None:
            spn = Sum()
            for class_pixel in tqdm(range(-window_size * window_size, 0)):
                for label, count in zip(
                        *np.unique(data[:, class_pixel], return_counts=True)):
                    train_data = X_train[X_train[:, class_pixel] == label, :]
                    branch = learn_parametric(
                        train_data,
                        ds_context,
                        min_instances_slice=min_instances_slice)
                    spn.children.append(branch)
                    spn.weights.append(train_data.shape[0])
                    spn.scope.extend(branch.scope)
            spn.weights = (np.array(spn.weights) / sum(spn.weights)).tolist()

            assign_ids(spn)
            save_spn(spn, window_size, features, min_instances_slice,
                     number_of_classes)

        res = np.ndarray((X_test.shape[0], number_of_classes))
        for i in tqdm(range(number_of_classes)):
            tmp = X_test.copy()
            tmp[:, -int((window_size**2) / 2)] = i
            res[:, i] = log_likelihood(spn, tmp)[:, 0]

        predicted_classes = np.argmax(res, axis=1).reshape((X_test.shape[0], 1))

        correct_predicted = 0
        for x, y in zip(X_test[:, -5], predicted_classes):
            if x == y[0]:
                correct_predicted += 1
        accuracy = correct_predicted / X_test.shape[0]
        return spn, accuracy

    scope=[0], init_weights=b_lf_1_init_weights)

b_lf_2_init_weights = {Gaussian: 0.3, Gamma: 0.7}
b_fat_right_leaf_2, _priors = type_mixture_leaf_factory(
    leaf_type='pm',
    leaf_meta_type=MetaType.REAL,
    type_to_param_map=pm_continuous_param_map,
    scope=[1],
    init_weights=b_lf_2_init_weights)
l_r_prod.children = [b_fat_right_leaf_1, b_fat_right_leaf_2]

#
# composing
rebuild_scopes_bottom_up(root)
assign_ids(root)
print(root)
print(spn_to_str_equation(root))

global_W = compute_global_type_weights(root)
print('GLOBAL_W', global_W)

global_W = compute_global_type_weights(root, aggr_type=True)
print('GLOBAL_W', global_W)

gw_map = compute_leaf_global_mix_weights(root)
print('G MIX W', gw_map)

part_map = compute_partition_id_map(root)
print('PARTITION MAP', part_map)