def test_categorical_input_layer():
    print('categorical input layer')
    # I could loop through alpha as well
    alpha = 0.1
    for var_id1 in range(len(vars)):
        for var_id2 in range(len(vars)):
            for var_val1 in range(vars[var_id1]):
                print('varid1, varid2, varval1',
                      var_id1, var_id2, var_val1)

                node1 = CategoricalIndicatorNode(var_id1, var_val1)

                var_vals2 = vars[var_id2]
                node2 = CategoricalSmoothedNode(var_id2, var_vals2,
                                                alpha, freqs[var_id2])

                # creating the generic input layer
                input_layer = CategoricalInputLayer([node1, node2])

                # evaluating according to an observation
                input_layer.eval(obs)
                layer_evals = input_layer.node_values()
                print('layer eval nodes')
                print(layer_evals)

                # computing the evaluations by hand
                val1 = (1 if var_val1 == obs[var_id1] or
                        obs[var_id1] == MARG_IND else 0)
                logval1 = log(val1) if val1 == 1 else LOG_ZERO

                logval2 = compute_smoothed_ll(obs[var_id2], freqs[var_id2],
                                              vars[var_id2], alpha)

                logvals = [logval1, logval2]
                print('log vals')
                print(logvals)

                for logval, layer_eval in zip(logvals, layer_evals):
                    if logval == LOG_ZERO:
                        # for zero logs, check this way for correctness
                        assert IS_LOG_ZERO(layer_eval) is True
                    else:
                        assert_almost_equal(logval, layer_eval, PRECISION)
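# For reference, a sketch of what the compute_smoothed_ll test helper used
# above presumably computes (inferred from its call sites, not the actual
# utility): the Laplace-smoothed log-likelihood of a single observed value,
# with MARG_IND marginalizing the variable out (log 1 = 0).
def compute_smoothed_ll_sketch(obs_val, var_freqs, n_values, alpha):
    if obs_val == MARG_IND:
        return 0.0
    smooth_freq = var_freqs[obs_val] + alpha
    if smooth_freq == 0:
        return LOG_ZERO
    tot_freq = sum(var_freqs)
    return log(smooth_freq / (tot_freq + alpha * n_values))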
def test_categorical_smoothed_node_data_smooth():
    data_1 = numpy.array([[1], [0], [1], [0], [1]])
    data_2 = numpy.array([[1, 0], [0, 1], [1, 1], [0, 1], [1, 0]])
    alpha = 0

    freqs = CategoricalSmoothedNode.smooth_freq_from_data(data_1, alpha)
    print('freqs', freqs)
    exp_freqs = CategoricalSmoothedNode.smooth_ll([2 / 5, 3 / 5], alpha)
    print('exp freqs', exp_freqs)
    assert_array_almost_equal(exp_freqs, freqs)

    # now create a node restricted to instances {0, 2, 4}
    input_node = CategoricalSmoothedNode(var=0, var_values=2,
                                         instances={0, 2, 4})
    input_node.smooth_probs(alpha, data=data_1)
    # rows 0, 2 and 4 of data_1 are all 1s, hence probs [0, 1]
    exp_probs = CategoricalSmoothedNode.smooth_ll([0, 1], alpha)
    print('exp probs', exp_probs)
    print('probs', input_node._var_probs)
    assert_log_array_almost_equal(exp_probs, input_node._var_probs)

    # resmoothing on data_2 shall consider only var 0 on the same instances
    input_node.smooth_probs(alpha, data=data_2)
    assert_log_array_almost_equal(exp_probs, input_node._var_probs)
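# A sketch of how smooth_probs(alpha, data=...) presumably exploits the
# node's `instances` set (an assumption drawn from the assertions above):
# counting is restricted to the stored instance ids and to the node's own
# variable, which is why data_1 and data_2 yield the same probabilities here.
def restricted_freqs_sketch(node_var, instances, data, n_values):
    freqs = [0] * n_values
    for row_id in instances:
        freqs[data[row_id, node_var]] += 1
    return freqs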
def test_categorical_smoothed_node_resmooth():
    for i, var in enumerate(vars):
        alpha = alphas[0]
        var_freq = freqs[i]
        smo = CategoricalSmoothedNode(i, var, alpha, var_freq)
        smo.eval(obs[i])
        print('smo values')
        print(smo.log_val)

        # checking the right value
        ll = compute_smoothed_ll(obs[i], var_freq, var, alpha)
        print('log values')
        print(ll)
        assert_almost_equal(ll, smo.log_val, 15)

        # now setting another alpha
        print('Changing smooth level')
        for alpha_new in alphas:
            smo.smooth_probs(alpha_new)
            smo.eval(obs[i])
            print('smo values')
            print(smo.log_val)

            ll = compute_smoothed_ll(obs[i], var_freq, var, alpha_new)
            print('log values')
            print(ll)
            assert_almost_equal(ll, smo.log_val, 15)
def test_product_layer_is_decomposable():
    # creating scopes and nodes
    scope1 = frozenset({0, 2, 3})
    scope2 = frozenset({10, 9})
    prod_node_1 = ProductNode(var_scope=scope1)
    prod_node_2 = ProductNode(var_scope=scope2)

    # creating children manually (argh!)
    for var in scope1:
        prod_node_1.add_child(SumNode(var_scope=frozenset({var})))
    for var in scope2:
        prod_node_2.add_child(CategoricalSmoothedNode(var=var, var_values=2))

    # creating the layer
    prod_layer = ProductLayer(nodes=[prod_node_1, prod_node_2])
    assert prod_layer.is_decomposable()

    # making it not decomposable anymore: var 2 now appears in two children
    scope3 = frozenset({2})
    prod_node_1.add_child(SumNode(var_scope=scope3))
    assert not prod_layer.is_decomposable()
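# For reference, a minimal sketch of the decomposability property these tests
# rely on (hypothetical helper, not the library implementation; the `children`
# and `var_scope` attribute names are assumptions): a product node is
# decomposable iff its children's scopes are pairwise disjoint and their
# union equals the node's own scope.
def is_decomposable_sketch(prod_node):
    union_scope = set()
    for child in prod_node.children:
        child_scope = child.var_scope
        if union_scope & child_scope:
            # two children share a variable
            return False
        union_scope |= child_scope
    return union_scope == prod_node.var_scope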
def test_categorical_smoothed_node_create_and_eval_keras():
    alpha = 0.0

    data = numpy.array([[1, 1, 1, 0],
                        [0, 0, 1, 0],
                        [0, 0, 1, 0],
                        [1, 0, 0, 0],
                        [1, 0, 1, 0],
                        [0, 1, 1, 0],
                        [MARG_IND, 0, 0, 1],
                        [MARG_IND, MARG_IND, MARG_IND,
                         MARG_IND]]).astype(numpy.int32)

    # symbolic input for the keras graph
    input_tensor = K.placeholder(ndim=2, dtype='int32')

    for i, var in enumerate(vars):
        log_vals = []
        var_freq = freqs[i]
        smo = CategoricalSmoothedNode(i, var, alpha, var_freq)
        smo.build_k(input_tensor)

        # evaluating instance by instance through the linked representation
        for d in data:
            smo.eval(d)
            log_vals.append(smo.log_val)
            print('smo values')
            print(smo.log_val)

        # evaluating the whole batch through the keras representation
        eval_input_node_f = K.function([input_tensor], [smo.log_vals])
        keras_log_vals = eval_input_node_f([data])[0]
        print('keras vals')
        print(keras_log_vals)

        assert_array_almost_equal(numpy.array(log_vals)[:, numpy.newaxis],
                                  keras_log_vals,
                                  decimal=4)
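# A numpy-only sketch of the batch evaluation the keras path above is checked
# against (hypothetical helper; the real node builds a K.function): index the
# node's log-probability table with the observed column, treating MARG_IND as
# full marginalization, i.e. log 1 = 0.
def batch_eval_sketch(log_probs, data_col):
    out = numpy.zeros(len(data_col))
    for j, obs_val in enumerate(data_col):
        if obs_val != MARG_IND:
            out[j] = log_probs[obs_val]
    return out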
def test_product_node_is_decomposable():
    # create a prod node with a scope
    scope = frozenset({0, 2, 7, 13})

    # creating sub scopes
    sub_scope_1 = frozenset({0})
    sub_scope_2 = frozenset({0, 2})
    sub_scope_3 = frozenset({7})
    sub_scope_4 = frozenset({17})
    sub_scope_5 = frozenset({7, 13})

    # now with decomposable children
    child1 = SumNode(var_scope=sub_scope_2)
    child2 = SumNode(var_scope=sub_scope_5)
    child3 = SumNode(var_scope=sub_scope_2)
    child4 = SumNode(var_scope=sub_scope_1)

    prod_node = ProductNode(var_scope=scope)
    prod_node.add_child(child1)
    prod_node.add_child(child2)
    assert prod_node.is_decomposable()

    # overlapping scopes: var 0 appears in both child4 and child1
    prod_node = ProductNode(var_scope=scope)
    prod_node.add_child(child4)
    prod_node.add_child(child1)
    prod_node.add_child(child2)
    assert not prod_node.is_decomposable()

    # the union {0, 7, 13} does not cover the node scope
    prod_node = ProductNode(var_scope=scope)
    prod_node.add_child(child4)
    prod_node.add_child(child2)
    assert not prod_node.is_decomposable()

    # now with input nodes
    child5 = CategoricalSmoothedNode(var=0, var_values=2)
    child6 = CategoricalSmoothedNode(var=2, var_values=2)
    child7 = CategoricalSmoothedNode(var=7, var_values=2)
    child8 = CategoricalSmoothedNode(var=13, var_values=2)
    child9 = CategoricalSmoothedNode(var=17, var_values=2)

    prod_node = ProductNode(var_scope=scope)
    prod_node.add_child(child5)
    prod_node.add_child(child6)
    prod_node.add_child(child7)
    prod_node.add_child(child8)
    assert prod_node.is_decomposable()

    # var 17 is not in the node scope
    prod_node = ProductNode(var_scope=scope)
    prod_node.add_child(child5)
    prod_node.add_child(child6)
    prod_node.add_child(child7)
    prod_node.add_child(child9)
    assert not prod_node.is_decomposable()

    # var 7 is missing from the children scopes
    prod_node = ProductNode(var_scope=scope)
    prod_node.add_child(child5)
    prod_node.add_child(child6)
    prod_node.add_child(child8)
    assert not prod_node.is_decomposable()
def test_layered_pruned_linked_spn_cltree():
    #
    # creating all the data slices
    # the slicing is a fake stub
    rows = 5
    cols = 5
    var = 1
    values = 2
    vars = [2, 3]
    var_values = [2, 2]
    s_data = numpy.array([[0, 1], [1, 1], [1, 0], [0, 0]])

    node_1 = SumNode()
    node_1.id = 1

    node_2 = ProductNode()
    node_2.id = 2

    node_3 = SumNode()
    node_3.id = 3

    # adding the first level
    weight_12 = 0.4
    weight_13 = 0.6
    node_1.add_child(node_2, weight_12)
    node_1.add_child(node_3, weight_13)

    node_4 = ProductNode()
    node_4.id = 4

    leaf_5 = CategoricalSmoothedNode(var, values)
    leaf_5.id = 5

    # not adding the slice to the stack
    node_2.add_child(node_4)
    node_2.add_child(leaf_5)

    node_6 = SumNode()
    node_6.id = 6

    node_7 = SumNode()
    node_7.id = 7

    weight_36 = 0.1
    weight_37 = 0.9
    node_3.add_child(node_6, weight_36)
    node_3.add_child(node_7, weight_37)

    node_8 = ProductNode()
    node_8.id = 8

    #
    # this is a cltree leaf
    leaf_15 = CLTreeNode(vars=vars, var_values=var_values, data=s_data)
    leaf_15.id = 15

    node_4.add_child(node_8)
    node_4.add_child(leaf_15)

    leaf_13 = CategoricalSmoothedNode(var, values)
    leaf_13.id = 13

    leaf_14 = CLTreeNode(vars=vars, var_values=var_values, data=s_data)
    leaf_14.id = 14

    node_8.add_child(leaf_13)
    node_8.add_child(leaf_14)

    leaf_9 = CLTreeNode(vars=vars, var_values=var_values, data=s_data)
    leaf_9.id = 9

    node_10 = ProductNode()
    node_10.id = 10

    leaf_18 = CategoricalSmoothedNode(var, values)
    leaf_18.id = 18

    leaf_19 = CategoricalSmoothedNode(var, values)
    leaf_19.id = 19

    node_10.add_child(leaf_18)
    node_10.add_child(leaf_19)

    weight_69 = 0.3
    weight_610 = 0.7
    node_6.add_child(leaf_9, weight_69)
    node_6.add_child(node_10, weight_610)

    node_11 = ProductNode()
    node_11.id = 11

    leaf_20 = CategoricalSmoothedNode(var, values)
    leaf_20.id = 20

    leaf_21 = CategoricalSmoothedNode(var, values)
    leaf_21.id = 21

    node_11.add_child(leaf_20)
    node_11.add_child(leaf_21)

    node_12 = ProductNode()
    node_12.id = 12

    leaf_22 = CLTreeNode(vars=vars, var_values=var_values, data=s_data)
    leaf_22.id = 22

    leaf_23 = CategoricalSmoothedNode(var, values)
    leaf_23.id = 23

    node_12.add_child(leaf_22)
    node_12.add_child(leaf_23)

    weight_711 = 0.5
    weight_712 = 0.5
    node_7.add_child(node_11, weight_711)
    node_7.add_child(node_12, weight_712)

    print('Added nodes')
    root_node = SpnFactory.layered_pruned_linked_spn(node_1)
    print('ROOT NODE', root_node)

    spn = SpnFactory.layered_linked_spn(root_node)
    print('SPN', spn)

    assert spn.n_layers() == 3

    for i, layer in enumerate(spn.top_down_layers()):
        if i == 0:
            assert layer.n_nodes() == 1
        elif i == 1:
            assert layer.n_nodes() == 4
        elif i == 2:
            assert layer.n_nodes() == 10
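# A minimal sketch of the pruning rule the factory methods above presumably
# apply (hypothetical helper, not the library implementation; `children` and
# `weights` attribute names and the in-place mutation are assumptions): a
# child of the same type as its parent is collapsed into the parent,
# multiplying sum weights along the collapsed edge, so that sum and product
# layers strictly alternate.
def prune_same_type_sketch(node):
    # only inner nodes have children to prune
    if not isinstance(node, (SumNode, ProductNode)):
        return node
    children = [prune_same_type_sketch(child) for child in node.children]
    weights = (node.weights if isinstance(node, SumNode)
               else [None] * len(children))
    new_children, new_weights = [], []
    for child, weight in zip(children, weights):
        if type(child) is type(node):
            # same type as the parent: adopt the grandchildren
            g_weights = (child.weights if isinstance(child, SumNode)
                         else [None] * len(child.children))
            for g_child, g_weight in zip(child.children, g_weights):
                new_children.append(g_child)
                new_weights.append(weight * g_weight
                                   if weight is not None else None)
        else:
            new_children.append(child)
            new_weights.append(weight)
    node.children = new_children
    if isinstance(node, SumNode):
        node.weights = new_weights
    return node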
def test_pruned_spn_from_slices():
    #
    # creating all the data slices
    # the slicing is a fake stub
    rows = 5
    cols = 5
    var = 1
    values = 2

    node_assoc = {}
    building_stack = deque()

    slice_1 = DataSlice.whole_slice(rows, cols)
    slice_1.type = SumNode
    node_1 = SumNode()
    node_1.id = slice_1.id
    node_assoc[node_1.id] = node_1
    building_stack.append(slice_1)

    slice_2 = DataSlice.whole_slice(rows, cols)
    slice_2.type = ProductNode
    node_2 = ProductNode()
    node_2.id = slice_2.id
    node_assoc[node_2.id] = node_2
    building_stack.append(slice_2)

    slice_3 = DataSlice.whole_slice(rows, cols)
    slice_3.type = SumNode
    node_3 = SumNode()
    node_3.id = slice_3.id
    node_assoc[node_3.id] = node_3
    building_stack.append(slice_3)

    # adding the first level
    slice_1.add_child(slice_2, 0.8)
    slice_1.add_child(slice_3, 0.2)

    slice_4 = DataSlice.whole_slice(rows, cols)
    slice_4.type = ProductNode
    node_4 = ProductNode()
    node_4.id = slice_4.id
    node_assoc[node_4.id] = node_4
    building_stack.append(slice_4)

    leaf_5 = CategoricalSmoothedNode(var, values)
    slice_5 = DataSlice.whole_slice(rows, cols)
    leaf_5.id = slice_5.id
    node_assoc[leaf_5.id] = leaf_5
    # not adding the slice to the stack

    slice_2.add_child(slice_4)
    slice_2.add_child(slice_5)

    slice_6 = DataSlice.whole_slice(rows, cols)
    slice_6.type = SumNode
    node_6 = SumNode()
    node_6.id = slice_6.id
    node_assoc[node_6.id] = node_6
    building_stack.append(slice_6)

    slice_7 = DataSlice.whole_slice(rows, cols)
    slice_7.type = SumNode
    node_7 = SumNode()
    node_7.id = slice_7.id
    node_assoc[node_7.id] = node_7
    building_stack.append(slice_7)

    slice_3.add_child(slice_6, 0.4)
    slice_3.add_child(slice_7, 0.6)

    slice_8 = DataSlice.whole_slice(rows, cols)
    slice_8.type = ProductNode
    node_8 = ProductNode()
    node_8.id = slice_8.id
    node_assoc[node_8.id] = node_8
    building_stack.append(slice_8)

    leaf_15 = CategoricalSmoothedNode(var, values)
    slice_15 = DataSlice.whole_slice(rows, cols)
    leaf_15.id = slice_15.id
    node_assoc[leaf_15.id] = leaf_15

    slice_4.add_child(slice_8)
    slice_4.add_child(slice_15)

    leaf_13 = CategoricalSmoothedNode(var, values)
    slice_13 = DataSlice.whole_slice(rows, cols)
    leaf_13.id = slice_13.id
    node_assoc[leaf_13.id] = leaf_13

    leaf_14 = CategoricalSmoothedNode(var, values)
    slice_14 = DataSlice.whole_slice(rows, cols)
    leaf_14.id = slice_14.id
    node_assoc[leaf_14.id] = leaf_14

    slice_8.add_child(slice_13)
    slice_8.add_child(slice_14)

    slice_9 = DataSlice.whole_slice(rows, cols)
    slice_9.type = ProductNode
    node_9 = ProductNode()
    node_9.id = slice_9.id
    node_assoc[node_9.id] = node_9
    building_stack.append(slice_9)

    leaf_16 = CategoricalSmoothedNode(var, values)
    slice_16 = DataSlice.whole_slice(rows, cols)
    leaf_16.id = slice_16.id
    node_assoc[leaf_16.id] = leaf_16

    leaf_17 = CategoricalSmoothedNode(var, values)
    slice_17 = DataSlice.whole_slice(rows, cols)
    leaf_17.id = slice_17.id
    node_assoc[leaf_17.id] = leaf_17

    slice_9.add_child(slice_16)
    slice_9.add_child(slice_17)

    slice_10 = DataSlice.whole_slice(rows, cols)
    slice_10.type = ProductNode
    node_10 = ProductNode()
    node_10.id = slice_10.id
    node_assoc[node_10.id] = node_10
    building_stack.append(slice_10)

    leaf_18 = CategoricalSmoothedNode(var, values)
    slice_18 = DataSlice.whole_slice(rows, cols)
    leaf_18.id = slice_18.id
    node_assoc[leaf_18.id] = leaf_18

    leaf_19 = CategoricalSmoothedNode(var, values)
    slice_19 = DataSlice.whole_slice(rows, cols)
    leaf_19.id = slice_19.id
    node_assoc[leaf_19.id] = leaf_19

    slice_10.add_child(slice_18)
    slice_10.add_child(slice_19)

    slice_6.add_child(slice_9, 0.1)
    slice_6.add_child(slice_10, 0.9)

    slice_11 = DataSlice.whole_slice(rows, cols)
    slice_11.type = ProductNode
    node_11 = ProductNode()
    node_11.id = slice_11.id
    node_assoc[node_11.id] = node_11
    building_stack.append(slice_11)

    leaf_20 = CategoricalSmoothedNode(var, values)
    slice_20 = DataSlice.whole_slice(rows, cols)
    leaf_20.id = slice_20.id
    node_assoc[leaf_20.id] = leaf_20

    leaf_21 = CategoricalSmoothedNode(var, values)
    slice_21 = DataSlice.whole_slice(rows, cols)
    leaf_21.id = slice_21.id
    node_assoc[leaf_21.id] = leaf_21

    slice_11.add_child(slice_20)
    slice_11.add_child(slice_21)

    slice_12 = DataSlice.whole_slice(rows, cols)
    slice_12.type = ProductNode
    node_12 = ProductNode()
    node_12.id = slice_12.id
    node_assoc[node_12.id] = node_12
    building_stack.append(slice_12)

    leaf_22 = CategoricalSmoothedNode(var, values)
    slice_22 = DataSlice.whole_slice(rows, cols)
    leaf_22.id = slice_22.id
    node_assoc[leaf_22.id] = leaf_22

    leaf_23 = CategoricalSmoothedNode(var, values)
    slice_23 = DataSlice.whole_slice(rows, cols)
    leaf_23.id = slice_23.id
    node_assoc[leaf_23.id] = leaf_23

    slice_12.add_child(slice_22)
    slice_12.add_child(slice_23)

    slice_7.add_child(slice_11, 0.2)
    # 0.8, not 0.7: sum node weights must sum to 1, as every other pair here
    slice_7.add_child(slice_12, 0.8)

    root_node = SpnFactory.pruned_spn_from_slices(node_assoc, building_stack)
    print('ROOT NODE', root_node)

    spn = SpnFactory.layered_linked_spn(root_node)
    print('SPN', spn)

    assert spn.n_layers() == 3

    for i, layer in enumerate(spn.top_down_layers()):
        if i == 0:
            assert layer.n_nodes() == 1
        elif i == 1:
            assert layer.n_nodes() == 5
        elif i == 2:
            assert layer.n_nodes() == 12
def fit_structure(self, data, feature_sizes):
    """
    data is a numpy array of size {n_instances X n_features}
    feature_sizes is an array of integers representing feature ranges
    """

    #
    # resetting the data slice ids (just in case)
    DataSlice.reset_id_counter()

    tot_n_instances = data.shape[0]
    tot_n_features = data.shape[1]

    logging.info('Learning SPN structure on a (%d X %d) dataset',
                 tot_n_instances, tot_n_features)
    learn_start_t = perf_counter()

    #
    # a queue containing the data slices to process
    slices_to_process = deque()

    # a stack for building nodes
    building_stack = deque()

    # a dict to keep track of id->nodes
    node_id_assoc = {}

    # creating the first slice
    whole_slice = DataSlice.whole_slice(tot_n_instances, tot_n_features)
    slices_to_process.append(whole_slice)

    first_run = True

    #
    # iteratively process & split slices
    #
    while slices_to_process:

        # process a slice
        current_slice = slices_to_process.popleft()

        # pointers to the current data slice
        current_instances = current_slice.instance_ids
        current_features = current_slice.feature_ids
        current_id = current_slice.id

        n_instances = len(current_instances)
        n_features = len(current_features)

        logging.info('\n*** Processing slice %d (%d X %d)',
                     current_id, n_instances, n_features)
        logging.debug('\tinstances:%s\n\tfeatures:%s',
                      current_instances, current_features)

        #
        # is this a leaf node or can we split?
        if n_features == 1:
            logging.info('---> Adding a leaf (just one feature)')

            (feature_id, ) = current_features
            feature_size = feature_sizes[feature_id]

            # slicing from the original dataset
            slice_data_rows = data[current_instances, :]
            current_slice_data = slice_data_rows[:, current_features]

            # create the node
            leaf_node = CategoricalSmoothedNode(var=feature_id,
                                                var_values=feature_size,
                                                data=current_slice_data,
                                                instances=current_instances,
                                                alpha=self._alpha)
            # print('lnvf', leaf_node._var_freqs)

            # storing links
            # input_nodes.append(leaf_node)
            leaf_node.id = current_id
            node_id_assoc[current_id] = leaf_node

            logging.debug('\tCreated Smooth Node %s', leaf_node)

        elif (n_instances <= self._min_instances_slice and n_features > 1):
            #
            # splitting the slice on each feature
            logging.info('---> Few instances (%d), decompose all features',
                         n_instances)
            #
            # shall we put a cltree leaf here or not?
            if self._cltree_leaves:
                logging.info('into a Chow-Liu tree')
                #
                # slicing data
                slice_data_rows = data[current_instances, :]
                current_slice_data = slice_data_rows[:, current_features]

                current_feature_sizes = [feature_sizes[i]
                                         for i in current_features]
                #
                # creating a Chow-Liu tree as leaf
                leaf_node = CLTreeNode(vars=current_features,
                                       var_values=current_feature_sizes,
                                       data=current_slice_data,
                                       alpha=self._alpha)
                #
                # storing links
                leaf_node.id = current_id
                node_id_assoc[current_id] = leaf_node

                logging.debug('\tCreated Chow-Liu Tree Node %s', leaf_node)

            elif self._kde and n_instances > 1:
                estimate_kernel_density_spn(current_slice,
                                            feature_sizes,
                                            data,
                                            self._alpha,
                                            node_id_assoc,
                                            building_stack,
                                            slices_to_process)

            # elif n_instances == 1:  # FIXME: there is a bug here
            else:
                current_slice, slices_to_process, building_stack, node_id_assoc = \
                    self.make_naive_factorization(current_slice,
                                                  slices_to_process,
                                                  building_stack,
                                                  node_id_assoc)
        else:
            #
            # slicing from the original dataset
            slice_data_rows = data[current_instances, :]
            current_slice_data = slice_data_rows[:, current_features]

            split_on_features = False
            #
            # first run is a split on rows
            if first_run:
                logging.info('-- FIRST RUN --')
                first_run = False
            else:
                #
                # try clustering on cols
                logging.debug('...trying to split on columns')
                split_start_t = perf_counter()
                print(data.shape)
                dependent_features, other_features = greedy_feature_split(
                    data, current_slice, feature_sizes,
                    self._g_factor, self._rand_gen)
                split_end_t = perf_counter()
                logging.info('...tried to split on columns in {}'.format(
                    split_end_t - split_start_t))
                if len(other_features) > 0:
                    split_on_features = True
            #
            # have dependent components been found?
            if split_on_features:
                #
                # splitting on columns
                logging.info('---> Splitting on features' +
                             ' {} -> ({}, {})'.format(len(current_features),
                                                      len(dependent_features),
                                                      len(other_features)))

                #
                # creating two new data slices and putting them on the queue
                first_slice = DataSlice(current_instances, dependent_features)
                second_slice = DataSlice(current_instances, other_features)
                slices_to_process.append(first_slice)
                slices_to_process.append(second_slice)

                children_ids = [first_slice.id, second_slice.id]

                #
                # storing the parent-children links
                current_slice.type = ProductNode
                building_stack.append(current_slice)
                current_slice.add_child(first_slice)
                current_slice.add_child(second_slice)

                #
                # creating the product node
                prod_node = ProductNode(var_scope=frozenset(current_features))
                prod_node.id = current_id
                node_id_assoc[current_id] = prod_node
                logging.debug('\tCreated Prod Node %s (with children %s)',
                              prod_node, children_ids)

            else:
                #
                # clustering on rows
                logging.info('---> Splitting on rows')

                #
                # at most n_rows clusters, for sklearn
                k_row_clusters = min(self._n_cluster_splits, n_instances - 1)

                clustering = cluster_rows(
                    data,
                    current_slice,
                    n_clusters=k_row_clusters,
                    cluster_method=self._row_cluster_method,
                    n_iters=self._n_iters,
                    n_restarts=self._n_restarts,
                    cluster_penalty=self._cluster_penalty,
                    rand_gen=self._rand_gen,
                    sklearn_args=self._sklearn_args)

                if len(clustering) < 2:
                    logging.info('\n\n\nLess than 2 clusters\n\n (%d)',
                                 len(clustering))
                    logging.info('forcing a naive factorization')
                    current_slice, slices_to_process, building_stack, node_id_assoc = \
                        self.make_naive_factorization(current_slice,
                                                      slices_to_process,
                                                      building_stack,
                                                      node_id_assoc)
                else:
                    # logging.debug('obtained clustering %s', clustering)
                    logging.info('clustered into %d parts (min %d)',
                                 len(clustering), k_row_clusters)

                    # splitting
                    cluster_slices = [DataSlice(cluster, current_features)
                                      for cluster in clustering]
                    cluster_slices_ids = [slice.id
                                          for slice in cluster_slices]

                    # cluster_prior = 5.0
                    # cluster_weights = [(slice.n_instances() + cluster_prior) /
                    #                    (n_instances + cluster_prior * len(cluster_slices))
                    #                    for slice in cluster_slices]
                    cluster_weights = [slice.n_instances() / n_instances
                                       for slice in cluster_slices]

                    #
                    # appending for processing
                    slices_to_process.extend(cluster_slices)

                    #
                    # storing links
                    # current_slice.children = cluster_slices_ids
                    # current_slice.weights = cluster_weights
                    current_slice.type = SumNode
                    building_stack.append(current_slice)
                    for child_slice, child_weight in zip(cluster_slices,
                                                         cluster_weights):
                        current_slice.add_child(child_slice, child_weight)

                    #
                    # building a sum node
                    SCOPES_DICT[frozenset(current_features)] += 1
                    sum_node = SumNode(var_scope=frozenset(current_features))
                    sum_node.id = current_id
                    node_id_assoc[current_id] = sum_node
                    logging.debug('\tCreated Sum Node %s (with children %s)',
                                  sum_node, cluster_slices_ids)

    learn_end_t = perf_counter()
    logging.info('\n\n\tStructure learned in %f secs',
                 (learn_end_t - learn_start_t))

    #
    # linking the spn graph (parent -> children)
    #
    logging.info('===> Building tree')

    link_start_t = perf_counter()
    root_build_node = building_stack[0]
    root_node = node_id_assoc[root_build_node.id]
    logging.debug('root node: %s', root_node)

    root_node = SpnFactory.pruned_spn_from_slices(node_id_assoc,
                                                  building_stack)
    link_end_t = perf_counter()
    logging.info('\tLinked the spn in %f secs (root_node %s)',
                 (link_end_t - link_start_t), root_node)

    #
    # building layers
    #
    logging.info('===> Layering spn')
    layer_start_t = perf_counter()
    spn = SpnFactory.layered_linked_spn(root_node)
    layer_end_t = perf_counter()
    logging.info('\tLayered the spn in %f secs',
                 (layer_end_t - layer_start_t))

    logging.info('\nLearned SPN\n\n%s', spn.stats())
    # logging.info('%s', SCOPES_DICT.most_common(30))

    return spn
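# A minimal usage sketch for fit_structure above. `LearnSPN` as the class
# exposing it and the constructor arguments shown are assumptions, not
# confirmed by this module.
def example_fit_structure():
    rand_gen = numpy.random.RandomState(1337)
    # a toy dataset: 100 instances over 4 binary features
    synth_data = rand_gen.binomial(n=1, p=0.5, size=(100, 4))
    feature_sizes = [2, 2, 2, 2]

    # hypothetical constructor arguments
    learner = LearnSPN(min_instances_slice=20, alpha=0.1)
    spn = learner.fit_structure(synth_data, feature_sizes)
    print(spn.stats())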
def test_categorical_to_indicator_input_layer():
    #
    # creating all the data slices
    # the slicing is a fake stub
    # rows = 5
    # cols = 5
    var_1 = 0
    values_1 = 2
    var_2 = 1
    values_2 = 3
    var_3 = 2
    values_3 = 4

    node_1 = SumNode()
    node_1.id = 1

    node_2 = ProductNode()
    node_2.id = 2

    node_3 = SumNode()
    node_3.id = 3

    # adding the first level
    weight_12 = 0.4
    weight_13 = 0.6
    node_1.add_child(node_2, weight_12)
    node_1.add_child(node_3, weight_13)

    node_4 = ProductNode()
    node_4.id = 4

    leaf_5 = CategoricalSmoothedNode(var_1, values_1)
    leaf_5.id = 5

    # not adding the slice to the stack
    node_2.add_child(node_4)
    node_2.add_child(leaf_5)

    node_6 = SumNode()
    node_6.id = 6

    node_7 = SumNode()
    node_7.id = 7

    weight_36 = 0.1
    weight_37 = 0.9
    node_3.add_child(node_6, weight_36)
    node_3.add_child(node_7, weight_37)

    node_8 = ProductNode()
    node_8.id = 8

    leaf_15 = CategoricalSmoothedNode(var_2, values_2)
    leaf_15.id = 15

    node_4.add_child(node_8)
    node_4.add_child(leaf_15)

    leaf_13 = CategoricalSmoothedNode(var_3, values_3)
    leaf_13.id = 13

    leaf_14 = CategoricalSmoothedNode(var_1, values_1)
    leaf_14.id = 14

    node_8.add_child(leaf_13)
    node_8.add_child(leaf_14)

    node_9 = ProductNode()
    node_9.id = 9

    leaf_16 = CategoricalSmoothedNode(var_2, values_2)
    leaf_16.id = 16

    leaf_17 = CategoricalSmoothedNode(var_3, values_3)
    leaf_17.id = 17

    node_9.add_child(leaf_16)
    node_9.add_child(leaf_17)

    node_10 = ProductNode()
    node_10.id = 10

    leaf_18 = CategoricalSmoothedNode(var_2, values_2)
    leaf_18.id = 18

    leaf_19 = CategoricalSmoothedNode(var_2, values_2)
    leaf_19.id = 19

    node_10.add_child(leaf_18)
    node_10.add_child(leaf_19)

    weight_69 = 0.3
    weight_610 = 0.7
    node_6.add_child(node_9, weight_69)
    node_6.add_child(node_10, weight_610)

    node_11 = ProductNode()
    node_11.id = 11

    leaf_20 = CategoricalSmoothedNode(var_1, values_1)
    leaf_20.id = 20

    leaf_21 = CategoricalSmoothedNode(var_3, values_3)
    leaf_21.id = 21

    node_11.add_child(leaf_20)
    node_11.add_child(leaf_21)

    node_12 = ProductNode()
    node_12.id = 12

    leaf_22 = CategoricalSmoothedNode(var_1, values_1)
    leaf_22.id = 22

    leaf_23 = CategoricalSmoothedNode(var_3, values_3)
    leaf_23.id = 23

    node_12.add_child(leaf_22)
    node_12.add_child(leaf_23)

    weight_711 = 0.5
    weight_712 = 0.5
    node_7.add_child(node_11, weight_711)
    node_7.add_child(node_12, weight_712)

    root_node = SpnFactory.layered_pruned_linked_spn(node_1)
    print('ROOT NODE', root_node)

    spn = SpnFactory.layered_linked_spn(root_node)
    print('SPN', spn)

    assert spn.n_layers() == 3

    for i, layer in enumerate(spn.top_down_layers()):
        if i == 0:
            assert layer.n_nodes() == 1
        elif i == 1:
            assert layer.n_nodes() == 5
        elif i == 2:
            assert layer.n_nodes() == 12

    #
    # changing the input layer
    spn = linked_categorical_input_to_indicators(spn)
    print('Changed input layer to indicator variables')
    print(spn)
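# A minimal sketch of the transformation exercised right above (hypothetical
# helper, not the library's linked_categorical_input_to_indicators): each
# CategoricalSmoothedNode over a variable is replaced by a sum node over that
# variable's indicator nodes, weighted by the leaf's smoothed probabilities
# (attribute names `var` and `_var_probs` are assumptions).
def categorical_to_indicator_sketch(leaf):
    sum_node = SumNode(var_scope=frozenset({leaf.var}))
    for value, log_prob in enumerate(leaf._var_probs):
        indicator = CategoricalIndicatorNode(leaf.var, value)
        sum_node.add_child(indicator, numpy.exp(log_prob))
    return sum_node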