def estimate_kernel_density_spn(data_slice,
                                feature_sizes,
                                data,
                                alpha,
                                node_id_assoc,
                                building_stack,
                                slices_to_process):
    """
    A mixture with one component for each instance
    """

    instance_ids = data_slice.instance_ids
    feature_ids = data_slice.feature_ids
    current_id = data_slice.id

    n_instances = len(instance_ids)
    n_features = len(feature_ids)

    logging.info('Adding a kernel density estimation ' +
                 'over a slice {0} X {1}'.format(n_instances, n_features))

    #
    # create sum node
    root_sum_node = SumNode(var_scope=frozenset(feature_ids))
    data_slice.type = SumNode
    building_stack.append(data_slice)

    root_sum_node.id = current_id
    node_id_assoc[current_id] = root_sum_node

    #
    # for each instance
    for i in instance_ids:
        #
        # create a slice
        instance_slice = DataSlice(numpy.array([i]), feature_ids)
        slices_to_process.append(instance_slice)

        #
        # linking with appropriate weight
        data_slice.add_child(instance_slice, 1.0 / n_instances)

    return root_sum_node, node_id_assoc, building_stack, slices_to_process
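
# Illustrative sketch (not part of the original module): how the KDE helper above
# can be driven on its own. The toy data values and alpha=0.1 are assumptions for
# the example only; it relies on the same names used above (numpy, deque, DataSlice).
def _example_kde_mixture_usage():
    data = numpy.array([[0, 1],
                        [1, 1],
                        [1, 0]])
    feature_sizes = [2, 2]
    whole_slice = DataSlice.whole_slice(data.shape[0], data.shape[1])

    root, node_id_assoc, building_stack, slices_to_process = \
        estimate_kernel_density_spn(whole_slice, feature_sizes, data, 0.1,
                                    {}, deque(), deque())

    # the root is a sum node with one child slice per instance, each weighted
    # 1 / n_instances; the per-instance slices are left on the queue so the
    # caller can later turn them into leaf distributions
    assert len(slices_to_process) == data.shape[0]
    return root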
def fit_structure(self, data):

    #
    # a queue containing the data slices to process
    slices_to_process = deque()
    # a stack for building nodes
    building_stack = deque()
    # a dict to keep track of id->nodes
    node_id_assoc = {}

    # creating the first slice
    whole_slice = DataSlice.whole_slice(data.shape[0], data.shape[1])
    slices_to_process.append(whole_slice)

    cluster_first = self._cluster_first

    #
    # iteratively process & split slices
    #
    while slices_to_process:

        # process a slice
        current_slice = slices_to_process.popleft()

        # pointers to the current data slice
        current_instances = current_slice.instance_ids
        current_features = current_slice.feature_ids
        current_id = current_slice.id

        n_features = len(current_features)

        # if n_features > 1:
        #     # print("removing Zeros")
        #     datarowsIdx = numpy.sum(data[current_instances, :][:, current_features], 1) > 0
        #     if not any(datarowsIdx):
        #         datarowsIdx[0] = True
        #     current_instances = current_slice.instance_ids[datarowsIdx]

        n_instances = len(current_instances)

        # if n_instances == 0:
        #     # too strong, cutting the zeroes
        #     current_instances = [current_slice.instance_ids[0]]
        #     n_instances = len(current_instances)

        slice_data_rows = data[current_instances, :]
        current_slice_data = slice_data_rows[:, current_features]

        # is this a leaf node or can we split?
        if n_features == 1 and (current_slice.doNotCluster or
                                n_instances <= self._min_instances_slice):

            (feature_id, ) = current_features

            if self.family == "poisson":
                leaf_node = PoissonNode(data, current_instances, current_features)
            elif self.family == "gaussian":
                leaf_node = GaussianNode(data, current_instances, current_features)

            # storing links
            # input_nodes.append(leaf_node)
            leaf_node.id = current_id
            node_id_assoc[current_id] = leaf_node

        # elif (current_slice_data.shape[0] < self._min_instances_slice):
        # elif ((n_instances <= self._min_instances_slice and n_features > 1) and
        #       current_slice_data.shape[0] < self._min_instances_slice):
        # elif ((n_instances <= self._min_instances_slice and n_features > 1)):
        elif n_features > 1 and (current_slice.doNotCluster or
                                 n_instances <= self._min_instances_slice):
            #
            # naive factorization: one child slice per feature
            child_slices = [DataSlice(current_instances, [feature_id])
                            for feature_id in current_features]
            slices_to_process.extend(child_slices)

            # children_ids = [child.id for child in child_slices]

            for child_slice in child_slices:
                child_slice.doNotCluster = current_slice.doNotCluster
                current_slice.add_child(child_slice)

            current_slice.type = ProductNode
            building_stack.append(current_slice)

            prod_node = ProductNode(data, current_instances, current_features)
            prod_node.id = current_id
            node_id_assoc[current_id] = prod_node

        else:

            split_on_features = False
            # first_run = False

            #
            # first run is a split on rows
            if n_features == 1 or cluster_first:
                cluster_first = False
            else:
                if self._ind_test_method == "pairwise_treeglm" or self._ind_test_method == "subsample":

                    fcdata = current_slice_data

                    if self._ind_test_method == "subsample":
                        # sampled_rows = 2000
                        # sampled_rows = math.floor(current_slice_data.shape[0] * 10 / 100)
                        sampled_rows = self._sub_sample_rows
                        if sampled_rows < current_slice_data.shape[0]:
                            fcdata = current_slice_data[
                                numpy.random.choice(current_slice_data.shape[0],
                                                    sampled_rows,
                                                    replace=False)]
                        else:
                            fcdata = current_slice_data

                    # Using R
                    # from pdn.independenceptest import getIndependentGroups
                    # feature_clusters = retrieve_clustering(
                    #     getIndependentGroups(fcdata, alpha=self._alpha, family=self.family),
                    #     current_features)
                    feature_clusters = retrieve_clustering(
                        getIndependentGroupsStabilityTest(fcdata, alpha=self._alpha),
                        current_features)

                elif self._ind_test_method == "KMeans":
                    # clustering the transposed slice groups features (not instances),
                    # so the cluster labels are mapped back to feature ids
                    feature_clusters = retrieve_clustering(
                        cluster_rows(
                            (data[current_instances, :][:, current_features]).T,
                            n_clusters=2,
                            cluster_method=self._row_cluster_method,
                            n_iters=self._n_iters,
                            n_restarts=self._n_restarts,
                            cluster_prep_method="sqrt",
                            cluster_penalty=self._cluster_penalty,
                            rand_gen=self._rand_gen,
                            sklearn_args=self._sklearn_args),
                        current_features)

                split_on_features = len(feature_clusters) > 1

            #
            # have independent components been found?
            if split_on_features:
                #
                # splitting on columns
                slices = [DataSlice(current_instances, cluster)
                          for cluster in feature_clusters]
                slices_to_process.extend(slices)

                current_slice.type = ProductNode
                building_stack.append(current_slice)
                for child_slice in slices:
                    current_slice.add_child(child_slice)

                prod_node = ProductNode(data, current_instances, current_features)
                prod_node.id = current_id
                node_id_assoc[current_id] = prod_node

            else:
                #
                # splitting on rows
                k_row_clusters = min(self._n_cluster_splits, n_instances - 1)

                if n_features == 1:
                    # do one kmeans run with K large enough to split into
                    # slices of at most min_instances_slice rows
                    k_row_clusters = math.floor(n_instances / self._min_instances_slice) + 1
                    k_row_clusters = min(k_row_clusters, n_instances - 1)

                clustering = retrieve_clustering(
                    cluster_rows(
                        data[current_instances, :][:, current_features],
                        n_clusters=k_row_clusters,
                        cluster_method=self._row_cluster_method,
                        n_iters=self._n_iters,
                        n_restarts=self._n_restarts,
                        cluster_prep_method=self._cluster_prep_method,
                        cluster_penalty=self._cluster_penalty,
                        rand_gen=self._rand_gen,
                        sklearn_args=self._sklearn_args),
                    current_instances)

                cluster_slices = [DataSlice(cluster, current_features)
                                  for cluster in clustering]

                if len(clustering) < k_row_clusters:
                    for cluster_slice in cluster_slices:
                        cluster_slice.doNotCluster = True

                n_instances_clusters = sum([len(cluster) for cluster in clustering])
                cluster_weights = [len(cluster) / n_instances_clusters
                                   for cluster in clustering]

                slices_to_process.extend(cluster_slices)

                current_slice.type = SumNode
                building_stack.append(current_slice)
                for child_slice, child_weight in zip(cluster_slices,
                                                     cluster_weights):
                    current_slice.add_child(child_slice, child_weight)

                sum_node = SumNode(data, current_instances, current_features)
                sum_node.id = current_id
                node_id_assoc[current_id] = sum_node

    root_node = SpnFactory.pruned_spn_from_slices(node_id_assoc,
                                                  building_stack,
                                                  True)

    spn = SpnFactory.layered_linked_spn(root_node, data, self.config)

    return spn
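
# Illustrative sketch (not from the original codebase): a typical way to drive the
# structure learner above. The class name `MixedLeafSPN` and its constructor
# arguments are hypothetical stand-ins for illustration only; the only interface
# taken from the code above is fit_structure(data).
def _example_fit_structure_mixed_leaves():
    rand_gen = numpy.random.RandomState(1337)
    # synthetic count data, suitable for Poisson leaves
    data = rand_gen.poisson(lam=2.0, size=(100, 5))

    learner = MixedLeafSPN(family="poisson",            # hypothetical: PoissonNode leaves
                           min_instances_slice=50,      # hypothetical: stop splitting small slices
                           ind_test_method="subsample", # hypothetical: feature independence test
                           rand_gen=rand_gen)
    spn = learner.fit_structure(data)
    return spn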
def test_layered_pruned_linked_spn_cltree():
    #
    # creating all the data slices
    # the slicing is a fake stub
    rows = 5
    cols = 5
    var = 1
    values = 2
    vars = [2, 3]
    var_values = [2, 2]
    s_data = numpy.array([[0, 1],
                          [1, 1],
                          [1, 0],
                          [0, 0]])

    node_1 = SumNode()
    node_1.id = 1

    node_2 = ProductNode()
    node_2.id = 2

    node_3 = SumNode()
    node_3.id = 3

    # adding first level
    weight_12 = 0.4
    weight_13 = 0.6
    node_1.add_child(node_2, weight_12)
    node_1.add_child(node_3, weight_13)

    node_4 = ProductNode()
    node_4.id = 4

    leaf_5 = CategoricalSmoothedNode(var, values)
    leaf_5.id = 5

    # not adding the slice to the stack
    node_2.add_child(node_4)
    node_2.add_child(leaf_5)

    node_6 = SumNode()
    node_6.id = 6

    node_7 = SumNode()
    node_7.id = 7

    weight_36 = 0.1
    weight_37 = 0.9
    node_3.add_child(node_6, weight_36)
    node_3.add_child(node_7, weight_37)

    node_8 = ProductNode()
    node_8.id = 8

    #
    # this is a cltree
    leaf_15 = CLTreeNode(vars=vars,
                         var_values=var_values,
                         data=s_data)
    leaf_15.id = 15

    node_4.add_child(node_8)
    node_4.add_child(leaf_15)

    leaf_13 = CategoricalSmoothedNode(var, values)
    leaf_13.id = 13

    leaf_14 = CLTreeNode(vars=vars,
                         var_values=var_values,
                         data=s_data)
    leaf_14.id = 14

    node_8.add_child(leaf_13)
    node_8.add_child(leaf_14)

    leaf_9 = CLTreeNode(vars=vars,
                        var_values=var_values,
                        data=s_data)
    leaf_9.id = 9

    node_10 = ProductNode()
    node_10.id = 10

    leaf_18 = CategoricalSmoothedNode(var, values)
    leaf_18.id = 18

    leaf_19 = CategoricalSmoothedNode(var, values)
    leaf_19.id = 19

    node_10.add_child(leaf_18)
    node_10.add_child(leaf_19)

    weight_69 = 0.3
    weight_610 = 0.7
    node_6.add_child(leaf_9, weight_69)
    node_6.add_child(node_10, weight_610)

    node_11 = ProductNode()
    node_11.id = 11

    leaf_20 = CategoricalSmoothedNode(var, values)
    leaf_20.id = 20

    leaf_21 = CategoricalSmoothedNode(var, values)
    leaf_21.id = 21

    node_11.add_child(leaf_20)
    node_11.add_child(leaf_21)

    node_12 = ProductNode()
    node_12.id = 12

    leaf_22 = CLTreeNode(vars=vars,
                         var_values=var_values,
                         data=s_data)
    leaf_22.id = 22

    leaf_23 = CategoricalSmoothedNode(var, values)
    leaf_23.id = 23

    node_12.add_child(leaf_22)
    node_12.add_child(leaf_23)

    weight_711 = 0.5
    weight_712 = 0.5
    node_7.add_child(node_11, weight_711)
    node_7.add_child(node_12, weight_712)

    print('Added nodes')

    root_node = SpnFactory.layered_pruned_linked_spn(node_1)
    print('ROOT node', root_node)

    spn = SpnFactory.layered_linked_spn(root_node)
    print('SPN', spn)

    assert spn.n_layers() == 3

    for i, layer in enumerate(spn.top_down_layers()):
        if i == 0:
            assert layer.n_nodes() == 1
        elif i == 1:
            assert layer.n_nodes() == 4
        elif i == 2:
            assert layer.n_nodes() == 10
def test_pruned_spn_from_slices():
    #
    # creating all the data slices
    # the slicing is a fake stub
    rows = 5
    cols = 5
    var = 1
    values = 2

    node_assoc = {}
    building_stack = deque()

    slice_1 = DataSlice.whole_slice(rows, cols)
    slice_1.type = SumNode
    node_1 = SumNode()
    node_1.id = slice_1.id
    node_assoc[node_1.id] = node_1
    building_stack.append(slice_1)

    slice_2 = DataSlice.whole_slice(rows, cols)
    slice_2.type = ProductNode
    node_2 = ProductNode()
    node_2.id = slice_2.id
    node_assoc[node_2.id] = node_2
    building_stack.append(slice_2)

    slice_3 = DataSlice.whole_slice(rows, cols)
    slice_3.type = SumNode
    node_3 = SumNode()
    node_3.id = slice_3.id
    node_assoc[node_3.id] = node_3
    building_stack.append(slice_3)

    # adding first level
    slice_1.add_child(slice_2, 0.8)
    slice_1.add_child(slice_3, 0.2)

    slice_4 = DataSlice.whole_slice(rows, cols)
    slice_4.type = ProductNode
    node_4 = ProductNode()
    node_4.id = slice_4.id
    node_assoc[node_4.id] = node_4
    building_stack.append(slice_4)

    leaf_5 = CategoricalSmoothedNode(var, values)
    slice_5 = DataSlice.whole_slice(rows, cols)
    leaf_5.id = slice_5.id
    node_assoc[leaf_5.id] = leaf_5
    # not adding the slice to the stack

    slice_2.add_child(slice_4)
    slice_2.add_child(slice_5)

    slice_6 = DataSlice.whole_slice(rows, cols)
    slice_6.type = SumNode
    node_6 = SumNode()
    node_6.id = slice_6.id
    node_assoc[node_6.id] = node_6
    building_stack.append(slice_6)

    slice_7 = DataSlice.whole_slice(rows, cols)
    slice_7.type = SumNode
    node_7 = SumNode()
    node_7.id = slice_7.id
    node_assoc[node_7.id] = node_7
    building_stack.append(slice_7)

    slice_3.add_child(slice_6, 0.4)
    slice_3.add_child(slice_7, 0.6)

    slice_8 = DataSlice.whole_slice(rows, cols)
    slice_8.type = ProductNode
    node_8 = ProductNode()
    node_8.id = slice_8.id
    node_assoc[node_8.id] = node_8
    building_stack.append(slice_8)

    leaf_15 = CategoricalSmoothedNode(var, values)
    slice_15 = DataSlice.whole_slice(rows, cols)
    leaf_15.id = slice_15.id
    node_assoc[leaf_15.id] = leaf_15

    slice_4.add_child(slice_8)
    slice_4.add_child(slice_15)

    leaf_13 = CategoricalSmoothedNode(var, values)
    slice_13 = DataSlice.whole_slice(rows, cols)
    leaf_13.id = slice_13.id
    node_assoc[leaf_13.id] = leaf_13

    leaf_14 = CategoricalSmoothedNode(var, values)
    slice_14 = DataSlice.whole_slice(rows, cols)
    leaf_14.id = slice_14.id
    node_assoc[leaf_14.id] = leaf_14

    slice_8.add_child(slice_13)
    slice_8.add_child(slice_14)

    slice_9 = DataSlice.whole_slice(rows, cols)
    slice_9.type = ProductNode
    node_9 = ProductNode()
    node_9.id = slice_9.id
    node_assoc[node_9.id] = node_9
    building_stack.append(slice_9)

    leaf_16 = CategoricalSmoothedNode(var, values)
    slice_16 = DataSlice.whole_slice(rows, cols)
    leaf_16.id = slice_16.id
    node_assoc[leaf_16.id] = leaf_16

    leaf_17 = CategoricalSmoothedNode(var, values)
    slice_17 = DataSlice.whole_slice(rows, cols)
    leaf_17.id = slice_17.id
    node_assoc[leaf_17.id] = leaf_17

    slice_9.add_child(slice_16)
    slice_9.add_child(slice_17)

    slice_10 = DataSlice.whole_slice(rows, cols)
    slice_10.type = ProductNode
    node_10 = ProductNode()
    node_10.id = slice_10.id
    node_assoc[node_10.id] = node_10
    building_stack.append(slice_10)

    leaf_18 = CategoricalSmoothedNode(var, values)
    slice_18 = DataSlice.whole_slice(rows, cols)
    leaf_18.id = slice_18.id
    node_assoc[leaf_18.id] = leaf_18

    leaf_19 = CategoricalSmoothedNode(var, values)
    slice_19 = DataSlice.whole_slice(rows, cols)
    leaf_19.id = slice_19.id
    node_assoc[leaf_19.id] = leaf_19

    slice_10.add_child(slice_18)
    slice_10.add_child(slice_19)

    slice_6.add_child(slice_9, 0.1)
    slice_6.add_child(slice_10, 0.9)

    slice_11 = DataSlice.whole_slice(rows, cols)
    slice_11.type = ProductNode
    node_11 = ProductNode()
    node_11.id = slice_11.id
    node_assoc[node_11.id] = node_11
    building_stack.append(slice_11)

    leaf_20 = CategoricalSmoothedNode(var, values)
    slice_20 = DataSlice.whole_slice(rows, cols)
    leaf_20.id = slice_20.id
    node_assoc[leaf_20.id] = leaf_20

    leaf_21 = CategoricalSmoothedNode(var, values)
    slice_21 = DataSlice.whole_slice(rows, cols)
    leaf_21.id = slice_21.id
    node_assoc[leaf_21.id] = leaf_21

    slice_11.add_child(slice_20)
    slice_11.add_child(slice_21)

    slice_12 = DataSlice.whole_slice(rows, cols)
    slice_12.type = ProductNode
    node_12 = ProductNode()
    node_12.id = slice_12.id
    node_assoc[node_12.id] = node_12
    building_stack.append(slice_12)

    leaf_22 = CategoricalSmoothedNode(var, values)
    slice_22 = DataSlice.whole_slice(rows, cols)
    leaf_22.id = slice_22.id
    node_assoc[leaf_22.id] = leaf_22

    leaf_23 = CategoricalSmoothedNode(var, values)
    slice_23 = DataSlice.whole_slice(rows, cols)
    leaf_23.id = slice_23.id
    node_assoc[leaf_23.id] = leaf_23

    slice_12.add_child(slice_22)
    slice_12.add_child(slice_23)

    slice_7.add_child(slice_11, 0.2)
    slice_7.add_child(slice_12, 0.7)

    root_node = SpnFactory.pruned_spn_from_slices(node_assoc, building_stack)
    print('ROOT node', root_node)

    spn = SpnFactory.layered_linked_spn(root_node)
    print('SPN', spn)

    assert spn.n_layers() == 3

    for i, layer in enumerate(spn.top_down_layers()):
        if i == 0:
            assert layer.n_nodes() == 1
        elif i == 1:
            assert layer.n_nodes() == 5
        elif i == 2:
            assert layer.n_nodes() == 12
def fit_structure(self, data, feature_sizes):
    """
    data is a numpy array of size {n_instances X n_features}
    feature_sizes is an array of integers representing feature ranges
    """

    #
    # resetting the data slice ids (just in case)
    DataSlice.reset_id_counter()

    tot_n_instances = data.shape[0]
    tot_n_features = data.shape[1]

    logging.info('Learning SPN structure on a (%d X %d) dataset',
                 tot_n_instances, tot_n_features)
    learn_start_t = perf_counter()

    #
    # a queue containing the data slices to process
    slices_to_process = deque()
    # a stack for building nodes
    building_stack = deque()
    # a dict to keep track of id->nodes
    node_id_assoc = {}

    # creating the first slice
    whole_slice = DataSlice.whole_slice(tot_n_instances, tot_n_features)
    slices_to_process.append(whole_slice)

    first_run = True

    #
    # iteratively process & split slices
    #
    while slices_to_process:

        # process a slice
        current_slice = slices_to_process.popleft()

        # pointers to the current data slice
        current_instances = current_slice.instance_ids
        current_features = current_slice.feature_ids
        current_id = current_slice.id

        n_instances = len(current_instances)
        n_features = len(current_features)

        logging.info('\n*** Processing slice %d (%d X %d)',
                     current_id, n_instances, n_features)
        logging.debug('\tinstances:%s\n\tfeatures:%s',
                      current_instances, current_features)

        #
        # is this a leaf node or can we split?
        if n_features == 1:
            logging.info('---> Adding a leaf (just one feature)')

            (feature_id, ) = current_features
            feature_size = feature_sizes[feature_id]

            # slicing from the original dataset
            slice_data_rows = data[current_instances, :]
            current_slice_data = slice_data_rows[:, current_features]

            # create the node
            leaf_node = CategoricalSmoothedNode(var=feature_id,
                                                var_values=feature_size,
                                                data=current_slice_data,
                                                instances=current_instances,
                                                alpha=self._alpha)
            # print('lnvf', leaf_node._var_freqs)

            # storing links
            # input_nodes.append(leaf_node)
            leaf_node.id = current_id
            node_id_assoc[current_id] = leaf_node

            logging.debug('\tCreated Smooth Node %s', leaf_node)

        elif (n_instances <= self._min_instances_slice and n_features > 1):
            #
            # splitting the slice on each feature
            logging.info('---> Few instances (%d), decompose all features',
                         n_instances)
            #
            # shall we put a cltree, or
            if self._cltree_leaves:
                logging.info('into a Chow-Liu tree')
                #
                # slicing data
                slice_data_rows = data[current_instances, :]
                current_slice_data = slice_data_rows[:, current_features]

                current_feature_sizes = [feature_sizes[i]
                                         for i in current_features]
                #
                # creating a Chow-Liu tree as leaf
                leaf_node = CLTreeNode(vars=current_features,
                                       var_values=current_feature_sizes,
                                       data=current_slice_data,
                                       alpha=self._alpha)
                #
                # storing links
                leaf_node.id = current_id
                node_id_assoc[current_id] = leaf_node

                logging.debug('\tCreated Chow-Liu Tree Node %s', leaf_node)

            elif self._kde and n_instances > 1:
                estimate_kernel_density_spn(current_slice,
                                            feature_sizes,
                                            data,
                                            self._alpha,
                                            node_id_assoc,
                                            building_stack,
                                            slices_to_process)

            # elif n_instances == 1:  # FIXME: there is a bug here
            else:
                current_slice, slices_to_process, building_stack, node_id_assoc = \
                    self.make_naive_factorization(current_slice,
                                                  slices_to_process,
                                                  building_stack,
                                                  node_id_assoc)
        else:
            #
            # slicing from the original dataset
            slice_data_rows = data[current_instances, :]
            current_slice_data = slice_data_rows[:, current_features]

            split_on_features = False
            #
            # first run is a split on rows
            if first_run:
                logging.info('-- FIRST RUN --')
                first_run = False
            else:
                #
                # try clustering on cols
                # logging.debug('...trying to split on columns')
                split_start_t = perf_counter()
                print(data.shape)
                dependent_features, other_features = \
                    greedy_feature_split(data,
                                         current_slice,
                                         feature_sizes,
                                         self._g_factor,
                                         self._rand_gen)
                split_end_t = perf_counter()
                logging.info('...tried to split on columns in {}'.format(
                    split_end_t - split_start_t))

                if len(other_features) > 0:
                    split_on_features = True
            #
            # have dependent components been found?
            if split_on_features:
                #
                # splitting on columns
                logging.info('---> Splitting on features' +
                             ' {} -> ({}, {})'.format(len(current_features),
                                                      len(dependent_features),
                                                      len(other_features)))

                #
                # creating two new data slices and putting them on queue
                first_slice = DataSlice(current_instances, dependent_features)
                second_slice = DataSlice(current_instances, other_features)
                slices_to_process.append(first_slice)
                slices_to_process.append(second_slice)

                children_ids = [first_slice.id, second_slice.id]

                #
                # storing link parent -> children
                current_slice.type = ProductNode
                building_stack.append(current_slice)
                current_slice.add_child(first_slice)
                current_slice.add_child(second_slice)

                #
                # creating product node
                prod_node = ProductNode(var_scope=frozenset(current_features))
                prod_node.id = current_id
                node_id_assoc[current_id] = prod_node
                logging.debug('\tCreated Prod Node %s (with children %s)',
                              prod_node, children_ids)

            else:
                #
                # clustering on rows
                logging.info('---> Splitting on rows')

                #
                # at most n_rows clusters, for sklearn
                k_row_clusters = min(self._n_cluster_splits, n_instances - 1)

                clustering = cluster_rows(data,
                                          current_slice,
                                          n_clusters=k_row_clusters,
                                          cluster_method=self._row_cluster_method,
                                          n_iters=self._n_iters,
                                          n_restarts=self._n_restarts,
                                          cluster_penalty=self._cluster_penalty,
                                          rand_gen=self._rand_gen,
                                          sklearn_args=self._sklearn_args)

                if len(clustering) < 2:
                    logging.info('\n\n\nLess than 2 clusters\n\n (%d)',
                                 len(clustering))
                    logging.info('forcing a naive factorization')
                    current_slice, slices_to_process, building_stack, node_id_assoc = \
                        self.make_naive_factorization(current_slice,
                                                      slices_to_process,
                                                      building_stack,
                                                      node_id_assoc)
                else:
                    # logging.debug('obtained clustering %s', clustering)
                    logging.info('clustered into %d parts (min %d)',
                                 len(clustering), k_row_clusters)

                    # splitting
                    cluster_slices = [DataSlice(cluster, current_features)
                                      for cluster in clustering]
                    cluster_slices_ids = [slice.id for slice in cluster_slices]

                    # cluster_prior = 5.0
                    # cluster_weights = [(slice.n_instances() + cluster_prior) /
                    #                    (n_instances + cluster_prior * len(cluster_slices))
                    #                    for slice in cluster_slices]
                    cluster_weights = [slice.n_instances() / n_instances
                                       for slice in cluster_slices]

                    #
                    # appending for processing
                    slices_to_process.extend(cluster_slices)

                    #
                    # storing links
                    # current_slice.children = cluster_slices_ids
                    # current_slice.weights = cluster_weights
                    current_slice.type = SumNode
                    building_stack.append(current_slice)
                    for child_slice, child_weight in zip(cluster_slices,
                                                         cluster_weights):
                        current_slice.add_child(child_slice, child_weight)

                    #
                    # building a sum node
                    SCOPES_DICT[frozenset(current_features)] += 1
                    sum_node = SumNode(var_scope=frozenset(current_features))
                    sum_node.id = current_id
                    node_id_assoc[current_id] = sum_node
                    logging.debug('\tCreated Sum Node %s (with children %s)',
                                  sum_node, cluster_slices_ids)

    learn_end_t = perf_counter()
    logging.info('\n\n\tStructure learned in %f secs',
                 (learn_end_t - learn_start_t))

    #
    # linking the spn graph (parent -> children)
    #
    logging.info('===> Building tree')

    link_start_t = perf_counter()
    root_build_node = building_stack[0]
    root_node = node_id_assoc[root_build_node.id]
    logging.debug('root node: %s', root_node)

    root_node = SpnFactory.pruned_spn_from_slices(node_id_assoc, building_stack)
    link_end_t = perf_counter()
    logging.info('\tLinked the spn in %f secs (root_node %s)',
                 (link_end_t - link_start_t), root_node)

    #
    # building layers
    #
    logging.info('===> Layering spn')
    layer_start_t = perf_counter()
    spn = SpnFactory.layered_linked_spn(root_node)
    layer_end_t = perf_counter()
    logging.info('\tLayered the spn in %f secs',
                 (layer_end_t - layer_start_t))

    logging.info('\nLearned SPN\n\n%s', spn.stats())
    # logging.info('%s', SCOPES_DICT.most_common(30))

    return spn
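
# Illustrative sketch (not from the original codebase): a typical call to the
# LearnSPN-style structure learner above on a small binary dataset. The class name
# `SpnLearner` and its keyword arguments are hypothetical; fit_structure(data,
# feature_sizes) is the only interface taken from the code above.
def _example_learn_spn_structure():
    rand_gen = numpy.random.RandomState(1337)
    data = rand_gen.binomial(n=1, p=0.5, size=(200, 4))
    feature_sizes = [2, 2, 2, 2]                  # each feature is binary

    learner = SpnLearner(g_factor=5.0,            # hypothetical: threshold for greedy_feature_split
                         min_instances_slice=50,  # hypothetical: when to stop splitting on rows
                         cltree_leaves=True,      # hypothetical: multivariate Chow-Liu tree leaves
                         alpha=0.1,               # hypothetical: Laplace smoothing for leaves
                         rand_gen=rand_gen)
    spn = learner.fit_structure(data, feature_sizes)
    print(spn.stats())
    return spn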
def test_categorical_to_indicator_input_layer():
    #
    # creating all the data slices
    # the slicing is a fake stub
    # rows = 5
    # cols = 5
    var_1 = 0
    values_1 = 2
    var_2 = 1
    values_2 = 3
    var_3 = 2
    values_3 = 4

    node_1 = SumNode()
    node_1.id = 1

    node_2 = ProductNode()
    node_2.id = 2

    node_3 = SumNode()
    node_3.id = 3

    # adding first level
    weight_12 = 0.4
    weight_13 = 0.6
    node_1.add_child(node_2, weight_12)
    node_1.add_child(node_3, weight_13)

    node_4 = ProductNode()
    node_4.id = 4

    leaf_5 = CategoricalSmoothedNode(var_1, values_1)
    leaf_5.id = 5

    # not adding the slice to the stack
    node_2.add_child(node_4)
    node_2.add_child(leaf_5)

    node_6 = SumNode()
    node_6.id = 6

    node_7 = SumNode()
    node_7.id = 7

    weight_36 = 0.1
    weight_37 = 0.9
    node_3.add_child(node_6, weight_36)
    node_3.add_child(node_7, weight_37)

    node_8 = ProductNode()
    node_8.id = 8

    leaf_15 = CategoricalSmoothedNode(var_2, values_2)
    leaf_15.id = 15

    node_4.add_child(node_8)
    node_4.add_child(leaf_15)

    leaf_13 = CategoricalSmoothedNode(var_3, values_3)
    leaf_13.id = 13

    leaf_14 = CategoricalSmoothedNode(var_1, values_1)
    leaf_14.id = 14

    node_8.add_child(leaf_13)
    node_8.add_child(leaf_14)

    node_9 = ProductNode()
    node_9.id = 9

    leaf_16 = CategoricalSmoothedNode(var_2, values_2)
    leaf_16.id = 16

    leaf_17 = CategoricalSmoothedNode(var_3, values_3)
    leaf_17.id = 17

    node_9.add_child(leaf_16)
    node_9.add_child(leaf_17)

    node_10 = ProductNode()
    node_10.id = 10

    leaf_18 = CategoricalSmoothedNode(var_2, values_2)
    leaf_18.id = 18

    leaf_19 = CategoricalSmoothedNode(var_2, values_2)
    leaf_19.id = 19

    node_10.add_child(leaf_18)
    node_10.add_child(leaf_19)

    weight_69 = 0.3
    weight_610 = 0.7
    node_6.add_child(node_9, weight_69)
    node_6.add_child(node_10, weight_610)

    node_11 = ProductNode()
    node_11.id = 11

    leaf_20 = CategoricalSmoothedNode(var_1, values_1)
    leaf_20.id = 20

    leaf_21 = CategoricalSmoothedNode(var_3, values_3)
    leaf_21.id = 21

    node_11.add_child(leaf_20)
    node_11.add_child(leaf_21)

    node_12 = ProductNode()
    node_12.id = 12

    leaf_22 = CategoricalSmoothedNode(var_1, values_1)
    leaf_22.id = 22

    leaf_23 = CategoricalSmoothedNode(var_3, values_3)
    leaf_23.id = 23

    node_12.add_child(leaf_22)
    node_12.add_child(leaf_23)

    weight_711 = 0.5
    weight_712 = 0.5
    node_7.add_child(node_11, weight_711)
    node_7.add_child(node_12, weight_712)

    root_node = SpnFactory.layered_pruned_linked_spn(node_1)
    print('ROOT node', root_node)

    spn = SpnFactory.layered_linked_spn(root_node)
    print('SPN', spn)

    assert spn.n_layers() == 3

    for i, layer in enumerate(spn.top_down_layers()):
        if i == 0:
            assert layer.n_nodes() == 1
        elif i == 1:
            assert layer.n_nodes() == 5
        elif i == 2:
            assert layer.n_nodes() == 12

    #
    # changing input layer
    spn = linked_categorical_input_to_indicators(spn)
    print('Changed input layer to indicator variables')
    print(spn)