def naive_factorization(data=None, node_id=0, context=None, scope=None, **kwargs):
    """Create a Product node with one pending leaf per variable in ``scope``.

    ``data`` is split with ``get_YX(data, context.feature_size)`` into ``y``
    and ``x``.  For the i-th variable of ``scope`` a leaf task is queued whose
    data is column ``i`` of ``y`` (as a column vector) joined with ``x`` through
    ``concatenate_yx``.

    Args:
        data: Array handed to ``get_YX``.
        node_id: Identifier stored on the new Product node.
        context: Object exposing ``feature_size``.
        scope: Sequence of random variables to factorize; must not be None.
        **kwargs: Accepted and ignored.

    Returns:
        ``(prod_node, result)`` where ``result`` is a list of
        ``(SplittingOperations.CREATE_LEAF_NODE, params)`` tuples, one per
        variable, each ``params`` dict holding ``data``, ``parent_id``,
        ``pos`` and ``scope``.
    """
    assert scope is not None, "No scope"

    prod_node = Product()
    prod_node.scope = scope
    prod_node.id = node_id

    y, x = get_YX(data, context.feature_size)

    result = []
    for column, rv in enumerate(scope):
        # Reserve the child slot first; the leaf task fills it in later.
        prod_node.children.append(None)
        leaf_params = {
            "data": concatenate_yx(y[:, column].reshape(-1, 1), x),
            "parent_id": prod_node.id,
            "pos": len(prod_node.children) - 1,
            "scope": [rv],
        }
        result.append((SplittingOperations.CREATE_LEAF_NODE, leaf_params))

    return prod_node, result
def prod_condition(node, children, input_vals=None, scope=None):
    """Build the conditioned counterpart of a Product node.

    Args:
        node: The Product node being conditioned.
        children: Iterable of indexable results for the node's children;
            element ``[0]`` is the conditioned child (falsy when there is
            none to keep) and element ``[1]`` is a value convertible to float.
        input_vals: Unused here.
        scope: Set of variables being conditioned on (must support
            ``intersection``).

    Returns:
        ``(Copy(node), 0)`` when ``scope`` does not overlap ``node.scope``;
        otherwise ``(new_product, probability)`` where ``new_product`` keeps
        the truthy conditioned children, has ``scope`` removed from its own
        scope, and ``probability`` is the sum of the children's ``[1]`` values.
    """
    overlap = scope.intersection(node.scope)
    if not overlap:
        # Nothing to condition on below this node.
        return Copy(node), 0

    new_node = Product()
    new_node.scope = list(set(node.scope) - scope)

    probability = 0
    for child_result in children:
        conditioned_child = child_result[0]
        if conditioned_child:
            new_node.children.append(conditioned_child)
        # NOTE(review): every child contributes its value, kept or not --
        # confirm against the original (pre-mangling) indentation.
        probability += float(child_result[1])

    return new_node, probability
def remove_non_informative_features(
    data=None,
    node_id=0,
    scope=None,
    context=0,
    uninformative_features_idx=None,
    **kwargs
):
    """Split flagged (uninformative) variables off as leaves under a Product.

    Every position flagged truthy in ``uninformative_features_idx`` gets its
    own leaf task built from the matching column of ``y``; all remaining
    positions are bundled into a single ``GET_NEXT_OP`` task.

    Args:
        data: Array handed to ``get_YX`` and sliced for the remaining columns.
        node_id: Identifier stored on the new Product node.
        scope: Sequence of random variables, aligned with
            ``uninformative_features_idx``.
        context: Object exposing ``feature_size``.
        uninformative_features_idx: Per-position flags; must not be None.
        **kwargs: Accepted and ignored.

    Returns:
        ``(prod_node, result)`` where ``result`` is a list of
        ``(SplittingOperations.<op>, params)`` tuples.

    Raises:
        AssertionError: if ``uninformative_features_idx`` is None or no
            position is flagged.
    """
    assert uninformative_features_idx is not None, "parameter uninformative_features_idx can't be None"

    prod_node = Product()
    prod_node.scope = scope
    prod_node.id = node_id

    y, x = get_YX(data, context.feature_size)

    kept_rvs = []
    kept_columns = []
    result = []
    for column, is_uninformative in enumerate(uninformative_features_idx):
        rv = scope[column]
        if is_uninformative:
            # Reserve a child slot and queue a leaf for this variable alone.
            prod_node.children.append(None)
            leaf_params = {
                "data": concatenate_yx(y[:, column].reshape(-1, 1), x),
                "parent_id": prod_node.id,
                "pos": len(prod_node.children) - 1,
                "scope": [rv],
            }
            result.append((SplittingOperations.CREATE_LEAF_NODE, leaf_params))
        else:
            kept_rvs.append(rv)
            kept_columns.append(column)

    assert len(result) > 0

    if kept_columns:
        prod_node.children.append(None)
        # NOTE(review): this slices `data`, whereas the leaves above slice
        # `y` -- presumably equivalent for these columns; confirm vs. get_YX.
        rest_params = {
            "data": concatenate_yx(data[:, kept_columns], x),
            "parent_id": prod_node.id,
            "pos": len(prod_node.children) - 1,
            "scope": kept_rvs,
        }
        result.append((SplittingOperations.GET_NEXT_OP, rest_params))

    return prod_node, result
def remove_non_informative_features(data=None, node_id=0, scope=None, **kwargs):
    """Split zero-variance variables off as leaves under a Product node.

    A variable is treated as uninformative when the variance of its column in
    ``data`` is exactly zero.  Each such variable gets its own leaf task; the
    remaining variables, if any, are bundled into one ``GET_NEXT_OP`` task.

    Args:
        data: 2-D array indexed as ``data[:, rv]`` for every ``rv`` in scope.
        node_id: Identifier stored on the new Product node and used as the
            ``parent_id`` of every queued task.
        scope: Sequence of column indices / random variables.
        **kwargs: Accepted and ignored.

    Returns:
        ``(prod_node, result)`` where ``result`` is a list of
        ``(SplittingOperations.<op>, params)`` tuples.

    Raises:
        AssertionError: if no variable in ``scope`` has zero variance.
    """
    prod_node = Product()
    prod_node.scope = scope
    prod_node.id = node_id

    uninformative_features_idx = np.var(data[:, scope], 0) == 0

    # Collect the informative variables while iterating.  The previous
    # implementation copied `scope` and deleted entries by their ORIGINAL
    # index, which removes the wrong element (or raises IndexError) once the
    # list has shrunk after the first deletion.
    informative_rvs = []
    result = []
    for rv, zero_var in zip(scope, uninformative_features_idx):
        if not zero_var:
            informative_rvs.append(rv)
            continue

        # Reserve a child slot and queue a leaf for this constant variable.
        prod_node.children.append(None)
        result.append((
            SplittingOperations.CREATE_LEAF_NODE,
            {
                "data": data[:, rv].reshape(-1, 1),
                "parent_id": node_id,
                "pos": len(prod_node.children) - 1,
                "scope": [rv],
            },
        ))

    assert len(result) > 0

    # Only queue a follow-up operation when something is left to learn on;
    # an empty scope would produce a child with no variables.
    if informative_rvs:
        prod_node.children.append(None)
        result.append((
            SplittingOperations.GET_NEXT_OP,
            {
                "data": data[:, informative_rvs],
                "parent_id": node_id,
                "pos": len(prod_node.children) - 1,
                "scope": informative_rvs,
            },
        ))

    return prod_node, result
def naive_factorization(data=None, node_id=0, scope=None, **kwargs):
    """Create a Product node with one pending leaf per variable in ``scope``.

    Args:
        data: 2-D array indexed as ``data[:, rv]`` for every ``rv`` in scope.
        node_id: Identifier stored on the new Product node and used as the
            ``parent_id`` of every queued leaf task.
        scope: Sequence of column indices / random variables; must not be None.
        **kwargs: Accepted and ignored.

    Returns:
        ``(prod_node, result)`` where ``result`` is a list of
        ``(SplittingOperations.CREATE_LEAF_NODE, params)`` tuples, one per
        variable, each ``params`` dict holding ``data``, ``parent_id``,
        ``pos`` and ``scope``.
    """
    assert scope is not None, "No scope"

    prod_node = Product()
    prod_node.scope = scope
    # The sibling factorization helpers store the identifier on `id`; this
    # one only set `node_id`, leaving `id` unassigned.  Set `id` to match and
    # keep `node_id` so existing readers of that attribute still work.
    prod_node.id = node_id
    prod_node.node_id = node_id

    result = []
    for rv in scope:
        # Reserve the child slot first; the leaf task fills it in later.
        prod_node.children.append(None)
        result.append((
            SplittingOperations.CREATE_LEAF_NODE,
            {
                "data": data[:, rv].reshape(-1, 1),
                "parent_id": node_id,
                "pos": len(prod_node.children) - 1,
                "scope": [rv],
            },
        ))

    return prod_node, result
def get_credit_spn():
    """Build a small hand-written SPN of Categorical leaves over scopes 0-3.

    The structure is assembled with the node classes' overloaded ``*`` and
    ``+`` operators plus one explicit ``Product``.

    Returns:
        The root node, with its ``scope`` attribute sorted.
    """
    from spn.structure.Base import Product
    from spn.structure.leaves.parametric.Parametric import Categorical

    # Two alternatives over variables 2 and 3, mixed 0.3 / 0.7.
    first_pair = Categorical(p=[0.0, 1.0], scope=[2]) * Categorical(p=[0.5, 0.5], scope=[3])
    second_pair = Categorical(p=[1.0, 0.0], scope=[2]) * Categorical(p=[0.1, 0.9], scope=[3])
    pair_mixture = 0.3 * first_pair + 0.7 * second_pair

    # Left branch: variable 1 combined with the mixture above.
    left_branch = Categorical(p=[0.0, 1.0], scope=[1]) * pair_mixture

    # Right branch: an explicit product over variables 1, 2 and 3.
    right_leaves = [
        Categorical(p=[1.0, 0.0], scope=[1]),
        Categorical(p=[0.0, 1.0], scope=[2]),
        Categorical(p=[1.0, 0.0], scope=[3]),
    ]
    right_branch = Product(right_leaves)
    right_branch.scope = [1, 2, 3]

    upper_mixture = 0.8 * left_branch + 0.2 * right_branch

    root = upper_mixture * Categorical(p=[0.2, 0.8], scope=[0])
    root.scope = sorted(root.scope)
    return root
def create_flat_spn_recursive(node, distribution_mix, prob=1.0, independent_nodes=None):
    """Walk ``node`` and append flattened products to ``flat_spn``.

    ``target_id``, ``flat_spn`` and ``spn`` are not parameters; they are read
    from the enclosing scope.

    * Sum node: recurse into every child with ``prob`` scaled by the child's
      weight, handing each child its own copy of ``independent_nodes``.
    * Product node: a child whose scope contains ``target_id`` is either
      deep-copied into ``independent_nodes`` (single-variable scope, which
      ends the descent) or remembered as the next node to descend into.
      For every other child, each variable in its scope is turned into one
      node via ``distribution_mix[type](get_nodes_with_weight(child, var))``.
      When the descent ends, ``prob`` and a Product over
      ``independent_nodes`` are appended to ``flat_spn``.

    Args:
        node: Current Sum or Product node.
        distribution_mix: Mapping from node type to a callable that accepts
            the list returned by ``get_nodes_with_weight``.
        prob: Accumulated weight along the path from the root.
        independent_nodes: Nodes collected so far on this path; a fresh list
            is used when omitted.

    Raises:
        Exception: if ``node`` is neither a Sum nor a Product.
    """
    # A literal `[]` default would be created once and shared by every call;
    # the Product branch appends to it, so state would leak between calls.
    if independent_nodes is None:
        independent_nodes = []

    if isinstance(node, Sum):
        for i, child in enumerate(node.children):
            forwarded_weight = node.weights[i] * prob
            # Each branch of the sum gets an independent copy of the path state.
            create_flat_spn_recursive(child, distribution_mix, forwarded_weight, independent_nodes.copy())
    elif isinstance(node, Product):
        stop = False
        next_node = None
        for child in node.children:
            if target_id in child.scope:
                if len(child.scope) == 1:
                    # Reached the node for the target variable itself.
                    stop = True
                    independent_nodes.append(deepcopy(child))
                else:
                    next_node = child
            else:
                for feature_id in child.scope:
                    weighted_nodes = get_nodes_with_weight(child, feature_id)
                    # The first weighted node's type selects the mixing function.
                    t_node = type(weighted_nodes[0][1])
                    mixed_node = distribution_mix[t_node](weighted_nodes)
                    independent_nodes.append(mixed_node)
        if stop:
            flat_spn.weights.append(prob)
            prod = Product(children=independent_nodes)
            prod.scope = spn.scope
            flat_spn.children.append(prod)
        else:
            create_flat_spn_recursive(next_node, distribution_mix, prob, independent_nodes)
    else:
        raise Exception("Can only iterate over Sum and Product nodes")
if __name__ == "__main__":
    # Script entry point: build a small hand-written SPN of Categorical
    # leaves over scopes 0-3 and run rule extraction on it.
    from spn.structure.Base import Sum, Product, Leaf
    from spn.structure.leaves.parametric.Parametric import Categorical

    # Two alternatives over variables 2 and 3, mixed 0.3 / 0.7.
    first_pair = Categorical(p=[0.0, 1.0], scope=[2]) * Categorical(p=[0.5, 0.5], scope=[3])
    second_pair = Categorical(p=[1.0, 0.0], scope=[2]) * Categorical(p=[0.1, 0.9], scope=[3])
    pair_mixture = 0.3 * first_pair + 0.7 * second_pair

    # Left branch: variable 1 combined with the mixture above.
    left_branch = Categorical(p=[0.0, 1.0], scope=[1]) * pair_mixture

    # Right branch: an explicit product over variables 1, 2 and 3.
    right_leaves = [
        Categorical(p=[1.0, 0.0], scope=[1]),
        Categorical(p=[0.0, 1.0], scope=[2]),
        Categorical(p=[1.0, 0.0], scope=[3]),
    ]
    right_branch = Product(right_leaves)
    right_branch.scope = [1, 2, 3]

    upper_mixture = 0.8 * left_branch + 0.2 * right_branch
    spn = upper_mixture * Categorical(p=[0.2, 0.8], scope=[0])

    # Optional plotting (disabled): spn_util.plot_spn(spn, "rule_spn.pdf")
    extract_rules(spn)
    # Alternative entry point (disabled): print(get_frequent_items(spn))