Example #1
def create_spflow_spn(n_feats, ctype=Gaussian):
    children1 = []
    children2 = []
    for i in range(n_feats):
        if ctype == Gaussian:
            c1 = Gaussian(np.random.randn(), np.random.rand(), scope=i)
            c2 = Gaussian(np.random.randn(), np.random.rand(), scope=i)
        else:
            c1 = Bernoulli(p=np.random.rand(), scope=i)
            c2 = Bernoulli(p=np.random.rand(), scope=i)

        children1.append(c1)
        children2.append(c2)

    prods1 = []
    prods2 = []
    for i in range(0, n_feats, 2):
        p1 = Product([children1[i], children1[i + 1]])
        p2 = Product([children2[i], children2[i + 1]])
        prods1.append(p1)
        prods2.append(p2)

    sums = []
    for i in range(n_feats // 2):
        s = Sum(weights=[0.5, 0.5], children=[prods1[i], prods2[i]])
        sums.append(s)

    spflow_spn = Product(sums)
    assign_ids(spflow_spn)
    rebuild_scopes_bottom_up(spflow_spn)
    return spflow_spn
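A minimal usage sketch for the example above, assuming numpy is imported as np and the SPFlow names used by the function (Gaussian, Bernoulli, Sum, Product, assign_ids, rebuild_scopes_bottom_up) are in scope:

from spn.algorithms.Inference import log_likelihood

spn = create_spflow_spn(4)        # n_feats should be even, since leaves are paired
data = np.random.randn(2, 4)      # one row per instance, one column per feature
print(log_likelihood(spn, data))  # (2, 1) array of log-densities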
Example #2
def create_spflow_spn(n_feats):
    gaussians1 = []
    gaussians2 = []
    for i in range(n_feats):
        g1 = Gaussian(np.random.randn(), np.random.rand(), scope=i)
        g2 = Gaussian(np.random.randn(), np.random.rand(), scope=i)
        gaussians1.append(g1)
        gaussians2.append(g2)

    prods1 = []
    prods2 = []
    for i in range(0, n_feats, 2):
        p1 = Product([gaussians1[i], gaussians1[i + 1]])
        p2 = Product([gaussians2[i], gaussians2[i + 1]])
        prods1.append(p1)
        prods2.append(p2)

    sums = []
    for i in range(n_feats // 2):
        s = Sum(weights=[0.5, 0.5], children=[prods1[i], prods2[i]])
        sums.append(s)

    spflow_spn = Product(sums)
    assign_ids(spflow_spn)
    rebuild_scopes_bottom_up(spflow_spn)
    return spflow_spn
Example #3
def marginalize(node, scope):
    assert isinstance(scope, set), "scope must be a set"

    def marg_recursive(node):
        node_scope = set(node.scope)

        if node_scope.issubset(scope):
            return None

        if isinstance(node, Leaf):
            if len(node.scope) > 1:
                raise Exception('Leaf Node with |scope| > 1')

            return node

        newNode = node.__class__()

        #a sum node gets copied with all its children, or gets removed completely
        if isinstance(node, Sum):
            newNode.weights.extend(node.weights)

        for i, c in enumerate(node.children):
            newChildren = marg_recursive(c)
            if newChildren is None:
                continue

            newNode.children.append(newChildren)
        return newNode

    newNode = marg_recursive(node)
    rebuild_scopes_bottom_up(newNode)
    newNode = prune(newNode)
    valid, err = is_valid(newNode)
    assert valid, err
    assign_ids(newNode)
    return newNode
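Note the convention in this variant: scope is the set of variables to sum out, not the set to keep. A hypothetical call, assuming spn is a valid SPFlow structure over variables 0..2:

marg_spn = marginalize(spn, {2})  # sums out variable 2; variables 0 and 1 remain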
Example #4
def create_SPN2():
    from spn.structure.Base import assign_ids
    from spn.structure.Base import rebuild_scopes_bottom_up

    from spn.algorithms.Validity import is_valid
    from spn.structure.leaves.parametric.Parametric import Categorical

    from spn.structure.Base import Sum, Product

    p0 = Product(children=[
        Categorical(p=[0.3, 0.7], scope=1),
        Categorical(p=[0.4, 0.6], scope=2)
    ])
    p1 = Product(children=[
        Categorical(p=[0.5, 0.5], scope=1),
        Categorical(p=[0.6, 0.4], scope=2)
    ])
    s1 = Sum(weights=[0.3, 0.7], children=[p0, p1])
    p2 = Product(children=[Categorical(p=[0.2, 0.8], scope=0), s1])
    p3 = Product(children=[
        Categorical(p=[0.2, 0.8], scope=0),
        Categorical(p=[0.3, 0.7], scope=1)
    ])
    p4 = Product(children=[p3, Categorical(p=[0.4, 0.6], scope=2)])
    spn = Sum(weights=[0.4, 0.6], children=[p2, p4])

    assign_ids(spn)
    rebuild_scopes_bottom_up(spn)

    val, msg = is_valid(spn)
    assert val, msg

    return spn
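A hypothetical inference call against the returned structure (assuming spn.algorithms.Inference is available):

import numpy as np
from spn.algorithms.Inference import log_likelihood

spn = create_SPN2()
evidence = np.array([[0.0, 1.0, 1.0]])        # values for scopes 0, 1, 2
print(np.exp(log_likelihood(spn, evidence)))  # joint probability P(X0=0, X1=1, X2=1)

Example #5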
def create_disj(data, scope, assignments, alpha):

    unq_data, counts = np.unique(data, axis=0, return_counts=True)
    probs = np.zeros(assignments.shape[0])
    for i in range(assignments.shape[0]):
        index = np.where(np.all(assignments[i] == unq_data, axis=1))[0]
        if len(index):
            probs[i] = counts[index[0]]
    probs = (probs + alpha) / (probs + alpha).sum()

    indicators = {
        var: [Bernoulli(scope=[var], p=0),
              Bernoulli(scope=[var], p=1)]
        for var in scope
    }

    prods = []
    for i in range(assignments.shape[0]):
        children = []
        for j in range(assignments.shape[1]):
            children.append(indicators[scope[j]][assignments[i, j]])
            # children.append(Bernoulli(scope=[scope[j]], p=assignments[i, j]))
        prods.append(Product(children=children))

    if len(prods) > 1:
        disj = Sum(children=prods, weights=probs)
    else:
        disj = prods[0]

    assign_ids(disj)
    rebuild_scopes_bottom_up(disj)

    return disj
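A hypothetical call for the function above, enumerating all assignments of two binary variables (the tiny dataset is made up for illustration):

data = np.array([[0, 1], [1, 1], [1, 1]])
assignments = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
disj = create_disj(data, scope=[0, 1], assignments=assignments, alpha=0.1)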
Example #6
    def test_spn_to_str_and_back(self):
        self.check_obj_and_reconstruction(
            Categorical(p=[0.1, 0.2, 0.7], scope=0))

        self.check_obj_and_reconstruction(Gaussian(mean=0, stdev=10, scope=0))
        self.check_obj_and_reconstruction(
            Gaussian(mean=1.2, stdev=1.5, scope=0))

        self.check_obj_and_reconstruction(Gaussian(mean=-1.2, stdev=1,
                                                   scope=0))

        gamma = Gamma(alpha=1, beta=2, scope=0)
        lnorm = LogNormal(mean=1, stdev=2, scope=0)

        self.check_obj_and_reconstruction(gamma)

        self.check_obj_and_reconstruction(lnorm)

        root = Sum(children=[gamma, lnorm], weights=[0.2, 0.8])
        assign_ids(root)
        rebuild_scopes_bottom_up(root)
        self.check_obj_and_reconstruction(root)

        root = 0.3 * (Gaussian(mean=0, stdev=1, scope=0) * Gaussian(
            mean=1, stdev=1, scope=1)) + 0.7 * (Gaussian(
                mean=2, stdev=1, scope=0) * Gaussian(mean=3, stdev=1, scope=1))

        self.check_obj_and_reconstruction(root)
Example #7
    def test_ll_matrix(self):
        add_node_likelihood(Leaf, sum_and_multiplier_ll)

        node_1_1_1_1 = leaf(2, 1)
        node_1_1_1_2 = leaf(2, 2)
        node_1_1_1 = 0.7 * node_1_1_1_1 + 0.3 * node_1_1_1_2
        node_1_1_2 = leaf([0, 1], 3)
        node_1_1 = node_1_1_1 * node_1_1_2
        node_1_2_1_1_1 = leaf(0, 5)
        node_1_2_1_1_2 = leaf(1, 4)
        node_1_2_1_1 = node_1_2_1_1_1 * node_1_2_1_1_2
        node_1_2_1_2 = leaf([0, 1], 6)
        node_1_2_1 = 0.1 * node_1_2_1_1 + 0.9 * node_1_2_1_2
        node_1_2_2 = leaf(2, 3)
        node_1_2 = node_1_2_1 * node_1_2_2
        spn = 0.4 * node_1_1 + 0.6 * node_1_2

        assign_ids(spn)

        max_id = max([n.id for n in get_nodes_by_type(spn)])

        data = np.random.rand(10, 10)

        node_1_1_1_1_r = data[:, 2] * 1
        node_1_1_1_2_r = data[:, 2] * 2
        node_1_1_1_r = 0.7 * node_1_1_1_1_r + 0.3 * node_1_1_1_2_r
        node_1_1_2_r = 3 * (data[:, 0] + data[:, 1])
        node_1_1_r = node_1_1_1_r * node_1_1_2_r
        node_1_2_1_1_1_r = data[:, 0] * 5
        node_1_2_1_1_2_r = data[:, 1] * 4
        node_1_2_1_1_r = node_1_2_1_1_1_r * node_1_2_1_1_2_r
        node_1_2_1_2_r = 6 * (data[:, 0] + data[:, 1])
        node_1_2_1_r = 0.1 * node_1_2_1_1_r + 0.9 * node_1_2_1_2_r
        node_1_2_2_r = data[:, 2] * 3
        node_1_2_r = node_1_2_1_r * node_1_2_2_r
        spn_r = 0.4 * node_1_1_r + 0.6 * node_1_2_r

        self.assert_correct(spn, data, spn_r)

        lls = np.zeros((data.shape[0], max_id + 1))
        likelihood(spn, data, lls_matrix=lls)
        llls = np.zeros((data.shape[0], max_id + 1))
        log_likelihood(spn, data, lls_matrix=llls)

        self.assertTrue(np.all(np.isclose(lls, np.exp(llls))))

        self.assertTrue(np.all(np.isclose(spn_r, lls[:, spn.id])))
        self.assertTrue(np.all(np.isclose(node_1_2_r, lls[:, node_1_2.id])))
        self.assertTrue(np.all(np.isclose(node_1_2_2_r, lls[:, node_1_2_2.id])))
        self.assertTrue(np.all(np.isclose(node_1_2_1_r, lls[:, node_1_2_1.id])))
        self.assertTrue(np.all(np.isclose(node_1_2_1_2_r, lls[:, node_1_2_1_2.id])))
        self.assertTrue(np.all(np.isclose(node_1_2_1_1_r, lls[:, node_1_2_1_1.id])))
        self.assertTrue(np.all(np.isclose(node_1_2_1_1_2_r, lls[:, node_1_2_1_1_2.id])))
        self.assertTrue(np.all(np.isclose(node_1_2_1_1_1_r, lls[:, node_1_2_1_1_1.id])))
        self.assertTrue(np.all(np.isclose(node_1_1_r, lls[:, node_1_1.id])))
        self.assertTrue(np.all(np.isclose(node_1_1_2_r, lls[:, node_1_1_2.id])))
        self.assertTrue(np.all(np.isclose(node_1_1_1_r, lls[:, node_1_1_1.id])))
        self.assertTrue(np.all(np.isclose(node_1_1_1_2_r, lls[:, node_1_1_1_2.id])))
        self.assertTrue(np.all(np.isclose(node_1_1_1_1_r, lls[:, node_1_1_1_1.id])))
Example #8
def marginalize(node, keep, light=False):
    # keep must be a set of features that you want to keep
    keep = set(keep)


    def marg_recursive(node):
        new_node_scope = keep.intersection(set(node.scope))
        if len(new_node_scope) == 0:
            # we are summing out this node
            return None

        if isinstance(node, Leaf):
            if len(node.scope) > 1:
                raise Exception("Leaf Node with |scope| > 1")
            if light:
                return node
            return copy.deepcopy(node)

        newNode = node.__class__()
        newNode.cardinality = node.cardinality

        if isinstance(node, Sum):
            newNode.weights.extend(node.weights)
            if not light:
                newNode.cluster_centers.extend(node.cluster_centers)
        if isinstance(node, Product):
            if hasattr(node, 'binary_bloom_filters'):
                newNode.binary_bloom_filters = node.binary_bloom_filters

        for c in node.children:
            new_c = marg_recursive(c)
            if new_c is None:
                continue
            newNode.children.append(new_c)

        newNode.scope.extend(new_node_scope)

        return newNode

    newNode = marg_recursive(node)

    if not light:
        assign_ids(newNode)
        newNode = Prune(newNode, light=light)

        valid, err = is_valid(newNode, light=light)
        assert valid, err
    return newNode
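Example #9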
    def test_sum(self):
        spn = Product()
        for s in range(7):
            spn.children.append(Leaf(scope=s))

        assign_ids(spn)
        rebuild_scopes_bottom_up(spn)

        new_spn = SPN_Reshape(spn, 2)

        print(spn)
Example #10
def generate_code(spn_id, spn, meta_types, floating_data_type):
    """
    Generates inference code for an SPN
    :param target_path: the path the generated C++ code is written to
    :param floating_data_type: data type floating numbers are represented in generated C++ code
    :param spn: root node of an SPN
    :return: code string
    """

    # make sure we have ids
    assign_ids(spn)

    # fill method body according to SPN structure
    method_body = generate_method_body(spn, spn, floating_data_type, 0)

    # build parameters used in generated c++ function
    method_params = []
    passed_params = []
    # avoid shadowing the builtin 'type'
    for i, meta_type in enumerate(meta_types):
        if meta_type == MetaType.DISCRETE:
            method_params += [
                f'vector <int> possibleValues{i}', f'int nullValueIdx{i}'
            ]
            passed_params += [
                f'py::arg("possibleValues{i}")', f'py::arg("nullValueIdx{i}")'
            ]
        elif meta_type == MetaType.REAL:
            method_params += [
                f'bool inverse{i}', f'bool leftMinusInf{i}',
                f'float leftCondition{i}', f'bool rightMinusInf{i}',
                f'float rightCondition{i}', f'bool leftIncluded{i}',
                f'bool rightIncluded{i}', f'float nullValue{i}'
            ]
            passed_params += [
                f'py::arg("inverse{i}")', f'py::arg("leftMinusInf{i}")',
                f'py::arg("leftCondition{i}")', f'py::arg("rightMinusInf{i}")',
                f'py::arg("rightCondition{i}")', f'py::arg("leftIncluded{i}")',
                f'py::arg("rightIncluded{i}")', f'py::arg("nullValue{i}")'
            ]

    value_dictionary = {
        'spn_id': spn_id,
        'method_body': method_body,
        'method_params': ', '.join(method_params),
        'node_count': get_number_of_nodes(spn),
        'passed_params': ', '.join(passed_params),
        'floating_data_type': floating_data_type
    }
    generated_method = replace_template(TemplatePath.METHOD_MASTER,
                                        value_dictionary, 0)
    registrate_method = replace_template(TemplatePath.REGISTRATION_MASTER,
                                         value_dictionary, 0)

    return generated_method, registrate_method
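Example #11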
def create_conj(data, scope, alpha):

    conj = Product(children=[
        Bernoulli(scope=[scope[k]],
                  p=(data[0][k] * data.shape[0] + alpha) /
                  (data.shape[0] + 2 * alpha)) for k in range(len(scope))
    ])

    assign_ids(conj)
    rebuild_scopes_bottom_up(conj)

    return conj
Example #12
def SPN_Reshape(node, max_children=2):
    v, err = is_valid(node)
    assert v, err
    nodes = get_nodes_by_type(node, (Product, Sum))

    while len(nodes) > 0:
        n = nodes.pop()

        if len(n.children) <= max_children:
            continue

        # node has more than 2 nodes, create binary hierarchy
        new_children = []
        new_weights = []
        for i in range(0, len(n.children), max_children):
            children = n.children[i:i + max_children]

            if len(children) > 1:
                if isinstance(n, Product):
                    newChild = Product()
                    for c in children:
                        newChild.scope.extend(c.scope)
                    newChild.children.extend(children)
                    new_children.append(newChild)
                else:  # Sum
                    weights = n.weights[i:i + max_children]
                    branch_weight = sum(weights)
                    new_weights.append(branch_weight)

                    newChild = Sum()
                    newChild.scope.extend(children[0].scope)
                    newChild.children.extend(children)
                    newChild.weights.extend(
                        [w / branch_weight for w in weights])
                    newChild.weights[0] = 1.0 - sum(newChild.weights[1:])
                    new_children.append(newChild)
            else:
                new_children.extend(children)

                if isinstance(n, Sum):
                    new_weights.append(1.0 - sum(new_weights))

        n.children = new_children
        if isinstance(n, Sum):
            n.weights = new_weights
        nodes.append(n)

    assign_ids(node)
    v, err = is_valid(node)
    assert v, err
    return node
Example #13
def learn_classifier(data, ds_context, spn_learn_wrapper, label_idx, **kwargs):
    spn = Sum()
    for label, count in zip(*np.unique(data[:, label_idx], return_counts=True)):
        branch = spn_learn_wrapper(data[data[:, label_idx] == label, :], ds_context, **kwargs)
        spn.children.append(branch)
        spn.weights.append(count / data.shape[0])

    spn.scope.extend(branch.scope)
    assign_ids(spn)

    valid, err = is_valid(spn)
    assert valid, "invalid spn: " + err

    return spn
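A sketch of the usual calling pattern for learn_classifier (the synthetic three-column dataset and the learn_parametric wrapper are assumptions, not part of the example above):

import numpy as np
from spn.structure.Base import Context
from spn.structure.leaves.parametric.Parametric import Categorical, Gaussian
from spn.algorithms.LearningWrappers import learn_parametric

# two Gaussian features plus a binary label in column 2
train_data = np.c_[np.random.randn(100, 2), np.random.randint(0, 2, 100)].astype(np.float64)
ds_context = Context(parametric_types=[Gaussian, Gaussian, Categorical]).add_domains(train_data)
spn_classifier = learn_classifier(train_data, ds_context, learn_parametric, 2)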
Example #14
def condition(spn, evidence):
    scope = set(
        [i for i in range(len(spn.scope)) if not np.isnan(evidence)[0][i]])
    node_conditions = {
        type(leaf): leaf_condition
        for leaf in get_nodes_by_type(spn, Leaf)
    }
    node_conditions.update({Sum: sum_condition, Product: prod_condition})

    new_root, val = eval_spn_bottom_up(spn,
                                       node_conditions,
                                       input_vals=evidence,
                                       scope=scope)
    assign_ids(new_root)
    return Prune(new_root)
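Example #15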
def create_naive_fact(data, scope, alpha):
    """
    It returns a naive factorization of the data.
    Laplace's correction is not needed, but if not used may cause underflow.
    """

    probs = (np.sum(data, axis=0) + alpha) / (data.shape[0] + 2 * alpha)

    naive_fact = Product(children=[
        Bernoulli(p=probs[k], scope=[scope[k]]) for k in range(len(scope))
    ])

    assign_ids(naive_fact)
    rebuild_scopes_bottom_up(naive_fact)

    return naive_fact
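A hypothetical call on a tiny binary dataset:

data = np.array([[1, 0], [1, 1], [0, 1]])
naive = create_naive_fact(data, scope=[0, 1], alpha=1.0)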
Example #16
    def test_correct_parameters(self):
        node_1_2_2 = Leaf(0)
        node_1_2_1 = Leaf(1)
        node_1_1 = Leaf([0, 1])
        node_1_2 = node_1_2_1 * node_1_2_2
        spn = 0.1 * node_1_1 + 0.9 * node_1_2
        node_1_2.id = 0

        rand_gen = RandomState(1234)
        with self.assertRaises(AssertionError):
            mpe(spn, rand_gen.rand(10, 3))

        assign_ids(spn)
        node_1_2_2.id += 1

        with self.assertRaises(AssertionError):
            mpe(spn, rand_gen.rand(10, 3))
Example #17
    def test_torch_vs_tf_time(self):
        # Create sample data
        from sklearn.datasets import make_blobs  # samples_generator was removed in newer scikit-learn
        import tensorflow as tf
        from time import time

        X, y = make_blobs(n_samples=10,
                          centers=3,
                          n_features=2,
                          random_state=0)
        X = X.astype(np.float32)

        # SPFlow implementation
        g00 = Gaussian(mean=0.0, stdev=1.0, scope=0)
        g10 = Gaussian(mean=1.0, stdev=2.0, scope=1)
        g01 = Gaussian(mean=3.0, stdev=2.0, scope=0)
        g11 = Gaussian(mean=5.0, stdev=1.0, scope=1)
        p0 = Product(children=[g00, g10])
        p1 = Product(children=[g01, g11])
        s = Sum(weights=[0.2, 0.8], children=[p0, p1])
        assign_ids(s)
        rebuild_scopes_bottom_up(s)

        # Convert
        tf_spn, data_placeholder, variable_dict = spn_to_tf_graph(s, data=X)
        torch_spn = SumNode.from_spn(s)

        # Optimizer
        lr = 0.001
        tf_optim = tf.train.AdamOptimizer(lr)
        torch_optim = optim.Adam(torch_spn.parameters(), lr)

        t0 = time()
        epochs = 10
        optimize_tf_graph(tf_spn,
                          variable_dict,
                          data_placeholder,
                          X,
                          epochs=epochs,
                          optimizer=tf_optim)
        t1 = time()
        optimize_torch(torch_spn, X, epochs=epochs, optimizer=torch_optim)
        t2 = time()

        print("Tensorflow took: ", t1 - t0)
        print("PyTorch took: ", t2 - t1)
Example #18
def Prune(node):
    v, err = is_valid(node)
    assert v, err
    nodes = get_nodes_by_type(node, (Product, Sum, Max))

    while len(nodes) > 0:
        n = nodes.pop()

        n_type = type(n)
        is_sum = n_type == Sum

        i = 0
        while i < len(n.children):
            c = n.children[i]

            # if a child has only one child of its own, we can remove it
            # and link directly to that grandchild
            if not (isinstance(c, Leaf) or isinstance(c, Max)) and \
                    len(c.children) == 1:

                n.children[i] = c.children[0]
                continue

            if n_type == type(c):
                del n.children[i]
                n.children.extend(c.children)

                if is_sum:
                    w = n.weights[i]
                    del n.weights[i]

                    n.weights.extend([cw * w for cw in c.weights])
                continue

            i += 1
        if is_sum and i > 0:
            n.weights[0] = 1.0 - sum(n.weights[1:])

    if isinstance(node, (Product, Sum)) and len(node.children) == 1:
        node = node.children[0]

    assign_ids(node)
    v, err = is_valid(node)
    assert v, err
    return node
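To see the weight arithmetic in the Sum branch, here is a small sketch using the operator overloads from Example #6 (Leaf, assign_ids and rebuild_scopes_bottom_up come from spn.structure.Base; the collapsed weights are the products of parent and child weights):

from spn.structure.Base import Leaf, assign_ids, rebuild_scopes_bottom_up

leaf_a, leaf_b, leaf_c = Leaf(scope=0), Leaf(scope=0), Leaf(scope=0)
outer = 0.2 * (0.5 * leaf_a + 0.5 * leaf_b) + 0.8 * leaf_c
assign_ids(outer)
rebuild_scopes_bottom_up(outer)
pruned = Prune(outer)  # a single Sum over the three leaves, weights [0.1, 0.1, 0.8]

Example #19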
def Prune(node, contract_single_parents=True, ds_context=None):
    v, err = is_valid(node)
    assert v, err
    nodes = get_nodes_by_type(node, (Product, Sum))

    while len(nodes) > 0:
        n = nodes.pop()

        n_type = type(n)
        is_sum = n_type == Sum

        i = 0
        while i < len(n.children):
            c = n.children[i]

            # if a child has only one child of its own, we can remove it and link directly to that grandchild
            if contract_single_parents and not isinstance(c, Leaf) and len(
                    c.children) == 1:
                n.children[i] = c.children[0]
                continue

            if n_type == type(c):
                del n.children[i]
                n.children.extend(c.children)

                if is_sum:
                    w = n.weights[i]
                    del n.weights[i]
                    # #merge rules
                    # n.rule = n.rule.merge(c.rule, ds_context)
                    n.weights.extend([cw * w for cw in c.weights])
                continue

            i += 1
        if is_sum and i > 0:
            n.weights[0] = 1.0 - sum(n.weights[1:])

    if contract_single_parents and isinstance(node, (Product, Sum)) and len(
            node.children) == 1:
        node = node.children[0]

    assign_ids(node)
    v, err = is_valid(node)
    assert v, err
    return node
Example #20
def Compress(node):
    all_parents = get_parents(node)

    cache = {}

    for n in get_topological_order(node):

        params = (n.parameters, tuple(sorted(n.scope)))

        cached_node = cache.get(params, None)
        if cached_node is None:
            cache[params] = n
        else:
            for parent, pos in all_parents[n]:
                parent.children[pos] = cached_node

    assign_ids(node)
    val, msg = is_valid(node)
    assert val, msg
    return node
Example #21
def complete_layers(layer_nodes, current_node_type=Sum, depth=None):
    # all leaves should be at same depth
    root_layer = False
    if depth is None:
        root_layer = True
        depth = get_depth(layer_nodes[0])

    if depth == 2:
        return

    children_layer = []
    if current_node_type == Sum:
        for i in range(len(layer_nodes)):
            n = layer_nodes[i]
            assert isinstance(n, Sum)
            for j in range(len(n.children)):
                c = n.children[j]
                if not isinstance(c, Product):
                    n.children[j] = Product([c])
            children_layer.extend(n.children)
        children_layer_type = Product
    elif current_node_type == Product:
        for i in range(len(layer_nodes)):
            n = layer_nodes[i]
            assert isinstance(n, Product)
            for j in range(len(n.children)):
                c = n.children[j]
                if not isinstance(c, Sum):
                    n.children[j] = Sum([1.0], [c])
            children_layer.extend(n.children)
        children_layer_type = Sum
    else:
        raise Exception('unknown node type: ' + str(current_node_type))

    complete_layers(children_layer,
                    current_node_type=children_layer_type,
                    depth=depth - 1)

    if root_layer:
        rebuild_scopes_bottom_up(layer_nodes[0])
        assign_ids(layer_nodes[0])
Example #22
    def __init__(self):
        p0 = Product(children=[
            Categorical(p=[0.3, 0.7], scope=1),
            Categorical(p=[0.4, 0.6], scope=2)
        ])
        p1 = Product(children=[
            Categorical(p=[0.5, 0.5], scope=1),
            Categorical(p=[0.6, 0.4], scope=2)
        ])
        s1 = Sum(weights=[0.3, 0.7], children=[p0, p1])
        p2 = Product(children=[Categorical(p=[0.2, 0.8], scope=0), s1])
        p3 = Product(children=[
            Categorical(p=[0.2, 0.8], scope=0),
            Categorical(p=[0.3, 0.7], scope=1)
        ])
        p4 = Product(children=[p3, Categorical(p=[0.4, 0.6], scope=2)])

        self.spn = Sum(weights=[0.4, 0.6], children=[p2, p4])

        assign_ids(self.spn)
        rebuild_scopes_bottom_up(self.spn)
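Example #23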
def marginalize(node, keep):
    # keep must be a set of features that you want to keep

    keep = set(keep)

    def marg_recursive(node):
        new_node_scope = keep.intersection(set(node.scope))

        if len(new_node_scope) == 0:
            # we are summing out this node
            return None

        if isinstance(node, Leaf):
            if len(node.scope) > 1:
                raise Exception('Leaf Node with |scope| > 1')

            return deepcopy(node)

        newNode = node.__class__()

        if isinstance(node, Sum):
            newNode.weights.extend(node.weights)

        for c in node.children:
            new_c = marg_recursive(c)
            if new_c is None:
                continue
            newNode.children.append(new_c)

        newNode.scope.extend(new_node_scope)
        return newNode


    newNode = marg_recursive(node)
    assign_ids(newNode)
    newNode = Prune(newNode)
    valid, err = is_valid(newNode)
    assert valid, err

    return newNode
Example #24
    def test_equal_to_tf(self):
        # SPFlow implementation
        g00 = Gaussian(mean=0.0, stdev=1.0, scope=0)
        g10 = Gaussian(mean=1.0, stdev=2.0, scope=1)
        g01 = Gaussian(mean=3.0, stdev=2.0, scope=0)
        g11 = Gaussian(mean=5.0, stdev=1.0, scope=1)
        p0 = Product(children=[g00, g10])
        p1 = Product(children=[g01, g11])
        s = Sum(weights=[0.2, 0.8], children=[p0, p1])

        assign_ids(s)
        rebuild_scopes_bottom_up(s)

        # Test for 100 random samples
        data = np.random.randn(100, 2)

        # LL from SPN
        ll = log_likelihood(s, data)

        # PyTorch implementation
        g00 = GaussianNode(mean=0.0, std=1.0, scope=0)
        g10 = GaussianNode(mean=1.0, std=2.0, scope=1)
        g01 = GaussianNode(mean=3.0, std=2.0, scope=0)
        g11 = GaussianNode(mean=5.0, std=1.0, scope=1)
        p0 = ProductNode(children=[g00, g10])
        p1 = ProductNode(children=[g01, g11])
        rootnode = SumNode(weights=[0.2, 0.8], children=[p0, p1])

        datatensor = torch.Tensor(data)
        # LL from pytorch
        ll_torch = rootnode(datatensor)

        # Assert equality
        self.assertTrue(
            np.isclose(np.array(ll).squeeze(),
                       ll_torch.detach().numpy(),
                       atol=DELTA).all())
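Example #25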
    def _serialize_model(self, model):
        msg = spflow_capnp.Model.new_message()
        valid, err = is_valid(model.root)
        assert valid, "SPN invalid before serialization: " + str(err)
        # Assign (new) IDs to the nodes.
        # Keep track of already assigned IDs, so the IDs are
        # unique for the whole file.
        assign_ids(model.root, self.assignedIDs)
        # Rebuild scopes bottom-up
        rebuild_scopes_bottom_up(model.root)
        msg.rootNode = model.root.id
        msg.numFeatures = len(model.root.scope)
        msg.featureType = model.featureType
        scope = msg.init("scope", len(model.root.scope))
        for i, v in enumerate(model.root.scope):
            scope[i] = self._unwrap_value(v)
        name = ""
        if model.name is not None:
            name = model.name
        msg.name = name
        numNodes = get_number_of_nodes(model.root)
        nodes = msg.init("nodes", numNodes)
        nodeList = ListHandler(nodes)
        self._serialize_graph([model.root], nodeList)
        return msg
Example #26
def Prune(node, check_cluster_centers=False):
    """
    Prunes spn. Ensures that nodes have at least one child and that types of node and children differ.
    Adapts weigths and optionally bloom filters accordingly.
    :param node:
    :return:
    """

    # v, err = is_valid(node)
    # assert v, err
    nodes = get_nodes_by_type(node, (Product, Sum))

    while len(nodes) > 0:
        n = nodes.pop()

        n_type = type(n)
        is_sum = n_type == Sum

        i = 0
        while i < len(n.children):
            c = n.children[i]

            # if a child has only one child of its own, we can remove it and link directly to the grandchild
            # in this case, no bloom filters can be lost since we do not split
            if not isinstance(c, Leaf) and len(c.children) == 1:
                n.children[i] = c.children[0]
                continue

            # if the type is similar to the type of the child
            if n_type == type(c):

                if is_sum:
                    # cluster centers learned?
                    if len(n.cluster_centers) > 0:
                        old_len = len(n.cluster_centers)
                        len_child_cluster = len(c.cluster_centers)
                        del n.cluster_centers[i]
                        n.cluster_centers.extend(c.cluster_centers)

                        if check_cluster_centers:
                            assert old_len - 1 + len_child_cluster == len(
                                n.cluster_centers
                            ), "cluster_center length mismatch at node " + str(n) + " / child " + str(c)

                del n.children[i]
                n.children.extend(c.children)

                if is_sum:
                    w = n.weights[i]
                    del n.weights[i]

                    n.weights.extend([cw * w for cw in c.weights])

                continue

            i += 1
        if is_sum and i > 0:
            n.weights[0] = 1.0 - sum(n.weights[1:])

    if isinstance(node, (Product, Sum)) and len(node.children) == 1:
        node = node.children[0]

    assign_ids(node)
    v, err = is_valid(node, check_cluster_centers=check_cluster_centers)
    assert v, err

    return node
Example #27
def learn_structure(
    dataset,
    ds_context,
    split_rows,
    split_cols,
    create_leaf,
    next_operation=get_next_operation(),
    initial_scope=None,
    data_slicer=default_slicer,
):
    assert dataset is not None
    assert ds_context is not None
    assert split_rows is not None
    assert split_cols is not None
    assert create_leaf is not None
    assert next_operation is not None

    root = Product()
    root.children.append(None)

    if initial_scope is None:
        initial_scope = list(range(dataset.shape[1]))
        num_conditional_cols = None
    elif len(initial_scope) < dataset.shape[1]:
        num_conditional_cols = dataset.shape[1] - len(initial_scope)
    else:
        num_conditional_cols = None
        # reaching this branch means len(initial_scope) >= dataset.shape[1];
        # only exact equality is a valid configuration
        assert len(initial_scope) == dataset.shape[
            1], "check initial scope: %s" % initial_scope

    tasks = deque()
    tasks.append((dataset, root, 0, initial_scope, False, False))

    while tasks:

        local_data, parent, children_pos, scope, no_clusters, no_independencies = tasks.popleft()

        operation, op_params = next_operation(
            local_data,
            scope,
            create_leaf,
            no_clusters=no_clusters,
            no_independencies=no_independencies,
            is_first=(parent is root),
        )

        logging.debug("OP: {} on slice {} (remaining tasks {})".format(
            operation, local_data.shape, len(tasks)))

        if operation == Operation.REMOVE_UNINFORMATIVE_FEATURES:
            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            rest_scope = set(range(len(scope)))
            for col in op_params:
                rest_scope.remove(col)
                node.children.append(None)
                tasks.append((
                    data_slicer(local_data, [col], num_conditional_cols),
                    node,
                    len(node.children) - 1,
                    [scope[col]],
                    True,
                    True,
                ))

            next_final = False

            if len(rest_scope) == 0:
                continue
            elif len(rest_scope) == 1:
                next_final = True

            node.children.append(None)
            c_pos = len(node.children) - 1

            rest_cols = list(rest_scope)
            rest_scope = [scope[col] for col in rest_scope]

            tasks.append((
                data_slicer(local_data, rest_cols, num_conditional_cols),
                node,
                c_pos,
                rest_scope,
                next_final,
                next_final,
            ))

            continue

        elif operation == Operation.SPLIT_ROWS:

            split_start_t = perf_counter()
            data_slices = split_rows(local_data, ds_context, scope)
            split_end_t = perf_counter()
            logging.debug("\t\tfound {} row clusters (in {:.5f} secs)".format(
                len(data_slices), split_end_t - split_start_t))

            if len(data_slices) == 1:
                tasks.append(
                    (local_data, parent, children_pos, scope, True, False))
                continue

            node = Sum()
            node.scope.extend(scope)
            parent.children[children_pos] = node
            # assert parent.scope == node.scope

            for data_slice, scope_slice, proportion in data_slices:
                assert isinstance(scope_slice, list), "slice must be a list"

                node.children.append(None)
                node.weights.append(proportion)
                tasks.append((data_slice, node, len(node.children) - 1, scope,
                              False, False))

            continue

        elif operation == Operation.SPLIT_COLUMNS:
            split_start_t = perf_counter()
            data_slices = split_cols(local_data, ds_context, scope)
            split_end_t = perf_counter()
            logging.debug("\t\tfound {} col clusters (in {:.5f} secs)".format(
                len(data_slices), split_end_t - split_start_t))

            if len(data_slices) == 1:
                tasks.append(
                    (local_data, parent, children_pos, scope, False, True))
                assert np.shape(data_slices[0][0]) == np.shape(local_data)
                assert data_slices[0][1] == scope
                continue

            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            for data_slice, scope_slice, _ in data_slices:
                assert isinstance(scope_slice, list), "slice must be a list"

                node.children.append(None)
                tasks.append((data_slice, node, len(node.children) - 1,
                              scope_slice, False, False))

            continue

        elif operation == Operation.NAIVE_FACTORIZATION:
            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            local_tasks = []
            local_children_params = []
            split_start_t = perf_counter()
            for col in range(len(scope)):
                node.children.append(None)
                # tasks.append((data_slicer(local_data, [col], num_conditional_cols), node, len(node.children) - 1, [scope[col]], True, True))
                local_tasks.append(len(node.children) - 1)
                child_data_slice = data_slicer(local_data, [col],
                                               num_conditional_cols)
                local_children_params.append(
                    (child_data_slice, ds_context, [scope[col]]))

            # NOTE: relies on a module-level multiprocessing 'pool' being defined
            result_nodes = pool.starmap(create_leaf, local_children_params)
            # result_nodes = []
            # for l in tqdm(local_children_params):
            #    result_nodes.append(create_leaf(*l))
            # result_nodes = [create_leaf(*l) for l in local_children_params]
            for child_pos, child in zip(local_tasks, result_nodes):
                node.children[child_pos] = child

            split_end_t = perf_counter()

            logging.debug(
                "\t\tnaive factorization {} columns (in {:.5f} secs)".format(
                    len(scope), split_end_t - split_start_t))

            continue

        elif operation == Operation.CREATE_LEAF:
            leaf_start_t = perf_counter()
            node = create_leaf(local_data, ds_context, scope)
            parent.children[children_pos] = node
            leaf_end_t = perf_counter()

            logging.debug(
                "\t\t created leaf {} for scope={} (in {:.5f} secs)".format(
                    node.__class__.__name__, scope, leaf_end_t - leaf_start_t))

        else:
            raise Exception("Invalid operation: " + operation)

    node = root.children[0]
    assign_ids(node)
    valid, err = is_valid(node)
    assert valid, "invalid spn: " + err
    node = Prune(node)
    valid, err = is_valid(node)
    assert valid, "invalid spn: " + err

    return node
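Example #28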
def spn_for_evidence(spn,
                     evidence_ranges,
                     node_likelihood=None,
                     distribution_update_ranges=None):
    from spn.structure.Base import Sum, Product, Leaf, assign_ids
    from spn.algorithms.TransformStructure import Prune
    from spn.algorithms.Validity import is_valid
    from copy import deepcopy

    def spn_for_evidence_recursive(node):

        if isinstance(node, Leaf):
            if len(node.scope) > 1:
                raise Exception("Leaf Node with |scope| > 1")

            if evidence_ranges[node.scope[0]] is not None:
                t_node = type(node)
                if t_node in node_likelihood:
                    ranges = np.array([evidence_ranges])
                    prob = node_likelihood[t_node](
                        node, ranges, node_likelihood=node_likelihood)[0][0]
                    if prob == 0:
                        newNode = deepcopy(node)
                    else:
                        newNode = deepcopy(node)
                        distribution_update_ranges[t_node](
                            newNode, evidence_ranges[node.scope[0]])
                else:
                    raise Exception(
                        'No log-likelihood method specified for node type: ' +
                        str(type(node)))
            else:
                prob = 1
                newNode = deepcopy(node)

            return prob, newNode

        newNode = node.__class__()
        newNode.scope = node.scope

        if isinstance(node, Sum):
            new_weights = []
            new_childs = []

            for i, c in enumerate(node.children):
                prob, new_child = spn_for_evidence_recursive(c)
                new_prob = prob * node.weights[i]
                if new_prob > 0:
                    new_weights.append(new_prob)
                    new_childs.append(new_child)

            new_weights = np.array(new_weights)
            newNode.weights = new_weights / np.sum(new_weights)
            newNode.children = new_childs
            return np.sum(new_weights), newNode

        elif isinstance(node, Product):
            new_childs = []

            new_prob = 1.
            for i, c in enumerate(node.children):
                prob, new_child = spn_for_evidence_recursive(c)
                new_prob *= prob
                new_childs.append(new_child)

            newNode.children = new_childs
            return new_prob, newNode

    prob, newNode = spn_for_evidence_recursive(spn)
    assign_ids(newNode)
    newNode = Prune(newNode)
    valid, err = is_valid(newNode)
    assert valid, err

    return prob, newNode
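Example #29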
def train_spn(window_size=3,
              min_instances_slice=10000,
              features=None,
              number_of_classes=3):
    if features is None:
        features = [20, 120]

    add_parametric_inference_support()
    add_parametric_text_support()

    data = get_data_in_window(window_size=window_size,
                              features=features,
                              three_classes=number_of_classes == 3)

    sss = sk.model_selection.StratifiedShuffleSplit(test_size=0.2,
                                                    train_size=0.8,
                                                    random_state=42)
    for train_index, test_index in sss.split(
            data[:, 0:window_size * window_size * len(features)],
            data[:, (window_size * window_size * len(features)) +
                 (int(window_size * window_size / 2))]):
        X_train, X_test = data[train_index], data[test_index]

    context_list = list()
    parametric_list = list()
    number_of_features = len(features)
    for _ in range(number_of_features * window_size * window_size):
        context_list.append(MetaType.REAL)
        parametric_list.append(Gaussian)

    for _ in range(window_size * window_size):
        context_list.append(MetaType.DISCRETE)
        parametric_list.append(Categorical)

    ds_context = Context(meta_types=context_list)
    ds_context.add_domains(data)
    ds_context.parametric_types = parametric_list

    spn = load_spn(window_size, features, min_instances_slice,
                   number_of_classes)
    if spn is None:
        spn = Sum()
        for class_pixel in tqdm(range(-window_size * window_size, 0)):
            for label, count in zip(
                    *np.unique(data[:, class_pixel], return_counts=True)):
                train_data = X_train[X_train[:, class_pixel] == label, :]
                branch = learn_parametric(
                    train_data,
                    ds_context,
                    min_instances_slice=min_instances_slice)
                spn.children.append(branch)
                spn.weights.append(train_data.shape[0])

        spn.scope.extend(branch.scope)
        spn.weights = (np.array(spn.weights) / sum(spn.weights)).tolist()

        assign_ids(spn)
        save_spn(spn, window_size, features, min_instances_slice,
                 number_of_classes)

    res = np.ndarray((X_test.shape[0], number_of_classes))

    for i in tqdm(range(number_of_classes)):
        tmp = X_test.copy()
        tmp[:, -int((window_size**2) / 2)] = i
        res[:, i] = log_likelihood(spn, tmp)[:, 0]

    predicted_classes = np.argmax(res, axis=1).reshape((X_test.shape[0], 1))

    correct_predicted = 0
    for x, y in zip(X_test[:, -5], predicted_classes):
        if x == y[0]:
            correct_predicted += 1
    accuracy = correct_predicted / X_test.shape[0]
    return spn, accuracy
Example #30
    scope=[0],
    init_weights=b_lf_1_init_weights)
b_lf_2_init_weights = {Gaussian: 0.3, Gamma: 0.7}
# b_lf_2_init_weights = np.array([.3, .7])
b_fat_right_leaf_2, _priors = type_mixture_leaf_factory(
    leaf_type='pm',
    leaf_meta_type=MetaType.REAL,
    type_to_param_map=pm_continuous_param_map,
    scope=[1],
    init_weights=b_lf_2_init_weights)
l_r_prod.children = [b_fat_right_leaf_1, b_fat_right_leaf_2]

#
# composing
rebuild_scopes_bottom_up(root)
assign_ids(root)
print(root)
print(spn_to_str_equation(root))

global_W = compute_global_type_weights(root)
print('GLOBAL_W', global_W)

global_W = compute_global_type_weights(root, aggr_type=True)
print('GLOBAL_W', global_W)

gw_map = compute_leaf_global_mix_weights(root)
print('G MIX W', gw_map)

part_map = compute_partition_id_map(root)
print('PARTITION MAP', part_map)