Code Example #1
File: basics.py Project: SwarajPawar/SPFlow
def create_SPN2():
    from spn.structure.Base import assign_ids
    from spn.structure.Base import rebuild_scopes_bottom_up

    from spn.algorithms.Validity import is_valid
    from spn.structure.leaves.parametric.Parametric import Categorical

    from spn.structure.Base import Sum, Product

    p0 = Product(children=[
        Categorical(p=[0.3, 0.7], scope=1),
        Categorical(p=[0.4, 0.6], scope=2)
    ])
    p1 = Product(children=[
        Categorical(p=[0.5, 0.5], scope=1),
        Categorical(p=[0.6, 0.4], scope=2)
    ])
    s1 = Sum(weights=[0.3, 0.7], children=[p0, p1])
    p2 = Product(children=[Categorical(p=[0.2, 0.8], scope=0), s1])
    p3 = Product(children=[
        Categorical(p=[0.2, 0.8], scope=0),
        Categorical(p=[0.3, 0.7], scope=1)
    ])
    p4 = Product(children=[p3, Categorical(p=[0.4, 0.6], scope=2)])
    spn = Sum(weights=[0.4, 0.6], children=[p2, p4])

    assign_ids(spn)
    rebuild_scopes_bottom_up(spn)

    val, msg = is_valid(spn)
    assert val, msg

    return spn
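The hand-built SPN above can be queried directly with SPFlow's inference routine. A minimal usage sketch (the query values are made up for illustration; log_likelihood comes from spn.algorithms.Inference):

import numpy as np
from spn.algorithms.Inference import log_likelihood

spn = create_SPN2()
# One row per query instance; the columns correspond to scopes 0, 1 and 2.
query = np.array([[1.0, 0.0, 1.0]])
print(log_likelihood(spn, query))  # column vector of log-probabilities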
Code Example #2
    def _deserialize_sum(self, node, node_map):
        child_ids = node.sum.children
        # Resolve references to child nodes by ID.
        children = [node_map.get(child_id) for child_id in child_ids]
        # Check that all children have been resolved.
        assert None not in children, "Child node ID could not be resolved"
        sum_node = Sum(children=children, weights=node.sum.weights)
        sum_node.id = node.id
        return sum_node
Code Example #3
def create_sum(data=None,
               node_id=0,
               parent_id=0,
               pos=0,
               context=None,
               scope=None,
               split_rows=None,
               split_on_sum=True,
               **kwargs):
    assert split_rows is not None, "No split_rows lambda"
    assert scope is not None, "No scope"

    result = []

    data_slices = split_rows(data, context, scope)

    if len(data_slices) == 1:
        result.append((
            SplittingOperations.GET_NEXT_OP,
            {
                "data": data,
                "parent_id": parent_id,
                "pos": pos,
                "no_clusters": True,
                "scope": scope,
            },
        ))
        return result

    node = Sum()
    node.scope.extend(scope)
    node.id = node_id
    # assert parent.scope == node.scope

    for data_slice, scope_slice, proportion in data_slices:
        assert isinstance(scope_slice, list), "slice must be a list"

        child_data = data
        if split_on_sum:
            child_data = data_slice

        node.children.append(None)
        node.weights.append(proportion)
        result.append((
            SplittingOperations.GET_NEXT_OP,
            {
                "data": child_data,
                "parent_id": node.id,
                "pos": len(node.children) - 1,
                "scope": scope,
            },
        ))

    return node, result
Code Example #4
    def tree_to_spn(tree, features):
        tnode = tree.data

        if tnode == "sumnode":
            node = Sum()
            for i in range(int(len(tree.children) / 2)):
                j = 2 * i
                w, c = tree.children[j], tree.children[j + 1]
                node.weights.append(float(w))
                node.children.append(tree_to_spn(c, features))
            return node

        if tnode == "prodnode":
            if len(tree.children) == 1:
                return tree_to_spn(tree.children[0], features)
            node = Product()
            for c in tree.children:
                node.children.append(tree_to_spn(c, features))
            return node

        if tnode in str_to_spn_lambdas:
            return str_to_spn_lambdas[tnode][0](tree, features,
                                                str_to_spn_lambdas[tnode][2],
                                                tree_to_spn)

        raise Exception('Node type not registered: ' + tnode)
Code Example #5
    def build_recursive(dep_tree, table_keys, scopes, attribute_owners, path_constraints=None, cache=None):
        if path_constraints is None:
            path_constraints = []

        new_node = Sum()
        for (table_names_keys, dep_node) in get_dependncy_keys(dep_tree, table_keys, attribute_owners,
                                                               path_constraints):

            for constraint_configuration, cached_node_count in get_constraint_values(table_names_keys, path_constraints,
                                                                                     cache):
                p_node = Product()
                new_node.children.append(p_node)
                count_value = 1

                for cached_node, node_count in cached_node_count:
                    p_node.children.append(cached_node)
                    count_value *= node_count

                for dep_children_node in dep_node.children:
                    if dep_children_node.name[0] == '@':
                        continue

                    node, count = build_recursive(dep_children_node, table_keys, scopes, attribute_owners,
                                                  path_constraints=constraint_configuration,
                                                  cache=cache)
                    p_node.children.append(node)
                    count_value *= count
                new_node.weights.append(count_value)

        wsum = np.sum(new_node.weights)
        # new_node.weights = [w / wsum for w in new_node.weights]

        return new_node, wsum
Code Example #6
def create_spflow_spn(n_feats):
    gaussians1 = []
    gaussians2 = []
    for i in range(n_feats):
        g1 = Gaussian(np.random.randn(), np.random.rand(), scope=i)
        g2 = Gaussian(np.random.randn(), np.random.rand(), scope=i)
        gaussians1.append(g1)
        gaussians2.append(g2)

    prods1 = []
    prods2 = []
    for i in range(0, n_feats, 2):
        p1 = Product([gaussians1[i], gaussians1[i + 1]])
        p2 = Product([gaussians2[i], gaussians2[i + 1]])
        prods1.append(p1)
        prods2.append(p2)

    sums = []
    for i in range(n_feats // 2):
        s = Sum(weights=[0.5, 0.5], children=[prods1[i], prods2[i]])
        sums.append(s)

    spflow_spn = Product(sums)
    assign_ids(spflow_spn)
    rebuild_scopes_bottom_up(spflow_spn)
    return spflow_spn
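A quick sanity check of the generator above, assuming the same SPFlow imports (Gaussian, Product, Sum, assign_ids, rebuild_scopes_bottom_up) are in scope; note that n_feats must be even, because the leaves are paired into two-child products:

import numpy as np
from spn.algorithms.Inference import log_likelihood

spn = create_spflow_spn(4)
dummy = np.random.randn(10, 4)
print(log_likelihood(spn, dummy).shape)  # one log-likelihood per row, shape (10, 1)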
Code Example #7
    def test_spn_to_str_and_back(self):
        self.check_obj_and_reconstruction(
            Categorical(p=[0.1, 0.2, 0.7], scope=0))

        self.check_obj_and_reconstruction(Gaussian(mean=0, stdev=10, scope=0))
        self.check_obj_and_reconstruction(
            Gaussian(mean=1.2, stdev=1.5, scope=0))

        self.check_obj_and_reconstruction(Gaussian(mean=-1.2, stdev=1,
                                                   scope=0))

        gamma = Gamma(alpha=1, beta=2, scope=0)
        lnorm = LogNormal(mean=1, stdev=2, scope=0)

        self.check_obj_and_reconstruction(gamma)

        self.check_obj_and_reconstruction(lnorm)

        root = Sum(children=[gamma, lnorm], weights=[0.2, 0.8])
        assign_ids(root)
        rebuild_scopes_bottom_up(root)
        self.check_obj_and_reconstruction(root)

        root = (0.3 * (Gaussian(mean=0, stdev=1, scope=0) *
                       Gaussian(mean=1, stdev=1, scope=1)) +
                0.7 * (Gaussian(mean=2, stdev=1, scope=0) *
                       Gaussian(mean=3, stdev=1, scope=1)))

        self.check_obj_and_reconstruction(root)
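The last case above uses SPFlow's operator overloading: multiplying two nodes builds a Product and a weighted sum builds a Sum. A sketch of the equivalent explicit construction with the same parameters:

root = Sum(weights=[0.3, 0.7],
           children=[Product(children=[Gaussian(mean=0, stdev=1, scope=0),
                                        Gaussian(mean=1, stdev=1, scope=1)]),
                     Product(children=[Gaussian(mean=2, stdev=1, scope=0),
                                       Gaussian(mean=3, stdev=1, scope=1)])])
assign_ids(root)
rebuild_scopes_bottom_up(root)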
Code Example #8
def create_spflow_spn(n_feats, ctype=Gaussian):
    children1 = []
    children2 = []
    for i in range(n_feats):
        if ctype == Gaussian:
            c1 = Gaussian(np.random.randn(), np.random.rand(), scope=i)
            c2 = Gaussian(np.random.randn(), np.random.rand(), scope=i)
        else:
            #c1 = Bernoulli(p=1.0, scope=i)
            #c2 = Bernoulli(p=1.0, scope=i)
            c1 = Bernoulli(p=np.random.rand(), scope=i)
            c2 = Bernoulli(p=np.random.rand(), scope=i)

        children1.append(c1)
        children2.append(c2)

    prods1 = []
    prods2 = []
    for i in range(0, n_feats, 2):
        p1 = Product([children1[i], children1[i + 1]])
        p2 = Product([children2[i], children2[i + 1]])
        prods1.append(p1)
        prods2.append(p2)

    sums = []
    for i in range(n_feats // 2):
        s = Sum(weights=[0.5, 0.5], children=[prods1[i], prods2[i]])
        sums.append(s)

    spflow_spn = Product(sums)
    assign_ids(spflow_spn)
    rebuild_scopes_bottom_up(spflow_spn)
    return spflow_spn
Code Example #9
def test_cpu_histogram():
    # Construct a minimal SPN.
    h1 = Histogram([0., 1., 2.], [0.25, 0.75], [1, 1], scope=0)
    h2 = Histogram([0., 3., 6., 8.], [0.35, 0.1, 0.55], [1, 1], scope=1)
    h3 = Histogram([0., 1., 2.], [0.33, 0.67], [1, 1], scope=0)
    h4 = Histogram([0., 5., 8.], [0.875, 0.125], [1, 1], scope=1)

    p0 = Product(children=[h1, h2])
    p1 = Product(children=[h3, h4])
    spn = Sum([0.3, 0.7], [p0, p1])

    inputs = np.column_stack((
        np.random.randint(2, size=30),
        np.random.randint(8, size=30),
    )).astype("float64")

    # Insert some NaN in random places into the input data.
    inputs.ravel()[np.random.choice(inputs.size, 5, replace=False)] = np.nan

    if not CUDACompiler.isAvailable():
        print("Test not supported by the compiler installation")
        return 0

    # Execute the compiled Kernel.
    results = CUDACompiler().log_likelihood(spn, inputs)

    # Compute the reference results using the inference from SPFlow.
    reference = log_likelihood(spn, inputs)
    reference = reference.reshape(30)

    # Check the computation results against the reference
    # Check in normal space if log-results are not very close to each other.
    assert np.all(np.isclose(results, reference)) or np.all(
        np.isclose(np.exp(results), np.exp(reference)))
Code Example #10
    def test_naive_factorization(self):
        np.random.seed(17)
        data = np.arange(0, 1000).reshape(-1, 8)

        parent = Sum()
        parent.children.append(None)

        ctx = Context()
        ctx.feature_size = 4

        scope = [1, 3, 4, 6]
        data2 = np.array(data)
        result = naive_factorization(data=data2,
                                     parent=parent,
                                     pos=0,
                                     context=ctx,
                                     scope=list(scope))

        self.assertListEqual(data.tolist(), data2.tolist())

        self.assertEqual(parent.children[0], result[0][1]['parent'])

        y, x = get_YX(data, 4)

        self.assertEqual(len(result), len(scope))
        for i, s in enumerate(scope):
            r = result[i]
            self.assertEqual(len(r), 2)
            self.assertEqual(r[0], SplittingOperations.CREATE_LEAF_NODE)
            self.assertEqual(type(r[1]['parent']), Product)
            self.assertEqual(r[1]['pos'], i)
            self.assertListEqual(r[1]['scope'], [s])
            self.assertListEqual(r[1]['data'].tolist(),
                                 concatenate_yx(y[:, i], x).tolist())
Code Example #11
def create_disj(data, scope, assignments, alpha):

    unq_data, counts = np.unique(data, axis=0, return_counts=True)
    probs = np.zeros(assignments.shape[0])
    for i in range(assignments.shape[0]):
        index = np.where(np.all(assignments[i] == unq_data, axis=1))[0]
        if len(index):
            probs[i] = counts[index[0]]
    probs = (probs + alpha) / (probs + alpha).sum()

    indicators = {
        var: [Bernoulli(scope=[var], p=0),
              Bernoulli(scope=[var], p=1)]
        for var in scope
    }

    prods = []
    for i in range(assignments.shape[0]):
        children = []
        for j in range(assignments.shape[1]):
            children.append(indicators[scope[j]][assignments[i, j]])
            # children.append(Bernoulli(scope=[scope[j]], p=assignments[i, j]))
        prods.append(Product(children=children))

    if len(prods) > 1:
        disj = Sum(children=prods, weights=probs)
    else:
        disj = prods[0]

    assign_ids(disj)
    rebuild_scopes_bottom_up(disj)

    return disj
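A hypothetical call to create_disj over two binary variables; the assignment matrix enumerates all joint states and the data rows are made up (numpy and the SPFlow imports used by the function above are assumed to be in scope):

import numpy as np

assignments = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
data = np.array([[0, 1], [1, 1], [1, 1], [0, 0]])
disj = create_disj(data, scope=[0, 1], assignments=assignments, alpha=0.1)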
Code Example #12
def test_log_vector_histogram():
    # Construct a minimal SPN.
    h1 = Histogram([0., 1., 2.], [0.25, 0.75], [1, 1], scope=0)
    h2 = Histogram([0., 1., 2.], [0.45, 0.55], [1, 1], scope=1)
    h3 = Histogram([0., 1., 2.], [0.33, 0.67], [1, 1], scope=0)
    h4 = Histogram([0., 1., 2.], [0.875, 0.125], [1, 1], scope=1)

    p0 = Product(children=[h1, h2])
    p1 = Product(children=[h3, h4])
    spn = Sum([0.3, 0.7], [p0, p1])

    inputs = np.column_stack((
        np.random.randint(2, size=30),
        np.random.randint(2, size=30),
    )).astype("float64")

    if not CPUCompiler.isVectorizationSupported():
        print("Test not supported by the compiler installation")
        return 0

    # Execute the compiled Kernel.
    results = CPUCompiler(maxTaskSize=5).log_likelihood(spn, inputs, supportMarginal=False)

    # Compute the reference results using the inference from SPFlow.
    reference = log_likelihood(spn, inputs)
    reference = reference.reshape(30)

    # Check the computation results against the reference
    # Check in normal space if log-results are not very close to each other.
    assert np.all(np.isclose(results, reference)) or np.all(np.isclose(np.exp(results), np.exp(reference)))
Code Example #13
def sum_condition(node, children, input_vals=None, scope=None):
    if not scope.intersection(node.scope):
        return Copy(node), 0
    new_node = Sum()
    new_node.scope = list(set(node.scope) - scope)
    new_weights = []
    probs = []
    for i, c in enumerate(children):
        if c[0]:
            new_node.children.append(c[0])
            new_weights.append(node.weights[i] * np.exp(c[1]))
        else:
            probs.append(node.weights[i] * np.exp(c[1]))
    new_node.weights = [w / sum(new_weights) for w in new_weights]
    assert np.all(np.logical_not(np.isnan(
        new_node.weights))), 'Found nan weights'
    if not new_node.scope:
        return None, np.log(sum(probs))
    return new_node, np.log(sum(new_weights))
Code Example #14
    def test_create_conditional(self):

        np.random.seed(17)
        data = np.arange(0, 1000).reshape(-1, 8)

        parent = Sum()
        parent.children.append(None)

        ctx = Context()
        ctx.feature_size = 4

        scope = [1, 3, 4, 6]
        data2 = np.array(data)

        K = int(data.shape[0] * 0.25)
        split_idx = np.array([0] * K + [1] * (data.shape[0] - K))
        np.random.shuffle(split_idx)

        y, x = get_YX(data, 4)

        def label_conditional(local_y, local_x):
            self.assertListEqual(local_y.tolist(), y.tolist())
            self.assertListEqual(local_x.tolist(), x.tolist())
            return split_idx

        result = create_conditional(data=data2,
                                    parent=parent,
                                    pos=0,
                                    context=ctx,
                                    scope=list(scope),
                                    label_conditional=label_conditional)

        self.assertListEqual(data.tolist(), data2.tolist())

        self.assertEqual(len(result), 2)

        for i, r in enumerate(result):
            self.assertEqual(r[0], SplittingOperations.GET_NEXT_OP)
            self.assertIn('data', r[1])
            self.assertEqual(parent.children[0], r[1]['parent'])
            self.assertEqual(r[1]['pos'], i)
            self.assertListEqual(scope, r[1]['scope'])
            self.assertEqual(r[1]['data'].shape[1], data.shape[1])

        conditional_node = result[0][1]['parent']

        child_idx = conditional_supervised_likelihood(
            conditional_node,
            [np.zeros((data.shape[0], 1)),
             np.ones((data.shape[0], 1))], data)

        self.assertListEqual(result[0][1]['data'].tolist(),
                             data[child_idx[:, 0] == 0, :].tolist())
        self.assertListEqual(result[1][1]['data'].tolist(),
                             data[child_idx[:, 0] == 1, :].tolist())
Code Example #15
    def __init__(self):
        p0 = Product(children=[
            Categorical(p=[0.3, 0.7], scope=1),
            Categorical(p=[0.4, 0.6], scope=2)
        ])
        p1 = Product(children=[
            Categorical(p=[0.5, 0.5], scope=1),
            Categorical(p=[0.6, 0.4], scope=2)
        ])
        s1 = Sum(weights=[0.3, 0.7], children=[p0, p1])
        p2 = Product(children=[Categorical(p=[0.2, 0.8], scope=0), s1])
        p3 = Product(children=[
            Categorical(p=[0.2, 0.8], scope=0),
            Categorical(p=[0.3, 0.7], scope=1)
        ])
        p4 = Product(children=[p3, Categorical(p=[0.4, 0.6], scope=2)])

        self.spn = Sum(weights=[0.4, 0.6], children=[p2, p4])

        assign_ids(self.spn)
        rebuild_scopes_bottom_up(self.spn)
Code Example #16
def SPN_Reshape(node, max_children=2):
    v, err = is_valid(node)
    assert v, err
    nodes = get_nodes_by_type(node, (Product, Sum))

    while len(nodes) > 0:
        n = nodes.pop()

        if len(n.children) <= max_children:
            continue

        # node has more than max_children children: group them into a hierarchy of smaller nodes
        new_children = []
        new_weights = []
        for i in range(0, len(n.children), max_children):
            children = n.children[i:i + max_children]

            if len(children) > 1:
                if isinstance(n, Product):
                    newChild = Product()
                    for c in children:
                        newChild.scope.extend(c.scope)
                    newChild.children.extend(children)
                    new_children.append(newChild)
                else:  # Sum
                    weights = n.weights[i:i + max_children]
                    branch_weight = sum(weights)
                    new_weights.append(branch_weight)

                    newChild = Sum()
                    newChild.scope.extend(children[0].scope)
                    newChild.children.extend(children)
                    newChild.weights.extend(
                        [w / branch_weight for w in weights])
                    newChild.weights[0] = 1.0 - sum(newChild.weights[1:])
                    new_children.append(newChild)
            else:
                new_children.extend(children)

                if isinstance(n, Sum):
                    new_weights.append(1.0 - sum(new_weights))

        n.children = new_children
        if isinstance(n, Sum):
            n.weights = new_weights
        nodes.append(n)

    assign_ids(node)
    v, err = is_valid(node)
    assert v, err
    return node
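A sketch of applying SPN_Reshape to a sum node with more children than allowed (assuming the same SPFlow imports as the function above; the Categorical leaves are only there to give the products a valid scope):

from spn.structure.leaves.parametric.Parametric import Categorical

parts = [Product(children=[Categorical(p=[0.5, 0.5], scope=0),
                           Categorical(p=[0.5, 0.5], scope=1)])
         for _ in range(3)]
wide = Sum(weights=[0.2, 0.3, 0.5], children=parts)
assign_ids(wide)
rebuild_scopes_bottom_up(wide)
reshaped = SPN_Reshape(wide, max_children=2)  # the root now has only two children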
Code Example #17
def learn_classifier(data, ds_context, spn_learn_wrapper, label_idx, **kwargs):
    spn = Sum()
    for label, count in zip(*np.unique(data[:, label_idx], return_counts=True)):
        branch = spn_learn_wrapper(data[data[:, label_idx] == label, :], ds_context, **kwargs)
        spn.children.append(branch)
        spn.weights.append(count / data.shape[0])

    spn.scope.extend(branch.scope)
    assign_ids(spn)

    valid, err = is_valid(spn)
    assert valid, "invalid spn: " + err

    return spn
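learn_classifier builds one branch per class label and weights the branches by class frequency. A usage sketch along the lines of SPFlow's documented classification workflow (the toy data and column layout are made up; learn_parametric and mpe come from SPFlow's LearningWrappers and MPE modules):

import numpy as np
from spn.structure.Base import Context
from spn.structure.leaves.parametric.Parametric import Gaussian, Categorical
from spn.algorithms.LearningWrappers import learn_parametric
from spn.algorithms.MPE import mpe

# Two real-valued features plus a binary label in the last column.
train_data = np.c_[np.random.randn(200, 2), np.random.randint(2, size=200)]
ds_context = Context(parametric_types=[Gaussian, Gaussian, Categorical]).add_domains(train_data)

spn = learn_classifier(train_data, ds_context, learn_parametric, 2)

# Predict by masking the label column with NaN and taking the MPE assignment.
query = np.array(train_data[:5], dtype=float)
query[:, 2] = np.nan
print(mpe(spn, query)[:, 2])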
Code Example #18
    def test_create_sum_with_split(self):
        np.random.seed(17)
        data = np.arange(0, 1000).reshape(-1, 8)

        parent = Sum()
        parent.children.append(None)

        ctx = Context()
        ctx.feature_size = 4

        scope = [1, 3, 4, 6]
        data2 = np.array(data)

        K = int(data.shape[0] * 0.25)
        split_idx = np.array([0] * K + [1] * (data.shape[0] - K))
        np.random.shuffle(split_idx)

        def split_rows(data, context, scope):
            result = []
            result.append((data[split_idx == 0, :], scope, 0.25))
            result.append((data[split_idx == 1, :], scope, 0.75))
            return result

        result = create_sum(data=data2,
                            parent=parent,
                            pos=0,
                            context=ctx,
                            scope=list(scope),
                            split_rows=split_rows,
                            split_on_sum=True)

        self.assertListEqual(data.tolist(), data2.tolist())

        self.assertEqual(len(result), 2)
        for i, r in enumerate(result):
            self.assertEqual(r[0], SplittingOperations.GET_NEXT_OP)
            self.assertIn('data', r[1])
            self.assertEqual(parent.children[0], r[1]['parent'])
            self.assertEqual(r[1]['pos'], i)
            self.assertListEqual(scope, r[1]['scope'])
            self.assertEqual(r[1]['data'].shape[1], data.shape[1])
            self.assertEqual(r[1]['data'].shape[0],
                             int(np.sum(split_idx == i)))

        self.assertListEqual(result[0][1]['data'].tolist(),
                             data[split_idx == 0, :].tolist())
        self.assertListEqual(result[1][1]['data'].tolist(),
                             data[split_idx == 1, :].tolist())
        self.assertAlmostEqual(np.sum(parent.children[0].weights), 1.0)
Code Example #19
    def test_torch_vs_tf_time(self):
        # Create sample data
        from sklearn.datasets import make_blobs  # the samples_generator module was removed in newer scikit-learn
        import tensorflow as tf
        from time import time

        X, y = make_blobs(n_samples=10,
                          centers=3,
                          n_features=2,
                          random_state=0)
        X = X.astype(np.float32)

        # SPFLow implementation
        g00 = Gaussian(mean=0.0, stdev=1.0, scope=0)
        g10 = Gaussian(mean=1.0, stdev=2.0, scope=1)
        g01 = Gaussian(mean=3.0, stdev=2.0, scope=0)
        g11 = Gaussian(mean=5.0, stdev=1.0, scope=1)
        p0 = Product(children=[g00, g10])
        p1 = Product(children=[g01, g11])
        s = Sum(weights=[0.2, 0.8], children=[p0, p1])
        assign_ids(s)
        rebuild_scopes_bottom_up(s)

        # Convert
        tf_spn, data_placeholder, variable_dict = spn_to_tf_graph(s, data=X)
        torch_spn = SumNode.from_spn(s)

        # Optimizer
        lr = 0.001
        tf_optim = tf.train.AdamOptimizer(lr)
        torch_optim = optim.Adam(torch_spn.parameters(), lr)

        t0 = time()
        epochs = 10
        optimize_tf_graph(tf_spn,
                          variable_dict,
                          data_placeholder,
                          X,
                          epochs=epochs,
                          optimizer=tf_optim)
        t1 = time()
        optimize_torch(torch_spn, X, epochs=epochs, optimizer=torch_optim)
        t2 = time()

        print("Tensorflow took: ", t1 - t0)
        print("PyTorch took: ", t2 - t1)
Code Example #20
    def test_remove_non_informative_features(self):
        np.random.seed(17)
        data = np.arange(0, 1000).reshape(-1, 8)
        data[:, 1] = 1
        data[:, 3] = 3

        parent = Sum()
        parent.children.append(None)

        ctx = Context()
        ctx.feature_size = 4

        scope = [1, 3, 4, 6]
        data2 = np.array(data)

        y, x = get_YX(data, 4)

        uninformative_features_idx = np.var(y, 0) == 0
        result = remove_non_informative_features(
            data=data2,
            parent=parent,
            pos=0,
            context=ctx,
            scope=list(scope),
            uninformative_features_idx=uninformative_features_idx)

        self.assertListEqual(data.tolist(), data2.tolist())

        self.assertEqual(len(parent.children[0].children), len(result))

        resulting_scopes = [[3], [6], [1, 4]]
        resulting_data_y = [y[:, 1], y[:, 3], y[:, [0, 2]]]

        for i, r in enumerate(result):
            self.assertEqual(len(r), 2)
            self.assertEqual(type(r[1]['parent']), Product)
            self.assertEqual(parent.children[0], r[1]['parent'])
            self.assertListEqual(r[1]['scope'], resulting_scopes[i])
            self.assertEqual(r[1]['pos'], i)

            self.assertListEqual(
                r[1]['data'].tolist(),
                concatenate_yx(resulting_data_y[i], x).tolist())
Code Example #21
def test_vector_slp_mini():
    g0 = Gaussian(mean=0.13, stdev=0.5, scope=0)
    g1 = Gaussian(mean=0.14, stdev=0.25, scope=2)
    g2 = Gaussian(mean=0.11, stdev=1.0, scope=3)
    g3 = Gaussian(mean=0.12, stdev=0.75, scope=1)

    spn = Sum(children=[g0, g1, g2, g3], weights=[0.2, 0.4, 0.1, 0.3])

    # Randomly sample input values from Gaussian (normal) distributions.
    num_samples = 100
    inputs = np.column_stack(
        (np.random.normal(loc=0.5, scale=1, size=num_samples),
         np.random.normal(loc=0.125, scale=0.25, size=num_samples),
         np.random.normal(loc=0.345, scale=0.24, size=num_samples),
         np.random.normal(loc=0.456, scale=0.1,
                          size=num_samples))).astype("float64")

    # Compute the reference results using the inference from SPFlow.
    reference = log_likelihood(spn, inputs)
    reference = reference.reshape(num_samples)

    # Compile the kernel with batch size 1 to enable SLP vectorization.
    compiler = CPUCompiler(vectorize=True,
                           computeInLogSpace=True,
                           vectorLibrary="LIBMVEC")
    kernel = compiler.compile_ll(spn=spn, batchSize=1, supportMarginal=False)

    # Execute the compiled Kernel.
    time_sum = 0
    for i in range(len(reference)):
        # Check the computation results against the reference
        start = time.time()
        result = compiler.execute(kernel, inputs=np.array([inputs[i]]))
        time_sum = time_sum + time.time() - start
        print(
            f"evaluation #{i}: result: {result[0]:16.8f}, reference: {reference[i]:16.8f}",
            end='\r')
        if not np.isclose(result, reference[i]):
            print(
                f"\nevaluation #{i} failed: result: {result[0]:16.8f}, reference: {reference[i]:16.8f}"
            )
            raise AssertionError()
    print(f"\nExecution of {len(reference)} samples took {time_sum} seconds.")
Code Example #22
    def test_spn_to_torch(self):
        # SPFLow implementation
        n0 = Gaussian(mean=0.0, stdev=1.0, scope=0)
        n1 = Categorical(p=[0.1, 0.3, 0.6])
        n2 = Sum(weights=[0.1, 0.2, 0.3, 0.4], children=[n0, n1])
        n3 = Product(children=[n0, n1])

        torch_n0 = GaussianNode.from_spn(n0)
        torch_n1 = CategoricalNode.from_spn(n1)
        torch_n2 = SumNode.from_spn(n2)
        torch_n3 = ProductNode.from_spn(n3)

        self.assertEqual(torch_n0.mean, n0.mean)
        self.assertEqual(torch_n0.std, n0.stdev)
        self.assertTrue(
            np.isclose(torch_n1.p.detach().numpy(), n1.p, atol=DELTA).all())
        self.assertTrue(
            np.isclose(torch_n2.weights.detach().numpy(),
                       n2.weights,
                       atol=DELTA).all())
Code Example #23
def complete_layers(layer_nodes, current_node_type=Sum, depth=None):
    # all leaves should be at same depth
    root_layer = False
    if depth is None:
        root_layer = True
        depth = get_depth(layer_nodes[0])

    if depth == 2:
        return

    children_layer = []
    if current_node_type == Sum:
        for i in range(len(layer_nodes)):
            n = layer_nodes[i]
            assert isinstance(n, Sum)
            for j in range(len(n.children)):
                c = n.children[j]
                if not isinstance(c, Product):
                    n.children[j] = Product([c])
            children_layer.extend(n.children)
        children_layer_type = Product
    elif current_node_type == Product:
        for i in range(len(layer_nodes)):
            n = layer_nodes[i]
            assert isinstance(n, Product)
            for j in range(len(n.children)):
                c = n.children[j]
                if not isinstance(c, Sum):
                    n.children[j] = Sum([1.0], [c])
            children_layer.extend(n.children)
        children_layer_type = Sum
    else:
        raise Exception('Unknown node type: ' + str(current_node_type))

    complete_layers(children_layer,
                    current_node_type=children_layer_type,
                    depth=depth - 1)

    if root_layer:
        rebuild_scopes_bottom_up(layer_nodes[0])
        assign_ids(layer_nodes[0])
Code Example #24
def test_binary_serialization_roundtrip(tmpdir):
    """Tests the binary serialization for SPFlow SPNs by round-tripping 
    a simple SPN through serialization and de-serialization and comparing
    the graph-structure before and after serialization & de-serialization."""
    h1 = Histogram([0., 1., 2.], [0.25, 0.75], [1, 1], scope=1)
    h2 = Histogram([0., 1., 2.], [0.45, 0.55], [1, 1], scope=2)
    h3 = Histogram([0., 1., 2.], [0.33, 0.67], [1, 1], scope=1)
    h4 = Histogram([0., 1., 2.], [0.875, 0.125], [1, 1], scope=2)

    p0 = Product(children=[h1, h2])
    p1 = Product(children=[h3, h4])
    spn = Sum([0.3, 0.7], [p0, p1])

    model = SPNModel(spn, featureValueType="uint32")
    query = JointProbability(model)

    binary_file = os.path.join(tmpdir, "test.bin")
    print(f"Test binary file: {binary_file}")

    BinarySerializer(binary_file).serialize_to_file(query)

    deserialized = BinaryDeserializer(binary_file).deserialize_from_file()

    assert (isinstance(deserialized, JointProbability))
    assert (deserialized.batchSize == query.batchSize)
    assert (deserialized.errorModel.error == query.errorModel.error)
    assert (deserialized.errorModel.kind == query.errorModel.kind)
    assert (deserialized.graph.featureType == model.featureType)
    assert (deserialized.graph.name == model.name)

    deserialized = deserialized.graph.root
    assert get_number_of_nodes(spn) == get_number_of_nodes(deserialized)
    assert get_number_of_nodes(spn,
                               Sum) == get_number_of_nodes(deserialized, Sum)
    assert get_number_of_nodes(spn, Product) == get_number_of_nodes(
        deserialized, Product)
    assert get_number_of_nodes(spn, Histogram) == get_number_of_nodes(
        deserialized, Histogram)
    assert get_number_of_edges(spn) == get_number_of_edges(deserialized)
Code Example #25
    def test_equal_to_tf(self):
        # SPFLow implementation
        g00 = Gaussian(mean=0.0, stdev=1.0, scope=0)
        g10 = Gaussian(mean=1.0, stdev=2.0, scope=1)
        g01 = Gaussian(mean=3.0, stdev=2.0, scope=0)
        g11 = Gaussian(mean=5.0, stdev=1.0, scope=1)
        p0 = Product(children=[g00, g10])
        p1 = Product(children=[g01, g11])
        s = Sum(weights=[0.2, 0.8], children=[p0, p1])

        assign_ids(s)
        rebuild_scopes_bottom_up(s)

        # Test for 100 random samples
        data = np.random.randn(100, 2)

        # LL from SPN
        ll = log_likelihood(s, data)

        # PyTorch implementation
        g00 = GaussianNode(mean=0.0, std=1.0, scope=0)
        g10 = GaussianNode(mean=1.0, std=2.0, scope=1)
        g01 = GaussianNode(mean=3.0, std=2.0, scope=0)
        g11 = GaussianNode(mean=5.0, std=1.0, scope=1)
        p0 = ProductNode(children=[g00, g10])
        p1 = ProductNode(children=[g01, g11])
        rootnode = SumNode(weights=[0.2, 0.8], children=[p0, p1])

        datatensor = torch.Tensor(data)
        # LL from pytorch
        ll_torch = rootnode(datatensor)

        # Assert equality
        self.assertTrue(
            np.isclose(np.array(ll).squeeze(),
                       ll_torch.detach().numpy(),
                       atol=DELTA).all())
Code Example #26
def learn_structure(
    dataset,
    ds_context,
    split_rows,
    split_cols,
    create_leaf,
    next_operation=get_next_operation(),
    initial_scope=None,
    data_slicer=default_slicer,
):
    assert dataset is not None
    assert ds_context is not None
    assert split_rows is not None
    assert split_cols is not None
    assert create_leaf is not None
    assert next_operation is not None

    root = Product()
    root.children.append(None)

    if initial_scope is None:
        initial_scope = list(range(dataset.shape[1]))
        num_conditional_cols = None
    elif len(initial_scope) < dataset.shape[1]:
        num_conditional_cols = dataset.shape[1] - len(initial_scope)
    else:
        num_conditional_cols = None
        assert len(initial_scope) > dataset.shape[
            1], "check initial scope: %s" % initial_scope

    tasks = deque()
    tasks.append((dataset, root, 0, initial_scope, False, False))

    while tasks:

        (local_data, parent, children_pos, scope, no_clusters,
         no_independencies) = tasks.popleft()

        operation, op_params = next_operation(
            local_data,
            scope,
            create_leaf,
            no_clusters=no_clusters,
            no_independencies=no_independencies,
            is_first=(parent is root),
        )

        logging.debug("OP: {} on slice {} (remaining tasks {})".format(
            operation, local_data.shape, len(tasks)))

        if operation == Operation.REMOVE_UNINFORMATIVE_FEATURES:
            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            rest_scope = set(range(len(scope)))
            for col in op_params:
                rest_scope.remove(col)
                node.children.append(None)
                tasks.append((
                    data_slicer(local_data, [col], num_conditional_cols),
                    node,
                    len(node.children) - 1,
                    [scope[col]],
                    True,
                    True,
                ))

            next_final = False

            if len(rest_scope) == 0:
                continue
            elif len(rest_scope) == 1:
                next_final = True

            node.children.append(None)
            c_pos = len(node.children) - 1

            rest_cols = list(rest_scope)
            rest_scope = [scope[col] for col in rest_scope]

            tasks.append((
                data_slicer(local_data, rest_cols, num_conditional_cols),
                node,
                c_pos,
                rest_scope,
                next_final,
                next_final,
            ))

            continue

        elif operation == Operation.SPLIT_ROWS:

            split_start_t = perf_counter()
            data_slices = split_rows(local_data, ds_context, scope)
            split_end_t = perf_counter()
            logging.debug("\t\tfound {} row clusters (in {:.5f} secs)".format(
                len(data_slices), split_end_t - split_start_t))

            if len(data_slices) == 1:
                tasks.append(
                    (local_data, parent, children_pos, scope, True, False))
                continue

            node = Sum()
            node.scope.extend(scope)
            parent.children[children_pos] = node
            # assert parent.scope == node.scope

            for data_slice, scope_slice, proportion in data_slices:
                assert isinstance(scope_slice, list), "slice must be a list"

                node.children.append(None)
                node.weights.append(proportion)
                tasks.append((data_slice, node, len(node.children) - 1, scope,
                              False, False))

            continue

        elif operation == Operation.SPLIT_COLUMNS:
            split_start_t = perf_counter()
            data_slices = split_cols(local_data, ds_context, scope)
            split_end_t = perf_counter()
            logging.debug("\t\tfound {} col clusters (in {:.5f} secs)".format(
                len(data_slices), split_end_t - split_start_t))

            if len(data_slices) == 1:
                tasks.append(
                    (local_data, parent, children_pos, scope, False, True))
                assert np.shape(data_slices[0][0]) == np.shape(local_data)
                assert data_slices[0][1] == scope
                continue

            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            for data_slice, scope_slice, _ in data_slices:
                assert isinstance(scope_slice, list), "slice must be a list"

                node.children.append(None)
                tasks.append((data_slice, node, len(node.children) - 1,
                              scope_slice, False, False))

            continue

        elif operation == Operation.NAIVE_FACTORIZATION:
            node = Product()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            local_tasks = []
            local_children_params = []
            split_start_t = perf_counter()
            for col in range(len(scope)):
                node.children.append(None)
                # tasks.append((data_slicer(local_data, [col], num_conditional_cols), node, len(node.children) - 1, [scope[col]], True, True))
                local_tasks.append(len(node.children) - 1)
                child_data_slice = data_slicer(local_data, [col],
                                               num_conditional_cols)
                local_children_params.append(
                    (child_data_slice, ds_context, [scope[col]]))

            result_nodes = pool.starmap(create_leaf, local_children_params)
            # result_nodes = []
            # for l in tqdm(local_children_params):
            #    result_nodes.append(create_leaf(*l))
            # result_nodes = [create_leaf(*l) for l in local_children_params]
            for child_pos, child in zip(local_tasks, result_nodes):
                node.children[child_pos] = child

            split_end_t = perf_counter()

            logging.debug(
                "\t\tnaive factorization {} columns (in {:.5f} secs)".format(
                    len(scope), split_end_t - split_start_t))

            continue

        elif operation == Operation.CREATE_LEAF:
            leaf_start_t = perf_counter()
            node = create_leaf(local_data, ds_context, scope)
            parent.children[children_pos] = node
            leaf_end_t = perf_counter()

            logging.debug(
                "\t\t created leaf {} for scope={} (in {:.5f} secs)".format(
                    node.__class__.__name__, scope, leaf_end_t - leaf_start_t))

        else:
            raise Exception("Invalid operation: " + operation)

    node = root.children[0]
    assign_ids(node)
    valid, err = is_valid(node)
    assert valid, "invalid spn: " + err
    node = Prune(node)
    valid, err = is_valid(node)
    assert valid, "invalid spn: " + err

    return node
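learn_structure is rarely called directly; SPFlow's learning wrappers (learn_parametric, learn_mspn, used in the later examples) build the split_rows, split_cols and create_leaf callbacks and then delegate to it. A minimal sketch of that indirect route, assuming a data matrix and a Context set up as in the surrounding examples:

from spn.algorithms.LearningWrappers import learn_parametric

spn = learn_parametric(train_data, ds_context, min_instances_slice=200)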
Code Example #27
File: myfilter1.py Project: twistedmove/SPN-Spk-Rec
        for z in range(10):
            data.append([x, y, z, int(((x + y + z) / 5))])
data = np.array(data).astype(float)  # np.float was removed in recent NumPy versions
types = [
    MetaType.DISCRETE, MetaType.DISCRETE, MetaType.DISCRETE, MetaType.DISCRETE
]

ds_context = Context(meta_types=types)
ds_context.parametric_types = [Gaussian, Gaussian, Gaussian, Categorical]
ds_context.add_domains(data)

num_classes = len(np.unique(data[:, 3]))

#spn = learn_mspn(data, ds_context, min_instances_slice=10, leaves=create_leaf, threshold=0.3)

spn = Sum()
for label, count in zip(*np.unique(data[:, 3], return_counts=True)):
    branch = learn_mspn(data[data[:, 3] == label, :],
                        ds_context,
                        min_instances_slice=10,
                        leaves=create_leaf,
                        threshold=0.1)
    spn.children.append(branch)
    spn.weights.append(count / data.shape[0])

spn.scope.extend(branch.scope)

print("learned")

prediction = []
Code Example #28
def train_spn(window_size=3,
              min_instances_slice=10000,
              features=None,
              number_of_classes=3):
    if features is None:
        features = [20, 120]

    add_parametric_inference_support()
    add_parametric_text_support()

    data = get_data_in_window(window_size=window_size,
                              features=features,
                              three_classes=number_of_classes == 3)

    sss = sk.model_selection.StratifiedShuffleSplit(test_size=0.2,
                                                    train_size=0.8,
                                                    random_state=42)
    for train_index, test_index in sss.split(
            data[:, 0:window_size * window_size * len(features)],
            data[:, (window_size * window_size * len(features)) +
                 (int(window_size * window_size / 2))]):
        X_train, X_test = data[train_index], data[test_index]

    context_list = list()
    parametric_list = list()
    number_of_features = len(features)
    for _ in range(number_of_features * window_size * window_size):
        context_list.append(MetaType.REAL)
        parametric_list.append(Gaussian)

    for _ in range(window_size * window_size):
        context_list.append(MetaType.DISCRETE)
        parametric_list.append(Categorical)

    ds_context = Context(meta_types=context_list)
    ds_context.add_domains(data)
    ds_context.parametric_types = parametric_list

    spn = load_spn(window_size, features, min_instances_slice,
                   number_of_classes)
    if spn is None:
        spn = Sum()
        for class_pixel in tqdm(range(-window_size * window_size, 0)):
            for label, count in zip(
                    *np.unique(data[:, class_pixel], return_counts=True)):
                train_data = X_train[X_train[:, class_pixel] == label, :]
                branch = learn_parametric(
                    train_data,
                    ds_context,
                    min_instances_slice=min_instances_slice)
                spn.children.append(branch)
                spn.weights.append(train_data.shape[0])

        spn.scope.extend(branch.scope)
        spn.weights = (np.array(spn.weights) / sum(spn.weights)).tolist()

        assign_ids(spn)
        save_spn(spn, window_size, features, min_instances_slice,
                 number_of_classes)

    res = np.ndarray((X_test.shape[0], number_of_classes))

    for i in tqdm(range(number_of_classes)):
        tmp = X_test.copy()
        tmp[:, -int((window_size**2) / 2)] = i
        res[:, i] = log_likelihood(spn, tmp)[:, 0]

    predicted_classes = np.argmax(res, axis=1).reshape((X_test.shape[0], 1))

    correct_predicted = 0
    for x, y in zip(X_test[:, -5], predicted_classes):
        if x == y[0]:
            correct_predicted += 1
    accuracy = correct_predicted / X_test.shape[0]
    return spn, accuracy
Code Example #29
from spn.structure.leaves.parametric.Parametric import *
from spn.structure.StatisticalTypes import MetaType, Type
from spn.structure.leaves.parametric.Text import add_parametric_text_support
from spn.io.Text import to_JSON, spn_to_str_equation
from spn.structure.leaves.parametric.Inference import add_parametric_inference_support

#
# create an SPN over three random variables X_1, X_2, X_3
from spn.structure.leaves.typedleaves.Text import add_typed_leaves_text_support
from spn.structure.leaves.typedleaves.TypedLeaves import type_mixture_leaf_factory

add_typed_leaves_text_support()
add_parametric_inference_support()
#
# root is a sum
root = Sum()

#
# two product nodes
l_prod = Product()
r_prod = Product()
root.children = [l_prod, r_prod]
root.weights = np.array([0.75, 0.25])

#
# priors, but useless
pm_continuous_param_map = OrderedDict({
    Type.REAL:
    OrderedDict({Gaussian: {
        'params': {
            'mean': 5,
Code Example #30
def learn_structure_cnet(
    dataset,
    ds_context,
    conditioning,
    create_leaf,
    next_operation_cnet=get_next_operation_cnet(),
    initial_scope=None,
    data_slicer=default_slicer,
):
    assert dataset is not None
    assert ds_context is not None
    assert create_leaf is not None
    assert next_operation_cnet is not None

    root = Product()
    root.children.append(None)

    if initial_scope is None:
        initial_scope = list(range(dataset.shape[1]))

    tasks = deque()
    tasks.append((dataset, root, 0, initial_scope))

    while tasks:

        local_data, parent, children_pos, scope = tasks.popleft()

        operation, op_params = next_operation_cnet(local_data, scope)

        logging.debug("OP: {} on slice {} (remaining tasks {})".format(
            operation, local_data.shape, len(tasks)))

        if operation == Operation.CONDITIONING:
            from spn.algorithms.splitting.Base import split_data_by_clusters

            conditioning_start_t = perf_counter()

            col_conditioning, found_conditioning = conditioning(local_data)

            if not found_conditioning:
                node = create_leaf(local_data, ds_context, scope)
                parent.children[children_pos] = node

                continue

            clusters = (local_data[:, col_conditioning] == 1).astype(int)
            data_slices = split_data_by_clusters(local_data,
                                                 clusters,
                                                 scope,
                                                 rows=True)

            node = Sum()
            node.scope.extend(scope)
            parent.children[children_pos] = node

            for data_slice, scope_slice, proportion in data_slices:
                assert isinstance(scope_slice, list), "slice must be a list"

                node.weights.append(proportion)

                product_node = Product()
                node.children.append(product_node)
                node.children[-1].scope.extend(scope)

                right_data_slice = np.hstack(
                    (data_slice[:, :col_conditioning],
                     data_slice[:, (col_conditioning + 1):])).reshape(
                         data_slice.shape[0], data_slice.shape[1] - 1)
                product_node.children.append(None)
                tasks.append((
                    right_data_slice,
                    product_node,
                    len(product_node.children) - 1,
                    scope_slice[:col_conditioning] +
                    scope_slice[col_conditioning + 1:],
                ))

                left_data_slice = data_slice[:, col_conditioning].reshape(
                    data_slice.shape[0], 1)
                product_node.children.append(None)
                tasks.append((left_data_slice, product_node,
                              len(product_node.children) - 1,
                              [scope_slice[col_conditioning]]))

            conditioning_end_t = perf_counter()
            logging.debug("\t\tconditioning  (in {:.5f} secs)".format(
                conditioning_end_t - conditioning_start_t))

            continue

        elif operation == Operation.CREATE_LEAF:
            cltree_start_t = perf_counter()
            node = create_leaf(local_data, ds_context, scope)
            parent.children[children_pos] = node
            cltree_end_t = perf_counter()
        else:
            raise Exception("Invalid operation: " + operation)

    node = root.children[0]
    assign_ids(node)
    valid, err = is_valid(node)
    assert valid, "invalid spn: " + err
    node = Prune(node)
    valid, err = is_valid(node)
    assert valid, "invalid spn: " + err

    return node