Beispiel #1
0
    def linked_kernel_density_estimation(cls,
                                         n_instances,
                                         features,
                                         node_dict=None,
                                         alpha=0.1
                                         # ,batch_size=1,
                                         # sparse=False
                                         ):
        """
        WRITEME
        """

        n_features = len(features)

        # the top one is a sum layer with a single node
        root_node = SumNode()
        root_layer = SumLayerLinked([root_node])

        # second one is a product layer with n_instances nodes
        product_nodes = [ProductNode() for i in range(n_instances)]
        product_layer = ProductLayerLinked(product_nodes)
        # linking them to the root node
        for prod_node in product_nodes:
            root_node.add_child(prod_node, 1. / n_instances)

        # last layer can be a categorical smoothed input
        # or sum_layer + categorical indicator input

        input_layer = None
        layers = None
        n_leaf_nodes = n_features * n_instances

        if node_dict is None:
            # creating a sum_layer with n_leaf_nodes
            sum_nodes = [SumNode() for i in range(n_leaf_nodes)]
            # store them into a layer
            sum_layer = SumLayerLinked(sum_nodes)
            # linking them to the products above
            for i, prod_node in enumerate(product_nodes):
                for j in range(n_features):
                    # getting the next n_features nodes
                    prod_node.add_child(sum_nodes[i * n_features + j])
            # now creating the indicator nodes
            input_layer = \
                CategoricalIndicatorLayerLinked(vars=features)
            # linking the sum nodes to the indicator vars
            for i, sum_node in enumerate(sum_nodes):
                # getting the feature id
                j = i % n_features
                # and thus its number of values
                n_values = features[j]
                # getting the indices of indicators
                start_index = sum(features[:j])
                end_index = start_index + n_values
                indicators = [node for node in input_layer.nodes()
                              ][start_index:end_index]
                for ind_node in indicators:
                    sum_node.add_child(ind_node, 1. / n_values)

            # storing levels
            layers = [sum_layer, product_layer, root_layer]
        else:
            # create a categorical smoothed layer
            input_layer = \
                CategoricalSmoothedLayerLinked(vars=features,
                                               node_dicts=node_dict,
                                               alpha=alpha)
            # it shall contain n_leaf_nodes nodes
            smooth_nodes = list(input_layer.nodes())
            assert len(smooth_nodes) == n_leaf_nodes

            # linking it
            for i, prod_node in enumerate(product_nodes):
                for j in range(n_features):
                    # getting the next n_features nodes
                    prod_node.add_child(smooth_nodes[i * n_features + j])
            # setting the used levels
            layers = [product_layer, root_layer]

        # create the spn from levels
        kern_spn = SpnLinked(input_layer, layers)
        return kern_spn
Beispiel #2
0
        def build_product_layer(parent_layer, parent_scope_list,
                                n_max_children, n_scope_children, input_layer,
                                rand_gen):

            # grouping the scopes of the parents
            scope_clusters = cluster_set_scope(parent_scope_list)
            # for each scope add a fixed number of children
            children_lists = {
                scope: [
                    ProductNode(var_scope=scope)
                    for i in range(n_scope_children)
                ]
                for scope in scope_clusters
            }
            # counting which node is used
            children_counts = {
                scope: [0 for i in range(n_scope_children)]
                for scope in scope_clusters
            }
            # now link those randomly to their parent
            for parent, scope in zip(parent_layer.nodes(), parent_scope_list):
                # only for nodes not becoming leaves
                if len(scope) > 1:
                    # sampling at most n_max_children from those in the same
                    # scope
                    children_scope_list = children_lists[scope]
                    sample_length = min(len(children_scope_list),
                                        n_max_children)
                    sampled_ids = rand_gen.sample(range(n_scope_children),
                                                  sample_length)
                    sampled_children = [None for i in range(sample_length)]
                    for i, id in enumerate(sampled_ids):
                        # getting the sampled child
                        sampled_children[i] = children_scope_list[id]
                        # updating its counter
                        children_counts[scope][id] += 1

                    for child in sampled_children:
                        # parent is a sum layer, we must set a random weight
                        rand_weight = rand_gen.random()
                        parent.add_child(child, rand_weight)

                    # we can now normalize it
                    parent.normalize()
                else:
                    # binding the node to the input layer
                    (scope_var, ) = scope
                    link_leaf_to_input_layer(parent, scope_var, input_layer,
                                             rand_gen)

            # pruning those children never used
            for scope in children_lists.keys():
                children_scope_list = children_lists[scope]
                scope_counts = children_counts[scope]
                used_children = [
                    child
                    for count, child in zip(scope_counts, children_scope_list)
                    if count > 0
                ]
                children_lists[scope] = used_children

            # creating the layer and new scopelist
            # print('children list val', children_lists.values())
            children_list = [
                child for child in itertools.chain.from_iterable(
                    children_lists.values())
            ]
            scope_list = [
                key for key, child_list in children_lists.items()
                for elem in child_list
            ]
            # print('children list', children_list)
            # print('scope list', scope_list)
            prod_layer = ProductLayerLinked(children_list)

            return prod_layer, scope_list
Beispiel #3
0
    def layered_linked_spn(cls, root_node):
        """
        Given a simple linked version (parent->children),
        returns a layered one (linked + layers)
        """
        layers = []
        root_layer = None
        input_nodes = []
        layer_nodes = []
        input_layer = None

        # layers.append(root_layer)
        previous_level = None

        # collecting nodes to visit
        open = deque()
        next_open = deque()
        closed = set()

        open.append(root_node)

        while open:
            # getting a node
            current_node = open.popleft()
            current_id = current_node.id

            # has this already been seen?
            if current_id not in closed:
                closed.add(current_id)
                layer_nodes.append(current_node)
                # print('CURRENT NODE')
                # print(current_node)

                # expand it
                for child in current_node.children:
                    # only for non leaf nodes
                    if (isinstance(child, SumNode)
                            or isinstance(child, ProductNode)):
                        next_open.append(child)
                    else:
                        # it must be an input node
                        if child.id not in closed:
                            input_nodes.append(child)
                            closed.add(child.id)

            # open is now empty, but new open not
            if (not open):
                # swap them
                open = next_open
                next_open = deque()

                # and create a new level alternating type
                if previous_level is None:
                    # it is the first level
                    if isinstance(root_node, SumNode):
                        previous_level = SumLayerLinked([root_node])
                    elif isinstance(root_node, ProductNode):
                        previous_level = ProductLayerLinked([root_node])
                elif isinstance(previous_level, SumLayerLinked):
                    previous_level = ProductLayerLinked(layer_nodes)
                elif isinstance(previous_level, ProductLayerLinked):
                    previous_level = SumLayerLinked(layer_nodes)

                layer_nodes = []

                layers.append(previous_level)

        #
        # finishing layers
        #

        #
        # checking for CLTreeNodes
        cltree_leaves = False
        for node in input_nodes:
            if isinstance(node, CLTreeNode):
                cltree_leaves = True
                break

        if cltree_leaves:
            input_layer = CategoricalCLInputLayerLinked(input_nodes)
        else:
            # otherwiise assuming all input nodes are homogeneous
            if isinstance(input_nodes[0], CategoricalSmoothedNode):
                # print('SMOOTH LAYER')
                input_layer = CategoricalSmoothedLayerLinked(input_nodes)
            elif isinstance(input_nodes[0], CategoricalIndicatorNode):
                input_layer = CategoricalIndicatorLayerLinked(input_nodes)

        spn = SpnLinked(input_layer=input_layer, layers=layers[::-1])
        return spn
Beispiel #4
0
    def linked_naive_factorization(cls, features, node_dict=None, alpha=0.1):
        """
        WRITEME
        """
        n_features = len(features)

        # create an input layer
        input_layer = None
        layers = None

        # first layer is a product layer with n_feature children
        root_node = ProductNode()
        root_layer = ProductLayerLinked([root_node])

        # second is a sum node on an indicator layer
        if node_dict is None:
            # creating sum nodes
            sum_nodes = [SumNode() for i in range(n_features)]
            # linking to the root
            for node in sum_nodes:
                root_node.add_child(node)
            # store into a level
            sum_layer = SumLayerLinked(sum_nodes)
            # now create an indicator layer
            input_layer = CategoricalIndicatorLayerLinked(vars=features)
            # and linking it
            # TODO make this a function
            for i, sum_node in enumerate(sum_nodes):
                # getting the feature id
                j = i % n_features
                # and thus its number of values
                n_values = features[j]
                # getting the indices of indicators
                start_index = sum(features[:j])
                end_index = start_index + n_values
                indicators = [node for node in input_layer.nodes()
                              ][start_index:end_index]
                for ind_node in indicators:
                    sum_node.add_child(ind_node, 1. / n_values)

            # collecting layers
            layers = [sum_layer, root_layer]

        # or a categorical smoothed layer
        else:
            input_layer = CategoricalSmoothedLayerLinked(vars=features,
                                                         node_dicts=node_dict,
                                                         alpha=alpha)
            # it shall contain n_features nodes
            smooth_nodes = list(input_layer.nodes())
            assert len(smooth_nodes) == n_features
            for node in smooth_nodes:
                root_node.add_child(node)

            # set layers accordingly
            layers = [root_layer]

        # build the spn
        naive_fact_spn = SpnLinked(input_layer, layers)

        return naive_fact_spn
Beispiel #5
0
    def layered_linked_spn(cls, root_node, data, config={}):
        """
        Given a simple linked version (parent->children),
        returns a layered one (linked + layers)
        """
        layers = []
        root_layer = None
        input_nodes = []
        layer_nodes = []
        input_layer = None

        # layers.append(root_layer)
        previous_level = None

        # collecting nodes to visit
        open = deque()
        next_open = deque()
        closed = set()

        open.append(root_node)

        while open:
            # getting a node
            current_node = open.popleft()
            current_id = current_node.id

            # has this already been seen?
            if current_id not in closed:
                closed.add(current_id)
                layer_nodes.append(current_node)
                # print('CURRENT NODE')
                # print(current_node)

                # expand it
                for child in current_node.children:
                    # only for non leaf nodes
                    if (isinstance(child, SumNode) or
                            isinstance(child, ProductNode)):
                        next_open.append(child)
                    else:
                        # it must be an input node
                        if child.id not in closed:
                            input_nodes.append(child)
                            closed.add(child.id)

            # open is now empty, but new open not
            if (not open):
                # swap them
                open = next_open
                next_open = deque()

                # and create a new level alternating type
                if previous_level is None:
                    # it is the first level
                    if isinstance(root_node, SumNode):
                        previous_level = SumLayerLinked([root_node])
                    elif isinstance(root_node, ProductNode):
                        previous_level = ProductLayerLinked([root_node])
                elif isinstance(previous_level, SumLayerLinked):
                    previous_level = ProductLayerLinked(layer_nodes)
                elif isinstance(previous_level, ProductLayerLinked):
                    previous_level = SumLayerLinked(layer_nodes)

                layer_nodes = []

                layers.append(previous_level)

        if isinstance(input_nodes[0], PoissonNode):
            input_layer = PoissonLayer(input_nodes)
        if isinstance(input_nodes[0], GaussianNode):
            input_layer = GaussianLayer(input_nodes)
        if isinstance(input_nodes[0], BernoulliNode):
            input_layer = BernoulliLayer(input_nodes)

        spn = SpnLinked(data, input_layer=input_layer, layers=layers[::-1], config=config)
        return spn
Beispiel #6
0
def build_linked_layered_spn(print_spn=True):
    #
    # building an indicator layer
    ind_x_00 = CategoricalIndicatorNode(0, 0)
    ind_x_01 = CategoricalIndicatorNode(0, 1)
    ind_x_10 = CategoricalIndicatorNode(1, 0)
    ind_x_11 = CategoricalIndicatorNode(1, 1)
    ind_x_20 = CategoricalIndicatorNode(2, 0)
    ind_x_21 = CategoricalIndicatorNode(2, 1)

    input_layer = CategoricalIndicatorLayer(
        [ind_x_00, ind_x_01, ind_x_10, ind_x_11, ind_x_20, ind_x_21])

    #
    # sum layer
    #
    sum_node_1 = SumNode(frozenset([0]))
    sum_node_1.add_child(ind_x_00, 0.1)
    sum_node_1.add_child(ind_x_01, 0.9)

    sum_node_2 = SumNode(frozenset([0]))
    sum_node_2.add_child(ind_x_00, 0.4)
    sum_node_2.add_child(ind_x_01, 0.6)

    sum_node_3 = SumNode(frozenset([1]))
    sum_node_3.add_child(ind_x_10, 0.3)
    sum_node_3.add_child(ind_x_11, 0.7)

    sum_node_4 = SumNode(frozenset([1]))
    sum_node_4.add_child(ind_x_10, 0.6)
    sum_node_4.add_child(ind_x_11, 0.4)

    sum_node_5 = SumNode(frozenset([2]))
    sum_node_5.add_child(ind_x_20, 0.5)
    sum_node_5.add_child(ind_x_21, 0.5)

    sum_node_6 = SumNode(frozenset([2]))
    sum_node_6.add_child(ind_x_20, 0.2)
    sum_node_6.add_child(ind_x_21, 0.8)

    sum_layer_1 = SumLayerLinked([
        sum_node_1, sum_node_2, sum_node_3, sum_node_4, sum_node_5, sum_node_6
    ])

    #
    # product nodes

    #
    # xy
    prod_node_7 = ProductNode(frozenset([0, 1]))
    prod_node_7.add_child(sum_node_1)
    prod_node_7.add_child(sum_node_3)

    prod_node_8 = ProductNode(frozenset([0, 1]))
    prod_node_8.add_child(sum_node_2)
    prod_node_8.add_child(sum_node_4)

    prod_node_9 = ProductNode(frozenset([0, 1]))
    prod_node_9.add_child(sum_node_1)
    prod_node_9.add_child(sum_node_3)

    #
    # yz
    prod_node_10 = ProductNode(frozenset([1, 2]))
    prod_node_10.add_child(sum_node_4)
    prod_node_10.add_child(sum_node_5)

    prod_node_11 = ProductNode(frozenset([1, 2]))
    prod_node_11.add_child(sum_node_4)
    prod_node_11.add_child(sum_node_6)

    prod_layer_2 = ProductLayerLinked(
        [prod_node_7, prod_node_8, prod_node_9, prod_node_10, prod_node_11])

    #
    # sum nodes
    #
    # xy
    sum_node_12 = SumNode(frozenset([0, 1]))
    sum_node_12.add_child(prod_node_7, 0.1)
    sum_node_12.add_child(prod_node_8, 0.9)

    sum_node_13 = SumNode(frozenset([0, 1]))
    sum_node_13.add_child(prod_node_8, 0.7)
    sum_node_13.add_child(prod_node_9, 0.3)

    #
    # yz
    sum_node_14 = SumNode(frozenset([1, 2]))
    sum_node_14.add_child(prod_node_10, 0.6)
    sum_node_14.add_child(prod_node_11, 0.4)

    sum_layer_3 = SumLayerLinked([sum_node_12, sum_node_13, sum_node_14])

    #
    # product nodes
    prod_node_15 = ProductNode(frozenset([0, 1, 2]))
    prod_node_15.add_child(sum_node_12)
    prod_node_15.add_child(sum_node_6)

    prod_node_16 = ProductNode(frozenset([0, 1, 2]))
    prod_node_16.add_child(sum_node_13)
    prod_node_16.add_child(sum_node_5)

    prod_node_17 = ProductNode(frozenset([0, 1, 2]))
    prod_node_17.add_child(sum_node_2)
    prod_node_17.add_child(sum_node_14)

    prod_layer_4 = ProductLayerLinked(
        [prod_node_15, prod_node_16, prod_node_17])

    #
    # root
    sum_node_18 = SumNode(frozenset([0, 1, 2]))
    sum_node_18.add_child(prod_node_15, 0.2)
    sum_node_18.add_child(prod_node_16, 0.2)
    sum_node_18.add_child(prod_node_17, 0.6)

    sum_layer_5 = SumLayerLinked([sum_node_18])

    #
    # creating the spn
    layers = [
        sum_layer_1, prod_layer_2, sum_layer_3, prod_layer_4, sum_layer_5
    ]
    nodes = [node for layer in layers for node in layer.nodes()]

    spn = SpnLinked(input_layer=input_layer, layers=layers)

    if print_spn:
        print(spn)

    return spn, layers, nodes
Beispiel #7
0
def test_linked_to_theano_indicator():
    # creating single nodes
    root = SumNode()

    prod1 = ProductNode()
    prod2 = ProductNode()
    prod3 = ProductNode()

    sum1 = SumNode()
    sum2 = SumNode()
    sum3 = SumNode()
    sum4 = SumNode()

    ind1 = CategoricalIndicatorNode(var=0, var_val=0)
    ind2 = CategoricalIndicatorNode(var=0, var_val=1)
    ind3 = CategoricalIndicatorNode(var=1, var_val=0)
    ind4 = CategoricalIndicatorNode(var=1, var_val=1)
    ind5 = CategoricalIndicatorNode(var=2, var_val=0)
    ind6 = CategoricalIndicatorNode(var=2, var_val=1)
    ind7 = CategoricalIndicatorNode(var=2, var_val=2)
    ind8 = CategoricalIndicatorNode(var=3, var_val=0)
    ind9 = CategoricalIndicatorNode(var=3, var_val=1)
    ind10 = CategoricalIndicatorNode(var=3, var_val=2)
    ind11 = CategoricalIndicatorNode(var=3, var_val=3)

    prod4 = ProductNode()
    prod5 = ProductNode()
    prod6 = ProductNode()
    prod7 = ProductNode()

    # linking nodes
    root.add_child(prod1, 0.3)
    root.add_child(prod2, 0.3)
    root.add_child(prod3, 0.4)

    prod1.add_child(sum1)
    prod1.add_child(sum2)
    prod2.add_child(ind7)
    prod2.add_child(ind8)
    prod2.add_child(ind11)
    prod3.add_child(sum3)
    prod3.add_child(sum4)

    sum1.add_child(ind1, 0.3)
    sum1.add_child(ind2, 0.3)
    sum1.add_child(prod4, 0.4)

    sum2.add_child(ind2, 0.5)
    sum2.add_child(prod4, 0.2)
    sum2.add_child(prod5, 0.3)

    sum3.add_child(prod6, 0.5)
    sum3.add_child(prod7, 0.5)
    sum4.add_child(prod6, 0.5)
    sum4.add_child(prod7, 0.5)

    prod4.add_child(ind3)
    prod4.add_child(ind4)
    prod5.add_child(ind5)
    prod5.add_child(ind6)
    prod6.add_child(ind9)
    prod6.add_child(ind10)
    prod7.add_child(ind9)
    prod7.add_child(ind10)

    # building layers from nodes
    root_layer = SumLayerLinked([root])
    prod_layer = ProductLayerLinked([prod1, prod2, prod3])
    sum_layer = SumLayerLinked([sum1, sum2, sum3, sum4])
    aprod_layer = ProductLayerLinked([prod4, prod5, prod6, prod7])
    ind_layer = CategoricalIndicatorLayer(nodes=[
        ind1, ind2, ind3, ind4, ind5, ind6, ind7, ind8, ind9, ind10, ind11
    ])

    # creating the linked spn
    spn_linked = SpnLinked(
        input_layer=ind_layer,
        layers=[aprod_layer, sum_layer, prod_layer, root_layer])

    print(spn_linked)

    # converting to theano repr
    spn_theano = SpnFactory.linked_to_theano(spn_linked)
    print(spn_theano)

    # time for some inference comparison
    for instance in I:
        print('linked')
        res_l = spn_linked.eval(instance)
        print(res_l)
        print('theano')
        res_t = spn_theano.eval(instance)
        print(res_t)
        assert_array_almost_equal(res_l, res_t)
Beispiel #8
0
def test_linked_to_theano_categorical():
    vars = [2, 2, 3, 4]
    freqs = [{
        'var': 0,
        'freqs': [1, 2]
    }, {
        'var': 1,
        'freqs': [2, 2]
    }, {
        'var': 0,
        'freqs': [3, 2]
    }, {
        'var': 1,
        'freqs': [0, 3]
    }, {
        'var': 2,
        'freqs': [1, 0, 2]
    }, {
        'var': 3,
        'freqs': [1, 2, 1, 2]
    }, {
        'var': 3,
        'freqs': [3, 4, 0, 1]
    }]

    # create input layer first
    input_layer = CategoricalSmoothedLayer(vars=vars, node_dicts=freqs)
    # get nodes
    ind_nodes = [node for node in input_layer.nodes()]

    root_node = ProductNode()

    sum1 = SumNode()
    sum2 = SumNode()

    prod1 = ProductNode()
    prod2 = ProductNode()

    sum3 = SumNode()
    sum4 = SumNode()

    # linking
    root_node.add_child(sum1)
    root_node.add_child(sum2)
    root_node.add_child(ind_nodes[0])
    root_node.add_child(ind_nodes[1])

    sum1.add_child(ind_nodes[2], 0.4)
    sum1.add_child(ind_nodes[3], 0.6)
    sum2.add_child(ind_nodes[3], 0.2)
    sum2.add_child(prod1, 0.5)
    sum2.add_child(prod2, 0.3)

    prod1.add_child(ind_nodes[4])
    prod1.add_child(sum3)
    prod1.add_child(sum4)
    prod2.add_child(sum3)
    prod2.add_child(sum4)

    sum3.add_child(ind_nodes[5], 0.5)
    sum3.add_child(ind_nodes[6], 0.5)
    sum4.add_child(ind_nodes[5], 0.4)
    sum4.add_child(ind_nodes[6], 0.6)

    # creating layers
    root_layer = ProductLayerLinked([root_node])
    sum_layer = SumLayerLinked([sum1, sum2])
    prod_layer = ProductLayerLinked([prod1, prod2])
    sum_layer2 = SumLayerLinked([sum3, sum4])

    # create the linked spn
    spn_linked = SpnLinked(
        input_layer=input_layer,
        layers=[sum_layer2, prod_layer, sum_layer, root_layer])

    print(spn_linked)

    # converting to theano repr
    spn_theano = SpnFactory.linked_to_theano(spn_linked)
    print(spn_theano)

    # time for some inference comparison
    for instance in I:
        print('linked')
        res_l = spn_linked.eval(instance)
        print(res_l)
        print('theano')
        res_t = spn_theano.eval(instance)
        print(res_t)
        assert_array_almost_equal(res_l, res_t)