예제 #1
0
    def linked_kernel_density_estimation(cls,
                                         n_instances,
                                         features,
                                         node_dict=None,
                                         alpha=0.1
                                         # ,batch_size=1,
                                         # sparse=False
                                         ):
        """
        Build a linked SPN with a kernel-density-estimation-like shape.

        The network is made of a single root sum node whose children are
        ``n_instances`` product nodes, each weighted ``1 / n_instances``.
        Every product node receives ``len(features)`` children, taken
        consecutively from a bottom layer of
        ``len(features) * n_instances`` nodes.

        Parameters
        ----------
        n_instances:
            Number of product nodes created under the root.
        features:
            Sequence whose j-th entry is used as the number of values
            (i.e. the number of indicator nodes) of feature j.
        node_dict:
            When ``None`` the bottom layer is a layer of sum nodes, each
            linked with uniform weights to the indicators of one feature
            in a ``CategoricalIndicatorLayerLinked``. Otherwise it is
            forwarded as ``node_dicts`` to a
            ``CategoricalSmoothedLayerLinked`` that is used directly as
            the bottom layer.
        alpha:
            Forwarded to ``CategoricalSmoothedLayerLinked``; unused when
            ``node_dict`` is ``None``.

        Returns
        -------
        The ``SpnLinked`` built from the input layer and the inner
        layers, listed from the one closest to the input up to the root.

        Raises
        ------
        AssertionError
            If the smoothed input layer does not expose exactly
            ``len(features) * n_instances`` nodes.
        """

        n_features = len(features)

        # the top one is a sum layer with a single node
        root_node = SumNode()
        root_layer = SumLayerLinked([root_node])

        # second one is a product layer with n_instances nodes,
        # uniformly weighted under the root
        product_nodes = [ProductNode() for _ in range(n_instances)]
        product_layer = ProductLayerLinked(product_nodes)
        for prod_node in product_nodes:
            root_node.add_child(prod_node, 1. / n_instances)

        # last layer can be a categorical smoothed input
        # or sum_layer + categorical indicator input
        n_leaf_nodes = n_features * n_instances

        if node_dict is None:
            # one sum node per (instance, feature) pair
            sum_nodes = [SumNode() for _ in range(n_leaf_nodes)]
            sum_layer = SumLayerLinked(sum_nodes)

            # the i-th product node takes the i-th run of n_features sums
            for i, prod_node in enumerate(product_nodes):
                for j in range(n_features):
                    prod_node.add_child(sum_nodes[i * n_features + j])

            input_layer = \
                CategoricalIndicatorLayerLinked(vars=features)

            # Materialize the indicators and cut them into one slice per
            # feature ONCE: this used to be redone for every sum node.
            # NOTE(review): assumes nodes() yields the same nodes in the
            # same order on every call, grouped by feature following
            # `features` (the original index arithmetic relied on the
            # same layout) - confirm against the layer implementation.
            indicator_nodes = list(input_layer.nodes())
            feature_indicators = []
            start_index = 0
            for n_values in features:
                end_index = start_index + n_values
                feature_indicators.append(
                    indicator_nodes[start_index:end_index])
                start_index = end_index

            # linking each sum node to the indicators of its feature
            for i, sum_node in enumerate(sum_nodes):
                # sums are laid out instance by instance, so the
                # feature id cycles with period n_features
                j = i % n_features
                n_values = features[j]
                for ind_node in feature_indicators[j]:
                    sum_node.add_child(ind_node, 1. / n_values)

            layers = [sum_layer, product_layer, root_layer]
        else:
            # the smoothed layer replaces both the sums and the indicators
            input_layer = \
                CategoricalSmoothedLayerLinked(vars=features,
                                               node_dicts=node_dict,
                                               alpha=alpha)
            # it shall contain n_leaf_nodes nodes
            smooth_nodes = list(input_layer.nodes())
            assert len(smooth_nodes) == n_leaf_nodes

            # the i-th product node takes the i-th run of n_features leaves
            for i, prod_node in enumerate(product_nodes):
                for j in range(n_features):
                    prod_node.add_child(smooth_nodes[i * n_features + j])

            layers = [product_layer, root_layer]

        # create the spn from levels
        return SpnLinked(input_layer, layers)
예제 #2
0
    def linked_naive_factorization(cls, features, node_dict=None, alpha=0.1):
        """
        Build a linked SPN made of a single product node over all features.

        The root is one product node with ``len(features)`` children, one
        per feature.

        Parameters
        ----------
        features:
            Sequence whose j-th entry is used as the number of values
            (i.e. the number of indicator nodes) of feature j.
        node_dict:
            When ``None`` the root children are sum nodes, each linked
            with uniform weights to the indicators of one feature in a
            ``CategoricalIndicatorLayerLinked``. Otherwise it is
            forwarded as ``node_dicts`` to a
            ``CategoricalSmoothedLayerLinked`` whose nodes become the
            root children directly.
        alpha:
            Forwarded to ``CategoricalSmoothedLayerLinked``; unused when
            ``node_dict`` is ``None``.

        Returns
        -------
        The ``SpnLinked`` built from the input layer and the inner
        layers, listed from the one closest to the input up to the root.

        Raises
        ------
        AssertionError
            If the smoothed input layer does not expose exactly
            ``len(features)`` nodes.
        """
        n_features = len(features)

        # first layer is a product layer with n_feature children
        root_node = ProductNode()
        root_layer = ProductLayerLinked([root_node])

        # second is a sum node on an indicator layer
        if node_dict is None:
            # one sum node per feature, all children of the root
            sum_nodes = [SumNode() for _ in range(n_features)]
            for node in sum_nodes:
                root_node.add_child(node)
            sum_layer = SumLayerLinked(sum_nodes)

            input_layer = CategoricalIndicatorLayerLinked(vars=features)

            # TODO make this a function
            # Materialize the indicators ONCE and walk them with a
            # running offset: this used to rebuild the whole list and
            # re-sum the feature sizes for every sum node.
            # NOTE(review): assumes nodes() yields the indicators grouped
            # by feature following `features` (the original index
            # arithmetic relied on the same layout) - confirm against the
            # layer implementation.
            indicator_nodes = list(input_layer.nodes())
            start_index = 0
            for sum_node, n_values in zip(sum_nodes, features):
                end_index = start_index + n_values
                for ind_node in indicator_nodes[start_index:end_index]:
                    sum_node.add_child(ind_node, 1. / n_values)
                start_index = end_index

            layers = [sum_layer, root_layer]

        # or a categorical smoothed layer
        else:
            input_layer = CategoricalSmoothedLayerLinked(vars=features,
                                                         node_dicts=node_dict,
                                                         alpha=alpha)
            # it shall contain n_features nodes
            smooth_nodes = list(input_layer.nodes())
            assert len(smooth_nodes) == n_features
            for node in smooth_nodes:
                root_node.add_child(node)

            layers = [root_layer]

        # build the spn
        return SpnLinked(input_layer, layers)
예제 #3
0
    def linked_kernel_density_estimation(cls,
                                         n_instances,
                                         features,
                                         node_dict=None,
                                         alpha=0.1
                                         # ,batch_size=1,
                                         # sparse=False
                                         ):
        """
        Build a linked SPN with a kernel-density-estimation-like shape.

        A single root sum node sits on top of ``n_instances`` product
        nodes, each weighted ``1 / n_instances``. Each product node is
        given ``len(features)`` consecutive children from a bottom layer
        holding ``len(features) * n_instances`` nodes.

        Parameters
        ----------
        n_instances:
            Number of product nodes created under the root.
        features:
            Sequence whose j-th entry is used as the number of values
            (i.e. the number of indicator nodes) of feature j.
        node_dict:
            When ``None`` the bottom layer is made of sum nodes, each
            linked with uniform weights to the indicators of one feature
            in a ``CategoricalIndicatorLayerLinked``. Otherwise it is
            forwarded as ``node_dicts`` to a
            ``CategoricalSmoothedLayerLinked`` used directly as the
            bottom layer.
        alpha:
            Forwarded to ``CategoricalSmoothedLayerLinked``; unused when
            ``node_dict`` is ``None``.

        Returns
        -------
        The ``SpnLinked`` built from the input layer and the inner
        layers, listed from the one closest to the input up to the root.

        Raises
        ------
        AssertionError
            If the smoothed input layer does not expose exactly
            ``len(features) * n_instances`` nodes.
        """

        n_features = len(features)

        # the top one is a sum layer with a single node
        root_node = SumNode()
        root_layer = SumLayerLinked([root_node])

        # second one is a product layer with n_instances nodes,
        # uniformly weighted under the root
        product_nodes = [ProductNode() for _ in range(n_instances)]
        product_layer = ProductLayerLinked(product_nodes)
        for prod_node in product_nodes:
            root_node.add_child(prod_node, 1. / n_instances)

        # last layer can be a categorical smoothed input
        # or sum_layer + categorical indicator input
        n_leaf_nodes = n_features * n_instances

        if node_dict is None:
            # one sum node per (instance, feature) pair
            sum_nodes = [SumNode() for _ in range(n_leaf_nodes)]
            sum_layer = SumLayerLinked(sum_nodes)

            # the i-th product node takes the i-th run of n_features sums
            for i, prod_node in enumerate(product_nodes):
                for j in range(n_features):
                    prod_node.add_child(sum_nodes[i * n_features + j])

            input_layer = \
                CategoricalIndicatorLayerLinked(vars=features)

            # Build the per-feature indicator slices a single time
            # instead of re-listing every indicator (and re-summing the
            # feature sizes) once per sum node.
            # NOTE(review): assumes nodes() yields the same nodes in the
            # same order on every call, grouped by feature following
            # `features` (the original index arithmetic relied on the
            # same layout) - confirm against the layer implementation.
            indicator_nodes = list(input_layer.nodes())
            feature_indicators = []
            start_index = 0
            for n_values in features:
                end_index = start_index + n_values
                feature_indicators.append(
                    indicator_nodes[start_index:end_index])
                start_index = end_index

            # linking each sum node to the indicators of its feature
            for i, sum_node in enumerate(sum_nodes):
                # sums are laid out instance by instance, so the
                # feature id cycles with period n_features
                j = i % n_features
                n_values = features[j]
                for ind_node in feature_indicators[j]:
                    sum_node.add_child(ind_node, 1. / n_values)

            layers = [sum_layer, product_layer,
                      root_layer]
        else:
            # the smoothed layer replaces both the sums and the indicators
            input_layer = \
                CategoricalSmoothedLayerLinked(vars=features,
                                               node_dicts=node_dict,
                                               alpha=alpha)
            # it shall contain n_leaf_nodes nodes
            smooth_nodes = list(input_layer.nodes())
            assert len(smooth_nodes) == n_leaf_nodes

            # the i-th product node takes the i-th run of n_features leaves
            for i, prod_node in enumerate(product_nodes):
                for j in range(n_features):
                    prod_node.add_child(smooth_nodes[i * n_features + j])

            layers = [product_layer, root_layer]

        # create the spn from levels
        return SpnLinked(input_layer, layers)
예제 #4
0
def create_valid_toy_spn():
    """
    Assemble a small hand-wired SPN over the four variables 0..3.

    From the root down the network alternates sum and product layers:
    a root sum node, two product nodes over the whole scope, four sum
    nodes (scopes {0, 1}, {2}, {3}, {2, 3}), four product nodes (two
    over {0, 1}, two over {2, 3}) and eight single-variable sum nodes.
    Every single-variable sum node is linked, with weight 1.0, to all
    the indicator nodes of its variable in a CategoricalIndicatorLayer
    built with ``vars=[2, 3, 2, 2]``.

    Returns the resulting ``Spn``.
    """
    # scopes, shared among the nodes that use them
    full_scope = frozenset({0, 1, 2, 3})

    # root layer
    root = SumNode(var_scope=full_scope)
    root_layer = SumLayer([root])

    # first product layer: two nodes, evenly weighted under the root
    top_prods = [ProductNode(var_scope=full_scope) for _ in range(2)]
    top_prod_layer = ProductLayer(list(top_prods))
    for prod in top_prods:
        root.add_child(prod, 0.5)

    # upper sum layer
    scope_01 = frozenset({0, 1})
    scope_2 = frozenset({2})
    scope_3 = frozenset({3})
    scope_23 = frozenset({2, 3})

    sum_01, sum_2, sum_3, sum_23 = [
        SumNode(var_scope=scope)
        for scope in (scope_01, scope_2, scope_3, scope_23)
    ]

    upper_links = (
        (top_prods[0], (sum_01, sum_2, sum_3)),
        (top_prods[1], (sum_01, sum_23)),
    )
    for parent, children in upper_links:
        for child in children:
            parent.add_child(child)

    upper_sum_layer = SumLayer([sum_01, sum_2, sum_3, sum_23])

    # second product layer: two nodes on {0, 1}, then two on {2, 3}
    low_prods = [
        ProductNode(var_scope=scope)
        for scope in (scope_01, scope_01, scope_23, scope_23)
    ]
    for parent, children in ((sum_01, low_prods[:2]),
                             (sum_23, low_prods[2:])):
        for child in children:
            parent.add_child(child, 0.5)

    low_prod_layer = ProductLayer(list(low_prods))

    # bottom sum layer: a pair of single-variable sums per product node
    scope_0 = frozenset({0})
    scope_1 = frozenset({1})
    leaf_sums = [
        SumNode(var_scope=scope)
        for scope in (scope_0, scope_1, scope_0, scope_1,
                      scope_2, scope_3, scope_2, scope_3)
    ]
    for k, prod in enumerate(low_prods):
        prod.add_child(leaf_sums[2 * k])
        prod.add_child(leaf_sums[2 * k + 1])

    leaf_sum_layer = SumLayer(list(leaf_sums))

    # input layer
    feature_values = [2, 3, 2, 2]
    input_layer = CategoricalIndicatorLayer(vars=feature_values)

    # every single-variable sum node goes on top of its var's indicators
    for sum_node in [sum_2, sum_3] + leaf_sums:
        # the scope holds exactly one variable id here
        (var_id,) = sum_node.var_scope
        for indicator in input_layer.nodes():
            if indicator.var == var_id:
                sum_node.add_child(indicator, 1.0)

    # layers go from the one next to the input up to the root
    return Spn(input_layer=input_layer,
               layers=[leaf_sum_layer, low_prod_layer,
                       upper_sum_layer, top_prod_layer,
                       root_layer])
예제 #5
0
    def linked_naive_factorization(cls,
                                   features,
                                   node_dict=None,
                                   alpha=0.1):
        """
        Build a linked SPN made of a single product node over all features.

        The root is one product node with ``len(features)`` children, one
        per feature.

        Parameters
        ----------
        features:
            Sequence whose j-th entry is used as the number of values
            (i.e. the number of indicator nodes) of feature j.
        node_dict:
            When ``None`` the root children are sum nodes, each linked
            with uniform weights to the indicators of one feature in a
            ``CategoricalIndicatorLayerLinked``. Otherwise it is
            forwarded as ``node_dicts`` to a
            ``CategoricalSmoothedLayerLinked`` whose nodes become the
            root children directly.
        alpha:
            Forwarded to ``CategoricalSmoothedLayerLinked``; unused when
            ``node_dict`` is ``None``.

        Returns
        -------
        The ``SpnLinked`` built from the input layer and the inner
        layers, listed from the one closest to the input up to the root.

        Raises
        ------
        AssertionError
            If the smoothed input layer does not expose exactly
            ``len(features)`` nodes.
        """
        n_features = len(features)

        # first layer is a product layer with n_feature children
        root_node = ProductNode()
        root_layer = ProductLayerLinked([root_node])

        # second is a sum node on an indicator layer
        if node_dict is None:
            # one sum node per feature, all children of the root
            sum_nodes = [SumNode() for _ in range(n_features)]
            for node in sum_nodes:
                root_node.add_child(node)
            sum_layer = SumLayerLinked(sum_nodes)

            input_layer = CategoricalIndicatorLayerLinked(vars=features)

            # TODO make this a function
            # List the indicators a single time and advance a running
            # offset, rather than rebuilding the list and re-summing the
            # feature sizes for each sum node.
            # NOTE(review): assumes nodes() yields the indicators grouped
            # by feature following `features` (the original index
            # arithmetic relied on the same layout) - confirm against the
            # layer implementation.
            indicator_nodes = list(input_layer.nodes())
            start_index = 0
            for sum_node, n_values in zip(sum_nodes, features):
                end_index = start_index + n_values
                for ind_node in indicator_nodes[start_index:end_index]:
                    sum_node.add_child(ind_node, 1. / n_values)
                start_index = end_index

            layers = [sum_layer, root_layer]

        # or a categorical smoothed layer
        else:
            input_layer = CategoricalSmoothedLayerLinked(vars=features,
                                                         node_dicts=node_dict,
                                                         alpha=alpha)
            # it shall contain n_features nodes
            smooth_nodes = list(input_layer.nodes())
            assert len(smooth_nodes) == n_features
            for node in smooth_nodes:
                root_node.add_child(node)

            layers = [root_layer]

        # build the spn
        return SpnLinked(input_layer, layers)