예제 #1
0
def test_categorical_indicator_layer_vars():
    # create indicator nodes first
    ind1 = CategoricalIndicatorNode(var=0, var_val=0)
    ind2 = CategoricalIndicatorNode(var=3, var_val=0)
    ind3 = CategoricalIndicatorNode(var=3, var_val=1)
    ind4 = CategoricalIndicatorNode(var=2, var_val=0)
    ind5 = CategoricalIndicatorNode(var=1, var_val=1)
    ind6 = CategoricalIndicatorNode(var=2, var_val=1)
    ind7 = CategoricalIndicatorNode(var=1, var_val=0)
    ind8 = CategoricalIndicatorNode(var=0, var_val=1)
    ind9 = CategoricalIndicatorNode(var=2, var_val=2)
    ind10 = CategoricalIndicatorNode(var=3, var_val=2)
    ind11 = CategoricalIndicatorNode(var=3, var_val=3)

    # building the layer from nodes
    layer = CategoricalIndicatorLayer(nodes=[ind1, ind2,
                                             ind3, ind4,
                                             ind5, ind6,
                                             ind7, ind8,
                                             ind9, ind10, ind11])

    # checking for the construction of the vars property
    layer_vars = layer.vars()

    assert vars == layer_vars
예제 #2
0
def test_categorical_indicator_layer_eval():
    # create a layer from vars
    input_layer = CategoricalIndicatorLayer(vars=vars)
    # evaluating for obs
    input_layer.eval(obs)
    # getting values
    layer_evals = input_layer.node_values()
    print('layer eval nodes')
    print(layer_evals)
    # bulding the log vals by hand
    log_vals = []
    for var, obs_val in zip(vars, obs):
        var_log_vals = None
        if obs_val == MARG_IND:
            # all 1s
            var_log_vals = [0. for i in range(var)]
        else:
            # just one is 1, the rest are 0
            var_log_vals = [LOG_ZERO for i in range(var)]
            var_log_vals[obs_val] = 0.
        # concatenate vals
        log_vals.extend(var_log_vals)
    print('log vals')
    print(log_vals)
    assert log_vals == layer_evals
예제 #3
0
    def linked_kernel_density_estimation(cls,
                                         n_instances,
                                         features,
                                         node_dict=None,
                                         alpha=0.1
                                         # ,batch_size=1,
                                         # sparse=False
                                         ):
        """
        WRITEME
        """

        n_features = len(features)

        # the top one is a sum layer with a single node
        root_node = SumNode()
        root_layer = SumLayerLinked([root_node])

        # second one is a product layer with n_instances nodes
        product_nodes = [ProductNode() for i in range(n_instances)]
        product_layer = ProductLayerLinked(product_nodes)
        # linking them to the root node
        for prod_node in product_nodes:
            root_node.add_child(prod_node, 1. / n_instances)

        # last layer can be a categorical smoothed input
        # or sum_layer + categorical indicator input

        input_layer = None
        layers = None
        n_leaf_nodes = n_features * n_instances

        if node_dict is None:
            # creating a sum_layer with n_leaf_nodes
            sum_nodes = [SumNode() for i in range(n_leaf_nodes)]
            # store them into a layer
            sum_layer = SumLayerLinked(sum_nodes)
            # linking them to the products above
            for i, prod_node in enumerate(product_nodes):
                for j in range(n_features):
                    # getting the next n_features nodes
                    prod_node.add_child(sum_nodes[i * n_features + j])
            # now creating the indicator nodes
            input_layer = \
                CategoricalIndicatorLayerLinked(vars=features)
            # linking the sum nodes to the indicator vars
            for i, sum_node in enumerate(sum_nodes):
                # getting the feature id
                j = i % n_features
                # and thus its number of values
                n_values = features[j]
                # getting the indices of indicators
                start_index = sum(features[:j])
                end_index = start_index + n_values
                indicators = [node for node in input_layer.nodes()
                              ][start_index:end_index]
                for ind_node in indicators:
                    sum_node.add_child(ind_node, 1. / n_values)

            # storing levels
            layers = [sum_layer, product_layer, root_layer]
        else:
            # create a categorical smoothed layer
            input_layer = \
                CategoricalSmoothedLayerLinked(vars=features,
                                               node_dicts=node_dict,
                                               alpha=alpha)
            # it shall contain n_leaf_nodes nodes
            smooth_nodes = list(input_layer.nodes())
            assert len(smooth_nodes) == n_leaf_nodes

            # linking it
            for i, prod_node in enumerate(product_nodes):
                for j in range(n_features):
                    # getting the next n_features nodes
                    prod_node.add_child(smooth_nodes[i * n_features + j])
            # setting the used levels
            layers = [product_layer, root_layer]

        # create the spn from levels
        kern_spn = SpnLinked(input_layer, layers)
        return kern_spn
예제 #4
0
    def linked_naive_factorization(cls, features, node_dict=None, alpha=0.1):
        """
        WRITEME
        """
        n_features = len(features)

        # create an input layer
        input_layer = None
        layers = None

        # first layer is a product layer with n_feature children
        root_node = ProductNode()
        root_layer = ProductLayerLinked([root_node])

        # second is a sum node on an indicator layer
        if node_dict is None:
            # creating sum nodes
            sum_nodes = [SumNode() for i in range(n_features)]
            # linking to the root
            for node in sum_nodes:
                root_node.add_child(node)
            # store into a level
            sum_layer = SumLayerLinked(sum_nodes)
            # now create an indicator layer
            input_layer = CategoricalIndicatorLayerLinked(vars=features)
            # and linking it
            # TODO make this a function
            for i, sum_node in enumerate(sum_nodes):
                # getting the feature id
                j = i % n_features
                # and thus its number of values
                n_values = features[j]
                # getting the indices of indicators
                start_index = sum(features[:j])
                end_index = start_index + n_values
                indicators = [node for node in input_layer.nodes()
                              ][start_index:end_index]
                for ind_node in indicators:
                    sum_node.add_child(ind_node, 1. / n_values)

            # collecting layers
            layers = [sum_layer, root_layer]

        # or a categorical smoothed layer
        else:
            input_layer = CategoricalSmoothedLayerLinked(vars=features,
                                                         node_dicts=node_dict,
                                                         alpha=alpha)
            # it shall contain n_features nodes
            smooth_nodes = list(input_layer.nodes())
            assert len(smooth_nodes) == n_features
            for node in smooth_nodes:
                root_node.add_child(node)

            # set layers accordingly
            layers = [root_layer]

        # build the spn
        naive_fact_spn = SpnLinked(input_layer, layers)

        return naive_fact_spn
예제 #5
0
def create_valid_toy_spn():
    # root layer
    whole_scope = frozenset({0, 1, 2, 3})
    root_node = SumNode(var_scope=whole_scope)
    root_layer = SumLayer([root_node])

    # prod layer
    prod_node_1 = ProductNode(var_scope=whole_scope)
    prod_node_2 = ProductNode(var_scope=whole_scope)
    prod_layer_1 = ProductLayer([prod_node_1, prod_node_2])

    root_node.add_child(prod_node_1, 0.5)
    root_node.add_child(prod_node_2, 0.5)

    # sum layer
    scope_1 = frozenset({0, 1})
    scope_2 = frozenset({2})
    scope_3 = frozenset({3})
    scope_4 = frozenset({2, 3})

    sum_node_1 = SumNode(var_scope=scope_1)
    sum_node_2 = SumNode(var_scope=scope_2)
    sum_node_3 = SumNode(var_scope=scope_3)
    sum_node_4 = SumNode(var_scope=scope_4)

    prod_node_1.add_child(sum_node_1)
    prod_node_1.add_child(sum_node_2)
    prod_node_1.add_child(sum_node_3)

    prod_node_2.add_child(sum_node_1)
    prod_node_2.add_child(sum_node_4)

    sum_layer_1 = SumLayer([sum_node_1, sum_node_2,
                            sum_node_3, sum_node_4])

    # another product layer
    prod_node_3 = ProductNode(var_scope=scope_1)
    prod_node_4 = ProductNode(var_scope=scope_1)

    prod_node_5 = ProductNode(var_scope=scope_4)
    prod_node_6 = ProductNode(var_scope=scope_4)

    sum_node_1.add_child(prod_node_3, 0.5)
    sum_node_1.add_child(prod_node_4, 0.5)

    sum_node_4.add_child(prod_node_5, 0.5)
    sum_node_4.add_child(prod_node_6, 0.5)

    prod_layer_2 = ProductLayer([prod_node_3, prod_node_4,
                                 prod_node_5, prod_node_6])

    # last sum one
    scope_5 = frozenset({0})
    scope_6 = frozenset({1})

    sum_node_5 = SumNode(var_scope=scope_5)
    sum_node_6 = SumNode(var_scope=scope_6)
    sum_node_7 = SumNode(var_scope=scope_5)
    sum_node_8 = SumNode(var_scope=scope_6)

    sum_node_9 = SumNode(var_scope=scope_2)
    sum_node_10 = SumNode(var_scope=scope_3)
    sum_node_11 = SumNode(var_scope=scope_2)
    sum_node_12 = SumNode(var_scope=scope_3)

    prod_node_3.add_child(sum_node_5)
    prod_node_3.add_child(sum_node_6)
    prod_node_4.add_child(sum_node_7)
    prod_node_4.add_child(sum_node_8)

    prod_node_5.add_child(sum_node_9)
    prod_node_5.add_child(sum_node_10)
    prod_node_6.add_child(sum_node_11)
    prod_node_6.add_child(sum_node_12)

    sum_layer_2 = SumLayer([sum_node_5, sum_node_6,
                            sum_node_7, sum_node_8,
                            sum_node_9, sum_node_10,
                            sum_node_11, sum_node_12])

    # input layer
    vars = [2, 3, 2, 2]
    input_layer = CategoricalIndicatorLayer(vars=vars)
    last_sum_nodes = [sum_node_2, sum_node_3,
                      sum_node_5, sum_node_6,
                      sum_node_7, sum_node_8,
                      sum_node_9, sum_node_10,
                      sum_node_11, sum_node_12]
    for sum_node in last_sum_nodes:
        (var_scope,) = sum_node.var_scope
        for input_node in input_layer.nodes():
            if input_node.var == var_scope:
                sum_node.add_child(input_node, 1.0)

    spn = Spn(input_layer=input_layer,
              layers=[sum_layer_2, prod_layer_2,
                      sum_layer_1, prod_layer_1,
                      root_layer])

    # print(spn)
    return spn
예제 #6
0
def build_linked_layered_spn(print_spn=True):
    #
    # building an indicator layer
    ind_x_00 = CategoricalIndicatorNode(0, 0)
    ind_x_01 = CategoricalIndicatorNode(0, 1)
    ind_x_10 = CategoricalIndicatorNode(1, 0)
    ind_x_11 = CategoricalIndicatorNode(1, 1)
    ind_x_20 = CategoricalIndicatorNode(2, 0)
    ind_x_21 = CategoricalIndicatorNode(2, 1)

    input_layer = CategoricalIndicatorLayer(
        [ind_x_00, ind_x_01, ind_x_10, ind_x_11, ind_x_20, ind_x_21])

    #
    # sum layer
    #
    sum_node_1 = SumNode(frozenset([0]))
    sum_node_1.add_child(ind_x_00, 0.1)
    sum_node_1.add_child(ind_x_01, 0.9)

    sum_node_2 = SumNode(frozenset([0]))
    sum_node_2.add_child(ind_x_00, 0.4)
    sum_node_2.add_child(ind_x_01, 0.6)

    sum_node_3 = SumNode(frozenset([1]))
    sum_node_3.add_child(ind_x_10, 0.3)
    sum_node_3.add_child(ind_x_11, 0.7)

    sum_node_4 = SumNode(frozenset([1]))
    sum_node_4.add_child(ind_x_10, 0.6)
    sum_node_4.add_child(ind_x_11, 0.4)

    sum_node_5 = SumNode(frozenset([2]))
    sum_node_5.add_child(ind_x_20, 0.5)
    sum_node_5.add_child(ind_x_21, 0.5)

    sum_node_6 = SumNode(frozenset([2]))
    sum_node_6.add_child(ind_x_20, 0.2)
    sum_node_6.add_child(ind_x_21, 0.8)

    sum_layer_1 = SumLayerLinked([
        sum_node_1, sum_node_2, sum_node_3, sum_node_4, sum_node_5, sum_node_6
    ])

    #
    # product nodes

    #
    # xy
    prod_node_7 = ProductNode(frozenset([0, 1]))
    prod_node_7.add_child(sum_node_1)
    prod_node_7.add_child(sum_node_3)

    prod_node_8 = ProductNode(frozenset([0, 1]))
    prod_node_8.add_child(sum_node_2)
    prod_node_8.add_child(sum_node_4)

    prod_node_9 = ProductNode(frozenset([0, 1]))
    prod_node_9.add_child(sum_node_1)
    prod_node_9.add_child(sum_node_3)

    #
    # yz
    prod_node_10 = ProductNode(frozenset([1, 2]))
    prod_node_10.add_child(sum_node_4)
    prod_node_10.add_child(sum_node_5)

    prod_node_11 = ProductNode(frozenset([1, 2]))
    prod_node_11.add_child(sum_node_4)
    prod_node_11.add_child(sum_node_6)

    prod_layer_2 = ProductLayerLinked(
        [prod_node_7, prod_node_8, prod_node_9, prod_node_10, prod_node_11])

    #
    # sum nodes
    #
    # xy
    sum_node_12 = SumNode(frozenset([0, 1]))
    sum_node_12.add_child(prod_node_7, 0.1)
    sum_node_12.add_child(prod_node_8, 0.9)

    sum_node_13 = SumNode(frozenset([0, 1]))
    sum_node_13.add_child(prod_node_8, 0.7)
    sum_node_13.add_child(prod_node_9, 0.3)

    #
    # yz
    sum_node_14 = SumNode(frozenset([1, 2]))
    sum_node_14.add_child(prod_node_10, 0.6)
    sum_node_14.add_child(prod_node_11, 0.4)

    sum_layer_3 = SumLayerLinked([sum_node_12, sum_node_13, sum_node_14])

    #
    # product nodes
    prod_node_15 = ProductNode(frozenset([0, 1, 2]))
    prod_node_15.add_child(sum_node_12)
    prod_node_15.add_child(sum_node_6)

    prod_node_16 = ProductNode(frozenset([0, 1, 2]))
    prod_node_16.add_child(sum_node_13)
    prod_node_16.add_child(sum_node_5)

    prod_node_17 = ProductNode(frozenset([0, 1, 2]))
    prod_node_17.add_child(sum_node_2)
    prod_node_17.add_child(sum_node_14)

    prod_layer_4 = ProductLayerLinked(
        [prod_node_15, prod_node_16, prod_node_17])

    #
    # root
    sum_node_18 = SumNode(frozenset([0, 1, 2]))
    sum_node_18.add_child(prod_node_15, 0.2)
    sum_node_18.add_child(prod_node_16, 0.2)
    sum_node_18.add_child(prod_node_17, 0.6)

    sum_layer_5 = SumLayerLinked([sum_node_18])

    #
    # creating the spn
    layers = [
        sum_layer_1, prod_layer_2, sum_layer_3, prod_layer_4, sum_layer_5
    ]
    nodes = [node for layer in layers for node in layer.nodes()]

    spn = SpnLinked(input_layer=input_layer, layers=layers)

    if print_spn:
        print(spn)

    return spn, layers, nodes
예제 #7
0
def build_spn_indicator_layer(the_vars):
    input_layer = CategoricalIndicatorLayer(vars=the_vars)
    return input_layer
예제 #8
0
def test_linked_to_theano_indicator():
    # creating single nodes
    root = SumNode()

    prod1 = ProductNode()
    prod2 = ProductNode()
    prod3 = ProductNode()

    sum1 = SumNode()
    sum2 = SumNode()
    sum3 = SumNode()
    sum4 = SumNode()

    ind1 = CategoricalIndicatorNode(var=0, var_val=0)
    ind2 = CategoricalIndicatorNode(var=0, var_val=1)
    ind3 = CategoricalIndicatorNode(var=1, var_val=0)
    ind4 = CategoricalIndicatorNode(var=1, var_val=1)
    ind5 = CategoricalIndicatorNode(var=2, var_val=0)
    ind6 = CategoricalIndicatorNode(var=2, var_val=1)
    ind7 = CategoricalIndicatorNode(var=2, var_val=2)
    ind8 = CategoricalIndicatorNode(var=3, var_val=0)
    ind9 = CategoricalIndicatorNode(var=3, var_val=1)
    ind10 = CategoricalIndicatorNode(var=3, var_val=2)
    ind11 = CategoricalIndicatorNode(var=3, var_val=3)

    prod4 = ProductNode()
    prod5 = ProductNode()
    prod6 = ProductNode()
    prod7 = ProductNode()

    # linking nodes
    root.add_child(prod1, 0.3)
    root.add_child(prod2, 0.3)
    root.add_child(prod3, 0.4)

    prod1.add_child(sum1)
    prod1.add_child(sum2)
    prod2.add_child(ind7)
    prod2.add_child(ind8)
    prod2.add_child(ind11)
    prod3.add_child(sum3)
    prod3.add_child(sum4)

    sum1.add_child(ind1, 0.3)
    sum1.add_child(ind2, 0.3)
    sum1.add_child(prod4, 0.4)

    sum2.add_child(ind2, 0.5)
    sum2.add_child(prod4, 0.2)
    sum2.add_child(prod5, 0.3)

    sum3.add_child(prod6, 0.5)
    sum3.add_child(prod7, 0.5)
    sum4.add_child(prod6, 0.5)
    sum4.add_child(prod7, 0.5)

    prod4.add_child(ind3)
    prod4.add_child(ind4)
    prod5.add_child(ind5)
    prod5.add_child(ind6)
    prod6.add_child(ind9)
    prod6.add_child(ind10)
    prod7.add_child(ind9)
    prod7.add_child(ind10)

    # building layers from nodes
    root_layer = SumLayerLinked([root])
    prod_layer = ProductLayerLinked([prod1, prod2, prod3])
    sum_layer = SumLayerLinked([sum1, sum2, sum3, sum4])
    aprod_layer = ProductLayerLinked([prod4, prod5, prod6, prod7])
    ind_layer = CategoricalIndicatorLayer(nodes=[
        ind1, ind2, ind3, ind4, ind5, ind6, ind7, ind8, ind9, ind10, ind11
    ])

    # creating the linked spn
    spn_linked = SpnLinked(
        input_layer=ind_layer,
        layers=[aprod_layer, sum_layer, prod_layer, root_layer])

    print(spn_linked)

    # converting to theano repr
    spn_theano = SpnFactory.linked_to_theano(spn_linked)
    print(spn_theano)

    # time for some inference comparison
    for instance in I:
        print('linked')
        res_l = spn_linked.eval(instance)
        print(res_l)
        print('theano')
        res_t = spn_theano.eval(instance)
        print(res_t)
        assert_array_almost_equal(res_l, res_t)
예제 #9
0
def test_mini_spn_fit_em():
    vars = numpy.array([2, 2, 2, 2])
    input_layer = CategoricalIndicatorLayer(vars=vars)

    print(input_layer)
    ind1 = input_layer._nodes[0]
    ind2 = input_layer._nodes[1]
    ind3 = input_layer._nodes[2]
    ind4 = input_layer._nodes[3]
    ind5 = input_layer._nodes[4]
    ind6 = input_layer._nodes[5]
    ind7 = input_layer._nodes[6]
    ind8 = input_layer._nodes[7]

    # creating a sum layer of 4 nodes
    sum1 = SumNode()
    sum2 = SumNode()
    sum3 = SumNode()
    sum4 = SumNode()

    sum1.add_child(ind1, 0.6)
    sum1.add_child(ind2, 0.4)
    sum2.add_child(ind3, 0.5)
    sum2.add_child(ind4, 0.5)
    sum3.add_child(ind5, 0.7)
    sum3.add_child(ind6, 0.3)
    sum4.add_child(ind7, 0.4)
    sum4.add_child(ind8, 0.6)

    sum_layer = SumLayer(nodes=[sum1, sum2, sum3, sum4])

    # and a top layer of 3 products
    prod1 = ProductNode()
    prod2 = ProductNode()
    prod3 = ProductNode()

    prod1.add_child(sum1)
    prod1.add_child(sum2)
    prod2.add_child(sum2)
    prod2.add_child(sum3)
    prod3.add_child(sum3)
    prod3.add_child(sum4)

    prod_layer = ProductLayer(nodes=[prod1, prod2, prod3])

    # root layer
    root = SumNode()

    root.add_child(prod1, 0.4)
    root.add_child(prod2, 0.25)
    root.add_child(prod3, 0.35)

    root_layer = SumLayer(nodes=[root])

    spn = Spn(input_layer=input_layer,
              layers=[sum_layer, prod_layer, root_layer])

    print(spn)

    # training on obs
    spn.fit_em(train=syn_train_data, valid=syn_val_data, test=None, hard=True)
예제 #10
0
    def linked_kernel_density_estimation(cls,
                                         n_instances,
                                         features,
                                         node_dict=None,
                                         alpha=0.1
                                         # ,batch_size=1,
                                         # sparse=False
                                         ):
        """
        WRITEME
        """

        n_features = len(features)

        # the top one is a sum layer with a single node
        root_node = SumNode()
        root_layer = SumLayerLinked([root_node])

        # second one is a product layer with n_instances nodes
        product_nodes = [ProductNode() for i in range(n_instances)]
        product_layer = ProductLayerLinked(product_nodes)
        # linking them to the root node
        for prod_node in product_nodes:
            root_node.add_child(prod_node, 1. / n_instances)

        # last layer can be a categorical smoothed input
        # or sum_layer + categorical indicator input

        input_layer = None
        layers = None
        n_leaf_nodes = n_features * n_instances

        if node_dict is None:
            # creating a sum_layer with n_leaf_nodes
            sum_nodes = [SumNode() for i in range(n_leaf_nodes)]
            # store them into a layer
            sum_layer = SumLayerLinked(sum_nodes)
            # linking them to the products above
            for i, prod_node in enumerate(product_nodes):
                for j in range(n_features):
                    # getting the next n_features nodes
                    prod_node.add_child(sum_nodes[i * n_features + j])
            # now creating the indicator nodes
            input_layer = \
                CategoricalIndicatorLayerLinked(vars=features)
            # linking the sum nodes to the indicator vars
            for i, sum_node in enumerate(sum_nodes):
                # getting the feature id
                j = i % n_features
                # and thus its number of values
                n_values = features[j]
                # getting the indices of indicators
                start_index = sum(features[:j])
                end_index = start_index + n_values
                indicators = [node for node
                              in input_layer.nodes()][start_index:end_index]
                for ind_node in indicators:
                    sum_node.add_child(ind_node, 1. / n_values)

            # storing levels
            layers = [sum_layer, product_layer,
                      root_layer]
        else:
            # create a categorical smoothed layer
            input_layer = \
                CategoricalSmoothedLayerLinked(vars=features,
                                               node_dicts=node_dict,
                                               alpha=alpha)
            # it shall contain n_leaf_nodes nodes
            smooth_nodes = list(input_layer.nodes())
            assert len(smooth_nodes) == n_leaf_nodes

            # linking it
            for i, prod_node in enumerate(product_nodes):
                for j in range(n_features):
                    # getting the next n_features nodes
                    prod_node.add_child(smooth_nodes[i * n_features + j])
            # setting the used levels
            layers = [product_layer, root_layer]

        # create the spn from levels
        kern_spn = SpnLinked(input_layer, layers)
        return kern_spn
예제 #11
0
    def linked_naive_factorization(cls,
                                   features,
                                   node_dict=None,
                                   alpha=0.1):
        """
        WRITEME
        """
        n_features = len(features)

        # create an input layer
        input_layer = None
        layers = None

        # first layer is a product layer with n_feature children
        root_node = ProductNode()
        root_layer = ProductLayerLinked([root_node])

        # second is a sum node on an indicator layer
        if node_dict is None:
            # creating sum nodes
            sum_nodes = [SumNode() for i in range(n_features)]
            # linking to the root
            for node in sum_nodes:
                root_node.add_child(node)
            # store into a level
            sum_layer = SumLayerLinked(sum_nodes)
            # now create an indicator layer
            input_layer = CategoricalIndicatorLayerLinked(vars=features)
            # and linking it
            # TODO make this a function
            for i, sum_node in enumerate(sum_nodes):
                # getting the feature id
                j = i % n_features
                # and thus its number of values
                n_values = features[j]
                # getting the indices of indicators
                start_index = sum(features[:j])
                end_index = start_index + n_values
                indicators = [node for node
                              in input_layer.nodes()][start_index:end_index]
                for ind_node in indicators:
                    sum_node.add_child(ind_node, 1. / n_values)

            # collecting layers
            layers = [sum_layer, root_layer]

        # or a categorical smoothed layer
        else:
            input_layer = CategoricalSmoothedLayerLinked(vars=features,
                                                         node_dicts=node_dict,
                                                         alpha=alpha)
            # it shall contain n_features nodes
            smooth_nodes = list(input_layer.nodes())
            assert len(smooth_nodes) == n_features
            for node in smooth_nodes:
                root_node.add_child(node)

            # set layers accordingly
            layers = [root_layer]

        # build the spn
        naive_fact_spn = SpnLinked(input_layer, layers)

        return naive_fact_spn
예제 #12
0
def build_linked_spn_from_scope_graph(scope_graph,
                                      k,
                                      root_scope=None,
                                      feature_values=None):
    """
    Turning a ScopeGraph into an SPN by puttin k sum nodes for each scope
    and a combinatorial number of product nodes to wire the partition nodes

    This is the algorithm used in Poon2011 and is shown (and used) as BuildSPN in Dennis2012
    """

    if not root_scope:
        root_scope = scope_graph.root

    n_vars = len(root_scope.vars)
    if not feature_values:
        #
        # assuming binary r.v.s
        feature_values = [2 for _i in range(n_vars)]

    #
    # adding leaves
    leaves_dict = defaultdict(list)
    leaves_list = []
    for var in sorted(root_scope.vars):
        for var_val in range(feature_values[var]):
            leaf = CategoricalIndicatorNode(var, var_val)
            leaves_list.append(leaf)
            leaves_dict[var].append(leaf)

    input_layer = CategoricalIndicatorLayer(nodes=leaves_list,
                                            vars=list(sorted(root_scope.vars)))

    #
    # in a first pass we need to assign each scope/region k sum nodes
    sum_nodes_assoc = {}
    for r in scope_graph.traverse_scopes(root_scope=root_scope):

        num_sum_nodes = k

        if r == root_scope:
            num_sum_nodes = 1

        added_sum_nodes = [
            SumNode(var_scope=r.vars) for i in range(num_sum_nodes)
        ]
        #
        # creating a sum layer
        sum_layer = SumLayer(added_sum_nodes)
        sum_nodes_assoc[r] = sum_layer

        #
        # if this is a univariate scope, we link it to leaves corresponding to its r.v.
        if r.is_atomic():
            single_rv = set(r.vars).pop()
            rv_leaves = leaves_dict[single_rv]
            uniform_weight = 1.0 / len(rv_leaves)
            for s in added_sum_nodes:
                for leaf in rv_leaves:
                    s.add_child(leaf, uniform_weight)
            #
            # linking to input layer
            sum_layer.add_input_layer(input_layer)
            input_layer.add_output_layer(sum_layer)

    layers = []
    #
    # looping again to add and wire product nodes
    for r in scope_graph.traverse_scopes(root_scope=root_scope):

        sum_layer = sum_nodes_assoc[r]
        layers.append(sum_layer)

        for p in r.partitions:

            sum_layer_descs = [sum_nodes_assoc[r_p] for r_p in p.scopes]
            sum_nodes_lists = [
                list(layer.nodes()) for layer in sum_layer_descs
            ]
            num_prod_nodes = numpy.prod([len(r_p) for r_p in sum_nodes_lists])

            #
            # adding product nodes
            added_prod_nodes = [
                ProductNode(var_scope=r.vars) for i in range(num_prod_nodes)
            ]
            #
            # adding product layer and linking
            prod_layer = ProductLayer(added_prod_nodes)
            sum_layer.add_input_layer(prod_layer)
            prod_layer.add_output_layer(sum_layer)
            for desc in sum_layer_descs:
                prod_layer.add_input_layer(desc)
                desc.add_output_layer(prod_layer)
            layers.append(prod_layer)

            #
            # linking to parents
            sum_nodes_parents = sum_layer.nodes()
            for sum_node in sum_nodes_parents:
                uniform_weight = 1.0 / (len(added_prod_nodes) *
                                        len(r.partitions))
                for prod_node in added_prod_nodes:
                    sum_node.add_child(prod_node, uniform_weight)
            #
            # linking to children
            sum_nodes_to_wire = list(itertools.product(*sum_nodes_lists))

            assert len(added_prod_nodes) == len(sum_nodes_to_wire)

            for prod_node, sum_nodes in zip(added_prod_nodes,
                                            sum_nodes_to_wire):
                for sum_node in sum_nodes:
                    prod_node.add_child(sum_node)

    #
    # toposort
    layers = topological_layer_sort(layers)

    spn = LinkedSpn(layers=layers, input_layer=input_layer)

    return spn
예제 #13
0
def test_compute_block_layer_depths_II():

    input_layer = CategoricalIndicatorLayer([])

    sum_layer_1 = SumLayer([])

    prod_layer_21 = ProductLayer([])
    prod_layer_22 = ProductLayer([])

    sum_layer_3 = SumLayer([])

    prod_layer_41 = ProductLayer([])
    prod_layer_42 = ProductLayer([])

    sum_layer_5 = SumLayer([])

    #
    # linking them
    sum_layer_1.add_input_layer(input_layer)
    input_layer.add_output_layer(sum_layer_1)

    prod_layer_21.add_input_layer(sum_layer_1)
    prod_layer_22.add_input_layer(sum_layer_1)
    sum_layer_1.add_output_layer(prod_layer_21)
    sum_layer_1.add_output_layer(prod_layer_22)

    sum_layer_3.add_input_layer(prod_layer_21)
    sum_layer_3.add_input_layer(prod_layer_22)
    sum_layer_3.add_input_layer(input_layer)
    prod_layer_21.add_output_layer(sum_layer_3)
    prod_layer_22.add_output_layer(sum_layer_3)
    input_layer.add_output_layer(sum_layer_3)

    prod_layer_41.add_input_layer(sum_layer_3)
    prod_layer_41.add_input_layer(sum_layer_1)
    prod_layer_42.add_input_layer(sum_layer_3)
    prod_layer_42.add_input_layer(input_layer)
    sum_layer_3.add_output_layer(prod_layer_41)
    sum_layer_3.add_output_layer(prod_layer_42)
    sum_layer_1.add_output_layer(prod_layer_41)
    input_layer.add_output_layer(prod_layer_42)

    sum_layer_5.add_input_layer(prod_layer_41)
    sum_layer_5.add_input_layer(prod_layer_42)
    prod_layer_41.add_output_layer(sum_layer_5)
    prod_layer_42.add_output_layer(sum_layer_5)

    #
    # creating an SPN with unordered layer list
    spn = LinkedSpn(input_layer=input_layer,
                    layers=[
                        sum_layer_1, sum_layer_3, sum_layer_5, prod_layer_21,
                        prod_layer_22, prod_layer_41, prod_layer_42
                    ])

    depth_dict = compute_block_layer_depths(spn)

    print(spn)

    for layer, depth in depth_dict.items():
        print(layer.id, depth)
예제 #14
0
def test_build_linked_spn_from_scope_graph():

    #
    # creating a region graph as an input scope graph
    n_cols = 2
    n_rows = 2
    coarse = 2
    #
    # create initial region
    root_region = Region.create_whole_region(n_rows, n_cols)

    region_graph = create_poon_region_graph(root_region, coarse=coarse)

    # print(region_graph)
    print('# partitions', region_graph.n_partitions())
    print('# regions', region_graph.n_scopes())

    print(region_graph)

    #
    #
    k = 2
    spn = build_linked_spn_from_scope_graph(region_graph, k)

    print(spn)

    print(spn.stats())

    #
    # back to the scope graph
    root_layer = list(spn.root_layer().nodes())
    assert len(root_layer) == 1
    root = root_layer[0]

    scope_graph = get_scope_graph_from_linked_spn(root)
    print(scope_graph)

    assert scope_graph == region_graph

    #
    # building an spn from scratch
    #
    # building leaf nodes
    n_vars = 4
    vars = [0, 1, 2, 3]
    leaves = [
        CategoricalIndicatorNode(var, val) for var in range(n_vars)
        for val in [0, 1]
    ]
    input_layer = CategoricalIndicatorLayer(nodes=leaves, vars=vars)

    #
    # building root
    root_node = SumNode(var_scope=frozenset(vars))
    root_layer = SumLayer([root_node])

    #
    # building product nodes
    prod_list_1 = [ProductNode(var_scope=vars) for i in range(4)]
    prod_list_2 = [ProductNode(var_scope=vars) for i in range(4)]
    prod_nodes_1 = prod_list_1 + prod_list_2
    product_layer_1 = ProductLayer(prod_nodes_1)

    for p in prod_nodes_1:
        root_node.add_child(p, 1.0 / len(prod_nodes_1))

    #
    # build sum nodes
    sum_list_1 = [SumNode() for i in range(2)]
    sum_list_2 = [SumNode() for i in range(2)]
    sum_list_3 = [SumNode() for i in range(2)]
    sum_list_4 = [SumNode() for i in range(2)]

    sum_layer_2 = SumLayer(sum_list_1 + sum_list_2 + sum_list_3 + sum_list_4)

    sum_pairs = []
    for s_1 in sum_list_1:
        for s_2 in sum_list_2:
            sum_pairs.append((s_1, s_2))

    for p, (s_1, s_2) in zip(prod_list_1, sum_pairs):
        p.add_child(s_1)
        p.add_child(s_2)

    sum_pairs = []
    for s_3 in sum_list_3:
        for s_4 in sum_list_4:
            sum_pairs.append((s_3, s_4))

    for p, (s_3, s_4) in zip(prod_list_2, sum_pairs):
        p.add_child(s_3)
        p.add_child(s_4)

    #
    # again product nodes
    prod_list_3 = [ProductNode() for i in range(4)]
    prod_list_4 = [ProductNode() for i in range(4)]
    prod_list_5 = [ProductNode() for i in range(4)]
    prod_list_6 = [ProductNode() for i in range(4)]

    product_layer_3 = ProductLayer(prod_list_3 + prod_list_4 + prod_list_5 +
                                   prod_list_6)

    for s in sum_list_1:
        for p in prod_list_3:
            s.add_child(p, 1.0 / len(prod_list_3))

    for s in sum_list_2:
        for p in prod_list_4:
            s.add_child(p, 1.0 / len(prod_list_4))

    for s in sum_list_3:
        for p in prod_list_5:
            s.add_child(p, 1.0 / len(prod_list_5))

    for s in sum_list_4:
        for p in prod_list_6:
            s.add_child(p, 1.0 / len(prod_list_6))

    #
    # build sum nodes
    sum_list_5 = [SumNode() for i in range(2)]
    sum_list_6 = [SumNode() for i in range(2)]
    sum_list_7 = [SumNode() for i in range(2)]
    sum_list_8 = [SumNode() for i in range(2)]

    sum_layer_4 = SumLayer(sum_list_5 + sum_list_6 + sum_list_7 + sum_list_8)

    sum_pairs = []
    for s_5 in sum_list_5:
        for s_7 in sum_list_7:
            sum_pairs.append((s_5, s_7))

    for p, (s_5, s_7) in zip(prod_list_3, sum_pairs):
        p.add_child(s_5)
        p.add_child(s_7)

    sum_pairs = []
    for s_6 in sum_list_6:
        for s_8 in sum_list_8:
            sum_pairs.append((s_6, s_8))

    for p, (s_6, s_8) in zip(prod_list_4, sum_pairs):
        p.add_child(s_6)
        p.add_child(s_8)

    sum_pairs = []
    for s_5 in sum_list_5:
        for s_6 in sum_list_6:
            sum_pairs.append((s_5, s_6))

    for p, (s_5, s_6) in zip(prod_list_5, sum_pairs):
        p.add_child(s_5)
        p.add_child(s_6)

    sum_pairs = []
    for s_7 in sum_list_7:
        for s_8 in sum_list_8:
            sum_pairs.append((s_7, s_8))

    for p, (s_7, s_8) in zip(prod_list_6, sum_pairs):
        p.add_child(s_7)
        p.add_child(s_8)

    #
    # linking to input layer
    for s in sum_list_5:
        for i in leaves[0:2]:
            s.add_child(i, 0.5)

    for s in sum_list_6:
        for i in leaves[2:4]:
            s.add_child(i, 0.5)

    for s in sum_list_7:
        for i in leaves[4:6]:
            s.add_child(i, 0.5)

    for s in sum_list_8:
        for i in leaves[6:]:
            s.add_child(i, 0.5)

    lspn = LinkedSpn(input_layer=input_layer,
                     layers=[
                         sum_layer_4, product_layer_3, sum_layer_2,
                         product_layer_1, root_layer
                     ])
    print(lspn)
    print(lspn.stats())

    #
    # trying to evaluate them
    input_vec = numpy.array([[1., 1., 1., 0.], [0., 0., 0., 0.],
                             [0., 1., 1., 0.],
                             [MARG_IND, MARG_IND, MARG_IND, MARG_IND]]).T

    res = spn.eval(input_vec)
    print('First evaluation')
    print(res)

    res = lspn.eval(input_vec)
    print('Second evaluation')
    print(res)