Example #1
0
def test_categorical_smoothed_layer_vars():
    """Check that a layer built from single nodes reports the expected vars.

    One ``CategoricalSmoothedNode`` is created per entry of ``dicts`` and the
    nodes are wrapped in a ``CategoricalSmoothedLayer``; ``layer.vars()`` must
    then equal ``vars``.

    NOTE(review): ``vars`` and ``dicts`` are not defined in this function;
    they are presumably module-level fixtures -- confirm against the module.
    """
    # building the nodes one by one, looking up each variable entry in vars
    nodes = []
    for node_dict in dicts:
        var_id = node_dict['var']
        nodes.append(CategoricalSmoothedNode(var_id, vars[var_id]))

    # wrapping them into a layer
    layer = CategoricalSmoothedLayer(nodes)

    # the layer must reconstruct the same vars it was (indirectly) built from
    assert vars == layer.vars()
Example #2
0
def test_categorical_smoothed_layer_eval():
    """Compare layer evaluation against hand-computed smoothed values.

    The layer is built from ``vars`` and ``dicts`` with an initial alpha,
    evaluated on ``obs`` and its ``node_values()`` are compared with the
    values obtained by calling ``compute_smoothed_ll`` once per node dict.
    The same comparison is then repeated after calling ``smooth_probs`` with
    several other alpha values.

    Both comparisons use ``assert_array_almost_equal``: the values are
    floating point results computed along two different code paths, so an
    exact ``==`` check would be brittle (and ambiguous if ``node_values()``
    returns an array rather than a list).

    NOTE(review): ``vars``, ``dicts`` and ``obs`` are not defined in this
    function; they are presumably module-level fixtures -- confirm.
    """

    def expected_logvals(smoothing):
        """Craft by hand the value expected for each node dict."""
        return [compute_smoothed_ll(obs[node_dict['var']],
                                    # nodes without frequencies pass None
                                    node_dict.get('freqs'),
                                    vars[node_dict['var']],
                                    smoothing)
                for node_dict in dicts]

    alpha = 0.1

    # creating input layer
    input_layer = CategoricalSmoothedLayer(vars=vars,
                                           node_dicts=dicts,
                                           alpha=alpha)
    # evaluate it
    input_layer.eval(obs)
    # getting values
    layer_evals = input_layer.node_values()
    print('layer eval nodes')
    print(layer_evals)

    # crafting by hand
    logvals = expected_logvals(alpha)
    print('log vals')
    print(logvals)
    # tolerance-based comparison, consistent with the loop below
    assert_array_almost_equal(logvals, layer_evals)

    # now changing alphas
    print('\nCHANGING ALPHAS\n')
    alphas = [0., 0.1, 1., 10.]
    for alpha_new in alphas:
        print('alpha', alpha_new)
        input_layer.smooth_probs(alpha_new)
        # evaluating again
        input_layer.eval(obs)
        # getting values
        layer_evals = input_layer.node_values()
        print('layer evals')
        print(layer_evals)

        logvals = expected_logvals(alpha_new)
        print('logvals')
        print(logvals)
        assert_array_almost_equal(logvals, layer_evals)
Example #3
0
def build_spn_smoothed_layer(the_vars, node_dicts, the_alpha):
    """Build and return a ``CategoricalSmoothedLayer``.

    The three arguments are forwarded unchanged as the ``vars``,
    ``node_dicts`` and ``alpha`` keyword arguments of the layer constructor.
    """
    # Debugging hint (attribute names come from earlier debug prints and are
    # not verified here): inspect node._var_freqs / node._var_probs for each
    # node in the layer's _nodes.
    return CategoricalSmoothedLayer(vars=the_vars,
                                    node_dicts=node_dicts,
                                    alpha=the_alpha)
Example #4
0
def test_linked_to_theano_categorical():
    """Check that a linked SPN and its theano conversion evaluate alike.

    A small network is wired by hand on top of a categorical smoothed input
    layer, converted with ``SpnFactory.linked_to_theano`` and both versions
    are evaluated on every instance of ``I``; results must be almost equal.

    NOTE(review): ``I`` is not defined in this function; it is presumably a
    module-level collection of instances -- confirm.
    """
    # one entry per variable; each 'freqs' list below has the matching length
    var_sizes = [2, 2, 3, 4]
    leaf_specs = [
        {'var': 0, 'freqs': [1, 2]},
        {'var': 1, 'freqs': [2, 2]},
        {'var': 0, 'freqs': [3, 2]},
        {'var': 1, 'freqs': [0, 3]},
        {'var': 2, 'freqs': [1, 0, 2]},
        {'var': 3, 'freqs': [1, 2, 1, 2]},
        {'var': 3, 'freqs': [3, 4, 0, 1]},
    ]

    # the input layer comes first, its nodes are the leaves to link to
    input_layer = CategoricalSmoothedLayer(vars=var_sizes,
                                           node_dicts=leaf_specs)
    leaves = list(input_layer.nodes())

    # inner nodes, created in the same order as they are layered below
    root_node = ProductNode()
    sum1, sum2 = SumNode(), SumNode()
    prod1, prod2 = ProductNode(), ProductNode()
    sum3, sum4 = SumNode(), SumNode()

    # linking: (parent, child) for product nodes, (parent, child, weight) for
    # sum nodes; the calls are issued in exactly this order
    edges = [
        (root_node, sum1),
        (root_node, sum2),
        (root_node, leaves[0]),
        (root_node, leaves[1]),
        (sum1, leaves[2], 0.4),
        (sum1, leaves[3], 0.6),
        (sum2, leaves[3], 0.2),
        (sum2, prod1, 0.5),
        (sum2, prod2, 0.3),
        (prod1, leaves[4]),
        (prod1, sum3),
        (prod1, sum4),
        (prod2, sum3),
        (prod2, sum4),
        (sum3, leaves[5], 0.5),
        (sum3, leaves[6], 0.5),
        (sum4, leaves[5], 0.4),
        (sum4, leaves[6], 0.6),
    ]
    for parent, *child_args in edges:
        parent.add_child(*child_args)

    # creating layers
    root_layer = ProductLayerLinked([root_node])
    sum_layer = SumLayerLinked([sum1, sum2])
    prod_layer = ProductLayerLinked([prod1, prod2])
    sum_layer2 = SumLayerLinked([sum3, sum4])

    # the linked spn takes its layers from the one above the leaves to the root
    layers_bottom_up = [sum_layer2, prod_layer, sum_layer, root_layer]
    spn_linked = SpnLinked(input_layer=input_layer,
                           layers=layers_bottom_up)

    print(spn_linked)

    # converting to theano repr
    spn_theano = SpnFactory.linked_to_theano(spn_linked)
    print(spn_theano)

    # both representations must agree on every instance
    for instance in I:
        print('linked')
        linked_result = spn_linked.eval(instance)
        print(linked_result)
        print('theano')
        theano_result = spn_theano.eval(instance)
        print(theano_result)
        assert_array_almost_equal(linked_result, theano_result)
Example #5
0
def test_spn_sampling():
    """Smoke-test sampling from a small mixture SPN over four binary features.

    Four groups of categorical leaves (one leaf per feature in each group)
    feed four product nodes, mixed by a single sum root with weights
    0.5, 0.1, 0.2 and 0.2.  Samples are drawn and their counts printed, then
    the network is converted with ``linked_categorical_input_to_indicators``
    and sampled again with ``one_hot_encoding=True``.

    The test contains no assertions: it passes as long as nothing raises.
    """
    from collections import Counter

    from spn.factory import linked_categorical_input_to_indicators

    #
    # building a small mixture model
    features = [2, 2, 2, 2]
    n_features = len(features)
    half = n_features // 2

    def make_leaves(freqs_per_feature):
        # one unsmoothed leaf per feature, in feature order; every leaf gets
        # its own copy of the frequency list
        return [CategoricalSmoothedNode(i, features[i], alpha=0.0,
                                        freqs=list(freqs))
                for i, freqs in enumerate(freqs_per_feature)]

    #
    # different categorical vars groups as leaves: all mass on the second
    # value, all on the first, and the two half/half combinations
    second, first = [0, 1], [1, 0]
    group_patterns = [
        [second] * n_features,
        [first] * n_features,
        [first] * half + [second] * (n_features - half),
        [second] * half + [first] * (n_features - half),
    ]
    leaf_groups = [make_leaves(pattern) for pattern in group_patterns]

    input_layer = CategoricalSmoothedLayer(
        nodes=[leaf for group in leaf_groups for leaf in group])

    #
    # one product node for each group
    prod_nodes = []
    for group in leaf_groups:
        prod_node = ProductNode()
        for leaf in group:
            prod_node.add_child(leaf)
        prod_nodes.append(prod_node)

    prod_layer = ProductLayer(nodes=prod_nodes)

    #
    # one root as a mixture
    root = SumNode()
    for prod_node, weight in zip(prod_nodes, (0.5, 0.1, 0.2, 0.2)):
        root.add_child(prod_node, weight)

    root_layer = SumLayer(nodes=[root])

    spn = Spn(input_layer=input_layer, layers=[prod_layer, root_layer])

    print(spn)

    n_instances = 1000
    # raw samples are only dumped for very small runs
    dump_samples = n_instances < 20

    def sample_and_report(model, **sample_kwargs):
        # timing the sampling
        start_t = perf_counter()
        drawn = model.sample(n_instances=n_instances,
                             verbose=False,
                             **sample_kwargs)
        end_t = perf_counter()
        print('Sampled in {} secs'.format(end_t - start_t))
        if dump_samples:
            print(drawn)

        #
        # some statistics: samples are made hashable to be counted
        as_tuples = [tuple(s) for s in drawn]
        if dump_samples:
            print(as_tuples)

        print(Counter(as_tuples))

    #
    # sampling some instances
    sample_and_report(spn)

    #
    # transforming into an spn with indicator nodes
    print('Into indicator nodes')
    ind_start_t = perf_counter()
    spn = linked_categorical_input_to_indicators(spn)
    ind_end_t = perf_counter()
    print('Done in ', ind_end_t - ind_start_t)

    sample_and_report(spn, one_hot_encoding=True)
Example #6
0
def test_linked_to_theano_categorical():
    """Compare a hand-wired linked SPN with its theano translation.

    The network sits on a ``CategoricalSmoothedLayer`` with seven leaves and
    has a product root over two sum nodes and two leaves.  After conversion
    through ``SpnFactory.linked_to_theano`` both networks are evaluated on
    each instance in ``I`` and the outputs must be almost equal.

    NOTE(review): ``I`` is not defined in this function; it is presumably a
    module-level collection of instances -- confirm.
    """
    # entry k goes with variable k; 'freqs' lists below have matching lengths
    n_values = [2, 2, 3, 4]
    node_specs = [
        dict(var=0, freqs=[1, 2]),
        dict(var=1, freqs=[2, 2]),
        dict(var=0, freqs=[3, 2]),
        dict(var=1, freqs=[0, 3]),
        dict(var=2, freqs=[1, 0, 2]),
        dict(var=3, freqs=[1, 2, 1, 2]),
        dict(var=3, freqs=[3, 4, 0, 1]),
    ]

    # create input layer first, then grab its nodes
    input_layer = CategoricalSmoothedLayer(vars=n_values,
                                           node_dicts=node_specs)
    leaf = list(input_layer.nodes())

    root_node = ProductNode()

    sum1 = SumNode()
    sum2 = SumNode()

    prod1 = ProductNode()
    prod2 = ProductNode()

    sum3 = SumNode()
    sum4 = SumNode()

    # linking, one group per parent; each inner tuple holds the positional
    # arguments of one add_child call (a weight is given for sum nodes only)
    wiring = [
        (root_node, [(sum1,), (sum2,), (leaf[0],), (leaf[1],)]),
        (sum1, [(leaf[2], 0.4), (leaf[3], 0.6)]),
        (sum2, [(leaf[3], 0.2), (prod1, 0.5), (prod2, 0.3)]),
        (prod1, [(leaf[4],), (sum3,), (sum4,)]),
        (prod2, [(sum3,), (sum4,)]),
        (sum3, [(leaf[5], 0.5), (leaf[6], 0.5)]),
        (sum4, [(leaf[5], 0.4), (leaf[6], 0.6)]),
    ]
    for parent, child_calls in wiring:
        for call_args in child_calls:
            parent.add_child(*call_args)

    # creating layers
    root_layer = ProductLayerLinked([root_node])
    sum_layer = SumLayerLinked([sum1, sum2])
    prod_layer = ProductLayerLinked([prod1, prod2])
    sum_layer2 = SumLayerLinked([sum3, sum4])

    # create the linked spn, layers ordered from above the leaves to the root
    spn_linked = SpnLinked(
        input_layer=input_layer,
        layers=[sum_layer2, prod_layer, sum_layer, root_layer])

    print(spn_linked)

    # converting to theano repr
    spn_theano = SpnFactory.linked_to_theano(spn_linked)
    print(spn_theano)

    # time for some inference comparison
    for instance in I:
        print('linked')
        out_linked = spn_linked.eval(instance)
        print(out_linked)
        print('theano')
        out_theano = spn_theano.eval(instance)
        print(out_theano)
        assert_array_almost_equal(out_linked, out_theano)