Example #1
0
def test_random_spn_em():
    """Smoke-test (soft) EM training of a random linked SPN on 'nltcs'.

    Builds an SPN top-down with fixed structural hyperparameters, checks
    structural validity, then runs 10 epochs of soft EM on the
    train/valid/test splits.
    """
    print('Loading datasets')
    train, valid, test = dataset.load_train_val_test_csvs('nltcs')

    # estimating the frequencies for the features
    # (only the feature cardinalities are needed here, frequencies unused)
    print('Estimating features')
    _freqs, features = dataset.data_2_freqs(train)

    # random-structure hyperparameters
    n_layers = 2
    n_max_children = 4
    n_scope_children = 5
    max_scope_split = 3
    merge_prob = 0.5

    print('Build random spn')
    spn = SpnFactory.linked_random_spn_top_down(features, n_layers,
                                                n_max_children,
                                                n_scope_children,
                                                max_scope_split, merge_prob)

    assert spn.is_valid()
    print('Stats\n')
    print(spn.stats())

    # soft EM (hard=False) for a fixed, small number of epochs
    spn.fit_em(train, valid, test, hard=False, n_epochs=10)
Example #2
0
def test_linked_random_spn_top_down():
    """Compare linked vs theano SPN evaluation on random structures.

    Repeatedly builds random SPNs (seeded generator, so the sequence is
    reproducible), translates each to a theano representation, and checks
    both give (almost) equal log-likelihoods on the shared test instances.
    """
    # small structural parameters
    n_levels = 10
    # renamed from `vars`: do not shadow the builtin
    feature_values = [2, 3, 2, 2, 4]
    n_max_children = 2
    n_scope_children = 3
    max_scope_split = 2
    merge_prob = 0.5

    # building it
    print('creating random spn')
    rand_gen = random.Random(789)

    #
    # doing this for more than once
    n_times = 10
    for _ in range(n_times):
        spn = SpnFactory.linked_random_spn_top_down(feature_values,
                                                    n_levels,
                                                    n_max_children,
                                                    n_scope_children,
                                                    max_scope_split,
                                                    merge_prob,
                                                    rand_gen=rand_gen)

        # printing for comparison
        print(spn)
        print(spn.stats())
        assert spn.is_valid()

        # translating to theano representation
        theano_spn = SpnFactory.linked_to_theano(spn)

        print(theano_spn)
        print(theano_spn.stats())

        #
        # looking for the same computations
        # NOTE(review): II is presumably a module-level list of test
        # instances — not visible in this chunk, confirm at module scope
        for instance in II:
            print('linked')
            res_l = spn.eval(instance)
            print(res_l)
            print('theano')
            res_t = theano_spn.eval(instance)
            print(res_t)
            assert_array_almost_equal(res_l, res_t)
Example #3
0
def test_linked_random_spn_top_down():
    """Check linked and theano SPN representations compute alike.

    Builds several random SPN structures from a seeded generator, converts
    each to its theano form, and asserts near-equal evaluation results on
    the module-level test instances.
    """
    # small structural parameters
    n_levels = 10
    # renamed from `vars`: avoid shadowing the builtin
    feature_values = [2, 3, 2, 2, 4]
    n_max_children = 2
    n_scope_children = 3
    max_scope_split = 2
    merge_prob = 0.5

    # building it
    print('creating random spn')
    rand_gen = random.Random(789)

    #
    # doing this for more than once
    n_times = 10
    for _ in range(n_times):
        spn = SpnFactory.linked_random_spn_top_down(feature_values,
                                                    n_levels,
                                                    n_max_children,
                                                    n_scope_children,
                                                    max_scope_split,
                                                    merge_prob,
                                                    rand_gen=rand_gen)

        # printing for comparison
        print(spn)
        print(spn.stats())
        assert spn.is_valid()

        # translating to theano representation
        theano_spn = SpnFactory.linked_to_theano(spn)

        print(theano_spn)
        print(theano_spn.stats())

        #
        # looking for the same computations
        # NOTE(review): II appears to hold test instances defined at module
        # level — not visible in this chunk
        for instance in II:
            print('linked')
            res_l = spn.eval(instance)
            print(res_l)
            print('theano')
            res_t = theano_spn.eval(instance)
            print(res_t)
            assert_array_almost_equal(res_l, res_t)
Example #4
0
def test_random_spn_sgd():
    """Smoke-test SGD training of a shallow, wide random SPN on 'nltcs'.

    Builds a one-layer SPN with very large fan-out limits (so the layer is
    effectively unconstrained), checks validity, then runs 10 epochs of
    soft SGD with batch size 1.
    """
    print('Loading datasets')
    train, valid, test = dataset.load_train_val_test_csvs('nltcs')

    # estimating the frequencies for the features
    # (only the feature cardinalities are needed here, frequencies unused)
    print('Estimating features')
    _freqs, features = dataset.data_2_freqs(train)

    # shallow-but-wide structural hyperparameters;
    # max_scope_split = -1 disables scope splitting
    n_layers = 1
    n_max_children = 2000
    n_scope_children = 2000
    max_scope_split = -1
    merge_prob = 0.5
    seed = 1337
    rand_gen = random.Random(seed)

    print('Build random spn')
    spn = SpnFactory.linked_random_spn_top_down(features,
                                                n_layers,
                                                n_max_children,
                                                n_scope_children,
                                                max_scope_split,
                                                merge_prob,
                                                rand_gen=rand_gen)

    assert spn.is_valid()
    print('Stats\n')
    print(spn.stats())

    # separate numpy generator (same seed) for SGD shuffling
    np_rand_gen = numpy.random.RandomState(seed)

    spn.fit_sgd(train,
                valid,
                test,
                learning_rate=0.2,
                n_epochs=10,
                batch_size=1,
                grad_method=1,
                validation_frequency=100,
                rand_gen=np_rand_gen,
                hard=False)
Example #5
0
def test_build_theanok_spn_from_block_linked_top_rand():
    """Compare linked and theano-block SPN evaluations on random structures.

    For several random SPNs (seeded generator): evaluates a small binary
    dataset through the linked SPN, through the theano block translation
    (single-shot, full-batch, and mini-batch), and asserts the results
    are almost equal throughout.
    """
    data = numpy.array([[1, 1, 0, 1, 0], [0, 1, 1, 1, 1], [1, 0, 0, 0, 0],
                        [1, 0, 1, 0, 1], [0, 1, 0, 1, 1], [1, 0, 0, 0, 0],
                        [1, 0, 1, 1, 1], [0, 0, 0, 0, 1]])

    # small structural parameters
    n_levels = 10
    # renamed from `vars`: avoid shadowing the builtin;
    # all five features are binary
    feature_values = [2, 2, 2, 2, 2]
    n_max_children = 2
    n_scope_children = 3
    max_scope_split = 2
    merge_prob = 0.5

    # one-hot encode for the theano input layer
    ind_data = dataset.one_hot_encoding(data, feature_values=feature_values)

    # building it
    print('creating random spn')
    rand_gen = random.Random(789)

    #
    # doing this for more than once
    n_times = 10
    for i in range(n_times):

        print('\n\n******* Trial {}/{} *******\n'.format(i + 1, n_times))
        spn = SpnFactory.linked_random_spn_top_down(feature_values,
                                                    n_levels,
                                                    n_max_children,
                                                    n_scope_children,
                                                    max_scope_split,
                                                    merge_prob,
                                                    rand_gen=rand_gen)

        # printing for comparison
        print(spn)
        print(spn.stats())
        assert spn.is_valid()

        # translating to theanok representation; cap edges per layer to
        # exercise the layer-splitting code path
        max_nodes = 2
        theano_spn = build_theanok_spn_from_block_linked_top(
            spn, ind_data.shape[1], feature_values,
            max_n_edges_layer=max_nodes)

        print(theano_spn)

        # linked SPN evaluates instances column-wise, hence the transpose
        res = spn.eval(data.T)
        print('Linked Spn res', res)

        t_res = theano_spn.evaluate(ind_data)
        print('Theano Spn res', t_res)

        assert_array_almost_equal(numpy.array(res), numpy.array(t_res))

        #
        # evaluate the whole dataset as one batch
        batch_preds = evaluate_on_dataset_batch(theano_spn, ind_data)
        print('Theano batch res', batch_preds)
        assert_array_almost_equal(
            numpy.array(res).flatten(), numpy.array(batch_preds))

        # and in mini-batches (size not dividing the dataset, to hit the
        # remainder path)
        batch_size = 3
        minibatch_preds = evaluate_on_dataset_batch(theano_spn, ind_data,
                                                    batch_size)
        print('Theano mini batch res', minibatch_preds)
        assert_array_almost_equal(
            numpy.array(res).flatten(), numpy.array(minibatch_preds))