def test_random_spn_em():
    """Build a random linked SPN over the nltcs dataset and fit it with soft EM.

    Smoke test: asserts structural validity of the generated SPN, then runs
    a short (10-epoch) EM fit monitored on the validation/test splits.
    """
    print('Loading datasets')
    train, valid, test = dataset.load_train_val_test_csvs('nltcs')

    # estimating the frequencies for the features
    # (frequencies are unused here; only the feature value counts are needed)
    print('Estimating features')
    _freqs, features = dataset.data_2_freqs(train)

    # random-structure hyperparameters
    n_layers = 2
    n_max_children = 4
    n_scope_children = 5
    max_scope_split = 3
    merge_prob = 0.5

    print('Build random spn')
    spn = SpnFactory.linked_random_spn_top_down(features,
                                                n_layers,
                                                n_max_children,
                                                n_scope_children,
                                                max_scope_split,
                                                merge_prob)
    assert spn.is_valid()

    print('Stats\n')
    print(spn.stats())

    # soft EM (hard=False) for a handful of epochs
    spn.fit_em(train, valid, test, hard=False, n_epochs=10)
def test_linked_random_spn_top_down():
    """Repeatedly build small random linked SPNs with a fixed seed, check
    validity, and verify the theano translation computes identical results.
    """
    # small structure parameters
    n_levels = 10
    # renamed from `vars`, which shadowed the builtin
    var_values = [2, 3, 2, 2, 4]
    n_max_children = 2
    n_scope_children = 3
    max_scope_split = 2
    merge_prob = 0.5

    # building it with a fixed seed for reproducibility
    print('creating random spn')
    rand_gen = random.Random(789)

    # doing this more than once
    n_times = 10
    for _ in range(n_times):
        spn = SpnFactory.linked_random_spn_top_down(var_values,
                                                    n_levels,
                                                    n_max_children,
                                                    n_scope_children,
                                                    max_scope_split,
                                                    merge_prob,
                                                    rand_gen=rand_gen)

        # printing for comparison
        print(spn)
        print(spn.stats())
        assert spn.is_valid()

        # translating to theano representation
        theano_spn = SpnFactory.linked_to_theano(spn)
        print(theano_spn)
        print(theano_spn.stats())

        # looking for the same computations:
        # both representations must agree instance by instance.
        # NOTE(review): `II` is not defined in this chunk — presumably a
        # module-level collection of test instances; confirm it exists at
        # module scope.
        for instance in II:
            print('linked')
            res_l = spn.eval(instance)
            print(res_l)
            print('theano')
            res_t = theano_spn.eval(instance)
            print(res_t)
            assert_array_almost_equal(res_l, res_t)
# NOTE(review): this is an exact duplicate of the test_linked_random_spn_top_down
# defined earlier in this file — the second definition silently shadows the
# first, so pytest collects and runs only this one. One of the two should be
# removed or renamed.
def test_linked_random_spn_top_down():
    """Repeatedly build small random linked SPNs, check validity, and compare
    linked vs theano inference on the same instances."""
    # number small parameters
    n_levels = 10
    vars = [2, 3, 2, 2, 4]  # NOTE(review): shadows the builtin `vars`
    n_max_children = 2
    n_scope_children = 3
    max_scope_split = 2
    merge_prob = 0.5
    # building it
    print('creating random spn')
    rand_gen = random.Random(789)
    #
    # doing this for more than once
    n_times = 10
    for _i in range(n_times):
        spn = SpnFactory.linked_random_spn_top_down(vars, n_levels, n_max_children, n_scope_children, max_scope_split, merge_prob, rand_gen=rand_gen)
        # printing for comparison
        print(spn)
        print(spn.stats())
        assert spn.is_valid()
        # translating to theano representation
        theano_spn = SpnFactory.linked_to_theano(spn)
        print(theano_spn)
        print(theano_spn.stats())
        #
        # looking for the same computations
        # time for some inference comparison
        # NOTE(review): `II` is not defined in this chunk — presumably a
        # module-level collection of test instances; verify at module scope.
        for instance in II:
            print('linked')
            res_l = spn.eval(instance)
            print(res_l)
            print('theano')
            res_t = theano_spn.eval(instance)
            print(res_t)
            assert_array_almost_equal(res_l, res_t)
def test_random_spn_sgd():
    """Build a shallow, wide random linked SPN over nltcs and fit it with SGD.

    Smoke test: asserts structural validity, then runs a short (10-epoch)
    stochastic gradient fit with batch size 1 under fixed seeds.
    """
    print('Loading datasets')
    train, valid, test = dataset.load_train_val_test_csvs('nltcs')

    # estimating the frequencies for the features
    # (frequencies are unused here; only the feature value counts are needed)
    print('Estimating features')
    _freqs, features = dataset.data_2_freqs(train)

    # random-structure hyperparameters: a single very wide layer
    # (max_scope_split = -1 disables scope splitting)
    n_layers = 1
    n_max_children = 2000
    n_scope_children = 2000
    max_scope_split = -1
    merge_prob = 0.5

    # fixed seeds for reproducibility of both structure and SGD shuffling
    seed = 1337
    rand_gen = random.Random(seed)

    print('Build random spn')
    spn = SpnFactory.linked_random_spn_top_down(features,
                                                n_layers,
                                                n_max_children,
                                                n_scope_children,
                                                max_scope_split,
                                                merge_prob,
                                                rand_gen=rand_gen)
    assert spn.is_valid()

    print('Stats\n')
    print(spn.stats())

    np_rand_gen = numpy.random.RandomState(seed)
    spn.fit_sgd(train, valid, test,
                learning_rate=0.2,
                n_epochs=10,
                batch_size=1,
                grad_method=1,
                validation_frequency=100,
                rand_gen=np_rand_gen,
                hard=False)
def test_build_theanok_spn_from_block_linked_top_rand():
    """Build random linked SPNs over one-hot encoded binary data and verify
    the theano block representation computes the same values, both per
    instance and in (mini)batch evaluation.
    """
    data = numpy.array([[1, 1, 0, 1, 0],
                        [0, 1, 1, 1, 1],
                        [1, 0, 0, 0, 0],
                        [1, 0, 1, 0, 1],
                        [0, 1, 0, 1, 1],
                        [1, 0, 0, 0, 0],
                        [1, 0, 1, 1, 1],
                        [0, 0, 0, 0, 1]])

    # small structure parameters
    n_levels = 10
    # renamed from `vars`, which shadowed the builtin; all features binary
    feature_values = [2, 2, 2, 2, 2]
    n_max_children = 2
    n_scope_children = 3
    max_scope_split = 2
    merge_prob = 0.5

    ind_data = dataset.one_hot_encoding(data, feature_values=feature_values)

    # building it with a fixed seed for reproducibility
    print('creating random spn')
    rand_gen = random.Random(789)

    # doing this more than once
    n_times = 10
    for i in range(n_times):
        print('\n\n******* Trial {}/{} *******\n'.format(i + 1, n_times))
        spn = SpnFactory.linked_random_spn_top_down(feature_values,
                                                    n_levels,
                                                    n_max_children,
                                                    n_scope_children,
                                                    max_scope_split,
                                                    merge_prob,
                                                    rand_gen=rand_gen)

        # printing for comparison
        print(spn)
        print(spn.stats())
        assert spn.is_valid()

        # translating to theanok representation, capping edges per layer
        max_nodes = 2
        theano_spn = build_theanok_spn_from_block_linked_top(
            spn, ind_data.shape[1], feature_values, max_n_edges_layer=max_nodes)
        print(theano_spn)

        # linked SPN evaluates instances column-wise, hence the transpose
        res = spn.eval(data.T)
        print('Linked Spn res', res)

        t_res = theano_spn.evaluate(ind_data)
        print('Theano Spn res', t_res)
        assert_array_almost_equal(numpy.array(res), numpy.array(t_res))

        # evaluate whole dataset as a single batch
        batch_preds = evaluate_on_dataset_batch(theano_spn, ind_data)
        print('Theano batch res', batch_preds)
        assert_array_almost_equal(
            numpy.array(res).flatten(), numpy.array(batch_preds))

        # evaluate in minibatches (size not dividing the dataset evenly)
        batch_size = 3
        minibatch_preds = evaluate_on_dataset_batch(theano_spn,
                                                    ind_data,
                                                    batch_size)
        print('Theano mini batch res', minibatch_preds)
        assert_array_almost_equal(
            numpy.array(res).flatten(), numpy.array(minibatch_preds))