def run_experiment(dataset, top_n_features, linear=False):
    """Learn an MSPN on a dataset truncated to its leading features, then persist it.

    Parameters
    ----------
    dataset : tuple
        Unpacked as (ds_name, words, data, train, _, statistical_type, _).
    top_n_features : int
        Number of leading columns of data/train (and entries of words) to keep.
    linear : bool
        Forwarded to learn_mspn.

    NOTE(review): relies on a module-level `memory` object passed to
    learn_mspn — presumably a joblib-style cache; confirm it is defined
    at module scope before this function runs.
    """
    ds_name, words, data, train, _, statistical_type, _ = dataset

    # Restrict every array (and the feature-name list) to the first
    # top_n_features columns.
    data = data[:, :top_n_features]
    words = words[:top_n_features]
    train = train[:, :top_n_features]

    ds_context = Context()
    ds_context.statistical_type = statistical_type
    add_domains(data, ds_context)

    spn = learn_mspn(train, ds_context, linear=linear, memory=memory)
    save_exp(spn, ds_name, top_n_features, words, data)
def run_experiment_binary(ds_file, min_instances=200, threshold=0.3):
    """Learn a linear MSPN from a binary dataset file and persist the experiment.

    Parameters
    ----------
    ds_file : path-like
        Passed to get_binary_data to load the dataset.
    min_instances : int
        min_instances_slice for learn_mspn; also used as the experiment tag
        passed to save_exp.
    threshold : float
        RDC threshold forwarded to learn_mspn.

    Side effects: prints the training-data shape and the FPGA op count of
    the learned SPN, then saves the experiment via save_exp.

    NOTE(review): uses a module-level `memory` object — confirm it exists.
    """
    ds_name, words, data, train, _, statistical_type, _ = get_binary_data(ds_file)

    ds_context = Context()
    ds_context.statistical_type = statistical_type
    add_domains(data, ds_context)

    print("train data shape", train.shape)
    spn = learn_mspn(
        train,
        ds_context,
        min_instances_slice=min_instances,
        threshold=threshold,
        linear=True,
        memory=memory,
    )
    print(fpga_count_ops(spn))
    save_exp(spn, ds_name, min_instances, words, data)
@author: Alejandro Molina ''' from spn.algorithms import Inference from spn.algorithms.StructureLearning import learn_structure from spn.algorithms.splitting.Clustering import get_split_rows_KMeans from spn.algorithms.splitting.RDC import get_split_cols_RDC from spn.data.datasets import get_nips_data from spn.structure.Base import Context from spn.structure.leaves.Histograms import add_domains, create_histogram_leaf if __name__ == '__main__': import numpy as np ds_name, words, data, train, _, statistical_type, _ = get_nips_data() print(words) print(data) ds_context = Context() ds_context.statistical_type = np.asarray(["discrete"] * data.shape[1]) add_domains(data, ds_context) spn = learn_structure(data, ds_context, get_split_rows_KMeans(), get_split_cols_RDC(), create_histogram_leaf) # print(to_str_equation(spn, words)) print(Inference.likelihood(spn, data[0:100, :]))
# NOTE(review): chunk boundary — this line begins in the middle of the return
# tuple of a get_RL_data() defined above this view, and ends at the header of
# eval_conditional whose body lies below this view. Left byte-identical; a
# safe rewrite is not possible without the surrounding lines.
"FROZEN_LAKE", D, train, test, np.asarray(words), np.asarray(["discrete"] * F), np.asarray(["categorical"] * F), ) if __name__ == "__main__": ds_name, data, train, test, words, statistical_type, distribution_family = get_RL_data( ) ds_context = Context() ds_context.statistical_type = statistical_type ds_context.distribution_family = distribution_family add_domains(data, ds_context) spn = learn(train, ds_context, min_instances_slice=100, linear=True) print(get_structure_stats(spn)) # print(to_str_ref_graph(spn, histogram_to_str)) spn_marg = marginalize(spn, set([0])) # print(to_str_equation(spn_marg, histogram_to_str)) def eval_conditional(data):