def save_exp(spn, ds_name, size, words, data):
    """Persist an experiment next to this file, under ``spns/<ds_name>_<size>/``:
    the SPN as an equation, as a reference graph, as JSON, its structure
    statistics plus FPGA op counts, and the raw data as a ';'-separated table."""
    print(get_structure_stats(spn))

    out_dir = os.path.dirname(__file__) + "/spns/%s_%s/" % (ds_name, size)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    with open(out_dir + "eqq.txt", "w") as fh:
        print(spn_to_str_equation(spn, words), file=fh)

    with open(out_dir + "spn.txt", "w") as fh:
        print(spn_to_str_ref_graph(spn, words), file=fh)

    with codecs.open(out_dir + "spn.json", "w", "utf-8-sig") as fh:
        fh.write(to_JSON(spn))

    with codecs.open(out_dir + "stats.txt", "w", "utf-8-sig") as fh:
        fh.write(get_structure_stats(spn) + "\n")
        # fpga_count_ops returns an (adds, muls) pair consumed by the % tuple.
        fh.write("ads=%s \t muls=%s\n" % fpga_count_ops(spn))

    np.savetxt(out_dir + "all_data.txt", data, delimiter=";", header=";".join(words))
def learn_CNET():
    """Demo: fit two cutset networks (naive-MLE and random conditioning) on
    synthetic binary data, print their stats/equations/likelihoods, and run
    one MPE query with the first feature missing."""
    import numpy as np

    np.random.seed(123)
    # Four independent Bernoulli features with increasing marginals.
    samples = np.random.binomial(1, [0.1, 0.2, 0.3, 0.4], size=(1000, 4))
    print(np.mean(samples, axis=0))

    from spn.structure.leaves.cltree.CLTree import create_cltree_leaf
    from spn.structure.Base import Context
    from spn.structure.leaves.parametric.Parametric import Bernoulli

    ds_context = Context(
        parametric_types=[Bernoulli, Bernoulli, Bernoulli, Bernoulli]
    ).add_domains(samples)

    from spn.algorithms.LearningWrappers import learn_parametric, learn_cnet

    mle_cnet = learn_cnet(
        samples, ds_context, cond="naive_mle", min_instances_slice=20, min_features_slice=1
    )
    rand_cnet = learn_cnet(
        samples, ds_context, cond="random", min_instances_slice=20, min_features_slice=1
    )

    from spn.algorithms.Statistics import get_structure_stats
    from spn.io.Text import spn_to_str_equation
    from spn.algorithms.Inference import log_likelihood

    # Report both models in the same order as they were learned.
    for cnet in (mle_cnet, rand_cnet):
        print(get_structure_stats(cnet))
        print(spn_to_str_equation(cnet))
        print(np.mean(log_likelihood(cnet, samples)))

    from spn.algorithms.MPE import mpe

    # Blank out the first column and infer its most probable values.
    query = samples.astype(float)
    query[:, 0] = np.nan
    print(mpe(rand_cnet, query)[:30])
    print(np.mean(log_likelihood(rand_cnet, query)))
def learn_PSPN():
    """Demo: learn a parametric SPN on mixed categorical/Gaussian data and
    print its structure statistics."""
    import numpy as np

    np.random.seed(123)
    # Two categorical columns, one bimodal Gaussian column, and a column
    # that is a deterministic linear mix of the other three.
    col_a = np.random.randint(2, size=1000).reshape(-1, 1)
    col_b = np.random.randint(3, size=1000).reshape(-1, 1)
    col_c = np.vstack(
        (np.random.normal(10, 5, (300, 1)), np.random.normal(20, 10, (700, 1)))
    )
    col_d = 5 * col_a + 3 * col_b + col_c
    train_data = np.hstack((col_a, col_b, col_c, col_d))

    from spn.structure.Base import Context
    from spn.structure.leaves.parametric.Parametric import Categorical, Gaussian

    ds_context = Context(
        parametric_types=[Categorical, Categorical, Gaussian, Gaussian]
    ).add_domains(train_data)

    from spn.algorithms.LearningWrappers import learn_parametric

    spn = learn_parametric(train_data, ds_context, min_instances_slice=20)

    from spn.algorithms.Statistics import get_structure_stats

    print(get_structure_stats(spn))
def learn_MSPN():
    """Demo: learn a mixed SPN (MSPN) using only meta-type information and
    print its structure statistics."""
    import numpy as np

    np.random.seed(123)
    # Two discrete columns, one bimodal real column, and a linear mix.
    col_a = np.random.randint(2, size=1000).reshape(-1, 1)
    col_b = np.random.randint(3, size=1000).reshape(-1, 1)
    col_c = np.vstack(
        (np.random.normal(10, 5, (300, 1)), np.random.normal(20, 10, (700, 1)))
    )
    col_d = 5 * col_a + 3 * col_b + col_c
    train_data = np.hstack((col_a, col_b, col_c, col_d))

    from spn.structure.Base import Context
    from spn.structure.StatisticalTypes import MetaType

    ds_context = Context(
        meta_types=[MetaType.DISCRETE, MetaType.DISCRETE, MetaType.REAL, MetaType.REAL]
    ).add_domains(train_data)

    from spn.algorithms.LearningWrappers import learn_mspn

    mspn = learn_mspn(train_data, ds_context, min_instances_slice=20)

    from spn.algorithms.Statistics import get_structure_stats

    print(get_structure_stats(mspn))
def load_whittle_spn_1d(ARGS):
    """Load the pickled 1-D Whittle SPN for the given run configuration.

    Args:
        ARGS: experiment arguments; forwarded to ``get_save_path`` to
            resolve the model directory.

    Returns:
        The unpickled SPN root node.
    """
    save_path = get_save_path(ARGS)
    # 'with' guarantees the handle is closed even if pickle.load raises;
    # the original open()/close() pair leaked the handle on error.
    # NOTE(review): pickle.load executes arbitrary code - only load trusted files.
    with open(save_path + 'wspn_1d.pkl', 'rb') as f:
        spn = pickle.load(f)
    # Log the structure stats both to stdout and the module logger.
    log_msg = get_structure_stats(spn)
    print(log_msg)
    logger.info(log_msg)
    return spn
def learn_CLTSPN():
    """Demo: learn an SPN whose multivariate leaves are Chow-Liu trees on
    synthetic binary data, then print stats, equation and mean log-likelihood."""
    import numpy as np

    np.random.seed(123)
    probs = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.1]
    train_data = np.random.binomial(1, probs, size=(100, 10))
    print(np.mean(train_data, axis=0))

    from spn.structure.leaves.cltree.CLTree import create_cltree_leaf
    from spn.structure.Base import Context
    from spn.structure.leaves.parametric.Parametric import Bernoulli

    # All ten features are binary.
    ds_context = Context(parametric_types=[Bernoulli] * 10).add_domains(train_data)

    from spn.algorithms.LearningWrappers import learn_parametric

    spn = learn_parametric(
        train_data,
        ds_context,
        min_instances_slice=20,
        min_features_slice=1,
        multivariate_leaf=True,
        leaves=create_cltree_leaf,
    )

    from spn.algorithms.Statistics import get_structure_stats

    print(get_structure_stats(spn))

    from spn.io.Text import spn_to_str_equation

    print(spn_to_str_equation(spn))

    from spn.algorithms.Inference import log_likelihood

    print(np.mean(log_likelihood(spn, train_data)))
def load_whittle_spn_res(args):
    """Load the pickled residual ('res') Whittle SPN from a hard-coded path.

    Args:
        args: unused; kept for signature compatibility with the other
            ``load_whittle_spn_*`` loaders.

    Returns:
        The unpickled SPN root node.
    """
    # load res-spn, need to be modified when model changed
    log_msg = 'Have you set the latest model path?'
    print(log_msg)
    logger.info(log_msg)
    rspn_path = 'ventola/em_optimized_fuse_spn_yu_sine'
    # 'with' guarantees the handle is closed even if pickle.load raises;
    # the original open()/close() pair leaked the handle on error.
    # NOTE(review): pickle.load executes arbitrary code - only load trusted files.
    with open(rspn_path, 'rb') as f:
        rspn = pickle.load(f)
    # Log the structure stats both to stdout and the module logger.
    log_msg = get_structure_stats(rspn)
    print(log_msg)
    logger.info(log_msg)
    return rspn
def test_bernoulli_spn_ll(self):
    """Learn a Bernoulli SPN on the first three DNA features and print its
    structure statistics together with the sympy rendering."""
    # Only the first three binary columns of the 'dna' training split.
    train_data = get_binary_data("dna")[3][:, 0:3]
    ds_context = Context(
        parametric_types=[Bernoulli] * 3, feature_names=["x0", "x1", "x2"]
    ).add_domains(train_data)

    from spn.algorithms.LearningWrappers import learn_parametric

    spn = learn_parametric(train_data, ds_context, min_instances_slice=1500)
    print(get_structure_stats(spn))
    print(spn_to_sympy(spn))
def stats():
    """Build the example SPN and print its structure statistics."""
    network = create_SPN()
    from spn.algorithms.Statistics import get_structure_stats

    print(get_structure_stats(network))
# Build a small random region graph over a 28x28 variable grid, timing each step.
start = time.perf_counter()
rg = RegionGraph(range(28 * 28))
for _ in range(2):  # for _ in range(0, 20):
    rg.random_split(2, 2)
rg_layers = rg.make_layers()
print("random graph built in ", time.perf_counter() - start)

# Instantiate an SPN from the region graph with a fixed RNG seed.
start = time.perf_counter()
vector_list, root = Make_SPN_from_RegionGraph(
    rg_layers, np.random.RandomState(100), num_classes=1, num_gauss=20, num_sums=20
)
print("Make_SPN_from_RegionGraph in ", time.perf_counter() - start)

# Structure statistics before any post-processing.
start = time.perf_counter()
print(get_structure_stats(root))
print("get_structure_stats in ", time.perf_counter() - start)

# Keep an untouched copy, then prune and reshape the network.
old_root = Copy(root)

start = time.perf_counter()
root = Prune(root)
print("Prune in ", time.perf_counter() - start)

start = time.perf_counter()
root = SPN_Reshape(root, 2)
print("SPN_Reshape in ", time.perf_counter() - start)

# Statistics again after pruning/reshaping, for comparison.
start = time.perf_counter()
print(get_structure_stats(root))
print("get_structure_stats in ", time.perf_counter() - start)
np.asarray(["categorical"] * F), ) if __name__ == "__main__": ds_name, data, train, test, words, statistical_type, distribution_family = get_RL_data( ) ds_context = Context() ds_context.statistical_type = statistical_type ds_context.distribution_family = distribution_family add_domains(data, ds_context) spn = learn(train, ds_context, min_instances_slice=100, linear=True) print(get_structure_stats(spn)) # print(to_str_ref_graph(spn, histogram_to_str)) spn_marg = marginalize(spn, set([0])) # print(to_str_equation(spn_marg, histogram_to_str)) def eval_conditional(data): return conditional_log_likelihood(spn, spn_marg, data, histogram_likelihood) print(eval_conditional(train[0, :].reshape(1, -1))) import dill
print('\033[1mStart SPN training...\033[0m') start_time = time.time() spn = learn_classifier(data=train_data, ds_context=context, spn_learn_wrapper=learn_parametric, label_idx=label_idx, min_instances_slice=min_instances_slice, threshold=threshold, cpus=num_threads) duration = time.time() - start_time print('\033[1mFinished training after %.3f sec.\033[0m' % duration) # Model performance evaluation spn_stats = get_structure_stats(spn) print(spn_stats, end="") stats_file = open(plot_path + "/spn_stats.txt", "w+") stats_file.write(spn_stats) # plot_spn(spn, plot_path + "/spn_struct.pdf") (predicted_train_labels, correct_train_preds), (predicted_test_labels, correct_test_preds) = \ evaluate_spn_performance(spn, train_samples, train_labels, test_samples, test_labels, label_idx, stats_file) # Save metadata into stats file metadata = "\nSeed: %d\n" % seed + \ "Test sample ID: %d\n" % t + \ "Noisy dataset: %r\n" % noisy_dataset + \ "Minimum instances per slice: %d\n" % min_instances_slice + \ "Alpha (threshold): %f\n" % threshold + \ "Type of loss: %s\n" % type_of_loss + \
rg.random_split(2, 2) rg_layers = rg.make_layers() logger.info("random graph built in %s", (time.perf_counter() - start)) start = time.perf_counter() vector_list, root = Make_SPN_from_RegionGraph(rg_layers, np.random.RandomState(100), num_classes=1, num_gauss=20, num_sums=20) logger.info("Make_SPN_from_RegionGraph in %s", (time.perf_counter() - start)) start = time.perf_counter() logger.info(get_structure_stats(root)) logger.info("get_structure_stats in %s", (time.perf_counter() - start)) old_root = Copy(root) start = time.perf_counter() root = Prune(root) logger.info("Prune in %s", (time.perf_counter() - start)) start = time.perf_counter() root = SPN_Reshape(root, 2) logger.info("SPN_Reshape in %s", (time.perf_counter() - start)) start = time.perf_counter() logger.info(get_structure_stats(root)) logger.info("get_structure_stats in %s", (time.perf_counter() - start))