def test_naive_factorization(self):
    """naive_factorization must emit one CREATE_LEAF_NODE task per scope
    variable, all parented under a new Product node, without mutating the
    input data."""
    np.random.seed(17)
    data = np.arange(0, 1000).reshape(-1, 8)
    parent = Sum()
    parent.children.append(None)
    ctx = Context()
    ctx.feature_size = 4
    scope = [1, 3, 4, 6]
    # independent copy so in-place mutation of the input can be detected
    data2 = np.array(data)
    result = naive_factorization(data=data2, parent=parent, pos=0, context=ctx, scope=list(scope))
    # the input data must be untouched
    self.assertListEqual(data.tolist(), data2.tolist())
    # the new node must have been inserted at parent.children[0]
    self.assertEqual(parent.children[0], result[0][1]['parent'])
    y, x = get_YX(data, 4)
    # one follow-up task per scope variable
    self.assertEqual(len(result), len(scope))
    for i, s in enumerate(scope):
        r = result[i]
        self.assertEqual(len(r), 2)
        self.assertEqual(r[0], SplittingOperations.CREATE_LEAF_NODE)
        self.assertEqual(type(r[1]['parent']), Product)
        self.assertEqual(r[1]['pos'], i)
        self.assertListEqual(r[1]['scope'], [s])
        # each leaf task gets its own y column plus all conditioning features
        self.assertListEqual(r[1]['data'].tolist(), concatenate_yx(y[:, i], x).tolist())
def test_conditional(self):
    """Smoke test: learn a CSPN structure using a KMeans-based label
    conditional on two Gaussian feature clusters with binary labels."""
    labels = np.c_[np.zeros((500, 1)), np.ones((500, 1))]
    features = np.c_[
        np.r_[np.random.normal(5, 1, (500, 2)), np.random.normal(10, 1, (500, 2))]
    ]
    train_data = concatenate_yx(labels, features)
    ds_context = Context(
        parametric_types=[Bernoulli] * labels.shape[1]
    ).add_domains(labels)
    ds_context.feature_size = 2

    def label_conditional(y, x):
        # Cluster the labels into two groups; the split drives row partitioning.
        # NOTE(review): `precompute_distances` was removed in scikit-learn 1.0 —
        # confirm the pinned sklearn version still accepts it.
        from sklearn.cluster import KMeans
        clusters = KMeans(
            n_clusters=2, random_state=17, precompute_distances=True
        ).fit_predict(y)
        return clusters

    spn = learn_cspn_structure(
        train_data,
        ds_context,
        split_rows=get_split_conditional_rows_KMeans(),
        split_cols=getCIGroup(),
        create_leaf=create_conditional_leaf,
        label_conditional=label_conditional,
        cluster_univariate=True,
    )
def learn_parametric_spn(data, parametric_types):
    """Learn a parametric SPN from `data`.

    :param data: 2D numpy array (rows = instances, columns = variables)
    :param parametric_types: one parametric leaf type per column
    :return: root node of the learned SPN
    """
    from spn.algorithms.LearningWrappers import learn_parametric

    # Fix: domains were previously added twice (once via the chained call,
    # once via an explicit add_domains) — a single call is sufficient.
    ds_context = Context(parametric_types=parametric_types).add_domains(data)
    spn = learn_parametric(data, ds_context, min_instances_slice=100, threshold=0.01)
    return spn
def test_leaf_mpe_bernoulli(self):
    """MPE on a conditional Bernoulli leaf should complete the missing label
    with the value associated to each feature cluster, and reject query
    rows whose label slot is already filled."""
    np.random.seed(17)
    x = np.concatenate(
        (
            np.random.multivariate_normal([10, 10], np.eye(2), 5000),
            np.random.multivariate_normal([1, 1], np.eye(2), 5000),
        ),
        axis=0,
    )
    y = np.array([0] * 5000 + [1] * 5000).reshape(-1, 1)
    # associates y=0 with X=[10,10]
    # associates y=1 with X=[1,1]
    data = concatenate_yx(y, x)
    ds_context = Context(parametric_types=[Bernoulli])
    ds_context.feature_size = 2
    leaf = create_conditional_leaf(data, ds_context, [0])
    # np.nan marks the label value to be filled in by MPE
    res = mpe(leaf, np.array([np.nan, 10, 10]).reshape(-1, 3))
    self.assertAlmostEqual(res[0, 0], 0)
    res = mpe(leaf, np.array([np.nan, 1, 1]).reshape(-1, 3))
    self.assertAlmostEqual(res[0, 0], 1)
    # batched query: two rows at once
    res = mpe(leaf, np.array([np.nan, 1, 1, np.nan, 10, 10]).reshape(-1, 3))
    self.assertAlmostEqual(res[0, 0], 1)
    self.assertAlmostEqual(res[1, 0], 0)
    # presumably rejected because the third row supplies an observed label (5)
    # instead of nan — verify against the mpe contract
    with self.assertRaises(AssertionError):
        mpe(leaf, np.array([np.nan, 1, 1, np.nan, 10, 10, 5, 10, 10]).reshape(-1, 3))
def test_optimization(self):
    """EM optimization must not decrease the training log-likelihood and
    should recover the true mixture weights (0.5/0.5) and component means
    (10 and 30) after the parameters were deliberately perturbed."""
    np.random.seed(17)
    d1 = np.random.normal(10, 1, size=4000).tolist()
    d2 = np.random.normal(30, 1, size=4000).tolist()
    data = d1 + d2
    data = np.array(data).reshape((-1, 4))
    data = data.astype(np.float32)
    ds_context = Context(meta_types=[MetaType.REAL] * data.shape[1], parametric_types=[Gaussian] * data.shape[1])
    ds_context.add_domains(data)
    spn = learn_parametric(data, ds_context)
    # perturb the learned parameters so EM has something to correct
    spn.weights = [0.8, 0.2]
    spn.children[0].children[0].mean = 3.0
    py_ll = np.sum(log_likelihood(spn, data))
    print(spn.weights, spn.children[0].children[0].mean)
    EM_optimization(spn, data, iterations=1000)
    print(spn.weights, spn.children[0].children[0].mean)
    py_ll_opt = np.sum(log_likelihood(spn, data))
    # EM is guaranteed not to decrease the training likelihood
    self.assertLessEqual(py_ll, py_ll_opt)
    self.assertAlmostEqual(spn.weights[0], 0.5, 4)
    self.assertAlmostEqual(spn.weights[1], 0.5, 4)
    c1_mean = spn.children[0].children[0].mean
    c2_mean = spn.children[1].children[0].mean
    # component order is not guaranteed, so compare min/max of the means
    self.assertEqual(round(min(c1_mean, c2_mean)), 10)
    self.assertEqual(round(max(c1_mean, c2_mean)), 30)
def test_leaf_gaussian(self):
    """A conditional Gaussian leaf should assign higher likelihood to the
    label value that co-occurred with each feature cluster."""
    np.random.seed(17)
    x = np.concatenate(
        (
            np.random.multivariate_normal([10, 10], np.eye(2), 5000),
            np.random.multivariate_normal([1, 1], np.eye(2), 5000),
        ),
        axis=0,
    )
    y = np.array(
        np.random.normal(20, 2, 5000).tolist() + np.random.normal(60, 2, 5000).tolist()).reshape(-1, 1)
    # associates y=20 with X=[10,10]
    # associates y=60 with X=[1,1]
    data = concatenate_yx(y, x)
    ds_context = Context(parametric_types=[Gaussian])
    ds_context.feature_size = 2
    leaf = create_conditional_leaf(data, ds_context, [0])
    # likelihood must be defined everywhere on the training data
    self.assertFalse(np.any(np.isnan(likelihood(leaf, data))))
    # each label is more likely under its own feature cluster
    self.assertGreater(get_ll(leaf, [20, 10, 10]), get_ll(leaf, [20, 1, 1]))
    self.assertGreater(get_ll(leaf, [60, 1, 1]), get_ll(leaf, [60, 10, 10]))
    # regression values pinned from a known-good run
    self.assertAlmostEqual(get_ll(leaf, [60, 1, 1]), 0.3476232862652)
    self.assertAlmostEqual(get_ll(leaf, [20, 10, 10]), 0.3628922322773634)
def test_leaf_mpe_gaussian(self):
    """MPE on a parametric Gaussian leaf should complete a missing label with
    a value close to the mean associated with each feature cluster, and
    reject query rows whose label slot is already filled."""
    np.random.seed(17)
    x = np.concatenate(
        (
            np.random.multivariate_normal([10, 10], np.eye(2), 5000),
            np.random.multivariate_normal([1, 1], np.eye(2), 5000),
        ),
        axis=0,
    )
    y = np.array(np.random.normal(20, 2, 5000).tolist() + np.random.normal(60, 2, 5000).tolist()).reshape(-1, 1)
    # associates y=20 with X=[10,10]
    # associates y=60 with X=[1,1]
    data = concatenate_yx(y, x)
    ds_context = Context(parametric_types=[Gaussian])
    ds_context.feature_size = 2
    # leaf = create_conditional_leaf(data, ds_context, [0])
    leaf = create_parametric_leaf(data, ds_context, [0])
    # np.nan marks the label value to be filled in by MPE
    res = mpe(leaf, np.array([np.nan, 10, 10]).reshape(-1, 3))
    self.assertAlmostEqual(res[0, 0], 20.435226001909466)
    res = mpe(leaf, np.array([np.nan, 1, 1]).reshape(-1, 3))
    self.assertAlmostEqual(res[0, 0], 59.4752193542575)
    # batched query: two rows at once
    res = mpe(leaf, np.array([np.nan, 1, 1, np.nan, 10, 10]).reshape(-1, 3))
    self.assertAlmostEqual(res[0, 0], 59.4752193542575)
    self.assertAlmostEqual(res[1, 0], 20.435226001909466)
    # presumably rejected because the third row supplies an observed label (5)
    # instead of nan — verify against the mpe contract
    with self.assertRaises(AssertionError):
        mpe(leaf, np.array([np.nan, 1, 1, np.nan, 10, 10, 5, 10, 10]).reshape(-1, 3))
def test_conditional_probability(self):
    """Check conditional_probability on a hand-built categorical SPN.

    Same SPN as in the entropy test; the tiny train_data array exists only
    to generate the ds_context domains.
    """
    # only for generating the ds_context
    train_data = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [2.0, 0.0, 1.0]])
    # spn
    ds_context = Context(meta_types=[MetaType.DISCRETE] * 3)
    ds_context.add_domains(train_data)
    # NOTE(review): attribute name is `parametric_type` (singular) while
    # Context elsewhere in this codebase uses `parametric_types` — confirm
    # this is intentional and not a silently-ignored typo.
    ds_context.parametric_type = [Categorical] * 3
    # hand-built SPN: mixture of two products over three categorical RVs
    spn = 0.64 * (
        (
            Categorical(p=[0.25, 0.75, 0.0], scope=0)
            * (
                0.34 * ((Categorical(p=[7 / 34, 27 / 34], scope=1) * Categorical(p=[1.0, 0.0], scope=2)))
                + 0.66 * ((Categorical(p=[21 / 22, 1 / 22], scope=1) * Categorical(p=[0.0, 1.0], scope=2)))
            )
        )
    ) + 0.36 * (
        (
            Categorical(p=[0.0, 0.0, 1.0], scope=0)
            * (
                0.34 * ((Categorical(p=[7 / 34, 27 / 34], scope=1) * Categorical(p=[1.0, 0.0], scope=2)))
                + 0.66 * ((Categorical(p=[21 / 22, 1 / 22], scope=1) * Categorical(p=[0.0, 1.0], scope=2)))
            )
        )
    )
    # tests
    x_instance = np.array([1, 1, 0], dtype=float).reshape(1, -1)
    self.assertAlmostEqual(conditional_probability(spn, 2, x_instance)[0][0], 0.9)
    self.assertAlmostEqual(conditional_probability(spn, 0, x_instance)[0][0], 0.48)
    x_instance = np.array([2, 1, 0], dtype=float).reshape(1, -1)
    self.assertAlmostEqual(conditional_probability(spn, 0, x_instance)[0][0], 0.36)
def test_leaf_no_variance_gaussian(self):
    """A conditional Gaussian leaf fitted on a constant label should produce
    a constant likelihood across all rows (0.3989... is the N(0,1) pdf at
    its mode)."""
    np.random.seed(17)
    x = np.concatenate(
        (
            np.random.multivariate_normal([10, 10], np.eye(2), 500),
            np.random.multivariate_normal([1, 1], np.eye(2), 500),
        ),
        axis=0,
    )
    y = np.array([1] * 1000).reshape(-1, 1)
    data = concatenate_yx(y, x)
    ds_context = Context(parametric_types=[Gaussian])
    ds_context.feature_size = 2
    leaf = create_conditional_leaf(data, ds_context, [0])
    l = likelihood(leaf, data)
    # constant label -> identical likelihood for every row
    self.assertEqual(np.var(l[:, 0]), 0)
    self.assertAlmostEqual(l[0, 0], 0.398942280401432)
    data[:, 0] = 2
    leaf = create_conditional_leaf(data, ds_context, [0])
    l = likelihood(leaf, data)
    self.assertEqual(np.var(l[:, 0]), 0)
    self.assertAlmostEqual(l[0, 0], 0.398942280401432)
    data3 = np.array(data)
    data3[:, 0] = 3
    leaf = create_conditional_leaf(data3, ds_context, [0])
    # NOTE(review): the leaf was fitted on data3 (label 3) but is evaluated
    # on data (label 2); 0.2419... is the N(0,1) pdf one std from the mode,
    # so this cross-evaluation appears intentional — confirm.
    l = likelihood(leaf, data)
    self.assertAlmostEqual(np.var(l[:, 0]), 0)
    self.assertAlmostEqual(l[0, 0], 0.241970724519143)
def test_leaf_categorical(self):
    """Conditional Categorical leaf over three feature clusters: the per-class
    likelihoods must sum to one, and the generating class must dominate
    inside its own cluster while staying small elsewhere."""
    np.random.seed(17)
    x = np.concatenate(
        (
            np.random.multivariate_normal([20, 20], np.eye(2), 500),
            np.random.multivariate_normal([10, 10], np.eye(2), 500),
            np.random.multivariate_normal([1, 1], np.eye(2), 500),
        ),
        axis=0,
    )
    # class 2 -> cluster [20,20], class 1 -> [10,10], class 0 -> [1,1]
    y = np.array([2] * 500 + [1] * 500 + [0] * 500).reshape(-1, 1)
    data = concatenate_yx(y, x)
    ds_context = Context(parametric_types=[Categorical])
    ds_context.feature_size = 2
    leaf = create_conditional_leaf(data, ds_context, [0])
    # evaluate P(y=c | x) for each class c at every feature row
    l0 = likelihood(leaf, concatenate_yx(np.ones_like(y) * 0, x))
    l1 = likelihood(leaf, concatenate_yx(np.ones_like(y) * 1, x))
    l2 = likelihood(leaf, concatenate_yx(np.ones_like(y) * 2, x))
    # the three class probabilities form a distribution
    np.testing.assert_array_almost_equal(l0 + l1 + l2, 1.0)
    self.assertTrue(np.all(l0[1000:1500] > 0.85))
    self.assertTrue(np.all(l0[0:1000] < 0.15))
    self.assertTrue(np.all(l1[500:1000] > 0.85))
    self.assertTrue(np.all(l1[0:500] < 0.15))
    self.assertTrue(np.all(l1[1000:1500] < 0.15))
    self.assertTrue(np.all(l2[0:500] > 0.85))
    # Fix: the upper bound was 15000 — an out-of-range typo that numpy
    # silently clamps; the data has 1500 rows, so the intended slice is 500:1500.
    self.assertTrue(np.all(l2[500:1500] < 0.15))
def get_ds_context_sum(curr_train_data, scope, index, scope_index, params):
    """
    Return the SPFlow Context to use with the split_rows method while
    creating a sum node for an SPMN.

    :param curr_train_data: data slice for the current node
    :param scope: scope (variable indices) of the current node
    :param index: position in params.partial_order from which the remaining
        variable sets are taken
    :param scope_index: unused here; kept for signature parity with
        get_ds_context_prod
    :param params: SPMN parameters (partial_order, util_to_bin, utility_node)
    :return: Context with domains added from curr_train_data
    """
    n = curr_train_data.shape[1]
    # all variable sets from `index` to the end of the partial order
    # (fix: the original `+ 1` on the slice end was redundant — slices clamp)
    curr_var_set_sum = params.partial_order[index:]
    # flatten the list of variable sets into a single feature-name list
    curr_var_set_sum1 = [
        var for curr_var_set in curr_var_set_sum for var in curr_var_set
    ]
    if params.util_to_bin:
        # binarized utilities: every variable is categorical
        context = [Categorical] * n
        ds_context = Context(
            parametric_types=context, scope=scope,
            feature_names=curr_var_set_sum1).add_domains(curr_train_data)
    # utility is meta type -- real
    else:
        if params.utility_node[0] in curr_var_set_sum1:
            # last column is the utility variable
            context = [MetaType.DISCRETE] * (n - 1)
            context.append(MetaType.REAL)
        else:
            # fix: removed the dead self-assignment `scope = scope`
            context = [MetaType.DISCRETE] * n
        ds_context = Context(
            meta_types=context, scope=scope,
            feature_names=curr_var_set_sum1).add_domains(curr_train_data)
    return ds_context
def get_ds_context_prod(curr_train_data, scope, index, scope_index, params):
    """
    Return the SPFlow Context to use with split_cols, learn_mspn or
    learn_parametric while creating a product node for an SPMN.

    :param curr_train_data: data slice for the current node
    :param scope: scope (variable indices) of the current node
    :param index: unused here; kept for signature parity with
        get_ds_context_sum
    :param scope_index: offset into params.feature_names for this node
    :param params: SPMN parameters (feature_names, util_to_bin, utility_node)
    :return: Context with domains added from curr_train_data
    """
    n = curr_train_data.shape[1]
    # feature names covered by this node's columns
    scope_var = params.feature_names[scope_index:scope_index + n]
    # if parametric, all variables are meta type -- categorical
    if params.util_to_bin:
        context = [Categorical] * n
        ds_context = Context(
            parametric_types=context, scope=scope,
            feature_names=scope_var).add_domains(curr_train_data)
    # if mixed, utility is meta type -- real
    else:
        if params.utility_node[0] in scope_var:
            # last column is the utility variable
            context = [MetaType.DISCRETE] * (n - 1)
            context.append(MetaType.REAL)
        else:
            # fix: removed the dead self-assignment `scope = scope`
            context = [MetaType.DISCRETE] * n
        ds_context = Context(
            meta_types=context, scope=scope,
            feature_names=scope_var).add_domains(curr_train_data)
    return ds_context
def test_leaf_bernoulli_bootstrap(self):
    """A conditional Bernoulli leaf must assign at least 0.5 likelihood to the
    observed label and the complementary mass to the flipped label."""
    np.random.seed(17)
    features = np.concatenate(
        (
            np.random.multivariate_normal([10, 10], np.eye(2), 100),
            np.random.multivariate_normal([1, 1], np.eye(2), 100),
        ),
        axis=0,
    )
    labels = np.array([1] * 100 + [0] * 100).reshape(-1, 1)
    train = concatenate_yx(labels, features)

    ds_context = Context(parametric_types=[Bernoulli])
    ds_context.feature_size = 2
    leaf = create_conditional_leaf(train, ds_context, [0])

    lik_observed = likelihood(leaf, train)
    flipped = np.concatenate([1 - labels, features], axis=1)
    lik_flipped = likelihood(leaf, flipped)

    # the two label probabilities must sum to one
    np.testing.assert_array_almost_equal(lik_observed + lik_flipped, 1.0)
    # the true label is always the more likely one
    self.assertTrue(np.all(lik_observed >= 0.5))
    self.assertTrue(np.all(lik_flipped < 0.5))
def test_histogram_to_str_and_back(self):
    """Round-trip a histogram leaf through its string representation."""
    samples = np.array([1, 1, 2, 3, 3, 3]).reshape(-1, 1)
    ctx = Context([MetaType.DISCRETE])
    ctx.add_domains(samples)
    leaf = create_histogram_leaf(samples, ctx, [0], alpha=False)
    self.check_obj_and_reconstruction(leaf)
def test_histogram_leaf(self):
    """The MPE of a discrete histogram leaf is its most frequent value."""
    samples = np.array([1, 1, 2, 3, 3, 3]).reshape(-1, 1)
    ctx = Context([MetaType.DISCRETE])
    ctx.add_domains(samples)
    leaf = create_histogram_leaf(samples, ctx, [0], alpha=False)
    completed = mpe(leaf, np.array([[np.nan]]))
    self.assertTrue(np.array_equal(completed, np.array([[3]])), "mpe should be 3")
def test_valid_histogram(self):
    """A KDE-sourced histogram over spiky data must produce more than one bin."""
    np.random.seed(17)
    samples = [1] + [5] * 20 + [7] + [10] * 50 + [20] + [30] * 10
    samples = np.array(samples).reshape((-1, 1))
    ctx = Context([MetaType.REAL])
    ctx.add_domains(samples)
    leaf = create_histogram_leaf(samples, ctx, [0], alpha=False, hist_source="kde")
    self.assertGreater(len(leaf.bin_repr_points), 1)
def test_PWL_no_variance(self):
    """Building a piecewise-linear leaf from zero-variance data must fail."""
    constant = np.array([[1.0], [1.0]])
    ctx = Context([MetaType.REAL])
    ctx.add_domains(constant)
    with self.assertRaises(AssertionError):
        create_piecewise_leaf(constant, ctx, scope=[0], hist_source="kde")
def test_PWL_no_variance(self):
    """A PWL leaf over constant data assigns each sample probability 2/6."""
    constant = np.array([[1.0], [1.0]])
    ctx = Context([MetaType.REAL])
    ctx.add_domains(constant)
    leaf = create_piecewise_leaf(constant, ctx, scope=[0], hist_source="kde")
    prob = np.exp(log_likelihood(leaf, constant))
    for idx in range(2):
        self.assertAlmostEqual(float(prob[idx]), 2 / 6)
def test_we_score(self):
    """weight_of_evidence on a hand-built categorical SPN: for this instance
    the expected WE is zero everywhere it is defined."""
    # test if we_score is correct
    """
    # explain how training data and the spn comes
    # number of RVs
    M = 3
    # table of probabilities
    p1 = 0.6
    p2 = 0.3
    p31 = 0.1
    p32 = 0.9
    # generate x1 and x2
    x1 = np.random.binomial(1, p1, size=N) + np.random.binomial(1, p1, size=N)
    x2 = np.random.binomial(1, p2, size=N)
    x3 = np.zeros(N)
    # generate x3
    for i in range(N):
        if x2[i] == 1:
            x3[i] = np.random.binomial(1, p31, size=1)
        else:
            x3[i] = np.random.binomial(1, p32, size=1)
    # form a matrix, rows are instances and columns are RVs
    train_data = np.concatenate((x1, x2, x3)).reshape((M, N)).transpose()
    """
    # only for generating the ds_context
    train_data = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [2.0, 0.0, 1.0]])
    # spn
    ds_context = Context(meta_types=[MetaType.DISCRETE] * 3)
    ds_context.add_domains(train_data)
    # NOTE(review): attribute name is `parametric_type` (singular) while
    # Context elsewhere uses `parametric_types` — confirm this is intentional.
    ds_context.parametric_type = [Categorical] * 3
    # hand-built SPN: mixture of two products over three categorical RVs
    spn = 0.64 * (
        (
            Categorical(p=[0.25, 0.75, 0.0], scope=0)
            * (
                0.34 * ((Categorical(p=[7 / 34, 27 / 34], scope=1) * Categorical(p=[1.0, 0.0], scope=2)))
                + 0.66 * ((Categorical(p=[21 / 22, 1 / 22], scope=1) * Categorical(p=[0.0, 1.0], scope=2)))
            )
        )
    ) + 0.36 * (
        (
            Categorical(p=[0.0, 0.0, 1.0], scope=0)
            * (
                0.34 * ((Categorical(p=[7 / 34, 27 / 34], scope=1) * Categorical(p=[1.0, 0.0], scope=2)))
                + 0.66 * ((Categorical(p=[21 / 22, 1 / 22], scope=1) * Categorical(p=[0.0, 1.0], scope=2)))
            )
        )
    )
    # test
    n = 40000
    x_instance = np.array([1, 1, 0], dtype=float).reshape(1, -1)
    y_index = 0
    we = weight_of_evidence(spn, 0, x_instance, n, ds_context.domains[y_index].shape[0])
    we_true = np.array([[np.nan, 0, 0]])
    # compare only the defined (non-nan) entries
    we = we[~np.isnan(we)]
    we_true = we_true[~np.isnan(we_true)]
    self.assertTrue((we == we_true).all())
def test_create_conditional(self):
    """create_conditional must split the rows according to the label
    conditional, emit one GET_NEXT_OP task per branch, and route each data
    partition to the matching child of the new conditional node."""
    np.random.seed(17)
    data = np.arange(0, 1000).reshape(-1, 8)
    parent = Sum()
    parent.children.append(None)
    ctx = Context()
    ctx.feature_size = 4
    scope = [1, 3, 4, 6]
    # independent copy so in-place mutation of the input can be detected
    data2 = np.array(data)
    # fixed 25/75 row split, shuffled
    K = int(data.shape[0] * 0.25)
    split_idx = np.array([0] * K + [1] * (data.shape[0] - K))
    np.random.shuffle(split_idx)
    y, x = get_YX(data, 4)

    def label_conditional(local_y, local_x):
        # the conditional must be called with exactly the y/x decomposition
        self.assertListEqual(local_y.tolist(), y.tolist())
        self.assertListEqual(local_x.tolist(), x.tolist())
        return split_idx

    result = create_conditional(data=data2, parent=parent, pos=0, context=ctx, scope=list(scope), label_conditional=label_conditional)
    # the input data must be untouched
    self.assertListEqual(data.tolist(), data2.tolist())
    # one follow-up task per branch of the split
    self.assertEqual(len(result), 2)
    for i, r in enumerate(result):
        self.assertEqual(r[0], SplittingOperations.GET_NEXT_OP)
        self.assertIn('data', r[1])
        self.assertEqual(parent.children[0], r[1]['parent'])
        self.assertEqual(r[1]['pos'], i)
        self.assertListEqual(scope, r[1]['scope'])
        self.assertEqual(r[1]['data'].shape[1], data.shape[1])
    # each partition must contain exactly the rows routed to that child
    conditional_node = result[0][1]['parent']
    child_idx = conditional_supervised_likelihood(
        conditional_node, [np.zeros((data.shape[0], 1)), np.ones((data.shape[0], 1))], data)
    self.assertListEqual(result[0][1]['data'].tolist(), data[child_idx[:, 0] == 0, :].tolist())
    self.assertListEqual(result[1][1]['data'].tolist(), data[child_idx[:, 0] == 1, :].tolist())
def run_experiment(dataset, top_n_features, linear=False):
    """Learn an MSPN on the leading `top_n_features` columns of `dataset`
    and persist the result via save_exp."""
    ds_name, vocab, full_data, train_split, _, statistical_type, _ = dataset

    # restrict every array to the leading features
    full_data = full_data[:, 0:top_n_features]
    vocab = vocab[0:top_n_features]
    train_split = train_split[:, 0:top_n_features]

    ds_context = Context()
    ds_context.statistical_type = statistical_type
    add_domains(full_data, ds_context)

    spn = learn_mspn(train_split, ds_context, linear=linear, memory=memory)
    save_exp(spn, ds_name, top_n_features, vocab, full_data)
def test_Histogram_discrete_inference(self):
    """Histogram leaf likelihoods: plain relative frequencies first, then
    Laplace-smoothed (alpha=True) frequencies."""
    # alpha=False -> plain relative frequencies
    samples = np.array([1, 1, 2, 3, 3, 3]).reshape(-1, 1)
    ctx = Context([MetaType.DISCRETE])
    ctx.add_domains(samples)
    leaf = create_histogram_leaf(samples, ctx, [0], alpha=False)
    prob = np.exp(log_likelihood(leaf, samples))
    for idx, expected in enumerate([2 / 6, 2 / 6, 1 / 6, 3 / 6, 3 / 6, 3 / 6]):
        self.assertAlmostEqual(float(prob[idx]), expected)

    # alpha=True -> add-one smoothed frequencies
    samples = np.array([1, 1, 2, 3, 3, 3]).reshape(-1, 1)
    ctx = Context([MetaType.DISCRETE])
    ctx.add_domains(samples)
    leaf = create_histogram_leaf(samples, ctx, [0], alpha=True)
    prob = np.exp(log_likelihood(leaf, samples))
    for idx, expected in enumerate([3 / 9, 3 / 9, 2 / 9, 4 / 9, 4 / 9, 4 / 9]):
        self.assertAlmostEqual(float(prob[idx]), expected)
def test_PWL(self):
    """Smoke test: fit a piecewise-linear leaf to a two-component Gaussian
    mixture and evaluate its density on the training samples."""
    samples = np.r_[
        np.random.normal(10, 5, (300, 1)),
        np.random.normal(20, 10, (700, 1)),
    ]
    ctx = Context([MetaType.REAL])
    ctx.add_domains(samples)
    leaf = create_piecewise_leaf(samples, ctx, scope=[0], prior_weight=None, hist_source="kde")
    prob = np.exp(log_likelihood(leaf, samples))
def test_create_sum_with_split(self):
    """create_sum with a row splitter must build a Sum node whose children
    receive the split partitions and whose weights are normalized."""
    np.random.seed(17)
    data = np.arange(0, 1000).reshape(-1, 8)
    parent = Sum()
    parent.children.append(None)
    ctx = Context()
    ctx.feature_size = 4
    scope = [1, 3, 4, 6]
    # independent copy so in-place mutation of the input can be detected
    data2 = np.array(data)
    # fixed 25/75 row split, shuffled
    K = int(data.shape[0] * 0.25)
    split_idx = np.array([0] * K + [1] * (data.shape[0] - K))
    np.random.shuffle(split_idx)

    def split_rows(data, context, scope):
        # deterministic splitter returning (partition, scope, weight) triples
        result = []
        result.append((data[split_idx == 0, :], scope, 0.25))
        result.append((data[split_idx == 1, :], scope, 0.75))
        return result

    result = create_sum(data=data2, parent=parent, pos=0, context=ctx, scope=list(scope), split_rows=split_rows, split_on_sum=True)
    # the input data must be untouched
    self.assertListEqual(data.tolist(), data2.tolist())
    # one follow-up task per partition
    self.assertEqual(len(result), 2)
    for i, r in enumerate(result):
        self.assertEqual(r[0], SplittingOperations.GET_NEXT_OP)
        self.assertIn('data', r[1])
        self.assertEqual(parent.children[0], r[1]['parent'])
        self.assertEqual(r[1]['pos'], i)
        self.assertListEqual(scope, r[1]['scope'])
        self.assertEqual(r[1]['data'].shape[1], data.shape[1])
        self.assertEqual(r[1]['data'].shape[0], int(np.sum(split_idx == i)))
    # partitions must match the splitter's row assignment exactly
    self.assertListEqual(result[0][1]['data'].tolist(), data[split_idx == 0, :].tolist())
    self.assertListEqual(result[1][1]['data'].tolist(), data[split_idx == 1, :].tolist())
    # sum-node weights must be normalized
    self.assertAlmostEqual(np.sum(parent.children[0].weights), 1.0)
def test_mixture_gaussians(self):
    """A KDE-sourced histogram should closely approximate a balanced
    two-Gaussian mixture density."""
    from scipy.stats import norm

    np.random.seed(17)
    comp_low = np.random.normal(10, 1, size=200).tolist()
    comp_high = np.random.normal(30, 1, size=200).tolist()
    samples = np.array(comp_low + comp_high).reshape((-1, 1))

    ctx = Context([MetaType.REAL])
    ctx.add_domains(samples)
    leaf = create_histogram_leaf(samples, ctx, [0], alpha=False, hist_source="kde")

    # dense evaluation grid plus the training points themselves
    grid = np.sort(np.linspace(0, 60, 1000).tolist() + samples[:, 0].tolist())

    true_pdf = 0.5 * norm.pdf(grid, 10, 1) + 0.5 * norm.pdf(grid, 30, 1)
    est_pdf = likelihood(leaf, grid.reshape((-1, 1)))

    # total absolute deviation between estimate and truth stays small
    total_error = np.sum(np.abs(est_pdf[:, 0] - true_pdf))
    self.assertLessEqual(total_error, 7)
def get_ds_context(data, scope, params):
    """
    :param data: numpy array of data for the Context object
    :param scope: scope (column indices) of data
    :param params: params of SPMN (provides feature_names and meta_types)
    :return: Context object of SPFlow with domains added from data
    """
    # Fix: removed the unused local `num_of_variables = data.shape[1]`
    scope_var = np.array(params.feature_names)[scope].tolist()
    ds_context = Context(meta_types=[params.meta_types[i] for i in scope],
                         scope=scope,
                         feature_names=scope_var)
    ds_context.add_domains(data)
    return ds_context
def learn_PSPN():
    """Example: learn a parametric SPN on synthetic mixed discrete/continuous
    data and print its structure statistics."""
    import numpy as np
    np.random.seed(123)

    # two discrete columns, one Gaussian-mixture column, one dependent column
    col_a = np.random.randint(2, size=1000).reshape(-1, 1)
    col_b = np.random.randint(3, size=1000).reshape(-1, 1)
    col_c = np.r_[np.random.normal(10, 5, (300, 1)), np.random.normal(20, 10, (700, 1))]
    col_d = 5 * col_a + 3 * col_b + col_c
    train_data = np.c_[col_a, col_b, col_c, col_d]

    from spn.structure.Base import Context
    from spn.structure.leaves.parametric.Parametric import Categorical, Gaussian
    ds_context = Context(
        parametric_types=[Categorical, Categorical, Gaussian, Gaussian
                          ]).add_domains(train_data)

    from spn.algorithms.LearningWrappers import learn_parametric
    spn = learn_parametric(train_data, ds_context, min_instances_slice=20)

    from spn.algorithms.Statistics import get_structure_stats
    print(get_structure_stats(spn))
def learn_MSPN():
    """Example: learn a mixed SPN (MSPN) on synthetic mixed discrete/continuous
    data and print its structure statistics."""
    import numpy as np
    np.random.seed(123)

    # two discrete columns, one Gaussian-mixture column, one dependent column
    col_a = np.random.randint(2, size=1000).reshape(-1, 1)
    col_b = np.random.randint(3, size=1000).reshape(-1, 1)
    col_c = np.r_[np.random.normal(10, 5, (300, 1)), np.random.normal(20, 10, (700, 1))]
    col_d = 5 * col_a + 3 * col_b + col_c
    train_data = np.c_[col_a, col_b, col_c, col_d]

    from spn.structure.Base import Context
    from spn.structure.StatisticalTypes import MetaType
    ds_context = Context(meta_types=[
        MetaType.DISCRETE, MetaType.DISCRETE, MetaType.REAL, MetaType.REAL
    ]).add_domains(train_data)

    from spn.algorithms.LearningWrappers import learn_mspn
    mspn = learn_mspn(train_data, ds_context, min_instances_slice=20)

    from spn.algorithms.Statistics import get_structure_stats
    print(get_structure_stats(mspn))
def learn_whittle_spn_2d(train_data, n_RV, n_min_slice, init_scope=None):
    """Learn a 2D Whittle SPN with MultivariateGaussian leaves and pickle it.

    :param train_data: training matrix (instances x RVs)
    :param n_RV: number of random variables (columns of train_data)
    :param n_min_slice: minimum instances per slice for structure learning
    :param init_scope: optional initial scope forwarded to learn_parametric
    :return: the learned SPN

    NOTE(review): relies on module-level `args`, `get_l_rfft`, `get_save_path`,
    `check_path` and `pickle` being in scope — confirm against the full file.
    """
    from spn.structure.leaves.parametric.Parametric import MultivariateGaussian

    # learn spn
    ds_context = Context(parametric_types=[MultivariateGaussian] * n_RV).add_domains(train_data)
    print('learning WSPN')
    # need to pair RVs; need flag for 2d?
    # l_rfft != None --> 2d/pair gaussian node; is_2d=True --> pairwise
    # gaussian with full covariance matrix
    l_rfft = get_l_rfft(args)
    wspn = learn_parametric(train_data, ds_context, min_instances_slice=n_min_slice,
                            threshold=args.threshold, initial_scope=init_scope, cpus=1,
                            l_rfft=l_rfft, is_2d=True)

    save_path = get_save_path(args)
    check_path(save_path)
    # Fix: use a context manager so the pickle file is closed even if
    # pickle.dump raises (the original leaked the handle on error).
    with open(save_path + 'wspn_2d.pkl', 'wb') as f:
        pickle.dump(wspn, f)
    return wspn
def test_learn(self):
    """Smoke test: learn a multivariate-leaf parametric SPN on the iris data
    with the class label appended as the last column."""
    from sklearn.datasets import load_iris
    from spn.algorithms.LearningWrappers import learn_parametric, learn_classifier
    from spn.structure.leaves.parametric.Parametric import Categorical, MultivariateGaussian
    from spn.structure.Base import Context

    iris = load_iris()
    labels = iris.target.reshape(-1, 1)
    train_data = np.hstack((iris.data, labels))

    # four continuous feature columns plus one categorical class column
    leaf_types = [MultivariateGaussian] * 4 + [Categorical]
    ds_context = Context(parametric_types=leaf_types).add_domains(train_data)
    spn_classification = learn_parametric(
        train_data,
        ds_context,
        multivariate_leaf=True,
    )