Exemple #1
0
    def test_leaf_mpe_gaussian(self):
        """Check MPE completion of a missing y value on a Gaussian parametric leaf."""
        np.random.seed(17)

        # The seeded draws must happen in exactly this order: the expected
        # values asserted below were produced from this sequence.
        x_around_10 = np.random.multivariate_normal([10, 10], np.eye(2), 5000)
        x_around_1 = np.random.multivariate_normal([1, 1], np.eye(2), 5000)
        x = np.concatenate((x_around_10, x_around_1), axis=0)

        y_around_20 = np.random.normal(20, 2, 5000).tolist()
        y_around_60 = np.random.normal(60, 2, 5000).tolist()
        # y ~ 20 is paired with x ~ (10, 10); y ~ 60 is paired with x ~ (1, 1).
        y = np.array(y_around_20 + y_around_60).reshape(-1, 1)

        data = concatenate_yx(y, x)

        ds_context = Context(parametric_types=[Gaussian])
        ds_context.feature_size = 2

        leaf = create_parametric_leaf(data, ds_context, [0])

        # Query rows: column 0 is the missing y, columns 1-2 are the x evidence.
        query_10 = [np.nan, 10, 10]
        query_1 = [np.nan, 1, 1]

        res = mpe(leaf, np.array([query_10]))
        self.assertAlmostEqual(res[0, 0], 20.435226001909466)

        res = mpe(leaf, np.array([query_1]))
        self.assertAlmostEqual(res[0, 0], 59.4752193542575)

        res = mpe(leaf, np.array([query_1, query_10]))
        self.assertAlmostEqual(res[0, 0], 59.4752193542575)
        self.assertAlmostEqual(res[1, 0], 20.435226001909466)

        # The third row carries no NaN; this query is expected to fail an assertion.
        with self.assertRaises(AssertionError):
            mpe(leaf, np.array([query_1, query_10, [5, 10, 10]]))
Exemple #2
0
    def test_leaf_mpe_bernoulli(self):
        """Check MPE completion of a missing binary label on a conditional Bernoulli leaf."""
        np.random.seed(17)

        # Seeded draws, in this order: 5000 points around (10, 10), then 5000 around (1, 1).
        x_around_10 = np.random.multivariate_normal([10, 10], np.eye(2), 5000)
        x_around_1 = np.random.multivariate_normal([1, 1], np.eye(2), 5000)
        x = np.concatenate((x_around_10, x_around_1), axis=0)

        # Label 0 is paired with the cluster at (10, 10), label 1 with the one at (1, 1).
        y = np.array([0] * 5000 + [1] * 5000).reshape(-1, 1)

        data = concatenate_yx(y, x)

        ds_context = Context(parametric_types=[Bernoulli])
        ds_context.feature_size = 2

        leaf = create_conditional_leaf(data, ds_context, [0])

        # Query rows: column 0 is the missing label, columns 1-2 are the x evidence.
        query_10 = [np.nan, 10, 10]
        query_1 = [np.nan, 1, 1]

        res = mpe(leaf, np.array([query_10]))
        self.assertAlmostEqual(res[0, 0], 0)

        res = mpe(leaf, np.array([query_1]))
        self.assertAlmostEqual(res[0, 0], 1)

        res = mpe(leaf, np.array([query_1, query_10]))
        self.assertAlmostEqual(res[0, 0], 1)
        self.assertAlmostEqual(res[1, 0], 0)

        # The third row carries no NaN; this query is expected to fail an assertion.
        with self.assertRaises(AssertionError):
            mpe(leaf, np.array([query_1, query_10, [5, 10, 10]]))
Exemple #3
0
    def test_leaf_mpe_conditional(self):
        """Check MPE label completion on the network learned by a fitted CSPNClassifier."""
        np.random.seed(17)

        # Seeded draws, in this order: 5000 points around (10, 10), then 5000 around (1, 1).
        x_around_10 = np.random.multivariate_normal([10, 10], np.eye(2), 5000)
        x_around_1 = np.random.multivariate_normal([1, 1], np.eye(2), 5000)
        x = np.concatenate((x_around_10, x_around_1), axis=0)

        # Label 0 is paired with the cluster at (10, 10), label 1 with the one at (1, 1).
        y = np.array([0] * 5000 + [1] * 5000).reshape(-1, 1)

        # NOTE(review): the combined matrix is built but not used below.
        data = concatenate_yx(y, x)

        cspn = CSPNClassifier(
            [Bernoulli] * y.shape[1],
            min_instances_slice=4990,
            cluster_univariate=True,
        )
        cspn.fit(x, y)
        model = cspn.cspn

        # Query rows: column 0 is the missing label, columns 1-2 are the x evidence.
        query_10 = [np.nan, 10, 10]
        query_1 = [np.nan, 1, 1]

        res = mpe(model, np.array([query_10]))
        self.assertAlmostEqual(res[0, 0], 0)

        res = mpe(model, np.array([query_1]))
        self.assertAlmostEqual(res[0, 0], 1)

        res = mpe(model, np.array([query_1, query_10]))
        self.assertAlmostEqual(res[0, 0], 1)
        self.assertAlmostEqual(res[1, 0], 0)

        # The third row carries no NaN; this query is expected to fail an assertion.
        with self.assertRaises(AssertionError):
            mpe(model, np.array([query_1, query_10, [5, 10, 10]]))
Exemple #4
0
    def test_induced_trees(self):
        """MPE fills the Gaussian variable with the mean of the branch selected by the category."""
        # Two equally weighted branches: category 0 goes with mean 10, category 1 with mean 50.
        branch_low = Gaussian(mean=10, stdev=1, scope=0) * Categorical(p=[1.0, 0], scope=1)
        branch_high = Gaussian(mean=50, stdev=1, scope=0) * Categorical(p=[0, 1.0], scope=1)
        spn = 0.5 * branch_low + 0.5 * branch_high

        # Column 0 (the Gaussian) is missing; column 1 holds the observed category.
        data = np.array([[np.nan, 0.0],
                         [np.nan, 1.0]])

        # The return value is ignored and the assertions read `data` itself,
        # so this test depends on mpe writing into the array it receives.
        mpe(spn, data)

        self.assertAlmostEqual(data[0, 0], 10)
        self.assertAlmostEqual(data[1, 0], 50)
Exemple #5
0
    def predict(self, X):
        """
        Predict a label for every row of ``X`` via MPE inference.

        Parameters
        ----------
        X : np.ndarray
            Test data

        Returns
        -------
        np.ndarray
            Label predictions for the given test data
        """
        # Refuse to predict before fit() has stored X_ and y_.
        check_is_fitted(self, ["X_", "y_"])

        X = check_array(X)

        # Append an all-NaN label column and let MPE fill it in.
        missing_labels = np.full((X.shape[0], 1), fill_value=np.nan)
        completed = mpe(self._spn, np.c_[X, missing_labels])

        # The label is the last column of the completed data.
        return completed[:, -1]
Exemple #6
0
    def test_piecewise_leaf(self):
        """MPE of a triangular piecewise-linear leaf is the x position of its peak."""
        # Both leaves are triangles over scope 0, peaking at x = 1 and x = -1.
        peak_at_plus_one = PiecewiseLinear([0, 1, 2], [0, 1, 0], [], scope=[0])
        peak_at_minus_one = PiecewiseLinear([-2, -1, 0], [0, 1, 0], [], scope=[0])
        for leaf in (peak_at_plus_one, peak_at_minus_one):
            self.assertTrue(is_valid(leaf))

        missing = np.array([[np.nan]])

        completed = mpe(peak_at_plus_one, missing)
        self.assertTrue(np.array_equal(completed, np.array([[1]])), "mpe should be 1")

        completed = mpe(peak_at_minus_one, missing)
        self.assertTrue(np.array_equal(completed, np.array([[-1]])), "mpe should be -1")

        # A query without any NaN is expected to fail an assertion.
        with self.assertRaises(AssertionError):
            mpe(peak_at_plus_one, np.array([[1]]))
Exemple #7
0
def classification():
    """Learn an SPN classifier on two synthetic 2-D clusters and print MPE label predictions.

    Saves a scatter plot of the training data to
    ``classification_training_data.png`` in the working directory.
    """
    import numpy as np
    np.random.seed(123)

    # Seeded draws in this order: 500 points around 5, then 500 around 10.
    cluster_zero = np.random.normal(5, 1, (500, 2))
    cluster_one = np.random.normal(10, 1, (500, 2))
    features = np.r_[cluster_zero, cluster_one]
    labels = np.r_[np.zeros((500, 1)), np.ones((500, 1))]
    # Columns 0-1 are the features, column 2 is the class label.
    train_data = np.c_[features, labels]

    centers = [[5, 5], [10, 10]]

    import matplotlib.pyplot as plt
    colors = ['#bda36b', '#7aaab4']
    plt.figure()
    for k, col in enumerate(colors):
        in_class = train_data[:, 2] == k
        center_x, center_y = centers[k]
        plt.plot(train_data[in_class, 0], train_data[in_class, 1], 'w', markerfacecolor=col, marker='.')
        plt.plot(center_x, center_y, 'o', markerfacecolor=col, markeredgecolor='k', markersize=6)
    plt.title('Training Data')
    plt.grid(True)
    plt.savefig("classification_training_data.png", bbox_inches='tight', pad_inches=0)

    from spn.algorithms.LearningWrappers import learn_parametric, learn_classifier
    from spn.structure.leaves.parametric.Parametric import Categorical, Gaussian
    from spn.structure.Base import Context
    ds_context = Context(parametric_types=[Gaussian, Gaussian, Categorical]).add_domains(train_data)
    # The final argument (2) is the index of the label column.
    spn_classification = learn_classifier(train_data, ds_context, learn_parametric, 2)

    # Two query rows whose label (last column) is missing.
    test_classification = np.array([[3.0, 4.0, np.nan],
                                    [12.0, 18.0, np.nan]])
    print(test_classification)
    from spn.algorithms.MPE import mpe
    print(mpe(spn_classification, test_classification))
Exemple #8
0
    def test_correct_parameters(self):
        """mpe is expected to raise AssertionError for an SPN with invalid scopes or ids."""
        leaf_scope_0 = Leaf(0)
        leaf_scope_1 = Leaf(1)
        leaf_scope_01 = Leaf([0, 1])
        product = leaf_scope_1 * leaf_scope_0
        spn = 0.1 * leaf_scope_01 + 0.9 * product
        # Tamper with the ids so they are no longer the ones assigned on construction.
        product.id = 0

        rand_gen = RandomState(1234)
        random_input = rand_gen.rand(10, 3)
        with self.assertRaises(AssertionError):
            mpe(spn, random_input)

        # Reassign ids, then break them again by shifting one leaf's id.
        assign_ids(spn)
        leaf_scope_0.id += 1

        random_input = rand_gen.rand(10, 3)
        with self.assertRaises(AssertionError):
            mpe(spn, random_input)
Exemple #9
0
 def test_histogram_leaf(self):
     """MPE of a histogram leaf is the most frequent training value (3 here)."""
     # One discrete column in which 3 occurs most often.
     samples = np.array([[1], [1], [2], [3], [3], [3]])
     ds_context = Context([MetaType.DISCRETE])
     ds_context.add_domains(samples)
     hist = create_histogram_leaf(samples, ds_context, [0], alpha=False)

     completed = mpe(hist, np.array([[np.nan]]))
     expected = np.array([[3]])
     self.assertTrue(np.array_equal(completed, expected), "mpe should be 3")
Exemple #10
0
def learn_CNET():
    """Learn two CNets on synthetic binary data, print their stats and run MPE on one of them.

    One CNet is learned with ``cond="naive_mle"`` and one with ``cond="random"``.
    For each, the structure statistics, the equation string and the mean
    training log-likelihood are printed. MPE is then run on the randomly
    conditioned CNet with the first column masked out.
    """
    import numpy as np

    np.random.seed(123)

    # 1000 rows of four independent binary columns with success rates 0.1 .. 0.4.
    train_data = np.random.binomial(1, [0.1, 0.2, 0.3, 0.4], size=(1000, 4))
    print(np.mean(train_data, axis=0))

    from spn.structure.leaves.cltree.CLTree import create_cltree_leaf
    from spn.structure.Base import Context
    from spn.structure.leaves.parametric.Parametric import Bernoulli

    ds_context = Context(parametric_types=[Bernoulli] * 4).add_domains(train_data)

    from spn.algorithms.LearningWrappers import learn_parametric, learn_cnet

    # Learn in the original order: naive MLE conditioning first, then random.
    shared_options = dict(min_instances_slice=20, min_features_slice=1)
    cnet_naive_mle = learn_cnet(train_data, ds_context, cond="naive_mle", **shared_options)
    cnet_random = learn_cnet(train_data, ds_context, cond="random", **shared_options)

    from spn.algorithms.Statistics import get_structure_stats
    from spn.io.Text import spn_to_str_equation
    from spn.algorithms.Inference import log_likelihood

    for cnet in (cnet_naive_mle, cnet_random):
        print(get_structure_stats(cnet))
        print(spn_to_str_equation(cnet))
        ll = log_likelihood(cnet, train_data)
        print(np.mean(ll))

    from spn.algorithms.MPE import mpe

    # Mask the first column and let MPE complete it; only the first 30 rows are shown.
    train_data_mpe = train_data.astype(float)
    train_data_mpe[:, 0] = np.nan
    print(mpe(cnet_random, train_data_mpe)[:30])

    ll = log_likelihood(cnet_random, train_data_mpe)
    print(np.mean(ll))
Exemple #11
0
    def predict(self, X):
        """Return the MPE label for each row of ``X``.

        ``X`` is validated, extended with an all-NaN label column, and
        completed by ``mpe`` on ``self._spn``. The filled-in last column is
        returned as the prediction.
        """
        # Requires a prior fit(): X_ and y_ must exist on the estimator.
        check_is_fitted(self, ["X_", "y_"])

        X = check_array(X)

        row_count = X.shape[0]
        unknown_labels = np.full((row_count, 1), fill_value=np.nan)
        query = np.c_[X, unknown_labels]

        return mpe(self._spn, query)[:, -1]
def get_node_description(spn, parent_node, size):
    """Describe every child of ``parent_node`` and summarise them.

    Parameters
    ----------
    spn :
        Not used by this function; kept so existing callers keep working.
    parent_node :
        Node whose ``children`` are described. If its type name is ``'Sum'``
        each child's weight is read from ``parent_node.weights``; otherwise
        the weight is 1.
    size : int
        Number of columns of the all-NaN row passed to ``mpe`` to obtain each
        child's representative instance.

    Returns
    -------
    dict
        ``'num'`` (number of children), the counts ``'shallow'``,
        ``'split_one'`` and ``'deep'``, and ``'nodes'``: one dict per child,
        ordered by descending weight.
    """
    parent_is_sum = type(parent_node).__name__ == 'Sum'
    node_descriptions = dict()
    node_descriptions['num'] = len(parent_node.children)
    nodes = list()
    for i, node in enumerate(parent_node.children):
        # Work on a copy with fresh ids so mpe can run on the child alone.
        node_spn = Copy(node)
        assign_ids(node_spn)

        is_leaf = isinstance(node, Leaf)
        # Leaves are treated as having no children everywhere below, not only
        # for 'num_children'.
        children = [] if is_leaf else node.children

        if is_leaf:
            # A leaf's scope holds plain feature indices. Sorting it by len()
            # raised TypeError and reordered the leaf's own scope list, so a
            # copy is reported unsorted instead.
            split_features = list(node.scope)
        else:
            split_features = sorted((list(c.scope) for c in children), key=len)

        node_dir = dict()
        node_dir['weight'] = parent_node.weights[i] if parent_is_sum else 1
        node_dir['size'] = get_number_of_nodes(node) - 1
        node_dir['num_children'] = len(children)
        node_dir['leaf'] = is_leaf
        node_dir['type'] = type(node).__name__ + ' Node'
        node_dir['split_features'] = split_features
        node_dir['depth'] = get_depth(node)
        node_dir['child_depths'] = [get_depth(c) for c in children]

        # Classify by how many children have depth 0.
        zero_depth_children = sum(1 for d in node_dir['child_depths'] if d == 0)
        descriptor = node_dir['type']
        if zero_depth_children == len(node_dir['child_depths']):
            descriptor = 'shallow ' + descriptor
            node_dir['quick'] = 'shallow'
        elif zero_depth_children == 1:
            node_dir['quick'] = 'split_one'
            descriptor += ', which separates one feature'
        else:
            node_dir['quick'] = 'deep'
            descriptor = 'deep ' + descriptor
        descriptor = 'a ' + descriptor
        node_dir['descriptor'] = descriptor
        node_dir['short_descriptor'] = descriptor
        node_dir['representative'] = mpe(node_spn, np.array([[np.nan] * size]))
        nodes.append(node_dir)

    for quick in ('shallow', 'split_one', 'deep'):
        node_descriptions[quick] = len([d for d in nodes if d['quick'] == quick])

    # Sort ascending then reverse (rather than reverse=True) so the relative
    # order of equally weighted children stays as it was.
    nodes.sort(key=lambda x: x['weight'])
    nodes.reverse()
    node_descriptions['nodes'] = nodes
    return node_descriptions
Exemple #13
0
    def test_binary(self):
        """Compare the compiled C++ likelihood and MPE functions against the Python ones."""
        # Left branch: a Bernoulli on scope 0 times a two-component mixture over scopes 1 and 2.
        inner_mixture = (0.3 *
                         (Bernoulli(p=0.7, scope=1) * Bernoulli(p=0.6, scope=2)) + 0.7 *
                         (Bernoulli(p=0.5, scope=1) * Bernoulli(p=0.4, scope=2)))
        left_branch = Bernoulli(p=0.8, scope=0) * inner_mixture
        right_branch = (Bernoulli(p=0.8, scope=0) * Bernoulli(p=0.7, scope=1) *
                        Bernoulli(p=0.6, scope=2))
        A = 0.4 * left_branch + 0.6 * right_branch

        setup_cpp_bridge(A)

        spn_cc_eval_func_bernoulli = get_cpp_function(A)
        num_data = 200000

        # Three successive draws of num_data binary values, concatenated and
        # then reshaped row-major into three columns.
        draws = [
            np.random.binomial(1, 0.3, size=(num_data)).astype("float32").tolist()
            for _ in range(3)
        ]
        data = np.array(draws[0] + draws[1] + draws[2]).reshape((-1, 3))

        num_nodes = len(get_nodes_by_type(A))

        lls_matrix = np.zeros((num_data, num_nodes))

        # Compare every entry of the per-node log-likelihood matrix.
        log_likelihood(A, data, lls_matrix=lls_matrix)
        c_ll = spn_cc_eval_func_bernoulli(data)
        self.assertTrue(np.allclose(lls_matrix, c_ll))

        # MPE: blank out one randomly chosen column per row, then compare completions.
        spn_cc_mpe_func_bernoulli = get_cpp_mpe_function(A)

        last_column = data.shape[1] - 1
        for row in range(data.shape[0]):
            dropped_column = np.random.binomial(last_column, 0.5)
            data[row, dropped_column] = np.nan

        cc_completion = spn_cc_mpe_func_bernoulli(data)
        py_completion = mpe(A, data)
        self.assertTrue(np.allclose(py_completion, cc_completion))
Exemple #14
0
def cross_validate(data, n_folds, label=2):
    """Return the mean accuracy of an SPN over ``n_folds`` contiguous folds.

    ``data`` is cut into ``n_folds`` consecutive slices of
    ``len(data) // n_folds`` rows; the last slice also takes any remainder.
    Each slice in turn is held out, its ``label`` column is masked with
    ``mask_data``, an SPN is built on the remaining slices with ``build_spn``,
    and the ``mpe`` completion is scored with ``accuracy``.

    Parameters
    ----------
    data : sliceable rows (e.g. a 2-D array)
    n_folds : int
        Number of folds; must be at least 2 so every training set is non-empty.
    label : int
        Label column index passed to ``mask_data`` and ``accuracy``.

    Raises
    ------
    ValueError
        If ``n_folds`` is smaller than 2.
    """
    if n_folds < 2:
        raise ValueError("n_folds must be at least 2, got %r" % (n_folds,))

    fold_size = len(data) // n_folds
    splits = [data[fold_size * k:fold_size * (k + 1)] for k in range(n_folds - 1)]
    # The final fold absorbs the rows left over by the integer division.
    splits.append(data[fold_size * (n_folds - 1):])

    acc = 0
    for held_out in range(n_folds):
        test_data = splits[held_out]
        train = np.concatenate(splits[:held_out] + splits[held_out + 1:])
        masked = mask_data(test_data, label)
        spn = build_spn(train)
        prediction = mpe(spn, masked)
        acc += accuracy(prediction, test_data, label)
    return acc / n_folds
Exemple #15
0
    def __init__(self, train, test, zscore, features):
        """Train an SPN classifier on ``train`` and predict labels for ``test``.

        Parameters
        ----------
        train, test :
            CSV sources handed to ``pd.read_csv``.
        zscore :
            Training rows are kept only if the absolute z-score of every
            column is below this threshold.
        features :
            Parametric types passed to ``Context``, one per training column.
            The label is column 0 (the index given to ``learn_classifier``).
        """
        self.train = train
        self.test = test

        np.random.seed(5)

        # Read the CSV into a pandas data frame (df)
        dftr = pd.read_csv(train, delimiter=',')
        train_data = np.array(dftr)
        dfte = pd.read_csv(test, delimiter=',')
        test_data = np.array(dfte)

        # Drop outlier rows: keep a row only if all its z-scores are below the threshold.
        within_threshold = (np.abs(stats.zscore(train_data)) < zscore).all(axis=1)
        train_data = train_data[within_threshold]

        # `np.float` was removed from NumPy; the builtin float is the same dtype.
        X = np.array(train_data, dtype=float)

        # Learning on train data
        spn_classification = learn_classifier(
            X,
            Context(parametric_types=features).add_domains(X),
            learn_parametric, 0)

        # Test column 0 is kept aside as R; the remaining columns are the features.
        TT = test_data[:, 1:]
        R = test_data[:, [0]]

        T = np.array(TT, dtype=float)
        # One NaN placeholder per test row, prepended as the label column.
        # This was sized by len(train), i.e. the length of the path string.
        nan = np.array([[np.nan]] * len(T))
        T = np.append(nan, T, axis=1)
        test_classification = T

        res = mpe(spn_classification, test_classification)

        r = res[:, [0]]
        r = np.array(r, dtype=int)
        xx = np.append(R, r, axis=1)

        # NOTE(review): this is a local function, not a method. It is never
        # called in the visible code and reads self.results, which is not set
        # here. Confirm whether it was meant to be defined at class level.
        def saveresults(self, results):
            np.savetxt(self.results, (xx), "%s,%i", header="ID_code,target")
        # NOTE(review): this is the body of a per-block loop whose header is
        # commented out below. i, tr_block, block_idx, conditional_blocks,
        # cspn, num_mpes, num_samples, img_size, num_blocks and output_path
        # are not defined in the visible code and must come from the
        # enclosing scope.
        # for i, ((tr_block, block_idx), conditional_blocks) in enumerate(datasets):
        #    cspn = cspns[i]
        if i == 0:
            # first time, we only care about the structure to put nans
            mpe_query_blocks = np.zeros_like(tr_block[0:num_mpes, :].reshape(num_mpes, -1))
            sample_query_blocks = np.zeros_like(tr_block[0:num_samples, :].reshape(num_samples, -1))
        else:
            # i+1 time: we set the previous mpe values as evidence
            # (the previous result is written into the trailing columns of the zeroed query)
            mpe_query_blocks = np.zeros_like(np.array(tr_block[0:num_mpes, :].reshape(num_mpes, -1)))
            mpe_query_blocks[:, -(mpe_result.shape[1]) :] = mpe_result

            sample_query_blocks = np.zeros_like(np.array(tr_block[0:num_samples, :].reshape(num_samples, -1)))
            sample_query_blocks[:, -(sample_result.shape[1]) :] = sample_result

        # Build the MPE query for this block and complete it.
        cspn_mpe_query = set_sub_block_nans(mpe_query_blocks, inp=block_idx, nans=block_idx[0:conditional_blocks])
        mpe_result = mpe(cspn, cspn_mpe_query)

        mpe_img_blocks = stitch_imgs(
            mpe_result.shape[0], img_size=img_size, num_blocks=num_blocks, blocks={tuple(block_idx): mpe_result}
        )

        # Same query construction for sampling, with a fixed seed of 123.
        cspn_sample_query = set_sub_block_nans(sample_query_blocks, inp=block_idx, nans=block_idx[0:conditional_blocks])
        sample_result = sample_instances(cspn, cspn_sample_query, RandomState(123))

        sample_img_blocks = stitch_imgs(
            sample_result.shape[0], img_size=img_size, num_blocks=num_blocks, blocks={tuple(block_idx): sample_result}
        )

        # Save one image per MPE row; the file name encodes the block indices and the row number.
        for j in range(num_mpes):
            mpe_fname = output_path + "mpe_%s_%s.png" % ("-".join(map(str, block_idx)), j)
            save_img(mpe_img_blocks[j], mpe_fname)
Exemple #17
0
# Read the train and test CSV files into plain NumPy arrays.
dftr = pd.read_csv(train_path, delimiter=',')
train_data = np.array(dftr)
dfte = pd.read_csv(test_path, delimiter=',')
test_data = np.array(dfte)

# Drop the first column and convert the rest to float. The first remaining
# column is the binary class, the others are features.
# (`np.float` was removed from NumPy; the builtin float is the same dtype.)
XX = train_data[:, 1:]
X = np.array(XX, dtype=float)
# TODO: the first column as integer since is the binary class

# One parametric type per column of X: the class first, then one Gaussian
# per feature (200 for the original data set).
t = [Categorical] + [Gaussian] * (X.shape[1] - 1)

# Learning on train data; the final argument (0) is the label column index.
spn_classification = learn_classifier(
    X,
    Context(parametric_types=t).add_domains(X), learn_parametric, 0)

# Test column 0 is kept aside as R; the remaining columns are the features.
TT = test_data[:, 1:]
R = test_data[:, [0]]

T = np.array(TT, dtype=float)
# One NaN placeholder per test row (was a hard-coded 200000). It goes in
# front because the label is column 0 of the learned model; appending it
# last put the placeholder in a feature column.
nan = np.array([[np.nan]] * len(T))
T = np.append(nan, T, axis=1)
test_classification = T

# predicting on test data
from spn.algorithms.MPE import mpe

print(mpe(spn_classification, test_classification))
# One parametric type per column of X: the class first, then one Gaussian
# per feature (200 for the original data set).
t = [Categorical] + [Gaussian] * (X.shape[1] - 1)

# Learning on train data; the final argument (0) is the label column index.
spn_classification = learn_classifier(X,
                                      Context(parametric_types=t).add_domains(X),
                                      learn_parametric, 0)

# Test column 0 is kept aside as R (written out as ID_code below); the
# remaining columns are the features.
TT = test_data[:, 1:]
R = test_data[:, [0]]

# `np.float` was removed from NumPy; the builtin float is the same dtype.
T = np.array(TT, dtype=float)
# One NaN placeholder per test row (was a hard-coded 200000), prepended as
# the label column.
nan = np.array([[np.nan]] * len(T))
T = np.append(nan, T, axis=1)
test_classification = T

# predicting on test data
from spn.algorithms.MPE import mpe
res = mpe(spn_classification, test_classification)

# Column 0 of the completed data is the predicted class; pair it with the IDs.
r = res[:, [0]]
r = np.array(r, dtype=int)
xx = np.append(R, r, axis=1)
np.savetxt("submission28-spn.csv", (xx), "%s,%i", header="ID_code,target")

Exemple #19
0
def plot_gradients_with_likelihoods(spn,
                                    marg_spn,
                                    gradients,
                                    classes,
                                    plot_title,
                                    plot_pdf,
                                    train_samples,
                                    res=100,
                                    test_sample=None,
                                    test_sample_label=None,
                                    true_train_labels=None):
    """Generates a 2D heightmap of marginal likelihoods combined with influence gradients.

    A ``res`` x ``res`` grid over [0.5, 127.5]^2 is labelled with ``mpe`` on
    ``spn`` and scored with ``likelihood`` on ``marg_spn``. For every class
    the likelihood is drawn as a filled contour where that class is
    predicted, and the gradients are overlaid as arrows at the train samples.

    :param spn: SPN used for the label predictions (label is the last column)
    :param marg_spn: SPN evaluated on the two grid coordinates only
    :param gradients: array whose columns 0 and 1 are the arrow components
    :param classes: class values to draw, at most 10
    :param plot_title: title of the figure
    :param plot_pdf: object with a ``savefig(fig)`` method receiving the figure
    :param train_samples: array whose columns 0 and 1 are the arrow positions
    :param res: number of grid points per axis
    :param test_sample: optional point (x, y) to highlight
    :param test_sample_label: optional label selecting the highlight colour
    :param true_train_labels: optional labels used to colour the arrows by class
    :raises Exception: if more than 10 distinct classes are given
    """
    if len(classes) > 10:
        raise Exception(
            "Not more than 10 distinct classes allowed for gradient with likelihood "
            "plot, but given %d." % len(classes))

    # Samples
    lin = np.linspace(0.5, 127.5, res)
    y, x = np.meshgrid(lin, lin)
    samples = np.asarray(list(itertools.product(lin, lin)), dtype=np.float32)

    # Predictions and likelihoods
    data = np.column_stack((samples, res**2 * [np.nan]))
    mpe_data = mpe(spn, data)
    predictions = mpe_data[:, -1]
    likelihoods = likelihood(marg_spn, samples).reshape((res, res)) * 100000

    # Determine colormap levels
    l_max = np.max(likelihoods)
    l_min = np.min(likelihoods)
    levels = np.linspace(l_min, l_max, 15, endpoint=True)

    # Get default colormap for default colors
    def_cmap = plt.get_cmap("tab10")

    inf_grad_x = gradients[:, 0]
    inf_grad_y = gradients[:, 1]
    train_samples_x = train_samples[:, 0]
    train_samples_y = train_samples[:, 1]

    test_sample_color = 'black'

    # Draw one throw-away quiver over all samples to obtain its scale, so the
    # per-class quivers below share a single arrow scale.
    # NOTE(review): Q._init() is a private matplotlib method.
    plt.subplots(figsize=(5, 5))
    Q = plt.quiver(train_samples_x, train_samples_y, inf_grad_x, inf_grad_y)
    Q._init()
    scale = Q.scale
    plt.close()
    fig, ax = plt.subplots(figsize=(5, 5))

    # Plot the likelihoods
    for i, c in enumerate(classes):
        # Colormap fading from transparent white to the i-th default colour.
        cmap = matplotlib.colors.LinearSegmentedColormap.from_list(
            "", [(1, 1, 1, 0), def_cmap(i)])
        # 1 where class c is predicted, 0 elsewhere.
        preds_c = (predictions == c).astype(int).reshape((res, res))
        plt.contourf(x, y, likelihoods * preds_c, levels, cmap=cmap)

    # Plot the vector field
    if true_train_labels is None:
        plt.quiver(train_samples_x, train_samples_y, inf_grad_x, inf_grad_y)
    else:
        classes = np.sort(np.unique(true_train_labels))
        if len(classes) > 10:
            raise Exception(
                "Not more than 10 distinct classes allowed for gradient plot.")
        # Get default colors
        prop_cycle = plt.rcParams['axes.prop_cycle']
        colors = prop_cycle.by_key()['color']

        for i, c in enumerate(classes):
            class_c = true_train_labels == c
            inf_grad_x_c = np.extract(class_c, inf_grad_x)
            inf_grad_y_c = np.extract(class_c, inf_grad_y)
            train_samples_x_c = np.extract(class_c, train_samples_x)
            train_samples_y_c = np.extract(class_c, train_samples_y)
            plt.quiver(train_samples_x_c,
                       train_samples_y_c,
                       inf_grad_x_c,
                       inf_grad_y_c,
                       color=colors[i],
                       scale=scale,
                       label='Class %d' % c)
            if test_sample_label == c:
                test_sample_color = colors[i]

    if test_sample is not None:
        # The line width is passed as a number (it was the string '3').
        ax.scatter(test_sample[0],
                   test_sample[1],
                   c=test_sample_color,
                   s=200,
                   edgecolors='w',
                   linewidth=3,
                   label=r'$z_{\mathrm{test}}$')

    plt.title(plot_title)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.legend()
    ax.set_xlim([0, 128])
    ax.set_ylim([0, 128])

    plot_pdf.savefig(fig)
    plt.show()
Exemple #20
0
def plot_decision_boundaries(spn,
                             marg_spn,
                             classes,
                             plot_pdf,
                             res=100,
                             test_sample=None):
    """Generates a 2D heightmap of marginal likelihoods and draws decision boundaries.

    A ``res`` x ``res`` grid over [0.5, 127.5]^2 is labelled with ``mpe`` on
    ``spn`` and scored with ``likelihood`` on ``marg_spn``. For every class
    the likelihood is drawn as a filled contour where that class is
    predicted, with a black contour at the class boundary and one colorbar
    per class.

    :param spn: SPN used for the label predictions (label is the last column)
    :param marg_spn: SPN evaluated on the two grid coordinates only
    :param classes: class values to draw, at most 10
    :param plot_pdf: object with a ``savefig(fig)`` method receiving the figure
    :param res: number of grid points per axis
    :param test_sample: optional point (x, y) to highlight
    :raises Exception: if more than 10 distinct classes are given
    """
    if len(classes) > 10:
        raise Exception(
            "Not more than 10 distinct classes allowed for decision "
            "boundary plot, but given %d." % len(classes))

    # Samples
    lin = np.linspace(0.5, 127.5, res)
    y, x = np.meshgrid(lin, lin)
    samples = np.asarray(list(itertools.product(lin, lin)), dtype=np.float32)

    # Predictions and likelihoods
    data = np.column_stack((samples, res**2 * [np.nan]))
    mpe_data = mpe(spn, data)
    predictions = mpe_data[:, -1]
    likelihoods = likelihood(marg_spn, samples).reshape((res, res)) * 100000

    # Determine colormap levels
    l_max = np.max(likelihoods)
    l_min = np.min(likelihoods)
    levels = np.linspace(l_min, l_max, 15, endpoint=True)

    # Get default colormap for default colors
    def_cmap = plt.get_cmap("tab10")

    conts = []

    # Plot the decision boundary and likelihoods
    fig, ax = plt.subplots(figsize=(5.5, 3.5))
    for i, c in enumerate(classes):
        # Colormap fading from transparent white to the i-th default colour.
        cmap = matplotlib.colors.LinearSegmentedColormap.from_list(
            "", [(1, 1, 1, 0), def_cmap(i)])
        # 1 where class c is predicted, 0 elsewhere.
        preds_c = (predictions == c).astype(int).reshape((res, res))
        conts.append(
            plt.contourf(x, y, likelihoods * preds_c, levels, cmap=cmap))
        # The 0.5 level of the 0/1 mask traces the class boundary.
        plt.contour(x, y, preds_c, [0.5], colors="k")

    if test_sample is not None:
        # The line width is passed as a number (it was the string '3').
        plt.scatter(test_sample[0],
                    test_sample[1],
                    c='gold',
                    s=200,
                    edgecolors='w',
                    linewidth=3,
                    label=r'$z_{\mathrm{test}}$')
        plt.legend()

    plt.axis('equal')
    ax.get_xaxis().set_ticks([])
    ax.get_yaxis().set_ticks([])
    plt.xlabel('x')
    plt.ylabel('y')
    ax.set_xlim([0, 128])
    ax.set_ylim([0, 128])
    divider = make_axes_locatable(ax)

    # One tick-less colorbar per class, stacked to the right of the axes.
    # The original branched on the last class but both branches were identical.
    for cont in conts:
        cax = divider.append_axes("right", size=0.3, pad=0.1)
        fig.colorbar(cont, cax=cax, ticks=[])

    plot_pdf.savefig(fig)
    plt.show()
Exemple #21
0
    Bernoulli,
    Bernoulli,
    Bernoulli,
    Bernoulli,
]).add_domains(train_data)

# NOTE(review): train_data, ds_context, learn_cnet, log_likelihood, mpe and np
# are defined or imported before this point, outside the visible code.

# Learning a CNet with a naive mle conditioning
cnet_naive_mle = learn_cnet(train_data,
                            ds_context,
                            cond="naive_mle",
                            min_instances_slice=20,
                            min_features_slice=1)

# Learning a CNet with random conditioning
cnet_random = learn_cnet(train_data,
                         ds_context,
                         cond="random",
                         min_instances_slice=20,
                         min_features_slice=1)

# Mean training log-likelihood of each model.
ll = log_likelihood(cnet_naive_mle, train_data)
print("Naive mle conditioning", np.mean(ll))

ll = log_likelihood(cnet_random, train_data)
print("Random conditioning", np.mean(ll))

# computing exact MPE
# The first column of a float copy is set to NaN and completed by mpe.
train_data_mpe = train_data.astype(float)
train_data_mpe[:, 0] = np.nan
print(mpe(cnet_random, train_data_mpe))
Exemple #22
0
    # NOTE(review): interior of a function whose header is not visible.
    # train_images, train_labels and num_train_samples come from the enclosing scope.
    num_classes = 10

    # The SPN to test
    # NOTE(review): absolute, machine-specific path.
    output_path = "/home/ml-mrothermel/projects/Interpreting-SPNs/output/spns"
    file_name = "mnist_spn_9.pckl"
    spn = load_object_from(output_path + "/" + file_name)

    # Print SPN node statistics
    print(get_structure_stats(spn))

    # ---- Model Performance Evaluation ----

    # Predict train labels
    # An all-NaN label column is appended to the images and completed by mpe.
    train_performance_data = np.column_stack(
        (train_images, [np.nan] * num_train_samples))
    train_predictions = mpe(spn, train_performance_data)
    # Column 784 is read as the label; presumably 28 * 28 pixel columns come first (TODO confirm).
    predicted_train_labels = train_predictions[:, 784]

    # Accuracy on train set
    correct_answers = train_labels == predicted_train_labels
    acc = np.count_nonzero(correct_answers) / num_train_samples

    print('\033[1mTrain set performance:\033[0m')
    print("Train sample count:", num_train_samples)
    print("Train set accuracy:", acc * 100, "%")

    print("Prediction distribution:")
    # NOTE(review): iterates over a literal 10 rather than num_classes.
    for i in range(10):
        print("    # of occurrence of", i, "in train predictions:",
              np.count_nonzero(predicted_train_labels == i))
Exemple #23
0
import matplotlib.pyplot as plt

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load iris and hold out 40% of it for testing (fixed split seed).
iris = datasets.load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.4, random_state=42)

# Append the label as an extra last column of each feature matrix.
train_data_with_labels = np.insert(
    X_train, obj=X_train.shape[1], values=y_train, axis=1)
test_data_with_labels = np.insert(
    X_test, obj=X_test.shape[1], values=y_test, axis=1)

# Learn the classifier: four Gaussian features, categorical label at index 4.
context = Context(
    parametric_types=[Gaussian, Gaussian, Gaussian, Gaussian, Categorical]
).add_domains(train_data_with_labels)
spn_classification = learn_classifier(
    train_data_with_labels, context, learn_parametric, 4)

# Render the learned structure to an image file.
plot_spn(spn_classification, 'images/iris_spn.png')

# Copy the true labels out first: items_to_predict is the same array object
# as test_data_with_labels, so masking the label column changes both names.
true_values = test_data_with_labels[:, -1].copy()
items_to_predict = test_data_with_labels
items_to_predict[:, 4] = np.nan
predicted_values = mpe(spn_classification, test_data_with_labels)
predicted_labels = predicted_values[:, 4]

acc = accuracy_score(true_values, predicted_labels)
print(acc)


Exemple #24
0
# NOTE(review): spn, dummyscope, mpe, spn_to_str_equation, str_to_spn and
# prometheus are defined or imported before this point, outside the visible code.
print(spn.sigma)

# Overwrite the node's scope before running MPE.
spn.scope = dummyscope

#print(mn.pdf(spn.mean, spn.mean, spn.cov))

print(spn.scope)

# Two query rows with four columns each; NaN marks the entries to complete.
dummydata = np.asarray([[np.nan, 2.0, np.nan, np.nan],
                        [np.nan, np.nan, np.nan, 64.3]])

print(np.shape(dummydata))
print(np.shape(np.asarray(spn.mean)))
print(np.shape(np.asarray(spn.sigma)))

print(mpe(spn, dummydata))

print(spn_to_str_equation(spn))

# Round trip: serialise to the equation string and parse it back.
recreate = (str_to_spn(spn_to_str_equation(spn)))

print(spn_to_str_equation(recreate))

print(recreate.mean)
print(recreate.sigma)

# Learn a structure string from the saved array and parse it into an SPN.
arr = np.load('./test.npy')
teststruct = prometheus(arr, 1, itermult=0, leafsize=4, maxsize=6)

testspn = str_to_spn(teststruct)
Exemple #25
0
if __name__ == '__main__':  # needed to circumvent multiprocessing RuntimeError under Windows 10
    import numpy as np
    import tensorflow as tf
    import matplotlib.backends.backend_pdf

    from spn.gpu.TensorFlow import spn_to_tf_graph  # conversion into TensorFlow representation
    from src.help_functions import *
    # mpe was imported twice. The single import is kept after the star import
    # so the name ends up bound exactly as before.
    from spn.algorithms.MPE import mpe  # most probable explanation (MPE)

    # ==== Script for MPE generation of a sum-product network (SPN) on MNIST ====

    spn_name = "mnist_spn_2"
    plot_name = "%s_mpe" % spn_name
    output_path = "output"
    plot_path = output_path + "/plots/mnist/" + plot_name

    create_dir(plot_path, force_overwrite=True)

    # The context manager closes the PDF even if loading or plotting fails.
    with matplotlib.backends.backend_pdf.PdfPages(plot_path + "/" + plot_name + ".pdf") as pdf:
        res = 28
        spn = load_object_from(output_path + "/spns/" + spn_name + ".pckl")

        for label in range(0, 10):
            # One query row: res * res missing pixel values followed by the fixed label.
            evidence = [np.append(res ** 2 * [np.nan], [label])]
            mpe_values = mpe(spn, evidence)
            # Plot the completed pixels (everything but the trailing label).
            plot_digit(mpe_values[0][:-1], res, "Label: \"%d\"" % label, pdf)
Exemple #26
0
if __name__ == '__main__':
    # Multi-label experiment on the 'yeast' data: learn a conditional SPN over
    # the labels given the inputs, then predict the test labels with MPE.
    train_input, train_labels, test_input, test_labels = get_categorical_data('yeast')

    for split in (train_input, train_labels, test_input, test_labels):
        print(split.shape)

    num_labels = train_labels.shape[1]
    label_scope = list(range(num_labels))

    ds_context = Context(parametric_types=[Conditional_Bernoulli] * num_labels)
    ds_context.add_domains(train_labels)

    # Labels occupy the first num_labels columns, the inputs follow.
    train_data = np.concatenate((train_labels, train_input), axis=1)

    cspn = learn_conditional(
        train_data,
        ds_context,
        scope=label_scope,
        rows='tsne',
        min_instances_slice=500,
        threshold=0.5,
        memory=memory,
    )

    # Test query: all label columns missing, followed by the observed inputs.
    missing_labels = np.zeros_like(test_labels, dtype=np.float32)
    missing_labels[:] = np.nan
    test_data = np.concatenate((missing_labels, test_input), axis=1)
    pred_test_labels = mpe(cspn, test_data)[:, :num_labels]

    # compare with
    # https://papers.nips.cc/paper/1964-a-kernel-method-for-multi-labelled-classification.pdf
    # Round to integer labels and raise any negative value to 0.
    binary_pred_labels = np.round(pred_test_labels).astype(int)
    negative = binary_pred_labels < 0
    binary_pred_labels[negative] = 0

    print("hamming_loss", hamming_loss(test_labels, binary_pred_labels))
    print("zero_one_loss", zero_one_loss(test_labels, binary_pred_labels))
    print("precision_score", precision_score(test_labels, binary_pred_labels, average='micro'))
Exemple #27
0
                                   values=y_train,
                                   axis=1)
test_data_with_labels = np.insert(X_test,
                                  obj=X_test.shape[1],
                                  values=y_test,
                                  axis=1)

# NOTE(review): train_data_with_labels and test_data_with_labels are built
# before this point; the start of that code is outside the visible source.

# Learn SPN
# Nine Gaussian columns in total; the target is the one at index 8.
parametric_types = [
    Gaussian, Gaussian, Gaussian, Gaussian, Gaussian, Gaussian, Gaussian,
    Gaussian, Gaussian
]
target_position = 8
context = Context(
    parametric_types=parametric_types).add_domains(train_data_with_labels)
spn = learn_classifier(train_data_with_labels, context, learn_parametric,
                       target_position)

# Plot SPN
# plot_spn(spn, 'images/california_housing_spn.png')

# Predict
# The true targets are copied out first: items_to_predict is the same array
# object as test_data_with_labels, so masking the target changes both names.
true_values = np.array(test_data_with_labels[:, -1])
items_to_predict = test_data_with_labels
items_to_predict[:, target_position] = np.nan
predicted_values = mpe(spn, test_data_with_labels)
predicted_labels = predicted_values[:, target_position]

error = mean_squared_error(true_values, predicted_labels)
print(f'MSE test: {error}')
Exemple #28
0
def _predict_and_score(spn, samples, labels, sample_count, label_idx):
    """Predict labels for ``samples`` via MPE and compare them to ``labels``.

    :return: tuple ``(predicted_labels, correct_mask, accuracy)``
    """
    # The unknown label is appended as an extra, last column filled with NaN.
    query = np.column_stack((samples, np.full(sample_count, np.nan)))
    predicted = mpe(spn, query)[:, label_idx]
    correct = np.reshape(labels, -1) == predicted
    accuracy = np.count_nonzero(correct) / sample_count
    return predicted, correct, accuracy


def evaluate_spn_performance(spn,
                             train_samples,
                             train_labels,
                             test_samples,
                             test_labels,
                             label_idx,
                             stats_file=None):
    """Measure the label-prediction accuracy of an SPN on a train and a test set.

    For each set the samples get an additional trailing NaN column, ``mpe`` is
    run on the result, and column ``label_idx`` of the MPE output is compared
    with the flattened ground-truth labels. A short accuracy report is printed
    for each set and, if ``stats_file`` is given, also written to it.

    :param spn: the Sum-Product-Network
    :param train_samples: training samples without labels
    :param train_labels: training labels (flattened before comparison)
    :param test_samples: test samples without labels
    :param test_labels: test labels (flattened before comparison)
    :param label_idx: column of the MPE output that holds the predicted label
    :param stats_file: optional object with a ``write`` method that receives
        the same report text that is printed
    :return: ``((predicted_train_labels, correct_train_answers),
        (predicted_test_labels, correct_test_answers))`` where each
        ``correct_*`` entry is the element-wise comparison of true and
        predicted labels
    """
    # Both sizes are taken up front, before any prediction is attempted.
    train_count = len(train_samples)
    test_count = len(test_samples)

    # Train set: predict, score, report.
    train_predicted, train_correct, train_acc = _predict_and_score(
        spn, train_samples, train_labels, train_count, label_idx)
    train_report = (
        "\n\nTrain set performance:"
        "\nTrain sample count: %d"
        "\nTrain set accuracy: %.2f %%"
    ) % (train_count, train_acc * 100)
    print(train_report, end='')
    if stats_file is not None:
        stats_file.write(train_report)

    # Test set: predict, score, report.
    test_predicted, test_correct, test_acc = _predict_and_score(
        spn, test_samples, test_labels, test_count, label_idx)
    test_report = (
        "\n\nTest set performance:"
        "\nTest sample count: %d"
        "\nTest set accuracy: %.2f %%\n"
    ) % (test_count, test_acc * 100)
    print(test_report)
    if stats_file is not None:
        stats_file.write(test_report)

    train_result = (train_predicted, train_correct)
    test_result = (test_predicted, test_correct)
    return train_result, test_result
        spn.children.append(branch)
        spn.weights.append(count / data.shape[0])

    spn.scope.extend(branch.scope)

    assign_ids(spn)

    print(spn)

    mpe_test = data[[0, 1, 2], :].astype(float)
    mpe_test[:, 0] = np.nan

    from spn.algorithms.MPE import mpe
    add_conditional_mpe_support()

    print(mpe(spn, mpe_test)[:, 0])

    # class TestBase(unittest.TestCase):
#
#     def test_bfs(self):
#         add_parametric_inference_support()
#
#         np.random.seed(42)
#         data = np.random.randint(low=0, high=3, size=600).reshape(-1, 3)
#
#         # print(data)
#
#         ds_context = Context(meta_types=[MetaType.DISCRETE, MetaType.DISCRETE, MetaType.DISCRETE])
#         ds_context.add_domains(data)
#         ds_context.parametric_type = [Poisson, Poisson, Categorical]
#
# Scatter the first two columns against each other, coloured by the third
# column (the one used as label column below). x and y are passed by keyword
# because seaborn >= 0.12 no longer accepts them positionally.
sns.scatterplot(x=train_data[:, 0], y=train_data[:, 1], hue=train_data[:, 2])

# %%
# We can learn an SPN from the training data:

spn_classification = learn_classifier(
    train_data,
    Context(parametric_types=[Gaussian, Gaussian, Categorical]).add_domains(
        train_data), learn_parametric, 2)

from spn.io.Graphics import draw_spn

draw_spn(spn_classification)

# %%
# Now, imagine we want to classify two instances, one located at :math:`(3,4)`
# and another one at :math:`(12,18)`. To do that, we first create an array with
# two rows and 3 columns. We set the last column to ``np.nan`` to indicate
# that we don't know the labels. And we set the rest of the values in the 2D
# array accordingly.

test_data = np.array([3.0, 4.0, np.nan, 12.0, 18.0, np.nan]).reshape(-1, 3)

# %%
# We can do classification via approximate most probable explanation (MPE).
# Here, we expect the first instance to be labeled as 0 and the second one as 1.

from spn.algorithms.MPE import mpe

print(mpe(spn_classification, test_data))