Example #1
    def test_leaf_bernoulli_bootstrap(self):
        np.random.seed(17)
        x = np.concatenate(
            (
                np.random.multivariate_normal([10, 10], np.eye(2), 100),
                np.random.multivariate_normal([1, 1], np.eye(2), 100),
            ),
            axis=0,
        )
        y = np.array([1] * 100 + [0] * 100).reshape(-1, 1)

        data = concatenate_yx(y, x)

        ds_context = Context(parametric_types=[Bernoulli])
        ds_context.feature_size = 2

        leaf = create_conditional_leaf(data, ds_context, [0])

        l = likelihood(leaf, data)
        neg_data = np.concatenate([1 - y, x], axis=1)
        lneg = likelihood(leaf, neg_data)

        np.testing.assert_array_almost_equal(l + lneg, 1.0)

        self.assertTrue(np.all(l >= 0.5))
        self.assertTrue(np.all(lneg < 0.5))
Example #2
def meu(node, input_data, node_top_down_meu=_node_top_down_meu, node_bottom_up_meu=_node_bottom_up_meu, in_place=False):
    valid, err = is_valid(node)
    assert valid, err
    if in_place:
        data = input_data
    else:
        data = np.array(input_data)

    nodes = get_nodes_by_type(node)

    lls_per_node = np.zeros((data.shape[0], len(nodes)))

    # one pass bottom up evaluating the likelihoods
    # log_likelihood(node, data, dtype=data.dtype, node_log_likelihood=node_bottom_up_meu, lls_matrix=lls_per_node)
    likelihood(node, data, dtype=data.dtype, node_likelihood=node_bottom_up_meu, lls_matrix=lls_per_node)

    meu_val = lls_per_node[:, 0]

    instance_ids = np.arange(data.shape[0])

    # one pass top-down to decide on the max branch until a leaf is reached; returns all_result and the decisions at each max node for each instance
    all_result, all_decisions = eval_spn_top_down_meu(node, node_top_down_meu, parent_result=instance_ids, data=data,
                                                      lls_per_node=lls_per_node)

    decisions = merge_rows_for_decisions(all_decisions)

    return meu_val, decisions
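A usage sketch (illustrative, not from the source; assumes `node` is a valid max-SPN built elsewhere with SPFlow):

import numpy as np

# decision variables are left as np.nan; the top-down pass picks the max branches
query = np.array([[np.nan, 1.0], [np.nan, 0.0]])
meu_values, decisions = meu(node, query)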
Example #3
    def test_leaf_no_variance_gaussian(self):
        np.random.seed(17)
        x = np.concatenate(
            (
                np.random.multivariate_normal([10, 10], np.eye(2), 500),
                np.random.multivariate_normal([1, 1], np.eye(2), 500),
            ),
            axis=0,
        )
        y = np.array([1] * 1000).reshape(-1, 1)

        data = concatenate_yx(y, x)

        ds_context = Context(parametric_types=[Gaussian])
        ds_context.feature_size = 2

        leaf = create_conditional_leaf(data, ds_context, [0])
        l = likelihood(leaf, data)
        self.assertEqual(np.var(l[:, 0]), 0)
        self.assertAlmostEqual(l[0, 0], 0.398942280401432)

        data[:, 0] = 2
        leaf = create_conditional_leaf(data, ds_context, [0])
        l = likelihood(leaf, data)
        self.assertEqual(np.var(l[:, 0]), 0)
        self.assertAlmostEqual(l[0, 0], 0.398942280401432)

        data3 = np.array(data)
        data3[:, 0] = 3
        leaf = create_conditional_leaf(data3, ds_context, [0])
        l = likelihood(leaf, data)
        self.assertAlmostEqual(np.var(l[:, 0]), 0)
        self.assertAlmostEqual(l[0, 0], 0.241970724519143)
Example #4
    def test_leaf_categorical(self):
        np.random.seed(17)
        x = np.concatenate(
            (
                np.random.multivariate_normal([20, 20], np.eye(2), 500),
                np.random.multivariate_normal([10, 10], np.eye(2), 500),
                np.random.multivariate_normal([1, 1], np.eye(2), 500),
            ),
            axis=0,
        )
        y = np.array([2] * 500 + [1] * 500 + [0] * 500).reshape(-1, 1)

        data = concatenate_yx(y, x)

        ds_context = Context(parametric_types=[Categorical])
        ds_context.feature_size = 2

        leaf = create_conditional_leaf(data, ds_context, [0])

        l0 = likelihood(leaf, concatenate_yx(np.ones_like(y) * 0, x))
        l1 = likelihood(leaf, concatenate_yx(np.ones_like(y) * 1, x))
        l2 = likelihood(leaf, concatenate_yx(np.ones_like(y) * 2, x))

        np.testing.assert_array_almost_equal(l0 + l1 + l2, 1.0)

        self.assertTrue(np.all(l0[1000:1500] > 0.85))
        self.assertTrue(np.all(l0[0:1000] < 0.15))

        self.assertTrue(np.all(l1[500:1000] > 0.85))
        self.assertTrue(np.all(l1[0:500] < 0.15))
        self.assertTrue(np.all(l1[1000:1500] < 0.15))

        self.assertTrue(np.all(l2[0:500] > 0.85))
        self.assertTrue(np.all(l2[500:1500] < 0.15))
Example #5
def meu(root,
        input_data,
        node_bottom_up_meu=_node_bottom_up_meu,
        in_place=False):
    # valid, err = is_valid(node)
    # assert valid, err
    if in_place:
        data = input_data
    else:
        data = np.copy(input_data)
    nodes = get_nodes_by_type(root)
    utility_scope = set()
    for node in nodes:
        if type(node) is Utility:
            utility_scope.add(node.scope[0])
    assert np.all(np.isnan(data[:, list(utility_scope)])), \
        "Please specify all utility values as np.nan"
    likelihood_per_node = np.zeros((data.shape[0], len(nodes)))
    meu_per_node = np.zeros((data.shape[0], len(nodes)))
    meu_per_node.fill(np.nan)
    # one pass bottom up evaluating the likelihoods
    likelihood(root, data, dtype=data.dtype, lls_matrix=likelihood_per_node)
    eval_spmn_bottom_up_meu(root,
                            _node_bottom_up_meu,
                            meu_per_node=meu_per_node,
                            data=data,
                            lls_per_node=likelihood_per_node)
    result = meu_per_node[:, root.id]
    return result
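A usage sketch (illustrative; assumes `root` is a valid SPMN whose Utility node sits on the last column):

import numpy as np

# utility columns must be np.nan on input, per the assert above
query = np.array([[1.0, 0.0, np.nan]])
meu_values = meu(root, query)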
Example #6
def Expectation(spn, feature_id, ranges, node_expectation, node_likelihood):
    def leaf_expectation(node, data, dtype=np.float64, **kwargs):
        if node.scope[0] == feature_id:
            t_node = type(node)
            if t_node in node_expectation:
                exps = np.zeros((data.shape[0], 1), dtype=dtype)
                exps[:] = node_expectation[t_node](node)
                return exps
            else:
                raise Exception("Node type unknown for expectation: " +
                                str(t_node))
        else:
            t_node = type(node)
            if t_node in node_likelihood:
                return node_likelihood[t_node](node,
                                               ranges,
                                               node_likelihood=node_likelihood)

    node_expectations = {
        type(leaf): leaf_expectation
        for leaf in get_nodes_by_type(spn, Leaf)
    }
    node_expectations.update({Sum: sum_likelihood, Product: prod_likelihood})

    expectation = likelihood(spn, ranges, node_likelihood=node_expectations)
    expectation = expectation / likelihood(
        spn, ranges, node_likelihood=node_likelihood)

    return expectation
Example #7
    def test_ll_matrix(self):
        add_node_likelihood(Leaf, sum_and_multiplier_ll)

        node_1_1_1_1 = leaf(2, 1)
        node_1_1_1_2 = leaf(2, 2)
        node_1_1_1 = 0.7 * node_1_1_1_1 + 0.3 * node_1_1_1_2
        node_1_1_2 = leaf([0, 1], 3)
        node_1_1 = node_1_1_1 * node_1_1_2
        node_1_2_1_1_1 = leaf(0, 5)
        node_1_2_1_1_2 = leaf(1, 4)
        node_1_2_1_1 = node_1_2_1_1_1 * node_1_2_1_1_2
        node_1_2_1_2 = leaf([0, 1], 6)
        node_1_2_1 = 0.1 * node_1_2_1_1 + 0.9 * node_1_2_1_2
        node_1_2_2 = leaf(2, 3)
        node_1_2 = node_1_2_1 * node_1_2_2
        spn = 0.4 * node_1_1 + 0.6 * node_1_2

        assign_ids(spn)

        max_id = max([n.id for n in get_nodes_by_type(spn)])

        data = np.random.rand(10, 10)

        node_1_1_1_1_r = data[:, 2] * 1
        node_1_1_1_2_r = data[:, 2] * 2
        node_1_1_1_r = 0.7 * node_1_1_1_1_r + 0.3 * node_1_1_1_2_r
        node_1_1_2_r = 3 * (data[:, 0] + data[:, 1])
        node_1_1_r = node_1_1_1_r * node_1_1_2_r
        node_1_2_1_1_1_r = data[:, 0] * 5
        node_1_2_1_1_2_r = data[:, 1] * 4
        node_1_2_1_1_r = node_1_2_1_1_1_r * node_1_2_1_1_2_r
        node_1_2_1_2_r = 6 * (data[:, 0] + data[:, 1])
        node_1_2_1_r = 0.1 * node_1_2_1_1_r + 0.9 * node_1_2_1_2_r
        node_1_2_2_r = data[:, 2] * 3
        node_1_2_r = node_1_2_1_r * node_1_2_2_r
        spn_r = 0.4 * node_1_1_r + 0.6 * node_1_2_r

        self.assert_correct(spn, data, spn_r)

        lls = np.zeros((data.shape[0], max_id + 1))
        likelihood(spn, data, lls_matrix=lls)
        llls = np.zeros((data.shape[0], max_id + 1))
        log_likelihood(spn, data, lls_matrix=llls)

        self.assertTrue(np.alltrue(np.isclose(lls, np.exp(llls))))

        self.assertTrue(np.alltrue(np.isclose(spn_r, lls[:, spn.id])))
        self.assertTrue(np.alltrue(np.isclose(node_1_2_r, lls[:, node_1_2.id])))
        self.assertTrue(np.alltrue(np.isclose(node_1_2_2_r, lls[:, node_1_2_2.id])))
        self.assertTrue(np.alltrue(np.isclose(node_1_2_1_r, lls[:, node_1_2_1.id])))
        self.assertTrue(np.alltrue(np.isclose(node_1_2_1_2_r, lls[:, node_1_2_1_2.id])))
        self.assertTrue(np.alltrue(np.isclose(node_1_2_1_1_r, lls[:, node_1_2_1_1.id])))
        self.assertTrue(np.alltrue(np.isclose(node_1_2_1_1_2_r, lls[:, node_1_2_1_1_2.id])))
        self.assertTrue(np.alltrue(np.isclose(node_1_2_1_1_1_r, lls[:, node_1_2_1_1_1.id])))
        self.assertTrue(np.alltrue(np.isclose(node_1_1_r, lls[:, node_1_1.id])))
        self.assertTrue(np.alltrue(np.isclose(node_1_1_2_r, lls[:, node_1_1_2.id])))
        self.assertTrue(np.alltrue(np.isclose(node_1_1_1_r, lls[:, node_1_1_1.id])))
        self.assertTrue(np.alltrue(np.isclose(node_1_1_1_2_r, lls[:, node_1_1_1_2.id])))
        self.assertTrue(np.alltrue(np.isclose(node_1_1_1_1_r, lls[:, node_1_1_1_1.id])))
Example #8
def Expectation(spn,
                feature_scope,
                evidence_scope,
                evidence,
                node_expectation=_node_expectation):
    """Compute the Expectation:

        E[X_feature_scope | X_evidence_scope] given the spn and the evidence data

    Keyword arguments:
    spn -- the spn to compute the probabilities from
    feature_scope -- set() of integers, the scope of the features to get the expectation from
    evidence_scope -- set() of integers, the scope of the evidence features
    evidence -- numpy 2d array of the evidence data
    """

    if evidence_scope is None:
        evidence_scope = set()

    assert not (len(evidence_scope) > 0 and evidence is None)

    assert len(feature_scope.intersection(evidence_scope)) == 0

    marg_spn = marginalize(spn, keep=feature_scope | evidence_scope)

    def leaf_expectation(node, data, dtype=np.float64, **kwargs):
        if node.scope[0] in feature_scope:
            t_node = type(node)
            if t_node in node_expectation:
                exps = np.zeros((data.shape[0], 1), dtype=dtype)
                exps[:] = node_expectation[t_node](node)
                return exps
            else:
                raise Exception('Node type unknown: ' + str(t_node))

        return likelihood(node, evidence)

    node_expectations = {
        type(leaf): leaf_expectation
        for leaf in get_nodes_by_type(marg_spn, Leaf)
    }
    node_expectations.update({Sum: sum_likelihood, Product: prod_likelihood})

    if evidence is None:
        # fake_evidence is a placeholder; its values are never used
        fake_evidence = np.zeros((1, len(spn.scope))).reshape(1, -1)
        expectation = likelihood(marg_spn,
                                 fake_evidence,
                                 node_likelihood=node_expectations)
        return expectation

    # if we have evidence, compute the conditional expectation
    expectation = likelihood(marg_spn,
                             evidence,
                             node_likelihood=node_expectations)
    expectation = expectation / likelihood(
        marginalize(marg_spn, keep=evidence_scope), evidence)

    return expectation
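A hedged usage sketch (illustrative; assumes SPFlow's parametric Gaussian leaves and that the default `_node_expectation` map covers Gaussian):

import numpy as np
from spn.structure.Base import assign_ids, rebuild_scopes_bottom_up
from spn.structure.leaves.parametric.Parametric import Gaussian

spn = 0.5 * (Gaussian(0.0, 1.0, scope=0) * Gaussian(1.0, 1.0, scope=1)) \
    + 0.5 * (Gaussian(5.0, 1.0, scope=0) * Gaussian(6.0, 1.0, scope=1))
assign_ids(spn)
rebuild_scopes_bottom_up(spn)

# conditional expectation E[X_0 | X_1 = 6.0]; the feature column stays np.nan
evidence = np.array([[np.nan, 6.0]])
e = Expectation(spn, feature_scope={0}, evidence_scope={1}, evidence=evidence)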
Example #9
def get_mutual_information_correlation(spn, context):
    categoricals = get_categoricals(spn, context)
    num_features = len(spn.scope)

    correlation_matrix = []

    for x in range(num_features):
        if x not in categoricals:
            correlation_matrix.append(np.full((num_features), np.nan))
        else:
            x_correlation = [np.nan] * num_features
            x_range = context.get_domains_by_scope([x])[0]
            spn_x = marginalize(spn, [x])
            query_x = np.array([[np.nan] * num_features] * len(x_range))
            query_x[:, x] = x_range
            for y in categoricals:
                if x == y:
                    x_correlation[x] = 1
                    continue
                spn_y = marginalize(spn, [y])
                spn_xy = marginalize(spn, [x, y])
                y_range = context.get_domains_by_scope([y])[0]
                query_y = np.array([[np.nan] * num_features] * len(y_range))
                query_y[:, y] = y_range
                query_xy = np.array([[np.nan] * num_features] *
                                    (len(x_range) * len(y_range)))
                xy = np.mgrid[x_range[0]:x_range[-1]:len(x_range) * 1j,
                              y_range[0]:y_range[-1]:len(y_range) * 1j]
                xy = xy.reshape(2, -1)
                query_xy[:, x] = xy[0, :]
                query_xy[:, y] = xy[1, :]
                results_xy = likelihood(spn_xy, query_xy)
                results_xy = results_xy.reshape(len(x_range), len(y_range))
                results_x = likelihood(spn_x, query_x)
                results_y = likelihood(spn_y, query_y)
                xx, yy = np.mgrid[0:len(x_range) - 1:len(x_range) * 1j,
                                  0:len(y_range) - 1:len(y_range) * 1j]
                xx = xx.astype(int)
                yy = yy.astype(int)

                grid_results_x = results_x[xx]
                grid_results_y = results_y[yy]
                grid_results_xy = results_xy

                log = np.log(
                    grid_results_xy /
                    (np.multiply(grid_results_x, grid_results_y).squeeze()))
                prod = np.prod(np.array([log, grid_results_xy]), axis=0)

                log_x = np.log(results_x)
                log_y = np.log(results_y)

                entropy_x = -1 * np.sum(np.multiply(log_x, results_x))
                entropy_y = -1 * np.sum(np.multiply(log_y, results_y))

                x_correlation[y] = (np.sum(prod) /
                                    np.sqrt(entropy_x * entropy_y))
            correlation_matrix.append(np.array(x_correlation))
    return np.array(correlation_matrix)
Example #10
    def assert_correct(self, spn, data, result):
        l = likelihood(spn, data)
        self.assertEqual(l.shape[0], data.shape[0])
        self.assertEqual(l.shape[1], 1)
        self.assertTrue(np.alltrue(np.isclose(result.reshape(-1, 1), l)))
        self.assertTrue(np.alltrue(np.isclose(np.log(l), log_likelihood(spn, data))))
        self.assertTrue(np.alltrue(np.isclose(np.log(l), log_likelihood(spn, data, debug=True))))
        self.assertTrue(np.alltrue(np.isclose(l, likelihood(spn, data, debug=True))))
Example #11
    def test_type(self):
        add_node_likelihood(Leaf, identity_ll)

        # test that we get basic computations right
        spn = 0.5 * Leaf(scope=[0, 1]) + 0.5 * (Leaf(scope=0) * Leaf(scope=1))
        data = np.random.rand(10, 4)
        l = likelihood(spn, data, dtype=np.float32)
        self.assertEqual(l.dtype, np.float32)

        l = likelihood(spn, data, dtype=np.float128)
        self.assertEqual(l.dtype, np.float128)
Example #12
def conditional_probability(spn, y_index, x_instance):
    """
    calculation of conditional probability P(Y|X)
    :param spn:
    :param y_index:      index of y in vector x_instance
    :param x_instance:   vector, with value of requested RVs(and y) and NaN of non-requested RVs
    :return:
    vector x_instance includes the value at y th index
    """
    x_without_y = np.copy(x_instance)
    x_without_y[0][y_index] = np.nan
    # P(Y|X)
    p_y_given_x = likelihood(spn, x_instance) / likelihood(spn, x_without_y)

    return p_y_given_x
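A minimal usage sketch (illustrative; assumes SPFlow's parametric Gaussian leaves):

import numpy as np
from spn.structure.Base import assign_ids, rebuild_scopes_bottom_up
from spn.structure.leaves.parametric.Parametric import Gaussian

spn = 0.5 * (Gaussian(0.0, 1.0, scope=0) * Gaussian(0.0, 1.0, scope=1)) \
    + 0.5 * (Gaussian(5.0, 1.0, scope=0) * Gaussian(5.0, 1.0, scope=1))
assign_ids(spn)
rebuild_scopes_bottom_up(spn)

# P(Y = 0.2 | X_1 = 1.1) with y at index 0; non-requested RVs would be np.nan
x_instance = np.array([[0.2, 1.1]])
p_y_given_x = conditional_probability(spn, 0, x_instance)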
Example #13
    def test_piecewise_linear_multiplied(self):
        piecewise_spn = (
            0.5 * PiecewiseLinear([0, 1, 2], [0, 1, 0], [], scope=[0]) +
            0.5 * PiecewiseLinear([-2, -1, 0], [0, 1, 0], [], scope=[0])) * (
                0.5 * PiecewiseLinear([0, 1, 2], [0, 1, 0], [], scope=[1]) +
                0.5 * PiecewiseLinear([-2, -1, 0], [0, 1, 0], [], scope=[1]))

        evidence = np.array([
            [-2, -2],
            [-1.5, -1.5],
            [-1, -1],
            [-0.5, -0.5],
            [0, 0],
            [0.5, 0.5],
            [1, 1],
            [1.5, 1.5],
            [2, 2],
            [3, 3],
            [-3, -3],
            [0, 100],
        ])
        results = likelihood(piecewise_spn, evidence)
        expected_results = np.array([[0], [0.25], [0.5], [0.25], [0], [0.25],
                                     [0.5], [0.25], [0], [0], [0], [0]])**2
        self.assertTrue(np.all(np.equal(results, expected_results)))
Example #14
    def _density(self, x):
        # map all inputs from categorical to numeric values
        for i in range(len(x)):
            if self.names[i] in self._categorical_variables:
                inverse_mapping = self._categorical_variables[self.names[i]]['name_to_int']
                x[i] = inverse_mapping[x[i]]

            # if variable has integer representation round
            elif self.data.dtypes[i] == int:
                x[i] = round(x[i])

        # Copy the current state of the network
        input = self._density_mask.copy()
        counter = 0
        for i in range(input.shape[1]):
            # if variable on index i is not conditioned or marginalized
            # set input i to the value in the input array indicated by counter
            if input[0, i] == 2:
                input[0, i] = x[counter]
                counter += 1
            # if the variable i is conditioned set the input value to the condition
            elif input[0, i] == 1:
                input[0, i] = self._condition[0, i]
            # else the value of input at index i is np.nan by initialization and indicates a marginalized variable

        res = likelihood(self._spn, input)
        return res[0][0]
Example #15
    def assert_correct_node_sampling_continuous(self, node, samples, plot):
        node.scope = [0]
        rand_gen = np.random.RandomState(1234)
        samples_gen = sample_parametric_node(node, 1000000, rand_gen)

        if plot:
            import matplotlib.pyplot as plt
            fig, ax = plt.subplots(1, 1)

            x = np.linspace(np.min(samples), np.max(samples), 1000)
            ax.plot(x, likelihood(node, x.reshape(-1, 1)), 'r-', lw=2, alpha=0.6,
                    label=node.__class__.__name__ + ' pdf')
            ax.hist(samples, density=True, histtype='stepfilled', alpha=0.7, bins=1000)
            ax.legend(loc='best', frameon=False)
            plt.show()

        scipy_obj, params = get_scipy_obj_params(node)
        # H_0: the sample and the node's distribution are identical
        test_outside_samples = kstest(samples, lambda x: scipy_obj.cdf(x, **params))
        # reject H_0 if p < 0.05; the test passes if p >= 0.05
        self.assertGreaterEqual(test_outside_samples.pvalue, 0.05)

        test_generated_samples = kstest(samples_gen, lambda x: scipy_obj.cdf(x, **params))
        # reject H_0 if p < 0.05; the test passes if p >= 0.05
        self.assertGreaterEqual(test_generated_samples.pvalue, 0.05)
Example #16
        def execute_python():
            start = time.perf_counter()
            py_ll = likelihood(spn, test_data)
            end = time.perf_counter()
            elapsed = (end - start)

            return py_ll, elapsed * 1000000000  # convert seconds to nanoseconds
Example #17
def plot_density(spn, data):
    import matplotlib.pyplot as plt
    import numpy as np
    from matplotlib import cm
    from matplotlib.colors import PowerNorm

    x_max = data[:, 0].max()
    x_min = data[:, 0].min()
    y_max = data[:, 1].max()
    y_min = data[:, 1].min()

    nbinsx = int(x_max - x_min)
    nbinsy = int(y_max - y_min)
    xi, yi = np.mgrid[x_min:x_max:nbinsx * 1j, y_min:y_max:nbinsy * 1j]

    spn_input = np.vstack([xi.flatten(), yi.flatten()]).T

    marg_spn = marginalize(spn, set([0, 1]))

    zill = likelihood(marg_spn, spn_input)

    z = zill.reshape(xi.shape)

    # Make the plot
    # plt.pcolormesh(xi, yi, z)

    plt.imshow(z + 1,
               extent=(x_min, x_max, y_min, y_max),
               cmap=cm.hot,
               norm=PowerNorm(gamma=1. / 5.))
    # plt.pcolormesh(xi, yi, z)
    plt.colorbar()
    plt.show()
Example #18
    def test_leaf_gaussian(self):
        np.random.seed(17)
        x = np.concatenate(
            (
                np.random.multivariate_normal([10, 10], np.eye(2), 5000),
                np.random.multivariate_normal([1, 1], np.eye(2), 5000),
            ),
            axis=0,
        )
        y = np.array(
            np.random.normal(20, 2, 5000).tolist() +
            np.random.normal(60, 2, 5000).tolist()).reshape(-1, 1)

        # associates y=20 with X=[10,10]
        # associates y=60 with X=[1,1]

        data = concatenate_yx(y, x)

        ds_context = Context(parametric_types=[Gaussian])
        ds_context.feature_size = 2

        leaf = create_conditional_leaf(data, ds_context, [0])

        self.assertFalse(np.any(np.isnan(likelihood(leaf, data))))

        self.assertGreater(get_ll(leaf, [20, 10, 10]),
                           get_ll(leaf, [20, 1, 1]))
        self.assertGreater(get_ll(leaf, [60, 1, 1]),
                           get_ll(leaf, [60, 10, 10]))
        self.assertAlmostEqual(get_ll(leaf, [60, 1, 1]), 0.3476232862652)
        self.assertAlmostEqual(get_ll(leaf, [20, 10, 10]), 0.3628922322773634)
Example #19
    def test_sum_one_dimension(self):
        add_node_likelihood(Leaf, identity_ll)

        # test that we get basic computations right
        spn = 0.5 * Leaf(scope=0) + 0.5 * Leaf(scope=0)
        data = np.random.rand(10, 1)
        self.assert_correct(spn, data, data)

        spn = 0.1 * Leaf(scope=0) + 0.9 * Leaf(scope=0)
        data = np.random.rand(10, 1)
        self.assert_correct(spn, data, data)

        # test that we can pass whatever dataset, and the scopes are being respected
        # this is important for inner nodes
        spn = 0.1 * Leaf(scope=0) + 0.9 * Leaf(scope=0)
        data = np.random.rand(10, 3)
        r = 0.1 * data[:, 0] + 0.9 * data[:, 0]
        r = r.reshape(-1, 1)
        self.assert_correct(spn, data, r)

        # test that it fails if the weights are not normalized
        spn = 0.1 * Leaf(scope=0) + 0.9 * Leaf(scope=0)
        spn.weights[1] = 0.2
        data = np.random.rand(10, 3)
        with self.assertRaises(AssertionError):
            l = likelihood(spn, data)
        with self.assertRaises(AssertionError):
            log_likelihood(spn, data)

        # test the log space
        spn = 0.1 * Leaf(scope=0) + 0.9 * Leaf(scope=0)
        data = np.random.rand(10, 3)
        r = 0.1 * data[:, 0] + 0.9 * data[:, 0]
        r = r.reshape(-1, 1)
        self.assert_correct(spn, data, r)
Example #20
    def test_piecewise_linear_constant(self):
        piecewise_spn = ((0.5 * PiecewiseLinear([1, 2], [1, 1], [], scope=[0]) +
                          0.5 * PiecewiseLinear([-2, -1], [1, 1], [], scope=[0])))

        evidence = np.array([[-3000]])
        results = likelihood(piecewise_spn, evidence)
        expected_results = np.array([[1]])
        self.assertTrue(np.all(np.equal(results, expected_results)))
Example #21
    def test_piecewise_linear_simple(self):
        piecewise_spn = 0.5 * PiecewiseLinear([0, 1, 2], [0, 1, 0], [], scope=[0]) + \
                        0.5 * PiecewiseLinear([-2, -1, 0], [0, 1, 0], [], scope=[0])

        evidence = np.array([[-2], [-1.5], [-1], [-0.5], [0], [0.5], [1], [1.5], [2], [3], [-3]])
        results = likelihood(piecewise_spn, evidence)
        expected_results = np.array([[0], [0.25], [0.5], [0.25], [0], [0.25], [0.5], [0.25], [0], [0], [0]])
        self.assertTrue(np.all(np.equal(results, expected_results)))
Example #22
    def test_sum_multiple_dimension(self):
        add_node_likelihood(Leaf, identity_ll)

        # test basic computations in multiple dimensions
        spn = 0.5 * Leaf(scope=[0, 1]) + 0.5 * Leaf(scope=[0, 1])
        data = np.random.rand(10, 2)
        l = likelihood(spn, data)
        self.assert_correct(spn, data, data[:, 0] * data[:, 1])
Example #23
def plot_likelihoods(spn, classes, plot_pdf, res=100, test_sample=None):
    """Generates for each class a 2D heightmap of likelihoods."""
    if len(classes) > 10:
        raise Exception(
            "Not more than 10 distinct classes allowed for likelihood "
            "plot, but given %d." % len(classes))

    # Samples
    lin = np.linspace(0.5, 127.5, res)
    xgrid, ygrid = np.meshgrid(lin, lin)
    samples = np.asarray(list(itertools.product(lin, lin)), dtype=np.float32)

    # Compute likelihood values for each sample regarding each class
    n = res**2
    likelihoods = np.empty((0, n))
    for c in classes:
        data = np.column_stack((samples, c * np.ones(n)))
        likelihoods_of_c = likelihood(spn, data).reshape((1, n)) * 100000
        likelihoods = np.append(likelihoods, likelihoods_of_c, axis=0)

    # Determine colormap levels
    l_max = np.max(likelihoods)
    l_min = np.min(likelihoods)
    levels = np.linspace(l_min, l_max, 15, endpoint=True)

    # Get default colormap for default colors
    def_cmap = plt.get_cmap("tab10")

    # Plot the likelihoods
    for i, c in enumerate(classes):
        fig, ax = plt.subplots(figsize=(4, 3.5))
        zgrid = griddata(samples, likelihoods[i], (xgrid, ygrid))
        cmap = matplotlib.colors.LinearSegmentedColormap.from_list(
            "", [(1, 1, 1, 0), def_cmap(i)])
        cont = plt.contourf(xgrid, ygrid, zgrid, levels, cmap=cmap)
        if test_sample is not None:
            ax.scatter(test_sample[0],
                       test_sample[1],
                       c='gold',
                       s=200,
                       edgecolors='w',
                       linewidth=3,
                       label=r'$z_{\mathrm{test}}$')
            plt.legend()
        # plt.title('Learned Probability Distribution\nfor Class %d' % c)
        plt.xlabel('x')
        plt.ylabel('y')
        plt.axis('equal')
        ax.set_xlim([0, 128])
        ax.set_ylim([0, 128])
        ax.get_xaxis().set_ticks([])
        ax.get_yaxis().set_ticks([])
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size=0.3, pad=0.1)
        fig.colorbar(cont, cax=cax, ticks=[])
        plot_pdf.savefig(fig)
        plt.show()
Example #24
    def assert_correct(self, node, x, result):
        self.tested.add(type(node))

        data = np.array([x], dtype=float).reshape(-1, 1)
        node.scope = [0]
        l = likelihood(node, data)
        self.assertAlmostEqual(result, l[0, 0], 5)
        self.assertTrue(np.alltrue(np.isclose(np.log(l), log_likelihood(node, data))))

        data = np.random.rand(10, 10)
        data[:, 5] = x
        node.scope = [5]
        l = likelihood(node, data)
        self.assertEqual(l.shape[0], data.shape[0])
        self.assertEqual(l.shape[1], 1)
        self.assertTrue(np.isclose(np.var(l), 0))
        self.assertTrue(np.alltrue(np.isclose(result, l[0, 0])))
        self.assertTrue(np.alltrue(np.isclose(np.log(l), log_likelihood(node, data))))
Example #25
    def assert_correct_node_sampling_discrete(self, node, samples, plot):
        node.scope = [0]
        rand_gen = np.random.RandomState(1234)
        samples_gen = sample_parametric_node(node, 1000000, rand_gen)

        fvals, fobs = np.unique(samples, return_counts=True)

        # H_0: the observed data comes from the node's distribution
        test_outside_samples = chisquare(fobs, (likelihood(node, fvals.reshape(-1, 1)) * samples.shape[0])[:, 0])
        # reject H_0 if p < 0.05; the test passes if p >= 0.05
        self.assertGreaterEqual(test_outside_samples.pvalue, 0.05)

        fvals, fobs = np.unique(samples_gen, return_counts=True)

        test_generated_samples = chisquare(fobs, (likelihood(node, fvals.reshape(-1, 1)) * samples_gen.shape[0])[:, 0])
        # reject H_0 if p < 0.05; the test passes if p >= 0.05
        self.assertGreaterEqual(test_generated_samples.pvalue, 0.05)
Example #26
def approximate_density(dist_node, X, bins=100):
    if dist_node.type.meta_type == MetaType.DISCRETE:
        x = np.arange(int(np.nanmin(X)), int(np.nanmax(X)) + 1)
    else:
        x = np.linspace(np.nanmin(X), np.nanmax(X), bins)
    x = x.reshape(-1, 1)
    y = likelihood(dist_node, x)
    return x[:, 0], y[:, 0]
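A usage sketch (illustrative; assumes a fitted SPFlow parametric Gaussian leaf):

import numpy as np
from spn.structure.leaves.parametric.Parametric import Gaussian

X = np.random.normal(5.0, 2.0, size=1000)
node = Gaussian(mean=5.0, stdev=2.0, scope=0)
xs, ys = approximate_density(node, X, bins=50)  # xs: evaluation grid, ys: densities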
Example #27
    def leaf_expectation(node, data, dtype=np.float64, **kwargs):
        if node.scope[0] in feature_scope:
            t_node = type(node)
            if t_node in node_expectation:
                exps = np.zeros((data.shape[0], 1), dtype=dtype)
                exps[:] = node_expectation[t_node](node)
                return exps
            else:
                raise Exception('Node type unknown: ' + str(t_node))

        return likelihood(node, evidence)
Example #28
def expectation_recursive_batch(node, feature_scope, inverted_features,
                                relevant_scope, evidence, node_expectation,
                                node_likelihoods):
    if isinstance(node, Product):

        llchildren = np.concatenate([
            expectation_recursive_batch(
                child, feature_scope, inverted_features, relevant_scope,
                evidence, node_expectation, node_likelihoods)
            for child in node.children
            if len(relevant_scope.intersection(child.scope)) > 0
        ],
                                    axis=1)
        return np.nanprod(llchildren, axis=1).reshape(-1, 1)

    elif isinstance(node, Sum):
        if len(relevant_scope.intersection(node.scope)) == 0:
            return np.full((evidence.shape[0], 1), np.nan)

        llchildren = np.concatenate([
            expectation_recursive_batch(
                child, feature_scope, inverted_features, relevant_scope,
                evidence, node_expectation, node_likelihoods)
            for child in node.children
        ],
                                    axis=1)

        relevant_children_idx = np.where(~np.isnan(llchildren[0]))[0]
        if len(relevant_children_idx) == 0:
            return np.array([np.nan])

        weights_normalizer = sum(node.weights[j]
                                 for j in relevant_children_idx)
        b = np.array(node.weights)[relevant_children_idx] / weights_normalizer

        return np.dot(llchildren[:, relevant_children_idx], b).reshape(-1, 1)

    else:
        if node.scope[0] in feature_scope:
            t_node = type(node)
            if t_node in node_expectation:
                exps = np.zeros((evidence.shape[0], 1))

                feature_idx = feature_scope.index(node.scope[0])
                inverted = inverted_features[feature_idx]

                exps[:] = node_expectation[t_node](node,
                                                   evidence,
                                                   inverted=inverted)
                return exps
            else:
                raise Exception('Node type unknown: ' + str(t_node))

        return likelihood(node, evidence, node_likelihood=node_likelihoods)
Example #29
    def leaf_expectation(node, data, dtype=np.float64, **kwargs):

        if node.scope[0] in feature_scope:
            t_node = type(node)
            if t_node in node_distinct_vals:
                vals = node_distinct_vals[t_node](node, evidence)
                return vals
            else:
                raise Exception('Node type unknown: ' + str(t_node))

        return likelihood(node, evidence, node_likelihood=node_likelihoods)
Example #30
    def assert_correct(self, node, x, result):
        self.tested.add(type(node))

        data = np.array([x], dtype=float).reshape(1, -1)
        node.scope = list(range(data.shape[1]))
        l = likelihood(node, data)
        self.assertAlmostEqual(result, l[0, 0], 5)
        self.assertTrue(
            np.alltrue(np.isclose(np.log(l), log_likelihood(node, data))))

        new_scope = (np.array(node.scope) + 5).tolist()
        data = np.random.rand(10, max(new_scope) + 2)
        data[:, new_scope] = x
        node.scope = new_scope
        l = likelihood(node, data)
        self.assertEqual(l.shape[0], data.shape[0])
        self.assertEqual(l.shape[1], 1)
        self.assertTrue(np.isclose(np.var(l), 0))
        self.assertTrue(np.alltrue(np.isclose(result, l[0, 0])))
        self.assertTrue(
            np.alltrue(np.isclose(np.log(l), log_likelihood(node, data))))