Example 1
    def test_leaf_categorical(self):
        np.random.seed(17)
        x = np.concatenate(
            (
                np.random.multivariate_normal([20, 20], np.eye(2), 500),
                np.random.multivariate_normal([10, 10], np.eye(2), 500),
                np.random.multivariate_normal([1, 1], np.eye(2), 500),
            ),
            axis=0,
        )
        y = np.array([2] * 500 + [1] * 500 + [0] * 500).reshape(-1, 1)

        data = concatenate_yx(y, x)

        ds_context = Context(parametric_types=[Categorical])
        ds_context.feature_size = 2

        leaf = create_conditional_leaf(data, ds_context, [0])

        l0 = likelihood(leaf, concatenate_yx(np.ones_like(y) * 0, x))
        l1 = likelihood(leaf, concatenate_yx(np.ones_like(y) * 1, x))
        l2 = likelihood(leaf, concatenate_yx(np.ones_like(y) * 2, x))

        np.testing.assert_array_almost_equal(l0 + l1 + l2, 1.0)

        self.assertTrue(np.all(l0[1000:1500] > 0.85))
        self.assertTrue(np.all(l0[0:1000] < 0.15))

        self.assertTrue(np.all(l1[500:1000] > 0.85))
        self.assertTrue(np.all(l1[0:500] < 0.15))
        self.assertTrue(np.all(l1[1000:1500] < 0.15))

        self.assertTrue(np.all(l2[0:500] > 0.85))
        self.assertTrue(np.all(l2[500:1500] < 0.15))
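Every example on this page funnels labels and conditioning features through concatenate_yx and get_YX, neither of which is shown here. Judging from the equivalent np.concatenate([1 - y, x], axis=1) in Example 7 and the column arithmetic in Example 22, they presumably behave like the following sketch (an assumed reconstruction, not the library's actual code):

import numpy as np

def concatenate_yx(y, x):
    # Assumed behavior: label block y first, conditioning features x after.
    # column_stack also accepts a 1-D y, as some examples pass y[:, i].
    return np.column_stack([y, x])

def get_YX(data, feature_size):
    # Assumed inverse: the last `feature_size` columns are x, the rest are y.
    return data[:, :-feature_size], data[:, -feature_size:]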
Example 2
def remove_non_informative_features(data=None,
                                    node_id=0,
                                    scope=None,
                                    context=None,
                                    uninformative_features_idx=None,
                                    **kwargs):
    assert uninformative_features_idx is not None, "parameter uninformative_features_idx can't be None"

    prod_node = Product()
    prod_node.scope = scope
    prod_node.id = node_id

    y, x = get_YX(data, context.feature_size)

    non_zero_variance_rvs = []
    non_zero_variance_idx = []
    result = []
    for idx, zero_var in enumerate(uninformative_features_idx):
        rv = scope[idx]

        if not zero_var:
            non_zero_variance_rvs.append(rv)
            non_zero_variance_idx.append(idx)
            continue

        prod_node.children.append(None)
        data_slice = concatenate_yx(y[:, idx].reshape(-1, 1), x)
        result.append((
            SplittingOperations.CREATE_LEAF_NODE,
            {
                "data": data_slice,
                "parent_id": prod_node.id,
                "pos": len(prod_node.children) - 1,
                "scope": [rv],
            },
        ))
    assert len(result) > 0
    if len(non_zero_variance_idx) > 0:
        prod_node.children.append(None)
        result.append((
            SplittingOperations.GET_NEXT_OP,
            {
                "data": concatenate_yx(data[:, non_zero_variance_idx], x),
                "parent_id": prod_node.id,
                "pos": len(prod_node.children) - 1,
                "scope": non_zero_variance_rvs,
            },
        ))

    return prod_node, result
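The uninformative_features_idx argument is a boolean mask over the y columns, one entry per scope variable; test_remove_non_informative_features (Example 22) builds it from a zero-variance check. A minimal illustration of the expected input:

import numpy as np

y = np.array([[1, 3, 0],
              [1, 3, 5],
              [1, 3, 9]])
mask = np.var(y, axis=0) == 0  # -> [ True,  True, False]
# True columns each get their own CREATE_LEAF_NODE task; the remaining
# columns are re-queued together under a single GET_NEXT_OP task.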
Example 3
def naive_factorization(data=None,
                        node_id=0,
                        context=None,
                        scope=None,
                        **kwargs):
    assert scope is not None, "No scope"

    prod_node = Product()
    prod_node.scope = scope
    prod_node.id = node_id

    y, x = get_YX(data, context.feature_size)

    result = []
    for i, rv in enumerate(scope):
        prod_node.children.append(None)
        data_slice = concatenate_yx(y[:, i].reshape(-1, 1), x)
        result.append((
            SplittingOperations.CREATE_LEAF_NODE,
            {
                "data": data_slice,
                "parent_id": prod_node.id,
                "pos": len(prod_node.children) - 1,
                "scope": [rv],
            },
        ))

    return prod_node, result
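naive_factorization is the fully factorized fallback: one CREATE_LEAF_NODE task per scope variable, each pairing a single y column with the whole x block, so p(y|x) is modeled as a product of univariate conditionals. A sketch of a call, assuming the helpers reconstructed above and the Context usage from the tests:

import numpy as np

y = np.random.randint(0, 2, (100, 3))  # three label columns
x = np.random.rand(100, 2)

ctx = Context()  # Context configured as in the tests on this page
ctx.feature_size = x.shape[1]

node, tasks = naive_factorization(data=concatenate_yx(y, x),
                                  node_id=0, context=ctx,
                                  scope=[1, 3, 4])
assert len(tasks) == len(node.children) == 3
assert tasks[0][1]["scope"] == [1]  # one leaf task per label column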
Example 4
    def test_leaf_mpe_conditional(self):
        np.random.seed(17)
        x = np.concatenate(
            (
                np.random.multivariate_normal([10, 10], np.eye(2), 5000),
                np.random.multivariate_normal([1, 1], np.eye(2), 5000),
            ),
            axis=0,
        )
        y = np.array([0] * 5000 + [1] * 5000).reshape(-1, 1)

        # associates y=0 with X=[10,10]
        # associates y=1 with X=[1,1]

        data = concatenate_yx(y, x)

        cspn = CSPNClassifier([Bernoulli] * y.shape[1], min_instances_slice=4990, cluster_univariate=True)
        cspn.fit(x, y)

        res = mpe(cspn.cspn, np.array([np.nan, 10, 10]).reshape(-1, 3))
        self.assertAlmostEqual(res[0, 0], 0)

        res = mpe(cspn.cspn, np.array([np.nan, 1, 1]).reshape(-1, 3))
        self.assertAlmostEqual(res[0, 0], 1)

        res = mpe(cspn.cspn, np.array([np.nan, 1, 1, np.nan, 10, 10]).reshape(-1, 3))
        self.assertAlmostEqual(res[0, 0], 1)
        self.assertAlmostEqual(res[1, 0], 0)

        with self.assertRaises(AssertionError):
            mpe(cspn.cspn, np.array([np.nan, 1, 1, np.nan, 10, 10, 5, 10, 10]).reshape(-1, 3))
Example 5
    def test_leaf_no_variance_gaussian(self):
        np.random.seed(17)
        x = np.concatenate(
            (
                np.random.multivariate_normal([10, 10], np.eye(2), 500),
                np.random.multivariate_normal([1, 1], np.eye(2), 500),
            ),
            axis=0,
        )
        y = np.array([1] * 1000).reshape(-1, 1)

        data = concatenate_yx(y, x)

        ds_context = Context(parametric_types=[Gaussian])
        ds_context.feature_size = 2

        leaf = create_conditional_leaf(data, ds_context, [0])
        l = likelihood(leaf, data)
        self.assertEqual(np.var(l[:, 0]), 0)
        self.assertAlmostEqual(l[0, 0], 0.398942280401432)

        data[:, 0] = 2
        leaf = create_conditional_leaf(data, ds_context, [0])
        l = likelihood(leaf, data)
        self.assertEqual(np.var(l[:, 0]), 0)
        self.assertAlmostEqual(l[0, 0], 0.398942280401432)

        data3 = np.array(data)
        data3[:, 0] = 3
        leaf = create_conditional_leaf(data3, ds_context, [0])
        l = likelihood(leaf, data)
        self.assertAlmostEqual(np.var(l[:, 0]), 0)
        self.assertAlmostEqual(l[0, 0], 0.241970724519143)
Example 6
    def test_naive_factorization(self):
        np.random.seed(17)
        data = np.arange(0, 1000).reshape(-1, 8)

        parent = Sum()
        parent.children.append(None)

        ctx = Context()
        ctx.feature_size = 4

        scope = [1, 3, 4, 6]
        data2 = np.array(data)
        result = naive_factorization(data=data2,
                                     parent=parent,
                                     pos=0,
                                     context=ctx,
                                     scope=list(scope))

        self.assertListEqual(data.tolist(), data2.tolist())

        self.assertEqual(parent.children[0], result[0][1]['parent'])

        y, x = get_YX(data, 4)

        self.assertEqual(len(result), len(scope))
        for i, s in enumerate(scope):
            r = result[i]
            self.assertEqual(len(r), 2)
            self.assertEqual(r[0], SplittingOperations.CREATE_LEAF_NODE)
            self.assertEqual(type(r[1]['parent']), Product)
            self.assertEqual(r[1]['pos'], i)
            self.assertListEqual(r[1]['scope'], [s])
            self.assertListEqual(r[1]['data'].tolist(),
                                 concatenate_yx(y[:, i], x).tolist())
Example 7
    def test_leaf_bernoulli_bootstrap(self):
        np.random.seed(17)
        x = np.concatenate(
            (
                np.random.multivariate_normal([10, 10], np.eye(2), 100),
                np.random.multivariate_normal([1, 1], np.eye(2), 100),
            ),
            axis=0,
        )
        y = np.array([1] * 100 + [0] * 100).reshape(-1, 1)

        data = concatenate_yx(y, x)

        ds_context = Context(parametric_types=[Bernoulli])
        ds_context.feature_size = 2

        leaf = create_conditional_leaf(data, ds_context, [0])

        l = likelihood(leaf, data)
        neg_data = np.concatenate([1 - y, x], axis=1)
        lneg = likelihood(leaf, neg_data)

        np.testing.assert_array_almost_equal(l + lneg, 1.0)

        self.assertTrue(np.all(l >= 0.5))
        self.assertTrue(np.all(lneg < 0.5))
Example 8
    def fit(self, X, y=None):
        self.context = Context(
            parametric_types=self.parametric_types).add_domains(y)
        self.context.feature_size = X.shape[1]
        self.num_labels = y.shape[1]

        def label_conditional(y, x):
            from sklearn.cluster import KMeans

            clusters = KMeans(n_clusters=2,
                              random_state=17,
                              precompute_distances=True).fit_predict(x)
            return clusters

        self.cspn = learn_cspn_structure(
            concatenate_yx(y, X),
            self.context,
            split_rows=get_split_rows_conditional_Gower(),
            # split_rows=get_split_rows_KMeans(),
            # split_cols=get_split_cols_RDC_py(),
            split_cols=getCIGroup(alpha=self.alpha),
            # create_leaf=create_leaf_node,
            create_leaf=create_conditional_leaf,
            label_conditional=label_conditional,
            **self.kwargs)

        return self
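The label_conditional callback receives the label block and the conditioning features and must return one cluster id per row; any clusterer honoring that contract can be dropped in (Example 13, for instance, clusters on y alone). A hedged sketch of an alternative that clusters jointly on labels and features:

import numpy as np
from sklearn.cluster import KMeans

def label_conditional(y, x):
    # sketch: cluster on [y, x] jointly instead of on x alone
    return KMeans(n_clusters=2,
                  random_state=17).fit_predict(np.column_stack([y, x]))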
Example 9
    def test_leaf_mpe_bernoulli(self):
        np.random.seed(17)
        x = np.concatenate(
            (
                np.random.multivariate_normal([10, 10], np.eye(2), 5000),
                np.random.multivariate_normal([1, 1], np.eye(2), 5000),
            ),
            axis=0,
        )
        y = np.array([0] * 5000 + [1] * 5000).reshape(-1, 1)

        # associates y=0 with X=[10,10]
        # associates y=1 with X=[1,1]

        data = concatenate_yx(y, x)

        ds_context = Context(parametric_types=[Bernoulli])
        ds_context.feature_size = 2

        leaf = create_conditional_leaf(data, ds_context, [0])

        res = mpe(leaf, np.array([np.nan, 10, 10]).reshape(-1, 3))
        self.assertAlmostEqual(res[0, 0], 0)

        res = mpe(leaf, np.array([np.nan, 1, 1]).reshape(-1, 3))
        self.assertAlmostEqual(res[0, 0], 1)

        res = mpe(leaf, np.array([np.nan, 1, 1, np.nan, 10, 10]).reshape(-1, 3))
        self.assertAlmostEqual(res[0, 0], 1)
        self.assertAlmostEqual(res[1, 0], 0)

        with self.assertRaises(AssertionError):
            mpe(leaf, np.array([np.nan, 1, 1, np.nan, 10, 10, 5, 10, 10]).reshape(-1, 3))
Example 10
    def test_leaf_mpe_gaussian(self):
        np.random.seed(17)
        x = np.concatenate(
            (
                np.random.multivariate_normal([10, 10], np.eye(2), 5000),
                np.random.multivariate_normal([1, 1], np.eye(2), 5000),
            ),
            axis=0,
        )
        y = np.array(np.random.normal(20, 2, 5000).tolist() + np.random.normal(60, 2, 5000).tolist()).reshape(-1, 1)

        # associates y=20 with X=[10,10]
        # associates y=60 with X=[1,1]

        data = concatenate_yx(y, x)

        ds_context = Context(parametric_types=[Gaussian])
        ds_context.feature_size = 2

        # leaf = create_conditional_leaf(data, ds_context, [0])
        leaf = create_parametric_leaf(data, ds_context, [0])

        res = mpe(leaf, np.array([np.nan, 10, 10]).reshape(-1, 3))
        self.assertAlmostEqual(res[0, 0], 20.435226001909466)

        res = mpe(leaf, np.array([np.nan, 1, 1]).reshape(-1, 3))
        self.assertAlmostEqual(res[0, 0], 59.4752193542575)

        res = mpe(leaf, np.array([np.nan, 1, 1, np.nan, 10, 10]).reshape(-1, 3))
        self.assertAlmostEqual(res[0, 0], 59.4752193542575)
        self.assertAlmostEqual(res[1, 0], 20.435226001909466)

        with self.assertRaises(AssertionError):
            mpe(leaf, np.array([np.nan, 1, 1, np.nan, 10, 10, 5, 10, 10]).reshape(-1, 3))
Example 11
    def test_datasets(self):
        # start with jester
        ds = "jester"
        ev = "ev80"
        name, features, validation, train, test, n_discrete, n_bernoulli = get_binary_data(
            ds)
        _, features_msk, validation_msk, train_msk, test_msk, n_discrete_msk, n_bernoulli_msk = get_binary_mask(
            ds, ev)
        col_msk = np.isnan(train_msk)[0]
        train_x, valid_x, test_x = train[:, ~col_msk], validation[:, ~col_msk], test[:, ~col_msk]
        train_y, valid_y, test_y = train[:, col_msk], validation[:, col_msk], test[:, col_msk]

        cspn = CSPNClassifier(parametric_types=[Bernoulli] * train_y.shape[1],
                              alpha=0.0001,
                              min_splitting_instances=3000,
                              min_clustering_instances=2000)
        cspn.fit(train_x, y=train_y)

        ll = cspn.score_samples(concatenate_yx(test_y, test_x))

        print(ll.mean())
Example 12
    def test_leaf_gaussian(self):
        np.random.seed(17)
        x = np.concatenate(
            (
                np.random.multivariate_normal([10, 10], np.eye(2), 5000),
                np.random.multivariate_normal([1, 1], np.eye(2), 5000),
            ),
            axis=0,
        )
        y = np.array(
            np.random.normal(20, 2, 5000).tolist() +
            np.random.normal(60, 2, 5000).tolist()).reshape(-1, 1)

        # associates y=20 with X=[10,10]
        # associates y=60 with X=[1,1]

        data = concatenate_yx(y, x)

        ds_context = Context(parametric_types=[Gaussian])
        ds_context.feature_size = 2

        leaf = create_conditional_leaf(data, ds_context, [0])

        self.assertFalse(np.any(np.isnan(likelihood(leaf, data))))

        self.assertGreater(get_ll(leaf, [20, 10, 10]),
                           get_ll(leaf, [20, 1, 1]))
        self.assertGreater(get_ll(leaf, [60, 1, 1]),
                           get_ll(leaf, [60, 10, 10]))
        self.assertAlmostEqual(get_ll(leaf, [60, 1, 1]), 0.3476232862652)
        self.assertAlmostEqual(get_ll(leaf, [20, 10, 10]), 0.3628922322773634)
Example 13
    def test_conditional(self):
        labels = np.r_[np.zeros((500, 1)), np.ones((500, 1))]
        features = np.r_[np.random.normal(5, 1, (500, 2)),
                         np.random.normal(10, 1, (500, 2))]

        train_data = concatenate_yx(labels, features)

        ds_context = Context(
            parametric_types=[Bernoulli] * labels.shape[1]
        ).add_domains(labels)
        ds_context.feature_size = 2

        def label_conditional(y, x):
            from sklearn.cluster import KMeans

            clusters = KMeans(
                n_clusters=2, random_state=17, precompute_distances=True
            ).fit_predict(y)
            return clusters

        spn = learn_cspn_structure(
            train_data,
            ds_context,
            split_rows=get_split_conditional_rows_KMeans(),
            split_cols=getCIGroup(),
            create_leaf=create_conditional_leaf,
            label_conditional=label_conditional,
            cluster_univariate=True,
        )
Example 14
    def fit(self, X, y=None):
        y = y.reshape(y.shape[0], -1)
        self.num_labels = y.shape[1]
        self.context = Context(parametric_types=[Bernoulli] *
                               self.num_labels).add_domains(y)
        self.context.feature_size = X.shape[1]
        self.scope = list(range(y.shape[1]))
        data = concatenate_yx(y, X)

        cspn_type = 1
        if cspn_type == 0:
            self.cspn = create_conditional_leaf(data, self.context, self.scope)
        elif cspn_type == 1:
            split_rows = get_split_conditional_rows_KMeans()
            self.cspn, subtasks = create_sum(data=data,
                                             node_id=0,
                                             parent_id=0,
                                             pos=0,
                                             context=self.context,
                                             scope=self.scope,
                                             split_rows=split_rows)
            for i, subtask in enumerate(subtasks):
                self.cspn.children[i] = create_conditional_leaf(
                    subtask[1]['data'], self.context, subtask[1]['scope'])
            print(self.cspn)
Example 15
    def test_leaf_sampling_multilabel(self):
        np.random.seed(17)
        x = np.concatenate(
            (
                np.random.multivariate_normal([10, 10], np.eye(2), 5000),
                np.random.multivariate_normal([1, 1], np.eye(2), 5000),
            ),
            axis=0,
        )
        y = np.concatenate(
            (
                np.array([0] * 5000 + [1] * 5000).reshape(-1, 1),
                np.array([1] * 5000 + [0] * 5000).reshape(-1, 1),
            ),
            axis=1,
        )

        # associates y0=0 with X=[10,10]
        # associates y0=1 with X=[1,1]
        # associates y1=1 with X=[10,10]
        # associates y1=0 with X=[1,1]

        data = concatenate_yx(y, x)

        cspn = CSPNClassifier([Bernoulli] * y.shape[1],
                              min_instances_slice=4990,
                              cluster_univariate=True)
        cspn.fit(x, y)

        res = sample_instances(
            cspn.cspn,
            np.array([np.nan, np.nan, 10, 10] * 1000).reshape(-1, 4), 17)
        self.assertAlmostEqual(np.unique(res[:, 0]), 0)
        self.assertAlmostEqual(np.unique(res[:, 1]), 1)

        res = sample_instances(
            cspn.cspn,
            np.array([np.nan, np.nan, 1, 1] * 1000).reshape(-1, 4), 17)
        self.assertAlmostEqual(np.unique(res[:, 0]), 1)
        self.assertAlmostEqual(np.unique(res[:, 1]), 0)

        res = sample_instances(
            cspn.cspn,
            np.array([np.nan, 0, 1, 1, np.nan, 1, 10, 10] * 1000).reshape(
                -1, 4), 17)
        self.assertAlmostEqual(np.unique(res[::2, 0]), 1)
        self.assertAlmostEqual(np.unique(res[1::2, 0]), 0)
        self.assertAlmostEqual(np.unique(res[::2, 1]), 0)
        self.assertAlmostEqual(np.unique(res[1::2, 1]), 1)

        with self.assertRaises(AssertionError):
            sample_instances(
                cspn.cspn,
                np.array([np.nan, 1, 1, 1, np.nan, 0, 10, 10, 1, 1, 10,
                          10]).reshape(-1, 4), 17)
Example 16
    def predict(self, X, check_input=True):
        if self.cspn is None:
            raise RuntimeError("Classifier not fitted")

        y = np.array([np.nan] * X.shape[0] * len(self.cspn.scope)).reshape(
            X.shape[0], -1)

        test_data = concatenate_yx(y, X)

        mpe_y = ExactMPE(self.cspn, test_data, self.context)

        return mpe_y
Example 17
def split_conditional_data_by_clusters(y, x, clusters, scope, rows=True):
    assert not rows, "split conditional only for columns"

    nscope = np.asarray(scope)
    unique_clusters = np.unique(clusters)
    result = []

    for uc in unique_clusters:
        col_idx = clusters == uc
        local_data = concatenate_yx(y[:, col_idx].reshape((x.shape[0], -1)), x)
        proportion = 1
        result.append((local_data, nscope[col_idx].tolist(), proportion))
    return result
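This splitter partitions the y columns (never the rows, as the assertion enforces) by cluster id, carrying the full x block into every slice. A small worked call, assuming the helpers reconstructed near the top of the page:

import numpy as np

y = np.zeros((4, 2))  # two label columns
x = np.ones((4, 3))
slices = split_conditional_data_by_clusters(y, x,
                                            clusters=np.array([0, 1]),
                                            scope=[3, 7], rows=False)
# slices[0][0].shape == (4, 4)   first y column stacked with all of x
# slices[0][1] == [3]            the scope entry of that column
# slices[0][2] == 1              constant proportion placeholder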
Example 18
    def predict_proba(self, X):
        y = np.ones((X.shape[0], self.num_labels))
        y[:] = np.nan

        test_data = concatenate_yx(y, X)

        results = np.ones_like(y)

        for n in self.cspn.scope:
            local_test = np.array(test_data)
            local_test[:, n] = 1
            results[:, n] = likelihood(self.cspn, local_test)[:, 0]

        return results
Example 19
def predict_proba(self, X):
    y = np.ones((X.shape[0], self.num_labels))
    y[:] = np.nan

    test_data = concatenate_yx(y, X)

    results = np.ones_like(y)

    for n in range(2):
        local_test = np.array(test_data)
        local_test[:, n] = 1
        results[:, n] = likelihood(self.cspn, local_test)[:, 0]

    rbinc = np.zeros((X.shape[0], 2))
    rbinc[:, 0] = 1 - results[:, 0]
    rbinc[:, 1] = results[:, 0]
    return rbinc
Example 20
def supervised_leaf_likelihood(node, data=None, dtype=np.float64):
    assert len(node.scope) == 1, node.scope

    y, x = get_YX(data, node.feature_size)
    y = y[:, node.scope]

    probs = np.ones((y.shape[0], 1), dtype=dtype)

    # rows whose label is NaN are marginalized out: their likelihood stays 1
    marg_ids = np.isnan(y[:, 0])

    if np.sum(~marg_ids) > 0:
        # evaluate the attached discriminative model only on observed labels
        observations_data = concatenate_yx(y[~marg_ids], x[~marg_ids])

        probs[~marg_ids] = node.predictor.predict_proba(observations_data)

    # floor near-zero probabilities so downstream log-likelihoods stay finite
    probs[np.isclose(probs, 0)] = 1e-9

    return probs
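The leaf therefore delegates to an attached discriminative model: node.predictor.predict_proba is evaluated only on rows with an observed label. An illustration with a stub (StubPredictor and StubLeaf are placeholders, and the helpers sketched earlier are assumed importable):

import numpy as np

class StubPredictor:
    def predict_proba(self, d):
        return np.full((d.shape[0], 1), 0.7)  # constant placeholder model

class StubLeaf:
    scope = [0]
    feature_size = 2
    predictor = StubPredictor()

data = np.array([[1.0,    0.5, 0.5],
                 [np.nan, 0.5, 0.5]])
print(supervised_leaf_likelihood(StubLeaf(), data))
# -> [[0.7], [1.0]]  (the NaN-label row is marginalized to 1)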
Example 21
    def test_leaf_sampling(self):
        np.random.seed(17)
        x = np.concatenate(
            (
                np.random.multivariate_normal([10, 10], np.eye(2), 5000),
                np.random.multivariate_normal([1, 1], np.eye(2), 5000),
            ),
            axis=0,
        )
        y = np.array(
            np.random.normal(20, 2, 5000).tolist() +
            np.random.normal(60, 2, 5000).tolist()).reshape(-1, 1)

        # associates y=20 with X=[10,10]
        # associates y=60 with X=[1,1]

        data = concatenate_yx(y, x)

        ds_context = Context(parametric_types=[Gaussian])
        ds_context.feature_size = 2

        leaf = create_conditional_leaf(data, ds_context, [0])

        res = sample_instances(
            leaf,
            np.array([np.nan, 10, 10] * 1000).reshape(-1, 3), 17)
        self.assertAlmostEqual(np.mean(res[:, 0]), 20.456669723751173)

        res = sample_instances(leaf,
                               np.array([np.nan, 1, 1] * 1000).reshape(-1, 3),
                               17)
        self.assertAlmostEqual(np.mean(res[:, 0]), 59.496663076099196)

        res = sample_instances(
            leaf,
            np.array([np.nan, 1, 1, np.nan, 10, 10] * 1000).reshape(-1, 3), 17)
        self.assertAlmostEqual(np.mean(res[::2, 0]), 59.546359637084564)
        self.assertAlmostEqual(np.mean(res[1::2, 0]), 20.452118792501008)

        with self.assertRaises(AssertionError):
            sample_instances(
                leaf,
                np.array([np.nan, 1, 1, np.nan, 10, 10, 5, 10,
                          10]).reshape(-1, 3), 17)
Example 22
    def test_remove_non_informative_features(self):
        np.random.seed(17)
        data = np.arange(0, 1000).reshape(-1, 8)
        data[:, 1] = 1
        data[:, 3] = 3

        parent = Sum()
        parent.children.append(None)

        ctx = Context()
        ctx.feature_size = 4

        scope = [1, 3, 4, 6]
        data2 = np.array(data)

        y, x = get_YX(data, 4)

        uninformative_features_idx = np.var(y, 0) == 0
        result = remove_non_informative_features(
            data=data2,
            parent=parent,
            pos=0,
            context=ctx,
            scope=list(scope),
            uninformative_features_idx=uninformative_features_idx)

        self.assertListEqual(data.tolist(), data2.tolist())

        self.assertEqual(len(parent.children[0].children), len(result))

        resulting_scopes = [[3], [6], [1, 4]]
        resulting_data_y = [y[:, 1], y[:, 3], y[:, [0, 2]]]

        for i, r in enumerate(result):
            self.assertEqual(len(r), 2)
            self.assertEqual(type(r[1]['parent']), Product)
            self.assertEqual(parent.children[0], r[1]['parent'])
            self.assertListEqual(r[1]['scope'], resulting_scopes[i])
            self.assertEqual(r[1]['pos'], i)

            self.assertListEqual(
                r[1]['data'].tolist(),
                concatenate_yx(resulting_data_y[i], x).tolist())
Example 23
    def test_leaf_no_variance_bernoulli(self):
        np.random.seed(17)
        x = np.concatenate(
            (
                np.random.multivariate_normal([10, 10], np.eye(2), 500),
                np.random.multivariate_normal([1, 1], np.eye(2), 500),
            ),
            axis=0,
        )
        y = np.array([1] * 1000).reshape(-1, 1)

        data = concatenate_yx(y, x)

        ds_context = Context(parametric_types=[Bernoulli])
        ds_context.feature_size = 2

        leaf = create_conditional_leaf(data, ds_context, [0])
        l = likelihood(leaf, data)
        self.assertTrue(np.all(l >= 0.5))
Example 24
    def test_leaf_sampling_categorical(self):
        np.random.seed(17)
        x = np.concatenate(
            (
                np.random.multivariate_normal([20, 20], np.eye(2), 500),
                np.random.multivariate_normal([10, 10], np.eye(2), 500),
                np.random.multivariate_normal([1, 1], np.eye(2), 500),
            ),
            axis=0,
        )
        y = np.array([2] * 500 + [1] * 500 + [0] * 500).reshape(-1, 1)

        data = concatenate_yx(y, x)

        ds_context = Context(parametric_types=[Categorical])
        ds_context.feature_size = 2

        leaf = create_conditional_leaf(data, ds_context, [0])

        res = sample_instances(
            leaf,
            np.array([np.nan, 10, 10] * 1000).reshape(-1, 3), RandomState(17))
        self.assertAlmostEqual(np.mean(res[:, 0]), 1, 1)

        res = sample_instances(leaf,
                               np.array([np.nan, 1, 1] * 1000).reshape(-1, 3),
                               RandomState(17))
        self.assertAlmostEqual(np.mean(res[:, 0]), 0, 1)

        res = sample_instances(
            leaf,
            np.array([np.nan, 1, 1, np.nan, 10, 10] * 1000).reshape(-1, 3),
            RandomState(17))
        self.assertAlmostEqual(np.mean(res[::2, 0]), 0, 1)
        self.assertAlmostEqual(np.mean(res[1::2, 0]), 1, 1)

        with self.assertRaises(AssertionError):
            sample_instances(
                leaf,
                np.array([np.nan, 1, 1, np.nan, 10, 10, 5, 10,
                          10]).reshape(-1, 3), RandomState(17))
Example 25
        y[:] = 0
        data = np.zeros_like(to_ohe(y[:, 0].astype(int), n_people))
        data = np.eye(n_people)
        # data[:, 9] = 1
        # data[:, 11] = 1
        # data[:] = 1
        sample_images.insert(0, data)


    else:
        y = np.zeros((num_images, block_size))
        y[:] = np.nan

        X = np.concatenate(sample_images, axis=1)

        tr_block = sample_instances(spn, concatenate_yx(y, X), rng, in_place=False)

        y = tr_block[:, 0:block_size]

        sample_images.insert(0, y)

all_sample_images = np.concatenate(sample_images, axis=1)
samples_person_id = np.argmax(all_sample_images[:, -n_people:], axis=1)
all_sample_images = all_sample_images[:, 0:-n_people]  # remove person id

block_ids = tuple(list(reversed(range((num_blocks[0] * num_blocks[1])))))

sample_img_blocks = stitch_imgs(all_sample_images.shape[0], img_size=images[0].shape, num_blocks=num_blocks,
                                blocks={block_ids: all_sample_images})
result_scaled = []
for i in range(num_images):