Exemple #1
0
def test_logl_dbn():
    """Compare ``DynamicGaussianNetwork.logl`` with the ``numpy_logl`` reference.

    A dynamic Gaussian network over ``a``..``d`` is created with ``2`` as its
    second constructor argument. Its static network receives the arcs
    a -> c, b -> c and c -> d among the ``_t_2`` nodes and among the ``_t_1``
    nodes; its transition network receives an arc from every ``_t_2`` and
    ``_t_1`` node to the ``_t_0`` node of the same variable. The network is
    fitted on the module-level ``df`` and its per-instance log-likelihood must
    match the value computed by ``numpy_logl`` on the same test frame.
    """
    variables = ["a", "b", "c", "d"]
    gbn = DynamicGaussianNetwork(variables, 2)

    # The network objects returned here are the ones we add arcs to.
    static_bn = gbn.static_bn()
    for suffix in ("_t_2", "_t_1"):
        for source, target in (("a", "c"), ("b", "c"), ("c", "d")):
            static_bn.add_arc(source + suffix, target + suffix)

    transition_bn = gbn.transition_bn()
    for suffix in ("_t_2", "_t_1"):
        for name in variables:
            transition_bn.add_arc(name + suffix, name + "_t_0")

    gbn.fit(df)

    # Evaluate the reference and the network on the very same frame, so the
    # comparison does not rely on the generator returning identical data on
    # two separate calls.
    test_df = util_test.generate_normal_data(100)
    ground_truth_ll = numpy_logl(gbn, test_df)
    ll = gbn.logl(test_df)
    assert np.all(np.isclose(ground_truth_ll, ll))
Exemple #2
0
def test_kde_slogl_null():
    """Check ``pbn.KDE.slogl`` on data containing NaNs against a SciPy reference.

    The expected value is ``np.nansum`` of the log-density of a
    ``scipy.stats.gaussian_kde`` built on the same training columns with a
    custom bandwidth factor. Both the module-level ``df`` and ``df_float``
    training sets are used, and the result must not change when the variables
    are listed in a different order.
    """

    def _test_kde_slogl_null_iter(variables, _df, _test_df):
        kde = pbn.KDE(variables)
        kde.fit(_df)

        def bandwidth_factor(s):
            # Scott's factor rescaled by (4 / (d + 2)) ** (1 / (d + 4)).
            return np.power(4 / (s.d + 2), 1 / (s.d + 4)) * s.scotts_factor()

        train_points = _df.loc[:, variables].to_numpy()
        reference_kde = gaussian_kde(train_points.T,
                                     bw_method=bandwidth_factor)

        eval_points = _test_df.loc[:, variables].to_numpy()

        observed = kde.slogl(_test_df)
        expected = np.nansum(reference_kde.logpdf(eval_points.T))
        assert np.all(np.isclose(observed, expected))

    TEST_SIZE = 50

    test_df = util_test.generate_normal_data(TEST_SIZE, seed=1)
    test_df_float = test_df.astype('float32')

    # Draw the row positions to blank out, one draw per column, in a..d order.
    np.random.seed(0)
    null_rows = {
        column: np.random.randint(0, TEST_SIZE, size=10)
        for column in ('a', 'b', 'c', 'd')
    }

    df_null = test_df.copy()
    for column, rows in null_rows.items():
        df_null.loc[df_null.index[rows], column] = np.nan

    df_null_float = test_df_float.copy()
    for column, rows in null_rows.items():
        df_null_float.loc[df_null_float.index[rows], column] = np.nan

    variable_sets = [['a'], ['b', 'a'], ['c', 'a', 'b'],
                     ['d', 'a', 'b', 'c']]
    for variables in variable_sets:
        _test_kde_slogl_null_iter(variables, df, df_null)
        _test_kde_slogl_null_iter(variables, df_float, df_null_float)

    # The same variables in a different order must give the same slogl().
    for train, evaluation in ((df, df_null), (df_float, df_null_float)):
        first = pbn.KDE(['d', 'a', 'b', 'c'])
        first.fit(train)
        second = pbn.KDE(['a', 'c', 'd', 'b'])
        second.fit(train)
        assert np.all(
            np.isclose(first.slogl(evaluation), second.slogl(evaluation))
        ), "Order of evidence changes slogl() result."
Exemple #3
0
def test_productkde_slogl():
    """Check ``pbn.ProductKDE.slogl`` against a SciPy KDE with a diagonal covariance.

    The reference ``gaussian_kde`` has its ``covariance``, ``inv_cov`` and
    ``log_det`` attributes overwritten using the bandwidth reported by the
    fitted ``ProductKDE``. Both ``df`` and ``df_float`` training sets are used,
    and the result must not change when the variables are reordered.
    """

    def _test_productkde_slogl_iter(variables, _df, _test_df):
        product_kde = pbn.ProductKDE(variables)
        product_kde.fit(_df)

        train_points = _df.loc[:, variables].to_numpy()
        reference_kde = gaussian_kde(train_points.T)
        # Replace SciPy's own estimate with a diagonal one built from the
        # bandwidth of the fitted ProductKDE.
        reference_kde.covariance = np.diag(product_kde.bandwidth)
        reference_kde.inv_cov = np.diag(1. / product_kde.bandwidth)
        reference_kde.log_det = (
            product_kde.bandwidth.shape[0] * np.log(2*np.pi)
            + np.log(product_kde.bandwidth).sum())

        eval_points = _test_df.loc[:, variables].to_numpy()
        observed = product_kde.slogl(_test_df)
        expected = reference_kde.logpdf(eval_points.T).sum()
        assert np.all(np.isclose(observed, expected))

    test_df = util_test.generate_normal_data(50, seed=1)
    test_df_float = test_df.astype('float32')

    variable_sets = [['a'], ['b', 'a'], ['c', 'a', 'b'], ['d', 'a', 'b', 'c']]
    for variables in variable_sets:
        _test_productkde_slogl_iter(variables, df, test_df)
        _test_productkde_slogl_iter(variables, df_float, test_df_float)

    # Reordering the variables must not change slogl(); the float32 run is
    # compared with an explicit absolute tolerance.
    order_checks = (
        (df, test_df, {}),
        (df_float, test_df_float, {"atol": 0.0005}),
    )
    for train, evaluation, tolerance in order_checks:
        first = pbn.ProductKDE(['d', 'a', 'b', 'c'])
        first.fit(train)
        second = pbn.ProductKDE(['a', 'c', 'd', 'b'])
        second.fit(train)
        assert np.all(
            np.isclose(first.slogl(evaluation), second.slogl(evaluation),
                       **tolerance)
        ), "Order of evidence changes slogl() result."
def test_lg_slogl_null():
    """Check ``pbn.LinearGaussianCPD.slogl`` on data containing NaNs.

    For each (variable, evidence) pair the CPD is fitted on the module-level
    ``df`` and its ``slogl`` on the NaN-injected frame must equal
    ``np.nansum`` of ``numpy_logpdf`` evaluated with the fitted ``beta`` and
    ``variance``. The result must also be independent of the evidence order.
    """
    test_df = util_test.generate_normal_data(5000)

    # Draw the row positions to blank out, one draw per column, in a..d order.
    np.random.seed(0)
    null_rows = {
        column: np.random.randint(0, 5000, size=100)
        for column in ('a', 'b', 'c', 'd')
    }

    df_null = test_df.copy()
    for column, rows in null_rows.items():
        df_null.loc[df_null.index[rows], column] = np.nan

    cases = [('a', []), ('b', ['a']), ('c', ['a', 'b']), ('d', ['a', 'b', 'c'])]
    for variable, evidence in cases:
        cpd = pbn.LinearGaussianCPD(variable, evidence)
        cpd.fit(df)

        beta = cpd.beta
        variance = cpd.variance

        observed = cpd.slogl(df_null)
        expected = np.nansum(
            numpy_logpdf(df_null, variable, evidence, beta, variance))
        assert np.all(np.isclose(observed, expected)), (
            "Wrong slogl for LinearGaussianCPD(" + str(variable) + " | "
            + str(evidence) + ") with null values.")

    ordered = pbn.LinearGaussianCPD('d', ['a', 'b', 'c'])
    ordered.fit(df)
    shuffled = pbn.LinearGaussianCPD('d', ['c', 'a', 'b'])
    shuffled.fit(df)

    assert np.all(
        np.isclose(ordered.slogl(df_null), shuffled.slogl(df_null))
    ), "The order of the evidence changes the slogl() result."
Exemple #5
0
def test_productkde_logl_null():
    """Check ``pbn.ProductKDE.logl`` on data containing NaNs against SciPy.

    The reference ``gaussian_kde`` has its ``covariance``, ``inv_cov`` and
    ``log_det`` attributes overwritten using the bandwidth of the fitted
    ``ProductKDE``. Comparisons use ``equal_nan=True``, so NaN entries must
    appear at the same positions in both results. Both ``df`` and ``df_float``
    training sets are used, and reordering the variables must not change the
    result.
    """

    def _test_productkde_logl_null_iter(variables, _df, _test_df):
        product_kde = pbn.ProductKDE(variables)
        product_kde.fit(_df)

        observed = product_kde.logl(_test_df)

        train_points = _df.loc[:, variables].to_numpy()
        reference_kde = gaussian_kde(train_points.T)
        # Replace SciPy's own estimate with a diagonal one built from the
        # bandwidth of the fitted ProductKDE.
        reference_kde.covariance = np.diag(product_kde.bandwidth)
        reference_kde.inv_cov = np.diag(1. / product_kde.bandwidth)
        reference_kde.log_det = (
            product_kde.bandwidth.shape[0] * np.log(2*np.pi)
            + np.log(product_kde.bandwidth).sum())

        eval_points = _test_df.loc[:, variables].to_numpy()
        expected = reference_kde.logpdf(eval_points.T)

        # float32 training data is compared with a looser absolute tolerance.
        tolerance = {"atol": 0.0005} if train_points.dtype == "float32" else {}
        assert np.all(np.isclose(observed, expected, equal_nan=True,
                                 **tolerance))

    TEST_SIZE = 50

    test_df = util_test.generate_normal_data(TEST_SIZE, seed=1)
    test_df_float = test_df.astype('float32')

    # Draw the row positions to blank out, one draw per column, in a..d order.
    np.random.seed(0)
    null_rows = {
        column: np.random.randint(0, TEST_SIZE, size=10)
        for column in ('a', 'b', 'c', 'd')
    }

    df_null = test_df.copy()
    for column, rows in null_rows.items():
        df_null.loc[df_null.index[rows], column] = np.nan

    df_null_float = test_df_float.copy()
    for column, rows in null_rows.items():
        df_null_float.loc[df_null_float.index[rows], column] = np.nan

    variable_sets = [['a'], ['b', 'a'], ['c', 'a', 'b'], ['d', 'a', 'b', 'c']]
    for variables in variable_sets:
        _test_productkde_logl_null_iter(variables, df, df_null)
        _test_productkde_logl_null_iter(variables, df_float, df_null_float)

    # Reordering the variables must not change logl(); the float32 run is
    # compared with an explicit absolute tolerance.
    order_checks = (
        (df, df_null, {}),
        (df_float, df_null_float, {"atol": 0.0005}),
    )
    for train, evaluation, tolerance in order_checks:
        first = pbn.ProductKDE(['d', 'a', 'b', 'c'])
        first.fit(train)
        second = pbn.ProductKDE(['a', 'c', 'd', 'b'])
        second.fit(train)
        assert np.all(
            np.isclose(first.logl(evaluation), second.logl(evaluation),
                       equal_nan=True, **tolerance)
        ), "Order of evidence changes logl() result."
Exemple #6
0
def test_kde_logl():
    """Check ``pbn.KDE.logl`` against a SciPy ``gaussian_kde`` reference.

    The reference KDE is built on the same training columns with a custom
    bandwidth factor. When every training column is ``float32`` the
    comparison uses an absolute tolerance of 0.0005. Reordering the variables
    must not change the result.
    """

    def _test_kde_logl_iter(variables, _df, _test_df):
        kde = pbn.KDE(variables)
        kde.fit(_df)

        def bandwidth_factor(s):
            # Scott's factor rescaled by (4 / (d + 2)) ** (1 / (d + 4)).
            return np.power(4 / (s.d + 2), 1 / (s.d + 4)) * s.scotts_factor()

        train_points = _df.loc[:, variables].to_numpy()
        reference_kde = gaussian_kde(train_points.T,
                                     bw_method=bandwidth_factor)

        eval_points = _test_df.loc[:, variables].to_numpy()

        observed = kde.logl(_test_df)
        expected = reference_kde.logpdf(eval_points.T)

        tolerance = {"atol": 0.0005} if np.all(_df.dtypes == 'float32') else {}
        assert np.all(np.isclose(observed, expected, **tolerance))

    test_df = util_test.generate_normal_data(50, seed=1)
    test_df_float = test_df.astype('float32')

    variable_sets = [['a'], ['b', 'a'], ['c', 'a', 'b'],
                     ['d', 'a', 'b', 'c']]
    for variables in variable_sets:
        _test_kde_logl_iter(variables, df, test_df)
        _test_kde_logl_iter(variables, df_float, test_df_float)

    # The same variables in a different order must give the same logl().
    for train, evaluation in ((df, test_df), (df_float, test_df_float)):
        first = pbn.KDE(['d', 'a', 'b', 'c'])
        first.fit(train)
        second = pbn.KDE(['a', 'c', 'd', 'b'])
        second.fit(train)
        assert np.all(
            np.isclose(first.logl(evaluation), second.logl(evaluation))
        ), "Order of evidence changes logl() result."
Exemple #7
0
def test_ckde_slogl():
    """Check ``pbn.CKDE.slogl`` against the SciPy-based reference helpers.

    The reference is the sum of ``scipy_ckde_logpdf`` computed from the KDEs
    returned by ``train_scipy_ckde``. Four training sets are used (``df``,
    ``df_small`` and their float32 versions), and reordering the evidence
    must not change the result.
    """

    def _test_ckde_slogl(variable, evidence, _df, _test_df):
        ckde = pbn.CKDE(variable, evidence)
        ckde.fit(_df)

        joint_kde, marginal_kde = train_scipy_ckde(_df, variable, evidence)
        reference_logl = scipy_ckde_logpdf(_test_df, joint_kde, marginal_kde,
                                           variable, evidence)

        if np.all(_test_df.dtypes == "float32"):
            # Allow an error of 0.0005 for each training instance.
            tolerance = {"atol": 0.0005 * _df.shape[0]}
        else:
            tolerance = {}

        assert np.isclose(ckde.slogl(_test_df), reference_logl.sum(),
                          **tolerance)

    test_df = util_test.generate_normal_data(TEST_SIZE, seed=1)
    test_df_float = test_df.astype('float32')

    cases = [('a', []), ('b', ['a']), ('c', ['a', 'b']),
             ('d', ['a', 'b', 'c'])]
    dataset_pairs = [
        (df, test_df),
        (df_small, test_df),
        (df_float, test_df_float),
        (df_small_float, test_df_float),
    ]
    for variable, evidence in cases:
        for train, evaluation in dataset_pairs:
            _test_ckde_slogl(variable, evidence, train, evaluation)

    # The same evidence in a different order must give the same slogl().
    for train, evaluation in ((df, test_df), (df_float, test_df_float)):
        first = pbn.CKDE('d', ['a', 'b', 'c'])
        first.fit(train)
        second = pbn.CKDE('d', ['c', 'b', 'a'])
        second.fit(train)
        assert np.all(
            np.isclose(first.slogl(evaluation), second.slogl(evaluation))
        ), "Order of evidence changes slogl() result."
def test_lg_slogl():
    """Check ``pbn.LinearGaussianCPD.slogl`` against ``numpy_logpdf``.

    For each (variable, evidence) pair the CPD is fitted on the module-level
    ``df`` and its ``slogl`` on a freshly generated frame must equal the sum
    of ``numpy_logpdf`` evaluated with the fitted ``beta`` and ``variance``.
    The result must also be independent of the evidence order.
    """
    test_df = util_test.generate_normal_data(5000)

    cases = [('a', []), ('b', ['a']), ('c', ['a', 'b']), ('d', ['a', 'b', 'c'])]
    for variable, evidence in cases:
        cpd = pbn.LinearGaussianCPD(variable, evidence)
        cpd.fit(df)

        beta = cpd.beta
        variance = cpd.variance

        observed = cpd.slogl(test_df)
        expected = np.sum(
            numpy_logpdf(test_df, variable, evidence, beta, variance))
        assert np.all(np.isclose(observed, expected)), (
            "Wrong slogl for LinearGaussianCPD(" + str(variable) + " | "
            + str(evidence) + ")")

    ordered = pbn.LinearGaussianCPD('d', ['a', 'b', 'c'])
    ordered.fit(df)
    shuffled = pbn.LinearGaussianCPD('d', ['c', 'a', 'b'])
    shuffled.fit(df)

    assert np.all(
        np.isclose(ordered.slogl(test_df), shuffled.slogl(test_df))
    ), "The order of the evidence changes the slogl() result."
Exemple #9
0
def test_ckde_cdf():
    """Check ``pbn.CKDE.cdf`` against the SciPy-based reference helpers.

    The reference is ``scipy_ckde_cdf`` computed from the KDEs returned by
    ``train_scipy_ckde``. When every training column is ``float32`` the
    comparison uses an absolute tolerance of 0.0005. Four training sets are
    used (``df``, ``df_small`` and their float32 versions), and reordering the
    evidence must not change the result.
    """

    def _test_ckde_cdf(variable, evidence, _df, _test_df):
        cpd = pbn.CKDE(variable, evidence)
        cpd.fit(_df)
        scipy_kde_joint, scipy_kde_marg = train_scipy_ckde(
            _df, variable, evidence)

        cdf = cpd.cdf(_test_df)
        scipy_cdf = scipy_ckde_cdf(_test_df, scipy_kde_joint, scipy_kde_marg,
                                   variable, evidence)

        if np.all(_df.dtypes == 'float32'):
            assert np.all(np.isclose(cdf, scipy_cdf, atol=0.0005))
        else:
            assert np.all(np.isclose(cdf, scipy_cdf))

    test_df = util_test.generate_normal_data(TEST_SIZE, seed=1)
    test_df_float = test_df.astype('float32')

    for variable, evidence in [('a', []), ('b', ['a']), ('c', ['a', 'b']),
                               ('d', ['a', 'b', 'c'])]:
        _test_ckde_cdf(variable, evidence, df, test_df)
        _test_ckde_cdf(variable, evidence, df_small, test_df)
        _test_ckde_cdf(variable, evidence, df_float, test_df_float)
        _test_ckde_cdf(variable, evidence, df_small_float, test_df_float)

    # The failure messages below name cdf(), the method actually under test.
    cpd = pbn.CKDE('d', ['a', 'b', 'c'])
    cpd.fit(df)
    cpd2 = pbn.CKDE('d', ['c', 'b', 'a'])
    cpd2.fit(df)
    assert np.all(np.isclose(
        cpd.cdf(test_df),
        cpd2.cdf(test_df))), "Order of evidence changes cdf() result."

    cpd = pbn.CKDE('d', ['a', 'b', 'c'])
    cpd.fit(df_float)
    cpd2 = pbn.CKDE('d', ['c', 'b', 'a'])
    cpd2.fit(df_float)
    assert np.all(
        np.isclose(cpd.cdf(test_df_float),
                   cpd2.cdf(test_df_float),
                   atol=0.0005)), "Order of evidence changes cdf() result."
Exemple #10
0
def test_logl():
    """Check that network-level ``logl``/``slogl`` agree with the per-node CPDs.

    A ``SemiparametricBN`` over a fully connected DAG on ``a``..``d`` is
    fitted on the module-level ``df``. For every node, the CPD's ``slogl``
    must match the sum of its ``logl``; the network's ``logl`` must match the
    sum of the node ``logl`` vectors, and its ``slogl`` must match both the
    sum of its own ``logl`` and the accumulated node ``slogl`` values.
    """
    n_rows = 5000
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'), ('b', 'c'), ('b', 'd'),
            ('c', 'd')]
    spbn = SemiparametricBN(arcs)

    spbn.fit(df)

    test_df = util_test.generate_normal_data(n_rows)
    ll = spbn.logl(test_df)
    sll = spbn.slogl(test_df)

    per_row_total = np.zeros((n_rows, ))
    scalar_total = 0

    for node in spbn.nodes():
        node_cpd = spbn.cpd(node)
        node_ll = node_cpd.logl(test_df)
        node_sll = node_cpd.slogl(test_df)
        assert np.all(np.isclose(node_sll, node_ll.sum()))
        per_row_total += node_ll
        scalar_total += node_sll

    assert np.all(np.isclose(ll, per_row_total))
    assert np.isclose(sll, ll.sum())
    # NOTE(review): exact equality on floats; presumably relies on the network
    # accumulating node scores in the same order as this loop -- confirm.
    assert sll == scalar_total
Exemple #11
0
import pytest
import numpy as np
import pybnesian as pbn
import util_test

# Argument passed to util_test.generate_normal_data for the shared data set
# (presumably the number of rows -- confirm against util_test).
SIZE = 10000
# Module-level data set used by the tests below.
df = util_test.generate_normal_data(SIZE)


def test_create_change_node():
    """``ChangeNodeTypeSet.cache_scores`` must reject a ``GaussianNetwork``.

    The call is expected to raise ``ValueError`` with a message containing
    "can only be used with non-homogeneous".
    """
    network = pbn.GaussianNetwork(['a', 'b', 'c', 'd'])
    score = pbn.CVLikelihood(df)
    operator_set = pbn.ChangeNodeTypeSet()

    with pytest.raises(ValueError) as excinfo:
        operator_set.cache_scores(network, score)

    message = str(excinfo.value)
    assert "can only be used with non-homogeneous" in message


def test_lists():
    """Configure an ``ArcOperatorSet`` with black/white lists and limits.

    As visible here, the test only checks that the setters accept the given
    arguments without raising; no assertions follow.
    """
    gbn = pbn.GaussianNetwork(['a', 'b', 'c', 'd'])
    bic = pbn.BIC(df)
    operator_set = pbn.ArcOperatorSet()

    forbidden_arcs = [("b", "a")]
    required_arcs = [("b", "c")]
    allowed_types = [("a", pbn.LinearGaussianCPDType())]

    operator_set.set_arc_blacklist(forbidden_arcs)
    operator_set.set_arc_whitelist(required_arcs)
    operator_set.set_max_indegree(3)
    operator_set.set_type_whitelist(allowed_types)
import numpy as np
import pybnesian as pbn
from pybnesian import BayesianNetworkType, BayesianNetwork
import util_test

# Module-level data set used by the tests below; 1000 is the argument passed
# to the generator (presumably the number of rows -- confirm against util_test).
df = util_test.generate_normal_data(1000)


def test_hc_estimate():
    """Run one iteration of ``GreedyHillClimbing`` from two equivalent starts.

    The first start is a ``GaussianNetwork`` over the columns of ``df``. The
    second is built with two extra nodes (``e`` and ``f``) that are removed
    again before the search. With ``max_iters=1`` each run is expected to
    return a network with exactly one arc.
    """
    bic = pbn.BIC(df)
    column_names = list(df.columns.values)
    start = pbn.GaussianNetwork(column_names)

    # Check algorithm with BN with nodes removed.
    column_names.insert(1, 'e')
    column_names.insert(3, 'f')
    start_removed_nodes = pbn.GaussianNetwork(column_names)
    start_removed_nodes.remove_node('e')
    start_removed_nodes.remove_node('f')

    arc_set = pbn.ArcOperatorSet()

    hc = pbn.GreedyHillClimbing()

    res = hc.estimate(arc_set, bic, start, max_iters=1)
    assert res.num_arcs() == 1
    # Arc and score gain of the reference run; not asserted on in the part of
    # this test visible here.
    added_arc = res.arcs()[0]
    op_delta = bic.score(res) - bic.score(start)

    res_removed = hc.estimate(arc_set, bic, start_removed_nodes, max_iters=1)
    # Check the result of the removed-nodes run itself, not `res` again.
    assert res_removed.num_arcs() == 1
Exemple #13
0
def test_ckde_cdf_null():
    """Check ``pbn.CKDE.cdf`` on data containing NaNs against the SciPy helpers.

    The reference is ``scipy_ckde_cdf`` computed from the KDEs returned by
    ``train_scipy_ckde``. Comparisons use ``equal_nan=True``, so NaN entries
    must appear at the same positions in both results. Four training sets are
    used (``df``, ``df_small`` and their float32 versions), and reordering the
    evidence must not change the result.
    """

    def _test_ckde_cdf_null(variable, evidence, _df, _test_df):
        ckde = pbn.CKDE(variable, evidence)
        ckde.fit(_df)

        joint_kde, marginal_kde = train_scipy_ckde(_df, variable, evidence)

        observed = ckde.cdf(_test_df)
        expected = scipy_ckde_cdf(_test_df, joint_kde, marginal_kde, variable,
                                  evidence)

        # float32 training data is compared with a looser absolute tolerance.
        tolerance = {"atol": 0.0005} if np.all(_df.dtypes == 'float32') else {}
        assert np.all(np.isclose(observed, expected, equal_nan=True,
                                 **tolerance))

    test_df = util_test.generate_normal_data(TEST_SIZE, seed=1)
    test_df_float = test_df.astype('float32')

    # Draw the row positions to blank out, one draw per column, in a..d order.
    np.random.seed(0)
    null_rows = {
        column: np.random.randint(0, TEST_SIZE, size=10)
        for column in ('a', 'b', 'c', 'd')
    }

    df_null = test_df.copy()
    for column, rows in null_rows.items():
        df_null.loc[df_null.index[rows], column] = np.nan

    df_null_float = test_df_float.copy()
    for column, rows in null_rows.items():
        df_null_float.loc[df_null_float.index[rows], column] = np.nan

    cases = [('a', []), ('b', ['a']), ('c', ['a', 'b']),
             ('d', ['a', 'b', 'c'])]
    dataset_pairs = [
        (df, df_null),
        (df_small, df_null),
        (df_float, df_null_float),
        (df_small_float, df_null_float),
    ]
    for variable, evidence in cases:
        for train, evaluation in dataset_pairs:
            _test_ckde_cdf_null(variable, evidence, train, evaluation)

    # Reordering the evidence must not change cdf(); the float32 run is
    # compared with an explicit absolute tolerance.
    order_checks = (
        (df, df_null, {}),
        (df_float, df_null_float, {"atol": 0.0005}),
    )
    for train, evaluation, tolerance in order_checks:
        first = pbn.CKDE('d', ['a', 'b', 'c'])
        first.fit(train)
        second = pbn.CKDE('d', ['c', 'b', 'a'])
        second.fit(train)
        assert np.all(
            np.isclose(first.cdf(evaluation), second.cdf(evaluation),
                       equal_nan=True, **tolerance)
        ), "Order of evidence changes cdf() result."
Exemple #14
0
def test_ckde_slogl_null():
    """Check ``pbn.CKDE.slogl`` on data containing NaNs against the SciPy helpers.

    The reference is ``np.nansum`` of ``scipy_ckde_logpdf`` computed from the
    KDEs returned by ``train_scipy_ckde``. Four training sets are used
    (``df``, ``df_small`` and their float32 versions), and reordering the
    evidence must not change the result.
    """

    def _test_ckde_slogl_null(variable, evidence, _df, _test_df):
        ckde = pbn.CKDE(variable, evidence)
        ckde.fit(_df)

        joint_kde, marginal_kde = train_scipy_ckde(_df, variable, evidence)
        reference_logl = scipy_ckde_logpdf(_test_df, joint_kde, marginal_kde,
                                           variable, evidence)

        if np.all(_test_df.dtypes == "float32"):
            # Allow an error of 0.0005 for each training instance.
            tolerance = {"atol": 0.0005 * _df.shape[0]}
        else:
            tolerance = {}

        assert np.isclose(ckde.slogl(_test_df), np.nansum(reference_logl),
                          **tolerance)

    test_df = util_test.generate_normal_data(TEST_SIZE, seed=1)
    test_df_float = test_df.astype('float32')

    # Draw the row positions to blank out, one draw per column, in a..d order.
    np.random.seed(0)
    null_rows = {
        column: np.random.randint(0, TEST_SIZE, size=10)
        for column in ('a', 'b', 'c', 'd')
    }

    df_null = test_df.copy()
    for column, rows in null_rows.items():
        df_null.loc[df_null.index[rows], column] = np.nan

    df_null_float = test_df_float.copy()
    for column, rows in null_rows.items():
        df_null_float.loc[df_null_float.index[rows], column] = np.nan

    cases = [('a', []), ('b', ['a']), ('c', ['a', 'b']),
             ('d', ['a', 'b', 'c'])]
    dataset_pairs = [
        (df, df_null),
        (df_small, df_null),
        (df_float, df_null_float),
        (df_small_float, df_null_float),
    ]
    for variable, evidence in cases:
        for train, evaluation in dataset_pairs:
            _test_ckde_slogl_null(variable, evidence, train, evaluation)

    # The same evidence in a different order must give the same slogl().
    for train, evaluation in ((df, df_null), (df_float, df_null_float)):
        first = pbn.CKDE('d', ['a', 'b', 'c'])
        first.fit(train)
        second = pbn.CKDE('d', ['c', 'b', 'a'])
        second.fit(train)
        assert np.all(
            np.isclose(first.slogl(evaluation), second.slogl(evaluation))
        ), "Order of evidence changes slogl() result."
Exemple #15
0
import pytest
import numpy as np
import pyarrow as pa
import pandas as pd
import pybnesian as pbn
from scipy.stats import gaussian_kde
from scipy.stats import norm
from scipy.stats import multivariate_normal as mvn
from scipy.special import logsumexp

import util_test

# Arguments passed to util_test.generate_normal_data for the shared data sets
# (presumably row counts -- confirm against util_test).
SIZE = 10000
SMALL_SIZE = 10
TEST_SIZE = 50
# Training data sets generated with seed=0, in two sizes.
df = util_test.generate_normal_data(SIZE, seed=0)
df_small = util_test.generate_normal_data(SMALL_SIZE, seed=0)
# float32 copies of the training data sets.
df_float = df.astype('float32')
df_small_float = df_small.astype('float32')


def test_variable():
    """``CKDE.variable()`` must return the variable name given at construction."""
    cases = [('a', []), ('b', ['a']), ('c', ['a', 'b']),
             ('d', ['a', 'b', 'c'])]
    for target, parents in cases:
        ckde = pbn.CKDE(target, parents)
        assert ckde.variable() == target


def test_evidence():
    for variable, evidence in [('a', []), ('b', ['a']), ('c', ['a', 'b']),
                               ('d', ['a', 'b', 'c'])]: