Python generate_normal_data Exemples, util_test.generate_normal_data Python Exemples

Exemple #1

0

Afficher le fichier

def test_logl_dbn():
    """Compare ``DynamicGaussianNetwork.logl`` with the ``numpy_logl`` reference.

    A dynamic Gaussian network over ``a``..``d`` gets hand-written arcs in its
    static and transition sub-networks, is fitted on the ``df`` frame from the
    enclosing module, and is then scored on a freshly generated sample. The
    reference value is computed on that very same sample.
    """
    variables = ["a", "b", "c", "d"]

    # NOTE(review): the second argument is presumably the Markovian order
    # (the node names below use the suffixes _t_0, _t_1 and _t_2) -- confirm.
    gbn = DynamicGaussianNetwork(variables, 2)

    # Arcs a -> c, b -> c, c -> d inside each of the two lagged slices.
    static_bn = gbn.static_bn()
    static_bn.add_arc("a_t_2", "c_t_2")
    static_bn.add_arc("b_t_2", "c_t_2")
    static_bn.add_arc("c_t_2", "d_t_2")
    static_bn.add_arc("a_t_1", "c_t_1")
    static_bn.add_arc("b_t_1", "c_t_1")
    static_bn.add_arc("c_t_1", "d_t_1")

    # Every variable at _t_0 depends on its own value at _t_2 and _t_1.
    transition_bn = gbn.transition_bn()
    transition_bn.add_arc("a_t_2", "a_t_0")
    transition_bn.add_arc("b_t_2", "b_t_0")
    transition_bn.add_arc("c_t_2", "c_t_0")
    transition_bn.add_arc("d_t_2", "d_t_0")
    transition_bn.add_arc("a_t_1", "a_t_0")
    transition_bn.add_arc("b_t_1", "b_t_0")
    transition_bn.add_arc("c_t_1", "c_t_0")
    transition_bn.add_arc("d_t_1", "d_t_0")

    gbn.fit(df)

    # Score the reference and the network on the same frame, so the
    # comparison does not rely on two generator calls returning equal data.
    test_df = util_test.generate_normal_data(100)
    ground_truth_ll = numpy_logl(gbn, test_df)
    ll = gbn.logl(test_df)
    assert np.all(np.isclose(ground_truth_ll, ll))

Exemple #2

0

Afficher le fichier

def test_kde_slogl_null():
    """Check ``KDE.slogl`` on test data that contains missing values.

    For float64 and float32 frames, the summed log-likelihood returned by
    ``pbn.KDE`` must match the NaN-ignoring sum of SciPy's
    ``gaussian_kde.logpdf``, and it must not change when the variables are
    listed in a different order.
    """
    TEST_SIZE = 50
    columns = ('a', 'b', 'c', 'd')

    def _bandwidth_factor(kde):
        # SciPy's Scott factor rescaled by (4 / (d + 2)) ** (1 / (d + 4)).
        return np.power(4 / (kde.d + 2), 1 / (kde.d + 4)) * kde.scotts_factor()

    def _test_kde_slogl_null_iter(variables, _df, _test_df):
        # Fit on ``_df`` and compare the summed log-likelihood on ``_test_df``.
        cpd = pbn.KDE(variables)
        cpd.fit(_df)

        train = _df.loc[:, variables].to_numpy()
        reference = gaussian_kde(train.T, bw_method=_bandwidth_factor)

        points = _test_df.loc[:, variables].to_numpy()

        actual = cpd.slogl(_test_df)
        expected = np.nansum(reference.logpdf(points.T))
        assert np.isclose(actual, expected).all()

    def _with_missing(frame, null_rows):
        # Copy ``frame`` and blank out the chosen row positions, per column.
        holed = frame.copy()
        for column, rows in zip(columns, null_rows):
            holed.loc[holed.index[rows], column] = np.nan
        return holed

    test_df = util_test.generate_normal_data(TEST_SIZE, seed=1)
    test_df_float = test_df.astype('float32')

    # Row positions are drawn in the fixed order a, b, c, d from the seeded
    # global NumPy generator.
    np.random.seed(0)
    null_rows = [np.random.randint(0, TEST_SIZE, size=10) for _ in columns]

    df_null = _with_missing(test_df, null_rows)
    df_null_float = _with_missing(test_df_float, null_rows)

    variable_sets = (['a'], ['b', 'a'], ['c', 'a', 'b'], ['d', 'a', 'b', 'c'])
    for variables in variable_sets:
        _test_kde_slogl_null_iter(variables, df, df_null)
        _test_kde_slogl_null_iter(variables, df_float, df_null_float)

    # The same variables in a different order must give the same result.
    for train, holed in ((df, df_null), (df_float, df_null_float)):
        cpd = pbn.KDE(['d', 'a', 'b', 'c'])
        cpd.fit(train)
        cpd2 = pbn.KDE(['a', 'c', 'd', 'b'])
        cpd2.fit(train)
        same = np.isclose(cpd.slogl(holed), cpd2.slogl(holed))
        assert np.all(same), "Order of evidence changes slogl() result."

Exemple #3

0

Afficher le fichier

def test_productkde_slogl():
    """Check ``ProductKDE.slogl`` against a SciPy KDE with a diagonal bandwidth.

    The comparison runs for float64 and float32 training frames, followed by
    a check that reordering the variables does not change the result.
    """
    def _test_productkde_slogl_iter(variables, _df, _test_df):
        cpd = pbn.ProductKDE(variables)
        cpd.fit(_df)

        train = _df.loc[:, variables].to_numpy()
        bandwidth = cpd.bandwidth

        # Replace SciPy's fitted covariance with the diagonal bandwidth
        # reported by the ProductKDE so both models use the same kernel.
        reference = gaussian_kde(train.T)
        reference.covariance = np.diag(bandwidth)
        reference.inv_cov = np.diag(1. / bandwidth)
        reference.log_det = (bandwidth.shape[0] * np.log(2*np.pi)
                             + np.log(bandwidth).sum())

        points = _test_df.loc[:, variables].to_numpy()
        actual = cpd.slogl(_test_df)
        expected = reference.logpdf(points.T).sum()
        assert np.isclose(actual, expected).all()

    test_df = util_test.generate_normal_data(50, seed=1)
    test_df_float = test_df.astype('float32')

    variable_sets = (['a'], ['b', 'a'], ['c', 'a', 'b'], ['d', 'a', 'b', 'c'])
    for variables in variable_sets:
        _test_productkde_slogl_iter(variables, df, test_df)
        _test_productkde_slogl_iter(variables, df_float, test_df_float)

    # Only the float32 comparison uses a relaxed absolute tolerance.
    order_checks = (
        (df, test_df, {}),
        (df_float, test_df_float, {'atol': 0.0005}),
    )
    for train, held_out, tolerance in order_checks:
        cpd = pbn.ProductKDE(['d', 'a', 'b', 'c'])
        cpd.fit(train)
        cpd2 = pbn.ProductKDE(['a', 'c', 'd', 'b'])
        cpd2.fit(train)
        same = np.isclose(cpd.slogl(held_out), cpd2.slogl(held_out), **tolerance)
        assert np.all(same), "Order of evidence changes slogl() result."

Exemple #4

0

Afficher le fichier

Fichier : LinearGaussianCPD_test.py Projet : davenza/PyBNesian

def test_lg_slogl_null():
    """Check ``LinearGaussianCPD.slogl`` on test data with missing values.

    The CPD is fitted on the ``df`` frame from the enclosing module. Its
    summed log-likelihood on a frame with injected NaNs must equal the
    NaN-ignoring sum of ``numpy_logpdf``, whatever the evidence order.
    """
    n_rows = 5000
    test_df = util_test.generate_normal_data(n_rows)

    # Row positions are drawn in the fixed order a, b, c, d from the seeded
    # global NumPy generator.
    np.random.seed(0)
    null_rows = [(column, np.random.randint(0, n_rows, size=100))
                 for column in ('a', 'b', 'c', 'd')]

    df_null = test_df.copy()
    for column, rows in null_rows:
        df_null.loc[df_null.index[rows], column] = np.nan

    cases = [('a', []), ('b', ['a']), ('c', ['a', 'b']), ('d', ['a', 'b', 'c'])]
    for variable, evidence in cases:
        cpd = pbn.LinearGaussianCPD(variable, evidence)
        cpd.fit(df)

        beta = cpd.beta
        variance = cpd.variance

        actual = cpd.slogl(df_null)
        expected = np.nansum(numpy_logpdf(df_null, variable, evidence, beta, variance))
        assert np.isclose(actual, expected).all(), \
            "Wrong slogl for LinearGaussianCPD(" + str(variable) + " | " + str(evidence) + ") with null values."

    first = pbn.LinearGaussianCPD('d', ['a', 'b', 'c'])
    first.fit(df)
    second = pbn.LinearGaussianCPD('d', ['c', 'a', 'b'])
    second.fit(df)

    same = np.isclose(first.slogl(df_null), second.slogl(df_null))
    assert np.all(same), "The order of the evidence changes the slogl() result."

Exemple #5

0

Afficher le fichier

def test_productkde_logl_null():
    """Check ``ProductKDE.logl`` on test data that contains missing values.

    Per-row log-likelihoods are compared, with NaNs treated as equal, against
    a SciPy KDE whose covariance is overridden with the ProductKDE bandwidth.
    A second check confirms that the variable order does not matter.
    """
    TEST_SIZE = 50
    columns = ('a', 'b', 'c', 'd')

    def _test_productkde_logl_null_iter(variables, _df, _test_df):
        cpd = pbn.ProductKDE(variables)
        cpd.fit(_df)

        logl = cpd.logl(_test_df)

        train = _df.loc[:, variables].to_numpy()
        bandwidth = cpd.bandwidth

        # Give SciPy the same diagonal kernel as the fitted ProductKDE.
        reference = gaussian_kde(train.T)
        reference.covariance = np.diag(bandwidth)
        reference.inv_cov = np.diag(1. / bandwidth)
        reference.log_det = (bandwidth.shape[0] * np.log(2*np.pi)
                             + np.log(bandwidth).sum())

        points = _test_df.loc[:, variables].to_numpy()
        expected = reference.logpdf(points.T)

        # float32 training data is compared with a relaxed absolute tolerance.
        tolerance = {'atol': 0.0005} if train.dtype == "float32" else {}
        assert np.isclose(logl, expected, equal_nan=True, **tolerance).all()

    def _with_missing(frame, null_rows):
        # Copy ``frame`` and blank out the chosen row positions, per column.
        holed = frame.copy()
        for column, rows in zip(columns, null_rows):
            holed.loc[holed.index[rows], column] = np.nan
        return holed

    test_df = util_test.generate_normal_data(TEST_SIZE, seed=1)
    test_df_float = test_df.astype('float32')

    # Row positions are drawn in the fixed order a, b, c, d from the seeded
    # global NumPy generator.
    np.random.seed(0)
    null_rows = [np.random.randint(0, TEST_SIZE, size=10) for _ in columns]

    df_null = _with_missing(test_df, null_rows)
    df_null_float = _with_missing(test_df_float, null_rows)

    variable_sets = (['a'], ['b', 'a'], ['c', 'a', 'b'], ['d', 'a', 'b', 'c'])
    for variables in variable_sets:
        _test_productkde_logl_null_iter(variables, df, df_null)
        _test_productkde_logl_null_iter(variables, df_float, df_null_float)

    order_checks = (
        (df, df_null, {}),
        (df_float, df_null_float, {'atol': 0.0005}),
    )
    for train, holed, tolerance in order_checks:
        cpd = pbn.ProductKDE(['d', 'a', 'b', 'c'])
        cpd.fit(train)
        cpd2 = pbn.ProductKDE(['a', 'c', 'd', 'b'])
        cpd2.fit(train)
        same = np.isclose(cpd.logl(holed), cpd2.logl(holed),
                          equal_nan=True, **tolerance)
        assert np.all(same), "Order of evidence changes logl() result."

Exemple #6

0

Afficher le fichier

def test_kde_logl():
    """Check per-row ``KDE.logl`` against SciPy's ``gaussian_kde.logpdf``.

    The comparison runs for float64 and float32 frames, followed by a check
    that reordering the variables does not change the result.
    """
    def _bandwidth_factor(kde):
        # SciPy's Scott factor rescaled by (4 / (d + 2)) ** (1 / (d + 4)).
        return np.power(4 / (kde.d + 2), 1 / (kde.d + 4)) * kde.scotts_factor()

    def _test_kde_logl_iter(variables, _df, _test_df):
        cpd = pbn.KDE(variables)
        cpd.fit(_df)

        train = _df.loc[:, variables].to_numpy()
        reference = gaussian_kde(train.T, bw_method=_bandwidth_factor)

        points = _test_df.loc[:, variables].to_numpy()

        logl = cpd.logl(_test_df)
        expected = reference.logpdf(points.T)

        # float32 training frames are compared with a relaxed tolerance.
        tolerance = {'atol': 0.0005} if np.all(_df.dtypes == 'float32') else {}
        assert np.isclose(logl, expected, **tolerance).all()

    test_df = util_test.generate_normal_data(50, seed=1)
    test_df_float = test_df.astype('float32')

    variable_sets = (['a'], ['b', 'a'], ['c', 'a', 'b'], ['d', 'a', 'b', 'c'])
    for variables in variable_sets:
        _test_kde_logl_iter(variables, df, test_df)
        _test_kde_logl_iter(variables, df_float, test_df_float)

    # The same variables in a different order must give the same result.
    for train, held_out in ((df, test_df), (df_float, test_df_float)):
        cpd = pbn.KDE(['d', 'a', 'b', 'c'])
        cpd.fit(train)
        cpd2 = pbn.KDE(['a', 'c', 'd', 'b'])
        cpd2.fit(train)
        same = np.isclose(cpd.logl(held_out), cpd2.logl(held_out))
        assert np.all(same), "Order of evidence changes logl() result."

Exemple #7

0

Afficher le fichier

Fichier : CKDE_test.py Projet : davenza/PyBNesian

def test_ckde_slogl():
    """Check ``CKDE.slogl`` against the SciPy-based reference helpers.

    Each (variable, evidence) pair is fitted on the large and small training
    frames from the enclosing module, in float64 and float32, and the summed
    log-likelihood is compared with the sum of ``scipy_ckde_logpdf``. A final
    check confirms that the evidence order does not matter.
    """
    def _test_ckde_slogl(variable, evidence, _df, _test_df):
        cpd = pbn.CKDE(variable, evidence)
        cpd.fit(_df)

        joint_kde, marg_kde = train_scipy_ckde(_df, variable, evidence)
        scipy_logl = scipy_ckde_logpdf(_test_df, joint_kde, marg_kde,
                                       variable, evidence)

        tolerance = {}
        if np.all(_test_df.dtypes == "float32"):
            # Allow an error of 0.0005 for each training instance.
            tolerance['atol'] = 0.0005 * _df.shape[0]

        assert np.isclose(cpd.slogl(_test_df), scipy_logl.sum(), **tolerance)

    test_df = util_test.generate_normal_data(TEST_SIZE, seed=1)
    test_df_float = test_df.astype('float32')

    frame_pairs = (
        (df, test_df),
        (df_small, test_df),
        (df_float, test_df_float),
        (df_small_float, test_df_float),
    )
    cases = [('a', []), ('b', ['a']), ('c', ['a', 'b']), ('d', ['a', 'b', 'c'])]
    for variable, evidence in cases:
        for train, held_out in frame_pairs:
            _test_ckde_slogl(variable, evidence, train, held_out)

    # The same evidence in a different order must give the same result.
    for train, held_out in ((df, test_df), (df_float, test_df_float)):
        cpd = pbn.CKDE('d', ['a', 'b', 'c'])
        cpd.fit(train)
        cpd2 = pbn.CKDE('d', ['c', 'b', 'a'])
        cpd2.fit(train)
        same = np.isclose(cpd.slogl(held_out), cpd2.slogl(held_out))
        assert np.all(same), "Order of evidence changes slogl() result."

Exemple #8

0

Afficher le fichier

Fichier : LinearGaussianCPD_test.py Projet : davenza/PyBNesian

def test_lg_slogl():
    """Check ``LinearGaussianCPD.slogl`` against the ``numpy_logpdf`` reference.

    The CPD is fitted on the ``df`` frame from the enclosing module and
    evaluated on a freshly generated frame. The result must not depend on the
    evidence order.
    """
    test_df = util_test.generate_normal_data(5000)

    cases = [('a', []), ('b', ['a']), ('c', ['a', 'b']), ('d', ['a', 'b', 'c'])]
    for variable, evidence in cases:
        cpd = pbn.LinearGaussianCPD(variable, evidence)
        cpd.fit(df)

        beta = cpd.beta
        variance = cpd.variance

        actual = cpd.slogl(test_df)
        expected = np.sum(numpy_logpdf(test_df, variable, evidence, beta, variance))
        assert np.isclose(actual, expected).all(), \
            "Wrong slogl for LinearGaussianCPD(" + str(variable) + " | " + str(evidence) + ")"

    first = pbn.LinearGaussianCPD('d', ['a', 'b', 'c'])
    first.fit(df)
    second = pbn.LinearGaussianCPD('d', ['c', 'a', 'b'])
    second.fit(df)

    same = np.isclose(first.slogl(test_df), second.slogl(test_df))
    assert np.all(same), "The order of the evidence changes the slogl() result."

Exemple #9

0

Afficher le fichier

Fichier : CKDE_test.py Projet : davenza/PyBNesian

def test_ckde_cdf():
    """Check ``CKDE.cdf`` against the SciPy-based reference helpers.

    Each (variable, evidence) pair is fitted on the large and small training
    frames from the enclosing module, in float64 and float32, and the per-row
    CDF is compared with ``scipy_ckde_cdf``. A final check confirms that the
    evidence order does not change the CDF.
    """
    def _test_ckde_cdf(variable, evidence, _df, _test_df):
        cpd = pbn.CKDE(variable, evidence)
        cpd.fit(_df)
        scipy_kde_joint, scipy_kde_marg = train_scipy_ckde(
            _df, variable, evidence)

        cdf = cpd.cdf(_test_df)
        scipy = scipy_ckde_cdf(_test_df, scipy_kde_joint, scipy_kde_marg,
                               variable, evidence)

        # float32 training frames are compared with a relaxed tolerance.
        if np.all(_df.dtypes == 'float32'):
            assert np.all(np.isclose(cdf, scipy, atol=0.0005))
        else:
            assert np.all(np.isclose(cdf, scipy))

    test_df = util_test.generate_normal_data(TEST_SIZE, seed=1)
    test_df_float = test_df.astype('float32')

    for variable, evidence in [('a', []), ('b', ['a']), ('c', ['a', 'b']),
                               ('d', ['a', 'b', 'c'])]:
        _test_ckde_cdf(variable, evidence, df, test_df)
        _test_ckde_cdf(variable, evidence, df_small, test_df)
        _test_ckde_cdf(variable, evidence, df_float, test_df_float)
        _test_ckde_cdf(variable, evidence, df_small_float, test_df_float)

    # The failure messages name cdf(), the method under test here; they
    # previously said logl().
    cpd = pbn.CKDE('d', ['a', 'b', 'c'])
    cpd.fit(df)
    cpd2 = pbn.CKDE('d', ['c', 'b', 'a'])
    cpd2.fit(df)
    assert np.all(np.isclose(
        cpd.cdf(test_df),
        cpd2.cdf(test_df))), "Order of evidence changes cdf() result."

    cpd = pbn.CKDE('d', ['a', 'b', 'c'])
    cpd.fit(df_float)
    cpd2 = pbn.CKDE('d', ['c', 'b', 'a'])
    cpd2.fit(df_float)
    assert np.all(
        np.isclose(cpd.cdf(test_df_float),
                   cpd2.cdf(test_df_float),
                   atol=0.0005)), "Order of evidence changes cdf() result."

Exemple #10

0

Afficher le fichier

def test_logl():
    """Check that network-level log-likelihoods decompose over the node CPDs.

    For a fully connected ``SemiparametricBN`` over ``a``..``d`` fitted on the
    ``df`` frame from the enclosing module, ``logl`` must equal the sum of
    the per-node ``logl`` vectors, and ``slogl`` must equal both the sum of
    ``logl`` and the accumulated per-node ``slogl`` values.
    """
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'),
            ('b', 'c'), ('b', 'd'), ('c', 'd')]
    spbn = SemiparametricBN(arcs)
    spbn.fit(df)

    test_df = util_test.generate_normal_data(5000)
    ll = spbn.logl(test_df)
    sll = spbn.slogl(test_df)

    # Accumulators for the per-node contributions.
    sum_ll = np.zeros((5000, ))
    sum_sll = 0

    for node in spbn.nodes():
        node_cpd = spbn.cpd(node)
        node_ll = node_cpd.logl(test_df)
        node_sll = node_cpd.slogl(test_df)
        assert np.isclose(node_sll, node_ll.sum()).all()
        sum_ll += node_ll
        sum_sll += node_sll

    assert np.isclose(ll, sum_ll).all()
    assert np.isclose(sll, ll.sum())
    # Exact equality is intentional here, as in the original test.
    assert sll == sum_sll

Exemple #11

0

Afficher le fichier

Fichier : operatorset_test.py Projet : davenza/PyBNesian

import pytest
import numpy as np
import pybnesian as pbn
import util_test

# Number of rows requested for the shared data frame below.
SIZE = 10000
# Module-level frame built once by util_test.generate_normal_data and read by
# the tests in this module.
df = util_test.generate_normal_data(SIZE)


def test_create_change_node():
    """``ChangeNodeTypeSet.cache_scores`` must reject a ``GaussianNetwork``.

    The call is expected to raise ``ValueError`` with a message containing
    "can only be used with non-homogeneous".
    """
    node_type_ops = pbn.ChangeNodeTypeSet()
    network = pbn.GaussianNetwork(['a', 'b', 'c', 'd'])
    score = pbn.CVLikelihood(df)

    with pytest.raises(ValueError) as excinfo:
        node_type_ops.cache_scores(network, score)

    message = str(excinfo.value)
    assert "can only be used with non-homogeneous" in message


def test_lists():
    """Configure the arc lists and limits of an ``ArcOperatorSet``.

    NOTE(review): the visible body has no assertions and never uses ``gbn``
    or ``bic``; the excerpt may be truncated -- confirm against the full
    test file.
    """
    gbn = pbn.GaussianNetwork(['a', 'b', 'c', 'd'])
    bic = pbn.BIC(df)
    arc_op = pbn.ArcOperatorSet()

    forbidden_arcs = [("b", "a")]
    required_arcs = [("b", "c")]
    forced_types = [("a", pbn.LinearGaussianCPDType())]

    arc_op.set_arc_blacklist(forbidden_arcs)
    arc_op.set_arc_whitelist(required_arcs)
    arc_op.set_max_indegree(3)
    arc_op.set_type_whitelist(forced_types)

Exemple #12

0

Afficher le fichier

Fichier : hillclimbing_test.py Projet : davenza/PyBNesian

import numpy as np
import pybnesian as pbn
from pybnesian import BayesianNetworkType, BayesianNetwork
import util_test

# Module-level frame built once by util_test.generate_normal_data (1000 rows
# requested) and read by the tests in this module.
df = util_test.generate_normal_data(1000)


def test_hc_estimate():
    """Run one greedy hill-climbing iteration from two starting networks.

    The first start is a ``GaussianNetwork`` over the columns of ``df``. The
    second is built with two extra nodes (``e`` and ``f``) that are removed
    again before the search. Each run is limited to one iteration and must
    produce exactly one arc.
    """
    bic = pbn.BIC(df)
    column_names = list(df.columns.values)
    start = pbn.GaussianNetwork(column_names)

    # Check algorithm with BN with nodes removed.
    column_names.insert(1, 'e')
    column_names.insert(3, 'f')
    start_removed_nodes = pbn.GaussianNetwork(column_names)
    start_removed_nodes.remove_node('e')
    start_removed_nodes.remove_node('f')

    arc_set = pbn.ArcOperatorSet()

    hc = pbn.GreedyHillClimbing()

    res = hc.estimate(arc_set, bic, start, max_iters=1)
    assert res.num_arcs() == 1
    # NOTE(review): added_arc and op_delta are not used in the visible part
    # of this test; presumably later checks compare them with the
    # removed-nodes run -- confirm against the full test file.
    added_arc = res.arcs()[0]
    op_delta = bic.score(res) - bic.score(start)

    res_removed = hc.estimate(arc_set, bic, start_removed_nodes, max_iters=1)
    # Check the removed-nodes result itself; the original re-checked ``res``,
    # so this run was never verified.
    assert res_removed.num_arcs() == 1

Exemple #13

0

Afficher le fichier

Fichier : CKDE_test.py Projet : davenza/PyBNesian

def test_ckde_cdf_null():
    """Check ``CKDE.cdf`` on test data that contains missing values.

    The per-row CDF is compared, with NaNs treated as equal, against
    ``scipy_ckde_cdf`` for the large and small training frames in float64
    and float32. A final check confirms that the evidence order does not
    matter.
    """
    columns = ('a', 'b', 'c', 'd')

    def _test_ckde_cdf_null(variable, evidence, _df, _test_df):
        cpd = pbn.CKDE(variable, evidence)
        cpd.fit(_df)

        joint_kde, marg_kde = train_scipy_ckde(_df, variable, evidence)

        cdf = cpd.cdf(_test_df)
        expected = scipy_ckde_cdf(_test_df, joint_kde, marg_kde,
                                  variable, evidence)

        # float32 training frames are compared with a relaxed tolerance.
        tolerance = {'atol': 0.0005} if np.all(_df.dtypes == 'float32') else {}
        assert np.isclose(cdf, expected, equal_nan=True, **tolerance).all()

    def _with_missing(frame, null_rows):
        # Copy ``frame`` and blank out the chosen row positions, per column.
        holed = frame.copy()
        for column, rows in zip(columns, null_rows):
            holed.loc[holed.index[rows], column] = np.nan
        return holed

    test_df = util_test.generate_normal_data(TEST_SIZE, seed=1)
    test_df_float = test_df.astype('float32')

    # Row positions are drawn in the fixed order a, b, c, d from the seeded
    # global NumPy generator.
    np.random.seed(0)
    null_rows = [np.random.randint(0, TEST_SIZE, size=10) for _ in columns]

    df_null = _with_missing(test_df, null_rows)
    df_null_float = _with_missing(test_df_float, null_rows)

    frame_pairs = (
        (df, df_null),
        (df_small, df_null),
        (df_float, df_null_float),
        (df_small_float, df_null_float),
    )
    cases = [('a', []), ('b', ['a']), ('c', ['a', 'b']), ('d', ['a', 'b', 'c'])]
    for variable, evidence in cases:
        for train, holed in frame_pairs:
            _test_ckde_cdf_null(variable, evidence, train, holed)

    order_checks = (
        (df, df_null, {}),
        (df_float, df_null_float, {'atol': 0.0005}),
    )
    for train, holed, tolerance in order_checks:
        cpd = pbn.CKDE('d', ['a', 'b', 'c'])
        cpd.fit(train)
        cpd2 = pbn.CKDE('d', ['c', 'b', 'a'])
        cpd2.fit(train)
        same = np.isclose(cpd.cdf(holed), cpd2.cdf(holed),
                          equal_nan=True, **tolerance)
        assert np.all(same), "Order of evidence changes cdf() result."

Exemple #14

0

Afficher le fichier

Fichier : CKDE_test.py Projet : davenza/PyBNesian

def test_ckde_slogl_null():
    """Check ``CKDE.slogl`` on test data that contains missing values.

    The summed log-likelihood must equal the NaN-ignoring sum of
    ``scipy_ckde_logpdf`` for the large and small training frames in float64
    and float32, and must not depend on the evidence order.
    """
    columns = ('a', 'b', 'c', 'd')

    def _test_ckde_slogl_null(variable, evidence, _df, _test_df):
        cpd = pbn.CKDE(variable, evidence)
        cpd.fit(_df)

        joint_kde, marg_kde = train_scipy_ckde(_df, variable, evidence)
        scipy_logl = scipy_ckde_logpdf(_test_df, joint_kde, marg_kde,
                                       variable, evidence)

        tolerance = {}
        if np.all(_test_df.dtypes == "float32"):
            # Allow an error of 0.0005 for each training instance.
            tolerance['atol'] = 0.0005 * _df.shape[0]

        assert np.isclose(cpd.slogl(_test_df), np.nansum(scipy_logl),
                          **tolerance)

    def _with_missing(frame, null_rows):
        # Copy ``frame`` and blank out the chosen row positions, per column.
        holed = frame.copy()
        for column, rows in zip(columns, null_rows):
            holed.loc[holed.index[rows], column] = np.nan
        return holed

    test_df = util_test.generate_normal_data(TEST_SIZE, seed=1)
    test_df_float = test_df.astype('float32')

    # Row positions are drawn in the fixed order a, b, c, d from the seeded
    # global NumPy generator.
    np.random.seed(0)
    null_rows = [np.random.randint(0, TEST_SIZE, size=10) for _ in columns]

    df_null = _with_missing(test_df, null_rows)
    df_null_float = _with_missing(test_df_float, null_rows)

    frame_pairs = (
        (df, df_null),
        (df_small, df_null),
        (df_float, df_null_float),
        (df_small_float, df_null_float),
    )
    cases = [('a', []), ('b', ['a']), ('c', ['a', 'b']), ('d', ['a', 'b', 'c'])]
    for variable, evidence in cases:
        for train, holed in frame_pairs:
            _test_ckde_slogl_null(variable, evidence, train, holed)

    # The same evidence in a different order must give the same result.
    for train, holed in ((df, df_null), (df_float, df_null_float)):
        cpd = pbn.CKDE('d', ['a', 'b', 'c'])
        cpd.fit(train)
        cpd2 = pbn.CKDE('d', ['c', 'b', 'a'])
        cpd2.fit(train)
        same = np.isclose(cpd.slogl(holed), cpd2.slogl(holed))
        assert np.all(same), "Order of evidence changes slogl() result."

Exemple #15

0

Afficher le fichier

Fichier : CKDE_test.py Projet : davenza/PyBNesian

import pytest
import numpy as np
import pyarrow as pa
import pandas as pd
import pybnesian as pbn
from scipy.stats import gaussian_kde
from scipy.stats import norm
from scipy.stats import multivariate_normal as mvn
from scipy.special import logsumexp

import util_test

# Row counts requested for the large training frame, the small training
# frame, and the frames generated inside individual tests.
SIZE = 10000
SMALL_SIZE = 10
TEST_SIZE = 50
# Shared training frames, generated once at import time with a fixed seed.
df = util_test.generate_normal_data(SIZE, seed=0)
df_small = util_test.generate_normal_data(SMALL_SIZE, seed=0)
# float32 copies of the two training frames above.
df_float = df.astype('float32')
df_small_float = df_small.astype('float32')


def test_variable():
    """``CKDE.variable()`` must return the variable given at construction."""
    cases = [
        ('a', []),
        ('b', ['a']),
        ('c', ['a', 'b']),
        ('d', ['a', 'b', 'c']),
    ]
    for variable, evidence in cases:
        assert pbn.CKDE(variable, evidence).variable() == variable


def test_evidence():
    for variable, evidence in [('a', []), ('b', ['a']), ('c', ['a', 'b']),
                               ('d', ['a', 'b', 'c'])]: