예제 #1
0
def test_holdout_local_score_null_spbn():
    """Check holdout local scores of a semiparametric network on data with NaNs.

    A copy of the module-level ``df`` gets ``np.nan`` written into randomly
    chosen rows of columns 'a', 'b', 'c' and 'd'. The local scores reported
    by ``pbn.HoldoutLikelihood`` are then compared with the reference values
    from ``numpy_local_score`` computed on the same training/test split.
    Nodes 'a' and 'c' are declared as CKDE; 'b' and 'd' are compared against
    the linear Gaussian reference.
    """
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'),
            ('b', 'c'), ('b', 'd'), ('c', 'd')]
    node_types = [('a', pbn.CKDEType()), ('c', pbn.CKDEType())]
    spbn = pbn.SemiparametricBN(arcs, node_types)

    # Draw the row positions for every column up front, in the order
    # a, b, c, d, so the seeded random stream is consumed deterministically.
    np.random.seed(0)
    null_positions = {
        column: np.random.randint(0, SIZE, size=100)
        for column in ('a', 'b', 'c', 'd')
    }

    df_null = df.copy()
    for column, positions in null_positions.items():
        df_null.loc[df_null.index[positions], column] = np.nan

    hl = pbn.HoldoutLikelihood(df_null, 0.2, seed)

    # (variable, evidence given to the score, reference CPD type,
    #  evidence given to the reference implementation)
    cases = [
        ('a', [], pbn.CKDEType, []),
        ('b', ['a'], pbn.LinearGaussianCPDType, ['a']),
        ('c', ['a', 'b'], pbn.CKDEType, ['a', 'b']),
        ('d', ['a', 'b', 'c'], pbn.LinearGaussianCPDType, ['a', 'b', 'c']),
        # Same evidence set in a different order: the score must not change.
        ('d', ['a', 'b', 'c'], pbn.LinearGaussianCPDType, ['b', 'c', 'a']),
    ]
    for variable, evidence, cpd_type, reference_evidence in cases:
        assert np.isclose(
            hl.local_score(spbn, variable, evidence),
            numpy_local_score(cpd_type(),
                              hl.training_data().to_pandas(),
                              hl.test_data().to_pandas(),
                              variable, reference_evidence))

    # Omitting the evidence must be equivalent to passing the node's parents
    # as stored in the network.
    for node in ('a', 'b', 'c', 'd'):
        implicit = hl.local_score(spbn, node)
        explicit = hl.local_score(spbn, node, spbn.parents(node))
        assert implicit == explicit
예제 #2
0
def test_holdout_score():
    """Check that the network score equals the sum of its local scores.

    The property is verified for a ``pbn.GaussianNetwork`` (with explicit
    evidence lists) and for a ``pbn.SemiparametricBN`` (letting the score
    take the parents from the network), both over nodes 'a'..'d'.
    """
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'), ('b', 'c'),
            ('b', 'd'), ('c', 'd')]
    gbn = pbn.GaussianNetwork(arcs)

    hl = pbn.HoldoutLikelihood(df, 0.2, 0)

    gbn_total = hl.score(gbn)
    gbn_a = hl.local_score(gbn, 'a', [])
    gbn_b = hl.local_score(gbn, 'b', ['a'])
    gbn_c = hl.local_score(gbn, 'c', ['a', 'b'])
    gbn_d = hl.local_score(gbn, 'd', ['a', 'b', 'c'])
    # Explicit left-to-right addition keeps the floating-point association
    # of the comparison value fixed.
    assert np.isclose(gbn_total, gbn_a + gbn_b + gbn_c + gbn_d)

    node_types = [('a', pbn.CKDEType()), ('c', pbn.CKDEType())]
    spbn = pbn.SemiparametricBN(
        [('a', 'b'), ('a', 'c'), ('a', 'd'),
         ('b', 'c'), ('b', 'd'), ('c', 'd')],
        node_types)

    spbn_total = hl.score(spbn)
    spbn_a = hl.local_score(spbn, 'a')
    spbn_b = hl.local_score(spbn, 'b')
    spbn_c = hl.local_score(spbn, 'c')
    spbn_d = hl.local_score(spbn, 'd')
    assert np.isclose(spbn_total, spbn_a + spbn_b + spbn_c + spbn_d)
예제 #3
0
def test_holdout_local_score_spbn():
    """Check holdout local scores of a semiparametric network.

    Each local score from ``pbn.HoldoutLikelihood`` on the module-level
    ``df`` is compared with ``numpy_local_score`` evaluated on the same
    training/test split. Nodes 'a' and 'c' are declared as CKDE; 'b' and 'd'
    are compared against the linear Gaussian reference.
    """
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'),
            ('b', 'c'), ('b', 'd'), ('c', 'd')]
    node_types = [('a', pbn.CKDEType()), ('c', pbn.CKDEType())]
    spbn = pbn.SemiparametricBN(arcs, node_types)

    hl = pbn.HoldoutLikelihood(df, 0.2, seed)

    # (variable, evidence given to the score, reference CPD type,
    #  evidence given to the reference implementation)
    cases = [
        ('a', [], pbn.CKDEType, []),
        ('b', ['a'], pbn.LinearGaussianCPDType, ['a']),
        ('c', ['a', 'b'], pbn.CKDEType, ['a', 'b']),
        ('d', ['a', 'b', 'c'], pbn.LinearGaussianCPDType, ['a', 'b', 'c']),
        # Same evidence set in a different order: the score must not change.
        ('d', ['a', 'b', 'c'], pbn.LinearGaussianCPDType, ['b', 'c', 'a']),
    ]
    for variable, evidence, cpd_type, reference_evidence in cases:
        assert np.isclose(
            hl.local_score(spbn, variable, evidence),
            numpy_local_score(cpd_type(),
                              hl.training_data().to_pandas(),
                              hl.test_data().to_pandas(),
                              variable, reference_evidence))

    # Omitting the evidence must be equivalent to passing the node's parents
    # as stored in the network.
    for node in ('a', 'b', 'c', 'd'):
        implicit = hl.local_score(spbn, node)
        explicit = hl.local_score(spbn, node, spbn.parents(node))
        assert implicit == explicit
예제 #4
0
def test_holdout_local_score_gbn():
    """Check holdout local scores of a Gaussian network.

    Every local score from ``pbn.HoldoutLikelihood`` on the module-level
    ``df`` is compared with the linear Gaussian reference computed by
    ``numpy_local_score`` on the same training/test split.
    """
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'), ('b', 'c'),
            ('b', 'd'), ('c', 'd')]
    gbn = pbn.GaussianNetwork(arcs)

    hl = pbn.HoldoutLikelihood(df, 0.2, seed)

    # (variable, evidence) pairs checked against the reference.
    cases = [
        ('a', []),
        ('b', ['a']),
        ('c', ['a', 'b']),
        ('d', ['a', 'b', 'c']),
    ]
    for variable, evidence in cases:
        library_score = hl.local_score(gbn, variable, evidence)
        reference_score = numpy_local_score(pbn.LinearGaussianCPDType(),
                                            hl.training_data().to_pandas(),
                                            hl.test_data().to_pandas(),
                                            variable, evidence)
        assert np.isclose(library_score, reference_score)

    # The order in which the evidence is listed must not affect the score.
    ordered = hl.local_score(gbn, 'd', ['a', 'b', 'c'])
    shuffled = hl.local_score(gbn, 'd', ['b', 'c', 'a'])
    assert np.isclose(ordered, shuffled)

    # Omitting the evidence must be equivalent to passing the node's parents
    # as stored in the network.
    for node in ('a', 'b', 'c', 'd'):
        implicit = hl.local_score(gbn, node)
        explicit = hl.local_score(gbn, node, gbn.parents(node))
        assert implicit == explicit
예제 #5
0
def test_holdout_create():
    """Check construction of ``pbn.HoldoutLikelihood``.

    Covers the split sizes with the default and an explicit second argument,
    reproducibility of the split when the same third argument is given, and
    the ``ValueError`` raised for the second-argument values 10 and 0.
    """
    # With only the data, the split is expected to be 80% / 20%.
    default_split = pbn.HoldoutLikelihood(df)
    assert default_split.training_data().num_rows == 0.8 * SIZE
    assert default_split.test_data().num_rows == 0.2 * SIZE

    half_split = pbn.HoldoutLikelihood(df, 0.5)
    assert half_split.training_data().num_rows == 0.5 * SIZE
    assert half_split.test_data().num_rows == 0.5 * SIZE

    # Two instances built with identical arguments must yield the same split.
    first = pbn.HoldoutLikelihood(df, 0.2, 0)
    second = pbn.HoldoutLikelihood(df, 0.2, 0)

    assert first.training_data().equals(second.training_data())
    assert first.test_data().equals(second.test_data())

    # Both 10 and 0 must be rejected with the same error message.
    for bad_ratio in (10, 0):
        with pytest.raises(ValueError) as ex:
            pbn.HoldoutLikelihood(df, bad_ratio, 0)
        assert "test_ratio must be a number" in str(ex.value)