Ejemplo n.º 1
0
def test_cvl_local_score_gbn():
    """Compare CVLikelihood local scores on a Gaussian network to a reference.

    For every node of a fully connected four-node network the local score is
    checked against ``numpy_local_score`` with a linear Gaussian CPD type.
    The test then verifies that reordering the parents of ``'d'`` gives a
    close score, and that omitting the parent list yields exactly the same
    value as passing ``network.parents(node)`` explicitly.
    """
    network = pbn.GaussianNetwork([('a', 'b'), ('a', 'c'), ('a', 'd'),
                                   ('b', 'c'), ('b', 'd'), ('c', 'd')])
    score = pbn.CVLikelihood(df, 10, seed)

    # (node, parents) families checked against the reference implementation.
    families = (
        ('a', ()),
        ('b', ('a',)),
        ('c', ('a', 'b')),
        ('d', ('a', 'b', 'c')),
    )
    for node, parents in families:
        # Fresh lists per call, mirroring independent literals at each site.
        computed = score.local_score(network, node, list(parents))
        reference = numpy_local_score(pbn.LinearGaussianCPDType(), df, node,
                                      list(parents))
        assert np.isclose(computed, reference)

    # The order in which the parents are listed must not matter.
    in_order = score.local_score(network, 'd', ['a', 'b', 'c'])
    rotated = score.local_score(network, 'd', ['b', 'c', 'a'])
    assert np.isclose(in_order, rotated)

    # Implicit parents (taken from the network) versus explicit parents.
    for node in ('a', 'b', 'c', 'd'):
        implicit = score.local_score(network, node)
        explicit = score.local_score(network, node, network.parents(node))
        assert implicit == explicit
Ejemplo n.º 2
0
def test_create_change_node():
    """Caching ChangeNodeTypeSet scores on a GaussianNetwork must fail.

    The call is expected to raise ``ValueError`` with a message mentioning
    that the operator set "can only be used with non-homogeneous" models.
    """
    gaussian_bn = pbn.GaussianNetwork(['a', 'b', 'c', 'd'])
    score = pbn.CVLikelihood(df)
    type_changes = pbn.ChangeNodeTypeSet()

    with pytest.raises(ValueError) as exc_info:
        type_changes.cache_scores(gaussian_bn, score)

    message = str(exc_info.value)
    assert "can only be used with non-homogeneous" in message
Ejemplo n.º 3
0
def test_cvl_create():
    """Exercise CVLikelihood construction and its cross-validation folds.

    Checks the number of folds for the default and an explicit ``k``, that
    two instances built with the same seed produce equal train/test splits,
    and that asking for more folds than ``SIZE`` raises ``ValueError``.
    """
    default_score = pbn.CVLikelihood(df)
    assert len(list(default_score.cv)) == 10
    five_fold = pbn.CVLikelihood(df, 5)
    assert len(list(five_fold.cv)) == 5

    seeded_first = pbn.CVLikelihood(df, 10, 0)
    assert len(list(seeded_first.cv)) == 10
    seeded_second = pbn.CVLikelihood(df, 10, 0)
    assert len(list(seeded_second.cv)) == 10

    # Same seed => fold-by-fold identical partitions.
    for fold_a, fold_b in zip(seeded_first.cv, seeded_second.cv):
        train_a, test_a = fold_a
        train_b, test_b = fold_b
        assert train_a.equals(train_b), \
            "Train CV DataFrames with the same seed are not equal."
        assert test_a.equals(test_b), \
            "Test CV DataFrames with the same seed are not equal."

    # More folds than the test expects to be splittable must be rejected.
    with pytest.raises(ValueError) as exc_info:
        pbn.CVLikelihood(df, SIZE + 1)
    assert "Cannot split" in str(exc_info.value)
Ejemplo n.º 4
0
def test_cvl_local_score_spbn():
    """Compare CVLikelihood local scores on a semiparametric network.

    Nodes ``'a'`` and ``'c'`` are declared with ``CKDEType``; the test
    expects their scores to match the CKDE reference and the scores of
    ``'b'`` and ``'d'`` to match the linear Gaussian reference. It also
    checks implicit versus explicit parents, and
    ``local_score_node_type`` with an explicitly supplied node type.
    """
    model = pbn.SemiparametricBN([('a', 'b'), ('a', 'c'), ('a', 'd'),
                                  ('b', 'c'), ('b', 'd'), ('c', 'd')],
                                 [('a', pbn.CKDEType()),
                                  ('c', pbn.CKDEType())])
    score = pbn.CVLikelihood(df, 10, seed)

    abc = ('a', 'b', 'c')
    bca = ('b', 'c', 'a')

    # Rows: (node, parents given to the score, reference CPD type factory,
    #        parents given to the reference).
    # The final row checks invariance to the parent ordering.
    network_type_cases = (
        ('a', (), pbn.CKDEType, ()),
        ('b', ('a',), pbn.LinearGaussianCPDType, ('a',)),
        ('c', ('a', 'b'), pbn.CKDEType, ('a', 'b')),
        ('d', abc, pbn.LinearGaussianCPDType, abc),
        ('d', abc, pbn.LinearGaussianCPDType, bca),
    )
    for node, parents, make_type, ref_parents in network_type_cases:
        computed = score.local_score(model, node, list(parents))
        reference = numpy_local_score(make_type(), df, node,
                                      list(ref_parents))
        assert np.isclose(computed, reference)

    # Implicit parents (taken from the network) versus explicit parents.
    for node in ('a', 'b', 'c', 'd'):
        implicit = score.local_score(model, node)
        explicit = score.local_score(model, node, model.parents(node))
        assert implicit == explicit

    # Rows: (node type factory, node, parents given to the score,
    #        parents given to the reference).
    # Here the node type is passed explicitly instead of read from the model.
    explicit_type_cases = (
        (pbn.LinearGaussianCPDType, 'a', (), ()),
        (pbn.CKDEType, 'b', ('a',), ('a',)),
        (pbn.LinearGaussianCPDType, 'c', ('a', 'b'), ('a', 'b')),
        (pbn.CKDEType, 'd', abc, abc),
        (pbn.CKDEType, 'd', abc, bca),
    )
    for make_type, node, parents, ref_parents in explicit_type_cases:
        computed = score.local_score_node_type(model, make_type(), node,
                                               list(parents))
        reference = numpy_local_score(make_type(), df, node,
                                      list(ref_parents))
        assert np.isclose(computed, reference)
Ejemplo n.º 5
0
def test_cvl_score():
    """Check that the total CVLikelihood score is the sum of local scores.

    Verified for a Gaussian network (with explicit parent lists) and for a
    semiparametric network (with parents taken from the network).
    """
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'),
            ('b', 'c'), ('b', 'd'), ('c', 'd')]
    score = pbn.CVLikelihood(df, 10, 0)

    gaussian_bn = pbn.GaussianNetwork(arcs)
    gaussian_total = score.score(gaussian_bn)
    local_a = score.local_score(gaussian_bn, 'a', [])
    local_b = score.local_score(gaussian_bn, 'b', ['a'])
    local_c = score.local_score(gaussian_bn, 'c', ['a', 'b'])
    local_d = score.local_score(gaussian_bn, 'd', ['a', 'b', 'c'])
    # Explicit left-to-right addition keeps the floating-point result stable.
    assert np.isclose(gaussian_total, local_a + local_b + local_c + local_d)

    hybrid_bn = pbn.SemiparametricBN(arcs, [('a', pbn.CKDEType()),
                                            ('c', pbn.CKDEType())])
    hybrid_total = score.score(hybrid_bn)
    node_a, node_b, node_c, node_d = (score.local_score(hybrid_bn, node)
                                      for node in ('a', 'b', 'c', 'd'))
    assert np.isclose(hybrid_total, node_a + node_b + node_c + node_d)
Ejemplo n.º 6
0
def test_cvl_local_score_gbn_null():
    """Compare Gaussian-network local scores on data containing NaN values.

    A copy of ``df`` gets NaN written into randomly drawn row positions
    (100 draws per column, so at most 100 distinct rows each). The
    CVLikelihood local scores on that data are then checked against
    ``numpy_local_score``, for parent-order invariance, and for equality
    between implicit and explicit parent lists.
    """
    network = pbn.GaussianNetwork([('a', 'b'), ('a', 'c'), ('a', 'd'),
                                   ('b', 'c'), ('b', 'd'), ('c', 'd')])

    np.random.seed(0)
    # Draw positions column by column in a, b, c, d order so the sequence of
    # random numbers consumed is fixed.
    null_rows = {column: np.random.randint(0, SIZE, size=100)
                 for column in ('a', 'b', 'c', 'd')}

    df_null = df.copy()
    for column, rows in null_rows.items():
        df_null.loc[df_null.index[rows], column] = np.nan

    score = pbn.CVLikelihood(df_null, 10, seed)

    # (node, parents) families checked against the reference implementation.
    families = (
        ('a', ()),
        ('b', ('a',)),
        ('c', ('a', 'b')),
        ('d', ('a', 'b', 'c')),
    )
    for node, parents in families:
        computed = score.local_score(network, node, list(parents))
        reference = numpy_local_score(pbn.LinearGaussianCPDType(), df_null,
                                      node, list(parents))
        assert np.isclose(computed, reference)

    # The order in which the parents are listed must not matter.
    in_order = score.local_score(network, 'd', ['a', 'b', 'c'])
    rotated = score.local_score(network, 'd', ['b', 'c', 'a'])
    assert np.isclose(in_order, rotated)

    # Implicit parents (taken from the network) versus explicit parents.
    for node in ('a', 'b', 'c', 'd'):
        implicit = score.local_score(network, node)
        explicit = score.local_score(network, node, network.parents(node))
        assert implicit == explicit
Ejemplo n.º 7
0
def test_find_max():
    """OperatorPool.find_max must agree with the best individual operator.

    The best operator from the pool is expected to equal whichever of the
    best arc operator and the best node-type operator has the larger
    ``delta()``; on a tie the arc operator is expected.
    """
    model = pbn.SemiparametricBN(['a', 'b', 'c', 'd'])
    score = pbn.CVLikelihood(df)
    arc_ops = pbn.ArcOperatorSet()
    type_ops = pbn.ChangeNodeTypeSet()

    # Statement order is kept exactly: arc scores are cached before the
    # unknown node types are set, node-type scores afterwards.
    arc_ops.cache_scores(model, score)
    model.set_unknown_node_types(df)
    type_ops.cache_scores(model, score)

    best_arc = arc_ops.find_max(model)
    best_type = type_ops.find_max(model)

    combined = pbn.OperatorPool([arc_ops, type_ops])
    combined.cache_scores(model, score)
    best_overall = combined.find_max(model)

    expected = best_arc if best_arc.delta() >= best_type.delta() else best_type
    assert best_overall == expected
Ejemplo n.º 8
0
def test_cvl_local_score_null_spbn():
    """Compare semiparametric local scores on data containing NaN values.

    A copy of ``df`` gets NaN written into randomly drawn row positions
    (100 draws per column, so at most 100 distinct rows each). Nodes
    ``'a'`` and ``'c'`` are declared with ``CKDEType``; the test expects
    their scores to match the CKDE reference and the scores of ``'b'`` and
    ``'d'`` to match the linear Gaussian reference. It also checks implicit
    versus explicit parents, and ``local_score_node_type`` with an
    explicitly supplied node type.
    """
    model = pbn.SemiparametricBN([('a', 'b'), ('a', 'c'), ('a', 'd'),
                                  ('b', 'c'), ('b', 'd'), ('c', 'd')],
                                 [('a', pbn.CKDEType()),
                                  ('c', pbn.CKDEType())])

    np.random.seed(0)
    # Draw positions column by column in a, b, c, d order so the sequence of
    # random numbers consumed is fixed.
    null_rows = {column: np.random.randint(0, SIZE, size=100)
                 for column in ('a', 'b', 'c', 'd')}

    df_null = df.copy()
    for column, rows in null_rows.items():
        df_null.loc[df_null.index[rows], column] = np.nan

    score = pbn.CVLikelihood(df_null, 10, seed)

    abc = ('a', 'b', 'c')
    bca = ('b', 'c', 'a')

    # Rows: (node, parents given to the score, reference CPD type factory,
    #        parents given to the reference).
    # The final row checks invariance to the parent ordering.
    network_type_cases = (
        ('a', (), pbn.CKDEType, ()),
        ('b', ('a',), pbn.LinearGaussianCPDType, ('a',)),
        ('c', ('a', 'b'), pbn.CKDEType, ('a', 'b')),
        ('d', abc, pbn.LinearGaussianCPDType, abc),
        ('d', abc, pbn.LinearGaussianCPDType, bca),
    )
    for node, parents, make_type, ref_parents in network_type_cases:
        computed = score.local_score(model, node, list(parents))
        reference = numpy_local_score(make_type(), df_null, node,
                                      list(ref_parents))
        assert np.isclose(computed, reference)

    # Implicit parents (taken from the network) versus explicit parents.
    for node in ('a', 'b', 'c', 'd'):
        implicit = score.local_score(model, node)
        explicit = score.local_score(model, node, model.parents(node))
        assert implicit == explicit

    # Rows: (node type factory, node, parents given to the score,
    #        parents given to the reference).
    # Here the node type is passed explicitly instead of read from the model.
    explicit_type_cases = (
        (pbn.LinearGaussianCPDType, 'a', (), ()),
        (pbn.CKDEType, 'b', ('a',), ('a',)),
        (pbn.LinearGaussianCPDType, 'c', ('a', 'b'), ('a', 'b')),
        (pbn.CKDEType, 'd', abc, abc),
        (pbn.CKDEType, 'd', abc, bca),
    )
    for make_type, node, parents, ref_parents in explicit_type_cases:
        computed = score.local_score_node_type(model, make_type(), node,
                                               list(parents))
        reference = numpy_local_score(make_type(), df_null, node,
                                      list(ref_parents))
        assert np.isclose(computed, reference)