Esempio n. 1
0
def test_opposite():
    """Check the operator returned by ``opposite`` for every operator kind.

    Each operator is built with delta 1 and its opposite is expected to
    carry delta -1. For the node-type change, node "a" is first set to
    ``LinearGaussianCPDType`` and the opposite of a change to ``CKDEType``
    is expected to report ``LinearGaussianCPDType``.
    """
    net = pbn.SemiparametricBN(["a", "b"])

    # (operator class, expected source, expected target, expected class of
    # the opposite). Every operator is created on the ("a", "b") pair.
    arc_cases = (
        (pbn.AddArc, 'a', 'b', pbn.RemoveArc),
        (pbn.RemoveArc, 'a', 'b', pbn.AddArc),
        # The flip is the only case expected to report swapped endpoints.
        (pbn.FlipArc, 'b', 'a', pbn.FlipArc),
    )
    for op_cls, expected_source, expected_target, reverse_cls in arc_cases:
        op = op_cls("a", "b", 1)
        reverse = op.opposite(net)
        assert reverse.source() == expected_source
        assert reverse.target() == expected_target
        assert reverse.delta() == -1
        assert type(reverse) == reverse_cls

    # Node-type change: the opposite is checked against the type the node
    # had in the network before the change.
    net.set_node_type("a", pbn.LinearGaussianCPDType())
    change = pbn.ChangeNodeType("a", pbn.CKDEType(), 1)
    reverse = change.opposite(net)
    assert reverse.node() == 'a'
    assert reverse.node_type() == pbn.LinearGaussianCPDType()
    assert reverse.delta() == -1
    assert type(reverse) == pbn.ChangeNodeType
def test_holdout_local_score_null_spbn():
    """Compare holdout local scores with the reference on data holding NaNs.

    A copy of ``df`` gets NaN written at 100 randomly drawn row positions
    in each of the columns a, b, c and d. ``HoldoutLikelihood.local_score``
    is then compared against ``numpy_local_score`` evaluated on the
    training/test split exposed by the score object.
    """
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'),
            ('b', 'c'), ('b', 'd'), ('c', 'd')]
    node_types = [('a', pbn.CKDEType()), ('c', pbn.CKDEType())]
    spbn = pbn.SemiparametricBN(arcs, node_types)

    columns = ('a', 'b', 'c', 'd')

    # Draw all row positions first, in column order, so the seeded random
    # stream is consumed as a, b, c, d.
    np.random.seed(0)
    null_rows = {col: np.random.randint(0, SIZE, size=100) for col in columns}

    df_null = df.copy()
    for col in columns:
        df_null.loc[df_null.index[null_rows[col]], col] = np.nan

    hl = pbn.HoldoutLikelihood(df_null, 0.2, seed)

    # (node, parents passed to the score, factor type used by the reference,
    # parents passed to the reference)
    cases = (
        ('a', (), pbn.CKDEType, ()),
        ('b', ('a',), pbn.LinearGaussianCPDType, ('a',)),
        ('c', ('a', 'b'), pbn.CKDEType, ('a', 'b')),
        ('d', ('a', 'b', 'c'), pbn.LinearGaussianCPDType, ('a', 'b', 'c')),
        # Same node, but the reference receives the parents permuted.
        ('d', ('a', 'b', 'c'), pbn.LinearGaussianCPDType, ('b', 'c', 'a')),
    )
    for node, parents, factor_cls, ref_parents in cases:
        actual = hl.local_score(spbn, node, list(parents))
        expected = numpy_local_score(factor_cls(),
                                     hl.training_data().to_pandas(),
                                     hl.test_data().to_pandas(),
                                     node, list(ref_parents))
        assert np.isclose(actual, expected)

    # Leaving the parents out must give exactly the score obtained with the
    # parents the network reports for the node.
    for node in columns:
        implicit = hl.local_score(spbn, node)
        explicit = hl.local_score(spbn, node, spbn.parents(node))
        assert implicit == explicit
Esempio n. 3
0
def test_cvl_local_score_spbn():
    """Compare CVLikelihood local scores with the ``numpy_local_score`` reference.

    Three groups of checks are made on ``df``:

    * ``local_score`` with explicit parents, where the reference uses
      ``CKDEType`` for 'a'/'c' and ``LinearGaussianCPDType`` for 'b'/'d';
    * ``local_score`` without parents versus the same call with
      ``spbn.parents(node)``;
    * ``local_score_node_type`` with the factor type passed explicitly.
    """
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'),
            ('b', 'c'), ('b', 'd'), ('c', 'd')]
    node_types = [('a', pbn.CKDEType()), ('c', pbn.CKDEType())]
    spbn = pbn.SemiparametricBN(arcs, node_types)

    cvl = pbn.CVLikelihood(df, 10, seed)

    # (node, parents passed to the score, factor type used by the reference,
    # parents passed to the reference)
    local_cases = (
        ('a', (), pbn.CKDEType, ()),
        ('b', ('a',), pbn.LinearGaussianCPDType, ('a',)),
        ('c', ('a', 'b'), pbn.CKDEType, ('a', 'b')),
        ('d', ('a', 'b', 'c'), pbn.LinearGaussianCPDType, ('a', 'b', 'c')),
        # Same node, but the reference receives the parents permuted.
        ('d', ('a', 'b', 'c'), pbn.LinearGaussianCPDType, ('b', 'c', 'a')),
    )
    for node, parents, factor_cls, ref_parents in local_cases:
        actual = cvl.local_score(spbn, node, list(parents))
        expected = numpy_local_score(factor_cls(), df, node,
                                     list(ref_parents))
        assert np.isclose(actual, expected)

    # Leaving the parents out must give exactly the score obtained with the
    # parents the network reports for the node.
    for node in ('a', 'b', 'c', 'd'):
        implicit = cvl.local_score(spbn, node)
        explicit = cvl.local_score(spbn, node, spbn.parents(node))
        assert implicit == explicit

    # (factor type, node, parents passed to the score, parents passed to the
    # reference); here the same factor type is handed to both sides.
    typed_cases = (
        (pbn.LinearGaussianCPDType, 'a', (), ()),
        (pbn.CKDEType, 'b', ('a',), ('a',)),
        (pbn.LinearGaussianCPDType, 'c', ('a', 'b'), ('a', 'b')),
        (pbn.CKDEType, 'd', ('a', 'b', 'c'), ('a', 'b', 'c')),
        (pbn.CKDEType, 'd', ('a', 'b', 'c'), ('b', 'c', 'a')),
    )
    for factor_cls, node, parents, ref_parents in typed_cases:
        actual = cvl.local_score_node_type(spbn, factor_cls(), node,
                                           list(parents))
        expected = numpy_local_score(factor_cls(), df, node,
                                     list(ref_parents))
        assert np.isclose(actual, expected)
Esempio n. 4
0
def test_apply():
    """Apply each operator to a Gaussian and a semiparametric network.

    The add/flip/remove sequence on the ("a", "b") arc is run against both
    networks. ``ChangeNodeType`` is expected to raise ``ValueError`` on the
    Gaussian network and to switch the node type on the semiparametric one.
    """

    def check_arc_sequence(net):
        # Add a->b, flip it to b->a, then remove b->a, checking the arc
        # count and arc presence after every step.
        assert not net.has_arc('a', 'b')

        add = pbn.AddArc("a", "b", 1)
        add.apply(net)
        assert net.num_arcs() == 1
        assert net.has_arc('a', 'b')

        flip = pbn.FlipArc("a", "b", 1)
        flip.apply(net)
        assert net.num_arcs() == 1
        assert not net.has_arc('a', 'b')
        assert net.has_arc('b', 'a')

        remove = pbn.RemoveArc("b", "a", 1)
        remove.apply(net)
        assert net.num_arcs() == 0
        assert not net.has_arc('b', 'a')

    gbn = pbn.GaussianNetwork(['a', 'b', 'c', 'd'])
    assert gbn.num_arcs() == 0
    check_arc_sequence(gbn)

    # Changing a node to CKDE is expected to be rejected by the Gaussian
    # network.
    to_ckde = pbn.ChangeNodeType("a", pbn.CKDEType(), 1)
    with pytest.raises(ValueError) as ex:
        to_ckde.apply(gbn)
    assert "Wrong factor type" in str(ex.value)

    spbn = pbn.SemiparametricBN(['a', 'b', 'c', 'd'])
    assert spbn.num_arcs() == 0

    # On the semiparametric network the node starts with an unknown factor
    # type and takes the requested one after the operator is applied.
    to_ckde = pbn.ChangeNodeType("a", pbn.CKDEType(), 1)
    assert spbn.node_type('a') == pbn.UnknownFactorType()
    to_ckde.apply(spbn)
    assert spbn.node_type('a') == pbn.CKDEType()

    check_arc_sequence(spbn)
def test_holdout_score():
    """The holdout score of a network must equal the sum of its local scores.

    Checked for a Gaussian network (local scores requested with explicit
    parents) and for a semiparametric network (local scores requested
    without parents).
    """
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'), ('b', 'c'),
            ('b', 'd'), ('c', 'd')]

    gbn = pbn.GaussianNetwork(arcs)

    hl = pbn.HoldoutLikelihood(df, 0.2, 0)

    network_score = hl.score(gbn)
    score_a = hl.local_score(gbn, 'a', [])
    score_b = hl.local_score(gbn, 'b', ['a'])
    score_c = hl.local_score(gbn, 'c', ['a', 'b'])
    score_d = hl.local_score(gbn, 'd', ['a', 'b', 'c'])
    # Plain left-to-right addition, in node order a, b, c, d.
    assert np.isclose(network_score, score_a + score_b + score_c + score_d)

    spbn = pbn.SemiparametricBN(
        [('a', 'b'), ('a', 'c'), ('a', 'd'),
         ('b', 'c'), ('b', 'd'), ('c', 'd')],
        [('a', pbn.CKDEType()), ('c', pbn.CKDEType())])

    network_score = hl.score(spbn)
    score_a, score_b, score_c, score_d = [
        hl.local_score(spbn, node) for node in ('a', 'b', 'c', 'd')
    ]
    assert np.isclose(network_score, score_a + score_b + score_c + score_d)
Esempio n. 6
0
def test_cvl_score():
    """The cross-validated score of a network must equal the sum of its local scores.

    Checked for a Gaussian network (local scores requested with explicit
    parents) and for a semiparametric network (local scores requested
    without parents).
    """
    gbn = pbn.GaussianNetwork([('a', 'b'), ('a', 'c'), ('a', 'd'),
                               ('b', 'c'), ('b', 'd'), ('c', 'd')])

    cv = pbn.CVLikelihood(df, 10, 0)

    total = cv.score(gbn)
    # Parents handed explicitly to each local score, in node order a..d.
    explicit_parents = (('a', []), ('b', ['a']), ('c', ['a', 'b']),
                        ('d', ['a', 'b', 'c']))
    part_a, part_b, part_c, part_d = [
        cv.local_score(gbn, node, parents)
        for node, parents in explicit_parents
    ]
    # Plain left-to-right addition, in node order a, b, c, d.
    assert np.isclose(total, part_a + part_b + part_c + part_d)

    spbn_arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'),
                 ('b', 'c'), ('b', 'd'), ('c', 'd')]
    spbn_types = [('a', pbn.CKDEType()), ('c', pbn.CKDEType())]
    spbn = pbn.SemiparametricBN(spbn_arcs, spbn_types)

    total = cv.score(spbn)
    part_a = cv.local_score(spbn, 'a')
    part_b = cv.local_score(spbn, 'b')
    part_c = cv.local_score(spbn, 'c')
    part_d = cv.local_score(spbn, 'd')
    assert np.isclose(total, part_a + part_b + part_c + part_d)
def test_holdout_local_score_spbn():
    """Compare holdout local scores with the ``numpy_local_score`` reference.

    ``HoldoutLikelihood.local_score`` on ``df`` is compared against the
    reference evaluated on the training/test split exposed by the score
    object. The reference uses ``CKDEType`` for 'a'/'c' and
    ``LinearGaussianCPDType`` for 'b'/'d'.
    """
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'),
            ('b', 'c'), ('b', 'd'), ('c', 'd')]
    node_types = [('a', pbn.CKDEType()), ('c', pbn.CKDEType())]
    spbn = pbn.SemiparametricBN(arcs, node_types)

    hl = pbn.HoldoutLikelihood(df, 0.2, seed)

    # (node, parents passed to the score, factor type used by the reference,
    # parents passed to the reference)
    cases = (
        ('a', (), pbn.CKDEType, ()),
        ('b', ('a',), pbn.LinearGaussianCPDType, ('a',)),
        ('c', ('a', 'b'), pbn.CKDEType, ('a', 'b')),
        ('d', ('a', 'b', 'c'), pbn.LinearGaussianCPDType, ('a', 'b', 'c')),
        # Same node, but the reference receives the parents permuted.
        ('d', ('a', 'b', 'c'), pbn.LinearGaussianCPDType, ('b', 'c', 'a')),
    )
    for node, parents, factor_cls, ref_parents in cases:
        actual = hl.local_score(spbn, node, list(parents))
        expected = numpy_local_score(factor_cls(),
                                     hl.training_data().to_pandas(),
                                     hl.test_data().to_pandas(),
                                     node, list(ref_parents))
        assert np.isclose(actual, expected)

    # Leaving the parents out must give exactly the score obtained with the
    # parents the network reports for the node.
    for node in ('a', 'b', 'c', 'd'):
        implicit = hl.local_score(spbn, node)
        explicit = hl.local_score(spbn, node, spbn.parents(node))
        assert implicit == explicit
Esempio n. 8
0
def test_find_max():
    """``OperatorPool.find_max`` must return the better maximum of its operator sets.

    The maxima of an ``ArcOperatorSet`` and a ``ChangeNodeTypeSet`` are
    computed separately, then compared with the maximum found by a pool
    holding both sets. When the deltas tie, the arc operator is expected.
    """
    spbn = pbn.SemiparametricBN(['a', 'b', 'c', 'd'])
    cv = pbn.CVLikelihood(df)
    arc_set = pbn.ArcOperatorSet()
    type_set = pbn.ChangeNodeTypeSet()

    # The arc scores are cached before the unknown node types are set; the
    # node-type scores are cached afterwards.
    arc_set.cache_scores(spbn, cv)
    spbn.set_unknown_node_types(df)
    type_set.cache_scores(spbn, cv)

    best_arc = arc_set.find_max(spbn)
    best_type = type_set.find_max(spbn)

    pool = pbn.OperatorPool([arc_set, type_set])
    pool.cache_scores(spbn, cv)

    best_overall = pool.find_max(spbn)

    expected = best_arc if best_arc.delta() >= best_type.delta() else best_type
    assert best_overall == expected
Esempio n. 9
0
def test_cvl_local_score_null_spbn():
    """Compare CVLikelihood local scores with the reference on data holding NaNs.

    A copy of ``df`` gets NaN written at 100 randomly drawn row positions
    in each of the columns a, b, c and d. Both ``local_score`` and
    ``local_score_node_type`` are then compared against
    ``numpy_local_score`` evaluated on the same NaN-containing frame.
    """
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'),
            ('b', 'c'), ('b', 'd'), ('c', 'd')]
    node_types = [('a', pbn.CKDEType()), ('c', pbn.CKDEType())]
    spbn = pbn.SemiparametricBN(arcs, node_types)

    columns = ('a', 'b', 'c', 'd')

    # Draw all row positions first, in column order, so the seeded random
    # stream is consumed as a, b, c, d.
    np.random.seed(0)
    null_rows = {col: np.random.randint(0, SIZE, size=100) for col in columns}

    df_null = df.copy()
    for col in columns:
        df_null.loc[df_null.index[null_rows[col]], col] = np.nan

    cvl = pbn.CVLikelihood(df_null, 10, seed)

    # (node, parents passed to the score, factor type used by the reference,
    # parents passed to the reference)
    local_cases = (
        ('a', (), pbn.CKDEType, ()),
        ('b', ('a',), pbn.LinearGaussianCPDType, ('a',)),
        ('c', ('a', 'b'), pbn.CKDEType, ('a', 'b')),
        ('d', ('a', 'b', 'c'), pbn.LinearGaussianCPDType, ('a', 'b', 'c')),
        # Same node, but the reference receives the parents permuted.
        ('d', ('a', 'b', 'c'), pbn.LinearGaussianCPDType, ('b', 'c', 'a')),
    )
    for node, parents, factor_cls, ref_parents in local_cases:
        actual = cvl.local_score(spbn, node, list(parents))
        expected = numpy_local_score(factor_cls(), df_null, node,
                                     list(ref_parents))
        assert np.isclose(actual, expected)

    # Leaving the parents out must give exactly the score obtained with the
    # parents the network reports for the node.
    for node in columns:
        implicit = cvl.local_score(spbn, node)
        explicit = cvl.local_score(spbn, node, spbn.parents(node))
        assert implicit == explicit

    # (factor type, node, parents passed to the score, parents passed to the
    # reference); here the same factor type is handed to both sides.
    typed_cases = (
        (pbn.LinearGaussianCPDType, 'a', (), ()),
        (pbn.CKDEType, 'b', ('a',), ('a',)),
        (pbn.LinearGaussianCPDType, 'c', ('a', 'b'), ('a', 'b')),
        (pbn.CKDEType, 'd', ('a', 'b', 'c'), ('a', 'b', 'c')),
        (pbn.CKDEType, 'd', ('a', 'b', 'c'), ('b', 'c', 'a')),
    )
    for factor_cls, node, parents, ref_parents in typed_cases:
        actual = cvl.local_score_node_type(spbn, factor_cls(), node,
                                           list(parents))
        expected = numpy_local_score(factor_cls(), df_null, node,
                                     list(ref_parents))
        assert np.isclose(actual, expected)