Example #1
0
def test_bic_local_score_null():
    """Check BIC local scores on data that contains missing values.

    NaNs are written into 100 randomly drawn row positions of every column
    (the seed is fixed so the draw is reproducible). Each
    ``pbn.BIC.local_score`` result is then compared against the
    ``numpy_local_score`` helper, which is defined outside this function.
    """
    variables = ['a', 'b', 'c', 'd']
    gbn = pbn.GaussianNetwork(
        variables,
        [('a', 'b'), ('a', 'c'), ('a', 'd'), ('b', 'c'), ('b', 'd'), ('c', 'd')])

    # Draw every set of row positions before touching the data, so the random
    # stream is consumed in the fixed order a, b, c, d.
    np.random.seed(0)
    null_rows = {name: np.random.randint(0, SIZE, size=100) for name in variables}

    df_null = df.copy()
    for name, rows in null_rows.items():
        df_null.loc[df_null.index[rows], name] = np.nan

    bic = pbn.BIC(df_null)

    parent_sets = [('a', []), ('b', ['a']), ('c', ['a', 'b']), ('d', ['a', 'b', 'c'])]
    for node, parents in parent_sets:
        assert np.isclose(bic.local_score(gbn, node, parents),
                          numpy_local_score(df_null, node, list(parents)))

    # Listing the parents in a different order must give the same score.
    assert np.isclose(bic.local_score(gbn, 'd', ['a', 'b', 'c']),
                      numpy_local_score(df_null, 'd', ['b', 'c', 'a']))

    # Calling without parents must equal calling with the network's parents.
    for node in variables:
        assert bic.local_score(gbn, node) == bic.local_score(gbn, node, gbn.parents(node))
Example #2
0
def test_hc_conditional_estimate():
    """Exercise ``GreedyHillClimbing.estimate`` on conditional Gaussian networks.

    The same checks run on two starting models: a plain conditional network
    and one built with an extra node 'e' and an extra interface node 'f' that
    are removed again before the search. The first two data columns are used
    as interface nodes and the remaining columns as regular nodes.
    """
    bic = pbn.BIC(df)
    columns = list(df.columns.values)
    interface_cols = columns[:2]
    node_cols = columns[2:]

    start = pbn.ConditionalGaussianNetwork(node_cols, interface_cols)

    # Same network, but created with extra names at position 1 that are
    # removed afterwards.
    padded_nodes = [*node_cols[:1], 'e', *node_cols[1:]]
    padded_interface = [*interface_cols[:1], 'f', *interface_cols[1:]]
    start_removed_nodes = pbn.ConditionalGaussianNetwork(padded_nodes,
                                                         padded_interface)
    start_removed_nodes.remove_node('e')
    start_removed_nodes.remove_interface_node('f')

    arc_set = pbn.ArcOperatorSet()
    hc = pbn.GreedyHillClimbing()

    # A single iteration must add exactly one arc.
    res = hc.estimate(arc_set, bic, start, max_iters=1, verbose=False)
    assert res.num_arcs() == 1
    added_arc = res.arcs()[0]
    op_delta = bic.score(res) - bic.score(start)

    res_removed = hc.estimate(
        arc_set, bic, start_removed_nodes, max_iters=1, verbose=False)
    assert res_removed.num_arcs() == 1
    added_arc_removed = res_removed.arcs()[0]

    # Both searches must pick the same arc, in either orientation, with the
    # same score improvement.
    assert added_arc in (added_arc_removed, added_arc_removed[::-1])
    removed_delta = bic.score(res_removed) - bic.score(start_removed_nodes)
    assert np.isclose(op_delta, removed_delta)

    # The improvement equals the local-score change of the arc's target node.
    source, target = added_arc
    assert np.isclose(
        op_delta,
        bic.local_score(res, target, [source]) - bic.local_score(res, target, []))
    source_removed, target_removed = added_arc_removed
    assert np.isclose(
        op_delta,
        bic.local_score(res, target_removed, [source_removed]) -
        bic.local_score(res, target_removed, []))

    # With epsilon just above the best delta, no arc may be added.
    threshold = op_delta + 0.01
    res = hc.estimate(arc_set, bic, start, epsilon=threshold)
    assert res.num_arcs() == start.num_arcs()
    res_removed = hc.estimate(arc_set, bic, start_removed_nodes, epsilon=threshold)
    assert res_removed.num_arcs() == start_removed_nodes.num_arcs()

    # In a full search, no arc may point into an interface node.
    res = hc.estimate(arc_set, bic, start, verbose=False)
    assert all(not res.is_interface(arc[1]) for arc in res.arcs())
    res_removed = hc.estimate(arc_set, bic, start_removed_nodes, verbose=False)
    assert all(not res_removed.is_interface(arc[1]) for arc in res_removed.arcs())
Example #3
0
def test_bic_score():
    """The BIC score of a network must equal the sum of its local scores."""
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'), ('b', 'c'), ('b', 'd'), ('c', 'd')]
    gbn = pbn.GaussianNetwork(arcs)

    bic = pbn.BIC(df)

    total = bic.score(gbn)

    # One local score per node, each with the parents given by `arcs`.
    score_a = bic.local_score(gbn, 'a', [])
    score_b = bic.local_score(gbn, 'b', ['a'])
    score_c = bic.local_score(gbn, 'c', ['a', 'b'])
    score_d = bic.local_score(gbn, 'd', ['a', 'b', 'c'])

    assert np.isclose(total, score_a + score_b + score_c + score_d)
Example #4
0
def test_nomax():
    """``find_max`` must return None when no operator is available.

    The network has only the nodes 'a' and 'b', and the operator set is
    created with the arc ('a', 'b') whitelisted.
    NOTE(review): presumably the whitelist leaves no candidate operation for
    this two-node network -- confirm against the ArcOperatorSet documentation.
    """
    score = pbn.BIC(df)
    network = pbn.GaussianNetwork(['a', 'b'])

    operators = pbn.ArcOperatorSet(whitelist=[("a", "b")])
    operators.cache_scores(network, score)

    best = operators.find_max(network)
    assert best is None
Example #5
0
def test_newbn_estimate_validation():
    """Hill climbing from a ``NewBN`` must return the same network type.

    The estimated model is also expected to expose ``extra_data == "extra"``.
    ``NewBN`` is defined outside this function.
    """
    initial = NewBN(["a", "b", "c", "d"])
    search = pbn.GreedyHillClimbing()
    operators = pbn.ArcOperatorSet()
    score = pbn.BIC(df)

    learned = search.estimate(operators, score, initial)

    # Exact type match is intended here, not an isinstance() check.
    assert type(initial) == type(learned)
    assert learned.extra_data == "extra"
Example #6
0
def test_new_specific_bn_type():
    """Check the custom network types, unconditional and conditional.

    Both ``SpecificNetwork`` and ``ConditionalSpecificNetwork`` (defined
    outside this function) get the same checks: instances share one type,
    only the arc a -> b among those probed can be added, and hill climbing
    keeps the type while producing only arcs whose source is "a".
    """
    node_names = ["a", "b", "c", "d"]

    plain = SpecificNetwork(node_names)
    with_arc = SpecificNetwork(node_names, [("a", "b")])
    plain_again = SpecificNetwork(node_names)

    # Every instance reports the same network type.
    assert plain.type() == with_arc.type()
    assert plain.type() == plain_again.type()
    assert with_arc.type() == plain_again.type()

    # Of the arcs probed, only a -> b is accepted.
    assert plain.can_add_arc("a", "b")
    assert not plain.can_add_arc("b", "a")
    assert not plain.can_add_arc("c", "d")

    assert plain.num_arcs() == plain_again.num_arcs() == 0
    assert with_arc.arcs() == [("a", "b")]

    indep_data = util_test.generate_normal_data_indep(1000)
    bic = pbn.BIC(indep_data)

    start = SpecificNetwork(node_names)

    hc = pbn.GreedyHillClimbing()
    estimated = hc.estimate(pbn.ArcOperatorSet(), bic, start)
    assert estimated.type() == start.type()
    assert all(source == "a" for source, _ in estimated.arcs())

    # #######################
    # Conditional BN
    # #######################

    cond_nodes, cond_interface = ["a", "b"], ["c", "d"]
    cplain = ConditionalSpecificNetwork(cond_nodes, cond_interface)
    cwith_arc = ConditionalSpecificNetwork(cond_nodes, cond_interface, [("a", "b")])
    cplain_again = ConditionalSpecificNetwork(cond_nodes, cond_interface)

    assert cplain.type() == cwith_arc.type()
    assert cplain.type() == cplain_again.type()
    assert cwith_arc.type() == cplain_again.type()

    assert cplain.can_add_arc("a", "b")
    assert not cplain.can_add_arc("b", "a")
    assert not cplain.can_add_arc("c", "d")

    assert cplain.num_arcs() == cplain_again.num_arcs() == 0
    assert cwith_arc.arcs() == [("a", "b")]

    cstart = ConditionalSpecificNetwork(["a", "c"], ["b", "d"])

    hc = pbn.GreedyHillClimbing()
    cestimated = hc.estimate(pbn.ArcOperatorSet(), bic, cstart)
    assert cestimated.type() == cstart.type()
    assert all(source == "a" for source, _ in cestimated.arcs())
Example #7
0
def test_bic_local_score():
    """Compare ``pbn.BIC.local_score`` with the ``numpy_local_score`` helper.

    The helper is defined outside this function. The test also checks that
    parent order does not change the score, and that calling without parents
    equals calling with the parents stored in the network.
    """
    variables = ['a', 'b', 'c', 'd']
    gbn = pbn.GaussianNetwork(
        variables,
        [('a', 'b'), ('a', 'c'), ('a', 'd'), ('b', 'c'), ('b', 'd'), ('c', 'd')])

    bic = pbn.BIC(df)

    parent_sets = [('a', []), ('b', ['a']), ('c', ['a', 'b']), ('d', ['a', 'b', 'c'])]
    for node, parents in parent_sets:
        assert np.isclose(bic.local_score(gbn, node, parents),
                          numpy_local_score(df, node, list(parents)))

    # Listing the parents in a different order must give the same score.
    assert np.isclose(bic.local_score(gbn, 'd', ['a', 'b', 'c']),
                      numpy_local_score(df, 'd', ['b', 'c', 'a']))

    # Calling without parents must equal calling with the network's parents.
    for node in variables:
        assert bic.local_score(gbn, node) == bic.local_score(gbn, node, gbn.parents(node))
Example #8
0
def test_check_max_score():
    """Check the best operator found on an empty two-node network.

    The delta of the best operator must equal the local-score gain of adding
    'c' as a parent of 'd'. Once that arc is blacklisted, the best operator
    must be the addition of the reversed arc.
    """
    network = pbn.GaussianNetwork(['c', 'd'])

    bic = pbn.BIC(df)
    operators = pbn.ArcOperatorSet()

    operators.cache_scores(network, bic)
    best = operators.find_max(network)

    expected_delta = (bic.local_score(network, 'd', ['c']) -
                      bic.local_score(network, 'd'))
    assert np.isclose(best.delta(), expected_delta)

    # BIC is score equivalent, so with the best arc blacklisted the reverse
    # arc is expected to be the best operation.
    operators.set_arc_blacklist([(best.source(), best.target())])
    operators.cache_scores(network, bic)

    second_best = operators.find_max(network)

    assert best.source() == second_best.target()
    assert best.target() == second_best.source()
    # Both operators must be arc additions.
    assert type(best) == type(second_best)
    assert type(best) == pbn.AddArc
Example #9
0
def test_lists():
    """Check the ``ArcOperatorSet`` setters and the validation of arc lists.

    Valid lists must let ``cache_scores`` run. A blacklist or whitelist that
    names the node "e", which is not among the network's nodes, must make
    ``cache_scores`` raise ``IndexError``.
    """
    network = pbn.GaussianNetwork(['a', 'b', 'c', 'd'])
    score = pbn.BIC(df)
    operators = pbn.ArcOperatorSet()

    # Valid configuration: caching the scores must succeed.
    operators.set_arc_blacklist([("b", "a")])
    operators.set_arc_whitelist([("b", "c")])
    operators.set_max_indegree(3)
    operators.set_type_whitelist([("a", pbn.LinearGaussianCPDType())])

    operators.cache_scores(network, score)

    # An arc naming the unknown node "e" must be rejected, first through the
    # blacklist and then through the whitelist.
    for set_arc_list in (operators.set_arc_blacklist, operators.set_arc_whitelist):
        set_arc_list([("e", "a")])

        with pytest.raises(IndexError) as ex:
            operators.cache_scores(network, score)
        assert "not present in the graph" in str(ex.value)
Example #10
0
def test_hc_estimate():
    """Exercise ``GreedyHillClimbing.estimate`` on Gaussian networks.

    The same checks run on two starting models: a network built from the
    data columns, and one built with two extra nodes ('e' and 'f') that are
    removed again before the search. The checks cover:

    * a single iteration adds exactly one arc, with the same arc (in either
      orientation) and the same score delta for both starting models;
    * blacklisting the added arc makes the search add its reverse;
    * the score delta equals the local-score change of the arc's target;
    * an ``epsilon`` just above the best delta prevents any change;
    * an unrestricted search runs without raising.
    """
    bic = pbn.BIC(df)
    column_names = list(df.columns.values)
    start = pbn.GaussianNetwork(column_names)

    # Check algorithm with BN with nodes removed.
    column_names.insert(1, 'e')
    column_names.insert(3, 'f')
    start_removed_nodes = pbn.GaussianNetwork(column_names)
    start_removed_nodes.remove_node('e')
    start_removed_nodes.remove_node('f')

    arc_set = pbn.ArcOperatorSet()

    hc = pbn.GreedyHillClimbing()

    res = hc.estimate(arc_set, bic, start, max_iters=1)
    assert res.num_arcs() == 1
    added_arc = res.arcs()[0]
    op_delta = bic.score(res) - bic.score(start)

    res_removed = hc.estimate(arc_set, bic, start_removed_nodes, max_iters=1)
    # Check the result just computed; the original re-checked `res` here.
    assert res_removed.num_arcs() == 1
    added_arc_removed = res_removed.arcs()[0]
    assert added_arc in (added_arc_removed, added_arc_removed[::-1])
    assert np.isclose(op_delta,
                      bic.score(res_removed) - bic.score(start_removed_nodes))

    # BIC is score equivalent, so if we blacklist the added_arc, its reverse will be added.
    res = hc.estimate(arc_set,
                      bic,
                      start,
                      max_iters=1,
                      arc_blacklist=[added_arc])
    assert res.num_arcs() == 1
    reversed_arc = res.arcs()[0][::-1]
    assert added_arc == reversed_arc

    res_removed = hc.estimate(arc_set,
                              bic,
                              start_removed_nodes,
                              max_iters=1,
                              arc_blacklist=[added_arc_removed])
    # Same fix as above: assert on `res_removed`, not on `res`.
    assert res_removed.num_arcs() == 1
    reversed_arc_removed = res_removed.arcs()[0][::-1]
    assert added_arc_removed == reversed_arc_removed

    # The score improvement equals the local-score change of the arc's target.
    assert np.isclose(
        op_delta,
        bic.local_score(res, added_arc[1], [added_arc[0]]) -
        bic.local_score(res, added_arc[1], []))
    assert np.isclose(
        op_delta,
        bic.local_score(res, added_arc_removed[1], [added_arc_removed[0]]) -
        bic.local_score(res, added_arc_removed[1], []))

    # With epsilon just above the best delta, no operator may be applied.
    res = hc.estimate(arc_set, bic, start, epsilon=(op_delta + 0.01))
    assert res.num_arcs() == start.num_arcs()

    res_removed = hc.estimate(arc_set,
                              bic,
                              start_removed_nodes,
                              epsilon=(op_delta + 0.01))
    assert res_removed.num_arcs() == start_removed_nodes.num_arcs()

    # Can't compare models because the arcs could be oriented in different direction,
    # leading to a different search path. Execute the code, just to check no error is given.
    res = hc.estimate(arc_set, bic, start, verbose=False)
    res_removed = hc.estimate(arc_set, bic, start_removed_nodes, verbose=False)