def test_bic_local_score_null():
    """Compare BIC local scores with the NumPy reference on data containing NaNs."""
    gbn = pbn.GaussianNetwork(
        ['a', 'b', 'c', 'd'],
        [('a', 'b'), ('a', 'c'), ('a', 'd'),
         ('b', 'c'), ('b', 'd'), ('c', 'd')])

    # Fixed seed so the injected missing values are reproducible.
    np.random.seed(0)
    df_null = df.copy()
    for column in ('a', 'b', 'c', 'd'):
        # One draw of 100 row positions (repeats possible) per column, in
        # column order, so the random stream is consumed as a, b, c, d.
        null_rows = np.random.randint(0, SIZE, size=100)
        df_null.loc[df_null.index[null_rows], column] = np.nan

    bic = pbn.BIC(df_null)

    # (node, parents given to BIC, parents given to the reference).
    reference_cases = [
        ('a', [], []),
        ('b', ['a'], ['a']),
        ('c', ['a', 'b'], ['a', 'b']),
        ('d', ['a', 'b', 'c'], ['a', 'b', 'c']),
        # Same parent set listed in a different order for the reference.
        ('d', ['a', 'b', 'c'], ['b', 'c', 'a']),
    ]
    for node, parents, reference_parents in reference_cases:
        assert np.isclose(bic.local_score(gbn, node, parents),
                          numpy_local_score(df_null, node, reference_parents))

    # Omitting the parents must give exactly the score for the network's own parents.
    for node in ('a', 'b', 'c', 'd'):
        assert bic.local_score(gbn, node) == bic.local_score(gbn, node, gbn.parents(node))
def test_hc_conditional_estimate():
    """Run greedy hill climbing on conditional Gaussian networks.

    Each scenario is executed on a network built directly from the data
    columns and on an equivalent one built with an extra node ('e') and an
    extra interface node ('f') that are removed before learning.
    """
    bic = pbn.BIC(df)
    columns = list(df.columns.values)
    interface_columns = columns[:2]
    node_columns = columns[2:]

    start = pbn.ConditionalGaussianNetwork(node_columns, interface_columns)

    # Place the extra names at position 1 of each list, then remove them again.
    padded_nodes = node_columns[:1] + ['e'] + node_columns[1:]
    padded_interface = interface_columns[:1] + ['f'] + interface_columns[1:]
    start_removed_nodes = pbn.ConditionalGaussianNetwork(padded_nodes, padded_interface)
    start_removed_nodes.remove_node('e')
    start_removed_nodes.remove_interface_node('f')

    arc_set = pbn.ArcOperatorSet()
    hc = pbn.GreedyHillClimbing()

    # A single iteration must add exactly one arc.
    res = hc.estimate(arc_set, bic, start, max_iters=1, verbose=False)
    assert res.num_arcs() == 1
    added_arc = res.arcs()[0]
    op_delta = bic.score(res) - bic.score(start)

    res_removed = hc.estimate(arc_set, bic, start_removed_nodes, max_iters=1, verbose=False)
    assert res_removed.num_arcs() == 1
    added_arc_removed = res_removed.arcs()[0]

    # Both runs must pick the same arc, up to orientation.
    assert added_arc in (added_arc_removed, added_arc_removed[::-1])
    assert np.isclose(op_delta,
                      bic.score(res_removed) - bic.score(start_removed_nodes))

    # The score gain equals the local score change of the arc's target node.
    for source, target in (added_arc, added_arc_removed):
        local_gain = (bic.local_score(res, target, [source])
                      - bic.local_score(res, target, []))
        assert np.isclose(op_delta, local_gain)

    # With epsilon above the best delta, the arc count must not change.
    res = hc.estimate(arc_set, bic, start, epsilon=(op_delta + 0.01))
    assert res.num_arcs() == start.num_arcs()

    res_removed = hc.estimate(arc_set, bic, start_removed_nodes, epsilon=(op_delta + 0.01))
    assert res_removed.num_arcs() == start_removed_nodes.num_arcs()

    # In an unrestricted run no arc may point into an interface node.
    res = hc.estimate(arc_set, bic, start, verbose=False)
    assert all(not res.is_interface(arc[1]) for arc in res.arcs())

    res_removed = hc.estimate(arc_set, bic, start_removed_nodes, verbose=False)
    assert all(not res_removed.is_interface(arc[1]) for arc in res_removed.arcs())
def test_bic_score():
    """The BIC score of a network must equal the sum of its local scores."""
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'),
            ('b', 'c'), ('b', 'd'), ('c', 'd')]
    gbn = pbn.GaussianNetwork(arcs)
    bic = pbn.BIC(df)

    network_score = bic.score(gbn)

    # Parent sets implied by the arcs above, accumulated in node order.
    parent_sets = (
        ('a', []),
        ('b', ['a']),
        ('c', ['a', 'b']),
        ('d', ['a', 'b', 'c']),
    )
    summed_local_scores = 0.0
    for node, parents in parent_sets:
        summed_local_scores += bic.local_score(gbn, node, parents)

    assert np.isclose(network_score, summed_local_scores)
def test_nomax():
    """find_max must return None for a two-node network with a->b whitelisted.

    Presumably the whitelisted arc leaves no candidate operation to score;
    the test only pins the ``None`` result.
    """
    network = pbn.GaussianNetwork(['a', 'b'])
    score = pbn.BIC(df)

    operators = pbn.ArcOperatorSet(whitelist=[("a", "b")])
    operators.cache_scores(network, score)

    best = operators.find_max(network)
    assert best is None
def test_newbn_estimate_validation():
    """Hill climbing from a NewBN must return a NewBN that keeps ``extra_data``."""
    start = NewBN(["a", "b", "c", "d"])

    estimated = pbn.GreedyHillClimbing().estimate(
        pbn.ArcOperatorSet(), pbn.BIC(df), start)

    # The learned model must be of exactly the starting class.
    assert type(estimated) is type(start)
    assert estimated.extra_data == "extra"
def test_new_specific_bn_type():
    """Check the custom network type of SpecificNetwork and its conditional variant.

    The assertions expect all instances to share one type, only arcs leaving
    node "a" to be addable, and hill climbing to respect that restriction.
    """
    sp1 = SpecificNetwork(["a", "b", "c", "d"])
    sp2 = SpecificNetwork(["a", "b", "c", "d"], [("a", "b")])
    sp3 = SpecificNetwork(["a", "b", "c", "d"])

    # All instances must report the same network type.
    type1, type2, type3 = sp1.type(), sp2.type(), sp3.type()
    assert type1 == type2
    assert type1 == type3
    assert type2 == type3

    assert sp1.can_add_arc("a", "b")
    assert not sp1.can_add_arc("b", "a")
    assert not sp1.can_add_arc("c", "d")

    assert sp1.num_arcs() == 0
    assert sp3.num_arcs() == 0
    assert sp2.arcs() == [("a", "b")]

    data = util_test.generate_normal_data_indep(1000)
    bic = pbn.BIC(data)

    start = SpecificNetwork(["a", "b", "c", "d"])
    hc = pbn.GreedyHillClimbing()
    estimated = hc.estimate(pbn.ArcOperatorSet(), bic, start)
    assert estimated.type() == start.type()
    # Every learned arc must start at "a".
    assert all(source == "a" for source, _ in estimated.arcs())

    # #######################
    # Conditional BN
    # #######################

    csp1 = ConditionalSpecificNetwork(["a", "b"], ["c", "d"])
    csp2 = ConditionalSpecificNetwork(["a", "b"], ["c", "d"], [("a", "b")])
    csp3 = ConditionalSpecificNetwork(["a", "b"], ["c", "d"])

    ctype1, ctype2, ctype3 = csp1.type(), csp2.type(), csp3.type()
    assert ctype1 == ctype2
    assert ctype1 == ctype3
    assert ctype2 == ctype3

    assert csp1.can_add_arc("a", "b")
    assert not csp1.can_add_arc("b", "a")
    assert not csp1.can_add_arc("c", "d")

    assert csp1.num_arcs() == 0
    assert csp3.num_arcs() == 0
    assert csp2.arcs() == [("a", "b")]

    cstart = ConditionalSpecificNetwork(["a", "c"], ["b", "d"])
    hc = pbn.GreedyHillClimbing()
    cestimated = hc.estimate(pbn.ArcOperatorSet(), bic, cstart)
    assert cestimated.type() == cstart.type()
    assert all(source == "a" for source, _ in cestimated.arcs())
def test_bic_local_score():
    """Compare BIC local scores with the NumPy reference implementation."""
    nodes = ['a', 'b', 'c', 'd']
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'),
            ('b', 'c'), ('b', 'd'), ('c', 'd')]
    gbn = pbn.GaussianNetwork(nodes, arcs)
    bic = pbn.BIC(df)

    def matches_reference(node, parents, reference_parents):
        # Approximate comparison between the library and the NumPy reference.
        return np.isclose(bic.local_score(gbn, node, parents),
                          numpy_local_score(df, node, reference_parents))

    assert matches_reference('a', [], [])
    assert matches_reference('b', ['a'], ['a'])
    assert matches_reference('c', ['a', 'b'], ['a', 'b'])
    assert matches_reference('d', ['a', 'b', 'c'], ['a', 'b', 'c'])
    # The reference receives the same parents in a different order.
    assert matches_reference('d', ['a', 'b', 'c'], ['b', 'c', 'a'])

    # Omitting the parents must give exactly the score for the network's own parents.
    for node in ('a', 'b', 'c', 'd'):
        implicit = bic.local_score(gbn, node)
        explicit = bic.local_score(gbn, node, gbn.parents(node))
        assert implicit == explicit
def test_check_max_score():
    """Check the best operator on a two-node network and its reverse once blacklisted."""
    gbn = pbn.GaussianNetwork(['c', 'd'])
    bic = pbn.BIC(df)

    operators = pbn.ArcOperatorSet()
    operators.cache_scores(gbn, bic)
    best = operators.find_max(gbn)

    # The delta must match the gain of adding 'c' as a parent of 'd'.
    expected_delta = bic.local_score(gbn, 'd', ['c']) - bic.local_score(gbn, 'd')
    assert np.isclose(best.delta(), expected_delta)

    # With the best arc blacklisted, its reverse is expected to become the
    # best operation (BIC is score equivalent).
    operators.set_arc_blacklist([(best.source(), best.target())])
    operators.cache_scores(gbn, bic)
    reverse = operators.find_max(gbn)

    assert best.source() == reverse.target()
    assert best.target() == reverse.source()

    # Both operations must be arc additions.
    assert type(best) == type(reverse)
    assert type(best) == pbn.AddArc
def test_lists():
    """Exercise the ArcOperatorSet list setters and unknown-node validation."""
    gbn = pbn.GaussianNetwork(['a', 'b', 'c', 'd'])
    bic = pbn.BIC(df)
    operators = pbn.ArcOperatorSet()

    # Valid restrictions: caching scores must succeed.
    operators.set_arc_blacklist([("b", "a")])
    operators.set_arc_whitelist([("b", "c")])
    operators.set_max_indegree(3)
    operators.set_type_whitelist([("a", pbn.LinearGaussianCPDType())])
    operators.cache_scores(gbn, bic)

    # A blacklist naming a node outside the network ('e') must be rejected.
    operators.set_arc_blacklist([("e", "a")])
    with pytest.raises(IndexError, match="not present in the graph"):
        operators.cache_scores(gbn, bic)

    # Same check for the whitelist.
    operators.set_arc_whitelist([("e", "a")])
    with pytest.raises(IndexError, match="not present in the graph"):
        operators.cache_scores(gbn, bic)
def test_hc_estimate():
    """Exercise GreedyHillClimbing.estimate on Gaussian networks.

    Every scenario runs twice: on a network built directly from the data
    columns, and on one built with two extra nodes ('e' and 'f') that are
    removed before learning. Scenarios covered: a single iteration, a single
    iteration with the previously added arc blacklisted, an ``epsilon`` just
    above the best delta, and an unrestricted run.
    """
    bic = pbn.BIC(df)
    column_names = list(df.columns.values)
    start = pbn.GaussianNetwork(column_names)

    # Check algorithm with BN with nodes removed.
    column_names.insert(1, 'e')
    column_names.insert(3, 'f')
    start_removed_nodes = pbn.GaussianNetwork(column_names)
    start_removed_nodes.remove_node('e')
    start_removed_nodes.remove_node('f')

    arc_set = pbn.ArcOperatorSet()
    hc = pbn.GreedyHillClimbing()

    # A single iteration must add exactly one arc.
    res = hc.estimate(arc_set, bic, start, max_iters=1)
    assert res.num_arcs() == 1
    added_arc = res.arcs()[0]
    op_delta = bic.score(res) - bic.score(start)

    res_removed = hc.estimate(arc_set, bic, start_removed_nodes, max_iters=1)
    # Check the removed-nodes result itself, not ``res`` again.
    assert res_removed.num_arcs() == 1
    added_arc_removed = res_removed.arcs()[0]

    # Both runs must pick the same arc, up to orientation.
    assert added_arc == added_arc_removed or added_arc == added_arc_removed[::-1]
    assert np.isclose(op_delta, bic.score(res_removed) - bic.score(start_removed_nodes))

    # BIC is score equivalent, so if we blacklist the added_arc, its reverse will be added.
    res = hc.estimate(arc_set, bic, start, max_iters=1, arc_blacklist=[added_arc])
    assert res.num_arcs() == 1
    reversed_arc = res.arcs()[0][::-1]
    assert added_arc == reversed_arc

    res_removed = hc.estimate(arc_set, bic, start_removed_nodes, max_iters=1,
                              arc_blacklist=[added_arc_removed])
    # Check the removed-nodes result itself, not ``res`` again.
    assert res_removed.num_arcs() == 1
    reversed_arc_removed = res_removed.arcs()[0][::-1]
    assert added_arc_removed == reversed_arc_removed

    # The score gain equals the local score change of the arc's target node.
    assert np.isclose(
        op_delta,
        bic.local_score(res, added_arc[1], [added_arc[0]])
        - bic.local_score(res, added_arc[1], []))
    assert np.isclose(
        op_delta,
        bic.local_score(res, added_arc_removed[1], [added_arc_removed[0]])
        - bic.local_score(res, added_arc_removed[1], []))

    # With epsilon above the best delta, the arc count must not change.
    res = hc.estimate(arc_set, bic, start, epsilon=(op_delta + 0.01))
    assert res.num_arcs() == start.num_arcs()

    res_removed = hc.estimate(arc_set, bic, start_removed_nodes, epsilon=(op_delta + 0.01))
    assert res_removed.num_arcs() == start_removed_nodes.num_arcs()

    # Can't compare models because the arcs could be oriented in different direction,
    # leading to a different search path. Execute the code, just to check no error is given.
    res = hc.estimate(arc_set, bic, start, verbose=False)
    res_removed = hc.estimate(arc_set, bic, start_removed_nodes, verbose=False)