def test_new_specific_bn_type():
    """Custom network types restrict arcs and survive a hill-climbing run."""
    sp1 = SpecificNetwork(["a", "b", "c", "d"])
    sp2 = SpecificNetwork(["a", "b", "c", "d"], [("a", "b")])
    sp3 = SpecificNetwork(["a", "b", "c", "d"])

    # Every instance reports the same network type.
    assert sp1.type() == sp2.type()
    assert sp1.type() == sp3.type()
    assert sp2.type() == sp3.type()

    # The type only admits arcs whose source is "a".
    assert sp1.can_add_arc("a", "b")
    assert not sp1.can_add_arc("b", "a")
    assert not sp1.can_add_arc("c", "d")

    assert sp1.num_arcs() == sp3.num_arcs() == 0
    assert sp2.arcs() == [("a", "b")]

    df = util_test.generate_normal_data_indep(1000)
    bic = pbn.BIC(df)

    start = SpecificNetwork(["a", "b", "c", "d"])
    hc = pbn.GreedyHillClimbing()
    estimated = hc.estimate(pbn.ArcOperatorSet(), bic, start)

    # The estimate keeps the custom type and honors the arc restriction.
    assert estimated.type() == start.type()
    assert all(source == "a" for source, _ in estimated.arcs())

    # #######################
    # Conditional BN
    # #######################
    csp1 = ConditionalSpecificNetwork(["a", "b"], ["c", "d"])
    csp2 = ConditionalSpecificNetwork(["a", "b"], ["c", "d"], [("a", "b")])
    csp3 = ConditionalSpecificNetwork(["a", "b"], ["c", "d"])

    assert csp1.type() == csp2.type()
    assert csp1.type() == csp3.type()
    assert csp2.type() == csp3.type()

    assert csp1.can_add_arc("a", "b")
    assert not csp1.can_add_arc("b", "a")
    assert not csp1.can_add_arc("c", "d")

    assert csp1.num_arcs() == csp3.num_arcs() == 0
    assert csp2.arcs() == [("a", "b")]

    cstart = ConditionalSpecificNetwork(["a", "c"], ["b", "d"])
    hc = pbn.GreedyHillClimbing()
    cestimated = hc.estimate(pbn.ArcOperatorSet(), bic, cstart)

    assert cestimated.type() == cstart.type()
    assert all(source == "a" for source, _ in cestimated.arcs())
def test_create():
    """An OperatorPool is built from a non-empty list of operator sets."""
    arc_ops = pbn.ArcOperatorSet()
    type_ops = pbn.ChangeNodeTypeSet()
    pool = pbn.OperatorPool([arc_ops, type_ops])

    # Constructing a pool without operator sets is rejected.
    with pytest.raises(ValueError) as ex:
        pool = pbn.OperatorPool([])
    assert "cannot be empty" in str(ex.value)
def test_hc_conditional_estimate():
    # Hill climbing on a conditional Gaussian network must behave the same
    # whether the network was constructed directly or constructed with extra
    # nodes ('e' / interface 'f') that were removed afterwards.
    bic = pbn.BIC(df)
    column_names = list(df.columns.values)
    start = pbn.ConditionalGaussianNetwork(column_names[2:], column_names[:2])

    # Build the same conditional network, but through an add-then-remove path.
    nodes = column_names[2:]
    nodes.insert(1, 'e')
    interface_nodes = column_names[:2]
    interface_nodes.insert(1, 'f')
    start_removed_nodes = pbn.ConditionalGaussianNetwork(nodes, interface_nodes)
    start_removed_nodes.remove_node('e')
    start_removed_nodes.remove_interface_node('f')

    arc_set = pbn.ArcOperatorSet()
    hc = pbn.GreedyHillClimbing()

    # One iteration adds exactly one arc; record it and its score delta.
    res = hc.estimate(arc_set, bic, start, max_iters=1, verbose=False)
    assert res.num_arcs() == 1
    added_arc = res.arcs()[0]
    op_delta = bic.score(res) - bic.score(start)

    # The removed-nodes network must pick an equivalent arc (possibly reversed).
    res_removed = hc.estimate(arc_set, bic, start_removed_nodes, max_iters=1, verbose=False)
    assert res_removed.num_arcs() == 1
    added_arc_removed = res_removed.arcs()[0]
    assert added_arc == added_arc_removed or added_arc == added_arc_removed[::-1]
    assert np.isclose(op_delta,
                      bic.score(res_removed) - bic.score(start_removed_nodes))

    # The global score delta equals the local-score delta of the added arc.
    assert np.isclose(
        op_delta,
        bic.local_score(res, added_arc[1], [added_arc[0]]) -
        bic.local_score(res, added_arc[1], []))
    assert np.isclose(
        op_delta,
        bic.local_score(res, added_arc_removed[1], [added_arc_removed[0]]) -
        bic.local_score(res, added_arc_removed[1], []))

    # An epsilon larger than the best delta stops the search immediately.
    res = hc.estimate(arc_set, bic, start, epsilon=(op_delta + 0.01))
    assert res.num_arcs() == start.num_arcs()
    res_removed = hc.estimate(arc_set, bic, start_removed_nodes, epsilon=(op_delta + 0.01))
    assert res_removed.num_arcs() == start_removed_nodes.num_arcs()

    # A full run never adds an arc whose target is an interface node.
    res = hc.estimate(arc_set, bic, start, verbose=False)
    assert all(map(lambda arc: not res.is_interface(arc[1]), res.arcs()))
    res_removed = hc.estimate(arc_set, bic, start_removed_nodes, verbose=False)
    assert all(
        map(lambda arc: not res_removed.is_interface(arc[1]), res_removed.arcs()))
def test_nomax():
    """find_max yields None when no operator remains to propose."""
    gbn = pbn.GaussianNetwork(['a', 'b'])
    bic = pbn.BIC(df)

    # Whitelisting the only possible arc leaves nothing for find_max.
    whitelisted_ops = pbn.ArcOperatorSet(whitelist=[("a", "b")])
    whitelisted_ops.cache_scores(gbn, bic)

    assert whitelisted_ops.find_max(gbn) is None
def test_newbn_estimate_validation():
    """GreedyHillClimbing preserves the Python subclass and its extra data."""
    start = NewBN(["a", "b", "c", "d"])
    hc = pbn.GreedyHillClimbing()
    estimated = hc.estimate(pbn.ArcOperatorSet(), pbn.BIC(df), start)

    # The estimated model is the same derived type, with attributes intact.
    assert type(start) == type(estimated)
    assert estimated.extra_data == "extra"
def test_check_max_score():
    """find_max returns the best operator; blacklisting it yields its reverse."""
    gbn = pbn.GaussianNetwork(['c', 'd'])
    bic = pbn.BIC(df)
    arc_op = pbn.ArcOperatorSet()
    arc_op.cache_scores(gbn, bic)

    best = arc_op.find_max(gbn)
    expected_delta = bic.local_score(gbn, 'd', ['c']) - bic.local_score(gbn, 'd')
    assert np.isclose(best.delta(), expected_delta)

    # BIC is decomposable so the best operation is the arc in reverse direction.
    arc_op.set_arc_blacklist([(best.source(), best.target())])
    arc_op.cache_scores(gbn, bic)
    second = arc_op.find_max(gbn)

    assert best.source() == second.target()
    assert best.target() == second.source()
    assert (type(best) == type(second)) and (type(best) == pbn.AddArc)
def test_find_max():
    """OperatorPool.find_max agrees with the best of its individual sets."""
    spbn = pbn.SemiparametricBN(['a', 'b', 'c', 'd'])
    cv = pbn.CVLikelihood(df)

    arc_set = pbn.ArcOperatorSet()
    type_set = pbn.ChangeNodeTypeSet()

    arc_set.cache_scores(spbn, cv)
    spbn.set_unknown_node_types(df)
    type_set.cache_scores(spbn, cv)

    best_arc = arc_set.find_max(spbn)
    best_type = type_set.find_max(spbn)

    pool = pbn.OperatorPool([arc_set, type_set])
    pool.cache_scores(spbn, cv)
    best_pooled = pool.find_max(spbn)

    # The pooled maximum must be whichever individual operator scores higher.
    if best_arc.delta() >= best_type.delta():
        assert best_pooled == best_arc
    else:
        assert best_pooled == best_type
def test_lists():
    """Arc/type restriction lists are validated against the graph's nodes."""
    gbn = pbn.GaussianNetwork(['a', 'b', 'c', 'd'])
    bic = pbn.BIC(df)
    arc_op = pbn.ArcOperatorSet()

    # All restrictions reference existing nodes: caching succeeds.
    arc_op.set_arc_blacklist([("b", "a")])
    arc_op.set_arc_whitelist([("b", "c")])
    arc_op.set_max_indegree(3)
    arc_op.set_type_whitelist([("a", pbn.LinearGaussianCPDType())])
    arc_op.cache_scores(gbn, bic)

    # 'e' is not a node of the graph, so a blacklisted arc using it fails.
    arc_op.set_arc_blacklist([("e", "a")])
    with pytest.raises(IndexError) as ex:
        arc_op.cache_scores(gbn, bic)
    assert "not present in the graph" in str(ex.value)

    # The same validation applies to the whitelist.
    arc_op.set_arc_whitelist([("e", "a")])
    with pytest.raises(IndexError) as ex:
        arc_op.cache_scores(gbn, bic)
    assert "not present in the graph" in str(ex.value)
def test_hc_estimate():
    """Greedy hill climbing with BIC behaves identically on a network built
    directly and on one built with extra nodes that were removed afterwards.

    Bug fix: the original asserted ``res.num_arcs() == 1`` after estimating
    ``res_removed`` (twice), so the removed-nodes result was never actually
    checked. The sibling tests (conditional / validation variants) assert on
    ``res_removed``; this test now does the same.
    """
    bic = pbn.BIC(df)
    column_names = list(df.columns.values)
    start = pbn.GaussianNetwork(column_names)

    # Check algorithm with BN with nodes removed.
    column_names.insert(1, 'e')
    column_names.insert(3, 'f')
    start_removed_nodes = pbn.GaussianNetwork(column_names)
    start_removed_nodes.remove_node('e')
    start_removed_nodes.remove_node('f')

    arc_set = pbn.ArcOperatorSet()
    hc = pbn.GreedyHillClimbing()

    # One iteration adds exactly one arc; record it and its score delta.
    res = hc.estimate(arc_set, bic, start, max_iters=1)
    assert res.num_arcs() == 1
    added_arc = res.arcs()[0]
    op_delta = bic.score(res) - bic.score(start)

    res_removed = hc.estimate(arc_set, bic, start_removed_nodes, max_iters=1)
    # Fixed: assert on res_removed (the original re-checked res here).
    assert res_removed.num_arcs() == 1
    added_arc_removed = res_removed.arcs()[0]
    assert added_arc == added_arc_removed or added_arc == added_arc_removed[::-1]
    assert np.isclose(op_delta,
                      bic.score(res_removed) - bic.score(start_removed_nodes))

    # BIC is score equivalent, so if we blacklist the added_arc, its reverse will be added.
    res = hc.estimate(arc_set, bic, start, max_iters=1, arc_blacklist=[added_arc])
    assert res.num_arcs() == 1
    reversed_arc = res.arcs()[0][::-1]
    assert added_arc == reversed_arc

    res_removed = hc.estimate(arc_set, bic, start_removed_nodes, max_iters=1,
                              arc_blacklist=[added_arc_removed])
    # Fixed: assert on res_removed (the original re-checked res here).
    assert res_removed.num_arcs() == 1
    reversed_arc_removed = res_removed.arcs()[0][::-1]
    assert added_arc_removed == reversed_arc_removed

    # The global score delta equals the local-score delta of the added arc.
    assert np.isclose(
        op_delta,
        bic.local_score(res, added_arc[1], [added_arc[0]]) -
        bic.local_score(res, added_arc[1], []))
    assert np.isclose(
        op_delta,
        bic.local_score(res, added_arc_removed[1], [added_arc_removed[0]]) -
        bic.local_score(res, added_arc_removed[1], []))

    # An epsilon larger than the best delta stops the search immediately.
    res = hc.estimate(arc_set, bic, start, epsilon=(op_delta + 0.01))
    assert res.num_arcs() == start.num_arcs()
    res_removed = hc.estimate(arc_set, bic, start_removed_nodes,
                              epsilon=(op_delta + 0.01))
    assert res_removed.num_arcs() == start_removed_nodes.num_arcs()

    # Can't compare models because the arcs could be oriented in different direction,
    # leading to a different search path. Execute the code, just to check no error is given.
    res = hc.estimate(arc_set, bic, start, verbose=False)
    res_removed = hc.estimate(arc_set, bic, start_removed_nodes, verbose=False)
def test_hc_estimate_validation():
    # Same scenario as test_hc_estimate but scored with a ValidatedLikelihood;
    # the network with nodes removed after construction must behave like the
    # directly-constructed one.
    column_names = list(df.columns.values)
    start = pbn.GaussianNetwork(column_names)

    # Build the same network through an add-then-remove path.
    column_names.insert(1, 'e')
    column_names.insert(4, 'f')
    start_removed_nodes = pbn.GaussianNetwork(column_names)
    start_removed_nodes.remove_node('e')
    start_removed_nodes.remove_node('f')

    vl = pbn.ValidatedLikelihood(df)
    arc_set = pbn.ArcOperatorSet()
    hc = pbn.GreedyHillClimbing()

    # One iteration adds exactly one arc; record it and its CV-score delta.
    res = hc.estimate(arc_set, vl, start, max_iters=1)
    assert res.num_arcs() == 1
    added_arc = res.arcs()[0]
    op_delta = vl.cv_lik.score(res) - vl.cv_lik.score(start)

    # The removed-nodes network must pick an equivalent arc (possibly reversed).
    res_removed = hc.estimate(arc_set, vl, start_removed_nodes, max_iters=1)
    assert res_removed.num_arcs() == 1
    added_arc_removed = res_removed.arcs()[0]
    assert added_arc == added_arc_removed or added_arc == added_arc_removed[::-1]
    assert np.isclose(
        op_delta,
        vl.cv_lik.score(res_removed) - vl.cv_lik.score(start_removed_nodes))

    # The global score delta equals the local-score delta of the added arc.
    assert np.isclose(
        op_delta,
        vl.cv_lik.local_score(res, added_arc[1], [added_arc[0]]) -
        vl.cv_lik.local_score(res, added_arc[1], []))
    assert np.isclose(
        op_delta,
        vl.cv_lik.local_score(res, added_arc_removed[1], [added_arc_removed[0]]) -
        vl.cv_lik.local_score(res, added_arc_removed[1], []))

    # CV is score equivalent for GBNs, so if we blacklist the added_edge, its reverse will be added.
    res = hc.estimate(arc_set, vl, start, max_iters=1, arc_blacklist=[added_arc])
    assert res.num_arcs() == 1
    reversed_arc = res.arcs()[0][::-1]
    assert added_arc == reversed_arc

    res_removed = hc.estimate(arc_set, vl, start_removed_nodes, max_iters=1,
                              arc_blacklist=[added_arc_removed])
    assert res_removed.num_arcs() == 1
    reversed_arc_removed = res_removed.arcs()[0][::-1]
    assert reversed_arc == reversed_arc_removed

    # An epsilon larger than the best delta stops the search immediately.
    res = hc.estimate(arc_set, vl, start, epsilon=(op_delta + 0.01))
    assert res.num_arcs() == start.num_arcs()
    res_removed = hc.estimate(arc_set, vl, start_removed_nodes,
                              epsilon=(op_delta + 0.01))
    assert res_removed.num_arcs() == start_removed_nodes.num_arcs()

    # Can't compare models because the arcs could be oriented in different direction,
    # leading to a different search path. Execute the code, just to check no error is given.
    res = hc.estimate(arc_set, vl, start, verbose=False)
    res_removed = hc.estimate(arc_set, vl, start_removed_nodes, verbose=False)