def make_bayes_net(load=False, subtree=True, modelsdir=MODEL_CPDS_DIR):
    """Build the label Bayesian network, or reload a previously pickled one.

    Every label gets an edge to its ``<label>_hat`` node, plus an edge from
    each node returned by ``networkx.ancestors(obo_graph, label)`` that is
    itself in the label list. CPDs come from ``get_model_cpds`` and
    ``get_true_label_cpds``; nodes left without a CPD are dropped before the
    model is checked and pickled to ``RUNNING_MODEL_DIR/graph.p``.

    Parameters
    ----------
    load : bool
        When truthy and the pickled graph file exists, load it instead of
        rebuilding.
    subtree : bool
        When truthy use ``_subtree_labels()``; otherwise use every key of the
        dictionary returned by ``load_label_data()``.
    modelsdir : str
        Directory handed to ``get_model_cpds``.

    Returns
    -------
    The checked ``BayesianModel``.
    """
    print('Making bayes net')
    graph_file = RUNNING_MODEL_DIR + '/' + 'graph.p'
    if load and os.path.isfile(graph_file):
        print('Loading saved graph from file...')
        # NOTE: unpickling can execute arbitrary code; only load graph files
        # that this function wrote itself.
        with open(graph_file, 'rb') as graph_fh:
            G = pickle.load(graph_fh)
        G.check_model()
    else:
        print('loading data...')
        training_labels, go_dict = load_label_data()
        if subtree:
            labels_list = _subtree_labels()
            print(labels_list)
        else:
            labels_list = go_dict.keys()

        print('adding nodes and edges...')
        G = BayesianModel()
        G.add_edges_from([(label, label + '_hat') for label in labels_list])

        obo_graph = obonet.read_obo(OBODB_FILE)
        # Set lookup keeps the membership test below O(1) per candidate.
        known_labels = set(labels_list)
        for label in labels_list:
            # NOTE(review): the original calls these "children"; presumably
            # the OBO graph's edges point from child term to parent term, so
            # graph ancestors are ontology descendants -- confirm.
            for child in networkx.ancestors(obo_graph, label):
                if child in known_labels:
                    G.add_edge(child, label)

        # Honour the caller-supplied directory (the constant used to be
        # passed here unconditionally, silently ignoring ``modelsdir``).
        predicted_cpds = get_model_cpds(labels_list=labels_list,
                                        modelsdir=modelsdir)
        for cpd in predicted_cpds:
            G.add_cpds(cpd)

        true_label_cpds = get_true_label_cpds(training_labels, go_dict,
                                              labels_list=labels_list)
        for cpd in true_label_cpds:
            G.add_cpds(cpd)

        # Collect first, remove afterwards: the node container must not be
        # modified while it is being iterated.
        remove_list = [node for node in G.nodes() if G.get_cpds(node) is None]
        for node in remove_list:
            if node in G:
                G.remove_node(node)

        G.check_model()
        with open(graph_file, 'wb') as graph_fh:
            pickle.dump(G, graph_fh)
    return G
class Network_handler:
    '''
    Handles creation and usage of the probabilistic network over CERN's data.
    Can deal only with a SINGLE file-priority combination.
    Note that the methods of this class have numbers and must be called in order.
    '''

    def __init__(self, pnh, gh):
        '''
        Constructor.
        -----------------
        Parameters:
        pnh : handler supplying the data extractor, dataframe, device,
              priority, file writer and file suffix
        gh  : general handler; stored and later asked for device locations
        '''
        extractor = pnh.get_data_extractor()
        self.best_model = BayesianModel()
        self.training_instances = ""
        self.device_considered = pnh.get_device()
        self.priority_considered = pnh.get_priority()
        self.markov = MarkovModel()
        self.general_handler = gh
        self.variables_names = extractor.get_variable_names()
        self.rankedDevices = extractor.get_ranked_devices()
        self.data = pnh.get_dataframe()
        self.file_writer = pnh.get_file_writer()
        self.file_suffix = pnh.get_file_suffix()

    def learn_structure(self, method, scoring_method, log=True):
        '''
        (4) Method that builds the structure of the data
        -----------------
        Parameters:
        method          : The technique used to search for the structure
            -> scoring_approx     - To use an approximated search with scoring method
            -> scoring_exhaustive - To use an exhaustive search with scoring method
            -> constraint         - To use the constraint based technique
        scoring_method : K2, bic, bdeu (ignored by the "constraint" method)
        log            - "True" if you want to print debug information in the console
        -----------------
        Raises ValueError for an unknown method, or for an unknown
        scoring_method when the chosen method needs a score.
        '''
        # Select the scoring method for the local search of the structure
        if scoring_method == "K2":
            scores = K2Score(self.data)
        elif scoring_method == "bic":
            scores = BicScore(self.data)
        elif scoring_method == "bdeu":
            scores = BdeuScore(self.data)
        else:
            # Only an error if the chosen method actually needs a score.
            scores = None

        # Select the actual method
        if method in ("scoring_approx", "scoring_exhaustive"):
            if scores is None:
                raise ValueError("Unknown scoring method: " + str(scoring_method))
            if method == "scoring_approx":
                est = HillClimbSearch(self.data, scores)
            else:
                est = ExhaustiveSearch(self.data, scores)
        elif method == "constraint":
            est = ConstraintBasedEstimator(self.data)
        else:
            raise ValueError("Unknown structure learning method: " + str(method))

        self.best_model = est.estimate()
        self.eliminate_isolated_nodes()  # REMOVE all nodes not connected to anything else

        for edge in self.best_model.edges_iter():
            self.file_writer.write_txt(str(edge))

        self.log("Method used for structural learning: " + method, log)
        self.log("Search terminated", log)

    def estimate_parameters(self, log=True):
        '''
        (5) Estimates the parameters of the found network:
        one CPD per node (K2 prior), added to the model, logged and written to file.
        '''
        estimator = BayesianEstimator(self.best_model, self.data)
        self.file_writer.write_txt("Number of nodes: " + str(len(self.variables_names)))
        self.file_writer.write_txt("Complete list: " + str(self.variables_names))

        for node in self.best_model.nodes():
            cpd = estimator.estimate_cpd(node, prior_type='K2')
            self.best_model.add_cpds(cpd)
            self.log(cpd, log)
            self.file_writer.write_txt(cpd.__str__())

    def inference(self, variables, evidence, mode="auto", log=True):
        '''
        (6) Computes the inference over some variables of the network (given some evidence)
        -----------------
        Parameters:
        variables, evidence : used only when mode == "manual"
        mode : "auto"   - query every node with all of its parents set to 1
               "manual" - run the single query described by variables/evidence
        log  : "True" to also print the results in the console
        '''
        inference = VariableElimination(self.best_model)
        # Alternatives tried by the original author:
        # inference = BeliefPropagation(self.markov)
        # inference = Mplp(self.best_model)

        header = "------------------- INFERENCE ------------------------"
        self.log(header, log)
        self.file_writer.write_txt(header, newline=True)
        self.file_writer.write_txt("(With parents all set to value 1)")

        if mode == "auto":
            self.log(" (with parents all set to value 1)", log)
            for node in self.best_model.nodes():
                parents = self.best_model.get_parents(node)
                evidence = {parent: 1 for parent in parents}
                phi_query = inference.query([node], evidence)
                for key in phi_query:
                    self.file_writer.write_txt(str(phi_query[key]))
                    self.log(phi_query[key], log)
        elif mode == "manual":
            phi_query = inference.query(variables, evidence)
            for key in phi_query:
                self.log(phi_query[key], log)
        # A MAP query (inference.map_query(variables, evidence)) was left
        # disabled by the original author.

    def draw_network(self, label_choice, location_choice, location, log):
        '''
        (7) Draws the bayesian network.
        ----
        location_choice = True iff we want to show the location of devices in the graph.
        label_choice = "single" if we want to show single label, "double" for double label of arcs
        location = 0,1,2 depending by the location (H0, H1, H2); currently not read by this method
        log = currently not read by this method
        '''
        bn_graph = gv.Digraph(format="png")

        # Extract color based on the building
        if location_choice:
            devices = self.variables_names
            device_location = dict()    # device -> H0 location
            device_locationH1 = dict()  # device -> H1 location
            # One lookup is enough: the table does not depend on the device.
            allDevicesLocations = self.general_handler.get_device_locations()
            for d in devices:
                device_location[d] = allDevicesLocations[d][0]
                device_locationH1[d] = allDevicesLocations[d][1]

            location_color = self.assign_color(device_location)
            location_colorH1 = self.assign_color(device_locationH1)

            # Creating the subgraphs, one for each location:
            loc_subgraphs = dict()
            for loc in location_color:
                name = "cluster_" + loc
                loc_subgraphs[loc] = gv.Digraph(name)
                # Label with name to be visualized in the image
                loc_subgraphs[loc].graph_attr['label'] = loc

        # Create nodes
        for node in self.best_model.nodes():
            if location_choice:
                locationH0 = device_location[node]
                locationH1 = device_locationH1[node]
                # Node goes in its H0 cluster and is filled with its H1 color
                loc_subgraphs[locationH0].node(
                    node, style='filled',
                    fillcolor=location_colorH1[locationH1])
            else:
                bn_graph.node(node)

        # Add all subgraphs in the final graph:
        if location_choice:
            for loc in loc_subgraphs:
                bn_graph.subgraph(loc_subgraphs[loc])

        # Create and color edges. The inference engine only depends on the
        # model, so it is built once instead of once per edge.
        inference = VariableElimination(self.best_model)
        for edge in self.best_model.edges_iter():
            label = ""

            # Inference for first label and color of edges
            phi_query = inference.query([edge[1]], {edge[0]: 1})
            value = phi_query[edge[1]].values[1]
            value = round(value, 2)

            if label_choice == "single":
                label = str(value)

            if label_choice == "double":
                # Inference for second label
                phi_query = inference.query([edge[0]], {edge[1]: 1})
                value_inv = phi_query[edge[0]].values[1]
                value_inv = round(value_inv, 2)
                label = str(value) + "|" + str(value_inv)

            if value >= 0.75:
                bn_graph.edge(edge[0], edge[1], color="red", label=label)
            else:
                bn_graph.edge(edge[0], edge[1], color="black", label=label)

        # Save the .png graph
        if self.device_considered == "CUSTOM":
            imgPath = '../../output/CUSTOM' + self.file_suffix
        else:
            if location_choice:
                locat = "_H0H1"
            else:
                locat = ""
            imgPath = ('../../output/' + self.device_considered + '_' +
                       self.priority_considered + locat)
        bn_graph.render(imgPath)
        os.remove(imgPath)  # remove the source code generated by graphviz

    def data_info(self, selection, log):
        '''
        (9) Prints or logs some extra information about the data or the network
        -----------------
        Parameters:
        selection : container of section numbers to produce
            1 - device ranking (at most 20 devices)
            2 - edges of the network
            3 - markov network image and factors
            4 - inference network image
        log : "True" to also print the markov factors in the console
        '''
        # 1 - DEVICE FREQUENCY AND OCCURRENCES
        if 1 in selection:
            self.file_writer.write_txt(
                "Device ranking (max 20 devices are visualized)", newline=True)
            for rank, dr in enumerate(self.rankedDevices):
                # Stop after 20 entries, as the header promises (the old
                # counter stopped one early, at 19).
                if rank >= 20:
                    break
                self.file_writer.write_txt(dr[0] + " \t" + str(dr[1]) +
                                           "\t" + str(dr[2]))

        # 2 - EDGES OF THE NETWORK
        if 2 in selection:
            self.file_writer.write_txt("Edges of the network:", newline=True)
            for edge in self.best_model.edges_iter():
                self.file_writer.write_txt(str(edge))

        # 3 - MARKOV NETWORK
        if 3 in selection:
            # create the markov model from the BN
            self.markov = self.best_model.to_markov_model()
            nice_graph = pydot.Dot(graph_type='graph')
            for node in self.markov.nodes():
                nice_graph.add_node(pydot.Node(node))
            for edge in self.markov.edges():
                nice_graph.add_edge(
                    pydot.Edge(edge[0], edge[1], color="black"))
            nice_graph.write_png('../../output/' + self.device_considered +
                                 '_' + self.priority_considered +
                                 '-markov.png')

            self.file_writer.write_txt("MARKOV NETWORK FACTORS:", newline=True)
            for factor in self.markov.factors:
                self.log("MARKOV---------------------------------------", log)
                self.log(factor, log)
                self.file_writer.write_txt(factor.__str__())

        # 4 - INFERENCE NETWORK
        if 4 in selection:
            nice_graph = pydot.Dot(graph_type='digraph')
            nodes = list(self.best_model.nodes())
            inference = VariableElimination(self.best_model)
            # Visit every unordered pair of nodes exactly once.
            for pos, node1 in enumerate(nodes, start=1):
                for node2 in nodes[pos:]:
                    # probability of direct activation (node1=1 -> node2)
                    phi_query = inference.query([node2], {node1: 1})
                    prob1 = phi_query[node2].values[1]
                    # probability of inverse activation (node2=1 -> node1)
                    phi_query = inference.query([node1], {node2: 1})
                    prob2 = phi_query[node1].values[1]
                    prob1 = round(prob1, 2)
                    prob2 = round(prob2, 2)

                    arc = self._inference_arc(prob1, prob2)
                    if arc is None:
                        continue
                    color, forward = arc
                    self.fix_node_presence([node1, node2], nice_graph)
                    if forward:
                        source, target = node1, node2
                        double_label = str(prob1) + "|" + str(prob2)
                    else:
                        source, target = node2, node1
                        double_label = str(prob2) + "|" + str(prob1)
                    nice_graph.add_edge(
                        pydot.Edge(source, target, color=color,
                                   label=double_label))

            if self.device_considered == "CUSTOM":
                imgPath = '../../output/CUSTOM' + self.file_suffix
                nice_graph.write_png(imgPath + "-inference_network.png")
            else:
                nice_graph.write_png('../../output/' + self.device_considered +
                                     '_' + self.priority_considered +
                                     '-inference_network.png')

    @staticmethod
    def _inference_arc(prob1, prob2):
        '''
        Decides whether a node pair gets an arc in the inference network.
        prob1 is P(node2=1 | node1=1), prob2 is P(node1=1 | node2=1).
        Returns (color, forward) where forward means node1 -> node2,
        or None when no arc must be drawn.
        '''
        if prob1 >= 0.75 and (prob1 - prob2) <= 0.40:
            return "red", True
        if prob2 >= 0.75 and (prob2 - prob1) <= 0.40:
            return "red", False
        # NOTE(review): this "orange" case can never be reached; whenever both
        # probabilities are >= 0.75 their difference is <= 0.40 and the first
        # test already matched. Kept as-is pending a decision on the intent.
        if prob1 >= 0.75 and prob2 >= 0.75:
            return "orange", prob1 >= prob2
        if prob1 >= 0.55 and prob2 >= 0.55:
            return "black", prob1 >= prob2
        return None

    def fix_node_presence(self, nodes, pydot_graph):
        '''
        Adds the list of nodes to the graph, if they are not already present
        '''
        # get_nodes() yields pydot.Node objects, so the raw names cannot be
        # compared against them directly; compare pydot's own node names.
        present = {existing.get_name() for existing in pydot_graph.get_nodes()}
        for node in nodes:
            candidate = pydot.Node(node)
            if candidate.get_name() not in present:
                pydot_graph.add_node(candidate)
                present.add(candidate.get_name())

    def eliminate_isolated_nodes(self):
        '''
        If a node doesn't have any incoming or outgoing edge, it is eliminated from the graph.
        Raises DataError when no node is left afterwards.
        '''
        # Removing an isolated node never changes the edge set, so the
        # connected nodes can be collected once up front.
        connected = set()
        for edge in self.best_model.edges():
            connected.update(edge)

        # Iterate over a copy: nodes are removed inside the loop.
        for nodeX in list(self.best_model.nodes()):
            if nodeX not in connected:
                self.file_writer.write_txt(
                    "Node " + str(nodeX) +
                    " has no edges: it has been eliminated.")
                self.best_model.remove_node(nodeX)

        if not self.best_model.nodes():
            raise DataError("No nodes left in this file-priority combination.")

    def assign_color(self, device_location):
        '''
        Returns a dictionary with the location as key and the assigned colour as value
        (WORKS WITH MAX 10 DIFFERENT LOCATIONS; an 11th location raises IndexError)
        '''
        # NOTE(review): 'Red' appears twice, so the 3rd and 6th locations
        # share a colour -- confirm whether another colour was intended.
        system_color = [
            'Blue', 'Green', 'Red', 'Purple', 'Yellow', 'Red', 'Grey',
            'Light Red', 'Light Blue', 'Light Green'
        ]
        location_color = dict()  # key = location; value = color
        for loc in device_location.values():
            if loc not in location_color:
                # Colours are handed out in palette order, one per new location.
                location_color[loc] = system_color[len(location_color)]
        return location_color

    def log(self, text, log):
        '''
        Prints the text in the console, if the "log" condition is True.
        '''
        if log:
            print(text)
class TestBayesianModelMethods(unittest.TestCase):
    """Tests for the graph-level methods of ``BayesianModel``."""

    def setUp(self):
        # G: plain structure; G1: structure with CPDs attached; G2: structure
        # used by the ancestor tests.
        self.G = BayesianModel([('a', 'd'), ('b', 'd'),
                                ('d', 'e'), ('b', 'c')])
        self.G1 = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, values=[[0.2], [0.8]])
        intel_cpd = TabularCPD('intel', 3, values=[[0.5], [0.3], [0.2]])
        grade_cpd = TabularCPD('grade', 3,
                               values=[[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                       [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3])
        self.G1.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        self.G2 = BayesianModel([('d', 'g'), ('g', 'l'),
                                 ('i', 'g'), ('i', 'l')])

    def _assert_undirected_edges(self, graph, expected_edges):
        """Assert `graph` has exactly `expected_edges`, ignoring direction.

        Comparing whole sets also fails when an expected edge is missing;
        checking each returned edge for membership would pass on an empty
        edge list.
        """
        found = {frozenset(edge) for edge in graph.edges()}
        wanted = {frozenset(edge) for edge in expected_edges}
        self.assertSetEqual(found, wanted)

    def test_moral_graph(self):
        moral_graph = self.G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()),
                             ['a', 'b', 'c', 'd', 'e'])
        self._assert_undirected_edges(
            moral_graph,
            [('a', 'b'), ('a', 'd'), ('b', 'c'), ('d', 'b'), ('e', 'd')])

    def test_moral_graph_with_edge_present_over_parents(self):
        G = BayesianModel([('a', 'd'), ('d', 'e'), ('b', 'd'),
                           ('b', 'c'), ('a', 'b')])
        moral_graph = G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()),
                             ['a', 'b', 'c', 'd', 'e'])
        self._assert_undirected_edges(
            moral_graph,
            [('a', 'b'), ('c', 'b'), ('d', 'a'), ('d', 'b'), ('d', 'e')])

    def test_get_ancestors_of_success(self):
        ancestors1 = self.G2._get_ancestors_of('g')
        ancestors2 = self.G2._get_ancestors_of('d')
        ancestors3 = self.G2._get_ancestors_of(['i', 'l'])
        self.assertEqual(ancestors1, {'d', 'i', 'g'})
        self.assertEqual(ancestors2, {'d'})
        self.assertEqual(ancestors3, {'g', 'i', 'l', 'd'})

    def test_get_ancestors_of_failure(self):
        # 'h' is not a node of G2.
        self.assertRaises(ValueError, self.G2._get_ancestors_of, 'h')

    def test_local_independencies(self):
        self.assertEqual(self.G.local_independencies('a'),
                         Independencies(['a', ['b', 'c']]))
        self.assertEqual(self.G.local_independencies('c'),
                         Independencies(['c', ['a', 'd', 'e'], 'b']))
        self.assertEqual(self.G.local_independencies('d'),
                         Independencies(['d', 'c', ['b', 'a']]))
        self.assertEqual(self.G.local_independencies('e'),
                         Independencies(['e', ['c', 'b', 'a'], 'd']))
        self.assertEqual(self.G.local_independencies('b'),
                         Independencies(['b', 'a']))
        self.assertEqual(self.G1.local_independencies('grade'),
                         Independencies())

    def test_get_independencies(self):
        chain = BayesianModel([('X', 'Y'), ('Y', 'Z')])
        self.assertEqual(chain.get_independencies(),
                         Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        fork = BayesianModel([('Y', 'X'), ('Y', 'Z')])
        self.assertEqual(fork.get_independencies(),
                         Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        collider = BayesianModel([('X', 'Y'), ('Z', 'Y')])
        self.assertEqual(collider.get_independencies(),
                         Independencies(('X', 'Z'), ('Z', 'X')))

    def test_is_imap(self):
        val = [
            0.01, 0.01, 0.08, 0.006, 0.006, 0.048, 0.004, 0.004, 0.032,
            0.04, 0.04, 0.32, 0.024, 0.024, 0.192, 0.016, 0.016, 0.128
        ]
        JPD = JointProbabilityDistribution(['diff', 'intel', 'grade'],
                                           [2, 3, 3], val)
        fac = DiscreteFactor(['diff', 'intel', 'grade'], [2, 3, 3], val)
        self.assertTrue(self.G1.is_imap(JPD))
        # A plain factor is rejected; only a joint distribution is accepted.
        self.assertRaises(TypeError, self.G1.is_imap, fac)

    def test_get_immoralities(self):
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertEqual(G.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G1 = BayesianModel([('x', 'y'), ('z', 'y'), ('z', 'x'), ('w', 'y')])
        self.assertEqual(G1.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G2 = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'),
                            ('w', 'y'), ('w', 'x')])
        self.assertEqual(G2.get_immoralities(), {('w', 'z')})

    def test_is_iequivalent(self):
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())
        G1 = BayesianModel([('V', 'W'), ('W', 'X'), ('X', 'Y'), ('Z', 'Y')])
        G2 = BayesianModel([('W', 'V'), ('X', 'W'), ('X', 'Y'), ('Z', 'Y')])
        self.assertTrue(G1.is_iequivalent(G2))
        G3 = BayesianModel([('W', 'V'), ('W', 'X'), ('Y', 'X'), ('Z', 'Y')])
        self.assertFalse(G3.is_iequivalent(G2))

    def test_copy(self):
        model_copy = self.G1.copy()
        self.assertEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))
        # The copy must own its CPD objects rather than share them.
        self.assertNotEqual(id(self.G1.get_cpds('diff')),
                            id(model_copy.get_cpds('diff')))

        # Changing the original afterwards must not show up in the copy.
        self.G1.remove_cpds('diff')
        diff_cpd = TabularCPD('diff', 2, values=[[0.3], [0.7]])
        self.G1.add_cpds(diff_cpd)
        self.assertNotEqual(self.G1.get_cpds('diff'),
                            model_copy.get_cpds('diff'))

        self.G1.remove_node('intel')
        self.assertNotEqual(sorted(self.G1.nodes()),
                            sorted(model_copy.nodes()))
        self.assertNotEqual(sorted(self.G1.edges()),
                            sorted(model_copy.edges()))

    def test_remove_node(self):
        self.G1.remove_node('diff')
        self.assertEqual(sorted(self.G1.nodes()), sorted(['grade', 'intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')

    def test_remove_nodes_from(self):
        self.G1.remove_nodes_from(['diff', 'grade'])
        self.assertEqual(sorted(self.G1.nodes()), sorted(['intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')
        self.assertRaises(ValueError, self.G1.get_cpds, 'grade')

    def tearDown(self):
        # Release every fixture created in setUp.
        del self.G
        del self.G1
        del self.G2
class TestBayesianModelMethods(unittest.TestCase):
    """Tests for the graph-level methods of ``BayesianModel`` and ``DAG``."""

    def setUp(self):
        # G: plain structure; G1: structure with CPDs attached; G2: structure
        # used by the ancestor tests.
        self.G = BayesianModel([("a", "d"), ("b", "d"),
                                ("d", "e"), ("b", "c")])
        self.G1 = BayesianModel([("diff", "grade"), ("intel", "grade")])
        diff_cpd = TabularCPD("diff", 2, values=[[0.2], [0.8]])
        intel_cpd = TabularCPD("intel", 3, values=[[0.5], [0.3], [0.2]])
        grade_cpd = TabularCPD(
            "grade",
            3,
            values=[
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8],
            ],
            evidence=["diff", "intel"],
            evidence_card=[2, 3],
        )
        self.G1.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        self.G2 = BayesianModel([("d", "g"), ("g", "l"),
                                 ("i", "g"), ("i", "l")])

    def _assert_undirected_edges(self, graph, expected_edges):
        """Assert `graph` has exactly `expected_edges`, ignoring direction.

        Comparing whole sets also fails when an expected edge is missing;
        checking each returned edge for membership would pass on an empty
        edge list.
        """
        found = {frozenset(edge) for edge in graph.edges()}
        wanted = {frozenset(edge) for edge in expected_edges}
        self.assertSetEqual(found, wanted)

    def test_moral_graph(self):
        moral_graph = self.G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()),
                             ["a", "b", "c", "d", "e"])
        self._assert_undirected_edges(
            moral_graph,
            [("a", "b"), ("a", "d"), ("b", "c"), ("d", "b"), ("e", "d")],
        )

    def test_moral_graph_with_edge_present_over_parents(self):
        G = BayesianModel([("a", "d"), ("d", "e"), ("b", "d"),
                           ("b", "c"), ("a", "b")])
        moral_graph = G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()),
                             ["a", "b", "c", "d", "e"])
        self._assert_undirected_edges(
            moral_graph,
            [("a", "b"), ("c", "b"), ("d", "a"), ("d", "b"), ("d", "e")],
        )

    def test_get_ancestors_of_success(self):
        ancestors1 = self.G2._get_ancestors_of("g")
        ancestors2 = self.G2._get_ancestors_of("d")
        ancestors3 = self.G2._get_ancestors_of(["i", "l"])
        self.assertEqual(ancestors1, {"d", "i", "g"})
        self.assertEqual(ancestors2, {"d"})
        self.assertEqual(ancestors3, {"g", "i", "l", "d"})

    def test_get_ancestors_of_failure(self):
        # "h" is not a node of G2.
        self.assertRaises(ValueError, self.G2._get_ancestors_of, "h")

    def test_get_cardinality(self):
        self.assertDictEqual(self.G1.get_cardinality(),
                             {"diff": 2, "intel": 3, "grade": 3})

    def test_get_cardinality_with_node(self):
        self.assertEqual(self.G1.get_cardinality("diff"), 2)
        self.assertEqual(self.G1.get_cardinality("intel"), 3)
        self.assertEqual(self.G1.get_cardinality("grade"), 3)

    def test_local_independencies(self):
        self.assertEqual(self.G.local_independencies("a"),
                         Independencies(["a", ["b", "c"]]))
        self.assertEqual(self.G.local_independencies("c"),
                         Independencies(["c", ["a", "d", "e"], "b"]))
        self.assertEqual(self.G.local_independencies("d"),
                         Independencies(["d", "c", ["b", "a"]]))
        self.assertEqual(self.G.local_independencies("e"),
                         Independencies(["e", ["c", "b", "a"], "d"]))
        self.assertEqual(self.G.local_independencies("b"),
                         Independencies(["b", "a"]))
        self.assertEqual(self.G1.local_independencies("grade"),
                         Independencies())

    def test_get_independencies(self):
        chain = BayesianModel([("X", "Y"), ("Y", "Z")])
        self.assertEqual(chain.get_independencies(),
                         Independencies(("X", "Z", "Y"), ("Z", "X", "Y")))
        fork = BayesianModel([("Y", "X"), ("Y", "Z")])
        self.assertEqual(fork.get_independencies(),
                         Independencies(("X", "Z", "Y"), ("Z", "X", "Y")))
        collider = BayesianModel([("X", "Y"), ("Z", "Y")])
        self.assertEqual(collider.get_independencies(),
                         Independencies(("X", "Z"), ("Z", "X")))

    def test_is_imap(self):
        val = [
            0.01, 0.01, 0.08, 0.006, 0.006, 0.048, 0.004, 0.004, 0.032,
            0.04, 0.04, 0.32, 0.024, 0.024, 0.192, 0.016, 0.016, 0.128,
        ]
        JPD = JointProbabilityDistribution(["diff", "intel", "grade"],
                                           [2, 3, 3], val)
        fac = DiscreteFactor(["diff", "intel", "grade"], [2, 3, 3], val)
        self.assertTrue(self.G1.is_imap(JPD))
        # A plain factor is rejected; only a joint distribution is accepted.
        self.assertRaises(TypeError, self.G1.is_imap, fac)

    def test_markov_blanet(self):
        G = DAG([
            ("x", "y"),
            ("z", "y"),
            ("y", "w"),
            ("y", "v"),
            ("u", "w"),
            ("s", "v"),
            ("w", "t"),
            ("w", "m"),
            ("v", "n"),
            ("v", "q"),
        ])
        # Expected blanket of "y": parents x, z; children w, v; and the
        # children's other parents u, s.
        self.assertEqual(set(G.get_markov_blanket("y")),
                         {"s", "w", "x", "u", "z", "v"})

    def test_get_immoralities(self):
        G = BayesianModel([("x", "y"), ("z", "y"), ("x", "z"), ("w", "y")])
        self.assertEqual(G.get_immoralities(), {("w", "x"), ("w", "z")})
        G1 = BayesianModel([("x", "y"), ("z", "y"), ("z", "x"), ("w", "y")])
        self.assertEqual(G1.get_immoralities(), {("w", "x"), ("w", "z")})
        G2 = BayesianModel([("x", "y"), ("z", "y"), ("x", "z"),
                            ("w", "y"), ("w", "x")])
        self.assertEqual(G2.get_immoralities(), {("w", "z")})

    def test_is_iequivalent(self):
        G = BayesianModel([("x", "y"), ("z", "y"), ("x", "z"), ("w", "y")])
        self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())
        G1 = BayesianModel([("V", "W"), ("W", "X"), ("X", "Y"), ("Z", "Y")])
        G2 = BayesianModel([("W", "V"), ("X", "W"), ("X", "Y"), ("Z", "Y")])
        self.assertTrue(G1.is_iequivalent(G2))
        G3 = BayesianModel([("W", "V"), ("W", "X"), ("Y", "X"), ("Z", "Y")])
        self.assertFalse(G3.is_iequivalent(G2))

    def test_copy(self):
        model_copy = self.G1.copy()
        self.assertEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))
        # The copy must own its CPD objects rather than share them.
        self.assertNotEqual(id(self.G1.get_cpds("diff")),
                            id(model_copy.get_cpds("diff")))

        # Changing the original afterwards must not show up in the copy.
        self.G1.remove_cpds("diff")
        diff_cpd = TabularCPD("diff", 2, values=[[0.3], [0.7]])
        self.G1.add_cpds(diff_cpd)
        self.assertNotEqual(self.G1.get_cpds("diff"),
                            model_copy.get_cpds("diff"))

        self.G1.remove_node("intel")
        self.assertNotEqual(sorted(self.G1.nodes()),
                            sorted(model_copy.nodes()))
        self.assertNotEqual(sorted(self.G1.edges()),
                            sorted(model_copy.edges()))

    def test_remove_node(self):
        self.G1.remove_node("diff")
        self.assertEqual(sorted(self.G1.nodes()), sorted(["grade", "intel"]))
        self.assertRaises(ValueError, self.G1.get_cpds, "diff")

    def test_remove_nodes_from(self):
        self.G1.remove_nodes_from(["diff", "grade"])
        self.assertEqual(sorted(self.G1.nodes()), sorted(["intel"]))
        self.assertRaises(ValueError, self.G1.get_cpds, "diff")
        self.assertRaises(ValueError, self.G1.get_cpds, "grade")

    def tearDown(self):
        # Release every fixture created in setUp.
        del self.G
        del self.G1
        del self.G2
class TestBayesianModelMethods(unittest.TestCase):
    """Tests for the graph-level methods of ``BayesianModel``."""

    def setUp(self):
        # G: plain structure; G1: structure with CPDs attached; G2: structure
        # used by the ancestor tests.
        self.G = BayesianModel([('a', 'd'), ('b', 'd'),
                                ('d', 'e'), ('b', 'c')])
        self.G1 = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, values=[[0.2], [0.8]])
        intel_cpd = TabularCPD('intel', 3, values=[[0.5], [0.3], [0.2]])
        grade_cpd = TabularCPD('grade', 3,
                               values=[[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                       [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3])
        self.G1.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        self.G2 = BayesianModel([('d', 'g'), ('g', 'l'),
                                 ('i', 'g'), ('i', 'l')])

    def _assert_undirected_edges(self, graph, expected_edges):
        """Assert `graph` has exactly `expected_edges`, ignoring direction.

        Comparing whole sets also fails when an expected edge is missing;
        checking each returned edge for membership would pass on an empty
        edge list.
        """
        found = {frozenset(edge) for edge in graph.edges()}
        wanted = {frozenset(edge) for edge in expected_edges}
        self.assertSetEqual(found, wanted)

    def test_moral_graph(self):
        moral_graph = self.G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()),
                             ['a', 'b', 'c', 'd', 'e'])
        self._assert_undirected_edges(
            moral_graph,
            [('a', 'b'), ('a', 'd'), ('b', 'c'), ('d', 'b'), ('e', 'd')])

    def test_moral_graph_with_edge_present_over_parents(self):
        G = BayesianModel([('a', 'd'), ('d', 'e'), ('b', 'd'),
                           ('b', 'c'), ('a', 'b')])
        moral_graph = G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()),
                             ['a', 'b', 'c', 'd', 'e'])
        self._assert_undirected_edges(
            moral_graph,
            [('a', 'b'), ('c', 'b'), ('d', 'a'), ('d', 'b'), ('d', 'e')])

    def test_get_ancestors_of_success(self):
        ancestors1 = self.G2._get_ancestors_of('g')
        ancestors2 = self.G2._get_ancestors_of('d')
        ancestors3 = self.G2._get_ancestors_of(['i', 'l'])
        self.assertEqual(ancestors1, {'d', 'i', 'g'})
        self.assertEqual(ancestors2, {'d'})
        self.assertEqual(ancestors3, {'g', 'i', 'l', 'd'})

    def test_get_ancestors_of_failure(self):
        # 'h' is not a node of G2.
        self.assertRaises(ValueError, self.G2._get_ancestors_of, 'h')

    def test_local_independencies(self):
        self.assertEqual(self.G.local_independencies('a'),
                         Independencies(['a', ['b', 'c']]))
        self.assertEqual(self.G.local_independencies('c'),
                         Independencies(['c', ['a', 'd', 'e'], 'b']))
        self.assertEqual(self.G.local_independencies('d'),
                         Independencies(['d', 'c', ['b', 'a']]))
        self.assertEqual(self.G.local_independencies('e'),
                         Independencies(['e', ['c', 'b', 'a'], 'd']))
        self.assertEqual(self.G.local_independencies('b'),
                         Independencies(['b', 'a']))
        self.assertEqual(self.G1.local_independencies('grade'),
                         Independencies())

    def test_get_independencies(self):
        chain = BayesianModel([('X', 'Y'), ('Y', 'Z')])
        self.assertEqual(chain.get_independencies(),
                         Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        fork = BayesianModel([('Y', 'X'), ('Y', 'Z')])
        self.assertEqual(fork.get_independencies(),
                         Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        collider = BayesianModel([('X', 'Y'), ('Z', 'Y')])
        self.assertEqual(collider.get_independencies(),
                         Independencies(('X', 'Z'), ('Z', 'X')))

    def test_is_imap(self):
        val = [
            0.01, 0.01, 0.08, 0.006, 0.006, 0.048, 0.004, 0.004, 0.032,
            0.04, 0.04, 0.32, 0.024, 0.024, 0.192, 0.016, 0.016, 0.128
        ]
        JPD = JointProbabilityDistribution(['diff', 'intel', 'grade'],
                                           [2, 3, 3], val)
        fac = DiscreteFactor(['diff', 'intel', 'grade'], [2, 3, 3], val)
        self.assertTrue(self.G1.is_imap(JPD))
        # A plain factor is rejected; only a joint distribution is accepted.
        self.assertRaises(TypeError, self.G1.is_imap, fac)

    def test_get_immoralities(self):
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertEqual(G.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G1 = BayesianModel([('x', 'y'), ('z', 'y'), ('z', 'x'), ('w', 'y')])
        self.assertEqual(G1.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G2 = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'),
                            ('w', 'y'), ('w', 'x')])
        self.assertEqual(G2.get_immoralities(), {('w', 'z')})

    def test_is_iequivalent(self):
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())
        G1 = BayesianModel([('V', 'W'), ('W', 'X'), ('X', 'Y'), ('Z', 'Y')])
        G2 = BayesianModel([('W', 'V'), ('X', 'W'), ('X', 'Y'), ('Z', 'Y')])
        self.assertTrue(G1.is_iequivalent(G2))
        G3 = BayesianModel([('W', 'V'), ('W', 'X'), ('Y', 'X'), ('Z', 'Y')])
        self.assertFalse(G3.is_iequivalent(G2))

    def test_copy(self):
        model_copy = self.G1.copy()
        self.assertEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))
        # The copy must own its CPD objects rather than share them.
        self.assertNotEqual(id(self.G1.get_cpds('diff')),
                            id(model_copy.get_cpds('diff')))

        # Changing the original afterwards must not show up in the copy.
        self.G1.remove_cpds('diff')
        diff_cpd = TabularCPD('diff', 2, values=[[0.3], [0.7]])
        self.G1.add_cpds(diff_cpd)
        self.assertNotEqual(self.G1.get_cpds('diff'),
                            model_copy.get_cpds('diff'))

        self.G1.remove_node('intel')
        self.assertNotEqual(sorted(self.G1.nodes()),
                            sorted(model_copy.nodes()))
        self.assertNotEqual(sorted(self.G1.edges()),
                            sorted(model_copy.edges()))

    def test_remove_node(self):
        self.G1.remove_node('diff')
        self.assertEqual(sorted(self.G1.nodes()), sorted(['grade', 'intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')

    def test_remove_nodes_from(self):
        self.G1.remove_nodes_from(['diff', 'grade'])
        self.assertEqual(sorted(self.G1.nodes()), sorted(['intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')
        self.assertRaises(ValueError, self.G1.get_cpds, 'grade')

    def tearDown(self):
        # Release every fixture created in setUp.
        del self.G
        del self.G1
        del self.G2