Exemple #1
0
def make_bayes_net(load=False, subtree=True, modelsdir=MODEL_CPDS_DIR):
    print('Making bayes net')
    graph_file = RUNNING_MODEL_DIR + '/' + 'graph.p'
    if os.path.isfile(graph_file) and load == True:
        print('Loading saved graph from file...')
        G = pickle.load(open(graph_file, 'rb'))
        G.check_model()
    else:
        print('loading data...')
        training_labels, go_dict = load_label_data()
        if subtree:
            labels_list = _subtree_labels()
            print(labels_list)
        else:
            labels_list = go_dict.keys()

        print('adding nodes and edges...')
        G = BayesianModel()
        G.add_edges_from([(label, label + '_hat') for label in labels_list])
        obo_graph = obonet.read_obo(OBODB_FILE)
        for label in labels_list:
            children = [
                c for c in networkx.ancestors(obo_graph, label)
                if c in labels_list
            ]
            for child in children:
                G.add_edge(child, label)

        predicted_cpds = get_model_cpds(labels_list=labels_list,
                                        modelsdir=MODEL_CPDS_DIR)
        for cpd in predicted_cpds:
            G.add_cpds(cpd)
        true_label_cpds = get_true_label_cpds(training_labels,
                                              go_dict,
                                              labels_list=labels_list)
        for cpd in true_label_cpds:
            G.add_cpds(cpd)
        remove_list = []
        for node in G.nodes():
            if G.get_cpds(node) == None:
                remove_list.append(node)
                # remove_list.append(node+'_hat')
        for node in remove_list:
            if node in G:
                G.remove_node(node)
        G.check_model()
        pickle.dump(G, open(graph_file, 'wb'))
    return G
Exemple #2
0
class Network_handler:
    '''
    Handles creation and usage of the probabilistic network over CERN's data.
    Can deal only with a SINGLE file-priority combination.
    Note that the methods of this class have numbers and must be called in order.
    '''
    def __init__(self, pnh, gh):
        '''
        Constructor
        '''
        extractor = pnh.get_data_extractor()
        self.best_model = BayesianModel()
        self.training_instances = ""
        self.device_considered = pnh.get_device()
        self.priority_considered = pnh.get_priority()
        self.markov = MarkovModel()
        self.general_handler = gh
        self.variables_names = extractor.get_variable_names()
        self.rankedDevices = extractor.get_ranked_devices()
        self.data = pnh.get_dataframe()
        self.file_writer = pnh.get_file_writer()
        self.file_suffix = pnh.get_file_suffix()

    def learn_structure(self, method, scoring_method, log=True):
        ''' (4)
        Method that builds the structure of the data
        -----------------
        Parameters:
        method          : The technique used to search for the structure
            -> scoring_approx     - To use an approximated search with scoring method
            -> scoring_exhaustive - To use an exhaustive search with scoring method
            -> constraint         - To use the constraint based technique
        scoring_method : K2, bic, bdeu
        log             - "True" if you want to print debug information in the console    
        '''

        #Select the scoring method for the local search of the structure
        if scoring_method == "K2":
            scores = K2Score(self.data)
        elif scoring_method == "bic":
            scores = BicScore(self.data)
        elif scoring_method == "bdeu":
            scores = BdeuScore(self.data)

        #Select the actual method
        if method == "scoring_approx":
            est = HillClimbSearch(self.data, scores)
        elif method == "scoring_exhaustive":
            est = ExhaustiveSearch(self.data, scores)
        elif method == "constraint":
            est = ConstraintBasedEstimator(self.data)

        self.best_model = est.estimate()
        self.eliminate_isolated_nodes(
        )  # REMOVE all nodes not connected to anything else

        for edge in self.best_model.edges_iter():
            self.file_writer.write_txt(str(edge))

        self.log("Method used for structural learning: " + method, log)
        #self.log("Training instances skipped: " + str(self.extractor.get_skipped_lines()), log)
        self.log("Search terminated", log)

    def estimate_parameters(self, log=True):
        ''' (5)
        Estimates the parameters of the found network
        '''
        estimator = BayesianEstimator(self.best_model, self.data)
        self.file_writer.write_txt("Number of nodes: " +
                                   str(len(self.variables_names)))
        self.file_writer.write_txt("Complete list: " +
                                   str(self.variables_names))

        for node in self.best_model.nodes():
            cpd = estimator.estimate_cpd(node, prior_type='K2')
            self.best_model.add_cpds(cpd)
            self.log(cpd, log)
            self.file_writer.write_txt(cpd.__str__())

    def inference(self, variables, evidence, mode="auto", log=True):
        ''' (6)
        Computes the inference over some variables of the network (given some evidence)
        '''

        inference = VariableElimination(self.best_model)
        #inference = BeliefPropagation(self.markov)
        #inference = Mplp(self.best_model)
        header = "------------------- INFERENCE ------------------------"
        self.log(header, log)
        self.file_writer.write_txt(header, newline=True)
        self.file_writer.write_txt("(With parents all set to value 1)")

        if mode == "auto":
            self.log("          (with parents all set to value 1)", log)
            for node in self.best_model.nodes():
                variables = [node]
                parents = self.best_model.get_parents(node)
                evidence = dict()
                for p in parents:
                    evidence[p] = 1
                phi_query = inference.query(variables, evidence)
                for key in phi_query:
                    self.file_writer.write_txt(str(phi_query[key]))
                    self.log(phi_query[key], log)

        elif mode == "manual":
            phi_query = inference.query(variables, evidence)
            for key in phi_query:
                self.log(phi_query[key], log)
            '''
            map_query = inference.map_query(variables, evidence)
            print(map_query)
            '''

    def draw_network(self, label_choice, location_choice, location, log):
        ''' (7) 
        Draws the bayesian network.
        ----
        location_choice = True iff we want to show the location of devices in the graph.
        label_choice = "single" if we want to show single label, "double" for double label of arcs
        location = 0,1,2 depending by the location (H0, H1, H2)
        '''
        bn_graph = gv.Digraph(format="png")

        # Extract color based on the building
        if location_choice:

            devices = self.variables_names
            device_location = dict()
            device_locationH1 = dict()

            #For H0
            for d in devices:
                allDevicesLocations = self.general_handler.get_device_locations(
                )
                device_location[d] = allDevicesLocations[d][0]
                device_locationH1[d] = allDevicesLocations[d][1]  #temp for H1
            location_color = self.assign_color(device_location)
            location_colorH1 = self.assign_color(device_locationH1)
            '''
            # Logging and saving info
            self.log(device_location, log)
            self.log(location_color, log)
            self.file_writer.write_txt(device_location, newline = True)
            self.file_writer.write_txt(location_color, newline = True)
            '''

            # Creating the subgraphs, one for each location:
            loc_subgraphs = dict()
            for loc in location_color:
                name = "cluster_" + loc
                loc_subgraphs[loc] = gv.Digraph(name)
                loc_subgraphs[loc].graph_attr[
                    'label'] = loc  #Label with name to be visualized in the image

        # Create nodes
        for node in self.best_model.nodes():
            if location_choice:
                locationH0 = device_location[node]
                locationH1 = device_locationH1[node]
                loc_subgraphs[locationH0].node(
                    node,
                    style='filled',
                    fillcolor=location_colorH1[locationH1]
                )  #add the node to the right subgraph
                #loc_subgraphs[locationH0].node(node) #USE THIS TO ADD ONLY H0
            else:
                bn_graph.node(node)

        # Add all subgraphs in the final graph:
        if location_choice:
            for loc in loc_subgraphs:
                bn_graph.subgraph(loc_subgraphs[loc])

        # Create and color edges
        for edge in self.best_model.edges_iter():
            inference = VariableElimination(self.best_model)
            label = ""

            # Inference for first label and color of edges
            variables = [edge[1]]
            evidence = dict()
            evidence[edge[0]] = 1
            phi_query = inference.query(variables, evidence)
            value = phi_query[edge[1]].values[1]
            value = round(value, 2)

            if label_choice == "single":
                label = str(value)

            if label_choice == "double":
                # Inference for second label
                variables = [edge[0]]
                evidence = dict()
                evidence[edge[1]] = 1
                phi_query = inference.query(variables, evidence)
                value_inv = phi_query[edge[0]].values[1]
                value_inv = round(value_inv, 2)
                label = str(value) + "|" + str(value_inv)

            if value >= 0.75:
                bn_graph.edge(edge[0], edge[1], color="red", label=label)
            else:
                bn_graph.edge(edge[0], edge[1], color="black", label=label)

        # Save the .png graph
        if self.device_considered == "CUSTOM":
            imgPath = '../../output/CUSTOM' + self.file_suffix
        else:
            if location_choice:
                locat = "_H0H1"
            else:
                locat = ""
            imgPath = '../../output/' + self.device_considered + '_' + self.priority_considered + locat
        bn_graph.render(imgPath)
        os.remove(imgPath)  #remove the source code generated by graphviz

    def data_info(self, selection, log):
        ''' (9) Prints or logs some extra information about the data or the network
        '''
        # 1 - DEVICE FREQUENCY AND OCCURRENCES
        if 1 in selection:
            self.file_writer.write_txt(
                "Device ranking (max 20 devices are visualized)", newline=True)
            i = 1
            for dr in self.rankedDevices:
                self.file_writer.write_txt(dr[0] + "             \t" +
                                           str(dr[1]) + "\t" + str(dr[2]))
                i = i + 1
                if i == 20:
                    break

        # 2 - EDGES OF THE NETWORK
        if 2 in selection:
            self.file_writer.write_txt("Edges of the network:", newline=True)
            for edge in self.best_model.edges_iter():
                self.file_writer.write_txt(str(edge))

        # 3 - MARKOV NETWORK
        if 3 in selection:
            self.markov = self.best_model.to_markov_model(
            )  #create the markov model from the BN
            nice_graph = pydot.Dot(graph_type='graph')
            for node in self.markov.nodes():
                node_pydot = pydot.Node(node)
                nice_graph.add_node(node_pydot)
            for edge in self.markov.edges():
                edge_pydot = pydot.Edge(edge[0], edge[1], color="black")
                nice_graph.add_edge(edge_pydot)
            nice_graph.write_png('../../output/' + self.device_considered +
                                 '_' + self.priority_considered +
                                 '-markov.png')

            self.file_writer.write_txt("MARKOV NETWORK FACTORS:", newline=True)
            for factor in self.markov.factors:
                self.log("MARKOV---------------------------------------", log)
                self.log(factor, log)
                self.file_writer.write_txt(factor.__str__())

        # 4 - INFERENCE NETWORK
        if 4 in selection:
            nice_graph = pydot.Dot(graph_type='digraph')
            nodes = self.best_model.nodes()
            inference = VariableElimination(self.best_model)
            for node1 in nodes:
                pos = nodes.index(node1) + 1
                for i in range(pos, len(nodes)):
                    node2 = nodes[i]
                    variables = [node2]
                    evidence = dict()
                    evidence[node1] = 1
                    phi_query = inference.query(variables, evidence)
                    prob1 = phi_query[node2].values[
                        1]  #probability of direct activation (inference from node1=1 to node2)
                    variables = [node1]
                    evidence = dict()
                    evidence[node2] = 1
                    phi_query = inference.query(variables, evidence)
                    prob2 = phi_query[node1].values[
                        1]  #probability of inverse activation (inference from node2=1 to node1)
                    prob1 = round(prob1, 2)
                    prob2 = round(prob2, 2)
                    if prob1 >= 0.75 and (
                            prob1 - prob2
                    ) <= 0.40:  #add direct arc from node1 to node2
                        ls = [node1, node2]
                        self.fix_node_presence(ls, nice_graph)
                        double_label = str(prob1) + "|" + str(prob2)
                        nice_graph.add_edge(
                            pydot.Edge(node1,
                                       node2,
                                       color="red",
                                       label=double_label))
                    elif prob2 >= 0.75 and (prob2 - prob1) <= 0.40:
                        ls = [node1, node2]
                        self.fix_node_presence(ls, nice_graph)
                        double_label = str(prob2) + "|" + str(prob1)
                        nice_graph.add_edge(
                            pydot.Edge(node2,
                                       node1,
                                       color="red",
                                       label=double_label))
                    elif prob1 >= 0.75 and prob2 >= 0.75:
                        ls = [node1, node2]
                        self.fix_node_presence(ls, nice_graph)
                        if prob1 >= prob2:
                            double_label = str(prob1) + "|" + str(prob2)
                            nice_graph.add_edge(
                                pydot.Edge(node1,
                                           node2,
                                           color="orange",
                                           label=double_label))
                        else:
                            double_label = str(prob2) + "|" + str(prob1)
                            nice_graph.add_edge(
                                pydot.Edge(node2,
                                           node1,
                                           color="orange",
                                           label=double_label))
                    elif prob1 >= 0.55 and prob2 >= 0.55:
                        ls = [node1, node2]
                        self.fix_node_presence(ls, nice_graph)
                        if prob1 >= prob2:
                            double_label = str(prob1) + "|" + str(prob2)
                            nice_graph.add_edge(
                                pydot.Edge(node1,
                                           node2,
                                           color="black",
                                           label=double_label))
                        else:
                            double_label = str(prob2) + "|" + str(prob1)
                            nice_graph.add_edge(
                                pydot.Edge(node2,
                                           node1,
                                           color="black",
                                           label=double_label))

            if self.device_considered == "CUSTOM":
                imgPath = '../../output/CUSTOM' + self.file_suffix
                nice_graph.write_png(imgPath + "-inference_network.png")
            else:
                nice_graph.write_png('../../output/' + self.device_considered +
                                     '_' + self.priority_considered +
                                     '-inference_network.png')

    def fix_node_presence(self, nodes, pydot_graph):
        ''' Adds the list of nodes to the graph, if they are not already present '''
        for node in nodes:
            if node not in pydot_graph.get_nodes():
                pydot_graph.add_node(pydot.Node(node))

    def eliminate_isolated_nodes(self):
        '''
        If a node doesn't have any incoming or outgoing edge, it is eliminated from the graph
        '''
        for nodeX in self.best_model.nodes():
            tup = [item for item in self.best_model.edges() if nodeX in item]
            if not tup:
                self.file_writer.write_txt(
                    "Node " + str(nodeX) +
                    " has no edges: it has been eliminated.")
                self.best_model.remove_node(nodeX)
        if self.best_model.nodes() == []:
            raise DataError("No nodes left in this file-priority combination.")

    def assign_color(self, device_location):
        '''
        Returns a dictionary with the location as key and the assigned colour as value (WORKS WITH MAX 10 DIFFERENT LOCATIONS)
        '''
        system_color = [
            'Blue', 'Green', 'Red', 'Purple', 'Yellow', 'Red', 'Grey',
            'Light Red', 'Light Blue', 'Light Green'
        ]
        location_color = dict()  # key = location; value = color
        for dev, loc in device_location.items():
            if loc not in location_color:
                color = system_color[0]
                system_color.remove(color)
                location_color[loc] = color
        return location_color

    def log(self, text, log):
        ''' Prints the text in the console, if the "log" condition is True. '''
        if log:
            print(text)
class TestBayesianModelMethods(unittest.TestCase):
    def setUp(self):
        self.G = BayesianModel([('a', 'd'), ('b', 'd'), ('d', 'e'),
                                ('b', 'c')])
        self.G1 = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, values=[[0.2], [0.8]])
        intel_cpd = TabularCPD('intel', 3, values=[[0.5], [0.3], [0.2]])
        grade_cpd = TabularCPD('grade',
                               3,
                               values=[[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                       [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3])
        self.G1.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        self.G2 = BayesianModel([('d', 'g'), ('g', 'l'), ('i', 'g'),
                                 ('i', 'l')])

    def test_moral_graph(self):
        moral_graph = self.G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()),
                             ['a', 'b', 'c', 'd', 'e'])
        for edge in moral_graph.edges():
            self.assertTrue(edge in [('a', 'b'), ('a', 'd'), ('b', 'c'),
                                     ('d', 'b'), ('e', 'd')]
                            or (edge[1], edge[0]) in [('a', 'b'), ('a', 'd'),
                                                      ('b', 'c'), ('d', 'b'),
                                                      ('e', 'd')])

    def test_moral_graph_with_edge_present_over_parents(self):
        G = BayesianModel([('a', 'd'), ('d', 'e'), ('b', 'd'), ('b', 'c'),
                           ('a', 'b')])
        moral_graph = G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()),
                             ['a', 'b', 'c', 'd', 'e'])
        for edge in moral_graph.edges():
            self.assertTrue(edge in [('a', 'b'), ('c', 'b'), ('d', 'a'),
                                     ('d', 'b'), ('d', 'e')]
                            or (edge[1], edge[0]) in [('a', 'b'), ('c', 'b'),
                                                      ('d', 'a'), ('d', 'b'),
                                                      ('d', 'e')])

    def test_get_ancestors_of_success(self):
        ancenstors1 = self.G2._get_ancestors_of('g')
        ancenstors2 = self.G2._get_ancestors_of('d')
        ancenstors3 = self.G2._get_ancestors_of(['i', 'l'])
        self.assertEqual(ancenstors1, {'d', 'i', 'g'})
        self.assertEqual(ancenstors2, {'d'})
        self.assertEqual(ancenstors3, {'g', 'i', 'l', 'd'})

    def test_get_ancestors_of_failure(self):
        self.assertRaises(ValueError, self.G2._get_ancestors_of, 'h')

    def test_local_independencies(self):
        self.assertEqual(self.G.local_independencies('a'),
                         Independencies(['a', ['b', 'c']]))
        self.assertEqual(self.G.local_independencies('c'),
                         Independencies(['c', ['a', 'd', 'e'], 'b']))
        self.assertEqual(self.G.local_independencies('d'),
                         Independencies(['d', 'c', ['b', 'a']]))
        self.assertEqual(self.G.local_independencies('e'),
                         Independencies(['e', ['c', 'b', 'a'], 'd']))
        self.assertEqual(self.G.local_independencies('b'),
                         Independencies(['b', 'a']))
        self.assertEqual(self.G1.local_independencies('grade'),
                         Independencies())

    def test_get_independencies(self):
        chain = BayesianModel([('X', 'Y'), ('Y', 'Z')])
        self.assertEqual(chain.get_independencies(),
                         Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        fork = BayesianModel([('Y', 'X'), ('Y', 'Z')])
        self.assertEqual(fork.get_independencies(),
                         Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        collider = BayesianModel([('X', 'Y'), ('Z', 'Y')])
        self.assertEqual(collider.get_independencies(),
                         Independencies(('X', 'Z'), ('Z', 'X')))

    def test_is_imap(self):
        val = [
            0.01, 0.01, 0.08, 0.006, 0.006, 0.048, 0.004, 0.004, 0.032, 0.04,
            0.04, 0.32, 0.024, 0.024, 0.192, 0.016, 0.016, 0.128
        ]
        JPD = JointProbabilityDistribution(['diff', 'intel', 'grade'],
                                           [2, 3, 3], val)
        fac = DiscreteFactor(['diff', 'intel', 'grade'], [2, 3, 3], val)
        self.assertTrue(self.G1.is_imap(JPD))
        self.assertRaises(TypeError, self.G1.is_imap, fac)

    def test_get_immoralities(self):
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertEqual(G.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G1 = BayesianModel([('x', 'y'), ('z', 'y'), ('z', 'x'), ('w', 'y')])
        self.assertEqual(G1.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G2 = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y'),
                            ('w', 'x')])
        self.assertEqual(G2.get_immoralities(), {('w', 'z')})

    def test_is_iequivalent(self):
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())
        G1 = BayesianModel([('V', 'W'), ('W', 'X'), ('X', 'Y'), ('Z', 'Y')])
        G2 = BayesianModel([('W', 'V'), ('X', 'W'), ('X', 'Y'), ('Z', 'Y')])
        self.assertTrue(G1.is_iequivalent(G2))
        G3 = BayesianModel([('W', 'V'), ('W', 'X'), ('Y', 'X'), ('Z', 'Y')])
        self.assertFalse(G3.is_iequivalent(G2))

    def test_copy(self):
        model_copy = self.G1.copy()
        self.assertEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))
        self.assertNotEqual(id(self.G1.get_cpds('diff')),
                            id(model_copy.get_cpds('diff')))

        self.G1.remove_cpds('diff')
        diff_cpd = TabularCPD('diff', 2, values=[[0.3], [0.7]])
        self.G1.add_cpds(diff_cpd)
        self.assertNotEqual(self.G1.get_cpds('diff'),
                            model_copy.get_cpds('diff'))

        self.G1.remove_node('intel')
        self.assertNotEqual(sorted(self.G1.nodes()),
                            sorted(model_copy.nodes()))
        self.assertNotEqual(sorted(self.G1.edges()),
                            sorted(model_copy.edges()))

    def test_remove_node(self):
        self.G1.remove_node('diff')
        self.assertEqual(sorted(self.G1.nodes()), sorted(['grade', 'intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')

    def test_remove_nodes_from(self):
        self.G1.remove_nodes_from(['diff', 'grade'])
        self.assertEqual(sorted(self.G1.nodes()), sorted(['intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')
        self.assertRaises(ValueError, self.G1.get_cpds, 'grade')

    def tearDown(self):
        del self.G
        del self.G1
class TestBayesianModelMethods(unittest.TestCase):
    def setUp(self):
        self.G = BayesianModel([("a", "d"), ("b", "d"), ("d", "e"),
                                ("b", "c")])
        self.G1 = BayesianModel([("diff", "grade"), ("intel", "grade")])
        diff_cpd = TabularCPD("diff", 2, values=[[0.2], [0.8]])
        intel_cpd = TabularCPD("intel", 3, values=[[0.5], [0.3], [0.2]])
        grade_cpd = TabularCPD(
            "grade",
            3,
            values=[
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8],
            ],
            evidence=["diff", "intel"],
            evidence_card=[2, 3],
        )
        self.G1.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        self.G2 = BayesianModel([("d", "g"), ("g", "l"), ("i", "g"),
                                 ("i", "l")])

    def test_moral_graph(self):
        moral_graph = self.G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()),
                             ["a", "b", "c", "d", "e"])
        for edge in moral_graph.edges():
            self.assertTrue(edge in [("a", "b"), ("a", "d"), ("b", "c"),
                                     ("d", "b"), ("e", "d")]
                            or (edge[1], edge[0]) in [("a", "b"), ("a", "d"),
                                                      ("b", "c"), ("d", "b"),
                                                      ("e", "d")])

    def test_moral_graph_with_edge_present_over_parents(self):
        G = BayesianModel([("a", "d"), ("d", "e"), ("b", "d"), ("b", "c"),
                           ("a", "b")])
        moral_graph = G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()),
                             ["a", "b", "c", "d", "e"])
        for edge in moral_graph.edges():
            self.assertTrue(edge in [("a", "b"), ("c", "b"), ("d", "a"),
                                     ("d", "b"), ("d", "e")]
                            or (edge[1], edge[0]) in [("a", "b"), ("c", "b"),
                                                      ("d", "a"), ("d", "b"),
                                                      ("d", "e")])

    def test_get_ancestors_of_success(self):
        ancenstors1 = self.G2._get_ancestors_of("g")
        ancenstors2 = self.G2._get_ancestors_of("d")
        ancenstors3 = self.G2._get_ancestors_of(["i", "l"])
        self.assertEqual(ancenstors1, {"d", "i", "g"})
        self.assertEqual(ancenstors2, {"d"})
        self.assertEqual(ancenstors3, {"g", "i", "l", "d"})

    def test_get_ancestors_of_failure(self):
        self.assertRaises(ValueError, self.G2._get_ancestors_of, "h")

    def test_get_cardinality(self):
        self.assertDictEqual(self.G1.get_cardinality(), {
            "diff": 2,
            "intel": 3,
            "grade": 3
        })

    def test_get_cardinality_with_node(self):
        self.assertEqual(self.G1.get_cardinality("diff"), 2)
        self.assertEqual(self.G1.get_cardinality("intel"), 3)
        self.assertEqual(self.G1.get_cardinality("grade"), 3)

    def test_local_independencies(self):
        self.assertEqual(self.G.local_independencies("a"),
                         Independencies(["a", ["b", "c"]]))
        self.assertEqual(
            self.G.local_independencies("c"),
            Independencies(["c", ["a", "d", "e"], "b"]),
        )
        self.assertEqual(self.G.local_independencies("d"),
                         Independencies(["d", "c", ["b", "a"]]))
        self.assertEqual(
            self.G.local_independencies("e"),
            Independencies(["e", ["c", "b", "a"], "d"]),
        )
        self.assertEqual(self.G.local_independencies("b"),
                         Independencies(["b", "a"]))
        self.assertEqual(self.G1.local_independencies("grade"),
                         Independencies())

    def test_get_independencies(self):
        chain = BayesianModel([("X", "Y"), ("Y", "Z")])
        self.assertEqual(chain.get_independencies(),
                         Independencies(("X", "Z", "Y"), ("Z", "X", "Y")))
        fork = BayesianModel([("Y", "X"), ("Y", "Z")])
        self.assertEqual(fork.get_independencies(),
                         Independencies(("X", "Z", "Y"), ("Z", "X", "Y")))
        collider = BayesianModel([("X", "Y"), ("Z", "Y")])
        self.assertEqual(collider.get_independencies(),
                         Independencies(("X", "Z"), ("Z", "X")))

    def test_is_imap(self):
        val = [
            0.01,
            0.01,
            0.08,
            0.006,
            0.006,
            0.048,
            0.004,
            0.004,
            0.032,
            0.04,
            0.04,
            0.32,
            0.024,
            0.024,
            0.192,
            0.016,
            0.016,
            0.128,
        ]
        JPD = JointProbabilityDistribution(["diff", "intel", "grade"],
                                           [2, 3, 3], val)
        fac = DiscreteFactor(["diff", "intel", "grade"], [2, 3, 3], val)
        self.assertTrue(self.G1.is_imap(JPD))
        self.assertRaises(TypeError, self.G1.is_imap, fac)

    def test_markov_blanet(self):
        G = DAG([
            ("x", "y"),
            ("z", "y"),
            ("y", "w"),
            ("y", "v"),
            ("u", "w"),
            ("s", "v"),
            ("w", "t"),
            ("w", "m"),
            ("v", "n"),
            ("v", "q"),
        ])
        self.assertEqual(set(G.get_markov_blanket("y")),
                         set(["s", "w", "x", "u", "z", "v"]))

    def test_get_immoralities(self):
        G = BayesianModel([("x", "y"), ("z", "y"), ("x", "z"), ("w", "y")])
        self.assertEqual(G.get_immoralities(), {("w", "x"), ("w", "z")})
        G1 = BayesianModel([("x", "y"), ("z", "y"), ("z", "x"), ("w", "y")])
        self.assertEqual(G1.get_immoralities(), {("w", "x"), ("w", "z")})
        G2 = BayesianModel([("x", "y"), ("z", "y"), ("x", "z"), ("w", "y"),
                            ("w", "x")])
        self.assertEqual(G2.get_immoralities(), {("w", "z")})

    def test_is_iequivalent(self):
        G = BayesianModel([("x", "y"), ("z", "y"), ("x", "z"), ("w", "y")])
        self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())
        G1 = BayesianModel([("V", "W"), ("W", "X"), ("X", "Y"), ("Z", "Y")])
        G2 = BayesianModel([("W", "V"), ("X", "W"), ("X", "Y"), ("Z", "Y")])
        self.assertTrue(G1.is_iequivalent(G2))
        G3 = BayesianModel([("W", "V"), ("W", "X"), ("Y", "X"), ("Z", "Y")])
        self.assertFalse(G3.is_iequivalent(G2))

    def test_copy(self):
        model_copy = self.G1.copy()
        self.assertEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))
        self.assertNotEqual(id(self.G1.get_cpds("diff")),
                            id(model_copy.get_cpds("diff")))

        self.G1.remove_cpds("diff")
        diff_cpd = TabularCPD("diff", 2, values=[[0.3], [0.7]])
        self.G1.add_cpds(diff_cpd)
        self.assertNotEqual(self.G1.get_cpds("diff"),
                            model_copy.get_cpds("diff"))

        self.G1.remove_node("intel")
        self.assertNotEqual(sorted(self.G1.nodes()),
                            sorted(model_copy.nodes()))
        self.assertNotEqual(sorted(self.G1.edges()),
                            sorted(model_copy.edges()))

    def test_remove_node(self):
        self.G1.remove_node("diff")
        self.assertEqual(sorted(self.G1.nodes()), sorted(["grade", "intel"]))
        self.assertRaises(ValueError, self.G1.get_cpds, "diff")

    def test_remove_nodes_from(self):
        self.G1.remove_nodes_from(["diff", "grade"])
        self.assertEqual(sorted(self.G1.nodes()), sorted(["intel"]))
        self.assertRaises(ValueError, self.G1.get_cpds, "diff")
        self.assertRaises(ValueError, self.G1.get_cpds, "grade")

    def tearDown(self):
        del self.G
        del self.G1
class TestBayesianModelMethods(unittest.TestCase):

    def setUp(self):
        self.G = BayesianModel([('a', 'd'), ('b', 'd'),
                                ('d', 'e'), ('b', 'c')])
        self.G1 = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, values=[[0.2], [0.8]])
        intel_cpd = TabularCPD('intel', 3, values=[[0.5], [0.3], [0.2]])
        grade_cpd = TabularCPD('grade', 3, values=[[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                                   [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                                   [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'], evidence_card=[2, 3])
        self.G1.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        self.G2 = BayesianModel([('d', 'g'), ('g', 'l'), ('i', 'g'), ('i', 'l')])

    def test_moral_graph(self):
        moral_graph = self.G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()), ['a', 'b', 'c', 'd', 'e'])
        for edge in moral_graph.edges():
            self.assertTrue(edge in [('a', 'b'), ('a', 'd'), ('b', 'c'), ('d', 'b'), ('e', 'd')] or
                            (edge[1], edge[0]) in [('a', 'b'), ('a', 'd'), ('b', 'c'), ('d', 'b'), ('e', 'd')])

    def test_moral_graph_with_edge_present_over_parents(self):
        G = BayesianModel([('a', 'd'), ('d', 'e'), ('b', 'd'), ('b', 'c'), ('a', 'b')])
        moral_graph = G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()), ['a', 'b', 'c', 'd', 'e'])
        for edge in moral_graph.edges():
            self.assertTrue(edge in [('a', 'b'), ('c', 'b'), ('d', 'a'), ('d', 'b'), ('d', 'e')] or
                            (edge[1], edge[0]) in [('a', 'b'), ('c', 'b'), ('d', 'a'), ('d', 'b'), ('d', 'e')])

    def test_get_ancestors_of_success(self):
        ancenstors1 = self.G2._get_ancestors_of('g')
        ancenstors2 = self.G2._get_ancestors_of('d')
        ancenstors3 = self.G2._get_ancestors_of(['i', 'l'])
        self.assertEqual(ancenstors1, {'d', 'i', 'g'})
        self.assertEqual(ancenstors2, {'d'})
        self.assertEqual(ancenstors3, {'g', 'i', 'l', 'd'})

    def test_get_ancestors_of_failure(self):
        self.assertRaises(ValueError, self.G2._get_ancestors_of, 'h')

    def test_local_independencies(self):
        self.assertEqual(self.G.local_independencies('a'), Independencies(['a', ['b', 'c']]))
        self.assertEqual(self.G.local_independencies('c'), Independencies(['c', ['a', 'd', 'e'], 'b']))
        self.assertEqual(self.G.local_independencies('d'), Independencies(['d', 'c', ['b', 'a']]))
        self.assertEqual(self.G.local_independencies('e'), Independencies(['e', ['c', 'b', 'a'], 'd']))
        self.assertEqual(self.G.local_independencies('b'), Independencies(['b', 'a']))
        self.assertEqual(self.G1.local_independencies('grade'), Independencies())

    def test_get_independencies(self):
        chain = BayesianModel([('X', 'Y'), ('Y', 'Z')])
        self.assertEqual(chain.get_independencies(), Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        fork = BayesianModel([('Y', 'X'), ('Y', 'Z')])
        self.assertEqual(fork.get_independencies(), Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
        collider = BayesianModel([('X', 'Y'), ('Z', 'Y')])
        self.assertEqual(collider.get_independencies(), Independencies(('X', 'Z'), ('Z', 'X')))

    def test_is_imap(self):
        val = [0.01, 0.01, 0.08, 0.006, 0.006, 0.048, 0.004, 0.004, 0.032,
               0.04, 0.04, 0.32, 0.024, 0.024, 0.192, 0.016, 0.016, 0.128]
        JPD = JointProbabilityDistribution(['diff', 'intel', 'grade'], [2, 3, 3], val)
        fac = DiscreteFactor(['diff', 'intel', 'grade'], [2, 3, 3], val)
        self.assertTrue(self.G1.is_imap(JPD))
        self.assertRaises(TypeError, self.G1.is_imap, fac)

    def test_get_immoralities(self):
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertEqual(G.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G1 = BayesianModel([('x', 'y'), ('z', 'y'), ('z', 'x'), ('w', 'y')])
        self.assertEqual(G1.get_immoralities(), {('w', 'x'), ('w', 'z')})
        G2 = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y'), ('w', 'x')])
        self.assertEqual(G2.get_immoralities(), {('w', 'z')})

    def test_is_iequivalent(self):
        G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
        self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())
        G1 = BayesianModel([('V', 'W'), ('W', 'X'), ('X', 'Y'), ('Z', 'Y')])
        G2 = BayesianModel([('W', 'V'), ('X', 'W'), ('X', 'Y'), ('Z', 'Y')])
        self.assertTrue(G1.is_iequivalent(G2))
        G3 = BayesianModel([('W', 'V'), ('W', 'X'), ('Y', 'X'), ('Z', 'Y')])
        self.assertFalse(G3.is_iequivalent(G2))

    def test_copy(self):
        model_copy = self.G1.copy()
        self.assertEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))
        self.assertNotEqual(id(self.G1.get_cpds('diff')),
                            id(model_copy.get_cpds('diff')))

        self.G1.remove_cpds('diff')
        diff_cpd = TabularCPD('diff', 2, values=[[0.3], [0.7]])
        self.G1.add_cpds(diff_cpd)
        self.assertNotEqual(self.G1.get_cpds('diff'),
                            model_copy.get_cpds('diff'))

        self.G1.remove_node('intel')
        self.assertNotEqual(sorted(self.G1.nodes()), sorted(model_copy.nodes()))
        self.assertNotEqual(sorted(self.G1.edges()), sorted(model_copy.edges()))

    def test_remove_node(self):
        self.G1.remove_node('diff')
        self.assertEqual(sorted(self.G1.nodes()), sorted(['grade', 'intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')

    def test_remove_nodes_from(self):
        self.G1.remove_nodes_from(['diff', 'grade'])
        self.assertEqual(sorted(self.G1.nodes()), sorted(['intel']))
        self.assertRaises(ValueError, self.G1.get_cpds, 'diff')
        self.assertRaises(ValueError, self.G1.get_cpds, 'grade')

    def tearDown(self):
        del self.G
        del self.G1