Example #1
# Assumed imports for this example (module paths can vary across pgmpy versions):
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination


def main():
    # Defining the network structure
    model = BayesianModel([('C', 'H'), ('P', 'H')])

    # H: host
    # P: prize
    # C: contestant

    # Defining the CPDs:
    cpd_c = TabularCPD('C', 3, [[0.33, 0.33, 0.33]])
    cpd_p = TabularCPD('P', 3, [[0.33, 0.33, 0.33]])
    cpd_h = TabularCPD('H', 3, [[0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 1.0, 0.5],
                                [0.5, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.5],
                                [0.5, 1.0, 0.0, 1.0, 0.5, 0.0, 0.0, 0.0, 0.0]],
                       evidence=['C', 'P'], evidence_card=[3, 3])

    # Associating the CPDs with the network structure.
    model.add_cpds(cpd_c, cpd_p, cpd_h)

    # Some other methods
    # model.get_cpds()

    # check_model checks the model structure and the associated CPDs and
    # returns True if everything is correct; otherwise it raises an exception
    # print(model.check_model())

    # Inferring the posterior probability of H given the evidence
    infer = VariableElimination(model)
    posterior_p = infer.query(['H'], evidence={'C': 0, 'P': 0})
    print(posterior_p['H'])  # older pgmpy versions return a dict of factors keyed by variable
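As a follow-up, here is a minimal, self-contained sketch of the check_model() and get_cpds() calls that the comments above only mention. It assumes the same import paths as added at the top of the example, and the root CPDs are normalised so each column sums exactly to 1:

from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD

monty = BayesianModel([('C', 'H'), ('P', 'H')])
cpd_c = TabularCPD('C', 3, [[0.33], [0.33], [0.34]])
cpd_p = TabularCPD('P', 3, [[0.33], [0.33], [0.34]])
cpd_h = TabularCPD('H', 3,
                   [[0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 1.0, 0.5],
                    [0.5, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.5],
                    [0.5, 1.0, 0.0, 1.0, 0.5, 0.0, 0.0, 0.0, 0.0]],
                   evidence=['C', 'P'], evidence_card=[3, 3])
monty.add_cpds(cpd_c, cpd_p, cpd_h)

# check_model() verifies that every CPD matches the structure and that its columns sum to 1.
print(monty.check_model())

# get_cpds() returns the TabularCPD objects attached to the model.
for cpd in monty.get_cpds():
    print(cpd.variable, cpd.variable_card)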
Example #2
    def setUp(self):
        self.sn2 = {'grade': ['A', 'B', 'F'], 'diff': ['high', 'low'],
                    'intel': ['poor', 'good', 'very good']}
        self.sn1 = {'speed': ['low', 'medium', 'high'],
                    'switch': ['on', 'off'], 'time': ['day', 'night']}

        self.phi1 = DiscreteFactor(['speed', 'switch', 'time'],
                                   [3, 2, 2], np.ones(12))
        self.phi2 = DiscreteFactor(['speed', 'switch', 'time'],
                                   [3, 2, 2], np.ones(12), state_names=self.sn1)

        self.cpd1 = TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                            [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                            [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3])
        self.cpd2 = TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                            [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                            [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3],
                               state_names=self.sn2)

        student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, [[0.2, 0.8]])
        intel_cpd = TabularCPD('intel', 2, [[0.3, 0.7]])
        grade_cpd = TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1],
                                            [0.1, 0.1, 0.1, 0.1],
                                            [0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 2])
        student.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        self.model1 = Inference(student)
        self.model2 = Inference(student, state_names=self.sn2)
Example #3
class TestBayesianModelMethods(unittest.TestCase):
    def setUp(self):
        self.G = BayesianModel([('a', 'd'), ('b', 'd'),
                                ('d', 'e'), ('b', 'c')])

    def test_moral_graph(self):
        moral_graph = self.G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()), ['a', 'b', 'c', 'd', 'e'])
        for edge in moral_graph.edges():
            self.assertTrue(edge in [('a', 'b'), ('a', 'd'), ('b', 'c'), ('d', 'b'), ('e', 'd')] or
                            (edge[1], edge[0]) in [('a', 'b'), ('a', 'd'), ('b', 'c'), ('d', 'b'), ('e', 'd')])

    def test_moral_graph_with_edge_present_over_parents(self):
        G = BayesianModel([('a', 'd'), ('d', 'e'), ('b', 'd'), ('b', 'c'), ('a', 'b')])
        moral_graph = G.moralize()
        self.assertListEqual(sorted(moral_graph.nodes()), ['a', 'b', 'c', 'd', 'e'])
        for edge in moral_graph.edges():
            self.assertTrue(edge in [('a', 'b'), ('c', 'b'), ('d', 'a'), ('d', 'b'), ('d', 'e')] or
                            (edge[1], edge[0]) in [('a', 'b'), ('c', 'b'), ('d', 'a'), ('d', 'b'), ('d', 'e')])

    def test_local_independencies(self):
        self.assertEqual(self.G.local_independencies('a'), Independencies(['a', ['b', 'c']]))
        self.assertEqual(self.G.local_independencies('c'), Independencies(['c', ['a', 'd', 'e'], 'b']))
        self.assertEqual(self.G.local_independencies('d'), Independencies(['d', 'c', ['b', 'a']]))
        self.assertEqual(self.G.local_independencies('e'), Independencies(['e', ['c', 'b', 'a'], 'd']))
        self.assertEqual(self.G.local_independencies('b'), Independencies(['b', 'a']))

    def tearDown(self):
        del self.G
Example #4
    def estimate(self):
        """
        Estimates the `BayesianModel` structure that fits best to the given data set,
        according to the scoring method supplied in the constructor.
        Exhaustively searches through all models. Only estimates network structure, no parametrization.

        Returns
        -------
        model: `BayesianModel` instance
            A `BayesianModel` with maximal score.

        Examples
        --------
        >>> import pandas as pd
        >>> import numpy as np
        >>> from pgmpy.estimators import ExhaustiveSearch
        >>> # create random data sample with 3 variables, where B and C are identical:
        >>> data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 2)), columns=list('AB'))
        >>> data['C'] = data['B']
        >>> est = ExhaustiveSearch(data)
        >>> best_model = est.estimate()
        >>> best_model
        <pgmpy.models.BayesianModel.BayesianModel object at 0x7f695c535470>
        >>> best_model.edges()
        [('B', 'C')]
        """

        best_dag = max(self.all_dags(), key=self.scoring_method.score)

        best_model = BayesianModel()
        best_model.add_nodes_from(sorted(best_dag.nodes()))
        best_model.add_edges_from(sorted(best_dag.edges()))
        return best_model
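A short sketch of calling estimate() with an explicit scoring method; passing scoring_method to ExhaustiveSearch mirrors the HillClimbSearch usage further below and is assumed to be supported here as well:

import numpy as np
import pandas as pd
from pgmpy.estimators import ExhaustiveSearch, K2Score

# Random data over three variables where Z is a copy of Y,
# so an edge between Y and Z should be recovered.
data = pd.DataFrame(np.random.randint(0, 3, size=(500, 3)), columns=list('XYZ'))
data['Z'] = data['Y']

est = ExhaustiveSearch(data, scoring_method=K2Score(data))
best_model = est.estimate()
print(sorted(best_model.edges()))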
Example #6
    def __init__(self, model):
        self.model = model
        model.check_model()

        if isinstance(model, JunctionTree):
            self.variables = set(chain(*model.nodes()))
        else:
            self.variables = model.nodes()

        self.cardinality = {}
        self.factors = defaultdict(list)

        if isinstance(model, BayesianModel):
            for node in model.nodes():
                cpd = model.get_cpds(node)
                cpd_as_factor = cpd.to_factor()
                self.cardinality[node] = cpd.variable_card

                for var in cpd.variables:
                    self.factors[var].append(cpd_as_factor)

        elif isinstance(model, (MarkovModel, FactorGraph, JunctionTree)):
            self.cardinality = model.get_cardinality()

            for factor in model.get_factors():
                for var in factor.variables:
                    self.factors[var].append(factor)

        elif isinstance(model, DynamicBayesianNetwork):
            self.start_bayesian_model = BayesianModel(model.get_intra_edges(0))
            self.start_bayesian_model.add_cpds(*model.get_cpds(time_slice=0))
            cpd_inter = [model.get_cpds(node) for node in model.get_interface_nodes(1)]
            self.interface_nodes = model.get_interface_nodes(0)
            self.one_and_half_model = BayesianModel(model.get_inter_edges() + model.get_intra_edges(1))
            self.one_and_half_model.add_cpds(*(model.get_cpds(time_slice=1) + cpd_inter))
Example #7
class TestInferenceBase(unittest.TestCase):
    def setUp(self):
        self.bayesian = BayesianModel([('a', 'b'), ('b', 'c'), ('c', 'd'), ('d', 'e')])
        a_cpd = TabularCPD('a', 2, [[0.4, 0.6]])
        b_cpd = TabularCPD('b', 2, [[0.2, 0.4], [0.3, 0.4]], evidence='a', evidence_card=[2])
        c_cpd = TabularCPD('c', 2, [[0.1, 0.2], [0.3, 0.4]], evidence='b', evidence_card=[2])
        d_cpd = TabularCPD('d', 2, [[0.4, 0.3], [0.2, 0.1]], evidence='c', evidence_card=[2])
        e_cpd = TabularCPD('e', 2, [[0.3, 0.2], [0.4, 0.1]], evidence='d', evidence_card=[2])
        self.bayesian.add_cpds(a_cpd, b_cpd, c_cpd, d_cpd, e_cpd)

        self.markov = MarkovModel([('a', 'b'), ('b', 'd'), ('a', 'c'), ('c', 'd')])
        factor_1 = Factor(['a', 'b'], [2, 2], np.array([100, 1, 1, 100]))
        factor_2 = Factor(['a', 'c'], [2, 2], np.array([40, 30, 100, 20]))
        factor_3 = Factor(['b', 'd'], [2, 2], np.array([1, 100, 100, 1]))
        factor_4 = Factor(['c', 'd'], [2, 2], np.array([60, 60, 40, 40]))
        self.markov.add_factors(factor_1, factor_2, factor_3, factor_4)

    def test_bayesian_inference_init(self):
        infer_bayesian = Inference(self.bayesian)
        self.assertEqual(set(infer_bayesian.variables), {'a', 'b', 'c', 'd', 'e'})
        self.assertEqual(infer_bayesian.cardinality, {'a': 2, 'b': 2, 'c': 2, 'd': 2, 'e': 2})
        # self.assertEqual(infer_bayesian.factors, {'a': [self.bayesian.get_cpd('a').to_factor(),
        #                                                 self.bayesian.get_cpd('b').to_factor()],
        #                                           'b': [self.bayesian.get_cpd('b').to_factor(),
        #                                                 self.bayesian.get_cpd('c').to_factor()],
        #                                           'c': [self.bayesian.get_cpd('c').to_factor(),
        #                                                 self.bayesian.get_cpd('d').to_factor()],
        #                                           'd': [self.bayesian.get_cpd('d').to_factor(),
        #                                                 self.bayesian.get_cpd('e').to_factor()],
        #                                           'e': [self.bayesian.get_cpd('e').to_factor()]})

    def test_markov_inference_init(self):
        infer_markov = Inference(self.markov)
        self.assertEqual(set(infer_markov.variables), {'a', 'b', 'c', 'd'})
        self.assertEqual(infer_markov.cardinality, {'a': 2, 'b': 2, 'c': 2, 'd': 2})
 def test_is_iequivalent(self):
     G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
     self.assertRaises(TypeError, G.is_iequivalent, MarkovModel())
     G1 = BayesianModel([('V', 'W'), ('W', 'X'), ('X', 'Y'), ('Z', 'Y')])
     G2 = BayesianModel([('W', 'V'), ('X', 'W'), ('X', 'Y'), ('Z', 'Y')])
     self.assertTrue(G1.is_iequivalent(G2))
     G3 = BayesianModel([('W', 'V'), ('W', 'X'), ('Y', 'X'), ('Z', 'Y')])
     self.assertFalse(G3.is_iequivalent(G2))
    def setUp(self):
        nodes = {'c': {'STATES': ['Present', 'Absent'],
                       'DESCRIPTION': '(c) Brain Tumor',
                       'YPOS': '11935',
                       'XPOS': '15250',
                       'TYPE': 'discrete'},
                 'a': {'STATES': ['Present', 'Absent'],
                       'DESCRIPTION': '(a) Metastatic Cancer',
                       'YPOS': '10465',
                       'XPOS': '13495',
                       'TYPE': 'discrete'},
                 'b': {'STATES': ['Present', 'Absent'],
                       'DESCRIPTION': '(b) Serum Calcium Increase',
                       'YPOS': '11965',
                       'XPOS': '11290',
                       'TYPE': 'discrete'},
                 'e': {'STATES': ['Present', 'Absent'],
                       'DESCRIPTION': '(e) Papilledema',
                       'YPOS': '13240',
                       'XPOS': '17305',
                       'TYPE': 'discrete'},
                 'd': {'STATES': ['Present', 'Absent'],
                       'DESCRIPTION': '(d) Coma',
                       'YPOS': '12985',
                       'XPOS': '13960',
                       'TYPE': 'discrete'}}
        model = BayesianModel([('b', 'd'), ('a', 'b'), ('a', 'c'), ('c', 'd'), ('c', 'e')])
        cpd_distribution = {'a': {'TYPE': 'discrete', 'DPIS': np.array([[0.2, 0.8]])},
                            'e': {'TYPE': 'discrete', 'DPIS': np.array([[0.8, 0.2],
                                                                        [0.6, 0.4]]), 'CONDSET': ['c'], 'CARDINALITY': [2]},
                            'b': {'TYPE': 'discrete', 'DPIS': np.array([[0.8, 0.2],
                                                                        [0.2, 0.8]]), 'CONDSET': ['a'], 'CARDINALITY': [2]},
                            'c': {'TYPE': 'discrete', 'DPIS': np.array([[0.2, 0.8],
                                                                        [0.05, 0.95]]), 'CONDSET': ['a'], 'CARDINALITY': [2]},
                            'd': {'TYPE': 'discrete', 'DPIS': np.array([[0.8, 0.2],
                                                                        [0.9, 0.1],
                                                                        [0.7, 0.3],
                                                                        [0.05, 0.95]]), 'CONDSET': ['b', 'c'], 'CARDINALITY': [2, 2]}}

        tabular_cpds = []
        for var, values in cpd_distribution.items():
            evidence = values['CONDSET'] if 'CONDSET' in values else []
            cpd = values['DPIS']
            evidence_card = values['CARDINALITY'] if 'CARDINALITY' in values else []
            states = nodes[var]['STATES']
            cpd = TabularCPD(var, len(states), cpd,
                             evidence=evidence,
                             evidence_card=evidence_card)
            tabular_cpds.append(cpd)
        model.add_cpds(*tabular_cpds)

        for var, properties in nodes.items():
            model.node[var] = properties

        self.maxDiff = None
        self.writer = XMLBeliefNetwork.XBNWriter(model=model)
    def to_bayesian_model(self):
        """
        Creates a Bayesian Model which is a minimum I-Map for this markov model.

        The ordering of parents may not remain constant; it depends on the
        ordering of the variables in the junction tree, which is not fixed
        across runs.

        Examples
        --------
        >>> import numpy as np
        >>> from pgmpy.models import MarkovModel
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> mm = MarkovModel()
        >>> mm.add_nodes_from(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'])
        >>> mm.add_edges_from([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'),
        ...                    ('x2', 'x5'), ('x3', 'x6'), ('x4', 'x6'),
        ...                    ('x4', 'x7'), ('x5', 'x7')])
        >>> phi = [DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()]
        >>> mm.add_factors(*phi)
        >>> bm = mm.to_bayesian_model()
        """
        from pgmpy.models import BayesianModel

        bm = BayesianModel()
        var_clique_dict = defaultdict(tuple)
        var_order = []

        # Create a junction tree from the markov model.
        # Creation of clique tree involves triangulation, finding maximal cliques
        # and creating a tree from these cliques
        junction_tree = self.to_junction_tree()

        # create an ordering of the nodes based on the ordering of the clique
        # in which it appeared first
        root_node = junction_tree.nodes()[0]
        bfs_edges = nx.bfs_edges(junction_tree, root_node)
        for node in root_node:
            var_clique_dict[node] = root_node
            var_order.append(node)
        for edge in bfs_edges:
            clique_node = edge[1]
            for node in clique_node:
                if not var_clique_dict[node]:
                    var_clique_dict[node] = clique_node
                    var_order.append(node)

        # create a bayesian model by adding edges from parent of node to node as
        # par(x_i) = (var(c_k) - x_i) \cap {x_1, ..., x_{i-1}}
        for node_index in range(len(var_order)):
            node = var_order[node_index]
            node_parents = (set(var_clique_dict[node]) - set([node])).intersection(
                set(var_order[:node_index]))
            bm.add_edges_from([(parent, node) for parent in node_parents])
            # TODO : Convert factor into CPDs
        return bm
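Continuing the docstring example above as a hedged sketch; note that the recovered edge directions can differ between runs because the junction-tree ordering is not deterministic:

import numpy as np
from pgmpy.models import MarkovModel
from pgmpy.factors.discrete import DiscreteFactor

mm = MarkovModel([('x1', 'x3'), ('x1', 'x4'), ('x2', 'x4'), ('x2', 'x5'),
                  ('x3', 'x6'), ('x4', 'x6'), ('x4', 'x7'), ('x5', 'x7')])
mm.add_factors(*[DiscreteFactor(edge, [2, 2], np.random.rand(4)) for edge in mm.edges()])

bm = mm.to_bayesian_model()
# All Markov-model variables appear in the I-map; only the edge directions vary between runs.
print(sorted(bm.nodes()))
print(bm.edges())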
class BaseEliminationTest(TestCase):
    def setUp(self):
        self.model = BayesianModel([('diff', 'grade'), ('intel', 'grade'), ('intel', 'sat'),
                                    ('grade', 'reco')])
        raw_data = np.random.randint(low=0, high=2, size=(1000, 5))
        data = pd.DataFrame(raw_data, columns=['diff', 'grade', 'intel', 'sat', 'reco'])
        self.model.fit(data)

    def tearDown(self):
        del self.model
        del self.elimination_order
 def setUp(self):
     self.G = BayesianModel([('a', 'd'), ('b', 'd'),
                             ('d', 'e'), ('b', 'c')])
     self.G1 = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
     diff_cpd = TabularCPD('diff', 2, values=[[0.2], [0.8]])
     intel_cpd = TabularCPD('intel', 3, values=[[0.5], [0.3], [0.2]])
     grade_cpd = TabularCPD('grade', 3, values=[[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                            evidence=['diff', 'intel'], evidence_card=[2, 3])
     self.G1.add_cpds(diff_cpd, intel_cpd, grade_cpd)
     self.G2 = BayesianModel([('d', 'g'), ('g', 'l'), ('i', 'g'), ('i', 'l')])
    def test_estimate_from_independencies(self):
        ind = Independencies(['B', 'C'], ['A', ['B', 'C'], 'D'])
        ind = ind.closure()
        model = ConstraintBasedEstimator.estimate_from_independencies("ABCD", ind)

        self.assertSetEqual(set(model.edges()),
                            set([('B', 'D'), ('A', 'D'), ('C', 'D')]))

        model1 = BayesianModel([('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')])
        model2 = ConstraintBasedEstimator.estimate_from_independencies(
                            model1.nodes(),
                            model1.get_independencies())

        self.assertTrue(set(model2.edges()) == set(model1.edges()) or
                        set(model2.edges()) == set([('B', 'C'), ('A', 'C'), ('C', 'E'), ('D', 'B')]))
    def setUp(self):
        self.model_disconnected = BayesianModel()
        self.model_disconnected.add_nodes_from(['A', 'B', 'C', 'D', 'E'])
        self.model_connected = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])

        self.model2 = BayesianModel([('A', 'C'), ('B', 'C')])
        self.data1 = pd.DataFrame(data={'A': [0, 0, 1], 'B': [0, 1, 0], 'C': [1, 1, 0]})
        self.data2 = pd.DataFrame(data={'A': [0, np.NaN, 1],
                                        'B': [0, 1, 0],
                                        'C': [1, 1, np.NaN],
                                        'D': [np.NaN, 'Y', np.NaN]})

        # data_link - "https://www.kaggle.com/c/titanic/download/train.csv"
        self.titanic_data = pd.read_csv('pgmpy/tests/test_estimators/testdata/titanic_train.csv', dtype=str)
        self.titanic_data2 = self.titanic_data[["Survived", "Sex", "Pclass"]]
Example #15
def bayesnet_examples():
    import numpy as np
    import pandas as pd
    from pgmpy.factors import TabularCPD
    from pgmpy.models import BayesianModel

    student_model = BayesianModel([('D', 'G'),
                                   ('I', 'G'),
                                   ('G', 'L'),
                                   ('I', 'S')])
    # we can generate some random data.
    raw_data = np.random.randint(low=0, high=2, size=(1000, 5))
    data = pd.DataFrame(raw_data, columns=['D', 'I', 'G', 'L', 'S'])
    data_train = data[: int(data.shape[0] * 0.75)]
    student_model.fit(data_train)
    student_model.get_cpds()

    data_test = data[int(0.75 * data.shape[0]):].copy()
    data_test.drop('D', axis=1, inplace=True)
    student_model.predict(data_test)

    grade_cpd = TabularCPD(
        variable='G',
        variable_card=3,
        values=[[0.3, 0.05, 0.9, 0.5],
                [0.4, 0.25, 0.08, 0.3],
                [0.3, 0.7, 0.02, 0.2]],
        evidence=['I', 'D'],
        evidence_card=[2, 2])
    difficulty_cpd = TabularCPD(
        variable='D',
        variable_card=2,
        values=[[0.6, 0.4]])
    intel_cpd = TabularCPD(
        variable='I',
        variable_card=2,
        values=[[0.7, 0.3]])
    letter_cpd = TabularCPD(
        variable='L',
        variable_card=2,
        values=[[0.1, 0.4, 0.99],
                [0.9, 0.6, 0.01]],
        evidence=['G'],
        evidence_card=[3])
    sat_cpd = TabularCPD(
        variable='S',
        variable_card=2,
        values=[[0.95, 0.2],
                [0.05, 0.8]],
        evidence=['I'],
        evidence_card=[2])
    student_model.add_cpds(grade_cpd, difficulty_cpd,
                           intel_cpd, letter_cpd,
                           sat_cpd)
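A hedged, self-contained follow-up showing how the student network would typically be queried once fitted; older pgmpy versions return a dict of factors from query(), newer ones a single factor:

import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination

# Refit the same structure on random data, then query the posterior of the grade.
model = BayesianModel([('D', 'G'), ('I', 'G'), ('G', 'L'), ('I', 'S')])
data = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
                    columns=['D', 'I', 'G', 'L', 'S'])
model.fit(data)

infer = VariableElimination(model)
posterior = infer.query(['G'], evidence={'D': 0, 'I': 1})
print(posterior)  # dict of factors in older pgmpy versions, a single factor in newer ones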
Example #16
    def setUp(self):
        self.junction_tree = JunctionTree([(('A', 'B'), ('B', 'C')),
                                           (('B', 'C'), ('C', 'D'))])
        phi1 = Factor(['A', 'B'], [2, 3], range(6))
        phi2 = Factor(['B', 'C'], [3, 2], range(6))
        phi3 = Factor(['C', 'D'], [2, 2], range(4))
        self.junction_tree.add_factors(phi1, phi2, phi3)

        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'), ('J', 'Q'),
                                             ('J', 'L'), ('G', 'L')])
        cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])
        cpd_j = TabularCPD('J', 2,
                           [[0.9, 0.6, 0.7, 0.1],
                            [0.1, 0.4, 0.3, 0.9]],
                           ['R', 'A'], [2, 2])
        cpd_q = TabularCPD('Q', 2,
                           [[0.9, 0.2],
                            [0.1, 0.8]],
                           ['J'], [2])
        cpd_l = TabularCPD('L', 2,
                           [[0.9, 0.45, 0.8, 0.1],
                            [0.1, 0.55, 0.2, 0.9]],
                           ['G', 'J'], [2, 2])
        cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)
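A small sketch of what such fixtures are usually exercised with: calibrating a clique tree via belief propagation and querying it. A reduced two-parent network is used here for brevity, and the BeliefPropagation API (calibrate(), query()) is assumed to match the pgmpy version used in these examples:

from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import BeliefPropagation

model = BayesianModel([('A', 'J'), ('R', 'J')])
model.add_cpds(TabularCPD('A', 2, [[0.2], [0.8]]),
               TabularCPD('R', 2, [[0.4], [0.6]]),
               TabularCPD('J', 2, [[0.9, 0.6, 0.7, 0.1],
                                   [0.1, 0.4, 0.3, 0.9]],
                          evidence=['R', 'A'], evidence_card=[2, 2]))

bp = BeliefPropagation(model)
bp.calibrate()  # builds the clique tree and calibrates it by message passing
print(bp.query(['J'], evidence={'A': 0}))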
Example #17
    def get_model(self):
        """
        Returns the fitted Bayesian model.

        Examples
        --------
        >>> from pgmpy.readwrite import BIFReader
        >>> reader = BIFReader("bif_test.bif")
        >>> reader.get_model()
        <pgmpy.models.BayesianModel.BayesianModel object at 0x7f20af154320>
        """
        try:
            model = BayesianModel(self.variable_edges)
            model.name = self.network_name
            model.add_nodes_from(self.variable_names)

            tabular_cpds = []
            for var in sorted(self.variable_cpds.keys()):
                values = self.variable_cpds[var]
                cpd = TabularCPD(var, len(self.variable_states[var]), values,
                                 evidence=self.variable_parents[var],
                                 evidence_card=[len(self.variable_states[evidence_var])
                                                for evidence_var in self.variable_parents[var]])
                tabular_cpds.append(cpd)

            model.add_cpds(*tabular_cpds)
            for node, properties in self.variable_properties.items():
                for prop in properties:
                    prop_name, prop_value = map(lambda t: t.strip(), prop.split('='))
                    model.node[node][prop_name] = prop_value

            return model

        except AttributeError:
            raise AttributeError('First get states of variables, edges, parents and network name')
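A brief usage sketch; "bif_test.bif" is just the placeholder file name from the docstring above:

from pgmpy.readwrite import BIFReader

reader = BIFReader("bif_test.bif")   # placeholder file name from the docstring above
model = reader.get_model()
print(model.check_model())           # True once all CPDs are consistent with the structure
print(sorted(model.nodes()))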
Example #18
    def test_get_cpds1(self):
        self.model = BayesianModel([('A', 'AB')])
        cpd_a = TabularCPD('A', 2, np.random.rand(2, 1))
        cpd_ab = TabularCPD('AB', 2, np.random.rand(2, 2), evidence=['A'],
                            evidence_card=[2])

        self.model.add_cpds(cpd_a, cpd_ab)
        self.assertEqual(self.model.get_cpds('A').variable, 'A')
        self.assertEqual(self.model.get_cpds('AB').variable, 'AB')
Example #19
    def setUp(self):
        self.maxDiff = None
        edges = [['family-out', 'dog-out'],
                 ['bowel-problem', 'dog-out'],
                 ['family-out', 'light-on'],
                 ['dog-out', 'hear-bark']]
        cpds = {'bowel-problem': np.array([[0.01],
                                           [0.99]]),
                'dog-out': np.array([[0.99, 0.01, 0.97, 0.03],
                                     [0.9, 0.1, 0.3, 0.7]]),
                'family-out': np.array([[0.15],
                                        [0.85]]),
                'hear-bark': np.array([[0.7, 0.3],
                                       [0.01, 0.99]]),
                'light-on': np.array([[0.6, 0.4],
                                      [0.05, 0.95]])}
        states = {'bowel-problem': ['true', 'false'],
                  'dog-out': ['true', 'false'],
                  'family-out': ['true', 'false'],
                  'hear-bark': ['true', 'false'],
                  'light-on': ['true', 'false']}
        parents = {'bowel-problem': [],
                   'dog-out': ['bowel-problem', 'family-out'],
                   'family-out': [],
                   'hear-bark': ['dog-out'],
                   'light-on': ['family-out']}

        self.bayesmodel = BayesianModel(edges)

        tabular_cpds = []
        for var, values in cpds.items():
            cpd = TabularCPD(var, len(states[var]), values,
                             evidence=parents[var],
                             evidence_card=[len(states[evidence_var])
                                            for evidence_var in parents[var]])
            tabular_cpds.append(cpd)
        self.bayesmodel.add_cpds(*tabular_cpds)
        self.bayeswriter = UAIWriter(self.bayesmodel)

        edges = {('var_0', 'var_1'), ('var_0', 'var_2'), ('var_1', 'var_2')}
        self.markovmodel = MarkovModel(edges)
        tables = [(['var_0', 'var_1'],
                   ['4.000', '2.400', '1.000', '0.000']),
                  (['var_0', 'var_1', 'var_2'],
                   ['2.2500', '3.2500', '3.7500', '0.0000', '0.0000', '10.0000',
                    '1.8750', '4.0000', '3.3330', '2.0000', '2.0000', '3.4000'])]
        domain = {'var_1': '2', 'var_2': '3', 'var_0': '2'}
        factors = []
        for table in tables:
            variables = table[0]
            cardinality = [int(domain[var]) for var in variables]
            values = list(map(float, table[1]))
            factor = DiscreteFactor(variables, cardinality, values)
            factors.append(factor)
        self.markovmodel.add_factors(*factors)
        self.markovwriter = UAIWriter(self.markovmodel)
Example #20
    def setUp(self):
        edges = [
            ["family-out", "dog-out"],
            ["bowel-problem", "dog-out"],
            ["family-out", "light-on"],
            ["dog-out", "hear-bark"],
        ]
        cpds = {
            "bowel-problem": np.array([[0.01], [0.99]]),
            "dog-out": np.array([[0.99, 0.01, 0.97, 0.03], [0.9, 0.1, 0.3, 0.7]]),
            "family-out": np.array([[0.15], [0.85]]),
            "hear-bark": np.array([[0.7, 0.3], [0.01, 0.99]]),
            "light-on": np.array([[0.6, 0.4], [0.05, 0.95]]),
        }
        states = {
            "bowel-problem": ["true", "false"],
            "dog-out": ["true", "false"],
            "family-out": ["true", "false"],
            "hear-bark": ["true", "false"],
            "light-on": ["true", "false"],
        }
        parents = {
            "bowel-problem": [],
            "dog-out": ["family-out", "bowel-problem"],
            "family-out": [],
            "hear-bark": ["dog-out"],
            "light-on": ["family-out"],
        }
        properties = {
            "bowel-problem": ["position = (190, 69)"],
            "dog-out": ["position = (155, 165)"],
            "family-out": ["position = (112, 69)"],
            "hear-bark": ["position = (154, 241)"],
            "light-on": ["position = (73, 165)"],
        }

        self.model = BayesianModel(edges)

        tabular_cpds = []
        for var, values in cpds.items():
            cpd = TabularCPD(
                var,
                len(states[var]),
                values,
                evidence=parents[var],
                evidence_card=[len(states[evidence_var]) for evidence_var in parents[var]],
            )
            tabular_cpds.append(cpd)
        self.model.add_cpds(*tabular_cpds)

        for node, properties in properties.items():
            for prop in properties:
                prop_name, prop_value = map(lambda t: t.strip(), prop.split("="))
                self.model.node[node][prop_name] = prop_value

        self.writer = XMLBIFWriter(model=self.model)
Example #21
    def get_model(self):
        model = BayesianModel(self.get_edges())
        model.name = self.network_name

        tabular_cpds = []
        for var, values in self.variable_CPD.items():
            cpd = TabularCPD(var, len(self.variable_states[var]), values,
                             evidence=self.variable_parents[var],
                             evidence_card=[len(self.variable_states[evidence_var])
                                            for evidence_var in self.variable_parents[var]])
            tabular_cpds.append(cpd)

        model.add_cpds(*tabular_cpds)

        for node, properties in self.variable_property.items():
            for prop in properties:
                prop_name, prop_value = map(lambda t: t.strip(), prop.split('='))
                model.node[node][prop_name] = prop_value

        return model
Example #22
class TestBayesianModelFitPredict(unittest.TestCase):
    def setUp(self):
        self.model_disconnected = BayesianModel()
        self.model_disconnected.add_nodes_from(['A', 'B', 'C', 'D', 'E'])

        self.model_connected = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])

    def test_disconnected_fit(self):
        values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
                              columns=['A', 'B', 'C', 'D', 'E'])
        self.model_disconnected.fit(values)

        for node in ['A', 'B', 'C', 'D', 'E']:
            cpd = self.model_disconnected.get_cpds(node)
            self.assertEqual(cpd.variable, node)
            np_test.assert_array_equal(cpd.cardinality, np.array([2]))
            value = (values.loc[:, node].value_counts() /
                     values.loc[:, node].value_counts().sum())
            value = value.reindex(sorted(value.index)).values
            np_test.assert_array_equal(cpd.values, value)

    def test_connected_predict(self):
        np.random.seed(42)
        values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
                              columns=['A', 'B', 'C', 'D', 'E'])
        fit_data = values[:800]
        predict_data = values[800:].copy()
        self.model_connected.fit(fit_data)
        self.assertRaises(ValueError, self.model_connected.predict, predict_data)
        predict_data.drop('E', axis=1, inplace=True)
        e_predict = self.model_connected.predict(predict_data)
        np_test.assert_array_equal(e_predict.values.ravel(),
                                   np.array([1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
                                             1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0,
                                             0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0,
                                             0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1,
                                             0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1,
                                             1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1,
                                             1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0,
                                             1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1,
                                             0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1,
                                             1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1,
                                             1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1,
                                             0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0,
                                             1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1,
                                             1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1,
                                             1, 1, 1, 0]))

    def tearDown(self):
        del self.model_connected
        del self.model_disconnected
    def setUp(self):
        edges = [['family-out', 'dog-out'],
                 ['bowel-problem', 'dog-out'],
                 ['family-out', 'light-on'],
                 ['dog-out', 'hear-bark']]

        cpds = {'bowel-problem': np.array([[0.01],
                                           [0.99]]),
                'dog-out': np.array([[0.99, 0.01, 0.97, 0.03],
                                     [0.9, 0.1, 0.3, 0.7]]),
                'family-out': np.array([[0.15],
                                        [0.85]]),
                'hear-bark': np.array([[0.7, 0.3],
                                       [0.01, 0.99]]),
                'light-on': np.array([[0.6, 0.4],
                                      [0.05, 0.95]])}

        states = {'bowel-problem': ['true', 'false'],
                  'dog-out': ['true', 'false'],
                  'family-out': ['true', 'false'],
                  'hear-bark': ['true', 'false'],
                  'light-on': ['true', 'false']}

        parents = {'bowel-problem': [],
                   'dog-out': ['family-out', 'bowel-problem'],
                   'family-out': [],
                   'hear-bark': ['dog-out'],
                   'light-on': ['family-out']}

        properties = {'bowel-problem': ['position = (335, 99)'],
                      'dog-out': ['position = (300, 195)'],
                      'family-out': ['position = (257, 99)'],
                      'hear-bark': ['position = (296, 268)'],
                      'light-on': ['position = (218, 195)']}

        self.model = BayesianModel(edges)

        tabular_cpds = []
        for var in sorted(cpds.keys()):
            values = cpds[var]
            cpd = TabularCPD(var, len(states[var]), values,
                             evidence=parents[var],
                             evidence_card=[len(states[evidence_var])
                                            for evidence_var in parents[var]])
            tabular_cpds.append(cpd)
        self.model.add_cpds(*tabular_cpds)

        for node, properties in properties.items():
            for prop in properties:
                prop_name, prop_value = map(lambda t: t.strip(), prop.split('='))
                self.model.node[node][prop_name] = prop_value

        self.writer = BIFWriter(model=self.model)
 def test_get_independencies(self):
     chain = BayesianModel([('X', 'Y'), ('Y', 'Z')])
     self.assertEqual(chain.get_independencies(), Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
     fork = BayesianModel([('Y', 'X'), ('Y', 'Z')])
     self.assertEqual(fork.get_independencies(), Independencies(('X', 'Z', 'Y'), ('Z', 'X', 'Y')))
     collider = BayesianModel([('X', 'Y'), ('Z', 'Y')])
     self.assertEqual(collider.get_independencies(), Independencies(('X', 'Z'), ('Z', 'X')))
 def test_get_immoralities(self):
     G = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y')])
     self.assertEqual(G.get_immoralities(), {('w', 'x'), ('w', 'z')})
     G1 = BayesianModel([('x', 'y'), ('z', 'y'), ('z', 'x'), ('w', 'y')])
     self.assertEqual(G1.get_immoralities(), {('w', 'x'), ('w', 'z')})
     G2 = BayesianModel([('x', 'y'), ('z', 'y'), ('x', 'z'), ('w', 'y'), ('w', 'x')])
     self.assertEqual(G2.get_immoralities(), {('w', 'z')})
    def test_get_cpds1(self):
        self.model = BayesianModel([('A', 'AB')])
        cpd_a = TabularCPD('A', 2, values=np.random.rand(2, 1))
        cpd_ab = TabularCPD('AB', 2, values=np.random.rand(2, 2),
                            evidence=['A'], evidence_card=[2])

        self.model.add_cpds(cpd_a, cpd_ab)
        self.assertEqual(self.model.get_cpds('A').variable, 'A')
        self.assertEqual(self.model.get_cpds('AB').variable, 'AB')
        self.assertRaises(ValueError, self.model.get_cpds, 'B')

        self.model.add_node('B')
        self.assertIsNone(self.model.get_cpds('B'))
    def test_build_skeleton(self):
        ind = Independencies(['B', 'C'], ['A', ['B', 'C'], 'D'])
        ind = ind.closure()
        skel1, sep_sets1 = ConstraintBasedEstimator.build_skeleton("ABCD", ind)
        self.assertTrue(self._edge_list_equal(skel1.edges(), [('A', 'D'), ('B', 'D'), ('C', 'D')]))

        sep_sets_ref1 = {frozenset({'A', 'C'}): (), frozenset({'A', 'B'}): (), frozenset({'C', 'B'}): ()}
        self.assertEqual(sep_sets1, sep_sets_ref1)

        model = BayesianModel([('A', 'C'), ('B', 'C'), ('B', 'D'), ('C', 'E')])
        skel2, sep_sets2 = ConstraintBasedEstimator.build_skeleton(model.nodes(), model.get_independencies())
        self.assertTrue(self._edge_list_equal(skel2, [('D', 'B'), ('A', 'C'), ('B', 'C'), ('C', 'E')]))

        sep_sets_ref2 = {frozenset({'D', 'C'}): ('B',),
                         frozenset({'E', 'B'}): ('C',),
                         frozenset({'A', 'D'}): (),
                         frozenset({'E', 'D'}): ('C',),
                         frozenset({'E', 'A'}): ('C',),
                         frozenset({'A', 'B'}): ()}
        # witnesses/separators might change on each run, so we cannot compare directly
        self.assertEqual(sep_sets2.keys(), sep_sets_ref2.keys())
        self.assertEqual([len(v) for v in sorted(sep_sets2.values())],
                         [len(v) for v in sorted(sep_sets_ref2.values())])
    def minimal_imap(self, order):
        """
        Returns a Bayesian Model which is minimal IMap of the Joint Probability Distribution
        considering the order of the variables.

        Parameters
        ----------
        order: array-like
            The order of the random variables.

        Examples
        --------
        >>> import numpy as np
        >>> from pgmpy.factors import JointProbabilityDistribution
        >>> prob = JointProbabilityDistribution(['x1', 'x2', 'x3'], [2, 3, 2], np.ones(12)/12)
        >>> bayesian_model = prob.minimal_imap(order=['x2', 'x1', 'x3'])
        >>> bayesian_model
        <pgmpy.models.models.models at 0x7fd7440a9320>
        >>> bayesian_model.edges()
        [('x1', 'x3'), ('x2', 'x3')]
        """
        from pgmpy.models import BayesianModel

        def get_subsets(u):
            for r in range(len(u) + 1):
                for i in itertools.combinations(u, r):
                    yield i

        G = BayesianModel()
        for variable_index in range(len(order)):
            u = order[:variable_index]
            for subset in get_subsets(u):
                if (len(subset) < len(u) and
                    self.check_independence([order[variable_index]], set(u)-set(subset), subset, True)):
                    G.add_edges_from([(variable, order[variable_index]) for variable in subset])
        return G
    def setUp(self):
        self.rand_data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 2)), columns=list('AB'))
        self.rand_data['C'] = self.rand_data['B']
        self.est_rand = HillClimbSearch(self.rand_data, scoring_method=K2Score(self.rand_data))
        self.model1 = BayesianModel()
        self.model1.add_nodes_from(['A', 'B', 'C'])
        self.model2 = self.model1.copy()
        self.model2.add_edge('A', 'B')

        # link to dataset: "https://www.kaggle.com/c/titanic/download/train.csv"
        self.titanic_data = pd.read_csv('pgmpy/tests/test_estimators/testdata/titanic_train.csv')
        self.titanic_data1 = self.titanic_data[["Survived", "Sex", "Pclass", "Age", "Embarked"]]
        self.titanic_data2 = self.titanic_data[["Survived", "Sex", "Pclass"]]
        self.est_titanic1 = HillClimbSearch(self.titanic_data1)
        self.est_titanic2 = HillClimbSearch(self.titanic_data2)
Example #30
    def get_model(self):
        """
        Returns an instance of Bayesian Model.
        """
        model = BayesianModel(self.edges)
        model.name = self.model_name

        tabular_cpds = []
        for var, values in self.variable_CPD.items():
            evidence = values['CONDSET'] if 'CONDSET' in values else []
            cpd = values['DPIS']
            evidence_card = values['CARDINALITY'] if 'CARDINALITY' in values else []
            states = self.variables[var]['STATES']
            cpd = TabularCPD(var, len(states), cpd,
                             evidence=evidence,
                             evidence_card=evidence_card)
            tabular_cpds.append(cpd)

        model.add_cpds(*tabular_cpds)

        for var, properties in self.variables.items():
            model.node[var] = properties

        return model
Example #31
import pandas as pd
from pgmpy.estimators import BayesianEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination
with open('data7_name.csv', 'r') as f:
    attributes = f.readline().strip().split(',')
heartDisease = pd.read_csv('data7.csv', names=attributes)
print("\nAttributes and datatypes")
print(heartDisease.dtypes)
model = BayesianModel([('age', 'trestbps'), ('age', 'fbs'), ('sex', 'trestbps'),
                       ('exang', 'trestbps'), ('trestbps', 'heartdisease'),
                       ('fbs', 'heartdisease')])
model.fit(heartDisease, BayesianEstimator)
HeartDisease_infer = VariableElimination(model)
print("\n 1. Probability of heart disease given age=28")
q = HeartDisease_infer.query(['heartdisease'], evidence={'age': 28})
print(q['heartdisease'])
print("\n 2. Probability of heart disease for male")
q = HeartDisease_infer.query(['heartdisease'], evidence={'sex': 1})
print(q['heartdisease'])
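The same workflow as a self-contained sketch on synthetic stand-in data (the data7 CSV files are not included here), keeping the positional estimator argument used above:

import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import BayesianEstimator
from pgmpy.inference import VariableElimination

# Synthetic stand-in for data7.csv with binary columns, just to exercise the API.
cols = ['age', 'sex', 'exang', 'trestbps', 'fbs', 'heartdisease']
df = pd.DataFrame(np.random.randint(0, 2, size=(500, len(cols))), columns=cols)

model = BayesianModel([('age', 'trestbps'), ('age', 'fbs'), ('sex', 'trestbps'),
                       ('exang', 'trestbps'), ('trestbps', 'heartdisease'),
                       ('fbs', 'heartdisease')])
model.fit(df, BayesianEstimator)

infer = VariableElimination(model)
print(infer.query(['heartdisease'], evidence={'age': 0, 'sex': 1}))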
Example #32
    def build_causal_model(self):
        #################################
        # Defining the model structure
        #################################
        # PP = Passenger Position
        # CP = Cab Position
        # DP = Destination Position of the passenger
        # onPP = the cab is on the Passenger Position
        # onDP = the cab is on the Destination Position
        # inC = passenger is in the cab

        self._causal_model = BayesianModel(
            [
                ('PP', 'onPP'),
                ('CP', 'onPP'),
                ('CP', 'onDP'),
                ('DP', 'onDP'),
            
                ('inC', 'X'),
                ('onPP', 'X'),
                ('onDP', 'X'),

                ('onPP', 'Y'),
                ('onDP', 'Y'),
                ('inC', 'Y'),
                ('X', 'Y'),
            ]
        )

        # Defining individual CPDs.
        cpd_PP = TabularCPD(
            variable='PP', 
            variable_card=25, 
            values=[[0.04] for _ in range(0,25)], #All states have the same probability
            state_names={'PP': ['state ' + str(i) for i in range(0,25)]}
            )
        cpd_CP = TabularCPD(
            variable='CP', 
            variable_card=25, 
            values=[[0.04] for _ in range(0,25)], #All states have the same probability
            state_names={'CP': ['cab state ' + str(i) for i in range(0,25)]}
            )        
        cpd_DP = TabularCPD(
            variable='DP',
            variable_card=25,
            values=[
                [0.25], [0], [0], [0], [0.25],
                [0], [0], [0], [0], [0],
                [0], [0], [0], [0], [0],
                [0], [0], [0], [0], [0],
                [0.25], [0], [0], [0.25], [0],        
            ],
            state_names={'DP': ['destination ' + str(i) for i in range(0,25)]}
            )
        cpd_onPP = TabularCPD(
            variable='onPP',
            variable_card=2,
            values=[
                np.ndarray.flatten(np.ones(25) - np.diag(np.ones(25))),
                np.ndarray.flatten(np.diag(np.ones(25)))
            ],
            evidence=['PP', 'CP'],
            evidence_card=[25, 25],
            state_names={
                'onPP': ['False', 'True'],
                'PP': ['state ' + str(i) for i in range(0,25)],
                'CP': ['cab state ' + str(i) for i in range(0,25)]
                }
            ) 
        cpd_onDP = TabularCPD(
            variable='onDP', 
            variable_card=2, 
            values=[
                np.ndarray.flatten(np.ones(25) - np.diag(np.ones(25))),
                np.ndarray.flatten(np.diag(np.ones(25)))
            ],
            evidence=['DP', 'CP'],
            evidence_card=[25, 25],
            state_names={
                'onDP': ['False', 'True'], 
                'DP': ['destination ' + str(i) for i in range(0,25)],
                'CP': ['cab state ' + str(i) for i in range(0,25)]
                }
            )
        cpd_inC = TabularCPD(
            variable='inC', 
            variable_card=2, 
            values=[[0.5], [0.5]],
            state_names={'inC': ['False', 'True']}
            )       
        cpd_X = TabularCPD(
            variable='X', 
            variable_card=2, 
            values=[
                [0.5, 0.5, 0.5, 0, 1, 0.5, 1, 0], 
                [0.5, 0.5, 0.5, 1, 0, 0.5, 0, 1]
            ],
            evidence=['onPP', 'onDP', 'inC'],
            evidence_card=[2, 2, 2],
            state_names={
                'X': ['Pickup', 'Dropoff'],
                'onPP': ['False', 'True'],
                'onDP': ['False', 'True'],
                'inC': ['False', 'True']
                }
            )
        cpd_Y = TabularCPD(
            variable='Y',
            variable_card=2,
            values=[
                [1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], 
                [0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]
            ],
            evidence=['X', 'inC', 'onDP', 'onPP'],
            evidence_card=[2, 2, 2, 2],
            state_names={
                'Y': ['False', 'True'],
                'X': ['Pickup', 'Dropoff'],
                'inC': ['False', 'True'],
                'onDP': ['False', 'True'],
                'onPP': ['False', 'True']
                }
                )

        # Associating the CPDs with the network
        self._causal_model.add_cpds(cpd_PP, cpd_DP, cpd_CP, cpd_onPP, cpd_onDP, cpd_inC, cpd_X, cpd_Y)

        # check_model checks for the network structure and CPDs and verifies that the CPDs are correctly 
        # defined and sum to 1.
        self._causal_model.check_model()
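A hedged sketch of querying the causal model built above; `agent` is a hypothetical instance of the surrounding class, and because the CPDs carry state_names, evidence is passed as state labels (this requires a pgmpy version with state_names support):

from pgmpy.inference import VariableElimination

# `agent` is assumed to be an instance of the class above,
# after agent.build_causal_model() has been called.
infer = VariableElimination(agent._causal_model)

# Because the CPDs were defined with state_names, evidence is given as state labels.
result = infer.query(['Y'], evidence={'X': 'Pickup', 'inC': 'False',
                                      'onPP': 'True', 'onDP': 'False'})
print(result)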
Example #33
 def test_class_init_with_data_nonstring(self):
     BayesianModel([(1, 2), (2, 3)])
Example #34
class BayesianNetwork:
    """
        Base class for Bayesian Network (BN), a probabilistic weighted DAG where nodes represent variables and
        edges represent the causal relationships between them.

        ``BayesianNetwork`` stores nodes with their possible states, edges and
        conditional probability distributions (CPDs) of each node.

        ``BayesianNetwork`` is built on top of the ``StructureModel``, which is an extension of ``networkx.DiGraph``
        (see :func:`causalnex.structure.structuremodel.StructureModel`).

        In order to define the ``BayesianNetwork``, users should provide a relevant ``StructureModel``.
        Once ``BayesianNetwork`` is initialised, no changes to the ``StructureModel`` can be made
        and CPDs can be learned from the data.

        The learned CPDs can be then used for likelihood estimation and predictions.

        Example:
        ::
            >>> # Create a Bayesian Network with a manually defined DAG.
            >>> from causalnex.structure import StructureModel
            >>> from causalnex.network import BayesianNetwork
            >>>
            >>> sm = StructureModel()
            >>> sm.add_edges_from([
            >>>                    ('rush_hour', 'traffic'),
            >>>                    ('weather', 'traffic')
            >>>                    ])
            >>> bn = BayesianNetwork(sm)
            >>> # A created ``BayesianNetwork`` stores nodes and edges defined by the ``StructureModel``
            >>> bn.nodes
            ['rush_hour', 'traffic', 'weather']
            >>>
            >>> bn.edges
            [('rush_hour', 'traffic'), ('weather', 'traffic')]
            >>> # A ``BayesianNetwork`` doesn't store any CPDs yet
            >>> bn.cpds
            >>> {}
            >>>
            >>> # Learn the nodes' states from the data
            >>> import pandas as pd
            >>> data = pd.DataFrame({
            >>>                      'rush_hour': [True, False, False, False, True, False, True],
            >>>                      'weather': ['Terrible', 'Good', 'Bad', 'Good', 'Bad', 'Bad', 'Good'],
            >>>                      'traffic': ['heavy', 'light', 'heavy', 'light', 'heavy', 'heavy', 'heavy']
            >>>                      })
            >>> bn = bn.fit_node_states(data)
            >>> bn.node_states
            {'rush_hour': {False, True}, 'weather': {'Bad', 'Good', 'Terrible'}, 'traffic': {'heavy', 'light'}}
            >>> # Learn the CPDs from the data
            >>> bn = bn.fit_cpds(data)
            >>> # Use the learned CPDs to make predictions on the unseen data
            >>> test_data = pd.DataFrame({
            >>>                           'rush_hour': [False, False, True, True],
            >>>                           'weather': ['Good', 'Bad', 'Good', 'Bad']
            >>>                           })
            >>> bn.predict(test_data, "traffic").to_dict()
            >>> {'traffic_prediction': {0: 'light', 1: 'heavy', 2: 'heavy', 3: 'heavy'}}
            >>> bn.predict_probability(test_data, "traffic").to_dict()
            {'traffic_prediction': {0: 'light', 1: 'heavy', 2: 'heavy', 3: 'heavy'}}
            {'traffic_light': {0: 0.75, 1: 0.25, 2: 0.3333333333333333, 3: 0.3333333333333333},
             'traffic_heavy': {0: 0.25, 1: 0.75, 2: 0.6666666666666666, 3: 0.6666666666666666}}
        """

    def __init__(self, structure: StructureModel):
        """
        Create a ``BayesianNetwork`` with a DAG defined by ``StructureModel``.

        Args:
            structure: a graph representing a causal relationship between variables.
                       In the structure
                           - cycles are not allowed;
                           - multiple (parallel) edges are not allowed;
                           - isolated nodes and multiple components are not allowed.

        Raises:
            ValueError: If the structure is not a connected DAG.
        """
        n_components = nx.number_weakly_connected_components(structure)

        if n_components > 1:
            raise ValueError(
                "The given structure has {n_components} separated graph components. "
                "Please make sure it has only one.".format(n_components=n_components)
            )

        if not nx.is_directed_acyclic_graph(structure):
            cycle = nx.find_cycle(structure)
            raise ValueError(
                "The given structure is not acyclic. Please review the following cycle: {cycle}".format(
                    cycle=cycle
                )
            )

        # _node_states is a Dict in the form `dict: {node: dict: {state: index}}`.
        # Underlying libraries expect all states to be integers from zero, and
        # thus this dict is used to convert from state -> idx, and then back from idx -> state as required
        self._node_states = None  # type: Dict[str: Dict[Hashable, int]]
        self._structure = structure

        # _model is a pgmpy Bayesian Model.
        # It is used for:
        #                - probability fitting
        #                - predictions
        self._model = BayesianModel()
        self._model.add_edges_from(structure.edges)

    @property
    def structure(self) -> StructureModel:
        """
        ``StructureModel`` defining the DAG of the Bayesian Network.

        Returns:
            A ``StructureModel`` of the Bayesian Network.
        """
        return self._structure

    @property
    def nodes(self) -> List[str]:
        """
        List of all nodes contained within the Bayesian Network.

        Returns:
            A list of node names.
        """
        return list(self._model.nodes)

    @property
    def node_states(self) -> Dict[str, Set[Hashable]]:
        """
        Dictionary of all states that each node can take.

        Returns:
            A dictionary of node and its possible states, in format of `dict: {node: state}`.
        """
        return {node: set(states.keys()) for node, states in self._node_states.items()}

    @node_states.setter
    def node_states(self, nodes: Dict[str, Set[Hashable]]):
        """
        Set the list of nodes that are contained within the Bayesian Network.
        The states of all nodes must be provided.

        Args:
            nodes: A dictionary of node and its possible states, in format of `dict: {node: state}`.

        Raises:
            ValueError: if a node contains a None state.
            KeyError: if a node is missing.
        """
        missing_feature = set(self.nodes).difference(set(nodes.keys()))
        if missing_feature:
            raise KeyError(
                "The data does not cover all the features found in the Bayesian Network. "
                "Please check the following features: {nodes}".format(
                    nodes=missing_feature
                )
            )

        for node, states in nodes.items():
            if any(pd.isnull(list(states))):
                raise ValueError("node '{node}' contains None state".format(node=node))
        self._node_states = {
            n: {v: k for k, v in enumerate(sorted(nodes[n]))} for n in nodes
        }

    @property
    def edges(self) -> List[Tuple[str, str]]:
        """
        List of all edges contained within the Bayesian Network, as a Tuple(from_node, to_node).

        Returns:
            A list of all edges.
        """
        return list(self._model.edges)

    @property
    def cpds(self) -> Dict[str, pd.DataFrame]:
        """
        Conditional Probability Distributions of each node within the Bayesian Network.

        The row-index of each dataframe is all possible states for the node.
        The col-index of each dataframe is a MultiIndex that describes all possible permutations of parent states.

        For example, for a node :math:`P(A | B, D)`, where
        .. math::
            - A \\in \\text{{"a", "b", "c", "d"}}
            - B \\in \\text{{"x", "y", "z"}}
            - D \\in \\text{{False, True}}

        >>> b         x                   y               z
        >>> d     False     True      False True      False     True
        >>> a
        >>> a  0.265306  0.214286  0.066667  0.25  0.444444  0.000000
        >>> b  0.183673  0.214286  0.200000  0.25  0.222222  0.666667
        >>> c  0.285714  0.285714  0.400000  0.25  0.333333  0.333333
        >>> d  0.265306  0.285714  0.333333  0.25  0.000000  0.000000

        Returns:
            Conditional Probability Distributions of each node within the Bayesian Network.
        """
        cpds = dict()
        for cpd in self._model.cpds:

            iterables = [
                sorted(self._node_states[var].keys()) for var in cpd.variables[1:]
            ]
            cols = [""]
            if iterables:
                cols = pd.MultiIndex.from_product(iterables, names=cpd.variables[1:])

            cpds[cpd.variable] = pd.DataFrame(
                cpd.values.reshape(
                    len(self._node_states[cpd.variable]), max(1, len(cols))
                )
            )
            cpds[cpd.variable][cpd.variable] = sorted(
                self._node_states[cpd.variable].keys()
            )
            cpds[cpd.variable].set_index([cpd.variable], inplace=True)
            cpds[cpd.variable].columns = cols

        return cpds

    def fit_node_states(self, df: pd.DataFrame) -> "BayesianNetwork":
        """
        Fit all states of nodes that can appear in the data.
        The dataframe provided should contain every possible state (values that can be taken) for every column.

        Args:
            df: data to fit node states from. Each column indicates a node and each row
                an observed combination of states.

        Returns:
            self

        Raises:
            ValueError: if dataframe contains any missing data.
        """
        self.node_states = {c: set(df[c].unique()) for c in df.columns}

        return self

    def _state_to_index(
        self, df: pd.DataFrame, nodes: List[str] = None
    ) -> pd.DataFrame:
        """
        Transforms all values in df to an integer, as defined by the mapping from fit_node_states.

        Args:
            df: data to transform
            nodes: list of nodes to map to index. None means all.

        Returns:
            The transformed dataframe.

        Raises:
            ValueError: if nodes have not been fit, or if column names do not match node names.
        """

        # is_copy is a legacy pandas attribute, toggled here around the in-place mapping
        # to avoid SettingWithCopyWarning on older pandas versions
        df.is_copy = False
        cols = nodes if nodes else df.columns
        for col in cols:
            df[col] = df[col].map(self._node_states[col])
        df.is_copy = True
        return df

    def fit_cpds(
        self,
        data: pd.DataFrame,
        method: str = "MaximumLikelihoodEstimator",
        bayes_prior: str = None,
        equivalent_sample_size: int = None,
    ) -> "BayesianNetwork":
        """
        Learn conditional probability distributions for all nodes in the Bayesian Network, conditioned on
        their incoming edges (parents).

        Args:
            data: dataframe containing one column per node in the Bayesian Network.
            method: how to fit probabilities. One of:
                    - "MaximumLikelihoodEstimator": fit probabilities using Maximum Likelihood Estimation;
                    - "BayesianEstimator": fit probabilities using Bayesian Parameter Estimation. Use bayes_prior.
            bayes_prior: how to construct the Bayesian prior used by method="BayesianEstimator". One of:
                         - "K2": shorthand for dirichlet where all pseudo_counts are 1
                                 regardless of variable cardinality;
                         - "BDeu": equivalent to using a Dirichlet prior with uniform 'pseudo_counts' of
                                   `equivalent_sample_size / (node_cardinality * np.prod(parents_cardinalities))`
                                   for each node. Use equivalent_sample_size.
            equivalent_sample_size: used by BDeu bayes_prior to compute pseudo_counts.

        Returns:
            self

        Raises:
            ValueError: if an invalid method or bayes_prior is specified.

        """

        state_names = {k: list(v.values()) for k, v in self._node_states.items()}

        transformed_data = data.copy(deep=True)  # type: pd.DataFrame
        transformed_data = self._state_to_index(transformed_data[self.nodes])

        if method == "MaximumLikelihoodEstimator":
            self._model.fit(
                data=transformed_data,
                estimator=MaximumLikelihoodEstimator,
                state_names=state_names,
            )

        elif method == "BayesianEstimator":
            valid_bayes_priors = ["BDeu", "K2"]
            if bayes_prior not in valid_bayes_priors:
                raise ValueError(
                    "unrecognised bayes_prior, please use one of %s"
                    % " ".join(valid_bayes_priors)
                )

            self._model.fit(
                data=transformed_data,
                estimator=BayesianEstimator,
                prior_type=bayes_prior,
                equivalent_sample_size=equivalent_sample_size,
                state_names=state_names,
            )
        else:
            valid_methods = ["MaximumLikelihoodEstimator", "BayesianEstimator"]
            raise ValueError(
                "unrecognised method, please use one of %s" % " ".join(valid_methods)
            )

        return self

    def fit_node_states_and_cpds(
        self,
        data: pd.DataFrame,
        method: str = "MaximumLikelihoodEstimator",
        bayes_prior: str = None,
        equivalent_sample_size: int = None,
    ) -> "BayesianNetwork":
        """
        Call `fit_node_states` and then `fit_cpds`.

        Args:
            data: dataframe containing one column per node in the Bayesian Network.
            method: how to fit probabilities. One of:
                    - "MaximumLikelihoodEstimator": fit probabilities using Maximum Likelihood Estimation;
                    - "BayesianEstimator": fit probabilities using Bayesian Parameter Estimation. Use bayes_prior.
            bayes_prior: how to construct the Bayesian prior used by method="BayesianEstimator". One of:
                         - "K2": shorthand for dirichlet where all pseudo_counts are 1
                                 regardless of variable cardinality;
                         - "BDeu": equivalent to using a Dirichlet prior with uniform 'pseudo_counts' of
                                   `equivalent_sample_size / (node_cardinality * np.prod(parents_cardinalities))`
                                   for each node. Use equivalent_sample_size.
            equivalent_sample_size: used by BDeu bayes_prior to compute pseudo_counts.

        Returns:
            self
        """

        return self.fit_node_states(data).fit_cpds(
            data, method, bayes_prior, equivalent_sample_size
        )

    def predict(self, data: pd.DataFrame, node: str) -> pd.DataFrame:
        """
        Predict the state of a node based on some input data, using the Bayesian Network.

        Args:
            data: data to make prediction.
            node: the node to predict.

        Returns:
            A dataframe of predictions, containing a single column named {node}_prediction.
        """

        if all(parent in data.columns for parent in self._model.get_parents(node)):
            return self._predict_from_complete_data(data, node)

        return self._predict_from_incomplete_data(data, node)

    def _predict_from_complete_data(
        self, data: pd.DataFrame, node: str
    ) -> pd.DataFrame:
        """
        Predicts state of node given all parents of node exist within data.
        This method inspects the CPD of node directly, since all parent states are known.
        This avoids traversing the full network to compute marginals.
        This method is fast.

        Args:
            data: data to make prediction.
            node: the node to predict.

        Returns:
            A dataframe of predictions, containing a single column named {node}_prediction.
        """
        transformed_data = data.copy(deep=True)  # type: pd.DataFrame

        parents = sorted(self._model.get_parents(node))
        cpd = self.cpds[node]

        transformed_data[
            "{node}_prediction".format(node=node)
        ] = transformed_data.apply(
            lambda row: cpd[tuple([row[parent] for parent in parents])].idxmax()
            if parents
            else cpd[""].idxmax(),
            axis=1,
        )
        return transformed_data[[node + "_prediction"]]

    def _predict_from_incomplete_data(
        self, data: pd.DataFrame, node: str
    ) -> pd.DataFrame:
        """
        Predicts state of node when some parents of node do not exist within data.
        This method uses the pgmpy predict function, which predicts the most likely state for every node
        that is not contained within data.
        With incomplete data, pgmpy goes beyond parents in the network to determine the most likely predictions.
        This method is slow.

        Args:
            data: data to make prediction.
            node: the node to predict.

        Returns:
            A dataframe of predictions, containing a single column named {node}_prediction.
        """

        transformed_data = deepcopy(data)  # type: pd.DataFrame
        self._state_to_index(transformed_data)

        # pgmpy will predict all missing data, so drop column we want to predict
        transformed_data = transformed_data.drop(columns=[node])

        predictions = self._model.predict(transformed_data)[[node]]

        return predictions.rename(columns={node: node + "_prediction"})

    def predict_probability(self, data: pd.DataFrame, node: str) -> pd.DataFrame:
        """
        Predict the probability of each possible state of a node, based on some input data.

        Args:
            data: data to make prediction.
            node: the node to predict probabilities.

        Returns:
            A dataframe of predicted probabilities, containing one column per possible state, named {node}_{state}.
        """

        if all(parent in data.columns for parent in self._model.get_parents(node)):
            return self._predict_probability_from_complete_data(data, node)

        return self._predict_probability_from_incomplete_data(data, node)

    def _predict_probability_from_complete_data(
        self, data: pd.DataFrame, node: str
    ) -> pd.DataFrame:
        """
        Predict the probability of each possible state of a node, based on some input data.
        This method inspects the CPD of node directly, since all parent states are known.
        This avoids traversing the full network to compute marginals.
        This method is fast.

        Args:
            data: data to make prediction.
            node: the node to predict probabilities.

        Returns:
            A dataframe of predicted probabilities, containing one column per possible state, named {node}_{state}.
        """
        transformed_data = data.copy(deep=True)  # type: pd.DataFrame

        parents = sorted(self._model.get_parents(node))
        cpd = self.cpds[node]

        def lookup_probability(row, s):
            """Retrieve probability from CPD"""
            if parents:
                return cpd[tuple([row[parent] for parent in parents])].loc[s]
            return cpd.at[s, ""]

        for state in self.node_states[node]:
            transformed_data[
                "{n}_{s}".format(n=node, s=state)
            ] = transformed_data.apply(
                lambda row, st=state: lookup_probability(row, st), axis=1
            )

        return transformed_data[
            ["{n}_{s}".format(n=node, s=state) for state in self.node_states[node]]
        ]

    def _predict_probability_from_incomplete_data(
        self, data: pd.DataFrame, node: str
    ) -> pd.DataFrame:
        """
        Predict the probability of each possible state of a node, based on some input data.
        This method uses the pgmpy predict_probability function, which predicts the probability
        of every state for every node that is not contained within data.
        With incomplete data, pgmpy goes beyond parents in the network to determine the most likely predictions.
        This method is slow.

        Args:
            data: data to make prediction.
            node: the node to predict probabilities.

        Returns:
            A dataframe of predicted probabilities, containing one column per possible state, named {node}_{state}.
        """
        transformed_data = data.copy(deep=True)  # type: pd.DataFrame
        self._state_to_index(transformed_data)

        # pgmpy will predict all missing data, so drop column we want to predict
        transformed_data = transformed_data.drop(columns=[node])

        probability = self._model.predict_probability(
            transformed_data
        )  # type: pd.DataFrame

        # keep only probabilities for the node we are interested in
        cols = []
        pattern = re.compile("^{node}_[0-9]+$".format(node=node))
        # disabled open pylint issue (https://github.com/PyCQA/pylint/issues/2962)
        for col in probability.columns:  # pylint: disable=E1133
            if pattern.match(col):
                cols.append(col)
        probability = probability[cols]
        probability.columns = cols

        return probability
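
# A minimal usage sketch of the class above. The dataframe `df`, the target column
# name and the BDeu settings are illustrative assumptions; the constructor argument
# is whatever graph structure the class is built around.
#
#     bn = BayesianNetwork(structure)
#     bn.fit_node_states(df)                       # learn every possible state of each node
#     bn.fit_cpds(df, method="BayesianEstimator",
#                 bayes_prior="BDeu", equivalent_sample_size=10)
#     bn.predict(df, "target")                     # -> single column 'target_prediction'
#     bn.predict_probability(df, "target")         # -> one column per state, 'target_<state>'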
Example #35
0
import sys

import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import BayesianEstimator, ParameterEstimator, MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

if len(sys.argv) % 2 == 1:
    print (len(sys.argv))
    print ("usage: python main.py [data_file] ['DPQ'] [value]")
    exit()

pr = {}
for i in range(len(sys.argv) // 2 - 1):
    pr[sys.argv[2 + 2 * i]] = int(sys.argv[2 + 2 * i + 1])

data = pd.read_csv(sys.argv[1], sep=",")
data_size = len(data)

# pr = {}
# data = pd.read_csv('data.csv') #"fisrm.csv"
# data_size = len(data)
model = BayesianModel()
list_edges = [('TQ', 'DFT'), ('DPQ', 'DI'), ('C', 'DI'), ('DI', 'DFT'),
              ('DI', 'RD'), ('DFT', 'RD'), ('RD', 'DFO'), ('OU', 'DFO')]

model.add_edges_from(list_edges)
model.fit(data, estimator_type=BayesianEstimator, prior_type="BDeu", equivalent_sample_size=10)
for edge in model.edges():
    print(edge)
    print("\n")
infer = VariableElimination(model)

nodes = model.nodes()
Distribution = {}

for key in pr.keys():
    Distribution[key] = [1 - abs(np.sign(pr[key] - i)) for i in range(5)]
    nodes.remove(key)
data_head = data.head()
print(data_head)

data_columns = data.columns

def CreateLinks(columns):
    links = []
    
    for i in range(1, len(columns)):
        couple = (columns[i], columns[0])
        links.append(couple)
    
    return links

links = CreateLinks(data_columns)
model = BayesianModel(links)

pe = ParameterEstimator(model, data)

# Print ParameterEstimator unconditional
pe_symptom1 = pe.state_counts('Symptom_1')
print(pe_symptom1)

# Print ParameterEstimator conditional disease
pe_disease = pe.state_counts('Disease')
print(pe_disease)

mle = MaximumLikelihoodEstimator(model, data)

# Print MaximumLikelihoodEstimator unconditional
mle_symptom1 = mle.estimate_cpd('Symptom_1')
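# The estimated CPDs can then be printed the same way as the state counts above:
print(mle_symptom1)
print(mle.estimate_cpd('Disease'))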
Example #37
0
#                                  |     |
#     _____________________        |     |
#    |                     |       |     |
#    |  getting up late(G) |       |     |__________________________
#    |_____________________|       |                               |
#            |                     |                               |
#            |           __________|__________             ________|_________
#            |__________|                     |           |                  |
#                       |  Late for school(L) |           |  Long Queues(Q)  |
#                       |_____________________|           |__________________|
#
#
###################################################################################

from pgmpy.models import BayesianModel
model = BayesianModel()
# Add nodes to empty bayesian model
# ------------------------------------------------------ ( Traffic Accident -> traffic_jam )
# ------------------------------------------------------ ( Heavy Rain -> traffic_jam )
model.add_nodes_from(['rain', 'traffic_jam'])
model.add_edge('rain', 'traffic_jam')
# If an edge is added without first adding its nodes, the missing nodes are added automatically
"Example: "
model.add_edge('accident', 'traffic_jam')
model.nodes()
" ['accident', 'rain', 'traffic_jam'] "
model.edges()
" [('rain', 'traffic_jam'), ('accident', 'traffic_jam')] "  # two edges shown
# Each node has an associated CPD.
from pgmpy.factors.discrete import TabularCPD
cpd_rain = TabularCPD('rain', 2, [[0.4], [0.6]])
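
# A possible continuation with illustrative probabilities: define the remaining
# CPDs, attach them to the model and validate it.
cpd_accident = TabularCPD('accident', 2, [[0.2], [0.8]])
cpd_traffic_jam = TabularCPD('traffic_jam', 2,
                             [[0.9, 0.6, 0.7, 0.1],
                              [0.1, 0.4, 0.3, 0.9]],
                             evidence=['rain', 'accident'],
                             evidence_card=[2, 2])
model.add_cpds(cpd_rain, cpd_accident, cpd_traffic_jam)
model.check_model()  # True when the structure and the attached CPDs are consistent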
Example #38
0
class TestGibbsSampling(unittest.TestCase):
    def setUp(self):
        # A test Bayesian model
        diff_cpd = TabularCPD('diff', 2, [[0.6], [0.4]])
        intel_cpd = TabularCPD('intel', 2, [[0.7], [0.3]])
        grade_cpd = TabularCPD('grade',
                               3,
                               [[0.3, 0.05, 0.9, 0.5], [0.4, 0.25, 0.08, 0.3],
                                [0.3, 0.7, 0.02, 0.2]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 2])
        self.bayesian_model = BayesianModel()
        self.bayesian_model.add_nodes_from(['diff', 'intel', 'grade'])
        self.bayesian_model.add_edges_from([('diff', 'grade'),
                                            ('intel', 'grade')])
        self.bayesian_model.add_cpds(diff_cpd, intel_cpd, grade_cpd)

        # A test Markov model
        self.markov_model = MarkovModel([('A', 'B'), ('C', 'B'), ('B', 'D')])
        factor_ab = DiscreteFactor(['A', 'B'], [2, 3], [1, 2, 3, 4, 5, 6])
        factor_cb = DiscreteFactor(['C', 'B'], [4, 3],
                                   [3, 1, 4, 5, 7, 8, 1, 3, 10, 4, 5, 6])
        factor_bd = DiscreteFactor(['B', 'D'], [3, 2], [5, 7, 2, 1, 9, 3])
        self.markov_model.add_factors(factor_ab, factor_cb, factor_bd)

        self.gibbs = GibbsSampling(self.bayesian_model)

    def tearDown(self):
        del self.bayesian_model
        del self.markov_model

    @patch('pgmpy.sampling.GibbsSampling._get_kernel_from_bayesian_model',
           autospec=True)
    @patch('pgmpy.models.MarkovChain.__init__', autospec=True)
    def test_init_bayesian_model(self, init, get_kernel):
        model = MagicMock(spec_set=BayesianModel)
        gibbs = GibbsSampling(model)
        init.assert_called_once_with(gibbs)
        get_kernel.assert_called_once_with(gibbs, model)

    @patch('pgmpy.sampling.GibbsSampling._get_kernel_from_markov_model',
           autospec=True)
    def test_init_markov_model(self, get_kernel):
        model = MagicMock(spec_set=MarkovModel)
        gibbs = GibbsSampling(model)
        get_kernel.assert_called_once_with(gibbs, model)

    def test_get_kernel_from_bayesian_model(self):
        gibbs = GibbsSampling()
        gibbs._get_kernel_from_bayesian_model(self.bayesian_model)
        self.assertListEqual(list(gibbs.variables),
                             self.bayesian_model.nodes())
        self.assertDictEqual(gibbs.cardinalities, {
            'diff': 2,
            'intel': 2,
            'grade': 3
        })

    def test_get_kernel_from_markov_model(self):
        gibbs = GibbsSampling()
        gibbs._get_kernel_from_markov_model(self.markov_model)
        self.assertListEqual(list(gibbs.variables), self.markov_model.nodes())
        self.assertDictEqual(gibbs.cardinalities, {
            'A': 2,
            'B': 3,
            'C': 4,
            'D': 2
        })

    def test_sample(self):
        start_state = [State('diff', 0), State('intel', 0), State('grade', 0)]
        sample = self.gibbs.sample(start_state, 2)
        self.assertEquals(len(sample), 2)
        self.assertEquals(len(sample.columns), 3)
        self.assertIn('diff', sample.columns)
        self.assertIn('intel', sample.columns)
        self.assertIn('grade', sample.columns)
        self.assertTrue(set(sample['diff']).issubset({0, 1}))
        self.assertTrue(set(sample['intel']).issubset({0, 1}))
        self.assertTrue(set(sample['grade']).issubset({0, 1, 2}))

    @patch("pgmpy.sampling.GibbsSampling.random_state", autospec=True)
    def test_sample_less_arg(self, random_state):
        self.gibbs.state = None
        random_state.return_value = [
            State('diff', 0),
            State('intel', 0),
            State('grade', 0)
        ]
        sample = self.gibbs.sample(size=2)
        random_state.assert_called_once_with(self.gibbs)
        self.assertEqual(len(sample), 2)

    def test_generate_sample(self):
        start_state = [State('diff', 0), State('intel', 0), State('grade', 0)]
        gen = self.gibbs.generate_sample(start_state, 2)
        samples = [sample for sample in gen]
        self.assertEqual(len(samples), 2)
        self.assertEqual(
            {samples[0][0].var, samples[0][1].var, samples[0][2].var},
            {'diff', 'intel', 'grade'})
        self.assertEqual(
            {samples[1][0].var, samples[1][1].var, samples[1][2].var},
            {'diff', 'intel', 'grade'})

    @patch("pgmpy.sampling.GibbsSampling.random_state", autospec=True)
    def test_generate_sample_less_arg(self, random_state):
        self.gibbs.state = None
        gen = self.gibbs.generate_sample(size=2)
        samples = [sample for sample in gen]
        random_state.assert_called_once_with(self.gibbs)
        self.assertEqual(len(samples), 2)
Example #39
0
def create_model():
    edges_list = [
        ('cebolla', 'distensionAbdominal'), ('cebolla', 'borborigmos'),
        ('cafe', 'reflujoGastroesofagico'), ('cafe', 'dolorAbdominal'),
        ('frijol', 'distensionAbdominal'), ('frijol', 'flatulencia'),
        ('aji', 'sintomasDispepsicos'), ('aji', 'dolorAbdominal'),
        ('leche', 'diarrea'), ('leche', 'intolerancia'),
        ('tomate', 'intolerancia'), ('te', 'estrenimiento'),
        ('manzana', 'borborigmos')
    ]
    model = BayesianModel(edges_list)
    nodes = {
        'aji': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
        'apio': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
        'fresa': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
        'manzana': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
        'arroz': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
        'cafe': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
        'cebolla': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
        'res': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
        'huevo': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
        'leche': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
        'lechuga': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
        'papa': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
        'platano': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
        'pollo': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
        'te': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
        'tomate': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
        'frijol': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
        'distensionAbdominal': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
        'flatulencia': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
        'dolorAbdominal': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
        'estrenimiento': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
        'diarrea': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
        'intolerancia': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
        'borborigmos': {
            'States': {'bajo', 'medio', 'alto'},
            'type': 'discrete'
        },
    }
    model.add_nodes_from(nodes)

    cpds = [{
        'Values': np.array([[0.3], [0.4], [0.3]]),
        'Variables': {
            'apio': []
        }
    }, {
        'Values': np.array([[0.3], [0.4], [0.3]]),
        'Variables': {
            'fresa': []
        }
    }, {
        'Values': np.array([[0.3], [0.4], [0.3]]),
        'Variables': {
            'manzana': []
        }
    }, {
        'Values': np.array([[0.3], [0.4], [0.3]]),
        'Variables': {
            'arroz': []
        }
    }, {
        'Values': np.array([[0.3], [0.4], [0.3]]),
        'Variables': {
            'cafe': []
        }
    }, {
        'Values': np.array([[0.3], [0.4], [0.3]]),
        'Variables': {
            'cebolla': []
        }
    }, {
        'Values': np.array([[0.3], [0.4], [0.3]]),
        'Variables': {
            'res': []
        }
    }, {
        'Values': np.array([[0.3], [0.4], [0.3]]),
        'Variables': {
            'huevo': []
        }
    }, {
        'Values': np.array([[0.3], [0.4], [0.3]]),
        'Variables': {
            'leche': []
        }
    }, {
        'Values': np.array([[0.3], [0.4], [0.3]]),
        'Variables': {
            'lechuga': []
        }
    }, {
        'Values': np.array([[0.3], [0.4], [0.3]]),
        'Variables': {
            'papa': []
        }
    }, {
        'Values': np.array([[0.3], [0.4], [0.3]]),
        'Variables': {
            'platano': []
        }
    }, {
        'Values': np.array([[0.3], [0.4], [0.3]]),
        'Variables': {
            'aji': []
        }
    }, {
        'Values': np.array([[0.3], [0.4], [0.3]]),
        'Variables': {
            'pollo': []
        }
    }, {
        'Values': np.array([[0.3], [0.4], [0.3]]),
        'Variables': {
            'te': []
        }
    }, {
        'Values': np.array([[0.3], [0.4], [0.3]]),
        'Variables': {
            'tomate': []
        }
    }, {
        'Values': np.array([[0.3], [0.4], [0.3]]),
        'Variables': {
            'frijol': []
        }
    }]

    tabular_cpds = []
    for cpd in cpds:
        var = list(cpd['Variables'].keys())[0]
        evidence = cpd['Variables'][var]
        values = cpd['Values']
        states = len(nodes[var]['States'])
        evidence_card = [
            len(nodes[evidence_var]['States']) for evidence_var in evidence
        ]
        tabular_cpds.append(
            TabularCPD(var, states, values, evidence, evidence_card))

    model.add_cpds(*tabular_cpds)

    cpd_distension = TabularCPD(
        variable='distensionAbdominal',
        variable_card=3,
        values=[[0.6, 0.3, 0.1, 0.65, 0.3, 0.05, 0.65, 0.3, 0.05],
                [0.2, 0.3, 0.5, 0.25, 0.4, 0.35, 0.05, 0.3, 0.65],
                [0.2, 0.4, 0.4, 0.1, 0.3, 0.60, 0.3, 0.4, 0.3]],
        evidence=['cebolla', 'frijol'],
        evidence_card=[3, 3])
    cpd_sintDisp = TabularCPD(variable='sintomasDispepsicos',
                              variable_card=3,
                              values=[[0.15, 0.3, 0.55], [0.65, 0.3, 0.05],
                                      [0.2, 0.4, 0.4]],
                              evidence=['aji'],
                              evidence_card=[3])
    cpd_intolerancia = TabularCPD(
        variable='intolerancia',
        variable_card=3,
        values=[[0.6, 0.3, 0.1, 0.65, 0.3, 0.05, 0.65, 0.3, 0.05],
                [0.2, 0.3, 0.5, 0.25, 0.4, 0.35, 0.05, 0.3, 0.65],
                [0.2, 0.4, 0.4, 0.1, 0.3, 0.60, 0.3, 0.4, 0.3]],
        evidence=['leche', 'tomate'],
        evidence_card=[3, 3])
    cpd_borborigmos = TabularCPD(
        variable='borborigmos',
        variable_card=3,
        values=[[0.6, 0.3, 0.1, 0.65, 0.3, 0.05, 0.65, 0.3, 0.05],
                [0.2, 0.3, 0.5, 0.25, 0.4, 0.35, 0.05, 0.3, 0.65],
                [0.2, 0.4, 0.4, 0.1, 0.3, 0.60, 0.3, 0.4, 0.3]],
        evidence=['manzana', 'cebolla'],
        evidence_card=[3, 3])
    cpd_estren = TabularCPD(variable='estrenimiento',
                            variable_card=3,
                            values=[[0.15, 0.3, 0.55], [0.65, 0.3, 0.05],
                                    [0.2, 0.4, 0.4]],
                            evidence=['te'],
                            evidence_card=[3])
    cpd_reflujo = TabularCPD(variable='reflujoGastroesofagico',
                             variable_card=3,
                             values=[[0.15, 0.3, 0.55], [0.65, 0.3, 0.05],
                                     [0.2, 0.4, 0.4]],
                             evidence=['cafe'],
                             evidence_card=[3])
    cpd_flat = TabularCPD(variable='flatulencia',
                          variable_card=3,
                          values=[[0.15, 0.3, 0.55], [0.65, 0.3, 0.05],
                                  [0.2, 0.4, 0.4]],
                          evidence=['frijol'],
                          evidence_card=[3])
    cpd_dolor = TabularCPD(
        variable='dolorAbdominal',
        variable_card=3,
        values=[[0.6, 0.3, 0.1, 0.65, 0.3, 0.05, 0.65, 0.3, 0.05],
                [0.2, 0.3, 0.5, 0.25, 0.4, 0.35, 0.05, 0.3, 0.65],
                [0.2, 0.4, 0.4, 0.1, 0.3, 0.60, 0.3, 0.4, 0.3]],
        evidence=['cafe', 'aji'],
        evidence_card=[3, 3])

    cpd_diarrea = TabularCPD(variable='diarrea',
                             variable_card=3,
                             values=[[0.15, 0.3, 0.55], [0.65, 0.3, 0.05],
                                     [0.2, 0.4, 0.4]],
                             evidence=['leche'],
                             evidence_card=[3])

    model.add_cpds(cpd_distension, cpd_sintDisp, cpd_intolerancia, cpd_estren,
                   cpd_borborigmos, cpd_reflujo, cpd_dolor, cpd_flat,
                   cpd_diarrea)
    model.check_model()
    return model
Example #40
0
 def setUp(self):
     self.G = BayesianModel([('d', 'g'), ('i', 'g'), ('g', 'l'),
                             ('i', 's')])
Example #41
0
 def test_class_init_with_data_string(self):
     self.g = BayesianModel([('a', 'b'), ('b', 'c')])
     self.assertListEqual(sorted(self.g.nodes()), ['a', 'b', 'c'])
     self.assertListEqual(hf.recursive_sorted(self.g.edges()),
                          [['a', 'b'], ['b', 'c']])
Example #42
0
 def test_update_node_parents_bm_constructor(self):
     self.g = BayesianModel([('a', 'b'), ('b', 'c')])
     self.assertListEqual(self.g.predecessors('a'), [])
     self.assertListEqual(self.g.predecessors('b'), ['a'])
     self.assertListEqual(self.g.predecessors('c'), ['b'])
Example #43
0
 def setUp(self):
     self.graph = BayesianModel()
Example #44
0
class TargetBayesNet:
    def __init__(self, model_path=""):
        """
        Class for generating dataset from pre-defined Bayesian Network.
        """
        # Starting with defining the network structure
        # Creating the model as well as the structure (arcs)
        import numpy as np
        from pgmpy.models import BayesianModel

        # Define the directed connections (edges) of the BN
        edges_list = [(Contact, Covid), (Covid, None_Symptom),
                      (Covid, Dry_Cough), (Covid, Fever),
                      (Covid, Loss_of_taste_or_smell), (Covid, Diarrhea),
                      (Covid, Difficulty_in_Breathing), (Covid, Sore_Throat),
                      (None_Symptom, Dry_Cough), (None_Symptom, Fever),
                      (None_Symptom, Loss_of_taste_or_smell),
                      (None_Symptom, Diarrhea),
                      (None_Symptom, Difficulty_in_Breathing),
                      (None_Symptom, Sore_Throat),
                      (Dry_Cough, Difficulty_in_Breathing),
                      (Dry_Cough, Sore_Throat), (Dry_Cough, Tiredness),
                      (Fever, Tiredness)]

        # Initialize the BN with the directed edges
        self.__covid_model = BayesianModel(edges_list)

        # Defining the parameters.
        # Specifying the CPD for each node
        # http://pgmpy.org/factors.html#module-pgmpy.factors.discrete.CPD
        # TabularCPD values: for example(3 variables): this node and 2 parents
        # First-dimension: this node
        # second-dimension: cartesian-product of the values from parents

        from pgmpy.factors.discrete import TabularCPD

        # Having contact with confirmed patients
        cpd_Contact = TabularCPD(variable=Contact,
                                 variable_card=2,
                                 values=[[0.9], [0.1]])

        # Conditional probability of having Covid-19 (assumption: measured on patients who requested a test at the hospital)
        cpd_Covid = TabularCPD(variable=Covid,
                               variable_card=2,
                               values=[[0.7, 0.4], [0.3, 0.6]],
                               evidence=[Contact],
                               evidence_card=[2])

        cpd_None = TabularCPD(variable=None_Symptom,
                              variable_card=2,
                              values=[[0.1, 0.7], [0.9, 0.3]],
                              evidence=[Covid],
                              evidence_card=[2])

        cpd_Dry = TabularCPD(variable=Dry_Cough,
                             variable_card=2,
                             values=[[0.95, 0.98, 0.3, 0.99],
                                     [0.05, 0.02, 0.7, 0.01]],
                             evidence=[Covid, None_Symptom],
                             evidence_card=[2, 2])

        cpd_Fever = TabularCPD(variable=Fever,
                               variable_card=2,
                               values=[[0.95, 0.99, 0.15, 0.99],
                                       [0.05, 0.01, 0.85, 0.01]],
                               evidence=[Covid, None_Symptom],
                               evidence_card=[2, 2])

        cpd_Loss = TabularCPD(variable=Loss_of_taste_or_smell,
                              variable_card=2,
                              values=[[0.999, 0.999, 0.7, 0.99],
                                      [0.001, 0.001, 0.3, 0.01]],
                              evidence=[Covid, None_Symptom],
                              evidence_card=[2, 2])

        cpd_Diarrhea = TabularCPD(variable=Diarrhea,
                                  variable_card=2,
                                  values=[[0.85, 0.99, 0.75, 0.99],
                                          [0.15, 0.01, 0.25, 0.01]],
                                  evidence=[Covid, None_Symptom],
                                  evidence_card=[2, 2])

        cpd_Diff = TabularCPD(
            variable=Difficulty_in_Breathing,
            variable_card=2,
            values=[[0.999, 0.95, 0.999, 0.999, 0.8, 0.6, 0.999, 0.999],
                    [0.001, 0.05, 0.001, 0.001, 0.2, 0.4, 0.001, 0.001]],
            evidence=[Covid, None_Symptom, Dry_Cough],
            evidence_card=[2, 2, 2])

        cpd_Sore = TabularCPD(
            variable=Sore_Throat,
            variable_card=2,
            values=[[0.95, 0.7, 0.999, 0.999, 0.8, 0.3, 0.999, 0.999],
                    [0.05, 0.3, 0.001, 0.001, 0.2, 0.7, 0.001, 0.001]],
            evidence=[Covid, None_Symptom, Dry_Cough],
            evidence_card=[2, 2, 2])

        cpd_Tiredness = TabularCPD(variable=Tiredness,
                                   variable_card=2,
                                   values=[[0.95, 0.35, 0.5, 0.05],
                                           [0.05, 0.65, 0.5, 0.95]],
                                   evidence=[Dry_Cough, Fever],
                                   evidence_card=[2, 2])

        # cpd_cancer = TabularCPD(variable='', variable_card=2,
        #                         values=[[0.03, 0.05, 0.001, 0.02],
        #                                 [0.97, 0.95, 0.999, 0.98]],
        #                         evidence=['', ''],
        #                         evidence_card=[2, 2])

        # Associating the parameters with the model structure.
        self.__covid_model.add_cpds(cpd_Contact, cpd_Covid, cpd_None, cpd_Dry,
                                    cpd_Fever, cpd_Loss, cpd_Diarrhea,
                                    cpd_Diff, cpd_Sore, cpd_Tiredness)

        # Checking if the cpds are valid for the model.
        print("Bayesian Network generated successfully or not: ",
              self.__covid_model.check_model())

        graph_file = ""
        model_file = ""
        if (model_path != ""):
            graph_file = model_path + "/"
            model_file = model_path + "/"
        graph_file += "targetBN"
        model_file += "targetBN.bif"

        # process.saveGraphToPDF(graph_file, list(self.__covid_model.edges()), True)
        process.saveModel(self.__covid_model, model_file)

        # Doing some simple queries on the network
        # Check if there is active trail between the nodes
        # self.__covid_model.is_active_trail('', '')
        # self.__covid_model.is_active_trail('', '', observed=[])
        # self.__covid_model.local_independencies('')

        # Checking all the independencies
        # print(self.__covid_model.get_independencies())

    def getDataset(self, size=1000, return_type='DataFrame'):
        """
        Return a set of samples generated from the Bayesian Network (using simple forward sampling).

        Parameters
        ----------
        size: size of the dataset to be generated (default: 1000)

        return_type: return type of the dataset (default: pandas.DataFrame)

        """
        # For more info, see: likelihood_weighted, rejection or Gibb sampling
        from pgmpy.sampling import BayesianModelSampling

        inference = BayesianModelSampling(self.__covid_model)
        dataset = inference.forward_sample(size=size, return_type=return_type)

        return dataset
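
# A minimal usage sketch, assuming the node-name constants (Contact, Covid, ...) and
# the `process` helper module referenced above are defined elsewhere in the project:
#     bn = TargetBayesNet(model_path="")     # builds the network and saves targetBN.bif
#     df = bn.getDataset(size=1000)          # forward-sampled pandas.DataFrame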
Example #46
0
class TestBayesianModelSampling(unittest.TestCase):
    def setUp(self):
        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'),
                                             ('J', 'Q'), ('J', 'L'),
                                             ('G', 'L')])
        cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])
        cpd_j = TabularCPD('J', 2,
                           [[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
                           ['R', 'A'], [2, 2])
        cpd_q = TabularCPD('Q', 2, [[0.9, 0.2], [0.1, 0.8]], ['J'], [2])
        cpd_l = TabularCPD('L', 2,
                           [[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]],
                           ['G', 'J'], [2, 2])
        cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)
        self.sampling_inference = BayesianModelSampling(self.bayesian_model)
        self.markov_model = MarkovModel()

    def test_init(self):
        with self.assertRaises(TypeError):
            BayesianModelSampling(self.markov_model)

    def test_forward_sample(self):
        sample = self.sampling_inference.forward_sample(25)
        self.assertEquals(len(sample), 25)
        self.assertEquals(len(sample.columns), 6)
        self.assertIn('A', sample.columns)
        self.assertIn('J', sample.columns)
        self.assertIn('R', sample.columns)
        self.assertIn('Q', sample.columns)
        self.assertIn('G', sample.columns)
        self.assertIn('L', sample.columns)
        self.assertTrue(set(sample.A).issubset({0, 1}))
        self.assertTrue(set(sample.J).issubset({0, 1}))
        self.assertTrue(set(sample.R).issubset({0, 1}))
        self.assertTrue(set(sample.Q).issubset({0, 1}))
        self.assertTrue(set(sample.G).issubset({0, 1}))
        self.assertTrue(set(sample.L).issubset({0, 1}))

    def test_rejection_sample_basic(self):
        sample = self.sampling_inference.rejection_sample(
            [State('A', 1), State('J', 1),
             State('R', 1)], 25)
        self.assertEquals(len(sample), 25)
        self.assertEquals(len(sample.columns), 6)
        self.assertIn('A', sample.columns)
        self.assertIn('J', sample.columns)
        self.assertIn('R', sample.columns)
        self.assertIn('Q', sample.columns)
        self.assertIn('G', sample.columns)
        self.assertIn('L', sample.columns)
        self.assertTrue(set(sample.A).issubset({1}))
        self.assertTrue(set(sample.J).issubset({1}))
        self.assertTrue(set(sample.R).issubset({1}))
        self.assertTrue(set(sample.Q).issubset({0, 1}))
        self.assertTrue(set(sample.G).issubset({0, 1}))
        self.assertTrue(set(sample.L).issubset({0, 1}))

    @patch("pgmpy.sampling.BayesianModelSampling.forward_sample",
           autospec=True)
    def test_rejection_sample_less_arg(self, forward_sample):
        sample = self.sampling_inference.rejection_sample(size=5)
        forward_sample.assert_called_once_with(self.sampling_inference, 5)
        self.assertEqual(sample, forward_sample.return_value)

    def test_likelihood_weighted_sample(self):
        sample = self.sampling_inference.likelihood_weighted_sample(
            [State('A', 0), State('J', 1),
             State('R', 0)], 25)
        self.assertEquals(len(sample), 25)
        self.assertEquals(len(sample.columns), 7)
        self.assertIn('A', sample.columns)
        self.assertIn('J', sample.columns)
        self.assertIn('R', sample.columns)
        self.assertIn('Q', sample.columns)
        self.assertIn('G', sample.columns)
        self.assertIn('L', sample.columns)
        self.assertIn('_weight', sample.columns)
        self.assertTrue(set(sample.A).issubset({0, 1}))
        self.assertTrue(set(sample.J).issubset({0, 1}))
        self.assertTrue(set(sample.R).issubset({0, 1}))
        self.assertTrue(set(sample.Q).issubset({0, 1}))
        self.assertTrue(set(sample.G).issubset({0, 1}))
        self.assertTrue(set(sample.L).issubset({0, 1}))

    def tearDown(self):
        del self.sampling_inference
        del self.bayesian_model
        del self.markov_model
Example #47
0
data = usecaseData
# TODO: Option to later concat with white noise data (like in CarDemo Manual from CausalnexStudy)
data

# %% markdown [markdown]
# ## Step 2: Create Network Structure

# %% codecell

carModel: BayesianModel = BayesianModel([(Exertion.var, WorkCapacity.var),
                                         (Experience.var, WorkCapacity.var),
                                         (Training.var, WorkCapacity.var),
                                         (WorkCapacity.var, Absenteeism.var),
                                         (Time.var, WorkCapacity.var),
                                         (Time.var, Absenteeism.var),
                                         (Time.var, Exertion.var),
                                         (Time.var, Experience.var),
                                         (Time.var, Training.var),
                                         (Process.var, Tool.var),
                                         (Tool.var, Injury.var),
                                         (Process.var, Injury.var),
                                         (Process.var, Absenteeism.var),
                                         (Injury.var, Absenteeism.var)])

drawGraph(model=carModel)

# %% markdown [markdown]
# ## Step 3: Estimate CPDs
# %% codecell
from pgmpy.estimators import BayesianEstimator

#est: BayesianEstimator = BayesianEstimator(model = carModel, data = data)
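
# A sketch of one possible way to carry out Step 3 (the BDeu settings below are
# illustrative assumptions, not taken from this notebook):
carModel.fit(data = data, estimator = BayesianEstimator,
             prior_type = "BDeu", equivalent_sample_size = 10)
carModel.get_cpds()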
Example #48
0
class TestBayesianModelCPD(unittest.TestCase):
    def setUp(self):
        self.G = BayesianModel([('d', 'g'), ('i', 'g'), ('g', 'l'),
                                ('i', 's')])

    def test_active_trail_nodes(self):
        self.assertEqual(sorted(self.G.active_trail_nodes('d')), ['d', 'g', 'l'])
        self.assertEqual(sorted(self.G.active_trail_nodes('i')), ['g', 'i', 'l', 's'])

    def test_active_trail_nodes_args(self):
        self.assertEqual(sorted(self.G.active_trail_nodes('d', observed='g')), ['d', 'i', 's'])
        self.assertEqual(sorted(self.G.active_trail_nodes('l', observed='g')), ['l'])
        self.assertEqual(sorted(self.G.active_trail_nodes('s', observed=['i', 'l'])), ['s'])
        self.assertEqual(sorted(self.G.active_trail_nodes('s', observed=['d', 'l'])), ['g', 'i', 's'])

    def test_is_active_trail_triplets(self):
        self.assertTrue(self.G.is_active_trail('d', 'l'))
        self.assertTrue(self.G.is_active_trail('g', 's'))
        self.assertFalse(self.G.is_active_trail('d', 'i'))
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='g'))
        self.assertFalse(self.G.is_active_trail('d', 'l', observed='g'))
        self.assertFalse(self.G.is_active_trail('i', 'l', observed='g'))
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='l'))
        self.assertFalse(self.G.is_active_trail('g', 's', observed='i'))

    def test_is_active_trail(self):
        self.assertFalse(self.G.is_active_trail('d', 's'))
        self.assertTrue(self.G.is_active_trail('s', 'l'))
        self.assertTrue(self.G.is_active_trail('d', 's', observed='g'))
        self.assertFalse(self.G.is_active_trail('s', 'l', observed='g'))

    def test_is_active_trail_args(self):
        self.assertFalse(self.G.is_active_trail('s', 'l', 'i'))
        self.assertFalse(self.G.is_active_trail('s', 'l', 'g'))
        self.assertTrue(self.G.is_active_trail('d', 's', 'l'))
        self.assertFalse(self.G.is_active_trail('d', 's', ['i', 'l']))

    def test_get_cpds(self):
        cpd_d = TabularCPD('d', 2, np.random.rand(2, 1))
        cpd_i = TabularCPD('i', 2, np.random.rand(2, 1))
        cpd_g = TabularCPD('g', 2, np.random.rand(2, 4), ['d', 'i'], [2, 2])
        cpd_l = TabularCPD('l', 2, np.random.rand(2, 2), ['g'], 2)
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], 2)
        self.G.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)

        self.assertEqual(self.G.get_cpds('d').variable, 'd')

    def test_get_cpds1(self):
        self.model = BayesianModel([('A', 'AB')])
        cpd_a = TabularCPD('A', 2, np.random.rand(2, 1))
        cpd_ab = TabularCPD('AB', 2, np.random.rand(2, 2), evidence=['A'],
                            evidence_card=[2])

        self.model.add_cpds(cpd_a, cpd_ab)
        self.assertEqual(self.model.get_cpds('A').variable, 'A')
        self.assertEqual(self.model.get_cpds('AB').variable, 'AB')

    def test_add_single_cpd(self):
        from pgmpy.factors import TabularCPD
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], 2)
        self.G.add_cpds(cpd_s)
        self.assertListEqual(self.G.get_cpds(), [cpd_s])

    def test_add_multiple_cpds(self):
        from pgmpy.factors import TabularCPD
        cpd_d = TabularCPD('d', 2, np.random.rand(2, 1))
        cpd_i = TabularCPD('i', 2, np.random.rand(2, 1))
        cpd_g = TabularCPD('g', 2, np.random.rand(2, 4), ['d', 'i'], [2, 2])
        cpd_l = TabularCPD('l', 2, np.random.rand(2, 2), ['g'], 2)
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], 2)

        self.G.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)
        self.assertEqual(self.G.get_cpds('d'), cpd_d)
        self.assertEqual(self.G.get_cpds('i'), cpd_i)
        self.assertEqual(self.G.get_cpds('g'), cpd_g)
        self.assertEqual(self.G.get_cpds('l'), cpd_l)
        self.assertEqual(self.G.get_cpds('s'), cpd_s)

    def tearDown(self):
        del self.G
Example #49
0
import pandas as pd

from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination

dataset = pd.read_csv('dataset.csv')

model = BayesianModel([('HD', 'AGE'), ('HD', 'GENDER'), ('CP', 'AGE'),
                       ('CHOLESTEROL', 'AGE'), ('HD', 'BP'), ('GENDER', 'CP')])

model.fit(dataset, estimator=MaximumLikelihoodEstimator)

print('\n Inferencing with Bayesian Network:')

HeartDisease_infer = VariableElimination(model)
# for cpd in model.get_cpds():
#     print("CPD of {variable}:".format(variable=cpd.variable))
#     print(cpd)
#     print(model.check_model())
print('\n1.Probability of HeartDisease given Gender = Female')
q = HeartDisease_infer.query(variables=['HD'], evidence={'GENDER': 1})
print(q['HD'])

print('\n2. Probability of HeartDisease given BP = Low')
q = HeartDisease_infer.query(variables=['HD'], evidence={'BP': 1})
print(q['HD'])
Example #50
0
 def setUp(self):
     self.G = BayesianModel()
Example #51
0
# Read the attributes
lines = list(csv.reader(open('heart.csv', 'r')))
attributes = lines[0]
# Read Cleveland Heart disease data
heartDisease = pd.read_csv('heart.csv', names=attributes)
heartDisease = heartDisease.replace('?', np.nan)
# Display the data
print('Few examples from the dataset are given below')
print(heartDisease.head())
print('\nAttributes and datatypes')
print(heartDisease.dtypes)
# Model Bayesian Network
model = BayesianModel([('age', 'trestbps'), ('age', 'fbs'),
                       ('sex', 'trestbps'), ('sex', 'trestbps'),
                       ('exang', 'trestbps'), ('trestbps', 'heartdisease'),
                       ('fbs', 'heartdisease'), ('heartdisease', 'restecg'),
                       ('heartdisease', 'thalach'), ('heartdisease', 'chol')])
# Learning CPDs using Maximum Likelihood Estimators
print('\nLearning CPDs using Maximum Likelihood Estimators...')
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)
# Deducing with Bayesian Network
print('\nInferencing with Bayesian Network:')
HeartDisease_infer = VariableElimination(model)
print('\n1. Probability of HeartDisease given Age=40')
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 40})
print(q['heartdisease'])
print('\n2. Probability of HeartDisease given chol (Cholesterol) = 100')
q = HeartDisease_infer.query(variables=['heartdisease'],
                             evidence={'sex': 0, 'chol': 100})
print(q['heartdisease'])
from src.utils.GraphvizUtil import *
from src.utils.NetworkUtil import *
from src.utils.DataUtil import *
from src.utils.GenericUtil import *

import pandas as pd
from pandas.core.frame import DataFrame

# %% codecell
# TODO: create "getallcausalchainswithinmodel" function , where we traverse each path and get all three-way causal chains
# TODO do the same for common parent and common effect models
model: BayesianModel = BayesianModel([('X', 'F'), ('F', 'Y'), ('C', 'X'),
                                      ('A', 'C'), ('A', 'D'), ('D', 'X'),
                                      ('D', 'Y'), ('E', 'R'), ('F', 'J'),
                                      ('B', 'D'), ('B', 'E'), ('A', 'Y'),
                                      ('O', 'B'), ('E', 'Y'), ('X', 'E'),
                                      ('D', 'E'), ('B', 'X'), ('B', 'F'),
                                      ('E', 'F'), ('C', 'F'), ('C', 'E'),
                                      ('C', 'Y')])
drawGraph(model)

# %% codecell
# STEP 1: get all causal chains
# STEP 2: get the nodes that go in the observed / evidence in order to  nullify active trails (the  middle node + the backdoors from getobservedvars function)

edges: List[Tuple[Name, Name]] = list(iter(model.edges()))

roots: List[Name] = model.get_roots()
roots
leaves: List[Name] = model.get_leaves()
leaves
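
# A sketch for STEP 1 using a hypothetical helper (not one of the imported utilities):
# a three-way causal chain is taken to be any pair of consecutive directed edges a -> b -> c.
def causalChainsOf(model: BayesianModel) -> List[Tuple[Name, Name, Name]]:
    edgeList: List[Tuple[Name, Name]] = list(model.edges())
    return [(a, b, c)
            for (a, b) in edgeList
            for (b2, c) in edgeList
            if b == b2]

causalChainsOf(model)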
Example #53
0
class TaxiGenericModel(GymEnvironment, CausalEnvironment):

    _observability: str

    def __init__(self, build_causal_model:bool=False, observability:str='full'):
        self._env = gym.make('Taxi-v3')
        self._observability = observability
        if build_causal_model:
            self.build_causal_model()
        self.reset()

    def run_step(self, action, *args, **kwargs):
        info = {'wins': 0}
        next_state, reward, done, _ = self._env.step(action)
        if done and reward == 20:
            info['wins'] = 1
        return next_state, reward, done, info

    def reset(self, *args, **kwargs) -> int:
        return self._env.reset()

    def decode(self, state):
        return tuple(self._env.decode(state))

    ###############################################
    # Causal section
    ###############################################

    def build_causal_model(self):
        #################################
        # Defining the model structure
        #################################
        # PP = Passenger Position
        # CP = Cab Position
        # DP = Destination Position of the passenger
        # onPP = the cab is on the Passenger Position
        # onDP = the cab is on the Destination Position
        # inC = passenger is in the cab

        self._causal_model = BayesianModel(
            [
                ('PP', 'onPP'),
                ('CP', 'onPP'),
                ('CP', 'onDP'),
                ('DP', 'onDP'),
            
                ('inC', 'X'),
                ('onPP', 'X'),
                ('onDP', 'X'),

                ('onPP', 'Y'),
                ('onDP', 'Y'),
                ('inC', 'Y'),
                ('X', 'Y'),
            ]
        )

        # Defining individual CPDs.
        cpd_PP = TabularCPD(
            variable='PP', 
            variable_card=25, 
            values=[[0.04] for _ in range(0,25)], #All states have the same probability
            state_names={'PP': ['state ' + str(i) for i in range(0,25)]}
            )
        cpd_CP = TabularCPD(
            variable='CP', 
            variable_card=25, 
            values=[[0.04] for _ in range(0,25)], #All states have the same probability
            state_names={'CP': ['cab state ' + str(i) for i in range(0,25)]}
            )        
        cpd_DP = TabularCPD(
            variable='DP',
            variable_card=25,
            values=[
                [0.25], [0], [0], [0], [0.25],
                [0], [0], [0], [0], [0],
                [0], [0], [0], [0], [0],
                [0], [0], [0], [0], [0],
                [0.25], [0], [0], [0.25], [0],        
            ],
            state_names={'DP': ['destination ' + str(i) for i in range(0,25)]}
            )
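        # onPP (and onDP below) is a deterministic indicator: the flattened 25x25
        # identity matrix puts probability 1 on 'True' exactly when the PP (resp. DP)
        # index equals the CP index, i.e. when the cab stands on that cell; the
        # complementary row gives P('False').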
        cpd_onPP = TabularCPD(
            variable='onPP',
            variable_card=2,
            values=[
                np.ndarray.flatten(np.ones(25) - np.diag(np.ones(25))),
                np.ndarray.flatten(np.diag(np.ones(25)))
            ],
            evidence=['PP', 'CP'],
            evidence_card=[25, 25],
            state_names={
                'onPP': ['False', 'True'],
                'PP': ['state ' + str(i) for i in range(0,25)],
                'CP': ['cab state ' + str(i) for i in range(0,25)]
                }
            ) 
        cpd_onDP = TabularCPD(
            variable='onDP', 
            variable_card=2, 
            values=[
                np.ndarray.flatten(np.ones(25) - np.diag(np.ones(25))),
                np.ndarray.flatten(np.diag(np.ones(25)))
            ],
            evidence=['DP', 'CP'],
            evidence_card=[25, 25],
            state_names={
                'onDP': ['False', 'True'], 
                'DP': ['destination ' + str(i) for i in range(0,25)],
                'CP': ['cab state ' + str(i) for i in range(0,25)]
                }
            )
        cpd_inC = TabularCPD(
            variable='inC', 
            variable_card=2, 
            values=[[0.5], [0.5]],
            state_names={'inC': ['False', 'True']}
            )       
        cpd_X = TabularCPD(
            variable='X', 
            variable_card=2, 
            values=[
                [0.5, 0.5, 0.5, 0, 1, 0.5, 1, 0], 
                [0.5, 0.5, 0.5, 1, 0, 0.5, 0, 1]
            ],
            evidence=['onPP', 'onDP', 'inC'],
            evidence_card=[2, 2, 2],
            state_names={
                'X': ['Pickup', 'Dropoff'],
                'onPP': ['False', 'True'],
                'onDP': ['False', 'True'],
                'inC': ['False', 'True']
                }
            )
        cpd_Y = TabularCPD(
            variable='Y',
            variable_card=2,
            values=[
                [1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], 
                [0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]
            ],
            evidence=['X', 'inC', 'onDP', 'onPP'],
            evidence_card=[2, 2, 2, 2],
            state_names={
                'Y': ['False', 'True'],
                'X': ['Pickup', 'Dropoff'],
                'inC': ['False', 'True'],
                'onDP': ['False', 'True'],
                'onPP': ['False', 'True']
                }
                )

        # Associating the CPDs with the network
        self._causal_model.add_cpds(cpd_PP, cpd_DP, cpd_CP, cpd_onPP, cpd_onDP, cpd_inC, cpd_X, cpd_Y)

        # check_model checks for the network structure and CPDs and verifies that the CPDs are correctly 
        # defined and sum to 1.
        self._causal_model.check_model()

    def get_causal_model(self):
        return self._causal_model

    def get_target(self):
        return 'Y'

    def get_evidence(self, state):
        # PP = Passenger Position
        # CP = Cab Position
        # DP = Destination Position of the passenger
        # onPP = the cab is on the Passenger Position
        # onDP = the cab is on the Destination Position
        # inC = passenger is in the cab

        # (taxi_row, taxi_col, passenger_location, destination)
        state = self.decode(state)

        inC = {
            0: {'inC': 'False'}, 
            1: {'inC': 'False'}, 
            2: {'inC': 'False'}, 
            3: {'inC': 'False'}, 
            4: {'inC': 'True'}
            }
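        # Taxi-v3 places the four fixed pickup/drop-off locations R, G, Y, B at the
        # grid cells (0, 0), (0, 4), (4, 0) and (4, 3), i.e. flat indices 0, 4, 20, 23;
        # passenger_location == 4 means the passenger is already in the cab.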
        pp = {
            0 : {'PP' : 'state ' + str(0)},
            1 : {'PP' : 'state ' + str(4)},
            2 : {'PP' : 'state ' + str(20)},
            3 : {'PP' : 'state ' + str(23)},
            4 : {'PP' : 'state ' + str(state[0]*5 + state[1])}}
        pd = {
            0 : {'DP' : 'destination ' + str(0)},
            1 : {'DP' : 'destination ' + str(4)},
            2 : {'DP' : 'destination ' + str(20)},
            3 : {'DP' : 'destination ' + str(23)}}

        evidence = {}
        # EVIDENCE inC
        evidence.update(inC[state[2]])
        
        # EVIDENCE onPP, onDP
        if state[0]*5 + state[1] == 0:
            if state[2] == 0:
                evidence.update({'onPP' : 'True'})
            else:
                evidence.update({'onPP' : 'False'})
            if state[3] == 0:
                evidence.update({'onDP' : 'True'})
            else:
                evidence.update({'onDP' : 'False'})
        elif state[0]*5 + state[1] == 4:
            if state[2] == 1:
                evidence.update({'onPP' : 'True'})
            else:
                evidence.update({'onPP' : 'False'})
            if state[3] == 1:
                evidence.update({'onDP' : 'True'})
            else:
                evidence.update({'onDP' : 'False'})
        elif state[0]*5 + state[1] == 20:
            if state[2] == 2:
                evidence.update({'onPP' : 'True'})
            else:
                evidence.update({'onPP' : 'False'})
            if state[3] == 2:
                evidence.update({'onDP' : 'True'})
            else:
                evidence.update({'onDP' : 'False'})
        elif state[0]*5 + state[1] == 23:
            if state[2] == 3:
                evidence.update({'onPP' : 'True'})
            else:
                evidence.update({'onPP' : 'False'})           
            if state[3] == 3:
                evidence.update({'onDP' : 'True'})
            else:
                evidence.update({'onDP' : 'False'})

        if self._observability == 'full':
            # EVIDENCE CP, PP, DP
            evidence = {'CP' : 'cab state ' + str(state[0]*5 + state[1])}
            evidence.update(pp[state[2]])        
            evidence.update(pd[state[3]])

        return evidence

    def get_action(self):
        return 'X'

    def get_action_values(self):
        return ['Pickup', 'Dropoff']

    def get_good_target_value(self):
        return 'True'

    def causal_action_to_env_action(self, causal_action):
        if causal_action == 'Pickup':
            return 4
        elif causal_action == 'Dropoff':
            return 5

    def get_agent_intent(self):
        return 0
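
As a usage illustration (not part of the original class), the causal model built above can be queried with pgmpy's VariableElimination to score each causal action by how much probability it puts on the target Y being 'True'. This is only a sketch: it assumes gym with Taxi-v3 installed and a pgmpy version that accepts state names as evidence.

from pgmpy.inference import VariableElimination

env = TaxiGenericModel(build_causal_model=True, observability='full')
infer = VariableElimination(env.get_causal_model())

state = env.reset()
evidence = env.get_evidence(state)
for causal_action in env.get_action_values():
    # P(Y | X = causal_action, observed positions); more mass on 'True' means a better action
    q = infer.query(variables=[env.get_target()],
                    evidence={**evidence, env.get_action(): causal_action})
    print(causal_action, q)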
Example #54
0
    def setUp(self):
        self.model_disconnected = BayesianModel()
        self.model_disconnected.add_nodes_from(['A', 'B', 'C', 'D', 'E'])

        self.model_connected = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
Example #55
0
class TestBaseModelCreation(unittest.TestCase):
    def setUp(self):
        self.G = BayesianModel()

    def test_class_init_without_data(self):
        self.assertIsInstance(self.G, nx.DiGraph)

    def test_class_init_with_data_string(self):
        self.g = BayesianModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.g.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.g.edges()),
                             [['a', 'b'], ['b', 'c']])

    def test_class_init_with_data_nonstring(self):
        BayesianModel([(1, 2), (2, 3)])

    def test_add_node_string(self):
        self.G.add_node('a')
        self.assertListEqual(self.G.nodes(), ['a'])

    def test_add_node_nonstring(self):
        self.G.add_node(1)

    def test_add_nodes_from_string(self):
        self.G.add_nodes_from(['a', 'b', 'c', 'd'])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c', 'd'])

    def test_add_nodes_from_non_string(self):
        self.G.add_nodes_from([1, 2, 3, 4])

    def test_add_edge_string(self):
        self.G.add_edge('d', 'e')
        self.assertListEqual(sorted(self.G.nodes()), ['d', 'e'])
        self.assertListEqual(self.G.edges(), [('d', 'e')])
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edge('a', 'b')
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['d', 'e']])

    def test_add_edge_nonstring(self):
        self.G.add_edge(1, 2)

    def test_add_edge_selfloop(self):
        self.assertRaises(ValueError, self.G.add_edge, 'a', 'a')

    def test_add_edge_result_cycle(self):
        self.G.add_edges_from([('a', 'b'), ('a', 'c')])
        self.assertRaises(ValueError, self.G.add_edge, 'c', 'a')

    def test_add_edges_from_string(self):
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(sorted(self.G.nodes()), ['a', 'b', 'c'])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             [['a', 'b'], ['b', 'c']])
        self.G.add_nodes_from(['d', 'e', 'f'])
        self.G.add_edges_from([('d', 'e'), ('e', 'f')])
        self.assertListEqual(sorted(self.G.nodes()),
                             ['a', 'b', 'c', 'd', 'e', 'f'])
        self.assertListEqual(hf.recursive_sorted(self.G.edges()),
                             hf.recursive_sorted([('a', 'b'), ('b', 'c'),
                                                  ('d', 'e'), ('e', 'f')]))

    def test_add_edges_from_nonstring(self):
        self.G.add_edges_from([(1, 2), (2, 3)])

    def test_add_edges_from_self_loop(self):
        self.assertRaises(ValueError, self.G.add_edges_from,
                          [('a', 'a')])

    def test_add_edges_from_result_cycle(self):
        self.assertRaises(ValueError, self.G.add_edges_from,
                          [('a', 'b'), ('b', 'c'), ('c', 'a')])

    def test_update_node_parents_bm_constructor(self):
        self.g = BayesianModel([('a', 'b'), ('b', 'c')])
        self.assertListEqual(self.g.predecessors('a'), [])
        self.assertListEqual(self.g.predecessors('b'), ['a'])
        self.assertListEqual(self.g.predecessors('c'), ['b'])

    def test_update_node_parents(self):
        self.G.add_nodes_from(['a', 'b', 'c'])
        self.G.add_edges_from([('a', 'b'), ('b', 'c')])
        self.assertListEqual(self.G.predecessors('a'), [])
        self.assertListEqual(self.G.predecessors('b'), ['a'])
        self.assertListEqual(self.G.predecessors('c'), ['b'])

    def tearDown(self):
        del self.G
Example #56
0
def nodeChildPairs(model: BayesianModel,
                   vars: List[Name]) -> Dict[Name, List[Name]]:
    return {node: list(model.successors(n=node)) for node in vars}
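
A quick, hypothetical usage of the helper above on a small network (not from the original source):

g = BayesianModel([('a', 'd'), ('b', 'd'), ('d', 'e'), ('b', 'c')])
print(nodeChildPairs(g, ['b', 'd']))  # children of 'b' are ['d', 'c'], children of 'd' are ['e']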
Example #57
0
    def do_it(self):
        '''EXPLANATIONS'''
        self.networx_fixed, self.dictionary, self.header = self._util.get_network(
        )
        self.networx = self.networx_fixed.copy()
        self.networx_test = self.networx_fixed.copy()
        print('Dictionary : ', self.dictionary)
        ''' -------------- Constructing all possible topologies,
                              --> option : restrict the number with the threshold :
                                        0 -> all possible topologies, 100 -> only the fully connected topology'''
        possible_topologies = self._lat.get_possible_topologies(
            treshold=50
        )  # threshold set at 50% -> only topologies with an entropy >= 0.5 will be considered
        print("Possible topologies : ", len(possible_topologies))
        entropy = 0
        count = 0  #TEMPORARY
        ''' -------------- walking through all topologies'''
        for topology in possible_topologies:
            if self.loop < 200 or self.loop > 350:
                self.loop += 1
                count += 1
                continue
            entropy = topology[1]
            if entropy == 0:
                continue  #safeguard
            print('Loop *-> ', self.loop + 1, ' of ', len(possible_topologies))
            topo = topology[0]
            self.networx = nx.DiGraph()
            self.networx = self.networx_fixed.copy()
            ''' ----------- for each topology we construct the edges and update the dummy CPDs (necessary as the shape of the LENs' CPDs can change
                            depending on the number of incoming nodes)'''
            self.add_edges(topo)
            self.add_dummy_cpds()
            self.nodes = self.networx.nodes(data=True)
            self.create_learning_data()
            # print('edges = ' , self.edges)
            #print(self.learning_data)
            ''' ----------- convert DiGraph to pgmpy and check'''
            self.pgmpy = BayesianModel()
            self.pgmpy = self._util.translate_digraph_to_pgmpy(
                self.networx.copy())
            '''------------ ask pgmpy to estimate the best CPDs of the LANs and LENs
                             -> provide pgmpy with the learning data'''

            self.pgmpy.check_model()
            self.estimate_parameters()
            '''-------------- Testing the constructed topology'''
            self.test_topology(entropy)
            '''following 4 lines to remove : just used to check whether the edge-building algorithms are correct'''
            count += 1
            #print('edges : ', self.edges)
            #
            # if count > 350:
            #     break
        print('Check -> number of processed topologies in loop : ', count)
        # print('My colors : ')
        # print(self.colors_table)
        # print(self.colors_cpd)
        '''  the methods have to be completed to cope with the general case i.e. BENS, MEMS, LANS, MOTORs, WORLDs,
        but for the time being we just assume there are only BENs and WORLDs'''

        # self.networx.add_edge('BENS_1','WORLD_1')
        # self.networx.node['BENS_1']['cpd'] = [0.8,0.2]
        # self.networx.node['WORLD_2']['cpd'] = [[0.8, 0.2, 0.5,0.3],[0.2,0.8,0.5,0.7]]
        ''' if a best model has been found, save it -> first update the Utility class object and save it'''
        # self._util.update_networkx(self.networx, self.dictionary, self.header)
        # self._util.save_network()
        # self._util.update_pgmpy(self.pgmpy, self.dictionary, self.header)
        # self._util.save_pgmpy_network()
        self.draw()
        self.draw_xy()
        return self.results
Example #58
0
    def setUp(self):
        self.G = BayesianModel([('a', 'd'), ('b', 'd'),
                                ('d', 'e'), ('b', 'c')])
Example #59
0
class TestDirectedGraphCPDOperations(unittest.TestCase):
    def setUp(self):
        self.graph = BayesianModel()

    def test_add_single_cpd(self):
        cpd = TabularCPD('grade', 2, np.random.rand(2, 4),
                         ['diff', 'intel'], [2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd)
        self.assertListEqual(self.graph.get_cpds(), [cpd])

    def test_add_multiple_cpds(self):
        cpd1 = TabularCPD('diff', 2, np.random.rand(2, 1))
        cpd2 = TabularCPD('intel', 2, np.random.rand(2, 1))
        cpd3 = TabularCPD('grade', 2, np.random.rand(2, 4),
                          ['diff', 'intel'], [2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.assertListEqual(self.graph.get_cpds(), [cpd1, cpd2, cpd3])

    def test_remove_single_cpd(self):
        cpd1 = TabularCPD('diff', 2, np.random.rand(2, 1))
        cpd2 = TabularCPD('intel', 2, np.random.rand(2, 1))
        cpd3 = TabularCPD('grade', 2, np.random.rand(2, 4),
                          ['diff', 'intel'], [2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.graph.remove_cpds(cpd1)
        self.assertListEqual(self.graph.get_cpds(), [cpd2, cpd3])

    def test_remove_multiple_cpds(self):
        cpd1 = TabularCPD('diff', 2, np.random.rand(2, 1))
        cpd2 = TabularCPD('intel', 2, np.random.rand(2, 1))
        cpd3 = TabularCPD('grade', 2, np.random.rand(2, 4),
                          ['diff', 'intel'], [2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.graph.remove_cpds(cpd1, cpd3)
        self.assertListEqual(self.graph.get_cpds(), [cpd2])

    def test_remove_single_cpd_string(self):
        cpd1 = TabularCPD('diff', 2, np.random.rand(2, 1))
        cpd2 = TabularCPD('intel', 2, np.random.rand(2, 1))
        cpd3 = TabularCPD('grade', 2, np.random.rand(2, 4),
                          ['diff', 'intel'], [2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.graph.remove_cpds('diff')
        self.assertListEqual(self.graph.get_cpds(), [cpd2, cpd3])

    def test_remove_multiple_cpds_string(self):
        cpd1 = TabularCPD('diff', 2, np.random.rand(2, 1))
        cpd2 = TabularCPD('intel', 2, np.random.rand(2, 1))
        cpd3 = TabularCPD('grade', 2, np.random.rand(2, 4),
                          ['diff', 'intel'], [2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.graph.remove_cpds('diff', 'grade')
        self.assertListEqual(self.graph.get_cpds(), [cpd2])

    def test_get_cpd_for_node(self):
        cpd1 = TabularCPD('diff', 2, np.random.rand(2, 1))
        cpd2 = TabularCPD('intel', 2, np.random.rand(2, 1))
        cpd3 = TabularCPD('grade', 2, np.random.rand(2, 4),
                          ['diff', 'intel'], [2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.assertEqual(self.graph.get_cpds('diff'), cpd1)
        self.assertEqual(self.graph.get_cpds('intel'), cpd2)
        self.assertEqual(self.graph.get_cpds('grade'), cpd3)

    def test_get_cpd_raises_error(self):
        cpd1 = TabularCPD('diff', 2, np.random.rand(2, 1))
        cpd2 = TabularCPD('intel', 2, np.random.rand(2, 1))
        cpd3 = TabularCPD('grade', 2, np.random.rand(2, 4),
                          ['diff', 'intel'], [2, 2])
        self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        self.graph.add_cpds(cpd1, cpd2, cpd3)
        self.assertRaises(ValueError, self.graph.get_cpds, 'sat')

    def tearDown(self):
        del self.graph
Example #60
0
class MyClass(object):
    def __init__(self, case):
        self.case = case
        self.results = []
        self.networx_test = nx.DiGraph()
        self.networx_fixed = nx.DiGraph()
        self.pgmpy_test = BayesianModel()
        self.networx = nx.DiGraph()
        self.pgmpy = BayesianModel()
        self.best_error = math.inf
        self.best_topology = [0, 0, nx.DiGraph,
                              0]  #[error, entropy, networkx DiGraph, loop]
        self.dictionary = []
        self.header = {}
        self.nodes_0 = []
        self.edges_0 = {}
        self.nodes = []
        self.edges = {}
        self.cpds = {}
        self.colors_dictionary = {}
        self.colors_table = []
        self.colors_cpd = []
        self.learning_data = {}
        self.number_of_colors = 0
        self._util = Utilities(case)
        self._lat = Lattices(self._util)
        self.expected_result = [0, 0, 0]
        self.loop = 0

    def get_my_colors(self):
        evidence = []
        cardinality = []
        for i, node in enumerate(self.nodes):
            if 'BEN' in node[0] or 'MEM' in node[0]:
                evidence.append(node[0])
                cardinality.append(node[1]['cardinality'])
        self.colors_dictionary, self.colors_table, self.colors_cpd = self.color_cpd(
            'WORLD', 3, evidence, cardinality)
        self.number_of_colors = self.colors_table.shape[1]
        # for i in range(0, len(self.colors_table[1])):
        #     rows = len(self.colors_table)
        #     hi = 1000
        #     lo = 1
        #     sum = hi+(rows-1)
        #     hi /= sum
        #     lo /= sum
        #     for j in range(0, rows):
        #         if self.colors_table[j][i] == 1:
        #             self.colors_table[j][i] = hi
        #         else:
        #             self.colors_table[j][i] = lo

        # print('Number of colors : ', self.number_of_colors)
        # print(self.colors_cpd)
        #print(self.colors_cpd.values)

    def color_cpd(self, var, card_var, evidence, cardinality):
        table = CPD.get_index_matrix(cardinality)
        colors = {}
        hi = 1  #0.999
        lo = 1 - hi
        C = np.prod(cardinality)
        matrix = np.full((3, C), 1. / 3.)
        if 'BENS_1' in evidence and not 'BENS_2' in evidence and 'BENS_3' in evidence and 'BENS_0' in evidence:
            matrix[0] = [1. / 3, lo, hi, 1. / 3, 1. / 3, lo, hi, 1. / 3]
            matrix[1] = [1. / 3, lo, lo, 1. / 3, 1. / 3, lo, lo, 1. / 3]
            matrix[2] = [1. / 3, hi, lo, 1. / 3, 1. / 3, hi, lo, 1. / 3]
        if 'BENS_1' in evidence and not 'BENS_2' in evidence and 'BENS_3' in evidence and not 'BENS_0' in evidence:
            matrix[0] = [1. / 3, lo, hi, 1. / 3]
            matrix[1] = [1. / 3, lo, lo, 1. / 3]
            matrix[2] = [1. / 3, hi, lo, 1. / 3]
        if 'BENS_1' in evidence and 'BENS_2' in evidence and 'BENS_3' in evidence and not 'BENS_0' in evidence:
            matrix[0] = [lo, lo, lo, lo, hi, lo, hi, lo]
            matrix[1] = [hi, lo, hi, lo, lo, hi, lo, hi]
            matrix[2] = [lo, hi, lo, hi, lo, lo, lo, lo]
        if 'BENS_0' in evidence and 'BENS_1' in evidence and 'BENS_2' in evidence and 'BENS_3' in evidence:
            matrix[0] = [
                lo, lo, lo, lo, hi, lo, hi, lo, lo, lo, lo, lo, hi, lo, hi, lo
            ]
            matrix[1] = [
                hi, lo, hi, lo, lo, hi, lo, hi, hi, lo, hi, lo, lo, hi, lo, hi
            ]
            matrix[2] = [
                lo, hi, lo, hi, lo, lo, lo, lo, lo, hi, lo, hi, lo, lo, lo, lo
            ]

        cpd = TabularCPD(variable=var,
                         variable_card=card_var,
                         values=matrix,
                         evidence=evidence,
                         evidence_card=cardinality)
        for i, node in enumerate(evidence):
            colors.update({node: table[i]})
        return colors, table, cpd

    # def set_color(self, color):
    #     col = self.colors_table[:, color]
    #     for i in range(0,len(col)):
    #         node = 'BENS_'+ str(i)
    #         self.pgmpy.get_cpds(node).values = CPD.RON_cpd(node, self.pgmpy.get_cardinality(node), mu = int(col[i])).values

    def test_topology(self, entropy):
        self.networx_test = copy.deepcopy(self.networx)
        self.pgmpy_test = BayesianModel()
        self.pgmpy_test = self._util.translate_digraph_to_pgmpy(
            self.networx.copy())
        #model = {'main': GenerativeModel(SensoryInputVirtualPeepo(self), self.pgmpy_test)}
        self.expected_result = [0, 0, 0]
        ''' ------ going through all possible "colors" '''
        error = 0
        for color in range(0, self.number_of_colors):
            states = self.colors_table[:, color]
            shape = self.colors_cpd.values.shape
            reshaped_cpd = self.colors_cpd.values.reshape(
                shape[0], int(np.prod(shape) / shape[0]))
            self.expected_result = reshaped_cpd[:, int(color)]
            for i, pixel in enumerate(states):
                if 'BENS_' + str(i) not in self.networx_test.nodes():
                    continue
                cardinality = self.pgmpy_test.get_cardinality('BENS_' + str(i))
                self.pgmpy_test.get_cpds(
                    'BENS_' + str(i)).values = CPD.create_fixed_parent(
                        cardinality, state=int(pixel))
            #error += self.do_inference(model)

            error += self.do_simple_inference()
        error /= self.number_of_colors
        self.results.append([entropy, error])
        if error <= self.best_error:
            self.best_error = error
            self.best_topology[0] = error
            self.best_topology[1] = entropy
            self.best_topology[2] = self.networx_test
            self.best_topology[3] = self.loop
        self.loop += 1

    def do_inference(self, models):
        error = 0
        for key in models:
            error += models[key].process()
        return error

    '''.................. vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv ..................................'''

    def do_simple_inference(self):
        total_prediction_error_size = 0
        for node in self.pgmpy_test.get_leaves():
            prediction = self.predict(node)
            observation = self.sensory_input(node)
            prediction_error_size = self.error_size(prediction, observation)
            prediction_error = self.error(prediction, observation)
            precision = entropy(prediction, base=2)
            total_prediction_error_size += prediction_error_size
        return total_prediction_error_size

    def predict(self, node):
        """
        Predicts the given leaf node (i.e. the observational node) based on the root nodes (i.e. the belief nodes)
        :return: prediction for given observation variable, a prediction is a probability distribution
        :rtype: np.array
        """
        infer = VariableElimination(self.pgmpy_test)
        evidence = self.get_root_nodes()
        evidence = {k: v for k, v in evidence.items() if k not in [node]}
        return infer.query(variables=[node], evidence=evidence)[node].values

    def sensory_input(self, name):
        expected_result = self.expected_result
        cpds = []
        for i in range(0, len(expected_result)):
            cpds.append([
                'WORLD_' + str(i),
                CPD.create_fixed_parent(2, state=int(expected_result[i]))
            ])
        for i, node in enumerate(self.nodes):
            for j in range(0, len(cpds)):
                if name == cpds[j][0]:
                    return cpds[j][1]

    def error(self, pred, obs):
        """
        Calculates the prediction error as the residual of subtracting the predicted inputs from the observed inputs
        :param pred: predicted sensory inputs
        :param obs: observed sensory inputs
        :return: prediction error
        :type pred : np.array
        :type obs : np.array
        :rtype : np.array
        """
        return obs - pred

    def error_size(self, pred, obs):
        """
        Calculates the size of the prediction error as the Kullback-Leibler divergence. This reflects the magnitude
        of the prediction error, i.e. how wrong the prediction was.
        :param pred: predicted sensory inputs
        :param obs: observed sensory inputs
        :return: prediction error size
        :type pred : np.array
        :type obs : np.array
        :rtype : float
        """
        return entropy(obs, pred)

    def get_root_nodes(self):
        """
        Returns the status of all root nodes of the test network (self.pgmpy_test).
        :return: dictionary with every root node as key and the index of its most probable state as value
        :rtype: dict
        """
        roots = {}
        for root in self.pgmpy_test.get_roots():
            roots.update(
                {root: np.argmax(self.pgmpy_test.get_cpds(root).values)})
        return roots

    def get_observations(self):
        obs = {}
        for leaf in self.pgmpy_test.get_leaves():
            obs.update(
                {leaf: np.argmax(self.pgmpy_test.get_cpds(leaf).values)})
        return obs

    '''**********************   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^                           '''

    def estimate_parameters(self):
        data = pd.DataFrame(data=self.learning_data)
        sample_size = len(self.learning_data)
        # print(sample_size)
        estimator = BayesianEstimator(self.pgmpy, data)
        # print('data')
        # print('pgmpy node : ', self.pgmpy.nodes())
        # print(self.learning_data)
        # print(data)
        pseudocount = {
            'BENS_0': [1, 2],
            'BENS_1': [1, 2],
            'BENS_2': [1, 2],
            'BENS_3': [1, 2],
            'WORLD_0': [1, 2],
            'WORLD_1': [1, 2],
            'WORLD_2': [1, 2]
        }

        pseudocount = [0.9, 0.9]
        if ('BENS_1' not in self.pgmpy.nodes() or
                'BENS_2' not in self.pgmpy.nodes() or
                'BENS_3' not in self.pgmpy.nodes()):
            pseudocount = [0.9, 0.9, 0.9]
        # print('pseudocount :', pseudocount)
        for i, node in enumerate(self.nodes):

            if 'LAN' in node[0] or 'MOTOR' in node[0] or 'WORLD' in node[0]:
                # print('cardinality node ', node[0], ' : ', self.pgmpy.get_cardinality(node[0]))
                # print(self.pgmpy.get_cpds(node[0]).values)
                #self.pgmpy.get_cpds(node[0]).values = estimator.estimate_cpd(node[0], prior_type='dirichlet', pseudo_counts=pseudocount).values
                self.pgmpy.get_cpds(node[0]).values = estimator.estimate_cpd(
                    node[0],
                    prior_type='BDeu',
                    equivalent_sample_size=sample_size).values

    def add_edges(self, topology):
        self.networx.remove_edges_from(self.edges)
        self.edges = []
        self.nodes = []
        shape = np.asarray(topology).shape
        ''' let's first remove all void nodes  ----> not necessary -----> delete the code ??'''
        nodes_to_remove = []
        # rows = np.sum(topology, axis = 1)
        # for row in range(0, len(rows)):
        #     if rows[row] == 0:
        #         nodes_to_remove.append('WORLD_' + str(row))
        columns = np.sum(topology, axis=0)
        for column in range(0, len(columns)):
            if columns[column] == 0:
                nodes_to_remove.append('BENS_' + str(column))
        self.networx.remove_nodes_from(nodes_to_remove)
        self.nodes = self.networx.nodes(data=True)
        for column in range(0, shape[1]):
            for row in range(0, shape[0]):
                if topology[row][column] == 1:
                    parent = 'BENS_' + str(column)
                    child = 'WORLD_' + str(row)
                    self.networx.add_edge(parent, child)
        self.edges = self.networx.edges()
        # print('edges  --------------------------- >', self.edges)
        # print(self.nodes)

    def add_dummy_cpds(self):
        for i, node in enumerate(self.nodes):
            cardinality = node[1]['cardinality']
            if ('BEN' in node[0]) or ('MEM' in node[0]):
                self.nodes[i][1]['cpd'] = CPD.create_fixed_parent(
                    cardinality, modus='uniform')
            else:
                incoming_nodes = self.networx.in_edges(node[0])
                if len(incoming_nodes) == 0:
                    self.nodes[i][1]['cpd'] = CPD.create_random_child(
                        cardinality, modus='orphan')
                    continue
                card_parent = []
                for m, n in enumerate(incoming_nodes):
                    par = self.networx.node[n[0]]['cardinality']
                    card_parent.append(par)
                self.nodes[i][1]['cpd'] = CPD.create_random_child(
                    cardinality, card_parent)

        # for i, node in enumerate(self.nodes):
        #     print(node[0])
        #     print(node[1]['cpd'])
        self.nodes = self.networx.nodes(data=True)
        # print('   IN NETWORX  ')
        # for i, node in enumerate(self.nodes):
        #     print(node[0])
        #     print(node[1]['cpd'])

    def create_learning_data(self):
        self.get_my_colors()
        self.learning_data = {}
        ben_nodes = [x for x in self.nodes if "BEN" in x[0]]
        world_nodes = [x for x in self.nodes if "WORLD" in x[0]]

        for i, node in enumerate(ben_nodes):
            self.learning_data.update({node[0]: self.colors_table[i].tolist()})

        for i, node in enumerate(world_nodes):
            shape = self.colors_cpd.values.shape
            reshaped_cpd = self.colors_cpd.values.reshape(
                shape[0], int(np.prod(shape) / shape[0]))
            for hue in range(0, 3):
                if str(hue) in node[0]:
                    self.learning_data.update(
                        {node[0]: reshaped_cpd[hue, :].tolist()})
        # for i, node in enumerate(self.nodes):
        #     if "BEN" in node[0]:
        #         self.learning_data.update({node[0]:self.colors_table[i].tolist()})
        #     if "WORLD" in node[0]:
        #         shape = self.colors_cpd.values.shape
        #         reshaped_cpd = self.colors_cpd.values.reshape(shape[0], int(np.prod(shape)/shape[0]))
        #         for hue in range(0,3):
        #             if str(hue) in node[0]:
        #                 self.learning_data.update({node[0]:reshaped_cpd[hue,:].tolist()})
        # print('Learning data')
        # print(self.learning_data)

    def do_it(self):
        '''EXPLANATIONS'''
        self.networx_fixed, self.dictionary, self.header = self._util.get_network(
        )
        self.networx = self.networx_fixed.copy()
        self.networx_test = self.networx_fixed.copy()
        print('Dictionary : ', self.dictionary)
        ''' -------------- Constructing all possible topologies,
                              --> option : restrict the number with the threshold :
                                        0 -> all possible topologies, 100 -> only the fully connected topology'''
        possible_topologies = self._lat.get_possible_topologies(
            treshold=50
        )  # threshold set at 50% -> only topologies with an entropy >= 0.5 will be considered
        print("Possible topologies : ", len(possible_topologies))
        entropy = 0
        count = 0  #TEMPORARY
        ''' -------------- walking through all topologies'''
        for topology in possible_topologies:
            if self.loop < 200 or self.loop > 350:
                self.loop += 1
                count += 1
                continue
            entropy = topology[1]
            if entropy == 0:
                continue  #safeguard
            print('Loop *-> ', self.loop + 1, ' of ', len(possible_topologies))
            topo = topology[0]
            self.networx = nx.DiGraph()
            self.networx = self.networx_fixed.copy()
            ''' ----------- for each topology we construct the edges and update the dummy CPDs (necessary as the shape of the LENs' CPDs can change
                            depending on the number of incoming nodes)'''
            self.add_edges(topo)
            self.add_dummy_cpds()
            self.nodes = self.networx.nodes(data=True)
            self.create_learning_data()
            # print('edges = ' , self.edges)
            #print(self.learning_data)
            ''' ----------- convert DiGraph to pgmpy and check'''
            self.pgmpy = BayesianModel()
            self.pgmpy = self._util.translate_digraph_to_pgmpy(
                self.networx.copy())
            '''------------ ask pgmpy to estimate the best CPDs of the LANs and LENs
                             -> provide pgmpy with the learning data'''

            self.pgmpy.check_model()
            self.estimate_parameters()
            '''-------------- Testing the constructed topology'''
            self.test_topology(entropy)
            '''following 4 lines to remove : just used to check whether the edge-building algorithms are correct'''
            count += 1
            #print('edges : ', self.edges)
            #
            # if count > 350:
            #     break
        print('Check -> number of processed topologies in loop : ', count)
        # print('My colors : ')
        # print(self.colors_table)
        # print(self.colors_cpd)
        '''  the methods have to be completed to cope with the general case i.e. BENS, MEMS, LANS, MOTORs, WORLDs,
        but for the time being we just assume there are only BENs and WORLDs'''

        # self.networx.add_edge('BENS_1','WORLD_1')
        # self.networx.node['BENS_1']['cpd'] = [0.8,0.2]
        # self.networx.node['WORLD_2']['cpd'] = [[0.8, 0.2, 0.5,0.3],[0.2,0.8,0.5,0.7]]
        ''' if a best model has been found, save it -> first update the Utility class object and save it'''
        # self._util.update_networkx(self.networx, self.dictionary, self.header)
        # self._util.save_network()
        # self._util.update_pgmpy(self.pgmpy, self.dictionary, self.header)
        # self._util.save_pgmpy_network()
        self.draw()
        self.draw_xy()
        return self.results

    def draw_xy(self):
        x = []
        y = []
        s = []
        color = []
        best_x = 0
        best_y = 0
        for i in range(0, len(self.results)):
            x.append(self.results[i][0])
            y.append(self.results[i][1])
            if i == self.best_topology[3]:
                best_x = self.results[i][0]
                best_y = self.results[i][1]
                s.append(60)
                color.append("r")
            else:
                s.append(20)
                color.append("b")
        plt.scatter(x, y, s=s, c=color, alpha=0.5)
        plt.xlabel("Complexity of topology")
        plt.ylabel("Average error over all colors")
        plt.show()

    def draw(self):
        '''TO REMOVE LATER'''
        plt.figure(figsize=(10, 5))
        pos = nx.circular_layout(self.best_topology[2], scale=2)
        #node_labels = nx.get_node_attributes(self.networx, 'cpd')
        nx.draw(self.best_topology[2],
                pos,
                node_size=1200,
                node_color='lightblue',
                linewidths=0.25,
                font_size=10,
                font_weight='bold',
                with_labels=True)
        plt.text(1, 1, 'Topology nr. : ' + str(self.best_topology[3]))
        plt.show()
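
To make the error_size measure used above concrete, here is a small, hypothetical check of the Kullback-Leibler divergence computed by scipy's entropy(obs, pred): it is close to 0 when the prediction matches the observation and grows as the prediction drifts away from it.

import numpy as np
from scipy.stats import entropy

obs = np.array([1.0, 0.0, 0.0])            # observed (one-hot) sensory input
good_pred = np.array([0.98, 0.01, 0.01])   # close prediction  -> small divergence
vague_pred = np.array([0.34, 0.33, 0.33])  # vague prediction  -> larger divergence

print(entropy(obs, good_pred))   # ~0.02
print(entropy(obs, vague_pred))  # ~1.08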