def setUp(self):
        """Create the fixtures: a Bayesian network, a Markov network and a Gibbs sampler.

        The Bayesian network has the structure ``diff -> grade <- intel``; the
        Gibbs sampler is built on top of that Bayesian network.
        """
        # Bayesian network: structure first, then its CPDs.
        student_net = BayesianModel()
        student_net.add_nodes_from(["diff", "intel", "grade"])
        student_net.add_edges_from([("diff", "grade"), ("intel", "grade")])
        student_net.add_cpds(
            TabularCPD("diff", 2, [[0.6], [0.4]]),
            TabularCPD("intel", 2, [[0.7], [0.3]]),
            TabularCPD(
                "grade",
                3,
                [
                    [0.3, 0.05, 0.9, 0.5],
                    [0.4, 0.25, 0.08, 0.3],
                    [0.3, 0.7, 0.02, 0.2],
                ],
                evidence=["diff", "intel"],
                evidence_card=[2, 2],
            ),
        )
        self.bayesian_model = student_net

        # Markov network over A, B, C, D with one factor per edge.
        markov_net = MarkovModel([("A", "B"), ("C", "B"), ("B", "D")])
        markov_net.add_factors(
            DiscreteFactor(["A", "B"], [2, 3], [1, 2, 3, 4, 5, 6]),
            DiscreteFactor(
                ["C", "B"], [4, 3], [3, 1, 4, 5, 7, 8, 1, 3, 10, 4, 5, 6]
            ),
            DiscreteFactor(["B", "D"], [3, 2], [5, 7, 2, 1, 9, 3]),
        )
        self.markov_model = markov_net

        self.gibbs = GibbsSampling(self.bayesian_model)
 def setUp(self):
     """Create the model, data sets, expected CPDs and estimator used by the tests.

     ``d1`` is a complete data set over A, B, C; ``d2`` contains missing
     values (NaN) and an extra column ``D``.
     """
     self.m1 = BayesianModel([("A", "C"), ("B", "C")])
     self.d1 = pd.DataFrame(data={
         "A": [0, 0, 1],
         "B": [0, 1, 0],
         "C": [1, 1, 0]
     })
     # ``np.nan`` is used instead of the ``np.NaN`` alias, which was removed
     # in NumPy 2.0; ``np.nan`` exists in every NumPy version.
     self.d2 = pd.DataFrame(
         data={
             "A": [0, np.nan, 1],
             "B": [0, 1, 0],
             "C": [1, 1, np.nan],
             "D": [np.nan, "Y", np.nan],
         })
     self.cpds = [
         TabularCPD("A", 2, [[2.0 / 3], [1.0 / 3]]),
         TabularCPD("B", 2, [[2.0 / 3], [1.0 / 3]]),
         TabularCPD(
             "C",
             2,
             [[0.0, 0.0, 1.0, 0.5], [1.0, 1.0, 0.0, 0.5]],
             evidence=["A", "B"],
             evidence_card=[2, 2],
         ),
     ]
     self.mle1 = MaximumLikelihoodEstimator(self.m1, self.d1)
Example #3
0
    def generate_cpds(self):
        """Attach a random, column-normalised CPD to every node of ``self.graph``.

        Nodes are visited in topological order so that the cardinality of every
        parent is already known when a child's table is generated. Each node
        gets a random cardinality drawn from {2, 3, 4, 5}. The resulting
        model is validated with ``check_model()`` and stored in ``self.model``.
        """
        model = BayesianModel([(str(a), str(b))
                               for a, b in self.graph.edges()])

        variable_cards = {}
        cpds = []
        for n in nx.topological_sort(self.graph):
            causes = sorted(self.graph.predecessors(n))
            variable_card = random.choice([2, 3, 4, 5])
            variable_cards[n] = variable_card

            if causes:
                evidence_card = [variable_cards[i] for i in causes]
                values = np.random.rand(variable_card, np.prod(evidence_card))
                parent_kwargs = {
                    "evidence": [str(a) for a in causes],
                    "evidence_card": evidence_card,
                }
            else:
                # Root node: a single column of shape (variable_card, 1), the
                # layout TabularCPD expects for a CPD without evidence (the
                # original built a (1, variable_card) row here).
                values = np.random.rand(variable_card, 1)
                parent_kwargs = {}

            # Normalise every column so it is a probability distribution.
            values = values / np.sum(values, axis=0)
            cpds.append(TabularCPD(variable=str(n),
                                   variable_card=variable_card,
                                   values=values,
                                   **parent_kwargs))

        model.add_cpds(*cpds)
        model.check_model()

        self.model = model
def load_toy_symptom():
    """
    Build the toy "symptom" network.

    Two root variables, 'Disease' and 'Another Disease', are both parents of
    'Symptom'. Every variable is binary.

    NOTE(review): ``state_names`` is passed as a plain list here; pgmpy
    releases that expect a ``{variable: [states]}`` dict may reject it --
    confirm against the pgmpy version in use.

    Returns
    -------
    BayesianModel
        The model with its three CPDs attached.
    """
    symptom, disease, other_disease = 'Symptom', 'Disease', 'Another Disease'
    states = ['No', 'Yes']

    other_disease_prior = TabularCPD(
        variable=other_disease,
        variable_card=2,
        values=[[0.7], [0.3]],
        state_names=states,
    )
    disease_prior = TabularCPD(
        variable=disease,
        variable_card=2,
        values=[[0.9], [0.1]],
        state_names=states,
    )
    symptom_given_diseases = TabularCPD(
        variable=symptom,
        variable_card=2,
        values=[
            [0.3, 0.99, 0.1, 0.1],
            [0.7, 0.01, 0.9, 0.9],
        ],
        evidence=[disease, other_disease],
        evidence_card=[2, 2],
        state_names=states,
    )

    network = BayesianModel([(disease, symptom), (other_disease, symptom)])
    network.add_cpds(other_disease_prior, disease_prior, symptom_given_diseases)
    return network
    def setUp(self):
        """Create two XMLBIF writers: one from a parsed model, one from a hand-built model.

        ``self.expected_model`` is read from ``TEST_FILE``; ``self.model_stateless``
        is built directly with CPDs that carry no explicit state names.
        """
        reader = XMLBIFReader(string=TEST_FILE)
        self.expected_model = reader.get_model()
        self.writer = XMLBIFWriter(self.expected_model)

        self.model_stateless = BayesianModel([('D', 'G'), ('I', 'G'), ('G', 'L'), ('I', 'S')])
        # Root CPDs are given as (variable_card, 1) columns, the shape
        # TabularCPD expects when there is no evidence (originally 1x2 rows).
        self.cpd_d = TabularCPD(variable='D', variable_card=2, values=[[0.6], [0.4]])
        self.cpd_i = TabularCPD(variable='I', variable_card=2, values=[[0.7], [0.3]])

        self.cpd_g = TabularCPD(variable='G', variable_card=3,
                                values=[[0.3, 0.05, 0.9,  0.5],
                                        [0.4, 0.25, 0.08, 0.3],
                                        [0.3, 0.7,  0.02, 0.2]],
                                evidence=['I', 'D'],
                                evidence_card=[2, 2])

        self.cpd_l = TabularCPD(variable='L', variable_card=2,
                                values=[[0.1, 0.4, 0.99],
                                        [0.9, 0.6, 0.01]],
                                evidence=['G'],
                                evidence_card=[3])

        self.cpd_s = TabularCPD(variable='S', variable_card=2,
                                values=[[0.95, 0.2],
                                        [0.05, 0.8]],
                                evidence=['I'],
                                evidence_card=[2])

        self.model_stateless.add_cpds(self.cpd_d, self.cpd_i, self.cpd_g, self.cpd_l, self.cpd_s)
        self.writer_stateless = XMLBIFWriter(self.model_stateless)
Example #6
0
    def setUp(self):
        """Create a chain Bayesian network (a -> b -> c -> d -> e) and a 4-node Markov network."""
        self.bayesian = BayesianModel([("a", "b"), ("b", "c"), ("c", "d"),
                                       ("d", "e")])
        # Root CPD as a (2, 1) column: the shape TabularCPD expects when no
        # evidence is given (the original passed a 1x2 row).
        a_cpd = TabularCPD("a", 2, [[0.4], [0.6]])
        b_cpd = TabularCPD("b",
                           2, [[0.2, 0.4], [0.8, 0.6]],
                           evidence=["a"],
                           evidence_card=[2])
        c_cpd = TabularCPD("c",
                           2, [[0.1, 0.2], [0.9, 0.8]],
                           evidence=["b"],
                           evidence_card=[2])
        d_cpd = TabularCPD("d",
                           2, [[0.4, 0.3], [0.6, 0.7]],
                           evidence=["c"],
                           evidence_card=[2])
        e_cpd = TabularCPD("e",
                           2, [[0.3, 0.2], [0.7, 0.8]],
                           evidence=["d"],
                           evidence_card=[2])
        self.bayesian.add_cpds(a_cpd, b_cpd, c_cpd, d_cpd, e_cpd)

        # Markov network forming the cycle a - b - d - c - a, one factor per edge.
        self.markov = MarkovModel([("a", "b"), ("b", "d"), ("a", "c"),
                                   ("c", "d")])
        factor_1 = DiscreteFactor(["a", "b"], [2, 2],
                                  np.array([100, 1, 1, 100]))
        factor_2 = DiscreteFactor(["a", "c"], [2, 2],
                                  np.array([40, 30, 100, 20]))
        factor_3 = DiscreteFactor(["b", "d"], [2, 2],
                                  np.array([1, 100, 100, 1]))
        factor_4 = DiscreteFactor(["c", "d"], [2, 2],
                                  np.array([60, 60, 40, 40]))
        self.markov.add_factors(factor_1, factor_2, factor_3, factor_4)
Example #7
0
    def intervene(self, intervention_node, intervention_value=None):
        """Clamp ``intervention_node`` to ``intervention_value`` inside ``self.bn``.

        The node's existing CPD is removed and replaced by a deterministic one.

        * If ``intervention_value`` is one of the node's states, the node keeps
          its cardinality, gets a one-hot CPD on that state, and all of its
          incoming edges are removed.
        * Otherwise the node is removed and re-added as an isolated node with a
          single state equal to ``intervention_value``.

        The modified network is validated with ``check_model()``.

        Parameters
        ----------
        intervention_node : hashable
            Node to intervene on; converted to ``str`` before use.
        intervention_value : object, optional
            State the node is forced to take.
        """
        intervention_node = str(intervention_node)

        v_card, states = self.get_state_space(intervention_node)

        # One-hot column of shape (len(states), 1): TabularCPD expects a
        # (variable_card, 1) table when the variable has no evidence (the
        # original built a 1xN row here).
        values = np.array([[float(s == intervention_value)] for s in states])

        self.bn.remove_cpds(intervention_node)

        if np.sum(values) == 0:
            # The requested value is not a known state: rebuild the node as an
            # isolated single-state variable.
            self.bn.remove_node(intervention_node)
            self.bn.add_node(intervention_node)
            cpd = TabularCPD(
                variable=intervention_node,
                variable_card=1,
                values=np.array([[1.]]),
                state_names={intervention_node: [intervention_value]})
            self.bn.add_cpds(cpd)
        else:
            cpd = TabularCPD(variable=intervention_node,
                             variable_card=v_card,
                             values=values)

            # Snapshot the incoming edges before mutating the graph.
            for n_in, n_out in list(self.bn.in_edges(intervention_node)):
                self.bn.remove_edge(n_in, n_out)

            self.bn.add_cpds(cpd)

        self.bn.check_model()
Example #8
0
def main():
    """Build the Monty Hall network and print the host's distribution for one case.

    Variables (three states each): ``C`` is the contestant's choice, ``P`` the
    door hiding the prize and ``H`` the door opened by the host. ``H`` depends
    on both ``C`` and ``P``.
    """
    # Defining the network structure
    model = BayesianModel([('C', 'H'), ('P', 'H')])

    # Defining the CPDs. The priors are exact uniform distributions given as
    # (3, 1) columns: the original used a 1x3 row of 0.33 values, which does
    # not sum to 1 and is not the shape TabularCPD expects for a root node.
    uniform_prior = [[1.0 / 3], [1.0 / 3], [1.0 / 3]]
    cpd_c = TabularCPD('C', 3, uniform_prior)
    cpd_p = TabularCPD('P', 3, uniform_prior)
    cpd_h = TabularCPD('H',
                       3, [[0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 1.0, 0.5],
                           [0.5, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.5],
                           [0.5, 1.0, 0.0, 1.0, 0.5, 0.0, 0.0, 0.0, 0.0]],
                       evidence=['C', 'P'],
                       evidence_card=[3, 3])

    # Associating the CPDs with the network structure.
    model.add_cpds(cpd_c, cpd_p, cpd_h)

    # Inferring the posterior probability of the host's door given that the
    # contestant picked door 0 and the prize is behind door 0.
    infer = VariableElimination(model)
    posterior_p = infer.query(['H'], evidence={'C': 0, 'P': 0})
    # NOTE(review): indexing the result by variable name assumes a pgmpy
    # release whose query() returns a dict -- confirm for the version in use.
    print(posterior_p['H'])
Example #9
0
def basic_different_dec_cardinality() -> MACID:
    """Return a two-agent MACID whose decisions have different cardinalities.

    Agent 0 owns decision ``D1`` (domain [0, 1]) and utility ``U1``; agent 1
    owns decision ``D2`` (domain [0, 1, 2]) and utility ``U2``. Both utilities
    depend on both decisions, and ``D2`` observes ``D1``.
    """
    edges = [
        ('D1', 'D2'),
        ('D1', 'U1'),
        ('D1', 'U2'),
        ('D2', 'U2'),
        ('D2', 'U1'),
    ]
    agent_nodes = {
        0: {'D': ['D1'], 'U': ['U1']},
        1: {'D': ['D2'], 'U': ['U2']},
    }
    macid = MACID(edges, agent_nodes)

    # Shared parent description for both utility tables.
    parents = ['D1', 'D2']
    parent_cards = [2, 3]

    u1_table = np.array([
        [0, 0, 1, 0, 0, 0],
        [0, 1, 0, 1, 0, 0],
        [0, 0, 0, 0, 1, 0],
        [1, 0, 0, 0, 0, 1],
    ])
    u2_table = np.array([
        [0, 0, 0, 0, 1, 0],
        [1, 0, 1, 1, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 1],
    ])

    macid.add_cpds(
        DecisionDomain('D1', [0, 1]),
        DecisionDomain('D2', [0, 1, 2]),
        TabularCPD('U1', 4, u1_table, evidence=parents, evidence_card=parent_cards),
        TabularCPD('U2', 4, u2_table, evidence=parents, evidence_card=parent_cards),
    )

    return macid
Example #10
0
    def estimate_cpd(self, node):
        """
        Method to estimate the CPD for a given variable.

        The CPD is obtained by normalising the state counts of ``node`` for
        every configuration of its parents. A configuration that was never
        observed (an all-zero column) is filled with ones first, so it
        normalises to a uniform distribution.

        Parameters
        ----------
        node: int, string (any hashable python object)
            The name of the variable for which the CPD is to be estimated.

        Returns
        -------
        CPD: TabularCPD

        Examples
        --------
        >>> import pandas as pd
        >>> from pgmpy.models import BayesianModel
        >>> from pgmpy.estimators import MaximumLikelihoodEstimator
        >>> data = pd.DataFrame(data={'A': [0, 0, 1], 'B': [0, 1, 0], 'C': [1, 1, 0]})
        >>> model = BayesianModel([('A', 'C'), ('B', 'C')])
        >>> cpd_A = MaximumLikelihoodEstimator(model, data).estimate_cpd('A')
        >>> print(cpd_A)
        ╒══════╤══════════╕
        │ A(0) │ 0.666667 │
        ├──────┼──────────┤
        │ A(1) │ 0.333333 │
        ╘══════╧══════════╛
        >>> cpd_C = MaximumLikelihoodEstimator(model, data).estimate_cpd('C')
        >>> print(cpd_C)
        ╒══════╤══════╤══════╤══════╤══════╕
        │ A    │ A(0) │ A(0) │ A(1) │ A(1) │
        ├──────┼──────┼──────┼──────┼──────┤
        │ B    │ B(0) │ B(1) │ B(0) │ B(1) │
        ├──────┼──────┼──────┼──────┼──────┤
        │ C(0) │ 0.0  │ 0.0  │ 1.0  │ 0.5  │
        ├──────┼──────┼──────┼──────┼──────┤
        │ C(1) │ 1.0  │ 1.0  │ 0.0  │ 0.5  │
        ╘══════╧══════╧══════╧══════╧══════╛
        """

        state_counts = self.state_counts(node)

        # if a column contains only `0`s (no states observed for some configuration
        # of parents' states) fill that column uniformly instead.
        # `.loc` with a positional boolean mask replaces the `.ix` indexer,
        # which was removed in pandas 1.0.
        unobserved = (state_counts.values == 0).all(axis=0)
        state_counts.loc[:, unobserved] = 1

        parents = sorted(self.model.get_parents(node))
        parents_cardinalities = [
            len(self.state_names[parent]) for parent in parents
        ]
        node_cardinality = len(self.state_names[node])

        cpd = TabularCPD(node,
                         node_cardinality,
                         np.array(state_counts),
                         evidence=parents,
                         evidence_card=parents_cardinalities,
                         state_names=self.state_names)
        cpd.normalize()
        return cpd
Example #11
0
 def _get_node_CPT(self, node, df=None):
     """Estimate the CPT of a binary ``node`` from the sample means in ``df``.

     For a root node the table is ``[[1 - mu], [mu]]`` where ``mu`` is the
     column mean. For a node with parents, the mean of ``node`` is taken for
     every observed combination of parent values; combinations of 0/1 parent
     values that never occur in ``df`` get an uninformative 0.5.

     Parameters
     ----------
     node : hashable
         Node of ``self.G`` whose table is estimated.
     df : pandas.DataFrame
         Samples with one column per node. The default of ``None`` is not
         usable: the method indexes ``df`` unconditionally.
         # assumes all columns hold 0/1 values -- TODO confirm with callers

     Returns
     -------
     TabularCPD
     """
     parents = list(self.G.predecessors(node))
     if not parents:  # root node (latent)
         mu = df[node].mean()
         return TabularCPD(node, 2, values=[[1 - mu], [mu]])

     mus = df.groupby(parents)[node].mean().reset_index()
     uniques = mus[parents].drop_duplicates()
     # Parent configurations that were never sampled get an uninformative prior.
     appends = [
         list(combo) + [0.5]
         for combo in product([0, 1], repeat=len(parents))
         if not (uniques == np.array(combo)).all(1).any()
     ]
     # Only concatenate when there is something to add: concatenating an
     # empty frame is deprecated in recent pandas and would eventually turn
     # the numeric columns into object dtype.
     if appends:
         add_df = pd.DataFrame(appends, columns=parents + [node])
         mus = pd.concat((mus, add_df), axis=0)
     mus = mus.sort_values(by=parents)
     mus = mus[node].values
     cpt = np.vstack((1. - mus, mus))
     return TabularCPD(node,
                       2,
                       values=cpt,
                       evidence=parents,
                       evidence_card=len(parents) * [2])
 def test_nonoccurring_values(self):
     """Estimator parameters must cover states declared in ``state_names``.

     ``state_names`` declares extra states (23 for A, 42 for C) plus an
     entry for a key ``1``; the expected CPDs have one row per declared
     state of A, B and C.
     """
     declared_states = {
         "A": [0, 1, 23],
         "B": [0, 1],
         "C": [0, 42, 1],
         1: [2],
     }
     estimator = MaximumLikelihoodEstimator(
         self.m1, self.d1, state_names=declared_states
     )

     third = 1.0 / 3
     expected_a = TabularCPD("A", 3, [[2.0 / 3], [1.0 / 3], [0]])
     expected_b = TabularCPD("B", 2, [[2.0 / 3], [1.0 / 3]])
     expected_c = TabularCPD(
         "C",
         3,
         [
             [0.0, 0.0, 1.0, third, third, third],
             [1.0, 1.0, 0.0, third, third, third],
             [0.0, 0.0, 0.0, third, third, third],
         ],
         evidence=["A", "B"],
         evidence_card=[3, 2],
     )

     self.assertSetEqual(
         set(estimator.get_parameters()),
         {expected_a, expected_b, expected_c},
     )
    def test_check_model(self):
        """check_model() raises while CPDs are missing and passes once all are added."""
        cpd_g = TabularCPD(
            "g",
            2,
            values=np.array([[0.2, 0.3, 0.4, 0.6], [0.8, 0.7, 0.6, 0.4]]),
            evidence=["d", "i"],
            evidence_card=[2, 2],
        )

        cpd_s = TabularCPD(
            "s",
            2,
            values=np.array([[0.2, 0.3], [0.8, 0.7]]),
            evidence=["i"],
            evidence_card=[2],
        )

        cpd_l = TabularCPD(
            "l",
            2,
            values=np.array([[0.2, 0.3], [0.8, 0.7]]),
            evidence=["g"],
            evidence_card=[2],
        )

        # Only g, s and l have CPDs so far: d and i are still missing.
        self.G.add_cpds(cpd_g, cpd_s, cpd_l)
        self.assertRaises(ValueError, self.G.check_model)

        # Root CPDs as (2, 1) columns, the shape TabularCPD expects without
        # evidence (the original passed 1x2 rows).
        cpd_d = TabularCPD("d", 2, values=[[0.8], [0.2]])
        cpd_i = TabularCPD("i", 2, values=[[0.7], [0.3]])
        self.G.add_cpds(cpd_d, cpd_i)

        self.assertTrue(self.G.check_model())
def Generate_CPTs(data_cpts, data, cols, root):
    """Build one TabularCPD per entry of ``cols``.

    Parameters
    ----------
    data_cpts : sequence
        Per-column probability tables, indexed positionally like ``cols``;
        each entry must support ``.T.values``.
        # presumably pandas DataFrames -- verify against caller
    data : object
        Unused; kept for interface compatibility.
    cols : array-like
        Variable names. Must support element-wise ``cols == name`` so that
        ``np.where`` can locate a name's position.
    root : str
        Name of the root variable (cardinality 5); every other variable has
        ``root`` as its only parent.

    Returns
    -------
    list of TabularCPD
        In the order of ``cols``. Variables whose name starts with ``"T:"``
        get cardinality 2, all other non-root variables cardinality 6.
    """
    cpts_list = []
    for i in cols:
        if i == root:
            # Root table as a column, the layout TabularCPD expects for a
            # variable without evidence (the original wrapped the
            # probabilities in a single row).
            root_values = np.asarray(data_cpts[0].T.values[1]).reshape(-1, 1)
            cpt = TabularCPD(variable=root,
                             variable_card=5,
                             values=root_values)
        else:
            cpt = TabularCPD(
                variable=i,
                variable_card=2 if i[:2] == "T:" else 6,
                values=data_cpts[np.where(cols == i)[0][0]].T.values,
                evidence=[root],
                evidence_card=[5])
        cpts_list.append(cpt)
    return cpts_list
    def test_add_multiple_cpds(self):
        """CPDs added in one add_cpds() call can each be fetched back by variable name."""
        # Insertion order (d, i, g, l, s) is kept by the dict and reused below.
        cpds_by_node = {
            "d": TabularCPD("d", 2, values=np.random.rand(2, 1)),
            "i": TabularCPD("i", 2, values=np.random.rand(2, 1)),
            "g": TabularCPD(
                "g",
                2,
                values=np.random.rand(2, 4),
                evidence=["d", "i"],
                evidence_card=[2, 2],
            ),
            "l": TabularCPD(
                "l",
                2,
                values=np.random.rand(2, 2),
                evidence=["g"],
                evidence_card=[2],
            ),
            "s": TabularCPD(
                "s",
                2,
                values=np.random.rand(2, 2),
                evidence=["i"],
                evidence_card=[2],
            ),
        }

        self.G.add_cpds(*cpds_by_node.values())

        for node, expected_cpd in cpds_by_node.items():
            self.assertEqual(self.G.get_cpds(node), expected_cpd)
    def test_check_model2(self):
        """check_model() raises ValueError for CPDs whose columns do not sum to 1.

        Each malformed CPD is added on its own, checked, and removed again
        before the next one is tried.
        """
        malformed_cpds = [
            TabularCPD('s',
                       2,
                       values=np.array([[0.5, 0.3], [0.8, 0.7]]),
                       evidence=['i'],
                       evidence_card=[2]),
            TabularCPD('g',
                       2,
                       values=np.array([[0.2, 0.3, 0.4, 0.6],
                                        [0.3, 0.7, 0.6, 0.4]]),
                       evidence=['d', 'i'],
                       evidence_card=[2, 2]),
            TabularCPD('l',
                       2,
                       values=np.array([[0.2, 0.3], [0.1, 0.7]]),
                       evidence=['g'],
                       evidence_card=[2]),
        ]

        for bad_cpd in malformed_cpds:
            self.G.add_cpds(bad_cpd)
            self.assertRaises(ValueError, self.G.check_model)
            self.G.remove_cpds(bad_cpd)
Example #17
0
 def test_fit_missing_data(self):
     """fit() with ``complete_samples_only=False`` yields the expected CPDs for A, B and C."""
     self.model2.fit(self.data2, state_names={'C': [0, 1]}, complete_samples_only=False)

     expected_cpds = {
         TabularCPD('A', 2, [[0.5], [0.5]]),
         TabularCPD('B', 2, [[2. / 3], [1. / 3]]),
         TabularCPD(
             'C',
             2,
             [[0, 0.5, 0.5, 0.5], [1, 0.5, 0.5, 0.5]],
             evidence=['A', 'B'],
             evidence_card=[2, 2],
         ),
     }
     fitted_cpds = set(self.model2.get_cpds())
     self.assertSetEqual(expected_cpds, fitted_cpds)
    def test_add_multiple_cpds(self):
        """Adding five CPDs at once makes each retrievable through get_cpds(name)."""
        # (name, CPD) pairs, created in the order d, i, g, l, s.
        named_cpds = [
            ('d', TabularCPD('d', 2, values=np.random.rand(2, 1))),
            ('i', TabularCPD('i', 2, values=np.random.rand(2, 1))),
            ('g', TabularCPD('g',
                             2,
                             values=np.random.rand(2, 4),
                             evidence=['d', 'i'],
                             evidence_card=[2, 2])),
            ('l', TabularCPD('l',
                             2,
                             values=np.random.rand(2, 2),
                             evidence=['g'],
                             evidence_card=[2])),
            ('s', TabularCPD('s',
                             2,
                             values=np.random.rand(2, 2),
                             evidence=['i'],
                             evidence_card=[2])),
        ]

        self.G.add_cpds(*[cpd for _, cpd in named_cpds])

        for name, cpd in named_cpds:
            self.assertEqual(self.G.get_cpds(name), cpd)
Example #19
0
    def inf(self, file1):
        """Read a network description from ``file1``, run inference and print the result.

        File layout, as consumed by this method:

        * Five lines per node, parsed with ``getnode``, ``int``, (raw CPD
          string), ``getList`` and ``getCard`` respectively; the CPD string is
          decoded with ``parseCpd``.
        * The node section ends at the first line that is exactly ``"\\n"``.
        * The line after that terminator holds the edges (``getegdes``), and
          the line two further down holds the evidence (``getValue``).

        For every node that is not part of the evidence, the values of its
        posterior (via variable elimination) are appended to the output as
        ``"<node> <values>\\n"``; the whole text is printed once at the end.

        Parameters
        ----------
        file1 : str or path-like
            Path of the UTF-8 encoded description file.
        """
        # Context manager so the file handle is closed (the original leaked it).
        with open(file1, encoding="utf8") as f1:
            lines = f1.readlines()

        G = BayesianModel()
        nodeList = {}
        i = 0
        while i < len(lines):
            if lines[i] == '\n':
                break
            nodeName = self.getnode(lines[i])
            valueNum = int(lines[i + 1])
            cpd_str = lines[i + 2]
            sequence = self.getList(lines[i + 3])
            card = self.getCard(lines[i + 4])
            cpd = self.parseCpd(cpd_str, valueNum, card)
            nodeList[nodeName] = {
                'nodeName': nodeName,
                'valueNum': valueNum,
                'cpd': cpd,
                'sequence': sequence,
                'card': card,
            }
            i += 5
        # ``i`` now points at the terminator line (or past the end of file).
        edges = self.getegdes(lines[i + 1])
        evidence2 = self.getValue(lines[i + 3])

        # ``edges`` is a flat sequence: [src0, dst0, src1, dst1, ...].
        for pair_index in range(len(edges) // 2):
            G.add_edge(edges[2 * pair_index], edges[2 * pair_index + 1])

        for node in nodeList.values():
            if node['sequence'][0] == '':
                # No parents: unconditional table.
                cpt = TabularCPD(variable=node['nodeName'],
                                 variable_card=node['valueNum'],
                                 values=node['cpd'])
            else:
                cpt = TabularCPD(variable=node['nodeName'],
                                 variable_card=node['valueNum'],
                                 evidence=node['sequence'],
                                 evidence_card=node['card'],
                                 values=node['cpd'])
            G.add_cpds(cpt)

        if G.check_model():
            inference = VariableElimination(G)
            report = []

            for node in G.nodes():
                if node not in evidence2:
                    phi_query = inference.query(variables=[node],
                                                evidence=evidence2,
                                                show_progress=False).values
                    report.append(node + ' ' + str(phi_query) + '\n')
            print(''.join(report))
def factors():
    """
    Initialise the initial factors.

    Returns
    -------
    dict
        Maps a key to a TabularCPD over binary variables:

        * ``'a'``   -- marginal of ``a``
        * ``'ab'``  -- ``b`` given ``a``
        * ``'ae'``  -- ``e`` given ``a``
        * ``'bc'``  -- ``c`` given ``b``
        * ``'ced'`` -- ``d`` given ``c`` and ``e``
    """
    phi = dict()

    # Marginal on A, as a (2, 1) column: the shape TabularCPD expects for a
    # variable without evidence (the original passed a 1x2 row).
    phi['a'] = TabularCPD(variable='a',
                          variable_card=2,
                          values=np.array([[0.05], [0.95]]))

    # CPD on B|A (the literal is written one row per state of A, hence the
    # transpose).
    phi['ab'] = TabularCPD('b', 2,
                           np.array([[0.1, 0.9], [0.2, 0.8]]).T, ['a'], [2])

    # CPD on E|A
    phi['ae'] = TabularCPD('e', 2, np.array([[0.3, 0.4], [0.7, 0.6]]), ['a'],
                           [2])

    # CPD on C|B
    phi['bc'] = TabularCPD('c', 2, np.array([[0.5, 0.6], [0.5, 0.4]]), ['b'],
                           [2])

    # CPD on D|C,E: transpose the 2x2x2 array so the first axis indexes the
    # state of D, then flatten the two parent axes into four columns.
    A = np.array([[[0.7, 0.3], [0.8, 0.2]], [[0.9, 0.1], [0.99, 0.01]]]).T
    A = A.reshape(A.shape[0], -1)
    phi['ced'] = TabularCPD('d', 2, A, ['c', 'e'], [2, 2])

    return phi
Example #21
0
    def setUp(self):
        """Prepare a Bayesian network, a Markov network and a Gibbs sampler for the tests.

        The Bayesian network is ``diff -> grade <- intel``; the sampler is
        created from that Bayesian network.
        """
        # --- Bayesian network -------------------------------------------------
        grade_table = [
            [0.3, 0.05, 0.9, 0.5],
            [0.4, 0.25, 0.08, 0.3],
            [0.3, 0.7, 0.02, 0.2],
        ]
        bayes_net = BayesianModel()
        bayes_net.add_nodes_from(['diff', 'intel', 'grade'])
        bayes_net.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        bayes_net.add_cpds(
            TabularCPD('diff', 2, [[0.6], [0.4]]),
            TabularCPD('intel', 2, [[0.7], [0.3]]),
            TabularCPD('grade',
                       3,
                       grade_table,
                       evidence=['diff', 'intel'],
                       evidence_card=[2, 2]),
        )
        self.bayesian_model = bayes_net

        # --- Markov network ---------------------------------------------------
        markov_net = MarkovModel([('A', 'B'), ('C', 'B'), ('B', 'D')])
        markov_net.add_factors(
            DiscreteFactor(['A', 'B'], [2, 3], [1, 2, 3, 4, 5, 6]),
            DiscreteFactor(['C', 'B'], [4, 3],
                           [3, 1, 4, 5, 7, 8, 1, 3, 10, 4, 5, 6]),
            DiscreteFactor(['B', 'D'], [3, 2], [5, 7, 2, 1, 9, 3]),
        )
        self.markov_model = markov_net

        self.gibbs = GibbsSampling(self.bayesian_model)
    def test_check_model2(self):
        """Each CPD with a column that does not sum to 1 makes check_model() raise ValueError.

        The three invalid CPDs are tried one at a time; each is removed from
        the model again after the check.
        """
        invalid_specs = (
            ("s", np.array([[0.5, 0.3], [0.8, 0.7]]), ["i"], [2]),
            ("g", np.array([[0.2, 0.3, 0.4, 0.6], [0.3, 0.7, 0.6, 0.4]]),
             ["d", "i"], [2, 2]),
            ("l", np.array([[0.2, 0.3], [0.1, 0.7]]), ["g"], [2]),
        )

        for variable, table, parents, parent_cards in invalid_specs:
            invalid_cpd = TabularCPD(
                variable,
                2,
                values=table,
                evidence=parents,
                evidence_card=parent_cards,
            )
            self.G.add_cpds(invalid_cpd)
            self.assertRaises(ValueError, self.G.check_model)
            self.G.remove_cpds(invalid_cpd)
Example #23
0
    def init_model(self, ebunch, cpdtables, plot=False, pgm_id='pgm'):
        """
        Populate the pgmpy model with CPD tables and validate it.

        For every table description the variable's states ``0 .. card - 1``
        are recorded in ``self.variables_dict``, a TabularCPD is created (its
        parents reordered alphabetically when it has any) and added to
        ``self.pgmodel``. Afterwards the model is checked and the inference
        system is refreshed.

        Args:
            ebunch (list): list of the graph's edges. Not used in this method.
            cpdtables (list): list of dicts, each holding what is needed to
                build one probability table: ``'variable'``,
                ``'variable_card'``, ``'values'`` and optionally
                ``'evidence'`` / ``'evidence_card'``.
            plot (boolean): whether to save an image of the graph via
                ``save_pgm_as_img``.
            pgm_id (str): name identifying the graph, passed to
                ``save_pgm_as_img``.

        Raises:
            ValueError: if ``check_model()`` returns a falsy value.
        """
        for spec in cpdtables:
            card = spec['variable_card']
            name = spec['variable']
            self.variables_dict[name] = list(range(card))

            parents = spec.get('evidence')
            table = TabularCPD(
                variable=name,
                variable_card=card,
                values=spec['values'],
                evidence_card=spec.get('evidence_card'),
                evidence=parents,
            )
            if parents:
                table.reorder_parents(sorted(parents))
            self.pgmodel.add_cpds(table)

        if not self.pgmodel.check_model():
            raise ValueError("Error with CPDTs")

        self.update_infer_system()
        if plot:
            self.save_pgm_as_img(pgm_id)
Example #24
0
def basic2agent_tie_break() -> MACID:
    """Return a two-agent MACID with binary decisions and six-valued utilities.

    Agent 0 owns decision ``D1`` and utility ``U1``; agent 1 owns decision
    ``D2`` and utility ``U2``. Both decisions have domain [0, 1], both
    utilities depend on both decisions, and ``D2`` observes ``D1``.
    """
    edges = [
        ('D1', 'D2'),
        ('D1', 'U1'),
        ('D1', 'U2'),
        ('D2', 'U2'),
        ('D2', 'U1'),
    ]
    agent_nodes = {
        0: {'D': ['D1'], 'U': ['U1']},
        1: {'D': ['D2'], 'U': ['U2']},
    }
    macid = MACID(edges, agent_nodes)

    # Both utility tables share the same parents and parent cardinalities.
    parents = ['D1', 'D2']
    parent_cards = [2, 2]

    u1_table = np.array([
        [0, 1, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 0, 0],
        [1, 0, 1, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
    ])
    u2_table = np.array([
        [0, 0, 0, 0],
        [1, 0, 0, 0],
        [0, 0, 1, 1],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 1, 0, 0],
    ])

    macid.add_cpds(
        DecisionDomain('D1', [0, 1]),
        DecisionDomain('D2', [0, 1]),
        TabularCPD('U1', 6, u1_table, evidence=parents, evidence_card=parent_cards),
        TabularCPD('U2', 6, u2_table, evidence=parents, evidence_card=parent_cards),
    )

    return macid
    def test_check_model(self):
        """check_model() raises while CPDs are missing and passes once all are added."""
        cpd_g = TabularCPD('g',
                           2,
                           values=np.array([[0.2, 0.3, 0.4, 0.6],
                                            [0.8, 0.7, 0.6, 0.4]]),
                           evidence=['d', 'i'],
                           evidence_card=[2, 2])

        cpd_s = TabularCPD('s',
                           2,
                           values=np.array([[0.2, 0.3], [0.8, 0.7]]),
                           evidence=['i'],
                           evidence_card=[2])

        cpd_l = TabularCPD('l',
                           2,
                           values=np.array([[0.2, 0.3], [0.8, 0.7]]),
                           evidence=['g'],
                           evidence_card=[2])

        # Only g, s and l have CPDs so far: d and i are still missing.
        self.G.add_cpds(cpd_g, cpd_s, cpd_l)
        self.assertRaises(ValueError, self.G.check_model)

        # Root CPDs as (2, 1) columns, the shape TabularCPD expects without
        # evidence (the original passed 1x2 rows).
        cpd_d = TabularCPD('d', 2, values=[[0.8], [0.2]])
        cpd_i = TabularCPD('i', 2, values=[[0.7], [0.3]])
        self.G.add_cpds(cpd_d, cpd_i)

        self.assertTrue(self.G.check_model())
Example #26
0
    def setUp(self):
        """Create a chain Bayesian network (a -> b -> c -> d -> e) and a 4-node Markov network."""
        self.bayesian = BayesianModel([('a', 'b'), ('b', 'c'), ('c', 'd'),
                                       ('d', 'e')])
        # Root CPD as a (2, 1) column: the shape TabularCPD expects when no
        # evidence is given (the original passed a 1x2 row).
        a_cpd = TabularCPD('a', 2, [[0.4], [0.6]])
        b_cpd = TabularCPD('b',
                           2, [[0.2, 0.4], [0.8, 0.6]],
                           evidence=['a'],
                           evidence_card=[2])
        c_cpd = TabularCPD('c',
                           2, [[0.1, 0.2], [0.9, 0.8]],
                           evidence=['b'],
                           evidence_card=[2])
        d_cpd = TabularCPD('d',
                           2, [[0.4, 0.3], [0.6, 0.7]],
                           evidence=['c'],
                           evidence_card=[2])
        e_cpd = TabularCPD('e',
                           2, [[0.3, 0.2], [0.7, 0.8]],
                           evidence=['d'],
                           evidence_card=[2])
        self.bayesian.add_cpds(a_cpd, b_cpd, c_cpd, d_cpd, e_cpd)

        # Markov network forming the cycle a - b - d - c - a, one factor per edge.
        self.markov = MarkovModel([('a', 'b'), ('b', 'd'), ('a', 'c'),
                                   ('c', 'd')])
        factor_1 = DiscreteFactor(['a', 'b'], [2, 2],
                                  np.array([100, 1, 1, 100]))
        factor_2 = DiscreteFactor(['a', 'c'], [2, 2],
                                  np.array([40, 30, 100, 20]))
        factor_3 = DiscreteFactor(['b', 'd'], [2, 2],
                                  np.array([1, 100, 100, 1]))
        factor_4 = DiscreteFactor(['c', 'd'], [2, 2],
                                  np.array([60, 60, 40, 40]))
        self.markov.add_factors(factor_1, factor_2, factor_3, factor_4)
Example #27
0
def buildBN():
    """Build the burglary/earthquake alarm network and return an inference engine.

    The network has the edges Burglary -> Alarm, Earthquake -> Alarm,
    Alarm -> JohnCalls and Alarm -> MaryCalls. All five variables have
    cardinality 2; following the author's annotations, state 0 is read as
    "false" (!X) and state 1 as "true" (X).

    Returns
    -------
    VariableElimination
        Exact-inference object constructed on the fully parameterised model.
    """
    network = BayesianModel([
        ('Burglary', 'Alarm'),
        ('Earthquake', 'Alarm'),
        ("Alarm", "JohnCalls"),
        ("Alarm", "MaryCalls"),
    ])

    # Root priors, each given as the column [P(!X), P(X)].
    prior_cpds = [
        TabularCPD(variable=root,
                   variable_card=2,
                   values=[[p_false], [p_true]])
        for root, p_false, p_true in (
            ('Burglary', .999, .001),
            ('Earthquake', .998, .002),
        )
    ]

    # Columns follow the evidence order (Earthquake, Burglary):
    #   (!E,!B), (!E,B), (E,!B), (E,B)
    alarm_cpd = TabularCPD(
        variable='Alarm',
        variable_card=2,
        values=[
            [.999, .06, .71, .05],  # P(!A | E, B)
            [.001, .94, .29, .95],  # P(A | E, B)
        ],
        evidence=['Earthquake', 'Burglary'],
        evidence_card=[2, 2])

    # Both callers depend only on Alarm; columns are (!A, A), the first row
    # is P(!call | Alarm) and the second row is P(call | Alarm).
    caller_cpds = [
        TabularCPD(variable=caller,
                   variable_card=2,
                   values=table,
                   evidence=['Alarm'],
                   evidence_card=[2])
        for caller, table in (
            ("JohnCalls", [[.95, .10], [.05, .90]]),
            ("MaryCalls", [[.99, .30], [.01, .70]]),
        )
    ]

    # Same attachment order as before: Burglary, Earthquake, Alarm, John, Mary.
    network.add_cpds(*(prior_cpds + [alarm_cpd] + caller_cpds))

    # Exact inference via variable elimination. Callers query the returned
    # object, e.g. query(variables=['MaryCalls'], evidence={'JohnCalls': 1}).
    return VariableElimination(network)
Example #28
0
def buildNet(data, conn):
    """Build a Bayesian network from symptom -> disease edges and attach CPDs.

    Parameters
    ----------
    data : list of tuples
        Edge list handed to ``BayesianModel``. For every tuple ``t``, ``t[0]``
        is treated as a symptom (parent node) and ``t[1]`` as a disease
        (child node). The list is iterated more than once.
    conn :
        Connection handle forwarded unchanged to ``SQL.symList`` and
        ``numberOfSons``.

    Returns
    -------
    BayesianModel
        The model with one CPD per distinct symptom and one per distinct
        disease.

    Notes
    -----
    Earlier revisions left a stray ``break`` inside the disease loop, which
    made the loop exit before any disease CPD was added to the model. Each
    CPD is now added exactly once, right after it is built.
    """
    model = BayesianModel(data)
    seen_symptoms = set()  # symptoms that already received a CPD
    seen_diseases = set()  # diseases that already received a CPD

    # Parent nodes: every symptom gets the same two-state table.
    for t in data:
        symptom = t[0]
        if symptom in seen_symptoms:
            continue
        seen_symptoms.add(symptom)
        model.add_cpds(
            TabularCPD(variable=symptom,
                       variable_card=2,
                       values=[[0.5, 0.5]]))

    # Child nodes: every disease is conditioned on its linked symptoms.
    for t in data:
        disease = t[1]
        if disease in seen_diseases:
            continue
        seen_diseases.add(disease)

        # Symptoms linked to this disease, as reported by the database layer.
        sym_list = SQL.symList(conn, disease)
        sym_list_length = len(sym_list)

        # The second field of each row returned by numberOfSons is used as
        # the cardinality of the corresponding evidence variable.
        mat = numberOfSons(conn, sym_list)
        evidence_cards = [mat[i][1] for i in range(len(mat))]

        # NOTE(review): pgmpy's TabularCPD normally expects `values` shaped
        # (variable_card, prod(evidence_card)); the (1, n) array below and the
        # use of the symptom count as `variable_card` are kept from the
        # original and should be confirmed against the intended model.
        cpd = TabularCPD(variable=disease,
                         variable_card=sym_list_length,
                         values=np.full((1, sym_list_length),
                                        1 / sym_list_length),
                         evidence=sym_list,
                         evidence_card=evidence_cards)
        model.add_cpds(cpd)

    return model
Example #29
0
def basic2agent_tie_break() -> MACID:
    """Return a two-agent MACID with decisions D1, D2 and utilities U1, U2.

    Agent 0 owns decision D1 and utility U1; agent 1 owns decision D2 and
    utility U2. D1 feeds into D2, and both decisions feed into both utility
    nodes. Each decision has the domain [0, 1]; each utility node is a
    6-state tabular CPD conditioned on (D1, D2).
    """
    edges = [("D1", "D2"), ("D1", "U1"), ("D1", "U2"), ("D2", "U2"),
             ("D2", "U1")]
    game = MACID(
        edges,
        agent_decisions={0: ["D1"], 1: ["D2"]},
        agent_utilities={0: ["U1"], 1: ["U2"]},
    )

    decision_cpds = [DecisionDomain(name, [0, 1]) for name in ("D1", "D2")]

    # One row per utility state, one column per (D1, D2) assignment.
    utility_tables = (
        ("U1", [[0, 1, 0, 0], [0, 0, 0, 1], [0, 0, 0, 0], [1, 0, 1, 0],
                [0, 0, 0, 0], [0, 0, 0, 0]]),
        ("U2", [[0, 0, 0, 0], [1, 0, 0, 0], [0, 0, 1, 1], [0, 0, 0, 0],
                [0, 0, 0, 0], [0, 1, 0, 0]]),
    )
    utility_cpds = [
        TabularCPD(
            name,
            6,
            np.array(rows),
            evidence=["D1", "D2"],
            evidence_card=[2, 2],
        )
        for name, rows in utility_tables
    ]

    # Attachment order is unchanged: D1, D2, U1, U2.
    game.add_cpds(*(decision_cpds + utility_cpds))
    return game
        def __setitem__(self,
                        variable: str,
                        cpd: TabularCPD,
                        sync_state_names: bool = True) -> None:
            """Register ``cpd`` for ``variable`` and push it into the wrapped model.

            Parameters
            ----------
            variable:
                Key under which the CPD is stored.
            cpd:
                The CPD to store. A ``StochasticFunctionCPD`` is first asked to
                initialise its tabular form; if its parents are not ready the
                CPD stays stored here but is not yet added to ``self.cbn``.
            sync_state_names:
                When True, ``self.sync_state_names()`` is called after the
                children have been refreshed following a domain change.
                Recursive calls on children pass False.
            """
            # Drop any previous entry first, then store the new one.
            if variable in self.keys():
                self.__delitem__(variable)
            super().__setitem__(variable, cpd)

            # Function-style CPDs may not be materialisable yet; if so, stop
            # here and leave the model and the domain untouched.
            if isinstance(cpd, StochasticFunctionCPD):
                try:
                    cpd.initialize_tabular_cpd(self.cbn)
                except ParentsNotReadyException:
                    return

            # Hand the CPD to the underlying model and record its domain.
            BayesianModel.add_cpds(self.cbn, cpd)
            previous_domain = self.domain.get(variable)
            self.domain[variable] = cpd.state_names[variable]
            current_domain = self.domain[variable]

            # Nothing more to do when a non-empty domain stayed the same.
            if previous_domain and previous_domain == current_domain:
                return

            # The domain is new or changed: re-register every child that has
            # a CPD stored here, deferring the state-name sync to this call.
            for child in self.cbn.get_children(variable):
                if child in self.keys():
                    self.__setitem__(
                        child, self[child],
                        sync_state_names=False)  # type: ignore
            if sync_state_names:
                self.sync_state_names()
Example #31
0
    def estimate_cpd(self, node):
        """
        Method to estimate the CPD for a given variable.

        The CPD is obtained by normalizing the state counts returned by
        `self.state_counts(node)`. Parent configurations for which no state
        of `node` was counted get a uniform distribution.

        Parameters
        ----------
        node: int, string (any hashable python object)
            The name of the variable for which the CPD is to be estimated.

        Returns
        -------
        CPD: TabularCPD

        Examples
        --------
        >>> import pandas as pd
        >>> from pgmpy.models import BayesianModel
        >>> from pgmpy.estimators import MaximumLikelihoodEstimator
        >>> data = pd.DataFrame(data={'A': [0, 0, 1], 'B': [0, 1, 0], 'C': [1, 1, 0]})
        >>> model = BayesianModel([('A', 'C'), ('B', 'C')])
        >>> cpd_A = MaximumLikelihoodEstimator(model, data).estimate_cpd('A')
        >>> print(cpd_A)
        ╒══════╤══════════╕
        │ A(0) │ 0.666667 │
        ├──────┼──────────┤
        │ A(1) │ 0.333333 │
        ╘══════╧══════════╛
        >>> cpd_C = MaximumLikelihoodEstimator(model, data).estimate_cpd('C')
        >>> print(cpd_C)
        ╒══════╤══════╤══════╤══════╤══════╕
        │ A    │ A(0) │ A(0) │ A(1) │ A(1) │
        ├──────┼──────┼──────┼──────┼──────┤
        │ B    │ B(0) │ B(1) │ B(0) │ B(1) │
        ├──────┼──────┼──────┼──────┼──────┤
        │ C(0) │ 0.0  │ 0.0  │ 1.0  │ 0.5  │
        ├──────┼──────┼──────┼──────┼──────┤
        │ C(1) │ 1.0  │ 1.0  │ 0.0  │ 0.5  │
        ╘══════╧══════╧══════╧══════╧══════╛
        """

        state_counts = self.state_counts(node)

        # If a column contains only `0`s (no states observed for some
        # configuration of parents' states), set every count in it to 1 so
        # that normalization yields a uniform column instead of dividing by
        # zero. `.loc` is used because the old `.ix` indexer no longer exists
        # in current pandas releases.
        unobserved_columns = (state_counts == 0).all()
        state_counts.loc[:, unobserved_columns] = 1

        # Parents are sorted so the evidence order is deterministic.
        parents = sorted(self.model.get_parents(node))
        parents_cardinalities = [len(self.state_names[parent]) for parent in parents]
        node_cardinality = len(self.state_names[node])

        cpd = TabularCPD(node, node_cardinality, np.array(state_counts),
                         evidence=parents,
                         evidence_card=parents_cardinalities,
                         state_names=self.state_names)
        # Turn raw counts into conditional probabilities.
        cpd.normalize()
        return cpd
    def test_reduce_cpd_statename(self):
        """Reduce the 'grade' CPD on 'diff' and check what is left.

        The reduction is requested both with the state name 'high' and with
        the integer 0, first in place and then with ``inplace=False``. In
        every case the result must keep 'grade' as its variable, have the
        scope ['grade', 'intel'] and hold the expected 3x3 table.
        """
        expected_values = np.array([[0.1, 0.1, 0.1],
                                    [0.1, 0.1, 0.1],
                                    [0.8, 0.8, 0.8]])

        def fresh_cpd():
            # A new, unreduced CPD for every case so the cases stay independent.
            return TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                          [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                          [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                              evidence=['diff', 'intel'], evidence_card=[2, 3],
                              state_names=self.sn2)

        def check_reduced(reduced):
            self.assertEqual(reduced.variable, 'grade')
            self.assertEqual(reduced.variables, ['grade', 'intel'])
            np_test.assert_array_equal(reduced.get_values(), expected_values)

        diff_states = ('high', 0)

        # In-place reduction mutates the CPD it is called on.
        for diff_state in diff_states:
            cpd = fresh_cpd()
            cpd.reduce([('diff', diff_state)])
            check_reduced(cpd)

        # With inplace=False the reduced CPD is the return value.
        for diff_state in diff_states:
            cpd = fresh_cpd()
            cpd = cpd.reduce([('diff', diff_state)], inplace=False)
            check_reduced(cpd)
    def estimate_cpd(self, node, prior_type='BDeu', pseudo_counts=[], equivalent_sample_size=5):
        """
        Method to estimate the CPD for a given variable.

        Parameters
        ----------
        node: int, string (any hashable python object)
            The name of the variable for which the CPD is to be estimated.

        prior_type: 'dirichlet', 'BDeu', 'K2',
            string indicating which type of prior to use for the model parameters.
            - If 'prior_type' is 'dirichlet', the following must be provided:
                'pseudo_counts' = dirichlet hyperparameters; a list or dict
                 with a "virtual" count for each variable state.
                 The virtual counts are added to the actual state counts found in the data.
                 (if a list is provided, a lexicographic ordering of states is assumed;
                 if a dict is provided, its counts are taken in the sorted order of
                 its state keys)
            - If 'prior_type' is 'BDeu', then an 'equivalent_sample_size'
                must be specified instead of 'pseudo_counts'. This is equivalent to
                'prior_type=dirichlet' and using uniform 'pseudo_counts' of
                `equivalent_sample_size/(node_cardinality*np.prod(parents_cardinalities))`.
            - A prior_type of 'K2' is a shorthand for 'dirichlet' + setting every pseudo_count to 1,
                regardless of the cardinality of the variable.

        Returns
        -------
        CPD: TabularCPD

        Raises
        ------
        ValueError
            If `prior_type` is not one of 'dirichlet', 'BDeu', 'K2', or if
            `prior_type` is 'dirichlet' and `pseudo_counts` does not contain
            exactly one entry per state of `node`.

        Examples
        --------
        >>> import pandas as pd
        >>> from pgmpy.models import BayesianModel
        >>> from pgmpy.estimators import BayesianEstimator
        >>> data = pd.DataFrame(data={'A': [0, 0, 1], 'B': [0, 1, 0], 'C': [1, 1, 0]})
        >>> model = BayesianModel([('A', 'C'), ('B', 'C')])
        >>> estimator = BayesianEstimator(model, data)
        >>> cpd_C = estimator.estimate_cpd('C', prior_type="dirichlet", pseudo_counts=[1, 2])
        >>> print(cpd_C)
        ╒══════╤══════╤══════╤══════╤════════════════════╕
        │ A    │ A(0) │ A(0) │ A(1) │ A(1)               │
        ├──────┼──────┼──────┼──────┼────────────────────┤
        │ B    │ B(0) │ B(1) │ B(0) │ B(1)               │
        ├──────┼──────┼──────┼──────┼────────────────────┤
        │ C(0) │ 0.25 │ 0.25 │ 0.5  │ 0.3333333333333333 │
        ├──────┼──────┼──────┼──────┼────────────────────┤
        │ C(1) │ 0.75 │ 0.75 │ 0.5  │ 0.6666666666666666 │
        ╘══════╧══════╧══════╧══════╧════════════════════╛
        """
        # NOTE: the `pseudo_counts=[]` default is never mutated below (the
        # name is only rebound), so sharing it across calls is harmless.

        node_cardinality = len(self.state_names[node])
        # Parents are sorted so the evidence order is deterministic.
        parents = sorted(self.model.get_parents(node))
        parents_cardinalities = [len(self.state_names[parent]) for parent in parents]

        if prior_type == 'K2':
            pseudo_counts = [1] * node_cardinality
        elif prior_type == 'BDeu':
            # Spread the equivalent sample size evenly over every cell of the CPD.
            alpha = float(equivalent_sample_size) / (node_cardinality * np.prod(parents_cardinalities))
            pseudo_counts = [alpha] * node_cardinality
        elif prior_type == 'dirichlet':
            if not len(pseudo_counts) == node_cardinality:
                raise ValueError("'pseudo_counts' should have length {0}".format(node_cardinality))
            if isinstance(pseudo_counts, dict):
                # Order the counts by their state key, matching the
                # lexicographic state order assumed for list input. Sorting
                # the values themselves would attach counts to the wrong
                # states whenever the counts are not already ascending.
                pseudo_counts = [pseudo_counts[state] for state in sorted(pseudo_counts)]
        else:
            raise ValueError("'prior_type' not specified")

        state_counts = self.state_counts(node)
        # Transposing puts the states on the column axis, so the per-state
        # pseudo counts are added to every parent configuration.
        bayesian_counts = (state_counts.T + pseudo_counts).T

        cpd = TabularCPD(node, node_cardinality, np.array(bayesian_counts),
                         evidence=parents,
                         evidence_card=parents_cardinalities,
                         state_names=self.state_names)
        # Turn the smoothed counts into conditional probabilities.
        cpd.normalize()
        return cpd