Beispiel #1
0
    def test_add_multiple_cpds(self):
        """CPDs added in a single ``add_cpds`` call are retrievable per node."""
        # Two root tables first, then the three conditional tables.
        difficulty = TabularCPD('d', 2, values=np.random.rand(2, 1))
        intelligence = TabularCPD('i', 2, values=np.random.rand(2, 1))
        grade = TabularCPD('g', 2, values=np.random.rand(2, 4),
                           evidence=['d', 'i'], evidence_card=[2, 2])
        letter = TabularCPD('l', 2, values=np.random.rand(2, 2),
                            evidence=['g'], evidence_card=[2])
        sat = TabularCPD('s', 2, values=np.random.rand(2, 2),
                         evidence=['i'], evidence_card=[2])
        expected = [('d', difficulty), ('i', intelligence), ('g', grade),
                    ('l', letter), ('s', sat)]

        self.G.add_cpds(*[cpd for _, cpd in expected])

        # Looking a node up by name must hand back the CPD that was added.
        for node, cpd in expected:
            self.assertEqual(self.G.get_cpds(node), cpd)
Beispiel #2
0
    def test_check_model2(self):
        """``check_model`` raises ``ValueError`` for each of three CPDs.

        Every CPD is added on its own, checked, and removed again before the
        next one is tried, so the failures are independent of each other.
        """
        # Each table below has at least one column that does not sum to one.
        # NOTE(review): that is presumably what check_model rejects - confirm.
        invalid_cpds = [
            TabularCPD('s', 2,
                       values=np.array([[0.5, 0.3], [0.8, 0.7]]),
                       evidence=['i'], evidence_card=2),
            TabularCPD('g', 2,
                       values=np.array([[0.2, 0.3, 0.4, 0.6],
                                        [0.3, 0.7, 0.6, 0.4]]),
                       evidence=['d', 'i'], evidence_card=[2, 2]),
            TabularCPD('l', 2,
                       values=np.array([[0.2, 0.3], [0.1, 0.7]]),
                       evidence=['g'], evidence_card=[2]),
        ]

        for bad_cpd in invalid_cpds:
            self.G.add_cpds(bad_cpd)
            self.assertRaises(ValueError, self.G.check_model)
            self.G.remove_cpds(bad_cpd)
    def setUp(self):
        """Create factors, CPDs and inference objects with and without state names."""
        self.sn2 = {'grade': ['A', 'B', 'F'], 'diff': ['high', 'low'],
                    'intel': ['poor', 'good', 'very good']}
        self.sn1 = {'speed': ['low', 'medium', 'high'],
                    'switch': ['on', 'off'], 'time': ['day', 'night']}

        def grade_table(n_columns):
            # Fresh nested lists on every call so no two CPDs share a table.
            return [[0.1] * n_columns, [0.1] * n_columns, [0.8] * n_columns]

        # The same factor twice: once plain, once with state names attached.
        self.phi1 = Factor(['speed', 'switch', 'time'], [3, 2, 2],
                           np.ones(12))
        self.phi2 = Factor(['speed', 'switch', 'time'], [3, 2, 2],
                           np.ones(12), state_names=self.sn1)

        # The same CPD twice, following the same pattern.
        self.cpd1 = TabularCPD('grade', 3, grade_table(6),
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3])
        self.cpd2 = TabularCPD('grade', 3, grade_table(6),
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3],
                               state_names=self.sn2)

        # One model shared by two inference objects.
        network = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        network.add_cpds(
            TabularCPD('diff', 2, [[0.2, 0.8]]),
            TabularCPD('intel', 2, [[0.3, 0.7]]),
            TabularCPD('grade', 3, grade_table(4),
                       evidence=['diff', 'intel'],
                       evidence_card=[2, 2]))
        self.model1 = Inference(network)
        self.model2 = Inference(network, state_names=self.sn2)
Beispiel #4
0
    def setUp(self):
        """Build one Bayesian model, one Markov model and a Gibbs sampler."""
        # Bayesian fixture: 'diff' and 'intel' are the parents of 'grade'.
        bayes_net = BayesianModel()
        bayes_net.add_nodes_from(['diff', 'intel', 'grade'])
        bayes_net.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
        bayes_net.add_cpds(
            TabularCPD('diff', 2, [[0.6], [0.4]]),
            TabularCPD('intel', 2, [[0.7], [0.3]]),
            TabularCPD('grade', 3,
                       [[0.3, 0.05, 0.9, 0.5],
                        [0.4, 0.25, 0.08, 0.3],
                        [0.3, 0.7, 0.02, 0.2]],
                       evidence=['diff', 'intel'],
                       evidence_card=[2, 2]))
        self.bayesian_model = bayes_net

        # Markov fixture: one factor per edge of the graph.
        self.markov_model = MarkovModel([('A', 'B'), ('C', 'B'), ('B', 'D')])
        edge_factors = [
            Factor(['A', 'B'], [2, 3], [1, 2, 3, 4, 5, 6]),
            Factor(['C', 'B'], [4, 3],
                   [3, 1, 4, 5, 7, 8, 1, 3, 10, 4, 5, 6]),
            Factor(['B', 'D'], [3, 2], [5, 7, 2, 1, 9, 3]),
        ]
        self.markov_model.add_factors(*edge_factors)

        # The sampler under test is built from the Bayesian model.
        self.gibbs = GibbsSampling(self.bayesian_model)
Beispiel #5
0
    def setUp(self):
        """Create a chain-shaped Bayesian model and a four-node Markov model.

        The Bayesian model is the chain a -> b -> c -> d -> e with one binary
        CPD per node. The Markov model has the edges a-b, b-d, a-c and c-d
        with one pairwise factor per edge.
        """
        self.bayesian = BayesianModel([('a', 'b'), ('b', 'c'), ('c', 'd'),
                                       ('d', 'e')])
        a_cpd = TabularCPD('a', 2, [[0.4, 0.6]])
        # Evidence is always passed as a list (never a bare string) so that
        # these fixtures match every other TabularCPD construction and do not
        # depend on a string being treated as a sequence of variable names.
        b_cpd = TabularCPD('b',
                           2, [[0.2, 0.4], [0.3, 0.4]],
                           evidence=['a'],
                           evidence_card=[2])
        c_cpd = TabularCPD('c',
                           2, [[0.1, 0.2], [0.3, 0.4]],
                           evidence=['b'],
                           evidence_card=[2])
        d_cpd = TabularCPD('d',
                           2, [[0.4, 0.3], [0.2, 0.1]],
                           evidence=['c'],
                           evidence_card=[2])
        e_cpd = TabularCPD('e',
                           2, [[0.3, 0.2], [0.4, 0.1]],
                           evidence=['d'],
                           evidence_card=[2])
        self.bayesian.add_cpds(a_cpd, b_cpd, c_cpd, d_cpd, e_cpd)

        self.markov = MarkovModel([('a', 'b'), ('b', 'd'), ('a', 'c'),
                                   ('c', 'd')])
        factor_1 = Factor(['a', 'b'], [2, 2], np.array([100, 1, 1, 100]))
        factor_2 = Factor(['a', 'c'], [2, 2], np.array([40, 30, 100, 20]))
        factor_3 = Factor(['b', 'd'], [2, 2], np.array([1, 100, 100, 1]))
        factor_4 = Factor(['c', 'd'], [2, 2], np.array([60, 60, 40, 40]))
        self.markov.add_factors(factor_1, factor_2, factor_3, factor_4)
Beispiel #6
0
 def name_cpd(aid):
     """Return a uniform CPD over names for the variable ``'N' + aid``.

     The table has ``num_names`` states, each with probability
     ``1 / num_names``, and the CPD is tagged with ``semtype = 'name'``.
     """
     from pgmpy.factors import TabularCPD
     label = 'N' + aid
     uniform_prob = 1.0 / num_names
     prior = TabularCPD(
         variable=label,
         variable_card=num_names,
         values=[[uniform_prob] * num_names])
     prior.semtype = 'name'
     return prior
Beispiel #7
0
 def test_add_multiple_cpds(self):
     """``get_cpds()`` returns the added CPDs in the order they were added."""
     diff_cpd = TabularCPD('diff', 2, np.random.rand(2, 1))
     intel_cpd = TabularCPD('intel', 2, np.random.rand(2, 1))
     grade_cpd = TabularCPD('grade', 2, np.random.rand(2, 4),
                            ['diff', 'intel'], [2, 2])
     added = [diff_cpd, intel_cpd, grade_cpd]

     # The edges must exist before CPDs for their nodes are attached.
     self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
     self.graph.add_cpds(*added)

     self.assertListEqual(self.graph.get_cpds(), added)
Beispiel #8
0
 def test_get_cpd_raises_error(self):
     """Asking for the CPD of a node that is not in the graph raises."""
     diff_cpd = TabularCPD('diff', 2, np.random.rand(2, 1))
     intel_cpd = TabularCPD('intel', 2, np.random.rand(2, 1))
     grade_cpd = TabularCPD('grade', 2, np.random.rand(2, 4),
                            ['diff', 'intel'], [2, 2])

     self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
     self.graph.add_cpds(diff_cpd, intel_cpd, grade_cpd)

     # 'sat' was never added as a node, so the lookup must fail.
     self.assertRaises(ValueError, self.graph.get_cpds, 'sat')
Beispiel #9
0
 def name_cpd(aid):
     """Build the name CPD for ``aid``: a uniform table over ``num_names`` states.

     The variable is called ``'N' + aid`` and the returned CPD carries the
     attribute ``semtype = 'name'``.
     """
     from pgmpy.factors import TabularCPD
     variable_name = 'N' + aid
     # One row holding the same probability for every name.
     flat_row = [1.0 / num_names] * num_names
     name_prior = TabularCPD(
         variable=variable_name,
         variable_card=num_names,
         values=[flat_row])
     name_prior.semtype = 'name'
     return name_prior
Beispiel #10
0
    def initialize_initial_state(self):
        """
        This method will automatically re-adjust the cpds and the edges added to the bayesian network.
        If an edge that is added as an intra time slice edge in the 0th timeslice, this method will
        automatically add it in the 1st timeslice. It will also add the cpds. However, to call this
        method, one needs to add cpds as well as the edges in the bayesian network of the whole
        skeleton including the 0th and the 1st timeslice.

        For every CPD already attached, the node with the same name in the other
        time slice is considered. If that node has no CPD yet and all of its
        parents lie in one time slice, a CPD is derived for it from the existing
        one and added. ``check_model`` is called after each CPD is processed.

        Examples
        --------
        >>> from pgmpy.models import DynamicBayesianNetwork as DBN
        >>> from pgmpy.factors import TabularCPD
        >>> student = DBN()
        >>> student.add_nodes_from(['D', 'G', 'I', 'S', 'L'])
        >>> student.add_edges_from([(('D', 0),('G', 0)),(('I', 0),('G', 0)),(('D', 0),('D', 1)),(('I', 0),('I', 1))])
        >>> grade_cpd = TabularCPD(('G', 0), 3, [[0.3, 0.05, 0.9, 0.5],
        ...                                      [0.4, 0.25, 0.8, 0.03],
        ...                                      [0.3, 0.7, 0.02, 0.2]],
        ...                        evidence=[('I', 0),('D', 0)],
        ...                        evidence_card=[2, 2])
        >>> d_i_cpd = TabularCPD(('D', 1), 2, [[0.6, 0.3],
        ...                                    [0.4, 0.7]],
        ...                      evidence=[('D', 0)],
        ...                      evidence_card=2)
        >>> diff_cpd = TabularCPD(('D', 0), 2, [[0.6, 0.4]])
        >>> intel_cpd = TabularCPD(('I',0), 2, [[0.7, 0.3]])
        >>> i_i_cpd = TabularCPD(('I', 1), 2, [[0.5, 0.4],
        ...                                    [0.5, 0.6]],
        ...                      evidence=[('I', 0)],
        ...                      evidence_card=2)
        >>> student.add_cpds(grade_cpd, d_i_cpd, diff_cpd, intel_cpd, i_i_cpd)
        >>> student.initialize_initial_state()
        """
        # NOTE(review): add_cpds presumably appends to self.cpds while it is
        # being iterated here - confirm; the iteration is left unchanged.
        for cpd in self.cpds:
            # Same node name, opposite time slice (0 <-> 1).
            temp_var = (cpd.variable[0], 1 - cpd.variable[1])
            parents = self.get_parents(temp_var)
            already_defined = any(x.variable == temp_var for x in self.cpds)
            # all() over an empty parent list is True, so parents[0] is only
            # ever evaluated when at least one parent exists.
            if not already_defined and all(x[1] == parents[0][1]
                                           for x in parents):
                if parents:
                    # Reuse the existing table with the mirrored parents.
                    evidence_card = cpd.cardinality[:0:-1]
                    new_cpd = TabularCPD(
                        temp_var, cpd.variable_card,
                        cpd.values.reshape(cpd.variable_card,
                                           np.prod(evidence_card)),
                        parents, evidence_card)
                else:
                    # No parents in the other slice: use the marginal over
                    # the variable itself.
                    source_cpd = cpd
                    if cpd.get_evidence():
                        source_cpd = cpd.marginalize(cpd.get_evidence(),
                                                     inplace=False)
                    # The row length follows the variable's cardinality
                    # instead of a hard-coded 2, so non-binary variables
                    # are reshaped correctly too.
                    new_cpd = TabularCPD(
                        temp_var, cpd.variable_card,
                        np.reshape(source_cpd.values,
                                   (-1, cpd.variable_card)))
                self.add_cpds(new_cpd)
            self.check_model()
Beispiel #11
0
    def test_get_cpds(self):
        """``get_cpds('d')`` returns a CPD whose variable is ``'d'``."""
        student_cpds = [
            TabularCPD('d', 2, np.random.rand(2, 1)),
            TabularCPD('i', 2, np.random.rand(2, 1)),
            TabularCPD('g', 2, np.random.rand(2, 4), ['d', 'i'], [2, 2]),
            TabularCPD('l', 2, np.random.rand(2, 2), ['g'], 2),
            TabularCPD('s', 2, np.random.rand(2, 2), ['i'], 2),
        ]
        self.G.add_cpds(*student_cpds)

        fetched = self.G.get_cpds('d')
        self.assertEqual(fetched.variable, 'd')
Beispiel #12
0
    def test_get_cpds1(self):
        """CPD lookup works when one node name ('A') is a prefix of another ('AB')."""
        self.model = BayesianModel([('A', 'AB')])
        parent_cpd = TabularCPD('A', 2, np.random.rand(2, 1))
        child_cpd = TabularCPD('AB', 2, np.random.rand(2, 2),
                               evidence=['A'], evidence_card=[2])
        self.model.add_cpds(parent_cpd, child_cpd)

        # Each lookup must return the CPD for exactly the requested node.
        for node in ('A', 'AB'):
            self.assertEqual(self.model.get_cpds(node).variable, node)
Beispiel #13
0
 def score_cpd(aid1, aid2):
     """Return the score CPD for the pair ``(aid1, aid2)``.

     The variable ``'S' + aid1 + aid2`` is conditioned on the two name
     variables ``'N' + aid1`` and ``'N' + aid2``; the result is tagged with
     ``semtype = 'score'``.
     """
     score_var = 'S' + aid1 + aid2
     name_vars = ['N' + aid1, 'N' + aid2]
     pair_score = TabularCPD(
         variable=score_var,
         variable_card=num_scores,
         values=score_values,
         evidence=name_vars,
         evidence_card=[num_names, num_names])
     pair_score.semtype = 'score'
     return pair_score
Beispiel #14
0
 def samediff_cpd(aid1, aid2):
     """Return the same/different CPD for the pair ``(aid1, aid2)``.

     The variable ``'A' + aid1 + aid2`` has ``num_same_diff`` states and is
     conditioned on the name variables ``'N' + aid1`` and ``'N' + aid2``.
     The result is tagged with ``semtype = 'match'``.
     """
     match_var = 'A' + aid1 + aid2
     name_vars = ['N' + aid1, 'N' + aid2]
     match_cpd = TabularCPD(
         variable=match_var,
         variable_card=num_same_diff,
         values=samediff_vals,
         evidence=name_vars,
         evidence_card=[num_names, num_names])
     match_cpd.semtype = 'match'
     return match_cpd
Beispiel #15
0
 def score_cpd(aid1, aid2):
     """Build a CPD for ``'S' + aid1 + aid2`` given both name variables.

     Uses ``num_scores`` states and the ``score_values`` table; the parents
     are ``'N' + aid1`` and ``'N' + aid2``, each with ``num_names`` states.
     The returned CPD has ``semtype`` set to ``'score'``.
     """
     spec = dict(
         variable='S' + aid1 + aid2,
         variable_card=num_scores,
         values=score_values,
         evidence=['N' + aid1, 'N' + aid2],
         evidence_card=[num_names, num_names])
     result = TabularCPD(**spec)
     result.semtype = 'score'
     return result
Beispiel #16
0
 def score_cpd(aid1, aid2):
     """Return the score CPD for ``(aid1, aid2)`` given their match variable.

     The variable ``'S' + aid1 + aid2`` has a single parent,
     ``'A' + aid1 + aid2``, with ``num_same_diff`` states. The result is
     tagged with ``semtype = 'score'``.
     """
     pair_suffix = aid1 + aid2
     pair_score = TabularCPD(
         variable='S' + pair_suffix,
         variable_card=num_scores,
         values=score_values,
         evidence=['A' + pair_suffix],
         evidence_card=[num_same_diff])
     pair_score.semtype = 'score'
     return pair_score
Beispiel #17
0
 def samediff_cpd(aid1, aid2):
     """Build a CPD for ``'A' + aid1 + aid2`` given both name variables.

     Uses ``num_same_diff`` states and the ``samediff_vals`` table; the
     parents are ``'N' + aid1`` and ``'N' + aid2``, each with ``num_names``
     states. The returned CPD has ``semtype`` set to ``'match'``.
     """
     spec = dict(
         variable='A' + aid1 + aid2,
         variable_card=num_same_diff,
         values=samediff_vals,
         evidence=['N' + aid1, 'N' + aid2],
         evidence_card=[num_names, num_names])
     result = TabularCPD(**spec)
     result.semtype = 'match'
     return result
Beispiel #18
0
 def score_cpd(aid1, aid2):
     """Build a CPD for ``'S' + aid1 + aid2`` given ``'A' + aid1 + aid2``.

     Uses ``num_scores`` states and the ``score_values`` table; the single
     parent has ``num_same_diff`` states. The returned CPD has ``semtype``
     set to ``'score'``.
     """
     spec = dict(
         variable='S' + aid1 + aid2,
         variable_card=num_scores,
         values=score_values,
         evidence=['A' + aid1 + aid2],
         evidence_card=[num_same_diff])
     result = TabularCPD(**spec)
     result.semtype = 'score'
     return result
Beispiel #19
0
 def test_get_cpd_for_node(self):
     """``get_cpds(node)`` returns the CPD that was added for that node."""
     diff_cpd = TabularCPD('diff', 2, np.random.rand(2, 1))
     intel_cpd = TabularCPD('intel', 2, np.random.rand(2, 1))
     grade_cpd = TabularCPD('grade', 2, np.random.rand(2, 4),
                            ['diff', 'intel'], [2, 2])
     by_node = [('diff', diff_cpd), ('intel', intel_cpd),
                ('grade', grade_cpd)]

     self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
     self.graph.add_cpds(diff_cpd, intel_cpd, grade_cpd)

     for node, expected_cpd in by_node:
         self.assertEqual(self.graph.get_cpds(node), expected_cpd)
Beispiel #20
0
def bayesnet_examples():
    """Walk through fitting, predicting with and hand-specifying a student model.

    The function builds a five-node Bayesian model (D, I -> G -> L and
    I -> S), fits it on random binary data, predicts on held-out rows with the
    'D' column removed, and finally attaches hand-written CPDs to the same
    model. It returns nothing.
    """
    from pgmpy.factors import TabularCPD
    from pgmpy.models import BayesianModel
    import pandas as pd

    student_model = BayesianModel([('D', 'G'),
                                   ('I', 'G'),
                                   ('G', 'L'),
                                   ('I', 'S')])
    # we can generate some random data.
    raw_data = np.random.randint(low=0, high=2, size=(1000, 5))
    data = pd.DataFrame(raw_data, columns=['D', 'I', 'G', 'L', 'S'])

    # First 75% of the rows are used for fitting, the rest for prediction.
    n_train = int(data.shape[0] * 0.75)
    data_train = data[:n_train]
    student_model.fit(data_train)
    student_model.get_cpds()

    # Drop 'D' into a new frame rather than in place: an in-place drop on a
    # slice of ``data`` triggers pandas' SettingWithCopyWarning.
    data_test = data[n_train:].drop('D', axis=1)
    student_model.predict(data_test)

    grade_cpd = TabularCPD(
        variable='G',
        variable_card=3,
        values=[[0.3, 0.05, 0.9, 0.5],
                [0.4, 0.25, 0.08, 0.3],
                [0.3, 0.7, 0.02, 0.2]],
        evidence=['I', 'D'],
        evidence_card=[2, 2])
    difficulty_cpd = TabularCPD(
        variable='D',
        variable_card=2,
        values=[[0.6, 0.4]])
    intel_cpd = TabularCPD(
        variable='I',
        variable_card=2,
        values=[[0.7, 0.3]])
    letter_cpd = TabularCPD(
        variable='L',
        variable_card=2,
        values=[[0.1, 0.4, 0.99],
                [0.9, 0.6, 0.01]],
        evidence=['G'],
        evidence_card=[3])
    sat_cpd = TabularCPD(
        variable='S',
        variable_card=2,
        values=[[0.95, 0.2],
                [0.05, 0.8]],
        evidence=['I'],
        evidence_card=[2])
    student_model.add_cpds(grade_cpd, difficulty_cpd,
                           intel_cpd, letter_cpd,
                           sat_cpd)
Beispiel #21
0
 def test_remove_multiple_cpds_string(self):
     """``remove_cpds`` accepts node names and removes the matching CPDs."""
     diff_cpd = TabularCPD('diff', 2, values=np.random.rand(2, 1))
     intel_cpd = TabularCPD('intel', 2, values=np.random.rand(2, 1))
     grade_cpd = TabularCPD('grade', 2, values=np.random.rand(2, 4),
                            evidence=['diff', 'intel'],
                            evidence_card=[2, 2])

     self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
     self.graph.add_cpds(diff_cpd, intel_cpd, grade_cpd)

     # Remove two of the three by name; only the 'intel' CPD should remain.
     self.graph.remove_cpds('diff', 'grade')
     remaining = self.graph.get_cpds()
     self.assertListEqual(remaining, [intel_cpd])
Beispiel #22
0
    def test_get_parameters_missing_data(self):
        """The estimator on ``self.m1``/``self.d1`` yields the expected CPD set."""
        estimator = MaximumLikelihoodEstimator(self.m1, self.d1)

        expected_a = TabularCPD('A', 2, [[2.0 / 3], [1.0 / 3]])
        expected_c = TabularCPD('C', 2,
                                [[0.0, 0.0, 1.0, 0.5], [1.0, 1.0, 0.0, 0.5]],
                                evidence=['A', 'B'],
                                evidence_card=[2, 2])
        expected_b = TabularCPD('B', 2, [[2.0 / 3], [1.0 / 3]])

        # Compared as sets, so the order of the estimated CPDs is irrelevant.
        estimated = set(estimator.get_parameters())
        self.assertSetEqual(estimated, {expected_a, expected_c, expected_b})
Beispiel #23
0
    def test_check_model(self):
        """``check_model`` returns a truthy value once these CPDs are added."""
        # Every column of each table below sums to one.
        grade = TabularCPD(
            'g', 2,
            np.array([[0.2, 0.3, 0.4, 0.6], [0.8, 0.7, 0.6, 0.4]]),
            ['d', 'i'], [2, 2])
        sat = TabularCPD('s', 2, np.array([[0.2, 0.3], [0.8, 0.7]]),
                         ['i'], 2)
        letter = TabularCPD('l', 2, np.array([[0.2, 0.3], [0.8, 0.7]]),
                            ['g'], 2)

        self.G.add_cpds(grade, sat, letter)

        model_is_valid = self.G.check_model()
        self.assertTrue(model_is_valid)
Beispiel #24
0
 def setUp(self):
     """Create a bare five-node model ``G`` and a fully specified model ``G1``."""
     self.G = BayesianModel([('a', 'd'), ('b', 'd'), ('d', 'e'),
                             ('b', 'c')])

     # G1: 'diff' (2 states) and 'intel' (3 states) are parents of 'grade'.
     self.G1 = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
     student_cpds = [
         TabularCPD('diff', 2, values=[[0.2], [0.8]]),
         TabularCPD('intel', 3, values=[[0.5], [0.3], [0.2]]),
         TabularCPD('grade', 3,
                    values=[[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                            [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                            [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                    evidence=['diff', 'intel'],
                    evidence_card=[2, 3]),
     ]
     self.G1.add_cpds(*student_cpds)
 def setUp(self):
     """Prepare two dynamic Bayesian networks and an inference object for each."""
     # First network: Z(0) is the parent of X(0), Y(0) and Z(1).
     first_dbn = DynamicBayesianNetwork()
     first_dbn.add_edges_from([(('Z', 0), ('X', 0)),
                               (('Z', 0), ('Y', 0)),
                               (('Z', 0), ('Z', 1))])
     z_start = TabularCPD(('Z', 0), 2, [[0.8, 0.2]])
     x_given_z = TabularCPD(('X', 0), 2, [[0.9, 0.6], [0.1, 0.4]],
                            [('Z', 0)], 2)
     y_given_z = TabularCPD(('Y', 0), 2, [[0.7, 0.2], [0.3, 0.8]],
                            [('Z', 0)], 2)
     z_transition = TabularCPD(('Z', 1), 2, [[0.9, 0.1], [0.1, 0.9]],
                               [('Z', 0)], 2)
     first_dbn.add_cpds(z_start, z_transition, x_given_z, y_given_z)
     first_dbn.initialize_initial_state()
     self.dbn_inference_1 = DBNInference(first_dbn)

     # Second network: the chain Z(0) -> X(0) -> Y(0), plus Z(0) -> Z(1).
     second_dbn = DynamicBayesianNetwork()
     second_dbn.add_edges_from([(('Z', 0), ('X', 0)),
                                (('X', 0), ('Y', 0)),
                                (('Z', 0), ('Z', 1))])
     z_start = TabularCPD(('Z', 0), 2, [[0.5, 0.5]])
     x_given_z = TabularCPD(('X', 0), 2, [[0.6, 0.9], [0.4, 0.1]],
                            [('Z', 0)], 2)
     y_given_x = TabularCPD(('Y', 0), 2, [[0.2, 0.3], [0.8, 0.7]],
                            [('X', 0)], 2)
     z_transition = TabularCPD(('Z', 1), 2, [[0.4, 0.7], [0.6, 0.3]],
                               [('Z', 0)], 2)
     second_dbn.add_cpds(x_given_z, y_given_x, z_transition, z_start)
     second_dbn.initialize_initial_state()
     self.dbn_inference_2 = DBNInference(second_dbn)
Beispiel #26
0
    def test_add_multiple_cpds(self):
        """Each CPD added through ``add_cpds`` is returned for its own node."""
        from pgmpy.factors import TabularCPD
        by_node = [
            ('d', TabularCPD('d', 2, np.random.rand(2, 1))),
            ('i', TabularCPD('i', 2, np.random.rand(2, 1))),
            ('g', TabularCPD('g', 2, np.random.rand(2, 4),
                             ['d', 'i'], [2, 2])),
            ('l', TabularCPD('l', 2, np.random.rand(2, 2), ['g'], 2)),
            ('s', TabularCPD('s', 2, np.random.rand(2, 2), ['i'], 2)),
        ]

        self.G.add_cpds(*[cpd for _, cpd in by_node])

        for node, expected_cpd in by_node:
            self.assertEqual(self.G.get_cpds(node), expected_cpd)
Beispiel #27
0
    def get_parameters(self, prior='dirichlet', **kwargs):
        """
        Method for getting all the learned CPDs of the model.

        Parameters
        ----------
        prior: str (default: 'dirichlet')
            Only 'dirichlet' produces CPDs; for any other value the returned
            list is empty.
        alpha: dict (optional keyword argument)
            Pseudo-counts per node, indexed by node. When it is omitted and
            the prior is 'dirichlet', a pseudo-count of 1 is used for every
            cell of each node's table.

        Returns
        -------
        parameters: list
            List containing all the parameters. For Bayesian Model it would be list of CPDs'
            for Markov Model it would be a list of factors

        Raises
        ------
        KeyError
            If ``prior`` is not 'dirichlet' and no ``alpha`` keyword is given.

        Examples
        --------
        >>> import numpy as np
        >>> import pandas as pd
        >>> from pgmpy.models import BayesianModel
        >>> from pgmpy.estimators import MaximumLikelihoodEstimator
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> estimator = MaximumLikelihoodEstimator(model, values)
        >>> estimator.get_parameters()
        """
        if prior == 'dirichlet' and 'alpha' not in kwargs:
            alpha = {}
            for node in self.model.nodes():
                # predecessors() may return an iterator, which is always
                # truthy and cannot be concatenated to a list; materialise it.
                node_parents = list(self.model.predecessors(node))
                n_cells = self.node_card[node]
                if node_parents:
                    n_cells *= int(np.prod([self.node_card[_node]
                                            for _node in node_parents]))
                alpha[node] = [1] * n_cells
        else:
            alpha = kwargs['alpha']

        parameters = []

        # Only the dirichlet prior is implemented below.
        if prior != 'dirichlet':
            return parameters

        for node in self.model.nodes():
            parents = self.model.get_parents(node)
            node_alpha = np.array(alpha[node])
            if not parents:
                # NOTE(review): value_counts() orders by frequency, not by
                # state label - confirm the rows line up with the state order.
                state_counts = self.data.loc[:, node].value_counts()

                values = (state_counts.values + node_alpha) / (state_counts.values.sum() + node_alpha.sum())
                cpd = TabularCPD(node, self.node_card[node], values[:, np.newaxis])
            else:
                parent_card = np.array([self.node_card[parent] for parent in parents])
                var_card = self.node_card[node]
                # NOTE(review): groupby().size() omits state combinations that
                # never occur in the data - confirm this cannot happen here.
                group_columns = [node] + list(self.model.predecessors(node))
                state_counts = self.data.groupby(group_columns).size().values

                values = (state_counts + node_alpha) / (state_counts.sum() + node_alpha.sum())
                values = values.reshape(var_card, np.prod(parent_card))
                cpd = TabularCPD(node, var_card, values,
                                 evidence=parents,
                                 evidence_card=parent_card.astype('int'))
            cpd.normalize()
            parameters.append(cpd)

        return parameters
Beispiel #28
0
    def get_model(self):
        """Assemble and return a ``BayesianModel`` from the parsed network.

        The model is built from ``self.get_edges()``, named after
        ``self.network_name``, given one ``TabularCPD`` per entry of
        ``self.variable_CPD`` and annotated with the ``name = value``
        properties found in ``self.variable_property``.
        """
        network = BayesianModel(self.get_edges())
        network.name = self.network_name

        def build_cpd(var, table):
            # Cardinalities are the number of states of each variable.
            var_card = len(self.variable_states[var])
            parent_vars = self.variable_parents[var]
            parent_cards = [len(self.variable_states[parent])
                            for parent in self.variable_parents[var]]
            return TabularCPD(var, var_card, table,
                              evidence=parent_vars,
                              evidence_card=parent_cards)

        network.add_cpds(*[build_cpd(var, table)
                           for var, table in self.variable_CPD.items()])

        # Each property string has the form "name = value".
        for node, node_props in self.variable_property.items():
            for prop in node_props:
                prop_name, prop_value = (part.strip()
                                         for part in prop.split('='))
                network.node[node][prop_name] = prop_value

        return network
Beispiel #29
0
    def get_model(self):
        """
        Returns an instance of Bayesian Model.

        The model is built from ``self.edges``, named after
        ``self.model_name``, given one ``TabularCPD`` per entry of
        ``self.variable_CPD`` and has each node's data replaced by its entry
        in ``self.variables``.
        """
        network = BayesianModel(self.edges)
        network.name = self.model_name

        cpd_list = []
        for var, spec in self.variable_CPD.items():
            # 'CONDSET' and 'CARDINALITY' are optional; absent means no parents.
            if 'CONDSET' in spec:
                parent_vars = spec['CONDSET']
            else:
                parent_vars = []
            table = spec['DPIS']
            if 'CARDINALITY' in spec:
                parent_cards = spec['CARDINALITY']
            else:
                parent_cards = []
            n_states = len(self.variables[var]['STATES'])
            cpd_list.append(TabularCPD(var, n_states, table,
                                       evidence=parent_vars,
                                       evidence_card=parent_cards))

        network.add_cpds(*cpd_list)

        for var, var_properties in self.variables.items():
            network.node[var] = var_properties

        return network
Beispiel #30
0
    def setUp(self):
        """Build the five-node 'dog-out' model and a ``BIFWriter`` for it."""
        edges = [['family-out', 'dog-out'], ['bowel-problem', 'dog-out'],
                 ['family-out', 'light-on'], ['dog-out', 'hear-bark']]

        cpds = {
            'bowel-problem': np.array([[0.01], [0.99]]),
            'dog-out': np.array([[0.99, 0.01, 0.97, 0.03],
                                 [0.9, 0.1, 0.3, 0.7]]),
            'family-out': np.array([[0.15], [0.85]]),
            'hear-bark': np.array([[0.7, 0.3], [0.01, 0.99]]),
            'light-on': np.array([[0.6, 0.4], [0.05, 0.95]])
        }

        # Every variable has the same two states.
        states = {
            'bowel-problem': ['true', 'false'],
            'dog-out': ['true', 'false'],
            'family-out': ['true', 'false'],
            'hear-bark': ['true', 'false'],
            'light-on': ['true', 'false']
        }

        parents = {
            'bowel-problem': [],
            'dog-out': ['family-out', 'bowel-problem'],
            'family-out': [],
            'hear-bark': ['dog-out'],
            'light-on': ['family-out']
        }

        properties = {
            'bowel-problem': ['position = (335, 99)'],
            'dog-out': ['position = (300, 195)'],
            'family-out': ['position = (257, 99)'],
            'hear-bark': ['position = (296, 268)'],
            'light-on': ['position = (218, 195)']
        }

        self.model = BayesianModel(edges)

        # CPDs are created in sorted variable order.
        cpd_list = []
        for var in sorted(cpds):
            parent_cards = [len(states[parent]) for parent in parents[var]]
            cpd_list.append(TabularCPD(var, len(states[var]), cpds[var],
                                       evidence=parents[var],
                                       evidence_card=parent_cards))
        self.model.add_cpds(*cpd_list)

        # Each property string has the form "name = value".
        for node, node_props in properties.items():
            for prop in node_props:
                prop_name, prop_value = (part.strip()
                                         for part in prop.split('='))
                self.model.node[node][prop_name] = prop_value

        self.writer = BIFWriter(model=self.model)
Beispiel #31
0
    def get_model(self):
        """
        Returns the fitted bayesian model

        The model is built from ``self.variable_edges`` and
        ``self.variable_names``, named after ``self.network_name``, given one
        ``TabularCPD`` per variable (in sorted name order) and annotated with
        the ``name = value`` properties in ``self.variable_properties``.

        Raises
        ------
        AttributeError
            Re-raised with a fixed message whenever an ``AttributeError``
            occurs anywhere while the model is being built.

        Example
        ----------
        >>> from pgmpy.readwrite import BIFReader
        >>> reader = BIFReader("bif_test.bif")
        >>> reader.get_model()
        <pgmpy.models.BayesianModel.BayesianModel object at 0x7f20af154320>
        """
        try:
            network = BayesianModel(self.variable_edges)
            network.name = self.network_name
            network.add_nodes_from(self.variable_names)

            cpd_list = []
            for var in sorted(self.variable_cpds.keys()):
                table = self.variable_cpds[var]
                n_states = len(self.variable_states[var])
                parent_vars = self.variable_parents[var]
                parent_cards = [len(self.variable_states[parent])
                                for parent in self.variable_parents[var]]
                cpd_list.append(TabularCPD(var, n_states, table,
                                           evidence=parent_vars,
                                           evidence_card=parent_cards))
            network.add_cpds(*cpd_list)

            # Each property string has the form "name = value".
            for node, node_props in self.variable_properties.items():
                for prop in node_props:
                    prop_name, prop_value = (part.strip()
                                             for part in prop.split('='))
                    network.node[node][prop_name] = prop_value

            return network

        except AttributeError:
            raise AttributeError('First get states of variables, edges, parents and network name')
Beispiel #32
0
 def test_add_single_cpd(self):
     """After adding one CPD, ``get_cpds()`` returns a list with just that CPD."""
     grade_cpd = TabularCPD('grade', 2,
                            values=np.random.rand(2, 4),
                            evidence=['diff', 'intel'],
                            evidence_card=[2, 2])

     self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
     self.graph.add_cpds(grade_cpd)

     stored = self.graph.get_cpds()
     self.assertListEqual(stored, [grade_cpd])
Beispiel #33
0
    def setUp(self):
        """Build a five-node model with node metadata and an ``XBNWriter`` for it."""

        def node_properties(description, ypos, xpos):
            # Key order matches the literal dictionaries this helper replaces.
            return {'STATES': ['Present', 'Absent'],
                    'DESCRIPTION': description,
                    'YPOS': ypos,
                    'XPOS': xpos,
                    'TYPE': 'discrete'}

        def conditional(table, condset, cardinality):
            return {'TYPE': 'discrete', 'DPIS': np.array(table),
                    'CONDSET': condset, 'CARDINALITY': cardinality}

        nodes = {
            'c': node_properties('(c) Brain Tumor', '11935', '15250'),
            'a': node_properties('(a) Metastatic Cancer', '10465', '13495'),
            'b': node_properties('(b) Serum Calcium Increase',
                                 '11965', '11290'),
            'e': node_properties('(e) Papilledema', '13240', '17305'),
            'd': node_properties('(d) Coma', '12985', '13960'),
        }
        model = BayesianModel([('b', 'd'), ('a', 'b'), ('a', 'c'),
                               ('c', 'd'), ('c', 'e')])
        # 'a' is the only node without a conditioning set.
        cpd_distribution = {
            'a': {'TYPE': 'discrete', 'DPIS': np.array([[0.2, 0.8]])},
            'e': conditional([[0.8, 0.2], [0.6, 0.4]], ['c'], [2]),
            'b': conditional([[0.8, 0.2], [0.2, 0.8]], ['a'], [2]),
            'c': conditional([[0.2, 0.8], [0.05, 0.95]], ['a'], [2]),
            'd': conditional([[0.8, 0.2],
                              [0.9, 0.1],
                              [0.7, 0.3],
                              [0.05, 0.95]], ['b', 'c'], [2, 2]),
        }

        cpd_list = []
        for var, spec in cpd_distribution.items():
            # 'CONDSET' and 'CARDINALITY' are optional; absent means no parents.
            if 'CONDSET' in spec:
                parent_vars = spec['CONDSET']
            else:
                parent_vars = []
            table = spec['DPIS']
            if 'CARDINALITY' in spec:
                parent_cards = spec['CARDINALITY']
            else:
                parent_cards = []
            n_states = len(nodes[var]['STATES'])
            cpd_list.append(TabularCPD(var, n_states, table,
                                       evidence=parent_vars,
                                       evidence_card=parent_cards))
        model.add_cpds(*cpd_list)

        for var, var_properties in nodes.items():
            model.node[var] = var_properties

        self.maxDiff = None
        self.writer = XMLBeliefNetwork.XBNWriter(model=model)
Beispiel #34
0
    def setUp(self):
        """Create one UAIWriter for a Bayesian model and one for a Markov model.

        The Bayesian model (five binary variables) is stored on
        ``self.bayesmodel`` with its writer on ``self.bayeswriter``; the
        three-variable Markov model is stored on ``self.markovmodel`` with its
        writer on ``self.markovwriter``.
        """
        self.maxDiff = None

        # ---- Bayesian model -------------------------------------------------
        bayes_edges = [['family-out', 'dog-out'],
                       ['bowel-problem', 'dog-out'],
                       ['family-out', 'light-on'],
                       ['dog-out', 'hear-bark']]
        cpds = {
            'bowel-problem': np.array([[0.01],
                                       [0.99]]),
            'dog-out': np.array([[0.99, 0.01, 0.97, 0.03],
                                 [0.9, 0.1, 0.3, 0.7]]),
            'family-out': np.array([[0.15],
                                    [0.85]]),
            'hear-bark': np.array([[0.7, 0.3],
                                   [0.01, 0.99]]),
            'light-on': np.array([[0.6, 0.4],
                                  [0.05, 0.95]]),
        }
        states = {
            'bowel-problem': ['true', 'false'],
            'dog-out': ['true', 'false'],
            'family-out': ['true', 'false'],
            'hear-bark': ['true', 'false'],
            'light-on': ['true', 'false'],
        }
        parents = {
            'bowel-problem': [],
            'dog-out': ['family-out', 'bowel-problem'],
            'family-out': [],
            'hear-bark': ['dog-out'],
            'light-on': ['family-out'],
        }

        self.bayesmodel = BayesianModel(bayes_edges)

        # Cardinalities (own and per parent) are taken from the state lists.
        tabular_cpds = [
            TabularCPD(name, len(states[name]), table,
                       evidence=parents[name],
                       evidence_card=[len(states[parent])
                                      for parent in parents[name]])
            for name, table in cpds.items()
        ]
        self.bayesmodel.add_cpds(*tabular_cpds)
        self.bayeswriter = UAIWriter(self.bayesmodel)

        # ---- Markov model ---------------------------------------------------
        markov_edges = {('var_0', 'var_1'), ('var_0', 'var_2'), ('var_1', 'var_2')}
        self.markovmodel = MarkovModel(markov_edges)

        # Each entry: (scope, flat table written as strings).
        tables = [(['var_0', 'var_1'],
                   ['4.000', '2.400', '1.000', '0.000']),
                  (['var_0', 'var_1', 'var_2'],
                   ['2.2500', '3.2500', '3.7500', '0.0000', '0.0000', '10.0000',
                    '1.8750', '4.0000', '3.3330', '2.0000', '2.0000', '3.4000'])]
        domain = {'var_1': '2', 'var_2': '3', 'var_0': '2'}

        factors = [
            Factor(scope,
                   [int(domain[name]) for name in scope],
                   [float(entry) for entry in raw_values])
            for scope, raw_values in tables
        ]
        self.markovmodel.add_factors(*factors)
        self.markovwriter = UAIWriter(self.markovmodel)
Beispiel #35
0
            def score_cpd(aid1, aid2):
                """Build the 'score' TabularCPD for the pair ``(aid1, aid2)``.

                The variable is named ``'S' + aid1 + aid2`` and is conditioned,
                in this order, on ``'A' + aid1 + aid2``, ``'N' + aid1`` and
                ``'N' + aid2``.  The returned CPD is tagged with
                ``semtype = 'score'``.
                """
                semtype = 'score'
                evidence = ['A' + aid1 + aid2, 'N' + aid1, 'N' + aid2]

                # Basis (list of state labels) of every evidence variable,
                # looked up through the semantic type of its existing CPD.
                parent_bases = [semtype2_nice[var2_cpd[name].semtype]
                                for name in evidence]
                evidence_card = [len(basis) for basis in parent_bases]
                # Materialised because it is walked once per row below.
                parent_states = list(ut.iprod(*parent_bases))
                variable_basis = semtype2_nice[semtype]

                def cell(mystate, state):
                    # `state` is indexed in the same order as `evidence`.
                    # NOTE(review): with that order state[0] belongs to the
                    # 'A' variable and state[2] to the second 'N' variable, so
                    # the equality test below compares an 'A' state with an
                    # 'N' state -- confirm this is the intended indexing.
                    if state[2] != 'same':
                        # Both rows get 1 in these columns.
                        return 1
                    if state[0] == state[1]:
                        return .2 if mystate == 'low' else .8
                    return .5

                # One row per state of the score variable, one column per
                # joint state of the evidence variables.
                variable_values = [
                    [cell(mystate, state) for state in parent_states]
                    for mystate in variable_basis
                ]

                cpd = TabularCPD(
                    variable='S' + aid1 + aid2,
                    variable_card=len(variable_basis),
                    values=variable_values,
                    evidence=evidence,
                    evidence_card=evidence_card)
                cpd.semtype = semtype
                return cpd
Beispiel #36
0
    def get_parameters(self):
        """
        Estimate one CPD per node of ``self.model`` from the counts in ``self.data``.

        For a node without parents the CPD is built from the counts of its
        states; for a node with parents it is built from the joint counts of
        the node and its parents.  ``normalize()`` is called on every CPD
        before it is collected.

        Returns
        -------
        parameters: list
            List of TabularCPD objects, one per node, in the order in which
            ``self.model.nodes()`` yields the nodes.

        Examples
        --------
        >>> import numpy as np
        >>> import pandas as pd
        >>> from pgmpy.models import BayesianModel
        >>> from pgmpy.estimators import MaximumLikelihoodEstimator
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> estimator = MaximumLikelihoodEstimator(model, values)
        >>> estimator.get_parameters()
        """
        parameters = []

        for node in self.model.nodes():
            parents = self.model.get_parents(node)
            var_card = self.node_card[node]

            if not parents:
                # `.loc` is the label-based indexer; the older `.ix` indexer
                # is no longer available in current pandas releases.
                state_counts = self.data.loc[:, node].value_counts()
                # value_counts() orders by frequency, so re-order by state.
                state_counts = state_counts.reindex(sorted(state_counts.index))
                # Column vector: one row per state, a single column.
                cpd = TabularCPD(node, var_card,
                                 state_counts.values[:, np.newaxis])
            else:
                parent_card = np.array([self.node_card[parent] for parent in parents])
                # Rows: states of `node`; columns: joint parent states.
                # Combinations missing from an otherwise present row/column
                # become 0 instead of NaN.
                values = self.data.groupby([node] + parents).size().unstack(parents).fillna(0)
                cpd = TabularCPD(node, var_card, np.array(values),
                                 evidence=parents,
                                 evidence_card=parent_card.astype('int'))

            cpd.normalize()
            parameters.append(cpd)

        return parameters
Beispiel #37
0
    def get_parameters(self, **kwargs):
        """
        Method used to get parameters.

        For a BayesianModel one TabularCPD per node is built from state counts
        in ``self.data``.  For a MarkovModel one Factor per edge is built from
        the parameter vector found by ``minimize``.  For any other model type
        the method falls through and returns ``None``.

        Parameters
        ----------
        score: bool, optional (passed through ``**kwargs``)
            Only consulted for a MarkovModel.  When truthy, the final value of
            the optimised objective is returned together with the factors.

        Returns
        -------
        parameters: list
            List containing all the parameters. For Bayesian Model it would be list of CPDs'
            for Markov Model it would be a list of factors
            (or the tuple ``(factors, score)`` when ``score`` is truthy).

        Examples
        --------
        >>> import numpy as np
        >>> import pandas as pd
        >>> from pgmpy.models import BayesianModel
        >>> from pgmpy.estimators import MaximumLikelihoodEstimator
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> estimator = MaximumLikelihoodEstimator(model, values)
        >>> estimator.get_parameters()
        """
        if isinstance(self.model, BayesianModel):
            parameters = []

            for node in self.model.nodes():
                parents = self.model.get_parents(node)
                var_card = self.node_card[node]

                if not parents:
                    # `.loc` replaces the `.ix` indexer, which current pandas
                    # no longer provides.  value_counts() orders by frequency,
                    # so sort by state to match the groupby-based branch
                    # below, whose rows are ordered by sorted state.
                    state_counts = self.data.loc[:, node].value_counts().sort_index()
                    cpd = TabularCPD(node, var_card,
                                     state_counts.values[:, np.newaxis])
                else:
                    parents = list(parents)
                    parent_card = np.array([self.node_card[parent] for parent in parents])
                    # Group by the same `parents` list that is passed as
                    # evidence so the column order matches the declared
                    # evidence order.
                    state_counts = self.data.groupby([node] + parents).size()
                    # Rows: states of `node`; columns: joint parent states.
                    # unstack + fillna tolerates combinations missing from an
                    # otherwise present row/column, where a plain reshape of
                    # the counts would fail.
                    values = state_counts.unstack(parents).fillna(0)
                    cpd = TabularCPD(node, var_card, np.array(values),
                                     evidence=parents,
                                     evidence_card=parent_card.astype('int'))

                cpd.normalize()
                parameters.append(cpd)

            return parameters

        elif isinstance(self.model, MarkovModel):
            # Materialise the edges: they are iterated several times and must
            # not depend on edges() returning an indexable list.
            edges = list(self.model.edges())
            no_of_params = [self.node_card[u] * self.node_card[v] for u, v in edges]

            # Observed pair counts of every edge, concatenated in edge order.
            # NOTE(review): groupby only yields observed combinations, so this
            # vector is shorter than the parameter vector when some pair of
            # states never occurs in the data -- confirm the data guarantees
            # full coverage.
            constants = []
            for u, v in edges:
                constants.extend(self.data.groupby([u, v]).size().values)
            constants = np.array(constants)
            total_params = sum(no_of_params)

            # param_cumsum[i]:param_cumsum[i + 1] is the slice of the flat
            # parameter vector that belongs to edge i.
            param_cumsum = np.cumsum([0] + no_of_params)

            def build_factors(params):
                # One Factor per edge, each taking its slice of `params`.
                return [Factor([u, v], [self.node_card[u], self.node_card[v]],
                               params[param_cumsum[index]: param_cumsum[index + 1]])
                        for index, (u, v) in enumerate(edges)]

            def optimize_fun(params):
                # Objective: sum over the product of all factors minus the
                # count-weighted sum of the parameters.
                Z = sum(factor_product(*build_factors(params)).values)
                return Z - sum(constants * params)

            mini = minimize(optimize_fun, x0=[1]*total_params)
            factors = build_factors(mini.x)

            if kwargs.get('score'):
                return factors, mini.fun
            return factors