def test_state_names1(self):
     """Check P(B | A) when the state names are taken from the data itself.

     The expected table is consistent with rows ordered O, X and columns
     ordered A=2, 3, 8.
     """
     samples = pd.DataFrame(data={'A': [2, 3, 8, 8, 8], 'B': ['X', 'O', 'X', 'O', 'X']})
     network = BayesianModel([('A', 'B')])
     expected = TabularCPD('B', 2,
                           [[0, 1, 1.0 / 3],
                            [1, 0, 2.0 / 3]],
                           evidence=['A'], evidence_card=[3])
     estimator = MaximumLikelihoodEstimator(network, samples)
     self.assertEqual(estimator.estimate_cpd('B'), expected)
 def test_nonoccurring_values(self):
     """States declared in state_names but absent from the data still get CPD entries."""
     declared_states = {
         "A": [0, 1, 23],
         "B": [0, 1],
         "C": [0, 42, 1],
         1: [2],  # presumably an entry for a variable outside the model
     }
     estimator = MaximumLikelihoodEstimator(self.m1,
                                            self.d1,
                                            state_names=declared_states)

     third = 1.0 / 3
     # The last three parent configurations are expected to be uniform.
     expected_c = TabularCPD(
         "C",
         3,
         [
             [0.0, 0.0, 1.0, third, third, third],
             [1.0, 1.0, 0.0, third, third, third],
             [0.0, 0.0, 0.0, third, third, third],
         ],
         evidence=["A", "B"],
         evidence_card=[3, 2],
     )
     expected = {
         TabularCPD("A", 3, [[2.0 / 3], [1.0 / 3], [0]]),
         TabularCPD("B", 2, [[2.0 / 3], [1.0 / 3]]),
         expected_c,
     }
     self.assertSetEqual(set(estimator.get_parameters()), expected)
 def setUp(self):
     """Create the shared fixtures: model, complete/incomplete data, expected CPDs."""
     self.m1 = BayesianModel([("A", "C"), ("B", "C")])
     self.d1 = pd.DataFrame(data={
         "A": [0, 0, 1],
         "B": [0, 1, 0],
         "C": [1, 1, 0]
     })
     # np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0.
     self.d2 = pd.DataFrame(
         data={
             "A": [0, np.nan, 1],
             "B": [0, 1, 0],
             "C": [1, 1, np.nan],
             "D": [np.nan, "Y", np.nan],
         })
     # Reference CPDs that the estimator is expected to produce from d1.
     self.cpds = [
         TabularCPD("A", 2, [[2.0 / 3], [1.0 / 3]]),
         TabularCPD("B", 2, [[2.0 / 3], [1.0 / 3]]),
         TabularCPD(
             "C",
             2,
             [[0.0, 0.0, 1.0, 0.5], [1.0, 1.0, 0.0, 0.5]],
             evidence=["A", "B"],
             evidence_card=[2, 2],
         ),
     ]
     self.mle1 = MaximumLikelihoodEstimator(self.m1, self.d1)
 def test_class_init(self):
     """Explicit binary state names for A, B and C yield the reference CPDs."""
     # A fresh [0, 1] list per variable, as in a hand-written dict literal.
     binary_states = {name: [0, 1] for name in ("A", "B", "C")}
     estimator = MaximumLikelihoodEstimator(
         self.m1,
         self.d1,
         state_names=binary_states,
     )
     estimated = set(estimator.get_parameters())
     self.assertSetEqual(estimated, set(self.cpds))
 def test_nonoccurring_values(self):
     """Extra declared states (23 for A, 42 for C) enlarge the estimated CPDs."""
     state_names = {'A': [0, 1, 23], 'B': [0, 1], 'C': [0, 42, 1], 1: [2]}
     estimator = MaximumLikelihoodEstimator(self.m1, self.d1, state_names=state_names)

     # Tail shared by every row of C's table: three uniform columns.
     uniform_tail = [1.0/3, 1.0/3, 1.0/3]
     cpd_a = TabularCPD('A', 3, [[2.0/3], [1.0/3], [0]])
     cpd_b = TabularCPD('B', 2, [[2.0/3], [1.0/3]])
     cpd_c = TabularCPD('C', 3,
                        [[0.0, 0.0, 1.0] + uniform_tail,
                         [1.0, 1.0, 0.0] + uniform_tail,
                         [0.0, 0.0, 0.0] + uniform_tail],
                        evidence=['A', 'B'], evidence_card=[3, 2])

     self.assertSetEqual(set(estimator.get_parameters()), {cpd_a, cpd_b, cpd_c})
 def test_state_names2(self):
     """Check P(Color | Fruit, Light?) with string and boolean state values."""
     records = {
         'Fruit': ['Apple', 'Apple', 'Apple', 'Banana', 'Banana'],
         'Light?': [True, True, False, False, True],
         'Color': ['red', 'green', 'black', 'black', 'yellow'],
     }
     samples = pd.DataFrame(data=records)
     network = BayesianModel([('Light?', 'Color'), ('Fruit', 'Color')])

     expected_values = [[1, 0, 1, 0],
                        [0, 0.5, 0, 0],
                        [0, 0.5, 0, 0],
                        [0, 0, 0, 1]]
     expected = TabularCPD('Color', 4, expected_values,
                           evidence=['Fruit', 'Light?'], evidence_card=[2, 2])

     estimator = MaximumLikelihoodEstimator(network, samples)
     self.assertEqual(estimator.estimate_cpd('Color'), expected)
 def test_state_names1(self):
     """Estimate B's CPD given A from raw integer / string observations."""
     observations = {'A': [2, 3, 8, 8, 8], 'B': ['X', 'O', 'X', 'O', 'X']}
     estimator = MaximumLikelihoodEstimator(BayesianModel([('A', 'B')]),
                                            pd.DataFrame(data=observations))
     table = [[0, 1, 1.0 / 3], [1, 0, 2.0 / 3]]
     reference = TabularCPD('B', 2, table, evidence=['A'], evidence_card=[3])
     self.assertEqual(estimator.estimate_cpd('B'), reference)
Ejemplo n.º 8
0
    def setup(self):
        """Build an MLE estimator for the 'alarm' and 'munin1' example networks.

        For each network this stores the example model, a BayesianNetwork built
        from its edges, and an estimator fed with 1e4 simulated samples.
        """
        for prefix, example_name in (('alarm', 'alarm'), ('munin', 'munin1')):
            source = get_example_model(example_name)
            setattr(self, prefix, source)

            skeleton = BayesianNetwork(source.edges())
            setattr(self, prefix + '_model', skeleton)

            samples = source.simulate(int(1e4), show_progress=False)
            setattr(self, prefix + '_est',
                    MaximumLikelihoodEstimator(skeleton, samples))
Ejemplo n.º 9
0
    def test_get_parameters_missing_data(self):
        """get_parameters() on (m1, d1) returns the expected CPDs for A, B and C."""
        cpd_a = TabularCPD('A', 2, [[2.0 / 3], [1.0 / 3]])
        cpd_c = TabularCPD('C',
                           2,
                           [[0.0, 0.0, 1.0, 0.5],
                            [1.0, 1.0, 0.0, 0.5]],
                           evidence=['A', 'B'],
                           evidence_card=[2, 2])
        cpd_b = TabularCPD('B', 2, [[2.0 / 3], [1.0 / 3]])

        estimator = MaximumLikelihoodEstimator(self.m1, self.d1)
        self.assertSetEqual(set(estimator.get_parameters()),
                            {cpd_a, cpd_c, cpd_b})
    def test_missing_data(self):
        """Compare estimates on d2 with complete_samples_only=False and then True."""

        def estimate(complete_only):
            # Same model, data and state names; only the flag differs.
            estimator = MaximumLikelihoodEstimator(self.m1, self.d2,
                                                   state_names={'C': [0, 1]},
                                                   complete_samples_only=complete_only)
            return set(estimator.get_parameters())

        expected_all_rows = {
            TabularCPD('A', 2, [[0.5], [0.5]]),
            TabularCPD('B', 2, [[2./3], [1./3]]),
            TabularCPD('C', 2, [[0, 0.5, 0.5, 0.5], [1, 0.5, 0.5, 0.5]],
                       evidence=['A', 'B'], evidence_card=[2, 2]),
        }
        self.assertSetEqual(expected_all_rows, estimate(False))

        expected_complete_rows = {
            TabularCPD('A', 2, [[0.5], [0.5]]),
            TabularCPD('B', 2, [[0.5], [0.5]]),
            TabularCPD('C', 2, [[0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5, 0.5]],
                       evidence=['A', 'B'], evidence_card=[2, 2]),
        }
        self.assertSetEqual(expected_complete_rows, estimate(True))
 def test_state_names2(self):
     """Estimate Color's CPD given Fruit and Light? from categorical data."""
     fruit = ['Apple', 'Apple', 'Apple', 'Banana', 'Banana']
     light = [True, True, False, False, True]
     color = ['red', 'green', 'black', 'black', 'yellow']
     frame = pd.DataFrame(data={'Fruit': fruit, 'Light?': light, 'Color': color})

     graph = BayesianModel([('Light?', 'Color'), ('Fruit', 'Color')])
     reference = TabularCPD(
         'Color',
         4,
         [
             [1, 0, 1, 0],
             [0, 0.5, 0, 0],
             [0, 0.5, 0, 0],
             [0, 0, 0, 1],
         ],
         evidence=['Fruit', 'Light?'],
         evidence_card=[2, 2])

     estimated = MaximumLikelihoodEstimator(graph, frame).estimate_cpd('Color')
     self.assertEqual(estimated, reference)
 def test_state_names1(self):
     """P(B | A) is estimated correctly when states come straight from the data."""
     frame = pd.DataFrame(data={"A": [2, 3, 8, 8, 8], "B": ["X", "O", "X", "O", "X"]})
     graph = BayesianModel([("A", "B")])
     estimated = MaximumLikelihoodEstimator(graph, frame).estimate_cpd("B")

     reference = TabularCPD(
         "B",
         2,
         [
             [0, 1, 1.0 / 3],
             [1, 0, 2.0 / 3],
         ],
         evidence=["A"],
         evidence_card=[3],
     )
     self.assertEqual(estimated, reference)
 def test_state_names2(self):
     """Color's CPD is estimated correctly for string / boolean valued parents."""
     # One tuple per sample: (Fruit, Light?, Color).
     rows = [
         ("Apple", True, "red"),
         ("Apple", True, "green"),
         ("Apple", False, "black"),
         ("Banana", False, "black"),
         ("Banana", True, "yellow"),
     ]
     samples = pd.DataFrame(
         data={
             "Fruit": [row[0] for row in rows],
             "Light?": [row[1] for row in rows],
             "Color": [row[2] for row in rows],
         })
     network = BayesianModel([("Light?", "Color"), ("Fruit", "Color")])
     expected = TabularCPD(
         "Color",
         4,
         [[1, 0, 1, 0], [0, 0.5, 0, 0], [0, 0.5, 0, 0], [0, 0, 0, 1]],
         evidence=["Fruit", "Light?"],
         evidence_card=[2, 2],
     )
     estimator = MaximumLikelihoodEstimator(network, samples)
     self.assertEqual(estimator.estimate_cpd("Color"), expected)
 def setUp(self):
     """Create the model, the complete (d1) and incomplete (d2) data, and expected CPDs."""
     self.m1 = BayesianModel([('A', 'C'), ('B', 'C')])
     self.d1 = pd.DataFrame(data={'A': [0, 0, 1], 'B': [0, 1, 0], 'C': [1, 1, 0]})
     # np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0.
     self.d2 = pd.DataFrame(data={'A': [0, np.nan, 1],
                                  'B': [0, 1, 0],
                                  'C': [1, 1, np.nan],
                                  'D': [np.nan, 'Y', np.nan]})
     # Reference CPDs that the estimator is expected to produce from d1.
     self.cpds = [TabularCPD('A', 2, [[2.0/3], [1.0/3]]),
                  TabularCPD('B', 2, [[2.0/3], [1.0/3]]),
                  TabularCPD('C', 2, [[0.0, 0.0, 1.0, 0.5],
                                      [1.0, 1.0, 0.0, 0.5]],
                             evidence=['A', 'B'], evidence_card=[2, 2])]
     self.mle1 = MaximumLikelihoodEstimator(self.m1, self.d1)
 def test_nonoccurring_values(self):
     """Declared-but-unobserved states are reflected in the estimated CPDs."""
     names = {
         'A': [0, 1, 23],
         'B': [0, 1],
         'C': [0, 42, 1],
         1: [2],
     }
     estimated = set(
         MaximumLikelihoodEstimator(self.m1, self.d1,
                                    state_names=names).get_parameters())

     c_values = [
         [0.0, 0.0, 1.0, 1.0 / 3, 1.0 / 3, 1.0 / 3],
         [1.0, 1.0, 0.0, 1.0 / 3, 1.0 / 3, 1.0 / 3],
         [0.0, 0.0, 0.0, 1.0 / 3, 1.0 / 3, 1.0 / 3],
     ]
     expected = set()
     expected.add(TabularCPD('A', 3, [[2.0 / 3], [1.0 / 3], [0]]))
     expected.add(TabularCPD('B', 2, [[2.0 / 3], [1.0 / 3]]))
     expected.add(TabularCPD('C', 3, c_values,
                             evidence=['A', 'B'],
                             evidence_card=[3, 2]))
     self.assertSetEqual(estimated, expected)
Ejemplo n.º 16
0
class TimeMLE:
    """Timing cases for MaximumLikelihoodEstimator.get_parameters().

    Presumably collected by a benchmark runner that calls ``setup`` before
    each ``time_*`` method -- confirm against the benchmark configuration.
    """

    timeout = 600

    def setup(self):
        """Create one estimator per example network from 1e4 simulated samples."""
        sample_count = int(1e4)

        self.alarm = get_example_model('alarm')
        self.alarm_model = BayesianNetwork(self.alarm.edges())
        alarm_samples = self.alarm.simulate(sample_count, show_progress=False)
        self.alarm_est = MaximumLikelihoodEstimator(self.alarm_model,
                                                    alarm_samples)

        self.munin = get_example_model('munin1')
        self.munin_model = BayesianNetwork(self.munin.edges())
        munin_samples = self.munin.simulate(sample_count, show_progress=False)
        self.munin_est = MaximumLikelihoodEstimator(self.munin_model,
                                                    munin_samples)

    def time_alarm_mle(self):
        """Estimate all CPDs of the 'alarm' network."""
        self.alarm_est.get_parameters()

    def time_munin_mle(self):
        """Estimate all CPDs of the 'munin1' network."""
        self.munin_est.get_parameters()
Ejemplo n.º 17
0
    def build_BN(self):
        '''
        Build the Bayesian network, estimate a CPD for every field in
        self.Bayesfields with maximum likelihood, and attach it to the model.

        Note:A-Altitude; C-Cropland; DNR-Distance to nature reserve; DRD-Distance to road; DRW-Distance to railway; DT-Distance to township; DW-Distance to water body; ES-Eco-environmental sensitivity; G-Gradient; IE-Importance of ecosystem services; IW-Importance of water conservation; R-Relief amplitude; SD-Soil erosion degree; LC-Urban encroachment; ELP-ERA potential

        :return: dict mapping str(field) to the value returned by that field's
                 CPD ``returndic()`` (a project-specific helper on the CPD).
        '''
        data = pd.DataFrame(data=self.Bayesdata)
        model = BayesianModel([
            ('A', 'ES'),
            ('R', 'ES'),
            ('DW', 'ES'),
            ('SD', 'ES'),
            ('IW', 'IE'),
            ('DNR', 'IE'),
            ('G', 'IE'),
            ('DRW', 'LC'),
            ('DT', 'LC'),
            ('DRD', 'LC'),
            ('G', 'C'),
            ('DW', 'C'),
            ('ES', 'ELP'),
            ('IE', 'ELP'),
            ('LC', 'ELP'),
            ('C', 'ELP'),
        ])
        mle_model = MaximumLikelihoodEstimator(model, data)

        alldict = {}
        for field in self.Bayesfields:
            # A plain local replaces the former locals()['cpd_<field>'] store:
            # writes to locals() inside a function are not a reliable store
            # (each call returns an independent snapshot since Python 3.13).
            cpd = mle_model.estimate_cpd(field)
            arcpy.AddMessage(str(cpd))
            dic = cpd.returndic()
            model.add_cpds(cpd)
            alldict[str(field)] = dic

        return alldict
    def test_missing_data(self):
        """Estimate from NaN-containing d2 with complete_samples_only off, then on."""
        c_states = {"C": [0, 1]}

        # Case 1: complete_samples_only=False.
        lenient = MaximumLikelihoodEstimator(self.m1,
                                             self.d2,
                                             state_names=c_states,
                                             complete_samples_only=False)
        expected_lenient = {
            TabularCPD("A", 2, [[0.5], [0.5]]),
            TabularCPD("B", 2, [[2.0 / 3], [1.0 / 3]]),
            TabularCPD(
                "C",
                2,
                [[0, 0.5, 0.5, 0.5], [1, 0.5, 0.5, 0.5]],
                evidence=["A", "B"],
                evidence_card=[2, 2],
            ),
        }
        self.assertSetEqual(expected_lenient, set(lenient.get_parameters()))

        # Case 2: complete_samples_only=True.
        strict = MaximumLikelihoodEstimator(self.m1,
                                            self.d2,
                                            state_names={"C": [0, 1]},
                                            complete_samples_only=True)
        expected_strict = {
            TabularCPD("A", 2, [[0.5], [0.5]]),
            TabularCPD("B", 2, [[0.5], [0.5]]),
            TabularCPD(
                "C",
                2,
                [[0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5, 0.5]],
                evidence=["A", "B"],
                evidence_card=[2, 2],
            ),
        }
        self.assertSetEqual(expected_strict, set(strict.get_parameters()))
 def fit(self, X):
     """Estimate a CPD for every column with MLE and record the elapsed time.

     X is handed to MaximumLikelihoodEstimator as the training data
     (presumably a DataFrame whose columns match self.columns -- confirm
     against the caller).

     The elapsed wall-clock time is stored in self.fit_time and printed.
     """
     # NOTE(review): the estimated CPDs are discarded, as before; attach them
     # to self.model if the fitted parameters are needed later.
     # Start the clock before the work so fit_time covers the estimation.
     start_time = time.time()
     estimator = MaximumLikelihoodEstimator(self.model, X)
     for node in tqdm(self.columns):
         estimator.estimate_cpd(node)
     self.fit_time = time.time() - start_time
     print(self.fit_time)
 def setUp(self):
     """Prepare the model, two data sets and the reference CPDs used by the tests."""
     self.m1 = BayesianModel([('A', 'C'), ('B', 'C')])
     self.d1 = pd.DataFrame(data={
         'A': [0, 0, 1],
         'B': [0, 1, 0],
         'C': [1, 1, 0]
     })
     # np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0.
     self.d2 = pd.DataFrame(
         data={
             'A': [0, np.nan, 1],
             'B': [0, 1, 0],
             'C': [1, 1, np.nan],
             'D': [np.nan, 'Y', np.nan]
         })
     # Reference CPDs that the estimator is expected to produce from d1.
     self.cpds = [
         TabularCPD('A', 2, [[2.0 / 3], [1.0 / 3]]),
         TabularCPD('B', 2, [[2.0 / 3], [1.0 / 3]]),
         TabularCPD('C',
                    2, [[0.0, 0.0, 1.0, 0.5], [1.0, 1.0, 0.0, 0.5]],
                    evidence=['A', 'B'],
                    evidence_card=[2, 2])
     ]
     self.mle1 = MaximumLikelihoodEstimator(self.m1, self.d1)
Ejemplo n.º 21
0
        [0.1, 0.2, 1, 1, 0.8, 0.9, 1, 1]
    ],  #p(~G)
    evidence=[
        'BrokeElectionLaw', 'PoliticallyMotivatedProsecutor', 'Indicted'
    ],
    evidence_card=[2, 2, 2])

# CPD of 'Jailed' conditioned on its single parent 'FoundGuilty'.
cpd_j = TabularCPD(variable='Jailed',
                   variable_card=2,
                   values=[[0.9, 0.0], [0.1, 1.0]],
                   evidence=['FoundGuilty'],
                   evidence_card=[2])

# Attach the CPDs to the model.
election_model.add_cpds(cpd_b, cpd_i, cpd_m, cpd_g, cpd_j)

# Check the independencies.
print(election_model.get_independencies())

# Draw 1e5 samples from the model by forward sampling.
samples = BayesianModelSampling(election_model).forward_sample(size=int(1e5))
# NOTE(review): the result of head() is discarded here; it is only shown
# when this runs in an interactive session or notebook.
samples.head()

# Show the estimates: re-estimate every node's CPD from the samples.
mle = MaximumLikelihoodEstimator(model=election_model, data=samples)
print("\nEstimating the CPD for a single node.\n")
print(mle.estimate_cpd(node='BrokeElectionLaw'))
print(mle.estimate_cpd(node='PoliticallyMotivatedProsecutor'))
print(mle.estimate_cpd(node='Indicted'))
print(mle.estimate_cpd(node='FoundGuilty'))
print(mle.estimate_cpd(node='Jailed'))
class TestMLE(unittest.TestCase):
    """Tests for MaximumLikelihoodEstimator on a small model with edges A->C and B->C."""

    def setUp(self):
        """Create the model, complete (d1) and incomplete (d2) data, and reference CPDs."""
        self.m1 = BayesianModel([('A', 'C'), ('B', 'C')])
        self.d1 = pd.DataFrame(data={'A': [0, 0, 1], 'B': [0, 1, 0], 'C': [1, 1, 0]})
        # np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0.
        self.d2 = pd.DataFrame(data={'A': [0, np.nan, 1], 'B': [0, 1, 0], 'C': [1, 1, np.nan],
                                     'D': [np.nan, 'Y', np.nan]})
        self.cpds = [TabularCPD('A', 2, [[2.0/3], [1.0/3]]),
                     TabularCPD('B', 2, [[2.0/3], [1.0/3]]),
                     TabularCPD('C', 2, [[0.0, 0.0, 1.0, 0.5],
                                         [1.0, 1.0, 0.0, 0.5]],
                                evidence=['A', 'B'], evidence_card=[2, 2])]
        self.mle1 = MaximumLikelihoodEstimator(self.m1, self.d1)

    def test_get_parameters_incomplete_data(self):
        """get_parameters() on (m1, d1) returns the reference CPDs."""
        self.assertSetEqual(set(self.mle1.get_parameters()), set(self.cpds))

    def test_estimate_cpd(self):
        """estimate_cpd() returns the reference CPD for each node."""
        self.assertEqual(self.mle1.estimate_cpd('A'), self.cpds[0])
        self.assertEqual(self.mle1.estimate_cpd('B'), self.cpds[1])
        self.assertEqual(self.mle1.estimate_cpd('C'), self.cpds[2])

    def test_state_names1(self):
        """State names are taken from the data when none are supplied (int / str values)."""
        m = BayesianModel([('A', 'B')])
        d = pd.DataFrame(data={'A': [2, 3, 8, 8, 8], 'B': ['X', 'O', 'X', 'O', 'X']})
        cpd_b = TabularCPD('B', 2, [[0, 1, 1.0 / 3], [1, 0, 2.0 / 3]],
                           evidence=['A'], evidence_card=[3])
        mle2 = MaximumLikelihoodEstimator(m, d)
        self.assertEqual(mle2.estimate_cpd('B'), cpd_b)

    def test_state_names2(self):
        """State names are taken from the data when none are supplied (str / bool values)."""
        m = BayesianModel([('Light?', 'Color'), ('Fruit', 'Color')])
        d = pd.DataFrame(data={'Fruit': ['Apple', 'Apple', 'Apple', 'Banana', 'Banana'],
                               'Light?': [True,   True,   False,   False,    True],
                               'Color': ['red',   'green', 'black', 'black',  'yellow']})
        color_cpd = TabularCPD('Color', 4, [[1, 0, 1, 0], [0, 0.5, 0, 0],
                                            [0, 0.5, 0, 0], [0, 0, 0, 1]],
                               evidence=['Fruit', 'Light?'], evidence_card=[2, 2])
        mle2 = MaximumLikelihoodEstimator(m, d)
        self.assertEqual(mle2.estimate_cpd('Color'), color_cpd)

    def test_class_init(self):
        """Explicit state names matching the data give the reference CPDs."""
        mle = MaximumLikelihoodEstimator(self.m1, self.d1,
                                         state_names={'A': [0, 1], 'B': [0, 1], 'C': [0, 1]})
        self.assertSetEqual(set(mle.get_parameters()), set(self.cpds))

    def test_nonoccurring_values(self):
        """Declared states that never occur in the data still appear in the CPDs."""
        mle = MaximumLikelihoodEstimator(self.m1, self.d1,
                                         state_names={'A': [0, 1, 23], 'B': [0, 1], 'C': [0, 42, 1], 1: [2]})
        cpds = [TabularCPD('A', 3, [[2.0/3], [1.0/3], [0]]),
                TabularCPD('B', 2, [[2.0/3], [1.0/3]]),
                TabularCPD('C', 3, [[0.0, 0.0, 1.0, 1.0/3, 1.0/3, 1.0/3],
                                    [1.0, 1.0, 0.0, 1.0/3, 1.0/3, 1.0/3],
                                    [0.0, 0.0, 0.0, 1.0/3, 1.0/3, 1.0/3]],
                           evidence=['A', 'B'], evidence_card=[3, 2])]
        self.assertSetEqual(set(mle.get_parameters()), set(cpds))

    def test_missing_data(self):
        """Estimates from NaN-containing d2 with complete_samples_only=False, then True."""
        e1 = MaximumLikelihoodEstimator(self.m1, self.d2, state_names={'C': [0, 1]}, complete_samples_only=False)
        cpds1 = {TabularCPD('A', 2, [[0.5], [0.5]]),
                 TabularCPD('B', 2, [[2./3], [1./3]]),
                 TabularCPD('C', 2, [[0, 0.5, 0.5, 0.5], [1, 0.5, 0.5, 0.5]],
                            evidence=['A', 'B'], evidence_card=[2, 2])}
        self.assertSetEqual(cpds1, set(e1.get_parameters()))

        e2 = MaximumLikelihoodEstimator(self.m1, self.d2, state_names={'C': [0, 1]}, complete_samples_only=True)
        cpds2 = {TabularCPD('A', 2, [[0.5], [0.5]]),
                 TabularCPD('B', 2, [[0.5], [0.5]]),
                 TabularCPD('C', 2, [[0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5, 0.5]],
                            evidence=['A', 'B'], evidence_card=[2, 2])}
        self.assertSetEqual(cpds2, set(e2.get_parameters()))

    def tearDown(self):
        """Drop the model and data fixtures created in setUp."""
        del self.m1
        del self.d1
        del self.d2
 def test_class_init(self):
     """An estimator built with explicit binary state names reproduces self.cpds."""
     explicit_states = {'A': [0, 1], 'B': [0, 1], 'C': [0, 1]}
     estimator = MaximumLikelihoodEstimator(self.m1, self.d1,
                                            state_names=explicit_states)
     estimated = set(estimator.get_parameters())
     reference = set(self.cpds)
     self.assertSetEqual(estimated, reference)
# State counts: how often each state of a variable occurs in `data`.
print(" −−−−−−−−−−− State counts −−−−−−−−−−− ")
from pgmpy.estimators import ParameterEstimator

pe = ParameterEstimator(model, data)
print("\n", pe.state_counts('fruit'))  # unconditional
print(" −−−−−−−−−−−−−−−−−−−−−− ")
print("\n", pe.state_counts('tasty'))  # conditional on fruit and size
print(" −−−−−−−−−−−−−−−−−−−−−− ")

# Maximum Likelihood Estimation: estimate individual CPDs from `data`.
print("−−−−− Maximum Likelihood Estimation −−−−−−−−−−−−−−")
from pgmpy.estimators import MaximumLikelihoodEstimator

mle = MaximumLikelihoodEstimator(model, data)
print(mle.estimate_cpd("fruit"))  # unconditional
print(" −−−−−−−−−−−−−−−−−−−−−− ")
print(mle.estimate_cpd("tasty"))  # conditional
print(" −−−−−−−−−−−−−−−−−−−−−− ")

# Calibrate all CPDs of `model` using MLE:
model.fit(data, estimator=MaximumLikelihoodEstimator)
# Bayesian Parameter Estimation: estimate the CPD of "tasty" with a BDeu
# prior and an equivalent sample size of 10.
print("−−−−− Bayesian Parameter Estimation −−−−−−−−−−−−−−")
from pgmpy.estimators import BayesianEstimator

est = BayesianEstimator(model, data)
print(est.estimate_cpd("tasty", prior_type="BDeu", equivalent_sample_size=10))
print(" −−−−−−−−−−−−−−−−−−−−−− ")
    return links

# Build the model from the edges returned by CreateLinks for the data columns.
links = CreateLinks(data_columns)
model = BayesianModel(links)

pe = ParameterEstimator(model, data)

# Print the state counts of 'Symptom_1'.
pe_symptom1 = pe.state_counts('Symptom_1')
print(pe_symptom1)

# Print the state counts of 'Disease'.
pe_disease = pe.state_counts('Disease')
print(pe_disease)

mle = MaximumLikelihoodEstimator(model, data)

# Print the maximum likelihood CPD of 'Symptom_1'.
mle_symptom1 = mle.estimate_cpd('Symptom_1')
print(mle_symptom1)

# Maximum likelihood CPD of 'Disease' (currently disabled).
#mle_disease = mle.estimate_cpd('Disease')
#print(mle_disease)

# Calibrate all CPDs of `model` using MLE:
model.fit(data, estimator=MaximumLikelihoodEstimator)

# Bayesian estimate of the CPD of 'Disease' with a BDeu prior and an
# equivalent sample size of 10.
est = BayesianEstimator(model, data)
est_disease = est.estimate_cpd('Disease', prior_type='BDeu', equivalent_sample_size=10)
print(est_disease)
Ejemplo n.º 26
0
def parameter_learning(model, df, methodtype='bayes', verbose=3):
    '''Learn the conditional probability distributions (CPDs) of a model.

    Parameters
    ----------
    model       : [DICT] Contains model and adjmat. Only model['model'] is used here.

    df          : [pd.DataFrame] Pandas DataFrame containing the data
                   f1  ,f2  ,f3
                s1 0   ,0   ,1
                s2 0   ,1   ,0
                s3 1   ,1   ,0

    methodtype  : [STRING] strategy for parameter learning.
                'ml' or 'maximumlikelihood' : Estimate each CPD with the Maximum Likelihood Estimator and print it
                'bayes' (default) : Bayesian Parameter Estimation (BDeu prior, equivalent sample size 1000)
                Any other value leaves the model untouched.

    verbose     : [INT] Print messages to screen.
                0: NONE
                1: ERROR
                2: WARNING
                3: INFO (default)
                4: DEBUG
                5: TRACE

    Returns
    -------
    model : the object stored under model['model']; fitted in place when
            methodtype is 'bayes'.


    Parameter learning is the task of estimating the values of the conditional
    probability distributions (CPDs), e.g. for the variables cloudy, sprinkler,
    rain and wet grass.
    State counts
        To make sense of the given data, we can start by counting how often each state of the variable occurs.
        If the variable is dependent on parents, the counts are done conditionally on the parents' states,
        i.e. separately for each parent configuration.
    '''

    #    model = BayesianModel([('Cloudy', 'Sprinkler'),
    #                           ('Cloudy', 'Rain'),
    #                           ('Sprinkler', 'Wet_Grass'),
    #                           ('Rain', 'Wet_Grass')])

    config = {'verbose': verbose, 'method': methodtype}
    model = model['model']
    if verbose >= 3:
        # One-element tuple so that a tuple-valued method cannot break the formatting.
        print('[BNLEARN][PARAMETER LEARNING] Computing parameters using [%s]' %
              (config['method'],))

    #    pe = ParameterEstimator(model, df)
    #    print("\n", pe.state_counts('Cloudy'))
    #    print("\n", pe.state_counts('Sprinkler'))

    # Maximum Likelihood Estimation
    #     A natural estimate for the CPDs is to simply use the *relative
    #     frequencies* with which the variable states have occurred.
    #     According to MLE, the CPDs should be filled in such a way that
    #     P(data | model) is maximal; this is achieved by using the relative
    #     frequencies.
    #
    # While very straightforward, the ML estimator has the problem of
    # *overfitting* to the data. If the observed data is not representative of
    # the underlying distribution, ML estimates will be extremely far off.
    # When estimating parameters for Bayesian networks, lack of data is a
    # frequent problem. Even if the total sample size is very large, the fact
    # that state counts are done conditionally for each parent configuration
    # causes immense fragmentation. If a variable has 3 parents that can each
    # take 10 states, then state counts will be done separately for
    # 10^3 = 1000 parent configurations. This makes MLE very fragile and
    # unstable for learning Bayesian network parameters. A way to mitigate
    # MLE's overfitting is *Bayesian Parameter Estimation*.

    # Learning CPDs using Maximum Likelihood Estimators
    # NOTE(review): the estimated CPDs are only printed (regardless of
    # `verbose`) and are not added to the returned model.
    if config['method'] in ('ml', 'maximumlikelihood'):
        mle = MaximumLikelihoodEstimator(model, df)
        for node in mle.state_names:
            print(mle.estimate_cpd(node))

    # Bayesian Parameter Estimation
    #     The Bayesian Parameter Estimator starts with already existing prior
    #     CPDs that express our beliefs about the variables *before* the data
    #     was observed. Those "priors" are then updated using the state counts
    #     from the observed data.
    #
    # One can think of the priors as consisting of *pseudo state counts* that
    # are added to the actual counts before normalization. Unless one wants to
    # encode specific beliefs about the distributions of the variables, one
    # commonly chooses uniform priors, i.e. ones that deem all states
    # equiprobable.
    #
    # A very simple prior is the so-called *K2* prior, which simply adds `1`
    # to the count of every single state. A somewhat more sensible choice of
    # prior is *BDeu* (Bayesian Dirichlet equivalent uniform prior). For BDeu
    # we need to specify an *equivalent sample size* `N`, and then the
    # pseudo-counts are the equivalent of having observed `N` uniform samples
    # of each variable (and each parent configuration).
    if config['method'] == 'bayes':
        model.fit(
            df,
            estimator=BayesianEstimator,
            prior_type="BDeu",
            equivalent_sample_size=1000)  # default equivalent_sample_size=5

        if verbose >= 3:
            for cpd in model.get_cpds():
                print("CPD of {variable}:".format(variable=cpd.variable))
                print(cpd)

    return model
Ejemplo n.º 27
0
# Edges of the network.
# e.g. ('X8', 'S8') means there is an edge from X8 to S8.
model = BayesianModel([
    ('X8', 'S8'),
    ('X9', 'S9'),
    ('X1', 'S10'),
    ('X1', 'S12'),
    ('X2', 'S10'),
    ('X2', 'S11'),
    ('X2', 'S13'),
    ('X3', 'S14'),
    ('X4', 'S15'),
    ('X5', 'S16'),
    ('X6', 'S17'),
    ('X6', 'S7'),
    ('X7', 'S7'),
    ('S10', 'S1'),
    ('S11', 'S1'),
    ('S12', 'S2'),
    ('S13', 'S2'),
    ('X2', 'S3'),
    ('S14', 'S4'),
    ('S15', 'S4'),
    ('S16', 'S4'),
    ('S17', 'S4'),
    ('S8', 'S5'),
    ('S9', 'S5'),
    ('S1', 'S6'),
    ('S2', 'S6'),
    ('S3', 'S6'),
    ('S4', 'S6'),
    ('S5', 'T'),
    ('S6', 'T'),
    ('S7', 'T'),
])

pe = ParameterEstimator(model, data)
# print("\n", pe.state_counts('S1'))

# Run maximum likelihood estimation on the model and data (training).
mle = MaximumLikelihoodEstimator(model, data)

# print("\n", mle.estimate_cpd('S1'))
# print("\n", mle.estimate_cpd('T'))  # distribution of tasty given fruit and size

mle.get_parameters()
model.fit(data, estimator=MaximumLikelihoodEstimator)

# Inspect the probability distributions of individual nodes:
# print(model.get_cpds('S1'))
# print(model.get_cpds('S7'))
# print(model.get_cpds('T'))

# Variable elimination (inference).
infer = VariableElimination(model)
# Print the data type of infer
class TestMLE(unittest.TestCase):
    """Tests for MaximumLikelihoodEstimator on a small model with edges A->C and B->C."""

    def setUp(self):
        """Create the model, complete (d1) and incomplete (d2) data, and reference CPDs."""
        self.m1 = BayesianModel([('A', 'C'), ('B', 'C')])
        self.d1 = pd.DataFrame(data={
            'A': [0, 0, 1],
            'B': [0, 1, 0],
            'C': [1, 1, 0]
        })
        # np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0.
        self.d2 = pd.DataFrame(
            data={
                'A': [0, np.nan, 1],
                'B': [0, 1, 0],
                'C': [1, 1, np.nan],
                'D': [np.nan, 'Y', np.nan]
            })
        self.cpds = [
            TabularCPD('A', 2, [[2.0 / 3], [1.0 / 3]]),
            TabularCPD('B', 2, [[2.0 / 3], [1.0 / 3]]),
            TabularCPD('C',
                       2, [[0.0, 0.0, 1.0, 0.5], [1.0, 1.0, 0.0, 0.5]],
                       evidence=['A', 'B'],
                       evidence_card=[2, 2])
        ]
        self.mle1 = MaximumLikelihoodEstimator(self.m1, self.d1)

    def test_get_parameters_incomplete_data(self):
        """get_parameters() on (m1, d1) returns the reference CPDs."""
        self.assertSetEqual(set(self.mle1.get_parameters()), set(self.cpds))

    def test_estimate_cpd(self):
        """estimate_cpd() returns the reference CPD for each node."""
        self.assertEqual(self.mle1.estimate_cpd('A'), self.cpds[0])
        self.assertEqual(self.mle1.estimate_cpd('B'), self.cpds[1])
        self.assertEqual(self.mle1.estimate_cpd('C'), self.cpds[2])

    def test_state_names1(self):
        """State names are taken from the data when none are supplied (int / str values)."""
        m = BayesianModel([('A', 'B')])
        d = pd.DataFrame(data={
            'A': [2, 3, 8, 8, 8],
            'B': ['X', 'O', 'X', 'O', 'X']
        })
        cpd_b = TabularCPD('B',
                           2, [[0, 1, 1.0 / 3], [1, 0, 2.0 / 3]],
                           evidence=['A'],
                           evidence_card=[3])
        mle2 = MaximumLikelihoodEstimator(m, d)
        self.assertEqual(mle2.estimate_cpd('B'), cpd_b)

    def test_state_names2(self):
        """State names are taken from the data when none are supplied (str / bool values)."""
        m = BayesianModel([('Light?', 'Color'), ('Fruit', 'Color')])
        d = pd.DataFrame(
            data={
                'Fruit': ['Apple', 'Apple', 'Apple', 'Banana', 'Banana'],
                'Light?': [True, True, False, False, True],
                'Color': ['red', 'green', 'black', 'black', 'yellow']
            })
        color_cpd = TabularCPD(
            'Color',
            4, [[1, 0, 1, 0], [0, 0.5, 0, 0], [0, 0.5, 0, 0], [0, 0, 0, 1]],
            evidence=['Fruit', 'Light?'],
            evidence_card=[2, 2])
        mle2 = MaximumLikelihoodEstimator(m, d)
        self.assertEqual(mle2.estimate_cpd('Color'), color_cpd)

    def test_class_init(self):
        """Explicit state names matching the data give the reference CPDs."""
        mle = MaximumLikelihoodEstimator(self.m1,
                                         self.d1,
                                         state_names={
                                             'A': [0, 1],
                                             'B': [0, 1],
                                             'C': [0, 1]
                                         })
        self.assertSetEqual(set(mle.get_parameters()), set(self.cpds))

    def test_nonoccurring_values(self):
        """Declared states that never occur in the data still appear in the CPDs."""
        mle = MaximumLikelihoodEstimator(self.m1,
                                         self.d1,
                                         state_names={
                                             'A': [0, 1, 23],
                                             'B': [0, 1],
                                             'C': [0, 42, 1],
                                             1: [2]
                                         })
        cpds = [
            TabularCPD('A', 3, [[2.0 / 3], [1.0 / 3], [0]]),
            TabularCPD('B', 2, [[2.0 / 3], [1.0 / 3]]),
            TabularCPD('C',
                       3, [[0.0, 0.0, 1.0, 1.0 / 3, 1.0 / 3, 1.0 / 3],
                           [1.0, 1.0, 0.0, 1.0 / 3, 1.0 / 3, 1.0 / 3],
                           [0.0, 0.0, 0.0, 1.0 / 3, 1.0 / 3, 1.0 / 3]],
                       evidence=['A', 'B'],
                       evidence_card=[3, 2])
        ]
        self.assertSetEqual(set(mle.get_parameters()), set(cpds))

    def test_missing_data(self):
        """Estimates from NaN-containing d2 with complete_samples_only=False, then True."""
        e1 = MaximumLikelihoodEstimator(self.m1,
                                        self.d2,
                                        state_names={'C': [0, 1]},
                                        complete_samples_only=False)
        cpds1 = {
            TabularCPD('A', 2, [[0.5], [0.5]]),
            TabularCPD('B', 2, [[2. / 3], [1. / 3]]),
            TabularCPD('C',
                       2, [[0, 0.5, 0.5, 0.5], [1, 0.5, 0.5, 0.5]],
                       evidence=['A', 'B'],
                       evidence_card=[2, 2])
        }
        self.assertSetEqual(cpds1, set(e1.get_parameters()))

        e2 = MaximumLikelihoodEstimator(self.m1,
                                        self.d2,
                                        state_names={'C': [0, 1]},
                                        complete_samples_only=True)
        cpds2 = {
            TabularCPD('A', 2, [[0.5], [0.5]]),
            TabularCPD('B', 2, [[0.5], [0.5]]),
            TabularCPD('C',
                       2, [[0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5, 0.5]],
                       evidence=['A', 'B'],
                       evidence_card=[2, 2])
        }
        self.assertSetEqual(cpds2, set(e2.get_parameters()))

    def tearDown(self):
        """Drop the model and data fixtures created in setUp."""
        del self.m1
        del self.d1
        del self.d2
Ejemplo n.º 29
0
passive_users = "passive, "*24

# Turn the comma-separated label strings into lists of clean labels.
# Each token is stripped individually: splitting "passive, passive, " on ","
# yields " passive" (leading space) for every token but the first, which would
# otherwise show up as a second, spurious state of 'user_type'.
active_users = [elem.strip() for elem in active_users.split(",") if elem.strip()]
passive_users = [elem.strip() for elem in passive_users.split(",") if elem.strip()]

data = pd.DataFrame(data={
    'last_activity': high + medium + low,
    'duration': dhigh + dmedium + dlow,
    'pages_viewed': pvhigh + pvmedium + pvlow,
    'user_type': active_users + passive_users,
})

# Chain-structured network: last_activity -> duration -> pages_viewed -> user_type.
model = BayesianModel([
    ('last_activity', 'duration'),
    ('duration', 'pages_viewed'),
    ('pages_viewed', 'user_type')])

pe = ParameterEstimator(model, data)

#print("\n", pe.state_counts('last_activity'))  # unconditional
#print("\n", pe.state_counts('user_type'))  # conditional on its parent

mle = MaximumLikelihoodEstimator(model, data)
#print(mle.estimate_cpd('last_activity'))  # unconditional
#print(mle.estimate_cpd('user_type'))  # conditional


# Calibrate all CPDs of `model` from the data (no estimator is passed
# explicitly, so the library default is used):
model.fit(data)

est = BayesianEstimator(model, data)

result = est.estimate_cpd('user_type', prior_type='BDeu', equivalent_sample_size=10)
# Drop into an interactive console so the results can be inspected by hand.
import code
code.interact(local=locals())
class TestMLE(unittest.TestCase):
    """Tests for ``MaximumLikelihoodEstimator`` on small hand-made data sets."""

    def setUp(self):
        """Create the shared model, data frames, expected CPDs and estimator."""
        self.m1 = BayesianModel([("A", "C"), ("B", "C")])
        self.d1 = pd.DataFrame(data={
            "A": [0, 0, 1],
            "B": [0, 1, 0],
            "C": [1, 1, 0]
        })
        # ``np.nan`` is used instead of the ``np.NaN`` alias, which was
        # removed in NumPy 2.0 and raises AttributeError there.
        self.d2 = pd.DataFrame(
            data={
                "A": [0, np.nan, 1],
                "B": [0, 1, 0],
                "C": [1, 1, np.nan],
                "D": [np.nan, "Y", np.nan],
            })
        # CPDs expected when estimating ``m1`` from ``d1``.
        self.cpds = [
            TabularCPD("A", 2, [[2.0 / 3], [1.0 / 3]]),
            TabularCPD("B", 2, [[2.0 / 3], [1.0 / 3]]),
            TabularCPD(
                "C",
                2,
                [[0.0, 0.0, 1.0, 0.5], [1.0, 1.0, 0.0, 0.5]],
                evidence=["A", "B"],
                evidence_card=[2, 2],
            ),
        ]
        self.mle1 = MaximumLikelihoodEstimator(self.m1, self.d1)

    def test_get_parameters_incomplete_data(self):
        """``get_parameters()`` on ``d1`` yields exactly the expected CPDs."""
        self.assertSetEqual(set(self.mle1.get_parameters()), set(self.cpds))

    def test_estimate_cpd(self):
        """``estimate_cpd`` matches the expected CPD for each single node."""
        self.assertEqual(self.mle1.estimate_cpd("A"), self.cpds[0])
        self.assertEqual(self.mle1.estimate_cpd("B"), self.cpds[1])
        self.assertEqual(self.mle1.estimate_cpd("C"), self.cpds[2])

    def test_state_names1(self):
        """Estimate a CPD without passing ``state_names`` (int/str values)."""
        m = BayesianModel([("A", "B")])
        d = pd.DataFrame(data={
            "A": [2, 3, 8, 8, 8],
            "B": ["X", "O", "X", "O", "X"]
        })
        cpd_b = TabularCPD(
            "B",
            2,
            [[0, 1, 1.0 / 3], [1, 0, 2.0 / 3]],
            evidence=["A"],
            evidence_card=[3],
        )
        mle2 = MaximumLikelihoodEstimator(m, d)
        self.assertEqual(mle2.estimate_cpd("B"), cpd_b)

    def test_state_names2(self):
        """Estimate a CPD without ``state_names`` (bool/str values, two parents)."""
        m = BayesianModel([("Light?", "Color"), ("Fruit", "Color")])
        d = pd.DataFrame(
            data={
                "Fruit": ["Apple", "Apple", "Apple", "Banana", "Banana"],
                "Light?": [True, True, False, False, True],
                "Color": ["red", "green", "black", "black", "yellow"],
            })
        color_cpd = TabularCPD(
            "Color",
            4,
            [[1, 0, 1, 0], [0, 0.5, 0, 0], [0, 0.5, 0, 0], [0, 0, 0, 1]],
            evidence=["Fruit", "Light?"],
            evidence_card=[2, 2],
        )
        mle2 = MaximumLikelihoodEstimator(m, d)
        self.assertEqual(mle2.estimate_cpd("Color"), color_cpd)

    def test_class_init(self):
        """Passing explicit ``state_names`` gives the same CPDs as ``setUp``."""
        mle = MaximumLikelihoodEstimator(self.m1,
                                         self.d1,
                                         state_names={
                                             "A": [0, 1],
                                             "B": [0, 1],
                                             "C": [0, 1]
                                         })
        self.assertSetEqual(set(mle.get_parameters()), set(self.cpds))

    def test_nonoccurring_values(self):
        """``state_names`` may list states (23, 42) that never occur in ``d1``.

        The mapping also carries a key (``1``) that is not a node of ``m1``.
        """
        mle = MaximumLikelihoodEstimator(
            self.m1,
            self.d1,
            state_names={
                "A": [0, 1, 23],
                "B": [0, 1],
                "C": [0, 42, 1],
                1: [2]
            },
        )
        cpds = [
            TabularCPD("A", 3, [[2.0 / 3], [1.0 / 3], [0]]),
            TabularCPD("B", 2, [[2.0 / 3], [1.0 / 3]]),
            TabularCPD(
                "C",
                3,
                [
                    [0.0, 0.0, 1.0, 1.0 / 3, 1.0 / 3, 1.0 / 3],
                    [1.0, 1.0, 0.0, 1.0 / 3, 1.0 / 3, 1.0 / 3],
                    [0.0, 0.0, 0.0, 1.0 / 3, 1.0 / 3, 1.0 / 3],
                ],
                evidence=["A", "B"],
                evidence_card=[3, 2],
            ),
        ]
        self.assertSetEqual(set(mle.get_parameters()), set(cpds))

    def test_missing_data(self):
        """Estimate from ``d2`` with ``complete_samples_only`` off, then on."""
        e1 = MaximumLikelihoodEstimator(self.m1,
                                        self.d2,
                                        state_names={"C": [0, 1]},
                                        complete_samples_only=False)
        cpds1 = {
            TabularCPD("A", 2, [[0.5], [0.5]]),
            TabularCPD("B", 2, [[2.0 / 3], [1.0 / 3]]),
            TabularCPD(
                "C",
                2,
                [[0, 0.5, 0.5, 0.5], [1, 0.5, 0.5, 0.5]],
                evidence=["A", "B"],
                evidence_card=[2, 2],
            ),
        }
        self.assertSetEqual(cpds1, set(e1.get_parameters()))

        e2 = MaximumLikelihoodEstimator(self.m1,
                                        self.d2,
                                        state_names={"C": [0, 1]},
                                        complete_samples_only=True)
        cpds2 = {
            TabularCPD("A", 2, [[0.5], [0.5]]),
            TabularCPD("B", 2, [[0.5], [0.5]]),
            TabularCPD(
                "C",
                2,
                [[0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5, 0.5]],
                evidence=["A", "B"],
                evidence_card=[2, 2],
            ),
        }
        self.assertSetEqual(cpds2, set(e2.get_parameters()))

    def tearDown(self):
        """Drop the model and data fixtures created in ``setUp``."""
        del self.m1
        del self.d1
        del self.d2
Ejemplo n.º 31
0
model = BayesianModel([("fruit", "tasty"), ("size", "tasty")])
est = ParameterEstimator(model, data)
a = est.state_counts
# Question 1: state counts for each variable of the model.
fruit_counts = a("fruit")
size_counts = a("size")
tasty_counts = a("tasty")
# Write the state counts of `tasty` to CSV.
tasty_counts.to_csv('/code/output/output1.csv')

# Question 2

# Estimate the CPDs of the model with maximum likelihood.
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
estimator = MaximumLikelihoodEstimator(model, data)
cpds = estimator.get_parameters()
# Select each CPD by the variable it describes instead of by list position:
# the list follows the model's node order, which for the edge list above is
# not fruit, size, tasty, so positional indexing mislabels the CPDs.
cpd_by_variable = {cpd.variable: cpd for cpd in cpds}
fruit_cpd = cpd_by_variable["fruit"]
size_cpd = cpd_by_variable["size"]
tasty_cpd = cpd_by_variable["tasty"]
print(tasty_cpd)
# Write the CPD of `tasty` to CSV.
# NOTE(review): `b` is not defined in the visible part of this script;
# confirm it holds the values of `tasty_cpd` before relying on this output.
res = pd.DataFrame(b)
res.to_csv('/code/output/output2.csv', index=False, header=False)

# Question 3
# Attach all estimated CPDs to the model.
model.add_cpds(*cpds)
# Create a Bayesian model and run the variable elimination algorithm on it.
from pgmpy.models import BayesianModel
print(samples.head())

# Step 2: define the model structure.
# The new model reuses the edges of `alarm_model`.

from pgmpy.models import BayesianModel

model_struct = BayesianModel(ebunch=alarm_model.edges())
print(model_struct.nodes())

# Step 3: learn the model parameters.
# Build a Maximum Likelihood Estimator over the structure and the samples.

from pgmpy.estimators import MaximumLikelihoodEstimator

mle = MaximumLikelihoodEstimator(model=model_struct, data=samples)

# CPD estimates for two individual nodes.
print(mle.estimate_cpd(node='FIO2'))
print(mle.estimate_cpd(node='CVP'))

# CPD estimates for all nodes; only the first 10 are printed.
print(mle.get_parameters()[:10])

# Check that the learned FIO2 parameters are within 0.01 of the original ones.
import numpy as np
print(
    np.allclose(
        alarm_model.get_cpds('FIO2').values,
        mle.estimate_cpd('FIO2').values,
        atol=0.01,
    )
)

# Fitting the model using the Bayesian Estimator
from pgmpy.estimators import BayesianEstimator
    '\n',
    pe.state_counts('tasty'))  # example of conditional count of fruit and size

# %% markdown [markdown]
# We can see that as many apples as bananas were observed, and that $5$ large bananas were tasty while the only small one was not.

# %% markdown [markdown]
# ### 2/ Maximum Likelihood Estimation
# A natural estimate for the CPDs is to use the *relative frequencies* (probabilities version of the state count table above). For instance we observed $7$ apples among a total of $14$ fruits, so we might guess that about half the fruits are apples.
#
# This approach is **Maximum Likelihood Estimation (MLE)**: it fills the CPDs in such a way that $P(\text{data} \; | \; \text{model})$ is maximal, which is achieved by using the *relative frequencies*. The `mle.estimate_cpd(variable)` function computes the state counts and divides each cell by the (conditional) sample size.
# %% codecell

from pgmpy.estimators import MaximumLikelihoodEstimator

# Estimator over the fruit model and its observations.
mle: MaximumLikelihoodEstimator = MaximumLikelihoodEstimator(
    model=fruitModel,
    data=fruitData,
)

# Sanity check on the state names the estimator reports for each variable.
assert mle.state_names == dict(
    fruit=['apple', 'banana'],
    tasty=['no', 'yes'],
    size=['large', 'small'],
)

# CPDs estimated for 'fruit' and 'size'.
estCPD_fruit: TabularCPD = mle.estimate_cpd('fruit')
print(estCPD_fruit)
estCPD_size: TabularCPD = mle.estimate_cpd("size")
print(estCPD_size)

# CPD estimated for 'tasty' (conditional).
estCPD_tasty: TabularCPD = mle.estimate_cpd('tasty')
print(estCPD_tasty)
Ejemplo n.º 34
0
def fit(model, df, methodtype='bayes', verbose=3):
    r"""Learn the parameters (CPDs) given the DAG and data.

    Description
    -----------
    Maximum Likelihood Estimation
        A natural estimate for the CPDs is to simply use the *relative frequencies*
        with which the variable states have occurred. For example, if `x` out of `n`
        samples are `cloudy`, the estimated probability of `cloudy` is `x / n`.
        According to MLE, we should fill the CPDs in such a way that
        $P(\text{data}|\text{model})$ is maximal.
        This is achieved when using the *relative frequencies*.

        While very straightforward, the ML estimator has the problem of *overfitting* to the data.
        If the observed data is not representative of the underlying distribution,
        ML estimations will be extremely far off.
        When estimating parameters for Bayesian networks, lack of data is a frequent problem.
        Even if the total sample size is very large, the fact that state counts are done conditionally
        for each parent configuration causes immense fragmentation.
        If a variable has 3 parents that can each take 10 states, then state counts will
        be done separately for `10^3 = 1000` parent configurations.
        This makes MLE very fragile and unstable for learning Bayesian Network parameters.
        A way to mitigate MLE's overfitting is *Bayesian Parameter Estimation*.

    Bayesian Parameter Estimation
        The Bayesian Parameter Estimator starts with already existing prior CPDs
        that express our beliefs about the variables *before* the data was observed.
        Those "priors" are then updated using the state counts from the observed data.

        One can think of the priors as consisting of *pseudo state counts* that are added
        to the actual counts before normalization. Unless one wants to encode specific beliefs
        about the distributions of the variables, one commonly chooses uniform priors,
        i.e. ones that deem all states equiprobable.

        A very simple prior is the so-called *K2* prior, which simply adds `1` to the count of every single state.
        A somewhat more sensible choice of prior is *BDeu* (Bayesian Dirichlet equivalent uniform prior).
        For BDeu we need to specify an *equivalent sample size* `N` and then the pseudo-counts are
        the equivalent of having observed `N` uniform samples of each variable (and each parent configuration).

    Parameters
    ----------
    model : dict
        Contains the key 'adjmat' (adjacency matrix) and the key 'model' (model object).
    df : pd.DataFrame()
        Pandas DataFrame containing the data.
    methodtype : str, (default: 'bayes')
        Strategy for parameter learning.
        Options are: 'ml' or 'maximumlikelihood' for learning CPDs using Maximum Likelihood Estimators,
        or 'bayes' for Bayesian Parameter Estimation (BDeu prior, equivalent sample size 1000).
    verbose : int, optional
        Print progress to screen. The default is 3.
            * 0: NONE
            * 1: ERROR
            * 2: WARNING
            * 3: INFO (default)
            * 4: DEBUG
            * 5: TRACE

    Returns
    -------
    dict with the keys:
        * 'model': the model with the learned CPDs.
        * 'adjmat': the adjacency matrix taken from the input.
        * 'config': dict holding 'verbose' and 'method'.

    Examples
    --------
    >>> import bnlearn as bn
    >>>
    >>> df = bn.import_example()
    >>> model = bn.import_DAG('sprinkler', CPD=False)
    >>>
    >>> # Parameter learning
    >>> model_update = bn.parameter_learning.fit(model, df)
    >>> bn.plot(model_update)
    >>>
    >>> # LOAD BIF FILE
    >>> model = bn.import_DAG('alarm')
    >>> df = bn.sampling(model, n=1000)
    >>> model_update = bn.parameter_learning.fit(model, df)
    >>> G = bn.plot(model_update)

    """
    config = {}
    config['verbose'] = verbose
    config['method'] = methodtype
    adjmat = model['adjmat']

    # Check whether all labels in the adjacency matrix are included from the dataframe
    # adjmat, model = _check_adjmat(model, df)
    df = _filter_df(adjmat, df, verbose=config['verbose'])

    if config['verbose'] >= 3:
        print('[BNLEARN][PARAMETER LEARNING] Computing parameters using [%s]' %
              (config['method']))
    # Extract model
    if isinstance(model, dict):
        model = model['model']

    # Convert to BayesianModel
    if 'BayesianModel' not in str(type(model)):
        model = to_BayesianModel(adjmat, verbose=config['verbose'])

    fitted = True
    if config['method'] in ('ml', 'maximumlikelihood'):
        # Learn the CPDs with Maximum Likelihood Estimation. Fit the model
        # itself rather than rebinding `model` to the estimator object, so the
        # returned 'model' is a fitted model for both method types.
        model.fit(df, estimator=MaximumLikelihoodEstimator)
    elif config['method'] == 'bayes':
        # Learn the CPDs with Bayesian Parameter Estimation.
        model.fit(df,
                  estimator=BayesianEstimator,
                  prior_type="BDeu",
                  equivalent_sample_size=1000)
    else:
        # Unknown method: no parameters are learned; report it instead of
        # silently returning the model unchanged.
        fitted = False
        if config['verbose'] >= 2:
            print('[BNLEARN][PARAMETER LEARNING] Warning: methodtype [%s] is not recognized; no parameters were learned.' %
                  (config['method']))

    # Show the learned CPDs at INFO level and above.
    if fitted and config['verbose'] >= 3:
        for cpd in model.get_cpds():
            print("CPD of {variable}:".format(variable=cpd.variable))
            print(cpd)

    out = {}
    out['model'] = model
    out['adjmat'] = adjmat
    out['config'] = config

    return (out)
Ejemplo n.º 35
0
# For each data set: report the true structure, learn a structure with hill
# climbing scored by BIC, then estimate parameters for the learned structure.
for k, data in {file1: data1, file2: data2}.items():
    print('Using ' + k)

    # The header line is the same for both data sets; only the structure differs.
    print('True network:')
    print('Y <-- X --> Z' if k == file1 else 'X --> Z <-- Y')

    est = HillClimbSearch(data, scoring_method=BicScore(data))
    best_model = est.estimate()

    # Maximum-likelihood parameters (computed; printing is disabled below).
    MLE_estimator = MaximumLikelihoodEstimator(best_model, data)
    MLE_parameters = MLE_estimator.get_parameters()

    # Bayesian parameters.
    bay_estimator = BayesianEstimator(best_model, data)
    bay_parameters = bay_estimator.get_parameters()

    print('Learnt edges:')
    print(best_model.edges())

    # print('MLE Parameters')
    # for m in MLE_parameters:
    #     print(m)

    print('Bayesian Parameters')
    for b in bay_parameters:
        print(b)