def test_missing_data(self):
        """Compare MLE output on ``self.d2`` for both ``complete_samples_only`` modes.

        Two estimators are built over the same model and data; the result of
        each ``get_parameters()`` call is compared, as a set, against a
        hand-written collection of expected CPDs.
        """
        # Case 1: complete_samples_only=False.
        est_partial = MaximumLikelihoodEstimator(
            self.m1,
            self.d2,
            state_names={"C": [0, 1]},
            complete_samples_only=False,
        )
        expected_partial = {
            TabularCPD("A", 2, [[0.5], [0.5]]),
            TabularCPD("B", 2, [[2.0 / 3], [1.0 / 3]]),
            TabularCPD(
                "C",
                2,
                [[0, 0.5, 0.5, 0.5], [1, 0.5, 0.5, 0.5]],
                evidence=["A", "B"],
                evidence_card=[2, 2],
            ),
        }
        learned_partial = set(est_partial.get_parameters())
        self.assertSetEqual(expected_partial, learned_partial)

        # Case 2: complete_samples_only=True.
        est_complete = MaximumLikelihoodEstimator(
            self.m1,
            self.d2,
            state_names={"C": [0, 1]},
            complete_samples_only=True,
        )
        expected_complete = {
            TabularCPD("A", 2, [[0.5], [0.5]]),
            TabularCPD("B", 2, [[0.5], [0.5]]),
            TabularCPD(
                "C",
                2,
                [[0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5, 0.5]],
                evidence=["A", "B"],
                evidence_card=[2, 2],
            ),
        }
        learned_complete = set(est_complete.get_parameters())
        self.assertSetEqual(expected_complete, learned_complete)
    def test_missing_data(self):
        """Check ``get_parameters()`` on ``self.d2`` with the row filter off, then on.

        The same model, data and ``state_names`` are used in both halves; only
        ``complete_samples_only`` and the expected CPD tables differ.
        """
        # First half: complete_samples_only=False.
        loose = MaximumLikelihoodEstimator(
            self.m1, self.d2,
            state_names={'C': [0, 1]},
            complete_samples_only=False,
        )
        want_loose = {
            TabularCPD('A', 2, [[0.5], [0.5]]),
            TabularCPD('B', 2, [[2./3], [1./3]]),
            TabularCPD('C', 2,
                       [[0, 0.5, 0.5, 0.5], [1, 0.5, 0.5, 0.5]],
                       evidence=['A', 'B'], evidence_card=[2, 2]),
        }
        self.assertSetEqual(want_loose, set(loose.get_parameters()))

        # Second half: complete_samples_only=True.
        strict = MaximumLikelihoodEstimator(
            self.m1, self.d2,
            state_names={'C': [0, 1]},
            complete_samples_only=True,
        )
        want_strict = {
            TabularCPD('A', 2, [[0.5], [0.5]]),
            TabularCPD('B', 2, [[0.5], [0.5]]),
            TabularCPD('C', 2,
                       [[0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5, 0.5]],
                       evidence=['A', 'B'], evidence_card=[2, 2]),
        }
        self.assertSetEqual(want_strict, set(strict.get_parameters()))
 def test_nonoccurring_values(self):
     """Check the learned CPDs when ``state_names`` declares extra states.

     ``state_names`` lists three states for "A" and "C" and carries an
     additional key ``1``; the expected tables therefore have cardinality 3
     for "A" and "C".
     """
     third = 1.0 / 3
     declared_states = {
         "A": [0, 1, 23],
         "B": [0, 1],
         "C": [0, 42, 1],
         1: [2],
     }
     mle = MaximumLikelihoodEstimator(self.m1,
                                      self.d1,
                                      state_names=declared_states)

     expected = {
         TabularCPD("A", 3, [[2.0 / 3], [third], [0]]),
         TabularCPD("B", 2, [[2.0 / 3], [third]]),
         TabularCPD(
             "C",
             3,
             [
                 [0.0, 0.0, 1.0, third, third, third],
                 [1.0, 1.0, 0.0, third, third, third],
                 [0.0, 0.0, 0.0, third, third, third],
             ],
             evidence=["A", "B"],
             evidence_card=[3, 2],
         ),
     }
     learned = set(mle.get_parameters())
     self.assertSetEqual(learned, expected)
 def test_class_init(self):
     """Construct the estimator with explicit binary state names and compare with ``self.cpds``."""
     binary_states = {name: [0, 1] for name in ("A", "B", "C")}
     mle = MaximumLikelihoodEstimator(self.m1, self.d1, state_names=binary_states)
     learned = set(mle.get_parameters())
     self.assertSetEqual(learned, set(self.cpds))
 def test_nonoccurring_values(self):
     """Compare learned CPDs with hand-written tables when extra states are declared.

     The ``state_names`` argument gives "A" and "C" three states each and
     includes a key ``1`` in addition to the three variable names.
     """
     mle = MaximumLikelihoodEstimator(
         self.m1,
         self.d1,
         state_names={
             'A': [0, 1, 23],
             'B': [0, 1],
             'C': [0, 42, 1],
             1: [2],
         },
     )

     # Rows of the expected table for 'C', one per declared state.
     c_rows = [
         [0.0, 0.0, 1.0, 1.0/3, 1.0/3, 1.0/3],
         [1.0, 1.0, 0.0, 1.0/3, 1.0/3, 1.0/3],
         [0.0, 0.0, 0.0, 1.0/3, 1.0/3, 1.0/3],
     ]
     expected = {
         TabularCPD('A', 3, [[2.0/3], [1.0/3], [0]]),
         TabularCPD('B', 2, [[2.0/3], [1.0/3]]),
         TabularCPD('C', 3, c_rows, evidence=['A', 'B'], evidence_card=[3, 2]),
     }
     self.assertSetEqual(set(mle.get_parameters()), expected)
Example #6
0
    def test_get_parameters_missing_data(self):
        """Compare ``get_parameters()`` on ``self.m1``/``self.d1`` with hand-written CPDs."""
        estimator = MaximumLikelihoodEstimator(self.m1, self.d1)

        # The comparison is set-based, so the order of the entries is irrelevant.
        marginal = [[2.0 / 3], [1.0 / 3]]
        expected = {
            TabularCPD('A', 2, marginal),
            TabularCPD('C', 2,
                       [[0.0, 0.0, 1.0, 0.5],
                        [1.0, 1.0, 0.0, 0.5]],
                       evidence=['A', 'B'],
                       evidence_card=[2, 2]),
            TabularCPD('B', 2, marginal),
        }

        learned = set(estimator.get_parameters())
        self.assertSetEqual(learned, expected)
Example #7
0
class TimeMLE:
    """Timing harness for ``MaximumLikelihoodEstimator.get_parameters``.

    ``setup`` prepares one estimator per example network ('alarm' and
    'munin1'); each ``time_*`` method then runs a single ``get_parameters``
    call. NOTE(review): the ``timeout``/``setup``/``time_*`` layout looks
    like an airspeed-velocity benchmark -- confirm against the runner.
    """

    timeout = 600

    def setup(self):
        """Load both example networks, simulate data, and build the estimators."""
        n_samples = int(1e4)

        # 'alarm': copy the edge structure into a fresh network, then
        # simulate samples from the original example model.
        self.alarm = get_example_model('alarm')
        self.alarm_model = BayesianNetwork(self.alarm.edges())
        alarm_samples = self.alarm.simulate(n_samples, show_progress=False)
        self.alarm_est = MaximumLikelihoodEstimator(self.alarm_model,
                                                    alarm_samples)

        # 'munin1': same procedure.
        self.munin = get_example_model('munin1')
        self.munin_model = BayesianNetwork(self.munin.edges())
        munin_samples = self.munin.simulate(n_samples, show_progress=False)
        self.munin_est = MaximumLikelihoodEstimator(self.munin_model,
                                                    munin_samples)

    def time_alarm_mle(self):
        """Run parameter estimation for the 'alarm' estimator."""
        self.alarm_est.get_parameters()

    def time_munin_mle(self):
        """Run parameter estimation for the 'munin1' estimator."""
        self.munin_est.get_parameters()
 def test_nonoccurring_values(self):
     """Verify learned CPDs when ``state_names`` declares three states for 'A' and 'C'.

     The ``state_names`` dict also contains a key ``1``; the expected result
     is still exactly the three tables for 'A', 'B' and 'C'.
     """
     one_third = 1.0 / 3
     two_thirds = 2.0 / 3

     state_names = {
         'A': [0, 1, 23],
         'B': [0, 1],
         'C': [0, 42, 1],
         1: [2],
     }
     mle = MaximumLikelihoodEstimator(self.m1, self.d1, state_names=state_names)

     # The last three evidence columns of 'C' are uniform in the expected table.
     uniform_tail = [one_third, one_third, one_third]
     expected = {
         TabularCPD('A', 3, [[two_thirds], [one_third], [0]]),
         TabularCPD('B', 2, [[two_thirds], [one_third]]),
         TabularCPD('C',
                    3, [[0.0, 0.0, 1.0] + uniform_tail,
                        [1.0, 1.0, 0.0] + uniform_tail,
                        [0.0, 0.0, 0.0] + uniform_tail],
                    evidence=['A', 'B'],
                    evidence_card=[3, 2]),
     }
     self.assertSetEqual(set(mle.get_parameters()), expected)
class TestMLE(unittest.TestCase):
    """Unit tests for ``MaximumLikelihoodEstimator`` on a model with edges A -> C and B -> C."""

    def setUp(self):
        """Create the shared model, two data frames, expected CPDs and a default estimator."""
        self.m1 = BayesianModel([("A", "C"), ("B", "C")])
        # d1: three samples without missing entries.
        self.d1 = pd.DataFrame(data={
            "A": [0, 0, 1],
            "B": [0, 1, 0],
            "C": [1, 1, 0],
        })
        # d2: same shape but with missing entries, plus a column "D" that is
        # not a node of m1. ``np.nan`` is used because the ``np.NaN`` alias
        # was removed in NumPy 2.0.
        self.d2 = pd.DataFrame(
            data={
                "A": [0, np.nan, 1],
                "B": [0, 1, 0],
                "C": [1, 1, np.nan],
                "D": [np.nan, "Y", np.nan],
            })
        # Expected CPDs for d1, in the order A, B, C (test_estimate_cpd
        # indexes into this list).
        self.cpds = [
            TabularCPD("A", 2, [[2.0 / 3], [1.0 / 3]]),
            TabularCPD("B", 2, [[2.0 / 3], [1.0 / 3]]),
            TabularCPD(
                "C",
                2,
                [[0.0, 0.0, 1.0, 0.5], [1.0, 1.0, 0.0, 0.5]],
                evidence=["A", "B"],
                evidence_card=[2, 2],
            ),
        ]
        self.mle1 = MaximumLikelihoodEstimator(self.m1, self.d1)

    def test_get_parameters_incomplete_data(self):
        """``get_parameters()`` on d1 yields exactly the expected CPDs.

        NOTE(review): despite the name, d1 contains no missing values.
        """
        self.assertSetEqual(set(self.mle1.get_parameters()), set(self.cpds))

    def test_estimate_cpd(self):
        """``estimate_cpd`` returns the expected table for each single node."""
        self.assertEqual(self.mle1.estimate_cpd("A"), self.cpds[0])
        self.assertEqual(self.mle1.estimate_cpd("B"), self.cpds[1])
        self.assertEqual(self.mle1.estimate_cpd("C"), self.cpds[2])

    def test_state_names1(self):
        """No ``state_names`` passed: data holds integer and string values.

        The expected CPD for "B" has three evidence columns, one per distinct
        value of "A" in the data.
        """
        m = BayesianModel([("A", "B")])
        d = pd.DataFrame(data={
            "A": [2, 3, 8, 8, 8],
            "B": ["X", "O", "X", "O", "X"],
        })
        cpd_b = TabularCPD(
            "B",
            2,
            [[0, 1, 1.0 / 3], [1, 0, 2.0 / 3]],
            evidence=["A"],
            evidence_card=[3],
        )
        mle2 = MaximumLikelihoodEstimator(m, d)
        self.assertEqual(mle2.estimate_cpd("B"), cpd_b)

    def test_state_names2(self):
        """No ``state_names`` passed: data holds string and boolean values."""
        m = BayesianModel([("Light?", "Color"), ("Fruit", "Color")])
        d = pd.DataFrame(
            data={
                "Fruit": ["Apple", "Apple", "Apple", "Banana", "Banana"],
                "Light?": [True, True, False, False, True],
                "Color": ["red", "green", "black", "black", "yellow"],
            })
        color_cpd = TabularCPD(
            "Color",
            4,
            [[1, 0, 1, 0], [0, 0.5, 0, 0], [0, 0.5, 0, 0], [0, 0, 0, 1]],
            evidence=["Fruit", "Light?"],
            evidence_card=[2, 2],
        )
        mle2 = MaximumLikelihoodEstimator(m, d)
        self.assertEqual(mle2.estimate_cpd("Color"), color_cpd)

    def test_class_init(self):
        """Explicit binary ``state_names`` give the same CPDs as ``self.cpds``."""
        mle = MaximumLikelihoodEstimator(self.m1,
                                         self.d1,
                                         state_names={
                                             "A": [0, 1],
                                             "B": [0, 1],
                                             "C": [0, 1],
                                         })
        self.assertSetEqual(set(mle.get_parameters()), set(self.cpds))

    def test_nonoccurring_values(self):
        """``state_names`` declares states (23, 42) that never occur in d1.

        The expected tables have cardinality 3 for "A" and "C"; the
        ``state_names`` dict also carries a key ``1`` that is not a node of m1.
        """
        mle = MaximumLikelihoodEstimator(
            self.m1,
            self.d1,
            state_names={
                "A": [0, 1, 23],
                "B": [0, 1],
                "C": [0, 42, 1],
                1: [2],
            },
        )
        cpds = {
            TabularCPD("A", 3, [[2.0 / 3], [1.0 / 3], [0]]),
            TabularCPD("B", 2, [[2.0 / 3], [1.0 / 3]]),
            TabularCPD(
                "C",
                3,
                [
                    [0.0, 0.0, 1.0, 1.0 / 3, 1.0 / 3, 1.0 / 3],
                    [1.0, 1.0, 0.0, 1.0 / 3, 1.0 / 3, 1.0 / 3],
                    [0.0, 0.0, 0.0, 1.0 / 3, 1.0 / 3, 1.0 / 3],
                ],
                evidence=["A", "B"],
                evidence_card=[3, 2],
            ),
        }
        self.assertSetEqual(set(mle.get_parameters()), cpds)

    def test_missing_data(self):
        """Estimate from d2 with ``complete_samples_only`` set to False, then True."""
        e1 = MaximumLikelihoodEstimator(self.m1,
                                        self.d2,
                                        state_names={"C": [0, 1]},
                                        complete_samples_only=False)
        cpds1 = {
            TabularCPD("A", 2, [[0.5], [0.5]]),
            TabularCPD("B", 2, [[2.0 / 3], [1.0 / 3]]),
            TabularCPD(
                "C",
                2,
                [[0, 0.5, 0.5, 0.5], [1, 0.5, 0.5, 0.5]],
                evidence=["A", "B"],
                evidence_card=[2, 2],
            ),
        }
        self.assertSetEqual(cpds1, set(e1.get_parameters()))

        e2 = MaximumLikelihoodEstimator(self.m1,
                                        self.d2,
                                        state_names={"C": [0, 1]},
                                        complete_samples_only=True)
        cpds2 = {
            TabularCPD("A", 2, [[0.5], [0.5]]),
            TabularCPD("B", 2, [[0.5], [0.5]]),
            TabularCPD(
                "C",
                2,
                [[0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5, 0.5]],
                evidence=["A", "B"],
                evidence_card=[2, 2],
            ),
        }
        self.assertSetEqual(cpds2, set(e2.get_parameters()))

    def tearDown(self):
        """Drop every fixture created in ``setUp``."""
        del self.m1
        del self.d1
        del self.d2
        del self.cpds
        del self.mle1
Example #10
0
# Tutorial-style script excerpt. `model`, `data` and `pd` are not defined in
# the visible code and are presumably set up earlier in the file.
est = ParameterEstimator(model, data)
a = est.state_counts  # callable; invoked once per variable name below
# Write your code below
fruit_counts = a("fruit")
size_counts = a("size")
tasty_counts = a("tasty")
# Write the counts for "tasty" to CSV
tasty_counts.to_csv('/code/output/output1.csv')

# Question 2

# Create a Bayesian model and generate CPDs using MLE
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
estimator = MaximumLikelihoodEstimator(model, data)
cpds = estimator.get_parameters()
# Write your code
# NOTE(review): the positional indexing below assumes get_parameters()
# returns the CPDs in the order fruit, size, tasty -- confirm.
fruit_cpd = cpds[0]
size_cpd = cpds[1]
tasty_cpd = cpds[2]
print(tasty_cpd)
# Write the CPD of tasty to CSV
# NOTE(review): `b` is not defined anywhere in the visible code -- verify
# what is meant to be written here.
res = pd.DataFrame(b)
res.to_csv('/code/output/output2.csv', index=False, header=False)

# Question 3
# Attach the first three learned CPDs to the model.
for i in range(0, 3):
    model.add_cpds(cpds[i])
# Create a Bayesian model and run the variable elimination algorithm on it
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination
# Tutorial-style script excerpt. `alarm_model`, `samples` and `BayesianModel`
# are not defined in the visible code and are presumably set up earlier.
# Build a new model that has only the edge structure of `alarm_model`.
model_struct = BayesianModel(ebunch=alarm_model.edges())
print(model_struct.nodes())

# Step 3: Learning the model parameters
# Fitting the model using the Maximum Likelihood Estimator

from pgmpy.estimators import MaximumLikelihoodEstimator

mle = MaximumLikelihoodEstimator(model=model_struct, data=samples)

# Estimating the CPD for a single node.
print(mle.estimate_cpd(node='FIO2'))
print(mle.estimate_cpd(node='CVP'))

# Estimating CPDs for all the nodes in the model
print(mle.get_parameters()[:10]) # Show just the first 10 CPDs in the output

# Verifying that the learned parameters are almost equal to those of
# `alarm_model` (absolute tolerance 0.01).
import numpy as np
print(np.allclose(alarm_model.get_cpds('FIO2').values, mle.estimate_cpd('FIO2').values, atol=0.01))

# Fitting the model using the Bayesian Estimator
from pgmpy.estimators import BayesianEstimator

best = BayesianEstimator(model=model_struct, data=samples)

print(best.estimate_cpd(node='FIO2', prior_type="BDeu", equivalent_sample_size=1000))
# Uniform pseudo count for each state. Can also accept an array of the size of CPD.
print(best.estimate_cpd(node='CVP', prior_type="dirichlet", pseudo_counts=100))

# Learning CPDs for all the nodes in the model. For learning all parameters with BDeU prior, a dict of
# For each (file name, data set) pair: print the generating structure, learn a
# structure with hill climbing under a BIC score, estimate parameters with
# both estimators, and print the learnt edges and the Bayesian parameters.
for k, data in {file1: data1, file2: data2}.items():
    print('Using ' + k)

    # The header line is common to both cases; only the structure string
    # depends on which file is being processed.
    print('True network:')
    print('Y <-- X --> Z' if k == file1 else 'X --> Z <-- Y')

    est = HillClimbSearch(data, scoring_method=BicScore(data))
    best_model = est.estimate()

    # Maximum-likelihood parameters are computed but only printed when the
    # commented-out block further down is re-enabled.
    MLE_estimator = MaximumLikelihoodEstimator(best_model, data)
    MLE_parameters = MLE_estimator.get_parameters()

    bay_estimator = BayesianEstimator(best_model, data)
    bay_parameters = bay_estimator.get_parameters()

    print('Learnt edges:')
    print(best_model.edges())

    # print('MLE Parameters')
    # for m in MLE_parameters:
    #     print(m)

    print('Bayesian Parameters')
    for b in bay_parameters:
        print(b)
class TestMLE(unittest.TestCase):
    """Unit tests for ``MaximumLikelihoodEstimator`` on a model with edges A -> C and B -> C."""

    def setUp(self):
        """Create the shared model, two data frames, expected CPDs and a default estimator."""
        self.m1 = BayesianModel([('A', 'C'), ('B', 'C')])
        # d1: three samples without missing entries.
        self.d1 = pd.DataFrame(data={
            'A': [0, 0, 1],
            'B': [0, 1, 0],
            'C': [1, 1, 0]
        })
        # d2: contains missing entries and a column 'D' that is not a node of
        # m1. ``np.nan`` replaces ``np.NaN``, an alias removed in NumPy 2.0.
        self.d2 = pd.DataFrame(
            data={
                'A': [0, np.nan, 1],
                'B': [0, 1, 0],
                'C': [1, 1, np.nan],
                'D': [np.nan, 'Y', np.nan]
            })
        # Expected CPDs for d1 in the order A, B, C; test_estimate_cpd relies
        # on this order.
        self.cpds = [
            TabularCPD('A', 2, [[2.0 / 3], [1.0 / 3]]),
            TabularCPD('B', 2, [[2.0 / 3], [1.0 / 3]]),
            TabularCPD('C',
                       2, [[0.0, 0.0, 1.0, 0.5], [1.0, 1.0, 0.0, 0.5]],
                       evidence=['A', 'B'],
                       evidence_card=[2, 2])
        ]
        self.mle1 = MaximumLikelihoodEstimator(self.m1, self.d1)

    def test_get_parameters_incomplete_data(self):
        """``get_parameters()`` on d1 yields exactly the expected CPDs.

        NOTE(review): despite the name, d1 contains no missing values.
        """
        self.assertSetEqual(set(self.mle1.get_parameters()), set(self.cpds))

    def test_estimate_cpd(self):
        """``estimate_cpd`` returns the expected table for each single node."""
        self.assertEqual(self.mle1.estimate_cpd('A'), self.cpds[0])
        self.assertEqual(self.mle1.estimate_cpd('B'), self.cpds[1])
        self.assertEqual(self.mle1.estimate_cpd('C'), self.cpds[2])

    def test_state_names1(self):
        """No ``state_names`` passed: data holds integer and string values.

        The expected CPD for 'B' has three evidence columns, one per distinct
        value of 'A' in the data.
        """
        m = BayesianModel([('A', 'B')])
        d = pd.DataFrame(data={
            'A': [2, 3, 8, 8, 8],
            'B': ['X', 'O', 'X', 'O', 'X']
        })
        cpd_b = TabularCPD('B',
                           2, [[0, 1, 1.0 / 3], [1, 0, 2.0 / 3]],
                           evidence=['A'],
                           evidence_card=[3])
        mle2 = MaximumLikelihoodEstimator(m, d)
        self.assertEqual(mle2.estimate_cpd('B'), cpd_b)

    def test_state_names2(self):
        """No ``state_names`` passed: data holds string and boolean values."""
        m = BayesianModel([('Light?', 'Color'), ('Fruit', 'Color')])
        d = pd.DataFrame(
            data={
                'Fruit': ['Apple', 'Apple', 'Apple', 'Banana', 'Banana'],
                'Light?': [True, True, False, False, True],
                'Color': ['red', 'green', 'black', 'black', 'yellow']
            })
        color_cpd = TabularCPD(
            'Color',
            4, [[1, 0, 1, 0], [0, 0.5, 0, 0], [0, 0.5, 0, 0], [0, 0, 0, 1]],
            evidence=['Fruit', 'Light?'],
            evidence_card=[2, 2])
        mle2 = MaximumLikelihoodEstimator(m, d)
        self.assertEqual(mle2.estimate_cpd('Color'), color_cpd)

    def test_class_init(self):
        """Explicit binary ``state_names`` give the same CPDs as ``self.cpds``."""
        mle = MaximumLikelihoodEstimator(self.m1,
                                         self.d1,
                                         state_names={
                                             'A': [0, 1],
                                             'B': [0, 1],
                                             'C': [0, 1]
                                         })
        self.assertSetEqual(set(mle.get_parameters()), set(self.cpds))

    def test_nonoccurring_values(self):
        """``state_names`` declares states (23, 42) that never occur in d1.

        The expected tables have cardinality 3 for 'A' and 'C'; the
        ``state_names`` dict also carries a key ``1`` that is not a node of m1.
        """
        mle = MaximumLikelihoodEstimator(self.m1,
                                         self.d1,
                                         state_names={
                                             'A': [0, 1, 23],
                                             'B': [0, 1],
                                             'C': [0, 42, 1],
                                             1: [2]
                                         })
        cpds = {
            TabularCPD('A', 3, [[2.0 / 3], [1.0 / 3], [0]]),
            TabularCPD('B', 2, [[2.0 / 3], [1.0 / 3]]),
            TabularCPD('C',
                       3, [[0.0, 0.0, 1.0, 1.0 / 3, 1.0 / 3, 1.0 / 3],
                           [1.0, 1.0, 0.0, 1.0 / 3, 1.0 / 3, 1.0 / 3],
                           [0.0, 0.0, 0.0, 1.0 / 3, 1.0 / 3, 1.0 / 3]],
                       evidence=['A', 'B'],
                       evidence_card=[3, 2])
        }
        self.assertSetEqual(set(mle.get_parameters()), cpds)

    def test_missing_data(self):
        """Estimate from d2 with ``complete_samples_only`` set to False, then True."""
        e1 = MaximumLikelihoodEstimator(self.m1,
                                        self.d2,
                                        state_names={'C': [0, 1]},
                                        complete_samples_only=False)
        cpds1 = {
            TabularCPD('A', 2, [[0.5], [0.5]]),
            TabularCPD('B', 2, [[2. / 3], [1. / 3]]),
            TabularCPD('C',
                       2, [[0, 0.5, 0.5, 0.5], [1, 0.5, 0.5, 0.5]],
                       evidence=['A', 'B'],
                       evidence_card=[2, 2])
        }
        self.assertSetEqual(cpds1, set(e1.get_parameters()))

        e2 = MaximumLikelihoodEstimator(self.m1,
                                        self.d2,
                                        state_names={'C': [0, 1]},
                                        complete_samples_only=True)
        cpds2 = {
            TabularCPD('A', 2, [[0.5], [0.5]]),
            TabularCPD('B', 2, [[0.5], [0.5]]),
            TabularCPD('C',
                       2, [[0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5, 0.5]],
                       evidence=['A', 'B'],
                       evidence_card=[2, 2])
        }
        self.assertSetEqual(cpds2, set(e2.get_parameters()))

    def tearDown(self):
        """Drop every fixture created in ``setUp``."""
        del self.m1
        del self.d1
        del self.d2
        del self.cpds
        del self.mle1
Example #14
0
    ('X6', 'S17'), ('X6', 'S7'), ('X7', 'S7'), ('S10', 'S1'), ('S11', 'S1'),
    ('S12', 'S2'), ('S13', 'S2'), ('X2', 'S3'), ('S14', 'S4'), ('S15', 'S4'),
    ('S16', 'S4'), ('S17', 'S4'), ('S8', 'S5'), ('S9', 'S5'), ('S1', 'S6'),
    ('S2', 'S6'), ('S3', 'S6'), ('S4', 'S6'), ('S5', 'T'), ('S6', 'T'),
    ('S7', 'T')
])

# Script excerpt: `model` and `data` are defined before the visible code.
# Within this excerpt `pe` is only referenced by the commented-out
# state_counts call below.
pe = ParameterEstimator(model, data)
# print("\n", pe.state_counts('S1'))
'''对模型和数据进行   极大似然估计  train'''
# (string above, translated) Run maximum likelihood estimation on the model
# and the data (training).
mle = MaximumLikelihoodEstimator(model, data)

# print("\n", mle.estimate_cpd('S1'))
# print("\n", mle.estimate_cpd('T'))  # probability distribution of tasty given fruit and size

# NOTE(review): the return value of get_parameters() is discarded; the next
# line fits the model itself using the same estimator class.
mle.get_parameters()
model.fit(data, estimator=MaximumLikelihoodEstimator)
# Inspect the probability distributions of individual nodes
# (the triple-quoted block below is disabled code kept as a string).
'''
print(model.get_cpds('S1'))
print(model.get_cpds('S7'))
print(model.get_cpds('T'))
'''
'''变量估计'''
# (string above, translated) Literally "variable estimation"; the statement
# below constructs a VariableElimination object over the fitted model.
infer = VariableElimination(model)
# Print the data type of infer
# print(type(infer))
'''打印各个节点的概率'''
# (string above, translated) Print the probability of each node.
# for i in infer.query(['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9'
#                       , 'S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9', 'S10'
#                       , 'S11', 'S12', 'S13', 'S14', 'S15', 'S16', 'S17', 'T']).values():
 def test_class_init(self):
     """Pass explicit [0, 1] state names for 'A', 'B' and 'C' and compare with ``self.cpds``."""
     explicit_states = {
         'A': [0, 1],
         'B': [0, 1],
         'C': [0, 1],
     }
     mle = MaximumLikelihoodEstimator(self.m1, self.d1, state_names=explicit_states)
     learned = set(mle.get_parameters())
     expected = set(self.cpds)
     self.assertSetEqual(learned, expected)
class TestMLE(unittest.TestCase):
    """Unit tests for ``MaximumLikelihoodEstimator`` on a model with edges A -> C and B -> C."""

    def setUp(self):
        """Create the shared model, two data frames, expected CPDs and a default estimator."""
        self.m1 = BayesianModel([('A', 'C'), ('B', 'C')])
        # d1: three samples without missing entries.
        self.d1 = pd.DataFrame(data={'A': [0, 0, 1], 'B': [0, 1, 0], 'C': [1, 1, 0]})
        # d2: contains missing entries and a column 'D' that is not a node of
        # m1. ``np.nan`` replaces ``np.NaN``, an alias removed in NumPy 2.0.
        self.d2 = pd.DataFrame(data={'A': [0, np.nan, 1],
                                     'B': [0, 1, 0],
                                     'C': [1, 1, np.nan],
                                     'D': [np.nan, 'Y', np.nan]})
        # Expected CPDs for d1 in the order A, B, C; test_estimate_cpd relies
        # on this order.
        self.cpds = [TabularCPD('A', 2, [[2.0/3], [1.0/3]]),
                     TabularCPD('B', 2, [[2.0/3], [1.0/3]]),
                     TabularCPD('C', 2, [[0.0, 0.0, 1.0, 0.5],
                                         [1.0, 1.0, 0.0, 0.5]],
                                evidence=['A', 'B'], evidence_card=[2, 2])]
        self.mle1 = MaximumLikelihoodEstimator(self.m1, self.d1)

    def test_get_parameters_incomplete_data(self):
        """``get_parameters()`` on d1 yields exactly the expected CPDs.

        NOTE(review): despite the name, d1 contains no missing values.
        """
        self.assertSetEqual(set(self.mle1.get_parameters()), set(self.cpds))

    def test_estimate_cpd(self):
        """``estimate_cpd`` returns the expected table for each single node."""
        self.assertEqual(self.mle1.estimate_cpd('A'), self.cpds[0])
        self.assertEqual(self.mle1.estimate_cpd('B'), self.cpds[1])
        self.assertEqual(self.mle1.estimate_cpd('C'), self.cpds[2])

    def test_state_names1(self):
        """No ``state_names`` passed: data holds integer and string values.

        The expected CPD for 'B' has three evidence columns, one per distinct
        value of 'A' in the data.
        """
        m = BayesianModel([('A', 'B')])
        d = pd.DataFrame(data={'A': [2, 3, 8, 8, 8], 'B': ['X', 'O', 'X', 'O', 'X']})
        cpd_b = TabularCPD('B', 2, [[0, 1, 1.0 / 3], [1, 0, 2.0 / 3]],
                           evidence=['A'], evidence_card=[3])
        mle2 = MaximumLikelihoodEstimator(m, d)
        self.assertEqual(mle2.estimate_cpd('B'), cpd_b)

    def test_state_names2(self):
        """No ``state_names`` passed: data holds string and boolean values."""
        m = BayesianModel([('Light?', 'Color'), ('Fruit', 'Color')])
        d = pd.DataFrame(data={'Fruit': ['Apple', 'Apple', 'Apple', 'Banana', 'Banana'],
                               'Light?': [True,   True,   False,   False,    True],
                               'Color': ['red',   'green', 'black', 'black',  'yellow']})
        color_cpd = TabularCPD('Color', 4, [[1, 0, 1, 0], [0, 0.5, 0, 0],
                                            [0, 0.5, 0, 0], [0, 0, 0, 1]],
                               evidence=['Fruit', 'Light?'], evidence_card=[2, 2])
        mle2 = MaximumLikelihoodEstimator(m, d)
        self.assertEqual(mle2.estimate_cpd('Color'), color_cpd)

    def test_class_init(self):
        """Explicit binary ``state_names`` give the same CPDs as ``self.cpds``."""
        mle = MaximumLikelihoodEstimator(self.m1, self.d1,
                                         state_names={'A': [0, 1], 'B': [0, 1], 'C': [0, 1]})
        self.assertSetEqual(set(mle.get_parameters()), set(self.cpds))

    def test_nonoccurring_values(self):
        """``state_names`` declares states (23, 42) that never occur in d1.

        The expected tables have cardinality 3 for 'A' and 'C'; the
        ``state_names`` dict also carries a key ``1`` that is not a node of m1.
        """
        mle = MaximumLikelihoodEstimator(self.m1, self.d1,
                                         state_names={'A': [0, 1, 23], 'B': [0, 1], 'C': [0, 42, 1], 1: [2]})
        cpds = {TabularCPD('A', 3, [[2.0/3], [1.0/3], [0]]),
                TabularCPD('B', 2, [[2.0/3], [1.0/3]]),
                TabularCPD('C', 3, [[0.0, 0.0, 1.0, 1.0/3, 1.0/3, 1.0/3],
                                    [1.0, 1.0, 0.0, 1.0/3, 1.0/3, 1.0/3],
                                    [0.0, 0.0, 0.0, 1.0/3, 1.0/3, 1.0/3]],
                           evidence=['A', 'B'], evidence_card=[3, 2])}
        self.assertSetEqual(set(mle.get_parameters()), cpds)

    def test_missing_data(self):
        """Estimate from d2 with ``complete_samples_only`` set to False, then True."""
        e1 = MaximumLikelihoodEstimator(self.m1, self.d2, state_names={'C': [0, 1]}, complete_samples_only=False)
        cpds1 = {TabularCPD('A', 2, [[0.5], [0.5]]),
                 TabularCPD('B', 2, [[2./3], [1./3]]),
                 TabularCPD('C', 2, [[0, 0.5, 0.5, 0.5], [1, 0.5, 0.5, 0.5]],
                            evidence=['A', 'B'], evidence_card=[2, 2])}
        self.assertSetEqual(cpds1, set(e1.get_parameters()))

        e2 = MaximumLikelihoodEstimator(self.m1, self.d2, state_names={'C': [0, 1]}, complete_samples_only=True)
        cpds2 = {TabularCPD('A', 2, [[0.5], [0.5]]),
                 TabularCPD('B', 2, [[0.5], [0.5]]),
                 TabularCPD('C', 2, [[0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 0.5, 0.5]],
                            evidence=['A', 'B'], evidence_card=[2, 2])}
        self.assertSetEqual(cpds2, set(e2.get_parameters()))

    def tearDown(self):
        """Drop every fixture created in ``setUp``."""
        del self.m1
        del self.d1
        del self.d2
        del self.cpds
        del self.mle1