Esempio n. 1
0
class TimeSampling:
    """Timing benchmarks for drawing samples from the 'alarm' example model.

    NOTE(review): the ``setup`` / ``time_*`` / ``timeout`` names look like the
    airspeed velocity (asv) benchmark convention -- confirm against the
    harness that actually runs this class.
    """

    # Time limit read by the benchmark harness (presumably seconds).
    timeout = 600.0

    def setup(self):
        """Load the 'alarm' model and build a ``BayesianModelSampling`` for it."""
        self.model = get_example_model('alarm')
        self.s = BayesianModelSampling(self.model)

    def time_forward_sample(self):
        """Simulate 10,000 samples from the model without evidence."""
        self.model.simulate(n_samples=int(1e4), show_progress=False)

    def time_rejection_sample(self):
        """Simulate 10,000 samples given HISTORY=TRUE and HR=NORMAL.

        NOTE(review): the method name suggests ``simulate`` falls back to
        rejection sampling when evidence is supplied -- confirm in pgmpy.
        """
        self.model.simulate(n_samples=int(1e4),
                            evidence={
                                "HISTORY": "TRUE",
                                "HR": "NORMAL"
                            },
                            show_progress=False)

    def time_likelihood_sample(self):
        """Draw 10,000 likelihood-weighted samples given the same evidence."""
        self.s.likelihood_weighted_sample(evidence=[("HISTORY", "TRUE"),
                                                    ("HR", "NORMAL")],
                                          size=int(1e4))

    def time_gibbs_sampling(self):
        """Build a Gibbs sampler for the model and draw 10,000 samples."""
        # The sampler used to be bound to ``gibbs_samples`` while the call was
        # made on the undefined name ``gibbs_sampling`` (NameError at runtime).
        # A single local name is used for both steps now.
        gibbs_sampler = GibbsSampling(model=self.model)
        gibbs_sampler.sample(size=int(1e4))
def bayesian_net():
    """Build the 'musicianship' network, query it, then refit and query again.

    Steps, in order:
      1. Define a five-node network (Difficulty, Musicianship, Rating, Exam,
         Letter) with hand-written CPDs and validate it.
      2. Print the 'Letter' query results from ``SimpleInference`` (described
         in the original code as a query without normalization) and from
         ``VariableElimination``.
      3. Draw 2000 likelihood-weighted samples, fit a second network with the
         same edges on them using ``BayesianEstimator``, print its CPDs and
         repeat the ``VariableElimination`` queries on the refitted network.

    Everything is written to stdout; nothing is returned.
    """
    thick_rule = '------------------------'
    thin_rule = '--------------------'
    normalized_title = ' QUERY Letter with evidence Difficulty: 0, Musicianship: 1, Rating: 1, Exam:1  NORMALIZED'
    full_evidence = {'Difficulty': 0, 'Musicianship': 1, 'Rating': 1, 'Exam': 1}

    def announce(title, rule=thin_rule):
        # Print a title framed by a dashed rule above and below.
        print(rule)
        print(title)
        print(rule)

    def report_normalized_queries(engine):
        # Assumes the full-evidence banner has already been printed; emits the
        # full-evidence result followed by the two remaining queries.
        print(engine.query(['Letter'], evidence=dict(full_evidence))['Letter'])
        announce(' QUERY Letter with no evidence')
        print(engine.query(['Letter'])['Letter'])
        announce(' QUERY Letter with evidence Musicianship: 0  NORMALIZED')
        print(engine.query(['Letter'], evidence={'Musicianship': 0})['Letter'])

    # --- hand-specified network -------------------------------------------
    net = BayesianModel([('Difficulty', 'Rating'),
                         ('Musicianship', 'Rating'),
                         ('Musicianship', 'Exam'),
                         ('Rating', 'Letter')])

    # States: 0 -> Low, 1 -> High
    difficulty_cpd = TabularCPD(variable='Difficulty', variable_card=2,
                                values=[[0.6], [0.4]])
    # States: 0 -> Weak, 1 -> Strong
    musicianship_cpd = TabularCPD(variable='Musicianship', variable_card=2,
                                  values=[[0.7], [0.3]])
    # States: 0 -> *, 1 -> **, 2 -> ***
    rating_cpd = TabularCPD(variable='Rating', variable_card=3,
                            values=[[0.3, 0.05, 0.9, 0.5],
                                    [0.4, 0.25, 0.08, 0.3],
                                    [0.3, 0.7, 0.02, 0.2]],
                            evidence=['Difficulty', 'Musicianship'],
                            evidence_card=[2, 2])
    # States: 0 -> Low, 1 -> High
    exam_cpd = TabularCPD(variable='Exam', variable_card=2,
                          values=[[0.95, 0.2], [0.05, 0.8]],
                          evidence=['Musicianship'],
                          evidence_card=[2])
    # States: 0 -> Weak, 1 -> Strong
    letter_cpd = TabularCPD(variable='Letter', variable_card=2,
                            values=[[0.1, 0.4, 0.99], [0.9, 0.6, 0.01]],
                            evidence=['Rating'],
                            evidence_card=[3])

    net.add_cpds(difficulty_cpd, musicianship_cpd, rating_cpd, exam_cpd,
                 letter_cpd)
    net.check_model()

    # --- inference on the hand-specified network ---------------------------
    unnormalized = SimpleInference(net)

    announce(' EXACT INFERENCE', thick_rule)
    announce(' QUERY Letter with evidence Difficulty: 0, Musicianship: 1, Rating: 1, Exam:1  NOT NORMALIZED')
    # This engine receives the evidence as a set of (variable, state) pairs.
    print(unnormalized.query(['Letter'], evidence=set(full_evidence.items())))

    announce(normalized_title)
    exact = VariableElimination(net)
    report_normalized_queries(exact)

    # --- refit a structurally identical network on sampled data ------------
    sampler = BayesianModelSampling(net)
    samples = sampler.likelihood_weighted_sample(evidence={},
                                                 size=2000,
                                                 return_type='dataframe')

    refitted = BayesianModel([('Difficulty', 'Rating'),
                              ('Musicianship', 'Rating'),
                              ('Rating', 'Letter'),
                              ('Musicianship', 'Exam')])
    refitted.fit(samples, estimator=BayesianEstimator)
    refitted.check_model()
    approximate = VariableElimination(refitted)
    for table in refitted.get_cpds():
        print("CPD of {variable}:".format(variable=table.variable))
        print(table)

    announce(' APPROXIMATE INFERENCE', thick_rule)
    announce(normalized_title)
    report_normalized_queries(approximate)
Esempio n. 3
0
# Learn the CPDs of `model` from the training data, build a sampler from the
# fitted model, use it to infer a residence type for every row of the NHTS
# sample, and write one 50-row resampled population file per occupation type.
model.fit(trainData, estimator=MaximumLikelihoodEstimator)
# Uncomment to inspect the learned CPDs:
#for cpd in model.get_cpds():
#    print("CPD of {variable}:".format(variable=cpd.variable))
#    print(cpd)

model_sample = BayesianModelSampling(model)
# Use a context manager so the pickle file is flushed and closed
# deterministically instead of leaking the handle returned by open().
with open('results/sampler.p', 'wb') as sampler_file:
    pickle.dump(model_sample, sampler_file)

# open the nhts sample and add the inferred resType requirements
nhtsSample = pd.read_csv('results/nhtsSample.csv')
resType = []
for ind, row in nhtsSample.iterrows():
    # Shift the 1-based survey codes to 0-based states and cap them at the
    # top state used here (10 for income, 5 for household size).
    evidence = [
        State('IncomeQ', min(row['hh_income'] - 1, 10)),
        State('HhSize', min(row['hh_size'] - 1, 5))
    ]
    sample = model_sample.likelihood_weighted_sample(evidence=evidence, size=1)
    # Combine the two sampled variables into a single residence-type code.
    resType.append(int(sample['Bedrooms']) * 3 + int(sample['RentQ']))
nhtsSample['resType'] = resType

# The output paths below are relative to the parent directory.
os.chdir('..')
# One file per occupation type (1..5): 50 rows drawn with replacement.
for occupation in range(1, 6):
    nhtsSample[nhtsSample['occupation_type'] == occupation].sample(
        n=50, replace=True).to_csv(
            'ABM/includes/pop_occat_{}.csv'.format(occupation), index=False)
Esempio n. 4
0
class TestBayesianModelSampling(unittest.TestCase):
    """Tests for ``BayesianModelSampling`` on a six-variable binary network."""

    # Every variable of the network built in setUp.
    VARIABLES = ('A', 'J', 'R', 'Q', 'G', 'L')

    def setUp(self):
        """Build the network, attach its CPDs and create the sampler."""
        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'),
                                             ('J', 'Q'), ('J', 'L'),
                                             ('G', 'L')])
        cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])
        cpd_j = TabularCPD('J', 2,
                           [[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
                           ['R', 'A'], [2, 2])
        cpd_q = TabularCPD('Q', 2, [[0.9, 0.2], [0.1, 0.8]], ['J'], [2])
        cpd_l = TabularCPD('L', 2,
                           [[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]],
                           ['G', 'J'], [2, 2])
        cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)
        self.sampling_inference = BayesianModelSampling(self.bayesian_model)
        self.markov_model = MarkovModel()

    def _assert_sample(self, sample, size, extra_columns=(), fixed=None):
        """Check the size, column set and per-variable states of ``sample``.

        ``extra_columns`` names columns expected in addition to the network
        variables. ``fixed`` maps a variable to the set of states it may take;
        variables not listed there may take either state 0 or 1.
        """
        fixed = fixed or {}
        expected_columns = self.VARIABLES + tuple(extra_columns)
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(len(sample), size)
        self.assertEqual(len(sample.columns), len(expected_columns))
        for column in expected_columns:
            self.assertIn(column, sample.columns)
        for variable in self.VARIABLES:
            allowed = fixed.get(variable, {0, 1})
            self.assertTrue(set(getattr(sample, variable)).issubset(allowed))

    def test_init(self):
        """A MarkovModel is rejected by the sampler constructor."""
        with self.assertRaises(TypeError):
            BayesianModelSampling(self.markov_model)

    def test_forward_sample(self):
        """Forward sampling yields 25 rows over all six binary variables."""
        sample = self.sampling_inference.forward_sample(25)
        self._assert_sample(sample, 25)

    def test_rejection_sample_basic(self):
        """Rejection sampling keeps the evidence variables at their given state."""
        sample = self.sampling_inference.rejection_sample(
            [State('A', 1), State('J', 1),
             State('R', 1)], 25)
        self._assert_sample(sample, 25,
                            fixed={'A': {1}, 'J': {1}, 'R': {1}})

    @patch("pgmpy.sampling.BayesianModelSampling.forward_sample",
           autospec=True)
    def test_rejection_sample_less_arg(self, forward_sample):
        """Without evidence, rejection_sample delegates to forward_sample."""
        sample = self.sampling_inference.rejection_sample(size=5)
        forward_sample.assert_called_once_with(self.sampling_inference, 5)
        self.assertEqual(sample, forward_sample.return_value)

    def test_likelihood_weighted_sample(self):
        """Likelihood weighting returns the six variables plus ``_weight``."""
        sample = self.sampling_inference.likelihood_weighted_sample(
            [State('A', 0), State('J', 1),
             State('R', 0)], 25)
        self._assert_sample(sample, 25, extra_columns=('_weight',))

    def tearDown(self):
        """Drop the fixtures created in setUp."""
        del self.sampling_inference
        del self.bayesian_model
        del self.markov_model
class TestBayesianModelSampling(unittest.TestCase):
    """Checks the sampling methods of ``BayesianModelSampling``.

    The fixture is a six-variable binary network (A, R -> J; J -> Q; J, G -> L).
    """

    def setUp(self):
        """Create the network with its CPDs, the sampler and a MarkovModel."""
        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'), ('J', 'Q'),
                                             ('J', 'L'), ('G', 'L')])
        cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])
        cpd_j = TabularCPD('J', 2,
                           [[0.9, 0.6, 0.7, 0.1],
                            [0.1, 0.4, 0.3, 0.9]],
                           ['R', 'A'], [2, 2])
        cpd_q = TabularCPD('Q', 2,
                           [[0.9, 0.2],
                            [0.1, 0.8]],
                           ['J'], [2])
        cpd_l = TabularCPD('L', 2,
                           [[0.9, 0.45, 0.8, 0.1],
                            [0.1, 0.55, 0.2, 0.9]],
                           ['G', 'J'], [2, 2])
        cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)
        self.sampling_inference = BayesianModelSampling(self.bayesian_model)
        self.markov_model = MarkovModel()

    def test_init(self):
        """Constructing the sampler from a MarkovModel raises TypeError."""
        with self.assertRaises(TypeError):
            BayesianModelSampling(self.markov_model)

    def test_forward_sample(self):
        """forward_sample(25) returns 25 rows with six binary columns."""
        sample = self.sampling_inference.forward_sample(25)
        # assertEqual is used throughout: the assertEquals alias is deprecated
        # and no longer exists in Python 3.12+.
        self.assertEqual(len(sample), 25)
        self.assertEqual(len(sample.columns), 6)
        for name in ('A', 'J', 'R', 'Q', 'G', 'L'):
            self.assertIn(name, sample.columns)
        for name in ('A', 'J', 'R', 'Q', 'G', 'L'):
            self.assertTrue(set(getattr(sample, name)).issubset({0, 1}))

    def test_rejection_sample_basic(self):
        """Evidence variables stay at state 1; the others remain binary."""
        sample = self.sampling_inference.rejection_sample(
            [State('A', 1), State('J', 1), State('R', 1)], 25)
        self.assertEqual(len(sample), 25)
        self.assertEqual(len(sample.columns), 6)
        for name in ('A', 'J', 'R', 'Q', 'G', 'L'):
            self.assertIn(name, sample.columns)
        for name in ('A', 'J', 'R'):
            self.assertTrue(set(getattr(sample, name)).issubset({1}))
        for name in ('Q', 'G', 'L'):
            self.assertTrue(set(getattr(sample, name)).issubset({0, 1}))

    @patch("pgmpy.sampling.BayesianModelSampling.forward_sample", autospec=True)
    def test_rejection_sample_less_arg(self, forward_sample):
        """With no evidence, rejection_sample forwards to forward_sample."""
        sample = self.sampling_inference.rejection_sample(size=5)
        forward_sample.assert_called_once_with(self.sampling_inference, 5)
        self.assertEqual(sample, forward_sample.return_value)

    def test_likelihood_weighted_sample(self):
        """The weighted sample has the six variables plus a ``_weight`` column."""
        sample = self.sampling_inference.likelihood_weighted_sample(
            [State('A', 0), State('J', 1), State('R', 0)], 25)
        self.assertEqual(len(sample), 25)
        self.assertEqual(len(sample.columns), 7)
        for name in ('A', 'J', 'R', 'Q', 'G', 'L', '_weight'):
            self.assertIn(name, sample.columns)
        for name in ('A', 'J', 'R', 'Q', 'G', 'L'):
            self.assertTrue(set(getattr(sample, name)).issubset({0, 1}))

    def tearDown(self):
        """Remove the objects created in setUp."""
        del self.sampling_inference
        del self.bayesian_model
        del self.markov_model