class TimeSampling:
    """asv-style benchmark suite timing pgmpy sampling methods on the
    'alarm' example network."""

    # Per-benchmark timeout in seconds (asv convention).
    timeout = 600.0

    def setup(self):
        # Rebuild the model and sampler before each benchmark run so
        # timings are independent.
        self.model = get_example_model('alarm')
        self.s = BayesianModelSampling(self.model)

    def time_forward_sample(self):
        # Plain ancestral (forward) sampling: no evidence supplied.
        self.model.simulate(n_samples=int(1e4), show_progress=False)

    def time_rejection_sample(self):
        # simulate() with evidence rejects samples inconsistent with it.
        self.model.simulate(
            n_samples=int(1e4),
            evidence={"HISTORY": "TRUE", "HR": "NORMAL"},
            show_progress=False,
        )

    def time_likelihood_sample(self):
        self.s.likelihood_weighted_sample(
            evidence=[("HISTORY", "TRUE"), ("HR", "NORMAL")],
            size=int(1e4),
        )

    def time_gibbs_sampling(self):
        # BUG FIX: the sampler was bound to `gibbs_samples` but the call
        # used the undefined name `gibbs_sampling`, so this benchmark
        # raised NameError instead of measuring anything.
        gibbs = GibbsSampling(model=self.model)
        gibbs.sample(size=int(1e4))
def bayesian_net():
    """Build the 'musicianship' Bayesian network, run exact inference on it,
    then re-learn the parameters from sampled data and repeat the queries
    approximately, printing every result.
    """
    model = BayesianModel([('Difficulty', 'Rating'),
                           ('Musicianship', 'Rating'),
                           ('Musicianship', 'Exam'),
                           ('Rating', 'Letter')])

    # Priors.  States: Difficulty 0=Low/1=High, Musicianship 0=Weak/1=Strong.
    difficulty_cpd = TabularCPD(variable='Difficulty', variable_card=2,
                                values=[[0.6], [0.4]])
    musicianship_cpd = TabularCPD(variable='Musicianship', variable_card=2,
                                  values=[[0.7], [0.3]])
    # Rating states: 0=*, 1=**, 2=***.
    rating_cpd = TabularCPD(variable='Rating', variable_card=3,
                            values=[[0.3, 0.05, 0.9, 0.5],
                                    [0.4, 0.25, 0.08, 0.3],
                                    [0.3, 0.7, 0.02, 0.2]],
                            evidence=['Difficulty', 'Musicianship'],
                            evidence_card=[2, 2])
    # Exam states: 0=Low, 1=High.
    exam_cpd = TabularCPD(variable='Exam', variable_card=2,
                          values=[[0.95, 0.2], [0.05, 0.8]],
                          evidence=['Musicianship'], evidence_card=[2])
    # Letter states: 0=Weak, 1=Strong.
    letter_cpd = TabularCPD(variable='Letter', variable_card=2,
                            values=[[0.1, 0.4, 0.99], [0.9, 0.6, 0.01]],
                            evidence=['Rating'], evidence_card=[3])

    model.add_cpds(difficulty_cpd, musicianship_cpd, rating_cpd,
                   exam_cpd, letter_cpd)
    model.check_model()

    # Un-normalized query through the custom SimpleInference class.
    infer = SimpleInference(model)
    print('------------------------')
    print(' EXACT INFERENCE')
    print('------------------------')
    print('--------------------')
    print(' QUERY Letter with evidence Difficulty: 0, Musicianship: 1, Rating: 1, Exam:1 NOT NORMALIZED')
    print('--------------------')
    # NOTE(review): the evidence here is a *set* of (variable, state) pairs,
    # unlike the dict used for VariableElimination below -- presumably what
    # SimpleInference expects; confirm against its definition.
    print(infer.query(['Letter'],
                      evidence={('Difficulty', 0), ('Musicianship', 1),
                                ('Rating', 1), ('Exam', 1)}))
    print('--------------------')
    print(' QUERY Letter with evidence Difficulty: 0, Musicianship: 1, Rating: 1, Exam:1 NORMALIZED')
    print('--------------------')

    # Normalized queries via variable elimination.
    infer = VariableElimination(model)
    print(infer.query(['Letter'],
                      evidence={'Difficulty': 0, 'Musicianship': 1,
                                'Rating': 1, 'Exam': 1})['Letter'])
    print('--------------------')
    print(' QUERY Letter with no evidence')
    print('--------------------')
    print(infer.query(['Letter'])['Letter'])
    print('--------------------')
    print(' QUERY Letter with evidence Musicianship: 0 NORMALIZED')
    print('--------------------')
    print(infer.query(['Letter'], evidence={'Musicianship': 0})['Letter'])

    # Draw data from the exact model, then refit a structurally identical
    # network's parameters from that data.
    sampler = BayesianModelSampling(model)
    data = sampler.likelihood_weighted_sample(evidence={}, size=2000,
                                              return_type='dataframe')
    learned = BayesianModel([('Difficulty', 'Rating'),
                             ('Musicianship', 'Rating'),
                             ('Rating', 'Letter'),
                             ('Musicianship', 'Exam')])
    learned.fit(data, estimator=BayesianEstimator)
    learned.check_model()
    infer = VariableElimination(learned)

    for cpd in learned.get_cpds():
        print("CPD of {variable}:".format(variable=cpd.variable))
        print(cpd)

    print('------------------------')
    print(' APPROXIMATE INFERENCE')
    print('------------------------')
    print('--------------------')
    print(' QUERY Letter with evidence Difficulty: 0, Musicianship: 1, Rating: 1, Exam:1 NORMALIZED')
    print('--------------------')
    print(infer.query(['Letter'],
                      evidence={'Difficulty': 0, 'Musicianship': 1,
                                'Rating': 1, 'Exam': 1})['Letter'])
    print('--------------------')
    print(' QUERY Letter with no evidence')
    print('--------------------')
    print(infer.query(['Letter'])['Letter'])
    print('--------------------')
    print(' QUERY Letter with evidence Musicianship: 0 NORMALIZED')
    print('--------------------')
    print(infer.query(['Letter'], evidence={'Musicianship': 0})['Letter'])
# Fit the model's CPDs by maximum likelihood from the training data.
model.fit(trainData, estimator=MaximumLikelihoodEstimator)

# Persist the sampler for later reuse.
# FIX: use a context manager -- the original passed a bare open() to
# pickle.dump, leaking the file handle.
model_sample = BayesianModelSampling(model)
with open('results/sampler.p', 'wb') as f:
    pickle.dump(model_sample, f)

# Open the NHTS sample and add the inferred residence-type requirements.
nhtsSample = pd.read_csv('results/nhtsSample.csv')
resType = []
for _, row in nhtsSample.iterrows():
    # Clamp income quartile to [.., 10] and household size to [.., 5];
    # the raw columns are 1-based, the model's states 0-based.
    evidence = [
        State('IncomeQ', min(row['hh_income'] - 1, 10)),
        State('HhSize', min(row['hh_size'] - 1, 5)),
    ]
    sample = model_sample.likelihood_weighted_sample(evidence=evidence,
                                                     size=1)
    # Encode residence type as bedrooms * 3 + rent quartile.
    resType.append(int(sample['Bedrooms']) * 3 + int(sample['RentQ']))
nhtsSample['resType'] = resType

os.chdir('..')
# One bootstrap sample of 50 agents per occupation type.
# FIX: the original repeated this statement five times verbatim; a loop
# keeps the exports consistent.
for occ in range(1, 6):
    nhtsSample[nhtsSample['occupation_type'] == occ].sample(
        n=50, replace=True).to_csv(
            'ABM/includes/pop_occat_{}.csv'.format(occ), index=False)
class TestBayesianModelSampling(unittest.TestCase):
    """Unit tests for BayesianModelSampling on a small 6-variable network.

    Structure: A -> J <- R, J -> Q, J -> L <- G; every variable is binary.
    """

    def setUp(self):
        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'),
                                             ('J', 'Q'), ('J', 'L'),
                                             ('G', 'L')])
        cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])
        cpd_j = TabularCPD('J', 2,
                           [[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
                           ['R', 'A'], [2, 2])
        cpd_q = TabularCPD('Q', 2, [[0.9, 0.2], [0.1, 0.8]], ['J'], [2])
        cpd_l = TabularCPD('L', 2,
                           [[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]],
                           ['G', 'J'], [2, 2])
        cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q,
                                     cpd_r)
        self.sampling_inference = BayesianModelSampling(self.bayesian_model)
        self.markov_model = MarkovModel()

    def test_init(self):
        # Sampling is only defined for Bayesian models.
        with self.assertRaises(TypeError):
            BayesianModelSampling(self.markov_model)

    def test_forward_sample(self):
        sample = self.sampling_inference.forward_sample(25)
        # FIX: assertEquals is a deprecated alias (removed in Python 3.12);
        # use assertEqual throughout.
        self.assertEqual(len(sample), 25)
        self.assertEqual(len(sample.columns), 6)
        for var in ('A', 'J', 'R', 'Q', 'G', 'L'):
            self.assertIn(var, sample.columns)
            self.assertTrue(set(sample[var]).issubset({0, 1}))

    def test_rejection_sample_basic(self):
        sample = self.sampling_inference.rejection_sample(
            [State('A', 1), State('J', 1), State('R', 1)], 25)
        self.assertEqual(len(sample), 25)
        self.assertEqual(len(sample.columns), 6)
        for var in ('A', 'J', 'R', 'Q', 'G', 'L'):
            self.assertIn(var, sample.columns)
        # Evidence variables must be clamped to their observed state.
        self.assertTrue(set(sample.A).issubset({1}))
        self.assertTrue(set(sample.J).issubset({1}))
        self.assertTrue(set(sample.R).issubset({1}))
        self.assertTrue(set(sample.Q).issubset({0, 1}))
        self.assertTrue(set(sample.G).issubset({0, 1}))
        self.assertTrue(set(sample.L).issubset({0, 1}))

    @patch("pgmpy.sampling.BayesianModelSampling.forward_sample",
           autospec=True)
    def test_rejection_sample_less_arg(self, forward_sample):
        # With no evidence, rejection sampling delegates to forward
        # sampling.
        sample = self.sampling_inference.rejection_sample(size=5)
        forward_sample.assert_called_once_with(self.sampling_inference, 5)
        self.assertEqual(sample, forward_sample.return_value)

    def test_likelihood_weighted_sample(self):
        sample = self.sampling_inference.likelihood_weighted_sample(
            [State('A', 0), State('J', 1), State('R', 0)], 25)
        self.assertEqual(len(sample), 25)
        # 6 variables plus the per-sample importance-weight column.
        self.assertEqual(len(sample.columns), 7)
        self.assertIn('_weight', sample.columns)
        for var in ('A', 'J', 'R', 'Q', 'G', 'L'):
            self.assertIn(var, sample.columns)
            self.assertTrue(set(sample[var]).issubset({0, 1}))

    def tearDown(self):
        del self.sampling_inference
        del self.bayesian_model
        del self.markov_model
# NOTE(review): this class is an exact duplicate of an earlier
# TestBayesianModelSampling definition in this file; the later definition
# shadows the earlier one, so only this copy actually runs. Consider
# removing one of them.
class TestBayesianModelSampling(unittest.TestCase):
    """Unit tests for BayesianModelSampling on a small 6-variable network.

    Structure: A -> J <- R, J -> Q, J -> L <- G; every variable is binary.
    """

    def setUp(self):
        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'),
                                             ('J', 'Q'), ('J', 'L'),
                                             ('G', 'L')])
        cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])
        cpd_j = TabularCPD('J', 2,
                           [[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
                           ['R', 'A'], [2, 2])
        cpd_q = TabularCPD('Q', 2, [[0.9, 0.2], [0.1, 0.8]], ['J'], [2])
        cpd_l = TabularCPD('L', 2,
                           [[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]],
                           ['G', 'J'], [2, 2])
        cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q,
                                     cpd_r)
        self.sampling_inference = BayesianModelSampling(self.bayesian_model)
        self.markov_model = MarkovModel()

    def test_init(self):
        # Sampling is only defined for Bayesian models.
        with self.assertRaises(TypeError):
            BayesianModelSampling(self.markov_model)

    def test_forward_sample(self):
        sample = self.sampling_inference.forward_sample(25)
        # FIX: assertEquals is a deprecated alias (removed in Python 3.12);
        # use assertEqual throughout.
        self.assertEqual(len(sample), 25)
        self.assertEqual(len(sample.columns), 6)
        for var in ('A', 'J', 'R', 'Q', 'G', 'L'):
            self.assertIn(var, sample.columns)
            self.assertTrue(set(sample[var]).issubset({0, 1}))

    def test_rejection_sample_basic(self):
        sample = self.sampling_inference.rejection_sample(
            [State('A', 1), State('J', 1), State('R', 1)], 25)
        self.assertEqual(len(sample), 25)
        self.assertEqual(len(sample.columns), 6)
        for var in ('A', 'J', 'R', 'Q', 'G', 'L'):
            self.assertIn(var, sample.columns)
        # Evidence variables must be clamped to their observed state.
        self.assertTrue(set(sample.A).issubset({1}))
        self.assertTrue(set(sample.J).issubset({1}))
        self.assertTrue(set(sample.R).issubset({1}))
        self.assertTrue(set(sample.Q).issubset({0, 1}))
        self.assertTrue(set(sample.G).issubset({0, 1}))
        self.assertTrue(set(sample.L).issubset({0, 1}))

    @patch("pgmpy.sampling.BayesianModelSampling.forward_sample",
           autospec=True)
    def test_rejection_sample_less_arg(self, forward_sample):
        # With no evidence, rejection sampling delegates to forward
        # sampling.
        sample = self.sampling_inference.rejection_sample(size=5)
        forward_sample.assert_called_once_with(self.sampling_inference, 5)
        self.assertEqual(sample, forward_sample.return_value)

    def test_likelihood_weighted_sample(self):
        sample = self.sampling_inference.likelihood_weighted_sample(
            [State('A', 0), State('J', 1), State('R', 0)], 25)
        self.assertEqual(len(sample), 25)
        # 6 variables plus the per-sample importance-weight column.
        self.assertEqual(len(sample.columns), 7)
        self.assertIn('_weight', sample.columns)
        for var in ('A', 'J', 'R', 'Q', 'G', 'L'):
            self.assertIn(var, sample.columns)
            self.assertTrue(set(sample[var]).issubset({0, 1}))

    def tearDown(self):
        del self.sampling_inference
        del self.bayesian_model
        del self.markov_model