def load_total_cpds():
    """Fit CPDs for the nodes of the module-level ``total_G`` graph.

    A random binary data set (100 rows, one column per node of ``total_G``)
    is generated and wrapped in a ``BayesianEstimator``.  CPDs are then
    attached to ``total_G`` in two passes:

    1. all at once through ``get_parameters`` (BDeu prior,
       ``equivalent_sample_size=5``);
    2. node by node through ``estimate_cpd`` (BDeu prior) for the
       ``sub_sympt_<i>``, ``sympt_<i>`` and ``cond_<i>`` nodes.

    Relies on the module-level names ``total_G``, ``num_sub_symptoms``,
    ``num_symptoms`` and ``num_conditions``.  Returns nothing; ``total_G``
    is modified in place.
    """
    # All the nodes in the graph (157 nodes)
    node_names = total_G.nodes
    random_samples = np.random.randint(low=0, high=2, size=(100, len(node_names)))
    data = pd.DataFrame(random_samples, columns=node_names)

    # Option 1 of fitting cpds: learn every CPD in a single call.
    estimator = BayesianEstimator(total_G, data)
    learned_cpds = estimator.get_parameters(prior_type='BDeu', equivalent_sample_size=5)
    total_G.add_cpds(*learned_cpds)

    # Option 2 of fitting cpds: estimate node by node.
    for idx in range(1, num_sub_symptoms + 1):
        total_G.add_cpds(
            estimator.estimate_cpd('sub_sympt_{}'.format(idx), prior_type="BDeu")
        )
        # Symptom nodes share the loop; only indices up to num_symptoms are fitted.
        if idx <= num_symptoms:
            total_G.add_cpds(
                estimator.estimate_cpd('sympt_{}'.format(idx), prior_type="BDeu")
            )

    # this is the time cruncher.
    for idx in range(1, num_conditions + 1):
        total_G.add_cpds(
            estimator.estimate_cpd('cond_{}'.format(idx), prior_type="BDeu")
        )
class TimeBayesianEstimator:
    """Timing benchmarks for ``BayesianEstimator.get_parameters``.

    Two example networks ('alarm' and 'munin1') are loaded, sampled, and
    wrapped in estimators during ``setup``; each ``time_*`` method then runs
    a single ``get_parameters`` call with default arguments.
    """

    # NOTE(review): presumably the per-benchmark time limit in seconds read by
    # the benchmark runner -- confirm against the runner's documentation.
    timeout = 1200

    def setup(self):
        """Build one estimator per example network from 10,000 simulated rows."""
        n_samples = int(1e4)

        # 'alarm' network: reference model, structure-only copy, estimator.
        self.alarm = get_example_model('alarm')
        self.alarm_model = BayesianNetwork(self.alarm.edges())
        alarm_samples = self.alarm.simulate(n_samples, show_progress=False)
        self.alarm_est = BayesianEstimator(self.alarm_model, alarm_samples)

        # 'munin1' network: same three steps.
        self.munin = get_example_model('munin1')
        self.munin_model = BayesianNetwork(self.munin.edges())
        munin_samples = self.munin.simulate(n_samples, show_progress=False)
        self.munin_est = BayesianEstimator(self.munin_model, munin_samples)

    def time_alarm_bayesian_estimator(self):
        """Time parameter learning on the 'alarm' structure."""
        self.alarm_est.get_parameters()

    def time_munin_bayesian_estimator(self):
        """Time parameter learning on the 'munin1' structure."""
        self.munin_est.get_parameters()
# Load the comma-separated activity/time data set into a NumPy array.
# Alternative that was tried:
#   pd.read_csv(r'E:\Lessons_tutorials\Behavioural user profile articles\Datasets\7 twor.2009\twor.2009\converted\pgmpy\data.csv')
mydata = np.genfromtxt(
    r'E:\Lessons_tutorials\Behavioural user profile articles\Datasets\7 twor.2009\twor.2009\converted\pgmpy\activities+time_ordered_withoutdatetime.csv',
    delimiter=",",
)
# print(mydata)

# Column labels come from ``feature_names`` (defined elsewhere); a toy
# alternative was ['X', 'Y'].
data = pd.DataFrame(mydata, columns=feature_names)
print(data)

# Scoring methods to try for structure learning; only BIC is active.
list_of_scoring_methods = [
    BicScore(data),
    # BdeuScore(data),
    # K2Score(data),
]

for scoreMethod in list_of_scoring_methods:
    # Time the hill-climbing structure search (the two result prints are
    # inside the timed region).
    start_time = time.time()
    hc = HillClimbSearch(data, scoreMethod)
    best_model = hc.estimate()
    print(hc.scoring_method)
    print(best_model.edges())
    end_time = time.time()

    print("execution time in seconds:")
    print(end_time - start_time)

# NOTE(review): with a single active scoring method it makes no difference
# whether the parameter learning below sits inside or after the loop; confirm
# the intended placement before enabling more scoring methods.
estimator = BayesianEstimator(best_model, data)
print(estimator.get_parameters(prior_type='K2'))  # optionally: equivalent_sample_size=5

# casas7_model = BayesianModel()
# casas7_model.fit(data, estimator=BayesianEstimator)  # or MaximumLikelihoodEstimator
# print(casas7_model.get_cpds())
# casas7_model.get_n
# Run the structure search and report the edges it found.
best_model = est.estimate()
print("Structure found!")
print("Best Model")
print(best_model.edges())

# ---------------------------------------------------------------------------
# Parameters Estimation and CPD tables
# ---------------------------------------------------------------------------
from pgmpy.models import BayesianModel
from pgmpy.estimators import BayesianEstimator

# Rebuild a model from the learned edges and fit its CPDs on the training set
# with a BDeu prior.
model = BayesianModel(best_model.edges())
data = pd.read_csv("final_training_set.csv")
estimator = BayesianEstimator(model, data)
parameters = estimator.get_parameters(
    prior_type='BDeu',
    equivalent_sample_size=10,
)
model.add_cpds(*parameters)

# ---------------------------------------------------------------------------
# Inference and Validation
# ---------------------------------------------------------------------------
from pgmpy.inference import VariableElimination
import csv

# NOTE(review): ``f`` is left open for ``reader``; it is not closed within
# this fragment -- confirm it is closed once the rows have been consumed.
f = open("validation_data.csv")
reader = csv.reader(f)
inference = VariableElimination(model)

# Counters, both starting at zero.
valid = invalid = 0
# Location of the toy data set.
path = r'C:\Users\Javier\Documents\MEGA\Universitattt\Master\Thesis\CDS_data\toy_data'
file1 = r'epsilon_csv.csv'

# Read the CSV with its first column as index, drop file rows 19 and 20, and
# transpose the resulting frame.
data = pd.read_csv(
    os.path.join(path, file1),
    index_col=0,
    skiprows=[19, 20],
).transpose()

# Structure learning: hill climbing scored with BIC.
print('Using BicScore Method')
est = HillClimbSearch(data, scoring_method=BicScore(data))
best_model = est.estimate()

# Maximum-likelihood alternative, currently disabled:
# MLE_estimator = MaximumLikelihoodEstimator(best_model, data)
# MLE_parameters = MLE_estimator.get_parameters()

# Parameter learning with the Bayesian estimator's default settings.
bay_estimator = BayesianEstimator(best_model, data)
bay_parameters = bay_estimator.get_parameters()

print('Edges:')
print(best_model.edges())

# print('MLE Parameters')
# for m in MLE_parameters:
#     print(m)

print('Bayesian Parameters')
for b in bay_parameters:
    print(b)
class TestBayesianEstimator(unittest.TestCase):
    """Tests for ``BayesianEstimator`` on the small network A -> C <- B."""

    def setUp(self):
        """Create the shared model, two data sets and three estimators.

        * ``d1``: three samples, every variable binary.
        * ``d2``: ten samples, ``A`` in {0, 1, 2} and ``B`` in {'X', 'Y'}.
        * ``est2`` uses ``d1`` with explicit state names that include states
          never observed in the data (``A == 2`` and ``C == 23``).
        """
        self.m1 = BayesianModel([('A', 'C'), ('B', 'C')])
        self.d1 = pd.DataFrame(data={'A': [0, 0, 1],
                                     'B': [0, 1, 0],
                                     'C': [1, 1, 0]})
        self.d2 = pd.DataFrame(data={'A': [0, 0, 1, 0, 2, 0, 2, 1, 0, 2],
                                     'B': ['X', 'Y', 'X', 'Y', 'X', 'Y', 'X', 'Y', 'X', 'Y'],
                                     'C': [1, 1, 1, 0, 0, 0, 0, 0, 0, 0]})
        self.est1 = BayesianEstimator(self.m1, self.d1)
        self.est2 = BayesianEstimator(self.m1, self.d1,
                                      state_names={'A': [0, 1, 2],
                                                   'B': [0, 1],
                                                   'C': [0, 1, 23]})
        self.est3 = BayesianEstimator(self.m1, self.d2)

    def test_estimate_cpd_dirichlet(self):
        """Dirichlet prior with explicit per-state pseudo counts."""
        cpd_A = self.est1.estimate_cpd('A', prior_type="dirichlet", pseudo_counts=[0, 1])
        self.assertEqual(cpd_A, TabularCPD('A', 2, [[0.5], [0.5]]))

        cpd_B = self.est1.estimate_cpd('B', prior_type="dirichlet", pseudo_counts=[9, 3])
        self.assertEqual(cpd_B, TabularCPD('B', 2, [[11.0/15], [4.0/15]]))

        cpd_C = self.est1.estimate_cpd('C', prior_type="dirichlet", pseudo_counts=[0.4, 0.6])
        self.assertEqual(cpd_C, TabularCPD('C', 2, [[0.2, 0.2, 0.7, 0.4],
                                                    [0.8, 0.8, 0.3, 0.6]],
                                           evidence=['A', 'B'], evidence_card=[2, 2]))

    def test_estimate_cpd_improper_prior(self):
        """All-zero pseudo counts give NaN for the unobserved parent state.

        ``d1`` contains no sample with ``A == 1`` and ``B == 1``, so the last
        column of the expected CPD is NaN.
        """
        cpd_C = self.est1.estimate_cpd('C', prior_type="dirichlet", pseudo_counts=[0, 0])
        # ``np.nan`` is used because the ``np.NaN`` alias was removed in NumPy 2.0.
        cpd_C_correct = TabularCPD('C', 2, [[0.0, 0.0, 1.0, np.nan],
                                            [1.0, 1.0, 0.0, np.nan]],
                                   evidence=['A', 'B'], evidence_card=[2, 2],
                                   state_names={'A': [0, 1], 'B': [0, 1], 'C': [0, 1]})
        # Plain equality cannot be used because nan != nan;
        # assert_array_equal treats NaNs in matching positions as equal.
        np.testing.assert_array_equal(cpd_C.values, cpd_C_correct.values)

    def test_estimate_cpd_shortcuts(self):
        """The 'BDeu' and 'K2' prior types produce the expected CPDs."""
        cpd_C1 = self.est2.estimate_cpd('C', prior_type='BDeu', equivalent_sample_size=9)
        cpd_C1_correct = TabularCPD('C', 3, [[0.2, 0.2, 0.6, 1./3, 1./3, 1./3],
                                             [0.6, 0.6, 0.2, 1./3, 1./3, 1./3],
                                             [0.2, 0.2, 0.2, 1./3, 1./3, 1./3]],
                                    evidence=['A', 'B'], evidence_card=[3, 2])
        self.assertEqual(cpd_C1, cpd_C1_correct)

        cpd_C2 = self.est3.estimate_cpd('C', prior_type='K2')
        cpd_C2_correct = TabularCPD('C', 2, [[0.5, 0.6, 1./3, 2./3, 0.75, 2./3],
                                             [0.5, 0.4, 2./3, 1./3, 0.25, 1./3]],
                                    evidence=['A', 'B'], evidence_card=[3, 2])
        self.assertEqual(cpd_C2, cpd_C2_correct)

    def test_get_parameters(self):
        """``get_parameters()`` matches per-node ``estimate_cpd`` with defaults."""
        cpds = {self.est3.estimate_cpd('A'),
                self.est3.estimate_cpd('B'),
                self.est3.estimate_cpd('C')}
        self.assertSetEqual(set(self.est3.get_parameters()), cpds)

    def test_get_parameters2(self):
        """``get_parameters`` accepts a dict of per-node Dirichlet pseudo counts."""
        pseudo_counts = {'A': [1, 2, 3], 'B': [4, 5], 'C': [6, 7]}
        cpds = {self.est3.estimate_cpd('A', prior_type="dirichlet",
                                       pseudo_counts=pseudo_counts['A']),
                self.est3.estimate_cpd('B', prior_type="dirichlet",
                                       pseudo_counts=pseudo_counts['B']),
                self.est3.estimate_cpd('C', prior_type="dirichlet",
                                       pseudo_counts=pseudo_counts['C'])}
        self.assertSetEqual(set(self.est3.get_parameters(prior_type="dirichlet",
                                                         pseudo_counts=pseudo_counts)),
                            cpds)

    def tearDown(self):
        """Drop every fixture created in ``setUp`` (``est3`` included)."""
        del self.m1
        del self.d1
        del self.d2
        del self.est1
        del self.est2
        del self.est3
] prior_type = ["dirichlet", "BDeu", "K2"] for i in range(1, 3): f = open( r'C:\Users\Adele\Desktop\PhD thesis tests\parameterLearning_' + prior_type[i], 'w') print(prior_type[i]) model = BayesianModel(structures[i]) estimator = BayesianEstimator(model, data) end_time = time.time() print("parameter learning in seconds:{}".format(end_time - start_time)) all_cpds = estimator.get_parameters( prior_type=prior_type[i]) #BDeu")#"dirichlet") for c in all_cpds: print(c) f.write(c.tostring()) # python will convert \n to os.linesep f.close() #for my_node in feature_names: # the_cpd = estimator.estimate_cpd(my_node, prior_type="dirichlet") # print(the_cpd) #casas7_model = BayesianModel() #casas7_model.fit(data, estimator=BayesianEstimator)#MaximumLikelihoodEstimator) #print(casas7_model.get_cpds()) #casas7_model.get_n
# Estimating the CPD of a single node.
print(mle.estimate_cpd(node='CVP'))

# Estimating CPDs for all the nodes in the model.
all_mle_cpds = mle.get_parameters()
print(all_mle_cpds[:10])  # Show just the first 10 CPDs in the output

# Verifying that the learned parameters are almost equal to the reference
# model's parameters (absolute tolerance 0.01).
import numpy as np

reference_fio2 = alarm_model.get_cpds('FIO2').values
learned_fio2 = mle.estimate_cpd('FIO2').values
print(np.allclose(reference_fio2, learned_fio2, atol=0.01))

# Fitting the parameters using the Bayesian Estimator.
from pgmpy.estimators import BayesianEstimator

best = BayesianEstimator(model=model_struct, data=samples)

# BDeu prior, controlled by a single equivalent sample size.
print(best.estimate_cpd(node='FIO2', prior_type="BDeu", equivalent_sample_size=1000))

# Dirichlet prior with one scalar pseudo count applied uniformly.
# NOTE(review): an array matching the CPD's shape is reportedly accepted as
# well -- confirm against the installed pgmpy version.
print(best.estimate_cpd(node='CVP', prior_type="dirichlet", pseudo_counts=100))

# Learning CPDs for all the nodes in the model.  With the BDeu prior only
# equivalent_sample_size is passed here.
# NOTE(review): a dict of per-node pseudo_counts appears to belong to the
# "dirichlet" prior, not BDeu -- confirm in the pgmpy documentation.
all_bdeu_cpds = best.get_parameters(prior_type="BDeu", equivalent_sample_size=1000)
print(all_bdeu_cpds[:10])

# Shortcut for learning all the parameters and adding the CPDs to the model:
# first with maximum likelihood ...
model_struct = BayesianModel(ebunch=alarm_model.edges())
model_struct.fit(data=samples, estimator=MaximumLikelihoodEstimator)
print(model_struct.get_cpds('FIO2'))

# ... then on a fresh structure with the Bayesian estimator and a BDeu prior.
model_struct = BayesianModel(ebunch=alarm_model.edges())
model_struct.fit(
    data=samples,
    estimator=BayesianEstimator,
    prior_type='BDeu',
    equivalent_sample_size=1000,
)
print(model_struct.get_cpds('FIO2'))
class TestBayesianEstimator(unittest.TestCase):
    """Tests for ``BayesianEstimator`` on the small network A -> C <- B.

    Dirichlet pseudo counts are given as 2-D arrays, one row per state of the
    estimated variable and one column per parent configuration.
    """

    def setUp(self):
        """Create the shared model, two data sets and three estimators.

        * ``d1``: three samples, every variable binary.
        * ``d2``: ten samples, ``A`` in {0, 1, 2} and ``B`` in {'X', 'Y'}.
        * ``est2`` uses ``d1`` with explicit state names that include states
          never observed in the data (``A == 2`` and ``C == 23``).
        """
        self.m1 = BayesianModel([('A', 'C'), ('B', 'C')])
        self.d1 = pd.DataFrame(data={
            'A': [0, 0, 1],
            'B': [0, 1, 0],
            'C': [1, 1, 0]
        })
        self.d2 = pd.DataFrame(
            data={
                'A': [0, 0, 1, 0, 2, 0, 2, 1, 0, 2],
                'B': ['X', 'Y', 'X', 'Y', 'X', 'Y', 'X', 'Y', 'X', 'Y'],
                'C': [1, 1, 1, 0, 0, 0, 0, 0, 0, 0]
            })
        self.est1 = BayesianEstimator(self.m1, self.d1)
        self.est2 = BayesianEstimator(self.m1,
                                      self.d1,
                                      state_names={
                                          'A': [0, 1, 2],
                                          'B': [0, 1],
                                          'C': [0, 1, 23]
                                      })
        self.est3 = BayesianEstimator(self.m1, self.d2)

    def test_estimate_cpd_dirichlet(self):
        """Dirichlet prior with explicit pseudo-count tables."""
        cpd_A = self.est1.estimate_cpd('A',
                                       prior_type="dirichlet",
                                       pseudo_counts=[[0], [1]])
        self.assertEqual(cpd_A, TabularCPD('A', 2, [[0.5], [0.5]]))

        cpd_B = self.est1.estimate_cpd('B',
                                       prior_type="dirichlet",
                                       pseudo_counts=[[9], [3]])
        self.assertEqual(cpd_B, TabularCPD('B', 2, [[11.0 / 15], [4.0 / 15]]))

        cpd_C = self.est1.estimate_cpd('C',
                                       prior_type="dirichlet",
                                       pseudo_counts=[[0.4, 0.4, 0.4, 0.4],
                                                      [0.6, 0.6, 0.6, 0.6]])
        self.assertEqual(
            cpd_C,
            TabularCPD('C',
                       2, [[0.2, 0.2, 0.7, 0.4], [0.8, 0.8, 0.3, 0.6]],
                       evidence=['A', 'B'],
                       evidence_card=[2, 2]))

    def test_estimate_cpd_improper_prior(self):
        """All-zero pseudo counts give NaN for the unobserved parent state.

        ``d1`` contains no sample with ``A == 1`` and ``B == 1``, so the last
        column of the expected CPD is NaN.
        """
        cpd_C = self.est1.estimate_cpd('C',
                                       prior_type="dirichlet",
                                       pseudo_counts=[[0, 0, 0, 0],
                                                      [0, 0, 0, 0]])
        # ``np.nan`` is used because the ``np.NaN`` alias was removed in NumPy 2.0.
        cpd_C_correct = TabularCPD(
            'C',
            2, [[0.0, 0.0, 1.0, np.nan], [1.0, 1.0, 0.0, np.nan]],
            evidence=['A', 'B'],
            evidence_card=[2, 2],
            state_names={
                'A': [0, 1],
                'B': [0, 1],
                'C': [0, 1]
            })
        # Plain equality cannot be used because nan != nan;
        # assert_array_equal treats NaNs in matching positions as equal.
        np.testing.assert_array_equal(cpd_C.values, cpd_C_correct.values)

    def test_estimate_cpd_shortcuts(self):
        """The 'BDeu' and 'K2' prior types produce the expected CPDs."""
        cpd_C1 = self.est2.estimate_cpd('C',
                                        prior_type='BDeu',
                                        equivalent_sample_size=9)
        cpd_C1_correct = TabularCPD('C',
                                    3,
                                    [[0.2, 0.2, 0.6, 1. / 3, 1. / 3, 1. / 3],
                                     [0.6, 0.6, 0.2, 1. / 3, 1. / 3, 1. / 3],
                                     [0.2, 0.2, 0.2, 1. / 3, 1. / 3, 1. / 3]],
                                    evidence=['A', 'B'],
                                    evidence_card=[3, 2])
        self.assertEqual(cpd_C1, cpd_C1_correct)

        cpd_C2 = self.est3.estimate_cpd('C', prior_type='K2')
        cpd_C2_correct = TabularCPD('C',
                                    2,
                                    [[0.5, 0.6, 1. / 3, 2. / 3, 0.75, 2. / 3],
                                     [0.5, 0.4, 2. / 3, 1. / 3, 0.25, 1. / 3]],
                                    evidence=['A', 'B'],
                                    evidence_card=[3, 2])
        self.assertEqual(cpd_C2, cpd_C2_correct)

    def test_get_parameters(self):
        """``get_parameters()`` matches per-node ``estimate_cpd`` with defaults."""
        cpds = {
            self.est3.estimate_cpd('A'),
            self.est3.estimate_cpd('B'),
            self.est3.estimate_cpd('C'),
        }
        self.assertSetEqual(set(self.est3.get_parameters()), cpds)

    def test_get_parameters2(self):
        """``get_parameters`` accepts a dict of per-node Dirichlet pseudo counts."""
        pseudo_counts = {
            'A': [[1], [2], [3]],
            'B': [[4], [5]],
            'C': [[6, 6, 6, 6, 6, 6], [7, 7, 7, 7, 7, 7]]
        }
        cpds = {
            self.est3.estimate_cpd('A',
                                   prior_type="dirichlet",
                                   pseudo_counts=pseudo_counts['A']),
            self.est3.estimate_cpd('B',
                                   prior_type="dirichlet",
                                   pseudo_counts=pseudo_counts['B']),
            self.est3.estimate_cpd('C',
                                   prior_type="dirichlet",
                                   pseudo_counts=pseudo_counts['C']),
        }
        self.assertSetEqual(
            set(
                self.est3.get_parameters(prior_type="dirichlet",
                                         pseudo_counts=pseudo_counts)), cpds)

    def tearDown(self):
        """Drop every fixture created in ``setUp`` (``est3`` included)."""
        del self.m1
        del self.d1
        del self.d2
        del self.est1
        del self.est2
        del self.est3
class TestBayesianEstimator(unittest.TestCase):
    """Tests for ``BayesianEstimator`` on the small network A -> C <- B.

    Dirichlet pseudo counts are given as 2-D arrays, one row per state of the
    estimated variable and one column per parent configuration.
    """

    def setUp(self):
        """Create the shared model, two data sets and three estimators.

        * ``d1``: three samples, every variable binary.
        * ``d2``: ten samples, ``A`` in {0, 1, 2} and ``B`` in {"X", "Y"}.
        * ``est2`` uses ``d1`` with explicit state names that include states
          never observed in the data (``A == 2`` and ``C == 23``).
        """
        self.m1 = BayesianModel([("A", "C"), ("B", "C")])
        self.d1 = pd.DataFrame(data={"A": [0, 0, 1], "B": [0, 1, 0], "C": [1, 1, 0]})
        self.d2 = pd.DataFrame(
            data={
                "A": [0, 0, 1, 0, 2, 0, 2, 1, 0, 2],
                "B": ["X", "Y", "X", "Y", "X", "Y", "X", "Y", "X", "Y"],
                "C": [1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
            }
        )
        self.est1 = BayesianEstimator(self.m1, self.d1)
        self.est2 = BayesianEstimator(
            self.m1, self.d1, state_names={"A": [0, 1, 2], "B": [0, 1], "C": [0, 1, 23]}
        )
        self.est3 = BayesianEstimator(self.m1, self.d2)

    def test_estimate_cpd_dirichlet(self):
        """Dirichlet prior with explicit pseudo-count tables."""
        cpd_A = self.est1.estimate_cpd(
            "A", prior_type="dirichlet", pseudo_counts=[[0], [1]]
        )
        self.assertEqual(cpd_A, TabularCPD("A", 2, [[0.5], [0.5]]))

        cpd_B = self.est1.estimate_cpd(
            "B", prior_type="dirichlet", pseudo_counts=[[9], [3]]
        )
        self.assertEqual(cpd_B, TabularCPD("B", 2, [[11.0 / 15], [4.0 / 15]]))

        cpd_C = self.est1.estimate_cpd(
            "C",
            prior_type="dirichlet",
            pseudo_counts=[[0.4, 0.4, 0.4, 0.4], [0.6, 0.6, 0.6, 0.6]],
        )
        self.assertEqual(
            cpd_C,
            TabularCPD(
                "C",
                2,
                [[0.2, 0.2, 0.7, 0.4], [0.8, 0.8, 0.3, 0.6]],
                evidence=["A", "B"],
                evidence_card=[2, 2],
            ),
        )

    def test_estimate_cpd_improper_prior(self):
        """All-zero pseudo counts give NaN for the unobserved parent state.

        ``d1`` contains no sample with ``A == 1`` and ``B == 1``, so the last
        column of the expected CPD is NaN.
        """
        cpd_C = self.est1.estimate_cpd(
            "C", prior_type="dirichlet", pseudo_counts=[[0, 0, 0, 0], [0, 0, 0, 0]]
        )
        # ``np.nan`` is used because the ``np.NaN`` alias was removed in NumPy 2.0.
        cpd_C_correct = TabularCPD(
            "C",
            2,
            [[0.0, 0.0, 1.0, np.nan], [1.0, 1.0, 0.0, np.nan]],
            evidence=["A", "B"],
            evidence_card=[2, 2],
            state_names={"A": [0, 1], "B": [0, 1], "C": [0, 1]},
        )
        # Plain equality cannot be used because nan != nan;
        # assert_array_equal treats NaNs in matching positions as equal.
        np.testing.assert_array_equal(cpd_C.values, cpd_C_correct.values)

    def test_estimate_cpd_shortcuts(self):
        """The "BDeu" and "K2" prior types produce the expected CPDs."""
        cpd_C1 = self.est2.estimate_cpd(
            "C", prior_type="BDeu", equivalent_sample_size=9
        )
        cpd_C1_correct = TabularCPD(
            "C",
            3,
            [
                [0.2, 0.2, 0.6, 1.0 / 3, 1.0 / 3, 1.0 / 3],
                [0.6, 0.6, 0.2, 1.0 / 3, 1.0 / 3, 1.0 / 3],
                [0.2, 0.2, 0.2, 1.0 / 3, 1.0 / 3, 1.0 / 3],
            ],
            evidence=["A", "B"],
            evidence_card=[3, 2],
        )
        self.assertEqual(cpd_C1, cpd_C1_correct)

        cpd_C2 = self.est3.estimate_cpd("C", prior_type="K2")
        cpd_C2_correct = TabularCPD(
            "C",
            2,
            [
                [0.5, 0.6, 1.0 / 3, 2.0 / 3, 0.75, 2.0 / 3],
                [0.5, 0.4, 2.0 / 3, 1.0 / 3, 0.25, 1.0 / 3],
            ],
            evidence=["A", "B"],
            evidence_card=[3, 2],
        )
        self.assertEqual(cpd_C2, cpd_C2_correct)

    def test_get_parameters(self):
        """``get_parameters()`` matches per-node ``estimate_cpd`` with defaults."""
        cpds = {
            self.est3.estimate_cpd("A"),
            self.est3.estimate_cpd("B"),
            self.est3.estimate_cpd("C"),
        }
        self.assertSetEqual(set(self.est3.get_parameters()), cpds)

    def test_get_parameters2(self):
        """``get_parameters`` accepts a dict of per-node Dirichlet pseudo counts."""
        pseudo_counts = {
            "A": [[1], [2], [3]],
            "B": [[4], [5]],
            "C": [[6, 6, 6, 6, 6, 6], [7, 7, 7, 7, 7, 7]],
        }
        cpds = {
            self.est3.estimate_cpd(
                "A", prior_type="dirichlet", pseudo_counts=pseudo_counts["A"]
            ),
            self.est3.estimate_cpd(
                "B", prior_type="dirichlet", pseudo_counts=pseudo_counts["B"]
            ),
            self.est3.estimate_cpd(
                "C", prior_type="dirichlet", pseudo_counts=pseudo_counts["C"]
            ),
        }
        self.assertSetEqual(
            set(
                self.est3.get_parameters(
                    prior_type="dirichlet", pseudo_counts=pseudo_counts
                )
            ),
            cpds,
        )

    def tearDown(self):
        """Drop every fixture created in ``setUp`` (``est3`` included)."""
        del self.m1
        del self.d1
        del self.d2
        del self.est1
        del self.est2
        del self.est3