def main():
    """Build the contestant/prize/host network and print the host posterior.

    The network has three nodes with three states each: ``C`` (contestant),
    ``P`` (prize) and ``H`` (host).  ``H`` depends on both ``C`` and ``P``.
    After attaching the CPDs, the distribution of ``H`` is queried with the
    evidence ``C = 0`` and ``P = 0`` and printed.
    """
    # Structure: both the contestant and the prize influence the host.
    network = BayesianModel([('C', 'H'), ('P', 'H')])

    # Conditional table of H; columns enumerate the (C, P) combinations.
    host_table = [
        [0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 1.0, 0.5],
        [0.5, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.5],
        [0.5, 1.0, 0.0, 1.0, 0.5, 0.0, 0.0, 0.0, 0.0],
    ]

    # Attach the two priors and the conditional table to the structure.
    network.add_cpds(
        TabularCPD('C', 3, [[0.33, 0.33, 0.33]]),
        TabularCPD('P', 3, [[0.33, 0.33, 0.33]]),
        TabularCPD('H', 3, host_table,
                   evidence=['C', 'P'], evidence_card=[3, 3]),
    )

    # Posterior of the host given contestant = 0 and prize = 0.
    engine = VariableElimination(network)
    host_posterior = engine.query(['H'], evidence={'C': 0, 'P': 0})
    print(host_posterior['H'])
def predict(self, data):
    """
    Predict the most probable state of every model variable absent from `data`.

    For each row of `data`, the row's values are used as evidence in a MAP
    query over the variables that are nodes of the model but not columns of
    `data`.

    Parameters
    ----------
    data : pandas DataFrame object
        Observed values; every column name must be a node of the model and
        at least one node must be missing from the columns.

    Returns
    -------
    pandas DataFrame
        One column per missing variable, indexed like `data`.

    Raises
    ------
    ValueError
        If `data` already contains every model variable, or contains a
        column that is not a node of the model.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from pgmpy.models import BayesianModel
    >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
    ...                       columns=['A', 'B', 'C', 'D', 'E'])
    >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
    >>> model.fit(values)
    >>> predict_data = values[800:].copy()
    >>> predict_data.drop('E', axis=1, inplace=True)
    >>> y_pred = model.predict(predict_data)
    """
    from pgmpy.inference import VariableElimination

    observed = set(data.columns)
    model_nodes = set(self.nodes())

    if observed == model_nodes:
        raise ValueError("No variable missing in data. Nothing to predict")
    if observed - model_nodes:
        raise ValueError("Data has variables which are not in the model")

    missing_variables = model_nodes - observed

    # The inference class receives the state_names dict of the first CPD.
    first_cpd = self.get_cpds()[0]
    model_inference = VariableElimination(self, state_names=first_cpd.state_names)

    pred_values = defaultdict(list)
    for _, row in data.iterrows():
        best_states = model_inference.map_query(variables=missing_variables,
                                                evidence=row.to_dict())
        for variable, state in best_states.items():
            pred_values[variable].append(state)

    return pd.DataFrame(pred_values, index=data.index)
class TimeVE:
    """Timing harness for a variable-elimination query on a small fitted model."""

    def setup(self):
        """Fit a five-node Bayesian model on random binary data and build the engine."""
        column_names = ['A', 'B', 'C', 'D', 'E']
        samples = np.random.randint(low=0, high=2, size=(1000, 5))
        frame = pd.DataFrame(samples, columns=column_names)

        network = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        network.fit(frame)
        self.inference = VariableElimination(network)

    def time_query(self):
        """Run a query over ``A`` and ``B`` without evidence."""
        self.inference.query(['A', 'B'])
class HailfinderVE:
    """Timing harness for variable-elimination queries on the Hailfinder network."""

    def setup(self):
        """Read ``hailfinder.bif`` and build the inference engine the timers use."""
        reader = BIFReader('hailfinder.bif')
        model = reader.get_bayesian_model()
        self.inference = VariableElimination(model)

    def time_hailfinder_bound(self):
        """Run the query for the ``Boundaries`` variable."""
        # query() is documented to take a list of variable names, so the
        # single name is wrapped in a list rather than passed as a string.
        self.inference.query(['Boundaries'])

    def time_hailfinder_Wind(self):
        """Run the query for the ``WindFieldPln`` variable."""
        # Same as above: pass a one-element list, not a bare string.
        self.inference.query(['WindFieldPln'])
def setUp(self):
    """Create the six-node Bayesian fixture and its variable-elimination engine.

    Edges: A -> J, R -> J, J -> Q, J -> L, G -> L.  All variables are binary.
    """
    network = BayesianModel([('A', 'J'), ('R', 'J'), ('J', 'Q'),
                             ('J', 'L'), ('G', 'L')])

    # Root priors.
    prior_a = TabularCPD('A', 2, values=[[0.2], [0.8]])
    prior_g = TabularCPD('G', 2, values=[[0.6], [0.4]])
    prior_r = TabularCPD('R', 2, values=[[0.4], [0.6]])

    # Conditional tables.
    table_j = TabularCPD('J', 2,
                         values=[[0.9, 0.6, 0.7, 0.1],
                                 [0.1, 0.4, 0.3, 0.9]],
                         evidence=['A', 'R'], evidence_card=[2, 2])
    table_l = TabularCPD('L', 2,
                         values=[[0.9, 0.45, 0.8, 0.1],
                                 [0.1, 0.55, 0.2, 0.9]],
                         evidence=['J', 'G'], evidence_card=[2, 2])
    table_q = TabularCPD('Q', 2,
                         values=[[0.9, 0.2],
                                 [0.1, 0.8]],
                         evidence=['J'], evidence_card=[2])

    network.add_cpds(prior_a, prior_g, table_j, table_l, table_q, prior_r)

    self.bayesian_model = network
    self.bayesian_inference = VariableElimination(network)
def bys1_init(self):
    """Build the movement network and store its inference engine.

    For each direction suffix (D, U, R, L) the nodes ``E<dir>`` and
    ``A<dir>`` are parents of ``M<dir>``; ``END`` is the parent of ``PR``.
    The resulting ``VariableElimination`` object is kept in
    ``self.VEbysmodel1``.
    """
    edges = []
    for suffix in 'DURL':
        edges.append(('E' + suffix, 'M' + suffix))
        edges.append(('A' + suffix, 'M' + suffix))
    edges.append(('END', 'PR'))
    network = bysmodel(edges)

    # Parentless E*/A* nodes all share the same two-state table.
    cpds = [
        tcpd(variable=prefix + suffix, variable_card=2, values=[[0.01, 0.99]])
        for prefix in 'EA'
        for suffix in 'UDLR'
    ]

    # Each M* node is conditioned on its E*/A* pair.
    for suffix in 'DULR':
        cpds.append(
            tcpd(variable='M' + suffix, variable_card=2,
                 evidence=['E' + suffix, 'A' + suffix], evidence_card=[2, 2],
                 values=[[0.75, 0.4, 0.9, 0.9], [0.25, 0.6, 0.1, 0.1]])
        )

    cpds.append(tcpd(variable='END', variable_card=2, values=[[0.01, 0.99]]))
    # PR copies the state of END.
    cpds.append(tcpd(variable='PR', variable_card=2, evidence=['END'],
                     evidence_card=[2], values=[[1.0, 0.0], [0.0, 1.0]]))

    network.add_cpds(*cpds)
    self.VEbysmodel1 = VariableElimination(network)
def setUp(self):
    """Create the Markov-model fixture and its variable-elimination engine."""
    # The graph is the moralised form of the Bayesian fixture: the two extra
    # edges ('A', 'R') and ('J', 'G') connect the co-parents.  Inference only
    # looks at the factors, so the expected results are the same.
    edges = [('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'),
             ('G', 'L'), ('A', 'R'), ('J', 'G')]
    self.markov_model = MarkovModel(edges)

    # Tables are written as CPDs and converted to factors, in the order
    # A, R, J, Q, L, G.
    tables = [
        TabularCPD('A', 2, values=[[0.2], [0.8]]),
        TabularCPD('R', 2, values=[[0.4], [0.6]]),
        TabularCPD('J', 2,
                   values=[[0.9, 0.6, 0.7, 0.1],
                           [0.1, 0.4, 0.3, 0.9]],
                   evidence=['A', 'R'], evidence_card=[2, 2]),
        TabularCPD('Q', 2,
                   values=[[0.9, 0.2],
                           [0.1, 0.8]],
                   evidence=['J'], evidence_card=[2]),
        TabularCPD('L', 2,
                   values=[[0.9, 0.45, 0.8, 0.1],
                           [0.1, 0.55, 0.2, 0.9]],
                   evidence=['J', 'G'], evidence_card=[2, 2]),
        TabularCPD('G', 2, [[0.6], [0.4]]),
    ]
    factors = [table.to_factor() for table in tables]

    self.markov_model.add_factors(*factors)
    self.markov_inference = VariableElimination(self.markov_model)
def setUp(self):
    """Create factors, CPDs and inference engines with and without state names."""
    self.sn2 = {'grade': ['A', 'B', 'F'],
                'diff': ['high', 'low'],
                'intel': ['poor', 'good', 'very good']}
    self.sn1 = {'speed': ['low', 'medium', 'high'],
                'switch': ['on', 'off'],
                'time': ['day', 'night']}

    def all_ones_factor(**extra):
        # Fresh argument objects on every call, as in two separate literals.
        return DiscreteFactor(['speed', 'switch', 'time'], [3, 2, 2],
                              np.ones(12), **extra)

    def grade_given_diff_intel(**extra):
        return TabularCPD('grade', 3,
                          [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                           [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                           [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                          evidence=['diff', 'intel'], evidence_card=[2, 3],
                          **extra)

    # Plain and state-named variants of the same factor / CPD.
    self.phi1 = all_ones_factor()
    self.phi2 = all_ones_factor(state_names=self.sn1)
    self.cpd1 = grade_given_diff_intel()
    self.cpd2 = grade_given_diff_intel(state_names=self.sn2)

    # Small student network used by both inference engines.
    student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
    student.add_cpds(
        TabularCPD('diff', 2, [[0.2, 0.8]]),
        TabularCPD('intel', 2, [[0.3, 0.7]]),
        TabularCPD('grade', 3,
                   [[0.1, 0.1, 0.1, 0.1],
                    [0.1, 0.1, 0.1, 0.1],
                    [0.8, 0.8, 0.8, 0.8]],
                   evidence=['diff', 'intel'], evidence_card=[2, 2]),
    )

    self.model1 = VariableElimination(student)
    self.model2 = VariableElimination(student, state_names=self.sn2)
def Test_Data_Inference_map_n_steps(self, df_test, n_tsteps):
    """
    Perform both MAP and marginal inference and report values.

    For every row of `df_test`, the non-target columns are used as evidence.
    When ``check_data_in_evidence`` accepts the evidence, a joint MAP query
    and one marginal query per target variable are run; otherwise the result
    cells for that row are filled with NaN.

    Parameters
    ----------
    df_test : pandas DataFrame
        Test rows; must contain the column ``M_t``.
    n_tsteps : int
        Number of time steps.  The targets are ``M_t`` plus ``M_t+1`` ...
        ``M_t+(n_tsteps-1)`` when `n_tsteps` is greater than 1.

    Returns
    -------
    pandas DataFrame
        Copy of the ``COLUMN_SEQUENCE`` columns of `df_test` with, per target
        ``n``, the columns ``n_0``, ``n_1`` (marginal values), ``n`` (mapped
        through ``Map_Occ_Values``) and ``n_map`` (MAP state), plus
        ``M_t_orig`` holding the original ``M_t``.
    """
    df_inference_results = df_test.filter(items=COLUMN_SEQUENCE).copy()
    df_inference_results['M_t_orig'] = df_inference_results['M_t']

    infer = VariableElimination(self.model)

    # Distinct values observed in each column of the test frame.
    dict_unique_vals = dict(
        zip(df_test.columns, [df_test[i].unique() for i in df_test.columns]))

    result_list = ['M_t']
    if n_tsteps > 1:
        result_list = result_list + [
            "M_t+{}".format(x) for x in range(1, n_tsteps)
        ]

    evidence_columns = [x for x in df_test.columns if x not in result_list]
    evidence_rows = df_test.filter(items=evidence_columns).to_dict('index')

    count = 0
    for index_key, value in evidence_rows.items():
        if check_data_in_evidence(value, dict_unique_vals):
            # MAP query
            tic = time.time()
            map_result = infer.map_query(variables=result_list,
                                         evidence=value)
            toc = time.time() - tic
            logging.info(
                "thermostat {} - Elapsed seconds for MAP query {:.2f}".
                format(self.thermostat.tstat_id, toc))

            for n in result_list:
                tic = time.time()
                result = infer.query(variables=[n], evidence=value)
                # Elapsed time, not the absolute timestamp, so the log line
                # below reports a duration like the MAP branch does.
                toc = time.time() - tic
                if TIME_INFERENCE:
                    print('Elapsed: %s' % toc)
                # NOTE(review): nesting of this log call relative to the
                # TIME_INFERENCE check was reconstructed - confirm.
                logging.info(
                    "thermostat {} - Elapsed seconds for query {:.2f}".
                    format(self.thermostat.tstat_id, toc))

                df_inference_results.at[
                    index_key, '{}_0'.format(n)] = result[n].values[0]
                df_inference_results.at[
                    index_key, '{}_1'.format(n)] = result[n].values[1]
                df_inference_results.at[index_key,
                                        '{}'.format(n)] = Map_Occ_Values(
                                            result[n].values[1])
                df_inference_results.at[index_key,
                                        '{}_map'.format(n)] = map_result[n]
        else:
            # Evidence rejected: leave every result cell empty for this row.
            for n in result_list:
                df_inference_results.at[index_key,
                                        '{}_0'.format(n)] = np.nan
                df_inference_results.at[index_key,
                                        '{}_1'.format(n)] = np.nan
                df_inference_results.at[index_key,
                                        '{}'.format(n)] = np.nan
                df_inference_results.at[index_key,
                                        '{}_map'.format(n)] = np.nan
        count += 1

    logging.info("thermostat {} - Iterations of test {}".format(
        self.thermostat.tstat_id, count))
    return df_inference_results
values=[[0.9, 0.2], [0.1, 0.8]], evidence=['Cancer'], evidence_card=[2]) cpd_dysp = TabularCPD(variable='Dyspnoea', variable_card=2, values=[[0.65, 0.3], [0.35, 0.7]], evidence=['Cancer'], evidence_card=[2]) # Associating the parameters with the model structure. cancer_model.add_cpds(cpd_poll, cpd_smoke, cpd_cancer, cpd_xray, cpd_dysp) # Checking if the cpds are valid for the model. cancer_model.check_model() cancer_infer = VariableElimination(cancer_model) print('All local independecies are as follows') cancer_model.get_independencies() print('Displaying CPDs') print(cancer_model.get_cpds('Pollution')) print(cancer_model.get_cpds('Smoker')) print(cancer_model.get_cpds('Cancer')) print(cancer_model.get_cpds('Xray')) print(cancer_model.get_cpds('Dyspnoea')) print('\n Probablity of Cancer given smoker') q = cancer_infer.query(variables=['Cancer'], evidence={'Smoker': 1}) print(q) print('\n Probablity of Cancer given smoker')
import pandas as pd

# The CSV is read without a header row; columns are named A..M plus RESULT.
column_names = list('ABCDEFGHIJKLM') + ['RESULT']
data = pd.read_csv("datasetheart.csv", names=column_names)
print(data.head(5))
print(data.tail(5))

from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator

# Chain-structured network A -> B -> C -> D -> RESULT, fitted by maximum likelihood.
chain_edges = [("A", "B"), ("B", "C"), ("C", "D"), ("D", "RESULT")]
model = BayesianModel(chain_edges)
model.fit(data, estimator=MaximumLikelihoodEstimator)

from pgmpy.inference import VariableElimination

# Distribution of RESULT given C = 2.
infer = VariableElimination(model)
q = infer.query(variables=['RESULT'], evidence={"C": 2})
print(q)
# Load the data set; column names come from `attributes`.
heartDisease = pd.read_csv('heart.csv', names= attributes)
# Handle missing values: '?' entries are replaced with NaN.
heartDisease = heartDisease.replace('?', np.nan)

# Show the first rows and the dtype of every column.
print('Few examples from the dataset are given below- ')
print(heartDisease.head())
print('\nAttributes and data types-')
print(heartDisease.dtypes)

# Bayesian network structure: each tuple is a directed edge (parent, child).
model = BayesianModel([('age', 'trestbps'), ('age', 'fbs'), ('sex', 'trestbps'), ('exang', 'trestbps'), ('trestbps', 'heartdisease'), ('fbs', 'heartdisease'), ('heartdisease', 'restecg'), ('heartdisease', 'thalach'), ('heartdisease', 'chol')])

# Learn the CPDs (conditional probability distributions) from the data
# with the maximum likelihood estimator.
print('\nLearning CPDs using Maximum Likelihood Estimators...')
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

# Inference on the fitted network by variable elimination.
print('\nInferencing with Bayesian Network:')
HeartDisease_infer = VariableElimination(model)

# Query 1: distribution of 'heartdisease' given the evidence age = 20.
print('\n1.Probability of HeartDisease given Age = 20')
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 20})
print(q)

# Query 2 is disabled (evidence: sex = 0, chol = 100):
# print('\n2. Probability of HeartDisease given chol (Cholestoral) = 100')
# q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'sex': 0, 'chol': 100})
# print(q)
('cost','no_of_people'), ('location','no_of_people')]) cpd_location = TabularCPD('location', 2, [[0.6,0.4]]) cpd_quality = TabularCPD('quality', 3, [[0.3,0.5,0.2]]) cpd_cost = TabularCPD('cost', 2, [[0.8,0.6,0.1,0.6,0.6,0.05], # 2 X 2 X 3 = 12 --> 6 each row X 2 [0.2,0.1,0.9,0.4,0.4,0.95]], ['location','quality'], [2,3]) cpd_no_of_people = TabularCPD('no_of_people', 2, [[0.6,0.8,0.1,0.6], # 2 X 2 X 3 = 12 --> 6 each row X 2 [0.4,0.2,0.9,0.4]], ['cost','location'], [2,2]) resurant.add_cpds(cpd_location, cpd_quality, cpd_cost, cpd_no_of_people) # Creating the inference object of the model resurant_inference = VariableElimination(resurant) # Doing simple queries over one or multiple variables resurant_inference.query(variables=['location']) resurant_inference.query(variables=['location','no_of_people']) resurant_inference.query(variables=['no_of_people'], evidence={'location':1, 'quality':1}) # If we have evidence resurant_inference.query(variables=['no_of_people'], evidence={'location':1}, elimination_order=['quality', 'cost']) # can sepcify elimination sequence / otherwise system will choose automatically -2- " Induced Graph " " also defined as the undirected graph constructed by the unionof all the graphs formed in each step of variable elimination " # Check induced graph induced_graph = resurant_inference.induced_graph(['cost', 'location', 'no_of_people', 'quality'])
print(heartDisease.head()) #display the Attributes names and datatyes print('\n Attributes and datatypes') print(heartDisease.dtypes) #Creat Model- Bayesian Network model = BayesianModel([('age', 'heartdisease'), ('sex', 'heartdisease'), ('exang', 'heartdisease'), ('cp', 'heartdisease'), ('heartdisease', 'restecg'), ('heartdisease', 'chol')]) #Learning CPDs using Maximum Likelihood Estimators print('\n Learning CPD using Maximum likelihood estimators') model.fit(heartDisease, estimator=MaximumLikelihoodEstimator) # Inferencing with Bayesian Network print('\n Inferencing with Bayesian Network:') HeartDiseasetest_infer = VariableElimination(model) #computing the Probability of HeartDisease given restecg print('\n 1.Probability of HeartDisease given evidence= restecg :1') q1 = HeartDiseasetest_infer.query(variables=['heartdisease'], evidence={'restecg': 1}) print(q1) #computing the Probability of HeartDisease given cp print('\n 2.Probability of HeartDisease given evidence= cp:2 ') q2 = HeartDiseasetest_infer.query(variables=['heartdisease'], evidence={'cp': 2}) print(q2)
evidence=['I'],evidence_card=[3]) femaleSchool_cpd = TabularCPD( variable='F',variable_card=2, values=[[.8,.3,.2],[.2,.7,.8]], evidence=['I'],evidence_card=[3]) govtCorr_cpd = TabularCPD( variable='G', variable_card=2, values=[[.05,.4,.55,.85],[.95,.6,.45,.15]], evidence=['F','L'],evidence_card=[2,2]) model.add_cpds(income_cpd, lifeExp_cpd, femaleSchool_cpd, govtCorr_cpd) inference = VariableElimination(model) prob_G = inference.query(variables='G',evidence=dict([('L',1)])) print(prob_G['G']) # +-----+----------+ # | G | phi(G) | # |-----+----------| # | G_0 | 0.7292 | # | G_1 | 0.2708 | # +-----+----------+ inference = VariableElimination(model) prob_G = inference.query(variables='G',evidence=dict([('F',1)])) print(prob_G['G']) # +-----+----------+ # | G | phi(G) | # |-----+----------| # | G_0 | 0.7174 |
class TestVariableEliminationMarkov(unittest.TestCase): def setUp(self): # It is just a moralised version of the above Bayesian network so all the results are same. Only factors # are under consideration for inference so this should be fine. self.markov_model = MarkovModel([('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'), ('G', 'L'), ('A', 'R'), ('J', 'G')]) factor_a = TabularCPD('A', 2, values=[[0.2], [0.8]]).to_factor() factor_r = TabularCPD('R', 2, values=[[0.4], [0.6]]).to_factor() factor_j = TabularCPD('J', 2, values=[[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]], evidence=['A', 'R'], evidence_card=[2, 2]).to_factor() factor_q = TabularCPD('Q', 2, values=[[0.9, 0.2], [0.1, 0.8]], evidence=['J'], evidence_card=[2]).to_factor() factor_l = TabularCPD('L', 2, values=[[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]], evidence=['J', 'G'], evidence_card=[2, 2]).to_factor() factor_g = TabularCPD('G', 2, [[0.6], [0.4]]).to_factor() self.markov_model.add_factors(factor_a, factor_r, factor_j, factor_q, factor_l, factor_g) self.markov_inference = VariableElimination(self.markov_model) # All the values that are used for comparision in the all the tests are # found using SAMIAM (assuming that it is correct ;)) def test_query_single_variable(self): query_result = self.markov_inference.query(['J']) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.416, 0.584])) def test_query_multiple_variable(self): query_result = self.markov_inference.query(['Q', 'J']) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.416, 0.584])) np_test.assert_array_almost_equal(query_result['Q'].values, np.array([0.4912, 0.5088])) def test_query_single_variable_with_evidence(self): query_result = self.markov_inference.query(variables=['J'], evidence={'A': 0, 'R': 1}) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.60, 0.40])) def test_query_multiple_variable_with_evidence(self): query_result = self.markov_inference.query(variables=['J', 'Q'], 
evidence={'A': 0, 'R': 0, 'G': 0, 'L': 1}) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.818182, 0.181818])) np_test.assert_array_almost_equal(query_result['Q'].values, np.array([0.772727, 0.227273])) def test_query_multiple_times(self): # This just tests that the models are not getting modified while querying them query_result = self.markov_inference.query(['J']) query_result = self.markov_inference.query(['J']) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.416, 0.584])) query_result = self.markov_inference.query(['Q', 'J']) query_result = self.markov_inference.query(['Q', 'J']) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.416, 0.584])) np_test.assert_array_almost_equal(query_result['Q'].values, np.array([0.4912, 0.5088])) query_result = self.markov_inference.query(variables=['J'], evidence={'A': 0, 'R': 1}) query_result = self.markov_inference.query(variables=['J'], evidence={'A': 0, 'R': 1}) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.60, 0.40])) query_result = self.markov_inference.query(variables=['J', 'Q'], evidence={'A': 0, 'R': 0, 'G': 0, 'L': 1}) query_result = self.markov_inference.query(variables=['J', 'Q'], evidence={'A': 0, 'R': 0, 'G': 0, 'L': 1}) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.818182, 0.181818])) np_test.assert_array_almost_equal(query_result['Q'].values, np.array([0.772727, 0.227273])) def test_max_marginal(self): np_test.assert_almost_equal(self.markov_inference.max_marginal(), 0.1659, decimal=4) def test_max_marginal_var(self): np_test.assert_almost_equal(self.markov_inference.max_marginal(['G']), 0.5714, decimal=4) def test_max_marginal_var1(self): np_test.assert_almost_equal(self.markov_inference.max_marginal(['G', 'R']), 0.4055, decimal=4) def test_max_marginal_var2(self): np_test.assert_almost_equal(self.markov_inference.max_marginal(['G', 'R', 'A']), 0.3260, decimal=4) def test_map_query(self): 
map_query = self.markov_inference.map_query() self.assertDictEqual(map_query, {'A': 1, 'R': 1, 'J': 1, 'Q': 1, 'G': 0, 'L': 0}) def test_map_query_with_evidence(self): map_query = self.markov_inference.map_query(['A', 'R', 'L'], {'J': 0, 'Q': 1, 'G': 0}) self.assertDictEqual(map_query, {'A': 1, 'R': 0, 'L': 0}) def test_induced_graph(self): induced_graph = self.markov_inference.induced_graph(['G', 'Q', 'A', 'J', 'L', 'R']) result_edges = sorted([sorted(x) for x in induced_graph.edges()]) self.assertEqual([['A', 'J'], ['A', 'R'], ['G', 'J'], ['G', 'L'], ['J', 'L'], ['J', 'Q'], ['J', 'R'], ['L', 'R']], result_edges) def test_induced_width(self): result_width = self.markov_inference.induced_width(['G', 'Q', 'A', 'J', 'L', 'R']) self.assertEqual(2, result_width) def tearDown(self): del self.markov_inference del self.markov_model
def setup(self):
    """Read ``hailfinder.bif`` and store a variable-elimination engine for its model."""
    hailfinder_model = BIFReader('hailfinder.bif').get_bayesian_model()
    self.inference = VariableElimination(hailfinder_model)
root.attributes("-fullscreen", False) var = 0 root.bind("<F11>", f) root.configure(background='#599442') ############ ############ ############ ############ ############ # READ THE DATASET AND INITIALIZE THE MODELS # ############ ############ ############ ############ ############ data = pd.read_csv("../Datasets/Final_Processed_Training.csv") data = data.drop(columns=["Unnamed: 0"]) # Select the already processed datasets and train the BN & Inference models BN_Model = Bayesian_Net_Model(data) inference_model = VariableElimination(BN_Model) # Set the entry for the first parameter var1_descr = " Enter a text review: " label_descr1 = tk.Label(root, text=var1_descr, font='Helvetica 11 bold', bg='#599442') parameter_1 = tkst.ScrolledText(root, width=75, height=5, wrap=WORD, bd=3, font='Helvetica 10') #parameter_1 = tk.Entry(root, width =100, bd=3)
class InputAgent:
    """Chooses a move by combining a Bayesian-network query with a look-ahead score.

    ``keyarray`` maps network variable names to move indices: the position of
    a name in that sequence is the index stored in ``bys1_input``.
    """

    def __init__(self, k_output):
        self.keyarray = k_output
        self.input = 0
        # Evidence dict fed to the network query (filled by reset_info /
        # condition_cal).
        self.info1 = {}
        # Placeholder until bys1_init() stores the inference engine here.
        self.VEbysmodel1 = 0
        self.bys1_input = 0
        self.beta_input = 0
        self.beta_input_list = []
        self.bys1_beta_input = 0

    def generate_input(self):
        """Pick a random integer in [0, 4], store it in ``self.input`` and print it."""
        self.input = random.randint(0, 4)
        print("input ", self.input)

    def bys1_init(self):
        """Build the movement network and store its inference engine.

        For each direction suffix (D, U, R, L) the nodes ``E<dir>`` and
        ``A<dir>`` are parents of ``M<dir>``; ``END`` is the parent of ``PR``.
        """
        edges = []
        for suffix in 'DURL':
            edges.append(('E' + suffix, 'M' + suffix))
            edges.append(('A' + suffix, 'M' + suffix))
        edges.append(('END', 'PR'))
        network = bysmodel(edges)

        # Parentless E*/A* nodes all share the same two-state table.
        cpds = [
            tcpd(variable=prefix + suffix, variable_card=2,
                 values=[[0.01, 0.99]])
            for prefix in 'EA'
            for suffix in 'UDLR'
        ]
        # Each M* node is conditioned on its E*/A* pair.
        for suffix in 'DULR':
            cpds.append(
                tcpd(variable='M' + suffix, variable_card=2,
                     evidence=['E' + suffix, 'A' + suffix],
                     evidence_card=[2, 2],
                     values=[[0.75, 0.4, 0.9, 0.9], [0.25, 0.6, 0.1, 0.1]])
            )
        cpds.append(tcpd(variable='END', variable_card=2,
                         values=[[0.01, 0.99]]))
        cpds.append(tcpd(variable='PR', variable_card=2, evidence=['END'],
                         evidence_card=[2], values=[[1.0, 0.0], [0.0, 1.0]]))

        network.add_cpds(*cpds)
        self.VEbysmodel1 = VariableElimination(network)

    def bys1_generate(self, info):
        """Query the network and store the indices of the two best-scoring variables.

        The score of a variable is element 1 of its query result's values.
        ``self.bys1_input`` becomes ``(index of best, index of second best)``
        with indices taken from ``self.keyarray``.
        """
        self.reset_info()
        self.condition_cal(info)
        marginals = self.VEbysmodel1.query(['MD', 'MU', 'ML', 'MR', 'PR'],
                                           evidence=self.info1)

        best_p, second_p = -1.0, -1.0
        best_move, second_move = '', ''
        for move in marginals.keys():
            prob = marginals[move].values[1]
            if best_p < prob:
                # New leader: the previous leader becomes the runner-up.
                second_p, second_move = best_p, best_move
                best_p, best_move = prob, move
            elif second_p < prob:
                second_p, second_move = prob, move

        print(best_move, second_move, best_p, second_p)
        self.bys1_input = (self.keyarray.index(best_move),
                           self.keyarray.index(second_move))

    def condition_cal(self, info):
        """Set evidence flags in ``self.info1`` from the current game state.

        ``A*`` flags compare the player position with the apple position,
        ``E*`` flags mark an obstacle exactly one cell away on the same row
        or column, and ``END`` is set when ``info['dead']`` is truthy.
        """
        player_cord, apple_cord, enemy_array = self.info_generate(info)
        px, py = player_cord[0], player_cord[1]

        if px < apple_cord[0]:
            self.info1['AR'] = 1
        if px > apple_cord[0]:
            self.info1['AL'] = 1
        if py < apple_cord[1]:
            self.info1['AD'] = 1
        if py > apple_cord[1]:
            self.info1['AU'] = 1

        for enemy in enemy_array:
            if (px == enemy[0] - 1) and (py == enemy[1]):
                self.info1['ER'] = 1
            if (px == enemy[0] + 1) and (py == enemy[1]):
                self.info1['EL'] = 1
            if (py == enemy[1] - 1) and (px == enemy[0]):
                self.info1['ED'] = 1
            if (py == enemy[1] + 1) and (px == enemy[0]):
                self.info1['EU'] = 1

        if info['dead']:
            self.info1['END'] = 1

    def reset_info(self):
        """Reset every evidence flag in ``self.info1`` to 0."""
        for flag in ('EU', 'ED', 'ER', 'EL', 'AU', 'AD', 'AL', 'AR', 'END'):
            self.info1[flag] = 0

    def info_generate(self, info):
        """Return ``(player_cord, apple_cord, enemy_array)`` extracted from `info`.

        ``player_cord`` is element 0 of the player's x/y sequences; the
        remaining elements (indices 1 .. length-1) followed by every entry of
        ``info['wall']`` form ``enemy_array``.
        """
        player = info['player']
        player_cord = (player.x[0], player.y[0])
        enemy_array = [(player.x[i], player.y[i])
                       for i in range(1, player.length)]
        enemy_array.extend(info['wall'])
        apple_cord = info['apple']
        return (player_cord, apple_cord, enemy_array)

    def info_generate_withp(self, info):
        """Return ``(info_generate(info), info['player'])``."""
        return (self.info_generate(info), info['player'])

    def beta_generate(self, info, steps):
        """Score the four moves by look-ahead and record the acceptable ones.

        ``self.beta_input`` becomes the index of the first highest score
        (0 = right, 1 = left, 2 = up, 3 = down) and ``self.beta_input_list``
        the indices whose score is within 2 of the best.
        """
        self.beta_input_list.clear()
        snapshot = info.copy()
        scores = [self.beta_recursion(snapshot, steps, move)
                  for move in range(4)]
        tolerance = 2

        best_score = -65525
        best_index = 0
        for index, score in enumerate(scores):
            if score > best_score:
                best_score = score
                best_index = index
        self.beta_input = best_index

        for index, score in enumerate(scores):
            print(score, end="")
            if abs(best_score - score) <= tolerance:
                self.beta_input_list.append(index)
        print(self.beta_input_list)

    def beta_recursion(self, info, steps, player_move):
        """Return the look-ahead score of applying `player_move` from `info`.

        Returns -40 when the player position coincides with an obstacle or
        when the move is refused because of the player's current direction.
        Coinciding with the apple adds 40.  At ``steps == 0`` a further 20 is
        added and the recursion stops; otherwise the floor-averaged score of
        the four follow-up moves is added.
        """
        (player_cord, apple_cord, enemy_array), player = \
            self.info_generate_withp(info)

        for enemy in enemy_array:
            if self.isCollission(player_cord, enemy):
                return -40

        total_score = 0
        if self.isCollission(player_cord, apple_cord):
            total_score += 40
        if steps == 0:
            return total_score + 20
        steps -= 1

        player_copy = player.copyself()
        # move index -> (direction value that blocks the move, mover method)
        move_rules = {
            0: (1, 'moveRight'),
            1: (0, 'moveLeft'),
            2: (3, 'moveUp'),
            3: (2, 'moveDown'),
        }
        rule = move_rules.get(player_move)
        if rule is not None:
            blocked_direction, mover = rule
            if player_copy.direction != blocked_direction:
                getattr(player_copy, mover)()
            else:
                return -40
        player_copy.update()

        next_info = info.copy()
        next_info['player'] = player_copy
        follow_up = sum(self.beta_recursion(next_info, steps, move)
                        for move in range(4))
        total_score += follow_up // 4
        return total_score

    def isCollission(self, cord1, cord2):
        """Return True when both coordinates of `cord1` and `cord2` are equal."""
        return bool(cord1[0] == cord2[0] and cord1[1] == cord2[1])

    def combine_bys1_beta(self, info, steps):
        """Choose the final move from the network ranking and the look-ahead list.

        The best network move is used when the look-ahead accepts it, else the
        second-best one, else the look-ahead's own best move.
        """
        self.bys1_generate(info)
        self.beta_generate(info, steps)
        for candidate in self.bys1_input[:2]:
            if candidate in self.beta_input_list:
                self.bys1_beta_input = candidate
                break
        else:
            self.bys1_beta_input = self.beta_input
class TestVariableElimination(unittest.TestCase): def setUp(self): self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'), ('G', 'L')]) cpd_a = TabularCPD('A', 2, [[0.2], [0.8]]) cpd_r = TabularCPD('R', 2, [[0.4], [0.6]]) cpd_j = TabularCPD('J', 2, [[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]], ['R', 'A'], [2, 2]) cpd_q = TabularCPD('Q', 2, [[0.9, 0.2], [0.1, 0.8]], ['J'], [2]) cpd_l = TabularCPD('L', 2, [[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]], ['G', 'J'], [2, 2]) cpd_g = TabularCPD('G', 2, [[0.6], [0.4]]) self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r) self.bayesian_inference = VariableElimination(self.bayesian_model) # All the values that are used for comparision in the all the tests are # found using SAMIAM (assuming that it is correct ;)) def test_query_single_variable(self): query_result = self.bayesian_inference.query(['J']) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.416, 0.584])) def test_query_multiple_variable(self): query_result = self.bayesian_inference.query(['Q', 'J']) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.416, 0.584])) np_test.assert_array_almost_equal(query_result['Q'].values, np.array([0.4912, 0.5088])) def test_query_single_variable_with_evidence(self): query_result = self.bayesian_inference.query(variables=['J'], evidence={'A': 0, 'R': 1}) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.60, 0.40])) def test_query_multiple_variable_with_evidence(self): query_result = self.bayesian_inference.query(variables=['J', 'Q'], evidence={'A': 0, 'R': 0, 'G': 0, 'L': 1}) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.818182, 0.181818])) np_test.assert_array_almost_equal(query_result['Q'].values, np.array([0.772727, 0.227273])) def test_query_multiple_times(self): # This just tests that the models are not getting modified while querying them query_result = self.bayesian_inference.query(['J']) 
query_result = self.bayesian_inference.query(['J']) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.416, 0.584])) query_result = self.bayesian_inference.query(['Q', 'J']) query_result = self.bayesian_inference.query(['Q', 'J']) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.416, 0.584])) np_test.assert_array_almost_equal(query_result['Q'].values, np.array([0.4912, 0.5088])) query_result = self.bayesian_inference.query(variables=['J'], evidence={'A': 0, 'R': 1}) query_result = self.bayesian_inference.query(variables=['J'], evidence={'A': 0, 'R': 1}) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.60, 0.40])) query_result = self.bayesian_inference.query(variables=['J', 'Q'], evidence={'A': 0, 'R': 0, 'G': 0, 'L': 1}) query_result = self.bayesian_inference.query(variables=['J', 'Q'], evidence={'A': 0, 'R': 0, 'G': 0, 'L': 1}) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.818182, 0.181818])) np_test.assert_array_almost_equal(query_result['Q'].values, np.array([0.772727, 0.227273])) def test_max_marginal(self): np_test.assert_almost_equal(self.bayesian_inference.max_marginal(), 0.1659, decimal=4) def test_max_marginal_var(self): np_test.assert_almost_equal(self.bayesian_inference.max_marginal(['G']), 0.5714, decimal=4) def test_max_marginal_var1(self): np_test.assert_almost_equal(self.bayesian_inference.max_marginal(['G', 'R']), 0.4055, decimal=4) def test_max_marginal_var2(self): np_test.assert_almost_equal(self.bayesian_inference.max_marginal(['G', 'R', 'A']), 0.3260, decimal=4) def test_map_query(self): map_query = self.bayesian_inference.map_query() self.assertDictEqual(map_query, {'A': 1, 'R': 1, 'J': 1, 'Q': 1, 'G': 0, 'L': 0}) def test_map_query_with_evidence(self): map_query = self.bayesian_inference.map_query(['A', 'R', 'L'], {'J': 0, 'Q': 1, 'G': 0}) self.assertDictEqual(map_query, {'A': 1, 'R': 0, 'L': 0}) def test_induced_graph(self): induced_graph = 
self.bayesian_inference.induced_graph(['G', 'Q', 'A', 'J', 'L', 'R']) result_edges = sorted([sorted(x) for x in induced_graph.edges()]) self.assertEqual([['A', 'J'], ['A', 'R'], ['G', 'J'], ['G', 'L'], ['J', 'L'], ['J', 'Q'], ['J', 'R'], ['L', 'R']], result_edges) def test_induced_width(self): result_width = self.bayesian_inference.induced_width(['G', 'Q', 'A', 'J', 'L', 'R']) self.assertEqual(2, result_width) def tearDown(self): del self.bayesian_inference del self.bayesian_model
# Read the comma-separated data file named by the first command-line argument.
data = pd.read_csv(sys.argv[1], ",")
data_size = len(data)
# Disabled alternative setup:
# pr = {}
# data = pd.read_csv('data.csv') #"fisrm.csv"
# data_size = len(data)

# Build the network structure edge by edge, then learn its parameters with
# a Bayesian estimator using a BDeu prior (equivalent sample size 10).
model = BayesianModel()
list_edges = [('TQ', 'DFT'), ('DPQ', 'DI'), ('C','DI'),('DI','DFT'),('DI','RD'),('DFT','RD'),('RD','DFO'),('OU','DFO')]
model.add_edges_from(list_edges)
model.fit(data, estimator_type = BayesianEstimator, prior_type = "BDeu", equivalent_sample_size = 10)
for edge in model.edges():
    print(edge)
print("\n")

infer = VariableElimination(model)
nodes = model.nodes()
Distribution = {}
# NOTE(review): `pr` is not assigned anywhere in this fragment (the only
# assignment above is commented out) - presumably it is the evidence dict
# defined elsewhere; confirm.
for key in pr.keys():
    # Indicator vector over the states 0..4: 1 where i equals pr[key], else 0.
    Distribution[key] = [1 - abs(np.sign(pr[key] - i)) for i in range(5)]
    # Observed variables are removed so that only the rest are queried below.
    nodes.remove(key)
print('pr done')
# Query every remaining node given the evidence in `pr`.
for key in nodes:
    Distribution[key] = infer.query([key], evidence = pr)[key].values
    print('done' + key)
print(Distribution['DPQ'])
plt.subplot(4, 2, 1)
# Associating the parameters with the model structure. for cpd in cpds: grass_model.add_cpds(cpd) # Checking if the cpds are valid for the model. grass_model.check_model() grass_model.get_cpds() # Inference with BN # Now that we have represented the BN with a complete JPD of all variables, it is theoretically possible to answer any query of certain variable(s) by marginalizing all irrelevant variables. # This procedure is called **inference**. # In general, a variable elimination method is employed to make use of the CPDs. # Do exact inference using Variable Elimination grass_infer = VariableElimination(grass_model) # Computing the probability of cloudy, sprinkler and rain given evidence of wet grass. q = grass_infer.query(variables=NODES[:-1], evidence={NODES[-1]: 1}) print('Inference with Evidence of Wet=True') for node in NODES[:-1]: print(node, '\n', q[node]) # Migrate distribution from BN to MN # Chimera-structured Boltzmann machine. # Represent a Bayesian network with joint probability # Moralize Bayesian network to Markov network # Define the two features as binary quadratic functions
import numpy as np
import pandas as pd
from pgmpy.inference import VariableElimination
from pgmpy.models import BayesianModel

# Ratings data set; one column per model variable.
data = pd.read_csv('~/Documents/unifiedMLData.csv')
#print data

# Only the occupation -> rating edge is currently enabled.
movie_model = BayesianModel([
    ('occupation', 'rating')
    #,('gender','rating')
    #,('age','rating')
    #,('age','occupation')
    #,('gender','occupation')
    #,('genre','movie_title')
    #,('movie_title','rating')
])
movie_model.fit(data)

model_infer = VariableElimination(movie_model)
# query() expects a list of variable names; a bare string would be iterated
# character by character.
results = model_infer.query(['rating'])
print(results['rating'])
#print(movie_model.get_cpds('rating'))
def configure(self, rf): # command format will be the following: # trainPGClassifier selfName networkStructure print sys.argv # read network structure and make graph # labels in networkStructure identical to model names # networkStructure as a string containing a list of tuples # selfName = 'actionPGN' # netStructureString = "[('Actions3 exp','actionPGN'), ('Actions4','actionPGN')]" selfName = sys.argv[1] netStructureString = sys.argv[2] netStructure = ast.literal_eval(netStructureString) print netStructure # collect all model names in a list to extract a unique set modelList = [] for k in netStructure: modelList += list(k) print list(set(modelList)) # create a port to connect to /sam/rpc:i to query model path for each model name portsList = [] querySupervisorPort = yarp.RpcClient() querySupervisorPortName = '/sam/' + selfName + '/queryRpc' querySupervisorPort.open(querySupervisorPortName) portsList.append({'name': querySupervisorPortName, 'port': querySupervisorPort}) yarp.Network.connect(querySupervisorPortName, '/sam/rpc:i') # --------------------------------------------------------------------------------------------------------------- modelDict = dict() failFlag = False for j in modelList: if j != selfName: modNameSplit = j.split(' ') cmd = yarp.Bottle() cmd.addString('dataDir') for l in modNameSplit: cmd.addString(l) reply = yarp.Bottle() querySupervisorPort.write(cmd, reply) if reply.get(0).asString() != 'nack': modelDict[modNameSplit[0]] = {'filename': reply.get(1).asString(), 'pickleData': None} # try: # load pickle for the model file currPickle = pickle.load(open(reply.get(1).asString(), 'rb')) # try loading labelComparisonDict from the pickle if 'labelComparisonDict' in currPickle.keys(): modelDict[modNameSplit[0]]['pickleData'] = currPickle['labelComparisonDict'] print j, 'labelComparisonDict loaded' else: print modNameSplit[0], 'labelComparisonDict not found' failFlag = True if 'overallPerformanceLabels' in currPickle.keys(): 
modelDict[modNameSplit[0]]['labels'] = currPickle['overallPerformanceLabels'] print j, 'overallPerformanceLabels loaded' else: print j, 'overallPerformanceLabels not found' failFlag = True # except: # failFlag = True else: failFlag = True print 'FAIL?', failFlag if failFlag: return False modelList = modelDict.keys() print modelList # --------------------------------------------------------------------------------------------------------------- # extract unique lists from the collected data # the unique list of pickleData[original] represents the possibleClassifications for each model modelDict[selfName] = dict() modelDict[selfName]['labels'] = [] selfModelCol = 1 for j in modelList: modelDict[j]['CPD'] = np.zeros([1, len(modelDict[j]['labels'])]) print j, 'unique labels:', modelDict[j]['labels'] print j, 'CPD shape', modelDict[j]['CPD'].shape modelDict[selfName]['labels'] += modelDict[j]['labels'] selfModelCol *= len(modelDict[j]['labels']) print # the possibleClassifications for both models (outputs of the PGN) # are the unique list of the model specific labels for all models modelDict[selfName]['labels'] = list(set(modelDict[selfName]['labels'])) modelDict[selfName]['actualLabels'] = modelDict[j]['pickleData']['original'] modelDict[selfName]['CPD'] = np.zeros([len(modelDict[selfName]['labels']), selfModelCol]) print selfName, 'unique labels:', modelDict[selfName]['labels'] print selfName, 'CPD shape', modelDict[selfName]['CPD'].shape # check that original classifications of both are identical # otherwise cannot combine them with a single node. 
# This is currently a big limitation that will be removed later print modelDict[selfName]['labels'] for j in modelList: print j, for k in range(len(modelDict[j]['pickleData']['original'])): print modelDict[j]['pickleData']['original'][k] if modelDict[j]['pickleData']['original'][k] not in modelDict[selfName]['labels']: modelDict[j]['pickleData']['original'][k] = 'unknown' for j in modelList: if modelDict[j]['pickleData']['original'] != modelDict[selfName]['actualLabels']: failFlag = True print 'original classifications of', j, 'are not identical to those of', selfName if failFlag: return False # Update netStructureString to reflect changes in the modelList names strSections = netStructureString.split("'") for k in range(len(strSections)): if len(strSections[k]) > 2 and ',' not in strSections[k]: strSections[k] = strSections[k].split(' ')[0] netStructureString = "'".join(strSections) netStructure = ast.literal_eval(netStructureString) # --------------------------------------------------------------------------------------------------------------- # iterate through actual labels # for each actual label, iterate through models # for each model find classification label of this model for current actual label # get the index of the current classification and add it to its CPD # also calculate which item in the joint CPD needs to be incremented for j in range(len(modelDict[selfName]['actualLabels'])): currActualLabel = modelDict[selfName]['actualLabels'][j] row = modelDict[selfName]['labels'].index(currActualLabel) colVar = np.zeros([len(modelList)]) for k in range(len(modelList)): cmod = modelList[k] if k != 0: pmod = modelList[k-1] colVar *= len(modelDict[pmod]['labels']) colVar[k] = modelDict[cmod]['labels'].index( modelDict[cmod]['pickleData']['results'][j]) modelDict[cmod]['CPD'][0, colVar[k]] += 1 col = sum(colVar) modelDict[selfName]['CPD'][row, col] += 1 # take all CPD's and normalise the matrices evidenceCard = copy.deepcopy(modelList) for j in modelDict: if j 
== selfName: # this is a joint CPD matrix # normalise columns to have sum = 1 modelDict[j]['CPD'] = normalize(modelDict[j]['CPD'], axis=0, norm='l1') else: # normalise sum of matrix = 1 modelDict[j]['CPD'] /= np.sum(modelDict[j]['CPD']) evidenceCard[evidenceCard.index(j)] = len(modelDict[j]['labels']) print modelDict[j]['CPD'] model = BayesianModel(netStructure) # create TabularCPD data structure to nest calculated CPD for j in modelDict: if j == selfName: modelDict[j]['cpdObject'] = TabularCPD(variable=j, variable_card=len(modelDict[j]['labels']), values=modelDict[j]['CPD'], evidence=modelList, evidence_card=evidenceCard) else: modelDict[j]['cpdObject'] = TabularCPD(variable=j, variable_card=len(modelDict[j]['labels']), values=modelDict[j]['CPD']) # Associating the CPDs with the network for j in modelDict: model.add_cpds(modelDict[j]['cpdObject']) # check_model checks for the network structure and CPDs and verifies that the CPDs are correctly # defined and sum to 1. if not model.check_model(): print 'Model check returned unsuccessful' return False infer = VariableElimination(model) confMatrix = np.zeros(len(modelDict[selfName]['labels'])) # iterate over all original data and perform classifications to calculate if accuracy with PGN has increased for j in range(len(modelDict[selfName]['actualLabels'])): currEvidenceDict = dict() for k in modelList: currEvidenceDict[k] = modelDict[k]['labels'].index(modelDict[k]['pickleData']['results'][j]) q = infer.query([selfName], currEvidenceDict) inferenceClass = modelDict[selfName]['labels'][np.argmax(q[selfName].values)] actualClass = modelDict[selfName]['actualLabels'][j] confMatrix[modelDict[selfName].index(actualClass), modelDict[selfName].index(inferenceClass)] += 1 print "%Accuracy with PGN" dCalc = SAMTesting.calculateData(modelDict[selfName]['actualLabels'], confMatrix) return True
def bayesian_network_prediction(dataset, ad_cpt, gh_cpt, ga_cpt, prediction_cpt):
    """Build the four-node match network, query it per dataset entry and return 0.

    The network is ability_difference -> goals_home / goals_away -> Prediction,
    parameterised by the four CPT arguments. Every rank produced by
    ``get_histogram(dataset)`` is used as evidence for a ``Prediction`` query;
    the results are only collected locally and nothing but 0 is returned.
    """
    from pgmpy.inference import VariableElimination

    # Model creation code
    # coding: utf-8
    # In[16]:
    # Network structure first
    structure = [('ability_difference', 'goals_home'),
                 ('ability_difference', 'goals_away'),
                 ('goals_home', 'Prediction'),
                 ('goals_away', 'Prediction')]
    dolores_model = BayesianModel(structure)

    cpd_AD = TabularCPD(variable='ability_difference', variable_card=42,
                        values=ad_cpt)
    cpd_GH = TabularCPD(variable='goals_home', variable_card=8,
                        values=gh_cpt,
                        evidence=['ability_difference'], evidence_card=[42])
    cpd_GA = TabularCPD(variable='goals_away', variable_card=8,
                        values=ga_cpt,
                        evidence=['ability_difference'], evidence_card=[42])
    cpd_P = TabularCPD(variable='Prediction', variable_card=3,
                       values=prediction_cpt,
                       evidence=['goals_home', 'goals_away'], evidence_card=[8, 8])

    # Attach the parameters, then validate them against the structure.
    dolores_model.add_cpds(cpd_AD, cpd_GH, cpd_GA, cpd_P)
    dolores_model.check_model()
    dolores_model.get_independencies()

    inference = VariableElimination(dolores_model)

    histogram, home_scores, away_scores = get_histogram(dataset)

    predictions = []
    results = []
    for n, rank in enumerate(histogram):
        result = dataset[n]['result']
        pred = inference.query(variables=['Prediction'],
                               evidence={'ability_difference': rank})
        predictions.append(pred.values)
        results.append(result)

    predictions = np.around(np.array(predictions), 2)
    results = np.array(results)
    results = np.around(results.reshape((results.shape[0], 1)), 0)
    # Predictions and actual results side by side (not used further).
    kk = np.concatenate((predictions, results), axis=1)

    # Spot-check queries for a fixed set of ability differences; results are discarded.
    for ability_difference in (0, 5, 10, 15, 20, 21, 22, 23, 24, 25):
        inference.query(variables=['Prediction'],
                        evidence={'ability_difference': ability_difference})

    return 0
# Structure learning: hill-climb search scored with `bic`.
hc = HillClimbSearch(df, scoring_method=bic)
#hc = ExhaustiveSearch(df, k2)
model = hc.estimate()
learned_edges = model.edges()
for ee in learned_edges:
    print(ee)

## Parameter learning
from pgmpy.models import BayesianModel

mod = BayesianModel(model.edges())
mod.fit(df)
fitted_cpds = mod.get_cpds()
for cpd in fitted_cpds:
    print(cpd)
#print(mod.local_independencies('HA'))

## Model inference
from pgmpy.inference import VariableElimination, BeliefPropagation

cancer_infer = VariableElimination(mod)
q = cancer_infer.query(variables=['HA'])
print(q)
#cancer_infer = BeliefPropagation(mod)
#q = cancer_infer.query(variables=['HA'])
#print(q)
class GeneralModel(Model):
    """
    Allows construction of an arbitrary causal graph & action space with
    discrete (currently assumed binary) CPD tables.

    This implementation will not scale to large graphs.
    """

    def __init__(self, model, actions, py_func):
        """
        model is a pgmpy.BayesianModel
        actions is a list of (var,value) tuples
        py_func maps an assignment x of the parents of Y to P(Y=1|x)
        """
        self.py_func = py_func
        self.parents = sorted(model.get_parents('Y'))
        self.N = len(self.parents)
        self.actions = actions
        self.K = len(actions)

        self.observational_model = model
        self.observational_inference = VariableElimination(
            self.observational_model)
        # One mutilated network (and one sampler) per action.
        self.post_action_models = [
            GeneralModel.do(model, action) for action in actions
        ]
        self.samplers = [
            BayesianModelSampling(model_a)
            for model_a in self.post_action_models
        ]

        # Distribution over the parents of Y under each action.
        self.interventional_distributions = []
        for new_model in self.post_action_models:
            infer = VariableElimination(new_model)
            # NOTE(review): query is unpacked as a pair here -- confirm the
            # pgmpy version in use returns (something, distribution).
            _, distribution_over_parents = infer.query(self.parents)
            self.interventional_distributions.append(distribution_over_parents)

        self.pre_compute()

    def expected_Y_observational(self):
        """
        return a vector of length K with the expected Y given we observe
        the variable-value pair corresponding to each action
        """
        expected_Y = np.zeros(self.K)
        for indx, action in enumerate(self.actions):
            var, value = action
            if var is None:
                _, distribution = self.observational_inference.query(['Y'])
            else:
                _, distribution = self.observational_inference.query(
                    ['Y'], evidence=dict([action]))
            pyis1 = distribution.reduce([('Y', 1)], inplace=False).values
            expected_Y[indx] = pyis1
        return expected_Y

    def _expected_Y(self):
        """ return a vector of length K with P(Y=1) under each action """
        expected_Y = np.zeros(self.K)
        for indx, new_model in enumerate(self.post_action_models):
            infer = VariableElimination(new_model)
            _, distribution_over_reward = infer.query(['Y'])
            expected_reward = distribution_over_reward.reduce(
                [('Y', 1)], inplace=False
            ).values  #TODO investigate failing if inplace=True - bug in pgmpy?
            expected_Y[indx] = expected_reward
        return expected_Y

    @staticmethod
    def build_ycpd(py_func, N):
        """
        build the 2 x 2**N CPD table for Y: row 1 holds py_func(x), row 0 its
        complement, with one column per assignment yielded by
        Model.generate_binary_assignments(N)
        """
        cpd = np.zeros((2, 2**N))
        for i, x in enumerate(Model.generate_binary_assignments(N)):
            p = py_func(x)
            cpd[0, i] = 1 - p
            cpd[1, i] = p
        return cpd

    def pYgivenX(self, x):
        """ return py_func evaluated at the assignment x """
        return self.py_func(x)

    @classmethod
    def create_confounded_parallel(cls, N, N1, pz, pY, q, act_on_z=True):
        """ convenience method for constructing equivalent models to Confounded_Parallel"""
        q10, q11, q20, q21 = q
        pZ = [[1 - pz, pz]]
        pXgivenZ_N1 = [[1 - q10, 1 - q11], [q10, q11]]
        pXgivenZ_N2 = [[1 - q20, 1 - q21], [q20, q21]]

        xvars = ['X' + str(i) for i in range(1, N + 1)]
        edges = chain([('Z', v) for v in xvars], [(v, 'Y') for v in xvars])
        model = BayesianModel(edges)
        cpds = [TabularCPD(variable='Z', variable_card=2, values=pZ)]
        # The first N1 X variables use (q10, q11), the rest use (q20, q21).
        cpds.extend([
            TabularCPD(variable=v, variable_card=2, values=pXgivenZ_N1,
                       evidence=['Z'], evidence_card=[2])
            for v in xvars[0:N1]
        ])
        cpds.extend([
            TabularCPD(variable=v, variable_card=2, values=pXgivenZ_N2,
                       evidence=['Z'], evidence_card=[2])
            for v in xvars[N1:]
        ])

        def py(x):
            # Y depends only on the first and the last X variable.
            i, j = x[0], x[N - 1]
            return pY[i, j]

        ycpd = GeneralModel.build_ycpd(py, N)
        cpds.append(
            TabularCPD(variable='Y', variable_card=2, values=ycpd,
                       evidence=xvars, evidence_card=[2] * len(xvars)))

        model.add_cpds(*cpds)
        model.check_model()

        if act_on_z:
            actions = list(
                chain([(x, 0) for x in xvars], [(x, 1) for x in xvars],
                      [("Z", i) for i in (0, 1)], [(None, None)]))
        else:
            actions = list(
                chain([(x, 0) for x in xvars], [(x, 1) for x in xvars],
                      [(None, None)]))

        pgm_model = cls(model, actions, py)
        return pgm_model

    @classmethod
    def create_very_confounded(cls, Nz, pZ1, pZ, a, b, py):
        """
        construct a very confounded model

        py holds P(Y=1|X1,X2), one entry per column of the Y CPD
        """
        py = np.asarray(py)
        zvars = ['Z' + str(i) for i in range(1, Nz + 1)]
        xvars = ['X' + str(i) for i in range(1, 3)]
        edges = chain(product(zvars, xvars), product(xvars, ['Y']))
        bayes_model = BayesianModel(edges)

        z_other = list(product((0, 1), repeat=(Nz - 1)))
        px1 = np.hstack((np.full(2**(Nz - 1), a),
                         [np.mean(z) for z in z_other]))
        px2 = np.hstack((np.full(2**(Nz - 1), b),
                         [np.prod(z) for z in z_other]))

        cpds = [
            TabularCPD(variable='Z1', variable_card=2,
                       values=np.vstack((1 - pZ1, pZ1)))
        ]
        cpds.extend([
            TabularCPD(variable=v, variable_card=2,
                       values=np.vstack((1 - pZ, pZ)))
            for v in zvars[1:]
        ])
        cpds.append(
            TabularCPD(variable='X1', variable_card=2,
                       values=np.vstack((1 - px1, px1)),
                       evidence=zvars, evidence_card=[2] * Nz))
        cpds.append(
            TabularCPD(variable='X2', variable_card=2,
                       values=np.vstack((1 - px2, px2)),
                       evidence=zvars, evidence_card=[2] * Nz))
        cpds.append(
            TabularCPD(variable='Y', variable_card=2,
                       values=np.vstack((1 - py, py)),
                       evidence=xvars, evidence_card=[2] * len(xvars)))

        bayes_model.add_cpds(*cpds)
        bayes_model.check_model()
        actions = list(
            chain([(z, 0) for z in zvars], [(z, 1) for z in zvars],
                  [(x, 0) for x in xvars], [(x, 1) for x in xvars],
                  [(None, None)]))

        def py_func(x):
            # The constructor requires P(Y=1|x) as a function of the parent
            # assignment. Assumes the Y CPD columns follow the evidence order
            # (X1, X2) with X1 varying slowest, i.e. column = 2 * x1 + x2.
            return py[2 * int(x[0]) + int(x[1])]

        model = cls(bayes_model, actions, py_func)
        return model

    @classmethod
    def do(cls, model, action):
        """
        return a copy of model after the intervention do(var=value): the edges
        into var are removed and its CPD puts all mass on value. The action
        (None, None) leaves the structure and CPDs unchanged.
        """
        var, value = action
        new_model = BayesianModel(model.edges())
        if var is not None:
            for p in model.get_parents(var):
                new_model.remove_edge(p, var)

        cpds = []
        for cpd in model.get_cpds():
            if cpd.variable == var:
                # All other states stay at zero, whatever the cardinality.
                values = np.zeros((cpd.variable_card, 1))
                values[value] = 1.0
                cpd_new = TabularCPD(variable=var,
                                     variable_card=cpd.variable_card,
                                     values=values)
                cpds.append(cpd_new)
            else:
                cpds.append(cpd.copy())

        new_model.add_cpds(*cpds)
        new_model.check_model()
        return new_model

    def sample(self, action):
        """
        samples given the specified action index and returns the values of
        the parents of Y, Y.
        """
        s = self.samplers[action].forward_sample()
        x = s.loc[:, self.parents].values[0]
        y = s.loc[:, ['Y']].values[0][0]
        return x, y

    def P(self, x):
        """
        returns the probability of the given assignment to the parents of Y
        for given each action.
        """
        # Materialise the pairs: a bare zip iterator would be exhausted after
        # the first distribution and later ones would be reduced by nothing.
        assignment = list(zip(self.parents, x))
        pa = np.asarray([
            q.reduce(assignment, inplace=False).values
            for q in self.interventional_distributions
        ])
        return pa
def gradeBayesianInference(evidences):
    """Query the module-level ``grades`` network for 'Voto' given ``evidences``.

    A fresh VariableElimination engine is built on every call and the raw
    query result is returned unchanged.
    """
    return VariableElimination(grades).query(variables=['Voto'], evidence=evidences)
class StateNameDecorator(unittest.TestCase):
    """Check that factors, CPDs and inference accept state names as well as state indices."""

    def setUp(self):
        self.sn2 = {'grade': ['A', 'B', 'F'],
                    'diff': ['high', 'low'],
                    'intel': ['poor', 'good', 'very good']}
        self.sn1 = {'speed': ['low', 'medium', 'high'],
                    'switch': ['on', 'off'],
                    'time': ['day', 'night']}

        # phi1/cpd1 carry no state names, phi2/cpd2 do.
        self.phi1 = DiscreteFactor(['speed', 'switch', 'time'],
                                   [3, 2, 2], np.ones(12))
        self.phi2 = self._make_factor()

        self.cpd1 = TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                            [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                            [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3])
        self.cpd2 = self._make_cpd()

        student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, [[0.2, 0.8]])
        intel_cpd = TabularCPD('intel', 2, [[0.3, 0.7]])
        grade_cpd = TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1],
                                            [0.1, 0.1, 0.1, 0.1],
                                            [0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 2])
        student.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        self.model1 = VariableElimination(student)
        self.model2 = VariableElimination(student, state_names=self.sn2)

    def _make_factor(self):
        """Return a fresh all-ones factor over speed/switch/time with state names sn1."""
        return DiscreteFactor(['speed', 'switch', 'time'],
                              [3, 2, 2], np.ones(12), state_names=self.sn1)

    def _make_cpd(self):
        """Return a fresh P(grade | diff, intel) table with state names sn2."""
        return TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                       [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                          evidence=['diff', 'intel'],
                          evidence_card=[2, 3], state_names=self.sn2)

    def _assert_reduced_factor(self, phi):
        """Assert that phi was reduced down to the 'switch' variable only."""
        self.assertEqual(phi.variables, ['switch'])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))

    def _assert_reduced_cpd(self, cpd):
        """Assert that cpd was reduced on 'diff' and still describes grade | intel."""
        self.assertEqual(cpd.variable, 'grade')
        self.assertEqual(cpd.variables, ['grade', 'intel'])
        np_test.assert_array_equal(cpd.get_values(), np.array([[0.1, 0.1, 0.1],
                                                               [0.1, 0.1, 0.1],
                                                               [0.8, 0.8, 0.8]]))

    def test_assignment_statename(self):
        req_op1 = [[('speed', 'low'), ('switch', 'on'), ('time', 'night')],
                   [('speed', 'low'), ('switch', 'off'), ('time', 'day')]]
        req_op2 = [[('speed', 0), ('switch', 0), ('time', 1)],
                   [('speed', 0), ('switch', 1), ('time', 0)]]

        self.assertEqual(self.phi1.assignment([1, 2]), req_op2)
        self.assertEqual(self.phi2.assignment([1, 2]), req_op1)

    def test_factor_reduce_statename(self):
        # Same reduction expressed by state name and by state index,
        # each checked in place and as a copy.
        by_name = [('speed', 'medium'), ('time', 'day')]
        by_index = [('speed', 1), ('time', 0)]
        for assignment in (by_name, by_index):
            phi = self._make_factor()
            phi.reduce(list(assignment))
            self._assert_reduced_factor(phi)

            phi = self._make_factor()
            phi = phi.reduce(list(assignment), inplace=False)
            self._assert_reduced_factor(phi)

    def test_reduce_cpd_statename(self):
        # In-place reductions, by name then by index.
        for assignment in ([('diff', 'high')], [('diff', 0)]):
            cpd = self._make_cpd()
            cpd.reduce(list(assignment))
            self._assert_reduced_cpd(cpd)

        # Copying reductions, by name then by index.
        for assignment in ([('diff', 'high')], [('diff', 0)]):
            cpd = self._make_cpd()
            cpd = cpd.reduce(list(assignment), inplace=False)
            self._assert_reduced_cpd(cpd)

    def test_inference_query_statename(self):
        inf_op1 = self.model2.query(['grade'], evidence={'intel': 'poor'})
        inf_op2 = self.model2.query(['grade'], evidence={'intel': 0})
        req_op = {'grade': DiscreteFactor(['grade'], [3], np.array([0.1, 0.1, 0.8]))}

        self.assertEqual(inf_op1, inf_op2)
        self.assertEqual(inf_op1, req_op)
        # Check the index-based result too (the name-based one was asserted twice).
        self.assertEqual(inf_op2, req_op)

        inf_op1 = self.model2.map_query(['grade'], evidence={'intel': 'poor'})
        inf_op2 = self.model2.map_query(['grade'], evidence={'intel': 0})
        req_op = {'grade': 'F'}

        self.assertEqual(inf_op1, inf_op2)
        self.assertEqual(inf_op1, req_op)
        self.assertEqual(inf_op2, req_op)
# Attach the parameters to the network structure.
cancer_model.add_cpds(cpd_poll, cpd_smoke, cpd_cancer, cpd_xray, cpd_dysp)

# Validate the CPDs against the structure.
print(cancer_model.check_model())

# d-separation checks (optional reading; not needed for the project):
# Pollution/Smoker without evidence, then with Cancer observed.
for observed_nodes in (None, ['Cancer']):
    print(cancer_model.is_active_trail('Pollution', 'Smoker', observed=observed_nodes))
print(cancer_model.local_independencies('Xray'))
print(cancer_model.get_independencies())

# Model summary: edges, nodes and CPDs, in that order.
for describe in (cancer_model.edges, cancer_model.nodes, cancer_model.get_cpds):
    print(describe())

# Exact inference with Variable Elimination
from pgmpy.inference import VariableElimination

cancer_infer = VariableElimination(cancer_model)

# Queries
dyspnoea_given_cancer = cancer_infer.query(variables=['Dyspnoea'], evidence={'Cancer': 0})
print(dyspnoea_given_cancer)
cancer_given_exposure = cancer_infer.query(variables=['Cancer'],
                                           evidence={'Smoker': 0, 'Pollution': 0})
print(cancer_given_exposure)
from pgmpy.readwrite import BIFReader
from pgmpy.inference import VariableElimination
import os

curPath = os.path.abspath(os.path.dirname(__file__))
# The project root is everything up to and including the "sklearn" directory.
# Use the platform separator so the lookup also works outside Windows, and
# append one so a script located directly in ".../sklearn" is found too.
rootMarker = "sklearn" + os.sep
markerPos = (curPath + os.sep).find(rootMarker)
if markerPos == -1:
    # Fail loudly instead of slicing with -1 and building a meaningless path.
    raise RuntimeError("Cannot locate the 'sklearn' project directory in " + curPath)
rootPath = (curPath + os.sep)[:markerPos + len(rootMarker)]
dataPath = os.path.join(rootPath, "Input", "MLWorkHome", "experiment6", "img", "asia.bif")

reader = BIFReader(dataPath)
asia_model = reader.get_model()

# nodes() lists the nodes present in the model
print(asia_model.nodes())
# NodeView(('xray', 'bronc', 'asia', 'dysp', 'lung', 'either', 'smoke', 'tub'))

# Exercise 1: check whether the tub node and the either node are connected
print("练习1:")
print(asia_model.is_active_trail('tub', 'either'))

# Exercise 2: check whether tub and dysp are still connected when either is observed
print("练习2:")
print(asia_model.is_active_trail('tub', 'dysp', observed=['either']))

asia_infer = VariableElimination(asia_model)
# Distribution of bronc when smoke is 0
q = asia_infer.query(variables=['bronc'], evidence={'smoke': 0})
print(q['bronc'])

# Exercise 3: distribution of xray when either is 1
print("练习3:")
asia_infer2 = VariableElimination(asia_model)
p = asia_infer2.query(variables=['xray'], evidence={'either': 1})
print(p['xray'])
cpd_vehicleRunsHot, cpd_badCarbuerator, cpd_weakBattery, cpd_badStarter, cpd_noFuelPressure, cpd_faultyFuelFilter, cpd_cloggedAirFilter, cpd_wornDistributor, cpd_wornEngineMounts, cpd_harmonicBalancer, cpd_vacuumLeaks, cpd_engineTuneUp, cpd_sparkPlug, cpd_pistonNotWorking, cpd_lowCoolantLevel, cpd_faultyEngineCoolingFan, cpd_stuckThermostat, cpd_corrodedBatteryTerminal, cpd_fuelSystemCleaning, cpd_fuelPumpReplacement, cpd_badIgnitionSytem, cpd_badTimingChain, cpd_brokenMissingFanAssembly, cpd_noSpark, cpd_ignitionCoilForSpark) #validate model model.check_model() #applying inference from pgmpy.inference import VariableElimination infer = VariableElimination(model) #function for getting all the CPDs with the node given as evidence def getAllProbabilities(user_evidence): print(user_evidence) for i in range(len(user_evidence)): activeTrailNodes = model.active_trail_nodes(user_evidence[i]) print(activeTrailNodes) nodes=[] for value in activeTrailNodes: nodes.append(value) print("printing..", nodes)
UGM = DGM.to_markov_model() jtree = UGM.to_junction_tree() evidence = {'A': 1} marginal = jta(UGM, jtree, evidence.items()) print "Results of the implemented JTA" for m in marginal: print m print "\n=======================================\n" print "Results of the Variable Elimination from pgmpy" inference = VariableElimination(DGM) for v in get_different(DGM.nodes(), evidence): print inference.query(variables=[v], evidence=evidence)[v] # visualization part # nx.draw_circular(DGM, with_labels=True, node_color="white", node_size=1000) # plt.draw() # plt.show() # nx.draw_circular(UGM, with_labels=True, node_color="white", node_size=1000) # plt.draw() # plt.show() # nx.draw_circular(jtree, with_labels=True, node_color="white", node_shape='s', node_size=8000) # plt.draw() # plt.show()
def predict_probability(self, data):
    """
    Predicts probabilities of all states of the missing variables.

    For every row of `data` the row is used as evidence and the posterior of
    each model variable that has no column in `data` is queried. The result
    has one column per (variable, state) pair, named ``<variable>_<state>``.

    Parameters
    ----------
    data : pandas DataFrame object
        A DataFrame object with column names same as the variables in the model.

    Raises
    ------
    ValueError
        If `data` contains every model variable (nothing to predict) or a
        column that is not a model variable.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from pgmpy.models import BayesianModel
    >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(100, 5)),
    ...                       columns=['A', 'B', 'C', 'D', 'E'])
    >>> train_data = values[:80]
    >>> predict_data = values[80:]
    >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
    >>> model.fit(values)
    >>> predict_data = predict_data.copy()
    >>> predict_data.drop('B', axis=1, inplace=True)
    >>> y_prob = model.predict_probability(predict_data)
    >>> y_prob
        B_0         B_1
    80  0.439178    0.560822
    81  0.581970    0.418030
    82  0.488275    0.511725
    83  0.581970    0.418030
    84  0.510794    0.489206
    ...
    98  0.488275    0.511725
    99  0.407978    0.592022
    """
    from pgmpy.inference import VariableElimination

    model_nodes = set(self.nodes())
    data_columns = set(data.columns)

    if data_columns == model_nodes:
        raise ValueError("No variable missing in data. Nothing to predict")
    if data_columns - model_nodes:
        raise ValueError("Data has variables which are not in the model")

    missing_variables = model_nodes - data_columns
    pred_values = defaultdict(list)

    model_inference = VariableElimination(self)
    for _, data_point in data.iterrows():
        posteriors = model_inference.query(variables=missing_variables,
                                           evidence=data_point.to_dict())
        for variable, factor in posteriors.items():
            for position, probability in enumerate(factor.values):
                state = self.get_cpds(variable).state_names[variable][position]
                column = variable + '_' + str(state)
                pred_values[column].append(probability)
    return pd.DataFrame(pred_values, index=data.index)
evidence_card=[2] ) student_model.add_cpds( grade_cpd, difficulty_cpd, intel_cpd, letter_cpd, sat_cpd ) print(student_model.get_cpds()) print(student_model.get_independencies()) from pgmpy.inference import VariableElimination student_infer = VariableElimination(student_model) prob_G = student_infer.query( variables=["G"], evidence={"I": 1, "D": 0}) print(prob_G) import numpy as np import pandas as pd raw_data = np.random.randint(low=0, high=2, size=(1000, 5)) data = pd.DataFrame(raw_data, columns=["D", "I", "G", "L", "S"]) data.head() from pgmpy.models import BayesianModel from pgmpy.estimators import MaximumLikelihoodEstimator, BayesianEstimator model = BayesianModel([("D", "G"), ("I", "G"), ("I", "S"), ("G", "L")]) # 基于极大似然估计进行模型训练
# %% codecell drawGraph(carModel) # %% markdown [markdown] # #### Testing conditional independence: # $$ # \color{DodgerBlue}{\text{WorkCapacity (observed)}: \;\;\;\;\;\;\; \text{Experience} \; \bot \; \text{Absenteeism} \; | \; \text{WorkCapacity}} # $$ # Given that **WorkCapacity**'s state is observed, we can make the following equivalent statements: # * there is NO active trail between **Experience** and **Absenteeism**. # * **Experience** and **Absenteeism** are locally independent. # * the probability of **Experience** won't influence probability of **Absenteeism** (and vice versa). # # %% codecell elim: VariableElimination = VariableElimination(model=carModel) # %% markdown [markdown] # **Testing Conditional Independence:** Using Active Trails Methods # %% codecell assert carModel.is_active_trail(start=Experience.var, end=Absenteeism.var, observed=None) assert carModel.is_active_trail( start=Experience.var, end=Absenteeism.var, observed=[WorkCapacity.var] ), "Check: still need to condition on extra variable for this not to be an active trail" # Finding out which extra variable to condition on: # TODO OBSERVEDVARS: must fix observedvars function so that (assuming causal chain) it can identify in the graph what is the middle node between these passed 'start' and 'end' nodes and also include that middle node in the output list (along with existing backdoors) observedVars(carModel, start=Experience, end=Absenteeism)
('Tuberculose', 'TbOuCa'), ('Bronchite', 'Dyspnea')]) #apprentissage des paramètres #print("estimation des cpds :") from pgmpy.estimators import BayesianEstimator est = BayesianEstimator(best_model, data) print(est.estimate_cpd('Cancer', prior_type='BDeu', equivalent_sample_size=10)) best_model.fit(data, estimator=BayesianEstimator, prior_type='BDeu') #for cpd in best_model.get_cpds(): # print(cpd) #Caractéristique des personnes ayant un cancer model_infer = VariableElimination(best_model) q = model_infer.query(variables=[ 'Age', 'Fumeur', 'Tuberculose', 'VisiteAsie', 'Radiographie', 'Bronchite', 'Dyspnea', 'Geographie', 'TbOuCa' ], evidence={'Cancer': 2}) # 0 = ? , 1=False, 2=True print("Caratéristiques des personnes ayant le cancer :") #print(q['Age']) print(q['Fumeur']) print(q['Tuberculose']) print(q['VisiteAsie']) print(q['Radiographie']) print(q['Bronchite']) print(q['Dyspnea']) print(q['Geographie']) print(q['TbOuCa'])
import pandas as pd
from pgmpy.estimators import BayesianEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination

# Column names are stored as the first line of a separate CSV file.
# Read it inside a `with` block so the handle is closed, and drop the line
# ending so the last column name does not carry a trailing newline.
with open('data7_name.csv', 'r') as f:
    attributes = f.readline().rstrip('\r\n').split(',')
heartDisease = pd.read_csv('data7.csv', names=attributes)
print("\nAttributes and datatypes")
print(heartDisease.dtypes)

model = BayesianModel([('age', 'trestbps'), ('age', 'fbs'), ('sex', 'trestbps'), ('exang', 'trestbps'),
                       ('trestbps', 'heartdisease'), ('fbs', 'heartdisease')])
model.fit(heartDisease, BayesianEstimator)

HeartDisease_infer = VariableElimination(model)

print("\n 1. Probability heart disease given age=28")
q = HeartDisease_infer.query(['heartdisease'], {'age': 28})
print(q['heartdisease'])

print("\n 2. Probability of heart disease for male")
q = HeartDisease_infer.query(['heartdisease'], {'sex': 1})
print(q['heartdisease'])
import numpy as np
import pandas as pd
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination

# Load the data set and turn the "?" placeholders into NaN.
heart_data = pd.read_csv("Data7.csv").replace("?", np.nan)

# Directed edges (parent, child) of the network.
edges = [
    ('age', 'trestbps'),
    ('age', 'fbs'),
    ('sex', 'trestbps'),
    ('exang', 'trestbps'),
    ('trestbps', 'heartdisease'),
    ('fbs', 'heartdisease'),
    ('heartdisease', 'restecg'),
    ('heartdisease', 'thalach'),
    ('heartdisease', 'chol'),
]
model = BayesianModel(edges)
model.fit(heart_data, estimator=MaximumLikelihoodEstimator)

# Query the heart-disease distribution under each piece of evidence in turn.
infer = VariableElimination(model)
for evidence in ({'chol': 100}, {'age': 28}):
    q = infer.query(variables=['heartdisease'], evidence=evidence)
    print(q["heartdisease"])
import pandas as pd
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination

dataset = pd.read_csv('dataset.csv')

# Directed edges (parent, child) of the network.
edges = [
    ('HD', 'AGE'),
    ('HD', 'GENDER'),
    ('CP', 'AGE'),
    ('CHOLESTEROL', 'AGE'),
    ('HD', 'BP'),
    ('GENDER', 'CP'),
]
model = BayesianModel(edges)
model.fit(dataset, estimator=MaximumLikelihoodEstimator)

print('\n Inferencing with Bayesian Network:')
HeartDisease_infer = VariableElimination(model)
# for cpd in model.get_cpds():
#     print("CPD of {variable}:".format(variable=cpd.variable))
#     print(cpd)
# print(model.check_model())

# Each entry pairs the heading to print with the evidence used for the query.
scenarios = [
    ('\n1.Probability of HeartDisease given Gender = Female', {'GENDER': 1}),
    ('\n2. Probability of HeartDisease given BP = Low', {'BP': 1}),
]
for heading, evidence in scenarios:
    print(heading)
    q = HeartDisease_infer.query(variables=['HD'], evidence=evidence)
    print(q['HD'])
# Question 2
# Create a Bayesian model and generate the CPDs using maximum likelihood.
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator

# NOTE(review): `model`, `data`, `pd` and `b` are not defined in the visible
# part of this exercise template -- presumably they come from earlier cells
# or from the "write your code" placeholders; confirm before running.
estimator = MaximumLikelihoodEstimator(model, data)
cpds = estimator.get_parameters()

# Write your code
fruit_cpd, size_cpd, tasty_cpd = cpds[0], cpds[1], cpds[2]
print(tasty_cpd)

# Write the CPD of tasty to csv
res = pd.DataFrame(b)
res.to_csv('/code/output/output2.csv', index=False, header=False)

# Question 3
# Attach the first three estimated CPDs to the model, one at a time.
for cpd in cpds[:3]:
    model.add_cpds(cpd)

# Create a Bayesian model and run the variable elimination algorithm on it.
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination

model_inference = VariableElimination(model)
query = model_inference.map_query(variables=['tasty'])

# Expected output
print(query)
result = pd.DataFrame(query, index=[0])

# Write your output to csv
result.to_csv('/code/output/output3.csv', index=False)
attributes = lines[0]

# Read Cleveland Heart disease data; "?" placeholders become NaN.
heartDisease = pd.read_csv('heart.csv', names=attributes)
heartDisease = heartDisease.replace('?', np.nan)

# Display the data
print('Few examples from the dataset are given below')
print(heartDisease.head())
print('\nAttributes and datatypes')
print(heartDisease.dtypes)

# Model Bayesian Network: each tuple is a directed edge (parent, child).
# The ('sex', 'trestbps') edge was listed twice; it is declared once here.
model = BayesianModel([
    ('age', 'trestbps'),
    ('age', 'fbs'),
    ('sex', 'trestbps'),
    ('exang', 'trestbps'),
    ('trestbps', 'heartdisease'),
    ('fbs', 'heartdisease'),
    ('heartdisease', 'restecg'),
    ('heartdisease', 'thalach'),
    ('heartdisease', 'chol'),
])

# Learning CPDs using Maximum Likelihood Estimators
print('\nLearning CPDs using Maximum Likelihood Estimators...')
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

# Deducing with Bayesian Network
print('\nInferencing with Bayesian Network:')
HeartDisease_infer = VariableElimination(model)

# The printed headings now describe the evidence that is actually used:
# the first query conditions on age=40 (the heading used to say 20), and the
# second conditions on sex=0 in addition to chol=100.
print('\n1.Probability of HeartDisease given Age=40')
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 40})
print(q['heartdisease'])

print('\n2. Probability of HeartDisease given sex=0 and chol (Cholestoral) =100')
q = HeartDisease_infer.query(
    variables=['heartdisease'],
    evidence={'sex': 0, 'chol': 100},
)
print(q['heartdisease'])
# P(Report | Leaving): one column per state of Leaving.
report_cpd = TabularCPD(
    variable='Report',
    variable_card=2,
    values=[[0.75, 0.01],
            [0.25, 0.99]],
    evidence=['Leaving'],
    evidence_card=[2],
)
# P(Smoke | Fire): one column per state of Fire.
smoke_cpd = TabularCPD(
    variable='Smoke',
    variable_card=2,
    values=[[0.9, 0.1],
            [0.1, 0.9]],
    evidence=['Fire'],
    evidence_card=[2],
)

# Attach every CPD to the network.
all_cpds = (fire_cpd, smoke_cpd, tampering_cpd, alarm_cpd, leaving_cpd, report_cpd)
report_model.add_cpds(*all_cpds)
# print(report_model.get_cpds())
# print(report_model.active_trail_nodes('Report'))
# print(report_model.local_independencies('Alarm'))
# print(report_model.get_independencies())

report_infer = VariableElimination(report_model)

# Distributions of Report and Leaving with no evidence.
prob_temp = report_infer.query(variables=['Report', 'Leaving'])
# print(prob_temp['Report'])
# print(prob_temp['Leaving'])

# NOTE(review): despite the variable name, the evidence used here is
# Tampering and Fire, not Smoke and Report -- confirm which is intended.
alarm_evidence = {'Tampering': 0, 'Fire': 1}
prob_alarm_given_smoke_report = report_infer.query(
    variables=['Alarm'],
    evidence=alarm_evidence,
)
print(prob_alarm_given_smoke_report['Alarm'])
# P(L | G): one column per state of G.
cpd_l = TabularCPD(
    variable='L',
    variable_card=2,
    evidence=['G'],
    evidence_card=[3],
    values=[[0.1, 0.4, 0.99],
            [0.9, 0.6, 0.01]],
)
# P(S | I): one column per state of I.
cpd_s = TabularCPD(
    variable='S',
    variable_card=2,
    evidence=['I'],
    evidence_card=[2],
    values=[[0.95, 0.2],
            [0.05, 0.8]],
)

# Associating the CPDs with the network
model.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)

# check_model checks for the network structure and CPDs and verifies that the
# CPDs are correctly defined and sum to 1.
model.check_model()

from pgmpy.inference import VariableElimination

infer = VariableElimination(model)
evidence = {'G': 2, 'S': 1}  # grade=C, SAT=Good

# Posterior value arrays of D and I under the same evidence.
postD, postI = (
    infer.query([node], evidence=evidence).values for node in ('D', 'I')
)

print('\n')
print('Pr(Difficulty=Hard|Grade=C,SAT=Good) = {:0.2f}'.format(postD[1]))
print('Pr(Intelligent=High|Grade=C,SAT=Good) = {:0.2f}'.format(postI[1]))
df_result = pd.read_csv("D:\\Satl_project\\correct\\bayesian\\b2_input.csv")
# This is the input file which contains the input data. There is a slight
# change here: in reality there are 3 levels (level-1/2/3), but in this file
# the levels are 0/1/2 because numbering starts from 0 by default, so the
# actual levels were renamed 1->0, 2->1, 3->2.
df = pd.read_csv("D:\\Satl_project\\correct\\bayesian\\b3_input.csv")

# Hold-out fold. For five-fold cross validation this code has to be run
# 5 times with a different range each time, like 0-101, 101-201 and so on.
# .copy() makes the test fold an independent frame, so adding a column below
# does not write into a slice of `df` (pandas chained-assignment warning).
df_test = df.iloc[401:501, :].copy()
a = df_test.index
df_train = df.drop(df.index[a])

# Learn the CPDs on the training folds only.
model_asset.fit(df_train)
model_asset.get_cpds()
model_asset.get_cardinality()
infer_asset = VariableElimination(model_asset)

# Placeholder column for the predicted label, then renumber the rows from 0
# without keeping the old index as a column.
df_test['Bayesian_label'] = 0
df_test = df_test.reset_index(drop=True)
# print df_test

for index, row in df_test.iterrows():
    # print index
    a, b, c = row['Literacy'], row['Formal Employment'], row['Current Status']
    # print a,b,c,d,e
    q_asset = infer_asset.query(
        ['CHH_Change'],
        evidence={
            'Literacy': a,
            'Formal Employment': b,
            'Current Status': c,
        },
    )
cpd_getting_up_late = TabularCPD('getting_up_late', 2, [[0.6], [0.4]])
# P(late_for_school | getting_up_late, traffic_jam)
cpd_late_for_school = TabularCPD(
    'late_for_school', 2,
    [[0.9, 0.45, 0.8, 0.1],
     [0.1, 0.55, 0.2, 0.9]],
    evidence=['getting_up_late', 'traffic_jam'],
    evidence_card=[2, 2],
)
# P(long_queues | traffic_jam)
cpd_long_queues = TabularCPD(
    'long_queues', 2,
    [[0.9, 0.2],
     [0.1, 0.8]],
    evidence=['traffic_jam'],
    evidence_card=[2],
)
model.add_cpds(cpd_rain, cpd_accident, cpd_traffic_jam, cpd_getting_up_late,
               cpd_late_for_school, cpd_long_queues)

# Calculating max marginals
model_inference = VariableElimination(model)
model_inference.max_marginal(variables=['late_for_school'])
model_inference.max_marginal(variables=['late_for_school', 'traffic_jam'])

# For any evidence in the network we can simply pass the evidence
# argument which is a dict of the form of {variable: state}
model_inference.max_marginal(variables=['late_for_school'],
                             evidence={'traffic_jam': 1})
model_inference.max_marginal(variables=['late_for_school'],
                             evidence={'traffic_jam': 1,
                                       'getting_up_late': 0})
# The closing parenthesis of this call was missing, which is a syntax error.
model_inference.max_marginal(variables=['late_for_school', 'long_queues'],
                             evidence={'traffic_jam': 1,
                                       'getting_up_late': 0})

# Again as in the case of VariableElimination we can also pass the
# elimination order of variables for MAP queries. If not specified
# pgmpy automatically computes the best elimination order for the
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination
from pgmpy.factors import TabularCPD

# Now first create the model.
restaurant = BayesianModel([
    ('location', 'cost'),
    ('quality', 'cost'),
    ('cost', 'no_of_people'),
    ('location', 'no_of_people'),
])

cpd_location = TabularCPD('location', 2, [[0.6, 0.4]])
cpd_quality = TabularCPD('quality', 3, [[0.3, 0.5, 0.2]])
# P(cost | location, quality): one column per (location, quality) combination.
# NOTE(review): the second column used to be (0.6, 0.1), which sums to 0.7 and
# is therefore not a valid distribution. The second entry is set to 0.4 so the
# column sums to 1 -- confirm this is the intended probability.
cpd_cost = TabularCPD(
    'cost', 2,
    [[0.8, 0.6, 0.1, 0.6, 0.6, 0.05],
     [0.2, 0.4, 0.9, 0.4, 0.4, 0.95]],
    ['location', 'quality'], [2, 3],
)
# P(no_of_people | cost, location)
cpd_no_of_people = TabularCPD(
    'no_of_people', 2,
    [[0.6, 0.8, 0.1, 0.6],
     [0.4, 0.2, 0.9, 0.4]],
    ['cost', 'location'], [2, 2],
)
restaurant.add_cpds(cpd_location, cpd_quality, cpd_cost, cpd_no_of_people)

# Creating the inference object of the model
restaurant_inference = VariableElimination(restaurant)

# Doing simple queries over one or multiple variables.
restaurant_inference.query(variables=['location'])
restaurant_inference.query(variables=['location', 'no_of_people'])

# We can also specify the order in which the variables are to be
# eliminated. If not specified pgmpy automatically computes the
# best possible elimination order.
restaurant_inference.query(variables=['no_of_people'],
                           elimination_order=['location', 'cost', 'quality'])