def map_query(bayes_net, query_vars, evidence_vars):
    """Run a MAP query for `query_vars` on `bayes_net` via variable elimination.

    A falsy `evidence_vars` (None or empty mapping) is normalised to None
    before querying; progress output is suppressed either way.
    """
    engine = VariableElimination(bayes_net)
    evidence = evidence_vars if evidence_vars else None
    return engine.map_query(variables=query_vars, evidence=evidence,
                            show_progress=False)
def probnet_inference(model, h, b, d):
    """Discretise (h, b, d) against fixed thresholds and MAP-query 'S'.

    Thresholds (h > 10, b > 20, d > 3) map each raw reading to a binary
    evidence state; the binarised triple is echoed to stdout before querying.
    """
    evidence = {'H': int(h > 10), 'B': int(b > 20), 'D': int(d > 3)}
    print(evidence['H'], evidence['B'], evidence['D'])
    infer = VariableElimination(model)
    return infer.map_query(['S'], evidence=evidence)
def predict(self, data):
    """ Predicts states of all the missing variables. Parameters ---------- data : pandas DataFrame object A DataFrame object with column names same as the variables in the model. Examples -------- >>> import numpy as np >>> import pandas as pd >>> from pgmpy.models import BayesianModel >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)), ... columns=['A', 'B', 'C', 'D', 'E']) >>> train_data = values[:800] >>> predict_data = values[800:] >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')]) >>> model.fit(values) >>> predict_data = predict_data.copy() >>> predict_data.drop('E', axis=1, inplace=True) >>> y_pred = model.predict(predict_data) >>> y_pred E 800 0 801 1 802 1 803 1 804 0 ... ... 993 0 994 0 995 1 996 1 997 0 998 0 999 0 """
    # Imported lazily to avoid a models<->inference circular import.
    from pgmpy.inference import VariableElimination
    # Sanity checks: the data must cover a strict subset of the model's nodes.
    if set(data.columns) == set(self.nodes()):
        raise ValueError("No variable missing in data. Nothing to predict")
    elif set(data.columns) - set(self.nodes()):
        raise ValueError("Data has variables which are not in the model")
    # Variables to infer = model nodes absent from the data columns.
    missing_variables = set(self.nodes()) - set(data.columns)
    # Accumulates, per missing variable, one predicted state per data row.
    pred_values = defaultdict(list)
    # Send state_names dict from one of the estimated CPDs to the inference class.
    model_inference = VariableElimination(
        self, state_names=self.get_cpds()[0].state_names)
    # One MAP query per row, using the row's values as evidence.
    # NOTE(review): row-by-row querying is O(rows) inference calls — slow on
    # large frames, but matches the original behaviour.
    for index, data_point in data.iterrows():
        states_dict = model_inference.map_query(
            variables=missing_variables, evidence=data_point.to_dict())
        for k, v in states_dict.items():
            pred_values[k].append(v)
    # Result is indexed like the input so predictions line up with rows.
    return pd.DataFrame(pred_values, index=data.index)
def predict(self, data):
    """Predict the states of every model variable absent from `data`.

    Parameters
    ----------
    data : pandas DataFrame object
        A DataFrame whose column names are a strict subset of the model's
        variables; each row supplies the evidence for one MAP query.

    Returns
    -------
    pandas DataFrame with one column per predicted (missing) variable,
    indexed like `data`.

    Raises
    ------
    ValueError
        If no variable is missing, or if `data` contains columns that are
        not variables of the model.
    """
    from pgmpy.inference import VariableElimination

    model_vars = set(self.nodes())
    observed_vars = set(data.columns)
    if observed_vars == model_vars:
        raise ValueError("No variable missing in data. Nothing to predict")
    elif observed_vars - model_vars:
        raise ValueError("Data has variables which are not in the model")

    to_predict = model_vars - observed_vars
    collected = defaultdict(list)
    # Pass one estimated CPD's state_names so the inference engine can map
    # evidence values to states consistently.
    inference = VariableElimination(
        self, state_names=self.get_cpds()[0].state_names)
    for _, row in data.iterrows():
        assignment = inference.map_query(variables=to_predict,
                                         evidence=row.to_dict())
        for var, state in assignment.items():
            collected[var].append(state)
    return pd.DataFrame(collected, index=data.index)
def map_query(self, targets, evidences, algorithm):
    """Run a MAP query over `targets` given `evidences` with the chosen algorithm.

    Parameters
    ----------
    targets : iterable of variable names to infer.
    evidences : dict mapping observed variables to their states (or None).
    algorithm : one of "Variable Elimination", "Belief Propagation", "MPLP".

    Returns
    -------
    dict mapping each target variable to its MAP state.

    Raises
    ------
    ValueError
        If `algorithm` is not one of the supported names.  (Previously an
        unknown algorithm fell through to an UnboundLocalError on
        `model_infer`; we fail fast with a clear message instead.)
    """
    if algorithm == "Variable Elimination":
        from pgmpy.inference import VariableElimination
        model_infer = VariableElimination(self.model_pgmpy)
    elif algorithm == "Belief Propagation":
        from pgmpy.inference import BeliefPropagation
        model_infer = BeliefPropagation(self.model_pgmpy)
    elif algorithm == "MPLP":
        # MPLP operates on Markov networks, so convert first.
        from pgmpy.inference import Mplp
        model_infer = Mplp(self.model_pgmpy.to_markov_model())
    else:
        raise ValueError("Unknown inference algorithm: {!r}".format(algorithm))
    return model_infer.map_query(variables=list(targets), evidence=evidences)
def Test_Data_Inference_map_n_steps(self, df_test, n_tsteps):
    """Run per-row marginal and MAP queries for the next `n_tsteps` occupancy
    variables ('M_t', 'M_t+1', ...) and return `df_test` augmented with the
    query results.

    For each row of `df_test` the non-target columns are used as evidence.
    Per target variable n, four columns are written: '<n>_0' and '<n>_1'
    (the two marginal probabilities), '<n>' (Map_Occ_Values applied to the
    probability of state 1), and '<n>_map' (the MAP state).  Rows whose
    evidence contains values never seen in df_test get NaN in all four.
    """
    # make a function that can predict N timesteps ahead.
    df_inference_results = df_test.filter(items=COLUMN_SEQUENCE).copy()
    infer = VariableElimination(self.model)
    # Per-column sets of observed values, used to reject out-of-support evidence.
    dict_unique_vals = dict(zip(df_test.columns,
                                [df_test[i].unique() for i in df_test.columns]))
    # Target variables: current timestep plus n_tsteps-1 future ones.
    result_list = ['M_t']
    if n_tsteps > 1:
        result_list = result_list + ["M_t+{}".format(x) for x in range(1, n_tsteps)]
    count = 0
    # Iterate rows as {column: value} dicts; evidence excludes the targets.
    for key, value in df_test.filter(
            items=[x for x in df_test.columns if x not in result_list]
    ).to_dict('index').items():
        index_key = key
        if check_data_in_evidence(value, dict_unique_vals):
            # Timed marginal query over all targets at once.
            tic = time.time()
            result = infer.query(variables=result_list, evidence=value)
            toc = time.time() - tic
            logging.info("thermostat {} - Elapsed seconds for query {:.2f}".format(
                self.thermostat.tstat_id, toc))
            # Timed MAP query with the same evidence.
            tic = time.time()
            map_result = infer.map_query(variables=result_list, evidence=value)
            toc = time.time() - tic
            logging.info("thermostat {} - Elapsed seconds for MAP query {:.2f}".format(
                self.thermostat.tstat_id, toc))
            for n in result_list:
                # assumes each target is binary: values[0]/values[1] are the
                # two state probabilities — TODO confirm cardinality.
                df_inference_results.at[index_key, '{}_0'.format(n)] = result[n].values[0]
                df_inference_results.at[index_key, '{}_1'.format(n)] = result[n].values[1]
                df_inference_results.at[index_key, '{}'.format(n)] = Map_Occ_Values(result[n].values[1])
                df_inference_results.at[index_key, '{}_map'.format(n)] = map_result[n]
        else:
            # Evidence contains a value absent from the training support;
            # record NaNs rather than risking a failed query.
            for n in result_list:
                df_inference_results.at[index_key, '{}_0'.format(n)] = np.nan
                df_inference_results.at[index_key, '{}_1'.format(n)] = np.nan
                df_inference_results.at[index_key, '{}'.format(n)] = np.nan
                df_inference_results.at[index_key, '{}_map'.format(n)] = np.nan
        count += 1
    logging.info("thermostat {} - Iterations of test {}".format(
        self.thermostat.tstat_id, count))
    return df_inference_results
class TestVariableEliminationMarkov(unittest.TestCase):
    """Variable-elimination tests on the Markov-network form of the model.

    Expected values mirror the Bayesian-network test class above, since this
    is the moralised equivalent of the same network.
    """

    def setUp(self):
        # It is just a moralised version of the above Bayesian network so all the results are same. Only factors
        # are under consideration for inference so this should be fine.
        self.markov_model = MarkovModel([('A', 'J'), ('R', 'J'), ('J', 'Q'),
                                         ('J', 'L'), ('G', 'L'), ('A', 'R'),
                                         ('J', 'G')])
        # Factors are built from TabularCPDs and converted, so the numbers
        # match the Bayesian network's CPDs exactly.
        factor_a = TabularCPD('A', 2, values=[[0.2], [0.8]]).to_factor()
        factor_r = TabularCPD('R', 2, values=[[0.4], [0.6]]).to_factor()
        factor_j = TabularCPD('J', 2,
                              values=[[0.9, 0.6, 0.7, 0.1],
                                      [0.1, 0.4, 0.3, 0.9]],
                              evidence=['A', 'R'],
                              evidence_card=[2, 2]).to_factor()
        factor_q = TabularCPD('Q', 2,
                              values=[[0.9, 0.2], [0.1, 0.8]],
                              evidence=['J'], evidence_card=[2]).to_factor()
        factor_l = TabularCPD('L', 2,
                              values=[[0.9, 0.45, 0.8, 0.1],
                                      [0.1, 0.55, 0.2, 0.9]],
                              evidence=['J', 'G'],
                              evidence_card=[2, 2]).to_factor()
        factor_g = TabularCPD('G', 2, [[0.6], [0.4]]).to_factor()
        self.markov_model.add_factors(factor_a, factor_r, factor_j, factor_q,
                                      factor_l, factor_g)
        self.markov_inference = VariableElimination(self.markov_model)

    # All the values that are used for comparison in all the tests were
    # found using SAMIAM (assuming that it is correct ;))

    def test_query_single_variable(self):
        query_result = self.markov_inference.query(['J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))

    def test_query_multiple_variable(self):
        query_result = self.markov_inference.query(['Q', 'J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.4912, 0.5088]))

    def test_query_single_variable_with_evidence(self):
        query_result = self.markov_inference.query(variables=['J'],
                                                   evidence={'A': 0, 'R': 1})
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.60, 0.40]))

    def test_query_multiple_variable_with_evidence(self):
        query_result = self.markov_inference.query(variables=['J', 'Q'],
                                                   evidence={'A': 0, 'R': 0,
                                                             'G': 0, 'L': 1})
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_query_multiple_times(self):
        # This just tests that the models are not getting modified while querying them
        query_result = self.markov_inference.query(['J'])
        query_result = self.markov_inference.query(['J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))
        query_result = self.markov_inference.query(['Q', 'J'])
        query_result = self.markov_inference.query(['Q', 'J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.4912, 0.5088]))
        query_result = self.markov_inference.query(variables=['J'],
                                                   evidence={'A': 0, 'R': 1})
        query_result = self.markov_inference.query(variables=['J'],
                                                   evidence={'A': 0, 'R': 1})
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.60, 0.40]))
        query_result = self.markov_inference.query(variables=['J', 'Q'],
                                                   evidence={'A': 0, 'R': 0,
                                                             'G': 0, 'L': 1})
        query_result = self.markov_inference.query(variables=['J', 'Q'],
                                                   evidence={'A': 0, 'R': 0,
                                                             'G': 0, 'L': 1})
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_max_marginal(self):
        np_test.assert_almost_equal(self.markov_inference.max_marginal(),
                                    0.1659, decimal=4)

    def test_max_marginal_var(self):
        np_test.assert_almost_equal(self.markov_inference.max_marginal(['G']),
                                    0.5714, decimal=4)

    def test_max_marginal_var1(self):
        np_test.assert_almost_equal(
            self.markov_inference.max_marginal(['G', 'R']), 0.4055, decimal=4)

    def test_max_marginal_var2(self):
        np_test.assert_almost_equal(
            self.markov_inference.max_marginal(['G', 'R', 'A']), 0.3260,
            decimal=4)

    def test_map_query(self):
        # With no arguments, map_query returns the MAP state of every variable.
        map_query = self.markov_inference.map_query()
        self.assertDictEqual(map_query, {'A': 1, 'R': 1, 'J': 1, 'Q': 1,
                                         'G': 0, 'L': 0})

    def test_map_query_with_evidence(self):
        map_query = self.markov_inference.map_query(['A', 'R', 'L'],
                                                    {'J': 0, 'Q': 1, 'G': 0})
        self.assertDictEqual(map_query, {'A': 1, 'R': 0, 'L': 0})

    def test_induced_graph(self):
        # Edges are compared order-insensitively by sorting each pair and the list.
        induced_graph = self.markov_inference.induced_graph(
            ['G', 'Q', 'A', 'J', 'L', 'R'])
        result_edges = sorted([sorted(x) for x in induced_graph.edges()])
        self.assertEqual([['A', 'J'], ['A', 'R'], ['G', 'J'], ['G', 'L'],
                          ['J', 'L'], ['J', 'Q'], ['J', 'R'], ['L', 'R']],
                         result_edges)

    def test_induced_width(self):
        result_width = self.markov_inference.induced_width(
            ['G', 'Q', 'A', 'J', 'L', 'R'])
        self.assertEqual(2, result_width)

    def tearDown(self):
        del self.markov_inference
        del self.markov_model
def _load_split(fileloc, features_f, features_g):
    """Load a left/right/label pairs CSV and attach per-image features.

    `features_f`/`features_g` are the feature table with 'f'/'g' column
    prefixes; features of the left image get the 'f' prefix, features of the
    right image the 'g' prefix.  Rows containing inf/NaN are dropped and the
    remaining values cast to int.

    Returns (DataFrame, ndarray) — the cleaned frame and its `.values`
    (columns 0-14: f-features, 15-29: g-features, 30: label).
    """
    pairs = pd.read_csv(fileloc, usecols=['left', 'right', 'label'])
    merged_f = pairs.merge(features_f, left_on='left', right_on='fimagename')
    merged_g = pairs.merge(features_g, left_on='right', right_on='gimagename')
    merged_f = merged_f.drop(['left', 'right', 'fimagename', 'label'], axis=1)
    merged_g = merged_g.drop(['left', 'right', 'gimagename', 'label'], axis=1)
    final = pd.concat([merged_f, merged_g, pairs.loc[:, 'label']], axis=1)
    # BUG FIX: replace() is not in-place; the original discarded its return
    # value, so +/-inf rows were never turned into NaN (and never dropped).
    final = final.replace([np.inf, -np.inf], np.nan)
    final.dropna(inplace=True)
    final = final.astype(int)
    return final, final.values


def _train_model(model, traindata, feature_names):
    """Fit `model` on `traindata`, then replace its CPDs with Bayesian estimates."""
    model.fit(traindata)
    estimator = BayesianEstimator(model, traindata)
    cpds = []
    for name in feature_names:
        cpds.append(estimator.estimate_cpd('f' + name))
        cpds.append(estimator.estimate_cpd('g' + name))
    cpds.append(estimator.estimate_cpd('label'))
    model.add_cpds(*cpds)
    return model


def _accuracy(inference, ndarray, feature_names):
    """MAP-predict 'label' for each row of `ndarray`; return accuracy in percent.

    Raw feature values are 1-based; evidence states are 0-based, hence the -1.
    """
    predictions = []
    for i in range(ndarray.shape[0]):
        evidence = {}
        for idx, name in enumerate(feature_names):
            evidence['f' + name] = ndarray[i, idx] - 1
            # BUG FIX: the g-features live in columns 15-29 of the SAME row.
            # The original indexed ndarray[i + 15, idx], i.e. read another
            # sample's f-features (and overran the array near the end).
            evidence['g' + name] = ndarray[i, idx + 15] - 1
        result = inference.map_query(variables=['label'], evidence=evidence)
        predictions.append(result['label'])
    correct = sum(1 for i, pred in enumerate(predictions)
                  if int(pred) == int(ndarray[i, 30]))
    return correct / len(predictions) * 100


def main():
    """Train and evaluate a Bayesian-network writer-verification model on the
    seen, shuffled and unseen splits, printing train/validation accuracies."""
    features_data = pd.read_csv(fileloc_features)
    features_data_f = features_data.add_prefix('f')
    features_data_g = features_data.add_prefix('g')

    featureNamesList = ["pen_pressure", "letter_spacing", "size", "dimension",
                        "is_lowercase", "is_continuous", "slantness", "tilt",
                        "entry_stroke_a", "staff_of_a", "formation_n",
                        "staff_of_d", "exit_stroke_d", "word_formation",
                        "constancy"]

    # Exploratory structure learning; the estimated model is not used further
    # (the hand-tuned edge list below is), matching the original behaviour.
    features_only_data = features_data[featureNamesList]
    initial_hcs = HillClimbSearch(features_only_data)
    initial_model = initial_hcs.estimate()
    # print(initial_model.edges())
    print("Hill Climb Done")

    # Structure of ONE side's network; it is instantiated twice, once with the
    # 'f' (left image) prefix and once with the 'g' (right image) prefix.
    base_edges = [
        ('pen_pressure', 'is_lowercase'), ('pen_pressure', 'letter_spacing'),
        ('size', 'slantness'), ('size', 'pen_pressure'),
        ('size', 'staff_of_d'), ('size', 'letter_spacing'),
        ('size', 'exit_stroke_d'), ('size', 'entry_stroke_a'),
        ('dimension', 'size'), ('dimension', 'is_continuous'),
        ('dimension', 'slantness'), ('dimension', 'pen_pressure'),
        ('is_lowercase', 'staff_of_a'), ('is_lowercase', 'exit_stroke_d'),
        ('is_continuous', 'exit_stroke_d'), ('is_continuous', 'letter_spacing'),
        ('is_continuous', 'entry_stroke_a'), ('is_continuous', 'staff_of_a'),
        ('is_continuous', 'is_lowercase'),
        ('slantness', 'is_continuous'), ('slantness', 'tilt'),
        ('entry_stroke_a', 'pen_pressure'),
        ('formation_n', 'constancy'), ('formation_n', 'word_formation'),
        ('formation_n', 'dimension'), ('formation_n', 'staff_of_d'),
        ('formation_n', 'is_continuous'), ('formation_n', 'size'),
        ('formation_n', 'staff_of_a'),
        ('staff_of_d', 'is_continuous'), ('staff_of_d', 'exit_stroke_d'),
        ('staff_of_d', 'is_lowercase'), ('staff_of_d', 'slantness'),
        ('staff_of_d', 'entry_stroke_a'),
        ('word_formation', 'dimension'), ('word_formation', 'staff_of_a'),
        ('word_formation', 'size'), ('word_formation', 'staff_of_d'),
        ('word_formation', 'constancy'),
        ('constancy', 'staff_of_a'), ('constancy', 'letter_spacing'),
        ('constancy', 'dimension'),
    ]
    edges = [(prefix + u, prefix + v)
             for prefix in ('f', 'g') for u, v in base_edges]
    # Both sides' is_continuous / word_formation drive the match label.
    edges += [('fis_continuous', 'label'), ('fword_formation', 'label'),
              ('gis_continuous', 'label'), ('gword_formation', 'label')]
    basemodel = BayesianModel(edges)

    splits = [
        ("Seen", "seen", fileloc_seen_training, fileloc_seen_validation),
        ("Shuffled", "shuffled", fileloc_shuffled_training,
         fileloc_shuffled_validation),
        ("Unseen", "unseen", fileloc_unseen_training,
         fileloc_unseen_validation),
    ]
    accuracies = {}
    for title, key, train_loc, valid_loc in splits:
        traindata, train_arr = _load_split(train_loc, features_data_f,
                                           features_data_g)
        _, valid_arr = _load_split(valid_loc, features_data_f,
                                   features_data_g)
        model = _train_model(basemodel.copy(), traindata, featureNamesList)
        print("CPDs Calculated")
        inference = VariableElimination(model)
        accuracies[key + "_train"] = _accuracy(inference, train_arr,
                                               featureNamesList)
        print("Bayesian Model Accuracy for " + title + " Training Data = "
              + str(accuracies[key + "_train"]))
        accuracies[key + "_validation"] = _accuracy(inference, valid_arr,
                                                    featureNamesList)
        print("Bayesian Model Accuracy for " + title + " Validation Data = "
              + str(accuracies[key + "_validation"]))
class StateNameDecorator(unittest.TestCase):
    """Tests that factors, CPDs and inference accept named states
    interchangeably with integer state indices."""

    def setUp(self):
        # State-name maps: sn2 for the student network, sn1 for the
        # speed/switch/time factors.
        self.sn2 = {
            'grade': ['A', 'B', 'F'],
            'diff': ['high', 'low'],
            'intel': ['poor', 'good', 'very good']
        }
        self.sn1 = {
            'speed': ['low', 'medium', 'high'],
            'switch': ['on', 'off'],
            'time': ['day', 'night']
        }
        # phi1 has no state names (integer states); phi2 is the named twin.
        self.phi1 = DiscreteFactor(['speed', 'switch', 'time'], [3, 2, 2],
                                   np.ones(12))
        self.phi2 = DiscreteFactor(['speed', 'switch', 'time'], [3, 2, 2],
                                   np.ones(12), state_names=self.sn1)
        # cpd1/cpd2: same unnamed/named pairing for a TabularCPD.
        self.cpd1 = TabularCPD('grade', 3,
                               [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3])
        self.cpd2 = TabularCPD('grade', 3,
                               [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3],
                               state_names=self.sn2)
        # Minimal student network for the inference tests.
        student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, [[0.2, 0.8]])
        intel_cpd = TabularCPD('intel', 2, [[0.3, 0.7]])
        grade_cpd = TabularCPD('grade', 3,
                               [[0.1, 0.1, 0.1, 0.1],
                                [0.1, 0.1, 0.1, 0.1],
                                [0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 2])
        student.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        # model1 answers with integer states; model2 with names from sn2.
        self.model1 = VariableElimination(student)
        self.model2 = VariableElimination(student, state_names=self.sn2)

    def test_assignment_statename(self):
        # The same assignments expressed as names (req_op1) and indices (req_op2).
        req_op1 = [[('speed', 'low'), ('switch', 'on'), ('time', 'night')],
                   [('speed', 'low'), ('switch', 'off'), ('time', 'day')]]
        req_op2 = [[('speed', 0), ('switch', 0), ('time', 1)],
                   [('speed', 0), ('switch', 1), ('time', 0)]]
        self.assertEqual(self.phi1.assignment([1, 2]), req_op2)
        self.assertEqual(self.phi2.assignment([1, 2]), req_op1)

    def test_factor_reduce_statename(self):
        # Four variants: {named, indexed} states x {in-place, copy} reduce.
        phi = DiscreteFactor(['speed', 'switch', 'time'], [3, 2, 2],
                             np.ones(12), state_names=self.sn1)
        phi.reduce([('speed', 'medium'), ('time', 'day')])
        self.assertEqual(phi.variables, ['switch'])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))
        phi = DiscreteFactor(['speed', 'switch', 'time'], [3, 2, 2],
                             np.ones(12), state_names=self.sn1)
        phi = phi.reduce([('speed', 'medium'), ('time', 'day')],
                         inplace=False)
        self.assertEqual(phi.variables, ['switch'])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))
        phi = DiscreteFactor(['speed', 'switch', 'time'], [3, 2, 2],
                             np.ones(12), state_names=self.sn1)
        phi.reduce([('speed', 1), ('time', 0)])
        self.assertEqual(phi.variables, ['switch'])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))
        phi = DiscreteFactor(['speed', 'switch', 'time'], [3, 2, 2],
                             np.ones(12), state_names=self.sn1)
        phi = phi.reduce([('speed', 1), ('time', 0)], inplace=False)
        self.assertEqual(phi.variables, ['switch'])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))

    def test_reduce_cpd_statename(self):
        # Same four variants for TabularCPD.reduce; 'high' is state index 0.
        cpd = TabularCPD('grade', 3,
                         [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                          [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                          [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                         evidence=['diff', 'intel'], evidence_card=[2, 3],
                         state_names=self.sn2)
        cpd.reduce([('diff', 'high')])
        self.assertEqual(cpd.variable, 'grade')
        self.assertEqual(cpd.variables, ['grade', 'intel'])
        np_test.assert_array_equal(
            cpd.get_values(),
            np.array([[0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.8, 0.8, 0.8]]))
        cpd = TabularCPD('grade', 3,
                         [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                          [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                          [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                         evidence=['diff', 'intel'], evidence_card=[2, 3],
                         state_names=self.sn2)
        cpd.reduce([('diff', 0)])
        self.assertEqual(cpd.variable, 'grade')
        self.assertEqual(cpd.variables, ['grade', 'intel'])
        np_test.assert_array_equal(
            cpd.get_values(),
            np.array([[0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.8, 0.8, 0.8]]))
        cpd = TabularCPD('grade', 3,
                         [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                          [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                          [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                         evidence=['diff', 'intel'], evidence_card=[2, 3],
                         state_names=self.sn2)
        cpd = cpd.reduce([('diff', 'high')], inplace=False)
        self.assertEqual(cpd.variable, 'grade')
        self.assertEqual(cpd.variables, ['grade', 'intel'])
        np_test.assert_array_equal(
            cpd.get_values(),
            np.array([[0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.8, 0.8, 0.8]]))
        cpd = TabularCPD('grade', 3,
                         [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                          [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                          [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                         evidence=['diff', 'intel'], evidence_card=[2, 3],
                         state_names=self.sn2)
        cpd = cpd.reduce([('diff', 0)], inplace=False)
        self.assertEqual(cpd.variable, 'grade')
        self.assertEqual(cpd.variables, ['grade', 'intel'])
        np_test.assert_array_equal(
            cpd.get_values(),
            np.array([[0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.8, 0.8, 0.8]]))

    def test_inference_query_statename(self):
        # Named evidence ('poor') and indexed evidence (0) must agree.
        inf_op1 = self.model2.query(['grade'], evidence={'intel': 'poor'})
        inf_op2 = self.model2.query(['grade'], evidence={'intel': 0})
        req_op = {
            'grade': DiscreteFactor(['grade'], [3], np.array([0.1, 0.1, 0.8]))
        }
        self.assertEqual(inf_op1, inf_op2)
        self.assertEqual(inf_op1, req_op)
        # NOTE(review): the following assert duplicates the previous one.
        self.assertEqual(inf_op1, req_op)
        inf_op1 = self.model2.map_query(['grade'], evidence={'intel': 'poor'})
        inf_op2 = self.model2.map_query(['grade'], evidence={'intel': 0})
        req_op = {'grade': 'F'}
        self.assertEqual(inf_op1, inf_op2)
        self.assertEqual(inf_op1, req_op)
        # NOTE(review): duplicated assert kept verbatim.
        self.assertEqual(inf_op1, req_op)
class TestVariableElimination(unittest.TestCase): def setUp(self): self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'), ('G', 'L')]) cpd_a = TabularCPD('A', 2, [[0.2], [0.8]]) cpd_r = TabularCPD('R', 2, [[0.4], [0.6]]) cpd_j = TabularCPD('J', 2, [[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]], ['R', 'A'], [2, 2]) cpd_q = TabularCPD('Q', 2, [[0.9, 0.2], [0.1, 0.8]], ['J'], [2]) cpd_l = TabularCPD('L', 2, [[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]], ['G', 'J'], [2, 2]) cpd_g = TabularCPD('G', 2, [[0.6], [0.4]]) self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r) self.bayesian_inference = VariableElimination(self.bayesian_model) # All the values that are used for comparision in the all the tests are # found using SAMIAM (assuming that it is correct ;)) def test_query_single_variable(self): query_result = self.bayesian_inference.query(['J']) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.416, 0.584])) def test_query_multiple_variable(self): query_result = self.bayesian_inference.query(['Q', 'J']) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.416, 0.584])) np_test.assert_array_almost_equal(query_result['Q'].values, np.array([0.4912, 0.5088])) def test_query_single_variable_with_evidence(self): query_result = self.bayesian_inference.query(variables=['J'], evidence={ 'A': 0, 'R': 1 }) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.60, 0.40])) def test_query_multiple_variable_with_evidence(self): query_result = self.bayesian_inference.query(variables=['J', 'Q'], evidence={ 'A': 0, 'R': 0, 'G': 0, 'L': 1 }) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.818182, 0.181818])) np_test.assert_array_almost_equal(query_result['Q'].values, np.array([0.772727, 0.227273])) def test_query_multiple_times(self): # This just tests that the models are not getting modified while querying them query_result = 
self.bayesian_inference.query(['J']) query_result = self.bayesian_inference.query(['J']) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.416, 0.584])) query_result = self.bayesian_inference.query(['Q', 'J']) query_result = self.bayesian_inference.query(['Q', 'J']) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.416, 0.584])) np_test.assert_array_almost_equal(query_result['Q'].values, np.array([0.4912, 0.5088])) query_result = self.bayesian_inference.query(variables=['J'], evidence={ 'A': 0, 'R': 1 }) query_result = self.bayesian_inference.query(variables=['J'], evidence={ 'A': 0, 'R': 1 }) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.60, 0.40])) query_result = self.bayesian_inference.query(variables=['J', 'Q'], evidence={ 'A': 0, 'R': 0, 'G': 0, 'L': 1 }) query_result = self.bayesian_inference.query(variables=['J', 'Q'], evidence={ 'A': 0, 'R': 0, 'G': 0, 'L': 1 }) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.818182, 0.181818])) np_test.assert_array_almost_equal(query_result['Q'].values, np.array([0.772727, 0.227273])) def test_max_marginal(self): np_test.assert_almost_equal(self.bayesian_inference.max_marginal(), 0.1659, decimal=4) def test_max_marginal_var(self): np_test.assert_almost_equal(self.bayesian_inference.max_marginal(['G' ]), 0.5714, decimal=4) def test_max_marginal_var1(self): np_test.assert_almost_equal(self.bayesian_inference.max_marginal( ['G', 'R']), 0.4055, decimal=4) def test_max_marginal_var2(self): np_test.assert_almost_equal(self.bayesian_inference.max_marginal( ['G', 'R', 'A']), 0.3260, decimal=4) def test_map_query(self): map_query = self.bayesian_inference.map_query() self.assertDictEqual(map_query, { 'A': 1, 'R': 1, 'J': 1, 'Q': 1, 'G': 0, 'L': 0 }) def test_map_query_with_evidence(self): map_query = self.bayesian_inference.map_query(['A', 'R', 'L'], { 'J': 0, 'Q': 1, 'G': 0 }) self.assertDictEqual(map_query, {'A': 1, 'R': 0, 'L': 0}) def 
test_induced_graph(self): induced_graph = self.bayesian_inference.induced_graph( ['G', 'Q', 'A', 'J', 'L', 'R']) result_edges = sorted([sorted(x) for x in induced_graph.edges()]) self.assertEqual([['A', 'J'], ['A', 'R'], ['G', 'J'], ['G', 'L'], ['J', 'L'], ['J', 'Q'], ['J', 'R'], ['L', 'R']], result_edges) def test_induced_width(self): result_width = self.bayesian_inference.induced_width( ['G', 'Q', 'A', 'J', 'L', 'R']) self.assertEqual(2, result_width) def tearDown(self): del self.bayesian_inference del self.bayesian_model
class TestVariableEliminationMarkov(unittest.TestCase): def setUp(self): # It is just a moralised version of the above Bayesian network so all the results are same. Only factors # are under consideration for inference so this should be fine. self.markov_model = MarkovModel([ ("A", "J"), ("R", "J"), ("J", "Q"), ("J", "L"), ("G", "L"), ("A", "R"), ("J", "G"), ]) factor_a = TabularCPD("A", 2, values=[[0.2], [0.8]]).to_factor() factor_r = TabularCPD("R", 2, values=[[0.4], [0.6]]).to_factor() factor_j = TabularCPD( "J", 2, values=[[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]], evidence=["A", "R"], evidence_card=[2, 2], ).to_factor() factor_q = TabularCPD("Q", 2, values=[[0.9, 0.2], [0.1, 0.8]], evidence=["J"], evidence_card=[2]).to_factor() factor_l = TabularCPD( "L", 2, values=[[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]], evidence=["J", "G"], evidence_card=[2, 2], ).to_factor() factor_g = TabularCPD("G", 2, [[0.6], [0.4]]).to_factor() self.markov_model.add_factors(factor_a, factor_r, factor_j, factor_q, factor_l, factor_g) self.markov_inference = VariableElimination(self.markov_model) # All the values that are used for comparision in the all the tests are # found using SAMIAM (assuming that it is correct ;)) def test_query_single_variable(self): query_result = self.markov_inference.query(["J"]) self.assertEqual( query_result, DiscreteFactor(variables=["J"], cardinality=[2], values=np.array([0.416, 0.584])), ) def test_query_multiple_variable(self): query_result = self.markov_inference.query(["Q", "J"]) self.assertEqual( query_result, DiscreteFactor( variables=["Q", "J"], cardinality=[2, 2], values=np.array([[0.3744, 0.1168], [0.0416, 0.4672]]), ), ) def test_query_single_variable_with_evidence(self): query_result = self.markov_inference.query(variables=["J"], evidence={ "A": 0, "R": 1 }) self.assertEqual( query_result, DiscreteFactor(variables=["J"], cardinality=[2], values=[0.6, 0.4]), ) def test_query_multiple_variable_with_evidence(self): query_result = 
self.markov_inference.query(variables=["J", "Q"], evidence={ "A": 0, "R": 0, "G": 0, "L": 1 }) self.assertEqual( query_result, DiscreteFactor( variables=["Q", "J"], cardinality=[2, 2], values=np.array([[0.081, 0.004], [0.009, 0.016]]), ), ) def test_query_multiple_times(self): # This just tests that the models are not getting modified while querying them query_result = self.markov_inference.query(["J"]) query_result = self.markov_inference.query(["J"]) self.assertEqual( query_result, DiscreteFactor(variables=["J"], cardinality=[2], values=np.array([0.416, 0.584])), ) query_result = self.markov_inference.query(["Q", "J"]) query_result = self.markov_inference.query(["Q", "J"]) self.assertEqual( query_result, DiscreteFactor( variables=["Q", "J"], cardinality=[2, 2], values=np.array([[0.3744, 0.1168], [0.0416, 0.4672]]), ), ) query_result = self.markov_inference.query(variables=["J"], evidence={ "A": 0, "R": 1 }) query_result = self.markov_inference.query(variables=["J"], evidence={ "A": 0, "R": 1 }) self.assertEqual( query_result, DiscreteFactor(variables=["J"], cardinality=[2], values=[0.6, 0.4]), ) query_result = self.markov_inference.query(variables=["J", "Q"], evidence={ "A": 0, "R": 0, "G": 0, "L": 1 }) query_result = self.markov_inference.query(variables=["J", "Q"], evidence={ "A": 0, "R": 0, "G": 0, "L": 1 }) self.assertEqual( query_result, DiscreteFactor( variables=["Q", "J"], cardinality=[2, 2], values=np.array([[0.081, 0.004], [0.009, 0.016]]), ), ) def test_max_marginal(self): np_test.assert_almost_equal(self.markov_inference.max_marginal(), 0.1659, decimal=4) def test_max_marginal_var(self): np_test.assert_almost_equal(self.markov_inference.max_marginal(["G"]), 0.1659, decimal=4) def test_max_marginal_var1(self): np_test.assert_almost_equal(self.markov_inference.max_marginal( ["G", "R"]), 0.1659, decimal=4) def test_max_marginal_var2(self): np_test.assert_almost_equal(self.markov_inference.max_marginal( ["G", "R", "A"]), 0.1659, decimal=4) def 
test_map_query(self): map_query = self.markov_inference.map_query() self.assertDictEqual(map_query, { "A": 1, "R": 1, "J": 1, "Q": 1, "G": 0, "L": 0 }) def test_map_query_with_evidence(self): map_query = self.markov_inference.map_query(["A", "R", "L"], { "J": 0, "Q": 1, "G": 0 }) self.assertDictEqual(map_query, {"A": 1, "R": 0, "L": 0}) def test_induced_graph(self): induced_graph = self.markov_inference.induced_graph( ["G", "Q", "A", "J", "L", "R"]) result_edges = sorted([sorted(x) for x in induced_graph.edges()]) self.assertEqual( [ ["A", "J"], ["A", "R"], ["G", "J"], ["G", "L"], ["J", "L"], ["J", "Q"], ["J", "R"], ["L", "R"], ], result_edges, ) def test_induced_width(self): result_width = self.markov_inference.induced_width( ["G", "Q", "A", "J", "L", "R"]) self.assertEqual(2, result_width) def tearDown(self): del self.markov_inference del self.markov_model
# Show the learned dependency structure (directed edges) of the network.
print(model.edges())

# Inspect the conditional probability values of one node.
print(model.get_cpds('Pclass').values)

from pgmpy.inference import VariableElimination

model_infer = VariableElimination(model)

# Posterior of 'Survived' given the lowest fare band.
q = model_infer.query(variables=['Survived'], evidence={'Fare': 0})
print(q)
'''
+------------+-----------------+
| Survived   | phi(Survived)   |
+============+=================+
| Survived_0 | 0.6341          |
+------------+-----------------+
| Survived_1 | 0.3659          |
+------------+-----------------+
'''

# Most probable joint assignment of the features given survival.
q = model_infer.map_query(variables=['Fare', 'Age', 'Sex', 'Pclass', 'Cabin'],
                          evidence={'Survived': 1})
print(q)
# {'Sex': 0, 'Fare': 0, 'Age': 1, 'Pclass': 2, 'Cabin': 0}

# Predict 'Survived' on the held-out rows and report accuracy.
predict_data = test.drop(columns=['Survived'], axis=1)
y_pred = model.predict(predict_data)
y_survived = y_pred['Survived'].values
test_survived = test['Survived'].values
print('y_pred:', y_survived)
print("test:", test_survived)
print((y_survived == test_survived).sum() / len(test))
# test-set accuracy: 0.8131868131868132
cpd_s = TabularCPD(variable='S', variable_card=2,
                   values=[[0.95, 0.2], [0.05, 0.8]],
                   evidence=['I'], evidence_card=[2])

# Attach the CPDs to the DAG.
model.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)

# Validate the model: checks the structure and that every CPD is
# well defined and sums to 1.
model.check_model()

# All CPDs of the network.
model.get_cpds()

# CPD of node G.
# print(model.get_cpds('G'))

# Cardinality of node G.
model.get_cardinality('G')

# Local independencies implied by the network.
model.local_independencies(['D', 'I', 'S', 'G', 'L'])

from pgmpy.inference import VariableElimination

infer = VariableElimination(model)

# Marginal of G (all other variables eliminated).
# NOTE(review): indexing the query result with ['G'] assumes the old
# pgmpy API that returned a dict of factors — confirm the pgmpy version.
print(infer.query(['G'])['G'])

# Conditional distributions of G given evidence.
print(infer.query(['G'], evidence={'D': 1, 'I': 1})['G'])
print(111, infer.query(['G'], evidence={'I': 1, 'L': 1, 'D': 1})['G'])

# MAP state of G. FIX: `variables` must be a list — the original passed
# the bare string 'G', which only worked by accident because iterating
# the one-character string yields ['G'].
print(infer.map_query(['G']))
print(infer.map_query(['G'], evidence={'D': 0, 'I': 1}))
print(infer.map_query(['G'], evidence={'D': 0, 'I': 1, 'L': 1, 'S': 1}))
class StateNameDecorator(unittest.TestCase):
    """Checks that factor/CPD/inference APIs accept both named states and
    plain integer indices, and that both spellings give identical results."""

    def setUp(self):
        self.sn2 = {
            "grade": ["A", "B", "F"],
            "diff": ["high", "low"],
            "intel": ["poor", "good", "very good"],
        }
        self.sn1 = {
            "speed": ["low", "medium", "high"],
            "switch": ["on", "off"],
            "time": ["day", "night"],
        }
        self.phi1 = DiscreteFactor(["speed", "switch", "time"], [3, 2, 2],
                                   np.ones(12))
        self.phi2 = DiscreteFactor(["speed", "switch", "time"], [3, 2, 2],
                                   np.ones(12), state_names=self.sn1)
        self.cpd1 = TabularCPD(
            "grade",
            3,
            [
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8],
            ],
            evidence=["diff", "intel"],
            evidence_card=[2, 3],
        )
        self.cpd2 = TabularCPD(
            "grade",
            3,
            [
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8],
            ],
            evidence=["diff", "intel"],
            evidence_card=[2, 3],
            state_names=self.sn2,
        )
        # Two structurally identical student models: one parameterized with
        # plain indices, one with named states.
        student = BayesianModel([("diff", "grade"), ("intel", "grade")])
        student_state_names = BayesianModel([("diff", "grade"),
                                             ("intel", "grade")])
        diff_cpd = TabularCPD("diff", 2, [[0.2, 0.8]])
        intel_cpd = TabularCPD("intel", 2, [[0.3, 0.7]])
        grade_cpd = TabularCPD(
            "grade",
            3,
            [[0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.1, 0.1], [0.8, 0.8, 0.8, 0.8]],
            evidence=["diff", "intel"],
            evidence_card=[2, 2],
        )
        diff_cpd_state_names = TabularCPD(
            variable="diff",
            variable_card=2,
            values=[[0.2, 0.8]],
            state_names={"diff": ["high", "low"]},
        )
        # NOTE(review): variable_card=2 but three state names are supplied —
        # looks inconsistent; confirm against the pgmpy version's validation.
        intel_cpd_state_names = TabularCPD(
            variable="intel",
            variable_card=2,
            values=[[0.3, 0.7]],
            state_names={"intel": ["poor", "good", "very good"]},
        )
        grade_cpd_state_names = TabularCPD(
            "grade",
            3,
            [[0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.1, 0.1], [0.8, 0.8, 0.8, 0.8]],
            evidence=["diff", "intel"],
            evidence_card=[2, 2],
            state_names=self.sn2,
        )
        student.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        student_state_names.add_cpds(diff_cpd_state_names,
                                     intel_cpd_state_names,
                                     grade_cpd_state_names)
        self.model_no_state_names = VariableElimination(student)
        self.model_with_state_names = VariableElimination(student_state_names)

    def test_assignment_statename(self):
        # assignment() reports named states when the factor has them,
        # integer indices otherwise.
        req_op1 = [
            [("speed", "low"), ("switch", "on"), ("time", "night")],
            [("speed", "low"), ("switch", "off"), ("time", "day")],
        ]
        req_op2 = [
            [("speed", 0), ("switch", 0), ("time", 1)],
            [("speed", 0), ("switch", 1), ("time", 0)],
        ]
        self.assertEqual(self.phi1.assignment([1, 2]), req_op2)
        self.assertEqual(self.phi2.assignment([1, 2]), req_op1)

    def test_factor_reduce_statename(self):
        # Reduce by named states, in place.
        phi = DiscreteFactor(["speed", "switch", "time"], [3, 2, 2],
                             np.ones(12), state_names=self.sn1)
        phi.reduce([("speed", "medium"), ("time", "day")])
        self.assertEqual(phi.variables, ["switch"])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))

        # Reduce by named states, returning a new factor.
        phi = DiscreteFactor(["speed", "switch", "time"], [3, 2, 2],
                             np.ones(12), state_names=self.sn1)
        phi = phi.reduce([("speed", "medium"), ("time", "day")], inplace=False)
        self.assertEqual(phi.variables, ["switch"])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))

        # Reduce by integer indices, in place.
        phi = DiscreteFactor(["speed", "switch", "time"], [3, 2, 2],
                             np.ones(12))
        phi.reduce([("speed", 1), ("time", 0)])
        self.assertEqual(phi.variables, ["switch"])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))

        # Reduce by integer indices, returning a new factor.
        phi = DiscreteFactor(["speed", "switch", "time"], [3, 2, 2],
                             np.ones(12))
        phi = phi.reduce([("speed", 1), ("time", 0)], inplace=False)
        self.assertEqual(phi.variables, ["switch"])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))

    def test_reduce_cpd_statename(self):
        # Reduce a CPD by a named evidence state, in place.
        cpd = TabularCPD(
            "grade",
            3,
            [
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8],
            ],
            evidence=["diff", "intel"],
            evidence_card=[2, 3],
            state_names=self.sn2,
        )
        cpd.reduce([("diff", "high")])
        self.assertEqual(cpd.variable, "grade")
        self.assertEqual(cpd.variables, ["grade", "intel"])
        np_test.assert_array_equal(
            cpd.get_values(),
            np.array([[0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.8, 0.8, 0.8]]),
        )

        # Reduce by an integer index, in place.
        cpd = TabularCPD(
            "grade",
            3,
            [
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8],
            ],
            evidence=["diff", "intel"],
            evidence_card=[2, 3],
        )
        cpd.reduce([("diff", 0)])
        self.assertEqual(cpd.variable, "grade")
        self.assertEqual(cpd.variables, ["grade", "intel"])
        np_test.assert_array_equal(
            cpd.get_values(),
            np.array([[0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.8, 0.8, 0.8]]),
        )

        # Reduce by a named state, returning a new CPD.
        cpd = TabularCPD(
            "grade",
            3,
            [
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8],
            ],
            evidence=["diff", "intel"],
            evidence_card=[2, 3],
            state_names=self.sn2,
        )
        cpd = cpd.reduce([("diff", "high")], inplace=False)
        self.assertEqual(cpd.variable, "grade")
        self.assertEqual(cpd.variables, ["grade", "intel"])
        np_test.assert_array_equal(
            cpd.get_values(),
            np.array([[0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.8, 0.8, 0.8]]),
        )

        # Reduce by an integer index, returning a new CPD.
        cpd = TabularCPD(
            "grade",
            3,
            [
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8],
            ],
            evidence=["diff", "intel"],
            evidence_card=[2, 3],
        )
        cpd = cpd.reduce([("diff", 0)], inplace=False)
        self.assertEqual(cpd.variable, "grade")
        self.assertEqual(cpd.variables, ["grade", "intel"])
        np_test.assert_array_equal(
            cpd.get_values(),
            np.array([[0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.8, 0.8, 0.8]]),
        )

    def test_inference_query_statename(self):
        # Querying with a named evidence state vs. an index must give the
        # same posterior.
        inf_op1 = self.model_with_state_names.query(["grade"],
                                                    evidence={"intel": "poor"})
        inf_op2 = self.model_no_state_names.query(["grade"],
                                                  evidence={"intel": 0})
        req_op = DiscreteFactor(["grade"], [3], np.array([0.1, 0.1, 0.8]))
        self.assertEqual(inf_op1, req_op)
        # BUG FIX: the second assertion re-checked inf_op1 (copy-paste),
        # leaving the index-based result unverified.
        self.assertEqual(inf_op2, req_op)

        # MAP query: the state-name model reports the named state, the
        # plain model reports its index.
        inf_op1 = self.model_with_state_names.map_query(
            ["grade"], evidence={"intel": "poor"})
        inf_op2 = self.model_no_state_names.map_query(["grade"],
                                                      evidence={"intel": 0})
        req_op1 = {"grade": "F"}
        req_op2 = {"grade": 2}
        self.assertEqual(inf_op1, req_op1)
        self.assertEqual(inf_op2, req_op2)
class TestVariableEliminationMarkov(unittest.TestCase): def setUp(self): # It is just a moralised version of the above Bayesian network so all the results are same. Only factors # are under consideration for inference so this should be fine. self.markov_model = MarkovModel([('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'), ('G', 'L'), ('A', 'R'), ('J', 'G')]) factor_a = TabularCPD('A', 2, values=[[0.2], [0.8]]).to_factor() factor_r = TabularCPD('R', 2, values=[[0.4], [0.6]]).to_factor() factor_j = TabularCPD('J', 2, values=[[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]], evidence=['A', 'R'], evidence_card=[2, 2]).to_factor() factor_q = TabularCPD('Q', 2, values=[[0.9, 0.2], [0.1, 0.8]], evidence=['J'], evidence_card=[2]).to_factor() factor_l = TabularCPD('L', 2, values=[[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]], evidence=['J', 'G'], evidence_card=[2, 2]).to_factor() factor_g = TabularCPD('G', 2, [[0.6], [0.4]]).to_factor() self.markov_model.add_factors(factor_a, factor_r, factor_j, factor_q, factor_l, factor_g) self.markov_inference = VariableElimination(self.markov_model) # All the values that are used for comparision in the all the tests are # found using SAMIAM (assuming that it is correct ;)) def test_query_single_variable(self): query_result = self.markov_inference.query(['J']) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.416, 0.584])) def test_query_multiple_variable(self): query_result = self.markov_inference.query(['Q', 'J']) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.416, 0.584])) np_test.assert_array_almost_equal(query_result['Q'].values, np.array([0.4912, 0.5088])) def test_query_single_variable_with_evidence(self): query_result = self.markov_inference.query(variables=['J'], evidence={'A': 0, 'R': 1}) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.60, 0.40])) def test_query_multiple_variable_with_evidence(self): query_result = self.markov_inference.query(variables=['J', 'Q'], 
evidence={'A': 0, 'R': 0, 'G': 0, 'L': 1}) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.818182, 0.181818])) np_test.assert_array_almost_equal(query_result['Q'].values, np.array([0.772727, 0.227273])) def test_query_multiple_times(self): # This just tests that the models are not getting modified while querying them query_result = self.markov_inference.query(['J']) query_result = self.markov_inference.query(['J']) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.416, 0.584])) query_result = self.markov_inference.query(['Q', 'J']) query_result = self.markov_inference.query(['Q', 'J']) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.416, 0.584])) np_test.assert_array_almost_equal(query_result['Q'].values, np.array([0.4912, 0.5088])) query_result = self.markov_inference.query(variables=['J'], evidence={'A': 0, 'R': 1}) query_result = self.markov_inference.query(variables=['J'], evidence={'A': 0, 'R': 1}) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.60, 0.40])) query_result = self.markov_inference.query(variables=['J', 'Q'], evidence={'A': 0, 'R': 0, 'G': 0, 'L': 1}) query_result = self.markov_inference.query(variables=['J', 'Q'], evidence={'A': 0, 'R': 0, 'G': 0, 'L': 1}) np_test.assert_array_almost_equal(query_result['J'].values, np.array([0.818182, 0.181818])) np_test.assert_array_almost_equal(query_result['Q'].values, np.array([0.772727, 0.227273])) def test_max_marginal(self): np_test.assert_almost_equal(self.markov_inference.max_marginal(), 0.1659, decimal=4) def test_max_marginal_var(self): np_test.assert_almost_equal(self.markov_inference.max_marginal(['G']), 0.5714, decimal=4) def test_max_marginal_var1(self): np_test.assert_almost_equal(self.markov_inference.max_marginal(['G', 'R']), 0.4055, decimal=4) def test_max_marginal_var2(self): np_test.assert_almost_equal(self.markov_inference.max_marginal(['G', 'R', 'A']), 0.3260, decimal=4) def test_map_query(self): 
map_query = self.markov_inference.map_query() self.assertDictEqual(map_query, {'A': 1, 'R': 1, 'J': 1, 'Q': 1, 'G': 0, 'L': 0}) def test_map_query_with_evidence(self): map_query = self.markov_inference.map_query(['A', 'R', 'L'], {'J': 0, 'Q': 1, 'G': 0}) self.assertDictEqual(map_query, {'A': 1, 'R': 0, 'L': 0}) def test_induced_graph(self): induced_graph = self.markov_inference.induced_graph(['G', 'Q', 'A', 'J', 'L', 'R']) result_edges = sorted([sorted(x) for x in induced_graph.edges()]) self.assertEqual([['A', 'J'], ['A', 'R'], ['G', 'J'], ['G', 'L'], ['J', 'L'], ['J', 'Q'], ['J', 'R'], ['L', 'R']], result_edges) def test_induced_width(self): result_width = self.markov_inference.induced_width(['G', 'Q', 'A', 'J', 'L', 'R']) self.assertEqual(2, result_width) def tearDown(self): del self.markov_inference del self.markov_model
# Attach the CPDs to the sprinkler network.
model2.add_cpds(cpd_r, cpd_s, cpd_g2)

# check_model verifies the network structure and that every CPD is
# correctly defined and sums to 1.
model2.check_model()

infer2 = VariableElimination(model2)

# Exercise 2 d)
# Answer to "Was it due to an overnight rain? Or that last night she
# forgot to turn off her sprinkler?"
print("E2 D:")
print(infer2.query(variables=['R'], evidence={'G': 'Wet'}))
print(infer2.query(variables=['S'], evidence={'G': 'Wet'}))
print(infer2.map_query(variables=['R', 'S'], evidence={'G': 'Wet'}))

# Extended model with the additional information about John's grass.
model3 = BayesianModel([('R', 'G'), ('S', 'G'), ('R', 'J')])

# Only one new CPD is needed; given rain, John's grass is certainly wet.
cpd_j = TabularCPD(variable='J', variable_card=2,
                   values=[[0.85, 0], [0.15, 1]],
                   evidence=['R'], evidence_card=[2],
                   state_names={'J': ['JDry', 'JWet'],
                                'R': ['NotRain', 'Rain']})
# Convert the Bayesian network to its moralised Markov equivalent.
mm = model2.to_markov_model()
print("Nodes of model2 as markov:")
print(mm.nodes())
print("Edges of model2 as markov:")
print(mm.edges())
print("----------------------------------------------")
# FIX: banner read "Inference for th dataset".
print("Inference for the dataset")
from pgmpy.inference import VariableElimination
infer1 = VariableElimination(mm)
# NOTE(review): indexing query results with ['x4'] assumes the old
# dict-returning pgmpy API — confirm the installed version.
print("Inference of x4:")
print(infer1.query(['x4'])['x4'])
print("Inference of x5|x2:")
print(infer1.query(['x5'], evidence={'x2': 1})['x5'])
print("---------------------------------------------------------")
print("AND DATASET")

# Learn a structure for the AND dataset with hill climbing + K2 score.
data = pd.read_csv("AND-Features.csv")
hc = HillClimbSearch(data, scoring_method=K2Score(data))
best_model = hc.estimate()
print("Edges of bayesian model")
print(best_model.edges())

'''Inference for and dataset'''
mm1 = best_model.to_markov_model()
print("Edges of markov model")
print(mm1.edges())
print("Checking converted model:", mm1.check_model())
print("----------------------------------------------")
print("Inference for and dataset")
# Removed duplicate import: VariableElimination is already in scope.
infer2 = VariableElimination(mm1)
print(infer2.map_query(['f1', 'f9']))
# There can be cases in which we want to compute the conditional distribution, let's say $ P(G | D=0, I=1) $. In such cases we need to modify our equations a bit:
#
# $ P(G, D=0, I=1) = \sum_L \sum_S P(L|G) * P(S|I=1) * P(G|D=0, I=1) * P(D=0) * P(I=1) $
# $ P(G, D=0, I=1) = P(D=0) * P(I=1) * P(G | D=0, I=1) * \sum_L P(L | G) * \sum_S P(S | I=1) $
#
# Since each of the two sums equals 1, normalizing this joint over the states of $ G $ gives $ P(G | D=0, I=1) $.
#
# In pgmpy we will just need to pass an extra argument in the case of conditional distributions:

# In[41]:

print(infer.query(['G'], evidence={'D': 0, 'I': 1})['G'])

# #### Predicting values from new data points

# Predicting values from new data points is quite similar to computing the conditional probabilities. We need to query for the variable that we need to predict given all the other features. The only difference is that rather than getting the probability distribution we are interested in getting the most probable state of the variable.
#
# In pgmpy this is known as MAP query. Here's an example:

# In[42]:

infer.map_query('G')

# In[46]:

infer.map_query('G', evidence={'D': 0, 'I': 1})

# In[47]:

infer.map_query('G', evidence={'D': 0, 'I': 1, 'L': 1, 'S': 1})

# ### 5. Other methods for Inference

# Even though exact inference algorithms like Variable Elimination optimize the inference task, it is still computationally quite expensive in the case of large models. For such cases we can use approximate algorithms like Message Passing Algorithms, Sampling Algorithms etc. We will talk about a few other exact and approximate algorithms in later parts of the tutorial.
from pgmpy.inference import VariableElimination

# Create a Bayesian model
model = BayesianModel(....)
cpd_var1 = TabularCPD(....)
cpd_var2 = TabularCPD(....)
cpd_var3 = TabularCPD(....)
cpd_var4 = TabularCPD(....)
cpd_var5 = TabularCPD(....)
model.add_cpds(..........)

# Calculating the max marginals
model_inference = VariableElimination(model)

model_inference.map_query(variables=['late_for_school'])
" {'late_for_school': 0} "

model_inference.map_query(variables=['late_for_school', 'accident'])
" {'accident': 1, 'late_for_school': 0} "

model_inference.map_query(variables=['late_for_school'], evidence={'accident': 1})
" {'late_for_school': 0} "

model_inference.map_query(variables=['no_of_people'], evidence={'location': 1},
                          elimination_order=['quality', 'cost', 'location'])
# can specify elimination sequence / otherwise system will choose automatically

-5-

" Using Model for Prediction - Example "
bn.summarise_variable( posterior, code_to_value_map ) # with pgmpy reply = G_infer.query( variables=[var_target], evidence=evidences ) # returns DiscreteFactor print( reply ) # test joint reply = G_infer.query( variables=["mut17q21"], evidence=evidences ) print( reply ) reply = G_infer.query( variables=["loss 17"], evidence=evidences ) print( reply ) reply = G_infer.query( variables=["mut17q21", "loss 17"], evidence=evidences ) print( reply ) # MAP query reply = G_infer.map_query( variables=[var_target], evidence=evidences ) #returns dict print( reply ) # MPE query # TODO reply = G_infer.map_query( variables=[var_target], evidence=evidences ) #returns dict print( reply ) # test independencies var_source = hp.var_questions_wrapper( "list", "Which variable do you want to check for independencies?", df_values ) # remove already added variables from choices df_values_dropped = df_values.drop( var_source, 1, inplace=False ) var_evidence = hp.var_questions_wrapper( "checkbox", "Which variables do you want to add as evidence?",
evidence_card=[2]) cpd_l = TabularCPD(variable='L', variable_card=2, values=[[0.1, 0.4, 0.99], [0.9, 0.6, 0.01]], evidence=['G'], evidence_card=[3]) # 将有向无环图与条件概率分布表关联 model.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s) # 验证模型:检查网络结构和CPD,并验证CPD是否正确定义和总和为1 model.check_model() #获取上述代码构建的概率图模型: print(model.get_cpds()) #获取结点G的概率表: print(model.get_cpds('G')) #获取整个贝叶斯网络的局部依赖 print(model.local_independencies(['D', 'I', 'S', 'G', 'L'])) from pgmpy.inference import VariableElimination infer = VariableElimination(model) print(infer.query(['G'])['G']) #计算P(G|D=0,I=1)的条件分布概率 print(infer.query(['G'], evidence={'D': 0, 'I': 1})['G']) print(infer.map_query('G'))
print(seen_dat)
sd = seen_dat.iloc[1:, 2]
verify_model.fit(seen_dat, sd)

inference = VariableElimination(verify_model)
# NOTE(review): ' staff_of_d_f' and 'exit_stroke_d_f ' carry stray spaces —
# looks accidental; confirm against the actual column names.
inference.induced_graph([
    'pen_pressure_f', 'letter_spacing_f', 'size_f', 'dimension_f',
    'is_lowercase_f', 'is_continuous_f', 'slantness_f', 'tilt_f',
    'entry_stroke_a_f', 'staff_of_a_f', 'formation_n_f', ' staff_of_d_f',
    'exit_stroke_d_f ', 'word_formation', 'constancy'
])

# MAP state of 'pen_pressure_g' given the observed features.
phi_query = inference.map_query(variables=['pen_pressure_g'],
                                evidence={
                                    'pen_pressure_f': 1,
                                    'letter_spacing_g': 1,
                                    'size_f': 2,
                                    'is_lowercase_f': 2,
                                    'slantness_f': 0,
                                    'tilt_g': 1,
                                    'entry_stroke_a_f': 1,
                                    'staff_of_a_f': 0,
                                    'formation_n_f': 1
                                })
print(phi_query)

val_dataset = pd.read_csv('dataset_seen_validation_siamese.csv')
val1 = val_dataset['label']

# BUG FIX: map_query returns a dict {variable: state}; the original
# compared each label against the dict itself, which is always False,
# so the reported accuracy was always 0.
predicted_state = phi_query['pen_pressure_g']
r = 0
for i in val1:
    if i == predicted_state:
        r = r + 1

# BUG FIX: divide by the actual number of validation rows instead of the
# hard-coded 905.
accuracy = (r / len(val1)) * 100
print("Seen dataset accuracy:")
print(accuracy)
# Read the test-set attributes and record the predicted Class per row.
a = []
model_infer = VariableElimination(model)
with open(r'C:\Users\haomiaowu\Desktop\BN-Cheminformatics\test.csv', 'r',
          encoding="utf-8-sig") as f:
    reader = csv.reader(f)
    # The first row holds the column names, reused as dict keys below.
    fieldnames = next(reader)
    csv_reader = csv.DictReader(f, fieldnames=fieldnames)
    for row in csv_reader:
        evidence = {key: int(value) for key, value in row.items()}
        prob_class = model_infer.map_query(variables=["Class"],
                                           evidence=evidence)
        a.append(list(prob_class.values())[0])

print('预测结果:')
print(a)
print(
    '--------------------------------------------------------------------------------------------------'
)
print('已预测化合物个数:', len(a))

# Load the ground-truth Class labels of the test set.
d = pd.read_csv(
    r'C:\Users\haomiaowu\Desktop\BN-Cheminformatics\test-class.csv')
b = d['Class'].values
print('测试集包含化合物个数:', len(b))
class TestVariableElimination(unittest.TestCase): def setUp(self): self.bayesian_model = BayesianModel([("A", "J"), ("R", "J"), ("J", "Q"), ("J", "L"), ("G", "L")]) cpd_a = TabularCPD("A", 2, values=[[0.2], [0.8]]) cpd_r = TabularCPD("R", 2, values=[[0.4], [0.6]]) cpd_j = TabularCPD( "J", 2, values=[[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]], evidence=["A", "R"], evidence_card=[2, 2], ) cpd_q = TabularCPD("Q", 2, values=[[0.9, 0.2], [0.1, 0.8]], evidence=["J"], evidence_card=[2]) cpd_l = TabularCPD( "L", 2, values=[[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]], evidence=["J", "G"], evidence_card=[2, 2], ) cpd_g = TabularCPD("G", 2, values=[[0.6], [0.4]]) self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r) self.bayesian_inference = VariableElimination(self.bayesian_model) # All the values that are used for comparision in the all the tests are # found using SAMIAM (assuming that it is correct ;)) def test_query_single_variable(self): query_result = self.bayesian_inference.query(["J"]) self.assertEqual( query_result, DiscreteFactor(variables=["J"], cardinality=[2], values=[0.416, 0.584]), ) def test_query_multiple_variable(self): query_result = self.bayesian_inference.query(["Q", "J"]) self.assertEqual( query_result, DiscreteFactor( variables=["J", "Q"], cardinality=[2, 2], values=np.array([[0.3744, 0.0416], [0.1168, 0.4672]]), ), ) def test_query_single_variable_with_evidence(self): query_result = self.bayesian_inference.query(variables=["J"], evidence={ "A": 0, "R": 1 }) self.assertEqual( query_result, DiscreteFactor(variables=["J"], cardinality=[2], values=[0.6, 0.4]), ) def test_query_multiple_variable_with_evidence(self): query_result = self.bayesian_inference.query(variables=["J", "Q"], evidence={ "A": 0, "R": 0, "G": 0, "L": 1 }) self.assertEqual( query_result, DiscreteFactor( variables=["J", "Q"], cardinality=[2, 2], values=np.array([[0.73636364, 0.08181818], [0.03636364, 0.14545455]]), ), ) def test_query_multiple_times(self): # This just 
tests that the models are not getting modified while querying them query_result = self.bayesian_inference.query(["J"]) query_result = self.bayesian_inference.query(["J"]) self.assertEqual( query_result, DiscreteFactor(variables=["J"], cardinality=[2], values=np.array([0.416, 0.584])), ) query_result = self.bayesian_inference.query(["Q", "J"]) query_result = self.bayesian_inference.query(["Q", "J"]) self.assertEqual( query_result, DiscreteFactor( variables=["J", "Q"], cardinality=[2, 2], values=np.array([[0.3744, 0.0416], [0.1168, 0.4672]]), ), ) query_result = self.bayesian_inference.query(variables=["J"], evidence={ "A": 0, "R": 1 }) query_result = self.bayesian_inference.query(variables=["J"], evidence={ "A": 0, "R": 1 }) self.assertEqual( query_result, DiscreteFactor(variables=["J"], cardinality=[2], values=[0.6, 0.4]), ) query_result = self.bayesian_inference.query(variables=["J", "Q"], evidence={ "A": 0, "R": 0, "G": 0, "L": 1 }) query_result = self.bayesian_inference.query(variables=["J", "Q"], evidence={ "A": 0, "R": 0, "G": 0, "L": 1 }) self.assertEqual( query_result, DiscreteFactor( variables=["J", "Q"], cardinality=[2, 2], values=np.array([[0.73636364, 0.08181818], [0.03636364, 0.14545455]]), ), ) def test_max_marginal(self): np_test.assert_almost_equal(self.bayesian_inference.max_marginal(), 0.1659, decimal=4) def test_max_marginal_var(self): np_test.assert_almost_equal(self.bayesian_inference.max_marginal(["G" ]), 0.5714, decimal=4) def test_max_marginal_var1(self): np_test.assert_almost_equal(self.bayesian_inference.max_marginal( ["G", "R"]), 0.3740, decimal=4) def test_max_marginal_var2(self): np_test.assert_almost_equal(self.bayesian_inference.max_marginal( ["G", "R", "A"]), 0.3061, decimal=4) def test_map_query(self): map_query = self.bayesian_inference.map_query() self.assertDictEqual(map_query, { "A": 1, "R": 1, "J": 1, "Q": 1, "G": 0, "L": 0 }) def test_map_query_with_evidence(self): map_query = self.bayesian_inference.map_query(["A", "R", "L"], 
{ "J": 0, "Q": 1, "G": 0 }) self.assertDictEqual(map_query, {"A": 1, "R": 0, "L": 0}) def test_induced_graph(self): induced_graph = self.bayesian_inference.induced_graph( ["G", "Q", "A", "J", "L", "R"]) result_edges = sorted([sorted(x) for x in induced_graph.edges()]) self.assertEqual( [ ["A", "J"], ["A", "R"], ["G", "J"], ["G", "L"], ["J", "L"], ["J", "Q"], ["J", "R"], ["L", "R"], ], result_edges, ) def test_induced_width(self): result_width = self.bayesian_inference.induced_width( ["G", "Q", "A", "J", "L", "R"]) self.assertEqual(2, result_width) def tearDown(self): del self.bayesian_inference del self.bayesian_model
class BaseModel(object):
    """Container for a probabilistic graphical model.

    Holds the DAG (networkx), the parameterized model (pgmpy CPDs) and an
    inference engine (VariableElimination).

    Args:
        config_file_path (str): path to a JSON file describing the DAG and
            its conditional probability tables.
        data (dict): alternative to the JSON file -- a dict with the same
            structure, used to initialize the object.

    TODO: currently only works with binary-valued variables.
    """

    def __init__(self, config_file_path=None, data=None):
        self.config_file_path = config_file_path
        self.digraph = None
        self.pgmodel = None
        self.infer_system = None
        self.ebunch = None
        self.nodes = None
        self.variables_dict = dict()
        if config_file_path:
            with open(config_file_path) as json_file:
                data = json.load(json_file)
        if data.get('digraph'):
            self.ebunch = data['digraph']
            self.pgmodel = BayesianModel(self.ebunch)
            self.nodes = data.get('nodes', [])
            if self.nodes:
                self.pgmodel.add_nodes_from(self.nodes)
            self.init_graph(ebunch=self.ebunch, nodes=self.nodes)
        if data.get('cpdtables'):
            self.init_model(self.ebunch, data['cpdtables'])
        for table in self.pgmodel.get_cpds():
            logging.info(table)
        self.target = data['target']
        self.nature_variables = data['nature_variables']
        self.intervention_variables = data['interventions']

    def init_graph(self, ebunch, nodes=None, plot=True,
                   graph_id='figures/dag'):
        """Build the DAG as a networkx DiGraph from an edge list.

        Args:
            ebunch (list): edge list of the graph.
            nodes (list): optional isolated nodes to add.
            plot (bool): if True, save an image of the graph.
            graph_id (str): name used to identify the saved graph image.
        """
        self.digraph = nx.DiGraph(ebunch)
        # FIX: was a mutable default argument (``nodes=[]``).
        for node in (nodes or []):
            self.digraph.add_node(node)
        if plot:
            self.save_digraph_as_img(graph_id)

    def reset(self, pgmodel, ebunch, nodes=None):
        """Replace the model and graph, refreshing the inference engine.

        Used for dynamic models where only the variables are preserved.

        Args:
            pgmodel: the new (already parameterized) pgmpy model.
            ebunch (list): new edge list.
            nodes (list): optional isolated nodes.
        """
        # FIX: was a mutable default argument (``nodes=[]``); keep the
        # stored value a list for backward compatibility.
        nodes = [] if nodes is None else nodes
        self.init_graph(ebunch, nodes=nodes, plot=False)
        for variable in pgmodel.nodes():
            self.variables_dict[variable] = [0, 1]
        self.ebunch = ebunch
        self.nodes = nodes
        self.pgmodel = pgmodel
        self.update_infer_system()

    def show_graph(self):
        """Display the causal graph with matplotlib."""
        pos = nx.circular_layout(self.digraph)
        nx.draw(self.digraph, with_labels=True, pos=pos)
        plt.show()
        plt.clf()

    def init_model(self, ebunch, cpdtables, plot=False, pgm_id='pgm'):
        """Create the PGM (currently a BayesianModel) from CPD tables.

        Args:
            ebunch (list): edge list of the graph.
            cpdtables (list): dicts, each holding the data needed to build
                one TabularCPD.
            plot (bool): if True, save an image of the model.
            pgm_id (str): name used to identify the saved image.

        Raises:
            ValueError: if the assembled model fails ``check_model()``.
        """
        for cpdtable in cpdtables:
            self.variables_dict[cpdtable['variable']] = \
                list(range(cpdtable['variable_card']))
            table = TabularCPD(variable=cpdtable['variable'],
                               variable_card=cpdtable['variable_card'],
                               values=cpdtable['values'],
                               evidence_card=cpdtable.get('evidence_card'),
                               evidence=cpdtable.get('evidence'))
            # Keep parent ordering deterministic across runs.
            if cpdtable.get('evidence'):
                table.reorder_parents(sorted(cpdtable.get('evidence')))
            self.pgmodel.add_cpds(table)
        if not self.pgmodel.check_model():
            raise ValueError("Error with CPDTs")
        self.update_infer_system()
        if plot:
            self.save_pgm_as_img(pgm_id)

    def update_infer_system(self):
        """Rebuild the VariableElimination engine for the current model."""
        self.infer_system = VariableElimination(self.pgmodel)

    def get_variable_values(self, variable):
        """Return the list of values a variable can take (None if unknown)."""
        return self.variables_dict.get(variable)

    def get_target_variable(self):
        """Return the list of target variables."""
        return self.target

    def get_intervention_variables(self):
        """Return the list of intervenable variables."""
        return self.intervention_variables

    def get_nature_variables(self):
        """Return the list of variables moved by nature."""
        return self.nature_variables

    def get_ebunch(self):
        """Return the model's edge list."""
        return self.ebunch

    def get_nodes(self):
        """Return the model's isolated nodes."""
        return self.nodes

    def get_nature_var_prob(self, nature_variable):
        """Return the probabilities of a nature variable's values.

        Args:
            nature_variable (str): variable name.

        Returns:
            numpy.ndarray of probabilities, or None if ``nature_variable``
            is not a nature variable.
        """
        if nature_variable in self.nature_variables:
            return np.squeeze(
                self.pgmodel.get_cpds(nature_variable).get_values())

    def conditional_probability(self, variable, evidence):
        """P(variable | evidence) for all values, via variable elimination."""
        return self.infer_system.query([variable], evidence=evidence,
                                       show_progress=False)

    def make_inference(self, variable, evidence):
        """Return the MAP value of ``variable`` given ``evidence``.

        Args:
            variable (str): variable to infer.
            evidence (dict): evidence as {variable: value}.
        """
        return self.infer_system.map_query([variable], evidence=evidence,
                                           show_progress=False)[variable]

    def save_digraph_as_img(self, filename):
        """Save the networkx DAG as an image (and display it)."""
        pos = nx.circular_layout(self.digraph)
        nx.draw(self.digraph, with_labels=True, pos=pos)
        plt.savefig(filename)
        plt.show()
        plt.clf()

    def save_pgm_as_img(self, filename):
        """Save the pgmpy DAG as an image (and display it).

        FIX: save before showing -- the original called ``plt.show()`` first,
        which can leave an empty canvas to be written and was inconsistent
        with ``save_digraph_as_img``.
        """
        nx.draw(self.digraph, with_labels=True)
        plt.savefig(filename)
        plt.show()
        plt.clf()

    def get_graph_toposort(self):
        """Return the DAG's variables in topological order."""
        return list(nx.topological_sort(self.digraph))

    def get_nodes_and_predecessors(self):
        """Return {node: sorted predecessors} for every node."""
        return {node: sorted(self.digraph.predecessors(node))
                for node in self.digraph.nodes}

    def get_number_of_values(self, variable):
        """Return how many values ``variable`` can take (0 if unknown)."""
        return len(self.variables_dict.get(variable, []))

    def get_joint_prob_observation(self, observation):
        """Return the joint probability of an observation dict."""
        prob = self.infer_system.query(variables=list(observation.keys()),
                                       joint=True, show_progress=False)
        variables = prob.variables
        values = prob.values
        # Index the joint table one axis at a time, in the factor's own
        # variable order.
        for i in range(len(variables)):
            value = observation[variables[i]]
            values = values[value]
        return values
''' Inference and Validation ''' from pgmpy.inference import VariableElimination import csv f = open("validation_data.csv") reader = csv.reader(f) inference = VariableElimination(model) valid = 0 invalid = 0 for row in reader: br = row[3] ig = row[1] #map_quey returns Dictionary!!! if int(row[0]) == inference.map_query(["SC"], evidence={"BR": int(br)})["SC"]: valid += 1 else: invalid += 1 total = valid + invalid print(valid) print(invalid) accuracy = float(valid / total) print("Accuracy: ", accuracy) f.close() os.system('spd-say -i -10 -p 50 -t female3 "Training Finished!"')
class TestVariableElimination(unittest.TestCase):
    """Exercise VariableElimination on a small five-node network."""

    def setUp(self):
        # Network: A,R -> J -> Q ; J,G -> L
        self.bayesian_model = BayesianModel(
            [('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'), ('G', 'L')])
        cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])
        cpd_j = TabularCPD('J', 2,
                           [[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
                           ['R', 'A'], [2, 2])
        cpd_q = TabularCPD('Q', 2, [[0.9, 0.2], [0.1, 0.8]], ['J'], [2])
        cpd_l = TabularCPD('L', 2,
                           [[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]],
                           ['G', 'J'], [2, 2])
        cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q,
                                     cpd_r)
        self.bayesian_inference = VariableElimination(self.bayesian_model)

    # All reference values in these tests were obtained with SAMIAM
    # (assuming that it is correct ;)).

    def test_query_single_variable(self):
        res = self.bayesian_inference.query(['J'])
        np_test.assert_array_almost_equal(res['J'].values,
                                          np.array([0.416, 0.584]))

    def test_query_multiple_variable(self):
        res = self.bayesian_inference.query(['Q', 'J'])
        np_test.assert_array_almost_equal(res['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(res['Q'].values,
                                          np.array([0.4912, 0.5088]))

    def test_query_single_variable_with_evidence(self):
        res = self.bayesian_inference.query(variables=['J'],
                                            evidence={'A': 0, 'R': 1})
        np_test.assert_array_almost_equal(res['J'].values,
                                          np.array([0.60, 0.40]))

    def test_query_multiple_variable_with_evidence(self):
        res = self.bayesian_inference.query(
            variables=['J', 'Q'],
            evidence={'A': 0, 'R': 0, 'G': 0, 'L': 1})
        np_test.assert_array_almost_equal(res['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(res['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_query_multiple_times(self):
        # Querying must not mutate the model: repeat every query and check
        # that the second answer still matches the reference.
        self.bayesian_inference.query(['J'])
        res = self.bayesian_inference.query(['J'])
        np_test.assert_array_almost_equal(res['J'].values,
                                          np.array([0.416, 0.584]))

        self.bayesian_inference.query(['Q', 'J'])
        res = self.bayesian_inference.query(['Q', 'J'])
        np_test.assert_array_almost_equal(res['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(res['Q'].values,
                                          np.array([0.4912, 0.5088]))

        self.bayesian_inference.query(variables=['J'],
                                      evidence={'A': 0, 'R': 1})
        res = self.bayesian_inference.query(variables=['J'],
                                            evidence={'A': 0, 'R': 1})
        np_test.assert_array_almost_equal(res['J'].values,
                                          np.array([0.60, 0.40]))

        self.bayesian_inference.query(
            variables=['J', 'Q'],
            evidence={'A': 0, 'R': 0, 'G': 0, 'L': 1})
        res = self.bayesian_inference.query(
            variables=['J', 'Q'],
            evidence={'A': 0, 'R': 0, 'G': 0, 'L': 1})
        np_test.assert_array_almost_equal(res['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(res['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_max_marginal(self):
        np_test.assert_almost_equal(self.bayesian_inference.max_marginal(),
                                    0.1659, decimal=4)

    def test_max_marginal_var(self):
        np_test.assert_almost_equal(
            self.bayesian_inference.max_marginal(['G']), 0.5714, decimal=4)

    def test_max_marginal_var1(self):
        np_test.assert_almost_equal(
            self.bayesian_inference.max_marginal(['G', 'R']), 0.4055,
            decimal=4)

    def test_max_marginal_var2(self):
        np_test.assert_almost_equal(
            self.bayesian_inference.max_marginal(['G', 'R', 'A']), 0.3260,
            decimal=4)

    def test_map_query(self):
        assignment = self.bayesian_inference.map_query()
        self.assertDictEqual(assignment, {'A': 1, 'R': 1, 'J': 1, 'Q': 1,
                                          'G': 0, 'L': 0})

    def test_map_query_with_evidence(self):
        assignment = self.bayesian_inference.map_query(
            ['A', 'R', 'L'], {'J': 0, 'Q': 1, 'G': 0})
        self.assertDictEqual(assignment, {'A': 1, 'R': 0, 'L': 0})

    def test_induced_graph(self):
        graph = self.bayesian_inference.induced_graph(
            ['G', 'Q', 'A', 'J', 'L', 'R'])
        edges = sorted(sorted(edge) for edge in graph.edges())
        self.assertEqual([['A', 'J'], ['A', 'R'], ['G', 'J'], ['G', 'L'],
                          ['J', 'L'], ['J', 'Q'], ['J', 'R'], ['L', 'R']],
                         edges)

    def test_induced_width(self):
        width = self.bayesian_inference.induced_width(
            ['G', 'Q', 'A', 'J', 'L', 'R'])
        self.assertEqual(2, width)

    def tearDown(self):
        del self.bayesian_inference
        del self.bayesian_model
t2 = timeit.timeit(lambda: inference_query(sachs_stretch, 'Erk', { 'P38': [0, 0, 1], 'Jnk': [0, 1, 0] }), number=N) print("\nTimes for: variable elimination, transformations, fraction, for", N, "runs") print(t1) print(t2) print("How much faster is transformations inference:", t1 / t2) print("\n* MAP query") vars = pick_from_list(model.nodes, 3) print(inference.map_query(variables=vars)) print("") sachs_stretch = stretch(model) print(inference_map_query(sachs_stretch, variables=vars)) #sachs_stretch = stretch(model,observed=True) #sachs_joint = evaluate_stretch(sachs_stretch['channels']) #print( sachs_joint.MAP() )
# for cpd in model.get_cpds(): # print(cpd) predict_data = test.drop(columns=["Survived"], axis=1) y_pred = model.predict(predict_data) (y_pred["Survived"] == test["Survived"]).sum() / len(test) # 测试集精度 model_infer = VariableElimination(model) q = model_infer.query(variables=["Survived"], evidence={"Fare": 0}) print(q["Survived"]) q = model_infer.map_query( variables=["Fare", "Age", "Sex", "Pclass", "Cabin"], evidence={"Survived": 1} ) print(q) # # 用结构学习建立模型 hc = HillClimbSearch(train, scoring_method=BicScore(train)) best_model = hc.estimate() print(best_model.edges()) best_model.fit( train, estimator=BayesianEstimator, prior_type="BDeu" ) # default equivalent_sample_size=5 predict_data = test.drop(columns=["Survived"], axis=1)
q = ve.query(variables=['age'], evidence={'delay': '0'}) print(q) print("Results using ratio function") for age in STATE_NAMES['age']: print('age : ' + age + '\n') for delay in STATE_NAMES['delay']: print('delay : ' + delay + '\n') print( ratio(data, lambda t: t['age'] == age, lambda t: t['delay'] == delay)) separator() q = ve.map_query(variables=None, evidence=None) print("MAP-query\n") print(q) mm = ve.max_marginal(variables=None, evidence=None) print("Max-Marginal query\n") print(mm) mm2 = ve.map_query(variables=['age'], evidence={'delay': '0'}) # End of Task 2 # Task 3 ------------ Reversed PGM data = pd.DataFrame(data=RAW_DATA) model = BayesianModel([('age', 'delay'), ('gender', 'delay'),
#Question2 #create a Bayesian Model and generate CPD using MLE from pgmpy.models import BayesianModel from pgmpy.estimators import MaximumLikelihoodEstimator estimator = MaximumLikelihoodEstimator(model, data) cpds = estimator.get_parameters() #Write your code fruit_cpd = cpds[0] size_cpd = cpds[1] tasty_cpd = cpds[2] print(tasty_cpd) #write cpd of tasty to csv res = pd.DataFrame(b) res.to_csv('/code/output/output2.csv', index=False, header=False) #Question3 for i in range(0, 3): model.add_cpds(cpds[i]) #create a Bayesian model and run variable elimination algorithm on it from pgmpy.models import BayesianModel from pgmpy.inference import VariableElimination model_inference = VariableElimination(model) query = model_inference.map_query(variables=['tasty']) #Expected Output print(query) result = pd.DataFrame(query, index=[0]) #write you output to csv result.to_csv('/code/output/output3.csv', index=False)
class StateNameDecorator(unittest.TestCase):
    """State-name handling in factors, CPDs, and inference queries."""

    def setUp(self):
        self.sn2 = {'grade': ['A', 'B', 'F'],
                    'diff': ['high', 'low'],
                    'intel': ['poor', 'good', 'very good']}
        self.sn1 = {'speed': ['low', 'medium', 'high'],
                    'switch': ['on', 'off'],
                    'time': ['day', 'night']}
        self.phi1 = DiscreteFactor(['speed', 'switch', 'time'],
                                   [3, 2, 2], np.ones(12))
        self.phi2 = DiscreteFactor(['speed', 'switch', 'time'],
                                   [3, 2, 2], np.ones(12),
                                   state_names=self.sn1)
        self.cpd1 = TabularCPD('grade', 3,
                               [[0.1] * 6, [0.1] * 6, [0.8] * 6],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3])
        self.cpd2 = TabularCPD('grade', 3,
                               [[0.1] * 6, [0.1] * 6, [0.8] * 6],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3],
                               state_names=self.sn2)
        student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, [[0.2, 0.8]])
        intel_cpd = TabularCPD('intel', 2, [[0.3, 0.7]])
        grade_cpd = TabularCPD('grade', 3,
                               [[0.1] * 4, [0.1] * 4, [0.8] * 4],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 2])
        student.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        self.model1 = VariableElimination(student)
        self.model2 = VariableElimination(student, state_names=self.sn2)

    def _fresh_speed_factor(self):
        # Helper: named-state factor, rebuilt fresh for each reduce case.
        return DiscreteFactor(['speed', 'switch', 'time'], [3, 2, 2],
                              np.ones(12), state_names=self.sn1)

    def _fresh_grade_cpd(self):
        # Helper: named-state grade CPD, rebuilt fresh for each reduce case.
        return TabularCPD('grade', 3, [[0.1] * 6, [0.1] * 6, [0.8] * 6],
                          evidence=['diff', 'intel'], evidence_card=[2, 3],
                          state_names=self.sn2)

    def test_assignment_statename(self):
        named = [[('speed', 'low'), ('switch', 'on'), ('time', 'night')],
                 [('speed', 'low'), ('switch', 'off'), ('time', 'day')]]
        indexed = [[('speed', 0), ('switch', 0), ('time', 1)],
                   [('speed', 0), ('switch', 1), ('time', 0)]]
        self.assertEqual(self.phi1.assignment([1, 2]), indexed)
        self.assertEqual(self.phi2.assignment([1, 2]), named)

    def test_factor_reduce_statename(self):
        # In-place reduce by state names.
        phi = self._fresh_speed_factor()
        phi.reduce([('speed', 'medium'), ('time', 'day')])
        self.assertEqual(phi.variables, ['switch'])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))
        # Out-of-place reduce by state names.
        phi = self._fresh_speed_factor()
        phi = phi.reduce([('speed', 'medium'), ('time', 'day')],
                         inplace=False)
        self.assertEqual(phi.variables, ['switch'])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))
        # In-place reduce by integer states.
        phi = self._fresh_speed_factor()
        phi.reduce([('speed', 1), ('time', 0)])
        self.assertEqual(phi.variables, ['switch'])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))
        # Out-of-place reduce by integer states.
        phi = self._fresh_speed_factor()
        phi = phi.reduce([('speed', 1), ('time', 0)], inplace=False)
        self.assertEqual(phi.variables, ['switch'])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))

    def test_reduce_cpd_statename(self):
        expected = np.array([[0.1] * 3, [0.1] * 3, [0.8] * 3])
        # In-place, by state name.
        cpd = self._fresh_grade_cpd()
        cpd.reduce([('diff', 'high')])
        self.assertEqual(cpd.variable, 'grade')
        self.assertEqual(cpd.variables, ['grade', 'intel'])
        np_test.assert_array_equal(cpd.get_values(), expected)
        # In-place, by integer state.
        cpd = self._fresh_grade_cpd()
        cpd.reduce([('diff', 0)])
        self.assertEqual(cpd.variable, 'grade')
        self.assertEqual(cpd.variables, ['grade', 'intel'])
        np_test.assert_array_equal(cpd.get_values(), expected)
        # Out-of-place, by state name.
        cpd = self._fresh_grade_cpd()
        cpd = cpd.reduce([('diff', 'high')], inplace=False)
        self.assertEqual(cpd.variable, 'grade')
        self.assertEqual(cpd.variables, ['grade', 'intel'])
        np_test.assert_array_equal(cpd.get_values(), expected)
        # Out-of-place, by integer state.
        cpd = self._fresh_grade_cpd()
        cpd = cpd.reduce([('diff', 0)], inplace=False)
        self.assertEqual(cpd.variable, 'grade')
        self.assertEqual(cpd.variables, ['grade', 'intel'])
        np_test.assert_array_equal(cpd.get_values(), expected)

    def test_inference_query_statename(self):
        inf_op1 = self.model2.query(['grade'], evidence={'intel': 'poor'})
        inf_op2 = self.model2.query(['grade'], evidence={'intel': 0})
        req_op = {'grade': DiscreteFactor(['grade'], [3],
                                          np.array([0.1, 0.1, 0.8]))}
        self.assertEqual(inf_op1, inf_op2)
        self.assertEqual(inf_op1, req_op)
        self.assertEqual(inf_op1, req_op)  # duplicated in original; kept
        inf_op1 = self.model2.map_query(['grade'],
                                        evidence={'intel': 'poor'})
        inf_op2 = self.model2.map_query(['grade'], evidence={'intel': 0})
        req_op = {'grade': 'F'}
        self.assertEqual(inf_op1, inf_op2)
        self.assertEqual(inf_op1, req_op)
        self.assertEqual(inf_op1, req_op)  # duplicated in original; kept
inference_model = VariableElimination(BN_Model) true_label = test["Overall"] pred_label = test["Overall"] test = test.drop(columns=["Overall"]) cols = test.columns.values evidences_values = [None] * len(cols) true_values = [None] * len(true_label) cont = 0 for (idx, row) in test.iterrows(): evidences_values = {} for i in cols: evidences_values[i] = row[i] pred = inference_model.map_query(variables = ['Overall'], evidence = evidences_values) true_values[cont] = true_label[idx] cont = cont + 1 pred_label[idx] = pred["Overall"]+1 y_true = true_values y_pred = pred_label accuracy = accuracy_score(y_true, y_pred) recall = recall_score(y_true, y_pred, average="weighted") precision = precision_score(y_true, y_pred, average="weighted") f1 = f1_score(y_true, y_pred, average='weighted') # Real on the Y axis, pred on the X axis print("Accuracy: ", accuracy) # Sul totale numero di istanze true con label = 0, quanti ne ha predetti correttamente a 0?