Example #1
0
 def map_query(bayes_net, query_vars, evidence_vars):
     """Run a MAP query on *bayes_net* via variable elimination.

     Parameters
     ----------
     bayes_net : pgmpy model
         The Bayesian network to query.
     query_vars : list
         Variables whose joint MAP assignment is wanted.
     evidence_vars : dict or None
         Observed variable -> state mapping; an empty/falsy value means
         "no evidence".

     Returns
     -------
     dict
         Variable -> MAP state assignment.
     """
     bayes_net_infer = VariableElimination(bayes_net)
     # The original duplicated the whole call in an if/else that differed only
     # in the evidence argument; normalise falsy evidence to None instead.
     return bayes_net_infer.map_query(variables=query_vars,
                                      evidence=evidence_vars if evidence_vars else None,
                                      show_progress=False)
Example #2
0
def probnet_inference(model, h, b, d):
    """Threshold the raw h/b/d readings into binary states and return the
    MAP assignment of 'S' given that evidence."""
    evidence = {
        'H': int(h > 10),   # H is 1 once h exceeds 10
        'B': int(b > 20),   # B is 1 once b exceeds 20
        'D': int(d > 3),    # D is 1 once d exceeds 3
    }
    print(evidence['H'], evidence['B'], evidence['D'])
    engine = VariableElimination(model)
    return engine.map_query(['S'], evidence=evidence)
Example #3
0
    def predict(self, data):
        """
        Predicts states of all the missing variables.

        Parameters
        ----------
        data : pandas DataFrame object
            A DataFrame object with column names same as the variables in the model.

        Examples
        --------
        >>> import numpy as np
        >>> import pandas as pd
        >>> from pgmpy.models import BayesianModel
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> train_data = values[:800]
        >>> predict_data = values[800:]
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> model.fit(values)
        >>> predict_data = predict_data.copy()
        >>> predict_data.drop('E', axis=1, inplace=True)
        >>> y_pred = model.predict(predict_data)
        >>> y_pred
            E
        800 0
        801 1
        802 1
        803 1
        804 0
        ... ...
        993 0
        994 0
        995 1
        996 1
        997 0
        998 0
        999 0
        """
        from pgmpy.inference import VariableElimination

        # The data columns must be a strict subset of the model variables:
        # nothing to predict if everything is observed, and columns the model
        # does not know about are an error.
        if set(data.columns) == set(self.nodes()):
            raise ValueError("No variable missing in data. Nothing to predict")

        elif set(data.columns) - set(self.nodes()):
            raise ValueError("Data has variables which are not in the model")

        missing_variables = set(self.nodes()) - set(data.columns)
        # Accumulates, per missing variable, one predicted state per data row.
        pred_values = defaultdict(list)

        # Send state_names dict from one of the estimated CPDs to the inference class.
        model_inference = VariableElimination(
            self, state_names=self.get_cpds()[0].state_names)
        # One MAP query per row: the row's observed values are the evidence and
        # the MAP assignment of the missing variables becomes the prediction.
        for index, data_point in data.iterrows():
            states_dict = model_inference.map_query(
                variables=missing_variables, evidence=data_point.to_dict())
            for k, v in states_dict.items():
                pred_values[k].append(v)
        return pd.DataFrame(pred_values, index=data.index)
    def predict(self, data):
        """
        Predicts states of all the missing variables.

        Parameters
        ----------
        data : pandas DataFrame object
            A DataFrame whose columns are a strict subset of the model's
            variables; the remaining variables are predicted per row.

        Returns
        -------
        pandas DataFrame
            One column per missing variable, indexed like ``data``, holding
            the MAP state for each row.

        Raises
        ------
        ValueError
            If no variable is missing, or if ``data`` contains columns the
            model does not know about.
        """
        from pgmpy.inference import VariableElimination

        model_nodes = set(self.nodes())
        observed = set(data.columns)
        if observed == model_nodes:
            raise ValueError("No variable missing in data. Nothing to predict")
        elif observed - model_nodes:
            raise ValueError("Data has variables which are not in the model")

        missing_variables = model_nodes - observed
        pred_values = defaultdict(list)

        # Send state_names dict from one of the estimated CPDs to the inference class.
        inference = VariableElimination(self, state_names=self.get_cpds()[0].state_names)
        for _, row in data.iterrows():
            assignment = inference.map_query(variables=missing_variables, evidence=row.to_dict())
            for variable, state in assignment.items():
                pred_values[variable].append(state)
        return pd.DataFrame(pred_values, index=data.index)
    def map_query(self, targets, evidences, algorithm):
        """Run a MAP query over *targets* given *evidences*.

        Parameters
        ----------
        targets : iterable
            Variables whose MAP assignment is wanted.
        evidences : dict
            Observed variable -> state mapping.
        algorithm : str
            One of "Variable Elimination", "Belief Propagation" or "MPLP".

        Returns
        -------
        dict
            Variable -> MAP state assignment.

        Raises
        ------
        ValueError
            If *algorithm* is not one of the supported names.  (The original
            code fell through to an UnboundLocalError on ``model_infer``.)
        """
        if algorithm == "Variable Elimination":
            from pgmpy.inference import VariableElimination
            model_infer = VariableElimination(self.model_pgmpy)
        elif algorithm == "Belief Propagation":
            from pgmpy.inference import BeliefPropagation
            model_infer = BeliefPropagation(self.model_pgmpy)
        elif algorithm == "MPLP":
            from pgmpy.inference import Mplp
            # MPLP operates on factors, so convert to a Markov model first.
            model_infer = Mplp(self.model_pgmpy.to_markov_model())
        else:
            raise ValueError("Unsupported inference algorithm: {}".format(algorithm))

        return model_infer.map_query(variables=list(targets),
                                     evidence=evidences)
Example #6
0
    def Test_Data_Inference_map_n_steps(self, df_test, n_tsteps):
        """Run posterior and MAP queries for the next n_tsteps occupancy
        variables ('M_t', 'M_t+1', ...) for every row of df_test.

        For each row whose evidence values are all known to the trained model,
        records the two-state posterior ('_0'/'_1' columns), a thresholded
        occupancy value, and the MAP state ('_map' column); rows with unseen
        evidence values get NaN in all four columns.
        """
        # make a function that can predict N timesteps ahead.
        df_inference_results = df_test.filter(items=COLUMN_SEQUENCE).copy()
        infer = VariableElimination(self.model)

        # Per-column sets of values seen in df_test, used to reject evidence
        # the model cannot condition on.
        dict_unique_vals = dict(zip(df_test.columns, [df_test[i].unique() for i in df_test.columns]))
        result_list = ['M_t']
        if n_tsteps > 1:
            result_list = result_list+["M_t+{}".format(x) for x in range(1,n_tsteps)]
        count = 0
        # Every non-target column of each row becomes the evidence dict.
        for key, value in df_test.filter(items=[x for x in df_test.columns if x not in result_list]).to_dict('index').items():

                index_key = key
                # check_data_in_evidence presumably verifies each evidence
                # value appears in dict_unique_vals -- TODO confirm.
                if check_data_in_evidence(value, dict_unique_vals):
                    tic = time.time()
                    result = infer.query(variables=result_list,evidence=value)
                    toc = time.time() - tic
                    logging.info("thermostat {} - Elapsed seconds for query {:.2f}".format(self.thermostat.tstat_id, toc))

                    tic = time.time()
                    map_result = infer.map_query(variables=result_list,evidence=value)
                    toc = time.time() - tic
                    logging.info("thermostat {} - Elapsed seconds for MAP query {:.2f}".format(self.thermostat.tstat_id, toc))

                    # Store P(n=0), P(n=1), a derived occupancy value, and the
                    # MAP state for each target variable n.
                    for n in result_list:
                        df_inference_results.at[index_key,'{}_0'.format(n)] = result[n].values[0]
                        df_inference_results.at[index_key,'{}_1'.format(n)] = result[n].values[1]
                        # Map_Occ_Values looks like a threshold on P(n=1) -- verify.
                        df_inference_results.at[index_key,'{}'.format(n)] = Map_Occ_Values(result[n].values[1])
                        df_inference_results.at[index_key, '{}_map'.format(n)] = map_result[n]
                else:
                    # Evidence contains a value never seen in the data: mark
                    # all outputs for this row as missing.
                    for n in result_list:
                        df_inference_results.at[index_key,'{}_0'.format(n)] = np.nan
                        df_inference_results.at[index_key,'{}_1'.format(n)] = np.nan
                        df_inference_results.at[index_key,'{}'.format(n)] = np.nan
                        df_inference_results.at[index_key, '{}_map'.format(n)] = np.nan
                count+=1

        logging.info("thermostat {} - Iterations of test {}".format(self.thermostat.tstat_id, count))
        return df_inference_results
class TestVariableEliminationMarkov(unittest.TestCase):
    """VariableElimination tests run against a MarkovModel.

    The factors are built from TabularCPDs converted via to_factor(), so the
    model is the moralised equivalent of the corresponding Bayesian network
    and the expected numbers match the Bayesian-network tests.
    """

    def setUp(self):
        # It is just a moralised version of the above Bayesian network so all the results are same. Only factors
        # are under consideration for inference so this should be fine.
        self.markov_model = MarkovModel([('A', 'J'), ('R', 'J'), ('J', 'Q'),
                                         ('J', 'L'), ('G', 'L'), ('A', 'R'),
                                         ('J', 'G')])

        factor_a = TabularCPD('A', 2, values=[[0.2], [0.8]]).to_factor()
        factor_r = TabularCPD('R', 2, values=[[0.4], [0.6]]).to_factor()
        factor_j = TabularCPD('J',
                              2,
                              values=[[0.9, 0.6, 0.7, 0.1],
                                      [0.1, 0.4, 0.3, 0.9]],
                              evidence=['A', 'R'],
                              evidence_card=[2, 2]).to_factor()
        factor_q = TabularCPD('Q',
                              2,
                              values=[[0.9, 0.2], [0.1, 0.8]],
                              evidence=['J'],
                              evidence_card=[2]).to_factor()
        factor_l = TabularCPD('L',
                              2,
                              values=[[0.9, 0.45, 0.8, 0.1],
                                      [0.1, 0.55, 0.2, 0.9]],
                              evidence=['J', 'G'],
                              evidence_card=[2, 2]).to_factor()
        factor_g = TabularCPD('G', 2, [[0.6], [0.4]]).to_factor()

        self.markov_model.add_factors(factor_a, factor_r, factor_j, factor_q,
                                      factor_l, factor_g)
        self.markov_inference = VariableElimination(self.markov_model)

    # All the values that are used for comparision in the all the tests are
    # found using SAMIAM (assuming that it is correct ;))

    def test_query_single_variable(self):
        query_result = self.markov_inference.query(['J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))

    def test_query_multiple_variable(self):
        query_result = self.markov_inference.query(['Q', 'J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.4912, 0.5088]))

    def test_query_single_variable_with_evidence(self):
        query_result = self.markov_inference.query(variables=['J'],
                                                   evidence={
                                                       'A': 0,
                                                       'R': 1
                                                   })
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.60, 0.40]))

    def test_query_multiple_variable_with_evidence(self):
        query_result = self.markov_inference.query(variables=['J', 'Q'],
                                                   evidence={
                                                       'A': 0,
                                                       'R': 0,
                                                       'G': 0,
                                                       'L': 1
                                                   })
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_query_multiple_times(self):
        # This just tests that the models are not getting modified while querying them
        query_result = self.markov_inference.query(['J'])
        query_result = self.markov_inference.query(['J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))

        query_result = self.markov_inference.query(['Q', 'J'])
        query_result = self.markov_inference.query(['Q', 'J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.4912, 0.5088]))

        query_result = self.markov_inference.query(variables=['J'],
                                                   evidence={
                                                       'A': 0,
                                                       'R': 1
                                                   })
        query_result = self.markov_inference.query(variables=['J'],
                                                   evidence={
                                                       'A': 0,
                                                       'R': 1
                                                   })
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.60, 0.40]))

        query_result = self.markov_inference.query(variables=['J', 'Q'],
                                                   evidence={
                                                       'A': 0,
                                                       'R': 0,
                                                       'G': 0,
                                                       'L': 1
                                                   })
        query_result = self.markov_inference.query(variables=['J', 'Q'],
                                                   evidence={
                                                       'A': 0,
                                                       'R': 0,
                                                       'G': 0,
                                                       'L': 1
                                                   })
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_max_marginal(self):
        np_test.assert_almost_equal(self.markov_inference.max_marginal(),
                                    0.1659,
                                    decimal=4)

    def test_max_marginal_var(self):
        np_test.assert_almost_equal(self.markov_inference.max_marginal(['G']),
                                    0.5714,
                                    decimal=4)

    def test_max_marginal_var1(self):
        np_test.assert_almost_equal(self.markov_inference.max_marginal(
            ['G', 'R']),
                                    0.4055,
                                    decimal=4)

    def test_max_marginal_var2(self):
        np_test.assert_almost_equal(self.markov_inference.max_marginal(
            ['G', 'R', 'A']),
                                    0.3260,
                                    decimal=4)

    def test_map_query(self):
        # MAP over all variables when none are specified.
        map_query = self.markov_inference.map_query()
        self.assertDictEqual(map_query, {
            'A': 1,
            'R': 1,
            'J': 1,
            'Q': 1,
            'G': 0,
            'L': 0
        })

    def test_map_query_with_evidence(self):
        map_query = self.markov_inference.map_query(['A', 'R', 'L'], {
            'J': 0,
            'Q': 1,
            'G': 0
        })
        self.assertDictEqual(map_query, {'A': 1, 'R': 0, 'L': 0})

    def test_induced_graph(self):
        induced_graph = self.markov_inference.induced_graph(
            ['G', 'Q', 'A', 'J', 'L', 'R'])
        result_edges = sorted([sorted(x) for x in induced_graph.edges()])
        self.assertEqual([['A', 'J'], ['A', 'R'], ['G', 'J'], ['G', 'L'],
                          ['J', 'L'], ['J', 'Q'], ['J', 'R'], ['L', 'R']],
                         result_edges)

    def test_induced_width(self):
        result_width = self.markov_inference.induced_width(
            ['G', 'Q', 'A', 'J', 'L', 'R'])
        self.assertEqual(2, result_width)

    def tearDown(self):
        del self.markov_inference
        del self.markov_model
Example #8
0
def _load_split(fileloc, features_data_f, features_data_g):
	"""Load one train/validation CSV split and join in the f/g feature columns.

	Parameters
	----------
	fileloc : str
		CSV with 'left', 'right' and 'label' columns.
	features_data_f, features_data_g : pandas DataFrame
		Feature tables with columns prefixed 'f' / 'g' respectively.

	Returns
	-------
	(DataFrame, ndarray)
		The cleaned split (15 f-features, 15 g-features, label) and its
		integer values array.
	"""
	split = pd.read_csv(fileloc, usecols=['left', 'right', 'label'])
	merged_f = split.merge(features_data_f, left_on='left', right_on='fimagename')
	merged_g = split.merge(features_data_g, left_on='right', right_on='gimagename')
	merged_f = merged_f.drop(['left', 'right', 'fimagename', 'label'], axis=1)
	merged_g = merged_g.drop(['left', 'right', 'gimagename', 'label'], axis=1)
	final = pd.concat([merged_f, merged_g, split.loc[:, 'label']], axis=1)
	# Bug fix: the original called replace() without keeping the result, so
	# infinities were never turned into NaN before dropna()/astype(int).
	final = final.replace([np.inf, -np.inf], np.nan)
	final.dropna(inplace=True)
	final = final.astype(int)
	return final, final.values

def _fit_cpds(model, traindata, feature_names):
	"""Fit *model* on *traindata* and attach BayesianEstimator CPDs in place."""
	model.fit(traindata)
	estimator = BayesianEstimator(model, traindata)
	cpds = []
	for feature_name in feature_names:
		cpds.append(estimator.estimate_cpd('f' + feature_name))
		cpds.append(estimator.estimate_cpd('g' + feature_name))
	cpds.append(estimator.estimate_cpd('label'))
	model.add_cpds(*cpds)

def _accuracy(model, data_NDArray, feature_names):
	"""Return the percentage of rows whose MAP-predicted 'label' matches col 30.

	Each row holds 15 f-features (cols 0-14), 15 g-features (cols 15-29) and
	the label (col 30); feature states are shifted from 1-based to 0-based.
	"""
	ve = VariableElimination(model)
	predictions = []
	for i in range(data_NDArray.shape[0]):
		evidence = {}
		for index, feature_name in enumerate(feature_names):
			evidence['f' + feature_name] = data_NDArray[i, index] - 1
			# Bug fix: the original read row i+15 (a different sample, and an
			# out-of-range row for the last 15 samples); the g-features live
			# in columns 15-29 of the *same* row.
			evidence['g' + feature_name] = data_NDArray[i, index + 15] - 1
		# Bug fix: in the original's shuffled/unseen validation sections this
		# query was dedented out of the row loop, so only one row was scored.
		result = ve.map_query(variables=['label'], evidence=evidence)
		predictions.append(result['label'])
	correct = sum(
		1 for i in range(len(predictions))
		if int(predictions[i]) == int(data_NDArray[i, 30]))
	return correct / len(predictions) * 100

def main():
	"""Train and evaluate a Bayesian model on the seen/shuffled/unseen splits,
	printing train/validation accuracy for each."""
	# Fetching features data
	features_data = pd.read_csv(fileloc_features)
	features_data_f = features_data.add_prefix('f')
	features_data_g = features_data.add_prefix('g')
	featureNamesList = ["pen_pressure", "letter_spacing", "size", "dimension",
		"is_lowercase", "is_continuous", "slantness", "tilt", "entry_stroke_a",
		"staff_of_a", "formation_n", "staff_of_d", "exit_stroke_d",
		"word_formation", "constancy"]
	# Load every split once; the near-identical copy-pasted loading blocks of
	# the original are collapsed into _load_split.
	seen_train, seen_train_arr = _load_split(fileloc_seen_training, features_data_f, features_data_g)
	_, seen_val_arr = _load_split(fileloc_seen_validation, features_data_f, features_data_g)
	shuffled_train, shuffled_train_arr = _load_split(fileloc_shuffled_training, features_data_f, features_data_g)
	_, shuffled_val_arr = _load_split(fileloc_shuffled_validation, features_data_f, features_data_g)
	unseen_train, unseen_train_arr = _load_split(fileloc_unseen_training, features_data_f, features_data_g)
	_, unseen_val_arr = _load_split(fileloc_unseen_validation, features_data_f, features_data_g)
	# Structure learning on the raw features (result unused beyond logging,
	# exactly as in the original).
	features_only_data = features_data[featureNamesList]
	initial_hcs = HillClimbSearch(features_only_data)
	initial_model = initial_hcs.estimate()
	#print(initial_model.edges())
	print("Hill Climb Done")
	# The hand-crafted structure is identical for the f- and g-halves, so the
	# edge list is written once and prefixed for each half.
	intra_edges = [
		('pen_pressure', 'is_lowercase'), ('pen_pressure', 'letter_spacing'),
		('size', 'slantness'), ('size', 'pen_pressure'),
		('size', 'staff_of_d'), ('size', 'letter_spacing'),
		('size', 'exit_stroke_d'), ('size', 'entry_stroke_a'),
		('dimension', 'size'), ('dimension', 'is_continuous'),
		('dimension', 'slantness'), ('dimension', 'pen_pressure'),
		('is_lowercase', 'staff_of_a'), ('is_lowercase', 'exit_stroke_d'),
		('is_continuous', 'exit_stroke_d'), ('is_continuous', 'letter_spacing'),
		('is_continuous', 'entry_stroke_a'), ('is_continuous', 'staff_of_a'),
		('is_continuous', 'is_lowercase'), ('slantness', 'is_continuous'),
		('slantness', 'tilt'), ('entry_stroke_a', 'pen_pressure'),
		('formation_n', 'constancy'), ('formation_n', 'word_formation'),
		('formation_n', 'dimension'), ('formation_n', 'staff_of_d'),
		('formation_n', 'is_continuous'), ('formation_n', 'size'),
		('formation_n', 'staff_of_a'), ('staff_of_d', 'is_continuous'),
		('staff_of_d', 'exit_stroke_d'), ('staff_of_d', 'is_lowercase'),
		('staff_of_d', 'slantness'), ('staff_of_d', 'entry_stroke_a'),
		('word_formation', 'dimension'), ('word_formation', 'staff_of_a'),
		('word_formation', 'size'), ('word_formation', 'staff_of_d'),
		('word_formation', 'constancy'), ('constancy', 'staff_of_a'),
		('constancy', 'letter_spacing'), ('constancy', 'dimension')]
	edges = [('f' + a, 'f' + b) for a, b in intra_edges]
	edges += [('g' + a, 'g' + b) for a, b in intra_edges]
	edges += [('fis_continuous', 'label'), ('fword_formation', 'label'),
		('gis_continuous', 'label'), ('gword_formation', 'label')]
	basemodel = BayesianModel(edges)
	accuracies = {}
	# Training and testing the Seen model
	model_seen = basemodel.copy()
	_fit_cpds(model_seen, seen_train, featureNamesList)
	print("CPDs Calculated")
	accuracies["seen_train"] = _accuracy(model_seen, seen_train_arr, featureNamesList)
	print("Bayesian Model Accuracy for Seen Training Data = " + str(accuracies["seen_train"]))
	accuracies["seen_validation"] = _accuracy(model_seen, seen_val_arr, featureNamesList)
	print("Bayesian Model Accuracy for Seen Validation Data = " + str(accuracies["seen_validation"]))
	# Training and testing the Shuffled model
	model_shuffled = basemodel.copy()
	_fit_cpds(model_shuffled, shuffled_train, featureNamesList)
	accuracies["shuffled_train"] = _accuracy(model_shuffled, shuffled_train_arr, featureNamesList)
	print("Bayesian Model Accuracy for Shuffled Training Data = " + str(accuracies["shuffled_train"]))
	accuracies["shuffled_validation"] = _accuracy(model_shuffled, shuffled_val_arr, featureNamesList)
	print("Bayesian Model Accuracy for Shuffled Validation Data = " + str(accuracies["shuffled_validation"]))
	# Training and testing the Unseen model
	model_unseen = basemodel.copy()
	_fit_cpds(model_unseen, unseen_train, featureNamesList)
	accuracies["unseen_train"] = _accuracy(model_unseen, unseen_train_arr, featureNamesList)
	print("Bayesian Model Accuracy for Unseen Training Data = " + str(accuracies["unseen_train"]))
	accuracies["unseen_validation"] = _accuracy(model_unseen, unseen_val_arr, featureNamesList)
	print("Bayesian Model Accuracy for Unseen Validation Data = " + str(accuracies["unseen_validation"]))
Example #9
0
class StateNameDecorator(unittest.TestCase):
    """Tests that factors, CPDs and VariableElimination accept state names
    and integer state indices interchangeably (assignment, reduce, query,
    map_query)."""

    def setUp(self):
        # State-name maps: sn2 for the student network, sn1 for the factors.
        self.sn2 = {
            'grade': ['A', 'B', 'F'],
            'diff': ['high', 'low'],
            'intel': ['poor', 'good', 'very good']
        }
        self.sn1 = {
            'speed': ['low', 'medium', 'high'],
            'switch': ['on', 'off'],
            'time': ['day', 'night']
        }

        # phi1 uses integer states; phi2 is identical but carries sn1.
        self.phi1 = DiscreteFactor(['speed', 'switch', 'time'], [3, 2, 2],
                                   np.ones(12))
        self.phi2 = DiscreteFactor(['speed', 'switch', 'time'], [3, 2, 2],
                                   np.ones(12),
                                   state_names=self.sn1)

        # Parallel CPDs: cpd1 without state names, cpd2 with sn2.
        self.cpd1 = TabularCPD(
            'grade',
            3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
            evidence=['diff', 'intel'],
            evidence_card=[2, 3])
        self.cpd2 = TabularCPD(
            'grade',
            3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
            evidence=['diff', 'intel'],
            evidence_card=[2, 3],
            state_names=self.sn2)

        # Small student network used by the inference tests below.
        student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, [[0.2, 0.8]])
        intel_cpd = TabularCPD('intel', 2, [[0.3, 0.7]])
        grade_cpd = TabularCPD(
            'grade',
            3,
            [[0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.1, 0.1], [0.8, 0.8, 0.8, 0.8]],
            evidence=['diff', 'intel'],
            evidence_card=[2, 2])
        student.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        # model1: integer states only; model2: same model with state names.
        self.model1 = VariableElimination(student)
        self.model2 = VariableElimination(student, state_names=self.sn2)

    def test_assignment_statename(self):
        """assignment() yields names when state names exist, indices otherwise."""
        req_op1 = [[('speed', 'low'), ('switch', 'on'), ('time', 'night')],
                   [('speed', 'low'), ('switch', 'off'), ('time', 'day')]]
        req_op2 = [[('speed', 0), ('switch', 0), ('time', 1)],
                   [('speed', 0), ('switch', 1), ('time', 0)]]

        self.assertEqual(self.phi1.assignment([1, 2]), req_op2)
        self.assertEqual(self.phi2.assignment([1, 2]), req_op1)

    def test_factor_reduce_statename(self):
        """reduce() accepts both state names and indices, in place or not."""
        phi = DiscreteFactor(['speed', 'switch', 'time'], [3, 2, 2],
                             np.ones(12),
                             state_names=self.sn1)
        phi.reduce([('speed', 'medium'), ('time', 'day')])
        self.assertEqual(phi.variables, ['switch'])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))

        phi = DiscreteFactor(['speed', 'switch', 'time'], [3, 2, 2],
                             np.ones(12),
                             state_names=self.sn1)
        phi = phi.reduce([('speed', 'medium'), ('time', 'day')], inplace=False)
        self.assertEqual(phi.variables, ['switch'])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))

        phi = DiscreteFactor(['speed', 'switch', 'time'], [3, 2, 2],
                             np.ones(12),
                             state_names=self.sn1)
        phi.reduce([('speed', 1), ('time', 0)])
        self.assertEqual(phi.variables, ['switch'])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))

        phi = DiscreteFactor(['speed', 'switch', 'time'], [3, 2, 2],
                             np.ones(12),
                             state_names=self.sn1)
        phi = phi.reduce([('speed', 1), ('time', 0)], inplace=False)
        self.assertEqual(phi.variables, ['switch'])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))

    def test_reduce_cpd_statename(self):
        """CPD reduce() accepts both state names and indices, in place or not."""
        cpd = TabularCPD(
            'grade',
            3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
            evidence=['diff', 'intel'],
            evidence_card=[2, 3],
            state_names=self.sn2)
        cpd.reduce([('diff', 'high')])
        self.assertEqual(cpd.variable, 'grade')
        self.assertEqual(cpd.variables, ['grade', 'intel'])
        np_test.assert_array_equal(
            cpd.get_values(),
            np.array([[0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.8, 0.8, 0.8]]))

        cpd = TabularCPD(
            'grade',
            3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
            evidence=['diff', 'intel'],
            evidence_card=[2, 3],
            state_names=self.sn2)
        cpd.reduce([('diff', 0)])
        self.assertEqual(cpd.variable, 'grade')
        self.assertEqual(cpd.variables, ['grade', 'intel'])
        np_test.assert_array_equal(
            cpd.get_values(),
            np.array([[0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.8, 0.8, 0.8]]))

        cpd = TabularCPD(
            'grade',
            3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
            evidence=['diff', 'intel'],
            evidence_card=[2, 3],
            state_names=self.sn2)
        cpd = cpd.reduce([('diff', 'high')], inplace=False)
        self.assertEqual(cpd.variable, 'grade')
        self.assertEqual(cpd.variables, ['grade', 'intel'])
        np_test.assert_array_equal(
            cpd.get_values(),
            np.array([[0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.8, 0.8, 0.8]]))

        cpd = TabularCPD(
            'grade',
            3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
            evidence=['diff', 'intel'],
            evidence_card=[2, 3],
            state_names=self.sn2)
        cpd = cpd.reduce([('diff', 0)], inplace=False)
        self.assertEqual(cpd.variable, 'grade')
        self.assertEqual(cpd.variables, ['grade', 'intel'])
        np_test.assert_array_equal(
            cpd.get_values(),
            np.array([[0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.8, 0.8, 0.8]]))

    def test_inference_query_statename(self):
        """query()/map_query() give the same result for named and indexed evidence."""
        inf_op1 = self.model2.query(['grade'], evidence={'intel': 'poor'})
        inf_op2 = self.model2.query(['grade'], evidence={'intel': 0})
        req_op = {
            'grade': DiscreteFactor(['grade'], [3], np.array([0.1, 0.1, 0.8]))
        }

        self.assertEqual(inf_op1, inf_op2)
        self.assertEqual(inf_op1, req_op)
        # BUG FIX: this line previously repeated the inf_op1 comparison, so
        # inf_op2 was never checked against the expected factor directly.
        self.assertEqual(inf_op2, req_op)

        inf_op1 = self.model2.map_query(['grade'], evidence={'intel': 'poor'})
        inf_op2 = self.model2.map_query(['grade'], evidence={'intel': 0})
        req_op = {'grade': 'F'}

        self.assertEqual(inf_op1, inf_op2)
        self.assertEqual(inf_op1, req_op)
        # BUG FIX: same duplicated assertion as above — check inf_op2 too.
        self.assertEqual(inf_op2, req_op)
Example #10
0
class TestVariableElimination(unittest.TestCase):
    """Exact-inference tests for VariableElimination on a small Bayesian
    network with nodes A, R, J, Q, L, G (all binary)."""

    def setUp(self):
        # Network: A and R are parents of J; J is a parent of Q and L;
        # G is the other parent of L.
        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'),
                                             ('J', 'Q'), ('J', 'L'),
                                             ('G', 'L')])
        cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])
        cpd_j = TabularCPD('J', 2,
                           [[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
                           ['R', 'A'], [2, 2])
        cpd_q = TabularCPD('Q', 2, [[0.9, 0.2], [0.1, 0.8]], ['J'], [2])
        cpd_l = TabularCPD('L', 2,
                           [[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]],
                           ['G', 'J'], [2, 2])
        cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)

        self.bayesian_inference = VariableElimination(self.bayesian_model)

    # All the values that are used for comparison in all the tests were
    # found using SAMIAM (assuming that it is correct ;))

    def test_query_single_variable(self):
        # Marginal P(J) with no evidence.
        query_result = self.bayesian_inference.query(['J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))

    def test_query_multiple_variable(self):
        # Marginals of Q and J from a single query call.
        query_result = self.bayesian_inference.query(['Q', 'J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.4912, 0.5088]))

    def test_query_single_variable_with_evidence(self):
        # P(J | A=0, R=1).
        query_result = self.bayesian_inference.query(variables=['J'],
                                                     evidence={
                                                         'A': 0,
                                                         'R': 1
                                                     })
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.60, 0.40]))

    def test_query_multiple_variable_with_evidence(self):
        # P(J, Q | A=0, R=0, G=0, L=1).
        query_result = self.bayesian_inference.query(variables=['J', 'Q'],
                                                     evidence={
                                                         'A': 0,
                                                         'R': 0,
                                                         'G': 0,
                                                         'L': 1
                                                     })
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_query_multiple_times(self):
        # This just tests that the models are not getting modified while querying them
        query_result = self.bayesian_inference.query(['J'])
        query_result = self.bayesian_inference.query(['J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))

        query_result = self.bayesian_inference.query(['Q', 'J'])
        query_result = self.bayesian_inference.query(['Q', 'J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.4912, 0.5088]))

        query_result = self.bayesian_inference.query(variables=['J'],
                                                     evidence={
                                                         'A': 0,
                                                         'R': 1
                                                     })
        query_result = self.bayesian_inference.query(variables=['J'],
                                                     evidence={
                                                         'A': 0,
                                                         'R': 1
                                                     })
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.60, 0.40]))

        query_result = self.bayesian_inference.query(variables=['J', 'Q'],
                                                     evidence={
                                                         'A': 0,
                                                         'R': 0,
                                                         'G': 0,
                                                         'L': 1
                                                     })
        query_result = self.bayesian_inference.query(variables=['J', 'Q'],
                                                     evidence={
                                                         'A': 0,
                                                         'R': 0,
                                                         'G': 0,
                                                         'L': 1
                                                     })
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_max_marginal(self):
        # Max-marginal over all variables (probability of the MAP assignment).
        np_test.assert_almost_equal(self.bayesian_inference.max_marginal(),
                                    0.1659,
                                    decimal=4)

    def test_max_marginal_var(self):
        np_test.assert_almost_equal(self.bayesian_inference.max_marginal(['G'
                                                                          ]),
                                    0.5714,
                                    decimal=4)

    def test_max_marginal_var1(self):
        np_test.assert_almost_equal(self.bayesian_inference.max_marginal(
            ['G', 'R']),
                                    0.4055,
                                    decimal=4)

    def test_max_marginal_var2(self):
        np_test.assert_almost_equal(self.bayesian_inference.max_marginal(
            ['G', 'R', 'A']),
                                    0.3260,
                                    decimal=4)

    def test_map_query(self):
        # MAP assignment over all variables with no evidence.
        map_query = self.bayesian_inference.map_query()
        self.assertDictEqual(map_query, {
            'A': 1,
            'R': 1,
            'J': 1,
            'Q': 1,
            'G': 0,
            'L': 0
        })

    def test_map_query_with_evidence(self):
        map_query = self.bayesian_inference.map_query(['A', 'R', 'L'], {
            'J': 0,
            'Q': 1,
            'G': 0
        })
        self.assertDictEqual(map_query, {'A': 1, 'R': 0, 'L': 0})

    def test_induced_graph(self):
        # Induced (fill-in) graph for the given elimination order.
        induced_graph = self.bayesian_inference.induced_graph(
            ['G', 'Q', 'A', 'J', 'L', 'R'])
        result_edges = sorted([sorted(x) for x in induced_graph.edges()])
        self.assertEqual([['A', 'J'], ['A', 'R'], ['G', 'J'], ['G', 'L'],
                          ['J', 'L'], ['J', 'Q'], ['J', 'R'], ['L', 'R']],
                         result_edges)

    def test_induced_width(self):
        result_width = self.bayesian_inference.induced_width(
            ['G', 'Q', 'A', 'J', 'L', 'R'])
        self.assertEqual(2, result_width)

    def tearDown(self):
        del self.bayesian_inference
        del self.bayesian_model
Example #11
0
class TestVariableEliminationMarkov(unittest.TestCase):
    """VariableElimination tests on a MarkovModel built by moralising the
    Bayesian network used in TestVariableElimination; queries here return
    DiscreteFactor objects rather than dicts."""

    def setUp(self):
        # It is just a moralised version of the above Bayesian network so all the results are same. Only factors
        # are under consideration for inference so this should be fine.
        self.markov_model = MarkovModel([
            ("A", "J"),
            ("R", "J"),
            ("J", "Q"),
            ("J", "L"),
            ("G", "L"),
            ("A", "R"),
            ("J", "G"),
        ])

        # Factors are obtained by converting the CPDs of the original network.
        factor_a = TabularCPD("A", 2, values=[[0.2], [0.8]]).to_factor()
        factor_r = TabularCPD("R", 2, values=[[0.4], [0.6]]).to_factor()
        factor_j = TabularCPD(
            "J",
            2,
            values=[[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
            evidence=["A", "R"],
            evidence_card=[2, 2],
        ).to_factor()
        factor_q = TabularCPD("Q",
                              2,
                              values=[[0.9, 0.2], [0.1, 0.8]],
                              evidence=["J"],
                              evidence_card=[2]).to_factor()
        factor_l = TabularCPD(
            "L",
            2,
            values=[[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]],
            evidence=["J", "G"],
            evidence_card=[2, 2],
        ).to_factor()
        factor_g = TabularCPD("G", 2, [[0.6], [0.4]]).to_factor()

        self.markov_model.add_factors(factor_a, factor_r, factor_j, factor_q,
                                      factor_l, factor_g)
        self.markov_inference = VariableElimination(self.markov_model)

    # All the values that are used for comparison in all the tests were
    # found using SAMIAM (assuming that it is correct ;))

    def test_query_single_variable(self):
        # Marginal P(J) with no evidence.
        query_result = self.markov_inference.query(["J"])
        self.assertEqual(
            query_result,
            DiscreteFactor(variables=["J"],
                           cardinality=[2],
                           values=np.array([0.416, 0.584])),
        )

    def test_query_multiple_variable(self):
        # Joint factor over Q and J.
        query_result = self.markov_inference.query(["Q", "J"])
        self.assertEqual(
            query_result,
            DiscreteFactor(
                variables=["Q", "J"],
                cardinality=[2, 2],
                values=np.array([[0.3744, 0.1168], [0.0416, 0.4672]]),
            ),
        )

    def test_query_single_variable_with_evidence(self):
        # P(J | A=0, R=1).
        query_result = self.markov_inference.query(variables=["J"],
                                                   evidence={
                                                       "A": 0,
                                                       "R": 1
                                                   })
        self.assertEqual(
            query_result,
            DiscreteFactor(variables=["J"], cardinality=[2], values=[0.6,
                                                                     0.4]),
        )

    def test_query_multiple_variable_with_evidence(self):
        # Unnormalised joint factor over Q and J given the evidence.
        query_result = self.markov_inference.query(variables=["J", "Q"],
                                                   evidence={
                                                       "A": 0,
                                                       "R": 0,
                                                       "G": 0,
                                                       "L": 1
                                                   })
        self.assertEqual(
            query_result,
            DiscreteFactor(
                variables=["Q", "J"],
                cardinality=[2, 2],
                values=np.array([[0.081, 0.004], [0.009, 0.016]]),
            ),
        )

    def test_query_multiple_times(self):
        # This just tests that the models are not getting modified while querying them
        query_result = self.markov_inference.query(["J"])
        query_result = self.markov_inference.query(["J"])
        self.assertEqual(
            query_result,
            DiscreteFactor(variables=["J"],
                           cardinality=[2],
                           values=np.array([0.416, 0.584])),
        )

        query_result = self.markov_inference.query(["Q", "J"])
        query_result = self.markov_inference.query(["Q", "J"])
        self.assertEqual(
            query_result,
            DiscreteFactor(
                variables=["Q", "J"],
                cardinality=[2, 2],
                values=np.array([[0.3744, 0.1168], [0.0416, 0.4672]]),
            ),
        )

        query_result = self.markov_inference.query(variables=["J"],
                                                   evidence={
                                                       "A": 0,
                                                       "R": 1
                                                   })
        query_result = self.markov_inference.query(variables=["J"],
                                                   evidence={
                                                       "A": 0,
                                                       "R": 1
                                                   })
        self.assertEqual(
            query_result,
            DiscreteFactor(variables=["J"], cardinality=[2], values=[0.6,
                                                                     0.4]),
        )

        query_result = self.markov_inference.query(variables=["J", "Q"],
                                                   evidence={
                                                       "A": 0,
                                                       "R": 0,
                                                       "G": 0,
                                                       "L": 1
                                                   })
        query_result = self.markov_inference.query(variables=["J", "Q"],
                                                   evidence={
                                                       "A": 0,
                                                       "R": 0,
                                                       "G": 0,
                                                       "L": 1
                                                   })
        self.assertEqual(
            query_result,
            DiscreteFactor(
                variables=["Q", "J"],
                cardinality=[2, 2],
                values=np.array([[0.081, 0.004], [0.009, 0.016]]),
            ),
        )

    def test_max_marginal(self):
        np_test.assert_almost_equal(self.markov_inference.max_marginal(),
                                    0.1659,
                                    decimal=4)

    def test_max_marginal_var(self):
        # NOTE(review): unlike the Bayesian-network tests, all max_marginal
        # variants here expect the same 0.1659 — presumably because the Markov
        # result is not renormalised over the kept variables; confirm.
        np_test.assert_almost_equal(self.markov_inference.max_marginal(["G"]),
                                    0.1659,
                                    decimal=4)

    def test_max_marginal_var1(self):
        np_test.assert_almost_equal(self.markov_inference.max_marginal(
            ["G", "R"]),
                                    0.1659,
                                    decimal=4)

    def test_max_marginal_var2(self):
        np_test.assert_almost_equal(self.markov_inference.max_marginal(
            ["G", "R", "A"]),
                                    0.1659,
                                    decimal=4)

    def test_map_query(self):
        # MAP assignment over all variables with no evidence.
        map_query = self.markov_inference.map_query()
        self.assertDictEqual(map_query, {
            "A": 1,
            "R": 1,
            "J": 1,
            "Q": 1,
            "G": 0,
            "L": 0
        })

    def test_map_query_with_evidence(self):
        map_query = self.markov_inference.map_query(["A", "R", "L"], {
            "J": 0,
            "Q": 1,
            "G": 0
        })
        self.assertDictEqual(map_query, {"A": 1, "R": 0, "L": 0})

    def test_induced_graph(self):
        # Induced (fill-in) graph for the given elimination order.
        induced_graph = self.markov_inference.induced_graph(
            ["G", "Q", "A", "J", "L", "R"])
        result_edges = sorted([sorted(x) for x in induced_graph.edges()])
        self.assertEqual(
            [
                ["A", "J"],
                ["A", "R"],
                ["G", "J"],
                ["G", "L"],
                ["J", "L"],
                ["J", "Q"],
                ["J", "R"],
                ["L", "R"],
            ],
            result_edges,
        )

    def test_induced_width(self):
        result_width = self.markov_inference.induced_width(
            ["G", "Q", "A", "J", "L", "R"])
        self.assertEqual(2, result_width)

    def tearDown(self):
        del self.markov_inference
        del self.markov_model
Example #12
0
# Print the learned dependency (edge) structure of the model.
print(model.edges())
# Inspect the probability values of the 'Pclass' node's CPD.
print(model.get_cpds('Pclass').values)

from pgmpy.inference import VariableElimination

# Exact inference on the fitted Titanic model via variable elimination.
model_infer = VariableElimination(model)
q = model_infer.query(variables=['Survived'], evidence={'Fare': 0})
print(q)
'''
+------------+-----------------+
| Survived   |   phi(Survived) |
+============+=================+
| Survived_0 |          0.6341 |
+------------+-----------------+
| Survived_1 |          0.3659 |
+------------+-----------------+
'''
# MAP query: most likely joint assignment of the listed variables given
# that the passenger survived.
q = model_infer.map_query(variables=['Fare', 'Age', 'Sex', 'Pclass', 'Cabin'],
                          evidence={'Survived': 1})
print(q)  #{'Sex': 0, 'Fare': 0, 'Age': 1, 'Pclass': 2, 'Cabin': 0}

# Predict 'Survived' for the held-out rows and compare with ground truth.
predict_data = test.drop(columns=['Survived'], axis=1)
y_pred = model.predict(predict_data)
y_survived = y_pred['Survived'].values
test_survived = test['Survived'].values
print('y_pred:', y_survived)
print("test:", test_survived)
print((y_survived == test_survived).sum() / len(test))
# Test-set accuracy: 0.8131868131868132
Example #13
0
# CPD of S (e.g. SAT score) conditioned on I (intelligence).
cpd_s = TabularCPD(variable='S',
                   variable_card=2,
                   values=[[0.95, 0.2], [0.05, 0.8]],
                   evidence=['I'],
                   evidence_card=[2])

# Attach the CPDs to the DAG.
model.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)

# Validate the model: checks the network structure and CPDs, and verifies
# that each CPD is well defined and its columns sum to 1.
model.check_model()
# List all CPDs attached to the model.
model.get_cpds()
# (Print the CPD table of node G.)
#print(model.get_cpds('G'))
# Cardinality (number of states) of node G.
model.get_cardinality('G')
# Local independencies implied by the network for the listed nodes.
model.local_independencies(['D', 'I', 'S', 'G', 'L'])
from pgmpy.inference import VariableElimination
infer = VariableElimination(model)
# Marginal probability of one variable, all others marginalised out.
# NOTE(review): indexing the query result with ['G'] assumes the older
# pgmpy API where query() returned a dict of factors — verify the
# installed version.
print(infer.query(['G'])['G'])
# Conditional distribution given evidence.
print(infer.query(['G'], evidence={'D': 1, 'I': 1})['G'])
print(111, infer.query(['G'], evidence={'I': 1, 'L': 1, 'D': 1})['G'])
# MAP prediction of a variable's state under the given evidence.
print(infer.map_query('G'))
print(infer.map_query('G', evidence={'D': 0, 'I': 1}))
print(infer.map_query('G', evidence={'D': 0, 'I': 1, 'L': 1, 'S': 1}))
class StateNameDecorator(unittest.TestCase):
    """Tests that factors, CPDs and VariableElimination handle named states
    and integer state indices equivalently; uses two parallel student models,
    one with and one without state names."""

    def setUp(self):
        # State-name maps: sn2 for the student network, sn1 for the factors.
        self.sn2 = {
            "grade": ["A", "B", "F"],
            "diff": ["high", "low"],
            "intel": ["poor", "good", "very good"],
        }
        self.sn1 = {
            "speed": ["low", "medium", "high"],
            "switch": ["on", "off"],
            "time": ["day", "night"],
        }

        # phi1 uses integer states; phi2 is identical but carries sn1.
        self.phi1 = DiscreteFactor(["speed", "switch", "time"], [3, 2, 2],
                                   np.ones(12))
        self.phi2 = DiscreteFactor(["speed", "switch", "time"], [3, 2, 2],
                                   np.ones(12),
                                   state_names=self.sn1)

        # Parallel CPDs: cpd1 without state names, cpd2 with sn2.
        self.cpd1 = TabularCPD(
            "grade",
            3,
            [
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8],
            ],
            evidence=["diff", "intel"],
            evidence_card=[2, 3],
        )
        self.cpd2 = TabularCPD(
            "grade",
            3,
            [
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8],
            ],
            evidence=["diff", "intel"],
            evidence_card=[2, 3],
            state_names=self.sn2,
        )

        # Two structurally identical student networks; one gets CPDs with
        # state names, the other plain integer-state CPDs.
        student = BayesianModel([("diff", "grade"), ("intel", "grade")])
        student_state_names = BayesianModel([("diff", "grade"),
                                             ("intel", "grade")])

        diff_cpd = TabularCPD("diff", 2, [[0.2, 0.8]])
        intel_cpd = TabularCPD("intel", 2, [[0.3, 0.7]])
        grade_cpd = TabularCPD(
            "grade",
            3,
            [[0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.1, 0.1], [0.8, 0.8, 0.8, 0.8]],
            evidence=["diff", "intel"],
            evidence_card=[2, 2],
        )

        # NOTE(review): intel has variable_card=2 but is given three state
        # names here ("poor", "good", "very good") — confirm pgmpy tolerates
        # the mismatch.
        diff_cpd_state_names = TabularCPD(
            variable="diff",
            variable_card=2,
            values=[[0.2, 0.8]],
            state_names={"diff": ["high", "low"]},
        )
        intel_cpd_state_names = TabularCPD(
            variable="intel",
            variable_card=2,
            values=[[0.3, 0.7]],
            state_names={"intel": ["poor", "good", "very good"]},
        )
        grade_cpd_state_names = TabularCPD(
            "grade",
            3,
            [[0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.1, 0.1], [0.8, 0.8, 0.8, 0.8]],
            evidence=["diff", "intel"],
            evidence_card=[2, 2],
            state_names=self.sn2,
        )

        student.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        student_state_names.add_cpds(diff_cpd_state_names,
                                     intel_cpd_state_names,
                                     grade_cpd_state_names)

        self.model_no_state_names = VariableElimination(student)
        self.model_with_state_names = VariableElimination(student_state_names)

    def test_assignment_statename(self):
        """assignment() yields names when state names exist, indices otherwise."""
        req_op1 = [
            [("speed", "low"), ("switch", "on"), ("time", "night")],
            [("speed", "low"), ("switch", "off"), ("time", "day")],
        ]
        req_op2 = [
            [("speed", 0), ("switch", 0), ("time", 1)],
            [("speed", 0), ("switch", 1), ("time", 0)],
        ]

        self.assertEqual(self.phi1.assignment([1, 2]), req_op2)
        self.assertEqual(self.phi2.assignment([1, 2]), req_op1)

    def test_factor_reduce_statename(self):
        """reduce() accepts state names (named factor) or indices (plain factor)."""
        phi = DiscreteFactor(["speed", "switch", "time"], [3, 2, 2],
                             np.ones(12),
                             state_names=self.sn1)
        phi.reduce([("speed", "medium"), ("time", "day")])
        self.assertEqual(phi.variables, ["switch"])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))

        phi = DiscreteFactor(["speed", "switch", "time"], [3, 2, 2],
                             np.ones(12),
                             state_names=self.sn1)
        phi = phi.reduce([("speed", "medium"), ("time", "day")], inplace=False)
        self.assertEqual(phi.variables, ["switch"])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))

        phi = DiscreteFactor(["speed", "switch", "time"], [3, 2, 2],
                             np.ones(12))
        phi.reduce([("speed", 1), ("time", 0)])
        self.assertEqual(phi.variables, ["switch"])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))

        phi = DiscreteFactor(["speed", "switch", "time"], [3, 2, 2],
                             np.ones(12))
        phi = phi.reduce([("speed", 1), ("time", 0)], inplace=False)
        self.assertEqual(phi.variables, ["switch"])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))

    def test_reduce_cpd_statename(self):
        """CPD reduce() accepts both state names and indices, in place or not."""
        cpd = TabularCPD(
            "grade",
            3,
            [
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8],
            ],
            evidence=["diff", "intel"],
            evidence_card=[2, 3],
            state_names=self.sn2,
        )
        cpd.reduce([("diff", "high")])
        self.assertEqual(cpd.variable, "grade")
        self.assertEqual(cpd.variables, ["grade", "intel"])
        np_test.assert_array_equal(
            cpd.get_values(),
            np.array([[0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.8, 0.8, 0.8]]),
        )

        cpd = TabularCPD(
            "grade",
            3,
            [
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8],
            ],
            evidence=["diff", "intel"],
            evidence_card=[2, 3],
        )
        cpd.reduce([("diff", 0)])
        self.assertEqual(cpd.variable, "grade")
        self.assertEqual(cpd.variables, ["grade", "intel"])
        np_test.assert_array_equal(
            cpd.get_values(),
            np.array([[0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.8, 0.8, 0.8]]),
        )

        cpd = TabularCPD(
            "grade",
            3,
            [
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8],
            ],
            evidence=["diff", "intel"],
            evidence_card=[2, 3],
            state_names=self.sn2,
        )
        cpd = cpd.reduce([("diff", "high")], inplace=False)
        self.assertEqual(cpd.variable, "grade")
        self.assertEqual(cpd.variables, ["grade", "intel"])
        np_test.assert_array_equal(
            cpd.get_values(),
            np.array([[0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.8, 0.8, 0.8]]),
        )

        cpd = TabularCPD(
            "grade",
            3,
            [
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                [0.8, 0.8, 0.8, 0.8, 0.8, 0.8],
            ],
            evidence=["diff", "intel"],
            evidence_card=[2, 3],
        )
        cpd = cpd.reduce([("diff", 0)], inplace=False)
        self.assertEqual(cpd.variable, "grade")
        self.assertEqual(cpd.variables, ["grade", "intel"])
        np_test.assert_array_equal(
            cpd.get_values(),
            np.array([[0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.8, 0.8, 0.8]]),
        )

    def test_inference_query_statename(self):
        """Named-state and integer-state queries give equivalent results."""
        inf_op1 = self.model_with_state_names.query(["grade"],
                                                    evidence={"intel": "poor"})
        inf_op2 = self.model_no_state_names.query(["grade"],
                                                  evidence={"intel": 0})
        req_op = DiscreteFactor(["grade"], [3], np.array([0.1, 0.1, 0.8]))

        # NOTE(review): the same assertion is repeated and inf_op2 is never
        # checked against req_op here — the second line was probably meant to
        # be assertEqual(inf_op2, req_op); verify against pgmpy's factor
        # equality semantics before changing.
        self.assertEqual(inf_op1, req_op)
        self.assertEqual(inf_op1, req_op)

        inf_op1 = self.model_with_state_names.map_query(
            ["grade"], evidence={"intel": "poor"})
        inf_op2 = self.model_no_state_names.map_query(["grade"],
                                                      evidence={"intel": 0})
        req_op1 = {"grade": "F"}
        req_op2 = {"grade": 2}

        self.assertEqual(inf_op1, req_op1)
        self.assertEqual(inf_op2, req_op2)
class TestVariableEliminationMarkov(unittest.TestCase):
    def setUp(self):
        # Moralised version of the Bayesian network used elsewhere in this
        # module.  Inference only looks at the factors, so the expected
        # numbers are unchanged.
        self.markov_model = MarkovModel(
            [("A", "J"), ("R", "J"), ("J", "Q"), ("J", "L"),
             ("G", "L"), ("A", "R"), ("J", "G")]
        )

        phi_a = TabularCPD("A", 2, values=[[0.2], [0.8]]).to_factor()
        phi_r = TabularCPD("R", 2, values=[[0.4], [0.6]]).to_factor()
        phi_j = TabularCPD(
            "J", 2,
            values=[[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
            evidence=["A", "R"], evidence_card=[2, 2],
        ).to_factor()
        phi_q = TabularCPD(
            "Q", 2,
            values=[[0.9, 0.2], [0.1, 0.8]],
            evidence=["J"], evidence_card=[2],
        ).to_factor()
        phi_l = TabularCPD(
            "L", 2,
            values=[[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]],
            evidence=["J", "G"], evidence_card=[2, 2],
        ).to_factor()
        phi_g = TabularCPD("G", 2, values=[[0.6], [0.4]]).to_factor()

        self.markov_model.add_factors(phi_a, phi_r, phi_j,
                                      phi_q, phi_l, phi_g)
        self.markov_inference = VariableElimination(self.markov_model)

    # Expected numbers throughout were obtained with SAMIAM (assumed correct).

    def test_query_single_variable(self):
        result = self.markov_inference.query(['J'])
        np_test.assert_array_almost_equal(result['J'].values,
                                          np.array([0.416, 0.584]))

    def test_query_multiple_variable(self):
        result = self.markov_inference.query(['Q', 'J'])
        np_test.assert_array_almost_equal(result['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(result['Q'].values,
                                          np.array([0.4912, 0.5088]))

    def test_query_single_variable_with_evidence(self):
        result = self.markov_inference.query(variables=['J'],
                                             evidence={'A': 0, 'R': 1})
        np_test.assert_array_almost_equal(result['J'].values,
                                          np.array([0.60, 0.40]))

    def test_query_multiple_variable_with_evidence(self):
        result = self.markov_inference.query(
            variables=['J', 'Q'],
            evidence={'A': 0, 'R': 0, 'G': 0, 'L': 1})
        np_test.assert_array_almost_equal(result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(result['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_query_multiple_times(self):
        # Repeated queries must not modify the underlying model: every query
        # is issued twice and the second result is checked.
        self.markov_inference.query(['J'])
        result = self.markov_inference.query(['J'])
        np_test.assert_array_almost_equal(result['J'].values,
                                          np.array([0.416, 0.584]))

        self.markov_inference.query(['Q', 'J'])
        result = self.markov_inference.query(['Q', 'J'])
        np_test.assert_array_almost_equal(result['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(result['Q'].values,
                                          np.array([0.4912, 0.5088]))

        self.markov_inference.query(variables=['J'],
                                    evidence={'A': 0, 'R': 1})
        result = self.markov_inference.query(variables=['J'],
                                             evidence={'A': 0, 'R': 1})
        np_test.assert_array_almost_equal(result['J'].values,
                                          np.array([0.60, 0.40]))

        self.markov_inference.query(variables=['J', 'Q'],
                                    evidence={'A': 0, 'R': 0,
                                              'G': 0, 'L': 1})
        result = self.markov_inference.query(
            variables=['J', 'Q'],
            evidence={'A': 0, 'R': 0, 'G': 0, 'L': 1})
        np_test.assert_array_almost_equal(result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(result['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_max_marginal(self):
        np_test.assert_almost_equal(self.markov_inference.max_marginal(),
                                    0.1659, decimal=4)

    def test_max_marginal_var(self):
        np_test.assert_almost_equal(
            self.markov_inference.max_marginal(['G']), 0.5714, decimal=4)

    def test_max_marginal_var1(self):
        np_test.assert_almost_equal(
            self.markov_inference.max_marginal(['G', 'R']), 0.4055, decimal=4)

    def test_max_marginal_var2(self):
        np_test.assert_almost_equal(
            self.markov_inference.max_marginal(['G', 'R', 'A']),
            0.3260, decimal=4)

    def test_map_query(self):
        expected = {'A': 1, 'R': 1, 'J': 1, 'Q': 1, 'G': 0, 'L': 0}
        self.assertDictEqual(self.markov_inference.map_query(), expected)

    def test_map_query_with_evidence(self):
        result = self.markov_inference.map_query(['A', 'R', 'L'],
                                                 {'J': 0, 'Q': 1, 'G': 0})
        self.assertDictEqual(result, {'A': 1, 'R': 0, 'L': 0})

    def test_induced_graph(self):
        graph = self.markov_inference.induced_graph(
            ['G', 'Q', 'A', 'J', 'L', 'R'])
        observed = sorted(sorted(edge) for edge in graph.edges())
        self.assertEqual([['A', 'J'], ['A', 'R'], ['G', 'J'], ['G', 'L'],
                          ['J', 'L'], ['J', 'Q'], ['J', 'R'], ['L', 'R']],
                         observed)

    def test_induced_width(self):
        width = self.markov_inference.induced_width(
            ['G', 'Q', 'A', 'J', 'L', 'R'])
        self.assertEqual(2, width)

    def tearDown(self):
        del self.markov_inference
        del self.markov_model
Example #16
0
# Associating the CPDs with the network
model2.add_cpds(cpd_r, cpd_s, cpd_g2)

# check_model verifies the network structure and the attached CPDs
# (correct dimensions, columns summing to 1).
model2.check_model()

infer2 = VariableElimination(model2)

# Exercise 2 d)
# Answer to "Was it due to an overnight rain? Or that last night she forgot to turn off her sprinkler?"
# Evidence is given by state name ('Wet') because the CPDs carry state_names.
print("E2 D:")
print(infer2.query(variables=['R'], evidence={'G': 'Wet'}))
print(infer2.query(variables=['S'], evidence={'G': 'Wet'}))
print(infer2.map_query(variables=['R','S'], evidence={'G': 'Wet'}))


# With additional information about John's Grass
# Defining the model structure: J (John's grass) depends on R only.
model3 = BayesianModel([('R', 'G'), ('S', 'G'), ('R', 'J')])

# Defining individual CPDs.
# Only one new CPD needed.  Per the state_names order below, the first
# column is R='NotRain' (P(JDry)=0.85) and the second is R='Rain'
# (P(JWet)=1).
cpd_j = TabularCPD(variable='J', variable_card=2, 
                   values=[[0.85, 0],
                           [0.15, 1]],
                  evidence=['R'],
                  evidence_card=[2],
                  state_names={'J': ['JDry', 'JWet'], 'R': ['NotRain', 'Rain']})
Example #17
0
# Convert the hand-built Bayesian model to its (moralised) Markov equivalent.
mm = model2.to_markov_model()
print("Nodes of model2 as markov:")
print(mm.nodes())
print("Edges of model2 as markov:")
print(mm.edges())
print("----------------------------------------------")
print("Inference for th dataset")
from pgmpy.inference import VariableElimination
infer1 = VariableElimination(mm)
print("Inference of x4:")
print(infer1.query(['x4'])['x4'])
print("Inference of x5|x2:")
print(infer1.query(['x5'], evidence={'x2': 1})['x5'])
print("---------------------------------------------------------")
print("AND DATASET")
# Structure learning on the AND features via hill climbing with a K2 score.
data = pd.read_csv("AND-Features.csv")
hc = HillClimbSearch(data, scoring_method=K2Score(data))
best_model = hc.estimate()
print("Edges of bayesian model")
print(best_model.edges())
'''Inference for and dataset'''
mm1 = best_model.to_markov_model()
print("Edges of markov model")
print(mm1.edges())
print("Checking converted model:", mm1.check_model())
print("----------------------------------------------")
print("Inference for and dataset")
from pgmpy.inference import VariableElimination
infer2 = VariableElimination(mm1)
# map_query returns a dict {variable: most probable state}.
print(infer2.map_query(['f1', 'f9']))
Example #18
0
# There can be cases in which we want to compute the conditional distribution let's say $ P(G | D=0, I=1) $. In such cases we need to modify our equations a bit:
#
# $ P(G | D=0, I=1) = \sum_L \sum_S P(L|G) * P(S| I=1) * P(G| D=0, I=1) * P(D=0) * P(I=1) $
# $ P(G | D=0, I=1) = P(D=0) * P(I=1) * P(G | D=0, I=1) * \sum_L P(L | G) * \sum_S P(S | I=1) $
#
# NOTE(review): the two lines above are informal -- strictly, the right-hand
# side is the joint restricted to the evidence and still has to be
# normalised over G to yield the conditional distribution.
# In pgmpy we will just need to pass an extra argument in the case of conditional distributions:

# In[41]:

print(infer.query(['G'], evidence={'D': 0, 'I': 1})['G'])

# ####  Predicting values from new data points
# Predicting values from new data points is quite similar to computing the conditional probabilities. We need to query for the variable that we need to predict given all the other features. The only difference is that rather than getting the probability distribution we are interested in getting the most probable state of the variable.
#
# In pgmpy this is known as MAP query. Here's an example:

# In[42]:

infer.map_query('G')

# In[46]:

infer.map_query('G', evidence={'D': 0, 'I': 1})

# In[47]:

infer.map_query('G', evidence={'D': 0, 'I': 1, 'L': 1, 'S': 1})

# ### 5. Other methods for Inference
# Even though exact inference algorithms like Variable Elimination optimize the inference task, it is still computationally quite expensive in the case of large models. For such cases we can use approximate algorithms like Message Passing Algorithms, Sampling Algorithms etc. We will talk about a few other exact and approximate algorithms in later parts of the tutorial.
Example #19
0
from pgmpy.inference import VariableElimination

# Create a Bayesian model
model = BayesianModel(....)

cpd_var1 = TabularCPD(....)
cpd_var2 = TabularCPD(....)
cpd_var3 = TabularCPD(....)
cpd_var4 = TabularCPD(....)
cpd_var5 = TabularCPD(....)

model.add_cpds(..........)

# Calculating the max marginals
model_inference = VariableElimination(model)
model_inference.map_query(variables=['late_for_school'])
" {'late_for_school': 0} "
model_inference.map_query(variables=['late_for_school', 'accident'])
" {'accidnet': 1, late_for_school': 0} "
model_inference.map_query(variables=['late_for_school'], evidence={'accident': 1})
" {'late_for_school': 0} "
model_inference.map_query(variables=['no_of_people'], evidence={'location':1}, elimination_order=['quality', 
                                                                                                  'cost',
                                                                                                  'location']) # can sepcify elimination sequence / otherwise system will choose automatically






-5- " Using Model for Prediction - Example "
    bn.summarise_variable( posterior, code_to_value_map )

    # with pgmpy
    reply = G_infer.query( variables=[var_target], evidence=evidences ) # returns DiscreteFactor
    print( reply )

    # test joint
    reply = G_infer.query( variables=["mut17q21"], evidence=evidences )
    print( reply )
    reply = G_infer.query( variables=["loss 17"], evidence=evidences )
    print( reply )
    reply = G_infer.query( variables=["mut17q21", "loss 17"], evidence=evidences )
    print( reply )

    #  MAP query
    reply = G_infer.map_query( variables=[var_target], evidence=evidences ) #returns dict
    print( reply )
    #  MPE query
    # TODO
    reply = G_infer.map_query( variables=[var_target], evidence=evidences ) #returns dict
    print( reply )


# test independencies
    var_source = hp.var_questions_wrapper( "list",
                                           "Which variable do you want to check for independencies?",
                                           df_values )
    # remove already added variables from choices
    df_values_dropped = df_values.drop( var_source, 1, inplace=False )
    var_evidence = hp.var_questions_wrapper( "checkbox",
                                             "Which variables do you want to add as evidence?",
Example #21
0
                   evidence_card=[2])

# L depends on G (three grade states), hence evidence_card=[3].
cpd_l = TabularCPD(variable='L',
                   variable_card=2,
                   values=[[0.1, 0.4, 0.99], [0.9, 0.6, 0.01]],
                   evidence=['G'],
                   evidence_card=[3])

# Associate the DAG with the conditional probability tables.
model.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)

# Validate the model: check the network structure and that each CPD is
# correctly defined and sums to 1.
model.check_model()

# Print every CPD attached to the model built above:
print(model.get_cpds())

# CPD of node G:
print(model.get_cpds('G'))
# Local independencies over the whole Bayesian network.
print(model.local_independencies(['D', 'I', 'S', 'G', 'L']))

from pgmpy.inference import VariableElimination
infer = VariableElimination(model)
print(infer.query(['G'])['G'])

# Conditional distribution P(G | D=0, I=1).
print(infer.query(['G'], evidence={'D': 0, 'I': 1})['G'])

print(infer.map_query('G'))
Example #22
0
# Fit the verification model on the seen data, then MAP-infer the
# graphologist feature 'pen_pressure_g' from a fixed set of observed
# features and score it against the validation labels.
print(seen_dat)
sd = seen_dat.iloc[1:, 2]
verify_model.fit(seen_dat, sd)
inference = VariableElimination(verify_model)
inference.induced_graph([
    'pen_pressure_f', 'letter_spacing_f', 'size_f', 'dimension_f',
    'is_lowercase_f', 'is_continuous_f', 'slantness_f', 'tilt_f',
    'entry_stroke_a_f', 'staff_of_a_f', 'formation_n_f', ' staff_of_d_f',
    'exit_stroke_d_f	', 'word_formation', 'constancy'
])
phi_query = inference.map_query(variables=['pen_pressure_g'],
                                evidence={
                                    'pen_pressure_f': 1,
                                    'letter_spacing_g': 1,
                                    'size_f': 2,
                                    'is_lowercase_f': 2,
                                    'slantness_f': 0,
                                    'tilt_g': 1,
                                    'entry_stroke_a_f': 1,
                                    'staff_of_a_f': 0,
                                    'formation_n_f': 1
                                })
print(phi_query)
val_dataset = pd.read_csv('dataset_seen_validation_siamese.csv')
val1 = val_dataset['label']
# BUG FIX: map_query returns a dict {variable: state}; the original compared
# each label against the dict itself (`i == phi_query`), which is always
# False.  Compare against the predicted state instead.
predicted_state = phi_query['pen_pressure_g']
r = 0
for i in val1:
    if i == predicted_state:
        r = r + 1
accuracy = (r / 905) * 100  # 905 == hard-coded size of the validation set
print("Seen dataset accuracy:")
print(accuracy)
# Read the test-set attribute data and collect the predicted Class per row.
a = []
model_infer = VariableElimination(model)
with open(r'C:\Users\haomiaowu\Desktop\BN-Cheminformatics\test.csv',
          'r',
          encoding="utf-8-sig") as f:
    reader = csv.reader(f)
    fieldnames = next(reader)  # first row holds the column names, reused as dict keys below
    csv_reader = csv.DictReader(
        f, fieldnames=fieldnames
    )  # DictReader keeps `fieldnames` as the list of keys for each row dict
    for row in csv_reader:
        d = {}
        for k, v in row.items():
            d[k] = int(v)
        # map_query returns a dict {"Class": state}; keep only the state.
        prob_class = model_infer.map_query(variables=["Class"], evidence=d)
        a.append(list(prob_class.values())[0])

print('预测结果:')
print(a)
print(
    '--------------------------------------------------------------------------------------------------'
)
print('已预测化合物个数:', len(a))

# Load the ground-truth Class labels of the test set.
d = pd.read_csv(
    r'C:\Users\haomiaowu\Desktop\BN-Cheminformatics\test-class.csv')
b = d['Class'].values
print('测试集包含化合物个数:', len(b))
Example #24
0
class TestVariableElimination(unittest.TestCase):
    def setUp(self):
        # Fixture: small discrete Bayesian network over A, R, J, Q, L, G.
        self.bayesian_model = BayesianModel(
            [("A", "J"), ("R", "J"), ("J", "Q"), ("J", "L"), ("G", "L")]
        )
        prior_a = TabularCPD("A", 2, values=[[0.2], [0.8]])
        prior_r = TabularCPD("R", 2, values=[[0.4], [0.6]])
        prior_g = TabularCPD("G", 2, values=[[0.6], [0.4]])
        cond_j = TabularCPD(
            "J", 2,
            values=[[0.9, 0.6, 0.7, 0.1], [0.1, 0.4, 0.3, 0.9]],
            evidence=["A", "R"], evidence_card=[2, 2],
        )
        cond_q = TabularCPD(
            "Q", 2,
            values=[[0.9, 0.2], [0.1, 0.8]],
            evidence=["J"], evidence_card=[2],
        )
        cond_l = TabularCPD(
            "L", 2,
            values=[[0.9, 0.45, 0.8, 0.1], [0.1, 0.55, 0.2, 0.9]],
            evidence=["J", "G"], evidence_card=[2, 2],
        )
        self.bayesian_model.add_cpds(prior_a, prior_g, cond_j,
                                     cond_l, cond_q, prior_r)

        self.bayesian_inference = VariableElimination(self.bayesian_model)

    # Expected numbers throughout were obtained with SAMIAM (assumed correct).

    def test_query_single_variable(self):
        result = self.bayesian_inference.query(["J"])
        self.assertEqual(
            result,
            DiscreteFactor(variables=["J"], cardinality=[2],
                           values=[0.416, 0.584]),
        )

    def test_query_multiple_variable(self):
        result = self.bayesian_inference.query(["Q", "J"])
        expected = DiscreteFactor(
            variables=["J", "Q"],
            cardinality=[2, 2],
            values=np.array([[0.3744, 0.0416], [0.1168, 0.4672]]),
        )
        self.assertEqual(result, expected)

    def test_query_single_variable_with_evidence(self):
        result = self.bayesian_inference.query(variables=["J"],
                                               evidence={"A": 0, "R": 1})
        self.assertEqual(
            result,
            DiscreteFactor(variables=["J"], cardinality=[2],
                           values=[0.6, 0.4]),
        )

    def test_query_multiple_variable_with_evidence(self):
        result = self.bayesian_inference.query(
            variables=["J", "Q"],
            evidence={"A": 0, "R": 0, "G": 0, "L": 1})
        expected = DiscreteFactor(
            variables=["J", "Q"],
            cardinality=[2, 2],
            values=np.array([[0.73636364, 0.08181818],
                             [0.03636364, 0.14545455]]),
        )
        self.assertEqual(result, expected)

    def test_query_multiple_times(self):
        # Repeated queries must not modify the underlying model: every query
        # is issued twice and the second result is checked.
        self.bayesian_inference.query(["J"])
        result = self.bayesian_inference.query(["J"])
        self.assertEqual(
            result,
            DiscreteFactor(variables=["J"], cardinality=[2],
                           values=np.array([0.416, 0.584])),
        )

        self.bayesian_inference.query(["Q", "J"])
        result = self.bayesian_inference.query(["Q", "J"])
        self.assertEqual(
            result,
            DiscreteFactor(
                variables=["J", "Q"],
                cardinality=[2, 2],
                values=np.array([[0.3744, 0.0416], [0.1168, 0.4672]]),
            ),
        )

        self.bayesian_inference.query(variables=["J"],
                                      evidence={"A": 0, "R": 1})
        result = self.bayesian_inference.query(variables=["J"],
                                               evidence={"A": 0, "R": 1})
        self.assertEqual(
            result,
            DiscreteFactor(variables=["J"], cardinality=[2],
                           values=[0.6, 0.4]),
        )

        self.bayesian_inference.query(
            variables=["J", "Q"],
            evidence={"A": 0, "R": 0, "G": 0, "L": 1})
        result = self.bayesian_inference.query(
            variables=["J", "Q"],
            evidence={"A": 0, "R": 0, "G": 0, "L": 1})
        self.assertEqual(
            result,
            DiscreteFactor(
                variables=["J", "Q"],
                cardinality=[2, 2],
                values=np.array([[0.73636364, 0.08181818],
                                 [0.03636364, 0.14545455]]),
            ),
        )

    def test_max_marginal(self):
        np_test.assert_almost_equal(self.bayesian_inference.max_marginal(),
                                    0.1659, decimal=4)

    def test_max_marginal_var(self):
        np_test.assert_almost_equal(
            self.bayesian_inference.max_marginal(["G"]), 0.5714, decimal=4)

    def test_max_marginal_var1(self):
        np_test.assert_almost_equal(
            self.bayesian_inference.max_marginal(["G", "R"]),
            0.3740, decimal=4)

    def test_max_marginal_var2(self):
        np_test.assert_almost_equal(
            self.bayesian_inference.max_marginal(["G", "R", "A"]),
            0.3061, decimal=4)

    def test_map_query(self):
        expected = {"A": 1, "R": 1, "J": 1, "Q": 1, "G": 0, "L": 0}
        self.assertDictEqual(self.bayesian_inference.map_query(), expected)

    def test_map_query_with_evidence(self):
        result = self.bayesian_inference.map_query(
            ["A", "R", "L"], {"J": 0, "Q": 1, "G": 0})
        self.assertDictEqual(result, {"A": 1, "R": 0, "L": 0})

    def test_induced_graph(self):
        graph = self.bayesian_inference.induced_graph(
            ["G", "Q", "A", "J", "L", "R"])
        observed = sorted(sorted(edge) for edge in graph.edges())
        expected = [
            ["A", "J"], ["A", "R"], ["G", "J"], ["G", "L"],
            ["J", "L"], ["J", "Q"], ["J", "R"], ["L", "R"],
        ]
        self.assertEqual(expected, observed)

    def test_induced_width(self):
        width = self.bayesian_inference.induced_width(
            ["G", "Q", "A", "J", "L", "R"])
        self.assertEqual(2, width)

    def tearDown(self):
        del self.bayesian_inference
        del self.bayesian_model
Example #25
0
class BaseModel(object):
    """Wrapper around a probabilistic graphical model.

    Holds the DAG (a networkx DiGraph), the pgmpy model with its conditional
    probability distributions (CPDs), and a VariableElimination engine used
    to answer queries over it.

    Args:
        config_file_path (str): path to a JSON file describing the DAG and
            its conditional probability tables.
        data (dict): alternatively, a dict with the same fields as the JSON
            file ('digraph', 'nodes', 'cpdtables', 'target',
            'nature_variables', 'interventions').

    to-do: for now this only works with binary-valued variables.
    """

    def __init__(self, config_file_path=None, data=None):
        self.config_file_path = config_file_path
        self.digraph = None
        self.pgmodel = None
        self.infer_system = None
        self.ebunch = None
        self.nodes = None
        self.variables_dict = dict()
        if config_file_path:
            with open(config_file_path) as json_file:
                data = json.load(json_file)
        if data is None:
            # Fail with a clear error instead of an AttributeError on the
            # data.get() calls below.
            raise ValueError("either config_file_path or data is required")
        if data.get('digraph'):
            self.ebunch = data['digraph']
            self.pgmodel = BayesianModel(self.ebunch)
            self.nodes = data.get('nodes', [])
            if self.nodes:
                self.pgmodel.add_nodes_from(self.nodes)
            self.init_graph(ebunch=self.ebunch, nodes=self.nodes)
        if data.get('cpdtables'):
            self.init_model(self.ebunch, data['cpdtables'])
            for table in self.pgmodel.get_cpds():
                logging.info(table)
        self.target = data['target']
        self.nature_variables = data['nature_variables']
        self.intervention_variables = data['interventions']

    def init_graph(self, ebunch, nodes=None, plot=True, graph_id='figures/dag'):
        """Create the DAG as a networkx DiGraph from an edge list.

        Args:
            ebunch (list): edges of the graph.
            nodes (list): additional isolated nodes to add.  (Default changed
                from a shared mutable `[]` to None; behaviour is unchanged.)
            plot (bool): whether to save an image of the graph.
            graph_id (str): name identifying the saved graph image.
        """
        nodes = [] if nodes is None else nodes
        self.digraph = nx.DiGraph(ebunch)
        for node in nodes:
            self.digraph.add_node(node)
        if plot:
            self.save_digraph_as_img(graph_id)

    def reset(self, pgmodel, ebunch, nodes=None):
        """Replace the model and graph, and rebuild the inference engine.

        Used to make the model dynamic: only the variables are kept between
        resets.
        """
        nodes = [] if nodes is None else nodes
        self.init_graph(ebunch, nodes=nodes, plot=False)
        for variable in pgmodel.nodes():
            # to-do: binary variables only for now.
            self.variables_dict[variable] = [0, 1]
        self.ebunch = ebunch
        self.nodes = nodes
        self.pgmodel = pgmodel
        self.update_infer_system()

    def show_graph(self):
        """Display the causal graph of the model with matplotlib."""
        pos = nx.circular_layout(self.digraph)
        nx.draw(self.digraph, with_labels=True, pos=pos)
        plt.show()
        plt.clf()

    def init_model(self, ebunch, cpdtables, plot=False, pgm_id='pgm'):
        """Attach the CPD tables to the pgmpy model and validate it.

        Args:
            ebunch (list): edges of the graph.
            cpdtables (list): dicts, each holding the data for one TabularCPD.
            plot (bool): whether to save an image of the model.
            pgm_id (str): name identifying the saved image.

        Raises:
            ValueError: if pgmpy's check_model rejects the resulting model.
        """
        for cpdtable in cpdtables:
            self.variables_dict[cpdtable['variable']] = list(
                range(cpdtable['variable_card']))
            table = TabularCPD(variable=cpdtable['variable'],
                               variable_card=cpdtable['variable_card'],
                               values=cpdtable['values'],
                               evidence_card=cpdtable.get('evidence_card'),
                               evidence=cpdtable.get('evidence'))
            if cpdtable.get('evidence'):
                # Canonical (sorted) parent order so CPD columns line up
                # regardless of how the config listed the evidence.
                table.reorder_parents(sorted(cpdtable.get('evidence')))
            self.pgmodel.add_cpds(table)
        if not self.pgmodel.check_model():
            raise ValueError("Error with CPDTs")
        self.update_infer_system()
        if plot:
            self.save_pgm_as_img(pgm_id)

    def update_infer_system(self):
        """Rebuild the VariableElimination engine for the current model."""
        self.infer_system = VariableElimination(self.pgmodel)

    def get_variable_values(self, variable):
        """Return the list of values `variable` can take (None if unknown)."""
        return self.variables_dict.get(variable)

    def get_target_variable(self):
        """Return the list of target variables."""
        return self.target

    def get_intervention_variables(self):
        """Return the list of variables that can be intervened on."""
        return self.intervention_variables

    def get_nature_variables(self):
        """Return the list of variables moved by nature."""
        return self.nature_variables

    def get_ebunch(self):
        """Return the edge list of the model."""
        return self.ebunch

    def get_nodes(self):
        """Return the list of isolated nodes of the model."""
        return self.nodes

    def get_nature_var_prob(self, nature_variable):
        """Return the probabilities of the values of a nature variable.

        Args:
            nature_variable (str): name of the variable.

        Returns None (implicitly) if the variable is not a nature variable.
        """
        if nature_variable in self.nature_variables:
            return np.squeeze(
                self.pgmodel.get_cpds(nature_variable).get_values())

    def conditional_probability(self, variable, evidence):
        """Return the distribution of `variable` given `evidence`, computed
        by variable elimination."""
        return self.infer_system.query([variable],
                                       evidence=evidence,
                                       show_progress=False)

    def make_inference(self, variable, evidence):
        """Return the most probable value of `variable` given the evidence.

        Args:
            variable (str): name of the variable to infer.
            evidence (dict): evidence for other variables, {name: value}.
        """
        return self.infer_system.map_query([variable],
                                           evidence=evidence,
                                           show_progress=False)[variable]

    def save_digraph_as_img(self, filename):
        """Save the networkx DAG as an image (and display it)."""
        pos = nx.circular_layout(self.digraph)
        nx.draw(self.digraph, with_labels=True, pos=pos)
        plt.savefig(filename)
        plt.show()
        plt.clf()

    def save_pgm_as_img(self, filename):
        """Save the pgmpy DAG as an image (and display it)."""
        nx.draw(self.digraph, with_labels=True)
        # BUG FIX: savefig() must run before show() -- show() clears the
        # current figure on return, so the original order saved a blank file.
        plt.savefig(filename)
        plt.show()
        plt.clf()

    def get_graph_toposort(self):
        """Return the variables of the DAG in topological order."""
        return list(nx.topological_sort(self.digraph))

    def get_nodes_and_predecessors(self):
        """Return a dict mapping each node to its sorted predecessors."""
        return {node: sorted(self.digraph.predecessors(node))
                for node in self.digraph.nodes}

    def get_number_of_values(self, variable):
        """Return how many values `variable` can take (0 if unknown).

        to-do: maybe also return the corresponding values themselves.
        """
        return len(self.variables_dict.get(variable, []))

    def get_joint_prob_observation(self, observation):
        """Return the joint probability of a full observation dict by
        indexing the joint factor along each observed variable."""
        prob = self.infer_system.query(variables=list(observation.keys()),
                                       joint=True,
                                       show_progress=False)
        values = prob.values
        for variable in prob.variables:
            values = values[observation[variable]]
        return values
Example #26
0
'''
Inference and Validation
'''
from pgmpy.inference import VariableElimination
import csv

inference = VariableElimination(model)
valid = 0
invalid = 0
# Score MAP predictions of SC (given BR) against the labels in column 0.
with open("validation_data.csv") as f:
    reader = csv.reader(f)
    for row in reader:
        br = row[3]
        # map_query returns a dict {variable: state}, so index it by "SC".
        if int(row[0]) == inference.map_query(["SC"],
                                              evidence={"BR": int(br)})["SC"]:
            valid += 1
        else:
            invalid += 1

total = valid + invalid
print(valid)
print(invalid)
# Guard against an empty validation file (total == 0).
accuracy = valid / total if total else 0.0
print("Accuracy: ", accuracy)

os.system('spd-say  -i -10 -p 50 -t female3  "Training Finished!"')
Example #27
0
class TestVariableElimination(unittest.TestCase):
    """Tests for VariableElimination on a small hand-built Bayesian network.

    Network structure: A -> J <- R, J -> Q, J -> L <- G, with binary nodes.
    """

    def setUp(self):
        # Build the network and attach one TabularCPD per node.
        self.bayesian_model = BayesianModel([('A', 'J'), ('R', 'J'), ('J', 'Q'),
                                             ('J', 'L'), ('G', 'L')])
        cpd_a = TabularCPD('A', 2, [[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, [[0.4], [0.6]])
        cpd_j = TabularCPD('J', 2,
                           [[0.9, 0.6, 0.7, 0.1],
                            [0.1, 0.4, 0.3, 0.9]],
                           ['R', 'A'], [2, 2])
        cpd_q = TabularCPD('Q', 2,
                           [[0.9, 0.2],
                            [0.1, 0.8]],
                           ['J'], [2])
        cpd_l = TabularCPD('L', 2,
                           [[0.9, 0.45, 0.8, 0.1],
                            [0.1, 0.55, 0.2, 0.9]],
                           ['G', 'J'], [2, 2])
        cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])
        self.bayesian_model.add_cpds(cpd_a, cpd_g, cpd_j, cpd_l, cpd_q, cpd_r)

        self.bayesian_inference = VariableElimination(self.bayesian_model)

    # All the values that are used for comparison in all the tests are
    # found using SAMIAM (assuming that it is correct ;))

    def test_query_single_variable(self):
        # Marginal P(J) with no evidence.
        query_result = self.bayesian_inference.query(['J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))

    def test_query_multiple_variable(self):
        # Joint query over Q and J; each returned factor is a marginal.
        query_result = self.bayesian_inference.query(['Q', 'J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.4912, 0.5088]))

    def test_query_single_variable_with_evidence(self):
        # P(J | A=0, R=1).
        query_result = self.bayesian_inference.query(variables=['J'],
                                                     evidence={'A': 0, 'R': 1})
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.60, 0.40]))

    def test_query_multiple_variable_with_evidence(self):
        # P(J, Q | A=0, R=0, G=0, L=1).
        query_result = self.bayesian_inference.query(variables=['J', 'Q'],
                                                     evidence={'A': 0, 'R': 0,
                                                               'G': 0, 'L': 1})
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_query_multiple_times(self):
        # This just tests that the models are not getting modified while querying them
        query_result = self.bayesian_inference.query(['J'])
        query_result = self.bayesian_inference.query(['J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))

        query_result = self.bayesian_inference.query(['Q', 'J'])
        query_result = self.bayesian_inference.query(['Q', 'J'])
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.4912, 0.5088]))

        query_result = self.bayesian_inference.query(variables=['J'],
                                                     evidence={'A': 0, 'R': 1})
        query_result = self.bayesian_inference.query(variables=['J'],
                                                     evidence={'A': 0, 'R': 1})
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.60, 0.40]))

        query_result = self.bayesian_inference.query(variables=['J', 'Q'],
                                                     evidence={'A': 0, 'R': 0,
                                                               'G': 0, 'L': 1})
        query_result = self.bayesian_inference.query(variables=['J', 'Q'],
                                                     evidence={'A': 0, 'R': 0,
                                                               'G': 0, 'L': 1})
        np_test.assert_array_almost_equal(query_result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(query_result['Q'].values,
                                          np.array([0.772727, 0.227273]))



    def test_max_marginal(self):
        # Maximum probability over the full joint (no variables fixed).
        np_test.assert_almost_equal(self.bayesian_inference.max_marginal(), 0.1659, decimal=4)

    def test_max_marginal_var(self):
        np_test.assert_almost_equal(self.bayesian_inference.max_marginal(['G']), 0.5714, decimal=4)

    def test_max_marginal_var1(self):
        np_test.assert_almost_equal(self.bayesian_inference.max_marginal(['G', 'R']),
                                    0.4055, decimal=4)

    def test_max_marginal_var2(self):
        np_test.assert_almost_equal(self.bayesian_inference.max_marginal(['G', 'R', 'A']),
                                    0.3260, decimal=4)

    def test_map_query(self):
        # With no arguments, map_query returns the MAP state of every variable.
        map_query = self.bayesian_inference.map_query()
        self.assertDictEqual(map_query, {'A': 1, 'R': 1, 'J': 1, 'Q': 1, 'G': 0,
                                         'L': 0})

    def test_map_query_with_evidence(self):
        map_query = self.bayesian_inference.map_query(['A', 'R', 'L'],
                                                      {'J': 0, 'Q': 1, 'G': 0})
        self.assertDictEqual(map_query, {'A': 1, 'R': 0, 'L': 0})

    def test_induced_graph(self):
        # Induced (fill-in) graph for the given elimination order.
        induced_graph = self.bayesian_inference.induced_graph(['G', 'Q', 'A', 'J', 'L', 'R'])
        result_edges = sorted([sorted(x) for x in induced_graph.edges()])
        self.assertEqual([['A', 'J'], ['A', 'R'], ['G', 'J'], ['G', 'L'],
                          ['J', 'L'], ['J', 'Q'], ['J', 'R'], ['L', 'R']],
                         result_edges)

    def test_induced_width(self):
        result_width = self.bayesian_inference.induced_width(['G', 'Q', 'A', 'J', 'L', 'R'])
        self.assertEqual(2, result_width)

    def tearDown(self):
        del self.bayesian_inference
        del self.bayesian_model
Example #28
0
# NOTE(review): `t1`, `N`, `model`, `inference`, `sachs_stretch` and the
# stretch/inference helpers are defined earlier in the file — confirm.
# Time the "transformations" inference for comparison against t1.
t2 = timeit.timeit(lambda: inference_query(sachs_stretch, 'Erk', {
    'P38': [0, 0, 1],
    'Jnk': [0, 1, 0]
}),
                   number=N)

print("\nTimes for: variable elimination, transformations, fraction, for", N,
      "runs")
print(t1)
print(t2)
print("How much faster is transformations inference:", t1 / t2)

print("\n* MAP query")

# Renamed from `vars`: never shadow the builtin vars().
query_vars = pick_from_list(model.nodes, 3)

print(inference.map_query(variables=query_vars))

print("")

sachs_stretch = stretch(model)

print(inference_map_query(sachs_stretch, variables=query_vars))

#sachs_stretch = stretch(model,observed=True)

#sachs_joint = evaluate_stretch(sachs_stretch['channels'])

#print( sachs_joint.MAP() )
Example #29
0

# for cpd in model.get_cpds():
#    print(cpd)
# Hold out the target column and let the fitted model predict it.
predict_data = test.drop(columns=["Survived"], axis=1)
y_pred = model.predict(predict_data)


(y_pred["Survived"] == test["Survived"]).sum() / len(test)  # test-set accuracy


model_infer = VariableElimination(model)
# Posterior over Survived given Fare == 0.
q = model_infer.query(variables=["Survived"], evidence={"Fare": 0})
print(q["Survived"])
# MAP assignment of the features given that the passenger survived.
q = model_infer.map_query(
    variables=["Fare", "Age", "Sex", "Pclass", "Cabin"], evidence={"Survived": 1}
)
print(q)


# # Build the model via structure learning


hc = HillClimbSearch(train, scoring_method=BicScore(train))
best_model = hc.estimate()
print(best_model.edges())

# Fit CPDs on the learned structure with a BDeu prior.
best_model.fit(
    train, estimator=BayesianEstimator, prior_type="BDeu"
)  # default equivalent_sample_size=5
predict_data = test.drop(columns=["Survived"], axis=1)
# NOTE(review): `ve`, STATE_NAMES, ratio and separator come from earlier in
# the file; evidence states here are strings ('0'), not ints — confirm.
q = ve.query(variables=['age'], evidence={'delay': '0'})
print(q)

print("Results using ratio function")

# Empirically cross-tabulate delay vs. age with the ratio helper.
for age in STATE_NAMES['age']:
    print('age : ' + age + '\n')
    for delay in STATE_NAMES['delay']:
        print('delay : ' + delay + '\n')
        print(
            ratio(data, lambda t: t['age'] == age,
                  lambda t: t['delay'] == delay))

separator()

# variables=None means "MAP over all variables of the model".
q = ve.map_query(variables=None, evidence=None)
print("MAP-query\n")
print(q)

mm = ve.max_marginal(variables=None, evidence=None)
print("Max-Marginal query\n")
print(mm)

# NOTE(review): mm2 is computed but never used below.
mm2 = ve.map_query(variables=['age'], evidence={'delay': '0'})

# End of Task 2

# Task 3 ------------ Reversed PGM

data = pd.DataFrame(data=RAW_DATA)
model = BayesianModel([('age', 'delay'), ('gender', 'delay'),
Example #31
0
#Question2

# Create a Bayesian model's CPDs using Maximum Likelihood Estimation.
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
estimator = MaximumLikelihoodEstimator(model, data)
cpds = estimator.get_parameters()
# One TabularCPD per variable, in node order.
fruit_cpd = cpds[0]
size_cpd = cpds[1]
tasty_cpd = cpds[2]
print(tasty_cpd)
# Write the CPD of 'tasty' to csv.
# BUGFIX: the original passed an undefined name `b` to pd.DataFrame
# (NameError at runtime). Per the comment above, the intent is to dump the
# 'tasty' CPD table — TODO confirm against the expected output file.
res = pd.DataFrame(tasty_cpd.get_values())
res.to_csv('/code/output/output2.csv', index=False, header=False)

#Question3
# Attach the estimated CPDs, then run variable elimination on the model.
for i in range(0, 3):
    model.add_cpds(cpds[i])
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination
model_inference = VariableElimination(model)
# map_query returns a dict {variable: MAP state}.
query = model_inference.map_query(variables=['tasty'])
#Expected Output
print(query)
result = pd.DataFrame(query, index=[0])
#write the output to csv
result.to_csv('/code/output/output3.csv', index=False)
class StateNameDecorator(unittest.TestCase):
    """Tests that named states and integer state indices are interchangeable
    in factors, CPDs and VariableElimination queries.
    """

    def setUp(self):
        # State-name maps: sn2 for the student network, sn1 for the
        # speed/switch/time factors.
        self.sn2 = {'grade': ['A', 'B', 'F'],
                    'diff': ['high', 'low'],
                    'intel': ['poor', 'good', 'very good']}
        self.sn1 = {'speed': ['low', 'medium', 'high'],
                    'switch': ['on', 'off'],
                    'time': ['day', 'night']}

        # Identical factors, with and without state names.
        self.phi1 = DiscreteFactor(['speed', 'switch', 'time'],
                                   [3, 2, 2], np.ones(12))
        self.phi2 = DiscreteFactor(['speed', 'switch', 'time'],
                                   [3, 2, 2], np.ones(12), state_names=self.sn1)

        # Identical CPDs, with and without state names.
        self.cpd1 = TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                            [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                            [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3])
        self.cpd2 = TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                            [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                            [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3],
                               state_names=self.sn2)

        # Student network: diff -> grade <- intel.
        student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, [[0.2, 0.8]])
        intel_cpd = TabularCPD('intel', 2, [[0.3, 0.7]])
        grade_cpd = TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1],
                                            [0.1, 0.1, 0.1, 0.1],
                                            [0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 2])
        student.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        self.model1 = VariableElimination(student)
        self.model2 = VariableElimination(student, state_names=self.sn2)

    def test_assignment_statename(self):
        # Without state names assignment() yields indices; with them, names.
        req_op1 = [[('speed', 'low'), ('switch', 'on'), ('time', 'night')],
                   [('speed', 'low'), ('switch', 'off'), ('time', 'day')]]
        req_op2 = [[('speed', 0), ('switch', 0), ('time', 1)],
                   [('speed', 0), ('switch', 1), ('time', 0)]]

        self.assertEqual(self.phi1.assignment([1, 2]), req_op2)
        self.assertEqual(self.phi2.assignment([1, 2]), req_op1)

    def test_factor_reduce_statename(self):
        # Reducing by ('speed', 'medium') must equal reducing by ('speed', 1),
        # both in-place and with inplace=False.
        phi = DiscreteFactor(['speed', 'switch', 'time'],
                             [3, 2, 2], np.ones(12), state_names=self.sn1)
        phi.reduce([('speed', 'medium'), ('time', 'day')])
        self.assertEqual(phi.variables, ['switch'])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))

        phi = DiscreteFactor(['speed', 'switch', 'time'],
                             [3, 2, 2], np.ones(12), state_names=self.sn1)
        phi = phi.reduce([('speed', 'medium'), ('time', 'day')], inplace=False)
        self.assertEqual(phi.variables, ['switch'])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))

        phi = DiscreteFactor(['speed', 'switch', 'time'],
                             [3, 2, 2], np.ones(12), state_names=self.sn1)
        phi.reduce([('speed', 1), ('time', 0)])
        self.assertEqual(phi.variables, ['switch'])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))

        phi = DiscreteFactor(['speed', 'switch', 'time'],
                             [3, 2, 2], np.ones(12), state_names=self.sn1)
        phi = phi.reduce([('speed', 1), ('time', 0)], inplace=False)
        self.assertEqual(phi.variables, ['switch'])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))

    def test_reduce_cpd_statename(self):
        # Same equivalence for TabularCPD.reduce: 'high' <-> index 0.
        cpd = TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                      [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                      [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                         evidence=['diff', 'intel'], evidence_card=[2, 3],
                         state_names=self.sn2)
        cpd.reduce([('diff', 'high')])
        self.assertEqual(cpd.variable, 'grade')
        self.assertEqual(cpd.variables, ['grade', 'intel'])
        np_test.assert_array_equal(cpd.get_values(), np.array([[0.1, 0.1, 0.1],
                                                            [0.1, 0.1, 0.1],
                                                            [0.8, 0.8, 0.8]]))

        cpd = TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                      [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                      [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                         evidence=['diff', 'intel'], evidence_card=[2, 3],
                         state_names=self.sn2)
        cpd.reduce([('diff', 0)])
        self.assertEqual(cpd.variable, 'grade')
        self.assertEqual(cpd.variables, ['grade', 'intel'])
        np_test.assert_array_equal(cpd.get_values(), np.array([[0.1, 0.1, 0.1],
                                                            [0.1, 0.1, 0.1],
                                                            [0.8, 0.8, 0.8]]))

        cpd = TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                      [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                      [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                         evidence=['diff', 'intel'], evidence_card=[2, 3],
                         state_names=self.sn2)
        cpd = cpd.reduce([('diff', 'high')], inplace=False)
        self.assertEqual(cpd.variable, 'grade')
        self.assertEqual(cpd.variables, ['grade', 'intel'])
        np_test.assert_array_equal(cpd.get_values(), np.array([[0.1, 0.1, 0.1],
                                                            [0.1, 0.1, 0.1],
                                                            [0.8, 0.8, 0.8]]))

        cpd = TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                      [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                      [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                         evidence=['diff', 'intel'], evidence_card=[2, 3],
                         state_names=self.sn2)
        cpd = cpd.reduce([('diff', 0)], inplace=False)
        self.assertEqual(cpd.variable, 'grade')
        self.assertEqual(cpd.variables, ['grade', 'intel'])
        np_test.assert_array_equal(cpd.get_values(), np.array([[0.1, 0.1, 0.1],
                                                            [0.1, 0.1, 0.1],
                                                            [0.8, 0.8, 0.8]]))

    def test_inference_query_statename(self):
        # Evidence may be given by name ('poor') or by index (0): same result.
        inf_op1 = self.model2.query(['grade'], evidence={'intel': 'poor'})
        inf_op2 = self.model2.query(['grade'], evidence={'intel': 0})
        req_op = {'grade': DiscreteFactor(['grade'], [3], np.array([0.1, 0.1, 0.8]))}

        self.assertEqual(inf_op1, inf_op2)
        self.assertEqual(inf_op1, req_op)
        self.assertEqual(inf_op1, req_op)

        inf_op1 = self.model2.map_query(['grade'], evidence={'intel': 'poor'})
        inf_op2 = self.model2.map_query(['grade'], evidence={'intel': 0})
        req_op = {'grade': 'F'}

        self.assertEqual(inf_op1, inf_op2)
        self.assertEqual(inf_op1, req_op)
        self.assertEqual(inf_op1, req_op)
Example #33
0
inference_model = VariableElimination(BN_Model)

true_label = test["Overall"]
# BUGFIX: take a copy — the original bound pred_label to the SAME Series as
# true_label, so writing predictions into pred_label also mutated the ground
# truth (it only "worked" because each true value was saved before its row
# was overwritten).
pred_label = test["Overall"].copy()
test = test.drop(columns=["Overall"])
cols = test.columns.values

true_values = [None] * len(true_label)
cont = 0

for (idx, row) in test.iterrows():
    # Every remaining column becomes evidence for the MAP query.
    # (The original also pre-built a throwaway list here; removed as dead code.)
    evidences_values = {col: row[col] for col in cols}
    pred = inference_model.map_query(variables=['Overall'], evidence=evidences_values)
    true_values[cont] = true_label[idx]
    cont = cont + 1
    # +1: model states are presumably 0-based while labels are 1-based — confirm.
    pred_label[idx] = pred["Overall"] + 1

y_true = true_values
y_pred = pred_label

accuracy = accuracy_score(y_true, y_pred)
recall = recall_score(y_true, y_pred, average="weighted")
precision = precision_score(y_true, y_pred, average="weighted")
f1 = f1_score(y_true, y_pred, average='weighted')
# Real on the Y axis, pred on the X axis

print("Accuracy: ", accuracy)
# Of all the true instances with label 0, how many were correctly predicted as 0?