def test_add_multiple_cpds(self):
    """Add five CPDs in one call and check each is returned for its variable."""
    # Kept as an ordered list so CPDs are created and added in a fixed order.
    named_cpds = [
        ('d', TabularCPD('d', 2, values=np.random.rand(2, 1))),
        ('i', TabularCPD('i', 2, values=np.random.rand(2, 1))),
        ('g', TabularCPD('g', 2, values=np.random.rand(2, 4),
                         evidence=['d', 'i'], evidence_card=[2, 2])),
        ('l', TabularCPD('l', 2, values=np.random.rand(2, 2),
                         evidence=['g'], evidence_card=[2])),
        ('s', TabularCPD('s', 2, values=np.random.rand(2, 2),
                         evidence=['i'], evidence_card=[2])),
    ]

    self.G.add_cpds(*[cpd for _, cpd in named_cpds])

    for variable, expected_cpd in named_cpds:
        self.assertEqual(self.G.get_cpds(variable), expected_cpd)
def test_check_model2(self):
    """``check_model`` must raise ``ValueError`` for each malformed CPD.

    Every table below contains at least one column that does not sum to one.
    Each CPD is added, checked and removed again before the next one is
    tried, so the cases do not interfere with each other.
    """
    def assert_rejected(bad_cpd):
        self.G.add_cpds(bad_cpd)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(bad_cpd)

    assert_rejected(TabularCPD(
        's', 2,
        values=np.array([[0.5, 0.3],
                         [0.8, 0.7]]),
        evidence=['i'], evidence_card=2))

    assert_rejected(TabularCPD(
        'g', 2,
        values=np.array([[0.2, 0.3, 0.4, 0.6],
                         [0.3, 0.7, 0.6, 0.4]]),
        evidence=['d', 'i'], evidence_card=[2, 2]))

    assert_rejected(TabularCPD(
        'l', 2,
        values=np.array([[0.2, 0.3],
                         [0.1, 0.7]]),
        evidence=['g'], evidence_card=[2]))
def setUp(self):
    """Build factors, CPDs and inference objects with and without state names."""
    def grade_table(n_columns):
        # Three grade states; every column is (0.1, 0.1, 0.8).
        return [[0.1] * n_columns, [0.1] * n_columns, [0.8] * n_columns]

    # State-name mappings for the two sets of variables.
    self.sn1 = {'speed': ['low', 'medium', 'high'],
                'switch': ['on', 'off'],
                'time': ['day', 'night']}
    self.sn2 = {'grade': ['A', 'B', 'F'],
                'diff': ['high', 'low'],
                'intel': ['poor', 'good', 'very good']}

    # Same factor, once plain and once with state names attached.
    self.phi1 = Factor(['speed', 'switch', 'time'], [3, 2, 2], np.ones(12))
    self.phi2 = Factor(['speed', 'switch', 'time'], [3, 2, 2], np.ones(12),
                       state_names=self.sn1)

    # Same CPD, once plain and once with state names attached.
    self.cpd1 = TabularCPD('grade', 3, grade_table(6),
                           evidence=['diff', 'intel'], evidence_card=[2, 3])
    self.cpd2 = TabularCPD('grade', 3, grade_table(6),
                           evidence=['diff', 'intel'], evidence_card=[2, 3],
                           state_names=self.sn2)

    # Small student network shared by both inference objects.
    student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
    student.add_cpds(
        TabularCPD('diff', 2, [[0.2, 0.8]]),
        TabularCPD('intel', 2, [[0.3, 0.7]]),
        TabularCPD('grade', 3, grade_table(4),
                   evidence=['diff', 'intel'], evidence_card=[2, 2]))

    self.model1 = Inference(student)
    self.model2 = Inference(student, state_names=self.sn2)
def setUp(self):
    """Create a Bayesian model, a Markov model and a Gibbs sampler fixture."""
    # --- Bayesian model: diff -> grade <- intel ---
    student = BayesianModel()
    student.add_nodes_from(['diff', 'intel', 'grade'])
    student.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
    student.add_cpds(
        TabularCPD('diff', 2, [[0.6], [0.4]]),
        TabularCPD('intel', 2, [[0.7], [0.3]]),
        TabularCPD('grade', 3,
                   [[0.3, 0.05, 0.9, 0.5],
                    [0.4, 0.25, 0.08, 0.3],
                    [0.3, 0.7, 0.02, 0.2]],
                   evidence=['diff', 'intel'], evidence_card=[2, 2]))
    self.bayesian_model = student

    # --- Markov model with three pairwise factors sharing node 'B' ---
    self.markov_model = MarkovModel([('A', 'B'), ('C', 'B'), ('B', 'D')])
    pairwise_factors = [
        Factor(['A', 'B'], [2, 3], [1, 2, 3, 4, 5, 6]),
        Factor(['C', 'B'], [4, 3], [3, 1, 4, 5, 7, 8, 1, 3, 10, 4, 5, 6]),
        Factor(['B', 'D'], [3, 2], [5, 7, 2, 1, 9, 3]),
    ]
    self.markov_model.add_factors(*pairwise_factors)

    # The sampler under test is built from the Bayesian model.
    self.gibbs = GibbsSampling(self.bayesian_model)
def setUp(self):
    """Create a chain Bayesian model and a four-node Markov model fixture."""
    # Chain a -> b -> c -> d -> e; every non-root CPD has one binary parent.
    self.bayesian = BayesianModel([('a', 'b'), ('b', 'c'), ('c', 'd'), ('d', 'e')])
    chain_cpds = [TabularCPD('a', 2, [[0.4, 0.6]])]
    conditional_tables = [
        ('b', 'a', [[0.2, 0.4], [0.3, 0.4]]),
        ('c', 'b', [[0.1, 0.2], [0.3, 0.4]]),
        ('d', 'c', [[0.4, 0.3], [0.2, 0.1]]),
        ('e', 'd', [[0.3, 0.2], [0.4, 0.1]]),
    ]
    for child, parent, table in conditional_tables:
        # NOTE: evidence is deliberately passed as a plain string here.
        chain_cpds.append(
            TabularCPD(child, 2, table, evidence=parent, evidence_card=[2]))
    self.bayesian.add_cpds(*chain_cpds)

    # Markov model with one 2x2 potential per edge.
    self.markov = MarkovModel([('a', 'b'), ('b', 'd'), ('a', 'c'), ('c', 'd')])
    potentials = [
        (['a', 'b'], [100, 1, 1, 100]),
        (['a', 'c'], [40, 30, 100, 20]),
        (['b', 'd'], [1, 100, 100, 1]),
        (['c', 'd'], [60, 60, 40, 40]),
    ]
    self.markov.add_factors(
        *[Factor(scope, [2, 2], np.array(weights)) for scope, weights in potentials])
def name_cpd(aid):
    """Return a uniform CPD over ``num_names`` states for variable ``'N' + aid``.

    ``num_names`` is read from the enclosing scope. The returned CPD is
    tagged with ``semtype = 'name'``.
    """
    from pgmpy.factors import TabularCPD

    # Single row in which every state gets probability 1 / num_names.
    uniform_row = [1.0 / num_names] * num_names
    name_factor = TabularCPD(
        variable='N' + aid,
        variable_card=num_names,
        values=[uniform_row])
    name_factor.semtype = 'name'
    return name_factor
def test_add_multiple_cpds(self):
    """CPDs added together are returned by ``get_cpds()`` in insertion order."""
    expected = [
        TabularCPD('diff', 2, np.random.rand(2, 1)),
        TabularCPD('intel', 2, np.random.rand(2, 1)),
        TabularCPD('grade', 2, np.random.rand(2, 4), ['diff', 'intel'], [2, 2]),
    ]

    self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
    self.graph.add_cpds(*expected)

    self.assertListEqual(self.graph.get_cpds(), expected)
def test_get_cpd_raises_error(self):
    """Asking for the CPD of a node that is not in the graph raises ValueError."""
    diff_cpd = TabularCPD('diff', 2, np.random.rand(2, 1))
    intel_cpd = TabularCPD('intel', 2, np.random.rand(2, 1))
    grade_cpd = TabularCPD('grade', 2, np.random.rand(2, 4),
                           ['diff', 'intel'], [2, 2])

    self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
    self.graph.add_cpds(diff_cpd, intel_cpd, grade_cpd)

    # 'sat' was never added as a node.
    self.assertRaises(ValueError, self.graph.get_cpds, 'sat')
def initialize_initial_state(self):
    """
    Mirror CPDs between the 0th and the 1st time slice.

    For every CPD already attached to the network, the variable with the
    same name in the *other* time slice is looked up. If that variable has
    no CPD yet and all of its parents live in a single time slice, a CPD is
    created for it from the existing one:

    * if the mirrored variable has parents, the existing table is reused
      with those parents as evidence;
    * otherwise the existing CPD is marginalized over its evidence (if it
      has any) and the resulting table is used.

    To call this method, the CPDs and the edges of the whole skeleton,
    including the 0th and the 1st time slice, must already have been added.
    ``check_model`` is called after each CPD has been processed.

    Examples
    --------
    >>> from pgmpy.models import DynamicBayesianNetwork as DBN
    >>> from pgmpy.factors import TabularCPD
    >>> student = DBN()
    >>> student.add_nodes_from(['D', 'G', 'I', 'S', 'L'])
    >>> student.add_edges_from([(('D', 0), ('G', 0)), (('I', 0), ('G', 0)),
    ...                         (('D', 0), ('D', 1)), (('I', 0), ('I', 1))])
    >>> grade_cpd = TabularCPD(('G', 0), 3, [[0.3, 0.05, 0.9, 0.5],
    ...                                      [0.4, 0.25, 0.08, 0.3],
    ...                                      [0.3, 0.7, 0.02, 0.2]],
    ...                        evidence=[('I', 0), ('D', 0)],
    ...                        evidence_card=[2, 2])
    >>> d_i_cpd = TabularCPD(('D', 1), 2, [[0.6, 0.3],
    ...                                    [0.4, 0.7]],
    ...                      evidence=[('D', 0)],
    ...                      evidence_card=2)
    >>> diff_cpd = TabularCPD(('D', 0), 2, [[0.6, 0.4]])
    >>> intel_cpd = TabularCPD(('I', 0), 2, [[0.7, 0.3]])
    >>> i_i_cpd = TabularCPD(('I', 1), 2, [[0.5, 0.4],
    ...                                    [0.5, 0.6]],
    ...                      evidence=[('I', 0)],
    ...                      evidence_card=2)
    >>> student.add_cpds(grade_cpd, d_i_cpd, diff_cpd, intel_cpd, i_i_cpd)
    >>> student.initialize_initial_state()
    """
    # Iterate over a snapshot: ``add_cpds`` below grows ``self.cpds``.
    for cpd in list(self.cpds):
        # Same variable name, opposite time slice (0 <-> 1).
        temp_var = (cpd.variable[0], 1 - cpd.variable[1])
        parents = self.get_parents(temp_var)

        already_defined = any(x.variable == temp_var for x in self.cpds)
        # Vacuously true when there are no parents (``parents[0]`` is then
        # never evaluated).
        same_slice_parents = all(x[1] == parents[0][1] for x in parents)

        if not already_defined and same_slice_parents:
            if parents:
                evidence_card = cpd.cardinality[:0:-1]
                new_cpd = TabularCPD(
                    temp_var,
                    cpd.variable_card,
                    cpd.values.reshape(cpd.variable_card, np.prod(evidence_card)),
                    parents,
                    evidence_card)
            else:
                if cpd.get_evidence():
                    source = cpd.marginalize(cpd.get_evidence(), inplace=False)
                else:
                    source = cpd
                # One row of ``variable_card`` entries. The previous
                # hard-coded ``(-1, 2)`` only worked for binary variables.
                new_cpd = TabularCPD(
                    temp_var,
                    cpd.variable_card,
                    np.reshape(source.values, (-1, cpd.variable_card)))
            self.add_cpds(new_cpd)
        self.check_model()
def test_get_cpds(self):
    """``get_cpds('d')`` returns the CPD whose variable is ``'d'``."""
    all_cpds = [
        TabularCPD('d', 2, np.random.rand(2, 1)),
        TabularCPD('i', 2, np.random.rand(2, 1)),
        TabularCPD('g', 2, np.random.rand(2, 4), ['d', 'i'], [2, 2]),
        TabularCPD('l', 2, np.random.rand(2, 2), ['g'], 2),
        TabularCPD('s', 2, np.random.rand(2, 2), ['i'], 2),
    ]
    self.G.add_cpds(*all_cpds)

    fetched = self.G.get_cpds('d')
    self.assertEqual(fetched.variable, 'd')
def test_get_cpds1(self):
    """CPD lookup works when one node name ('A') is a prefix of another ('AB')."""
    self.model = BayesianModel([('A', 'AB')])

    parent_cpd = TabularCPD('A', 2, np.random.rand(2, 1))
    child_cpd = TabularCPD('AB', 2, np.random.rand(2, 2),
                           evidence=['A'], evidence_card=[2])
    self.model.add_cpds(parent_cpd, child_cpd)

    for node_name in ('A', 'AB'):
        self.assertEqual(self.model.get_cpds(node_name).variable, node_name)
def score_cpd(aid1, aid2):
    """Return the score CPD ``'S' + aid1 + aid2`` conditioned on both name variables.

    ``num_scores``, ``score_values`` and ``num_names`` are read from the
    enclosing scope. The returned CPD is tagged with ``semtype = 'score'``.
    """
    name_vars = ['N' + aid1, 'N' + aid2]
    score_factor = TabularCPD(
        variable='S' + aid1 + aid2,
        variable_card=num_scores,
        values=score_values,
        evidence=name_vars,
        evidence_card=[num_names] * 2)
    score_factor.semtype = 'score'
    return score_factor
def samediff_cpd(aid1, aid2):
    """Return the match CPD ``'A' + aid1 + aid2`` conditioned on both name variables.

    ``num_same_diff``, ``samediff_vals`` and ``num_names`` are read from the
    enclosing scope. The returned CPD is tagged with ``semtype = 'match'``.
    """
    name_vars = ['N' + aid1, 'N' + aid2]
    match_factor = TabularCPD(
        variable='A' + aid1 + aid2,
        variable_card=num_same_diff,
        values=samediff_vals,
        evidence=name_vars,
        evidence_card=[num_names] * 2)
    match_factor.semtype = 'match'
    return match_factor
def score_cpd(aid1, aid2):
    """Return the score CPD ``'S' + aid1 + aid2`` conditioned on the match variable.

    The only evidence variable is ``'A' + aid1 + aid2``. ``num_scores``,
    ``score_values`` and ``num_same_diff`` are read from the enclosing scope.
    The returned CPD is tagged with ``semtype = 'score'``.
    """
    pair_suffix = aid1 + aid2
    score_factor = TabularCPD(
        variable='S' + pair_suffix,
        variable_card=num_scores,
        values=score_values,
        evidence=['A' + pair_suffix],
        evidence_card=[num_same_diff])
    score_factor.semtype = 'score'
    return score_factor
def test_get_cpd_for_node(self):
    """``get_cpds(node)`` returns exactly the CPD that was added for that node."""
    cpd_by_node = [
        ('diff', TabularCPD('diff', 2, np.random.rand(2, 1))),
        ('intel', TabularCPD('intel', 2, np.random.rand(2, 1))),
        ('grade', TabularCPD('grade', 2, np.random.rand(2, 4),
                             ['diff', 'intel'], [2, 2])),
    ]

    self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
    self.graph.add_cpds(*[cpd for _, cpd in cpd_by_node])

    for node, expected_cpd in cpd_by_node:
        self.assertEqual(self.graph.get_cpds(node), expected_cpd)
def bayesnet_examples():
    """Walk through basic pgmpy usage on the student network.

    The example first fits the model on random binary data and predicts the
    dropped column ``'D'`` on a held-out split. It then attaches hand-written
    CPDs to the same model. Nothing is returned.
    """
    from pgmpy.factors import TabularCPD
    from pgmpy.models import BayesianModel
    import pandas as pd

    student_model = BayesianModel([('D', 'G'),
                                   ('I', 'G'),
                                   ('G', 'L'),
                                   ('I', 'S')])

    # Generate some random binary data, one column per node.
    raw_data = np.random.randint(low=0, high=2, size=(1000, 5))
    data = pd.DataFrame(raw_data, columns=['D', 'I', 'G', 'L', 'S'])

    # 75% / 25% train/test split; compute the boundary once.
    split = int(data.shape[0] * 0.75)
    data_train = data[:split]
    student_model.fit(data_train)
    student_model.get_cpds()

    # Drop the column to predict. A non-inplace drop avoids mutating a
    # slice of ``data`` (which triggers pandas' chained-assignment warning).
    data_test = data[split:].drop('D', axis=1)
    student_model.predict(data_test)

    # Hand-specified CPDs for the same structure.
    grade_cpd = TabularCPD(
        variable='G',
        variable_card=3,
        values=[[0.3, 0.05, 0.9, 0.5],
                [0.4, 0.25, 0.08, 0.3],
                [0.3, 0.7, 0.02, 0.2]],
        evidence=['I', 'D'],
        evidence_card=[2, 2])
    difficulty_cpd = TabularCPD(
        variable='D',
        variable_card=2,
        values=[[0.6, 0.4]])
    intel_cpd = TabularCPD(
        variable='I',
        variable_card=2,
        values=[[0.7, 0.3]])
    letter_cpd = TabularCPD(
        variable='L',
        variable_card=2,
        values=[[0.1, 0.4, 0.99],
                [0.9, 0.6, 0.01]],
        evidence=['G'],
        evidence_card=[3])
    sat_cpd = TabularCPD(
        variable='S',
        variable_card=2,
        values=[[0.95, 0.2],
                [0.05, 0.8]],
        evidence=['I'],
        evidence_card=[2])
    student_model.add_cpds(grade_cpd, difficulty_cpd, intel_cpd, letter_cpd,
                           sat_cpd)
def test_remove_multiple_cpds_string(self):
    """Removing CPDs by variable name leaves only the ones not named."""
    diff_cpd = TabularCPD('diff', 2, values=np.random.rand(2, 1))
    intel_cpd = TabularCPD('intel', 2, values=np.random.rand(2, 1))
    grade_cpd = TabularCPD('grade', 2, values=np.random.rand(2, 4),
                           evidence=['diff', 'intel'], evidence_card=[2, 2])

    self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
    self.graph.add_cpds(diff_cpd, intel_cpd, grade_cpd)

    # Remove two of the three CPDs, addressing them by node name.
    self.graph.remove_cpds('diff', 'grade')

    remaining = self.graph.get_cpds()
    self.assertListEqual(remaining, [intel_cpd])
def test_get_parameters_missing_data(self):
    """MLE on ``self.d1`` yields the expected set of CPDs for model ``self.m1``."""
    estimator = MaximumLikelihoodEstimator(self.m1, self.d1)

    expected_a = TabularCPD('A', 2, [[2.0 / 3], [1.0 / 3]])
    expected_c = TabularCPD('C', 2,
                            [[0.0, 0.0, 1.0, 0.5],
                             [1.0, 1.0, 0.0, 0.5]],
                            evidence=['A', 'B'], evidence_card=[2, 2])
    expected_b = TabularCPD('B', 2, [[2.0 / 3], [1.0 / 3]])
    expected = set([expected_a, expected_c, expected_b])

    # Compared as sets because the order of the returned CPDs is not asserted.
    learned = set(estimator.get_parameters())
    self.assertSetEqual(learned, expected)
def test_check_model(self):
    """``check_model`` returns a truthy value for CPDs whose columns sum to one."""
    binary_given_one_parent = np.array([[0.2, 0.3],
                                        [0.8, 0.7]])

    grade = TabularCPD('g', 2,
                       np.array([[0.2, 0.3, 0.4, 0.6],
                                 [0.8, 0.7, 0.6, 0.4]]),
                       ['d', 'i'], [2, 2])
    # Each CPD gets its own copy of the shared 2x2 table.
    sat = TabularCPD('s', 2, binary_given_one_parent.copy(), ['i'], 2)
    letter = TabularCPD('l', 2, binary_given_one_parent.copy(), ['g'], 2)

    self.G.add_cpds(grade, sat, letter)

    self.assertTrue(self.G.check_model())
def setUp(self):
    """Create a bare model ``G`` and a student model ``G1`` with all CPDs attached."""
    # Structure only, no CPDs.
    self.G = BayesianModel([('a', 'd'), ('b', 'd'), ('d', 'e'), ('b', 'c')])

    # diff (2 states) and intel (3 states) are the parents of grade (3 states).
    self.G1 = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
    n_parent_configs = 2 * 3
    grade_table = [[0.1] * n_parent_configs,
                   [0.1] * n_parent_configs,
                   [0.8] * n_parent_configs]
    self.G1.add_cpds(
        TabularCPD('diff', 2, values=[[0.2], [0.8]]),
        TabularCPD('intel', 3, values=[[0.5], [0.3], [0.2]]),
        TabularCPD('grade', 3, values=grade_table,
                   evidence=['diff', 'intel'], evidence_card=[2, 3]))
def setUp(self):
    """Build two small dynamic Bayesian networks and wrap each in ``DBNInference``."""
    # Node shorthands: (name, time slice). Tuples are immutable, so sharing is safe.
    x0, y0, z0, z1 = ('X', 0), ('Y', 0), ('Z', 0), ('Z', 1)

    # Network 1: Z0 is the parent of X0 and Y0, and Z0 -> Z1 across slices.
    first = DynamicBayesianNetwork()
    first.add_edges_from([(z0, x0), (z0, y0), (z0, z1)])
    start_z = TabularCPD(z0, 2, [[0.8, 0.2]])
    x_given_z = TabularCPD(x0, 2, [[0.9, 0.6], [0.1, 0.4]], [z0], 2)
    y_given_z = TabularCPD(y0, 2, [[0.7, 0.2], [0.3, 0.8]], [z0], 2)
    z_transition = TabularCPD(z1, 2, [[0.9, 0.1], [0.1, 0.9]], [z0], 2)
    first.add_cpds(start_z, z_transition, x_given_z, y_given_z)
    first.initialize_initial_state()
    self.dbn_inference_1 = DBNInference(first)

    # Network 2: chain Z0 -> X0 -> Y0, and Z0 -> Z1 across slices.
    second = DynamicBayesianNetwork()
    second.add_edges_from([(z0, x0), (x0, y0), (z0, z1)])
    start_z = TabularCPD(z0, 2, [[0.5, 0.5]])
    x_given_z = TabularCPD(x0, 2, [[0.6, 0.9], [0.4, 0.1]], [z0], 2)
    y_given_x = TabularCPD(y0, 2, [[0.2, 0.3], [0.8, 0.7]], [x0], 2)
    z_transition = TabularCPD(z1, 2, [[0.4, 0.7], [0.6, 0.3]], [z0], 2)
    second.add_cpds(x_given_z, y_given_x, z_transition, start_z)
    second.initialize_initial_state()
    self.dbn_inference_2 = DBNInference(second)
def test_add_multiple_cpds(self):
    """Add five CPDs in one call and check each is returned for its variable."""
    from pgmpy.factors import TabularCPD

    # Ordered (variable, cpd) pairs; creation and insertion order are fixed.
    named_cpds = [
        ('d', TabularCPD('d', 2, np.random.rand(2, 1))),
        ('i', TabularCPD('i', 2, np.random.rand(2, 1))),
        ('g', TabularCPD('g', 2, np.random.rand(2, 4), ['d', 'i'], [2, 2])),
        ('l', TabularCPD('l', 2, np.random.rand(2, 2), ['g'], 2)),
        ('s', TabularCPD('s', 2, np.random.rand(2, 2), ['i'], 2)),
    ]

    self.G.add_cpds(*[cpd for _, cpd in named_cpds])

    for variable, expected_cpd in named_cpds:
        self.assertEqual(self.G.get_cpds(variable), expected_cpd)
def get_parameters(self, prior='dirichlet', **kwargs):
    """
    Method for getting all the learned CPDs of the model.

    Parameters
    ----------
    prior: str
        Only ``'dirichlet'`` produces CPDs. For any other value an empty
        list is returned.
    alpha: dict (optional keyword argument)
        Maps each node to the pseudo counts that are added to the observed
        state counts of that node. When it is not given and ``prior`` is
        ``'dirichlet'``, a pseudo count of 1 is used for every entry of
        every CPD table.

    Returns
    -------
    parameters: list
        List containing all the parameters. For Bayesian Model it would be
        list of CPDs' for Markov Model it would be a list of factors

    Raises
    ------
    KeyError
        If ``prior`` is not ``'dirichlet'`` and no ``alpha`` is passed.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from pgmpy.models import BayesianModel
    >>> from pgmpy.estimators import MaximumLikelihoodEstimator
    >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
    ...                       columns=['A', 'B', 'C', 'D', 'E'])
    >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
    >>> estimator = MaximumLikelihoodEstimator(model, values)
    >>> estimator.get_parameters()
    """
    if prior == 'dirichlet' and 'alpha' not in kwargs:
        # One pseudo count per table entry: variable_card * prod(parent cards).
        alpha = {
            node: [1] * (self.node_card[node] * (
                np.prod([self.node_card[_node]
                         for _node in self.model.predecessors(node)])
                if self.model.predecessors(node) else 1))
            for node in self.model.nodes()
        }
    else:
        alpha = kwargs['alpha']

    parameters = []
    if prior != 'dirichlet':
        # No other prior is implemented; nothing is estimated.
        return parameters

    for node in self.model.nodes():
        parents = self.model.get_parents(node)
        node_alpha = np.array(alpha[node])
        if not parents:
            # ``value_counts`` orders by frequency; sort by state so that
            # counts line up with the state order and with ``node_alpha``.
            # NOTE(review): states never observed in the data are absent
            # from the counts - confirm callers guarantee full coverage.
            state_counts = self.data.loc[:, node].value_counts().sort_index()
            values = ((state_counts.values + node_alpha) /
                      (state_counts.values.sum() + node_alpha.sum()))
            cpd = TabularCPD(node, self.node_card[node], values[:, np.newaxis])
        else:
            parent_card = np.array([self.node_card[parent] for parent in parents])
            var_card = self.node_card[node]
            state_counts = (self.data.groupby(
                [node] + self.model.predecessors(node)).size()).values
            values = ((state_counts + node_alpha) /
                      (state_counts.sum() + node_alpha.sum()))
            values = values.reshape(var_card, np.prod(parent_card))
            cpd = TabularCPD(node, var_card, values,
                             evidence=parents,
                             evidence_card=parent_card.astype('int'))
        cpd.normalize()
        parameters.append(cpd)
    return parameters
def get_model(self):
    """
    Build a ``BayesianModel`` from the parsed network description.

    Edges come from ``self.get_edges()``. One ``TabularCPD`` is created per
    entry of ``self.variable_CPD``, with cardinalities taken from
    ``self.variable_states``. Each ``"name = value"`` string in
    ``self.variable_property`` is stored as a node attribute.

    Returns
    -------
    model: BayesianModel
        The model with its name, CPDs and node properties set.
    """
    model = BayesianModel(self.get_edges())
    model.name = self.network_name

    tabular_cpds = []
    for var, values in self.variable_CPD.items():
        parents = self.variable_parents[var]
        cpd = TabularCPD(var, len(self.variable_states[var]), values,
                         evidence=parents,
                         evidence_card=[len(self.variable_states[evidence_var])
                                        for evidence_var in parents])
        tabular_cpds.append(cpd)
    model.add_cpds(*tabular_cpds)

    for node, properties in self.variable_property.items():
        for prop in properties:
            # Split on the first '=' only, so values may themselves contain '='.
            prop_name, prop_value = (part.strip() for part in prop.split('=', 1))
            model.node[node][prop_name] = prop_value

    return model
def get_model(self):
    """
    Returns an instance of Bayesian Model.

    The model is built from ``self.edges``. One ``TabularCPD`` is created
    per entry of ``self.variable_CPD``. The whole property dictionary of
    each variable in ``self.variables`` is attached to the matching node.
    """
    model = BayesianModel(self.edges)
    model.name = self.model_name

    tabular_cpds = []
    for var, description in self.variable_CPD.items():
        # 'CONDSET' and 'CARDINALITY' are absent for variables without parents.
        parents = description.get('CONDSET', [])
        parent_cards = description.get('CARDINALITY', [])
        table = description['DPIS']
        n_states = len(self.variables[var]['STATES'])
        tabular_cpds.append(
            TabularCPD(var, n_states, table,
                       evidence=parents, evidence_card=parent_cards))
    model.add_cpds(*tabular_cpds)

    for var, properties in self.variables.items():
        model.node[var] = properties

    return model
def setUp(self):
    """Build the dog-problem network with CPDs and properties, then wrap it in ``BIFWriter``."""
    edges = [['family-out', 'dog-out'],
             ['bowel-problem', 'dog-out'],
             ['family-out', 'light-on'],
             ['dog-out', 'hear-bark']]
    cpds = {
        'bowel-problem': np.array([[0.01],
                                   [0.99]]),
        'dog-out': np.array([[0.99, 0.01, 0.97, 0.03],
                             [0.9, 0.1, 0.3, 0.7]]),
        'family-out': np.array([[0.15],
                                [0.85]]),
        'hear-bark': np.array([[0.7, 0.3],
                               [0.01, 0.99]]),
        'light-on': np.array([[0.6, 0.4],
                              [0.05, 0.95]]),
    }
    states = {
        'bowel-problem': ['true', 'false'],
        'dog-out': ['true', 'false'],
        'family-out': ['true', 'false'],
        'hear-bark': ['true', 'false'],
        'light-on': ['true', 'false'],
    }
    parents = {
        'bowel-problem': [],
        'dog-out': ['family-out', 'bowel-problem'],
        'family-out': [],
        'hear-bark': ['dog-out'],
        'light-on': ['family-out'],
    }
    properties = {
        'bowel-problem': ['position = (335, 99)'],
        'dog-out': ['position = (300, 195)'],
        'family-out': ['position = (257, 99)'],
        'hear-bark': ['position = (296, 268)'],
        'light-on': ['position = (218, 195)'],
    }

    self.model = BayesianModel(edges)

    # CPDs are created in sorted variable order.
    tabular_cpds = [
        TabularCPD(var, len(states[var]), cpds[var],
                   evidence=parents[var],
                   evidence_card=[len(states[evidence_var])
                                  for evidence_var in parents[var]])
        for var in sorted(cpds.keys())
    ]
    self.model.add_cpds(*tabular_cpds)

    # Each property string has the form "name = value".
    for node, node_props in properties.items():
        for prop in node_props:
            prop_name, prop_value = [part.strip() for part in prop.split('=')]
            self.model.node[node][prop_name] = prop_value

    self.writer = BIFWriter(model=self.model)
def get_model(self):
    """
    Returns the fitted bayesian model

    The model is built from ``self.variable_edges`` and
    ``self.variable_names``. CPDs are created in sorted variable order from
    ``self.variable_cpds``. Each ``"name = value"`` string in
    ``self.variable_properties`` is stored as a node attribute.

    Raises
    ------
    AttributeError
        If an ``AttributeError`` occurs while building the model, it is
        re-raised with a message asking to first get the states, edges,
        parents and network name.

    Example
    ----------
    >>> from pgmpy.readwrite import BIFReader
    >>> reader = BIFReader("bif_test.bif")
    >>> reader.get_model()
    <pgmpy.models.BayesianModel.BayesianModel object at 0x7f20af154320>
    """
    try:
        model = BayesianModel(self.variable_edges)
        model.name = self.network_name
        model.add_nodes_from(self.variable_names)

        tabular_cpds = []
        for var in sorted(self.variable_cpds.keys()):
            values = self.variable_cpds[var]
            parents = self.variable_parents[var]
            cpd = TabularCPD(var, len(self.variable_states[var]), values,
                             evidence=parents,
                             evidence_card=[len(self.variable_states[evidence_var])
                                            for evidence_var in parents])
            tabular_cpds.append(cpd)
        model.add_cpds(*tabular_cpds)

        for node, properties in self.variable_properties.items():
            for prop in properties:
                # Split on the first '=' only, so values may contain '='.
                prop_name, prop_value = (part.strip()
                                         for part in prop.split('=', 1))
                model.node[node][prop_name] = prop_value

        return model

    except AttributeError:
        raise AttributeError('First get states of variables, edges, parents and network name')
def test_add_single_cpd(self):
    """A single added CPD is the only element returned by ``get_cpds()``."""
    grade_cpd = TabularCPD('grade', 2,
                           values=np.random.rand(2, 4),
                           evidence=['diff', 'intel'],
                           evidence_card=[2, 2])

    self.graph.add_edges_from([('diff', 'grade'), ('intel', 'grade')])
    self.graph.add_cpds(grade_cpd)

    stored = self.graph.get_cpds()
    self.assertListEqual(stored, [grade_cpd])
def setUp(self):
    """Build a five-node model with CPDs and node metadata, then wrap it in ``XBNWriter``."""
    # Per-node metadata; attached verbatim to the model nodes below.
    nodes = {'c': {'STATES': ['Present', 'Absent'],
                   'DESCRIPTION': '(c) Brain Tumor',
                   'YPOS': '11935',
                   'XPOS': '15250',
                   'TYPE': 'discrete'},
             'a': {'STATES': ['Present', 'Absent'],
                   'DESCRIPTION': '(a) Metastatic Cancer',
                   'YPOS': '10465',
                   'XPOS': '13495',
                   'TYPE': 'discrete'},
             'b': {'STATES': ['Present', 'Absent'],
                   'DESCRIPTION': '(b) Serum Calcium Increase',
                   'YPOS': '11965',
                   'XPOS': '11290',
                   'TYPE': 'discrete'},
             'e': {'STATES': ['Present', 'Absent'],
                   'DESCRIPTION': '(e) Papilledema',
                   'YPOS': '13240',
                   'XPOS': '17305',
                   'TYPE': 'discrete'},
             'd': {'STATES': ['Present', 'Absent'],
                   'DESCRIPTION': '(d) Coma',
                   'YPOS': '12985',
                   'XPOS': '13960',
                   'TYPE': 'discrete'}}

    model = BayesianModel([('b', 'd'), ('a', 'b'), ('a', 'c'), ('c', 'd'), ('c', 'e')])

    # 'CONDSET' / 'CARDINALITY' are only present for variables with parents.
    cpd_distribution = {
        'a': {'TYPE': 'discrete',
              'DPIS': np.array([[0.2, 0.8]])},
        'e': {'TYPE': 'discrete',
              'DPIS': np.array([[0.8, 0.2],
                                [0.6, 0.4]]),
              'CONDSET': ['c'],
              'CARDINALITY': [2]},
        'b': {'TYPE': 'discrete',
              'DPIS': np.array([[0.8, 0.2],
                                [0.2, 0.8]]),
              'CONDSET': ['a'],
              'CARDINALITY': [2]},
        'c': {'TYPE': 'discrete',
              'DPIS': np.array([[0.2, 0.8],
                                [0.05, 0.95]]),
              'CONDSET': ['a'],
              'CARDINALITY': [2]},
        'd': {'TYPE': 'discrete',
              'DPIS': np.array([[0.8, 0.2],
                                [0.9, 0.1],
                                [0.7, 0.3],
                                [0.05, 0.95]]),
              'CONDSET': ['b', 'c'],
              'CARDINALITY': [2, 2]},
    }

    tabular_cpds = []
    for var, description in cpd_distribution.items():
        parent_names = description.get('CONDSET', [])
        parent_cards = description.get('CARDINALITY', [])
        n_states = len(nodes[var]['STATES'])
        tabular_cpds.append(
            TabularCPD(var, n_states, description['DPIS'],
                       evidence=parent_names, evidence_card=parent_cards))
    model.add_cpds(*tabular_cpds)

    for var, properties in nodes.items():
        model.node[var] = properties

    self.maxDiff = None
    self.writer = XMLBeliefNetwork.XBNWriter(model=model)
def setUp(self):
    """Create one ``UAIWriter`` for a Bayesian model and one for a Markov model."""
    self.maxDiff = None

    # ---------------- Bayesian model (dog problem) ----------------
    edges = [['family-out', 'dog-out'],
             ['bowel-problem', 'dog-out'],
             ['family-out', 'light-on'],
             ['dog-out', 'hear-bark']]
    cpds = {'bowel-problem': np.array([[0.01],
                                       [0.99]]),
            'dog-out': np.array([[0.99, 0.01, 0.97, 0.03],
                                 [0.9, 0.1, 0.3, 0.7]]),
            'family-out': np.array([[0.15],
                                    [0.85]]),
            'hear-bark': np.array([[0.7, 0.3],
                                   [0.01, 0.99]]),
            'light-on': np.array([[0.6, 0.4],
                                  [0.05, 0.95]])}
    states = {'bowel-problem': ['true', 'false'],
              'dog-out': ['true', 'false'],
              'family-out': ['true', 'false'],
              'hear-bark': ['true', 'false'],
              'light-on': ['true', 'false']}
    parents = {'bowel-problem': [],
               'dog-out': ['family-out', 'bowel-problem'],
               'family-out': [],
               'hear-bark': ['dog-out'],
               'light-on': ['family-out']}

    self.bayesmodel = BayesianModel(edges)
    tabular_cpds = [
        TabularCPD(var, len(states[var]), table,
                   evidence=parents[var],
                   evidence_card=[len(states[evidence_var])
                                  for evidence_var in parents[var]])
        for var, table in cpds.items()
    ]
    self.bayesmodel.add_cpds(*tabular_cpds)
    self.bayeswriter = UAIWriter(self.bayesmodel)

    # ---------------- Markov model (three-variable clique) ----------------
    edges = {('var_0', 'var_1'), ('var_0', 'var_2'), ('var_1', 'var_2')}
    self.markovmodel = MarkovModel(edges)

    # Each table is (scope, values-as-strings); cardinalities come from `domain`.
    tables = [(['var_0', 'var_1'],
               ['4.000', '2.400', '1.000', '0.000']),
              (['var_0', 'var_1', 'var_2'],
               ['2.2500', '3.2500', '3.7500', '0.0000', '0.0000', '10.0000',
                '1.8750', '4.0000', '3.3330', '2.0000', '2.0000', '3.4000'])]
    domain = {'var_1': '2', 'var_2': '3', 'var_0': '2'}

    factors = [
        Factor(scope,
               [int(domain[var]) for var in scope],
               [float(entry) for entry in raw_values])
        for scope, raw_values in tables
    ]
    self.markovmodel.add_factors(*factors)
    self.markovwriter = UAIWriter(self.markovmodel)
def score_cpd(aid1, aid2):
    """Build the score CPD ``'S' + aid1 + aid2`` from an explicit value table.

    Evidence is, in order, the match variable ``'A' + aid1 + aid2`` and the
    two name variables ``'N' + aid1`` and ``'N' + aid2``. One table row is
    produced per score state and one column per joint evidence state.
    ``var2_cpd``, ``semtype2_nice`` and ``ut`` are read from the enclosing
    scope. The returned CPD is tagged with ``semtype = 'score'``.
    """
    semtype = 'score'
    evidence = ['A' + aid1 + aid2, 'N' + aid1, 'N' + aid2]

    # Human-readable state labels of every evidence variable, in order.
    evidence_nice = [semtype2_nice[var2_cpd[key].semtype] for key in evidence]
    evidence_card = [len(labels) for labels in evidence_nice]
    evidence_states = list(ut.iprod(*evidence_nice))

    variable_basis = semtype2_nice[semtype]

    # NOTE(review): given the evidence order above, state[0] is the match
    # label and state[1] / state[2] are name labels, yet the branches below
    # compare state[0] with state[1] and test state[2] against 'same'.
    # Presumably the indices predate a reordering of `evidence` - confirm
    # the intended table before relying on these values.
    variable_values = []
    for mystate in variable_basis:
        row = []
        for state in evidence_states:
            if state[0] == state[1]:
                if state[2] == 'same':
                    val = .2 if mystate == 'low' else .8
                else:
                    val = 1
            elif state[0] != state[1]:
                if state[2] == 'same':
                    val = .5 if mystate == 'low' else .5
                else:
                    val = 1
            row.append(val)
        variable_values.append(row)

    score_factor = TabularCPD(
        variable='S' + aid1 + aid2,
        variable_card=len(variable_basis),
        values=variable_values,
        evidence=evidence,
        evidence_card=evidence_card)
    score_factor.semtype = semtype
    return score_factor
def get_parameters(self):
    """
    Method used to get parameters.

    For a node without parents, the CPD is built from the state counts of
    its data column, sorted by state. For a node with parents, the counts of
    every (node state, parent states) combination are arranged as one row
    per node state and one column per parent configuration; combinations
    that do not occur get a count of 0. Every CPD is normalized before it is
    returned.

    Returns
    -------
    parameters: list
        List containing all the parameters. For Bayesian Model it would be
        list of CPDs' for Markov Model it would be a list of factors

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from pgmpy.models import BayesianModel
    >>> from pgmpy.estimators import MaximumLikelihoodEstimator
    >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
    ...                       columns=['A', 'B', 'C', 'D', 'E'])
    >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
    >>> estimator = MaximumLikelihoodEstimator(model, values)
    >>> estimator.get_parameters()
    """
    parameters = []

    for node in self.model.nodes():
        parents = self.model.get_parents(node)
        if not parents:
            # ``.loc`` replaces ``.ix``, which newer pandas releases no
            # longer provide; column selection by label is unchanged.
            state_counts = self.data.loc[:, node].value_counts()
            # ``value_counts`` orders by frequency; restore state order.
            state_counts = state_counts.reindex(sorted(state_counts.index))
            cpd = TabularCPD(node, self.node_card[node],
                             state_counts.values[:, np.newaxis])
        else:
            parent_card = np.array([self.node_card[parent] for parent in parents])
            var_card = self.node_card[node]
            # Rows: node states; columns: parent configurations.
            values = self.data.groupby([node] + parents).size().unstack(parents).fillna(0)
            cpd = TabularCPD(node, var_card, np.array(values),
                             evidence=parents,
                             evidence_card=parent_card.astype('int'))
        cpd.normalize()
        parameters.append(cpd)

    return parameters
def get_parameters(self, **kwargs):
    """
    Method used to get parameters.

    For a ``BayesianModel`` one normalized ``TabularCPD`` is estimated per
    node from the counts in ``self.data``. For a ``MarkovModel`` one
    pairwise ``Factor`` per edge is obtained by numerically minimizing
    ``Z - sum(counts * params)``, where ``Z`` is the sum over the values of
    the product of all edge factors. For any other model type ``None`` is
    returned.

    Parameters
    ----------
    score: bool (optional keyword argument, Markov models only)
        If truthy, the final objective value is returned together with the
        factors.

    Returns
    -------
    parameters: list
        List containing all the parameters. For Bayesian Model it would be
        list of CPDs' for Markov Model it would be a list of factors
        (or the tuple ``(factors, score)`` when ``score`` is requested).

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from pgmpy.models import BayesianModel
    >>> from pgmpy.estimators import MaximumLikelihoodEstimator
    >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
    ...                       columns=['A', 'B', 'C', 'D', 'E'])
    >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
    >>> estimator = MaximumLikelihoodEstimator(model, values)
    >>> estimator.get_parameters()
    """
    if isinstance(self.model, BayesianModel):
        parameters = []
        for node in self.model.nodes():
            parents = self.model.get_parents(node)
            if not parents:
                # ``value_counts`` orders by frequency; sort by state so each
                # count lands on the row of its own state.
                state_counts = self.data.loc[:, node].value_counts().sort_index()
                cpd = TabularCPD(node, self.node_card[node],
                                 state_counts.values[:, np.newaxis])
            else:
                parent_card = np.array([self.node_card[parent] for parent in parents])
                var_card = self.node_card[node]
                state_counts = self.data.groupby(
                    [node] + self.model.predecessors(node)).size()
                # NOTE(review): the reshape assumes every (state, parents)
                # combination occurs in the data - confirm with callers.
                values = state_counts.values.reshape(var_card, np.prod(parent_card))
                cpd = TabularCPD(node, var_card, values,
                                 evidence=parents,
                                 evidence_card=parent_card.astype('int'))
            cpd.normalize()
            parameters.append(cpd)
        return parameters

    elif isinstance(self.model, MarkovModel):
        edges = self.model.edges()
        no_of_params = [self.node_card[u] * self.node_card[v] for u, v in edges]

        # Observed joint counts of every edge, concatenated in edge order.
        constants = []
        for u, v in edges:
            constants.extend(self.data.groupby([u, v]).size().values)
        constants = np.array(constants)

        total_params = sum(no_of_params)
        # Offsets of each edge's slice inside the flat parameter vector.
        no_of_params.insert(0, 0)
        param_cumsum = np.cumsum(no_of_params)

        def build_factors(params):
            # One pairwise factor per edge, fed from its parameter slice.
            return [Factor([u, v],
                           [self.node_card[u], self.node_card[v]],
                           params[param_cumsum[index]: param_cumsum[index + 1]])
                    for index, (u, v) in enumerate(edges)]

        def optimize_fun(params):
            Z = sum(factor_product(*build_factors(params)).values)
            return Z - sum(constants * params)

        mini = minimize(optimize_fun, x0=[1] * total_params)
        factors = build_factors(mini.x)

        if kwargs.get('score'):
            return factors, mini.fun
        return factors