def setUp(self):
    """Create the Bayesian-model and Markov-model fixtures plus a Gibbs sampler.

    Sets ``self.bayesian_model`` (nodes ``diff``, ``intel`` and their common
    child ``grade``), ``self.markov_model`` (edges A-B, C-B, B-D with one
    factor per edge) and ``self.gibbs``, which is built from the Bayesian
    model.
    """
    # Bayesian fixture: two binary parents and one three-state child.
    bayes_cpds = [
        TabularCPD("diff", 2, [[0.6], [0.4]]),
        TabularCPD("intel", 2, [[0.7], [0.3]]),
        TabularCPD(
            "grade",
            3,
            [
                [0.3, 0.05, 0.9, 0.5],
                [0.4, 0.25, 0.08, 0.3],
                [0.3, 0.7, 0.02, 0.2],
            ],
            evidence=["diff", "intel"],
            evidence_card=[2, 2],
        ),
    ]
    network = BayesianModel()
    network.add_nodes_from(["diff", "intel", "grade"])
    network.add_edges_from([("diff", "grade"), ("intel", "grade")])
    network.add_cpds(*bayes_cpds)
    self.bayesian_model = network

    # Markov fixture: one pairwise factor for each edge.
    self.markov_model = MarkovModel([("A", "B"), ("C", "B"), ("B", "D")])
    edge_factors = [
        DiscreteFactor(["A", "B"], [2, 3], [1, 2, 3, 4, 5, 6]),
        DiscreteFactor(["C", "B"], [4, 3],
                       [3, 1, 4, 5, 7, 8, 1, 3, 10, 4, 5, 6]),
        DiscreteFactor(["B", "D"], [3, 2], [5, 7, 2, 1, 9, 3]),
    ]
    self.markov_model.add_factors(*edge_factors)

    self.gibbs = GibbsSampling(self.bayesian_model)
def setUp(self):
    """Create the model, data frames, expected CPDs and estimator used by the tests.

    * ``self.m1``   - network with edges A -> C and B -> C.
    * ``self.d1``   - three complete samples over A, B, C.
    * ``self.d2``   - the same columns with missing entries plus an extra
      column ``D`` that is not part of the model.
    * ``self.cpds`` - CPDs the tests compare estimator output against.
    * ``self.mle1`` - maximum-likelihood estimator over ``m1`` and ``d1``.
    """
    self.m1 = BayesianModel([("A", "C"), ("B", "C")])
    self.d1 = pd.DataFrame(data={
        "A": [0, 0, 1],
        "B": [0, 1, 0],
        "C": [1, 1, 0],
    })
    # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was removed
    # in NumPy 2.0 and raises AttributeError there.
    self.d2 = pd.DataFrame(data={
        "A": [0, np.nan, 1],
        "B": [0, 1, 0],
        "C": [1, 1, np.nan],
        "D": [np.nan, "Y", np.nan],
    })
    self.cpds = [
        TabularCPD("A", 2, [[2.0 / 3], [1.0 / 3]]),
        TabularCPD("B", 2, [[2.0 / 3], [1.0 / 3]]),
        TabularCPD(
            "C",
            2,
            [[0.0, 0.0, 1.0, 0.5], [1.0, 1.0, 0.0, 0.5]],
            evidence=["A", "B"],
            evidence_card=[2, 2],
        ),
    ]
    self.mle1 = MaximumLikelihoodEstimator(self.m1, self.d1)
def generate_cpds(self):
    """Attach a random, normalised TabularCPD to every node of ``self.graph``.

    Nodes are visited in topological order so that each parent's cardinality
    is already known when a child's table is sized.  Every node gets a
    cardinality drawn from {2, 3, 4, 5}.  The resulting model is validated
    with ``check_model()`` and stored in ``self.model``.
    """
    model = BayesianModel([(str(a), str(b)) for a, b in self.graph.edges()])
    cardinalities = {}
    cpds = []

    for node in nx.topological_sort(self.graph):
        parents = sorted(self.graph.predecessors(node))
        card = random.choice([2, 3, 4, 5])
        cardinalities[node] = card

        if not parents:
            # Root node: a single row of random weights scaled to sum to 1.
            table = np.random.rand(1, card)
            table = table / np.sum(table)
            cpds.append(
                TabularCPD(variable=str(node), variable_card=card,
                           values=table))
            continue

        # Child node: one column per parent configuration, each column
        # scaled to sum to 1.
        parent_cards = [cardinalities[p] for p in parents]
        table = np.random.rand(card, np.prod(parent_cards))
        table = table / np.sum(table, axis=0)
        cpds.append(
            TabularCPD(variable=str(node),
                       variable_card=card,
                       values=table,
                       evidence=[str(p) for p in parents],
                       evidence_card=parent_cards))

    model.add_cpds(*cpds)
    model.check_model()
    self.model = model
def load_toy_symptom():
    """Build the toy symptom network.

    Two binary diseases are both parents of one binary symptom.

    Returns
    -------
    BayesianModel
        The model with its three CPDs attached.
    """
    # FEATURES =
    S, D, C = 'Symptom', 'Disease', 'Another Disease'
    VALUES = ['No', 'Yes']

    # NOTE(review): state_names is given a plain list here - confirm that the
    # TabularCPD in use accepts that form.
    prior_another = TabularCPD(variable=C, variable_card=2,
                               values=[[.7], [.3]],
                               state_names=VALUES)
    prior_disease = TabularCPD(variable=D, variable_card=2,
                               values=[[.9], [.1]],
                               state_names=VALUES)
    symptom_table = [
        [.3, .99, .1, .1],
        [.7, .01, .9, .9],
    ]
    symptom_given_diseases = TabularCPD(variable=S, variable_card=2,
                                        values=symptom_table,
                                        evidence=[D, C],
                                        evidence_card=[2, 2],
                                        state_names=VALUES)

    model = BayesianModel([(D, S), (C, S)])
    model.add_cpds(prior_another, prior_disease, symptom_given_diseases)
    return model
def setUp(self):
    """Prepare two XMLBIF writers: one from a parsed file, one from a hand-built model.

    ``self.expected_model`` / ``self.writer`` come from parsing ``TEST_FILE``.
    ``self.model_stateless`` / ``self.writer_stateless`` come from a network
    over D, I, G, L, S whose CPDs are created without explicit state names.
    """
    self.expected_model = XMLBIFReader(string=TEST_FILE).get_model()
    self.writer = XMLBIFWriter(self.expected_model)

    # Hand-built network: D -> G <- I, G -> L, I -> S.
    self.model_stateless = BayesianModel(
        [('D', 'G'), ('I', 'G'), ('G', 'L'), ('I', 'S')])

    self.cpd_d = TabularCPD(variable='D', variable_card=2,
                            values=[[0.6, 0.4]])
    self.cpd_i = TabularCPD(variable='I', variable_card=2,
                            values=[[0.7, 0.3]])
    grade_table = [
        [0.3, 0.05, 0.9, 0.5],
        [0.4, 0.25, 0.08, 0.3],
        [0.3, 0.7, 0.02, 0.2],
    ]
    self.cpd_g = TabularCPD(variable='G', variable_card=3,
                            values=grade_table,
                            evidence=['I', 'D'], evidence_card=[2, 2])
    self.cpd_l = TabularCPD(variable='L', variable_card=2,
                            values=[[0.1, 0.4, 0.99], [0.9, 0.6, 0.01]],
                            evidence=['G'], evidence_card=[3])
    self.cpd_s = TabularCPD(variable='S', variable_card=2,
                            values=[[0.95, 0.2], [0.05, 0.8]],
                            evidence=['I'], evidence_card=[2])

    self.model_stateless.add_cpds(
        self.cpd_d, self.cpd_i, self.cpd_g, self.cpd_l, self.cpd_s)
    self.writer_stateless = XMLBIFWriter(self.model_stateless)
def setUp(self):
    """Create a five-node Bayesian chain and a four-node Markov model.

    ``self.bayesian`` is the chain a -> b -> c -> d -> e with binary CPDs.
    ``self.markov`` has edges a-b, b-d, a-c, c-d with one factor per edge.
    """
    self.bayesian = BayesianModel(
        [("a", "b"), ("b", "c"), ("c", "d"), ("d", "e")])

    # (child, parent, table) for every non-root link of the chain.
    chain_links = [
        ("b", "a", [[0.2, 0.4], [0.8, 0.6]]),
        ("c", "b", [[0.1, 0.2], [0.9, 0.8]]),
        ("d", "c", [[0.4, 0.3], [0.6, 0.7]]),
        ("e", "d", [[0.3, 0.2], [0.7, 0.8]]),
    ]
    chain_cpds = [TabularCPD("a", 2, [[0.4, 0.6]])]
    for child, parent, table in chain_links:
        chain_cpds.append(
            TabularCPD(child, 2, table, evidence=[parent], evidence_card=[2]))
    self.bayesian.add_cpds(*chain_cpds)

    self.markov = MarkovModel(
        [("a", "b"), ("b", "d"), ("a", "c"), ("c", "d")])
    # (scope, flattened values) for each pairwise factor.
    pairwise = [
        (["a", "b"], [100, 1, 1, 100]),
        (["a", "c"], [40, 30, 100, 20]),
        (["b", "d"], [1, 100, 100, 1]),
        (["c", "d"], [60, 60, 40, 40]),
    ]
    self.markov.add_factors(
        *[DiscreteFactor(scope, [2, 2], np.array(flat))
          for scope, flat in pairwise])
def intervene(self, intervention_node, intervention_value=None):
    """Fix a node of ``self.bn`` to a single value.

    The node's current CPD is removed.  If ``intervention_value`` is one of
    the node's states, the node gets a one-hot CPD over its existing states
    and all of its incoming edges are removed.  Otherwise the node is removed
    and re-added, then given a single-state CPD whose only state is
    ``intervention_value``.  The model is validated afterwards.

    Parameters
    ----------
    intervention_node:
        Node to intervene on; converted with ``str()`` before use.
    intervention_value:
        State the node is fixed to.
    """
    node = str(intervention_node)
    card, states = self.get_state_space(node)
    # One-hot row marking which existing state equals the requested value.
    one_hot = np.array(
        [[float(state == intervention_value) for state in states]])

    self.bn.remove_cpds(node)

    if np.sum(one_hot) != 0:
        point_mass = TabularCPD(variable=node, variable_card=card,
                                values=one_hot)
        # Copy the edges first: the graph is modified while they are removed.
        incoming = [(edge[0], edge[1]) for edge in self.bn.in_edges(node)]
        for source, target in incoming:
            self.bn.remove_edge(source, target)
        self.bn.add_cpds(point_mass)
    else:
        # The requested value is not among the node's states.
        self.bn.remove_node(node)
        self.bn.add_node(node)
        single_state = TabularCPD(
            variable=node,
            variable_card=1,
            values=np.array([[1.]]),
            state_names={node: [intervention_value]})
        self.bn.add_cpds(single_state)

    self.bn.check_model()
def main():
    """Build the contestant/prize/host network and print a posterior over the host.

    Nodes: ``C`` is the contestant, ``P`` is the prize and ``H`` is the host;
    ``H`` depends on both ``C`` and ``P``.
    """
    # Network structure: C -> H <- P.
    model = BayesianModel([('C', 'H'), ('P', 'H')])

    # Priors for the contestant and the prize.
    cpd_c, cpd_p = [
        TabularCPD(name, 3, [[0.33, 0.33, 0.33]]) for name in ('C', 'P')
    ]

    # Host's table, one column per (C, P) configuration.
    host_table = [
        [0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 1.0, 0.5],
        [0.5, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.5],
        [0.5, 1.0, 0.0, 1.0, 0.5, 0.0, 0.0, 0.0, 0.0],
    ]
    cpd_h = TabularCPD('H', 3, host_table,
                       evidence=['C', 'P'], evidence_card=[3, 3])

    # Associate the CPDs with the network structure.
    model.add_cpds(cpd_c, cpd_p, cpd_h)

    # Infer the posterior of H given C = 0 and P = 0.
    engine = VariableElimination(model)
    posterior_p = engine.query(['H'], evidence={'C': 0, 'P': 0})
    print(posterior_p['H'])
def basic_different_dec_cardinality() -> MACID:
    """Return a two-agent MACID whose decisions have different cardinalities.

    Agent 0 owns decision ``D1`` (2 options) and utility ``U1``; agent 1 owns
    decision ``D2`` (3 options) and utility ``U2``.  Both utilities depend on
    both decisions.
    """
    edges = [('D1', 'D2'), ('D1', 'U1'), ('D1', 'U2'), ('D2', 'U2'),
             ('D2', 'U1')]
    agents = {
        0: {'D': ['D1'], 'U': ['U1']},
        1: {'D': ['D2'], 'U': ['U2']},
    }
    macid = MACID(edges, agents)

    # Utility tables: 4 utility states x 6 joint (D1, D2) configurations.
    u1_table = np.array([[0, 0, 1, 0, 0, 0],
                         [0, 1, 0, 1, 0, 0],
                         [0, 0, 0, 0, 1, 0],
                         [1, 0, 0, 0, 0, 1]])
    u2_table = np.array([[0, 0, 0, 0, 1, 0],
                         [1, 0, 1, 1, 0, 0],
                         [0, 1, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 1]])

    macid.add_cpds(
        DecisionDomain('D1', [0, 1]),
        DecisionDomain('D2', [0, 1, 2]),
        TabularCPD('U1', 4, u1_table,
                   evidence=['D1', 'D2'], evidence_card=[2, 3]),
        TabularCPD('U2', 4, u2_table,
                   evidence=['D1', 'D2'], evidence_card=[2, 3]),
    )
    return macid
def estimate_cpd(self, node):
    """
    Method to estimate the CPD for a given variable.

    The CPD is built from ``self.state_counts(node)`` and normalised.  A
    parent configuration with no observations (an all-zero column) is given
    a uniform distribution.

    Parameters
    ----------
    node: int, string (any hashable python object)
        The name of the variable for which the CPD is to be estimated.

    Returns
    -------
    CPD: TabularCPD

    Examples
    --------
    >>> import pandas as pd
    >>> from pgmpy.models import BayesianModel
    >>> from pgmpy.estimators import MaximumLikelihoodEstimator
    >>> data = pd.DataFrame(data={'A': [0, 0, 1], 'B': [0, 1, 0], 'C': [1, 1, 0]})
    >>> model = BayesianModel([('A', 'C'), ('B', 'C')])
    >>> cpd_A = MaximumLikelihoodEstimator(model, data).estimate_cpd('A')
    >>> print(cpd_A)
    ╒══════╤══════════╕
    │ A(0) │ 0.666667 │
    ├──────┼──────────┤
    │ A(1) │ 0.333333 │
    ╘══════╧══════════╛
    >>> cpd_C = MaximumLikelihoodEstimator(model, data).estimate_cpd('C')
    >>> print(cpd_C)
    ╒══════╤══════╤══════╤══════╤══════╕
    │ A    │ A(0) │ A(0) │ A(1) │ A(1) │
    ├──────┼──────┼──────┼──────┼──────┤
    │ B    │ B(0) │ B(1) │ B(0) │ B(1) │
    ├──────┼──────┼──────┼──────┼──────┤
    │ C(0) │ 0.0  │ 0.0  │ 1.0  │ 0.5  │
    ├──────┼──────┼──────┼──────┼──────┤
    │ C(1) │ 1.0  │ 1.0  │ 0.0  │ 0.5  │
    ╘══════╧══════╧══════╧══════╧══════╛
    """
    state_counts = self.state_counts(node)

    # if a column contains only `0`s (no states observed for some configuration
    # of parents' states) fill that column uniformly instead.
    # `.loc` replaces the `.ix` indexer, which was removed in pandas 1.0.
    unobserved = (state_counts == 0).all()
    state_counts.loc[:, unobserved] = 1

    parents = sorted(self.model.get_parents(node))
    parents_cardinalities = [
        len(self.state_names[parent]) for parent in parents
    ]
    node_cardinality = len(self.state_names[node])

    cpd = TabularCPD(node, node_cardinality, np.array(state_counts),
                     evidence=parents,
                     evidence_card=parents_cardinalities,
                     state_names=self.state_names)
    cpd.normalize()
    return cpd
def _get_node_CPT(self, node, df=None):
    """Estimate a binary CPD for ``node`` from the column means of ``df``.

    For a node without parents in ``self.G`` the CPD is ``[1 - mean, mean]``
    of the node's column.  Otherwise the mean of the node's column is taken
    per observed parent configuration; every 0/1 parent configuration that
    does not occur in ``df`` is filled in with 0.5.

    Parameters
    ----------
    node:
        Node of ``self.G`` (and column of ``df``) to estimate.
    df:
        Data frame holding one column per node.  It is indexed directly, so
        the ``None`` default cannot be used as is.

    Returns
    -------
    TabularCPD
    """
    parents = list(self.G.predecessors(node))

    if not parents:
        # Root node (latent): only the marginal mean is needed.
        p_one = df[node].mean()
        return TabularCPD(node, 2, values=[[1 - p_one], [p_one]])

    cond_means = df.groupby(parents)[node].mean().reset_index()
    observed = cond_means[parents].drop_duplicates()

    # Parent configurations that never occur in the sample receive an
    # uninformative 0.5.
    fillers = [
        list(combo) + [0.5]
        for combo in product(*[[0, 1] for _ in parents])
        if not (observed == np.array(combo)).all(1).any()
    ]
    filler_df = pd.DataFrame(fillers, columns=parents + [node])

    cond_means = pd.concat((cond_means, filler_df), axis=0)
    cond_means = cond_means.sort_values(by=parents)
    p_one = cond_means[node].values

    table = np.vstack((1. - p_one, p_one))
    return TabularCPD(node, 2, values=table,
                      evidence=parents,
                      evidence_card=len(parents) * [2])
def test_nonoccurring_values(self):
    """Estimated parameters must cover declared states that never occur in the data.

    ``state_names`` declares extra states for A (23) and C (42), and an entry
    for the key ``1``.  The expected CPDs give the unobserved state of A a
    probability of 0 and make the columns for A's third state uniform.
    """
    declared_states = {
        "A": [0, 1, 23],
        "B": [0, 1],
        "C": [0, 42, 1],
        1: [2],
    }
    mle = MaximumLikelihoodEstimator(self.m1, self.d1,
                                     state_names=declared_states)

    expected_a = TabularCPD("A", 3, [[2.0 / 3], [1.0 / 3], [0]])
    expected_b = TabularCPD("B", 2, [[2.0 / 3], [1.0 / 3]])
    expected_c = TabularCPD(
        "C",
        3,
        [
            [0.0, 0.0, 1.0, 1.0 / 3, 1.0 / 3, 1.0 / 3],
            [1.0, 1.0, 0.0, 1.0 / 3, 1.0 / 3, 1.0 / 3],
            [0.0, 0.0, 0.0, 1.0 / 3, 1.0 / 3, 1.0 / 3],
        ],
        evidence=["A", "B"],
        evidence_card=[3, 2],
    )

    self.assertSetEqual(set(mle.get_parameters()),
                        {expected_a, expected_b, expected_c})
def test_check_model(self):
    """``check_model`` raises with only g, s, l attached and passes once d and i are added."""
    # Stage 1: CPDs for g, s and l only -> the test expects a ValueError.
    partial_cpds = [
        TabularCPD(
            "g",
            2,
            values=np.array([[0.2, 0.3, 0.4, 0.6], [0.8, 0.7, 0.6, 0.4]]),
            evidence=["d", "i"],
            evidence_card=[2, 2],
        ),
        TabularCPD(
            "s",
            2,
            values=np.array([[0.2, 0.3], [0.8, 0.7]]),
            evidence=["i"],
            evidence_card=[2],
        ),
        TabularCPD(
            "l",
            2,
            values=np.array([[0.2, 0.3], [0.8, 0.7]]),
            evidence=["g"],
            evidence_card=[2],
        ),
    ]
    self.G.add_cpds(*partial_cpds)
    self.assertRaises(ValueError, self.G.check_model)

    # Stage 2: add CPDs for d and i -> the model must now validate.
    remaining_cpds = [
        TabularCPD("d", 2, values=[[0.8, 0.2]]),
        TabularCPD("i", 2, values=[[0.7, 0.3]]),
    ]
    self.G.add_cpds(*remaining_cpds)
    self.assertTrue(self.G.check_model())
def Generate_CPTs(data_cpts, data, cols, root):
    """Build one TabularCPD per entry of ``cols``.

    The entry equal to ``root`` gets a parent-less CPD of cardinality 5 whose
    values are row 1 of the transposed first table in ``data_cpts``.  Every
    other entry gets a CPD conditioned on ``root`` (cardinality 5): entries
    whose name starts with ``"T:"`` have cardinality 2, the rest 6.  Their
    values are the transposed table found at the entry's position in
    ``cols``.

    Parameters
    ----------
    data_cpts:
        Sequence of tables indexed like ``cols``.
        # presumably pandas DataFrames (``.T.values`` is used) - confirm
    data:
        Not used by this function.
    cols:
        Column names; compared element-wise with ``cols == name``, so this is
        expected to be a NumPy array.
    root:
        Name of the root variable.

    Returns
    -------
    list of TabularCPD
    """
    cpts_list = []
    for name in cols:
        if name == root:
            cpts_list.append(
                TabularCPD(variable=root, variable_card=5,
                           values=[data_cpts[0].T.values[1]]))
            continue

        card = 2 if name[:2] == "T:" else 6
        position = np.where(cols == name)[0][0]
        cpts_list.append(
            TabularCPD(variable=name,
                       variable_card=card,
                       values=data_cpts[position].T.values,
                       evidence=[root],
                       evidence_card=[5]))
    return cpts_list
def test_add_multiple_cpds(self):
    """CPDs added in one ``add_cpds`` call can each be fetched back by variable name."""
    named_cpds = [
        ("d", TabularCPD("d", 2, values=np.random.rand(2, 1))),
        ("i", TabularCPD("i", 2, values=np.random.rand(2, 1))),
        (
            "g",
            TabularCPD(
                "g",
                2,
                values=np.random.rand(2, 4),
                evidence=["d", "i"],
                evidence_card=[2, 2],
            ),
        ),
        (
            "l",
            TabularCPD("l", 2, values=np.random.rand(2, 2),
                       evidence=["g"], evidence_card=[2]),
        ),
        (
            "s",
            TabularCPD("s", 2, values=np.random.rand(2, 2),
                       evidence=["i"], evidence_card=[2]),
        ),
    ]

    self.G.add_cpds(*[cpd for _, cpd in named_cpds])

    for name, cpd in named_cpds:
        self.assertEqual(self.G.get_cpds(name), cpd)
def test_check_model2(self):
    """``check_model`` must raise ValueError for each of three ill-formed CPDs.

    Each table below has at least one column that does not sum to 1.  The
    CPDs are added, checked and removed one at a time.
    """
    # (variable, table, parents, parent cardinalities)
    bad_specs = [
        ('s', np.array([[0.5, 0.3], [0.8, 0.7]]), ['i'], [2]),
        ('g', np.array([[0.2, 0.3, 0.4, 0.6], [0.3, 0.7, 0.6, 0.4]]),
         ['d', 'i'], [2, 2]),
        ('l', np.array([[0.2, 0.3], [0.1, 0.7]]), ['g'], [2]),
    ]
    for variable, table, parents, parent_cards in bad_specs:
        bad_cpd = TabularCPD(variable, 2, values=table,
                             evidence=parents, evidence_card=parent_cards)
        self.G.add_cpds(bad_cpd)
        self.assertRaises(ValueError, self.G.check_model)
        self.G.remove_cpds(bad_cpd)
def test_fit_missing_data(self):
    """Fitting ``model2`` on ``data2`` with ``complete_samples_only=False`` yields the expected CPDs."""
    self.model2.fit(self.data2,
                    state_names={'C': [0, 1]},
                    complete_samples_only=False)

    expected = {
        TabularCPD('A', 2, [[0.5], [0.5]]),
        TabularCPD('B', 2, [[2. / 3], [1. / 3]]),
        TabularCPD('C', 2,
                   [[0, 0.5, 0.5, 0.5], [1, 0.5, 0.5, 0.5]],
                   evidence=['A', 'B'], evidence_card=[2, 2]),
    }
    fitted = set(self.model2.get_cpds())
    self.assertSetEqual(expected, fitted)
def test_add_multiple_cpds(self):
    """Every CPD passed to a single ``add_cpds`` call is retrievable via ``get_cpds``."""
    names = ['d', 'i', 'g', 'l', 's']
    cpds = [
        TabularCPD('d', 2, values=np.random.rand(2, 1)),
        TabularCPD('i', 2, values=np.random.rand(2, 1)),
        TabularCPD('g', 2, values=np.random.rand(2, 4),
                   evidence=['d', 'i'], evidence_card=[2, 2]),
        TabularCPD('l', 2, values=np.random.rand(2, 2),
                   evidence=['g'], evidence_card=[2]),
        TabularCPD('s', 2, values=np.random.rand(2, 2),
                   evidence=['i'], evidence_card=[2]),
    ]

    self.G.add_cpds(*cpds)

    for name, expected in zip(names, cpds):
        self.assertEqual(self.G.get_cpds(name), expected)
def inf(self, file1):
    """Read a network description from ``file1`` and print one posterior per node.

    File layout, as consumed below: each node takes five consecutive lines
    (name, number of values, CPD string, parent sequence, parent
    cardinalities).  The node section ends at the first line that is exactly
    ``'\\n'``.  The line after that terminator holds the edges and the line
    three after it holds the evidence.  Parsing of the individual lines is
    delegated to ``self.getnode``, ``self.getList``, ``self.getCard``,
    ``self.parseCpd``, ``self.getegdes`` and ``self.getValue``.

    For every node that is not part of the evidence, the values of its
    posterior (computed with variable elimination) are appended to a report
    that is printed at the end.

    Parameters
    ----------
    file1: str
        Path of the UTF-8 encoded description file.
    """
    # Read everything up front inside a context manager so the file handle
    # is always closed (the handle used to be left open).
    with open(file1, encoding="utf8") as handle:
        lines = handle.readlines()

    G = BayesianModel()
    nodeList = {}

    i = 0
    while i < len(lines):
        if lines[i] == '\n':
            break
        nodeName = self.getnode(lines[i])
        valueNum = int(lines[i + 1])
        cpd_str = lines[i + 2]
        sequence = self.getList(lines[i + 3])
        card = self.getCard(lines[i + 4])
        cpd = self.parseCpd(cpd_str, valueNum, card)
        nodeList[nodeName] = {
            'nodeName': nodeName,
            'valueNum': valueNum,
            'cpd': cpd,
            'sequence': sequence,
            'card': card,
        }
        i += 5

    # ``i`` now points at the blank terminator line of the node section.
    edges = self.getegdes(lines[i + 1])
    evidence2 = self.getValue(lines[i + 3])

    # ``edges`` is a flat list: consecutive pairs are (source, target).
    for k in range(len(edges) // 2):
        G.add_edge(edges[2 * k], edges[2 * k + 1])

    for node in nodeList.values():
        if node['sequence'][0] == '':
            # No parents listed for this node.
            cpt = TabularCPD(variable=node['nodeName'],
                             variable_card=node['valueNum'],
                             values=node['cpd'])
        else:
            cpt = TabularCPD(variable=node['nodeName'],
                             variable_card=node['valueNum'],
                             evidence=node['sequence'],
                             evidence_card=node['card'],
                             values=node['cpd'])
        G.add_cpds(cpt)

    if G.check_model():
        inference = VariableElimination(G)
        report = []
        for node in G.nodes():
            if node not in evidence2:
                phi_query = inference.query(variables=[node],
                                            evidence=evidence2,
                                            show_progress=False).values
                report.append(node + ' ' + str(phi_query) + '\n')
        print(''.join(report))
def factors():
    """
    Initialise the initial factors.

    Returns
    -------
    dict
        TabularCPD objects keyed by ``'a'``, ``'ab'``, ``'ae'``, ``'bc'`` and
        ``'ced'``.
    """
    # Marginal on a.
    prior_a = TabularCPD(variable='a', variable_card=2,
                         values=np.array([[0.05, 0.95]]))

    # CPD on b | a (the literal is transposed before use).
    b_given_a = TabularCPD('b', 2,
                           np.array([[0.1, 0.9], [0.2, 0.8]]).T,
                           ['a'], [2])

    # CPD on e | a.
    e_given_a = TabularCPD('e', 2,
                           np.array([[0.3, 0.4], [0.7, 0.6]]),
                           ['a'], [2])

    # CPD on c | b.
    c_given_b = TabularCPD('c', 2,
                           np.array([[0.5, 0.6], [0.5, 0.4]]),
                           ['b'], [2])

    # CPD on d | c, e: transpose the 2x2x2 literal, then flatten the two
    # trailing axes into columns.
    d_table = np.array([[[0.7, 0.3], [0.8, 0.2]],
                        [[0.9, 0.1], [0.99, 0.01]]]).T
    d_table = d_table.reshape(d_table.shape[0], -1)
    d_given_ce = TabularCPD('d', 2, d_table, ['c', 'e'], [2, 2])

    return {
        'a': prior_a,
        'ab': b_given_a,
        'ae': e_given_a,
        'bc': c_given_b,
        'ced': d_given_ce,
    }
def setUp(self):
    """Set up ``self.bayesian_model``, ``self.markov_model`` and ``self.gibbs``.

    The Bayesian model has ``diff`` and ``intel`` as parents of ``grade``.
    The Markov model has edges A-B, C-B and B-D with one factor each.  The
    Gibbs sampler is built from the Bayesian model.
    """
    # --- Bayesian model -------------------------------------------------
    grade_table = [[0.3, 0.05, 0.9, 0.5],
                   [0.4, 0.25, 0.08, 0.3],
                   [0.3, 0.7, 0.02, 0.2]]
    diff_prior = TabularCPD('diff', 2, [[0.6], [0.4]])
    intel_prior = TabularCPD('intel', 2, [[0.7], [0.3]])
    grade_given_parents = TabularCPD('grade', 3, grade_table,
                                     evidence=['diff', 'intel'],
                                     evidence_card=[2, 2])

    self.bayesian_model = BayesianModel()
    self.bayesian_model.add_nodes_from(['diff', 'intel', 'grade'])
    self.bayesian_model.add_edges_from(
        [(parent, 'grade') for parent in ('diff', 'intel')])
    self.bayesian_model.add_cpds(diff_prior, intel_prior,
                                 grade_given_parents)

    # --- Markov model ---------------------------------------------------
    self.markov_model = MarkovModel([('A', 'B'), ('C', 'B'), ('B', 'D')])
    self.markov_model.add_factors(
        DiscreteFactor(['A', 'B'], [2, 3], [1, 2, 3, 4, 5, 6]),
        DiscreteFactor(['C', 'B'], [4, 3],
                       [3, 1, 4, 5, 7, 8, 1, 3, 10, 4, 5, 6]),
        DiscreteFactor(['B', 'D'], [3, 2], [5, 7, 2, 1, 9, 3]),
    )

    self.gibbs = GibbsSampling(self.bayesian_model)
def test_check_model2(self):
    """Each ill-formed CPD, added on its own, must make ``check_model`` raise ValueError.

    Every table used here contains at least one column that does not sum
    to 1.  Each CPD is removed again before the next one is tried.
    """

    def expect_rejected(cpd):
        # Add the CPD, require the validation failure, then clean up.
        self.G.add_cpds(cpd)
        with self.assertRaises(ValueError):
            self.G.check_model()
        self.G.remove_cpds(cpd)

    expect_rejected(
        TabularCPD(
            "s",
            2,
            values=np.array([[0.5, 0.3], [0.8, 0.7]]),
            evidence=["i"],
            evidence_card=[2],
        ))
    expect_rejected(
        TabularCPD(
            "g",
            2,
            values=np.array([[0.2, 0.3, 0.4, 0.6], [0.3, 0.7, 0.6, 0.4]]),
            evidence=["d", "i"],
            evidence_card=[2, 2],
        ))
    expect_rejected(
        TabularCPD(
            "l",
            2,
            values=np.array([[0.2, 0.3], [0.1, 0.7]]),
            evidence=["g"],
            evidence_card=[2],
        ))
def init_model(self, ebunch, cpdtables, plot=False, pgm_id='pgm'):
    """
    Create the PGM using pgmpy. For now it is a Bayesian model.
    Receives the list of edges and the CPD tables.

    Every table is registered in ``self.variables_dict`` (variable name ->
    list of state indices) and added to ``self.pgmodel``.  When a table has
    evidence, its parents are reordered alphabetically first.  The model is
    then validated and the inference system refreshed.

    Args:
        ebunch (list) : a list containing the edges of the graph (not used
                        inside this method).
        cpdtables (list) : an array of dictionaries where each dictionary
                            contains the information needed to create a
                            probability table.
        plot (boolean) : a flag telling whether an image of the graph is
                        saved.
        pgm_id (str) : the name used to identify the graph.

    Raises:
        ValueError: if ``check_model()`` returns a falsy value.
    """
    for spec in cpdtables:
        name = spec['variable']
        card = spec['variable_card']
        self.variables_dict[name] = list(range(card))

        parents = spec.get('evidence')
        table = TabularCPD(variable=name,
                           variable_card=card,
                           values=spec['values'],
                           evidence_card=spec.get('evidence_card'),
                           evidence=parents)
        if parents:
            table.reorder_parents(sorted(parents))
        self.pgmodel.add_cpds(table)

    if not self.pgmodel.check_model():
        raise ValueError("Error with CPDTs")

    self.update_infer_system()
    if plot:
        self.save_pgm_as_img(pgm_id)
def basic2agent_tie_break() -> MACID:
    """Return a two-agent MACID with binary decisions and six-state utilities.

    Agent 0 owns decision ``D1`` and utility ``U1``; agent 1 owns decision
    ``D2`` and utility ``U2``.  Both utilities depend on both decisions.
    """
    edges = [('D1', 'D2'), ('D1', 'U1'), ('D1', 'U2'), ('D2', 'U2'),
             ('D2', 'U1')]
    agents = {
        0: {'D': ['D1'], 'U': ['U1']},
        1: {'D': ['D2'], 'U': ['U2']},
    }
    macid = MACID(edges, agents)

    # Utility tables: 6 utility states x 4 joint (D1, D2) configurations.
    u1_table = np.array([[0, 1, 0, 0],
                         [0, 0, 0, 1],
                         [0, 0, 0, 0],
                         [1, 0, 1, 0],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0]])
    u2_table = np.array([[0, 0, 0, 0],
                         [1, 0, 0, 0],
                         [0, 0, 1, 1],
                         [0, 0, 0, 0],
                         [0, 0, 0, 0],
                         [0, 1, 0, 0]])

    macid.add_cpds(
        DecisionDomain('D1', [0, 1]),
        DecisionDomain('D2', [0, 1]),
        TabularCPD('U1', 6, u1_table,
                   evidence=['D1', 'D2'], evidence_card=[2, 2]),
        TabularCPD('U2', 6, u2_table,
                   evidence=['D1', 'D2'], evidence_card=[2, 2]),
    )
    return macid
def test_check_model(self):
    """The model is rejected with CPDs for g, s, l only and accepted once d and i are added."""

    def binary_cpd(variable, table, parents, parent_cards):
        # Shorthand for a two-state CPD conditioned on ``parents``.
        return TabularCPD(variable, 2, values=np.array(table),
                          evidence=parents, evidence_card=parent_cards)

    cpd_g = binary_cpd('g', [[0.2, 0.3, 0.4, 0.6], [0.8, 0.7, 0.6, 0.4]],
                       ['d', 'i'], [2, 2])
    cpd_s = binary_cpd('s', [[0.2, 0.3], [0.8, 0.7]], ['i'], [2])
    cpd_l = binary_cpd('l', [[0.2, 0.3], [0.8, 0.7]], ['g'], [2])

    # Only three CPDs attached: validation is expected to fail.
    self.G.add_cpds(cpd_g, cpd_s, cpd_l)
    self.assertRaises(ValueError, self.G.check_model)

    # With d and i added as well, validation is expected to succeed.
    self.G.add_cpds(TabularCPD('d', 2, values=[[0.8, 0.2]]),
                    TabularCPD('i', 2, values=[[0.7, 0.3]]))
    self.assertTrue(self.G.check_model())
def setUp(self):
    """Build ``self.bayesian`` (chain a -> b -> c -> d -> e) and ``self.markov`` (a-b, b-d, a-c, c-d)."""
    # Bayesian chain with binary variables.
    chain = BayesianModel([('a', 'b'), ('b', 'c'), ('c', 'd'), ('d', 'e')])
    chain.add_cpds(
        TabularCPD('a', 2, [[0.4, 0.6]]),
        TabularCPD('b', 2, [[0.2, 0.4], [0.8, 0.6]],
                   evidence=['a'], evidence_card=[2]),
        TabularCPD('c', 2, [[0.1, 0.2], [0.9, 0.8]],
                   evidence=['b'], evidence_card=[2]),
        TabularCPD('d', 2, [[0.4, 0.3], [0.6, 0.7]],
                   evidence=['c'], evidence_card=[2]),
        TabularCPD('e', 2, [[0.3, 0.2], [0.7, 0.8]],
                   evidence=['d'], evidence_card=[2]),
    )
    self.bayesian = chain

    # Markov model with one 2x2 factor per edge.
    grid = MarkovModel([('a', 'b'), ('b', 'd'), ('a', 'c'), ('c', 'd')])
    grid.add_factors(
        DiscreteFactor(['a', 'b'], [2, 2], np.array([100, 1, 1, 100])),
        DiscreteFactor(['a', 'c'], [2, 2], np.array([40, 30, 100, 20])),
        DiscreteFactor(['b', 'd'], [2, 2], np.array([1, 100, 100, 1])),
        DiscreteFactor(['c', 'd'], [2, 2], np.array([60, 60, 40, 40])),
    )
    self.markov = grid
def buildBN():
    """Build the burglary/earthquake alarm network and return an inference engine for it.

    Structure: ``Burglary`` and ``Earthquake`` are parents of ``Alarm``;
    ``Alarm`` is the parent of ``JohnCalls`` and ``MaryCalls``.

    Returns
    -------
    VariableElimination
        Exact-inference object constructed from the model.
    """
    edges = [('Burglary', 'Alarm'), ('Earthquake', 'Alarm'),
             ("Alarm", "JohnCalls"), ("Alarm", "MaryCalls")]
    burglary_model = BayesianModel(edges)

    # Priors, given as [P(not X), P(X)].
    cpd_burg = TabularCPD(variable='Burglary', variable_card=2,
                          values=[[.999], [.001]])
    cpd_earth = TabularCPD(variable='Earthquake', variable_card=2,
                           values=[[.998], [.002]])

    # Columns: (!E,!B), (!E,B), (E,!B), (E,B); row 0 is P(!A|..), row 1 P(A|..).
    alarm_table = [
        [.999, .06, .71, .05],
        [.001, .94, .29, .95],
    ]
    cpd_alarm = TabularCPD(variable='Alarm', variable_card=2,
                           values=alarm_table,
                           evidence=['Earthquake', 'Burglary'],
                           evidence_card=[2, 2])

    # Columns: !A, A; row 0 is P(!J|..), row 1 is P(J|..).
    cpd_john = TabularCPD(variable="JohnCalls", variable_card=2,
                          values=[[.95, .10], [.05, .90]],
                          evidence=['Alarm'], evidence_card=[2])

    # Columns: !A, A; row 0 is P(!M|..), row 1 is P(M|..).
    cpd_mary = TabularCPD(variable="MaryCalls", variable_card=2,
                          values=[[.99, .30], [.01, .70]],
                          evidence=['Alarm'], evidence_card=[2])

    burglary_model.add_cpds(cpd_burg, cpd_earth, cpd_alarm, cpd_john,
                            cpd_mary)

    # Exact inference using Variable Elimination.
    return VariableElimination(burglary_model)
def buildNet(data, conn):
    """Build a BayesianModel from ``data`` and attach CPDs to it.

    ``data`` is passed to ``BayesianModel`` as the edge list; each item ``t``
    is indexed as ``t[0]`` (treated as a symptom / parent) and ``t[1]``
    (treated as a disease / child).  ``conn`` is forwarded to
    ``SQL.symList`` and ``numberOfSons``.

    Returns the model.
    """
    model = BayesianModel(data)
    checkedSymp = list()  # Symptoms already visited and added to the network
    checkedDis = list()  # Diseases already added to the network
    # Build the parent nodes of the network
    for t in data:
        if t[0] not in checkedSymp:
            cpd = TabularCPD(variable=t[0], variable_card=2, values=[[0.5, 0.5]])
            checkedSymp.append(t[0])
            model.add_cpds(cpd)
    # Build the child nodes, linking them to their respective parents
    for t in data:
        if t[1] not in checkedDis:
            sym_list = SQL.symList(
                conn, t[1])  # Get the list of symptoms linked to the disease
            sym_list_length = len(sym_list)
            mat = numberOfSons(conn, sym_list)
            arr = []
            for i in range(0, len(mat)):
                arr.append(mat[i][1])
            print(arr)
            cpd = TabularCPD(variable=t[1], variable_card=sym_list_length, values=np.full((1, sym_list_length), 1 / sym_list_length), evidence=sym_list, evidence_card=arr)
            # NOTE(review): this `break` leaves the loop right after the first
            # child CPD is constructed, so the two statements below are never
            # reached and no child CPD is added to the model. This looks like
            # leftover debugging - confirm the intended behaviour. (The nesting
            # shown here is reconstructed; verify it against the original file.)
            break
            checkedDis.append(t[1])
            model.add_cpds(cpd)
    return model
def basic2agent_tie_break() -> MACID:
    """Return a two-agent MACID with binary decisions and six-state utilities.

    Agent 0 decides ``D1`` and receives ``U1``; agent 1 decides ``D2`` and
    receives ``U2``.  Both utilities depend on both decisions.
    """
    decisions = {0: ["D1"], 1: ["D2"]}
    utilities = {0: ["U1"], 1: ["U2"]}
    macid = MACID(
        [("D1", "D2"), ("D1", "U1"), ("D1", "U2"), ("D2", "U2"),
         ("D2", "U1")],
        agent_decisions=decisions,
        agent_utilities=utilities,
    )

    # Utility tables: 6 utility states x 4 joint (D1, D2) configurations.
    u1_table = np.array([
        [0, 1, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 0, 0],
        [1, 0, 1, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
    ])
    u2_table = np.array([
        [0, 0, 0, 0],
        [1, 0, 0, 0],
        [0, 0, 1, 1],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 1, 0, 0],
    ])

    domains = [DecisionDomain(name, [0, 1]) for name in ("D1", "D2")]
    payoffs = [
        TabularCPD("U1", 6, u1_table,
                   evidence=["D1", "D2"], evidence_card=[2, 2]),
        TabularCPD("U2", 6, u2_table,
                   evidence=["D1", "D2"], evidence_card=[2, 2]),
    ]
    macid.add_cpds(*domains, *payoffs)
    return macid
def __setitem__(self, variable: str, cpd: TabularCPD, sync_state_names: bool = True) -> None:
    """Store ``cpd`` under ``variable`` and register it with ``self.cbn``.

    Parameters
    ----------
    variable:
        Key under which the CPD is stored.
    cpd:
        The CPD to store.  A ``StochasticFunctionCPD`` is first initialised
        against ``self.cbn``; if that raises ``ParentsNotReadyException`` the
        CPD stays stored under its key, but nothing else happens.
    sync_state_names:
        When true, ``self.sync_state_names()`` is called after the children
        have been refreshed.  The recursive calls made for the children pass
        ``False``.
    """
    # Update the keys
    if variable in self.keys():
        self.__delitem__(variable)
    super().__setitem__(variable, cpd)

    # If the CPD can be initialized, try doing so. If it fails, do nothing
    if isinstance(cpd, StochasticFunctionCPD):
        try:
            cpd.initialize_tabular_cpd(self.cbn)
        except ParentsNotReadyException:
            return

    # add cpd to BayesianModel, and update domain dictionary
    BayesianModel.add_cpds(self.cbn, cpd)
    old_domain = self.domain.get(variable, None)
    self.domain[variable] = cpd.state_names[variable]

    # if the domain has changed, update all descendants, and sync the state_names
    # (a missing or empty previous domain also counts as "changed")
    # NOTE(review): the nesting of the `if sync_state_names` block under this
    # condition is reconstructed from the comment above - confirm against the
    # original file.
    if not (old_domain and old_domain == self.domain[variable]):
        for child in self.cbn.get_children(variable):
            if child in self.keys():
                # Re-assign the child's own CPD so it is processed again
                # against the new parent domain.
                self.__setitem__(
                    child, self[child], sync_state_names=False)  # type: ignore
        if sync_state_names:
            self.sync_state_names()
def estimate_cpd(self, node):
    """
    Method to estimate the CPD for a given variable.

    The CPD is built from ``self.state_counts(node)`` and normalised.  A
    parent configuration with no observations (an all-zero column) is given
    a uniform distribution.

    Parameters
    ----------
    node: int, string (any hashable python object)
        The name of the variable for which the CPD is to be estimated.

    Returns
    -------
    CPD: TabularCPD

    Examples
    --------
    >>> import pandas as pd
    >>> from pgmpy.models import BayesianModel
    >>> from pgmpy.estimators import MaximumLikelihoodEstimator
    >>> data = pd.DataFrame(data={'A': [0, 0, 1], 'B': [0, 1, 0], 'C': [1, 1, 0]})
    >>> model = BayesianModel([('A', 'C'), ('B', 'C')])
    >>> cpd_A = MaximumLikelihoodEstimator(model, data).estimate_cpd('A')
    >>> print(cpd_A)
    ╒══════╤══════════╕
    │ A(0) │ 0.666667 │
    ├──────┼──────────┤
    │ A(1) │ 0.333333 │
    ╘══════╧══════════╛
    >>> cpd_C = MaximumLikelihoodEstimator(model, data).estimate_cpd('C')
    >>> print(cpd_C)
    ╒══════╤══════╤══════╤══════╤══════╕
    │ A    │ A(0) │ A(0) │ A(1) │ A(1) │
    ├──────┼──────┼──────┼──────┼──────┤
    │ B    │ B(0) │ B(1) │ B(0) │ B(1) │
    ├──────┼──────┼──────┼──────┼──────┤
    │ C(0) │ 0.0  │ 0.0  │ 1.0  │ 0.5  │
    ├──────┼──────┼──────┼──────┼──────┤
    │ C(1) │ 1.0  │ 1.0  │ 0.0  │ 0.5  │
    ╘══════╧══════╧══════╧══════╧══════╛
    """
    state_counts = self.state_counts(node)

    # if a column contains only `0`s (no states observed for some configuration
    # of parents' states) fill that column uniformly instead.
    # The boolean column mask is applied through `.loc`; the `.ix` indexer
    # used previously no longer exists in pandas >= 1.0.
    empty_columns = (state_counts == 0).all()
    state_counts.loc[:, empty_columns] = 1

    parents = sorted(self.model.get_parents(node))
    parents_cardinalities = [len(self.state_names[parent]) for parent in parents]
    node_cardinality = len(self.state_names[node])

    cpd = TabularCPD(node, node_cardinality, np.array(state_counts),
                     evidence=parents,
                     evidence_card=parents_cardinalities,
                     state_names=self.state_names)
    cpd.normalize()
    return cpd
def test_reduce_cpd_statename(self):
    """Reducing on ``diff`` works by state name and by state index, in place and as a copy.

    Four cases are checked in order: in-place with ``'high'``, in-place with
    ``0``, copy with ``'high'`` and copy with ``0``.  Each must leave a CPD
    of ``grade`` given ``intel`` only.
    """

    def fresh_cpd():
        # A new, unreduced CPD for each case.
        return TabularCPD('grade', 3,
                          [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                           [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                           [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                          evidence=['diff', 'intel'], evidence_card=[2, 3],
                          state_names=self.sn2)

    def check_reduced(reduced):
        self.assertEqual(reduced.variable, 'grade')
        self.assertEqual(reduced.variables, ['grade', 'intel'])
        np_test.assert_array_equal(
            reduced.get_values(),
            np.array([[0.1, 0.1, 0.1],
                      [0.1, 0.1, 0.1],
                      [0.8, 0.8, 0.8]]))

    # In-place reduction (the default).
    for state in ('high', 0):
        cpd = fresh_cpd()
        cpd.reduce([('diff', state)])
        check_reduced(cpd)

    # Reduction that returns a new CPD.
    for state in ('high', 0):
        cpd = fresh_cpd()
        cpd = cpd.reduce([('diff', state)], inplace=False)
        check_reduced(cpd)
def estimate_cpd(self, node, prior_type='BDeu', pseudo_counts=[], equivalent_sample_size=5):
    """
    Method to estimate the CPD for a given variable.

    Parameters
    ----------
    node: int, string (any hashable python object)
        The name of the variable for which the CPD is to be estimated.

    prior_type: 'dirichlet', 'BDeu', 'K2',
        string indicting which type of prior to use for the model parameters.
        - If 'prior_type' is 'dirichlet', the following must be provided:
            'pseudo_counts' = dirichlet hyperparameters; a list or dict
             with a "virtual" count for each variable state.
             The virtual counts are added to the actual state counts found in the data.
             (if a list is provided, a lexicographic ordering of states is assumed;
             if a dict is provided, it must be keyed by the states of `node`)
        - If 'prior_type' is 'BDeu', then an 'equivalent_sample_size'
            must be specified instead of 'pseudo_counts'. This is equivalent to
            'prior_type=dirichlet' and using uniform 'pseudo_counts' of
            `equivalent_sample_size/(node_cardinality*np.prod(parents_cardinalities))`.
        - A prior_type of 'K2' is a shorthand for 'dirichlet' + setting every pseudo_count to 1,
            regardless of the cardinality of the variable.

    pseudo_counts: list or dict
        Only used when `prior_type` is 'dirichlet'. Must contain exactly one
        entry per state of `node`. The default list is only read, never
        modified.

    equivalent_sample_size: int or float
        Only used when `prior_type` is 'BDeu'.

    Returns
    -------
    CPD: TabularCPD

    Raises
    ------
    ValueError
        If `prior_type` is not one of the supported values, if
        `pseudo_counts` has the wrong length, or if a `pseudo_counts` dict
        has no entry for one of the states of `node`.

    Examples
    --------
    >>> import pandas as pd
    >>> from pgmpy.models import BayesianModel
    >>> from pgmpy.estimators import BayesianEstimator
    >>> data = pd.DataFrame(data={'A': [0, 0, 1], 'B': [0, 1, 0], 'C': [1, 1, 0]})
    >>> model = BayesianModel([('A', 'C'), ('B', 'C')])
    >>> estimator = BayesianEstimator(model, data)
    >>> cpd_C = estimator.estimate_cpd('C', prior_type="dirichlet", pseudo_counts=[1, 2])
    >>> print(cpd_C)
    ╒══════╤══════╤══════╤══════╤════════════════════╕
    │ A    │ A(0) │ A(0) │ A(1) │ A(1)               │
    ├──────┼──────┼──────┼──────┼────────────────────┤
    │ B    │ B(0) │ B(1) │ B(0) │ B(1)               │
    ├──────┼──────┼──────┼──────┼────────────────────┤
    │ C(0) │ 0.25 │ 0.25 │ 0.5  │ 0.3333333333333333 │
    ├──────┼──────┼──────┼──────┼────────────────────┤
    │ C(1) │ 0.75 │ 0.75 │ 0.5  │ 0.6666666666666666 │
    ╘══════╧══════╧══════╧══════╧════════════════════╛
    """
    node_states = self.state_names[node]
    node_cardinality = len(node_states)
    parents = sorted(self.model.get_parents(node))
    parents_cardinalities = [len(self.state_names[parent]) for parent in parents]

    if prior_type == 'K2':
        pseudo_counts = [1] * node_cardinality
    elif prior_type == 'BDeu':
        alpha = float(equivalent_sample_size) / (node_cardinality * np.prod(parents_cardinalities))
        pseudo_counts = [alpha] * node_cardinality
    elif prior_type == 'dirichlet':
        if not len(pseudo_counts) == node_cardinality:
            raise ValueError("'pseudo_counts' should have length {0}".format(node_cardinality))
        if isinstance(pseudo_counts, dict):
            # Look each count up by its state so that it lands on that state's
            # row. Sorting the dict's *values* (as was done before) ordered the
            # counts by magnitude and lost the state -> count association.
            # assumes self.state_counts(node) orders its rows like
            # self.state_names[node] - TODO confirm
            missing = [state for state in node_states if state not in pseudo_counts]
            if missing:
                raise ValueError(
                    "'pseudo_counts' has no entry for state(s) {0}".format(missing))
            pseudo_counts = [pseudo_counts[state] for state in node_states]
    else:
        raise ValueError("'prior_type' not specified")

    state_counts = self.state_counts(node)
    # Transpose so the per-state pseudo counts broadcast along the state axis,
    # then transpose back.
    bayesian_counts = (state_counts.T + pseudo_counts).T

    cpd = TabularCPD(node, node_cardinality, np.array(bayesian_counts),
                     evidence=parents,
                     evidence_card=parents_cardinalities,
                     state_names=self.state_names)
    cpd.normalize()
    return cpd