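# Cancer_1 | 0.97 | 0.95 | 0.999 | 0.98
#
# Take the cell containing 0.98 as an example: Smoker_1 means the person smokes, Pollution_1 means the
# environment is polluted and Cancer_1 means the person has cancer, i.e. given smoking and a polluted
# environment the probability of cancer is 0.98. The other cells are read the same way. Once the
# conditional probability table is known for every case, the parameters of the corresponding node can
# be built.

# In[13]:

# Use add_cpds to attach the parameters (CPDs) to the graph
cancer_model.add_cpds(cpd_poll, cpd_smoke, cpd_cancer, cpd_xray, cpd_dysp)

# Check that the model is consistent; True means it is
cancer_model.check_model()

# In[14]:

# is_active_trail checks whether two nodes are connected (by an active trail)
cancer_model.is_active_trail('Pollution', 'Smoker')

# In[15]:

# Passing `observed` to is_active_trail asks whether the two nodes are connected given the observed nodes
cancer_model.is_active_trail('Pollution', 'Smoker', observed=['Cancer'])

# # 5 Exercise
# Next we use a more complex Bayesian network and carry out the computation with the pgmpy module.
# The network is shown in the figure below:
# <img src="./Img/fig7.png" width = "500" height = "300" alt="Aisa" align=center />
# First import the required modules and the data set:
# >**Note:** in pgmpy a PGM graph can be stored and read in the bif format; the PGM above has already
# been saved as asia.bif.

# In[16]: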
class TestBayesianModelCPD(unittest.TestCase):
    """Active-trail and CPD bookkeeping tests on a five-node Bayesian model.

    The fixture graph has the edges d -> g, i -> g, g -> l and i -> s.
    ``evidence_card`` is always passed as a list, one entry per evidence
    variable, so every CPD in the class is built the same way.
    """

    def setUp(self):
        self.G = BayesianModel([('d', 'g'), ('i', 'g'), ('g', 'l'), ('i', 's')])

    def _random_cpds(self):
        """Return randomly valued CPDs for d, i, g, l and s, in that order."""
        cpd_d = TabularCPD('d', 2, values=np.random.rand(2, 1))
        cpd_i = TabularCPD('i', 2, values=np.random.rand(2, 1))
        cpd_g = TabularCPD('g', 2, values=np.random.rand(2, 4),
                           evidence=['d', 'i'], evidence_card=[2, 2])
        cpd_l = TabularCPD('l', 2, values=np.random.rand(2, 2),
                           evidence=['g'], evidence_card=[2])
        cpd_s = TabularCPD('s', 2, values=np.random.rand(2, 2),
                           evidence=['i'], evidence_card=[2])
        return cpd_d, cpd_i, cpd_g, cpd_l, cpd_s

    def _assert_check_model_rejects(self, cpd):
        """Attach ``cpd``, require ``check_model`` to raise ValueError, detach it."""
        self.G.add_cpds(cpd)
        self.assertRaises(ValueError, self.G.check_model)
        # Detach again so the next candidate is checked against a clean model.
        self.G.remove_cpds(cpd)

    def test_active_trail_nodes(self):
        """Nodes reachable from a start node when nothing is observed."""
        self.assertEqual(sorted(self.G.active_trail_nodes('d')), ['d', 'g', 'l'])
        self.assertEqual(sorted(self.G.active_trail_nodes('i')), ['g', 'i', 'l', 's'])

    def test_active_trail_nodes_args(self):
        """Nodes reachable from a start node for several observed sets."""
        self.assertEqual(
            sorted(self.G.active_trail_nodes('d', observed='g')), ['d', 'i', 's'])
        self.assertEqual(
            sorted(self.G.active_trail_nodes('l', observed='g')), ['l'])
        self.assertEqual(
            sorted(self.G.active_trail_nodes('s', observed=['i', 'l'])), ['s'])
        self.assertEqual(
            sorted(self.G.active_trail_nodes('s', observed=['d', 'l'])), ['g', 'i', 's'])

    def test_is_active_trail_triplets(self):
        """Pairwise active-trail queries, with and without a single observed node."""
        self.assertTrue(self.G.is_active_trail('d', 'l'))
        self.assertTrue(self.G.is_active_trail('g', 's'))
        self.assertFalse(self.G.is_active_trail('d', 'i'))
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='g'))
        self.assertFalse(self.G.is_active_trail('d', 'l', observed='g'))
        self.assertFalse(self.G.is_active_trail('i', 'l', observed='g'))
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='l'))
        self.assertFalse(self.G.is_active_trail('g', 's', observed='i'))

    def test_is_active_trail(self):
        """Active-trail queries between nodes that are not directly linked."""
        self.assertFalse(self.G.is_active_trail('d', 's'))
        self.assertTrue(self.G.is_active_trail('s', 'l'))
        self.assertTrue(self.G.is_active_trail('d', 's', observed='g'))
        self.assertFalse(self.G.is_active_trail('s', 'l', observed='g'))

    def test_is_active_trail_args(self):
        """``observed`` may be passed positionally, as a string or a list."""
        self.assertFalse(self.G.is_active_trail('s', 'l', 'i'))
        self.assertFalse(self.G.is_active_trail('s', 'l', 'g'))
        self.assertTrue(self.G.is_active_trail('d', 's', 'l'))
        self.assertFalse(self.G.is_active_trail('d', 's', ['i', 'l']))

    def test_get_cpds(self):
        """``get_cpds(node)`` returns the CPD whose variable is that node."""
        self.G.add_cpds(*self._random_cpds())
        self.assertEqual(self.G.get_cpds('d').variable, 'd')

    def test_get_cpds1(self):
        """Lookup works when one node name is a prefix of another ('A' / 'AB')."""
        self.model = BayesianModel([('A', 'AB')])
        cpd_a = TabularCPD('A', 2, values=np.random.rand(2, 1))
        cpd_ab = TabularCPD('AB', 2, values=np.random.rand(2, 2),
                            evidence=['A'], evidence_card=[2])
        self.model.add_cpds(cpd_a, cpd_ab)
        self.assertEqual(self.model.get_cpds('A').variable, 'A')
        self.assertEqual(self.model.get_cpds('AB').variable, 'AB')

    def test_add_single_cpd(self):
        """A single attached CPD is the only entry returned by ``get_cpds()``."""
        # evidence_card is a list here as well (it used to be the scalar 2).
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], [2])
        self.G.add_cpds(cpd_s)
        self.assertListEqual(self.G.get_cpds(), [cpd_s])

    def test_add_multiple_cpds(self):
        """Every CPD attached in one ``add_cpds`` call can be fetched by node."""
        cpd_d, cpd_i, cpd_g, cpd_l, cpd_s = self._random_cpds()
        self.G.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)
        self.assertEqual(self.G.get_cpds('d'), cpd_d)
        self.assertEqual(self.G.get_cpds('i'), cpd_i)
        self.assertEqual(self.G.get_cpds('g'), cpd_g)
        self.assertEqual(self.G.get_cpds('l'), cpd_l)
        self.assertEqual(self.G.get_cpds('s'), cpd_s)

    def test_check_model(self):
        """``check_model`` returns True for CPDs of g, s and l that match the graph."""
        cpd_g = TabularCPD('g', 2,
                           values=np.array([[0.2, 0.3, 0.4, 0.6],
                                            [0.8, 0.7, 0.6, 0.4]]),
                           evidence=['d', 'i'], evidence_card=[2, 2])
        cpd_s = TabularCPD('s', 2, values=np.array([[0.2, 0.3], [0.8, 0.7]]),
                           evidence=['i'], evidence_card=[2])
        cpd_l = TabularCPD('l', 2, values=np.array([[0.2, 0.3], [0.8, 0.7]]),
                           evidence=['g'], evidence_card=[2])
        self.G.add_cpds(cpd_g, cpd_s, cpd_l)
        self.assertTrue(self.G.check_model())

    def test_check_model1(self):
        """CPDs whose evidence differs from the node's parents are rejected.

        In the fixture graph g has parents d and i, and l has parent g; each
        CPD below lists a different evidence set.
        """
        self._assert_check_model_rejects(
            TabularCPD('g', 2, values=np.array([[0.2, 0.3], [0.8, 0.7]]),
                       evidence=['i'], evidence_card=[2]))
        self._assert_check_model_rejects(
            TabularCPD('g', 2,
                       values=np.array([[0.2, 0.3, 0.4, 0.6],
                                        [0.8, 0.7, 0.6, 0.4]]),
                       evidence=['d', 's'], evidence_card=[2, 2]))
        self._assert_check_model_rejects(
            TabularCPD('g', 2, values=np.array([[0.2, 0.3], [0.8, 0.7]]),
                       evidence=['l'], evidence_card=[2]))
        self._assert_check_model_rejects(
            TabularCPD('l', 2, values=np.array([[0.2, 0.3], [0.8, 0.7]]),
                       evidence=['d'], evidence_card=[2]))
        self._assert_check_model_rejects(
            TabularCPD('l', 2,
                       values=np.array([[0.2, 0.3, 0.4, 0.6],
                                        [0.8, 0.7, 0.6, 0.4]]),
                       evidence=['d', 'i'], evidence_card=[2, 2]))
        self._assert_check_model_rejects(
            TabularCPD('l', 2,
                       values=np.array([[0.2, 0.3, 0.4, 0.6, 0.2, 0.3, 0.4, 0.6],
                                        [0.8, 0.7, 0.6, 0.4, 0.8, 0.7, 0.6, 0.4]]),
                       evidence=['g', 'd', 'i'], evidence_card=[2, 2, 2]))

    def test_check_model2(self):
        """CPDs with a column that does not sum to 1 are rejected."""
        # evidence_card is a list here as well (it used to be the scalar 2).
        self._assert_check_model_rejects(
            TabularCPD('s', 2, values=np.array([[0.5, 0.3], [0.8, 0.7]]),
                       evidence=['i'], evidence_card=[2]))
        self._assert_check_model_rejects(
            TabularCPD('g', 2,
                       values=np.array([[0.2, 0.3, 0.4, 0.6],
                                        [0.3, 0.7, 0.6, 0.4]]),
                       evidence=['d', 'i'], evidence_card=[2, 2]))
        self._assert_check_model_rejects(
            TabularCPD('l', 2, values=np.array([[0.2, 0.3], [0.1, 0.7]]),
                       evidence=['g'], evidence_card=[2]))

    def tearDown(self):
        del self.G
# --- Check whether the model and all the associated CPDs are consistent
model.check_model()
" True "

# --- Remove any wrong or extraneous CPDs, then list the remaining ones
model.remove_cpds('wrong_cpds')
model.get_cpds()
" 'wrong cpd gone' "

--------------
P = P(A,R,J,G,L,Q) = P(A) P(R) P(G) P(J|A,R) P(Q|R) P(L|G,J)

# ----- Active Trail
model.is_active_trail('accident', 'rain')
" False "
model.is_active_trail('accident', 'rain', observed='traffic_jam')
" True "
# ('activities', 'studytime'),
# ('freetime', 'goout'),
# ('failures', 'absences'),
# ('freetime', 'activities'),
# ('studytime', 'freetime'),
]
model = BayesianModel(bnmodel)

# To test any implied condition in the network, the method `is_active_trail` can be used. The next lines
# test the condition (subject _|_ G3) with no observed variables.
var1 = 'subject'
var2 = 'G3'
observed = []
active = model.is_active_trail(var1, var2, observed=observed)  # is dependent

# The `get_independencies` method lists all the implied conditions in the model.
#model.get_independencies()

# To perform a chi-square test on any of the conditional independencies, the method `test_independence`
# defined above can be used. The loop below applies it to every pair (var1, var2) in `bnmodel`, skipping
# pairs that involve 'IQ'.
independent, dependent, questionable = [], [], []
for (var1, var2) in bnmodel:
    if var1 == 'IQ' or var2 == 'IQ':
        continue
    chi_stat, p_value, dof, RMSEA = test_independence(df=df, var1=var1, var2=var2,
class TestBayesianModelCPD(unittest.TestCase):
    """Active-trail and CPD bookkeeping tests on a five-node Bayesian model.

    The fixture graph has the edges d -> g, i -> g, g -> l and i -> s.
    ``evidence_card`` is always given as a list (one cardinality per evidence
    variable); single-parent CPDs previously passed the bare scalar ``2``
    while two-parent CPDs in the same class already used ``[2, 2]``.
    """

    def setUp(self):
        self.G = BayesianModel([('d', 'g'), ('i', 'g'), ('g', 'l'), ('i', 's')])

    def _random_cpds(self):
        """Return randomly valued CPDs for d, i, g, l and s, in that order."""
        cpd_d = TabularCPD('d', 2, np.random.rand(2, 1))
        cpd_i = TabularCPD('i', 2, np.random.rand(2, 1))
        cpd_g = TabularCPD('g', 2, np.random.rand(2, 4), ['d', 'i'], [2, 2])
        cpd_l = TabularCPD('l', 2, np.random.rand(2, 2), ['g'], [2])
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], [2])
        return cpd_d, cpd_i, cpd_g, cpd_l, cpd_s

    def _assert_check_model_rejects(self, cpd):
        """Attach ``cpd``, require ``check_model`` to raise ValueError, detach it."""
        self.G.add_cpds(cpd)
        self.assertRaises(ValueError, self.G.check_model)
        # Detach again so the next candidate is checked against a clean model.
        self.G.remove_cpds(cpd)

    def test_active_trail_nodes(self):
        """Nodes reachable from a start node when nothing is observed."""
        self.assertEqual(sorted(self.G.active_trail_nodes('d')), ['d', 'g', 'l'])
        self.assertEqual(sorted(self.G.active_trail_nodes('i')), ['g', 'i', 'l', 's'])

    def test_active_trail_nodes_args(self):
        """Nodes reachable from a start node for several observed sets."""
        self.assertEqual(
            sorted(self.G.active_trail_nodes('d', observed='g')), ['d', 'i', 's'])
        self.assertEqual(
            sorted(self.G.active_trail_nodes('l', observed='g')), ['l'])
        self.assertEqual(
            sorted(self.G.active_trail_nodes('s', observed=['i', 'l'])), ['s'])
        self.assertEqual(
            sorted(self.G.active_trail_nodes('s', observed=['d', 'l'])), ['g', 'i', 's'])

    def test_is_active_trail_triplets(self):
        """Pairwise active-trail queries, with and without a single observed node."""
        self.assertTrue(self.G.is_active_trail('d', 'l'))
        self.assertTrue(self.G.is_active_trail('g', 's'))
        self.assertFalse(self.G.is_active_trail('d', 'i'))
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='g'))
        self.assertFalse(self.G.is_active_trail('d', 'l', observed='g'))
        self.assertFalse(self.G.is_active_trail('i', 'l', observed='g'))
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='l'))
        self.assertFalse(self.G.is_active_trail('g', 's', observed='i'))

    def test_is_active_trail(self):
        """Active-trail queries between nodes that are not directly linked."""
        self.assertFalse(self.G.is_active_trail('d', 's'))
        self.assertTrue(self.G.is_active_trail('s', 'l'))
        self.assertTrue(self.G.is_active_trail('d', 's', observed='g'))
        self.assertFalse(self.G.is_active_trail('s', 'l', observed='g'))

    def test_is_active_trail_args(self):
        """``observed`` may be passed positionally, as a string or a list."""
        self.assertFalse(self.G.is_active_trail('s', 'l', 'i'))
        self.assertFalse(self.G.is_active_trail('s', 'l', 'g'))
        self.assertTrue(self.G.is_active_trail('d', 's', 'l'))
        self.assertFalse(self.G.is_active_trail('d', 's', ['i', 'l']))

    def test_get_cpds(self):
        """``get_cpds(node)`` returns the CPD whose variable is that node."""
        self.G.add_cpds(*self._random_cpds())
        self.assertEqual(self.G.get_cpds('d').variable, 'd')

    def test_get_cpds1(self):
        """Lookup works when one node name is a prefix of another ('A' / 'AB')."""
        self.model = BayesianModel([('A', 'AB')])
        cpd_a = TabularCPD('A', 2, np.random.rand(2, 1))
        cpd_ab = TabularCPD('AB', 2, np.random.rand(2, 2),
                            evidence=['A'], evidence_card=[2])
        self.model.add_cpds(cpd_a, cpd_ab)
        self.assertEqual(self.model.get_cpds('A').variable, 'A')
        self.assertEqual(self.model.get_cpds('AB').variable, 'AB')

    def test_add_single_cpd(self):
        """A single attached CPD is the only entry returned by ``get_cpds()``."""
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], [2])
        self.G.add_cpds(cpd_s)
        self.assertListEqual(self.G.get_cpds(), [cpd_s])

    def test_add_multiple_cpds(self):
        """Every CPD attached in one ``add_cpds`` call can be fetched by node."""
        cpd_d, cpd_i, cpd_g, cpd_l, cpd_s = self._random_cpds()
        self.G.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)
        self.assertEqual(self.G.get_cpds('d'), cpd_d)
        self.assertEqual(self.G.get_cpds('i'), cpd_i)
        self.assertEqual(self.G.get_cpds('g'), cpd_g)
        self.assertEqual(self.G.get_cpds('l'), cpd_l)
        self.assertEqual(self.G.get_cpds('s'), cpd_s)

    def test_check_model(self):
        """``check_model`` returns True for CPDs of g, s and l that match the graph."""
        cpd_g = TabularCPD('g', 2,
                           np.array([[0.2, 0.3, 0.4, 0.6],
                                     [0.8, 0.7, 0.6, 0.4]]),
                           ['d', 'i'], [2, 2])
        cpd_s = TabularCPD('s', 2, np.array([[0.2, 0.3], [0.8, 0.7]]), ['i'], [2])
        cpd_l = TabularCPD('l', 2, np.array([[0.2, 0.3], [0.8, 0.7]]), ['g'], [2])
        self.G.add_cpds(cpd_g, cpd_s, cpd_l)
        self.assertTrue(self.G.check_model())

    def test_check_model1(self):
        """CPDs whose evidence differs from the node's parents are rejected.

        In the fixture graph g has parents d and i, and l has parent g; each
        CPD below lists a different evidence set.
        """
        self._assert_check_model_rejects(
            TabularCPD('g', 2, np.array([[0.2, 0.3], [0.8, 0.7]]), ['i'], [2]))
        self._assert_check_model_rejects(
            TabularCPD('g', 2,
                       np.array([[0.2, 0.3, 0.4, 0.6],
                                 [0.8, 0.7, 0.6, 0.4]]),
                       ['d', 's'], [2, 2]))
        self._assert_check_model_rejects(
            TabularCPD('g', 2, np.array([[0.2, 0.3], [0.8, 0.7]]), ['l'], [2]))
        self._assert_check_model_rejects(
            TabularCPD('l', 2, np.array([[0.2, 0.3], [0.8, 0.7]]), ['d'], [2]))
        self._assert_check_model_rejects(
            TabularCPD('l', 2,
                       np.array([[0.2, 0.3, 0.4, 0.6],
                                 [0.8, 0.7, 0.6, 0.4]]),
                       ['d', 'i'], [2, 2]))
        self._assert_check_model_rejects(
            TabularCPD('l', 2,
                       np.array([[0.2, 0.3, 0.4, 0.6, 0.2, 0.3, 0.4, 0.6],
                                 [0.8, 0.7, 0.6, 0.4, 0.8, 0.7, 0.6, 0.4]]),
                       ['g', 'd', 'i'], [2, 2, 2]))

    def test_check_model2(self):
        """CPDs with a column that does not sum to 1 are rejected."""
        self._assert_check_model_rejects(
            TabularCPD('s', 2, np.array([[0.5, 0.3], [0.8, 0.7]]), ['i'], [2]))
        self._assert_check_model_rejects(
            TabularCPD('g', 2,
                       np.array([[0.2, 0.3, 0.4, 0.6],
                                 [0.3, 0.7, 0.6, 0.4]]),
                       ['d', 'i'], [2, 2]))
        self._assert_check_model_rejects(
            TabularCPD('l', 2, np.array([[0.2, 0.3], [0.1, 0.7]]), ['g'], [2]))

    def tearDown(self):
        del self.G
class TestBayesianModelCPD(unittest.TestCase):
    """Active-trail and CPD attachment tests on a five-node Bayesian model.

    The fixture graph has the edges d -> g, i -> g, g -> l and i -> s.
    ``TabularCPD`` is taken from module scope in every test; the function-local
    ``from pgmpy.factors import TabularCPD`` that two tests carried was
    redundant, because sibling tests already use the module-level name.
    ``evidence_card`` is always given as a list.
    """

    def setUp(self):
        self.G = BayesianModel([('d', 'g'), ('i', 'g'), ('g', 'l'), ('i', 's')])

    def _random_cpds(self):
        """Return randomly valued CPDs for d, i, g, l and s, in that order."""
        cpd_d = TabularCPD('d', 2, np.random.rand(2, 1))
        cpd_i = TabularCPD('i', 2, np.random.rand(2, 1))
        cpd_g = TabularCPD('g', 2, np.random.rand(2, 4), ['d', 'i'], [2, 2])
        cpd_l = TabularCPD('l', 2, np.random.rand(2, 2), ['g'], [2])
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], [2])
        return cpd_d, cpd_i, cpd_g, cpd_l, cpd_s

    def test_active_trail_nodes(self):
        """Nodes reachable from a start node when nothing is observed."""
        self.assertEqual(sorted(self.G.active_trail_nodes('d')), ['d', 'g', 'l'])
        self.assertEqual(sorted(self.G.active_trail_nodes('i')), ['g', 'i', 'l', 's'])

    def test_active_trail_nodes_args(self):
        """Nodes reachable from a start node for several observed sets."""
        self.assertEqual(
            sorted(self.G.active_trail_nodes('d', observed='g')), ['d', 'i', 's'])
        self.assertEqual(
            sorted(self.G.active_trail_nodes('l', observed='g')), ['l'])
        self.assertEqual(
            sorted(self.G.active_trail_nodes('s', observed=['i', 'l'])), ['s'])
        self.assertEqual(
            sorted(self.G.active_trail_nodes('s', observed=['d', 'l'])), ['g', 'i', 's'])

    def test_is_active_trail_triplets(self):
        """Pairwise active-trail queries, with and without a single observed node."""
        self.assertTrue(self.G.is_active_trail('d', 'l'))
        self.assertTrue(self.G.is_active_trail('g', 's'))
        self.assertFalse(self.G.is_active_trail('d', 'i'))
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='g'))
        self.assertFalse(self.G.is_active_trail('d', 'l', observed='g'))
        self.assertFalse(self.G.is_active_trail('i', 'l', observed='g'))
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='l'))
        self.assertFalse(self.G.is_active_trail('g', 's', observed='i'))

    def test_is_active_trail(self):
        """Active-trail queries between nodes that are not directly linked."""
        self.assertFalse(self.G.is_active_trail('d', 's'))
        self.assertTrue(self.G.is_active_trail('s', 'l'))
        self.assertTrue(self.G.is_active_trail('d', 's', observed='g'))
        self.assertFalse(self.G.is_active_trail('s', 'l', observed='g'))

    def test_is_active_trail_args(self):
        """``observed`` may be passed positionally, as a string or a list."""
        self.assertFalse(self.G.is_active_trail('s', 'l', 'i'))
        self.assertFalse(self.G.is_active_trail('s', 'l', 'g'))
        self.assertTrue(self.G.is_active_trail('d', 's', 'l'))
        self.assertFalse(self.G.is_active_trail('d', 's', ['i', 'l']))

    def test_get_cpds(self):
        """``get_cpds(node)`` returns the CPD whose variable is that node."""
        self.G.add_cpds(*self._random_cpds())
        self.assertEqual(self.G.get_cpds('d').variable, 'd')

    def test_get_cpds1(self):
        """Lookup works when one node name is a prefix of another ('A' / 'AB')."""
        self.model = BayesianModel([('A', 'AB')])
        cpd_a = TabularCPD('A', 2, np.random.rand(2, 1))
        cpd_ab = TabularCPD('AB', 2, np.random.rand(2, 2),
                            evidence=['A'], evidence_card=[2])
        self.model.add_cpds(cpd_a, cpd_ab)
        self.assertEqual(self.model.get_cpds('A').variable, 'A')
        self.assertEqual(self.model.get_cpds('AB').variable, 'AB')

    def test_add_single_cpd(self):
        """A single attached CPD is the only entry returned by ``get_cpds()``."""
        cpd_s = TabularCPD('s', 2, np.random.rand(2, 2), ['i'], [2])
        self.G.add_cpds(cpd_s)
        self.assertListEqual(self.G.get_cpds(), [cpd_s])

    def test_add_multiple_cpds(self):
        """Every CPD attached in one ``add_cpds`` call can be fetched by node."""
        cpd_d, cpd_i, cpd_g, cpd_l, cpd_s = self._random_cpds()
        self.G.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)
        self.assertEqual(self.G.get_cpds('d'), cpd_d)
        self.assertEqual(self.G.get_cpds('i'), cpd_i)
        self.assertEqual(self.G.get_cpds('g'), cpd_g)
        self.assertEqual(self.G.get_cpds('l'), cpd_l)
        self.assertEqual(self.G.get_cpds('s'), cpd_s)

    def tearDown(self):
        del self.G
class TestBayesianModelCPD(unittest.TestCase):
    """Active-trail and CPD bookkeeping tests on a five-node network.

    ``self.G`` (a BayesianModel) and ``self.G2`` (a DAG) are built from the
    same edges: d -> g, i -> g, g -> l and i -> s. The ``active_trail_nodes``
    tests run against ``self.G2`` and index its result by start node.
    """

    def setUp(self):
        edges = [("d", "g"), ("i", "g"), ("g", "l"), ("i", "s")]
        self.G = BayesianModel(edges)
        self.G2 = DAG(edges)

    def _random_cpds(self):
        """Return randomly valued CPDs for d, i, g, l and s, in that order."""
        cpd_d = TabularCPD("d", 2, values=np.random.rand(2, 1))
        cpd_i = TabularCPD("i", 2, values=np.random.rand(2, 1))
        cpd_g = TabularCPD(
            "g",
            2,
            values=np.random.rand(2, 4),
            evidence=["d", "i"],
            evidence_card=[2, 2],
        )
        cpd_l = TabularCPD("l", 2, values=np.random.rand(2, 2), evidence=["g"], evidence_card=[2])
        cpd_s = TabularCPD("s", 2, values=np.random.rand(2, 2), evidence=["i"], evidence_card=[2])
        return cpd_d, cpd_i, cpd_g, cpd_l, cpd_s

    def _assert_check_model_rejects(self, cpd):
        """Attach ``cpd``, require ``check_model`` to raise ValueError, detach it."""
        self.G.add_cpds(cpd)
        self.assertRaises(ValueError, self.G.check_model)
        # Detach again so the next candidate is checked against a clean model.
        self.G.remove_cpds(cpd)

    def test_active_trail_nodes(self):
        """Reachable nodes per start node when nothing is observed."""
        self.assertEqual(sorted(self.G2.active_trail_nodes("d")["d"]), ["d", "g", "l"])
        self.assertEqual(sorted(self.G2.active_trail_nodes("i")["i"]), ["g", "i", "l", "s"])
        self.assertEqual(sorted(self.G2.active_trail_nodes(["d", "i"])["d"]), ["d", "g", "l"])

    def test_active_trail_nodes_args(self):
        """Reachable nodes per start node for several observed sets."""
        self.assertEqual(
            sorted(self.G2.active_trail_nodes(["d", "l"], observed="g")["d"]),
            ["d", "i", "s"],
        )
        self.assertEqual(
            sorted(self.G2.active_trail_nodes(["d", "l"], observed="g")["l"]), ["l"]
        )
        self.assertEqual(
            sorted(self.G2.active_trail_nodes("s", observed=["i", "l"])["s"]), ["s"]
        )
        self.assertEqual(
            sorted(self.G2.active_trail_nodes("s", observed=["d", "l"])["s"]),
            ["g", "i", "s"],
        )

    def test_is_active_trail_triplets(self):
        """Pairwise active-trail queries, with and without a single observed node."""
        self.assertTrue(self.G.is_active_trail("d", "l"))
        self.assertTrue(self.G.is_active_trail("g", "s"))
        self.assertFalse(self.G.is_active_trail("d", "i"))
        self.assertTrue(self.G.is_active_trail("d", "i", observed="g"))
        self.assertFalse(self.G.is_active_trail("d", "l", observed="g"))
        self.assertFalse(self.G.is_active_trail("i", "l", observed="g"))
        self.assertTrue(self.G.is_active_trail("d", "i", observed="l"))
        self.assertFalse(self.G.is_active_trail("g", "s", observed="i"))

    def test_is_active_trail(self):
        """Active-trail queries between nodes that are not directly linked."""
        self.assertFalse(self.G.is_active_trail("d", "s"))
        self.assertTrue(self.G.is_active_trail("s", "l"))
        self.assertTrue(self.G.is_active_trail("d", "s", observed="g"))
        self.assertFalse(self.G.is_active_trail("s", "l", observed="g"))

    def test_is_active_trail_args(self):
        """``observed`` may be passed positionally, as a string or a list."""
        self.assertFalse(self.G.is_active_trail("s", "l", "i"))
        self.assertFalse(self.G.is_active_trail("s", "l", "g"))
        self.assertTrue(self.G.is_active_trail("d", "s", "l"))
        self.assertFalse(self.G.is_active_trail("d", "s", ["i", "l"]))

    def test_get_cpds(self):
        """``get_cpds(node)`` returns the CPD whose variable is that node."""
        self.G.add_cpds(*self._random_cpds())
        self.assertEqual(self.G.get_cpds("d").variable, "d")

    def test_get_cpds1(self):
        """Lookup by name: prefix-sharing names, an unknown node, a node without CPD."""
        self.model = BayesianModel([("A", "AB")])
        cpd_a = TabularCPD("A", 2, values=np.random.rand(2, 1))
        cpd_ab = TabularCPD(
            "AB", 2, values=np.random.rand(2, 2), evidence=["A"], evidence_card=[2]
        )
        self.model.add_cpds(cpd_a, cpd_ab)
        self.assertEqual(self.model.get_cpds("A").variable, "A")
        self.assertEqual(self.model.get_cpds("AB").variable, "AB")
        # "B" is not in the graph yet, so the lookup must raise ...
        self.assertRaises(ValueError, self.model.get_cpds, "B")
        # ... and once it is a node without a CPD the lookup yields None.
        self.model.add_node("B")
        self.assertIsNone(self.model.get_cpds("B"))

    def test_add_single_cpd(self):
        """A single attached CPD is the only entry returned by ``get_cpds()``."""
        cpd_s = TabularCPD("s", 2, np.random.rand(2, 2), ["i"], [2])
        self.G.add_cpds(cpd_s)
        self.assertListEqual(self.G.get_cpds(), [cpd_s])

    def test_add_multiple_cpds(self):
        """Every CPD attached in one ``add_cpds`` call can be fetched by node."""
        cpd_d, cpd_i, cpd_g, cpd_l, cpd_s = self._random_cpds()
        self.G.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)
        self.assertEqual(self.G.get_cpds("d"), cpd_d)
        self.assertEqual(self.G.get_cpds("i"), cpd_i)
        self.assertEqual(self.G.get_cpds("g"), cpd_g)
        self.assertEqual(self.G.get_cpds("l"), cpd_l)
        self.assertEqual(self.G.get_cpds("s"), cpd_s)

    def test_check_model(self):
        """``check_model`` raises until every node has a CPD, then returns True."""
        cpd_g = TabularCPD(
            "g",
            2,
            values=np.array([[0.2, 0.3, 0.4, 0.6], [0.8, 0.7, 0.6, 0.4]]),
            evidence=["d", "i"],
            evidence_card=[2, 2],
        )
        cpd_s = TabularCPD(
            "s",
            2,
            values=np.array([[0.2, 0.3], [0.8, 0.7]]),
            evidence=["i"],
            evidence_card=[2],
        )
        cpd_l = TabularCPD(
            "l",
            2,
            values=np.array([[0.2, 0.3], [0.8, 0.7]]),
            evidence=["g"],
            evidence_card=[2],
        )
        self.G.add_cpds(cpd_g, cpd_s, cpd_l)
        # d and i have no CPD yet.
        self.assertRaises(ValueError, self.G.check_model)

        # Root CPDs use one row per state, i.e. shape (variable_card, 1), like
        # the np.random.rand(2, 1) tables used elsewhere in this class.
        cpd_d = TabularCPD("d", 2, values=[[0.8], [0.2]])
        cpd_i = TabularCPD("i", 2, values=[[0.7], [0.3]])
        self.G.add_cpds(cpd_d, cpd_i)
        self.assertTrue(self.G.check_model())

    def test_check_model1(self):
        """CPDs whose evidence differs from the node's parents are rejected.

        In the fixture graph g has parents d and i, and l has parent g; each
        CPD below lists a different evidence set.
        """
        two_cols = [[0.2, 0.3], [0.8, 0.7]]
        four_cols = [[0.2, 0.3, 0.4, 0.6], [0.8, 0.7, 0.6, 0.4]]
        eight_cols = [
            [0.2, 0.3, 0.4, 0.6, 0.2, 0.3, 0.4, 0.6],
            [0.8, 0.7, 0.6, 0.4, 0.8, 0.7, 0.6, 0.4],
        ]
        self._assert_check_model_rejects(
            TabularCPD("g", 2, values=np.array(two_cols), evidence=["i"], evidence_card=[2])
        )
        self._assert_check_model_rejects(
            TabularCPD(
                "g", 2, values=np.array(four_cols), evidence=["d", "s"], evidence_card=[2, 2]
            )
        )
        self._assert_check_model_rejects(
            TabularCPD("g", 2, values=np.array(two_cols), evidence=["l"], evidence_card=[2])
        )
        self._assert_check_model_rejects(
            TabularCPD("l", 2, values=np.array(two_cols), evidence=["d"], evidence_card=[2])
        )
        self._assert_check_model_rejects(
            TabularCPD(
                "l", 2, values=np.array(four_cols), evidence=["d", "i"], evidence_card=[2, 2]
            )
        )
        self._assert_check_model_rejects(
            TabularCPD(
                "l",
                2,
                values=np.array(eight_cols),
                evidence=["g", "d", "i"],
                evidence_card=[2, 2, 2],
            )
        )

    def test_check_model2(self):
        """CPDs with a column that does not sum to 1 are rejected."""
        self._assert_check_model_rejects(
            TabularCPD(
                "s",
                2,
                values=np.array([[0.5, 0.3], [0.8, 0.7]]),
                evidence=["i"],
                evidence_card=[2],
            )
        )
        self._assert_check_model_rejects(
            TabularCPD(
                "g",
                2,
                values=np.array([[0.2, 0.3, 0.4, 0.6], [0.3, 0.7, 0.6, 0.4]]),
                evidence=["d", "i"],
                evidence_card=[2, 2],
            )
        )
        self._assert_check_model_rejects(
            TabularCPD(
                "l",
                2,
                values=np.array([[0.2, 0.3], [0.1, 0.7]]),
                evidence=["g"],
                evidence_card=[2],
            )
        )

    def tearDown(self):
        # Release both fixtures created in setUp.
        del self.G
        del self.G2
class TestBayesianModelCPD(unittest.TestCase):
    """Active-trail tests on a five-node Bayesian model.

    The fixture graph has the edges d -> g, i -> g, g -> l and i -> s.

    NOTE: this class used to carry a long run of commented-out tests for
    ``set_states``, ``set_cpd`` / ``get_cpd``, ``set_observations``,
    ``reset_observations``, ``is_observed``, ``_get_ancestors_observation``
    and ``_get_observed_list`` (one of them marked "TODO change this as the
    function has changed"). None of it was executed, so it was dropped as
    dead code; recover it from version control if those tests are revived.
    """

    def setUp(self):
        self.G = BayesianModel([('d', 'g'), ('i', 'g'), ('g', 'l'), ('i', 's')])

    def test_active_trail_nodes(self):
        """Nodes reachable from a start node when nothing is observed."""
        self.assertEqual(sorted(self.G.active_trail_nodes('d')), ['d', 'g', 'l'])
        self.assertEqual(sorted(self.G.active_trail_nodes('i')), ['g', 'i', 'l', 's'])

    def test_active_trail_nodes_args(self):
        """Nodes reachable from a start node for several observed sets."""
        self.assertEqual(
            sorted(self.G.active_trail_nodes('d', observed='g')), ['d', 'i', 's'])
        self.assertEqual(
            sorted(self.G.active_trail_nodes('l', observed='g')), ['l'])
        self.assertEqual(
            sorted(self.G.active_trail_nodes('s', observed=['i', 'l'])), ['s'])
        self.assertEqual(
            sorted(self.G.active_trail_nodes('s', observed=['d', 'l'])), ['g', 'i', 's'])

    def test_is_active_trail_triplets(self):
        """Pairwise active-trail queries, with and without a single observed node."""
        self.assertTrue(self.G.is_active_trail('d', 'l'))
        self.assertTrue(self.G.is_active_trail('g', 's'))
        self.assertFalse(self.G.is_active_trail('d', 'i'))
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='g'))
        self.assertFalse(self.G.is_active_trail('d', 'l', observed='g'))
        self.assertFalse(self.G.is_active_trail('i', 'l', observed='g'))
        self.assertTrue(self.G.is_active_trail('d', 'i', observed='l'))
        self.assertFalse(self.G.is_active_trail('g', 's', observed='i'))

    def test_is_active_trail(self):
        """Active-trail queries between nodes that are not directly linked."""
        self.assertFalse(self.G.is_active_trail('d', 's'))
        self.assertTrue(self.G.is_active_trail('s', 'l'))
        self.assertTrue(self.G.is_active_trail('d', 's', observed='g'))
        self.assertFalse(self.G.is_active_trail('s', 'l', observed='g'))

    def test_is_active_trail_args(self):
        """``observed`` may be passed positionally, as a string or a list."""
        self.assertFalse(self.G.is_active_trail('s', 'l', 'i'))
        self.assertFalse(self.G.is_active_trail('s', 'l', 'g'))
        self.assertTrue(self.G.is_active_trail('d', 's', 'l'))
        self.assertFalse(self.G.is_active_trail('d', 's', ['i', 'l']))

    def tearDown(self):
        del self.G
variable_card=2, values=[[0.9], [0.1]]) cpd_smoke = TabularCPD(variable='Smoker', variable_card=2, values=[[0.3], [0.7]]) cpd_cancer = TabularCPD(variable='Cancer', variable_card=2, values=[[0.03, 0.05, 0.001, 0.02], [0.97, 0.95, 0.999, 0.98]], evidence=['Smoker', 'Pollution'], evidence_card=[2, 2]) cpd_xray = TabularCPD(variable='Xray', variable_card=2, values=[[0.9, 0.2], [0.1, 0.8]], evidence=['Cancer'], evidence_card=[2]) cpd_dysp = TabularCPD(variable='Dyspnoea', variable_card=2, values=[[0.65, 0.3], [0.35, 0.7]], evidence=['Cancer'], evidence_card=[2]) # Associating the parameters with the model structure. cancer_model.add_cpds(cpd_poll, cpd_smoke, cpd_cancer, cpd_xray, cpd_dysp) # Checking if the cpds are valid for the model. cancer_model.check_model() # Doing some simple queries on the network cancer_model.is_active_trail('Pollution', 'Smoker')