def setUp(self):
    """Build the two fixture networks used by the tests.

    The discrete 'Water Sprinkler' network is stored as ``self.BNet`` with
    its vertices on ``self.c``, ``self.s``, ``self.r`` and ``self.w``.  The
    continuous two-vertex network is stored as ``self.G2`` with its
    vertices on ``self.a`` and ``self.b``.
    """
    # ---- discrete network -------------------------------------------
    sprinkler = bayesnet.BNet('Water Sprinkler Bayesian Network')
    c, s, r, w = [
        sprinkler.add_v(bayesnet.BVertex(label, True, 2))
        for label in ('c', 's', 'r', 'w')
    ]
    # Edge ids are simply the number of edges already in the graph.
    for tail, head in ((c, r), (c, s), (r, w), (s, w)):
        edge_id = len(sprinkler.e)
        sprinkler.add_e(graph.DirEdge(edge_id, tail, head))
    sprinkler.InitDistributions()

    c.setDistributionParameters([0.5, 0.5])
    s.setDistributionParameters([0.5, 0.9, 0.5, 0.1])
    r.setDistributionParameters([0.8, 0.2, 0.2, 0.8])
    # w's table is filled one slice at a time, addressed by the indices
    # of its last two axes.
    w_slices = (
        ((0, 0), [0.99, 0.01]),
        ((0, 1), [0.1, 0.9]),
        ((1, 0), [0.1, 0.9]),
        ((1, 1), [0.0, 1.0]),
    )
    for (i, j), column in w_slices:
        w.distribution[:, i, j] = column

    self.c, self.s, self.r, self.w = c, s, r, w
    self.BNet = sprinkler

    # ---- simple continuous network: a -> b ---------------------------
    gaussian = bayesnet.BNet('Gaussian Bayesian Network')
    a, b = [
        gaussian.add_v(bayesnet.BVertex(label, False, 1))
        for label in ('a', 'b')
    ]
    gaussian.add_e(graph.DirEdge(len(gaussian.e), a, b))
    gaussian.InitDistributions()

    a.setDistributionParameters(mu=1.0, sigma=1.0)
    b.setDistributionParameters(mu=1.0, sigma=1.0, wi=2.0)

    self.a, self.b = a, b
    self.G2 = gaussian
def setUp(self):
    """Create the discrete 'Water Sprinkler' network fixture.

    The network ends up on ``self.BNet`` and its four vertices on
    ``self.c``, ``self.s``, ``self.r`` and ``self.w``.
    """
    net = bayesnet.BNet('Water Sprinkler Bayesian Network')

    # One BVertex(name, True, 2) per name, registered in the order c, s, r, w.
    vertices = []
    for label in ('c', 's', 'r', 'w'):
        vertices.append(net.add_v(bayesnet.BVertex(label, True, 2)))
    c, s, r, w = vertices

    # Directed edges c->r, c->s, r->w, s->w; ids are assigned sequentially.
    for source, target in ((c, r), (c, s), (r, w), (s, w)):
        net.add_e(graph.DirEdge(len(net.e), source, target))

    net.InitDistributions()

    # Vertices parameterised through the flat-list setter.
    for vertex, values in (
        (c, [0.5, 0.5]),
        (s, [0.5, 0.9, 0.5, 0.1]),
        (r, [0.8, 0.2, 0.2, 0.8]),
    ):
        vertex.setDistributionParameters(values)

    # w is filled slice by slice through its distribution object.
    w.distribution[:, 0, 0] = [0.99, 0.01]
    w.distribution[:, 0, 1] = [0.1, 0.9]
    w.distribution[:, 1, 0] = [0.1, 0.9]
    w.distribution[:, 1, 1] = [0.0, 1.0]

    self.c = c
    self.s = s
    self.r = r
    self.w = w
    self.BNet = net
def setUp(self):
    """Create the 'Asia' network fixture.

    Eight vertices are created with ``BVertex(name, True, 2)``, wired
    together, given their distribution tables, and exposed as attributes
    of the same name on ``self``.  The network itself is ``self.BNet``.
    """
    # create the network
    asia = bayesnet.BNet('Asia Bayesian Network')
    labels = 'visit smoking tuberculosis bronchitis lung ou Xray dyspnoea'.split()
    created = [asia.add_v(bayesnet.BVertex(label, True, 2)) for label in labels]
    (visit, smoking, tuberculosis, bronchitis,
     lung, ou, Xray, dyspnoea) = created

    arcs = (
        (visit, tuberculosis),
        (tuberculosis, ou),
        (smoking, lung),
        (lung, ou),
        (ou, Xray),
        (smoking, bronchitis),
        (bronchitis, dyspnoea),
        (ou, dyspnoea),
    )
    for tail, head in arcs:
        asia.add_e(graph.DirEdge(len(asia.e), tail, head))

    asia.InitDistributions()

    visit.setDistributionParameters([0.99, 0.01])

    tuberculosis.distribution[:, 0] = [0.99, 0.01]
    tuberculosis.distribution[:, 1] = [0.95, 0.05]

    smoking.setDistributionParameters([0.5, 0.5])

    lung.distribution[:, 0] = [0.99, 0.01]
    lung.distribution[:, 1] = [0.9, 0.1]

    # 'ou' table, addressed by the indices of its last two axes.
    for (i, j), column in (
        ((0, 0), [1, 0]),
        ((0, 1), [0, 1]),
        ((1, 0), [0, 1]),
        ((1, 1), [0, 1]),
    ):
        ou.distribution[:, i, j] = column

    Xray.distribution[:, 0] = [0.95, 0.05]
    Xray.distribution[:, 1] = [0.02, 0.98]

    bronchitis.distribution[:, 0] = [0.7, 0.3]
    bronchitis.distribution[:, 1] = [0.4, 0.6]

    # 'dyspnoea' table, addressed by parent name instead of axis position.
    for bronchitis_state, ou_state, column in (
        (0, 0, [0.9, 0.1]),
        (1, 0, [0.2, 0.8]),
        (0, 1, [0.3, 0.7]),
        (1, 1, [0.1, 0.9]),
    ):
        key = {'bronchitis': bronchitis_state, 'ou': ou_state}
        dyspnoea.distribution[key] = column

    self.visit = visit
    self.tuberculosis = tuberculosis
    self.smoking = smoking
    self.lung = lung
    self.ou = ou
    self.Xray = Xray
    self.bronchitis = bronchitis
    self.dyspnoea = dyspnoea
    self.BNet = asia
def setUp(self):
    """Create the 'Water Sprinkler' network fixture via flat parameter lists.

    Every vertex, including ``w``, is parameterised with
    ``setDistributionParameters``.  The network is stored on ``self.BNet``
    and the vertices on ``self.c``, ``self.s``, ``self.r`` and ``self.w``.
    """
    net = BNet('Water Sprinkler Bayesian Network')
    c, s, r, w = [
        net.add_v(BVertex(label, True, 2))
        for label in ('c', 's', 'r', 'w')
    ]

    for tail, head in ((c, r), (c, s), (r, w), (s, w)):
        net.add_e(graph.DirEdge(len(net.e), tail, head))

    # The block previously carried commented-out InitCPTs()/setCPT() calls
    # with these same value lists; only the distribution-based calls run.
    net.InitDistributions()
    parameters = (
        (c, [0.5, 0.5]),
        (s, [0.5, 0.9, 0.5, 0.1]),
        (r, [0.8, 0.2, 0.2, 0.8]),
        (w, [1, 0.1, 0.1, 0.01, 0.0, 0.9, 0.9, 0.99]),
    )
    for vertex, values in parameters:
        vertex.setDistributionParameters(values)

    self.c, self.s, self.r, self.w = c, s, r, w
    self.BNet = net
def LearnStruct(self, cases, N, alpha, approx):
    """Greedy search for optimal structure (all the data in cases are known).

    It will go through every node of the BNet. At each node, it will delete
    every outgoing edge, or add every possible edge, or reverse every
    possible edge. It will compute the BIC score each time and keep the BNet
    with the highest score.

    Only one structural move is kept per call: the one with the largest
    score improvement over the structure ``self.BNet`` had on entry.

    Parameters (all forwarded unchanged to ``ScoreBIC`` and, for ``cases``
    and ``approx``, to ``SetNewDistribution``):
        cases:  the learning data.
        N:      presumably the number of cases -- TODO confirm against ScoreBIC.
        alpha:  scoring parameter; meaning defined by ScoreBIC.
        approx: flag/option passed through; meaning defined by the callees.

    Side effects on ``self``:
        BNet      -- replaced by a copy of the best structure found.
        converged -- set to True when no move improved the score.
        inverted  -- the inversion chosen this call (possibly ``{}``) is appended.
        changed   -- reset to a one-element list holding the add/delete chosen
                     this call (possibly ``{}``).
    """
    # Snapshot of the structure on entry; self.BNet is restored from it
    # after every trial modification.
    G_initial = self.BNet.copy()
    # Engine bound to the snapshot, used to score the unmodified structure.
    engine_init = SEMLearningEngine(G_initial)
    G_best = self.BNet.copy()
    # Best score improvement seen so far (0 means "nothing better yet").
    prec_var_score = 0
    # Description of the winning move: `change` for an add/delete
    # ({parent name: child name}), `invert` for a reversal
    # ({new parent name: new child name}).  At most one of them is non-empty.
    invert = {}
    change = {}
    for v in self.BNet.all_v:
        G = copy.deepcopy(engine_init.BNet)
        edges = copy.deepcopy(G.v[v.name].out_e)
        # Candidate reversal {v.name: child name}; it only ever holds the
        # single key v.name, overwritten for each outgoing edge.
        temp = {}
        # delete the outgoing edges
        while edges:
            edge = edges.pop(0)
            node = edge._v[ 1] #node is the child node, the only node for which the cpt table changes
            # Score of the child in the untouched structure.
            dim_init = G_initial.Dimension(node)
            score_init = engine_init.ScoreBIC(N, dim_init, G_initial, \
                            G_initial.v[node.name], cases, alpha, approx)
            self.ChangeStruct('del', edge) #delete the current edge
            self.SetNewDistribution(engine_init.BNet, node, cases, approx)
            # Score of the child once the edge is gone.
            dim = self.BNet.Dimension(node)
            score = self.ScoreBIC(N, dim, self.BNet, self.BNet.v[node.name], \
                                  cases, alpha, approx)
            var_score = score - score_init
            if var_score > prec_var_score:
                # New best move: forget whatever was recorded before.
                change = {}
                invert = {}
                change[v.name] = node.name
                print 'deleted:', v.name, node.name, var_score
                prec_var_score = var_score
                G_best = self.BNet.copy()
            # Undo the trial before trying the next edge.
            self.BNet = G_initial.copy()
        # Add all possible edges
        G = copy.deepcopy(engine_init.BNet)
        # Candidate children: every vertex that is neither v itself nor
        # already a direct child of v.
        nodes = []
        for node in G.all_v:
            if (not (node.name in [vv.name for vv in self.BNet.v[v.name].out_v])) and \
               (not (node.name == v.name)):
                nodes.append(node)
        while nodes:
            node = nodes.pop(0)
            # New edge id: one past the largest existing id, or 0 when the
            # graph has no edges.
            if G.e.keys():
                edge = graph.DirEdge(max(G.e.keys()) + 1, \
                                     self.BNet.v[v.name], self.BNet.v[node.name])
            else:
                edge = graph.DirEdge(0, self.BNet.v[v.name], \
                                     self.BNet.v[node.name])
            self.ChangeStruct('add', edge)
            # Only score the trial if adding the edge kept the graph acyclic.
            if self.BNet.HasNoCycles(self.BNet.v[node.name]):
                dim_init = engine_init.BNet.Dimension(node)
                score_init = engine_init.ScoreBIC(N, dim_init, G_initial, \
                                G_initial.v[node.name], cases, alpha, approx)
                self.SetNewDistribution(engine_init.BNet, node, cases, approx)
                dim = self.BNet.Dimension(node)
                score = self.ScoreBIC(N, dim, self.BNet, \
                                      self.BNet.v[node.name], cases, \
                                      alpha, approx)
                var_score = score - score_init
                if var_score > prec_var_score:
                    change = {}
                    invert = {}
                    change[v.name] = node.name
                    print 'added: ', v.name, node.name, var_score
                    prec_var_score = var_score
                    G_best = self.BNet.copy()
            # Undo the trial (also when it was rejected for creating a cycle).
            self.BNet = G_initial.copy()
        # Invert all possible edges
        G = copy.deepcopy(G_initial)
        edges = copy.deepcopy(G.v[v.name].out_e)
        while edges:
            edge = edges.pop(0)
            node = self.BNet.v[edge._v[1].name] #node is the child node
            temp[v.name] = node.name
            # self.inverted holds the {new parent: new child} records of
            # earlier inversions; an edge v -> node that matches one of them
            # was itself produced by an inversion and is not flipped back.
            if temp not in self.inverted:
                # Step 1: remove v -> node and measure the effect on `node`.
                dim_init1 = G_initial.Dimension(node)
                score_init1 = engine_init.ScoreBIC(N, dim_init1, G_initial, \
                                G_initial.v[node.name], cases, alpha, approx)
                self.ChangeStruct('del', edge)
                self.SetNewDistribution(engine_init.BNet, node, \
                                        cases, approx)
                dim1 = self.BNet.Dimension(node)
                score1 = self.ScoreBIC(N, dim1, self.BNet, \
                                self.BNet.v[node.name], cases, alpha, approx)
                # Intermediate structure (edge removed, reverse edge not yet
                # added), passed to SetNewDistribution in step 2.
                G_invert = self.BNet.copy()
                engine_invert = SEMLearningEngine(G_invert)
                # Step 2: add node -> v and measure the effect on `v`.
                inverted_edge = graph.DirEdge(max(G.e.keys()) + 1, \
                                self.BNet.v[node.name], self.BNet.v[v.name])
                self.ChangeStruct('add', inverted_edge)
                if self.BNet.HasNoCycles(self.BNet.v[node.name]):
                    dim_init = G_initial.Dimension(v)
                    score_init = engine_init.ScoreBIC(N, dim_init, \
                                    G_initial, G_initial.v[v.name], cases, \
                                    alpha, approx)
                    self.SetNewDistribution(engine_invert.BNet, v, \
                                            cases, approx)
                    dim = self.BNet.Dimension(v)
                    score = self.ScoreBIC(N, dim, self.BNet, \
                                    self.BNet.v[v.name], cases, alpha, approx)
                    # Total gain of the reversal = gain at the old child
                    # plus gain at the old parent.
                    var_score = score1 - score_init1 + score - score_init
                    if var_score > prec_var_score + 5: #+ 5 is to avoid recalculation due to round errors
                        invert = {}
                        change = {}
                        invert[node.name] = v.name
                        print 'inverted:', v.name, node.name, var_score
                        prec_var_score = var_score
                        G_best = self.BNet.copy()
                # Undo the trial before trying the next edge.
                self.BNet = G_initial.copy()

    #self.BNet is the optimal graph structure
    # No move produced a positive improvement: the search has converged.
    if prec_var_score == 0:
        self.converged = True
    self.BNet = G_best.copy()
    # Recorded even when empty ({}); consulted by the inversion phase above
    # on later calls.
    self.inverted.append(invert)
    self.changed = []
    self.changed.append(change)
for i in range(3): case = cases[3 * i] rand = random.sample(['visit', 'smoking', 'tuberculosis', \ 'bronchitis', 'lung', 'ou', 'Xray', 'dyspnoea'], 1)[0] case[rand] = '?' # create two new bayesian network with the same parameters as self.BNet G1 = bayesnet.BNet('Asia Bayesian Network2') visit, smoking, tuberculosis, bronchitis, lung, ou, Xray, dyspnoea = \ [G1.add_v( bayesnet.BVertex( nm, True, 2 ) ) for nm in \ 'visit smoking tuberculosis bronchitis lung ou Xray dyspnoea'.split()] for ep in [(visit, tuberculosis), (tuberculosis, ou), (smoking, lung), \ (lung, ou), (ou, Xray), (smoking, bronchitis), \ (bronchitis, dyspnoea), (ou, dyspnoea)]: G1.add_e(graph.DirEdge(len(G1.e), *ep)) G1.InitDistributions() ## tuberculosis.distribution[:,0]=[0.99, 0.01] ## tuberculosis.distribution[:,1]=[0.95, 0.05] ## smoking.setDistributionParameters([0.5, 0.5]) ## lung.distribution[:,0]=[0.99, 0.01] ## lung.distribution[:,1]=[0.9, 0.1] ## ou.distribution[:,0,0]=[1, 0] ## ou.distribution[:,0,1]=[0, 1] ## ou.distribution[:,1,0]=[0, 1] ## ou.distribution[:,1,1]=[0, 1] ## Xray.distribution[:,0]=[0.946, 0.054] ## Xray.distribution[:,1]=[0.0235, 0.9765] ## bronchitis.distribution[:,0]=[0.7, 0.3] ## bronchitis.distribution[:,1]=[0.4, 0.6] ## dyspnoea.distribution[{'bronchitis':0,'ou':0}]=[0.907, 0.093]