def recur(sc, temp, number, bn, val, jp):
    """Recursively enumerate every assignment of the last `number` variables
    of `bn` into the scenario dict `sc`, and for each complete assignment
    append [value_1, ..., value_n, joint_probability] to `jp`.

    Arguments:
    1. *sc* -- scenario dict mapping variable name -> assigned value (mutated).
    2. *temp* -- ignored on entry; rebound to a fresh list per leaf. -- TODO confirm callers rely on this
    3. *number* -- how many trailing variables of bn.V still need a value.
    4. *bn* -- the discrete Bayesian network (provides V and Vdata).
    5. *val* -- the two candidate values assigned to each variable.
    6. *jp* -- accumulator list of [assignment..., probability] rows (mutated).
    """
    if number:
        # Branch: try both candidate values for the next undecided variable.
        target = bn.V[len(bn.V) - number]
        for k in range(2):
            sc[target] = val[k]
            recur(sc, temp, number - 1, bn, val, jp)
    else:
        # Leaf: every variable has a value; compute the joint probability
        # as the product of each node's (conditional) probability.
        probs = []
        joint = 1
        temp = []
        for node in bn.V:
            parents = bn.Vdata[node]['parents']
            if parents:
                # Conditional node: query P(node = sc[node] | parents as in sc).
                # A fresh factorization is built per query because specificquery
                # consumes the factor tables.
                fn = TableCPDFactorization(bn)
                evidence = {parent: sc[parent] for parent in parents}
                # NOTE(review): list() splits the state string into characters;
                # presumably all states are single characters here -- confirm.
                query = {node: list(sc[node])}
                probs.append(fn.specificquery(query, evidence))
            else:
                # Root node: read its prior straight from the CPT;
                # row 0 corresponds to state '0', row 1 to the other state.
                row = 0 if sc[node] == '0' else 1
                probs.append(bn.Vdata[node]['cprob'][row])
            temp.append(sc[node])
            joint *= probs[-1]
        temp.append(joint)
        jp.append(temp)
def specificquery(self, query, evidence):
    '''
    .. note: Shortcut method to the *specificquery* method in :doc:`tablecpdfactorization`

    Eliminate all variables except those named in *query*, adjust the
    remaining distributions for *evidence*, and return the probability of
    the exact event described by *query*.

    Arguments:
        1. *query* -- A dict of (variable: values) pairs describing the
           outcome whose probability is wanted. Each value is a list of
           one or more states the variable may take.
        2. *evidence* -- A dict of (variable: value) pairs of known facts
           about the system.

    Returns:
        The probability of the specified event(s), a float in [0, 1].

    Compound queries of the form P((x=A or x=B) and (y=C or y=D)) are
    supported by listing the alternatives per variable::

        {
            "x": ["A", "B"],
            "y": ["C", "D"]
        }
    '''
    # validate: the network must be fully loaded before querying
    if not all(hasattr(self, attr) for attr in ("V", "E", "Vdata")):
        raise notloadedError("Bayesian network is missing essential attributes")
    assert isinstance(query, dict) and isinstance(evidence, dict), "query and evidence must be dicts"
    for values in query.values():
        assert isinstance(values, list), "the values of your query must be lists, even if singletons"

    # calculate: delegate to a fresh factorization of this network
    return TableCPDFactorization(self).specificquery(query, evidence)
def infer(self, sensor_evidence, fsm_evidence):
    """Run one inference pass and return the list of events to fire.

    Arguments:
        1. *sensor_evidence* -- dict mapping sensor proxy-node name -> probability
           that the sensor reading is true.
        2. *fsm_evidence* -- dict of (variable: value) FSM state evidence.

    Returns:
        A list of {"fsm": ..., "event": ...} dicts for every output whose
        query probability exceeded its threshold (plus self.event_caution).

    Side effects: rewrites the "cprob" tables of the sensor proxy nodes in
    self.net and logs one JSON record per query and per fired event.
    """
    # sensor values are always True; their proxy nodes encode the real probability
    evidence = dict(fsm_evidence)
    evidence.update({k: "T" for k in sensor_evidence})

    # update probability of proxy nodes
    # (.items() instead of the Python-2-only .iteritems() -- identical
    # behavior on py2, and no longer crashes on py3)
    for sensor, p in sensor_evidence.items():
        self.net.Vdata[sensor]["cprob"] = {
            "['T']": [p, 1 - p],
            "['F']": [(1 - p), p]
        }

    # refactorize with the updated tables
    fn = TableCPDFactorization(self.net)
    events = []
    for output in self.outputs.values():
        # specificquery consumes the factor tables; reset before each query
        fn.refresh()
        query = {}
        for q in output["query"]:
            # negated terms ask for the 'F' outcome of the variable
            query[normalise_name(q)] = ['F'] if is_negated(q) else ['T']
        prob = fn.specificquery(query, evidence)
        ev = output["event"]
        # joins the query *variable names* only, for logging
        formatted_query = " AND ".join(query)
        logger.info(json.dumps({
            'type': 'query',
            'query': formatted_query,
            'value': '%.8f' % prob,
            'threshold': '%.8f' % (1 - np.exp(ev['logp'])),
            'fsm': ev.get("fsm", None),
            'event': ev['event']
        }))
        # fire when the probability clears the log-threshold plus a caution margin
        if prob > (1 - np.exp(ev["logp"])) + self.event_caution:
            logger.info(json.dumps({
                'type': 'fire_event',
                'fsm': ev.get("fsm", None),
                'event': ev['event']
            }))
            events.append({"fsm": ev.get("fsm", None), "event": ev["event"]})
    return events
def calc_BNprob(df_test):
    """Classify each row of df_test by querying the (module-global) network bn.

    For every row, computes P(Surv='1' | Fare, Sex, Class) and predicts
    class 1 when that probability is >= 0.5, else 0.

    Arguments:
        1. *df_test* -- DataFrame with Fare, Sex and Pclass columns whose
           string forms match the network's state names.

    Returns:
        pd.Series of 0/1 predictions, one per input row.
    """
    result = pd.Series()
    for row in df_test.itertuples():
        # a fresh factorization is required per query: specificquery
        # consumes the factor tables
        tablecpd = TableCPDFactorization(bn)
        # fix: query values must be *lists* per the libpgm specificquery API
        # (a bare string only worked by accident of single-char iteration)
        prob_surv = tablecpd.specificquery(
            dict(Surv=['1']),
            dict(Fare=str(row.Fare), Sex=str(row.Sex), Class=str(row.Pclass))
        )
        surv_class = 1 if prob_surv >= 0.5 else 0
        result = result.append(pd.Series([surv_class]), ignore_index=True)
    return result
skel2 = GraphSkeleton()
nd2.load(path_bn2)
skel2.load(path_bn2)
# topologically order the second graph skeleton
skel2.toporder()

# FINDING NEXT ACTIVITY ATTRIBUTES THROUGH INFERENCE ON BN 1

# weekday variable: P(wkdayT1 = item | wkdayT0 = user input)
evidence1 = {'wkdayT0': userinput[0]}
for i, item in enumerate(wkdayValsList):
    # the network and factorization must be rebuilt every iteration:
    # specificquery consumes the factor tables
    bn1 = DiscreteBayesianNetwork(skel1, nd1)
    fn1 = TableCPDFactorization(bn1)
    query1 = {'wkdayT1': [item]}
    wkdayProbList.append(fn1.specificquery(query1, evidence1))
# pick the value with the highest posterior probability
most_probable_wkdayT1 = wkdayValsList[numpy.argmax(wkdayProbList)]

# hour variable: P(hourT1 = item | hourT0 = user input)
evidence1 = {'hourT0': userinput[1]}
for i, item in enumerate(hourValsList):
    bn1 = DiscreteBayesianNetwork(skel1, nd1)
    fn1 = TableCPDFactorization(bn1)
    query1 = {'hourT1': [item]}
    hourProbList.append(fn1.specificquery(query1, evidence1))
most_probable_hourT1 = hourValsList[numpy.argmax(hourProbList)]
listofDicts = json.loads(asciiData)
skel = GraphSkeleton()
skel.load("../skeleton.json")
learner = PGMLearner()
# learn the CPTs by maximum likelihood, then factorize for querying
result = learner.discrete_mle_estimateparams(skel, listofDicts)
tcf = TableCPDFactorization(result)

# P(rating = 1 | occupation = student)
myquery = {'rating': [1]}
myevidence = {'occupation': 'student'}
result = tcf.specificquery(query=myquery, evidence=myevidence)
print(result)
tcf.refresh()

# P(rating = 2 | occupation = student)
myquery = {'rating': [2]}
myevidence = {'occupation': 'student'}
result = tcf.specificquery(query=myquery, evidence=myevidence)
print(result)
tcf.refresh()

# P(rating = 3 | occupation = student)
myquery = {'rating': [3]}
myevidence = {'occupation': 'student'}
listofDicts = json.loads(asciiData)
skel = GraphSkeleton()
skel.load("../skeleton.json")
learner = PGMLearner()
# learn the CPTs by maximum likelihood, then factorize for querying
result = learner.discrete_mle_estimateparams(skel, listofDicts)
tcf = TableCPDFactorization(result)

# P(rating = 1 | genre = Drama)
myquery = {'rating': [1]}
myevidence = {'genre': 'Drama'}
result = tcf.specificquery(query=myquery, evidence=myevidence)
print(result)
tcf.refresh()

# P(rating = 2 | genre = Drama)
myquery = {'rating': [2]}
myevidence = {'genre': 'Drama'}
result = tcf.specificquery(query=myquery, evidence=myevidence)
print(result)
tcf.refresh()

# rating 3, genre "multiple"
myquery = {'rating': [3]}
skel = GraphSkeleton()
nd.load('unittestdict.txt')
skel.load('unittestdict.txt')
bn = DiscreteBayesianNetwork(skel, nd)
fn = TableCPDFactorization(bn)

for i in range(len(EachLike)):
    evidence = {}
    # discretize the "like" count into Small / Mid / Big buckets
    if EachLike[i] <= EachLikeThreshold[0]:
        evidence['LikeN'] = 'Small'
    elif EachLike[i] <= EachLikeThreshold[1]:
        # the lower bound is guaranteed by the failed first branch
        evidence['LikeN'] = 'Mid'
    else:
        evidence['LikeN'] = 'Big'
    # discretize the "liked" count the same way
    if EachLiked[i] <= EachLikedThreshold[0]:
        evidence['LikedN'] = 'Small'
    elif EachLiked[i] <= EachLikedThreshold[1]:
        evidence['LikedN'] = 'Mid'
    else:
        evidence['LikedN'] = 'Big'
    print(evidence)
    query = {'BulliedPro': ['NO']}
    result = fn.specificquery(query, evidence)
    # rebuild the factorization: specificquery consumed the previous one
    fn = TableCPDFactorization(bn)
    BulliedPro.append(result)
    print(BulliedPro)
class TestTableCPDFactorization(unittest.TestCase):
    """Unit tests for TableCPDFactorization against the student-network fixture."""

    def setUp(self):
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()
        nodedata = NodeData()
        nodedata.load("unittestdict.txt")
        self.bn = DiscreteBayesianNetwork(skel, nodedata)
        self.fn = TableCPDFactorization(self.bn)

    def test_constructor(self):
        # the fixture network has 5 nodes, hence 5 original factors
        self.assertTrue(len(self.fn.originalfactorlist) == 5)
        for factor in self.fn.originalfactorlist:
            self.assertTrue(isinstance(factor, TableCPDFactor))

    def test_refresh(self):
        # refreshing must not change query results
        evidence = {'Letter': 'weak'}
        query = {'Intelligence': ['high']}
        result1 = self.fn.specificquery(query, evidence)
        self.fn.refresh()
        result2 = self.fn.specificquery(query, evidence)
        self.assertEqual(result1, result2)

    def test_sumproducteliminatevar(self):
        self.fn.refresh()
        self.fn.sumproducteliminatevar("Difficulty")
        # exactly one resulting factor should have scope [Grade, Intelligence]
        matches = [x for x in range(len(self.fn.factorlist))
                   if self.fn.factorlist[x].scope == ['Grade', 'Intelligence']]
        self.assertTrue(len(matches) == 1)
        index = matches[-1]
        exp = [0.2, 0.33999999999999997, 0.45999999999999996, 0.74, 0.16799999999999998, 0.09200000000000001]
        for got, want in zip(self.fn.factorlist[index].vals, exp):
            self.assertTrue(abs(got - want) < .01)

    def test_sumproductve(self):
        to_eliminate = ["Difficulty", "Grade", "Intelligence", "SAT"]
        self.fn.refresh()
        self.fn.sumproductve(to_eliminate)
        exp = [.498, .502]
        for got, want in zip(self.fn.factorlist.vals, exp):
            self.assertTrue(abs(got - want) < .01)

    def test_condprobve(self):
        evidence = {'Grade': 'C', 'SAT': 'highscore'}
        query = {'Intelligence': 'high'}
        self.fn.refresh()
        self.fn.condprobve(query, evidence)
        exp = [.422, .578]
        for got, want in zip(self.fn.factorlist.vals, exp):
            self.assertTrue(abs(got - want) < .01)

    def test_specificquery(self):
        evidence = {'Difficulty': 'easy'}
        query = {'Grade': ['A', 'B']}
        self.fn.refresh()
        answer = self.fn.specificquery(query, evidence)
        self.assertTrue(abs(answer - .784) < .01)

    def test_gibbssample(self):
        evidence = {'Letter': 'weak'}
        gs = self.fn.gibbssample(evidence, 5)
        self.assertTrue(gs[0]["Difficulty"] in ('easy', 'hard'))
        self.assertTrue(len(gs) == 5)
        # evidence must be clamped in every sample
        for entry in gs:
            self.assertTrue(entry["Letter"] == 'weak')
class Graph:
    """Convenience wrapper that collects nodes, builds a libpgm discrete
    Bayesian network from them, answers posterior queries, and renders the
    graph via Graphviz."""

    def __init__(self):
        self.node = dict()  # node name -> node object
        self.obs = dict()   # node name -> [node, observed value]

    def addnode(self, node):
        """Register a node under its name (expects node.name)."""
        self.node[node.name] = node

    def removeNode(self, name):
        """Drop a node by name; silently ignores unknown names.
        (`in` replaces the Python-2-only dict.has_key().)"""
        if name in self.node:
            del self.node[name]

    def addobs(self, node, value):
        """Record an observation (node, value) under the node's name."""
        self.obs[node.name] = [node, value]

    def removeObs(self, name):
        """Drop an observation by node name; silently ignores unknown names."""
        if name in self.obs:
            del self.obs[name]

    def setup(self):
        """Build NodeData/GraphSkeleton from the registered nodes, then
        construct the Bayesian network and its factorization.

        Each node must provide: name, values, cpt, parents."""
        self.nd = NodeData()
        self.skel = GraphSkeleton()
        self.skel.V, self.skel.E = [], []
        self.nd.Vdata = {}
        for i, node in enumerate(self.node.values()):
            dNode = {}
            node.sId = str(i)  # numeric id used as the dot-node name
            dNode["numoutcomes"] = len(node.values)
            dNode["vals"] = node.values
            dNode["cprob"] = node.cpt
            self.skel.V.append(node.name)
            aParents = []
            for parent in node.parents:
                if parent is None:
                    continue
                aParents.append(parent.name)
                self.skel.E.append([parent.name, node.name])
            # libpgm expects None (not []) for root nodes
            dNode["parents"] = aParents if len(aParents) > 0 else None
            self.nd.Vdata[node.name] = dNode
        self.skel.toporder()
        self.bn = DiscreteBayesianNetwork(self.skel, self.nd)
        self.fn = TableCPDFactorization(self.bn)

    def getPost(self, query, evidence):
        """Return P(query | evidence) from the current factorization."""
        result = self.fn.specificquery(query, evidence)
        return result

    def write2dot(self, fname="graph.dot"):
        """Write the graph in Graphviz dot format; observed nodes are
        drawn filled and blue."""
        f = open(fname, "w")
        f.write("digraph G {\n")
        f.write("node[shape=circle, width=0.4];\n")
        # bug fix: iterate the observation *values* ([node, value] pairs).
        # The old code mapped over self.obs itself, i.e. over the key
        # strings, so x[0] was a name's first character and observed
        # nodes were never highlighted.
        observed = [entry[0] for entry in self.obs.values()]
        for node in self.node.values():
            l = "\"" + node.name + "\""
            f.write(node.sId)
            if node in observed:
                f.write("[label=" + l + ",style=filled,color=blue]")
            else:
                f.write("[label=" + l + "]")
            f.write(";\n")
            for parent in node.parents:
                if parent is None:
                    continue
                f.write(parent.sId + " -> " + node.sId + ";\n")
        f.write("}")
        f.close()

    def write2pdf(self, fname="graph.pdf"):
        """Render the graph to PDF by shelling out to `dot`.
        Accepts the target name with or without a .pdf suffix."""
        # NOTE(review): assumes ".pdf" only appears as the suffix
        if ".pdf" in fname:
            fname = fname[:-4]
        pdfFile = fname + ".pdf"
        dotFile = fname + ".dot"
        self.write2dot(dotFile)
        call(['dot', '-Tpdf', dotFile, '-o', pdfFile])
class TestTableCPDFactorization(unittest.TestCase):
    """Tests for TableCPDFactorization built on the student-network fixture."""

    def setUp(self):
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()
        nodedata = NodeData()
        nodedata.load("unittestdict.txt")
        self.bn = DiscreteBayesianNetwork(skel, nodedata)
        self.fn = TableCPDFactorization(self.bn)

    def test_constructor(self):
        # the fixture network contributes exactly 5 factors
        self.assertTrue(len(self.fn.originalfactorlist) == 5)
        for idx in range(5):
            self.assertTrue(isinstance(self.fn.originalfactorlist[idx], TableCPDFactor))

    def test_refresh(self):
        self.fn.refresh()
        # a refreshed working list again holds factor objects
        for idx in range(5):
            self.assertTrue(isinstance(self.fn.factorlist[idx], TableCPDFactor))

    def test_sumproducteliminatevar(self):
        self.fn.refresh()
        self.fn.sumproducteliminatevar("Difficulty")
        # exactly one factor must now cover [Grade, Intelligence]
        matches = [x for x in range(len(self.fn.factorlist))
                   if self.fn.factorlist[x].scope == ['Grade', 'Intelligence']]
        self.assertTrue(len(matches) == 1)
        index = matches[-1]
        exp = [0.2, 0.33999999999999997, 0.45999999999999996, 0.74, 0.16799999999999998, 0.09200000000000001]
        for got, want in zip(self.fn.factorlist[index].vals, exp):
            self.assertTrue(abs(got - want) < .01)

    def test_sumproductve(self):
        to_eliminate = ["Difficulty", "Grade", "Intelligence", "SAT"]
        self.fn.refresh()
        self.fn.sumproductve(to_eliminate)
        exp = [.498, .502]
        for got, want in zip(self.fn.factorlist.vals, exp):
            self.assertTrue(abs(got - want) < .01)

    def test_condprobve(self):
        evidence = {'Grade': 'C', 'SAT': 'highscore'}
        query = {'Intelligence': 'high'}
        self.fn.refresh()
        self.fn.condprobve(query, evidence)
        exp = [.422, .578]
        for got, want in zip(self.fn.factorlist.vals, exp):
            self.assertTrue(abs(got - want) < .01)

    def test_specificquery(self):
        evidence = {'Difficulty': 'easy'}
        query = {'Grade': ['A', 'B']}
        self.fn.refresh()
        answer = self.fn.specificquery(query, evidence)
        self.assertTrue(abs(answer - .784) < .01)

    def test_gibbssample(self):
        evidence = {'Letter': 'weak'}
        gs = self.fn.gibbssample(evidence, 5)
        self.assertTrue(gs[0]["Difficulty"] in ('easy', 'hard'))
        self.assertTrue(len(gs) == 5)
        # the clamped evidence must appear in every sample
        for entry in gs:
            self.assertTrue(entry["Letter"] == 'weak')
# Compute the probability distribution over a specific node or nodes

# load nodedata and graphskeleton
nd = NodeData()
skel = GraphSkeleton()
nd.load("../tests/unittestdict.txt")
skel.load("../tests/unittestdict.txt")

# topologically order the graph skeleton
# (toporder() returns None; the print is just a progress marker)
print(skel.toporder())

# evidence and query
evidence = {"Intelligence": "high"}
# fix: specificquery requires the query values to be *lists* per the
# libpgm API (a bare string only worked by accident for one-char states)
query = {"Grade": ["A"]}

# load bayesian network
bn = DiscreteBayesianNetwork(skel, nd)

# load factorization
fn = TableCPDFactorization(bn)

# calculate P(Grade = A | Intelligence = high)
result = fn.specificquery(query, evidence)
print(result)
import json
from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.tablecpdfactorization import TableCPDFactorization

# load nodedata and graphskeleton
nd = NodeData()
skel = GraphSkeleton()
nd.load("tests/net1.json")    # any input file
skel.load("tests/net1.json")

# topologically order graphskeleton
skel.toporder()

# load bayesian network
bn = DiscreteBayesianNetwork(skel, nd)
fn = TableCPDFactorization(bn)

# query P(C = T | B = F)
# fix: specificquery requires the query values to be *lists* per the
# libpgm API (a bare string only worked by accident for one-char states)
result = fn.specificquery(dict(C=['T']), dict(B='F'))

# output
print(json.dumps(result, indent=2))
nd.load(jsonpath_node)
skel.load(jsonpath_skel)

# load bayesian network
bn = DiscreteBayesianNetwork(skel, nd)

print((skel.getchildren("Class"), skel.getchildren("Sex"), skel.getchildren("Fare"), skel.getchildren("Surv")))
# expected output (was pasted into the code as a stray bare expression):
# ([u'Surv'], [u'Surv'], [u'Class'], [])

# We can now start querying our network. We provide a query (first dict in
# the arguments) and an evidence (second dict in the args).
# Notes:
#  - a fresh TableCPDFactorization is built before every query because
#    specificquery consumes the factor tables;
#  - query values must be *lists* per the libpgm API (bare strings only
#    worked by accident for one-character states).
tablecpd = TableCPDFactorization(bn)
print("P(Surv=0) = {}".format(tablecpd.specificquery(dict(Surv=['0']), dict())))

tablecpd = TableCPDFactorization(bn)
print("P(Surv = 1) = {}".format(tablecpd.specificquery(dict(Surv=['1']), dict())))

tablecpd = TableCPDFactorization(bn)
print("P(Surv = 1 | Fare = 0) = {}".format(tablecpd.specificquery(dict(Surv=['1']), dict(Fare='0'))))

tablecpd = TableCPDFactorization(bn)
print("P(Surv = 1 | Fare = 1) = {}".format(tablecpd.specificquery(dict(Surv=['1']), dict(Fare='1'))))

tablecpd = TableCPDFactorization(bn)
print("P(Surv = 1 | Fare = 1, Sex = 0) = {}".format(tablecpd.specificquery(dict(Surv=['1']), dict(Fare='1', Sex='0'))))

tablecpd = TableCPDFactorization(bn)
# learn maximum-likelihood parameters over the skeleton
result = learner.discrete_mle_estimateparams(skel, listofDicts)

tcf = TableCPDFactorization(result)

# P(rating = 5 | occupation = student)
myquery = {'rating': [5]}
myevidence = {'occupation': 'student'}
res2 = tcf.specificquery(query=myquery, evidence=myevidence)
print(res2)