def setUp(self):
    """Build the DiscreteBayesianNetwork under test from unittestdict.txt."""
    skeleton = GraphSkeleton()
    skeleton.load("unittestdict.txt")
    skeleton.toporder()
    data = NodeData()
    data.load("unittestdict.txt")
    self.instance = DiscreteBayesianNetwork(skeleton, data)
def set_bayesnet(self):
    """(Re)build self.bn from the network file stored in self.file."""
    skeleton = GraphSkeleton()
    node_data = NodeData()
    skeleton.load(self.file)
    node_data.load(self.file)
    skeleton.toporder()
    self.bn = DiscreteBayesianNetwork(skeleton, node_data)
def setUp(self):
    """Build the test network and aggregate random and Gibbs samples."""
    skel = GraphSkeleton()
    skel.load("unittestdict.txt")
    skel.toporder()
    # BUG FIX: NodeData.load() is an instance method that mutates in place and
    # returns None, so `nodedata = NodeData.load(...)` left nodedata as None;
    # DiscreteBayesianNetwork also requires (skeleton, nodedata).
    nodedata = NodeData()
    nodedata.load("unittestdict.txt")
    self.bn = DiscreteBayesianNetwork(skel, nodedata)
    agg = SampleAggregator()
    agg.aggregate(self.bn.randomsample(50))
    self.rseq = agg.seq
    self.ravg = agg.avg
    self.fn = TableCPDFactorization(self.bn)
    evidence = dict(Letter='weak')
    agg.aggregate(self.fn.gibbssample(evidence, 51))
    self.gseq = agg.seq
    self.gavg = agg.avg
class TestSampleAggregator(unittest.TestCase):

    def setUp(self):
        """Aggregate forward samples and Gibbs samples from the test network."""
        skeleton = GraphSkeleton()
        skeleton.load("unittestdict.txt")
        skeleton.toporder()
        node_data = NodeData()
        node_data.load("unittestdict.txt")
        self.bn = DiscreteBayesianNetwork(skeleton, node_data)
        aggregator = SampleAggregator()
        aggregator.aggregate(self.bn.randomsample(50))
        self.rseq = aggregator.seq
        self.ravg = aggregator.avg
        self.fn = TableCPDFactorization(self.bn)
        aggregator.aggregate(self.fn.gibbssample(dict(Letter='weak'), 51))
        self.gseq = aggregator.seq
        self.gavg = aggregator.avg

    def test_rseq(self):
        """Averaged random-sample frequencies form a distribution per node."""
        self.assertTrue(len(self.rseq) == 50)
        for node in self.ravg.keys():
            total = sum(self.ravg[node].values())
            self.assertTrue(.99 < total < 1.01)

    def test_gseq(self):
        """Averaged Gibbs-sample frequencies form a distribution per node."""
        self.assertTrue(len(self.gseq) == 51)
        for node in self.gavg.keys():
            total = sum(self.gavg[node].values())
            self.assertTrue(.99 < total < 1.01)
class TestSampleAggregator(unittest.TestCase):

    def setUp(self):
        # Build the reference network once per test.
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()
        nd = NodeData()
        nd.load("unittestdict.txt")
        self.bn = DiscreteBayesianNetwork(skel, nd)
        # Collect 50 forward samples ...
        agg = SampleAggregator()
        agg.aggregate(self.bn.randomsample(50))
        self.rseq, self.ravg = agg.seq, agg.avg
        # ... and 51 Gibbs samples conditioned on a weak letter.
        self.fn = TableCPDFactorization(self.bn)
        evidence = dict(Letter='weak')
        agg.aggregate(self.fn.gibbssample(evidence, 51))
        self.gseq, self.gavg = agg.seq, agg.avg

    def _check_distributions(self, avg):
        # Each node's averaged outcome frequencies should sum to ~1.
        for outcomes in avg.values():
            total = 0
            for freq in outcomes.values():
                total += freq
            self.assertTrue(total > .99 and total < 1.01)

    def test_rseq(self):
        self.assertTrue(len(self.rseq) == 50)
        self._check_distributions(self.ravg)

    def test_gseq(self):
        self.assertTrue(len(self.gseq) == 51)
        self._check_distributions(self.gavg)
def setUp(self):
    """Create the network and factorization used by the tests."""
    skel = GraphSkeleton()
    skel.load("unittestdict.txt")
    skel.toporder()
    # BUG FIX: NodeData.load() mutates an instance and returns None, and
    # DiscreteBayesianNetwork expects both the skeleton and the node data.
    nodedata = NodeData()
    nodedata.load("unittestdict.txt")
    self.bn = DiscreteBayesianNetwork(skel, nodedata)
    self.fn = TableCPDFactorization(self.bn)
def setUp(self):
    # Load the shared unit-test network definition before each test.
    graph = GraphSkeleton()
    graph.load("unittestdict.txt")
    graph.toporder()
    vdata = NodeData()
    vdata.load("unittestdict.txt")
    self.instance = DiscreteBayesianNetwork(graph, vdata)
def createData(): nd = NodeData() skel = GraphSkeleton() fpath = "job_interview.txt" nd.load(fpath) skel.load(fpath) skel.toporder() bn = DiscreteBayesianNetwork(skel, nd) learner = PGMLearner() data = bn.randomsample(1000) X, Y = 'Grades', 'Offer' c,p,w=learner.discrete_condind(data, X, Y, ['Interview']) print "independence between X and Y: ", c, " p-value ", p, " witness node: ", w result = learner.discrete_constraint_estimatestruct(data) print result.E
def setUp(self):
    """Build the network and the two table CPD factors under test."""
    skel = GraphSkeleton()
    skel.load("unittestdict.txt")
    skel.toporder()
    # BUG FIX: NodeData.load() returns None (it mutates the instance), and
    # DiscreteBayesianNetwork requires (skeleton, nodedata).
    nodedata = NodeData()
    nodedata.load("unittestdict.txt")
    self.instance = DiscreteBayesianNetwork(skel, nodedata)
    self.factor = TableCPDFactor("Grade", self.instance)
    self.factor2 = TableCPDFactor("Letter", self.instance)
def getTableCPD(jsonpath=""):
    """Load a Bayesian network description from *jsonpath* and return its
    TableCPDFactorization.

    The path used to be a hard-coded empty string (which cannot load any
    file); it is now a parameter, defaulting to the old value for
    backward compatibility.
    """
    nd = NodeData()
    skel = GraphSkeleton()
    nd.load(jsonpath)
    skel.load(jsonpath)
    # BUG FIX: the skeleton must be topologically ordered before the network
    # is constructed (every other loader in this file calls toporder()).
    skel.toporder()
    bn = DiscreteBayesianNetwork(skel, nd)
    tablecpd = TableCPDFactorization(bn)
    return tablecpd
def getTableCPD():
    """Load ./graph/graph_example.txt and return its TableCPDFactorization."""
    nd = NodeData()
    skel = GraphSkeleton()
    jsonpath = "./graph/graph_example.txt"
    nd.load(jsonpath)
    skel.load(jsonpath)
    # BUG FIX: topologically order the skeleton before building the network,
    # as the other loaders in this file do.
    skel.toporder()
    # load Bayesian network
    bn = DiscreteBayesianNetwork(skel, nd)
    tablecpd = TableCPDFactorization(bn)
    return tablecpd
def q_without_ros(): skel = GraphSkeleton() skel.V = ["prize_door", "guest_door", "monty_door"] skel.E = [["prize_door", "monty_door"], ["guest_door", "monty_door"]] skel.toporder() nd = NodeData() nd.Vdata = { "prize_door": { "numoutcomes": 3, "parents": None, "children": ["monty_door"], "vals": ["A", "B", "C"], "cprob": [1.0 / 3, 1.0 / 3, 1.0 / 3], }, "guest_door": { "numoutcomes": 3, "parents": None, "children": ["monty_door"], "vals": ["A", "B", "C"], "cprob": [1.0 / 3, 1.0 / 3, 1.0 / 3], }, "monty_door": { "numoutcomes": 3, "parents": ["prize_door", "guest_door"], "children": None, "vals": ["A", "B", "C"], "cprob": { "['A', 'A']": [0., 0.5, 0.5], "['B', 'B']": [0.5, 0., 0.5], "['C', 'C']": [0.5, 0.5, 0.], "['A', 'B']": [0., 0., 1.], "['A', 'C']": [0., 1., 0.], "['B', 'A']": [0., 0., 1.], "['B', 'C']": [1., 0., 0.], "['C', 'A']": [0., 1., 0.], "['C', 'B']": [1., 0., 0.], }, }, } bn = DiscreteBayesianNetwork(skel, nd) fn = TableCPDFactorization(bn) query = { "prize_door": ["A", "B", "C"], } evidence = { "guest_door": "A", "monty_door": "B", } res = fn.condprobve(query, evidence) print res.vals print res.scope print res.card print res.stride
def test_structure_estimation(self):
    """Structure learned from 8000 samples recovers 5 nodes and some edges."""
    # Sample trial data from the teacher network.
    skel = GraphSkeleton()
    skel.load(self.data_path)
    skel.toporder()
    teacher_nd = NodeData()
    teacher_nd.load(self.teacher_data_path)
    bn = DiscreteBayesianNetwork(skel, teacher_nd)

    req = DiscreteStructureEstimationRequest()
    for sample in bn.randomsample(8000):
        state = DiscreteGraphState()
        for node, value in sample.items():
            state.node_states.append(DiscreteNodeState(node=node, state=value))
        req.states.append(state)

    res = self.struct_estimate(req)
    self.assertIsNotNone(res.graph)
    self.assertEqual(len(res.graph.nodes), 5)
    self.assertGreater(len(res.graph.edges), 0)
def test_structure_estimation(self):
    """Estimated structure must contain all 5 nodes and at least one edge."""
    request = DiscreteStructureEstimationRequest()
    skeleton = GraphSkeleton()
    skeleton.load(self.data_path)
    skeleton.toporder()
    node_data = NodeData()
    node_data.load(self.teacher_data_path)
    network = DiscreteBayesianNetwork(skeleton, node_data)
    samples = network.randomsample(8000)
    # Convert each sample dict into a ROS graph-state message.
    for sample in samples:
        graph_state = DiscreteGraphState()
        for name, val in sample.items():
            graph_state.node_states.append(DiscreteNodeState(node=name, state=val))
        request.states.append(graph_state)
    response = self.struct_estimate(request)
    self.assertIsNotNone(response.graph)
    self.assertEqual(len(response.graph.nodes), 5)
    self.assertGreater(len(response.graph.edges), 0)
def setUp(self):
    """Create a learner plus discrete and linear-Gaussian sample sequences."""
    # instantiate learner
    self.l = PGMLearner()

    # generate graph skeleton (shared by both test networks)
    skel = GraphSkeleton()
    skel.load("unittestdict.txt")
    skel.toporder()

    # BUG FIX: NodeData.load() is an instance method returning None, and both
    # network constructors require the skeleton as their first argument
    # (compare the working TestPGMLearner.setUp elsewhere in this file).
    nd = NodeData()
    nd.load("unittestdict.txt")
    self.samplediscbn = DiscreteBayesianNetwork(skel, nd)
    self.samplediscseq = self.samplediscbn.randomsample(5000)

    # generate sample sequence to try to learn from - linear Gaussian
    nda = NodeData()
    nda.load("unittestlgdict.txt")
    self.samplelgbn = LGBayesianNetwork(skel, nda)
    self.samplelgseq = self.samplelgbn.randomsample(10000)

    self.skel = skel
class TestDiscreteBayesianNetwork(unittest.TestCase):

    def setUp(self):
        """Load the unit-test network."""
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()
        # BUG FIX: NodeData.load() returns None (it mutates the instance), and
        # DiscreteBayesianNetwork requires both the skeleton and the node data.
        nodedata = NodeData()
        nodedata.load("unittestdict.txt")
        self.instance = DiscreteBayesianNetwork(skel, nodedata)

    def test_randomsample(self):
        """Forward samples carry real outcomes, never the 'default' placeholder."""
        randomsample = self.instance.randomsample(5)
        self.assertTrue(randomsample[0]["Difficulty"] == 'easy' or randomsample[0]["Difficulty"] == 'hard')
        for key in randomsample[0].keys():
            self.assertTrue(randomsample[0][key] != "default")

    def test_randomsamplewithevidence(self):
        """Samples drawn with evidence keep the evidence value fixed."""
        evidence = dict(Difficulty='easy')
        randomsample = self.instance.randomsample(10, evidence)
        for entry in randomsample:
            self.assertEqual(entry["Difficulty"], 'easy')
class TestDiscreteBayesianNetwork(unittest.TestCase):

    def setUp(self):
        """Build the test network from unittestdict.txt."""
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()
        # BUG FIX: NodeData.load() must be called on an instance (it returns
        # None), and the network constructor takes (skeleton, nodedata).
        nodedata = NodeData()
        nodedata.load("unittestdict.txt")
        self.instance = DiscreteBayesianNetwork(skel, nodedata)

    def test_randomsample(self):
        """Sampled outcomes are real values, never the 'default' placeholder."""
        first = self.instance.randomsample(5)[0]
        self.assertTrue(first["Difficulty"] == 'easy' or first["Difficulty"] == 'hard')
        for key in first.keys():
            self.assertTrue(first[key] != "default")

    def test_randomsamplewithevidence(self):
        """Evidence values stay fixed across all samples."""
        samples = self.instance.randomsample(10, dict(Difficulty='easy'))
        for entry in samples:
            self.assertEqual(entry["Difficulty"], 'easy')
def loadbn(param_file):
    """
    This function loads the bn model into the workspace from its
    associated .txt file under experiment_dir/parameters/.
    """
    file_path = os.path.join(experiment_dir, 'parameters', param_file + '.txt')
    node_data = NodeData()
    node_data.load(file_path)
    skeleton = GraphSkeleton()
    skeleton.load(file_path)
    skeleton.toporder()
    return DiscreteBayesianNetwork(skeleton, node_data)
def test_param_estimation(self):
    """Parameter estimation over 200 samples returns one entry per node."""
    req = DiscreteParameterEstimationRequest()

    # load graph structure
    skel = GraphSkeleton()
    skel.load(self.data_path)
    req.graph.nodes = skel.V
    req.graph.edges = [GraphEdge(parent, child) for parent, child in skel.E]
    skel.toporder()

    # generate trial data from the teacher network
    teacher_nd = NodeData()
    teacher_nd.load(self.teacher_data_path)
    bn = DiscreteBayesianNetwork(skel, teacher_nd)
    for sample in bn.randomsample(200):
        gs = DiscreteGraphState()
        for node, state in sample.items():
            gs.node_states.append(DiscreteNodeState(node=node, state=state))
        req.states.append(gs)

    self.assertEqual(len(self.param_estimate(req).nodes), 5)
def test_param_estimation(self):
    """The service should estimate parameters for all 5 nodes."""
    request = DiscreteParameterEstimationRequest()

    # Describe the graph structure to the service.
    skeleton = GraphSkeleton()
    skeleton.load(self.data_path)
    request.graph.nodes = skeleton.V
    request.graph.edges = [GraphEdge(a, b) for a, b in skeleton.E]
    skeleton.toporder()

    # Generate trial data by sampling the stored teacher network.
    teacher_nd = NodeData()
    teacher_nd.load(self.teacher_data_path)
    network = DiscreteBayesianNetwork(skeleton, teacher_nd)
    samples = network.randomsample(200)
    for sample in samples:
        graph_state = DiscreteGraphState()
        for name, value in sample.items():
            graph_state.node_states.append(DiscreteNodeState(node=name, state=value))
        request.states.append(graph_state)

    response = self.param_estimate(request)
    self.assertEqual(len(response.nodes), 5)
def setUp(self):
    """Build the test network and collect random/Gibbs sample aggregates."""
    skel = GraphSkeleton()
    skel.load("unittestdict.txt")
    skel.toporder()
    # BUG FIX: NodeData.load() mutates in place and returns None, so the
    # original `nodedata = NodeData.load(...)` produced None; the network
    # constructor also needs the skeleton as its first argument.
    nodedata = NodeData()
    nodedata.load("unittestdict.txt")
    self.bn = DiscreteBayesianNetwork(skel, nodedata)
    agg = SampleAggregator()
    agg.aggregate(self.bn.randomsample(50))
    self.rseq = agg.seq
    self.ravg = agg.avg
    self.fn = TableCPDFactorization(self.bn)
    evidence = dict(Letter='weak')
    agg.aggregate(self.fn.gibbssample(evidence, 51))
    self.gseq = agg.seq
    self.gavg = agg.avg
def construct(self):
    """Assemble a DiscreteBayesianNetwork from self.nodes (Vdata-style dict).

    Side effect: fills in each node's 'children' list on self.nodes itself,
    replacing empty lists with None as libpgm expects.
    """
    skel = GraphSkeleton()
    skel.V = self.nodes.keys()
    skel.E = []
    # Derive edges from the declared parents, wiring children along the way.
    for name, ndata in self.nodes.iteritems():
        if ndata['parents']:
            for parent in ndata['parents']:
                skel.E.append([parent, name])
                self.nodes[parent]['children'].append(name)
    for name, ndata in self.nodes.iteritems():
        if len(ndata['children']) == 0:
            ndata['children'] = None
    skel.toporder()
    data = NodeData()
    data.Vdata = self.nodes
    return DiscreteBayesianNetwork(skel, data)
def discrete_query_cb(self, req):
    """Answer a discrete probability query over the node data carried by *req*."""
    nd = U.discrete_nodedata_from_ros(req.nodes)
    skel = U.graph_skeleton_from_node_data(nd)
    skel.toporder()
    factorization = TableCPDFactorization(DiscreteBayesianNetwork(skel, nd))

    # Query every outcome of each requested node, given the supplied evidence.
    query = dict((name, nd.Vdata[name]["vals"]) for name in req.query)
    evidence = dict((ns.node, ns.state) for ns in req.evidence)
    rospy.loginfo("resolving query %s with evidence %s" % (query, evidence))
    answer = factorization.condprobve(query=query, evidence=evidence)
    rospy.loginfo("%s -> %s" % (answer.scope, answer.vals))

    # Package the first (and only) scoped node into the response message.
    res = DiscreteQueryResponse()
    node = DiscreteNode()
    node.name = answer.scope[0]
    node.outcomes = query[node.name]
    node.CPT.append(ConditionalProbability(node.outcomes, answer.vals))
    res.nodes.append(node)
    return res
def setUp(self):
    """Instantiate a learner and draw training sequences from both networks."""
    # instantiate learner
    self.l = PGMLearner()

    # generate graph skeleton
    skel = GraphSkeleton()
    skel.load("unittestdict.txt")
    skel.toporder()

    # BUG FIX: NodeData.load() is an instance method that returns None, and
    # DiscreteBayesianNetwork / LGBayesianNetwork both take (skeleton, nodedata);
    # the original passed the None result of NodeData.load as the only argument.
    nd = NodeData()
    nd.load("unittestdict.txt")
    self.samplediscbn = DiscreteBayesianNetwork(skel, nd)
    self.samplediscseq = self.samplediscbn.randomsample(5000)

    # generate sample sequence to try to learn from - linear Gaussian
    nda = NodeData()
    nda.load("unittestlgdict.txt")
    self.samplelgbn = LGBayesianNetwork(skel, nda)
    self.samplelgseq = self.samplelgbn.randomsample(10000)

    self.skel = skel
def main(): in_data = read_data.getdata() f_data = format_data(in_data) nd = NodeData() nd.load("net4.txt") # an input file skel = GraphSkeleton() skel.load("net4.txt") skel.toporder() bn = DiscreteBayesianNetwork(skel, nd) #training dataset:70% bn2 = em(f_data[1:6000], bn, skel) pr_training = precision(f_data[1:6000], bn2) print "Prediction accuracy for training data:", pr_training[1] #testing dataset:30% pr = precision(f_data[6700:6800], bn2) print "Prediction accuracy for test data:", pr[1]
def setup(self):
    """Translate self.node (name -> node object) into a libpgm network
    and its table CPD factorization."""
    self.nd = NodeData()
    self.skel = GraphSkeleton()
    self.skel.V, self.skel.E = [], []
    self.nd.Vdata = {}
    for i, node in enumerate(self.node.values()):
        node.sId = str(i)
        dNode = {
            "numoutcomes": len(node.values),
            "vals": node.values,
            "cprob": node.cpt,
        }
        self.skel.V.append(node.name)
        # Collect parent names and register the corresponding edges.
        aParents = []
        for parent in node.parents:
            if parent == None:
                continue
            aParents.append(parent.name)
            self.skel.E.append([parent.name, node.name])
        dNode["parents"] = aParents if len(aParents) > 0 else None
        self.nd.Vdata[node.name] = dNode
    self.skel.toporder()
    self.bn = DiscreteBayesianNetwork(self.skel, self.nd)
    self.fn = TableCPDFactorization(self.bn)
class TestPGMLearner(unittest.TestCase):

    def setUp(self):
        """Create a learner and sample data from the discrete and LG networks."""
        # instantiate learner
        self.l = PGMLearner()

        # generate graph skeleton
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()

        # generate sample sequence to try to learn from - discrete
        nd = NodeData()
        nd.load("unittestdict.txt")
        self.samplediscbn = DiscreteBayesianNetwork(skel, nd)
        self.samplediscseq = self.samplediscbn.randomsample(5000)

        # generate sample sequence to try to learn from - linear Gaussian
        nda = NodeData()
        nda.load("unittestlgdict.txt")
        self.samplelgbn = LGBayesianNetwork(skel, nda)
        self.samplelgseq = self.samplelgbn.randomsample(10000)

        self.skel = skel

    def test_discrete_mle_estimateparams(self):
        result = self.l.discrete_mle_estimateparams(self.skel, self.samplediscseq)
        indexa = result.Vdata['SAT']['vals'].index('lowscore')
        self.assertTrue(result.Vdata['SAT']['cprob']["['low']"][indexa] < 1
                        and result.Vdata['SAT']['cprob']["['low']"][indexa] > .9)
        indexb = result.Vdata['Letter']['vals'].index('weak')
        self.assertTrue(result.Vdata['Letter']['cprob']["['A']"][indexb] < .15
                        and result.Vdata['Letter']['cprob']["['A']"][indexb] > .05)

    def test_lg_mle_estimateparams(self):
        result = self.l.lg_mle_estimateparams(self.skel, self.samplelgseq)
        self.assertTrue(result.Vdata['SAT']['mean_base'] < 15 and result.Vdata['SAT']['mean_base'] > 5)
        self.assertTrue(result.Vdata['Letter']['variance'] < 15 and result.Vdata['Letter']['variance'] > 5)

    def test_discrete_constraint_estimatestruct(self):
        result = self.l.discrete_constraint_estimatestruct(self.samplediscseq)
        self.assertTrue(["Difficulty", "Grade"] in result.E)

    def test_lg_constraint_estimatestruct(self):
        result = self.l.lg_constraint_estimatestruct(self.samplelgseq)
        self.assertTrue(["Intelligence", "Grade"] in result.E)

    def test_discrete_condind(self):
        chi, pv, witness = self.l.discrete_condind(self.samplediscseq, "Difficulty", "Letter", ["Grade"])
        self.assertTrue(pv > .05)
        # BUG FIX: assertTrue(witness, ["Grade"]) treated the expected value as
        # the failure *message* and never compared anything; use assertEqual.
        self.assertEqual(witness, ["Grade"])
        chia, pva, witnessa = self.l.discrete_condind(self.samplediscseq, "Difficulty", "Intelligence", [])
        self.assertTrue(pva < .05)

    def test_discrete_estimatebn(self):
        result = self.l.discrete_estimatebn(self.samplediscseq)
        self.assertTrue(result.V)
        self.assertTrue(result.E)
        self.assertTrue(result.Vdata["Difficulty"]["cprob"][0])

    def test_lg_estimatebn(self):
        result = self.l.lg_estimatebn(self.samplelgseq)
        self.assertTrue(result.V)
        self.assertTrue(result.E)
        self.assertTrue(result.Vdata["Intelligence"]["mean_base"])
class BinaryNetworkBandit(object):
    """Class represents a Discrete Bayesian Network that supports Thompson sampling.
    Currently only supports interventions on a single variable at a time"""

    def __init__(self, bayesnetfile, targetVar = 'Y', prior=(1.0,1.0)):
        # bayesnetfile: path to a libpgm-format network file.
        # targetVar: name of the reward node; prior: (alpha, beta) Beta prior.
        self.file = bayesnetfile
        # grid of theta values used only for plotting Beta pdfs
        self.theta_range = linspace(0.0001,0.9999,100)
        self.set_bayesnet()
        self.y = targetVar
        self.reset(prior = prior)

    def getCPD(self):
        """ required as querying a TableCPDFactorization leads modifies the underlying Bayesian network (refresh() appears broken) """
        self.set_bayesnet()
        return TableCPDFactorization(self.bn)

    def set_bayesnet(self):
        # Reload the network from its file so queries start from a clean state.
        nd = NodeData()
        skel = GraphSkeleton()
        nd.load(self.file)
        skel.load(self.file)
        skel.toporder()
        self.bn = DiscreteBayesianNetwork(skel, nd)

    def reset(self,prior=(1.0,1.0)):
        """ Clears all the data on samples that have been taken so far but keeps graph structure. You can optionally specify a new prior"""
        # possible single interventions on non-target variable
        self.prior = prior
        self.interventions = []  # a list of possible assignments
        self.variables = []  # store the variables - defines an ordering
        self.intervention_to_arm = {}
        self.variable_name_to_index = {}
        values = []
        index = 0
        variable_index = 0
        # Each (variable, value) pair over the non-target variables is one arm.
        for variable, data in self.bn.Vdata.iteritems():
            if variable != self.y:
                self.variables.append(variable)
                self.variable_name_to_index[variable] = variable_index
                variable_index +=1
                vals = data.get("vals")
                values.append(vals)
                for value in vals:
                    self.interventions.append({variable:value})
                    self.intervention_to_arm[(variable,value)]=index
                    index +=1
        # lets calculate and print the actual value of theta for each arm (since we know it)
        truth = []
        for i in self.interventions:
            cpd = self.getCPD()
            answer = cpd.specificquery({self.y:"1"},i)
            truth.append(answer)
        print "THETA",truth
        cpd = self.getCPD() # reset the network to its original state
        # generate all possible assignments (intervention on all variables) to non-target values
        combinations = list(itertools.product(*values))
        self.assignements = [dict(zip(self.variables,v)) for v in combinations]
        num_assignments = len(self.assignements)
        # builds a map from a each assingment to its indx
        self.assingment_map = dict(zip([str(list(v)) for v in combinations],range(num_assignments)))
        # NOTE(review): this also rebinds self.trials, which is immediately
        # overwritten a few lines below — presumably only self.atrials is
        # intended here; confirm.
        self.atrials = self.trials = zeros(shape=(num_assignments,), dtype=int)  # stores how often each assignment occured
        self.asuccesses = zeros(shape=(num_assignments,), dtype=int)  # stores how often each assignment paid off
        self.num_arms = len(self.interventions)
        self.trials = zeros(shape=(self.num_arms,), dtype=int)  # stores how often each arm was selected
        self.successes = zeros(shape=(self.num_arms,), dtype=int)  # stores how often each arm paid off
        # now here I'm going to assume models where X1 ... Xn mutually independent causes of Y
        # record distributions for P(X1), P(X2) ... - they update only when we observe Xn not when we do it
        self.observed_trials = zeros(shape=(self.num_arms,), dtype=int)
        self.observed_true = zeros(shape=(self.num_arms,), dtype=int)
        # records how many times each variable was set by intervention
        self.intervened_trials = zeros(shape=(self.num_arms,), dtype=int)
        self.intervened_true = zeros(shape=(self.num_arms,), dtype=int)

    def sample(self,n,plot=-1):
        """ returns n samples based on Thompson sampling """
        for i in xrange(n):
            # plot every `plot`-th iteration when plot > 0
            if plot > 0 and i % plot == 0:
                do_plot = True
            else:
                do_plot = False
            arm = self.get_recommendation(do_plot)
            intervention = self.interventions[arm]
            # note: evidence is equivelent to do in libpgm
            result = self.bn.randomsample(1,evidence=intervention)[0]  # returns a dictionary containing values for each variable
            reward = int(result.get(self.y))
            # update the counts for the pulled arm (P(Y|X?=?))
            self.trials[arm] = self.trials[arm] + 1
            if (reward == 1):
                self.successes[arm] = self.successes[arm] + 1
            # for variable we intervened on record that the we intervened
            # NOTE(review): `interventions` (plural) is undefined here — this
            # assert presumably means len(intervention.keys()) == 1; confirm.
            assert(len(interventions.keys())==1)
            do_variable = intervention.keys()[0]  # ASSUMING SINGLE INTERVENTIONS AT THIS POINT
            do_value = intervention.values()[0]
            do_variable_indx = self.variable_name_to_index[do_variable]
            self.intervened_trials[do_variable_indx] +=1
            self.intervened_true[do_variable_indx] +=1
            # for all variables we did not intervene on, update observed
            values = []
            for indx,v in enumerate(self.variables):
                value = result[v]
                values.append(value)
                if v not in intervention:
                    self.observed_trials[indx] += 1
                    if int(value) == 1:
                        self.observed_true[indx]+=1
            # update based on intervened and non-intervened ...
            # for the pulled arm
            # NOTE(review): self.trials_c / self.successes_c are never
            # initialized anywhere in this class — this code cannot run as-is.
            self.trials_c[arm] = self.trials_c[arm] + 1
            if (reward == 1):
                self.successes_c[arm] = self.successes_c[arm] + 1
            # each other value in result corresponds to an arm
            for k,val in result:
                # calculate how much this should be weighted down
                otrials = self.observed_trials[do_variable_indx]
                oratio = (otrials+self.observed_true[do_variable_indx])/otrials
                itrials = self.intervened_trials[do_variable_indx]
                # NOTE(review): the following statements are unfinished in the
                # original source (incomplete right-hand sides); they do not
                # parse and the intended formula must be reconstructed.
                isuccess = itrials/
                total_success = osuccess+/
                w =
                if k not in intervention:
                    o_arm =  # get arm corresponding to setting variable k to var
                    self.trials_c[o_arm] = self.trials_c[o_arm]+w
                    if reward == 1:
                        self.successes_c[arm] = self.successes_c[arm] + w  # the value of [arm] occured because of intervention so we need to weight down based on that - going to lead to fractional trials...
            # update relevent exact assignment
            key = str((values))
            a = self.assingment_map[key]
            self.atrials[a] = self.atrials[a]+1
            if (reward == 1):
                self.asuccesses[a] = self.asuccesses[a] + 1

    def plot_observed(self):
        # put labels under each plot
        f,sp = plt.subplots(1,len(self.variables),sharey=False,figsize=(15,5))
        for i in range(len(self.variables)):
            dist = beta(self.prior[0]+self.observed_true[i], self.prior[1]+self.observed_trials[i]-self.observed_true[i])
            sp[i].plot(self.theta_range,dist.pdf(self.theta_range))
        plt.show()

    def plot_assignments(self):
        # One Beta posterior plot per full assignment of the non-target variables.
        print self.atrials
        print self.asuccesses
        f,sp = plt.subplots(1,len(self.assingment_map),sharey=False,figsize=(15,5))
        titles = [json.dumps(x) for x in self.assignements]
        for i in range(len(self.assingment_map)):
            # need to get rid of the unicode tags so things fit - dirty way is s.encode('ascii')
            dist = beta(self.prior[0]+self.asuccesses[i]+1, self.prior[1]+self.atrials[i]-self.asuccesses[i])
            sp[i].set_title(titles[i])
            sp[i].plot(self.theta_range,dist.pdf(self.theta_range))
        plt.show()

    def get_recommendation(self,do_plot=False):
        """ recommends which arm to pull next proportional to the estimated probability that it is the optimal one"""
        sampled_theta = []
        if do_plot:
            f,sp = plt.subplots(1,self.num_arms,sharey=False,figsize=(15,5))
        for i in range(self.num_arms):
            #Construct beta distribution for posterior
            dist = beta(self.prior[0]+self.successes[i], self.prior[1]+self.trials[i]-self.successes[i])
            if do_plot:
                sp[i].plot(self.theta_range,dist.pdf(self.theta_range))
            #Draw sample from beta distribution
            sampled_theta.append(dist.rvs())
        # Alternately calculate P(Y|X1) as sum(P(Y|X1,X2)P(X2))
        # Do this here ....
        if do_plot:
            plt.show()
        # Return the index of the sample with the largest value
        return sampled_theta.index( max(sampled_theta) )

    def regret(self, bestprob):
        """ regret as ratio between reward and expectation of reward had we always selected best """
        reward = sum(self.successes)/float(sum(self.trials))
        optimal = bestprob
        return 1 - reward/bestprob
def __init__(self, nodes):
    """Build a discrete Bayesian network from a dict of node specifications.

    *nodes* maps node name -> spec dict with a "type" of "inferred",
    "fsm_input", "sensor_input" or "output"; "inferred" specs also carry
    "parents" and a truth table "p".
    """
    self.nodes = {}
    self.children = defaultdict(list)
    self.parents = defaultdict(list)
    self.outputs = {}
    for name, node_spec in nodes.iteritems():
        node_type = node_spec["type"]
        if node_type == "inferred":
            parents = node_spec["parents"]
            # store the relationship between these elements
            for parent in parents:
                normalised = normalise_name(parent)
                self.parents[name].append(normalised)
                self.children[normalised].append(name)
            truth_table = parse_truth_table(node_spec["p"], parents)
            node = make_node(truth_table, parents, node_type)
            self.nodes[name] = node
        if node_type == "fsm_input":
            node = make_node([1.0, 0.0], None, node_type)
            self.nodes[name] = node
        if node_type == "sensor_input":
            # Each sensor node gets a hidden "_proxy_<name>" parent whose CPT
            # mirrors T/F — presumably so evidence can be applied through the
            # proxy; confirm against the query code.
            proxy_node = make_node([1.0, 0.0], None, "proxy")
            proxy_name = "_proxy_%s" % name
            self.nodes[proxy_name] = proxy_node
            self.children[proxy_name].append(name)
            node = make_node({
                "['T']": [1.0, 0.0],
                "['F']": [0.0, 1.0]
            }, [proxy_name], node_type)
            self.nodes[name] = node
        if node_type == "output":
            self.outputs[name] = node_spec
    # libpgm expects childless nodes to carry children == None, not [].
    for node in self.nodes:
        if len(self.children[node]) > 0:
            self.nodes[node]["children"] = self.children[node]
        else:
            self.nodes[node]["children"] = None
    # certainty scaling
    self.event_caution = 0.0
    # Build the skeleton from the accumulated child links.
    og = OrderedSkeleton()
    og.V = self.nodes.keys()
    edges = []
    for k, children in self.children.iteritems():
        for child in children:
            edges.append((k, child))
    og.E = edges
    og.toporder()
    nd = NodeData()
    nd.Vdata = self.nodes
    #logging.debug(pprint.pformat(nd.Vdata))
    self.net = DiscreteBayesianNetwork(og, nd)
    self.factor_net = TableCPDFactorization(self.net)
class TestPGMLearner(unittest.TestCase):

    def setUp(self):
        """Sample training sequences from the discrete and LG test networks."""
        # instantiate learner
        self.l = PGMLearner()

        # generate graph skeleton
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()

        # generate sample sequence to try to learn from - discrete
        nd = NodeData()
        nd.load("unittestdict.txt")
        self.samplediscbn = DiscreteBayesianNetwork(skel, nd)
        self.samplediscseq = self.samplediscbn.randomsample(5000)

        # generate sample sequence to try to learn from - linear Gaussian
        nda = NodeData()
        nda.load("unittestlgdict.txt")
        self.samplelgbn = LGBayesianNetwork(skel, nda)
        self.samplelgseq = self.samplelgbn.randomsample(10000)

        self.skel = skel

    def test_discrete_mle_estimateparams(self):
        result = self.l.discrete_mle_estimateparams(self.skel, self.samplediscseq)
        indexa = result.Vdata['SAT']['vals'].index('lowscore')
        self.assertTrue(result.Vdata['SAT']['cprob']["['low']"][indexa] < 1
                        and result.Vdata['SAT']['cprob']["['low']"][indexa] > .9)
        indexb = result.Vdata['Letter']['vals'].index('weak')
        self.assertTrue(result.Vdata['Letter']['cprob']["['A']"][indexb] < .15
                        and result.Vdata['Letter']['cprob']["['A']"][indexb] > .05)

    def test_lg_mle_estimateparams(self):
        result = self.l.lg_mle_estimateparams(self.skel, self.samplelgseq)
        self.assertTrue(result.Vdata['SAT']['mean_base'] < 15 and result.Vdata['SAT']['mean_base'] > 5)
        self.assertTrue(result.Vdata['Letter']['variance'] < 15 and result.Vdata['Letter']['variance'] > 5)

    def test_discrete_constraint_estimatestruct(self):
        result = self.l.discrete_constraint_estimatestruct(self.samplediscseq)
        self.assertTrue(["Difficulty", "Grade"] in result.E)

    def test_lg_constraint_estimatestruct(self):
        result = self.l.lg_constraint_estimatestruct(self.samplelgseq)
        self.assertTrue(["Intelligence", "Grade"] in result.E)

    def test_discrete_condind(self):
        chi, pv, witness = self.l.discrete_condind(self.samplediscseq, "Difficulty", "Letter", ["Grade"])
        self.assertTrue(pv > .05)
        # BUG FIX: the original assertTrue(witness, ["Grade"]) passed the
        # expected list as the assertion *message*, so it never compared.
        self.assertEqual(witness, ["Grade"])
        chia, pva, witnessa = self.l.discrete_condind(self.samplediscseq, "Difficulty", "Intelligence", [])
        self.assertTrue(pva < .05)

    def test_discrete_estimatebn(self):
        result = self.l.discrete_estimatebn(self.samplediscseq)
        self.assertTrue(result.V)
        self.assertTrue(result.E)
        self.assertTrue(result.Vdata["Difficulty"]["cprob"][0])

    def test_lg_estimatebn(self):
        result = self.l.lg_estimatebn(self.samplelgseq)
        self.assertTrue(result.V)
        self.assertTrue(result.E)
        self.assertTrue(result.Vdata["Intelligence"]["mean_base"])
# Compute the probability distribution over a specific node or nodes # load nodedata and graphskeleton nd = NodeData() skel = GraphSkeleton() nd.load("../tests/unittestdict.txt") skel.load("../tests/unittestdict.txt") # toporder graph skeleton print skel.toporder() # load evidence evidence = {"Intelligence": "high"} query = {"Grade": "A"} # load bayesian network bn = DiscreteBayesianNetwork(skel, nd) # load factorization fn = TableCPDFactorization(bn) # # calculate probability distribution # result = fn.condprobve(query, evidence) # print json.dumps(result.vals, indent=2) # print json.dumps(result.scope, indent=2) # print json.dumps(result.card, indent=2) # print json.dumps(result.stride, indent=2) result = fn.specificquery(query, evidence) print result
if __name__ == '__main__':
    rospy.init_node("pgm_learner_sample_discrete")

    param_estimate = rospy.ServiceProxy("pgm_learner/discrete/parameter_estimation",
                                        DiscreteParameterEstimation)
    req = DiscreteParameterEstimationRequest()

    dpath = os.path.join(PKG_PATH, "test", "graph-test.txt")
    tpath = dpath

    # Describe the graph structure in the request.
    skel = GraphSkeleton()
    skel.load(dpath)
    req.graph.nodes = skel.V
    req.graph.edges = [GraphEdge(parent, child) for parent, child in skel.E]
    skel.toporder()

    # Generate trial data by sampling the stored network.
    teacher_nd = NodeData()
    teacher_nd.load(dpath)
    bn = DiscreteBayesianNetwork(skel, teacher_nd)
    for sample in bn.randomsample(200):
        gs = DiscreteGraphState()
        for node_name, node_state in sample.items():
            gs.node_states.append(DiscreteNodeState(node=node_name, state=node_state))
        req.states.append(gs)

    PP.pprint(param_estimate(req).nodes)
from libpgm.pgmlearner import PGMLearner # (1) --------------------------------------------------------------------- # Generate a sequence of samples from a discrete-CPD Bayesian network # load nodedata and graphskeleton nd = NodeData() skel = GraphSkeleton() nd.load("../tests/unittestdict.txt") skel.load("../tests/unittestdict.txt") # topologically order graphskeleton skel.toporder() # load bayesian network bn = DiscreteBayesianNetwork(skel, nd) # sample result = bn.randomsample(10) # output - toggle comment to see #print json.dumps(result, indent=2) # (2) ---------------------------------------------------------------------- # Generate a sequence of samples from a linear Gaussian-CPD Bayesian network # load nodedata and graphskeleton nd = NodeData() skel = GraphSkeleton() nd.load("../tests/unittestlgdict.txt") skel.load("../tests/unittestdict.txt")
import json
from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.pgmlearner import PGMLearner

# generate some data to use: sample 80000 joint assignments from the
# known "grades" network
nd = NodeData()
nd.load("grades.txt")  # an input file
skel = GraphSkeleton()
skel.load("grades.txt")
skel.toporder()
bn = DiscreteBayesianNetwork(skel, nd)
data = bn.randomsample(80000)

# instantiate my learner
learner = PGMLearner()

# estimate structure (constraint-based) from the samples alone
result = learner.discrete_constraint_estimatestruct(data)

# output the learned edge list
print json.dumps(result.E, indent=2)
w = csv.writer(open("bayesian_outcome.txt", "wb"))
count = 0
for i in range(104):
    # Load the i-th per-sample network from bayes_net/<i>.txt.
    nd = NodeData()
    skel = GraphSkeleton()
    nd.load('bayes_net/'+str(i)+".txt")  # any input file
    skel.load('bayes_net/'+str(i)+".txt")
    # topologically order graphskeleton
    skel.toporder()
    # load bayesian network
    bn = DiscreteBayesianNetwork(skel, nd)
    # dic1: evidence — presumably observed characters at odd-numbered nodes;
    # confirm against the network files.
    dic1 = {}
    k = 1
    for c in data_l[i]:
        dic1[str(k)] = str(c)
        k += 2
    print dic1
    k = 2 * len(data_l[i]) - 2
    dic2 = {}
    word = ''
    while k >= 0:
        maxx = 0
        char = ''
        temp = deepcopy(dic2)
        # NOTE(review): the snippet is truncated here — the body of this loop
        # is missing from the visible source.
        for c in CHARS:
count = 0
for i in range(104):
    # Every candidate word of the right length over the character set.
    all_perms = list(itertools.product(CHARS, repeat=len(data_l[i])))
    nd = NodeData()
    skel = GraphSkeleton()
    nd.load('bayes_net/'+str(i)+".txt")  # any input file
    skel.load('bayes_net/'+str(i)+".txt")
    # topologically order graphskeleton
    skel.toporder()
    # load bayesian network
    bn = DiscreteBayesianNetwork(skel, nd)
    # dic1: evidence — presumably observed characters at odd-numbered nodes;
    # confirm against the network files.
    dic1 = {}
    k = 1
    for c in data_l[i]:
        dic1[str(k)] = str(c)
        k += 2
    maxx = 0
    pred = ''
    for word in all_perms:
        # dic2: query — candidate characters at even-numbered nodes.
        dic2 = {}
        k = 0
        for c in word:
            dic2[str(k)] = [c]
            k += 2
        # NOTE(review): the snippet appears truncated here — maxx/pred are
        # never updated from curr in the visible source.
        curr = bn.specificquery(dic2,dic1)
rospy.init_node("pgm_learner_sample_discrete")

param_estimate = rospy.ServiceProxy(
    "pgm_learner/discrete/parameter_estimation",
    DiscreteParameterEstimation)

req = DiscreteParameterEstimationRequest()

dpath = os.path.join(PKG_PATH, "test", "graph-test.txt")
tpath = dpath

# load graph structure into the request
skel = GraphSkeleton()
skel.load(dpath)
req.graph.nodes = skel.V
req.graph.edges = [GraphEdge(src, dst) for src, dst in skel.E]
skel.toporder()

# generate trial data by sampling the stored network
teacher_nd = NodeData()
teacher_nd.load(dpath)
bn = DiscreteBayesianNetwork(skel, teacher_nd)
data = bn.randomsample(200)
for sample in data:
    gs = DiscreteGraphState()
    for node_name, node_state in sample.items():
        gs.node_states.append(DiscreteNodeState(node=node_name, state=node_state))
    req.states.append(gs)

PP.pprint(param_estimate(req).nodes)
# NOTE(review): the next three lines are a stray copy of the tail of a
# regret() method — `return` at module level does not parse; this looks
# like a paste/extraction error.
reward = sum(self.successes) / float(sum(self.trials))
optimal = bestprob
return 1 - reward / bestprob

# load nodedata and graphskeleton
nd = NodeData()
skel = GraphSkeleton()
nd.load("bayesnet.json")  # any input file
skel.load("bayesnet.json")

# topologically order graphskeleton
skel.toporder()

# load bayesian network
bn = DiscreteBayesianNetwork(skel, nd)

simulations = 10000  # the number of simulations of the whole process
experiments = 32  # the number of experiments we run in each simulation

# specify what the interventions are for the 'try all combinations of
# interventions' bandit
# NOTE(review): the interventions list is truncated in the visible source —
# the remaining assignment dictionaries are missing.
interventions = [{
    "X1": '0',
    "X2": '0',
    "X3": "0"
}, {
    "X1": '0',
    "X2": '0',
    "X3": "1"
}, {
    "X1": '0',
# Example: learn a discrete Bayesian-network STRUCTURE from sampled data
# (Python 2 script; prints the learned edge list as JSON).
import json

from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.pgmlearner import PGMLearner

# generate some data to use
nd = NodeData()
nd.load("grades.txt")    # an input file
skel = GraphSkeleton()
skel.load("grades.txt")
skel.toporder()
bn = DiscreteBayesianNetwork(skel, nd)
# large sample so the constraint-based test has enough statistical power
data = bn.randomsample(80000)

# instantiate my learner
learner = PGMLearner()

# estimate structure
result = learner.discrete_constraint_estimatestruct(data)

# output the learned edges
print json.dumps(result.E, indent=2)
def discrete_mle_estimateparams2(graphskeleton, data):
    '''
    Estimate parameters for a discrete Bayesian network with a structure
    given by *graphskeleton* in order to maximize the probability of data
    given by *data*, applying Laplace (add-one) smoothing so that
    combinations with no observations still get a valid distribution.

    Arguments:

        1. *graphskeleton* -- An instance of the
           :doc:`GraphSkeleton <graphskeleton>` class containing vertex and
           edge data.
        2. *data* -- A non-empty list of dicts containing samples from the
           network in {vertex: value} format, e.g.::

                [
                    {
                        'Grade': 'B',
                        'SAT': 'lowscore',
                        ...
                    },
                    ...
                ]

    For each node the function tallies outcome counts per combination of
    parent outcomes, smooths them, and normalizes into a tabular CPD.  It
    returns a :doc:`DiscreteBayesianNetwork <discretebayesiannetwork>`
    instance whose *Vdata* attribute holds the estimated CPDs, in the
    format seen in :doc:`unittestdict`.

    Usage example — learn parameters from 200 discrete samples::

        import json

        from libpgm.nodedata import NodeData
        from libpgm.graphskeleton import GraphSkeleton
        from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
        from libpgm.pgmlearner import PGMLearner

        # generate some data to use
        nd = NodeData()
        nd.load("../tests/unittestdict.txt")    # an input file
        skel = GraphSkeleton()
        skel.load("../tests/unittestdict.txt")
        skel.toporder()
        bn = DiscreteBayesianNetwork(skel, nd)
        data = bn.randomsample(200)

        # instantiate my learner
        learner = PGMLearner()

        # estimate parameters from data and skeleton
        result = learner.discrete_mle_estimateparams(skel, data)

        # output
        print json.dumps(result.Vdata, indent=2)
    '''
    assert (isinstance(
        graphskeleton,
        GraphSkeleton)), "First arg must be a loaded GraphSkeleton class."
    assert (isinstance(data, list) and data and isinstance(
        data[0], dict)), "Second arg must be a list of dicts."

    # instantiate Bayesian network, and add parent and children data
    bn = DiscreteBayesianNetwork()
    graphskeleton.toporder()
    bn.V = graphskeleton.V
    bn.E = graphskeleton.E
    bn.Vdata = dict()
    for vertex in bn.V:
        bn.Vdata[vertex] = dict()
        bn.Vdata[vertex]["children"] = graphskeleton.getchildren(vertex)
        bn.Vdata[vertex]["parents"] = graphskeleton.getparents(vertex)

        # placeholders for vals, cprob, and numoutcomes; root nodes use a
        # flat list for cprob, nodes with parents use a dict keyed by the
        # stringified list of parent values
        bn.Vdata[vertex]["vals"] = []
        if not bn.Vdata[vertex]["parents"]:
            bn.Vdata[vertex]["cprob"] = []
        else:
            bn.Vdata[vertex]["cprob"] = dict()
        bn.Vdata[vertex]["numoutcomes"] = 0

    # determine which outcomes are possible for each node
    for sample in data:
        for vertex in bn.V:
            if sample[vertex] not in bn.Vdata[vertex]["vals"]:
                bn.Vdata[vertex]["vals"].append(sample[vertex])
                bn.Vdata[vertex]["numoutcomes"] += 1

    def addlevel(vertex, _dict, key, depth, totaldepth):
        # recursively lay out the cprob table: one [numerator, denominator]
        # counter per outcome, for every combination of parent outcomes
        if depth == totaldepth:
            _dict[str(key)] = []
            for _ in range(bn.Vdata[vertex]["numoutcomes"]):
                _dict[str(key)].append([0, 0])
            return
        for val in bn.Vdata[bn.Vdata[vertex]["parents"][depth]]["vals"]:
            ckey = key[:]
            ckey.append(str(val))
            addlevel(vertex, _dict, ckey, depth + 1, totaldepth)

    # put a [0, 0] counter at each entry of every cprob table
    for vertex in bn.V:
        if bn.Vdata[vertex]["parents"]:
            root = bn.Vdata[vertex]["cprob"]
            numparents = len(bn.Vdata[vertex]["parents"])
            addlevel(vertex, root, [], 0, numparents)
        else:
            for _ in range(bn.Vdata[vertex]["numoutcomes"]):
                bn.Vdata[vertex]["cprob"].append([0, 0])

    # tally the samples into the counters
    for sample in data:
        for vertex in bn.V:
            # index of the observed outcome within this node's value list
            rindex = bn.Vdata[vertex]["vals"].index(sample[vertex])

            # locate the counter row for the sample's parent values
            if bn.Vdata[vertex]["parents"]:
                pvals = [str(sample[t]) for t in bn.Vdata[vertex]["parents"]]
                lev = bn.Vdata[vertex]["cprob"][str(pvals)]
            else:
                lev = bn.Vdata[vertex]["cprob"]

            # increase all denominators for the current condition,
            # and the numerator for the observed outcome
            for entry in lev:
                entry[1] += 1
            lev[rindex][0] += 1

    # Laplace (add-one) smoothing: add 1 to every numerator and
    # numoutcomes to every denominator, so empty bins cannot cause a
    # zero-division when normalizing below
    for vertex in bn.V:
        numBins = bn.Vdata[vertex]['numoutcomes']
        if not bn.Vdata[vertex]["parents"]:
            for counts in bn.Vdata[vertex]['cprob']:
                counts[0] += 1        # numerator (count)
                counts[1] += numBins  # denominator (total count)
        else:
            countdict = bn.Vdata[vertex]['cprob']
            for key in countdict.keys():
                for counts in countdict[key]:
                    counts[0] += 1
                    counts[1] += numBins

    # normalize the [num, denom] counters into probabilities
    for vertex in bn.V:
        if not bn.Vdata[vertex]["parents"]:
            bn.Vdata[vertex]["cprob"] = [
                x[0] / float(x[1]) for x in bn.Vdata[vertex]["cprob"]
            ]
        else:
            for key in bn.Vdata[vertex]["cprob"].keys():
                try:
                    bn.Vdata[vertex]["cprob"][key] = [
                        x[0] / float(x[1])
                        for x in bn.Vdata[vertex]["cprob"][key]
                    ]
                # default to even distribution if no data points
                # (unreachable after smoothing, kept as a safety net)
                except ZeroDivisionError:
                    bn.Vdata[vertex]["cprob"][key] = [
                        1 / float(bn.Vdata[vertex]["numoutcomes"])
                        for x in bn.Vdata[vertex]["cprob"][key]
                    ]

    # return the network with estimated probability distributions
    return bn
from libpgm.pgmlearner import PGMLearner # (1) --------------------------------------------------------------------- # Generate a sequence of samples from a discrete-CPD Bayesian network # load nodedata and graphskeleton nd = NodeData() skel = GraphSkeleton() nd.load("../tests/unittestdict.txt") skel.load("../tests/unittestdict.txt") # topologically order graphskeleton skel.toporder() # load bayesian network bn = DiscreteBayesianNetwork(skel, nd) # sample result = bn.randomsample(10) # output - toggle comment to see #print json.dumps(result, indent=2) # (2) ---------------------------------------------------------------------- # Generate a sequence of samples from a linear Gaussian-CPD Bayesian network # load nodedata and graphskeleton nd = NodeData() skel = GraphSkeleton() nd.load("../tests/unittestlgdict.txt") skel.load("../tests/unittestdict.txt")
skel1.toporder() # toporder graph skeleton #INITIALIZING BN 2 # load nodedata and graphskeleton nd2 = NodeData() skel2 = GraphSkeleton() nd2.load(path_bn2) skel2.load(path_bn2) skel2.toporder() # toporder graph skeleton # FINDING NEXT ACTIVITY ATTRIBUTES THROUGH INFERENCE ON BN 1 # wkday variable query evidence1 = dict(wkdayT0=userinput[0]) for i, item in enumerate(wkdayValsList): # loading bayesian network and factorization - needs to be done at every iteration bn1 = DiscreteBayesianNetwork(skel1, nd1) fn1 = TableCPDFactorization(bn1) # setting the query query1 = dict(wkdayT1=[item]) # querying in accordance to the given evidence and appending it to the list of probability of each value wkdayProbList.append(fn1.specificquery(query1, evidence1)) #print "Iteration: " + str(i) + "-> wkdayTO (Input): " + userinput[0] + "; wkdayT1 (Output): " + item + " - prob: " + str(wkdayProbList[i]) most_probable_wkdayT1 = wkdayValsList[numpy.argmax(wkdayProbList)] # hour variable query evidence1 = dict(hourT0=userinput[1]) for i, item in enumerate(hourValsList): # loading bayesian network and factorization - needs to be done at every iteration bn1 = DiscreteBayesianNetwork(skel1, nd1) fn1 = TableCPDFactorization(bn1) # setting the query
from libpgm.nodedata import NodeData from libpgm.graphskeleton import GraphSkeleton from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork from inference.exact_inference import ExactInferenceEngine from inference.approximate_inference import ApproximateInferenceEngine node_data = NodeData() network_skeleton = GraphSkeleton() node_data.load('test_bayesian_networks/network.txt') network_skeleton.load('test_bayesian_networks/network.txt') network = DiscreteBayesianNetwork(network_skeleton, node_data) exact_inference_engine = ExactInferenceEngine(network) approximate_inference_engine = ApproximateInferenceEngine(network) query_variable = 'Burglary' evidence_variables = {'MaryCalls': 'true', 'JohnCalls': 'true'} resulting_distribution = exact_inference_engine.perform_inference( query_variable, evidence_variables) print 'P(B|m,j) - enumeration: ', resulting_distribution resulting_distribution = exact_inference_engine.perform_ve_inference( query_variable, evidence_variables) print '(B|m,j) - variable elimination: ', resulting_distribution resulting_distribution = approximate_inference_engine.perform_rs_inference( query_variable, evidence_variables, 100000) print 'P(B|m,j) - approximate - rejection sampling: ', resulting_distribution resulting_distribution = approximate_inference_engine.perform_lw_inference( query_variable, evidence_variables, 100000) print 'P(B|m,j) - approximate - likelihood weighting: ', resulting_distribution resulting_distribution = approximate_inference_engine.perform_gibbs_inference(