def net2():
    """Fit a linear Gaussian Bayesian network from "net.txt" by MLE and
    print the probability of a held-out data slice.

    Returns:
        int: 0 on completion (status-code style).
    """
    nd = NodeData()
    skel = GraphSkeleton()
    nd.load("net.txt")    # an input file
    skel.load("net.txt")

    # topologically order graphskeleton
    skel.toporder()

    # load bayesian network
    # NOTE(review): lgbn is constructed but never used below — presumably kept
    # for its load-time validation of the node data; confirm before removing.
    lgbn = LGBayesianNetwork(skel, nd)

    in_data = read_data.getdata2()
    learner = PGMLearner()
    bn = learner.lg_mle_estimateparams(skel, in_data)

    # probability of the slice [300:500] under the learned network
    p = cal_prob(in_data[300:500], bn)
    print(p)  # parenthesized single-arg print: valid in both Python 2 and 3
    return 0
def net2():
    """Fit a linear Gaussian Bayesian network from "net.txt" by MLE and
    print the probability of a held-out data slice.

    NOTE(review): this is a second definition of ``net2`` in the same file;
    at import time it silently replaces the earlier one. Confirm which copy
    is intended and delete the other.

    Returns:
        int: 0 on completion (status-code style).
    """
    nd = NodeData()
    skel = GraphSkeleton()
    nd.load("net.txt")    # an input file
    skel.load("net.txt")

    # topologically order graphskeleton
    skel.toporder()

    # load bayesian network
    # NOTE(review): lgbn is built but never used below — TODO confirm.
    lgbn = LGBayesianNetwork(skel, nd)

    in_data = read_data.getdata2()
    learner = PGMLearner()
    bn = learner.lg_mle_estimateparams(skel, in_data)

    p = cal_prob(in_data[300:500], bn)
    print(p)  # parenthesized single-arg print: valid in both Python 2 and 3
    return 0
class TestPGMLearner(unittest.TestCase):
    """Tests for PGMLearner: MLE parameter estimation, constraint-based
    structure estimation, and conditional-independence testing, on both
    discrete and linear Gaussian sample sequences.

    The assertions use loose numeric bands because the fixtures are random
    samples drawn in setUp.
    """

    def setUp(self):
        # instantiate learner
        self.l = PGMLearner()

        # generate graph skeleton
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()

        # generate sample sequence to try to learn from - discrete
        nd = NodeData()
        nd.load("unittestdict.txt")
        self.samplediscbn = DiscreteBayesianNetwork(skel, nd)
        self.samplediscseq = self.samplediscbn.randomsample(5000)

        # generate sample sequence to try to learn from - discrete
        # NOTE(review): comment above says "discrete" but this fixture is the
        # linear Gaussian one (unittestlgdict.txt).
        nda = NodeData()
        nda.load("unittestlgdict.txt")
        self.samplelgbn = LGBayesianNetwork(skel, nda)
        self.samplelgseq = self.samplelgbn.randomsample(10000)

        self.skel = skel

    def test_discrete_mle_estimateparams(self):
        # learned conditional probabilities should land in a narrow band
        # around the generating distribution
        result = self.l.discrete_mle_estimateparams(self.skel, self.samplediscseq)
        indexa = result.Vdata['SAT']['vals'].index('lowscore')
        self.assertTrue(result.Vdata['SAT']['cprob']["['low']"][indexa] < 1 and result.Vdata['SAT']['cprob']["['low']"][indexa] > .9)
        indexb = result.Vdata['Letter']['vals'].index('weak')
        self.assertTrue(result.Vdata['Letter']['cprob']["['A']"][indexb] < .15 and result.Vdata['Letter']['cprob']["['A']"][indexb] > .05)

    def test_lg_mle_estimateparams(self):
        result = self.l.lg_mle_estimateparams(self.skel, self.samplelgseq)
        self.assertTrue(result.Vdata['SAT']['mean_base'] < 15 and result.Vdata['SAT']['mean_base'] > 5)
        self.assertTrue(result.Vdata['Letter']['variance'] < 15 and result.Vdata['Letter']['variance'] > 5)

    def test_discrete_constraint_estimatestruct(self):
        # the recovered structure should contain this known edge
        result = self.l.discrete_constraint_estimatestruct(self.samplediscseq)
        self.assertTrue(["Difficulty", "Grade"] in result.E)

    def test_lg_constraint_estimatestruct(self):
        result = self.l.lg_constraint_estimatestruct(self.samplelgseq)
        self.assertTrue(["Intelligence", "Grade"] in result.E)

    def test_discrete_condind(self):
        # Difficulty and Letter should be independent given Grade (p > .05)
        chi, pv, witness = self.l.discrete_condind(self.samplediscseq, "Difficulty", "Letter", ["Grade"])
        self.assertTrue(pv > .05)
        # NOTE(review): assertTrue's second argument is the failure *message*,
        # not a comparison value — this line only checks witness is truthy.
        # Probably assertEqual(witness, ["Grade"]) was intended; confirm.
        self.assertTrue(witness, ["Grade"])
        # Difficulty and Intelligence should be dependent unconditionally
        chia, pva, witnessa = self.l.discrete_condind(self.samplediscseq, "Difficulty", "Intelligence", [])
        self.assertTrue(pva < .05)

    def test_discrete_estimatebn(self):
        result = self.l.discrete_estimatebn(self.samplediscseq)
        self.assertTrue(result.V)
        self.assertTrue(result.E)
        self.assertTrue(result.Vdata["Difficulty"]["cprob"][0])

    def test_lg_estimatebn(self):
        result = self.l.lg_estimatebn(self.samplelgseq)
        self.assertTrue(result.V)
        self.assertTrue(result.E)
        self.assertTrue(result.Vdata["Intelligence"]["mean_base"])
def bn_learn(attr, cicli, passed_file):
    """Build a hybrid Bayesian network from per-day sentiment JSON files and
    run an MCMC estimate of the market trend.

    Args:
        attr: int 0-5 selecting which sentiment attributes become nodes
            (tweet counts, averaged counts, averaged values, increments, or
            a "bullisment" index).
        cicli: passed through to mcmc_json (presumably the MCMC cycle count
            — TODO confirm against mcmc_json).
        passed_file: passed through to mcmc_json.

    Returns:
        tuple: (estimated, real) as returned by mcmc_json.
    """
    path_to_sentiments = 'sentiment_AFINN'
    print("Using AFINN sentiment dictionary")
    if attr == 0:
        print("Considering tweets' number")
    elif attr == 1:
        print("Considering averaged number of positive, negative and neutral tweets")
    elif attr == 2:
        print("Considering averaged value of positive and negative tweets")
    elif attr == 3:
        print("Considering positive and negative tweets\' increment")
    elif attr == 4:
        print("Considering bullisment index obtained by number of tweets sentiment")
    elif attr == 5:
        print("Considering bullisment index obtained by tweets value of sentiment")
    print("And considering market trend")

    # Collect one observation dict per JSON file.
    # `fname` (not `file`) avoids shadowing the builtin.
    all_data = []
    files = [path_to_sentiments + "/" + fname
             for fname in os.listdir(path_to_sentiments)
             if fname.endswith('.json')]
    for fname in files:
        with open(fname) as sentiment_file:
            data = json.load(sentiment_file)
        vdata = {}
        if attr == 0:
            vdata["com"] = data["n_tweets"]
        elif attr == 1:
            vdata["pos"] = data["n_pos_ave"]
            vdata["neg"] = data["n_neg_ave"]
            vdata["neu"] = data["n_neu_ave"]
        elif attr == 2:
            vdata["pos"] = data["pos_val_ave"]
            vdata["neg"] = data["neg_val_ave"]
        elif attr == 3:
            vdata["pos"] = data["pos_inc"]
            vdata["neg"] = data["neg_inc"]
        elif attr == 4:
            vdata["com"] = data["bull_ind"]
        elif attr == 5:
            vdata["com"] = data["bull_ind_val"]
        vdata["market"] = data["market_inc"]
        all_data.append(vdata)

    # Pick the network structure matching the number of attribute nodes
    # (observation dicts have 2-4 keys including "market").
    skel = GraphSkeleton()
    if len(all_data[0]) == 2:
        skel.load("network_struct_1_vertex.json")
        print("Loading structure with 2 node")
    elif len(all_data[0]) == 3:
        skel.load("network_struct_2_vertex.json")
        print("Loading structure with 3 node")
    elif len(all_data[0]) == 4:
        skel.load("network_struct_3_vertex.json")
        print("Loading structure with 4 node")
    skel.toporder()

    # MLE-fit all nodes as linear Gaussian first...
    learner = PGMLearner()
    result = learner.lg_mle_estimateparams(skel, all_data)
    for key in result.Vdata.keys():
        result.Vdata[key]['type'] = 'lg'

    # ...then replace the "market" node with a discrete node whose outcome
    # probabilities are the empirical frequencies of up / down / flat days.
    prob_pos = prob_neg = prob_neu = 0
    for data in all_data:
        if data['market'] == 1:
            prob_pos += 1
        elif data['market'] == 0:
            prob_neu += 1
        else:
            prob_neg += 1
    prob_pos = float(prob_pos) / float(len(all_data))
    prob_neg = float(prob_neg) / float(len(all_data))
    prob_neu = float(prob_neu) / float(len(all_data))

    tmp = {}
    tmp['numoutcomes'] = len(all_data)
    tmp['cprob'] = [prob_pos, prob_neg, prob_neu]
    tmp['parents'] = result.Vdata['market']['parents']
    tmp['vals'] = ['positive', 'negative', 'neutral']
    tmp['type'] = 'discrete'
    tmp['children'] = result.Vdata['market']['children']
    result.Vdata['market'] = tmp

    node = Discrete(result.Vdata["market"])
    print("Loading node as Discrete")

    estimated, real = mcmc_json(passed_file, attr, cicli, node)
    return estimated, real
# (10) ----------------------------------------------------------------------- # Learn the structure of a linear Gaussian Bayesian network, given data and a # structure # say I have some data data = lgbn.randomsample(200) # and a graphskeleton skel = GraphSkeleton() skel.load("../tests/unittestdict.txt") # instantiate my learner learner = PGMLearner() # estimate parameters result = learner.lg_mle_estimateparams(skel, data) # output - toggle comment to see #print json.dumps(result.Vdata, indent=2) # (11) ---------------------------------------------------------------------- # Learn a structure of a linear Gaussian Bayesian network, given only data # say I have some data data = lgbn.randomsample(8000) # instantiate my learner learner = PGMLearner() # estimate parameters result = learner.lg_constraint_estimatestruct(data)
"""Fit linear Gaussian BN parameters from data.txt/skel.txt and dump them."""
__author__ = 'Amir'

import json

from libpgm.graphskeleton import GraphSkeleton
from libpgm.pgmlearner import PGMLearner

# generate some data to use
# NOTE(review): eval() executes arbitrary code from data.txt — acceptable
# only if the file is fully trusted; otherwise switch to ast.literal_eval
# or json.load.
with open('data.txt', 'r') as f:
    data = eval(f.read())

skel = GraphSkeleton()
skel.load("skel.txt")
skel.toporder()

# instantiate my learner
learner = PGMLearner()

# estimate parameters from data and skeleton
result = learner.lg_mle_estimateparams(skel, data)

# output (parenthesized single-arg print: valid in both Python 2 and 3)
print(json.dumps(result.Vdata, indent=2))
class PGMLearnerServer(object):
    """ROS service node wrapping libpgm's PGMLearner.

    Exposes five services: discrete/linear-Gaussian parameter estimation,
    discrete/linear-Gaussian structure estimation, and discrete querying.
    `U` is a ROS<->libpgm conversion helper module (defined elsewhere).
    """

    def __init__(self):
        self.learner = PGMLearner()
        # register all services; private (~) names resolve under this node
        rospy.Service("~discrete/parameter_estimation", DiscreteParameterEstimation, self.discrete_parameter_estimation_cb)
        rospy.Service("~discrete/query", DiscreteQuery, self.discrete_query_cb)
        rospy.Service("~discrete/structure_estimation", DiscreteStructureEstimation, self.discrete_structure_estimation_cb)
        rospy.Service("~linear_gaussian/parameter_estimation", LinearGaussianParameterEstimation, self.lg_parameter_estimation_cb)
        rospy.Service("~linear_gaussian/structure_estimation", LinearGaussianStructureEstimation, self.lg_structure_estimation_cb)

    def discrete_parameter_estimation_cb(self, req):
        """MLE-fit discrete CPTs from req.graph + req.states."""
        skel = U.graph_skeleton_from_ros(req.graph)
        skel.toporder()
        data = U.graph_states_dict_from_ros(req.states)
        res = self.learner.discrete_mle_estimateparams(skel, data)
        return DiscreteParameterEstimationResponse(U.discrete_nodes_to_ros(res.Vdata))

    def discrete_query_cb(self, req):
        """Answer P(query | evidence) by exact factor elimination."""
        nd = U.discrete_nodedata_from_ros(req.nodes)
        skel = U.graph_skeleton_from_node_data(nd)
        skel.toporder()
        bn = DiscreteBayesianNetwork(skel, nd)
        fn = TableCPDFactorization(bn)
        # query maps each requested node to all of its outcome values
        q = {n: nd.Vdata[n]["vals"] for n in req.query}
        ev = {ns.node: ns.state for ns in req.evidence}
        rospy.loginfo("resolving query %s with evidence %s" % (q, ev))
        ans = fn.condprobve(query=q, evidence=ev)
        rospy.loginfo("%s -> %s" % (ans.scope, ans.vals))
        # NOTE(review): only the first node in ans.scope is returned, even if
        # the query covered several nodes — confirm this is intended.
        res = DiscreteQueryResponse()
        node = DiscreteNode()
        node.name = ans.scope[0]
        node.outcomes = q[node.name]
        node.CPT.append(ConditionalProbability(node.outcomes, ans.vals))
        res.nodes.append(node)
        return res

    def discrete_structure_estimation_cb(self, req):
        """Constraint-based structure learning from discrete state samples."""
        states = [{ns.node: ns.state for ns in s.node_states} for s in req.states]
        pvalparam = 0.05  # default value
        indegree = 1  # default value
        # 0 / 0.0 in the request means "use the default"
        if req.pvalparam != 0.0:
            pvalparam = req.pvalparam
        if req.indegree != 0:
            indegree = req.indegree
        res = self.learner.discrete_constraint_estimatestruct(states, pvalparam=pvalparam, indegree=indegree)
        return DiscreteStructureEstimationResponse(U.graph_skeleton_to_ros(res))

    def lg_parameter_estimation_cb(self, req):
        """MLE-fit linear Gaussian parameters from req.graph + req.states."""
        skel = U.graph_skeleton_from_ros(req.graph)
        skel.toporder()
        data = U.graph_states_dict_from_ros(req.states)
        res = self.learner.lg_mle_estimateparams(skel, data)
        rospy.logdebug("parameter estimation: %s" % res.Vdata)
        return LinearGaussianParameterEstimationResponse(U.linear_gaussian_nodes_to_ros(res.Vdata))

    def lg_structure_estimation_cb(self, req):
        """Constraint-based structure learning for linear Gaussian data."""
        states = [{ns.node: ns.state for ns in s.node_states} for s in req.states]
        rospy.logdebug(states)
        pvalparam = 0.05  # default value
        bins = 10  # default value
        indegree = 1  # default value
        # 0 / 0.0 in the request means "use the default"
        if req.pvalparam != 0.0:
            pvalparam = req.pvalparam
        if req.bins != 0:
            bins = req.bins
        if req.indegree != 0:
            indegree = req.indegree
        rospy.logdebug("bins: %d, pvalparam: %f, indegree: %d" % (bins, pvalparam, indegree))
        res = self.learner.lg_constraint_estimatestruct(states, pvalparam=pvalparam, bins=bins, indegree=indegree)
        rospy.logdebug("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
        rospy.logdebug(res.V)
        rospy.logdebug(res.E)
        return LinearGaussianStructureEstimationResponse(U.graph_skeleton_to_ros(res))
class PGMTrainer(Trainer):
    """Trainer that fits a linear Gaussian BN (via PGMLearner) to RL
    transition samples stored in self.ds.

    NOTE(review): this block was recovered from a whitespace-mangled source;
    the nesting of the sample-padding branch in train() is the most plausible
    reading — confirm against the original history.
    """

    def __init__(self, module, dataset=None):
        Trainer.__init__(self, module)
        #self.setData(dataset)
        self.ds = dataset  # sequences of (state, action, reward) triples
        self.learner = PGMLearner()

    def train(self):
        """Train the associated module for one epoch."""
        assert len(self.ds) > 0, "Dataset cannot be empty."
        # gbds: samples with non-negative reward ("good"); ds2: reward == -1
        gbds = []
        ds2 = []
        for seq in self.ds:
            for state_, action_, reward_ in seq:
                #sample = dict(theta=state_[0],thetaV=state_[1],s=state_[2],sV=state_[3],Action=action_[0],Reward=reward_[0])
                # note the deliberate B<->C swap of state components
                sample = dict(StateA=state_[0],StateB=state_[2],StateC=state_[1],StateD=state_[3],Action=action_[0],Reward=reward_[0])
                #print state_, action_, reward_
                if sample["Reward"] >= 0:
                    gbds.append(sample)
                if sample["Reward"] == -1:
                    ds2.append(sample)

        # Pad with the newest negative-reward samples when there are fewer
        # than N good ones.
        N = 200
        if len(gbds) < N:
            l = N - len(gbds)
            n = len(ds2)
            t = len(ds2[n-l:])  # NOTE(review): t is computed but never used
            gbds.extend(ds2[n-l:])
            print "ds:, ", len(gbds), len(ds2)
            if len(gbds) < 100:
                # too little data even after padding: mark burn-in, skip fit
                self.module.burn = True
                return
            else:
                self.module.burn = False

        # NOTE(review): unreachable under this nesting — any gbds shorter
        # than 100 already returned above; confirm intent.
        if len(gbds) < 5:
            # there was no rewarding action, so nothing to learn
            self.module.burn = True
            return

        # only take the newest N samples
        N = 200
        if len(gbds) > N:
            l = len(gbds)
            gbds = gbds[l-N:]

        # load network topology
        skel = GraphSkeleton()
        skel.load("net2.txt")
        skel.toporder()

        # estimate parameters and hand the fitted net to the module
        self.module.net = self.learner.lg_mle_estimateparams(skel, gbds)