def learn_net(data):
    '''Learn a discrete Bayesian network (structure and parameters) from raw data.

    data must support to_dict('records') (a pandas-style DataFrame —
    TODO confirm with callers). Returns the fitted discrete Bayes net.
    '''
    records = data.to_dict('records')
    pgm_learner = PGMLearner()
    # constraint-based structure search first, then MLE parameter fit
    skeleton = pgm_learner.discrete_constraint_estimatestruct(data=records, indegree=1)
    skeleton.toporder()
    return pgm_learner.discrete_mle_estimateparams(graphskeleton=skeleton, data=records)
def learn_net_discretize(data, vars_to_discretize, n_bins):
    '''Learn a discrete Bayesian network after discretizing selected variables.

    vars_to_discretize and n_bins are forwarded to discretize(); returns a
    (network, bins) tuple where bins describes the discretization applied.
    '''
    discretized, bins = discretize(data, vars_to_discretize, n_bins=n_bins)
    records = discretized.to_dict('records')
    pgm_learner = PGMLearner()
    # same pipeline as learn_net: structure search, then MLE parameter fit
    skeleton = pgm_learner.discrete_constraint_estimatestruct(data=records, indegree=1)
    skeleton.toporder()
    return pgm_learner.discrete_mle_estimateparams(graphskeleton=skeleton, data=records), bins
def createData(): nd = NodeData() skel = GraphSkeleton() fpath = "job_interview.txt" nd.load(fpath) skel.load(fpath) skel.toporder() bn = DiscreteBayesianNetwork(skel, nd) learner = PGMLearner() data = bn.randomsample(1000) X, Y = 'Grades', 'Offer' c,p,w=learner.discrete_condind(data, X, Y, ['Interview']) print "independence between X and Y: ", c, " p-value ", p, " witness node: ", w result = learner.discrete_constraint_estimatestruct(data) print result.E
class TestPGMLearner(unittest.TestCase):
    """Tests for PGMLearner's parameter- and structure-learning methods.

    setUp builds one discrete and one linear Gaussian Bayesian network from
    on-disk definitions, then draws random samples from each; the tests check
    that the learner can recover parameters/structure from those samples.
    Numeric bounds are loose because the sample sequences are random.
    """

    def setUp(self):
        # instantiate learner
        self.l = PGMLearner()

        # generate graph skeleton
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()

        # generate sample sequence to try to learn from - discrete
        nd = NodeData()
        nd.load("unittestdict.txt")
        self.samplediscbn = DiscreteBayesianNetwork(skel, nd)
        self.samplediscseq = self.samplediscbn.randomsample(5000)

        # generate sample sequence to try to learn from - linear Gaussian
        # (comment fixed: this is the LG network, not a second discrete one)
        nda = NodeData()
        nda.load("unittestlgdict.txt")
        self.samplelgbn = LGBayesianNetwork(skel, nda)
        self.samplelgseq = self.samplelgbn.randomsample(10000)

        self.skel = skel

    def test_discrete_mle_estimateparams(self):
        result = self.l.discrete_mle_estimateparams(self.skel, self.samplediscseq)
        indexa = result.Vdata['SAT']['vals'].index('lowscore')
        # chained comparisons replace the equivalent `a < x and x > b` pairs
        self.assertTrue(.9 < result.Vdata['SAT']['cprob']["['low']"][indexa] < 1)
        indexb = result.Vdata['Letter']['vals'].index('weak')
        self.assertTrue(.05 < result.Vdata['Letter']['cprob']["['A']"][indexb] < .15)

    def test_lg_mle_estimateparams(self):
        result = self.l.lg_mle_estimateparams(self.skel, self.samplelgseq)
        self.assertTrue(5 < result.Vdata['SAT']['mean_base'] < 15)
        self.assertTrue(5 < result.Vdata['Letter']['variance'] < 15)

    def test_discrete_constraint_estimatestruct(self):
        result = self.l.discrete_constraint_estimatestruct(self.samplediscseq)
        self.assertTrue(["Difficulty", "Grade"] in result.E)

    def test_lg_constraint_estimatestruct(self):
        result = self.l.lg_constraint_estimatestruct(self.samplelgseq)
        self.assertTrue(["Intelligence", "Grade"] in result.E)

    def test_discrete_condind(self):
        chi, pv, witness = self.l.discrete_condind(self.samplediscseq, "Difficulty", "Letter", ["Grade"])
        self.assertTrue(pv > .05)
        # BUG FIX: the original `self.assertTrue(witness, ["Grade"])` passed
        # the expected value as assertTrue's *msg* argument, so it only
        # checked that witness was truthy and could never fail on a wrong
        # witness. The intent was an equality check.
        self.assertEqual(witness, ["Grade"])
        chia, pva, witnessa = self.l.discrete_condind(self.samplediscseq, "Difficulty", "Intelligence", [])
        self.assertTrue(pva < .05)

    def test_discrete_estimatebn(self):
        result = self.l.discrete_estimatebn(self.samplediscseq)
        self.assertTrue(result.V)
        self.assertTrue(result.E)
        self.assertTrue(result.Vdata["Difficulty"]["cprob"][0])

    def test_lg_estimatebn(self):
        result = self.l.lg_estimatebn(self.samplelgseq)
        self.assertTrue(result.V)
        self.assertTrue(result.E)
        self.assertTrue(result.Vdata["Intelligence"]["mean_base"])
@author: himanshu ''' import json from networkx import DiGraph, draw from libpgm.nodedata import NodeData from libpgm.graphskeleton import GraphSkeleton from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork from libpgm.pgmlearner import PGMLearner import matplotlib.pyplot as plt from data_extractor import DataExtractor # generate some data to use data_ext = DataExtractor('genome', format = 'json') data = data_ext.get_data_vectors() print 'Got data with ', len(data), ' vectors' # instantiate my learner learner = PGMLearner() print 'learning the structure' # estimate structure result = learner.discrete_constraint_estimatestruct(data, pvalparam = 0.02) # output print json.dumps(result.E, indent = 2) graph = DiGraph() graph.add_edges_from(result.E) draw(graph) plt.show()
result = learner.discrete_mle_estimateparams(skel, data) # output - toggle comment to see #print json.dumps(result.Vdata, indent=2) # (9) ------------------------------------------------------------------------- # Learn the structure of a discrete Bayesian network, given only data: # say I have some data data = bn.randomsample(2000) # instantiate my learner learner = PGMLearner() # estimate parameters result = learner.discrete_constraint_estimatestruct(data) # output - toggle comment to see #print json.dumps(result.E, indent=2) # (10) ----------------------------------------------------------------------- # Learn the structure of a linear Gaussian Bayesian network, given data and a # structure # say I have some data data = lgbn.randomsample(200) # and a graphskeleton skel = GraphSkeleton() skel.load("../tests/unittestdict.txt")
import json from libpgm.nodedata import NodeData from libpgm.graphskeleton import GraphSkeleton from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork from libpgm.pgmlearner import PGMLearner # generate some data to use nd = NodeData() nd.load("grades.txt") # an input file skel = GraphSkeleton() skel.load("grades.txt") skel.toporder() bn = DiscreteBayesianNetwork(skel, nd) data = bn.randomsample(80000) # instantiate my learner learner = PGMLearner() # estimate structure result = learner.discrete_constraint_estimatestruct(data) # output print json.dumps(result.E, indent=2)
class PGMLearnerServer(object):
    """ROS service wrapper around libpgm's PGMLearner.

    Exposes five services: parameter estimation, query, and structure
    estimation for discrete networks, plus parameter and structure estimation
    for linear Gaussian networks. Each callback converts the ROS request into
    libpgm structures via the U helper module, runs the shared learner, and
    converts the result back into a ROS response.
    """

    def __init__(self):
        self.learner = PGMLearner()
        # one rospy service per capability; all callbacks share self.learner
        rospy.Service("~discrete/parameter_estimation", DiscreteParameterEstimation, self.discrete_parameter_estimation_cb)
        rospy.Service("~discrete/query", DiscreteQuery, self.discrete_query_cb)
        rospy.Service("~discrete/structure_estimation", DiscreteStructureEstimation, self.discrete_structure_estimation_cb)
        rospy.Service("~linear_gaussian/parameter_estimation", LinearGaussianParameterEstimation, self.lg_parameter_estimation_cb)
        rospy.Service("~linear_gaussian/structure_estimation", LinearGaussianStructureEstimation, self.lg_structure_estimation_cb)

    def discrete_parameter_estimation_cb(self, req):
        """MLE-fit CPTs for the known graph in req.graph from req.states."""
        skel = U.graph_skeleton_from_ros(req.graph)
        skel.toporder()
        data = U.graph_states_dict_from_ros(req.states)
        res = self.learner.discrete_mle_estimateparams(skel, data)
        return DiscreteParameterEstimationResponse(U.discrete_nodes_to_ros(res.Vdata))

    def discrete_query_cb(self, req):
        """Answer P(query | evidence) on the network described by req.nodes.

        NOTE(review): only ans.scope[0] is packed into the response, so a
        multi-variable query yields a single response node - confirm intended.
        """
        nd = U.discrete_nodedata_from_ros(req.nodes)
        skel = U.graph_skeleton_from_node_data(nd)
        skel.toporder()
        bn = DiscreteBayesianNetwork(skel, nd)
        fn = TableCPDFactorization(bn)
        # query maps each requested variable to all of its outcomes;
        # evidence pins observed nodes to single states
        q = {n: nd.Vdata[n]["vals"] for n in req.query}
        ev = {ns.node: ns.state for ns in req.evidence}
        rospy.loginfo("resolving query %s with evidence %s" % (q, ev))
        ans = fn.condprobve(query=q, evidence=ev)
        rospy.loginfo("%s -> %s" % (ans.scope, ans.vals))
        res = DiscreteQueryResponse()
        node = DiscreteNode()
        node.name = ans.scope[0]
        node.outcomes = q[node.name]
        node.CPT.append(ConditionalProbability(node.outcomes, ans.vals))
        res.nodes.append(node)
        return res

    def discrete_structure_estimation_cb(self, req):
        """Constraint-based structure search over discrete observation sets."""
        states = [{ns.node: ns.state for ns in s.node_states} for s in req.states]
        pvalparam = 0.05  # default value
        indegree = 1  # default value
        # 0 / 0.0 in the request means "not set" -> keep the defaults above
        if req.pvalparam != 0.0:
            pvalparam = req.pvalparam
        if req.indegree != 0:
            indegree = req.indegree
        res = self.learner.discrete_constraint_estimatestruct(
            states, pvalparam=pvalparam, indegree=indegree)
        return DiscreteStructureEstimationResponse(U.graph_skeleton_to_ros(res))

    def lg_parameter_estimation_cb(self, req):
        """MLE-fit linear Gaussian parameters for the known graph in req.graph."""
        skel = U.graph_skeleton_from_ros(req.graph)
        skel.toporder()
        data = U.graph_states_dict_from_ros(req.states)
        res = self.learner.lg_mle_estimateparams(skel, data)
        rospy.logdebug("parameter estimation: %s" % res.Vdata)
        return LinearGaussianParameterEstimationResponse(U.linear_gaussian_nodes_to_ros(res.Vdata))

    def lg_structure_estimation_cb(self, req):
        """Constraint-based structure search for continuous data (binned)."""
        states = [{ns.node: ns.state for ns in s.node_states} for s in req.states]
        rospy.logdebug(states)
        pvalparam = 0.05  # default value
        bins = 10  # default value
        indegree = 1  # default value
        # as above: zero request fields mean "use the default"
        if req.pvalparam != 0.0:
            pvalparam = req.pvalparam
        if req.bins != 0:
            bins = req.bins
        if req.indegree != 0:
            indegree = req.indegree
        rospy.logdebug("bins: %d, pvalparam: %f, indegree: %d" % (bins, pvalparam, indegree))
        res = self.learner.lg_constraint_estimatestruct(states,
                                                        pvalparam=pvalparam,
                                                        bins=bins,
                                                        indegree=indegree)
        rospy.logdebug("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
        rospy.logdebug(res.V)
        rospy.logdebug(res.E)
        return LinearGaussianStructureEstimationResponse(U.graph_skeleton_to_ros(res))
learner_struc = PGMLearner() #load data and tranform it to a list of dictionaries data = format_data(df_train) # This method learns a Bayesian network structure from discrete data given # by data, using constraint-based approaches. The function calls discrete_condind # (voir ci-dessous) to determine the dependencies between variables. # Possible params are: # * pvalparam is te value of the p-value used to determine whether two variables # are conditionally indep.(This is obviously necessary to find the net structure). # * indegree = is used to determine the size of the set of variables used to find dependencies # (basically the "witness" variables, this will determine the size of the array passed in the # third argument of the discrete_condind call). result_structure = learner_struc.discrete_constraint_estimatestruct(data, indegree=1,pvalparam=0.05) # The result if always the same for any value of indegree # result is stable for smaller values of 0.05 #The resulting structure is the identical result_structure.getchildren('Fare'), result_structure.getchildren('Class') result_structure.E # In[ ]: # We can thus use the skeleton defined before in jsonpath_skel to learn params #