Example #1
0
def learn_net(data):
    '''Learn a discrete Bayes net (structure and parameters) from raw data.

    data -- a pandas DataFrame; each row becomes one observation dict.
    Returns the fitted discrete Bayesian network.
    '''
    records = data.to_dict('records')
    learner = PGMLearner()
    # Constraint-based structure search, capped at one parent per node.
    skeleton = learner.discrete_constraint_estimatestruct(data=records, indegree=1)
    skeleton.toporder()
    # Maximum-likelihood parameter estimation on the learned skeleton.
    return learner.discrete_mle_estimateparams(graphskeleton=skeleton, data=records)
Example #2
0
def learn_net_discretize(data, vars_to_discretize, n_bins):
    '''Learn a discrete Bayes net after binning the selected variables.

    data -- a pandas DataFrame of raw observations.
    vars_to_discretize -- variables to discretize before learning.
    n_bins -- number of bins used for discretization.
    Returns (network, bins): the fitted net and the bin edges used.
    '''
    binned, bins = discretize(data, vars_to_discretize, n_bins=n_bins)
    records = binned.to_dict('records')
    learner = PGMLearner()
    # Same pipeline as learn_net: structure search, then MLE parameters.
    skeleton = learner.discrete_constraint_estimatestruct(data=records, indegree=1)
    skeleton.toporder()
    network = learner.discrete_mle_estimateparams(graphskeleton=skeleton, data=records)
    return network, bins
def createData():
   nd = NodeData()
   skel = GraphSkeleton()
   fpath = "job_interview.txt"
   nd.load(fpath)
   skel.load(fpath)
   skel.toporder()
   bn = DiscreteBayesianNetwork(skel, nd)

   learner = PGMLearner()
   data = bn.randomsample(1000)
   X, Y = 'Grades', 'Offer'
   c,p,w=learner.discrete_condind(data, X, Y, ['Interview'])
   print "independence between X and Y: ", c, " p-value ", p, " witness node: ", w
   result = learner.discrete_constraint_estimatestruct(data)
   print result.E
Example #4
0
class TestPGMLearner(unittest.TestCase):
    """Tests for PGMLearner: parameter and structure learning on samples
    drawn from known discrete and linear-Gaussian Bayesian networks."""

    def setUp(self):
        # instantiate learner
        self.l = PGMLearner()

        # generate graph skeleton
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()

        # generate sample sequence to try to learn from - discrete
        nd = NodeData()
        nd.load("unittestdict.txt")
        self.samplediscbn = DiscreteBayesianNetwork(skel, nd)
        self.samplediscseq = self.samplediscbn.randomsample(5000)

        # generate sample sequence to try to learn from - linear Gaussian
        # (this branch loads the linear-Gaussian dictionary)
        nda = NodeData()
        nda.load("unittestlgdict.txt")
        self.samplelgbn = LGBayesianNetwork(skel, nda)
        self.samplelgseq = self.samplelgbn.randomsample(10000)

        self.skel = skel

    def test_discrete_mle_estimateparams(self):
        """Learned CPD entries should be close to the generating values."""
        result = self.l.discrete_mle_estimateparams(self.skel, self.samplediscseq)
        indexa = result.Vdata['SAT']['vals'].index('lowscore')
        self.assertTrue(result.Vdata['SAT']['cprob']["['low']"][indexa] < 1 and result.Vdata['SAT']['cprob']["['low']"][indexa] > .9)
        indexb = result.Vdata['Letter']['vals'].index('weak')
        self.assertTrue(result.Vdata['Letter']['cprob']["['A']"][indexb] < .15 and result.Vdata['Letter']['cprob']["['A']"][indexb] > .05)

    def test_lg_mle_estimateparams(self):
        """Learned linear-Gaussian parameters should fall in expected ranges."""
        result = self.l.lg_mle_estimateparams(self.skel, self.samplelgseq)
        self.assertTrue(result.Vdata['SAT']['mean_base'] < 15 and result.Vdata['SAT']['mean_base'] > 5)
        self.assertTrue(result.Vdata['Letter']['variance'] < 15 and result.Vdata['Letter']['variance'] > 5)

    def test_discrete_constraint_estimatestruct(self):
        """Structure learning should recover a known true edge."""
        result = self.l.discrete_constraint_estimatestruct(self.samplediscseq)
        self.assertTrue(["Difficulty", "Grade"] in result.E)

    def test_lg_constraint_estimatestruct(self):
        """Structure learning should recover a known true edge."""
        result = self.l.lg_constraint_estimatestruct(self.samplelgseq)
        self.assertTrue(["Intelligence", "Grade"] in result.E)

    def test_discrete_condind(self):
        """Variables d-separated by the witness test independent; a
        directly dependent pair tests dependent."""
        chi, pv, witness = self.l.discrete_condind(self.samplediscseq, "Difficulty", "Letter", ["Grade"])
        self.assertTrue(pv > .05)
        # FIX: assertTrue(witness, ["Grade"]) treated the list as the failure
        # message and only checked truthiness; assertEqual performs the
        # intended comparison of the witness set.
        self.assertEqual(witness, ["Grade"])
        chia, pva, witnessa = self.l.discrete_condind(self.samplediscseq, "Difficulty", "Intelligence", [])
        self.assertTrue(pva < .05)

    def test_discrete_estimatebn(self):
        """End-to-end discrete learning returns nodes, edges and CPDs."""
        result = self.l.discrete_estimatebn(self.samplediscseq)
        self.assertTrue(result.V)
        self.assertTrue(result.E)
        self.assertTrue(result.Vdata["Difficulty"]["cprob"][0])

    def test_lg_estimatebn(self):
        """End-to-end linear-Gaussian learning returns nodes, edges, params."""
        result = self.l.lg_estimatebn(self.samplelgseq)
        self.assertTrue(result.V)
        self.assertTrue(result.E)
        self.assertTrue(result.Vdata["Intelligence"]["mean_base"])
Example #5
0
class TestPGMLearner(unittest.TestCase):
    """Tests for PGMLearner: parameter and structure learning on samples
    drawn from known discrete and linear-Gaussian Bayesian networks."""

    def setUp(self):
        # instantiate learner
        self.l = PGMLearner()

        # generate graph skeleton
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()

        # generate sample sequence to try to learn from - discrete
        nd = NodeData()
        nd.load("unittestdict.txt")
        self.samplediscbn = DiscreteBayesianNetwork(skel, nd)
        self.samplediscseq = self.samplediscbn.randomsample(5000)

        # generate sample sequence to try to learn from - linear Gaussian
        # (this branch loads the linear-Gaussian dictionary)
        nda = NodeData()
        nda.load("unittestlgdict.txt")
        self.samplelgbn = LGBayesianNetwork(skel, nda)
        self.samplelgseq = self.samplelgbn.randomsample(10000)

        self.skel = skel

    def test_discrete_mle_estimateparams(self):
        """Learned CPD entries should be close to the generating values."""
        result = self.l.discrete_mle_estimateparams(self.skel, self.samplediscseq)
        indexa = result.Vdata['SAT']['vals'].index('lowscore')
        self.assertTrue(result.Vdata['SAT']['cprob']["['low']"][indexa] < 1 and result.Vdata['SAT']['cprob']["['low']"][indexa] > .9)
        indexb = result.Vdata['Letter']['vals'].index('weak')
        self.assertTrue(result.Vdata['Letter']['cprob']["['A']"][indexb] < .15 and result.Vdata['Letter']['cprob']["['A']"][indexb] > .05)

    def test_lg_mle_estimateparams(self):
        """Learned linear-Gaussian parameters should fall in expected ranges."""
        result = self.l.lg_mle_estimateparams(self.skel, self.samplelgseq)
        self.assertTrue(result.Vdata['SAT']['mean_base'] < 15 and result.Vdata['SAT']['mean_base'] > 5)
        self.assertTrue(result.Vdata['Letter']['variance'] < 15 and result.Vdata['Letter']['variance'] > 5)

    def test_discrete_constraint_estimatestruct(self):
        """Structure learning should recover a known true edge."""
        result = self.l.discrete_constraint_estimatestruct(self.samplediscseq)
        self.assertTrue(["Difficulty", "Grade"] in result.E)

    def test_lg_constraint_estimatestruct(self):
        """Structure learning should recover a known true edge."""
        result = self.l.lg_constraint_estimatestruct(self.samplelgseq)
        self.assertTrue(["Intelligence", "Grade"] in result.E)

    def test_discrete_condind(self):
        """Variables d-separated by the witness test independent; a
        directly dependent pair tests dependent."""
        chi, pv, witness = self.l.discrete_condind(self.samplediscseq, "Difficulty", "Letter", ["Grade"])
        self.assertTrue(pv > .05)
        # FIX: assertTrue(witness, ["Grade"]) treated the list as the failure
        # message and only checked truthiness; assertEqual performs the
        # intended comparison of the witness set.
        self.assertEqual(witness, ["Grade"])
        chia, pva, witnessa = self.l.discrete_condind(self.samplediscseq, "Difficulty", "Intelligence", [])
        self.assertTrue(pva < .05)

    def test_discrete_estimatebn(self):
        """End-to-end discrete learning returns nodes, edges and CPDs."""
        result = self.l.discrete_estimatebn(self.samplediscseq)
        self.assertTrue(result.V)
        self.assertTrue(result.E)
        self.assertTrue(result.Vdata["Difficulty"]["cprob"][0])

    def test_lg_estimatebn(self):
        """End-to-end linear-Gaussian learning returns nodes, edges, params."""
        result = self.l.lg_estimatebn(self.samplelgseq)
        self.assertTrue(result.V)
        self.assertTrue(result.E)
        self.assertTrue(result.Vdata["Intelligence"]["mean_base"])
Example #6
0
@author: himanshu
'''
import json
from networkx import DiGraph, draw
from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.pgmlearner import PGMLearner
import matplotlib.pyplot as plt
from data_extractor import DataExtractor


#  Generate some data to use: pull 'genome' records via the project's
#  DataExtractor (JSON format).
#  NOTE(review): assumes get_data_vectors() returns a list of dicts keyed by
#  variable name, as PGMLearner expects -- confirm against DataExtractor.
data_ext = DataExtractor('genome', format = 'json')
data = data_ext.get_data_vectors()
print 'Got data with ', len(data), ' vectors'
#  instantiate my learner
learner = PGMLearner()

print 'learning the structure'
#  Estimate structure via constraint-based search; pvalparam=0.02 is the
#  p-value threshold for the conditional-independence tests.
result = learner.discrete_constraint_estimatestruct(data, pvalparam = 0.02)

#  Output: dump the learned edge list, then render the DAG with networkx.
print json.dumps(result.E, indent = 2)
graph = DiGraph()
graph.add_edges_from(result.E)
draw(graph)
plt.show()
Example #7
0
# Estimate CPD parameters by maximum likelihood, given a structure (skel)
# and data defined earlier in this script (not visible in this excerpt).
result = learner.discrete_mle_estimateparams(skel, data)

# output - toggle comment to see
#print json.dumps(result.Vdata, indent=2)

# (9) -------------------------------------------------------------------------
# Learn the structure of a discrete Bayesian network, given only data:

# say I have some data (bn is the discrete network built earlier)
data = bn.randomsample(2000)

# instantiate my learner
learner = PGMLearner()

# estimate structure (constraint-based; note this learns the edge set,
# not the parameters)
result = learner.discrete_constraint_estimatestruct(data)

# output - toggle comment to see
#print json.dumps(result.E, indent=2)

# (10) -----------------------------------------------------------------------
# Learn the structure of a linear Gaussian Bayesian network, given data and a
# structure

# say I have some data (lgbn is the linear-Gaussian network built earlier)
data = lgbn.randomsample(200)

# and a graphskeleton
skel = GraphSkeleton()
skel.load("../tests/unittestdict.txt")
Example #8
0
import json

from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.pgmlearner import PGMLearner

# Generate some data to use: build the "grades" network from its spec file
# and draw 80,000 random samples from it.
nd = NodeData()
nd.load("grades.txt")    # an input file (node data and skeleton share it)
skel = GraphSkeleton()
skel.load("grades.txt")
skel.toporder()    # topological order is required before building the network
bn = DiscreteBayesianNetwork(skel, nd)
data = bn.randomsample(80000)

# instantiate my learner
learner = PGMLearner()

# estimate structure from the samples alone (constraint-based search)
result = learner.discrete_constraint_estimatestruct(data)

# output: the learned edge list
print json.dumps(result.E, indent=2)
Example #9
0
class PGMLearnerServer(object):
    """ROS service front-end around libpgm's PGMLearner.

    Registers five services: discrete parameter estimation, discrete query,
    discrete structure estimation, and linear-Gaussian parameter/structure
    estimation. Conversions between ROS messages and libpgm objects are
    delegated to the helper module U.
    """

    def __init__(self):
        self.learner = PGMLearner()
        rospy.Service("~discrete/parameter_estimation",
                      DiscreteParameterEstimation,
                      self.discrete_parameter_estimation_cb)
        rospy.Service("~discrete/query", DiscreteQuery, self.discrete_query_cb)
        rospy.Service("~discrete/structure_estimation",
                      DiscreteStructureEstimation,
                      self.discrete_structure_estimation_cb)
        rospy.Service("~linear_gaussian/parameter_estimation",
                      LinearGaussianParameterEstimation,
                      self.lg_parameter_estimation_cb)
        rospy.Service("~linear_gaussian/structure_estimation",
                      LinearGaussianStructureEstimation,
                      self.lg_structure_estimation_cb)

    def discrete_parameter_estimation_cb(self, req):
        """Fit discrete CPDs by maximum likelihood on the requested graph."""
        skeleton = U.graph_skeleton_from_ros(req.graph)
        skeleton.toporder()
        observations = U.graph_states_dict_from_ros(req.states)
        estimate = self.learner.discrete_mle_estimateparams(skeleton, observations)
        return DiscreteParameterEstimationResponse(
            U.discrete_nodes_to_ros(estimate.Vdata))

    def discrete_query_cb(self, req):
        """Answer P(query | evidence) on the network encoded in the request."""
        node_data = U.discrete_nodedata_from_ros(req.nodes)
        skeleton = U.graph_skeleton_from_node_data(node_data)
        skeleton.toporder()
        network = DiscreteBayesianNetwork(skeleton, node_data)
        factorization = TableCPDFactorization(network)
        # Query over every outcome of each requested node; evidence pins states.
        query = {name: node_data.Vdata[name]["vals"] for name in req.query}
        evidence = {ns.node: ns.state for ns in req.evidence}

        rospy.loginfo("resolving query %s with evidence %s" % (query, evidence))
        answer = factorization.condprobve(query=query, evidence=evidence)
        rospy.loginfo("%s -> %s" % (answer.scope, answer.vals))
        response = DiscreteQueryResponse()
        node = DiscreteNode()
        node.name = answer.scope[0]
        node.outcomes = query[node.name]
        node.CPT.append(ConditionalProbability(node.outcomes, answer.vals))
        response.nodes.append(node)
        return response

    def discrete_structure_estimation_cb(self, req):
        """Learn a discrete network structure from raw state samples."""
        samples = [{ns.node: ns.state for ns in s.node_states}
                   for s in req.states]
        # Zero-valued request fields mean "use the default".
        pvalparam = req.pvalparam if req.pvalparam != 0.0 else 0.05
        indegree = req.indegree if req.indegree != 0 else 1
        estimate = self.learner.discrete_constraint_estimatestruct(
            samples, pvalparam=pvalparam, indegree=indegree)
        return DiscreteStructureEstimationResponse(
            U.graph_skeleton_to_ros(estimate))

    def lg_parameter_estimation_cb(self, req):
        """Fit linear-Gaussian parameters on the requested graph."""
        skeleton = U.graph_skeleton_from_ros(req.graph)
        skeleton.toporder()
        observations = U.graph_states_dict_from_ros(req.states)
        estimate = self.learner.lg_mle_estimateparams(skeleton, observations)
        rospy.logdebug("parameter estimation: %s" % estimate.Vdata)
        return LinearGaussianParameterEstimationResponse(
            U.linear_gaussian_nodes_to_ros(estimate.Vdata))

    def lg_structure_estimation_cb(self, req):
        """Learn a linear-Gaussian network structure from raw state samples."""
        samples = [{ns.node: ns.state for ns in s.node_states}
                   for s in req.states]
        rospy.logdebug(samples)
        # Zero-valued request fields mean "use the default".
        pvalparam = req.pvalparam if req.pvalparam != 0.0 else 0.05
        bins = req.bins if req.bins != 0 else 10
        indegree = req.indegree if req.indegree != 0 else 1
        rospy.logdebug("bins: %d, pvalparam: %f, indegree: %d" %
                       (bins, pvalparam, indegree))
        estimate = self.learner.lg_constraint_estimatestruct(samples,
                                                             pvalparam=pvalparam,
                                                             bins=bins,
                                                             indegree=indegree)
        rospy.logdebug("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
        rospy.logdebug(estimate.V)
        rospy.logdebug(estimate.E)
        return LinearGaussianStructureEstimationResponse(
            U.graph_skeleton_to_ros(estimate))
Example #10
0
learner_struc = PGMLearner()

# Load the data and transform it to a list of dictionaries
# (format_data and df_train are defined earlier in the notebook).
data = format_data(df_train)

# This method learns a Bayesian network structure from discrete data given
# by data, using constraint-based approaches. The function calls discrete_condind
# (see below) to determine the dependencies between variables.
# Possible params are:
# * pvalparam is the p-value threshold used to determine whether two variables
# are conditionally independent. (This is obviously necessary to find the net structure).
# * indegree is used to determine the size of the set of variables used to find dependencies
# (basically the "witness" variables; this will determine the size of the array passed in the
# third argument of the discrete_condind call).

result_structure = learner_struc.discrete_constraint_estimatestruct(data, indegree=1,pvalparam=0.05)

# The result is always the same for any value of indegree;
# the result is stable for p-values smaller than 0.05.

# The resulting structure is identical either way:
result_structure.getchildren('Fare'), result_structure.getchildren('Class')
result_structure.E


# In[ ]:


# We can thus use the skeleton defined before in jsonpath_skel to learn params
#