Exemple #1
0
    def specificquery(self, query, evidence):
        '''
        Convenience wrapper delegating to the *specificquery* method in
        :doc:`tablecpdfactorization`.

        Eliminates every variable not named in *query*, conditions all
        distributions on *evidence*, and returns the probability of the
        exact event that *query* describes.

        Arguments:
            1. *query* -- dict of (variable: list-of-values) pairs naming
               the outcome whose probability is requested.  Each value must
               be a list, even when it holds a single entry.
            2. *evidence* -- dict of (variable: value) pairs of known facts
               about the system.

        Returns:
            The probability of the queried event(s), a float in [0, 1].

        Compound queries such as P((x=A or x=B) and (y=C or y=D)) are
        supported by listing several values per variable::

            {
                "x": ["A", "B"],
                "y": ["C", "D"]
            }

        '''
        # validate: the network must have been loaded before querying
        required = ("V", "E", "Vdata")
        if not all(hasattr(self, attr) for attr in required):
            raise notloadedError("Bayesian network is missing essential attributes")
        assert isinstance(query, dict) and isinstance(evidence, dict), "query and evidence must be dicts"
        for value in query.values():
            assert isinstance(value, list), "the values of your query must be lists, even if singletons"

        # delegate the actual elimination work to the factorization
        factorization = TableCPDFactorization(self)
        return factorization.specificquery(query, evidence)
Exemple #2
0
def recur(sc, temp, number, bn, val, jp):
    # Recursively enumerate every assignment of the last `number` variables
    # of bn.V over the two candidate values in `val`; once an assignment is
    # complete, compute its joint probability and append the row
    # [state_1, ..., state_k, probability] to `jp`.
    if number != 0:
        for choice in range(2):
            sc[bn.V[len(bn.V) - number]] = val[choice]
            recur(sc, temp, number - 1, bn, val, jp)
    else:
        probs = []
        joint = 1
        temp = []
        for idx in range(len(bn.V)):
            name = bn.V[idx]
            parents = bn.Vdata[name]['parents']
            if parents:
                # Conditional node: query P(node = assigned value | parents).
                fn = TableCPDFactorization(bn)
                evidence = {}
                for parent in parents:
                    evidence[parent] = sc[parent]
                query = {name: list(sc[name])}
                probs.append(fn.specificquery(query, evidence))
            else:
                # Root node: read the prior directly from its CPT
                # (states are assumed to be '0'/'1').
                if sc[name] == '0':
                    probs.append(bn.Vdata[name]['cprob'][0])
                else:
                    probs.append(bn.Vdata[name]['cprob'][1])
            temp.append(sc[name])
            joint = joint * probs[idx]
        temp.append(joint)
        jp.append(temp)
    def compute_vertex_marginal(self, v, evidence):
        """
        Compute the marginal distribution of vertex *v* given *evidence*.

        :return: a dictionary with: state name -> marginal values
            ex. {"state1": 0.5, "state2": 0.5}
        """
        query = {v: ''}
        marginals = {}
        states = self.get_states(v)

        if v in evidence:
            # Observed vertex: the distribution is degenerate, with all
            # mass on the observed state.
            observed = evidence[v]
            for state in states:
                marginals[state] = 1.0 if state == observed else 0.0
        else:
            # Query vertex: run variable elimination on a clone so the
            # original network is left untouched.
            fn = TableCPDFactorization(self.clone())
            mar_vals = fn.condprobve(query, evidence)
            # NOTE(review): assumes mar_vals.vals is ordered like
            # self.get_states(v) -- confirm against condprobve.
            for state, value in zip(states, mar_vals.vals):
                marginals[state] = value
        return marginals
Exemple #4
0
 def setUp(self):
     # Build the network + factorization fixture from the unit-test file.
     skel = GraphSkeleton()
     skel.load("unittestdict.txt")
     skel.toporder()
     # NodeData.load() populates the instance in place (it does not return
     # a NodeData), and DiscreteBayesianNetwork needs both the ordered
     # skeleton and the node data -- the original called NodeData.load as a
     # classmethod and dropped the skeleton (see the working pattern used
     # elsewhere in this file).
     nodedata = NodeData()
     nodedata.load("unittestdict.txt")
     self.bn = DiscreteBayesianNetwork(skel, nodedata)
     self.fn = TableCPDFactorization(self.bn)
Exemple #5
0
def estimate_distrib(skel, samples, query, evidence):
    # Fit the CPDs of `skel` to `samples` by maximum likelihood, then
    # answer the conditional query and format it as a distribution table.
    bayesnet = PGMLearner().discrete_mle_estimateparams(skel, samples)
    factorization = TableCPDFactorization(bayesnet)
    factor = factorization.condprobve(query, evidence)
    return printdist(factor, bayesnet)
Exemple #6
0
    def compute_vertex_marginal(self, v, evidence):
        """
        Compute the marginal distribution of vertex *v* given *evidence*.

        :param v: name of the vertex to marginalise over.
        :param evidence: dict of observed (vertex: state) pairs.
        :return: a dictionary with: state name -> marginal values
            ex. {"state1": 0.5, "state2": 0.5}
        """
        query = {v: ''}
        vertex_marginals = {}
        states = self.get_states(v)

        # Observed node: degenerate distribution on the observed state.
        if v in evidence:
            vals = []
            s_evidence = evidence[v]
            for s in states:
                if s == s_evidence:
                    vertex_marginals[s] = 1.0
                else:
                    vertex_marginals[s] = 0.0
        # if query node.
        else:
            # marginal values via variable elimination on a clone, so the
            # original network is left untouched
            fn = TableCPDFactorization(self.clone())
            mar_vals = fn.condprobve(query, evidence)

            # Associate marginals with values
            # NOTE(review): assumes mar_vals.vals is ordered the same way
            # as self.get_states(v) -- confirm against condprobve.
            for i in range(len(states)):
                vertex_marginals[states[i]] = mar_vals.vals[i]
        return vertex_marginals
Exemple #7
0
def inferCustomerClasses(param_file, evidence_dir, year):
    """
    Use libpgm's variable elimination to infer the customer class of each
    AnswerID, given the evidence presented in the socio-demographic survey
    responses.

    Arguments:
        1. *param_file* -- path to the learnt BN parameter file.
        2. *evidence_dir* -- directory holding the survey evidence.
        3. *year* -- survey year to read evidence for.

    Returns:
        A DataFrame indexed by AnswerID, one column per customer class,
        holding the inferred probability distribution for each respondent
        (rows of None mark queries that failed).
    """
    bn = loadbn(param_file)
    evidence, a_id = readEvidence(year, evidence_dir)
    query = {"customer_class": ''}

    cols = bn.Vdata.get('customer_class')['vals']
    # empty dataframe in which to store inferred probabilities
    result = pd.DataFrame(columns=cols)

    for count, e in enumerate(evidence):
        # The factorization consumes its network during elimination, so
        # rebuild both for every query (kept from the original).
        bn = loadbn(param_file)
        fn = TableCPDFactorization(bn)
        try:
            inf = fn.condprobve(query, e)
            result.loc[count] = list(inf.vals)
        except Exception:
            # Was a bare `except:` -- narrowed so KeyboardInterrupt and
            # SystemExit still propagate; a failed query keeps a
            # placeholder row.
            result.loc[count] = [None] * len(cols)

    result['AnswerID'] = a_id
    result.set_index(keys='AnswerID', inplace=True)

    return result
def q_without_ros():
    skel = GraphSkeleton()
    skel.V = ["prize_door", "guest_door", "monty_door"]
    skel.E = [["prize_door", "monty_door"],
              ["guest_door", "monty_door"]]
    skel.toporder()
    nd = NodeData()
    nd.Vdata = {
        "prize_door": {
            "numoutcomes": 3,
            "parents": None,
            "children": ["monty_door"],
            "vals": ["A", "B", "C"],
            "cprob": [1.0/3, 1.0/3, 1.0/3],
        },
        "guest_door": {
            "numoutcomes": 3,
            "parents": None,
            "children": ["monty_door"],
            "vals": ["A", "B", "C"],
            "cprob": [1.0/3, 1.0/3, 1.0/3],
        },
        "monty_door": {
            "numoutcomes": 3,
            "parents": ["prize_door", "guest_door"],
            "children": None,
            "vals": ["A", "B", "C"],
            "cprob": {
                "['A', 'A']": [0., 0.5, 0.5],
                "['B', 'B']": [0.5, 0., 0.5],
                "['C', 'C']": [0.5, 0.5, 0.],
                "['A', 'B']": [0., 0., 1.],
                "['A', 'C']": [0., 1., 0.],
                "['B', 'A']": [0., 0., 1.],
                "['B', 'C']": [1., 0., 0.],
                "['C', 'A']": [0., 1., 0.],
                "['C', 'B']": [1., 0., 0.],
            },
        },
    }
    bn = DiscreteBayesianNetwork(skel, nd)
    fn = TableCPDFactorization(bn)

    query = {
        "prize_door": ["A","B","C"],
    }
    evidence = {
        "guest_door": "A",
        "monty_door": "B",
    }

    res = fn.condprobve(query, evidence)
    print res.vals
    print res.scope
    print res.card
    print res.stride
Exemple #9
0
    def infer(self, sensor_evidence, fsm_evidence):
        """
        Evaluate every configured output query against the network and
        return the FSM events whose probability threshold is exceeded.

        :param sensor_evidence: dict of sensor node name -> probability;
            the probability is written into the sensor's proxy-node CPT.
        :param fsm_evidence: dict of FSM evidence, merged into the query
            evidence verbatim.
        :return: list of {"fsm": ..., "event": ...} dicts to fire.
        """
        # sensor values are always True; their proxy nodes encode the real probability
        evidence = dict(fsm_evidence)
        evidence.update({k: "T" for k in sensor_evidence})

        # update probability of proxy nodes (mutates self.net in place)
        for sensor, p in sensor_evidence.iteritems():
            self.net.Vdata[sensor]["cprob"] = {
                "['T']": [p, 1 - p],
                "['F']": [(1 - p), p]
            }

        # refactorize, since the CPTs above were just rewritten
        fn = TableCPDFactorization(self.net)
        events = []

        for name, output in self.outputs.iteritems():
            # specificquery consumes factors, so reset before each query
            fn.refresh()
            query = {}

            for q in output["query"]:
                # negated terms query the 'F' state of the node
                if is_negated(q):
                    query[normalise_name(q)] = ['F']
                else:
                    query[normalise_name(q)] = ['T']

            prob = result = fn.specificquery(query, evidence)
            ev = output["event"]
            formatted_query = " AND ".join(query)
            # logging.debug("Query p(%s)=%.8f; need p(%s)>%.8f to trigger event %s/%s" % (formatted_query, prob, formatted_query, 1-np.exp(ev["logp"]), ev.get("fsm", None), ev["event"]))

            logger.info(json.dumps({ \
                'type' : 'query',
                'query' : formatted_query,
                'value' : '%.8f' % prob,
                'threshold' : '%.8f' % (1-np.exp(ev['logp'])),
                'fsm' : ev.get("fsm", None),
                'event' : ev['event']
            }))

            # fire when the query probability clears the (log-space)
            # threshold plus the configured caution margin
            if prob > (1 - np.exp(ev["logp"])) + self.event_caution:
                #logging.debug("Fired event %s/%s" % (ev.get("fsm", None), ev["event"]))
                logger.info(
                    json.dumps({
                        'type': 'fire_event',
                        'fsm': ev.get("fsm", None),
                        'event': ev['event']
                    }))

                # generate event
                events.append({
                    "fsm": ev.get("fsm", None),
                    "event": ev["event"]
                })

        return events
def q_without_ros():
    # Monty Hall network: prize_door and guest_door are independent
    # uniform priors; monty_door depends on both.
    skel = GraphSkeleton()
    skel.V = ["prize_door", "guest_door", "monty_door"]
    skel.E = [["prize_door", "monty_door"], ["guest_door", "monty_door"]]
    skel.toporder()
    nd = NodeData()
    nd.Vdata = {
        "prize_door": {
            "numoutcomes": 3,
            "parents": None,
            "children": ["monty_door"],
            "vals": ["A", "B", "C"],
            "cprob": [1.0 / 3, 1.0 / 3, 1.0 / 3],
        },
        "guest_door": {
            "numoutcomes": 3,
            "parents": None,
            "children": ["monty_door"],
            "vals": ["A", "B", "C"],
            "cprob": [1.0 / 3, 1.0 / 3, 1.0 / 3],
        },
        "monty_door": {
            "numoutcomes": 3,
            "parents": ["prize_door", "guest_door"],
            "children": None,
            "vals": ["A", "B", "C"],
            # keyed by "[prize, guest]"; Monty never opens the prize or
            # the guest door
            "cprob": {
                "['A', 'A']": [0., 0.5, 0.5],
                "['B', 'B']": [0.5, 0., 0.5],
                "['C', 'C']": [0.5, 0.5, 0.],
                "['A', 'B']": [0., 0., 1.],
                "['A', 'C']": [0., 1., 0.],
                "['B', 'A']": [0., 0., 1.],
                "['B', 'C']": [1., 0., 0.],
                "['C', 'A']": [0., 1., 0.],
                "['C', 'B']": [1., 0., 0.],
            },
        },
    }
    bn = DiscreteBayesianNetwork(skel, nd)
    fn = TableCPDFactorization(bn)

    # P(prize_door | guest chose A, Monty opened B) -- should favour C
    query = {
        "prize_door": ["A", "B", "C"],
    }
    evidence = {
        "guest_door": "A",
        "monty_door": "B",
    }

    res = fn.condprobve(query, evidence)
    print res.vals
    print res.scope
    print res.card
    print res.stride
Exemple #11
0
def calc_BNprob(df_test):
    """
    Classify each passenger row of *df_test* as survived (1) or not (0).

    For every row, a fresh factorization of the module-level network *bn*
    is queried for P(Surv='1' | Fare, Sex, Class); rows with probability
    >= 0.5 are labelled 1.

    Returns:
        A pd.Series of 0/1 labels indexed 0..len(df_test)-1.
    """
    labels = []
    for row in df_test.itertuples():
        # specificquery consumes the factorization, so rebuild it per row.
        tablecpd = TableCPDFactorization(bn)
        prob_surv = tablecpd.specificquery(
            dict(Surv='1'),
            dict(Fare=str(row.Fare), Sex=str(row.Sex), Class=str(row.Pclass)))
        labels.append(1 if prob_surv >= 0.5 else 0)
    # Build the Series once: the original appended a one-element Series per
    # row, which is deprecated in pandas and quadratic in the row count.
    return pd.Series(labels)
Exemple #12
0
 def setUp(self):
     # Fixture: network plus aggregated forward and Gibbs samples.
     skel = GraphSkeleton()
     skel.load("unittestdict.txt")
     skel.toporder()
     # NodeData.load() works in place and returns None; the original passed
     # its return value (and no skeleton) to DiscreteBayesianNetwork, which
     # needs both (see the working pattern used elsewhere in this file).
     nodedata = NodeData()
     nodedata.load("unittestdict.txt")
     self.bn = DiscreteBayesianNetwork(skel, nodedata)
     agg = SampleAggregator()
     agg.aggregate(self.bn.randomsample(50))
     self.rseq = agg.seq
     self.ravg = agg.avg
     self.fn = TableCPDFactorization(self.bn)
     evidence = dict(Letter='weak')
     agg.aggregate(self.fn.gibbssample(evidence, 51))
     self.gseq = agg.seq
     self.gavg = agg.avg
Exemple #13
0
class TestSampleAggregator(unittest.TestCase):
    """Checks that aggregated forward/Gibbs sample averages are normalised."""

    def setUp(self):
        # Build the test network, then aggregate 50 forward samples and
        # 51 Gibbs samples (conditioned on Letter='weak') over it.
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()
        nodedata = NodeData()
        nodedata.load("unittestdict.txt")
        self.bn = DiscreteBayesianNetwork(skel, nodedata)
        agg = SampleAggregator()
        agg.aggregate(self.bn.randomsample(50))
        self.rseq = agg.seq
        self.ravg = agg.avg
        self.fn = TableCPDFactorization(self.bn)
        evidence = dict(Letter='weak')
        agg.aggregate(self.fn.gibbssample(evidence, 51))
        self.gseq = agg.seq
        self.gavg = agg.avg

    def test_rseq(self):
        # 50 forward samples; each per-variable average must sum to ~1.
        self.assertTrue(len(self.rseq) == 50)
        for key in self.ravg.keys():
            summ = sum(self.ravg[key].values())
            self.assertTrue(summ > .99 and summ < 1.01)

    def test_gseq(self):
        # 51 Gibbs samples; same normalisation check.
        self.assertTrue(len(self.gseq) == 51)
        for key in self.gavg.keys():
            summ = sum(self.gavg[key].values())
            self.assertTrue(summ > .99 and summ < 1.01)
Exemple #14
0
class TestSampleAggregator(unittest.TestCase):
    """Checks that aggregated forward/Gibbs sample averages are normalised."""

    def setUp(self):
        # Build the test network, then aggregate 50 forward samples and
        # 51 Gibbs samples (conditioned on Letter='weak') over it.
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()
        nodedata = NodeData()
        nodedata.load("unittestdict.txt")
        self.bn = DiscreteBayesianNetwork(skel, nodedata)
        agg = SampleAggregator()
        agg.aggregate(self.bn.randomsample(50))
        self.rseq = agg.seq
        self.ravg = agg.avg
        self.fn = TableCPDFactorization(self.bn)
        evidence = dict(Letter='weak')
        agg.aggregate(self.fn.gibbssample(evidence, 51))
        self.gseq = agg.seq
        self.gavg = agg.avg

    def test_rseq(self):
        # 50 forward samples; each variable's averaged distribution must
        # sum to approximately 1.
        self.assertTrue(len(self.rseq) == 50)
        for key in self.ravg.keys():
            summ = 0
            for entry in self.ravg[key].keys():
                summ += self.ravg[key][entry]
            self.assertTrue(summ > .99 and summ < 1.01)

    def test_gseq(self):
        # 51 Gibbs samples; same normalisation check.
        self.assertTrue(len(self.gseq) == 51)
        for key in self.gavg.keys():
            summ = 0
            for entry in self.gavg[key].keys():
                summ += self.gavg[key][entry]
            self.assertTrue(summ > .99 and summ < 1.01)
Exemple #15
0
 def setUp(self):
     # Build the network + factorization fixture from the unit-test file.
     skel = GraphSkeleton()
     skel.load("unittestdict.txt")
     skel.toporder()
     # NodeData.load() populates the instance in place (it does not return
     # a NodeData), and DiscreteBayesianNetwork needs both the ordered
     # skeleton and the node data -- the original called NodeData.load as a
     # classmethod and dropped the skeleton (see the working pattern used
     # elsewhere in this file).
     nodedata = NodeData()
     nodedata.load("unittestdict.txt")
     self.bn = DiscreteBayesianNetwork(skel, nodedata)
     self.fn = TableCPDFactorization(self.bn)
Exemple #16
0
def simple_graph(pz, px1gz, px2gz):
    # Latent-variable model: binary Z with two conditionally independent
    # binary children X1 and X2.
    pgm = DiscretePGM()
    pgm.addNode('Z', [0, 1], None, [1 - pz, pz])
    for name, table in (('X1', px1gz), ('X2', px2gz)):
        pgm.addNode(name, [0, 1], ['Z'], cpd(table))
    return TableCPDFactorization(pgm.construct())
def classify(evidence, bn):
    """
    Predict the most likely Volume state given the remaining *evidence*.

    NOTE: 'Volume' is deleted from the caller's *evidence* dict (existing
    behaviour, preserved for compatibility).  The Speed branch of this
    classifier was commented out; the dead code has been removed and the
    leading 0 in the return value keeps its placeholder slot.

    Returns:
        [0, predicted_volume]
    """
    q2 = dict(Volume=evidence['Volume'])
    del evidence['Volume']

    fn = TableCPDFactorization(bn)
    result = fn.condprobve(q2, evidence)
    # Pick the state with the highest posterior probability.
    indx = result.vals.index(max(result.vals))
    vl = bn.Vdata['Volume']['vals'][indx]
    return [0, vl]
Exemple #18
0
def classify(evidence, bn):
    # Query the posterior over Volume, conditioning on all other evidence.
    # (The Speed branch of this classifier is currently disabled; the
    # leading 0 in the return value is its placeholder slot.)
    q2 = dict(Volume=evidence['Volume'])
    del evidence['Volume']

    factorization = TableCPDFactorization(bn)
    posterior = factorization.condprobve(q2, evidence)

    # Most probable state wins.
    best_index = posterior.vals.index(max(posterior.vals))
    label = bn.Vdata['Volume']['vals'][best_index]
    return [0, label]
Exemple #19
0
def getTableCPD(jsonpath=""):
    """
    Build a TableCPDFactorization from the network description file at
    *jsonpath*.

    Arguments:
        1. *jsonpath* -- path to the libpgm file holding both the node data
           and the graph skeleton.  The empty-string default is kept from
           the original hard-coded value; callers should pass a real path.

    Returns:
        A TableCPDFactorization over the loaded Bayesian network.
    """
    nd = NodeData()
    skel = GraphSkeleton()
    nd.load(jsonpath)
    skel.load(jsonpath)
    bn = DiscreteBayesianNetwork(skel, nd)
    return TableCPDFactorization(bn)
def getTableCPD(jsonpath="./graph/graph_example.txt"):
    """
    Load a Bayesian network description and wrap it in a
    TableCPDFactorization.

    Arguments:
        1. *jsonpath* -- path to the libpgm file holding both the node data
           and the graph skeleton; defaults to the example graph the
           original code hard-coded.

    Returns:
        A TableCPDFactorization over the loaded Bayesian network.
    """
    nd = NodeData()
    skel = GraphSkeleton()
    nd.load(jsonpath)
    skel.load(jsonpath)
    # load Bayesian network
    bn = DiscreteBayesianNetwork(skel, nd)
    return TableCPDFactorization(bn)
Exemple #21
0
    def discrete_query_cb(self, req):
        """
        ROS service callback: answer a discrete conditional-probability
        query over the network described in the request.

        :param req: service request carrying `nodes` (network description),
            `query` (node names to query) and `evidence` (node/state pairs).
        :return: a DiscreteQueryResponse holding the answered node.

        NOTE(review): only ans.scope[0] is reported, i.e. the answer factor
        is assumed to cover a single node -- confirm for multi-node queries.
        """
        # Rebuild the network from the ROS message on every call.
        nd = U.discrete_nodedata_from_ros(req.nodes)
        skel = U.graph_skeleton_from_node_data(nd)
        skel.toporder()
        bn = DiscreteBayesianNetwork(skel, nd)
        fn = TableCPDFactorization(bn)
        # Query each requested node over all of its possible values.
        q = {n: nd.Vdata[n]["vals"] for n in req.query}
        ev = {ns.node: ns.state for ns in req.evidence}

        rospy.loginfo("resolving query %s with evidence %s" % (q, ev))
        ans = fn.condprobve(query=q, evidence=ev)
        rospy.loginfo("%s -> %s" % (ans.scope, ans.vals))
        # Package the resulting distribution into the response message.
        res = DiscreteQueryResponse()
        node = DiscreteNode()
        node.name = ans.scope[0]
        node.outcomes = q[node.name]
        node.CPT.append(ConditionalProbability(node.outcomes, ans.vals))
        res.nodes.append(node)
        return res
Exemple #22
0
File: pgm.py — Project: ml4ai/b3
 def setup(self):
     """Translate this graph's nodes into a libpgm network and factorize it."""
     self.nd = NodeData()
     self.skel = GraphSkeleton()
     self.skel.V, self.skel.E = [], []
     self.nd.Vdata = {}
     for i, node in enumerate(self.node.values()):
         node.sId = str(i)
         self.skel.V.append(node.name)
         # Collect the (non-None) parents and the corresponding edges.
         parent_names = []
         for parent in node.parents:
             if parent is None:
                 continue
             parent_names.append(parent.name)
             self.skel.E.append([parent.name, node.name])
         # libpgm expects None (not []) when a node has no parents.
         self.nd.Vdata[node.name] = {
             "numoutcomes": len(node.values),
             "vals": node.values,
             "cprob": node.cpt,
             "parents": parent_names or None,
         }
     self.skel.toporder()
     self.bn = DiscreteBayesianNetwork(self.skel, self.nd)
     self.fn = TableCPDFactorization(self.bn)
Exemple #23
0
 def setUp(self):
     # Fixture: network plus aggregated forward and Gibbs samples.
     skel = GraphSkeleton()
     skel.load("unittestdict.txt")
     skel.toporder()
     # NodeData.load() works in place and returns None; the original passed
     # its return value (and no skeleton) to DiscreteBayesianNetwork, which
     # needs both (see the working pattern used elsewhere in this file).
     nodedata = NodeData()
     nodedata.load("unittestdict.txt")
     self.bn = DiscreteBayesianNetwork(skel, nodedata)
     agg = SampleAggregator()
     agg.aggregate(self.bn.randomsample(50))
     self.rseq = agg.seq
     self.ravg = agg.avg
     self.fn = TableCPDFactorization(self.bn)
     evidence = dict(Letter='weak')
     agg.aggregate(self.fn.gibbssample(evidence, 51))
     self.gseq = agg.seq
     self.gavg = agg.avg
Exemple #24
0
def inferPosteriorDistribution(
        queries, evidence,
        baynet):  # TODO: extend to handle multiple query nodes

    fn = TableCPDFactorization(baynet)

    # result = fn.condprobve(query, evidence) #from library
    result = condprobve2(fn, queries, evidence)  # written here
    print 'result.vals ', result.vals
    probabilities = printdist(result, baynet)
    # for index,key in queries:
    probabilities.sort_values(
        ['max_def'],
        inplace=True)  # make sure probabilities are listed in order of bins

    return probabilities
Exemple #25
0
def confounded_graph(n):
    # Z confounds X1..X{n-1}; X0 is an independent binary cause of Y.
    epsilon = 0.4
    pZ = .5        # P(Z = 1)
    pX0 = .1       # P(X0 = 1), must be <= .5
    pXgivenZ = [.4, .3]  # P(X=1|Z=0), P(X=1|Z=1)
    pYgivenX0 = [
        .5 - pX0 / (1.0 - pX0) * epsilon,
        .5 + epsilon,
    ]  # P(Y = 1 | X0)

    pgm = DiscretePGM()
    pgm.addNode('Z', [0, 1], None, [1 - pZ, pZ])
    pgm.addNode('X0', [0, 1], None, [1 - pX0, pX0])
    for i in range(1, n):
        pgm.addNode('X' + str(i), [0, 1], ['Z'], cpd(pXgivenZ))
    pgm.addNode('Y', [0, 1], ['X0'], cpd(pYgivenX0))

    model = pgm.construct()
    return model, TableCPDFactorization(model)
Exemple #26
0
    temp.append(float(max(list))/3)
    temp.append(float(max(list))/3*2)
    return temp
    
# Tercile cut points over the like/liked counts, from the (truncated)
# Threshold helper above: [max/3, 2*max/3].
EachLikeThreshold = Threshold(EachLike) 
EachLikedThreshold = Threshold(EachLiked)
print EachLikeThreshold
print EachLikedThreshold

# NOTE(review): BulliedPro is declared but never filled below -- the
# snippet appears truncated.
BulliedPro = []
nd = NodeData()
skel = GraphSkeleton()
nd.load('unittestdict.txt')
skel.load('unittestdict.txt')
bn = DiscreteBayesianNetwork(skel, nd)
fn = TableCPDFactorization(bn)

# Discretise each user's like/liked counts into Small/Mid/Big evidence.
# NOTE(review): `evidence` is rebuilt on every iteration but never queried;
# the loop body looks truncated in this snippet.
for i in range(len(EachLike)):
    evidence = {}
    if EachLike[i] <= EachLikeThreshold[0]:
        evidence['LikeN'] = 'Small'
    elif EachLikeThreshold[0] < EachLike[i] and EachLike[i] <= EachLikeThreshold[1]:
        evidence['LikeN'] = 'Mid'
    else:
        evidence['LikeN'] = 'Big'
    if EachLiked[i] <= EachLikedThreshold[0]:
        evidence['LikedN'] = 'Small'
    elif EachLikedThreshold[0] < EachLiked[i] and EachLiked[i] <= EachLikedThreshold[1]:
        evidence['LikedN'] = 'Mid'
    else:
        evidence['LikedN'] = 'Big'  
lgbn = LGBayesianNetwork(skel, nd)


# Read the ratings dump and strip non-printable characters before parsing.
# NOTE(review): the file handle `text` is never closed.
text = open("../unifiedMLData2.json")
data=text.read()
printable = set(string.printable)


asciiData=filter(lambda x: x in printable, data)
listofDicts=json.loads(asciiData)


# Learn CPDs for the skeleton by maximum likelihood from the records.

skel = GraphSkeleton()
skel.load("../skeleton.json")


learner = PGMLearner()


result = learner.discrete_mle_estimateparams(skel, listofDicts)


tcf=TableCPDFactorization(result)


# Draw 3 Gibbs samples consistent with the evidence.
myquery = dict(rating=[5])
myevidence = dict(occupation='student')
res2=tcf.gibbssample(evidence=myevidence,n=3)

print json.dumps(res2, indent=2)
Exemple #28
0
class TestTableCPDFactorization(unittest.TestCase):
    """Exercises factor construction, elimination and sampling on the
    unit-test network."""

    def setUp(self):
        # Fresh network and factorization for every test method.
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()
        nodedata = NodeData()
        nodedata.load("unittestdict.txt")
        self.bn = DiscreteBayesianNetwork(skel, nodedata)
        self.fn = TableCPDFactorization(self.bn)

    def test_constructor(self):
        # One CPD factor per variable of the five-node test network.
        self.assertTrue(len(self.fn.originalfactorlist) == 5)
        for factor in self.fn.originalfactorlist:
            self.assertTrue(isinstance(factor, TableCPDFactor))

    def test_refresh(self):
        # Refreshing the factorization must not change query answers.
        evidence = dict(Letter='weak')
        query = dict(Intelligence=['high'])
        result1 = self.fn.specificquery(query, evidence)
        self.fn.refresh()
        result2 = self.fn.specificquery(query, evidence)
        self.assertEqual(result1, result2)

    def test_sumproducteliminatevar(self):
        self.fn.refresh()
        self.fn.sumproducteliminatevar("Difficulty")
        # Exactly one factor should now cover [Grade, Intelligence].
        matches = [factor for factor in self.fn.factorlist
                   if factor.scope == ['Grade', 'Intelligence']]
        self.assertTrue(len(matches) == 1)
        exp = [0.2, 0.33999999999999997, 0.45999999999999996, 0.74, 0.16799999999999998, 0.09200000000000001]
        for x in range(6):
            self.assertTrue(abs(matches[0].vals[x] - exp[x]) < .01)

    def test_sumproductve(self):
        # Eliminating everything but Letter leaves its marginal.
        to_eliminate = ["Difficulty", "Grade", "Intelligence", "SAT"]
        self.fn.refresh()
        self.fn.sumproductve(to_eliminate)
        exp = [.498, .502]
        for x in range(2):
            self.assertTrue(abs(self.fn.factorlist.vals[x] - exp[x]) < .01)

    def test_condprobve(self):
        # P(Intelligence | Grade=C, SAT=highscore)
        evidence = dict(Grade='C', SAT='highscore')
        query = dict(Intelligence='high')
        self.fn.refresh()
        self.fn.condprobve(query, evidence)
        exp = [.422, .578]
        for x in range(2):
            self.assertTrue(abs(self.fn.factorlist.vals[x] - exp[x]) < .01)

    def test_specificquery(self):
        # P(Grade in {A, B} | Difficulty=easy)
        evidence = dict(Difficulty='easy')
        query = dict(Grade=['A', 'B'])
        self.fn.refresh()
        answer = self.fn.specificquery(query, evidence)
        self.assertTrue(abs(answer - .784) < .01)

    def test_gibbssample(self):
        # Every Gibbs sample must honour the clamped evidence.
        evidence = dict(Letter='weak')
        samples = self.fn.gibbssample(evidence, 5)
        self.assertTrue(samples[0]["Difficulty"] == 'easy' or samples[0]["Difficulty"] == 'hard')
        self.assertTrue(len(samples) == 5)
        for sample in samples:
            self.assertTrue(sample["Letter"] == 'weak')
Exemple #29
0
# NOTE(review): `nd`, `skel` and `jsonpath_skel` are defined earlier in the
# original script; only the node-file path is set here.
jsonpath_node ="titanic_nodes.json"
nd.load(jsonpath_node)
skel.load(jsonpath_skel)

# load bayesian network
bn       = DiscreteBayesianNetwork(skel, nd)

print (skel.getchildren("Class"),skel.getchildren("Sex"),skel.getchildren("Fare"),skel.getchildren("Surv"))
# NOTE(review): the next line looks like interpreter output pasted into the
# source; as code it is a harmless no-op tuple expression.
([u'Surv'], [u'Surv'], [u'Class'], [])
# In[ ]:


# We can now start querying our network. We provide a query (first dictionary in the arguments)
# and an evidence (second dictionary in the args)).
# A fresh TableCPDFactorization is built before each specificquery call
# because the factorization is consumed by the query.

tablecpd=TableCPDFactorization(bn)
print ("P(Surv=0) = {}".format(tablecpd.specificquery(dict(Surv='0'),dict())))


# In[ ]:


tablecpd=TableCPDFactorization(bn)
print("P(Surv = 1) = {}".format(tablecpd.specificquery(dict(Surv='1'),dict())))

tablecpd=TableCPDFactorization(bn)
print("P(Surv = 1 | Fare = 0) = {}".format(tablecpd.specificquery(dict(Surv='1'),dict(Fare='0'))))
tablecpd=TableCPDFactorization(bn)
print("P(Surv = 1 | Fare = 1) = {}".format(tablecpd.specificquery(dict(Surv='1'),dict(Fare='1'))))
tablecpd=TableCPDFactorization(bn)
print("P(Surv = 1 | Fare = 1, Sex = 0) = {}".format(tablecpd.specificquery(dict(Surv='1'),dict(Fare='1' , Sex='0'))))
import json

from libpgm.graphskeleton import GraphSkeleton
from libpgm.nodedata import NodeData
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.tablecpdfactorization import TableCPDFactorization

# load nodedata and graphskeleton from the same network file
nd = NodeData()
skel = GraphSkeleton()
nd.load("grades.txt")
skel.load("grades.txt")

# toporder graph skeleton
skel.toporder()

# load evidence to clamp during sampling
evidence = dict(Letter='weak')

# load bayesian network
bn = DiscreteBayesianNetwork(skel, nd)

# load factorization
fn = TableCPDFactorization(bn)

# draw 1000 Gibbs samples consistent with the evidence
result = fn.gibbssample(evidence, 1000)

# output (Python 2 print statement)
print json.dumps(result, indent=2)
skel = GraphSkeleton()
skel.load("../skeleton.json")

learner = PGMLearner()

# NOTE(review): `listofDicts` comes from an earlier snippet; the CPDs are
# estimated from those records by maximum likelihood.
result = learner.discrete_mle_estimateparams(skel, listofDicts)

#print json.dumps(result.randomsample(10), indent=2)
#print json.dumps(result.Vdata, indent=2)

#nd = NodeData()
#nd.load("../tests/unittestdict.txt")

#evidence = dict(Letter='weak')
# Factorize the learnt network so it can be queried/sampled.
tcf = TableCPDFactorization(result)

occupations = [
    'administrator', 'artist'
    'doctor'
    'educator'
    'engineer'
    'entertainment'
    'executive'
    'healthcare'
    'homemaker'
    'lawyer'
    'librarian'
    'marketing'
    'none'
    'other'
import json

from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.tablecpdfactorization import TableCPDFactorization

# load nodedata and graphskeleton
nd = NodeData()
skel = GraphSkeleton()
nd.load("tests/net1.json")    # any input file
skel.load("tests/net1.json")

# topologically order graphskeleton
skel.toporder()

# load bayesian network
bn = DiscreteBayesianNetwork(skel, nd)

fn = TableCPDFactorization(bn)


# query P(C='T' | B='F'); specificquery returns a plain float
result = fn.specificquery(dict(C='T'), dict(B='F'))

# output (Python 2 print statement)
print json.dumps(result, indent=2)
Exemple #33
0
File: pgm.py — Project: ml4ai/b3
class Graph:
    """Small DAG wrapper that compiles itself into a libpgm network and can
    render itself via Graphviz."""

    def __init__(self):
        self.node = dict()   # name -> node object
        self.obs = dict()    # name -> [node, observed value]

    def addnode(self, node):
        """Register *node* under its name."""
        self.node[node.name] = node

    def removeNode(self, name):
        # dict.has_key() was removed in Python 3; `in` is the equivalent.
        if name in self.node:
            del self.node[name]

    def addobs(self, node, value):
        """Record that *node* was observed with *value*."""
        self.obs[node.name] = [node, value]

    def removeObs(self, name):
        if name in self.obs:
            del self.obs[name]

    def setup(self):
        """Build NodeData/GraphSkeleton from self.node and factorize."""
        self.nd = NodeData()
        self.skel = GraphSkeleton()
        self.skel.V, self.skel.E = [], []
        self.nd.Vdata = {}
        for i, node in enumerate(self.node.values()):
            dNode = {}
            node.sId = str(i)
            dNode["numoutcomes"] = len(node.values)
            dNode["vals"] = node.values
            dNode["cprob"] = node.cpt
            self.skel.V.append(node.name)
            aParents = []
            for parent in node.parents:
                if parent == None: continue
                aParents.append(parent.name)
                self.skel.E.append([parent.name, node.name])
            # libpgm expects None (not []) for parentless nodes.
            dNode["parents"] = aParents if len(aParents) > 0 else None
            self.nd.Vdata[node.name] = dNode
        self.skel.toporder()
        self.bn = DiscreteBayesianNetwork(self.skel, self.nd)
        self.fn = TableCPDFactorization(self.bn)

    def getPost(self, query, evidence):
        """Posterior probability of *query* given *evidence*."""
        result = self.fn.specificquery(query, evidence)
        return result

    def write2dot(self, fname="graph.dot"):
        """Emit a Graphviz .dot description; observed nodes are filled blue."""
        # BUG FIX: the original tested `node in map(lambda x: x[0], self.obs)`,
        # which iterates the dict's *keys* (strings) and takes each key's
        # first character, so no node was ever marked observed.  Compare
        # against the node objects stored in the dict's values instead.
        observed = [pair[0] for pair in self.obs.values()]
        # `with` guarantees the file is closed even if a write fails.
        with open(fname, "w") as f:
            f.write("digraph G {\n")
            f.write("node[shape=circle, width=0.4];\n")
            for node in self.node.values():
                l = "\"" + node.name + "\""
                f.write(node.sId)
                if node in observed:
                    f.write("[label=" + l + ",style=filled,color=blue]")
                else:
                    f.write("[label=" + l + "]")
                f.write(";\n")
                for parent in node.parents:
                    if parent == None: continue
                    f.write(parent.sId + " -> " + node.sId + ";\n")
            f.write("}")

    def write2pdf(self, fname="graph.pdf"):
        """Render the graph to PDF via the external `dot` executable."""
        if ".pdf" in fname:
            fname = fname[:-4]
        pdfFile = fname + ".pdf"
        dotFile = fname + ".dot"
        self.write2dot(dotFile)
        call(['dot', '-Tpdf', dotFile, '-o', pdfFile])
Exemple #34
0
# load nodedata and graphskeleton from the shared unit-test network
nd = NodeData()
skel = GraphSkeleton()
nd.load("../tests/unittestdict.txt")
skel.load("../tests/unittestdict.txt")

# toporder graph skeleton
skel.toporder()

# load evidence to clamp during sampling
evidence = dict(Letter='weak')

# load bayesian network
bn = DiscreteBayesianNetwork(skel, nd)

# load factorization
fn = TableCPDFactorization(bn)

# draw 10 Gibbs samples consistent with the evidence
result = fn.gibbssample(evidence, 10)

# output - toggle comment to see
#print json.dumps(result, indent=2)

# (5) --------------------------------------------------------------------------
# Compute the probability distribution over a specific node or nodes

# load nodedata and graphskeleton (reloaded fresh for the next section)
nd = NodeData()
skel = GraphSkeleton()
nd.load("../tests/unittestdict.txt")
skel.load("../tests/unittestdict.txt")
Exemple #35
0
def inference(bn, evidence):
    """Gibbs-sample *bn* under *evidence* and return the aggregated
    sample distribution as a pretty-printed JSON string."""
    factorization = TableCPDFactorization(bn)
    samples = factorization.gibbssample(evidence, GIBBS_ITERATIONS)
    aggregator = SampleAggregator()
    summary = aggregator.aggregate(samples)
    return json.dumps(summary, indent=2)
Exemple #36
0
class TestTableCPDFactorization(unittest.TestCase):
    """Exercises TableCPDFactorization against the 5-node student network
    stored in unittestdict.txt (Difficulty, Intelligence, Grade, SAT,
    Letter)."""

    def setUp(self):
        # rebuild the network for every test so factor mutations don't leak
        # between test methods
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()
        nodedata = NodeData()
        nodedata.load("unittestdict.txt")
        self.bn = DiscreteBayesianNetwork(skel, nodedata)
        self.fn = TableCPDFactorization(self.bn)

    def test_constructor(self):
        # one TableCPDFactor per node in the 5-node network
        self.assertTrue(len(self.fn.originalfactorlist) == 5)
        for factor in self.fn.originalfactorlist:
            self.assertTrue(isinstance(factor, TableCPDFactor))

    def test_refresh(self):
        self.fn.refresh()
        # refresh must restore the full working factor list
        self.assertTrue(len(self.fn.factorlist) == 5)
        for factor in self.fn.factorlist:
            self.assertTrue(isinstance(factor, TableCPDFactor))

    def test_sumproducteliminatevar(self):
        # eliminating Difficulty must leave exactly one factor whose scope
        # is ['Grade', 'Intelligence']
        self.fn.refresh()
        self.fn.sumproducteliminatevar("Difficulty")
        matches = [f for f in self.fn.factorlist
                   if f.scope == ['Grade', 'Intelligence']]
        self.assertTrue(len(matches) == 1)
        exp = [0.2, 0.33999999999999997, 0.45999999999999996, 0.74,
               0.16799999999999998, 0.09200000000000001]
        for x in range(6):
            self.assertTrue(abs(matches[0].vals[x] - exp[x]) < .01)

    def test_sumproductve(self):
        # eliminate everything but Letter; its marginal should remain.
        # (renamed from 'input', which shadows the builtin)
        toeliminate = ["Difficulty", "Grade", "Intelligence", "SAT"]
        self.fn.refresh()
        self.fn.sumproductve(toeliminate)
        exp = [.498, .502]
        for x in range(2):
            self.assertTrue(abs(self.fn.factorlist.vals[x] - exp[x]) < .01)

    def test_condprobve(self):
        evidence = dict(Grade='C', SAT='highscore')
        query = dict(Intelligence='high')
        self.fn.refresh()
        self.fn.condprobve(query, evidence)
        exp = [.422, .578]
        for x in range(2):
            self.assertTrue(abs(self.fn.factorlist.vals[x] - exp[x]) < .01)

    def test_specificquery(self):
        # P(Grade in {A, B} | Difficulty = easy)
        evidence = dict(Difficulty='easy')
        query = dict(Grade=['A', 'B'])
        self.fn.refresh()
        answer = self.fn.specificquery(query, evidence)
        self.assertTrue(abs(answer - .784) < .01)

    def test_gibbssample(self):
        evidence = dict(Letter='weak')
        gs = self.fn.gibbssample(evidence, 5)
        self.assertTrue(gs[0]["Difficulty"] == 'easy' or gs[0]["Difficulty"] == 'hard')
        self.assertTrue(len(gs) == 5)
        # every sample must honor the clamped evidence variable
        for entry in gs:
            self.assertTrue(entry["Letter"] == 'weak')
Exemple #37
0
# Compute the probability distribution over a specific node or nodes

# load nodedata and graphskeleton
nd = NodeData()
skel = GraphSkeleton()
nd.load("../tests/unittestdict.txt")
skel.load("../tests/unittestdict.txt")

# toporder graph skeleton
print(skel.toporder())

# load evidence
evidence = {"Intelligence": "high"}
# specificquery requires every query value to be a *list* of outcomes,
# even a singleton; a bare string fails its isinstance(..., list) check
query = {"Grade": ["A"]}

# load bayesian network
bn = DiscreteBayesianNetwork(skel, nd)

# load factorization
fn = TableCPDFactorization(bn)

# # calculate probability distribution
# result = fn.condprobve(query, evidence)
# print json.dumps(result.vals, indent=2)
# print json.dumps(result.scope, indent=2)
# print json.dumps(result.card, indent=2)
# print json.dumps(result.stride, indent=2)

# P(Grade = A | Intelligence = high)
result = fn.specificquery(query, evidence)
print(result)
Exemple #38
0
 clean = int(testdf.iloc[i]["clean"])
 # # small = int(testdf.iloc[i]["small"])
 bad = int(testdf.iloc[i]["bad"])
 old = int(testdf.iloc[i]["old"])
 Rooms = int(testdf.iloc[i]["Rooms"])
 Location = int(testdf.iloc[i]["Location"])
 Service = int(testdf.iloc[i]["Service"])
 Cleanliness = int(testdf.iloc[i]["Cleanliness"])
 #Checkin = int(testdf.iloc[i]["Checkin"])
 #Businessservice = int(testdf.iloc[i]["Businessservice"])
 Value = int(testdf.iloc[i]["Value"])
 Overall = int(testdf.iloc[i]["Overall"])
 #append the overall score to the target list
 target.append(Overall)
 #getting all cpt from our model
 a = TableCPDFactorization(res)
 #compute the query and evidences as dicts
 query = dict(Overall=Overall)
 evidence = dict(Service=Service,
                 Location=Location,
                 Cleanliness=Cleanliness,
                 Value=Value,
                 bad=bad,
                 Rooms=Rooms,
                 old=old,
                 good=good,
                 great=great,
                 comfortable=comfortable)
 #Checkin=Checkin,Businessservice=Businessservice
 #run the query given evidence
 result = a.condprobve(query, evidence)
from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.lgbayesiannetwork import LGBayesianNetwork
from libpgm.hybayesiannetwork import HyBayesianNetwork
from libpgm.dyndiscbayesiannetwork import DynDiscBayesianNetwork
from libpgm.tablecpdfactorization import TableCPDFactorization
from libpgm.sampleaggregator import SampleAggregator
from libpgm.pgmlearner import PGMLearner

# Learn discrete CPDs from data by MLE and build a factorization.
# context manager closes the data file (the original leaked the handle)
with open("../unifiedMLData2.json") as text:
    data = text.read()
printable = set(string.printable)
# strip non-printable characters; the join() keeps this working on
# Python 3, where filter() returns an iterator rather than a str
asciiData = "".join(filter(lambda x: x in printable, data))

#print asciiData
#listofDicts=json.dumps(data)
listofDicts = json.loads(asciiData)

#print listofDicts[0]

skel = GraphSkeleton()
skel.load("../skeleton.json")

learner = PGMLearner()

result = learner.discrete_mle_estimateparams(skel, listofDicts)

tcf = TableCPDFactorization(result)
print(tcf)
# Query rating probabilities given a genre, on an MLE-learned network.
# context manager closes the data file (the original leaked the handle)
with open("../unifiedMLData2.json") as text:
    data = text.read()
printable = set(string.printable)
# join() keeps this working on Python 3, where filter() is an iterator
asciiData = "".join(filter(lambda x: x in printable, data))

listofDicts = json.loads(asciiData)

skel = GraphSkeleton()
skel.load("../skeleton.json")

learner = PGMLearner()

result = learner.discrete_mle_estimateparams(skel, listofDicts)

tcf = TableCPDFactorization(result)

#Rating 1 Given Genre  is Drama
myquery = dict(rating=[1])
myevidence = dict(genre='Drama')
result = tcf.specificquery(query=myquery, evidence=myevidence)
print(result)

# specificquery consumes the factor list, so refresh before the next query
tcf.refresh()

#Rating 2 Given Genre  is Drama
myquery = dict(rating=[2])
myevidence = dict(genre='Drama')
result = tcf.specificquery(query=myquery, evidence=myevidence)
print(result)
Exemple #41
0
# Query rating probabilities given an occupation, on an MLE-learned network.
# context manager closes the data file (the original leaked the handle)
with open("../unifiedMLData2.json") as text:
    data = text.read()
printable = set(string.printable)
# join() keeps this working on Python 3, where filter() is an iterator
asciiData = "".join(filter(lambda x: x in printable, data))

#listofDicts=json.dumps(data)
listofDicts = json.loads(asciiData)

skel = GraphSkeleton()
skel.load("../skeleton.json")

learner = PGMLearner()

result = learner.discrete_mle_estimateparams(skel, listofDicts)

tcf = TableCPDFactorization(result)

#Rating 1 Given Occupation is student
myquery = dict(rating=[1])
myevidence = dict(occupation='student')
result = tcf.specificquery(query=myquery, evidence=myevidence)
print(result)

# specificquery consumes the factor list, so refresh before the next query
tcf.refresh()

#Rating 2 Given Occupation is student
myquery = dict(rating=[2])
myevidence = dict(occupation='student')
result = tcf.specificquery(query=myquery, evidence=myevidence)
print(result)
Exemple #42
0
    def __init__(self, nodes):
        """Build a discrete Bayesian network from a dict of node specs.

        *nodes* maps node name -> spec dict carrying a "type" key of
        "inferred", "fsm_input", "sensor_input" or "output"; "inferred"
        specs additionally carry "parents" and a truth table under "p".
        Constructs ``self.net`` (DiscreteBayesianNetwork) and
        ``self.factor_net`` (TableCPDFactorization) for later queries.
        """

        self.nodes = {}

        # adjacency maps, auto-vivified so first append needs no check
        self.children = defaultdict(list)
        self.parents = defaultdict(list)
        self.outputs = {}
        for name, node_spec in nodes.iteritems():
            node_type = node_spec["type"]
            if node_type == "inferred":
                parents = node_spec["parents"]
                # store the parent/child relationship in both directions
                for parent in parents:
                    normalised = normalise_name(parent)
                    self.parents[name].append(normalised)
                    self.children[normalised].append(name)
                truth_table = parse_truth_table(node_spec["p"], parents)
                node = make_node(truth_table, parents, node_type)
                self.nodes[name] = node

            if node_type == "fsm_input":
                # parentless node with prior [1.0, 0.0]
                node = make_node([1.0, 0.0], None, node_type)
                self.nodes[name] = node

            if node_type == "sensor_input":
                # sensor nodes get a synthetic "_proxy_<name>" parent whose
                # T/F state deterministically drives the sensor node --
                # presumably so evidence can be injected by setting the
                # proxy's distribution (verify against callers)
                proxy_node = make_node([1.0, 0.0], None, "proxy")
                proxy_name = "_proxy_%s" % name
                self.nodes[proxy_name] = proxy_node
                self.children[proxy_name].append(name)
                node = make_node({
                    "['T']": [1.0, 0.0],
                    "['F']": [0.0, 1.0]
                }, [proxy_name], node_type)
                self.nodes[name] = node
            if node_type == "output":
                # outputs are not network nodes; keep the raw spec only
                self.outputs[name] = node_spec

        # annotate every node with its children; leaves get None
        for node in self.nodes:
            if len(self.children[node]) > 0:
                self.nodes[node]["children"] = self.children[node]
            else:
                self.nodes[node]["children"] = None

        # certainty scaling
        self.event_caution = 0.0

        # assemble the skeleton (vertices + directed edges) and sort it
        og = OrderedSkeleton()
        og.V = self.nodes.keys()
        edges = []
        for k, children in self.children.iteritems():
            for child in children:
                edges.append((k, child))

        og.E = edges
        og.toporder()

        nd = NodeData()
        nd.Vdata = self.nodes

        #logging.debug(pprint.pformat(nd.Vdata))

        self.net = DiscreteBayesianNetwork(og, nd)
        self.factor_net = TableCPDFactorization(self.net)
Exemple #43
0
# Example (duplicate of the Gibbs-sampling walkthrough above): draw
# samples from the test network, conditioned on Letter = 'weak'.

# load nodedata and graphskeleton from the same JSON-style dict file
nd = NodeData()
skel = GraphSkeleton()
nd.load("../tests/unittestdict.txt")
skel.load("../tests/unittestdict.txt")

# toporder graph skeleton
skel.toporder()

# load evidence
evidence = dict(Letter='weak')

# load bayesian network
bn = DiscreteBayesianNetwork(skel, nd)

# load factorization
fn = TableCPDFactorization(bn)

# sample: 10 Gibbs samples, each consistent with the evidence
result = fn.gibbssample(evidence, 10)

# output - toggle comment to see
#print json.dumps(result, indent=2)

# (5) --------------------------------------------------------------------------
# Compute the probability distribution over a specific node or nodes

# load nodedata and graphskeleton (fresh copies for the next example)
nd = NodeData()
skel = GraphSkeleton()
nd.load("../tests/unittestdict.txt")
skel.load("../tests/unittestdict.txt")
Exemple #44
0
    #INITIALIZING BN 2
    # load nodedata and graphskeleton
    nd2 = NodeData()
    skel2 = GraphSkeleton()
    nd2.load(path_bn2)
    skel2.load(path_bn2)
    skel2.toporder()  # toporder graph skeleton

    # FINDING NEXT ACTIVITY ATTRIBUTES THROUGH INFERENCE ON BN 1
    # wkday variable query
    evidence1 = dict(wkdayT0=userinput[0])
    for i, item in enumerate(wkdayValsList):
        # loading bayesian network and factorization - needs to be done at every iteration
        bn1 = DiscreteBayesianNetwork(skel1, nd1)
        fn1 = TableCPDFactorization(bn1)
        # setting the query
        query1 = dict(wkdayT1=[item])
        # querying in accordance to the given evidence and appending it to the list of probability of each value
        wkdayProbList.append(fn1.specificquery(query1, evidence1))
        #print "Iteration: " + str(i) + "-> wkdayTO (Input): " + userinput[0] + "; wkdayT1 (Output): " + item + " - prob: " + str(wkdayProbList[i])
    most_probable_wkdayT1 = wkdayValsList[numpy.argmax(wkdayProbList)]

    # hour variable query
    evidence1 = dict(hourT0=userinput[1])
    for i, item in enumerate(hourValsList):
        # loading bayesian network and factorization - needs to be done at every iteration
        bn1 = DiscreteBayesianNetwork(skel1, nd1)
        fn1 = TableCPDFactorization(bn1)
        # setting the query
        query1 = dict(hourT1=[item])
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.lgbayesiannetwork import LGBayesianNetwork
from libpgm.hybayesiannetwork import HyBayesianNetwork
from libpgm.dyndiscbayesiannetwork import DynDiscBayesianNetwork
from libpgm.tablecpdfactorization import TableCPDFactorization
from libpgm.sampleaggregator import SampleAggregator
from libpgm.pgmlearner import PGMLearner

# NOTE(review): 'skel' and 'nd' are not defined earlier in this example --
# this line appears to rely on leftovers from a previous snippet; verify
# before running standalone.
lgbn = LGBayesianNetwork(skel, nd)

# context manager closes the data file (the original leaked the handle)
with open("../unifiedMLData2.json") as text:
    data = text.read()
printable = set(string.printable)

# join() keeps this working on Python 3, where filter() is an iterator
asciiData = "".join(filter(lambda x: x in printable, data))
listofDicts = json.loads(asciiData)

skel = GraphSkeleton()
skel.load("../skeleton.json")

learner = PGMLearner()

result = learner.discrete_mle_estimateparams(skel, listofDicts)

tcf = TableCPDFactorization(result)

# draw 3 Gibbs samples consistent with the evidence
# (myquery is built but unused by gibbssample; kept for parity with the
# neighboring specificquery examples)
myquery = dict(rating=[5])
myevidence = dict(occupation='student')
res2 = tcf.gibbssample(evidence=myevidence, n=3)

print(json.dumps(res2, indent=2))