Code example #1
File: dbn.py Project: spoilr/ml-profiling
    def specificquery(self, query, evidence):
        '''
        .. note:: Shortcut method to the *specificquery* method in :doc:`tablecpdfactorization`

        Eliminate all variables except for the ones specified by *query*. Adjust all distributions to reflect *evidence*. Return the entry that matches the exact probability of a specific event, as specified by *query*.
        
        Arguments:
            1. *query* -- A dict containing (key: value) pairs reflecting (variable: value) that represents what outcome to calculate the probability of. The value of the query is a list of one or more values that can be taken by the variable.
            2. *evidence* -- A dict containing (key: value) pairs reflecting (variable: value) evidence that is known about the system. 
                    
        Returns:
            - the probability that the event (or events) specified will occur, represented as a float between 0 and 1.

        Note that in this function, queries of the type P((x=A or x=B) and (y=C or y=D)) are permitted. They are executed by formatting the *query* dictionary like so::

            {
                "x": ["A", "B"],
                "y": ["C", "D"]
            }

        '''
        # validate
        if not (hasattr(self, "V") and hasattr(self, "E") and hasattr(self, "Vdata")):
            raise notloadedError("Bayesian network is missing essential attributes")
        assert isinstance(query, dict) and isinstance(evidence, dict), "query and evidence must be dicts"
        for k in query.keys():
            assert isinstance(query[k], list), "the values of your query must be lists, even if singletons" 

        # calculate
        fn = TableCPDFactorization(self)
        return fn.specificquery(query, evidence)
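
A minimal usage sketch of this wrapper (the node names, values, and the loaded network object `net` below are placeholders, not from the project):

# P((x=A or x=B) and y=C | z=D) -- hypothetical nodes and states
query = {"x": ["A", "B"], "y": ["C"]}
evidence = {"z": "D"}
print(net.specificquery(query, evidence))  # a float between 0 and 1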
Code example #2
File: ch16.py Project: barryColumbia/ch16-new
def recur(sc, temp, number, bn, val, jp):
    # Recursively enumerate every assignment of the values in `val` to the
    # network's variables; `sc` maps variable name -> assigned value.
    if number != 0:
        for i in range(2):
            sc[bn.V[len(bn.V) - number]] = val[i]
            recur(sc, temp, number - 1, bn, val, jp)
    else:
        # Base case: `sc` now assigns a value to every variable. Multiply the
        # conditional probability of each node given its parents to obtain the
        # joint probability of this assignment.
        result = []
        p = 1
        temp = []
        for j in range(len(bn.V)):
            pa = bn.Vdata[bn.V[j]]['parents']
            if pa:
                fn = TableCPDFactorization(bn)
                evidence = {}
                for k in range(len(pa)):
                    evidence[pa[k]] = sc[pa[k]]
                query = {bn.V[j]: [sc[bn.V[j]]]}  # singleton list, as specificquery expects
                result.append(fn.specificquery(query, evidence))
            else:
                # Root node: read the prior directly from the CPT.
                if sc[bn.V[j]] == '0':
                    result.append(bn.Vdata[bn.V[j]]['cprob'][0])
                else:
                    result.append(bn.Vdata[bn.V[j]]['cprob'][1])
            temp.append(sc[bn.V[j]])
            p = p * result[j]
        temp.append(p)  # row layout: [value of each variable ..., joint probability]
        jp.append(temp)
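
The recursion enumerates every assignment and appends one row per assignment, ending with that assignment's joint probability. A hedged driver sketch, assuming a network whose variables all take the string values '0' and '1' as in the base case above:

# Build the full joint probability table of a binary network `bn`:
jp = []
recur({}, [], len(bn.V), bn, ['0', '1'], jp)
for row in jp:
    print(row)  # [value of each variable ..., joint probability]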
Code example #3
    def compute_vertex_marginal(self, v, evidence):
        """
        :return: a dictionary with: state name -> marginal values
            ex. {"state1": 0.5, "state2": 0.5}
        """
        query = {v: ''}
        vertex_marginals = {}
        states = self.get_states(v)

        if v in evidence:
            s_evidence = evidence[v]
            for s in states:
                if s == s_evidence:
                    vertex_marginals[s] = 1.0
                else:
                    vertex_marginals[s] = 0.0
        # if query node.
        else:
            #marginal values
            fn = TableCPDFactorization(self.clone())
            mar_vals = fn.condprobve(query, evidence)

            # Associate marginals with values
            for i in range(len(states)):
                vertex_marginals[states[i]] = mar_vals.vals[i]
        return vertex_marginals
Code example #4
File: run_unit_tests.py Project: Anaphory/libpgm
 def setUp(self):
     skel = GraphSkeleton()
     skel.load("unittestdict.txt")
     skel.toporder()
     nodedata = NodeData.load("unittestdict.txt")
     self.bn = DiscreteBayesianNetwork(nodedata)
     self.fn = TableCPDFactorization(self.bn)
Code example #5
File: pgm.py Project: finnhacks42/causality
def estimate_distrib(skel, samples, query, evidence):
    learner = PGMLearner()
    bayesnet = learner.discrete_mle_estimateparams(skel, samples)
    tablecpd = TableCPDFactorization(bayesnet)
    fac = tablecpd.condprobve(query, evidence)
    df2 = printdist(fac, bayesnet)
    return df2
Code example #6
def inferCustomerClasses(param_file, evidence_dir, year):
    """
    This function uses the variable elimination algorithm from libpgm to infer the customer class of each AnswerID, given the evidence presented in the socio-demographic survey responses. 
    
    It returns a dataframe with the probability distribution over all classes for each AnswerID.
    """
    bn = loadbn(param_file)
    evidence, a_id = readEvidence(year, evidence_dir)
    query = {"customer_class": ''}

    cols = bn.Vdata.get('customer_class')['vals']
    result = pd.DataFrame(
        columns=cols
    )  #create empty dataframe in which to store inferred probabilities

    count = 0  #set counter
    for e in evidence:
        bn = loadbn(param_file)
        fn = TableCPDFactorization(bn)
        try:
            inf = fn.condprobve(query, e)
            classprobs = list(inf.vals)
            result.loc[count] = classprobs
            count += 1
        except Exception:
            result.loc[count] = [None] * len(cols)
            count += 1

    result['AnswerID'] = a_id
    result.set_index(keys='AnswerID', inplace=True)

    return result
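
A hedged invocation sketch; loadbn and readEvidence are project helpers, and the path, directory, and year below are placeholders:

probs = inferCustomerClasses("customer_model.json", "./evidence", 2014)
print(probs.head())  # one row of class probabilities per AnswerID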
Code example #7
def q_without_ros():
    skel = GraphSkeleton()
    skel.V = ["prize_door", "guest_door", "monty_door"]
    skel.E = [["prize_door", "monty_door"],
              ["guest_door", "monty_door"]]
    skel.toporder()
    nd = NodeData()
    nd.Vdata = {
        "prize_door": {
            "numoutcomes": 3,
            "parents": None,
            "children": ["monty_door"],
            "vals": ["A", "B", "C"],
            "cprob": [1.0/3, 1.0/3, 1.0/3],
        },
        "guest_door": {
            "numoutcomes": 3,
            "parents": None,
            "children": ["monty_door"],
            "vals": ["A", "B", "C"],
            "cprob": [1.0/3, 1.0/3, 1.0/3],
        },
        "monty_door": {
            "numoutcomes": 3,
            "parents": ["prize_door", "guest_door"],
            "children": None,
            "vals": ["A", "B", "C"],
            "cprob": {
                "['A', 'A']": [0., 0.5, 0.5],
                "['B', 'B']": [0.5, 0., 0.5],
                "['C', 'C']": [0.5, 0.5, 0.],
                "['A', 'B']": [0., 0., 1.],
                "['A', 'C']": [0., 1., 0.],
                "['B', 'A']": [0., 0., 1.],
                "['B', 'C']": [1., 0., 0.],
                "['C', 'A']": [0., 1., 0.],
                "['C', 'B']": [1., 0., 0.],
            },
        },
    }
    bn = DiscreteBayesianNetwork(skel, nd)
    fn = TableCPDFactorization(bn)

    query = {
        "prize_door": ["A","B","C"],
    }
    evidence = {
        "guest_door": "A",
        "monty_door": "B",
    }

    res = fn.condprobve(query, evidence)
    print res.vals
    print res.scope
    print res.card
    print res.stride
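
For this network the result can be checked by hand. With guest_door = A and monty_door = B, Bayes' rule over the CPT above gives:

# P(prize=A | guest=A, monty=B) ~ P(monty=B | prize=A, guest=A) * 1/3 = 0.5 * 1/3
# P(prize=B | guest=A, monty=B) ~ P(monty=B | prize=B, guest=A) * 1/3 = 0.0 * 1/3
# P(prize=C | guest=A, monty=B) ~ P(monty=B | prize=C, guest=A) * 1/3 = 1.0 * 1/3
# After normalizing, res.vals should come out near [1/3, 0, 2/3]: switching wins.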
Code example #8
File: bayes_net.py Project: johnhw/mgsharedcontrol
    def infer(self, sensor_evidence, fsm_evidence):
        # sensor values are always True; their proxy nodes encode the real probability
        evidence = dict(fsm_evidence)
        evidence.update({k: "T" for k in sensor_evidence})

        # update probability of proxy nodes
        for sensor, p in sensor_evidence.iteritems():
            self.net.Vdata[sensor]["cprob"] = {
                "['T']": [p, 1 - p],
                "['F']": [(1 - p), p]
            }

        # refactorize
        fn = TableCPDFactorization(self.net)
        events = []

        for name, output in self.outputs.iteritems():
            fn.refresh()
            query = {}

            for q in output["query"]:
                if is_negated(q):
                    query[normalise_name(q)] = ['F']
                else:
                    query[normalise_name(q)] = ['T']

            prob = fn.specificquery(query, evidence)
            ev = output["event"]
            formatted_query = " AND ".join(query)
            # logging.debug("Query p(%s)=%.8f; need p(%s)>%.8f to trigger event %s/%s" % (formatted_query, prob, formatted_query, 1-np.exp(ev["logp"]), ev.get("fsm", None), ev["event"]))

            logger.info(json.dumps({
                'type' : 'query',
                'query' : formatted_query,
                'value' : '%.8f' % prob,
                'threshold' : '%.8f' % (1-np.exp(ev['logp'])),
                'fsm' : ev.get("fsm", None),
                'event' : ev['event']
            }))

            if prob > (1 - np.exp(ev["logp"])) + self.event_caution:
                #logging.debug("Fired event %s/%s" % (ev.get("fsm", None), ev["event"]))
                logger.info(
                    json.dumps({
                        'type': 'fire_event',
                        'fsm': ev.get("fsm", None),
                        'event': ev['event']
                    }))

                # generate event
                events.append({
                    "fsm": ev.get("fsm", None),
                    "event": ev["event"]
                })

        return events
Code example #9
def calc_BNprob(df_test):
    
    result = pd.Series()
    
    for row in df_test.itertuples():
        tablecpd = TableCPDFactorization(bn)
        prob_surv = tablecpd.specificquery(dict(Surv=['1']), dict(Fare=str(row.Fare), Sex=str(row.Sex), Class=str(row.Pclass)))

        if prob_surv >= 0.5:
            surv_class = 1
        else:
            surv_class  = 0        
        result = result.append(pd.Series([surv_class]), ignore_index = True )
    return result
Code example #10
File: run_unit_tests.py Project: Anaphory/libpgm
 def setUp(self):
     skel = GraphSkeleton()
     skel.load("unittestdict.txt")
     skel.toporder()
     nodedata = NodeData.load("unittestdict.txt")
     self.bn = DiscreteBayesianNetwork(nodedata)
     agg = SampleAggregator()
     agg.aggregate(self.bn.randomsample(50))
     self.rseq = agg.seq
     self.ravg = agg.avg
     self.fn = TableCPDFactorization(self.bn)
     evidence = dict(Letter='weak')
     agg.aggregate(self.fn.gibbssample(evidence, 51))
     self.gseq = agg.seq
     self.gavg = agg.avg
Code example #11
File: run_unit_tests.py Project: CyberPoint/libpgm
class TestSampleAggregator(unittest.TestCase):

    def setUp(self):
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()
        nodedata = NodeData()
        nodedata.load("unittestdict.txt")
        self.bn = DiscreteBayesianNetwork(skel, nodedata)
        agg = SampleAggregator()
        agg.aggregate(self.bn.randomsample(50))
        self.rseq = agg.seq
        self.ravg = agg.avg
        self.fn = TableCPDFactorization(self.bn)
        evidence = dict(Letter='weak')
        agg.aggregate(self.fn.gibbssample(evidence, 51))
        self.gseq = agg.seq
        self.gavg = agg.avg

    def test_rseq(self):
        self.assertTrue(len(self.rseq) == 50)
        for key in self.ravg.keys():
            summ = 0
            for entry in self.ravg[key].keys():
                summ += self.ravg[key][entry]
            self.assertTrue(summ > .99 and summ < 1.01)

    def test_gseq(self):
        self.assertTrue(len(self.gseq) == 51)
        for key in self.gavg.keys():
            summ = 0
            for entry in self.gavg[key].keys():
                summ += self.gavg[key][entry]
            self.assertTrue(summ > .99 and summ < 1.01)
Code example #12
File: pgm.py Project: finnhacks42/causality
def simple_graph(pz, px1gz, px2gz):
    pgm = DiscretePGM()
    pgm.addNode('Z', [0, 1], None, [1 - pz, pz])
    pgm.addNode('X1', [0, 1], ['Z'], cpd(px1gz))
    pgm.addNode('X2', [0, 1], ['Z'], cpd(px2gz))
    model = pgm.construct()
    factorization = TableCPDFactorization(model)
    return factorization
Code example #13
def classify(evidence, bn):
    #q1=dict(Speed=evidence['Speed'])
    q2 = dict(Volume=evidence['Volume'])
    # del evidence['Speed']
    del evidence['Volume']

    #fn = TableCPDFactorization(bn)#toolbx
    #result=fn.condprobve(q1,evidence)#t
    #mx=max(result.vals)
    #indx=result.vals.index(mx)
    #sp= bn.Vdata['Speed']['vals'][indx]

    fn = TableCPDFactorization(bn)  #t
    result = fn.condprobve(q2, evidence)  #t
    mx = max(result.vals)
    indx = result.vals.index(mx)
    vl = bn.Vdata['Volume']['vals'][indx]
    return [0, vl]
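
A hedged usage sketch of classify(); the node names and states are placeholders that depend on the trained network:

evidence = dict(Volume='high', Speed='low')  # must include 'Volume'
print(classify(evidence, bn))                # -> [0, most probable Volume value]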
Code example #14
File: temp.py Project: barryColumbia/ch16-new
def getTableCPD():
    nd = NodeData()
    skel = GraphSkeleton()
    jsonpath = ""
    nd.load(jsonpath)
    skel.load(jsonpath)
    bn = DiscreteBayesianNetwork(skel, nd)
    tablecpd = TableCPDFactorization(bn)
    return tablecpd
Code example #15
def getTableCPD():
    nd = NodeData()
    skel = GraphSkeleton()
    jsonpath = "./graph/graph_example.txt"
    nd.load(jsonpath)
    skel.load(jsonpath)
    # load Bayesian network
    bn = DiscreteBayesianNetwork(skel, nd)
    tablecpd = TableCPDFactorization(bn)
    return tablecpd
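
A fresh TableCPDFactorization is typically created (or refreshed) per query, since specificquery/condprobve consume the factor list. A usage sketch with hypothetical node and state names:

tablecpd = getTableCPD()
print(tablecpd.specificquery(dict(Grade=['A']), dict(Intelligence='high')))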
Code example #16
    def discrete_query_cb(self, req):
        nd = U.discrete_nodedata_from_ros(req.nodes)
        skel = U.graph_skeleton_from_node_data(nd)
        skel.toporder()
        bn = DiscreteBayesianNetwork(skel, nd)
        fn = TableCPDFactorization(bn)
        q = {n: nd.Vdata[n]["vals"] for n in req.query}
        ev = {ns.node: ns.state for ns in req.evidence}

        rospy.loginfo("resolving query %s with evidence %s" % (q, ev))
        ans = fn.condprobve(query=q, evidence=ev)
        rospy.loginfo("%s -> %s" % (ans.scope, ans.vals))
        res = DiscreteQueryResponse()
        node = DiscreteNode()
        node.name = ans.scope[0]
        node.outcomes = q[node.name]
        node.CPT.append(ConditionalProbability(node.outcomes, ans.vals))
        res.nodes.append(node)
        return res
Code example #17
File: pgm.py Project: ml4ai/b3
 def setup(self):
     self.nd = NodeData()
     self.skel = GraphSkeleton()
     self.skel.V, self.skel.E = [], []
     self.nd.Vdata = {}
     for i, node in enumerate(self.node.values()):
         dNode = {}
         node.sId = str(i)
         dNode["numoutcomes"] = len(node.values)
         dNode["vals"] = node.values
         dNode["cprob"] = node.cpt
         #             dNode["parents"] = map(lambda x: if x=x.name, node.parents);
         self.skel.V.append(node.name)
         aParents = []
         for parent in node.parents:
             if parent == None: continue
             aParents.append(parent.name)
             self.skel.E.append([parent.name, node.name])
         dNode["parents"] = aParents if len(aParents) > 0 else None
         self.nd.Vdata[node.name] = dNode
     self.skel.toporder()
     self.bn = DiscreteBayesianNetwork(self.skel, self.nd)
     self.fn = TableCPDFactorization(self.bn)
Code example #18
def inferPosteriorDistribution(
        queries, evidence,
        baynet):  # TODO: extend to handle multiple query nodes

    fn = TableCPDFactorization(baynet)

    # result = fn.condprobve(query, evidence) #from library
    result = condprobve2(fn, queries, evidence)  # written here
    print 'result.vals ', result.vals
    probabilities = printdist(result, baynet)
    # for index,key in queries:
    probabilities.sort_values(
        ['max_def'],
        inplace=True)  # make sure probabilities are listed in order of bins

    return probabilities
Code example #19
File: pgm.py Project: finnhacks42/causality
def confounded_graph(n):
    epsilon = 0.4
    pZ = .5  #P(Z = 1)
    pX0 = .1  #P(X0 = 1) must be <= .5
    pXgivenZ = [.4, .3]  #P(X=1|Z=0),P(X=1|Z=1)
    pYgivenX0 = [
        .5 - pX0 / (1.0 - pX0) * epsilon,
        .5 + epsilon,
    ]  #P(Y = 1|X0)

    pgm = DiscretePGM()
    pgm.addNode('Z', [0, 1], None, [1 - pZ, pZ])
    pgm.addNode('X0', [0, 1], None, [1 - pX0, pX0])
    for i in range(1, n):
        pgm.addNode('X' + str(i), [0, 1], ['Z'], cpd(pXgivenZ))
    pgm.addNode('Y', [0, 1], ['X0'], cpd(pYgivenX0))
    model = pgm.construct()
    factorization = TableCPDFactorization(model)
    return model, factorization
Code example #20
File: k_mean_3.py Project: yari7852/AI-Final-project
def Threshold(values):
    # Reconstructed from context: returns the cut points at 1/3 and 2/3 of max(values).
    temp = []
    temp.append(float(max(values))/3)
    temp.append(float(max(values))/3*2)
    return temp
    
EachLikeThreshold = Threshold(EachLike) 
EachLikedThreshold = Threshold(EachLiked)
print EachLikeThreshold
print EachLikedThreshold

BulliedPro = []
nd = NodeData()
skel = GraphSkeleton()
nd.load('unittestdict.txt')
skel.load('unittestdict.txt')
bn = DiscreteBayesianNetwork(skel, nd)
fn = TableCPDFactorization(bn)

for i in range(len(EachLike)):
    evidence = {}
    if EachLike[i] <= EachLikeThreshold[0]:
        evidence['LikeN'] = 'Small'
    elif EachLikeThreshold[0] < EachLike[i] and EachLike[i] <= EachLikeThreshold[1]:
        evidence['LikeN'] = 'Mid'
    else:
        evidence['LikeN'] = 'Big'
    if EachLiked[i] <= EachLikedThreshold[0]:
        evidence['LikedN'] = 'Small'
    elif EachLikedThreshold[0] < EachLiked[i] and EachLiked[i] <= EachLikedThreshold[1]:
        evidence['LikedN'] = 'Mid'
    else:
        evidence['LikedN'] = 'Big'  
Code example #21
text = open("../unifiedMLData2.json")
data=text.read()
printable = set(string.printable)


asciiData=filter(lambda x: x in printable, data)
listofDicts=json.loads(asciiData)



skel = GraphSkeleton()
skel.load("../skeleton.json")


learner = PGMLearner()


result = learner.discrete_mle_estimateparams(skel, listofDicts)


tcf=TableCPDFactorization(result)


myquery = dict(rating=[5])
myevidence = dict(occupation='student')
res2=tcf.gibbssample(evidence=myevidence,n=3)

print json.dumps(res2, indent=2)
Code example #22
File: run_unit_tests.py Project: CyberPoint/libpgm
class TestTableCPDFactorization(unittest.TestCase):

    def setUp(self):
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()
        nodedata = NodeData()
        nodedata.load("unittestdict.txt")
        self.bn = DiscreteBayesianNetwork(skel, nodedata)
        self.fn = TableCPDFactorization(self.bn)

    def test_constructor(self):
        self.assertTrue(len(self.fn.originalfactorlist) == 5)
        for x in range(5):
            self.assertTrue(isinstance(self.fn.originalfactorlist[x], TableCPDFactor))

    def test_refresh(self):
        evidence = dict(Letter='weak')
        query = dict(Intelligence=['high'])
        result1 = self.fn.specificquery(query, evidence)
        self.fn.refresh()
        result2 = self.fn.specificquery(query, evidence)
        self.assertEqual(result1, result2)

    def test_sumproducteliminatevar(self):
        self.fn.refresh()
        self.fn.sumproducteliminatevar("Difficulty")
        yes = 0
        for x in range(len(self.fn.factorlist)):
            if (self.fn.factorlist[x].scope == ['Grade', 'Intelligence']):
                yes += 1
                index = x

        self.assertTrue(yes == 1)
        exp = [0.2, 0.33999999999999997, 0.45999999999999996, 0.74, 0.16799999999999998, 0.09200000000000001]
        for x in range(6):
            self.assertTrue(abs(self.fn.factorlist[index].vals[x] - exp[x]) < .01)

    def test_sumproductve(self):
        input = ["Difficulty", "Grade", "Intelligence", "SAT"]
        self.fn.refresh()
        self.fn.sumproductve(input)
        exp = [.498, .502]
        for x in range(2):
            self.assertTrue(abs(self.fn.factorlist.vals[x] - exp[x]) < .01)

    def test_condprobve(self):
        evidence = dict(Grade='C', SAT='highscore')
        query = dict(Intelligence='high')
        self.fn.refresh()
        self.fn.condprobve(query, evidence)
        exp = [.422, .578]
        for x in range(2):
            self.assertTrue(abs(self.fn.factorlist.vals[x] - exp[x]) < .01)

    def test_specificquery(self):
        evidence = dict(Difficulty='easy')
        query = dict(Grade=['A', 'B'])
        self.fn.refresh()
        answer = self.fn.specificquery(query, evidence)
        self.assertTrue(abs(answer - .784) < .01)

    def test_gibbssample(self):
        evidence = dict(Letter='weak')
        gs = self.fn.gibbssample(evidence, 5)
        self.assertTrue(gs[0]["Difficulty"] == 'easy' or gs[0]["Difficulty"] == 'hard')
        self.assertTrue(len(gs) == 5)
        for entry in gs:
            self.assertTrue(entry["Letter"] == 'weak')
Code example #23
# nd, skel, and jsonpath_skel are assumed to be defined earlier in the source notebook
jsonpath_node = "titanic_nodes.json"
nd.load(jsonpath_node)
skel.load(jsonpath_skel)

# load bayesian network
bn       = DiscreteBayesianNetwork(skel, nd)

print(skel.getchildren("Class"), skel.getchildren("Sex"), skel.getchildren("Fare"), skel.getchildren("Surv"))
# -> ([u'Surv'], [u'Surv'], [u'Class'], [])
# We can now start querying our network. We provide a query (the first
# dictionary in the arguments) and evidence (the second dictionary).

tablecpd = TableCPDFactorization(bn)
print("P(Surv=0) = {}".format(tablecpd.specificquery(dict(Surv=['0']), dict())))


tablecpd = TableCPDFactorization(bn)
print("P(Surv = 1) = {}".format(tablecpd.specificquery(dict(Surv=['1']), dict())))

tablecpd = TableCPDFactorization(bn)
print("P(Surv = 1 | Fare = 0) = {}".format(tablecpd.specificquery(dict(Surv=['1']), dict(Fare='0'))))
tablecpd = TableCPDFactorization(bn)
print("P(Surv = 1 | Fare = 1) = {}".format(tablecpd.specificquery(dict(Surv=['1']), dict(Fare='1'))))
tablecpd = TableCPDFactorization(bn)
print("P(Surv = 1 | Fare = 1, Sex = 0) = {}".format(tablecpd.specificquery(dict(Surv=['1']), dict(Fare='1', Sex='0'))))
Code example #24
import json

from libpgm.graphskeleton import GraphSkeleton
from libpgm.nodedata import NodeData
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.tablecpdfactorization import TableCPDFactorization

# load nodedata and graphskeleton
nd = NodeData()
skel = GraphSkeleton()
nd.load("grades.txt")
skel.load("grades.txt")

# toporder graph skeleton
skel.toporder()

# load evidence
evidence = dict(Letter='weak')

# load bayesian network
bn = DiscreteBayesianNetwork(skel, nd)

# load factorization
fn = TableCPDFactorization(bn)

# sample 
result = fn.gibbssample(evidence, 1000)

# output
print json.dumps(result, indent=2)
Code example #25
skel = GraphSkeleton()
skel.load("../skeleton.json")

learner = PGMLearner()

result = learner.discrete_mle_estimateparams(skel, listofDicts)

#print json.dumps(result.randomsample(10), indent=2)
#print json.dumps(result.Vdata, indent=2)

#nd = NodeData()
#nd.load("../tests/unittestdict.txt")

#evidence = dict(Letter='weak')
tcf = TableCPDFactorization(result)

occupations = [
    'administrator',
    'artist',
    'doctor',
    'educator',
    'engineer',
    'entertainment',
    'executive',
    'healthcare',
    'homemaker',
    'lawyer',
    'librarian',
    'marketing',
    'none',
    'other',
Code example #26
import json

from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.tablecpdfactorization import TableCPDFactorization

# load nodedata and graphskeleton
nd = NodeData()
skel = GraphSkeleton()
nd.load("tests/net1.json")    # any input file
skel.load("tests/net1.json")

# topologically order graphskeleton
skel.toporder()

# load bayesian network
bn = DiscreteBayesianNetwork(skel, nd)

fn = TableCPDFactorization(bn)


# query
result = fn.specificquery(dict(C=['T']), dict(B='F'))

# output
print json.dumps(result, indent=2)
Code example #27
File: pgm.py Project: ml4ai/b3
class Graph:
    def __init__(self):
        self.node = dict()
        self.obs = dict()

    def addnode(self, node):
        self.node[node.name] = node

    def removeNode(self, name):
        if self.node.has_key(name):
            del self.node[name]

    def addobs(self, node, value):
        self.obs[node.name] = [node, value]

    def removeObs(self, name):
        if self.obs.has_key(name):
            del self.obs[name]

    def setup(self):
        self.nd = NodeData()
        self.skel = GraphSkeleton()
        self.skel.V, self.skel.E = [], []
        self.nd.Vdata = {}
        for i, node in enumerate(self.node.values()):
            dNode = {}
            node.sId = str(i)
            dNode["numoutcomes"] = len(node.values)
            dNode["vals"] = node.values
            dNode["cprob"] = node.cpt
            #             dNode["parents"] = map(lambda x: if x=x.name, node.parents);
            self.skel.V.append(node.name)
            aParents = []
            for parent in node.parents:
                if parent == None: continue
                aParents.append(parent.name)
                self.skel.E.append([parent.name, node.name])
            dNode["parents"] = aParents if len(aParents) > 0 else None
            self.nd.Vdata[node.name] = dNode
        self.skel.toporder()
        self.bn = DiscreteBayesianNetwork(self.skel, self.nd)
        self.fn = TableCPDFactorization(self.bn)


    def getPost(self, query, evidence):
        result = self.fn.specificquery(query, evidence)
        return result

    def write2dot(self, fname="graph.dot"):
        f = open(fname, "w")
        f.write("digraph G {\n")
        f.write("node[shape=circle, width=0.4];\n")
        for node in self.node.values():
            l = "\"" + node.name + "\""
            f.write(node.sId)
            if node in map(lambda x: x[0], self.obs.values()):
                f.write("[label=" + l + ",style=filled,color=blue]")
            else:
                f.write("[label=" + l + "]")
            f.write(";\n")
            for parent in node.parents:
                if parent == None: continue
                f.write(parent.sId + " -> " + node.sId + ";\n")
        f.write("}")
        f.close()

    def write2pdf(self, fname="graph.pdf"):
        if ".pdf" in fname:
            fname = fname[:-4]
        pdfFile = fname + ".pdf"
        dotFile = fname + ".dot"
        self.write2dot(dotFile)
        call(['dot', '-Tpdf', dotFile, '-o', pdfFile])
Code example #28
File: examples.py Project: Anaphory/libpgm
nd = NodeData()
skel = GraphSkeleton()
nd.load("../tests/unittestdict.txt")
skel.load("../tests/unittestdict.txt")

# toporder graph skeleton
skel.toporder()

# load evidence
evidence = dict(Letter='weak')

# load bayesian network
bn = DiscreteBayesianNetwork(skel, nd)

# load factorization
fn = TableCPDFactorization(bn)

# sample 
result = fn.gibbssample(evidence, 10)

# output - toggle comment to see
#print json.dumps(result, indent=2)

# (5) --------------------------------------------------------------------------
# Compute the probability distribution over a specific node or nodes

# load nodedata and graphskeleton
nd = NodeData()
skel = GraphSkeleton()
nd.load("../tests/unittestdict.txt")
skel.load("../tests/unittestdict.txt")
Code example #29
File: create_struct.py Project: spoilr/ml-profiling
def inference(bn, evidence):
  fn = TableCPDFactorization(bn)
  result = fn.gibbssample(evidence, GIBBS_ITERATIONS)
  agg = SampleAggregator()
  result = agg.aggregate(result)
  return json.dumps(result, indent=2)
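
A sketch of calling this helper, assuming GIBBS_ITERATIONS is defined at module level and the grades example's node names (placeholders otherwise):

print(inference(bn, dict(Letter='weak')))  # JSON map: node -> sampled state frequencies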
Code example #30
class TestTableCPDFactorization(unittest.TestCase):
    
    def setUp(self):
        skel = GraphSkeleton()
        skel.load("unittestdict.txt")
        skel.toporder()
        nodedata = NodeData()
        nodedata.load("unittestdict.txt")
        self.bn = DiscreteBayesianNetwork(skel, nodedata)
        self.fn = TableCPDFactorization(self.bn)
    
    def test_constructor(self):
        self.assertTrue(len(self.fn.originalfactorlist) == 5)
        for x in range(5):
            self.assertTrue(isinstance(self.fn.originalfactorlist[x], TableCPDFactor))
    
    def test_refresh(self):
        self.fn.refresh()
        for x in range(5):
            self.assertTrue(isinstance(self.fn.factorlist[x], TableCPDFactor))
                            
    def test_sumproducteliminatevar(self):
        self.fn.refresh()
        self.fn.sumproducteliminatevar("Difficulty")
        yes = 0
        for x in range(len(self.fn.factorlist)):
            if (self.fn.factorlist[x].scope == ['Grade', 'Intelligence']):
                yes += 1 
                index = x
                
        self.assertTrue(yes == 1)
        exp = [0.2, 0.33999999999999997, 0.45999999999999996, 0.74, 0.16799999999999998, 0.09200000000000001]
        for x in range(6):
            self.assertTrue(abs(self.fn.factorlist[index].vals[x] - exp[x]) < .01)

    def test_sumproductve(self):
        input = ["Difficulty", "Grade", "Intelligence", "SAT"]
        self.fn.refresh()
        self.fn.sumproductve(input)
        exp = [.498, .502]
        for x in range(2):
            self.assertTrue(abs(self.fn.factorlist.vals[x] - exp[x]) < .01)
    
    def test_condprobve(self):
        evidence = dict(Grade='C', SAT='highscore')
        query = dict(Intelligence='high')
        self.fn.refresh()
        self.fn.condprobve(query, evidence)
        exp = [.422, .578]
        for x in range(2):
            self.assertTrue(abs(self.fn.factorlist.vals[x] - exp[x]) < .01)
        
    def test_specificquery(self):
        evidence = dict(Difficulty='easy')
        query = dict(Grade=['A', 'B'])
        self.fn.refresh()
        answer = self.fn.specificquery(query, evidence)
        self.assertTrue(abs(answer - .784) < .01)
    
    def test_gibbssample(self):
        evidence = dict(Letter='weak')
        gs = self.fn.gibbssample(evidence, 5)
        self.assertTrue(gs[0]["Difficulty"] == 'easy' or gs[0]["Difficulty"] == 'hard')
        self.assertTrue(len(gs) == 5)
        for entry in gs: 
            self.assertTrue(entry["Letter"] == 'weak')
Code example #31
# Compute the probability distribution over a specific node or nodes

# load nodedata and graphskeleton
nd = NodeData()
skel = GraphSkeleton()
nd.load("../tests/unittestdict.txt")
skel.load("../tests/unittestdict.txt")

# toporder graph skeleton
skel.toporder()

# load evidence
evidence = {"Intelligence": "high"}
query = {"Grade": ["A"]}

# load bayesian network
bn = DiscreteBayesianNetwork(skel, nd)

# load factorization
fn = TableCPDFactorization(bn)

# # calculate probability distribution
# result = fn.condprobve(query, evidence)
# print json.dumps(result.vals, indent=2)
# print json.dumps(result.scope, indent=2)
# print json.dumps(result.card, indent=2)
# print json.dumps(result.stride, indent=2)

result = fn.specificquery(query, evidence)
print result
Code example #32
 clean = int(testdf.iloc[i]["clean"])
 # # small = int(testdf.iloc[i]["small"])
 bad = int(testdf.iloc[i]["bad"])
 old = int(testdf.iloc[i]["old"])
 Rooms = int(testdf.iloc[i]["Rooms"])
 Location = int(testdf.iloc[i]["Location"])
 Service = int(testdf.iloc[i]["Service"])
 Cleanliness = int(testdf.iloc[i]["Cleanliness"])
 #Checkin = int(testdf.iloc[i]["Checkin"])
 #Businessservice = int(testdf.iloc[i]["Businessservice"])
 Value = int(testdf.iloc[i]["Value"])
 Overall = int(testdf.iloc[i]["Overall"])
 #append the overall score to the target list
 target.append(Overall)
 #getting all cpt from our model
 a = TableCPDFactorization(res)
 #compute the query and evidences as dicts
 query = dict(Overall=Overall)
 evidence = dict(Service=Service,
                 Location=Location,
                 Cleanliness=Cleanliness,
                 Value=Value,
                 bad=bad,
                 Rooms=Rooms,
                 old=old,
                 good=good,
                 great=great,
                 comfortable=comfortable)
 #Checkin=Checkin,Businessservice=Businessservice
 #run the query given evidence
 result = a.condprobve(query, evidence)
Code example #33
from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.lgbayesiannetwork import LGBayesianNetwork
from libpgm.hybayesiannetwork import HyBayesianNetwork
from libpgm.dyndiscbayesiannetwork import DynDiscBayesianNetwork
from libpgm.tablecpdfactorization import TableCPDFactorization
from libpgm.sampleaggregator import SampleAggregator
from libpgm.pgmlearner import PGMLearner

text = open("../unifiedMLData2.json")
data = text.read()
printable = set(string.printable)
asciiData = filter(lambda x: x in printable, data)

#print asciiData
#listofDicts=json.dumps(data)
listofDicts = json.loads(asciiData)

#print listofDicts[0]

skel = GraphSkeleton()
skel.load("../skeleton.json")

learner = PGMLearner()

result = learner.discrete_mle_estimateparams(skel, listofDicts)

tcf = TableCPDFactorization(result)
print tcf
Code example #34
text = open("../unifiedMLData2.json")
data=text.read()
printable = set(string.printable)
asciiData=filter(lambda x: x in printable, data)

listofDicts=json.loads(asciiData)

skel = GraphSkeleton()
skel.load("../skeleton.json")

learner = PGMLearner()

result = learner.discrete_mle_estimateparams(skel, listofDicts)

tcf=TableCPDFactorization(result)

#Rating 1 Given Genre  is Drama
myquery = dict(rating=[1])
myevidence = dict(genre='Drama')
result=tcf.specificquery(query=myquery,evidence=myevidence)
print result


tcf.refresh()

#Rating 2 Given Genre  is Drama
myquery = dict(rating=[2])
myevidence = dict(genre='Drama')
result=tcf.specificquery(query=myquery,evidence=myevidence)
print result
Code example #35
text = open("../unifiedMLData2.json")
data = text.read()
printable = set(string.printable)
asciiData = filter(lambda x: x in printable, data)

#listofDicts=json.dumps(data)
listofDicts = json.loads(asciiData)

skel = GraphSkeleton()
skel.load("../skeleton.json")

learner = PGMLearner()

result = learner.discrete_mle_estimateparams(skel, listofDicts)

tcf = TableCPDFactorization(result)

#Rating 1 Given Occupation is student
myquery = dict(rating=[1])
myevidence = dict(occupation='student')
result = tcf.specificquery(query=myquery, evidence=myevidence)
print result

tcf.refresh()

#Rating 2 Given Occupation is student
myquery = dict(rating=[2])
myevidence = dict(occupation='student')
result = tcf.specificquery(query=myquery, evidence=myevidence)
print result
Code example #36
File: bayes_net.py Project: johnhw/mgsharedcontrol
    def __init__(self, nodes):

        self.nodes = {}

        self.children = defaultdict(list)
        self.parents = defaultdict(list)
        self.outputs = {}
        for name, node_spec in nodes.iteritems():
            node_type = node_spec["type"]
            if node_type == "inferred":
                parents = node_spec["parents"]
                # store the relationship between these elements
                for parent in parents:
                    normalised = normalise_name(parent)
                    self.parents[name].append(normalised)
                    self.children[normalised].append(name)
                truth_table = parse_truth_table(node_spec["p"], parents)
                node = make_node(truth_table, parents, node_type)
                self.nodes[name] = node

            if node_type == "fsm_input":
                node = make_node([1.0, 0.0], None, node_type)
                self.nodes[name] = node

            if node_type == "sensor_input":
                proxy_node = make_node([1.0, 0.0], None, "proxy")
                proxy_name = "_proxy_%s" % name
                self.nodes[proxy_name] = proxy_node
                self.children[proxy_name].append(name)
                node = make_node({
                    "['T']": [1.0, 0.0],
                    "['F']": [0.0, 1.0]
                }, [proxy_name], node_type)
                self.nodes[name] = node
            if node_type == "output":
                self.outputs[name] = node_spec

        for node in self.nodes:
            if len(self.children[node]) > 0:
                self.nodes[node]["children"] = self.children[node]
            else:
                self.nodes[node]["children"] = None

        # certainty scaling
        self.event_caution = 0.0

        og = OrderedSkeleton()
        og.V = self.nodes.keys()
        edges = []
        for k, children in self.children.iteritems():
            for child in children:
                edges.append((k, child))

        og.E = edges
        og.toporder()

        nd = NodeData()
        nd.Vdata = self.nodes

        #logging.debug(pprint.pformat(nd.Vdata))

        self.net = DiscreteBayesianNetwork(og, nd)
        self.factor_net = TableCPDFactorization(self.net)
Code example #37
    #INITIALIZING BN 2
    # load nodedata and graphskeleton
    nd2 = NodeData()
    skel2 = GraphSkeleton()
    nd2.load(path_bn2)
    skel2.load(path_bn2)
    skel2.toporder()  # toporder graph skeleton

    # FINDING NEXT ACTIVITY ATTRIBUTES THROUGH INFERENCE ON BN 1
    # wkday variable query
    evidence1 = dict(wkdayT0=userinput[0])
    for i, item in enumerate(wkdayValsList):
        # loading bayesian network and factorization - needs to be done at every iteration
        bn1 = DiscreteBayesianNetwork(skel1, nd1)
        fn1 = TableCPDFactorization(bn1)
        # setting the query
        query1 = dict(wkdayT1=[item])
        # querying in accordance to the given evidence and appending it to the list of probability of each value
        wkdayProbList.append(fn1.specificquery(query1, evidence1))
        #print "Iteration: " + str(i) + "-> wkdayTO (Input): " + userinput[0] + "; wkdayT1 (Output): " + item + " - prob: " + str(wkdayProbList[i])
    most_probable_wkdayT1 = wkdayValsList[numpy.argmax(wkdayProbList)]

    # hour variable query
    evidence1 = dict(hourT0=userinput[1])
    for i, item in enumerate(hourValsList):
        # loading bayesian network and factorization - needs to be done at every iteration
        bn1 = DiscreteBayesianNetwork(skel1, nd1)
        fn1 = TableCPDFactorization(bn1)
        # setting the query
        query1 = dict(hourT1=[item])