Beispiel #1
0
def inferCustomerClasses(param_file, evidence_dir, year):
    """
    Infer the customer class of each AnswerID with libpgm's variable
    elimination, given the evidence presented in the socio-demographic
    survey responses.

    Parameters
    ----------
    param_file : str
        Path to the learned BN parameters understood by ``loadbn``.
    evidence_dir : str
        Directory with the survey evidence read by ``readEvidence``.
    year : int
        Survey year to load evidence for.

    Returns
    -------
    pandas.DataFrame
        Probability distribution over all customer classes, indexed by
        AnswerID.  Rows for which inference failed hold None values.
    """
    bn = loadbn(param_file)
    evidence, a_id = readEvidence(year, evidence_dir)
    query = {"customer_class": ''}

    cols = bn.Vdata.get('customer_class')['vals']
    # Empty dataframe in which to store the inferred probabilities.
    result = pd.DataFrame(columns=cols)

    for count, e in enumerate(evidence):
        # condprobve consumes the factorization, so rebuild the network
        # and its factorization from scratch for every evidence record.
        bn = loadbn(param_file)
        fn = TableCPDFactorization(bn)
        try:
            inf = fn.condprobve(query, e)
            result.loc[count] = list(inf.vals)
        except Exception:
            # Inference can fail on inconsistent evidence; record an
            # empty row instead of aborting the whole run.
            result.loc[count] = [None] * len(cols)

    result['AnswerID'] = a_id
    result.set_index(keys='AnswerID', inplace=True)

    return result
Beispiel #2
0
def estimate_distrib(skel, samples, query, evidence):
    """Fit a discrete BN over *skel* from *samples* via MLE, then return
    the conditional distribution of *query* given *evidence* as a
    dataframe (via ``printdist``)."""
    net = PGMLearner().discrete_mle_estimateparams(skel, samples)
    factor = TableCPDFactorization(net).condprobve(query, evidence)
    return printdist(factor, net)
Beispiel #3
0
    def compute_vertex_marginal(self, v, evidence):
        """
        Compute the marginal distribution of vertex *v* given *evidence*.

        :param v: name of the vertex to query.
        :param evidence: dict mapping observed vertex names to states.
        :return: a dictionary with: state name -> marginal values
            ex. {"state1": 0.5, "state2": 0.5}
        """
        vertex_marginals = {}
        states = self.get_states(v)

        if v in evidence:
            # Observed vertex: all probability mass on the evidence state.
            s_evidence = evidence[v]
            for s in states:
                vertex_marginals[s] = 1.0 if s == s_evidence else 0.0
        else:
            # Query vertex: run variable elimination on a clone, since
            # condprobve consumes the factorization it runs on.
            fn = TableCPDFactorization(self.clone())
            mar_vals = fn.condprobve({v: ''}, evidence)

            # Associate marginals with values; the original pairs
            # states[i] with vals[i], i.e. by position.
            for state, val in zip(states, mar_vals.vals):
                vertex_marginals[state] = val
        return vertex_marginals
    def compute_vertex_marginal(self, v, evidence):
        """
        :return: a dictionary with: state name -> marginal values
            ex. {"state1": 0.5, "state2": 0.5}
        """
        states = self.get_states(v)

        if v in evidence:
            # Observed node: degenerate distribution on the evidence state.
            observed = evidence[v]
            return {s: (1.0 if s == observed else 0.0) for s in states}

        # Query node: marginalise via variable elimination on a clone.
        factorization = TableCPDFactorization(self.clone())
        answer = factorization.condprobve({v: ''}, evidence)

        # Pair each state with its marginal value by position.
        return {states[i]: answer.vals[i] for i in range(len(states))}
def q_without_ros():
    """Monty Hall query: P(prize_door | guest_door=A, monty_door=B).

    Builds the three-node Monty Hall network by hand, runs variable
    elimination, and prints the resulting factor (values, scope,
    cardinality, stride).
    """
    skel = GraphSkeleton()
    skel.V = ["prize_door", "guest_door", "monty_door"]
    skel.E = [["prize_door", "monty_door"],
              ["guest_door", "monty_door"]]
    skel.toporder()
    nd = NodeData()
    nd.Vdata = {
        "prize_door": {
            "numoutcomes": 3,
            "parents": None,
            "children": ["monty_door"],
            "vals": ["A", "B", "C"],
            "cprob": [1.0/3, 1.0/3, 1.0/3],
        },
        "guest_door": {
            "numoutcomes": 3,
            "parents": None,
            "children": ["monty_door"],
            "vals": ["A", "B", "C"],
            "cprob": [1.0/3, 1.0/3, 1.0/3],
        },
        "monty_door": {
            "numoutcomes": 3,
            "parents": ["prize_door", "guest_door"],
            "children": None,
            "vals": ["A", "B", "C"],
            # Monty never opens the prize door or the guest's door.
            "cprob": {
                "['A', 'A']": [0., 0.5, 0.5],
                "['B', 'B']": [0.5, 0., 0.5],
                "['C', 'C']": [0.5, 0.5, 0.],
                "['A', 'B']": [0., 0., 1.],
                "['A', 'C']": [0., 1., 0.],
                "['B', 'A']": [0., 0., 1.],
                "['B', 'C']": [1., 0., 0.],
                "['C', 'A']": [0., 1., 0.],
                "['C', 'B']": [1., 0., 0.],
            },
        },
    }
    bn = DiscreteBayesianNetwork(skel, nd)
    fn = TableCPDFactorization(bn)

    query = {
        "prize_door": ["A", "B", "C"],
    }
    evidence = {
        "guest_door": "A",
        "monty_door": "B",
    }

    res = fn.condprobve(query, evidence)
    # Parenthesised print is valid under both Python 2 and Python 3.
    print(res.vals)
    print(res.scope)
    print(res.card)
    print(res.stride)
def q_without_ros():
    """Monty Hall example: query the prize door given the guest's pick
    and the door Monty opened, then print the resulting factor."""
    skeleton = GraphSkeleton()
    skeleton.V = ["prize_door", "guest_door", "monty_door"]
    skeleton.E = [["prize_door", "monty_door"], ["guest_door", "monty_door"]]
    skeleton.toporder()

    node_data = NodeData()
    node_data.Vdata = {
        "prize_door": {
            "numoutcomes": 3,
            "parents": None,
            "children": ["monty_door"],
            "vals": ["A", "B", "C"],
            "cprob": [1.0 / 3, 1.0 / 3, 1.0 / 3],
        },
        "guest_door": {
            "numoutcomes": 3,
            "parents": None,
            "children": ["monty_door"],
            "vals": ["A", "B", "C"],
            "cprob": [1.0 / 3, 1.0 / 3, 1.0 / 3],
        },
        "monty_door": {
            "numoutcomes": 3,
            "parents": ["prize_door", "guest_door"],
            "children": None,
            "vals": ["A", "B", "C"],
            # Conditioned on [prize, guest]; Monty avoids both doors.
            "cprob": {
                "['A', 'A']": [0., 0.5, 0.5],
                "['B', 'B']": [0.5, 0., 0.5],
                "['C', 'C']": [0.5, 0.5, 0.],
                "['A', 'B']": [0., 0., 1.],
                "['A', 'C']": [0., 1., 0.],
                "['B', 'A']": [0., 0., 1.],
                "['B', 'C']": [1., 0., 0.],
                "['C', 'A']": [0., 1., 0.],
                "['C', 'B']": [1., 0., 0.],
            },
        },
    }

    network = DiscreteBayesianNetwork(skeleton, node_data)
    factorization = TableCPDFactorization(network)

    query = {"prize_door": ["A", "B", "C"]}
    evidence = {"guest_door": "A", "monty_door": "B"}

    answer = factorization.condprobve(query, evidence)
    for attribute in (answer.vals, answer.scope, answer.card, answer.stride):
        print(attribute)
Beispiel #7
0
def classify(evidence, bn):
    """Predict the most likely 'Volume' state for *evidence* under *bn*.

    Parameters
    ----------
    evidence : dict
        Observed node states; must contain a 'Volume' key, which is used
        only to form the query and is excluded from the evidence.
    bn : DiscreteBayesianNetwork
        The trained network.

    Returns
    -------
    list
        ``[0, vl]`` where ``vl`` is the maximum-probability Volume value.
    """
    q2 = dict(Volume=evidence['Volume'])
    # Work on a copy so the caller's evidence dict is not mutated.
    remaining = dict(evidence)
    del remaining['Volume']

    fn = TableCPDFactorization(bn)
    result = fn.condprobve(q2, remaining)
    mx = max(result.vals)
    indx = result.vals.index(mx)
    vl = bn.Vdata['Volume']['vals'][indx]
    return [0, vl]
def classify(evidence, bn):
    """Return ``[0, vl]`` where ``vl`` is the most probable 'Volume'
    state given the remaining evidence.

    Note: removes the 'Volume' key from *evidence* in place.
    """
    volume_query = {'Volume': evidence['Volume']}
    del evidence['Volume']

    factorization = TableCPDFactorization(bn)
    distribution = factorization.condprobve(volume_query, evidence)

    best = max(distribution.vals)
    winner = distribution.vals.index(best)
    return [0, bn.Vdata['Volume']['vals'][winner]]
Beispiel #9
0
    def discrete_query_cb(self, req):
        """Service callback: answer a discrete probability query.

        Builds a network from the node data in *req*, runs variable
        elimination for the requested query nodes given the evidence,
        and packs the answer into a DiscreteQueryResponse.
        """
        node_data = U.discrete_nodedata_from_ros(req.nodes)
        skeleton = U.graph_skeleton_from_node_data(node_data)
        skeleton.toporder()
        network = DiscreteBayesianNetwork(skeleton, node_data)
        factorization = TableCPDFactorization(network)

        query = {name: node_data.Vdata[name]["vals"] for name in req.query}
        evidence = {ns.node: ns.state for ns in req.evidence}

        rospy.loginfo("resolving query %s with evidence %s" % (query, evidence))
        answer = factorization.condprobve(query=query, evidence=evidence)
        rospy.loginfo("%s -> %s" % (answer.scope, answer.vals))

        node = DiscreteNode()
        node.name = answer.scope[0]
        node.outcomes = query[node.name]
        node.CPT.append(ConditionalProbability(node.outcomes, answer.vals))

        response = DiscreteQueryResponse()
        response.nodes.append(node)
        return response
Beispiel #10
0
        a = TableCPDFactorization(res)
        #compute the query and evidences as dicts
        query = dict(Overall=Overall)
        evidence = dict(Service=Service,
                        Location=Location,
                        Cleanliness=Cleanliness,
                        Value=Value,
                        bad=bad,
                        Rooms=Rooms,
                        old=old,
                        good=good,
                        great=great,
                        comfortable=comfortable)
        #Checkin=Checkin,Businessservice=Businessservice
        #run the query given evidence
        result = a.condprobve(query, evidence)
        #result2 = a.specificquery(query, evidence)
        #print(result2)
        #print json.dumps(result.vals, indent=2)
        #choose the max probability ditribution as model prediction
        maxvalue = max(result.vals)

        pos = GetRealValueLast(result.vals.index(maxvalue))

        #append it to our prediction list
        pred.append(pos + 1)
        print(count)
        count = count + 1
    #print performances on the performances.csv file
    with open("performances3.csv", "a") as f:
        f.write("ACCURACY of the " + str(score) + "th score: " +
Beispiel #11
0
class TestTableCPDFactorization(unittest.TestCase):
    """Unit tests for TableCPDFactorization over the standard
    student-network fixture in unittestdict.txt."""

    def setUp(self):
        skeleton = GraphSkeleton()
        skeleton.load("unittestdict.txt")
        skeleton.toporder()
        node_data = NodeData()
        node_data.load("unittestdict.txt")
        self.bn = DiscreteBayesianNetwork(skeleton, node_data)
        self.fn = TableCPDFactorization(self.bn)

    def test_constructor(self):
        # One factor per node in the fixture network.
        self.assertEqual(len(self.fn.originalfactorlist), 5)
        for factor in self.fn.originalfactorlist:
            self.assertTrue(isinstance(factor, TableCPDFactor))

    def test_refresh(self):
        # The same query must give the same answer after a refresh.
        evidence = dict(Letter='weak')
        query = dict(Intelligence=['high'])
        before = self.fn.specificquery(query, evidence)
        self.fn.refresh()
        after = self.fn.specificquery(query, evidence)
        self.assertEqual(before, after)

    def test_sumproducteliminatevar(self):
        self.fn.refresh()
        self.fn.sumproducteliminatevar("Difficulty")
        matches = [f for f in self.fn.factorlist
                   if f.scope == ['Grade', 'Intelligence']]
        # Exactly one factor over [Grade, Intelligence] must remain.
        self.assertEqual(len(matches), 1)
        expected = [0.2, 0.33999999999999997, 0.45999999999999996,
                    0.74, 0.16799999999999998, 0.09200000000000001]
        for got, want in zip(matches[0].vals, expected):
            self.assertTrue(abs(got - want) < .01)

    def test_sumproductve(self):
        to_eliminate = ["Difficulty", "Grade", "Intelligence", "SAT"]
        self.fn.refresh()
        self.fn.sumproductve(to_eliminate)
        for got, want in zip(self.fn.factorlist.vals, [.498, .502]):
            self.assertTrue(abs(got - want) < .01)

    def test_condprobve(self):
        self.fn.refresh()
        self.fn.condprobve(dict(Intelligence='high'),
                           dict(Grade='C', SAT='highscore'))
        for got, want in zip(self.fn.factorlist.vals, [.422, .578]):
            self.assertTrue(abs(got - want) < .01)

    def test_specificquery(self):
        self.fn.refresh()
        answer = self.fn.specificquery(dict(Grade=['A', 'B']),
                                       dict(Difficulty='easy'))
        self.assertTrue(abs(answer - .784) < .01)

    def test_gibbssample(self):
        samples = self.fn.gibbssample(dict(Letter='weak'), 5)
        self.assertIn(samples[0]["Difficulty"], ('easy', 'hard'))
        self.assertEqual(len(samples), 5)
        for sample in samples:
            self.assertEqual(sample["Letter"], 'weak')
Beispiel #12
0
class TestTableCPDFactorization(unittest.TestCase):
    """Tests for TableCPDFactorization built from the unittestdict.txt
    fixture network."""

    def setUp(self):
        skeleton = GraphSkeleton()
        skeleton.load("unittestdict.txt")
        skeleton.toporder()
        node_data = NodeData()
        node_data.load("unittestdict.txt")
        self.bn = DiscreteBayesianNetwork(skeleton, node_data)
        self.fn = TableCPDFactorization(self.bn)

    def test_constructor(self):
        # The fixture network has five nodes, hence five factors.
        self.assertEqual(len(self.fn.originalfactorlist), 5)
        for factor in self.fn.originalfactorlist:
            self.assertTrue(isinstance(factor, TableCPDFactor))

    def test_refresh(self):
        self.fn.refresh()
        for position in range(5):
            self.assertTrue(
                isinstance(self.fn.factorlist[position], TableCPDFactor))

    def test_sumproducteliminatevar(self):
        self.fn.refresh()
        self.fn.sumproducteliminatevar("Difficulty")
        matches = [f for f in self.fn.factorlist
                   if f.scope == ['Grade', 'Intelligence']]
        # Exactly one factor over [Grade, Intelligence] must remain.
        self.assertEqual(len(matches), 1)
        expected = [0.2, 0.33999999999999997, 0.45999999999999996,
                    0.74, 0.16799999999999998, 0.09200000000000001]
        for got, want in zip(matches[0].vals, expected):
            self.assertTrue(abs(got - want) < .01)

    def test_sumproductve(self):
        to_eliminate = ["Difficulty", "Grade", "Intelligence", "SAT"]
        self.fn.refresh()
        self.fn.sumproductve(to_eliminate)
        for got, want in zip(self.fn.factorlist.vals, [.498, .502]):
            self.assertTrue(abs(got - want) < .01)

    def test_condprobve(self):
        self.fn.refresh()
        self.fn.condprobve(dict(Intelligence='high'),
                           dict(Grade='C', SAT='highscore'))
        for got, want in zip(self.fn.factorlist.vals, [.422, .578]):
            self.assertTrue(abs(got - want) < .01)

    def test_specificquery(self):
        self.fn.refresh()
        answer = self.fn.specificquery(dict(Grade=['A', 'B']),
                                       dict(Difficulty='easy'))
        self.assertTrue(abs(answer - .784) < .01)

    def test_gibbssample(self):
        samples = self.fn.gibbssample(dict(Letter='weak'), 5)
        self.assertIn(samples[0]["Difficulty"], ('easy', 'hard'))
        self.assertEqual(len(samples), 5)
        for sample in samples:
            self.assertEqual(sample["Letter"], 'weak')
Beispiel #13
0
# toporder graph skeleton (skel is loaded earlier in this script)
skel.toporder()

# Query P(Grade = 'A') with no evidence.
evidence = dict()
query = {"Grade":['A']}

# load bayesian network
bn = DiscreteBayesianNetwork(skel, nd)

# load factorization
fn = TableCPDFactorization(bn)

# Calculate the probability distribution with variable elimination.
result = fn.condprobve(query, evidence)

# output - toggle comment to see
#print json.dumps(result.vals, indent=2)
#print json.dumps(result.scope, indent=2)
#print json.dumps(result.card, indent=2)
#print json.dumps(result.stride, indent=2)

# (6) ---------------------------------------------------------------------------
# Compute the exact probability of an outcome

# Re-load nodedata and graphskeleton from the shared test fixture.
nd = NodeData()
skel = GraphSkeleton()
nd.load("../tests/unittestdict.txt")
skel.load("../tests/unittestdict.txt")
    'entertainment'
    'executive'
    'healthcare'
    'homemaker'
    'lawyer'
    'librarian'
    'marketing'
    'none'
    'other'
    'programmer'
    'retired'
    'salesman'
    'scientist'
    'student'
    'technician'
    'writer'
]

# Query P(occupation | gender = 'F') for every occupation value.
myevidence = dict(gender='F')
res2 = []
for occu in occupations:
    myquery = dict(occupation=[occu])
    # NOTE(review): res2 is reassigned every iteration, so after the loop
    # it holds only the factor from the LAST occupation queried.
    res2 = tcf.condprobve(query=myquery, evidence=myevidence)
    #res2=tcf.specificquery(query=myquery,evidence=myevidence)
    #print res2
    print json.dumps(res2.vals, indent=2)
# NOTE(review): this does not accumulate a running maximum -- 'mle' is
# overwritten each iteration, ending as max of the last pair compared;
# presumably max(res2) was intended.  Also, res2 is a condprobve factor
# object here, not a list -- verify against the original script.
mle = res2[0]
for i in range(1, len(res2) - 1):
    mle = max(res2[i - 1], res2[i])
print "mle of occupation given gender is Female"
print mle
Beispiel #15
0
def fun(inputData):
    """Estimate the distribution of the 'Overall' rating given the
    feature values in *inputData*.

    Parameters
    ----------
    inputData : sequence
        Eleven feature values, in order: Value, Location, Cleanliness,
        Service, Rooms, bad, old, good, great, comfortable, clean.

    Returns
    -------
    dict
        Maps each 'Overall' outcome to its inferred probability.
    """

    def format_data(df):
        """Convert dataframe rows into the list-of-dicts shape that
        libpgm's learners expect."""
        return [
            dict(great=row.great,
                 good=row.good,
                 clean=row.clean,
                 comfortable=row.comfortable,
                 bad=row.bad,
                 old=row.old,
                 Cleanliness=row.Cleanliness,
                 Location=row.Location,
                 Service=row.Service,
                 Rooms=row.Rooms,
                 Value=row.Value,
                 Overall=row.Overall)
            for row in df.itertuples()
        ]

    # Load all preprocessed training data and format it for libpgm.
    df = pd.read_csv('features.csv', sep=',')
    node_data = format_data(df)

    skel = GraphSkeleton()
    # Load the structure of our net and set the topological order.
    skel.load("./our-skel.txt")
    skel.toporder()

    # Estimate the CPT parameters for our own model.
    learner = PGMLearner()
    res = learner.discrete_mle_estimateparams(skel, node_data)

    # Build the factorization and the query/evidence dicts.
    a = TableCPDFactorization(res)
    query = dict(Overall=1)
    evidence = dict(Value=inputData[0],
                    Location=inputData[1],
                    Cleanliness=inputData[2],
                    Service=inputData[3],
                    Rooms=inputData[4],
                    bad=inputData[5],
                    old=inputData[6],
                    good=inputData[7],
                    great=inputData[8],
                    comfortable=inputData[9],
                    clean=inputData[10])

    print(query)
    print(evidence)

    # Run the query given the evidence.
    result = a.condprobve(query, evidence)

    # Parenthesised print for consistency with the calls above (and
    # Python 3 compatibility).
    print(json.dumps(result.vals, indent=2))

    # Map each 'Overall' outcome to its inferred probability.
    dizionario = {}
    for i in range(5):
        dizionario[res.Vdata["Overall"]["vals"][i]] = result.vals[i]
    return dizionario