Example #1
0
def calculateOverlay():
    model, table = projizz.readPrefixTreeModelWithTable("../yago/yagoPatternTree.model", "../patty/yagoPatternTreeWithConfidence.table")
    
    overlay = {}
    for relation in projizz.getYagoRelation():
        overlay[relation] = {}
        for rela in projizz.getYagoRelation():
            overlay[relation][rela] = 0

    # Build table
    for pid in table:
        if table[pid]["used"]:
            if "eval" in table[pid] and not table[pid]["eval"]:
                continue
            for relation in table[pid]["relations"]:
                for rela in table[pid]["relations"]:
                    overlay[relation][rela] += 1
        else:
            pass

    for relaA in projizz.getYagoRelation():
        j = []
        for relaB in projizz.getYagoRelation():
            overC = overlay[relaA][relaB]
            j.append((relaB,overC))
        j.sort(key=lambda x:x[1],reverse=True)
        a = overlay[relaA][relaA]
        for relaB,overC in j:
            b = overlay[relaB][relaB]
            print "%s(%d) -> %s(%d) %d/%d %.5f (%d/%d,%.5f)" % (relaA,a,relaB,b,overC,a,float(overC)/a,overC,b,float(overC)/b)
Example #2
0
def calculateOverlay():
    model, table = projizz.readPrefixTreeModelWithTable(
        "../yago/yagoPatternTree.model",
        "../patty/yagoPatternTreeWithConfidence.table")

    overlay = {}
    for relation in projizz.getYagoRelation():
        overlay[relation] = {}
        for rela in projizz.getYagoRelation():
            overlay[relation][rela] = 0

    # Build table
    for pid in table:
        if table[pid]["used"]:
            if "eval" in table[pid] and not table[pid]["eval"]:
                continue
            for relation in table[pid]["relations"]:
                for rela in table[pid]["relations"]:
                    overlay[relation][rela] += 1
        else:
            pass

    for relaA in projizz.getYagoRelation():
        j = []
        for relaB in projizz.getYagoRelation():
            overC = overlay[relaA][relaB]
            j.append((relaB, overC))
        j.sort(key=lambda x: x[1], reverse=True)
        a = overlay[relaA][relaA]
        for relaB, overC in j:
            b = overlay[relaB][relaB]
            print "%s(%d) -> %s(%d) %d/%d %.5f (%d/%d,%.5f)" % (
                relaA, a, relaB, b, overC, a, float(overC) / a, overC, b,
                float(overC) / b)
Example #3
0
def calculateOverlay():
    model, table = projizz.readPrefixTreeModelWithTable(
        "../yago/yagoPatternTree.model",
        "../patty/yagoPatternTreeWithConfidence.table")

    overlay = {}
    for relation in projizz.getYagoRelation():
        overlay[relation] = {}
        for rela in projizz.getYagoRelation():
            overlay[relation][rela] = 0

    # Build table
    for pid in table:
        if table[pid]["used"]:
            if "eval" in table[pid] and not table[pid]["eval"]:
                continue
            for relation in table[pid]["relations"]:
                for rela in table[pid]["relations"]:
                    overlay[relation][rela] += 1
        else:
            pass

    yagoRela = projizz.getYagoRelation()
    yagoRela.sort()
    yagoRela.remove("produced")

    #print yagoRela

    print "      ",
    for i in range(13, 25):
        print "& (%d)" % (i),
    print "\\\\"

    for relaA in yagoRela:
        j = []
        for relaB in yagoRela:
            overC = overlay[relaA][relaB]
            j.append((relaB, overC))
        #j.sort(key=lambda x:x[1],reverse=True)
        a = overlay[relaA][relaA]
        _id = yagoRela.index(relaA) + 1
        print "(%d) %s" % (_id, relaA),
        for relaB, overC in j:
            b = overlay[relaB][relaB]
            _tid = yagoRela.index(relaB) + 1
            if _tid < 13:
                continue
            #print "%s(%d) -> %s(%d) %d/%d %.5f (%d/%d,%.5f)" % (relaA,_id,relaB,b,overC,a,float(overC)/a,overC,b,float(overC)/b)
            print " & %2.2f" % (float(overC) / a),
        print "\\\\"
Example #4
0
def calculateOverlay():
    model, table = projizz.readPrefixTreeModelWithTable("../yago/yagoPatternTree.model", "../patty/yagoPatternTreeWithConfidence.table")
    
    overlay = {}
    for relation in projizz.getYagoRelation():
        overlay[relation] = {}
        for rela in projizz.getYagoRelation():
            overlay[relation][rela] = 0

    # Build table
    for pid in table:
        if table[pid]["used"]:
            if "eval" in table[pid] and not table[pid]["eval"]:
                continue
            for relation in table[pid]["relations"]:
                for rela in table[pid]["relations"]:
                    overlay[relation][rela] += 1
        else:
            pass

    yagoRela = projizz.getYagoRelation()
    yagoRela.sort()
    yagoRela.remove("produced")

    #print yagoRela

    print "      ",
    for i in range(13,25):
        print "& (%d)" % (i),
    print "\\\\"

    for relaA in yagoRela:
        j = []
        for relaB in yagoRela:
            overC = overlay[relaA][relaB]
            j.append((relaB,overC))
        #j.sort(key=lambda x:x[1],reverse=True)
        a = overlay[relaA][relaA]
        _id = yagoRela.index(relaA) + 1
        print "(%d) %s" % (_id,relaA),
        for relaB,overC in j:
            b = overlay[relaB][relaB]
            _tid = yagoRela.index(relaB) + 1
            if _tid < 13:
                continue
            #print "%s(%d) -> %s(%d) %d/%d %.5f (%d/%d,%.5f)" % (relaA,_id,relaB,b,overC,a,float(overC)/a,overC,b,float(overC)/b)
            print " & %2.2f" % (float(overC)/a),
        print "\\\\"
Example #5
0
def buildModels(inputpath, outputPath):

    projizz.checkPath(outputPath)

    cpuCount = multiprocessing.cpu_count()
    if cpuCount > 8:
        cpuCount = 8

    pool = multiprocessing.Pool(processes=cpuCount)
    t = 0

    relations = projizz.getYagoRelation()
    for relation in relations:
        if relation == "produced":
            continue
        pool.apply_async(trainModel, (t, relation, inputpath, outputPath))
        t += 1
    pool.close()
    pool.join()

    print "Done training all classifiers"
Example #6
0
def buildModels(inputpath,outputPath):

    projizz.checkPath(outputPath)

    cpuCount = multiprocessing.cpu_count()
    if cpuCount > 8:
        cpuCount = 8

    pool = multiprocessing.Pool(processes=cpuCount) 
    t = 0

    relations = projizz.getYagoRelation()
    for relation in relations:
        if relation == "produced":
            continue
        pool.apply_async(trainModel, (t,relation,inputpath,outputPath))
        t += 1
    pool.close()
    pool.join()

    print "Done training all classifiers"
def parseYagoData():

    phase = "used"

    if phase == "build":
        model, table = projizz.readPrefixTreeModelWithTable(
            "../yago/yagoPatternTree.model", "../yago/yagoPatternTree.table")
    else:
        model, table = projizz.readPrefixTreeModelWithTable(
            "../yago/yagoPatternTree.model",
            "./yagoPatternTreeWithConfidence.table")

    # function testing.
    #test = "has appeared like [[num]]"
    ##test = "has appeared like [[num]"
    #i = projizz.naiveMatchPattern(test,model)
    #print i

    a = table.keys()
    originL = len(a)

    ptnByRelation = {}

    for relation in projizz.getYagoRelation():
        if not phase == "build":
            break

        f = open("./yagoRela/%s.txt" % (relation))

        print relation

        text = f.readline()
        ptnSynsetTxt = text.split("\",\" ")[1:]
        ptnSynsetTxt = ptnSynsetTxt[:-1] + [ptnSynsetTxt[-1][:-7]]

        ptnByRelation[relation] = []

        evC = 0
        for text in ptnSynsetTxt:
            ptns = text.split("#")
            # ptns[1] : pattern synset id in patty
            # ptns[3] : pattern domain
            # ptns[4] : pattern plain text
            # ptns[5] : pattern range
            # pnts[6] : confidence
            # ptns[7] : support co-occurrence
            # ptns[8] : some has, I guess it is eval result.
            if len(ptns) > 8:
                evC += 1

            patterns = ptns[4].split(";%")
            patterns = patterns[:-1] + [patterns[-1][:-1]]

            for pattern in patterns:
                pid = projizz.naiveMatchPattern(pattern, model)
                if pid < 0:
                    pass
                    #print relation,pattern
                else:
                    pid = str(pid)
                    if pid in a:
                        a.remove(pid)
                    if not pid in ptnByRelation[relation]:
                        ptnByRelation[relation].append(pid)

                    if not relation in table[pid]["relations"]:
                        table[pid]["relations"].append(relation)
                        #print relation,pid,pattern

                    ptnS = table[pid]
                    if not "confidence" in ptnS:
                        table[pid]["confidence"] = float(ptns[6])
                        table[pid]["support"] = int(ptns[7])
                        table[pid]["used"] = True

                        if len(ptns) > 8:
                            if ptns[8] == "false":
                                table[pid]["eval"] = False
                                #print pid,table[pid]["relations"],pattern,ptns[8]
                            else:
                                table[pid]["eval"] = True

        f.close()

    if phase == "build":

        for pid in a:
            table[pid]["used"] = False

        for pid in table:
            if table[pid]["used"]:
                needRemove = []
                for relation in table[pid]["relations"]:
                    if not pid in ptnByRelation[relation]:
                        print pid, table[pid]["pattern"], relation
                        needRemove.append(relation)
                for p in needRemove:
                    table[pid]["relations"].remove(p)
                if len(table[pid]["relations"]) == 0:
                    print pid, table[pid]["pattern"], "!!!"
            else:
                pass

        projizz.jsonWrite(table, "./yagoPatternTreeWithConfidence.table")

    else:
        c = 0
        used = 0
        for pid in table:
            if table[pid]["used"]:
                # 如果有true或false在,就只留True的Pattern
                if "eval" in table[pid]:
                    if not table[pid]["eval"]:
                        continue
                used += 1
                for relation in table[pid]["relations"]:
                    if not relation in ptnByRelation:
                        ptnByRelation[relation] = []
                    if not pid in ptnByRelation[relation]:
                        ptnByRelation[relation].append(pid)
            else:
                c += 1

    # 一些小計算
    #for relation in ptnByRelation:
    #    print relation,len(ptnByRelation[relation])

    # 找最高(意思就是不能再更高了)信心值
    # 每組Relation的最高之中最小的那一個

    minC = 1.0
    minCR = ""
    for relation in ptnByRelation:
        c75 = 0
        c50 = 0
        ptns = []
        for pid in ptnByRelation[relation]:
            ptns.append(table[pid])
            ptns[-1]["pid"] = pid
        ptns.sort(key=lambda x: x["confidence"], reverse=True)
        if ptns[0]["confidence"] < minC:
            minC = ptns[0]["confidence"]
            minCR = relation

        #print relation,ptns[0]
        f = open("./yagoSortedRela/%s.txt" % (relation), "w")
        for ptn in ptns:
            if ptn["confidence"] > .75:
                c75 += 1
            if ptn["confidence"] > .5:
                c50 += 1
            f.write("%s\t%s\t%.3f\t%d\t%s\n" %
                    (ptn["pid"], ptn["pattern"], ptn["confidence"],
                     ptn["support"], ptn["relations"]))
        f.close()

        print relation, len(ptns), c75, c50

    print minCR, minC, "pattern used:", used
def parseYagoData():
    
    phase = "used"
    
    if phase == "build":
        model, table = projizz.readPrefixTreeModelWithTable("../yago/yagoPatternTree.model", "../yago/yagoPatternTree.table")
    else:
        model, table = projizz.readPrefixTreeModelWithTable("../yago/yagoPatternTree.model", "./yagoPatternTreeWithConfidence.table")
    
    # function testing.
    #test = "has appeared like [[num]]"
    ##test = "has appeared like [[num]"
    #i = projizz.naiveMatchPattern(test,model) 
    #print i


    a = table.keys()
    originL = len(a)

    ptnByRelation = {}

    

    for relation in projizz.getYagoRelation():
        if not phase == "build":
            break
        
        f = open("./yagoRela/%s.txt" % (relation))
        
        print relation

        text = f.readline()
        ptnSynsetTxt = text.split("\",\" ")[1:]
        ptnSynsetTxt = ptnSynsetTxt[:-1] + [ ptnSynsetTxt[-1][:-7] ]

        ptnByRelation[relation] = []
        

        evC = 0
        for text in ptnSynsetTxt:
            ptns = text.split("#")
            # ptns[1] : pattern synset id in patty
            # ptns[3] : pattern domain
            # ptns[4] : pattern plain text
            # ptns[5] : pattern range
            # pnts[6] : confidence
            # ptns[7] : support co-occurrence
            # ptns[8] : some has, I guess it is eval result.
            if len(ptns) > 8:
                evC += 1

            patterns = ptns[4].split(";%")
            patterns = patterns[:-1] + [patterns[-1][:-1]]

            for pattern in patterns:
                pid = projizz.naiveMatchPattern(pattern,model)
                if pid < 0:
                    pass
                    #print relation,pattern
                else:
                    pid = str(pid)
                    if pid in a:
                        a.remove(pid)
                    if not pid in ptnByRelation[relation]:
                        ptnByRelation[relation].append(pid)

                    if not relation in table[pid]["relations"]:
                        table[pid]["relations"].append(relation)
                        #print relation,pid,pattern

                    ptnS = table[pid]
                    if not "confidence" in ptnS:
                        table[pid]["confidence"] = float(ptns[6])
                        table[pid]["support"] = int(ptns[7])
                        table[pid]["used"] = True
            
                        if len(ptns) > 8:
                            if ptns[8] == "false":
                                table[pid]["eval"] = False
                                #print pid,table[pid]["relations"],pattern,ptns[8]
                            else:
                                table[pid]["eval"] = True

        f.close()

    if phase == "build":

        for pid in a:
            table[pid]["used"] = False
    
        for pid in table:
            if table[pid]["used"]:
                needRemove = []
                for relation in table[pid]["relations"]:
                    if not pid in ptnByRelation[relation]:
                        print pid,table[pid]["pattern"],relation
                        needRemove.append(relation)
                for p in needRemove:
                    table[pid]["relations"].remove(p)
                if len(table[pid]["relations"]) == 0:
                    print pid,table[pid]["pattern"],"!!!"
            else:
                pass

        projizz.jsonWrite(table,"./yagoPatternTreeWithConfidence.table")

    else:
        c = 0
        used = 0
        for pid in table:
            if table[pid]["used"]:
                # 如果有true或false在,就只留True的Pattern
                if "eval" in table[pid]:
                    if not table[pid]["eval"]:
                        continue
                used += 1
                for relation in table[pid]["relations"]:
                    if not relation in ptnByRelation:
                        ptnByRelation[relation] = []
                    if not pid in ptnByRelation[relation]:
                        ptnByRelation[relation].append(pid)
            else:
                c += 1

    # 一些小計算
    #for relation in ptnByRelation:
    #    print relation,len(ptnByRelation[relation])
    
    # 找最高(意思就是不能再更高了)信心值
    # 每組Relation的最高之中最小的那一個

    minC = 1.0
    minCR = ""
    for relation in ptnByRelation:
        c75 = 0
        c50 = 0
        ptns = []
        for pid in ptnByRelation[relation]:
            ptns.append(table[pid])
            ptns[-1]["pid"] = pid
        ptns.sort(key=lambda x:x["confidence"],reverse=True)
        if ptns[0]["confidence"] < minC:
            minC = ptns[0]["confidence"]
            minCR = relation
        
        #print relation,ptns[0]
        f = open("./yagoSortedRela/%s.txt" % (relation),"w")
        for ptn in ptns:
            if ptn["confidence"] > .75:
                c75 += 1
            if ptn["confidence"] > .5:
                c50 += 1
            f.write("%s\t%s\t%.3f\t%d\t%s\n" % (ptn["pid"],ptn["pattern"],ptn["confidence"],ptn["support"],ptn["relations"]))
        f.close()

        print relation,len(ptns),c75,c50

    print minCR,minC,"pattern used:",used
Example #9
0
#request = urllib2.Request(url,data,headers)

#response = urlopener.open(request)
#result = response.read()

#print result

#f = open("./patty.dbpedia.Relations.json")
#for line in f:
#    if "," in line:
#        relaName = line[2:-3]
#        print relaName
#        request = urllib2.Request(url,getData(relaName),headers)
#        g = open("./relationships/%s.txt" % (relaName), "w")
#        response = urlopener.open(request)
#        result = response.read()
#        g.write(result)
#        g.close()
#f.close()

for relaName in projizz.getYagoRelation():
    print relaName
    request = urllib2.Request(url,getYAGOData(relaName),headers)
    g = open("./yagoRela/%s.txt" % (relaName), "w")
    response = urlopener.open(request)
    result = response.read()
    g.write(result)
    g.close()