Beispiel #1
0
def test():
    sprRecs = []
    ret = readDb.readData(tbl, )
    for rec in ret:
        pid = int(rec['pid'])
        data = rec['introductionSeg'].strip()
        dataList = data.split("\t")
        sprData = seperateTag(dataList)
        sprRecs.append(sprData)
    for spr in sprRecs:
        for tag, data in spr:
            print tag, data.encode('UTF8')
    return sprRecs
def test():
    sprRecs = []
    ret = readDb.readData(tbl,)
    for rec in ret:
        pid = int(rec['pid'])
        data = rec['introductionSeg'].strip()
        dataList = data.split("\t")
        sprData = seperateTag(dataList)
        sprRecs.append(sprData) 
    for spr in sprRecs:
        for tag, data in spr:
            print tag, data.encode('UTF8')
    return sprRecs
    for pid, sents in sprRecs.items():
        ret = extractOnePerson(sents)
        #print ret
        #output
        for oneTuples in ret:
            oneTuples.insert(0, str(pid))
            #print oneTuples
            #for i in range(len(oneTuples)):
            #    if oneTuples[i] == None:
            #        oneTuples[i] = 'None'
            #    else:
            #        try:
            #            oneTuples[i] = oneTuples[i].encode('utf-8')
            #        except Exception, e:
            #            print "error, %s [%s]" % (e, oneTuples[i])
            #print "|||".join(oneTuples)
    #done


if __name__ == '__main__':
    # Script entry point.
    # Initialise the database layer; print "exit" and call sys.exit(-1)
    # unless initDb() returns True.
    if R.initDb() != True:
        print "exit"
        sys.exit(-1)
    # If there are very many records (more than 100K), this read is the
    # place to optimise.
    dbData = R.readData('intro_for_event_extraction', 2)

    extractData(dbData)
    R.quitDb()
    # NOTE(review): `recs` is not assigned anywhere in the visible code, and
    # everything from here on runs after R.quitDb().  These lines look like
    # the body of a function taking `recs` whose `def` line is missing --
    # confirm against the original module before relying on them.
    sprRecs = sprData(recs)
    # sprRecs is iterated with .items(), yielding (pid, sents) pairs.
    for pid, sents in sprRecs.items():
        ret = extractOnePerson(sents)
        #print ret
        # Output step: put the pid, as a string, in front of each result list.
        for oneTuples in ret:
            oneTuples.insert(0, str(pid))
            # Disabled debug output (Python 2 syntax): replaced None entries
            # with 'None', UTF-8 encoded the rest and printed the fields
            # joined by "|||".
            #print oneTuples
            #for i in range(len(oneTuples)):
            #    if oneTuples[i] == None:
            #        oneTuples[i] = 'None'
            #    else:
            #        try:
            #            oneTuples[i] = oneTuples[i].encode('utf-8')
            #        except Exception, e:
            #            print "error, %s [%s]" % (e, oneTuples[i])
            #print "|||".join(oneTuples)
    # done

if __name__ == '__main__':
    #init DB
    if R.initDb() != True:
        print "exit"
        sys.exit(-1)    
    #if we have toooo many recs(more than 100K),
    #we may optimise here.
    dbData = R.readData('intro_for_event_extraction', 2)    

    extractData(dbData)
    R.quitDb()