def extractOneSent(sent):
    """Extract the |pid|time|company|department|position| tuples of one sentence.

    The sentence is first merged by tag, joined into a space-separated
    string and handed to the parser.  The resulting tree is walked by
    ``traverse``, which fills the module-level ``g_retTupleList`` as a side
    effect; every entry collected there is then expanded through
    ``combineTuple`` and concatenated into the result.

    Args:
        sent: the tagged sentence accepted by ``combineSameTag``.  After
            merging, element ``[1]`` of each item is used as the word text
            (presumably items are ``(tag, word)`` pairs -- confirm against
            the caller).

    Returns:
        A list holding the concatenated ``combineTuple`` results for every
        entry that ``traverse`` stored in ``g_retTupleList``.  The list is
        empty when nothing was collected.
    """
    global g_retTupleList

    # Reset the shared accumulator so entries from an earlier call are not
    # mixed into this sentence's result.
    g_retTupleList = []

    # Merge neighbouring words that carry the same tag into a single word.
    sent = combineSameTag(sent)

    # The parser takes the sentence as one string of space-separated words.
    sentStr = " ".join([x[1] for x in sent])
    # Debug trace of the exact string sent to the parser.
    print(sentStr)

    ret = P.parse(sentStr.encode('utf-8'))
    # NOTE(review): Tree.parse looks like the NLTK 2 spelling; newer NLTK
    # releases expose Tree.fromstring instead -- confirm the pinned version.
    t = nltk.Tree.parse(ret[1])

    # Walking the tree together with the sentence populates g_retTupleList.
    traverse(t, sent)

    # Expand each collected entry into its final tuples (combineTuple is
    # expected to drop the redundant ones) and gather them in one flat list.
    retTuples = []
    for each in g_retTupleList:
        retTuples.extend(combineTuple(each))
    return retTuples
# by zhangzhi @2013-11-08 19:16:28 # Copyright 2013 NONE rights reserved. import stanfordParserPipeCn as P import nltk def traverse(t): try: t.node except AttributeError: "one leaf is here" print "Error" print t, else: # Now we know that t.node is defined print '(', t.node, print "=========" for child in t: print "child" traverse(child) print ')', if __name__ == '__main__': #ret = P.parse("张智 于 2010 年 于 哈尔滨 工业 大学 毕业 , 毕业 后 在 百度 工作 至 2013 年 。") ret = P.parse("The dog find the god") #print ret[1] t = nltk.Tree.parse(ret[1]) t.draw() traverse(t)
# Copyright 2013 NONE rights reserved. import stanfordParserPipeCn as P import nltk def traverse(t): try: t.node except AttributeError: "one leaf is here" print "Error" print t, else: # Now we know that t.node is defined print '(', t.node, print "=========" for child in t: print "child" traverse(child) print ')', if __name__ == '__main__': #ret = P.parse("张智 于 2010 年 于 哈尔滨 工业 大学 毕业 , 毕业 后 在 百度 工作 至 2013 年 。") ret = P.parse("The dog find the god") #print ret[1] t = nltk.Tree.parse(ret[1]) t.draw() traverse(t)