import nltk import json import yaml from random import choice from stat_parser import Parser, display_tree parser = Parser() d = json.load(open('tree_ship_words.json')) f = open("ships.yml") ships = yaml.load(f.read()) name = choice(ships) print name.lower() #tree = parser.parse("Anticipation of a new lover's arrival") tree = parser.parse("they had good pretzels") display_tree(tree)
def main(): time1 = time() parser = Parser() inFile = sys.argv[1] outFile = sys.argv[2] f = open(outFile,'w+') for line in open(inFile): if config.print_line: print line global code, errorCode, nodeList, nodeNum, firstVBNN, firstNNVB, found, sqFlag, gqFlag, NNNode, VBNode code = 0 nodeNum = 0 errorCode = [] nodeList = [] firstVBNN = [] firstNNVB = [] found = 0 sqFlag = 0 gpFlag = 0 NNNode = [] VBNode = [] wordNum = len(line.split()) if config.print_word_num: print 'Word Num: ', wordNum if config.ignore_long_sentence and wordNum>config.max_word_num: # print 'Long Sentence' # print '\n'*2 # print -1, line, f.write(str(-1) + ' ' + line) continue try: start = time() indentTree = parser.raw_parse(line) # raw parse for indent tree list end = time() except: # print 'Parsing Error' # print '\n'*2 # print -1, line, f.write(str(-1) + ' ' + line) continue if config.print_parse_time: print 'Raw Parse Time: ', end-start, 's' if config.print_indent_tree: pprint(indentTree) # unit test root = trans(indentTree) # recursively transform list to "tree of node" assignCode(root) assignDesNum(root) if config.dfs_indent: dfsIndent(root,0) # unit test if config.print_node_list: # unit test for node in nodeList: print node.getData(), '\t', print if config.show_nltk_tree: start = time() nlktTree = parser.parse(line) # nlktTree, could be drawn into graph end = time() print 'NLKT Tree Parse Time: ', end-start, 's' display_tree(nlktTree) # unit test """ Now the check begins! 
""" preCheck() totalCheck(root) """ If there is some NP+VP left """ for i in range(len(nodeList)-1): node1 = nodeList[i] node2 = nodeList[i+1] if node2.getParent().getData() == u'RB' and i+2<len(nodeList): node2 = nodeList[i+2] if node1.getParent().getData() in NNList and node2.getParent().getData() in VBList: if (node1 in NNNode and node2 in VBNode) or (firstNNVB and (node1==firstNNVB[0] and node2==firstNNVB[1])): pass else: NNNode.append(node1) VBNode.append(node2) """ Be Check """ for i in range(len(nodeList)-1): node1 = nodeList[i] node2 = nodeList[i+1] if node2.getParent().getData() == u'RB' and i+2<len(nodeList): node2 = nodeList[i+2] if node1.getData().lower() == 'i': if node2.getData() in BEList and node2.getData() not in ['am', 'was',"'m"]: errorCode.append(node2.getCode()) elif node1.getData().lower() in PRPSecondList: if node2.getData() in BEList and node2.getData() not in ['are', 'were',"'re"]: errorCode.append(node2.getCode()) elif node1.getData().lower() in PRPThirdList: if node1.getData().lower()=='that': if i>0 and nodeList[i-1].getParent().getData() in NNList: continue if node2.getData() in BEList and node2.getData() not in ['is', 'was',"'s"]: errorCode.append(node2.getCode()) else: pass """ del duplicates """ if firstNNVB: n = firstNNVB[0] v = firstNNVB[1] if n in NNNode and v in VBNode: NNNode.remove(n) VBNode.remove(v) """ replace RB with the word after (Maybe VB) """ if firstNNVB: v = firstNNVB[1] if v.getParent().getData()==u'RB': code = v.getCode() new = nodeList[code+1] if new.getParent().getData() in VBList: firstNNVB[1] = new else: pass for v in VBNode: if v.getParent().getData()==u'RB': code = v.getCode() new = nodeList[code+1] if new.getParent().getData() in VBList: VBNode[VBNode.index(v)] = new else: pass """ print dependencies """ if config.print_npvp: print 'FROM ERRORCODE:' for i in errorCode: print nodeList[i].getData(), '\t', print print 'FROM QUESTION:' for node in firstVBNN: print node.getData() for node in firstNNVB: print 
node.getData() print print 'FROM NPVP & OTHERS:' for i in range(len(NNNode)): print NNNode[i].getParent().getData(), NNNode[i].getData(), '\t', VBNode[i].getParent().getData(), VBNode[i].getData() print """ canonicalize """ if firstVBNN: if not (firstVBNN[0].getParent().getData() in VBList and firstVBNN[1].getParent().getData() in NNList): firstVBNN = [] if firstNNVB: if not (firstNNVB[1].getParent().getData() in VBList and firstNNVB[0].getParent().getData() in NNList): firstNNVB = [] for v in VBNode: i = VBNode.index(v) if not (NNNode[i].getParent().getData() in NNList and VBNode[i].getParent().getData() in VBList): del NNNode[i] del VBNode[i] """ Finally! We add codes! """ if config.print_standard_answer: """ FROM QUESTION """ if sqFlag or gqFlag: if firstVBNN: v = firstVBNN[0] n = firstVBNN[1] if n.getData().lower() in PRPThirdList or n.getParent().getData() in [u'NN', u'NNP']: # single noun if v.getParent().getData() in [u'VB', u'VBP']: errorCode.append(v.getCode()) else: if v.getParent().getData()==u'VBZ': errorCode.append(v.getCode()) if firstNNVB: v = firstNNVB[1] if v.getParent().getData()==u'VBZ': errorCode.append(v.getCode()) else: if firstNNVB: if not (firstNNVB[0] in NNNode and firstNNVB[1] in VBNode): NNNode.append(firstNNVB[0]) VBNode.append(firstNNVB[1]) """ FROM NPVP & OTHERS """ for i in range(len(NNNode)): n = NNNode[i] v = VBNode[i] if n.getData().lower() in PRPThirdList or n.getParent().getData() in [u'NN', u'NNP']: # single noun if v.getParent().getData() in [u'VB', u'VBP']: errorCode.append(v.getCode()) else: if v.getParent().getData()==u'VBZ': errorCode.append(v.getCode()) errorCode = list(set(errorCode)) errorCode.sort() if errorCode: for i in errorCode: # print i+1, f.write(str(i+1) + ' ') else: # print -1, f.write(str(-1) + ' ') # print line, f.write(line) if config.print_vb: printVB(root) # print all verb and MD in tree print if config.print_empty_line: print '\n'*2 time2 = time() if config.print_total_time: print 'Total Execution Time: 
', time2-time1, 's' if config.show_nltk_tree: Tkinter._test() # show the nlktTree graph
from stat_parser import Parser, display_tree

# Script: parse one question and draw the resulting tree.
# The sentence is taken from http://www.thrivenotes.com/the-last-question/

parser = Parser()
tree = parser.parse("How can the net amount of entropy of the universe be massively decreased?")
display_tree(tree)
from stat_parser import Parser, display_tree

# Script: parse an ambiguous sentence and draw the two parts of the result.

parser = Parser()

# The parse result is unpacked into exactly two names; Python raises
# ValueError here if it does not yield exactly two items.
tree1, tree2 = parser.parse("John saw Mary with the telescope")
display_tree([tree1, tree2])