def treebank_bracket_parse(t):
    """Parse the bracketed tree string ``t`` with ``tree.bracket_parse``.

    If the first attempt raises ``IndexError``, ``t`` is stripped of
    surrounding whitespace, its first and last characters are dropped, and
    the remainder is parsed instead.  Any other exception propagates.
    """
    try:
        parsed = tree.bracket_parse(t)
    except IndexError:
        # Possibly the real treebank format: drop the first and last
        # brackets and parse what is left.
        unwrapped = t.strip()[1:-1]
        parsed = tree.bracket_parse(unwrapped)
    return parsed
def treebank_bracket_parse(t):
    """Parse the bracketed tree string ``t`` into a ``Tree``.

    The string is first parsed as-is with ``Tree.fromstring`` (with
    ``remove_empty_top_bracketing=True``).  If that raises ``IndexError``,
    ``t`` is stripped of surrounding whitespace, its first and last
    characters are dropped, and the remainder is parsed the same way.
    Any other exception propagates to the caller.
    """
    try:
        return Tree.fromstring(t, remove_empty_top_bracketing=True)
    except IndexError:
        # In case it's the real treebank format, strip the first and last
        # brackets before parsing.
        # NOTE(review): the fallback used to call the legacy
        # ``tree.bracket_parse``; it now goes through ``Tree.fromstring``
        # like the primary path so both attempts use the same parser.
        # Presumably ``Tree`` is ``nltk.tree.Tree`` -- confirm.
        return Tree.fromstring(t.strip()[1:-1],
                               remove_empty_top_bracketing=True)
def _parse(self, t):
    """Parse the bracketed string ``t``, recovering from bad input.

    The normalized string is handed to ``bracket_parse``.  If that raises
    ``ValueError`` a notice is written to stderr and, when the error is
    exactly ``'mismatched parens'``, the parse is retried with one to four
    closing parentheses appended.  If nothing works, a flat tree
    ``Tree('S', self._tag(t))`` is returned instead.
    """
    try:
        return bracket_parse(self._normalize(t))
    except ValueError as err:
        sys.stderr.write("Bad tree detected; trying to recover...\n")
        # Try to recover, if we can:
        if err.args == ('mismatched parens',):
            for extra in range(1, 5):
                try:
                    repaired = bracket_parse(
                        self._normalize(t + ')' * extra))
                    sys.stderr.write(" Recovered by adding %d close "
                                     "paren(s)\n" % extra)
                    return repaired
                except ValueError:
                    # Still unbalanced: try one more paren.
                    continue
        # Try something else:
        sys.stderr.write(" Recovered by returning a flat parse.\n")
        return Tree('S', self._tag(t))
def string_to_bracketing(s):
    """Convert a bracketing written as a string into a bracketing.

    Every ``(`` in ``s`` is given the dummy label ``X`` and the result is
    wrapped in an outer ``((X ... ))`` pair.  That string is parsed with
    ``tree.bracket_parse``, wrapped in ``treebank.Tree`` and converted with
    ``tree_to_bracketing``.

    >>> string_to_bracketing('(DT NNP NN) (VBD (DT (VBZ (DT JJ NN))))')
    """
    labelled = '((X ' + s.replace('(', '(X ') + '))'
    parsed = treebank.Tree(tree.bracket_parse(labelled))
    return tree_to_bracketing(parsed)
def _parse(self, t):
    """Parse the bracketed string ``t``, falling back when it is malformed.

    ``bracket_parse`` is applied to the normalized string.  On
    ``ValueError`` a message goes to stderr; if the error is exactly
    ``'mismatched parens'`` the parse is retried with 1..4 extra closing
    parentheses.  As a last resort the flat tree
    ``Tree('S', self._tag(t))`` is returned.
    """
    try:
        return bracket_parse(self._normalize(t))
    except ValueError as problem:
        sys.stderr.write("Bad tree detected; trying to recover...\n")
        # Try to recover, if we can:
        if problem.args == ('mismatched parens', ):
            for n_close in range(1, 5):
                try:
                    candidate = bracket_parse(
                        self._normalize(t + ')' * n_close))
                    sys.stderr.write(" Recovered by adding %d close "
                                     "paren(s)\n" % n_close)
                except ValueError:
                    # Not enough parens yet; keep adding.
                    continue
                else:
                    return candidate
        # Try something else:
        sys.stderr.write(" Recovered by returning a flat parse.\n")
        return Tree('S', self._tag(t))
def string_to_bracketing(s):
    """Convert a bracketing written as a string into a bracketing.

    Each ``(`` in ``s`` receives the placeholder label ``X``; the whole
    string is then enclosed in ``((X ... ))``, parsed with
    ``tree.bracket_parse``, wrapped in ``treebank.Tree`` and passed to
    ``tree_to_bracketing``.

    >>> string_to_bracketing('(DT NNP NN) (VBD (DT (VBZ (DT JJ NN))))')
    """
    with_labels = s.replace('(', '(X ')
    wrapped = '((X %s))' % with_labels
    return tree_to_bracketing(treebank.Tree(tree.bracket_parse(wrapped)))
def TreeView(TreeList):
    """Draw the bracketed trees in ``TreeList`` on the module-level ``cf``.

    Each string is parsed with ``bracket_parse`` and the list of parsed
    trees is printed.  The first tree is shown as a plain ``TreeWidget``
    at (10, 10); every later tree is wrapped in a ``ParenWidget`` placed
    10 pixels to the right of the preceding tree widget's bounding box.
    Finally ``cf.mainloop()`` is entered.

    Raises ``IndexError`` if ``TreeList`` is empty, because the first
    widget is looked up unconditionally.
    """
    # Parse every bracketed string.
    trees = [bracket_parse(bracketed) for bracketed in TreeList]
    # Single-argument print(): same output under Python 2 and 3.
    print(trees)

    # One TreeWidget per parsed tree.
    tree_widgets = [TreeWidget(cf.canvas(), parsed) for parsed in trees]

    # Every tree widget except the first gets wrapped in parentheses.
    parens = [ParenWidget(cf.canvas(), widget)
              for widget in tree_widgets[1:]]

    # Lay everything out on a single row.
    cf.add_widget(tree_widgets[0], 10, 10)
    for index, paren in enumerate(parens):
        # parens[index] wraps tree_widgets[index + 1]; it is placed just
        # right of the widget before it.
        cf.add_widget(paren, tree_widgets[index].bbox()[2] + 10, 10)

    cf.mainloop()
def parse(self, sentence):
    """Send ``sentence`` to the parser server and decode its reply.

    The sentence is written to ``self.socket`` and a single reply of up to
    8192 bytes is read back, whitespace-normalized and decoded as JSON.
    The reply's ``'parse'`` entry is turned into a tree and its
    ``'scopes'`` entry is passed through ``fix_scopes``.  The tree is also
    printed.

    Returns the list ``[sentence, tree, scopes]``.

    Quotes (") seem to give JSON errors (a ValueError) and parentheses
    sometimes cause the lisp-server parser to break; this method does no
    filtering itself, so sentences with these characters should be
    filtered out before calling it.
    """
    self.socket.sendall(sentence)
    reply = self.socket.recv(8192)
    # Collapse every run of whitespace to one space before decoding.
    jsondata = simplejson.loads(' '.join(reply.split()))
    tree = nltr.bracket_parse(jsondata['parse'])
    scopes = fix_scopes(tree, jsondata['scopes'])
    # Single-argument print(): same output under Python 2 and 3.
    print(tree)
    return [sentence, tree, scopes]
def parsed(self, files=None):
    """
    Prepared for Penn format.  May be overridden.

    Lazily yields one tree per parenthesized chunk that
    C{tokenize_paren} finds in each file.  Each yielded object is built
    as C{Tree(tree.bracket_parse(chunk), [filename, index])}, where
    C{index} counts the chunks of that file from 0.

    @param files: One or more treebank files to be processed, given
        relative to C{self.basedir}.  When omitted, every entry of
        C{self.basedir} is processed in sorted order.
    @type files: L{string} or L{tuple(string)}
    @rtype: iterator over L{tree}
    """
    if files is None:
        files = sorted(os.listdir(self.basedir))

    # Just one file to process?  If so convert to a tuple so we can iterate
    if isinstance(files, str):
        files = (files,)

    for filename in files:
        # Single-argument print(): same output under Python 2 and 3.
        print("Parsing file " + filename)
        path = os.path.join(self.basedir, filename)
        # Read the whole file up front and close it straight away, so the
        # handle is not left open while the generator is suspended.
        with open(path) as stream:
            s = stream.read()
        for i, t in enumerate(tokenize_paren(s)):
            yield Tree(tree.bracket_parse(t), [filename, i])
def parsed(self, files=None):
    """
    Prepared for Penn format.  May be overridden.

    Lazily yields one tree per parenthesized chunk that
    C{tokenize_paren} finds in each file.  Each yielded object is built
    as C{Tree(tree.bracket_parse(chunk), [filename, index])}, where
    C{index} counts the chunks of that file from 0.

    @param files: One or more treebank files to be processed, given
        relative to C{self.basedir}.  When omitted, every entry of
        C{self.basedir} is processed in sorted order.
    @type files: L{string} or L{tuple(string)}
    @rtype: iterator over L{tree}
    """
    if files is None:
        # os.listdir() returns entries in arbitrary order; sort them so
        # the trees come out in the same order on every run.
        files = sorted(os.listdir(self.basedir))

    # Just one file to process?  If so convert to a tuple so we can iterate
    if isinstance(files, str):
        files = (files, )

    for filename in files:
        # Single-argument print(): same output under Python 2 and 3.
        print("Parsing file " + filename)
        path = os.path.join(self.basedir, filename)
        # Read the whole file up front and close it straight away, so the
        # handle is not left open while the generator is suspended.
        with open(path) as stream:
            s = stream.read()
        for i, t in enumerate(tokenize_paren(s)):
            yield Tree(tree.bracket_parse(t), [filename, i])
def demo():
    """Open a canvas showing four example trees and a help text box.

    Builds a draggable ``TreeWidget``, a shapeable ``TreeWidget`` with box
    and oval node/leaf constructors (wrapped in a ``ParenWidget``), a tree
    made by ``tree_to_treesegment`` and a horizontally oriented
    ``TreeWidget`` whose orientation flips when clicked, then runs the
    frame's main loop.
    """
    import random

    def fill(cw):
        cw['fill'] = '#%06d' % random.randint(0, 999999)

    frame = CanvasFrame(width=550, height=450, closeenough=2)

    # --- Top-left: plain draggable tree widget -------------------------
    first_tree = tree.bracket_parse(''' (S (NP the very big cat) (VP (Adv sorta) (V saw) (NP (Det the) (N dog))))''')
    first_widget = TreeWidget(
        frame.canvas(), first_tree, draggable=1,
        node_font=('helvetica', -14, 'bold'),
        leaf_font=('helvetica', -12, 'italic'),
        roof_fill='white', roof_color='black',
        leaf_color='green4', node_color='blue2')
    frame.add_widget(first_widget, 10, 10)

    # --- Top-right: custom node/leaf constructors ----------------------
    def boxit(canvas, text):
        big = ('helvetica', -16, 'bold')
        label = TextWidget(canvas, text, font=big)
        return BoxWidget(canvas, label, fill='green')

    def ovalit(canvas, text):
        label = TextWidget(canvas, text)
        return OvalWidget(canvas, label, fill='cyan')

    shapeable_tree = tree.bracket_parse(
        '(S (NP this tree) (VP (V is) (AdjP shapeable)))')
    shapeable_widget = TreeWidget(frame.canvas(), shapeable_tree,
                                  boxit, ovalit, shapeable=1)

    # Click handlers that recolor the clicked element at random.
    def color(node):
        node['color'] = '#%04d00' % random.randint(0, 9999)

    def color2(treeseg):
        treeseg.node()['fill'] = '#%06d' % random.randint(0, 9999)
        treeseg.node().child()['color'] = 'white'

    first_widget.bind_click_trees(first_widget.toggle_collapsed)
    shapeable_widget.bind_click_trees(shapeable_widget.toggle_collapsed)
    first_widget.bind_click_nodes(color, 3)
    shapeable_widget.expanded_tree(1).bind_click(color2, 3)
    shapeable_widget.expanded_tree().bind_click(color2, 3)

    paren = ParenWidget(frame.canvas(), shapeable_widget)
    frame.add_widget(paren, first_widget.bbox()[2] + 10, 10)

    # --- Bottom-left: tree built with tree_to_treesegment --------------
    segment_tree = tree.bracket_parse(''' (S (NP this tree) (AUX was) (VP (V built) (PP (P with) (NP (N tree_to_treesegment)))))''')
    segment_widget = tree_to_treesegment(
        frame.canvas(), segment_tree,
        tree_color='green4', tree_xspace=2, tree_width=2)
    segment_widget['draggable'] = 1
    frame.add_widget(segment_widget, 10, first_widget.bbox()[3] + 10)

    def orientswitch(treewidget):
        # Flip the orientation and relabel the leaf that names it in
        # every expanded/collapsed variant of the widget.
        if treewidget['orientation'] == 'horizontal':
            flipped = 'vertical'
        else:
            flipped = 'horizontal'
        treewidget.expanded_tree(1, 1).subtrees()[0].set_text(flipped)
        treewidget.collapsed_tree(1, 1).subtrees()[0].set_text(flipped)
        treewidget.collapsed_tree(1).subtrees()[1].set_text(flipped)
        treewidget.collapsed_tree().subtrees()[3].set_text(flipped)
        treewidget['orientation'] = flipped

    # --- Help text ------------------------------------------------------
    help_text = """ Try clicking, right clicking, and dragging different elements of each of the trees. The top-left tree is a TreeWidget built from a Tree. The top-right is a TreeWidget built from a Tree, using non-default widget constructors for the nodes & leaves (BoxWidget and OvalWidget). The bottom-left tree is built from tree_to_treesegment."""
    help_label = TextWidget(frame.canvas(), help_text.strip())
    help_box = BoxWidget(frame.canvas(), help_label,
                         fill='white', draggable=1)
    frame.add_widget(help_box,
                     segment_widget.bbox()[2] + 10,
                     shapeable_widget.bbox()[3] + 10)

    # --- Bottom-right: tree whose orientation can be switched ----------
    horizontal_tree = tree.bracket_parse(
        '(S (NP this tree) (VP (V is) (Adj horizontal)))')
    horizontal_widget = TreeWidget(
        frame.canvas(), horizontal_tree, draggable=1,
        line_color='brown2', roof_color='brown2',
        node_font=('helvetica', -12, 'bold'),
        node_color='brown4', orientation='horizontal')
    horizontal_widget.manage()
    frame.add_widget(horizontal_widget,
                     segment_widget.bbox()[2] + 10,
                     help_box.bbox()[3] + 10)
    horizontal_widget.bind_click(orientswitch)
    horizontal_widget.bind_click_trees(horizontal_widget.toggle_collapsed, 3)

    # Run mainloop
    frame.mainloop()
def isAncestorOf(self, n):
    """Return True if this node is reached by walking up from ``n``.

    The walk starts at ``n.parent`` and follows ``parent`` links until a
    falsy parent is met, so a node is never reported as its own ancestor.
    """
    p = n.parent
    while p:
        if p == self:
            return True
        p = p.parent
    return False


def follows(self, n):
    """Return True if this node comes after ``n`` under their common ancestor.

    Both nodes are traced up to their lowest common ancestor; the result
    is True when the ancestor's child leading to ``n`` appears earlier in
    its ``children`` list than the child leading to this node.

    Returns False when the two nodes are the same, when one is an
    ancestor of the other, or when they have no common ancestor at all.
    """
    # Chain from n up to its root: [n, parent(n), ...].
    L = [n]
    p = n.parent
    while p:
        L.append(p)
        p = p.parent

    # Climb from self until we land on that chain; pp trails one step
    # behind and ends up as the child of the meeting point on our side.
    p = self
    pp = None
    while p is not None and p not in L:
        pp, p = p, p.parent
    if p is None:
        # Walked past our own root without meeting n's chain: the nodes
        # live in different trees, so neither follows the other.
        return False
    if pp is None or p == n:
        # self is n or an ancestor of n (pp is None), or n is an
        # ancestor of self (p == n).
        return False
    i = L.index(p)
    ni = p.children.index(L[i - 1])  # child of p on the way down to n
    mi = p.children.index(pp)        # child of p on the way down to self
    return ni < mi


if __name__ == "__main__":
    from nltk.tree import bracket_parse
    s = "(S (NP (N I)) (VP (VP (V saw) (NP (DT the) (N man))) (PP (P with) (NP (DT a) (N telescope)))))"
    t = bracket_parse(s)
    root = TreeModel.importNltkLiteTree(t)
    # Single-argument print(): same output under Python 2 and 3.
    print(root.treebankString("label"))
def parsetree_json_decode(parsetree_json):
    """Decode a serialized parse tree.

    Returns ``None`` when ``parsetree_json`` is the string ``'null'``;
    otherwise parses it with ``nltr.bracket_parse`` and returns the
    result of calling ``freeze()`` on the parsed tree.
    """
    if parsetree_json == 'null':
        return None
    return nltr.bracket_parse(parsetree_json).freeze()
def isAncestorOf(self, n):
    """Return True if this node is reached by walking up from ``n``.

    The walk starts at ``n.parent`` and follows ``parent`` links until a
    falsy parent is met, so a node is never reported as its own ancestor.
    """
    p = n.parent
    while p:
        if p == self:
            return True
        p = p.parent
    return False


def follows(self, n):
    """Return True if this node comes after ``n`` under their common ancestor.

    Both nodes are traced up to their lowest common ancestor; the result
    is True when the ancestor's child leading to ``n`` appears earlier in
    its ``children`` list than the child leading to this node.

    Returns False when the two nodes are the same, when one is an
    ancestor of the other, or when they have no common ancestor at all.
    """
    # Chain from n up to its root: [n, parent(n), ...].
    L = [n]
    p = n.parent
    while p:
        L.append(p)
        p = p.parent

    # Climb from self until we land on that chain; pp trails one step
    # behind and ends up as the child of the meeting point on our side.
    p = self
    pp = None
    while p is not None and p not in L:
        pp, p = p, p.parent
    if p is None:
        # Walked past our own root without meeting n's chain: the nodes
        # live in different trees, so neither follows the other.
        return False
    if pp is None or p == n:
        # self is n or an ancestor of n (pp is None), or n is an
        # ancestor of self (p == n).
        return False
    i = L.index(p)
    ni = p.children.index(L[i - 1])  # child of p on the way down to n
    mi = p.children.index(pp)        # child of p on the way down to self
    return ni < mi


if __name__ == "__main__":
    from nltk.tree import bracket_parse
    s = "(S (NP (N I)) (VP (VP (V saw) (NP (DT the) (N man))) (PP (P with) (NP (DT a) (N telescope)))))"
    t = bracket_parse(s)
    root = TreeModel.importNltkLiteTree(t)
    # Single-argument print(): same output under Python 2 and 3.
    print(root.treebankString("label"))
# to display, indexed starting at 1. When no list is specified
# all sentences are drawn.
# * parsedsentencefile is the bracketed parse tree output of ./parse
#
# TYPICAL USAGE:
# 1. Generate a grammar file:
# ~> cat S1.gr S1_Vocab.gr S2.gr S2_Vocab.gr TopNo2.gr > GRAMMAR.gr
# 2. Parse the sentences into s-expressions.
# ~> cat examples.sen | ./parse -g GRAMMAR.gr > training_examples.txt
# 3. Draw the trees
# ~> python drawtrees.py < training_examples.txt

from nltk.draw.tree import draw_trees
from nltk import tree
import sys

# Sentence numbers requested on the command line (1-based, kept as
# strings); an empty list means "draw every sentence".
args = sys.argv[1:]

tt = []   # successfully parsed trees, in input order
c = 0     # 1-based number of the sentence (input line) being read
for l in sys.stdin:
    c = c + 1
    if not args or str(c) in args:
        try:
            t = tree.bracket_parse(l)
        except Exception:
            # Report and skip lines that do not parse.  Catching
            # Exception rather than everything lets Ctrl-C and
            # SystemExit still stop the script.
            # Single-argument print(): same output under Python 2 and 3.
            print("encountered failure on sentence " + str(c))
        else:
            tt.append(t)

# Each parsed tree is passed to draw_trees as its own positional argument.
draw_trees(*tt)
def demo():
    """
    A demonstration showing how C{Tree}s can be used.  It parses a
    C{Tree} from a bracketed string and prints the results of calling
    several of its methods: indexing, modification, unary collapsing,
    Chomsky normal form, pretty-printing, LaTeX output and productions.
    It also builds and prints a C{ProbabilisticTree}.
    """
    from nltk import tree

    # NOTE: every print() below takes a single argument (or the empty
    # string for a blank line), so the output is the same under Python 2
    # and Python 3.

    # Demonstrate tree parsing.
    bracketed = '(S (NP (DT the) (NN cat)) (VP (VBD ate) (NP (DT a) (NN cookie))))'
    t = tree.bracket_parse(bracketed)
    print("Convert bracketed string into tree:")
    print(t)
    print(t.__repr__())

    print("Display tree properties:")
    print(t.node)           # tree's constituent type
    print(t[0])             # tree's first child
    print(t[1])             # tree's second child
    print(t.height())
    print(t.leaves())
    print(t[1])
    print(t[1, 1])
    print(t[1, 1, 0])

    # Demonstrate tree modification.
    subject_np = t[0]
    subject_np.insert(1, tree.bracket_parse('(JJ big)'))
    print("Tree modification:")
    print(t)
    t[1, 1, 1] = tree.bracket_parse('(NN cake)')
    print(t)
    print("")

    # Tree transforms
    print("Collapse unary:")
    t.collapse_unary()
    print(t)
    print("Chomsky normal form:")
    t.chomsky_normal_form()
    print(t)
    print("")

    # Demonstrate probabilistic trees.
    prob_tree = tree.ProbabilisticTree('x', ['y', 'z'], prob=0.5)
    print("Probabilistic Tree:")
    print(prob_tree)
    print("")

    # Demonstrate parsing of treebank output format.
    t = tree.bracket_parse(t.pprint())
    print("Convert tree to bracketed string and back again:")
    print(t)
    print("")

    # Demonstrate LaTeX output
    print("LaTeX output:")
    print(t.pprint_latex_qtree())
    print("")

    # Demonstrate Productions
    print("Production output:")
    print(t.productions())
    print("")

    # Demonstrate tree nodes containing objects other than strings
    t.node = ('test', 3)
    print(t)
def demo():
    """
    A demonstration showing how C{Tree}s can be used.  It parses a
    C{Tree} from a bracketed string and prints the results of calling
    several of its methods: indexing, modification, unary collapsing,
    Chomsky normal form, pretty-printing, LaTeX output and productions.
    It also builds and prints a C{ProbabilisticTree}.
    """
    from nltk import tree

    # NOTE: every print() below takes a single argument (or the empty
    # string for a blank line), so the output is the same under Python 2
    # and Python 3.

    # Demonstrate tree parsing.
    bracketed = '(S (NP (DT the) (NN cat)) (VP (VBD ate) (NP (DT a) (NN cookie))))'
    t = tree.bracket_parse(bracketed)
    print("Convert bracketed string into tree:")
    print(t)
    print(t.__repr__())

    print("Display tree properties:")
    print(t.node)           # tree's constituent type
    print(t[0])             # tree's first child
    print(t[1])             # tree's second child
    print(t.height())
    print(t.leaves())
    print(t[1])
    print(t[1, 1])
    print(t[1, 1, 0])

    # Demonstrate tree modification.
    subject_np = t[0]
    subject_np.insert(1, tree.bracket_parse('(JJ big)'))
    print("Tree modification:")
    print(t)
    t[1, 1, 1] = tree.bracket_parse('(NN cake)')
    print(t)
    print("")

    # Tree transforms
    print("Collapse unary:")
    t.collapse_unary()
    print(t)
    print("Chomsky normal form:")
    t.chomsky_normal_form()
    print(t)
    print("")

    # Demonstrate probabilistic trees.
    prob_tree = tree.ProbabilisticTree('x', ['y', 'z'], prob=0.5)
    print("Probabilistic Tree:")
    print(prob_tree)
    print("")

    # Demonstrate parsing of treebank output format.
    t = tree.bracket_parse(t.pprint())
    print("Convert tree to bracketed string and back again:")
    print(t)
    print("")

    # Demonstrate LaTeX output
    print("LaTeX output:")
    print(t.pprint_latex_qtree())
    print("")

    # Demonstrate Productions
    print("Production output:")
    print(t.productions())
    print("")

    # Demonstrate tree nodes containing objects other than strings
    t.node = ('test', 3)
    print(t)
def demo():
    """Open a canvas showing four example trees and a help text box.

    Builds a draggable ``TreeWidget``, a shapeable ``TreeWidget`` with box
    and oval node/leaf constructors (wrapped in a ``ParenWidget``), a tree
    made by ``tree_to_treesegment`` and a horizontally oriented
    ``TreeWidget`` whose orientation flips when clicked, then runs the
    frame's main loop.
    """
    import random

    def fill(cw):
        cw['fill'] = '#%06d' % random.randint(0, 999999)

    frame = CanvasFrame(width=550, height=450, closeenough=2)

    # --- Top-left: plain draggable tree widget -------------------------
    first_tree = tree.bracket_parse(''' (S (NP the very big cat) (VP (Adv sorta) (V saw) (NP (Det the) (N dog))))''')
    first_widget = TreeWidget(
        frame.canvas(), first_tree, draggable=1,
        node_font=('helvetica', -14, 'bold'),
        leaf_font=('helvetica', -12, 'italic'),
        roof_fill='white', roof_color='black',
        leaf_color='green4', node_color='blue2')
    frame.add_widget(first_widget, 10, 10)

    # --- Top-right: custom node/leaf constructors ----------------------
    def boxit(canvas, text):
        big = ('helvetica', -16, 'bold')
        label = TextWidget(canvas, text, font=big)
        return BoxWidget(canvas, label, fill='green')

    def ovalit(canvas, text):
        label = TextWidget(canvas, text)
        return OvalWidget(canvas, label, fill='cyan')

    shapeable_tree = tree.bracket_parse(
        '(S (NP this tree) (VP (V is) (AdjP shapeable)))')
    shapeable_widget = TreeWidget(frame.canvas(), shapeable_tree,
                                  boxit, ovalit, shapeable=1)

    # Click handlers that recolor the clicked element at random.
    def color(node):
        node['color'] = '#%04d00' % random.randint(0, 9999)

    def color2(treeseg):
        treeseg.node()['fill'] = '#%06d' % random.randint(0, 9999)
        treeseg.node().child()['color'] = 'white'

    first_widget.bind_click_trees(first_widget.toggle_collapsed)
    shapeable_widget.bind_click_trees(shapeable_widget.toggle_collapsed)
    first_widget.bind_click_nodes(color, 3)
    shapeable_widget.expanded_tree(1).bind_click(color2, 3)
    shapeable_widget.expanded_tree().bind_click(color2, 3)

    paren = ParenWidget(frame.canvas(), shapeable_widget)
    frame.add_widget(paren, first_widget.bbox()[2] + 10, 10)

    # --- Bottom-left: tree built with tree_to_treesegment --------------
    segment_tree = tree.bracket_parse(''' (S (NP this tree) (AUX was) (VP (V built) (PP (P with) (NP (N tree_to_treesegment)))))''')
    segment_widget = tree_to_treesegment(
        frame.canvas(), segment_tree,
        tree_color='green4', tree_xspace=2, tree_width=2)
    segment_widget['draggable'] = 1
    frame.add_widget(segment_widget, 10, first_widget.bbox()[3] + 10)

    def orientswitch(treewidget):
        # Flip the orientation and relabel the leaf that names it in
        # every expanded/collapsed variant of the widget.
        if treewidget['orientation'] == 'horizontal':
            flipped = 'vertical'
        else:
            flipped = 'horizontal'
        treewidget.expanded_tree(1, 1).subtrees()[0].set_text(flipped)
        treewidget.collapsed_tree(1, 1).subtrees()[0].set_text(flipped)
        treewidget.collapsed_tree(1).subtrees()[1].set_text(flipped)
        treewidget.collapsed_tree().subtrees()[3].set_text(flipped)
        treewidget['orientation'] = flipped

    # --- Help text ------------------------------------------------------
    help_text = """ Try clicking, right clicking, and dragging different elements of each of the trees. The top-left tree is a TreeWidget built from a Tree. The top-right is a TreeWidget built from a Tree, using non-default widget constructors for the nodes & leaves (BoxWidget and OvalWidget). The bottom-left tree is built from tree_to_treesegment."""
    help_label = TextWidget(frame.canvas(), help_text.strip())
    help_box = BoxWidget(frame.canvas(), help_label,
                         fill='white', draggable=1)
    frame.add_widget(help_box,
                     segment_widget.bbox()[2] + 10,
                     shapeable_widget.bbox()[3] + 10)

    # --- Bottom-right: tree whose orientation can be switched ----------
    horizontal_tree = tree.bracket_parse(
        '(S (NP this tree) (VP (V is) (Adj horizontal)))')
    horizontal_widget = TreeWidget(
        frame.canvas(), horizontal_tree, draggable=1,
        line_color='brown2', roof_color='brown2',
        node_font=('helvetica', -12, 'bold'),
        node_color='brown4', orientation='horizontal')
    horizontal_widget.manage()
    frame.add_widget(horizontal_widget,
                     segment_widget.bbox()[2] + 10,
                     help_box.bbox()[3] + 10)
    horizontal_widget.bind_click(orientswitch)
    horizontal_widget.bind_click_trees(horizontal_widget.toggle_collapsed, 3)

    # Run mainloop
    frame.mainloop()