Esempio n. 1
0
def treebank_bracket_parse(t):
    """Parse the bracketed tree string ``t`` with ``tree.bracket_parse``.

    If the first attempt raises ``IndexError`` the input is assumed to be in
    the real treebank format: surrounding whitespace and the first and last
    characters (the outer brackets) are removed and the remainder is parsed
    instead.  Any error raised by that second attempt propagates.
    """
    try:
        parsed = tree.bracket_parse(t)
    except IndexError:
        # Real treebank format: drop the outermost pair of brackets and
        # try once more.
        unwrapped = t.strip()[1:-1]
        parsed = tree.bracket_parse(unwrapped)
    return parsed
Esempio n. 2
0
def treebank_bracket_parse(t):
    """Parse the bracketed tree string ``t`` into a ``Tree``.

    The string is first parsed with ``Tree.fromstring`` (dropping an empty
    top-level bracketing).  If that raises ``IndexError`` the text is
    stripped, its first and last characters are removed, and the remainder
    is parsed again.  Errors from the second attempt propagate.
    """
    try:
        return Tree.fromstring(t, remove_empty_top_bracketing=True)
    except IndexError:
        # in case it's the real treebank format,
        # strip first and last brackets before parsing.
        # Both paths now go through Tree.fromstring; the previous fallback
        # called the legacy tree.bracket_parse helper, which belongs to the
        # older API that Tree.fromstring replaced.
        return Tree.fromstring(t.strip()[1:-1])
Esempio n. 3
0
def treebank_bracket_parse(t):
    """Parse the bracketed tree string ``t`` into a ``Tree``.

    ``Tree.fromstring`` is tried first, discarding an empty top-level
    bracketing.  When it raises ``IndexError`` the input is stripped, its
    first and last characters are dropped, and the result is parsed again.
    Errors raised by the retry are not caught.
    """
    try:
        return Tree.fromstring(t, remove_empty_top_bracketing=True)
    except IndexError:
        # in case it's the real treebank format,
        # strip first and last brackets before parsing.
        # The retry uses Tree.fromstring too, instead of the legacy
        # tree.bracket_parse helper from the API that fromstring replaced,
        # so the function depends on a single parser entry point.
        return Tree.fromstring(t.strip()[1:-1])
Esempio n. 4
0
 def _parse(self, t):
     """Parse the normalized tree text ``t``, recovering from bad input.

     The text is normalized with ``self._normalize`` and handed to
     ``bracket_parse``.  If that raises ``ValueError`` a notice is written
     to stderr and, when the error arguments are exactly
     ``('mismatched parens',)``, the parse is retried with one to four
     closing parentheses appended.  If no retry succeeds (or the error was
     a different one) a flat ``Tree('S', ...)`` built from
     ``self._tag(t)`` is returned instead.
     """
     try:
         return bracket_parse(self._normalize(t))
     except ValueError as e:
         sys.stderr.write("Bad tree detected; trying to recover...\n")
         # Try to recover, if we can:
         if e.args == ('mismatched parens',):
             for n in range(1, 5):
                 try:
                     v = bracket_parse(self._normalize(t + ')' * n))
                 except ValueError:
                     # Still unbalanced; try one more closing paren.
                     continue
                 sys.stderr.write("  Recovered by adding %d close "
                                  "paren(s)\n" % n)
                 return v
         # Try something else:
         sys.stderr.write("  Recovered by returning a flat parse.\n")
         return Tree('S', self._tag(t))
Esempio n. 5
0
def string_to_bracketing(s):
    """Converts a string to a bracketing.

    Every opening parenthesis in ``s`` is given the dummy label ``X`` and
    the whole text is wrapped in an extra ``((X ... ))`` pair.  The result
    is parsed with ``tree.bracket_parse``, wrapped in ``treebank.Tree`` and
    passed to ``tree_to_bracketing``, whose return value is returned.

    Example input: '(DT NNP NN) (VBD (DT (VBZ (DT JJ NN))))'
    """
    labelled = '((X ' + s.replace('(', '(X ') + '))'
    parsed = treebank.Tree(tree.bracket_parse(labelled))
    return tree_to_bracketing(parsed)
Esempio n. 6
0
 def _parse(self, t):
     """Parse the normalized tree text ``t``, recovering from bad input.

     ``bracket_parse`` is applied to ``self._normalize(t)``.  On
     ``ValueError`` a notice goes to stderr; if the error arguments equal
     ``('mismatched parens',)`` the parse is retried with one, two, three
     and then four closing parentheses appended to ``t``.  When nothing
     works, a flat ``Tree('S', ...)`` over ``self._tag(t)`` is returned.
     """
     try:
         return bracket_parse(self._normalize(t))
     except ValueError as e:
         sys.stderr.write("Bad tree detected; trying to recover...\n")
         # Try to recover, if we can:
         if e.args == ('mismatched parens', ):
             for n in range(1, 5):
                 try:
                     v = bracket_parse(self._normalize(t + ')' * n))
                 except ValueError:
                     # Not enough closing parens yet; keep adding.
                     continue
                 sys.stderr.write("  Recovered by adding %d close "
                                  "paren(s)\n" % n)
                 return v
         # Try something else:
         sys.stderr.write("  Recovered by returning a flat parse.\n")
         return Tree('S', self._tag(t))
Esempio n. 7
0
def string_to_bracketing(s):
    """Converts a string to a bracketing.

    Each ``(`` in ``s`` is rewritten to ``(X `` so that every bracket
    carries a dummy label, and the text is enclosed in ``((X `` ... ``))``.
    That string is parsed by ``tree.bracket_parse``, wrapped in
    ``treebank.Tree`` and converted with ``tree_to_bracketing``.

    Example input: '(DT NNP NN) (VBD (DT (VBZ (DT JJ NN))))'
    """
    with_labels = s.replace('(', '(X ')
    wrapped = '((X ' + with_labels + '))'
    return tree_to_bracketing(treebank.Tree(tree.bracket_parse(wrapped)))
Esempio n. 8
0
def TreeView(TreeList):
    """Parse and display a list of bracketed tree strings.

    Each entry of ``TreeList`` is parsed with ``bracket_parse`` and the
    list of parsed trees is printed.  A ``TreeWidget`` is created for every
    tree on the canvas of the module-level frame ``cf``, and every widget
    after the first is wrapped in a ``ParenWidget``.  The first tree widget
    is added at (10, 10); each paren widget is then added on the same row,
    10 units right of the preceding tree widget's bounding box.  Finally
    the frame's main loop is started.
    """
    parsed = [bracket_parse(source) for source in TreeList]
    print(parsed)

    # One widget per tree, then a paren wrapper for all but the first.
    widgets = [TreeWidget(cf.canvas(), item) for item in parsed]
    wrappers = [ParenWidget(cf.canvas(), widget) for widget in widgets[1:]]

    cf.add_widget(widgets[0], 10, 10)
    # wrappers[k] encloses widgets[k + 1] and is placed to the right of
    # widgets[k].
    for left_neighbour, wrapper in zip(widgets, wrappers):
        cf.add_widget(wrapper, left_neighbour.bbox()[2] + 10, 10)

    cf.mainloop()
Esempio n. 9
0
 def parse(self, sentence):
     """Send ``sentence`` to the parser socket and decode the reply.

     The sentence is written to ``self.socket`` and up to 8192 bytes of
     reply are read back, whitespace-normalized and decoded as JSON.  The
     ``'parse'`` entry is turned into a tree with ``nltr.bracket_parse``
     and the ``'scopes'`` entry is passed through ``fix_scopes``.  The tree
     is printed and ``[sentence, tree, scopes]`` is returned.

     Double quotes seem to give json errors (a ValueError) and parentheses
     sometimes cause the lisp-server parser to break, so filter out
     sentences with these characters.
     """
     self.socket.sendall(sentence)
     reply = self.socket.recv(8192)
     # Collapse every run of whitespace to a single space before decoding.
     payload = simplejson.loads(' '.join(reply.split()))
     parsed = nltr.bracket_parse(payload['parse'])
     fixed_scopes = fix_scopes(parsed, payload['scopes'])
     print(parsed)
     return [sentence, parsed, fixed_scopes]
Esempio n. 10
0
 def parsed(self, files=None):
     """
     Prepared for Penn format. May be overridden.

     Reads each file below C{self.basedir}, splits its text with
     C{tokenize_paren}, and yields one tree per piece.  Each tree is
     built from the bracket parse of the piece and the pair
     C{[file name, index of the piece within the file]}.

     @param files: One or more treebank files to be processed.  When
         omitted, every entry of C{self.basedir} is processed in sorted
         order.
     @type files: L{string} or L{tuple(string)}
     @rtype: iterator over L{tree}
     """
     if files is None:
         files = sorted(os.listdir(self.basedir))

     # Just one file to process?  If so convert to a tuple so we can iterate
     if isinstance(files, str):
         files = (files,)

     for fname in files:
         print("Parsing file " + fname)
         path = os.path.join(self.basedir, fname)
         # Read the whole file and close it right away: this generator can
         # stay suspended between yields, so the handle must not be left
         # to the garbage collector.
         stream = open(path)
         try:
             s = stream.read()
         finally:
             stream.close()
         for i, t in enumerate(tokenize_paren(s)):
             yield Tree(tree.bracket_parse(t), [fname, i])
Esempio n. 11
0
    def parsed(self, files=None):
        """
        Prepared for Penn format. May be overridden.

        Reads each file below C{self.basedir}, splits its text with
        C{tokenize_paren}, and yields one tree per piece.  Each tree is
        built from the bracket parse of the piece and the pair
        C{[file name, index of the piece within the file]}.

        @param files: One or more treebank files to be processed.  When
            omitted, every entry of C{self.basedir} is processed in the
            order returned by C{os.listdir}.
        @type files: L{string} or L{tuple(string)}
        @rtype: iterator over L{tree}
        """
        if files is None:
            files = os.listdir(self.basedir)

        # Just one file to process?  If so convert to a tuple so we can iterate
        if isinstance(files, str):
            files = (files, )

        for fname in files:
            print("Parsing file " + fname)
            path = os.path.join(self.basedir, fname)
            # Read the whole file and close it right away: this generator
            # can stay suspended between yields, so the handle must not be
            # left to the garbage collector.
            stream = open(path)
            try:
                s = stream.read()
            finally:
                stream.close()
            for i, t in enumerate(tokenize_paren(s)):
                yield Tree(tree.bracket_parse(t), [fname, i])
Esempio n. 12
0
def demo():
    """
    Build a CanvasFrame showing four example trees and a help text box,
    then run the frame's main loop.

    The trees are a draggable TreeWidget, a shapeable TreeWidget using
    custom node/leaf constructors (wrapped in a ParenWidget), a tree made
    with tree_to_treesegment, and a horizontally oriented TreeWidget.
    """
    import random

    # NOTE(review): fill() is never referenced inside demo().
    def fill(cw):
        cw['fill'] = '#%06d' % random.randint(0, 999999)

    cf = CanvasFrame(width=550, height=450, closeenough=2)

    # Tree 1: plain TreeWidget with custom fonts and colors.
    t = tree.bracket_parse('''
    (S (NP the very big cat)
       (VP (Adv sorta) (V saw) (NP (Det the) (N dog))))''')

    tc = TreeWidget(cf.canvas(),
                    t,
                    draggable=1,
                    node_font=('helvetica', -14, 'bold'),
                    leaf_font=('helvetica', -12, 'italic'),
                    roof_fill='white',
                    roof_color='black',
                    leaf_color='green4',
                    node_color='blue2')
    cf.add_widget(tc, 10, 10)

    # Widget constructors handed to the second TreeWidget below.
    def boxit(canvas, text):
        big = ('helvetica', -16, 'bold')
        return BoxWidget(canvas,
                         TextWidget(canvas, text, font=big),
                         fill='green')

    def ovalit(canvas, text):
        return OvalWidget(canvas, TextWidget(canvas, text), fill='cyan')

    # Tree 2: TreeWidget built with boxit/ovalit instead of the defaults.
    treetok = tree.bracket_parse(
        '(S (NP this tree) (VP (V is) (AdjP shapeable)))')
    tc2 = TreeWidget(cf.canvas(), treetok, boxit, ovalit, shapeable=1)

    # Click callbacks: each assigns a '#'-prefixed string of random decimal
    # digits as a color.
    def color(node):
        node['color'] = '#%04d00' % random.randint(0, 9999)

    def color2(treeseg):
        treeseg.node()['fill'] = '#%06d' % random.randint(0, 9999)
        treeseg.node().child()['color'] = 'white'

    # NOTE(review): the trailing 3 is presumably the mouse button (the help
    # text below mentions right clicking) -- confirm against bind_click.
    tc.bind_click_trees(tc.toggle_collapsed)
    tc2.bind_click_trees(tc2.toggle_collapsed)
    tc.bind_click_nodes(color, 3)
    tc2.expanded_tree(1).bind_click(color2, 3)
    tc2.expanded_tree().bind_click(color2, 3)

    # Tree 2 is shown inside a ParenWidget, to the right of tree 1.
    paren = ParenWidget(cf.canvas(), tc2)
    cf.add_widget(paren, tc.bbox()[2] + 10, 10)

    # Tree 3: built with tree_to_treesegment and placed below tree 1.
    tree3 = tree.bracket_parse('''
    (S (NP this tree) (AUX was)
       (VP (V built) (PP (P with) (NP (N tree_to_treesegment)))))''')
    tc3 = tree_to_treesegment(cf.canvas(),
                              tree3,
                              tree_color='green4',
                              tree_xspace=2,
                              tree_width=2)
    tc3['draggable'] = 1
    cf.add_widget(tc3, 10, tc.bbox()[3] + 10)

    # Click callback for tree 4: flips its 'orientation' setting and
    # rewrites the text of several subtrees to name the new orientation.
    def orientswitch(treewidget):
        if treewidget['orientation'] == 'horizontal':
            treewidget.expanded_tree(1, 1).subtrees()[0].set_text('vertical')
            treewidget.collapsed_tree(1, 1).subtrees()[0].set_text('vertical')
            treewidget.collapsed_tree(1).subtrees()[1].set_text('vertical')
            treewidget.collapsed_tree().subtrees()[3].set_text('vertical')
            treewidget['orientation'] = 'vertical'
        else:
            treewidget.expanded_tree(1, 1).subtrees()[0].set_text('horizontal')
            treewidget.collapsed_tree(1,
                                      1).subtrees()[0].set_text('horizontal')
            treewidget.collapsed_tree(1).subtrees()[1].set_text('horizontal')
            treewidget.collapsed_tree().subtrees()[3].set_text('horizontal')
            treewidget['orientation'] = 'horizontal'

    # Help text shown in a draggable white box.
    text = """
Try clicking, right clicking, and dragging
different elements of each of the trees.
The top-left tree is a TreeWidget built from
a Tree.  The top-right is a TreeWidget built
from a Tree, using non-default widget
constructors for the nodes & leaves (BoxWidget
and OvalWidget).  The bottom-left tree is
built from tree_to_treesegment."""
    twidget = TextWidget(cf.canvas(), text.strip())
    textbox = BoxWidget(cf.canvas(), twidget, fill='white', draggable=1)
    cf.add_widget(textbox, tc3.bbox()[2] + 10, tc2.bbox()[3] + 10)

    # Tree 4: horizontally oriented TreeWidget placed below the text box.
    tree4 = tree.bracket_parse(
        '(S (NP this tree) (VP (V is) (Adj horizontal)))')
    tc4 = TreeWidget(cf.canvas(),
                     tree4,
                     draggable=1,
                     line_color='brown2',
                     roof_color='brown2',
                     node_font=('helvetica', -12, 'bold'),
                     node_color='brown4',
                     orientation='horizontal')
    tc4.manage()
    cf.add_widget(tc4, tc3.bbox()[2] + 10, textbox.bbox()[3] + 10)
    tc4.bind_click(orientswitch)
    tc4.bind_click_trees(tc4.toggle_collapsed, 3)

    # Run mainloop
    cf.mainloop()
Esempio n. 13
0
    def isAncestorOf(self, n):
        """Return True if this node appears on the parent chain above ``n``.

        The chain is followed through ``parent`` attributes until a falsy
        parent is reached.  ``n`` itself is not considered, so a node is
        never its own ancestor.
        """
        ancestor = n.parent
        while ancestor:
            if ancestor == self:
                return True
            ancestor = ancestor.parent
        return False

    def follows(self, n):
        """Return True if this node comes after ``n`` among siblings.

        The comparison is made at the first node shared by both parent
        chains: the child leading to this node must have a larger index in
        that node's ``children`` than the child leading to ``n``.  Returns
        False when this node is ``n`` or an ancestor of ``n``, or when
        ``n`` is an ancestor of this node.
        """
        # Chain from n up to its root, nearest first.
        chain = [n]
        up = n.parent
        while up:
            chain.append(up)
            up = up.parent

        # Climb from self until we land on a node of that chain,
        # remembering the child we came from.
        came_from = None
        meet = self
        while meet not in chain:
            came_from = meet
            meet = meet.parent

        if came_from is None or meet == n:
            return False

        toward_n = chain[chain.index(meet) - 1]
        n_side = meet.children.index(toward_n)
        self_side = meet.children.index(came_from)
        return n_side < self_side
    
if __name__ == "__main__":
    # Manual smoke test: parse a bracketed sentence with NLTK, convert the
    # result with TreeModel.importNltkLiteTree, and print the string that
    # treebankString returns for the "label" argument.
    from nltk.tree import bracket_parse
    s = "(S (NP (N I)) (VP (VP (V saw) (NP (DT the) (N man))) (PP (P with) (NP (DT a) (N telescope)))))"
    t = bracket_parse(s)
    root = TreeModel.importNltkLiteTree(t)
    print root.treebankString("label")
Esempio n. 14
0
def parsetree_json_decode(parsetree_json):
    """Decode a serialized parse tree.

    Returns None when the input is the string 'null'; otherwise returns
    the frozen result of ``nltr.bracket_parse`` applied to the input.
    """
    if parsetree_json == 'null':
        return None
    return nltr.bracket_parse(parsetree_json).freeze()
Esempio n. 15
0
    def isAncestorOf(self, n):
        """Tell whether this node is a proper ancestor of ``n``.

        Walks upward from ``n.parent`` via ``parent`` links and stops at
        the first falsy parent; returns True as soon as a visited node
        equals this one.
        """
        current = n.parent
        while current:
            if current == self:
                return True
            current = current.parent
        return False

    def follows(self, n):
        """Tell whether this node is positioned after ``n``.

        Finds the first node on this node's parent chain that is also
        ``n`` or one of ``n``'s ancestors, then compares the positions, in
        that node's ``children``, of the two children leading to ``n`` and
        to this node.  Returns False if either node lies on the other's
        parent chain (including when they are the same node).
        """
        # n followed by all of its ancestors, nearest first.
        n_path = [n]
        node = n.parent
        while node:
            n_path.append(node)
            node = node.parent

        # Walk up from self to the first node that is on n's path.
        previous = None
        shared = self
        while shared not in n_path:
            previous = shared
            shared = shared.parent

        if previous is None or shared == n:
            return False

        branch_to_n = n_path[n_path.index(shared) - 1]
        index_of_n_branch = shared.children.index(branch_to_n)
        index_of_self_branch = shared.children.index(previous)
        return index_of_n_branch < index_of_self_branch


if __name__ == "__main__":
    # Manual smoke test: parse a bracketed sentence with NLTK, convert the
    # result with TreeModel.importNltkLiteTree, and print the string that
    # treebankString returns for the "label" argument.
    from nltk.tree import bracket_parse
    s = "(S (NP (N I)) (VP (VP (V saw) (NP (DT the) (N man))) (PP (P with) (NP (DT a) (N telescope)))))"
    t = bracket_parse(s)
    root = TreeModel.importNltkLiteTree(t)
    print root.treebankString("label")
Esempio n. 16
0
#    to display, indexed starting at 1.  When no list is specified
#    all sentences are drawn.
#  * parsedsentencefile is the bracketed parse tree output of ./parse
# 
# TYPICAL USAGE:
# 1. Generate a grammar file:
# ~> cat S1.gr S1_Vocab.gr S2.gr S2_Vocab.gr TopNo2.gr > GRAMMAR.gr
# 2. Parse the sentences into s-expressions.
# ~> cat examples.sen | ./parse -g GRAMMAR.gr > training_examples.txt
# 3. Draw the trees
# ~> python drawtrees.py < training_examples.txt

from nltk.draw.tree import draw_trees
from nltk import tree
import sys

# Optional command-line arguments name the sentences to draw (1-based
# line numbers of stdin); with no arguments every sentence is drawn.
args = sys.argv[1:]

tt = []
c = 0
for l in sys.stdin:
    c = c + 1
    if not args or str(c) in args:
        try:
            t = tree.bracket_parse(l)
            tt.append(t)
        except Exception:
            # Report and skip lines that cannot be parsed.  Catching
            # Exception rather than everything lets KeyboardInterrupt and
            # SystemExit stop the script as usual.
            print("encountered failure on sentence " + str(c))

# Pass every collected tree as a separate positional argument.
draw_trees(*tt)
Esempio n. 17
0
def demo():
    """
    A demonstration showing how C{Tree}s can be used.  This
    demonstration parses a bracketed string into a C{Tree} and prints
    the results of calling several of its methods: property access,
    in-place modification, the collapse-unary and Chomsky-normal-form
    transforms, a round trip through C{pprint()}, LaTeX qtree output
    and productions.  It also prints a C{ProbabilisticTree} and a tree
    whose node label is a tuple rather than a string.
    """

    from nltk import tree

    # Demonstrate tree parsing.
    s = '(S (NP (DT the) (NN cat)) (VP (VBD ate) (NP (DT a) (NN cookie))))'
    t = tree.bracket_parse(s)
    print "Convert bracketed string into tree:"
    print t
    print t.__repr__()

    print "Display tree properties:"
    print t.node  # tree's constituent type
    print t[0]  # tree's first child
    print t[1]  # tree's second child
    print t.height()
    print t.leaves()
    # Tuple indices address nested children along a path.
    print t[1]
    print t[1, 1]
    print t[1, 1, 0]

    # Demonstrate tree modification.
    the_cat = t[0]
    the_cat.insert(1, tree.bracket_parse('(JJ big)'))
    print "Tree modification:"
    print t
    t[1, 1, 1] = tree.bracket_parse('(NN cake)')
    print t
    print

    # Tree transforms
    print "Collapse unary:"
    t.collapse_unary()
    print t
    print "Chomsky normal form:"
    t.chomsky_normal_form()
    print t
    print

    # Demonstrate probabilistic trees.
    pt = tree.ProbabilisticTree('x', ['y', 'z'], prob=0.5)
    print "Probabilistic Tree:"
    print pt
    print

    # Demonstrate parsing of treebank output format.
    t = tree.bracket_parse(t.pprint())
    print "Convert tree to bracketed string and back again:"
    print t
    print

    # Demonstrate LaTeX output
    print "LaTeX output:"
    print t.pprint_latex_qtree()
    print

    # Demonstrate Productions
    print "Production output:"
    print t.productions()
    print

    # Demonstrate tree nodes containing objects other than strings
    t.node = ('test', 3)
    print t
Esempio n. 18
0
def demo():
    """
    A demonstration showing how C{Tree}s can be used.  This
    demonstration parses a bracketed string into a C{Tree} and prints
    the results of calling several of its methods: property access,
    in-place modification, the collapse-unary and Chomsky-normal-form
    transforms, a round trip through C{pprint()}, LaTeX qtree output
    and productions.  It also prints a C{ProbabilisticTree} and a tree
    whose node label is a tuple rather than a string.
    """
    
    from nltk import tree

    # Demonstrate tree parsing.
    s = '(S (NP (DT the) (NN cat)) (VP (VBD ate) (NP (DT a) (NN cookie))))'
    t = tree.bracket_parse(s)
    print "Convert bracketed string into tree:"
    print t
    print t.__repr__()

    print "Display tree properties:"
    print t.node           # tree's constituent type
    print t[0]             # tree's first child
    print t[1]             # tree's second child
    print t.height()
    print t.leaves()
    # Tuple indices address nested children along a path.
    print t[1]
    print t[1,1]
    print t[1,1,0]

    # Demonstrate tree modification.
    the_cat = t[0]
    the_cat.insert(1, tree.bracket_parse('(JJ big)'))
    print "Tree modification:"
    print t
    t[1,1,1] = tree.bracket_parse('(NN cake)')
    print t
    print

    # Tree transforms
    print "Collapse unary:"
    t.collapse_unary()
    print t
    print "Chomsky normal form:"
    t.chomsky_normal_form()
    print t
    print

    # Demonstrate probabilistic trees.
    pt = tree.ProbabilisticTree('x', ['y', 'z'], prob=0.5)
    print "Probabilistic Tree:"
    print pt
    print

    # Demonstrate parsing of treebank output format.
    t = tree.bracket_parse(t.pprint())
    print "Convert tree to bracketed string and back again:"
    print t
    print

    # Demonstrate LaTeX output
    print "LaTeX output:"
    print t.pprint_latex_qtree()
    print

    # Demonstrate Productions
    print "Production output:"
    print t.productions()
    print

    # Demonstrate tree nodes containing objects other than strings
    t.node = ('test', 3)
    print t
Esempio n. 19
0
def demo():
    """
    Build a CanvasFrame showing four example trees and a help text box,
    then run the frame's main loop.

    The trees are a draggable TreeWidget, a shapeable TreeWidget using
    custom node/leaf constructors (wrapped in a ParenWidget), a tree made
    with tree_to_treesegment, and a horizontally oriented TreeWidget.
    """
    import random
    # NOTE(review): fill() is never referenced inside demo().
    def fill(cw):
        cw['fill'] = '#%06d' % random.randint(0,999999)
    
    cf = CanvasFrame(width=550, height=450, closeenough=2)

    # Tree 1: plain TreeWidget with custom fonts and colors.
    t = tree.bracket_parse('''
    (S (NP the very big cat)
       (VP (Adv sorta) (V saw) (NP (Det the) (N dog))))''')
                
    tc = TreeWidget(cf.canvas(), t, draggable=1, 
                    node_font=('helvetica', -14, 'bold'),
                    leaf_font=('helvetica', -12, 'italic'),
                    roof_fill='white', roof_color='black',
                    leaf_color='green4', node_color='blue2')
    cf.add_widget(tc,10,10)
    
    # Widget constructors handed to the second TreeWidget below.
    def boxit(canvas, text):
        big = ('helvetica', -16, 'bold')
        return BoxWidget(canvas, TextWidget(canvas, text,
                                            font=big), fill='green')
    def ovalit(canvas, text):
        return OvalWidget(canvas, TextWidget(canvas, text),
                          fill='cyan')

    # Tree 2: TreeWidget built with boxit/ovalit instead of the defaults.
    treetok = tree.bracket_parse('(S (NP this tree) (VP (V is) (AdjP shapeable)))')
    tc2 = TreeWidget(cf.canvas(), treetok, boxit, ovalit, shapeable=1)
    
    # Click callbacks: each assigns a '#'-prefixed string of random decimal
    # digits as a color.
    def color(node):
        node['color'] = '#%04d00' % random.randint(0,9999)
    def color2(treeseg):
        treeseg.node()['fill'] = '#%06d' % random.randint(0,9999)
        treeseg.node().child()['color'] = 'white'

    # NOTE(review): the trailing 3 is presumably the mouse button (the help
    # text below mentions right clicking) -- confirm against bind_click.
    tc.bind_click_trees(tc.toggle_collapsed)
    tc2.bind_click_trees(tc2.toggle_collapsed)
    tc.bind_click_nodes(color, 3)
    tc2.expanded_tree(1).bind_click(color2, 3)
    tc2.expanded_tree().bind_click(color2, 3)

    # Tree 2 is shown inside a ParenWidget, to the right of tree 1.
    paren = ParenWidget(cf.canvas(), tc2)
    cf.add_widget(paren, tc.bbox()[2]+10, 10)

    # Tree 3: built with tree_to_treesegment and placed below tree 1.
    tree3 = tree.bracket_parse('''
    (S (NP this tree) (AUX was)
       (VP (V built) (PP (P with) (NP (N tree_to_treesegment)))))''')
    tc3 = tree_to_treesegment(cf.canvas(), tree3, tree_color='green4',
                              tree_xspace=2, tree_width=2)
    tc3['draggable'] = 1
    cf.add_widget(tc3, 10, tc.bbox()[3]+10)

    # Click callback for tree 4: flips its 'orientation' setting and
    # rewrites the text of several subtrees to name the new orientation.
    def orientswitch(treewidget):
        if treewidget['orientation'] == 'horizontal':
            treewidget.expanded_tree(1,1).subtrees()[0].set_text('vertical')
            treewidget.collapsed_tree(1,1).subtrees()[0].set_text('vertical')
            treewidget.collapsed_tree(1).subtrees()[1].set_text('vertical')
            treewidget.collapsed_tree().subtrees()[3].set_text('vertical')
            treewidget['orientation'] = 'vertical'
        else:
            treewidget.expanded_tree(1,1).subtrees()[0].set_text('horizontal')
            treewidget.collapsed_tree(1,1).subtrees()[0].set_text('horizontal')
            treewidget.collapsed_tree(1).subtrees()[1].set_text('horizontal')
            treewidget.collapsed_tree().subtrees()[3].set_text('horizontal')
            treewidget['orientation'] = 'horizontal'

    # Help text shown in a draggable white box.
    text = """
Try clicking, right clicking, and dragging
different elements of each of the trees.
The top-left tree is a TreeWidget built from
a Tree.  The top-right is a TreeWidget built
from a Tree, using non-default widget
constructors for the nodes & leaves (BoxWidget
and OvalWidget).  The bottom-left tree is
built from tree_to_treesegment."""
    twidget = TextWidget(cf.canvas(), text.strip())
    textbox = BoxWidget(cf.canvas(), twidget, fill='white', draggable=1)
    cf.add_widget(textbox, tc3.bbox()[2]+10, tc2.bbox()[3]+10)

    # Tree 4: horizontally oriented TreeWidget placed below the text box.
    tree4 = tree.bracket_parse('(S (NP this tree) (VP (V is) (Adj horizontal)))')
    tc4 = TreeWidget(cf.canvas(), tree4, draggable=1,
                     line_color='brown2', roof_color='brown2',
                     node_font=('helvetica', -12, 'bold'),
                     node_color='brown4', orientation='horizontal')
    tc4.manage()
    cf.add_widget(tc4, tc3.bbox()[2]+10, textbox.bbox()[3]+10)
    tc4.bind_click(orientswitch)
    tc4.bind_click_trees(tc4.toggle_collapsed, 3)

    # Run mainloop
    cf.mainloop()