Example #1
def demo(draw_parses=None, print_parses=None):
    """
    A simple demo showing some basic functionality.
    """
    import sys

    demos = ['aandeelhoudersvergadering', 'hardloopwedstrijd']
    trees = []
    with MBMA() as program:
        for word in demos:
            print('Parsing: %s' % word)
            results = program.classify(word)
            trees.extend(program.trees(results))
    if draw_parses is None:
        print()
        print('Draw parses (y/n)? ', end=' ')
        draw_parses = sys.stdin.readline().strip().lower().startswith('y')
    if draw_parses:
        from nltk.draw.tree import draw_trees
        print('  please wait...')
        draw_trees(*trees)

    if print_parses is None:
        print()
        print('Print parses (y/n)? ', end=' ')
        print_parses = sys.stdin.readline().strip().lower().startswith('y')
    if print_parses:
        for parse in trees:
            print(parse)
Example #2
    def draw(self):
        """
        Open a new window containing a graphical diagram of this tree.
        """
        from nltk.draw.tree import draw_trees

        draw_trees(self)
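
For context, draw_trees works on any nltk.Tree; a minimal, self-contained call (not part of the example above) looks like this:

from nltk.tree import Tree
from nltk.draw.tree import draw_trees

# Build a tree from bracketed text, then open it in the Tk viewer.
t = Tree.fromstring('(S (NP I) (VP (V saw) (NP him)))')
draw_trees(t)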
Example #3
def test():
    model = torch.load('./ckpt/model0.pt')
    leafmodel = LeafNet()
    x = treebank.sents('wsj_0003.mrg')[0]
    y = treebank.parsed_sents('wsj_0003.mrg')[0]
    preprocess(y)
    # embed_x is the list of embedding vectors of x
    embed_x = []
    x_list = []
    l = len(x)

    for i in range(l):
        txlist = []
        x[i] = x[i].lower()
        txlist.append(x[i])
        tembed = torch.Tensor(get_embed(x[i]))
        embed_x.append(tembed)

        pred = leafmodel(embed_x[i])
        gt = (torch.argmax(pred)).item()
        txlist.append(gt)
        x_list.append(txlist)

    # we got the (sentence,gt) list, embedding vector list for the leafs
    xscore = 0.0
    while (len(x_list) != 1):
        x_list, embed_x, tscore = calculate_score(x_list, embed_x, model)
        xscore = xscore + tscore
    x_list = str(x_list).replace('[', '(').replace(']', ')').replace(
        '\'', '').replace(',', '')
    x_list_tree = Tree.fromstring(x_list)

    draw_trees(x_list_tree)
    draw_trees(y)
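
The list-to-string round trip above breaks as soon as a token contains a bracket, quote, or comma. A sturdier sketch (assuming the same nested-list shape; list_to_tree is a name introduced here for illustration only):

from nltk import Tree

def list_to_tree(node, label='X'):
    """Recursively turn a nested list into an nltk.Tree; leaves stay strings."""
    if isinstance(node, list):
        return Tree(label, [list_to_tree(child) for child in node])
    return str(node)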
Example #5
def demo():
    """
    A demonstration showing how each tree transform can be used.
    """

    from copy import deepcopy

    from nltk import tree, treetransforms
    from nltk.draw.tree import draw_trees

    # original tree from WSJ bracketed text
    sentence = """(TOP
  (S
    (S
      (VP
        (VBN Turned)
        (ADVP (RB loose))
        (PP
          (IN in)
          (NP
            (NP (NNP Shane) (NNP Longman) (POS 's))
            (NN trading)
            (NN room)))))
    (, ,)
    (NP (DT the) (NN yuppie) (NNS dealers))
    (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
    (. .)))"""
    t = tree.Tree.fromstring(sentence, remove_empty_top_bracketing=True)

    # collapse subtrees with only one child
    collapsedTree = deepcopy(t)
    treetransforms.collapse_unary(collapsedTree)

    # convert the tree to CNF
    cnfTree = deepcopy(collapsedTree)
    treetransforms.chomsky_normal_form(cnfTree)

    # convert the tree to CNF with parent annotation (one level) and horizontal smoothing of order two
    parentTree = deepcopy(collapsedTree)
    treetransforms.chomsky_normal_form(parentTree, horzMarkov=2, vertMarkov=1)

    # convert the tree back to its original form (used to make CYK results comparable)
    original = deepcopy(parentTree)
    treetransforms.un_chomsky_normal_form(original)

    # convert tree back to bracketed text
    sentence2 = original.pprint()
    print(sentence)
    print(sentence2)
    print("Sentences the same? ", sentence == sentence2)

    draw_trees(t, collapsedTree, cnfTree, parentTree, original)
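
The same transforms can be tried on a much smaller tree; a minimal sketch (the tree here is made up for illustration):

from nltk import Tree, treetransforms

t = Tree.fromstring('(S (NP (DT the) (JJ big) (NN dog)) (VP (VBD barked)))')
treetransforms.chomsky_normal_form(t, horzMarkov=2)
print(t)   # NP is binarized into NP and NP|<JJ-NN> nodes
treetransforms.un_chomsky_normal_form(t)
print(t)   # back to the original shape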
Example #6
def demo():
    """
    A demonstration showing how each tree transform can be used.
    """

    from nltk.draw.tree import draw_trees
    from nltk import tree, treetransforms
    from copy import deepcopy

    # original tree from WSJ bracketed text
    sentence = """(TOP
  (S
    (S
      (VP
        (VBN Turned)
        (ADVP (RB loose))
        (PP
          (IN in)
          (NP
            (NP (NNP Shane) (NNP Longman) (POS 's))
            (NN trading)
            (NN room)))))
    (, ,)
    (NP (DT the) (NN yuppie) (NNS dealers))
    (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
    (. .)))"""
    t = tree.Tree.fromstring(sentence, remove_empty_top_bracketing=True)

    # collapse subtrees with only one child
    collapsedTree = deepcopy(t)
    treetransforms.collapse_unary(collapsedTree)

    # convert the tree to CNF
    cnfTree = deepcopy(collapsedTree)
    treetransforms.chomsky_normal_form(cnfTree)

    # convert the tree to CNF with parent annotation (one level) and horizontal smoothing of order two
    parentTree = deepcopy(collapsedTree)
    treetransforms.chomsky_normal_form(parentTree, horzMarkov=2, vertMarkov=1)

    # convert the tree back to its original form (used to make CYK results comparable)
    original = deepcopy(parentTree)
    treetransforms.un_chomsky_normal_form(original)

    # convert tree back to bracketed text
    sentence2 = original.pprint()
    print(sentence)
    print(sentence2)
    print("Sentences the same? ", sentence == sentence2)

    draw_trees(t, collapsedTree, cnfTree, parentTree, original)
Example #7
def testTreeFilter(tree=None):
    """

    :param tree: Sample tree string in bracket notation.
    """
    if tree:
        t = Tree.fromstring(tree)
    else:
        t = Tree.fromstring(
            '((S(NP-SBJ (PRP They))(ADVP-TMP (RB never))(VP (VBD considered)(S (NP-SBJ (PRP themselves) (VP (TO to) (VP (VB be) (NP-PRD (NN anything) (RB else)))))))))')
    t2 = t.copy(deep=True)
    filterLexical(t2)

    from nltk.draw.tree import draw_trees

    draw_trees(t, t2)
Example #8
def to_one_hot(smiles):
    """ Encode a list of smiles strings to one-hot vectors """
    token = tokenize(smiles)
    parser = nltk.ChartParser(zinc_grammar.GCFG)
    parse_tree = next(parser.parse(token))
    draw_trees(parse_tree)
    print(type(parse_tree))
    exit(0)
    productions_seq = parse_tree.productions()
    print(smiles)
    for i in productions_seq:
        print(i)
    exit(0)
    indices = [prod_map[prod] for prod in productions_seq]
    one_hot = np.zeros(shape=(MAX_LEN, NRULES), dtype=np.float32)
    num_productions = len(indices)
    one_hot[np.arange(num_productions), indices] = 1.
    one_hot[np.arange(num_productions, MAX_LEN), -1] = 1.
    return one_hot
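
Going the other way is a one-liner: each row of the one-hot matrix has exactly one 1, so argmax recovers the rule indices, and padding rows map to the last column. A sketch, assuming the one_hot layout produced above:

import numpy as np

def one_hot_to_indices(one_hot):
    # rows whose 1 sits in the last column are padding, not real productions
    indices = one_hot.argmax(axis=-1)
    return [i for i in indices if i != one_hot.shape[-1] - 1]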
Example #9
	def parse(self, sentence_string):
		"""Extra credit : parse an arbitrary string
		
		This is actually what we want at the end.
		Given an arbitrary string
		0) split it into sentences (if you want to accept multiple sentences.)
		1) tokenize
		2) POS-tag and other pre-processing technique
		3) parse it!
		4) draw it using nltk draw_trees like in the example
			it does not support labeled arc though :(
		"""
		#draw a tree
		from nltk.draw.tree import draw_trees
		from nltk.tree import Tree
		import nltk
		

		words = nltk.pos_tag(nltk.word_tokenize(sentence_string))
		sentence = {'tokens': ['ROOT'], 'arcs': [], 'pos':['ROOT']}
		for word, pos in words:
			sentence['tokens'].append(word)
			sentence['pos'].append(pos)
		indices = range(len(sentence['tokens']))
		fcg = self.make_fully_connected_graph(sentence)
		weighted = self._featurized_to_weighted(fcg)
		max_spanning_tree = mst(0, weighted)
		wlist = sentence['tokens']
		
		#print the dependencies
		for i in max_spanning_tree.keys():
			for j in max_spanning_tree[i].keys():
				print "%s->%s" % (i, j)
		
		t = self._build_tree(max_spanning_tree, 0, wlist)
		draw_trees(Tree(t))
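
_build_tree is not shown; a plausible stand-in (the name mst_to_bracketed and the {head: {dependent: weight}} shape are assumptions based on how max_spanning_tree is used above) would be:

def mst_to_bracketed(mst, root, words):
    """Render an MST given as {head: {dependent: weight}} as a bracketed string."""
    children = sorted(mst.get(root, {}))
    if not children:
        return words[root]
    rendered = ' '.join(mst_to_bracketed(mst, child, words) for child in children)
    return '(%s %s)' % (words[root], rendered)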
Example #10
# nb: returns tree with blank nodes excised
def parse_ccgbank_tree(s):
    # Tree.parse is gone in NLTK 3; Tree.fromstring takes the same
    # callbacks under the names read_node / read_leaf
    t = Tree.fromstring(s,
                        read_node=parse_ccgbank_node,
                        read_leaf=parse_ccgbank_leaf,
                        node_pattern=ccgbank_node_pattern,
                        leaf_pattern=ccgbank_leaf_pattern)
    return excise_empty_nodes(t)


print()
print('parsing: ' + deriv)
t = parse_ccgbank_tree(deriv)
print(t)

t2 = None
if deriv2 is not None:
    print()
    print('parsing: ' + deriv2)
    t2 = parse_ccgbank_tree(deriv2)
    print(t2)

print()
if t2 is None:
    print('drawing tree')
    draw_trees(t)
else:
    print('drawing trees')
    draw_trees(t, t2)
Example #11
    def draw_tree(self):
        draw_trees(self.nltk_tree)
Example #12
def demo(choice=None, draw_parses=None, print_parses=None):
    """
    A demonstration of the probabilistic parsers.  The user is
    prompted to select which demo to run, and how many parses should
    be found; and then each parser is run on the same demo, and a
    summary of the results are displayed.
    """
    import sys, time
    from functools import reduce
    from nltk import tokenize
    from nltk.grammar import PCFG
    from nltk.parse import pchart

    # Define two demos.  Each demo has a sentence and a grammar.
    toy_pcfg1 = PCFG.fromstring("""
    S -> NP VP [1.0]
    NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15]
    Det -> 'the' [0.8] | 'my' [0.2]
    N -> 'man' [0.5] | 'telescope' [0.5]
    VP -> VP PP [0.1] | V NP [0.7] | V [0.2]
    V -> 'ate' [0.35] | 'saw' [0.65]
    PP -> P NP [1.0]
    P -> 'with' [0.61] | 'under' [0.39]
    """)

    toy_pcfg2 = PCFG.fromstring("""
    S    -> NP VP         [1.0]
    VP   -> V NP          [.59]
    VP   -> V             [.40]
    VP   -> VP PP         [.01]
    NP   -> Det N         [.41]
    NP   -> Name          [.28]
    NP   -> NP PP         [.31]
    PP   -> P NP          [1.0]
    V    -> 'saw'         [.21]
    V    -> 'ate'         [.51]
    V    -> 'ran'         [.28]
    N    -> 'boy'         [.11]
    N    -> 'cookie'      [.12]
    N    -> 'table'       [.13]
    N    -> 'telescope'   [.14]
    N    -> 'hill'        [.5]
    Name -> 'Jack'        [.52]
    Name -> 'Bob'         [.48]
    P    -> 'with'        [.61]
    P    -> 'under'       [.39]
    Det  -> 'the'         [.41]
    Det  -> 'a'           [.31]
    Det  -> 'my'          [.28]
    """)

    demos = [('I saw John with my telescope', toy_pcfg1),
             ('the boy saw Jack with Bob under the table with a telescope',
              toy_pcfg2)]

    if choice is None:
        # Ask the user which demo they want to use.
        print()
        for i in range(len(demos)):
            print('%3s: %s' % (i+1, demos[i][0]))
            print('     %r' % demos[i][1])
            print()
        print('Which demo (%d-%d)? ' % (1, len(demos)), end=' ')
        choice = int(sys.stdin.readline().strip())-1
    try:
        sent, grammar = demos[choice]
    except:
        print('Bad sentence number')
        return

    # Tokenize the sentence.
    tokens = sent.split()

    # Define a list of parsers.  We'll use all parsers.
    parsers = [
        pchart.InsideChartParser(grammar),
        pchart.RandomChartParser(grammar),
        pchart.UnsortedChartParser(grammar),
        pchart.LongestChartParser(grammar),
        pchart.InsideChartParser(grammar, beam_size = len(tokens)+1)   # was BeamParser
        ]

    # Run the parsers on the tokenized sentence.
    times = []
    average_p = []
    num_parses = []
    all_parses = {}
    for parser in parsers:
        print('\ns: %s\nparser: %s\ngrammar: %s' % (sent,parser,grammar))
        parser.trace(3)
        t = time.time()
        parses = list(parser.parse(tokens))
        times.append(time.time()-t)
        p = (reduce(lambda a,b:a+b.prob(), parses, 0)/len(parses) if parses else 0)
        average_p.append(p)
        num_parses.append(len(parses))
        for p in parses: all_parses[p.freeze()] = 1

    # Print some summary statistics
    print()
    print('       Parser      Beam | Time (secs)   # Parses   Average P(parse)')
    print('------------------------+------------------------------------------')
    for i in range(len(parsers)):
        print('%18s %4d |%11.4f%11d%19.14f' % (parsers[i].__class__.__name__,
                                             parsers[i].beam_size,
                                             times[i],num_parses[i],average_p[i]))
    parses = all_parses.keys()
    if parses: p = reduce(lambda a,b:a+b.prob(), parses, 0)/len(parses)
    else: p = 0
    print('------------------------+------------------------------------------')
    print('%18s      |%11s%11d%19.14f' % ('(All Parses)', 'n/a', len(parses), p))

    if draw_parses is None:
        # Ask the user if we should draw the parses.
        print()
        print('Draw parses (y/n)? ', end=' ')
        draw_parses = sys.stdin.readline().strip().lower().startswith('y')
    if draw_parses:
        from nltk.draw.tree import draw_trees
        print('  please wait...')
        draw_trees(*parses)

    if print_parses is None:
        # Ask the user if we should print the parses.
        print()
        print('Print parses (y/n)? ', end=' ')
        print_parses = sys.stdin.readline().strip().lower().startswith('y')
    if print_parses:
        for parse in parses:
            print(parse)
Example #13
import nltk
sentence = """At eight o'clock on Thursday morning
Arthur didn't feel very good."""
tokens = nltk.word_tokenize(sentence)
print(tokens)
tagged = nltk.pos_tag(tokens)
print(tagged[0:6])
entities = nltk.chunk.ne_chunk(tagged)
print(entities)
from nltk.draw.tree import draw_trees
draw_trees(entities)
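
The chunk tree can also be inspected without the GUI; subtrees() and leaves() are standard nltk.Tree methods (a small follow-on sketch):

for subtree in entities.subtrees(lambda t: t.label() != 'S'):
    print(subtree.label(), ' '.join(word for word, tag in subtree.leaves()))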

Example #15
def demo():
    """
    A demonstration of the probabilistic parsers.  The user is
    prompted to select which demo to run, and how many parses should
    be found; and then each parser is run on the same demo, and a
    summary of the results are displayed.
    """
    import sys, time
    from functools import reduce

    from nltk import tokenize
    from nltk.grammar import toy_pcfg1, toy_pcfg2
    from nltk.parse import ViterbiParser

    # Define two demos.  Each demo has a sentence and a grammar.
    demos = [('I saw the man with my telescope', toy_pcfg1),
             ('the boy saw Jack with Bob under the table with a telescope', toy_pcfg2)]

    # Ask the user which demo they want to use.
    print()
    for i in range(len(demos)):
        print('%3s: %s' % (i+1, demos[i][0]))
        print('     %r' % demos[i][1])
        print()
    print('Which demo (%d-%d)? ' % (1, len(demos)), end=' ')
    try:
        snum = int(sys.stdin.readline().strip())-1
        sent, grammar = demos[snum]
    except:
        print('Bad sentence number')
        return

    # Tokenize the sentence.
    tokens = sent.split()

    parser = ViterbiParser(grammar)
    all_parses = {}

    print('\nsent: %s\nparser: %s\ngrammar: %s' % (sent, parser, grammar))
    parser.trace(3)
    t = time.time()
    parses = parser.parse_all(tokens)  # nbest_parse was removed in NLTK 3
    time = time.time()-t
    if parses:
        average = reduce(lambda a, b: a+b.prob(), parses, 0)/len(parses)
    else:
        average = 0
    num_parses = len(parses)
    for p in parses:
        all_parses[p.freeze()] = 1

    # Print some summary statistics
    print()
    print('Time (secs)   # Parses   Average P(parse)')
    print('-----------------------------------------')
    print('%11.4f%11d%19.14f' % (time, num_parses, average))
    parses = all_parses.keys()
    if parses:
        p = reduce(lambda a, b: a+b.prob(), parses, 0)/len(parses)
    else:
        p = 0
    print('------------------------------------------')
    print('%11s%11d%19.14f' % ('n/a', len(parses), p))

    # Ask the user if we should draw the parses.
    print()
    print('Draw parses (y/n)? ', end=' ')
    if sys.stdin.readline().strip().lower().startswith('y'):
        from nltk.draw.tree import draw_trees
        print('  please wait...')
        draw_trees(*parses)

    # Ask the user if we should print the parses.
    print()
    print('Print parses (y/n)? ', end=' ')
    if sys.stdin.readline().strip().lower().startswith('y'):
        for parse in parses:
            print(parse)
Example #16
def demo():
    """
    A demonstration of the probabilistic parsers.  The user is
    prompted to select which demo to run, and how many parses should
    be found; and then each parser is run on the same demo, and a
    summary of the results are displayed.
    """
    import sys, time
    from functools import reduce

    from nltk import tokenize
    from nltk.grammar import toy_pcfg1, toy_pcfg2
    from nltk.parse import pchart

    # Define two demos.  Each demo has a sentence and a grammar.
    demos = [('I saw John with my telescope', toy_pcfg1),
             ('the boy saw Jack with Bob under the table with a telescope',
              toy_pcfg2)]

    # Ask the user which demo they want to use.
    print()
    for i in range(len(demos)):
        print('%3s: %s' % (i + 1, demos[i][0]))
        print('     %r' % demos[i][1])
        print()
    print('Which demo (%d-%d)? ' % (1, len(demos)), end=' ')
    try:
        snum = int(sys.stdin.readline().strip()) - 1
        sent, grammar = demos[snum]
    except:
        print('Bad sentence number')
        return

    # Tokenize the sentence.
    tokens = sent.split()

    # Define a list of parsers.  We'll use all parsers.
    parsers = [
        pchart.InsideChartParser(grammar),
        pchart.RandomChartParser(grammar),
        pchart.UnsortedChartParser(grammar),
        pchart.LongestChartParser(grammar),
        pchart.InsideChartParser(grammar,
                                 beam_size=len(tokens) + 1)  # was BeamParser
    ]

    # Run the parsers on the tokenized sentence.
    times = []
    average_p = []
    num_parses = []
    all_parses = {}
    for parser in parsers:
        print('\ns: %s\nparser: %s\ngrammar: %s' % (sent, parser, grammar))
        parser.trace(3)
        t = time.time()
        parses = list(parser.parse(tokens))  # nbest_parse was removed in NLTK 3
        times.append(time.time() - t)
        if parses:
            p = reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses)
        else:
            p = 0
        average_p.append(p)
        num_parses.append(len(parses))
        for p in parses:
            all_parses[p.freeze()] = 1

    # Print some summary statistics
    print()
    print('       Parser      Beam | Time (secs)   # Parses   Average P(parse)')
    print('------------------------+------------------------------------------')
    for i in range(len(parsers)):
        print('%18s %4d |%11.4f%11d%19.14f' % (parsers[i].__class__.__name__,
                                               parsers[i].beam_size, times[i],
                                               num_parses[i], average_p[i]))
    parses = all_parses.keys()
    if parses: p = reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses)
    else: p = 0
    print('------------------------+------------------------------------------')
    print('%18s      |%11s%11d%19.14f' % ('(All Parses)', 'n/a', len(parses),
                                          p))

    # Ask the user if we should draw the parses.
    print()
    print('Draw parses (y/n)? ', end=' ')
    if sys.stdin.readline().strip().lower().startswith('y'):
        from nltk.draw.tree import draw_trees
        print('  please wait...')
        draw_trees(*parses)

    # Ask the user if we should print the parses.
    print()
    print('Print parses (y/n)? ', end=' ')
    if sys.stdin.readline().strip().lower().startswith('y'):
        for parse in parses:
            print(parse)
Example #17
def main():
    import sys
    from optparse import OptionParser, OptionGroup
    usage = """%%prog [options] [grammar_file]""" % globals()

    opts = OptionParser(usage=usage)
    opts.add_option("-c", "--components",
        action="store_true", dest="show_components", default=0,
        help="show hole semantics components")
    opts.add_option("-r", "--raw",
        action="store_true", dest="show_raw", default=0,
        help="show the raw hole semantics expression")
    opts.add_option("-d", "--drawtrees",
        action="store_true", dest="draw_trees", default=0,
        help="show formula trees in a GUI window")
    opts.add_option("-v", "--verbose",
        action="count", dest="verbosity", default=0,
        help="show more information during parse")

    (options, args) = opts.parse_args()

    if len(args) > 0:
        filename = args[0]
    else:
        filename = 'grammars/hole.fcfg'

    # note: load_earley, tree.node and nbest_parse come from the
    # pre-3.0 NLTK API
    print('Reading grammar file', filename)
    #grammar = data.load(filename)
    parser = load_earley(filename, trace=options.verbosity)

    # Prompt the user for a sentence.
    print('Sentence: ', end='')
    line = sys.stdin.readline()[:-1]

    # Parse the sentence.
    tokens = line.split()
    trees = parser.nbest_parse(tokens)
    print('Got %d different parses' % len(trees))

    for tree in trees:
        # Get the semantic feature from the top of the parse tree.
        sem = tree[0].node['sem'].simplify()

        # Skolemise away all quantifiers.  All variables become unique.
        sem = sem.skolemise()

        # Reparse the semantic representation from its bracketed string format.
        # I find this uniform structure easier to handle.  It also makes the
        # code mostly independent of the lambda calculus classes.
        usr = bracket_parse(str(sem))

        # Break the hole semantics representation down into its components
        # i.e. holes, labels, formula fragments and constraints.
        hole_sem = HoleSemantics(usr)

        # Maybe print the raw semantic representation.
        if options.show_raw:
            print()
            print('Raw expression')
            print(usr)

        # Maybe show the details of the semantic representation.
        if options.show_components:
            print()
            print('Holes:       ', hole_sem.holes)
            print('Labels:      ', hole_sem.labels)
            print('Constraints: ', hole_sem.constraints)
            print('Top hole:    ', hole_sem.top_hole)
            print('Top labels:  ', hole_sem.top_most_labels)
            print('Fragments:')
            for (l, f) in hole_sem.fragments.items():
                print('\t%s: %s' % (l, f))

        # Find all the possible ways to plug the formulas together.
        pluggings = hole_sem.pluggings()

        # Build FOL formula trees using the pluggings (as a list, since the
        # trees are iterated twice below).
        trees = list(map(hole_sem.formula_tree, pluggings))

        # Print out the formulas in a textual format.
        n = 1
        for tree in trees:
            print()
            print('%d. %s' % (n, tree))
            n += 1

        # Maybe draw the formulas as trees.
        if options.draw_trees:
            draw_trees(*trees)

        print()
        print('Done.')
Example #18
from SyntacticAnalyzer import SynatcticAnalyzer
from GraphematicAnalyzer import GraphematicAnalyzer
from MorphologicalAnalyzer import MorphologicalAnalyzer
from nltk.draw.tree import draw_trees

FILE_NAME = "Test files\\" + "simpleSents.txt"

try:
    file = open(FILE_NAME, "r", encoding="UTF-8")
    text = file.read()
    file.close()

    graphAn = GraphematicAnalyzer()
    processedText = graphAn.sentences(text)
    for sentence in processedText:
        parserResult = SynatcticAnalyzer.analyzeSentence(sentence)
        draw_trees(*parserResult)

    #morph = MorphologicalAnalyzer()
    #for a in morph.analyzeWord("кругом"):
    #    print(a['часть речи'])

except FileNotFoundError:
    print("The file does not exist")
Example #19
from nltk.tree import ParentedTree
ptree = ParentedTree.fromstring('(ROOT (S (NP (JJ Congressional) \
    (NNS representatives)) (VP (VBP are) (VP (VBN motivated) \
    (PP (IN by) (NP (NP (ADJ shiny) (NNS money))))))) (. .))')
from nltk import tree, treetransforms
from copy import deepcopy
from nltk.draw.tree import draw_trees


def traverse(t):
    try:
        t.label()
    except AttributeError:
        return
    else:

        if t.height() == 2:  # preterminal nodes (POS tag over a word)
            print(t)
            return

        for child in t:
            traverse(child)


traverse(ptree)
draw_trees(ptree)
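
What ParentedTree adds over a plain Tree is upward navigation; a small follow-on sketch using the same ptree:

for sub in ptree.subtrees(lambda t: t.height() == 2):
    print(sub, '-> parent:', sub.parent().label())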
Example #20
import nltk

groucho_grammar = nltk.CFG.fromstring("""
S -> NP VP
PP -> P NP
NP -> Det N | Det N PP | 'I'
VP -> V NP | VP PP
Det -> 'an' | 'my'
N -> 'elephant' | 'pajamas'
V -> 'shot'
P -> 'in'
""")

sent = ['I', 'shot', 'an', 'elephant', 'in', 'my', 'pajamas']
parser = nltk.ChartParser(groucho_grammar)
for tree1 in parser.parse(sent):
    print(tree1)

grammar1 = nltk.CFG.fromstring("""
S -> NP VP
VP -> V NP | V NP PP
PP -> P NP
V -> "saw" | "ate" | "walked"
NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
Det -> "a" | "an" | "the" | "my"
N -> "man" | "dog" | "cat" | "telescope" | "park"
P -> "in" | "on" | "by" | "with"
""")

sent = "Mary saw Bob".split()
rd_parser = nltk.RecursiveDescentParser(grammar1)
for tree2 in rd_parser.parse(sent):
    print(tree2)

from nltk.draw.tree import draw_trees
draw_trees(tree1, tree2)     
Example #21

import nltk
from nltk.draw.tree import draw_trees
from nltk.parse import pchart
grammar = nltk.PCFG.fromstring("""
                               S -> NP VP [0.8] | Aux NP VP [0.1] | VP [0.1]
                               NP -> Det Nominal [0.6] | Pronoun [0.2] | Proper-Noun [0.2]
                               Nominal -> Noun [0.3] | Nominal Noun [0.2] | Nominal PP [0.5]
                               VP -> Verb [0.3] | Verb NP [0.2] | VP PP [0.5]
                               PP -> Prep NP [1.0]
                               Det -> 'the' [0.6] | 'a' [0.2] | 'that' [0.1] | 'is' [0.1]
                               Verb -> 'book' [0.5] | 'include' [0.2] | 'prefer' [0.3]
                               Noun -> 'book' [0.1] | 'flight' [0.5] | 'meal' [0.2] | 'money' [0.2]
                               Proper-Noun -> 'Houston' [0.8] | 'NWA' [0.2]
                               Prep -> 'from' [0.25] |'to' [0.25] | 'near' [0.1] | 'through' [0.2] | 'on' [0.2]
                               """)

print(grammar)
parser = pchart.InsideChartParser(grammar)

sent = "book the flight through Houston"

trees = parser.parse(sent.split())

for t in trees:
    print(t)
    draw_trees(t)
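
Since InsideChartParser yields ProbabilisticTree objects, each parse also carries its probability (a small follow-on sketch; it reparses because the generator above is already exhausted):

for t in parser.parse(sent.split()):
    print('%.6g' % t.prob(), t)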

    
Example #22
    def fetch_keyphrases(self, text_, display_tree=False):
        '''
            Uses the Berkeley Neural Parser to extract noun, prepositional, verb and other
            phrases of at most max_len words each. The remaining single words are combined
            into phrases using an extended set of part-of-speech patterns.
            Arguments:
                text_        - text as one string to have keyphrases extracted from
                display_tree - if True, the parsed constituency tree is visualized
            Returns:
                keyphrases   - list of all keyphrases as json objects
        '''
        if not isinstance(text_, str):
            print(f"Warning: expected type str, got {type(text_)} as input")
            return []
        elif not text_:
            print('Warning: got empty string as input')
            return []

        punct = set("`'\"#-_,.;:?!+[]()$%&@\\/<>^|~")
        res = []
        words = []
        seen_tokens = set()
        max_len = 5
        self.doc = self.nlp(text_)

        ##### CONSTITUENCY PARSING (Berkeley Neural Parser) #####

        for sent in list(self.doc.sents):

            # display the constituency tree
            if display_tree:
                tree = Tree.fromstring(sent._.parse_string)
                if is_notebook():
                    display(tree)
                else:
                    draw_trees(tree)

            for c in list(sent._.constituents):
                # skip whole sentences and long spans
                if 'S' in c._.labels or\
                   len(c.text.split()) > max_len or\
                   c.text.strip().lower() in punct:
                    continue

                children_labels = list(
                    sum([i._.labels for i in list(c._.children)], ()))

                # split NPs w/2+ NPs, PPs w/2+ PPs, VPs w/2+ NPs
                if ('NP' in c._.labels and children_labels.count('NP') > 1) or\
                   ('NP' in c._.labels and children_labels.count('PP') > 1) or\
                   ('PP' in c._.labels and children_labels.count('NP') > 1) or\
                   ('PP' in c._.labels and children_labels.count('PP') > 1) or\
                   ('VP' in c._.labels and children_labels.count('NP') > 1) or\
                   ('VP' in c._.labels and children_labels.count('PP') > 1):
                    continue

                # split VPs w/NP inside if phrase is too long
                if ('VP' in c._.labels and 'NP' in children_labels):
                    #and len(c.text.split()) >= 5:
                    continue

                if c.start not in seen_tokens and c.end - 1 not in seen_tokens:
                    if len(c.text.split()) > 1:
                        res.append(c)
                        seen_tokens.update(range(c.start, c.end))
                        #print(f'Appending constituent {c.text}')
                    else:
                        words.append(c)
                        #print(f'Appending word {c.text}')

        ##### FIT IN POS PATTERNS #####

        # get additional spans based on POS patterns
        matcher = self.build_matcher()
        matches = matcher(self.doc)
        spans = [self.doc[start:end] for _, start, end in matches]
        spans = [span for span in spans if len(span.text.split()) > 1]
        #print('Spacy spans:', spans)

        # sort additional spans, fit in
        get_sort_key = lambda span: (span.end - span.start, -span.start)
        sorted_spans = sorted(spans, key=get_sort_key, reverse=True)

        for span in sorted_spans:
            if span.start not in seen_tokens and span.end - 1 not in seen_tokens:

                # additional precaution since constituents may be shorter
                mid_point1 = span.start + int((span.end - span.start) * 0.4)
                mid_point2 = span.start + int((span.end - span.start) * 0.8)
                if mid_point1 not in seen_tokens and mid_point2 not in seen_tokens:
                    res.append(span)
                    seen_tokens.update(range(span.start, span.end))

        ##### FIT IN WORDS #####
        for word in words:
            if word.start not in seen_tokens and word.end - 1 not in seen_tokens:
                res.append(word)
                seen_tokens.update(range(word.start, word.end))

        res = sorted(res, key=lambda x: x.start)

        # if only 1 span and it's the whole sentence (usually a very short one)
        #if len(res) == 1 and res[0].text == text_:
        #    return [ {'text':  token.text,
        #              'start': token.idx,
        #              'end':   token.token.idx + len(token.text), } for token in self.doc ]

        return [{
            'text': span.text,
            'start': span.start_char,
            'end': span.end_char,
        } for span in res]
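
The enclosing class is not shown; self.nlp is presumably a spaCy pipeline with the benepar constituency-parser component, which is what provides the sent._.parse_string and sent._.constituents extensions used above. A hypothetical wiring sketch (model names assumed):

import benepar
import spacy

nlp = spacy.load('en_core_web_sm')
nlp.add_pipe('benepar', config={'model': 'benepar_en3'})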
Example #23
import nltk
from nltk.draw.tree import draw_trees
my_grammar = nltk.CFG.fromstring("""
s -> ss | sgroup
sgroup -> ss sgm
sgm -> ss | sgroup
ss -> vp_sg np_gen_pl | vp | negpron v_pl | negpron vp_pl | negpron v_sg | negpron vp_sg | name vp_sg | v_gov_pl n_pl | v_gov_sg n_sg | indefpron vp_sg | demonstrpron vp_gov_sg | inf_clause | conj ss | np_nom_pl negvptrans | np_nom_sg negvptrans | vp_gov_sg n_sg | vp_gov_sg np_nom_sg | vp_gov_pl n_pl | vp_gov_pl np_nom_pl | demonstrpron inftrans_phrase | attrpron vp_pl | np_nom_pl vpcopula_gov | vptrans_pl | vp_pl | obj vptrans_pl | v_sg pp_sg | pp_gen_sg vp_pl | fio vp_gov_sg | fio vp_gov_pl | fio v_gov_sg | fio v_gov_pl | fio vptrans_gov_sg | fio vptrans_gov_pl | vp_pl inftrans_phrase | vp_sg inftrans_phrase | tdeter ss | ss rdeter | np_nom_sg  vp_gov_sg | np_nom_pl  vp_gov_pl | n_sg vp_gov_sg | n_pl vp_gov_pl  | pron_sg vp_gov_sg  | pron_pl vp_gov_pl | np_gen_sg vp_gov_sg | np_gen_sg vp_gov_pl | np_gen_pl vp_gov_sg | np_gen_pl vp_gov_pl  | np_nom_pl  | v_gov_pl np_nom_pl | np_nom_sg v_gov_sg  | np_nom_sg v_gov_pl | v_gov_sg np_nom_sg  | np_nom_pl v_gov_pl | pron_sg vptrans_gov_sg | pron_pl vptrans_gov_pl | np_nom_sg vptrans_gov_sg  | np_nom_pl vptrans_gov_pl | vptrans_gov_sg np_nom_sg | vtrans_gov_sg np_nom_sg | vptrans_gov_pl np_nom_pl | vtrans_gov_pl np_nom_pl  | n_sg vptrans_gov_sg | n_pl vptrans_gov_pl | n_sg v_gov_sg | n_pl v_gov_pl | np_nom_sg vpcopula_gov | np_nom_pl vpcopula_gov | pron_sg vpcopula_gov | pron_pl vpcopula_gov | np_nom_sg sparoradj_gov | np_nom_pl sparoradj_gov
tdeter -> pp_num | n_instr_sg emphparticle | n_instr_sg num | adj_pl n_sg | pp_gen_sg | pp_gen_pl | n_instr_pl cnp_acc_pl | n_instr_pl cnp_acc_sg | n_instr_sg cnp_acc_pl | n_instr_sg cnp_acc_sg | pronadv ss | conj pp_sg |  n_instr cnp_acc_sg | n_instr_pl fio_gov | n_instr_sg fio_gov | deyerpob | demonstrpron adjpron | n_instr_sg particle | n_instr_pl particle | adv_gov | n_instr_pl pp_gov_pl | n_instr_sg pp_gov_sg | n_instr_sg np_gen_gov_sg | n_instr_pl np_gen_gov_pl | n_instr_sg np_gen_gov_pl | n_instr_pl np_gen_gov_sg | n_instr_sg fio_gov | n_instr_pl fio_gov | np_instr_sg np_gen_gov_sg | np_instr_pl np_gen_gov_pl | np_instr_pl np_gen_gov_pl | np_instr_pl np_gen_gov_sg | n_instr_sg deyeprob | n_instr_pl deyeprob | n_instr_sg | n_instr_pl | pp_sg | pp_pl | np_instr_sg advp | np_instr_pl advp | n_instr_sg cnp | n_instr_pl cnp
rdeter -> n_instr_sg num | adj_pl n_sg | pp_gen_sg | pp_gen_pl | n_instr_pl cnp_acc_pl | n_instr_pl cnp_acc_sg | n_instr_sg cnp_acc_pl | n_instr_sg cnp_acc_sg | n_instr_pl fio_gov | n_instr_sg fio_gov | deyerpob | demonstrpron adjpron | adv | n_instr_pl pp | n_instr_sg pp | n_instr_sg np_gen_gov_sg | n_instr_pl np_gen_gov_pl | n_instr_sg np_gen_gov_pl | n_instr_pl np_gen_gov_sg | n_instr_sg fio_gov | n_instr_pl fio_gov | np_instr_sg np_gen_gov_sg | np_instr_pl np_gen_gov_pl | np_instr_pl np_gen_gov_pl | np_instr_pl np_gen_gov_sg | n_instr_sg deyeprob | n_instr_pl deyeprob | p indirectpron | n_instr_sg | n_instr_pl | pp | adv ss | np_instr_sg advp | np_instr_pl advp | n_instr_sg cnp | n_instr_pl cnp | pp_sg | pp_pl
circ -> n_instr_sg deyeprob | n_instr_pl deyeprob | p np_loc_gov_sg | p np_loc_gov_pl | p indirectpron | p n_loc_gov_sg | p n_loc_gov_pl | n_instr_sg | n_instr_pl | np_instr_sg pp | np_instr_pl pp | pp | pp_gen_sg | pp_gen_pl | n_instr_sg np_gen_gov_pl | n_instr_pl np_gen_gov_sg | n_instr_sg np_gen_gov_sg | n_instr_pl np_gen_gov_pl | np_instr_sg np_gen_gov_sg | np_instr_pl np_gen_gov_sg | np_instr_sg np_gen_gov_pl | np_instr_pl np_gen_gov_pl | pp_sg cdeyeprob
cdeyeprob -> conj deyeprob
obj -> indirectpron defpron
np_nom_sg  -> pp_gov_sg indefpron | n_sg emphparticle | par n_sg | np_nom_sg fio_gov | parp n_sg | pronparticle n_sg | n_sg fio_gov | n_sg pp_gov_sg | n_sg pp_gov_pl | np_nom_sg  pp_gen_gov | n_sg np_nom_sg | np_nom_sg  np_nom_sg  | attr np_nom_sg  | par np_nom_sg | attr n_sg | n_sg parp | pron_sg pp_gov_sg | pron_sg pp_gov_pl | adj_sg np_nom_sg  | demonstrpron_gov np_nom_sg  | adj_sg n_sg | n_sg adj_sg | n_sg attributiveclause | n_sg np_gen_gov_sg | n_sg n_gen_gov_sg | n_sg np_gen_gov_pl | n_sg n_gen_gov_pl | adjp_sg n_sg | np_nom_sg np_gen_gov_sg | n_sg n_gen_gov_sg | n_sg n_gen_gov_pl | demonstrpron_gov n_sg | pron_sg pp_gov_sg | pospron np_nom_sg | pospron n_sg | n_sg pospron
np_nom_pl  -> pron_pl pp_sg | par n_pl |parp n_pl | pronparticle n_pl | n_pl fio_gov | n_pl pp_gov_sg | n_pl pp_gov_pl | np_nom_pl  pp_gen_gov_sg |  n_pl np_nom_pl | np_nom_pl  np_nom_pl  | attr np_nom_pl  | par np_nom_pl | attr n_pl | n_pl parp | pron_pl pp_gov_pl | pron_pl pp_gov_sg | adj_pl np_nom_pl  | demonstrpron_gov np_nom_pl  | adj_pl n_pl | n_pl adj_pl | n_pl attributiveclause | n_pl np_gen_gov_sg | n_pl n_gen_gov_sg | n_pl np_gen_gov_pl | n_pl n_gen_gov_pl | adjp_pl n_pl | np_nom_pl np_gen_gov_pl | n_pl n_gen_gov_pl | n_pl n_gen_gov_sg | demonstrpron_gov n_pl | pron_pl pp_gov_pl | pospron n_pl | n_pl pospron
np_gen_sg -> np_gen_sg cnp_gen_sg | n_gen_gov_sg cnp_gen_sg | demonstrpron n_gen_sg | pospron n_gen_sg | demonstrpron np_gen_sg | n_gen_sg cnp_gen_pl | n_gen_sg pp_gen_sg | n_gen_sg parp | np_gen_sg fio | n_gen_sg fio | pronparticle n_gen_sg | num n_gen_sg | adj_sg n_gen_sg | adj_sg np_gen_sg | n_gen_sg adv | n_gen_sg n_gen_gov_pl | n_gen_sg n_gen_gov_sg | n_gen_sg np_gen_gov_pl | n_gen_sg np_gen_gov_sg | n_gen_sg attr | particle np_gen_sg | n_gen_sg pp_gov_sg | n_gen_sg pp_gov_pl | n_gen_sg np_gen_gov_sg | np_gen_sg np_gen_gov_sg | n_gen_sg cpp | pp_gen n_gen_sg | n_gen_sg attributiveclause_gen | n_gen_sg cnp | np_gen_sg pp_gov_sg | np_gen_sg pp_gov_pl
np_gen_pl -> adv n_gen_pl | attrpron n_gen_pl | demonstrpron n_gen_pl | num np_gen_pl | par n_gen_pl | n_gen_pl parp | np_gen_pl fio | n_gen_pl fio | pronparticle n_gen_pl | num n_gen_pl | adj_pl n_gen_pl | adj_pl np_gen_pl | n_gen_pl adv | n_gen_pl n_gen_gov_pl | n_gen_pl n_gen_gov_sg | n_gen_pl np_gen_gov_sg | n_gen_pl np_gen_gov_pl | n_gen_pl attr | particle np_gen_pl | n_gen_pl pp_gov_sg | n_gen_pl pp_gov_pl | n_gen_pl np_gen_gov_pl | np_gen_pl np_gen_gov_pl | n_gen_pl cpp | pp_gen n_gen_pl | n_gen_pl attributiveclause_gen | n_gen_pl cnp | np_gen_pl pp_gov_pl | np_gen_pl pp_gov_sg
np_instr_sg -> par n_instr_sg | n_instr_sg cnp_instr_pl | n_instr_sg pp_gen_gov_sg | n_instr_sg ell_n_gen_gov | np_instr_sg cnp_instr_gov_sg | n_instr_sg fio_gov | adj_gov_sg n_instr_sg | adj_gov_sg np_instr_sg | n_instr_sg adj | n_instr_sg pp_gov_sg | n_instr_sg circ | num n_gen | np_instr cpp | num n_instr_sg | pospron n_instr_sg | n_instr_sg np_gen_gov_sg | n_instr_sg np_gen_gov_pl
np_instr_pl -> n_instr_pl cnp_instr_pl | np_instr_pl cnp_instr_pl | n_instr_pl np_gen_sg | n_instr_pl n_gen_sg | n_instr_pl fio_gov | adj_gov_pl n_instr_pl | n_instr_pl adj | n_instr_pl pp_gov_pl | n_instr_pl circ | num n_gen | np_instr cpp | num n_instr_pl | pospron n_instr_pl | n_instr_pl np_gen_gov_sg | n_instr_pl np_gen_gov_pl
np_dat_sg -> n_dat_sg cnp_dat_sg | par n_dat_sg |n_dat_sg conn_clause | n_dat_sg surname | np_dat_sg cnp_dat_sg | n_dat_sg adj_sg | np_dat_sg cnp_dat_sg | np_dat_sg cnp_dat_pl | adj_sg n_dat_sg | adj_sg np_dat_sg | n_dat_sg n_gen_gov_sg | n_dat_sg n_gen_gov_pl | pospron n_dat_sg
np_dat_pl -> n_dat_pl cnp_dat_pl | par n_dat_pl | n_dat_pl conn_clause | np_dat_pl cnp_dat_pl | n_dat_pl adj_pl | np_dat_pl cnp_dat_sg | np_dat_pl cnp_dat_pl | adj_pl n_dat_pl | adj_pl np_dat_pl | n_dat_pl n_gen_gov_sg | n_dat_pl n_gen_gov_pl | pospron n_dat_pl | n_dat_pl adjp_gov_sg
np_acc_sg -> n_acc_sg compsent | demonstrpron n_acc_sg | n_acc_sg emphpp | np_acc_sg pp_gen_gov_sg | indefpron n_acc_sg | n_acc_sg np_acc_gov_sg | pospron np_acc_sg | np_acc_sg cnp_acc_sg | n_acc_sg adj_gov_sg | n_acc_sg attr | particle np_scc_sg | adjpron np_acc_sg | num n_gen_sg | attrpron n_acc_sg | n_acc_sg n_gen_gov_sg | n_acc_sg n_gen_gov_pl | n_acc_sg np_gen_gov_sg | n_acc_sg np_gen_gov_sg  | attr n_acc_sg | attr np_acc_sg | n_acc_sg  np_gen_gov_sg | n_acc_sg  np_gen_gov_pl | np_acc_sg pp_gov_sg | np_acc_sg pp_gov_pl | particle np_acc_sg | n_acc_sg pp_gov_sg | n_acc_sg pp_gov_pl | n_acc_sg pp_gen_gov_pl | n_acc_sg pp_gen_gov_sg | n_acc_sg n_gen_gov_sg | n_acc_sg n_gen_gov_pl | adjp_gov_sg n_acc_sg | adjp_gov_pl n_acc_sg | adj_gov_sg np_acc_sg | adj_gov_sg n_acc_sg | n_acc_sg np_gen_gov_sg | n_acc_sg np_gen_gov_pl | pospron np_acc_sg | pospron n_acc_sg | n_acc_sg cnp | np_acc_sg cnp | indirectpron np_acc_sg | n_acc_sg compsent | n_acc_sg attributiveclause
np_acc_pl -> n_acc_pl np_instr_gov_sg | particle np_scc_pl | num n_gen_pl | n_acc_pl n_gen_gov_pl | attr n_acc_pl | attr np_acc_pl  | n_acc_pl  np_gen_gov_sg | n_acc_pl np_gen_gov_pl | np_acc_pl pp_gov_sg | np_acc_pl  pp_gov_pl | particle np_acc_pl | n_acc_pl  pp_gov_sg | n_acc_pl  pp_gov_pl | n_acc_pl  pp_gov_pl| n_acc_pl  pp_gen_gov_pl | n_acc_pl  n_gen_gov_sg | n_acc_pl  n_gen_gov_pl | adjp n_acc_pl  | adj_gov_pl np_acc_pl  | adj_gov_pl n_acc_pl  | n_acc_pl  np_gen_gov_sg | n_acc_pl  np_gen_gov_pl | pospron np_acc_pl  | pospron n_acc_pl  | n_acc_pl  cnp | np_acc_pl  cnp | indirectpron np_acc_pl  | n_acc_pl  compsent | np_acc_pl  attributiveclause | n_acc_pl  attributiveclause
demonstr_np_acc -> demonstrpron_gov np_acc_pl | demonstrpron_gov np_acc_sg
demonstr_np_loc -> demonstrpron n_loc_sg
np_loc_sg -> num n_loc_sg | n_loc_sg num | n_loc_sg corpname | demonstrpron n_loc_sg | num np_loc_sg | attrpron n_loc_sg | attrpron np_loc_sg | n_loc_sg inftrans_phrase_gov | n_loc_sg cnp_loc_sg | n_loc_sg cnp_loc_pl | n_loc_sg np_gen_gov_sg | n_loc_sg np_gen_gov_pl |  n_loc_sg np_gen_gov_sg | n_loc_sg n_gen_gov_sg | np_loc_sg cnp | pospron n_loc_sg | pospron np_loc_sg | adj_gov_sg np_loc_attr_sg | adj_gov_pl np_loc_attr_pl | adj_gov_sg np_loc_sg | adj_gov_pl np_loc_pl |  n_loc_sg np_gen_gov_sg |  n_loc_sg n_gen_gov_pl  | n_loc_sg np_gen_gov_pl  | adj_gov_sg n_loc_sg
np_loc_pl -> attrpron np_loc_pl | demonstrpron n_loc_pl | attrpron n_loc_pl | n_loc_pl np_gen_gov_sg | n_loc_pl np_gen_gov_pl |  np_loc_pl cnp | pospron n_loc_pl |  adj_gov_pl np_loc_attr_pl |  n_loc_pl np_gen_gov_sg | n_loc_pl np_gen_gov_pl | adj_gov_pl n_loc_pl
np_loc_attr_sg -> np_loc_sg attributiveclause
np_loc_attr_pl -> np_loc_pl attributiveclause
fio -> name surname_gov | name patro_gov | surname | namepatro surname_gov | surname_gov name
pp_fio -> p fio_gov
Example #24
    def draw(self):
        t = self.sem_labeled()
        from nltk.draw.tree import draw_trees

        draw_trees(t)
Example #25
def demo(choice=None, draw_parses=None, print_parses=None):
    """
    A demonstration of the probabilistic parsers.  The user is
    prompted to select which demo to run, and how many parses should
    be found; and then each parser is run on the same demo, and a
    summary of the results are displayed.
    """
    import sys, time
    from functools import reduce
    from nltk import tokenize
    from nltk.grammar import PCFG
    from nltk.parse import pchart

    # Define two demos.  Each demo has a sentence and a grammar.
    toy_pcfg1 = PCFG.fromstring("""
    S -> NP VP [1.0]
    NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15]
    Det -> 'the' [0.8] | 'my' [0.2]
    N -> 'man' [0.5] | 'telescope' [0.5]
    VP -> VP PP [0.1] | V NP [0.7] | V [0.2]
    V -> 'ate' [0.35] | 'saw' [0.65]
    PP -> P NP [1.0]
    P -> 'with' [0.61] | 'under' [0.39]
    """)

    toy_pcfg2 = PCFG.fromstring("""
    S    -> NP VP         [1.0]
    VP   -> V NP          [.59]
    VP   -> V             [.40]
    VP   -> VP PP         [.01]
    NP   -> Det N         [.41]
    NP   -> Name          [.28]
    NP   -> NP PP         [.31]
    PP   -> P NP          [1.0]
    V    -> 'saw'         [.21]
    V    -> 'ate'         [.51]
    V    -> 'ran'         [.28]
    N    -> 'boy'         [.11]
    N    -> 'cookie'      [.12]
    N    -> 'table'       [.13]
    N    -> 'telescope'   [.14]
    N    -> 'hill'        [.5]
    Name -> 'Jack'        [.52]
    Name -> 'Bob'         [.48]
    P    -> 'with'        [.61]
    P    -> 'under'       [.39]
    Det  -> 'the'         [.41]
    Det  -> 'a'           [.31]
    Det  -> 'my'          [.28]
    """)

    demos = [('I saw John with my telescope', toy_pcfg1),
             ('the boy saw Jack with Bob under the table with a telescope',
              toy_pcfg2)]

    if choice is None:
        # Ask the user which demo they want to use.
        print()
        for i in range(len(demos)):
            print('%3s: %s' % (i + 1, demos[i][0]))
            print('     %r' % demos[i][1])
            print()
        print('Which demo (%d-%d)? ' % (1, len(demos)), end=' ')
        choice = int(sys.stdin.readline().strip()) - 1
    try:
        sent, grammar = demos[choice]
    except:
        print('Bad sentence number')
        return

    # Tokenize the sentence.
    tokens = sent.split()

    # Define a list of parsers.  We'll use all parsers.
    parsers = [
        pchart.InsideChartParser(grammar),
        pchart.RandomChartParser(grammar),
        pchart.UnsortedChartParser(grammar),
        pchart.LongestChartParser(grammar),
        pchart.InsideChartParser(grammar,
                                 beam_size=len(tokens) + 1)  # was BeamParser
    ]

    # Run the parsers on the tokenized sentence.
    times = []
    average_p = []
    num_parses = []
    all_parses = {}
    for parser in parsers:
        print('\ns: %s\nparser: %s\ngrammar: %s' % (sent, parser, grammar))
        parser.trace(3)
        t = time.time()
        parses = list(parser.parse(tokens))
        times.append(time.time() - t)
        p = (reduce(lambda a, b: a + b.prob(), parses, 0) /
             len(parses) if parses else 0)
        average_p.append(p)
        num_parses.append(len(parses))
        for p in parses:
            all_parses[p.freeze()] = 1

    # Print some summary statistics
    print()
    print(
        '       Parser      Beam | Time (secs)   # Parses   Average P(parse)')
    print(
        '------------------------+------------------------------------------')
    for i in range(len(parsers)):
        print('%18s %4d |%11.4f%11d%19.14f' %
              (parsers[i].__class__.__name__, parsers[i].beam_size, times[i],
               num_parses[i], average_p[i]))
    parses = all_parses.keys()
    if parses: p = reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses)
    else: p = 0
    print(
        '------------------------+------------------------------------------')
    print('%18s      |%11s%11d%19.14f' %
          ('(All Parses)', 'n/a', len(parses), p))

    if draw_parses is None:
        # Ask the user if we should draw the parses.
        print()
        print('Draw parses (y/n)? ', end=' ')
        draw_parses = sys.stdin.readline().strip().lower().startswith('y')
    if draw_parses:
        from nltk.draw.tree import draw_trees
        print('  please wait...')
        draw_trees(*parses)

    if print_parses is None:
        # Ask the user if we should print the parses.
        print()
        print('Print parses (y/n)? ', end=' ')
        print_parses = sys.stdin.readline().strip().lower().startswith('y')
    if print_parses:
        for parse in parses:
            print(parse)
Example #26
import nltk
from nltk import CFG
from nltk.draw.tree import draw_trees
import sys

decaf_func_grammar = """
    F -> 'func' ID '(' PARAMS ')' TYPE '{' BODY '}'
    ID -> 'a' | 'b' | 'c' | 'd'
    PARAMS -> HAS_PARAMS | 
    HAS_PARAMS -> ID TYPE ',' HAS_PARAMS | ID TYPE
    TYPE -> 'int' | 'char'
    BODY -> 'return' '(' ID ')' ';'
"""
grammar = CFG.fromstring(decaf_func_grammar)
text = "func a ( b int , c int , d int ) int { return ( c ) ; }".split()
parser = nltk.ChartParser(grammar)
trees = parser.parse(text)
for tree in trees:
    print(tree)
trees = parser.parse(text)
draw_trees(*trees)
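
One detail worth noting above: ChartParser.parse returns a generator, which the first for loop exhausts; that is why parse is called a second time before drawing. Materializing the parses once avoids the reparse (a minimal sketch):

trees = list(parser.parse(text))  # parse once, keep all trees
for tree in trees:
    print(tree)
draw_trees(*trees)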
Example #27
def demo():
    """
    A demonstration of the probabilistic parsers.  The user is
    prompted to select which demo to run, and how many parses should
    be found; and then each parser is run on the same demo, and a
    summary of the results are displayed.
    """
    import sys, time
    from functools import reduce
    from nltk import tokenize
    from nltk.parse import ViterbiParser
    from nltk.grammar import toy_pcfg1, toy_pcfg2

    # Define two demos.  Each demo has a sentence and a grammar.
    demos = [('حرك الكرة الخضراء في أسفل الزاوية اليسرى', learned_pcfg),
             ('حرك الكرة', learned_pcfg),
             ('take the green pyramid and put it in the top left corner', learned_pcfg),
              ('move the pink triangle on top of the black square', learned_pcfg),
              ('move the red block and place it on top of the blue block that is on top of a green block', learned_pcfg),
              ('move the green block on top of the blue block', learned_pcfg)]

    # Ask the user which demo they want to use.
    print()
    for i in range(len(demos)):
        print('%3s: %s' % (i+1, demos[i][0]))
        # print('     %r' % demos[i][1])
        print()
    print('Which demo (%d-%d)? ' % (1, len(demos)), end=' ')
    try:
        snum = int(sys.stdin.readline().strip())-1
        sent, grammar = demos[snum]
    except:
        print('Bad sentence number')
        return

    # Tokenize the sentence.
    tokens = sent.split()
    print(grammar)

    parser = ViterbiParser(grammar)
    all_parses = {}

    # print('\nsent: %s\nparser: %s\ngrammar: %s' % (sent,parser,grammar))
    parser.trace(3)
    t = time.time()
    parses = parser.parse_all(tokens)
    time = time.time()-t
    average = (reduce(lambda a,b:a+b.prob(), parses, 0)/len(parses)
               if parses else 0)
    num_parses = len(parses)
    for p in parses:
        all_parses[p.freeze()] = 1

    # Print some summary statistics
    print()
    print('Time (secs)   # Parses   Average P(parse)')
    print('-----------------------------------------')
    print('%11.4f%11d%19.14f' % (time, num_parses, average))
    parses = all_parses.keys()
    if parses:
        p = reduce(lambda a,b:a+b.prob(), parses, 0)/len(parses)
    else: p = 0
    print('------------------------------------------')
    print('%11s%11d%19.14f' % ('n/a', len(parses), p))

    # Ask the user if we should draw the parses.
    print()
    print('Draw parses (y/n)? ', end=' ')
    if sys.stdin.readline().strip().lower().startswith('y'):
        from nltk.draw.tree import draw_trees
        print('  please wait...')
        draw_trees(*parses)

    # Ask the user if we should print the parses.
    print()
    print('Print parses (y/n)? ', end=' ')
    if sys.stdin.readline().strip().lower().startswith('y'):
        for parse in parses:
            print(parse)
Example #29
def demo():
    """
    A demonstration of the probabilistic parsers.  The user is
    prompted to select which demo to run, and how many parses should
    be found; and then each parser is run on the same demo, and a
    summary of the results are displayed.
    """
    import sys
    import time
    from functools import reduce

    from nltk import tokenize
    from nltk.grammar import PCFG
    from nltk.parse import ViterbiParser

    toy_pcfg1 = PCFG.fromstring("""
    S -> NP VP [1.0]
    NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15]
    Det -> 'the' [0.8] | 'my' [0.2]
    N -> 'man' [0.5] | 'telescope' [0.5]
    VP -> VP PP [0.1] | V NP [0.7] | V [0.2]
    V -> 'ate' [0.35] | 'saw' [0.65]
    PP -> P NP [1.0]
    P -> 'with' [0.61] | 'under' [0.39]
    """)

    toy_pcfg2 = PCFG.fromstring("""
    S    -> NP VP         [1.0]
    VP   -> V NP          [.59]
    VP   -> V             [.40]
    VP   -> VP PP         [.01]
    NP   -> Det N         [.41]
    NP   -> Name          [.28]
    NP   -> NP PP         [.31]
    PP   -> P NP          [1.0]
    V    -> 'saw'         [.21]
    V    -> 'ate'         [.51]
    V    -> 'ran'         [.28]
    N    -> 'boy'         [.11]
    N    -> 'cookie'      [.12]
    N    -> 'table'       [.13]
    N    -> 'telescope'   [.14]
    N    -> 'hill'        [.5]
    Name -> 'Jack'        [.52]
    Name -> 'Bob'         [.48]
    P    -> 'with'        [.61]
    P    -> 'under'       [.39]
    Det  -> 'the'         [.41]
    Det  -> 'a'           [.31]
    Det  -> 'my'          [.28]
    """)

    # Define two demos.  Each demo has a sentence and a grammar.
    demos = [
        ("I saw the man with my telescope", toy_pcfg1),
        ("the boy saw Jack with Bob under the table with a telescope",
         toy_pcfg2),
    ]

    # Ask the user which demo they want to use.
    print()
    for i in range(len(demos)):
        print(f"{i + 1:>3}: {demos[i][0]}")
        print("     %r" % demos[i][1])
        print()
    print("Which demo (%d-%d)? " % (1, len(demos)), end=" ")
    try:
        snum = int(sys.stdin.readline().strip()) - 1
        sent, grammar = demos[snum]
    except:
        print("Bad sentence number")
        return

    # Tokenize the sentence.
    tokens = sent.split()

    parser = ViterbiParser(grammar)
    all_parses = {}

    print(f"\nsent: {sent}\nparser: {parser}\ngrammar: {grammar}")
    parser.trace(3)
    t = time.time()
    parses = parser.parse_all(tokens)
    time = time.time() - t
    average = (reduce(lambda a, b: a + b.prob(), parses, 0) /
               len(parses) if parses else 0)
    num_parses = len(parses)
    for p in parses:
        all_parses[p.freeze()] = 1

    # Print some summary statistics
    print()
    print("Time (secs)   # Parses   Average P(parse)")
    print("-----------------------------------------")
    print("%11.4f%11d%19.14f" % (time, num_parses, average))
    parses = all_parses.keys()
    if parses:
        p = reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses)
    else:
        p = 0
    print("------------------------------------------")
    print("%11s%11d%19.14f" % ("n/a", len(parses), p))

    # Ask the user if we should draw the parses.
    print()
    print("Draw parses (y/n)? ", end=" ")
    if sys.stdin.readline().strip().lower().startswith("y"):
        from nltk.draw.tree import draw_trees

        print("  please wait...")
        draw_trees(*parses)

    # Ask the user if we should print the parses.
    print()
    print("Print parses (y/n)? ", end=" ")
    if sys.stdin.readline().strip().lower().startswith("y"):
        for parse in parses:
            print(parse)
Example #30
def demo():
    """
    A demonstration of the probabilistic parsers.  The user is
    prompted to select which demo to run, and how many parses should
    be found; and then each parser is run on the same demo, and a
    summary of the results are displayed.
    """
    import sys, time
    from functools import reduce
    from nltk import tokenize
    from nltk.parse import ViterbiParser
    from nltk.grammar import toy_pcfg1, toy_pcfg2

    # Define two demos.  Each demo has a sentence and a grammar.
    demos = [
        ('I saw the man with my telescope', toy_pcfg1),
        ('the boy saw Jack with Bob under the table with a telescope',
         toy_pcfg2),
    ]

    # Ask the user which demo they want to use.
    print()
    for i in range(len(demos)):
        print('%3s: %s' % (i + 1, demos[i][0]))
        print('     %r' % demos[i][1])
        print()
    print('Which demo (%d-%d)? ' % (1, len(demos)), end=' ')
    try:
        snum = int(sys.stdin.readline().strip()) - 1
        sent, grammar = demos[snum]
    except:
        print('Bad sentence number')
        return

    # Tokenize the sentence.
    tokens = sent.split()

    parser = ViterbiParser(grammar)
    all_parses = {}

    print('\nsent: %s\nparser: %s\ngrammar: %s' % (sent, parser, grammar))
    parser.trace(3)
    t = time.time()
    parses = parser.parse_all(tokens)
    time = time.time() - t
    average = (reduce(lambda a, b: a + b.prob(), parses, 0) /
               len(parses) if parses else 0)
    num_parses = len(parses)
    for p in parses:
        all_parses[p.freeze()] = 1

    # Print some summary statistics
    print()
    print('Time (secs)   # Parses   Average P(parse)')
    print('-----------------------------------------')
    print('%11.4f%11d%19.14f' % (time, num_parses, average))
    parses = all_parses.keys()
    if parses:
        p = reduce(lambda a, b: a + b.prob(), parses, 0) / len(parses)
    else:
        p = 0
    print('------------------------------------------')
    print('%11s%11d%19.14f' % ('n/a', len(parses), p))

    # Ask the user if we should draw the parses.
    print()
    print('Draw parses (y/n)? ', end=' ')
    if sys.stdin.readline().strip().lower().startswith('y'):
        from nltk.draw.tree import draw_trees

        print('  please wait...')
        draw_trees(*parses)

    # Ask the user if we should print the parses.
    print()
    print('Print parses (y/n)? ', end=' ')
    if sys.stdin.readline().strip().lower().startswith('y'):
        for parse in parses:
            print(parse)
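
A note on the all_parses[p.freeze()] = 1 idiom above: a mutable Tree is not hashable, so it cannot serve as a dict key; freeze() returns an immutable, hashable copy, and the dict therefore deduplicates identical parses. A standalone sketch:

from nltk import Tree

t = Tree.fromstring('(S (NP I) (VP saw))')
seen = {}
seen[t.freeze()] = 1       # ImmutableTree is hashable; a plain Tree is not
print(t.freeze() in seen)  # True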
Beispiel #31
0
def demo():
    """
    A demonstration of the probabilistic parsers.  The user is
    prompted to select which demo to run, and how many parses should
    be found; and then each parser is run on the same demo, and a
    summary of the results are displayed.
    """
    import sys
    import time

    from nltk.grammar import toy_pcfg1, toy_pcfg2
    from nltk.parse import ViterbiParser

    # Define two demos.  Each demo has a sentence and a grammar.
    demos = [
        ("I saw the man with my telescope", toy_pcfg1),
        ("the boy saw Jack with Bob under the table with a telescope",
         toy_pcfg2),
    ]

    # Ask the user which demo they want to use.
    print()
    for i in range(len(demos)):
        print(f"{i + 1:>3}: {demos[i][0]}")
        print("     %r" % demos[i][1])
        print()
    print("Which demo (%d-%d)? " % (1, len(demos)), end=" ")
    try:
        snum = int(sys.stdin.readline().strip()) - 1
        sent, grammar = demos[snum]
    except (ValueError, IndexError):
        print("Bad sentence number")
        return

    # Tokenize the sentence.
    tokens = sent.split()

    parser = ViterbiParser(grammar)
    all_parses = {}

    print(f"\nsent: {sent}\nparser: {parser}\ngrammar: {grammar}")
    parser.trace(3)
    t = time.time()
    parses = parser.parse_all(tokens)
    elapsed = time.time() - t  # avoid shadowing the time module
    average = (sum(p.prob() for p in parses) / len(parses)
               if parses else 0)
    num_parses = len(parses)
    for p in parses:
        all_parses[p.freeze()] = 1

    # Print some summary statistics
    print()
    print("Time (secs)   # Parses   Average P(parse)")
    print("-----------------------------------------")
    print("%11.4f%11d%19.14f" % (time, num_parses, average))
    parses = all_parses.keys()
    if parses:
        p = sum(parse.prob() for parse in parses) / len(parses)
    else:
        p = 0
    print("------------------------------------------")
    print("%11s%11d%19.14f" % ("n/a", len(parses), p))

    # Ask the user if we should draw the parses.
    print()
    print("Draw parses (y/n)? ", end=" ")
    if sys.stdin.readline().strip().lower().startswith("y"):
        from nltk.draw.tree import draw_trees

        print("  please wait...")
        draw_trees(*parses)

    # Ask the user if we should print the parses.
    print()
    print("Print parses (y/n)? ", end=" ")
    if sys.stdin.readline().strip().lower().startswith("y"):
        for parse in parses:
            print(parse)
Beispiel #32
0
from nltk import word_tokenize
from nltk import CFG
from nltk import ChartParser
from nltk.draw.tree import draw_trees

def read_grammar(grammarfile):
	with open(grammarfile) as gf:  # context manager so the file is closed
		return CFG.fromstring(gf.read())

def print_trees(trees):
	for t in trees:
		print(t)

def parse_sentences(grammar, sent):
	parser = ChartParser(grammar)
	tokens = word_tokenize(sent)
	trees = parser.parse(tokens)
	return trees

if __name__ == '__main__':
	sen = 'He worked for the BBC for a decade'
	g = read_grammar('grammar.txt')
	trees = parse_sentences(g, sen)
	for tree in trees:
		draw_trees(tree)
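
Beispiel #32 reads its CFG from a grammar.txt file that is not shown. A hypothetical minimal grammar covering the demo sentence might look like the following (the rules are an illustration, not the original file):

from nltk import CFG, ChartParser

grammar = CFG.fromstring("""
S -> NP VP
NP -> PRP | DT NNP | DT NN
VP -> VBD PP PP
PP -> IN NP
PRP -> 'He'
VBD -> 'worked'
IN -> 'for'
DT -> 'the' | 'a'
NNP -> 'BBC'
NN -> 'decade'
""")

for tree in ChartParser(grammar).parse('He worked for the BBC for a decade'.split()):
    print(tree)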
Beispiel #33
0
# Assumes NLTK plus project-specific helpers (getPitchListFromFile,
# getIntervalStringsFromPitchList, validate_tree) defined elsewhere.
from nltk import Tree
from nltk.parse import ViterbiParser

def parseAllTestXmls(fileList, grammar, allTestSolutionsDict, verbose=False, displayTrees=False):
	testPitchLists = []
	testIntervalLists = []
	totalCorrect = 0
	totalCorrectNonN = 0
	totalProductions = 0
	totalLeaves = 0
	parseTreeStrings = {}
	for filepath in fileList:
		curPitchList = getPitchListFromFile(filepath)
		testPitchLists.append(curPitchList)
		testIntervalLists.append(getIntervalStringsFromPitchList(curPitchList, verbose))
		if verbose:
			print(testIntervalLists[-1])
		listLen = len(testIntervalLists[-1])
		if verbose:
			print("interval list length: %d" % listLen)
		parser = ViterbiParser(grammar)
		parser.trace(3 if verbose else 0)  # 3 gives a verbose chart trace
		try:
			parses = parser.parse_all(testIntervalLists[-1])
		except Exception as errorMsg:
			print("error parsing file " + filepath)
			print(errorMsg)
			parses = []
		numTrees = len(parses)
		if numTrees > 0 and displayTrees:
			from nltk.draw.tree import draw_trees
			draw_trees(*parses)
		if numTrees == 0:
			print("Couldn't find a valid parse for " + filepath)
			return 0, 0, 0, 0, {}
		numCorrect = 0
		numCorrectNonN = 0
		bottomCorrect = 0
		bottomCorrectNonN = 0
		solutionTree = None
		try:
			solutionTreeStr = allTestSolutionsDict[filepath]
			solutionTree = Tree.fromstring(solutionTreeStr)
		except Exception as errorMsg:
			print("couldn't find solution for file " + filepath)
			print(errorMsg)
		if solutionTree is not None:
			parseTreeStrings[filepath] = str(parses[0])
			numCorrect, numCorrectNonN = validate_tree.compareTrees(solutionTree, parses[0])
			numProductions = len(solutionTree.productions())
			totalProductions += numProductions

			#solutionTree.draw()
			#parses[0].draw()
			bottomCorrect, bottomCorrectNonN = validate_tree.compareTreesBottomUp(solutionTree, parses[0])
			parseTreeStrings[filepath+'_afterComparison'] = str(parses[0])
			totalLeaves += len(solutionTree.leaves())
			#parses[0].draw()

		totalCorrect += bottomCorrect
		totalCorrect += numCorrect
		totalCorrectNonN += numCorrectNonN
		totalCorrectNonN += bottomCorrectNonN
	return totalCorrect, totalCorrectNonN, totalProductions, totalLeaves, parseTreeStrings
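
validate_tree.compareTrees above is project-specific and not shown. As a rough stand-in, production overlap between a gold tree and a predicted tree can be counted with plain NLTK (a sketch, not the original metric):

from nltk import Tree

gold = Tree.fromstring('(S (NP (N he)) (VP (V ran)))')
pred = Tree.fromstring('(S (NP (N he)) (VP (V slept)))')
gold_prods = gold.productions()
matched = sum(1 for p in pred.productions() if p in gold_prods)
print('%d of %d productions match' % (matched, len(gold_prods)))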
Beispiel #34
0
def demo(choice=None, draw_parses=None, print_parses=None):
    """
    A demonstration of the probabilistic parsers.  The user is
    prompted to select which demo to run, and how many parses should
    be found; and then each parser is run on the same demo, and a
    summary of the results are displayed.
    """
    import sys, time
    from nltk import toy_pcfg1, toy_pcfg2
    from nltk.parse import pchart

    # Define two demos.  Each demo has a sentence and a grammar.
    demos = [('I saw John with my telescope', toy_pcfg1),
             ('the boy saw Jack with Bob under the table with a telescope',
              toy_pcfg2)]

    if choice is None:
        # Ask the user which demo they want to use.
        print
        for i in range(len(demos)):
            print '%3s: %s' % (i+1, demos[i][0])
            print '     %r' % demos[i][1]
            print
        print 'Which demo (%d-%d)? ' % (1, len(demos)),
        choice = int(sys.stdin.readline().strip())-1
    try:
        sent, grammar = demos[choice]
    except (ValueError, IndexError):
        print 'Bad sentence number'
        return

    # Tokenize the sentence.
    tokens = sent.split()

    # Define a list of parsers.  We'll use all parsers.
    parsers = [
        pchart.InsideChartParser(grammar),
        pchart.RandomChartParser(grammar),
        pchart.UnsortedChartParser(grammar),
        pchart.LongestChartParser(grammar),
        pchart.InsideChartParser(grammar, beam_size = len(tokens)+1)   # was BeamParser
        ]

    # Run the parsers on the tokenized sentence.
    times = []
    average_p = []
    num_parses = []
    all_parses = {}
    for parser in parsers:
        print '\nsent: %s\nparser: %s\ngrammar: %s' % (sent, parser, grammar)
        parser.trace(3)
        t = time.time()
        parses = parser.nbest_parse(tokens)
        times.append(time.time()-t)
        if parses: p = reduce(lambda a,b:a+b.prob(), parses, 0)/len(parses)
        else: p = 0
        average_p.append(p)
        num_parses.append(len(parses))
        for p in parses: all_parses[p.freeze()] = 1

    # Print some summary statistics
    print
    print '       Parser      Beam | Time (secs)   # Parses   Average P(parse)'
    print '------------------------+------------------------------------------'
    for i in range(len(parsers)):
        print '%18s %4d |%11.4f%11d%19.14f' % (parsers[i].__class__.__name__,
                                             parsers[i].beam_size,
                                             times[i],num_parses[i],average_p[i])
    parses = all_parses.keys()
    if parses: p = reduce(lambda a,b:a+b.prob(), parses, 0)/len(parses)
    else: p = 0
    print '------------------------+------------------------------------------'
    print '%18s      |%11s%11d%19.14f' % ('(All Parses)', 'n/a', len(parses), p)

    if draw_parses is None:
        # Ask the user if we should draw the parses.
        print
        print 'Draw parses (y/n)? ',
        draw_parses = sys.stdin.readline().strip().lower().startswith('y')
    if draw_parses:
        from nltk.draw.tree import draw_trees
        print '  please wait...'
        draw_trees(*parses)

    if print_parses is None:
        # Ask the user if we should print the parses.
        print
        print 'Print parses (y/n)? ',
        print_parses = sys.stdin.readline().strip().lower().startswith('y')
    if print_parses:
        for parse in parses:
            print parse
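
The beam_size argument above bounds how many edges the inside-probability parser keeps when pruning its queue; in NLTK 3 a beam_size of 0 disables pruning, and nbest_parse was renamed parse_all. A sketch of the trade-off under those assumptions:

from nltk.grammar import toy_pcfg2
from nltk.parse import pchart

tokens = 'the boy saw Jack with Bob under the table with a telescope'.split()
for beam in (0, len(tokens) + 1):  # 0 = no pruning; a small beam may drop parses
    parser = pchart.InsideChartParser(toy_pcfg2, beam_size=beam)
    parses = parser.parse_all(tokens)
    print('beam_size=%d -> %d parses' % (beam, len(parses)))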
Beispiel #35
0
def make(op):
	if op == 0:
		for name in case.keys():
			if name != "Corpus":
				print name
				print len(inst[name].grammar.productions()), "productions in grammar"
				print len(inst[name].parser.grammar().productions()), "productions used in parser"
				print "---"
	elif op == 1:
		cant_oraciones = inst["Corpus"].cant_oraciones()
		print CR,"Cantidad de oraciones: "
		print cant_oraciones
	elif op == 2:
		oracion_mas_larga = inst["Corpus"].oracion_mas_larga()
		print CR,"Oracion mas larga:"
		print len(oracion_mas_larga.split(' ')), "palabras"
		print oracion_mas_larga
	elif op == 3:
		largo_promedio_oracion = inst["Corpus"].largo_promedio_oracion()
		print CR,"Largo promedio de oración:"
		print largo_promedio_oracion, "letras"
	elif op == 4:
		palabras_frecs = inst["Corpus"].palabras_frecs()
		print CR,"Palabras frecuentes:"
		print '\n\t'.join(map(str,sorted(palabras_frecs.items(),key=lambda x:x[1],reverse=True)[:20]))
	elif op == 5:
		palabras_frecs_cat = inst["Corpus"].palabras_frecs_cat()
		print CR,"Palabras frecuentes por categoria:"
		print '\n\t'.join(map(str,sorted(palabras_frecs_cat.items(),key=lambda x:len(x[1]),reverse=True)[:20]))
	elif op == 6:
		arbol_min_nodos = inst["Corpus"].arbol_min_nodos()
		print CR,"Arbol con minima cantidad de nodos:"
		print len(arbol_min_nodos.treepositions()), "nodos"
		draw_trees(arbol_min_nodos)
	elif op == 7:
		arbol_max_nodos = inst["Corpus"].arbol_max_nodos()
		print CR,"Arbol con maxima cantidad de nodos:"
		print len(arbol_max_nodos.treepositions()), "nodos"
		draw_trees(arbol_max_nodos) 
	elif op == 8:
		lema = raw_input('\r'+' '*20+'\rLemma > ')
		if not lema: lema = "mostrar"
		print "Processing..."
		arboles_con_lema = inst["Corpus"].arboles_con_lema(lema)
		if arboles_con_lema:
			print CR, "Trees with lemma '", lema, "'"
			print len(arboles_con_lema), "trees"
			if raw_input("Draw? [y/n] ") == 'y': draw_trees(*arboles_con_lema)
			print "** Example sentence **"
			print ' '.join(arboles_con_lema[randint(0, len(arboles_con_lema)-1)].leaves())
		else: print CR, "No trees with lemma '", lema, "'"
	elif op == 9:
		print CR,len(inst["PCFG"].reglas_no_lexicas()), "reglas no lexicas"
	elif op == 10:
		print CR,len(inst["PCFG"].categorias_lexicas()), "categorias lexicas"
	elif op == 11:
		cat = raw_input(CR + 'Category > ')
		if not cat: cat = "vmip3s0"
		print CR, len(inst["PCFG"].reglas_lexicas(cat)), "lexical rules with category '", cat, "'"
	elif op == 12:
		sent = inst["PCFG"].sents[0]
		parsed = inst["PCFG"].parse(sent)
	elif op == 13:
		sent = inst["PCFG"].sents[1]
		parsed = inst["PCFG"].parse(sent)
	elif op == 14:
		sent = inst["PCFG"].sents[2]
		parsed = inst["PCFG"].parse(sent)
	elif op == 15:
		sent = inst["PCFG_UNK"].sents[0]
		parsed = inst["PCFG_UNK"].parse(sent)
	elif op == 16:
		sent = inst["PCFG_UNK"].sents[1]
		parsed = inst["PCFG_UNK"].parse(sent)
	elif op == 17:
		sent = inst["PCFG_LEX"].sents[0]
		parsed = inst["PCFG"].parse(sent)
	elif op == 18:
		sent = inst["PCFG_LEX"].sents[0]
		parsed = inst["PCFG_LEX"].parse(sent)
	elif op == 19:
		sent = inst["PCFG_LEX_VERB"].sents[0]
		parsed = inst["PCFG_LEX"].parse(sent)
	elif op == 20:
		sent = inst["PCFG_LEX_VERB"].sents[1]
		parsed = inst["PCFG_LEX"].parse(sent)
	elif op == 21:
		sent = inst["PCFG_LEX_VERB"].sents[0]
		parsed = inst["PCFG_LEX_VERB"].parse(sent)
	elif op == 22:
		sent = inst["PCFG_LEX_VERB"].sents[1]
		parsed = inst["PCFG_LEX_VERB"].parse(sent)
	elif op == 23:
		sent = inst["PCFG_LEX_VERB"].sents[2]
		parsed = inst["PCFG_LEX"].parse(sent)
	elif op == 24:
		sent = inst["PCFG_LEX_VERB"].sents[2]
		parsed = inst["PCFG_LEX_VERB"].parse(sent)
	else:
		print "Comando no valido!"
	if op in range(12,25):
		parsed = list(parsed)
		print CR, "Sentence:\n%s\n" % sent
		print "Number of parses:\n%i\n" % len(parsed)
		for i, parse in enumerate(parsed):
			print "************* [%i] *************" % (i+1)
			print parse
			if raw_input("Draw? [y/n] ") == 'y': draw_trees(parse)