def demo():
    """
    A demonstration showing how each tree transform can be used.

    A bracketed WSJ sentence is parsed into a tree.  Copies of that tree are
    then passed through unary collapsing, Chomsky normal form conversion
    (plain, and with Markov annotation) and the inverse CNF transform.  The
    round-tripped tree is converted back to bracketed text, compared with the
    input sentence, and every intermediate tree is handed to ``draw_trees``.
    """
    from nodebox_linguistics_extended.parser.nltk_lite.draw.tree import draw_trees
    from nodebox_linguistics_extended.parser.nltk_lite.parse import bracket_parse
    from nodebox_linguistics_extended.parser.nltk_lite.parse import treetransforms
    from copy import deepcopy

    # original tree from WSJ bracketed text
    sentence = "(TOP (S (S (VP (VBN Turned) (ADVP (RB loose)) (PP (IN in) (NP (NP (NNP Shane) (NNP Longman) (POS 's)) (NN trading) (NN room))))) (, ,) (NP (DT the) (NN yuppie) (NNS dealers)) (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))) (. .)))"
    tree = bracket_parse(sentence)

    # Each transform is applied to a deep copy, so the earlier trees are
    # still available unchanged for the final drawing.

    # collapse subtrees with only one child
    collapsedTree = deepcopy(tree)
    treetransforms.collapseUnary(collapsedTree)

    # convert the tree to CNF
    cnfTree = deepcopy(collapsedTree)
    treetransforms.chomskyNormalForm(cnfTree)

    # convert the tree to CNF with parent annotation (one level) and
    # horizontal smoothing of order two
    parentTree = deepcopy(collapsedTree)
    treetransforms.chomskyNormalForm(parentTree, horzMarkov=2, vertMarkov=1)

    # convert the tree back to its original form (used to make CYK results
    # comparable)
    original = deepcopy(parentTree)
    treetransforms.unChomskyNormalForm(original)

    # convert tree back to bracketed text
    sentence2 = treetransforms.toTreebank(original)
    # print() as a function: the print statement is a syntax error on
    # Python 3; the emitted text is the same as before.
    print("Sentences the same? ", sentence == sentence2)

    draw_trees(tree, collapsedTree, cnfTree, parentTree, original)
def demo():
    """
    A demonstration of the probabilistic chart parsers.

    The user is prompted to select one of two demo sentences (each paired
    with a toy grammar).  Every parser in a fixed list is then run on that
    sentence, and a summary table of the time taken, the number of parses
    and the average parse probability is printed.  Finally the user is asked
    whether the distinct parses should be drawn and/or printed.
    """
    import sys, time
    from nodebox_linguistics_extended.parser.nltk_lite import tokenize
    from nodebox_linguistics_extended.parser.nltk_lite.parse import cfg, pcfg, pchart

    # Define two demos.  Each demo has a sentence and a grammar.
    demos = [('I saw John with my cookie', pcfg.toy1),
             ('the boy saw Jack with Bob under the table with a telescope',
              pcfg.toy2)]

    # Ask the user which demo they want to use.
    print()
    for number, (text, toy_grammar) in enumerate(demos, 1):
        print('%3s: %s' % (number, text))
        print(' %r' % toy_grammar)
        print()
    print('Which demo (%d-%d)? ' % (1, len(demos)), end=' ')
    # The prompt has no trailing newline, so push it out before blocking
    # on stdin.
    sys.stdout.flush()
    try:
        snum = int(sys.stdin.readline().strip()) - 1
    except ValueError:
        snum = -1
    # Reject out-of-range answers explicitly; otherwise 0 or a negative
    # number would silently pick a demo via negative indexing.
    if not 0 <= snum < len(demos):
        print('Bad sentence number')
        return
    sent, grammar = demos[snum]

    # Tokenize the sentence.
    tokens = list(tokenize.whitespace(sent))

    # Define a list of parsers.  We'll use all parsers.
    parsers = [
        pchart.InsideParse(grammar),
        pchart.RandomParse(grammar),
        pchart.UnsortedParse(grammar),
        pchart.LongestParse(grammar),
        pchart.BeamParse(len(tokens) + 1, grammar),
    ]

    # Run the parsers on the tokenized sentence.
    times = []
    average_p = []
    num_parses = []
    all_parses = {}  # frozen parse -> 1; used to de-duplicate across parsers
    for parser in parsers:
        # Show the grammar that was actually selected (the pcfg module
        # itself used to be printed here).
        print('\ns: %s\nparser: %s\ngrammar: %s' % (sent, parser, grammar))
        parser.trace(3)
        t = time.time()
        parses = parser.get_parse_list(tokens)
        times.append(time.time() - t)
        if parses:
            average = sum(parse.prob() for parse in parses) / len(parses)
        else:
            average = 0
        average_p.append(average)
        num_parses.append(len(parses))
        for parse in parses:
            all_parses[parse.freeze()] = 1

    # Print some summary statistics
    print()
    print(' Parser | Time (secs) # Parses Average P(parse)')
    print('-------------------+------------------------------------------')
    for parser, elapsed, count, average in zip(parsers, times, num_parses, average_p):
        print('%18s |%11.4f%11d%19.14f' %
              (parser.__class__.__name__, elapsed, count, average))
    parses = list(all_parses)
    if parses:
        p = sum(parse.prob() for parse in parses) / len(parses)
    else:
        p = 0
    print('-------------------+------------------------------------------')
    print('%18s |%11s%11d%19.14f' % ('(All Parses)', 'n/a', len(parses), p))

    # Ask the user if we should draw the parses.
    print()
    print('Draw parses (y/n)? ', end=' ')
    sys.stdout.flush()
    if sys.stdin.readline().strip().lower().startswith('y'):
        from nodebox_linguistics_extended.parser.nltk_lite.draw.tree import draw_trees
        print(' please wait...')
        draw_trees(*parses)

    # Ask the user if we should print the parses.
    print()
    print('Print parses (y/n)? ', end=' ')
    sys.stdout.flush()
    if sys.stdin.readline().strip().lower().startswith('y'):
        for parse in parses:
            print(parse)
def draw(self):
    """
    Display this tree as a graphical diagram in a newly opened window.
    """
    # The drawing module is imported here, at call time, rather than when
    # the enclosing module is loaded.
    from nodebox_linguistics_extended.parser.nltk_lite.draw.tree import draw_trees as _show_trees

    _show_trees(self)
def demo():
    """
    A demonstration of the Viterbi probabilistic parser.

    The user is prompted to select one of two demo sentences (each paired
    with a toy grammar).  A ``ViterbiParse`` parser is run on that sentence
    and the time taken, the number of parses and the average parse
    probability are printed, followed by the same figures over the distinct
    parses.  Finally the user is asked whether the parses should be drawn
    and/or printed.
    """
    import sys, time
    from nodebox_linguistics_extended.parser.nltk_lite import tokenize
    from nodebox_linguistics_extended.parser.nltk_lite.parse import cfg, pcfg, ViterbiParse

    # Define two demos.  Each demo has a sentence and a grammar.
    demos = [('I saw John with my cookie', pcfg.toy1),
             ('the boy saw Jack with Bob under the table with a telescope',
              pcfg.toy2)]

    # Ask the user which demo they want to use.
    print()
    for number, (text, toy_grammar) in enumerate(demos, 1):
        print('%3s: %s' % (number, text))
        print(' %r' % toy_grammar)
        print()
    print('Which demo (%d-%d)? ' % (1, len(demos)), end=' ')
    # The prompt has no trailing newline, so push it out before blocking
    # on stdin.
    sys.stdout.flush()
    try:
        snum = int(sys.stdin.readline().strip()) - 1
    except ValueError:
        snum = -1
    # Reject out-of-range answers explicitly; otherwise 0 or a negative
    # number would silently pick a demo via negative indexing.
    if not 0 <= snum < len(demos):
        print('Bad sentence number')
        return
    sent, grammar = demos[snum]

    # Tokenize the sentence.
    tokens = list(tokenize.whitespace(sent))

    parser = ViterbiParse(grammar)
    all_parses = {}  # frozen parse -> 1; used to de-duplicate the parses

    print('\nsent: %s\nparser: %s\ngrammar: %s' % (sent, parser, grammar))
    parser.trace(3)
    t = time.time()
    parses = parser.get_parse_list(tokens)
    # Kept in its own name so the ``time`` module is not rebound.
    elapsed = time.time() - t
    if parses:
        average = sum(parse.prob() for parse in parses) / len(parses)
    else:
        average = 0
    num_parses = len(parses)
    for parse in parses:
        all_parses[parse.freeze()] = 1

    # Print some summary statistics
    print()
    print('Time (secs) # Parses Average P(parse)')
    print('-----------------------------------------')
    print('%11.4f%11d%19.14f' % (elapsed, num_parses, average))
    # A real list (not a dict view) so it can be tested, counted and
    # unpacked below.
    parses = list(all_parses)
    if parses:
        p = sum(parse.prob() for parse in parses) / len(parses)
    else:
        p = 0
    print('------------------------------------------')
    print('%11s%11d%19.14f' % ('n/a', len(parses), p))

    # Ask the user if we should draw the parses.
    print()
    print('Draw parses (y/n)? ', end=' ')
    sys.stdout.flush()
    if sys.stdin.readline().strip().lower().startswith('y'):
        from nodebox_linguistics_extended.parser.nltk_lite.draw.tree import draw_trees
        print(' please wait...')
        draw_trees(*parses)

    # Ask the user if we should print the parses.
    print()
    print('Print parses (y/n)? ', end=' ')
    sys.stdout.flush()
    if sys.stdin.readline().strip().lower().startswith('y'):
        for parse in parses:
            print(parse)
def main():
    """
    Command-line driver for the hole semantics demo.

    Reads a grammar file (the first positional argument, or ``hole.cfg``
    when none is given), prompts for a sentence on stdin, parses it, and for
    every parse prints the formulas obtained from each plugging of its hole
    semantics representation.

    Options:
      -c / --components  show the hole semantics components
      -r / --raw         show the raw hole semantics expression
      -d / --drawtrees   show formula trees in a GUI window
      -v / --verbose     show more information during parse (repeatable)
    """
    import sys
    from optparse import OptionParser

    # optparse expands "%prog" itself, so the usage string can be a plain
    # literal.
    usage = "%prog [options] [grammar_file]"

    opts = OptionParser(usage=usage)
    opts.add_option("-c", "--components",
                    action="store_true", dest="show_components", default=False,
                    help="show hole semantics components")
    opts.add_option("-r", "--raw",
                    action="store_true", dest="show_raw", default=False,
                    help="show the raw hole semantics expression")
    opts.add_option("-d", "--drawtrees",
                    action="store_true", dest="draw_trees", default=False,
                    help="show formula trees in a GUI window")
    opts.add_option("-v", "--verbose",
                    action="count", dest="verbosity", default=0,
                    help="show more information during parse")

    (options, args) = opts.parse_args()

    if args:
        filename = args[0]
    else:
        filename = 'hole.cfg'

    print('Reading grammar file', filename)
    grammar = GrammarFile.read_file(filename)
    parser = grammar.earley_parser(trace=options.verbosity)

    # Prompt the user for a sentence.
    print('Sentence: ', end=' ')
    # The prompt has no trailing newline, so push it out before blocking
    # on stdin.
    sys.stdout.flush()
    # Strip only the line terminator; slicing off the last character would
    # eat real input when the line ends at EOF without a newline.
    line = sys.stdin.readline().rstrip('\n')

    # Parse the sentence.
    tokens = list(tokenize.whitespace(line))
    parses = parser.get_parse_list(tokens)
    print('Got %d different parses' % len(parses))

    for parse in parses:
        # Get the semantic feature from the top of the parse tree.
        sem = parse[0].node['sem'].simplify()

        # Skolemise away all quantifiers.  All variables become unique.
        sem = sem.skolemise()

        # Reparse the semantic representation from its bracketed string format.
        # I find this uniform structure easier to handle.  It also makes the
        # code mostly independent of the lambda calculus classes.
        usr = bracket_parse(str(sem))

        # Break the hole semantics representation down into its components
        # i.e. holes, labels, formula fragments and constraints.
        hole_sem = HoleSemantics(usr)

        # Maybe print the raw semantic representation.
        if options.show_raw:
            print()
            print('Raw expression')
            print(usr)

        # Maybe show the details of the semantic representation.
        if options.show_components:
            print()
            print('Holes: ', hole_sem.holes)
            print('Labels: ', hole_sem.labels)
            print('Constraints: ', hole_sem.constraints)
            print('Top hole: ', hole_sem.top_hole)
            print('Top labels: ', hole_sem.top_most_labels)
            print('Fragments:')
            for label, fragment in hole_sem.fragments.items():
                print('\t%s: %s' % (label, fragment))

        # Find all the possible ways to plug the formulas together.
        pluggings = hole_sem.pluggings()

        # Build FOL formula trees using the pluggings.  A list is built
        # (rather than a lazy map) because the trees are walked twice:
        # once to print them and once to draw them.  Distinct names also
        # keep the outer loop's variables from being rebound.
        formula_trees = [hole_sem.formula_tree(plugging) for plugging in pluggings]

        # Print out the formulas in a textual format.
        for n, formula in enumerate(formula_trees, 1):
            print()
            print('%d. %s' % (n, formula))

        # Maybe draw the formulas as trees.
        if options.draw_trees:
            draw_trees(*formula_trees)

        # NOTE(review): 'Done.' is emitted after each parse; confirm whether
        # a single message after the loop was intended.
        print()
        print('Done.')