def demo():
    """
    A demonstration showing how PCFG C{Grammar}s can be created and used.

    The demo prints its results to standard output in three steps:
      1. build a few probabilistic productions by hand and print one of
         them together with its lhs, rhs and probability;
      2. wrap those productions in a C{pcfg.Grammar} and print it;
      3. induce a grammar from the productions of the first three parsed
         treebank trees, then parse a sentence with C{pchart.InsideParse}.
    """
    from en.parser.nltk_lite.corpora import treebank, extract
    # treetransforms is referenced only by the optional, commented-out
    # transformations inside the induction loop below.
    from en.parser.nltk_lite.parse import cfg, pcfg, pchart, treetransforms
    from itertools import islice

    # NOTE: output uses single-argument print(...) calls and repr() rather
    # than the print statement and backticks. The emitted text is the same,
    # and the code also parses on Python 3.

    # Create some probabilistic CFG Productions
    S, A, B, C = cfg.nonterminals('S A B C')
    pcfg_prods = [
        pcfg.Production(A, [B, B], prob=0.3),
        pcfg.Production(A, [C, B, C], prob=0.7),
        pcfg.Production(B, [B, 'b'], prob=0.5),
        pcfg.Production(B, [C], prob=0.5),
        pcfg.Production(C, ['a'], prob=0.1),
        pcfg.Production(C, ['b'], prob=0.9),
    ]

    pcfg_prod = pcfg_prods[2]
    print('A PCFG production: ' + repr(pcfg_prod))
    print(' pcfg_prod.lhs() => ' + repr(pcfg_prod.lhs()))
    print(' pcfg_prod.rhs() => ' + repr(pcfg_prod.rhs()))
    print(' pcfg_prod.prob() => ' + repr(pcfg_prod.prob()))
    print('')

    # Create and print a PCFG
    grammar = pcfg.Grammar(S, pcfg_prods)
    print('A PCFG grammar: ' + repr(grammar))
    print(' grammar.start() => ' + repr(grammar.start()))
    # The replace(...) call line-wraps the output: every comma is followed
    # by a newline and 26 spaces of indentation.
    wrapped = repr(grammar.productions()).replace(',', ',\n' + ' ' * 26)
    print(' grammar.productions() => ' + wrapped)
    print('')

    # extract productions from three trees and induce the PCFG
    print("Induce PCFG grammar from treebank data:")

    productions = []
    for tree in islice(treebank.parsed(), 3):
        # perform optional in-place tree transformations, e.g.:
        # treetransforms.collapseUnary(tree, collapsePOS = False)
        # treetransforms.chomskyNormalForm(tree, horzMarkov = 2)
        productions += tree.productions()

    grammar = pcfg.induce(S, productions)
    print(grammar)
    print('')

    print("Parse sentence using induced grammar:")

    parser = pchart.InsideParse(grammar)
    parser.trace(3)

    # presumably item 0 of the raw treebank stream, i.e. its first
    # sentence -- confirm against corpora.extract
    sent = extract(0, treebank.raw())
    print(sent)
    for parse in parser.get_parse_list(sent):
        print(parse)
def demo():
    """
    Pretty-print one item taken from the IEER corpus.

    Calls C{extract(75, ieer.dictionary())} and hands the result to
    C{pprint}. Presumably this selects the item at index 75 of the
    corpus' dictionary view -- confirm against C{corpora.extract}.
    """
    # extract is imported here so the demo does not depend on a
    # module-level name being present.
    from en.parser.nltk_lite.corpora import ieer, extract
    from pprint import pprint

    # Alternative view of the same item, left disabled:
    # pprint(extract(75, ieer.raw()))
    pprint(extract(75, ieer.dictionary()))
def demo():
    """
    A demonstration showing how PCFG C{Grammar}s can be created and used.

    Everything is written to standard output:
      - a hand-built list of probabilistic productions, one of which is
        printed with its lhs, rhs and probability;
      - a C{pcfg.Grammar} built from those productions;
      - a grammar induced from the productions of the first three parsed
        treebank trees, and the parses that C{pchart.InsideParse} returns
        for one sentence.
    """
    from en.parser.nltk_lite.corpora import treebank, extract
    # treetransforms is needed only if the optional transformations in the
    # induction loop below are enabled.
    from en.parser.nltk_lite.parse import cfg, pcfg, pchart, treetransforms
    from itertools import islice

    # NOTE: printing goes through single-argument print(...) calls and
    # repr() in place of the print statement and backticks. The text
    # produced is the same, and the code also parses on Python 3.

    # Create some probabilistic CFG Productions
    S, A, B, C = cfg.nonterminals("S A B C")
    pcfg_prods = [
        pcfg.Production(A, [B, B], prob=0.3),
        pcfg.Production(A, [C, B, C], prob=0.7),
        pcfg.Production(B, [B, "b"], prob=0.5),
        pcfg.Production(B, [C], prob=0.5),
        pcfg.Production(C, ["a"], prob=0.1),
        pcfg.Production(C, ["b"], prob=0.9),
    ]

    pcfg_prod = pcfg_prods[2]
    print("A PCFG production: " + repr(pcfg_prod))
    print(" pcfg_prod.lhs() => " + repr(pcfg_prod.lhs()))
    print(" pcfg_prod.rhs() => " + repr(pcfg_prod.rhs()))
    print(" pcfg_prod.prob() => " + repr(pcfg_prod.prob()))
    print("")

    # Create and print a PCFG
    grammar = pcfg.Grammar(S, pcfg_prods)
    print("A PCFG grammar: " + repr(grammar))
    print(" grammar.start() => " + repr(grammar.start()))
    # Line-wrap the production list: each comma is followed by a newline
    # and 26 spaces of indentation.
    production_text = repr(grammar.productions()).replace(",", ",\n" + " " * 26)
    print(" grammar.productions() => " + production_text)
    print("")

    # extract productions from three trees and induce the PCFG
    print("Induce PCFG grammar from treebank data:")

    productions = []
    for tree in islice(treebank.parsed(), 3):
        # perform optional in-place tree transformations, e.g.:
        # treetransforms.collapseUnary(tree, collapsePOS = False)
        # treetransforms.chomskyNormalForm(tree, horzMarkov = 2)
        productions += tree.productions()

    grammar = pcfg.induce(S, productions)
    print(grammar)
    print("")

    print("Parse sentence using induced grammar:")

    parser = pchart.InsideParse(grammar)
    parser.trace(3)

    # presumably item 0 of the raw treebank stream, i.e. its first
    # sentence -- confirm against corpora.extract
    sent = extract(0, treebank.raw())
    print(sent)
    for parse in parser.get_parse_list(sent):
        print(parse)