def demo(): """ A demonstration showing how PCFG C{Grammar}s can be created and used. """ from en.parser.nltk_lite.corpora import treebank, extract from en.parser.nltk_lite.parse import cfg, pcfg, pchart, treetransforms from itertools import islice # Create some probabilistic CFG Productions S, A, B, C = cfg.nonterminals('S A B C') pcfg_prods = [ pcfg.Production(A, [B, B], prob=0.3), pcfg.Production(A, [C, B, C], prob=0.7), pcfg.Production(B, [B, 'b'], prob=0.5), pcfg.Production(B, [C], prob=0.5), pcfg.Production(C, ['a'], prob=0.1), pcfg.Production(C, ['b'], prob=0.9) ] pcfg_prod = pcfg_prods[2] print 'A PCFG production:', ` pcfg_prod ` print ' pcfg_prod.lhs() =>', ` pcfg_prod.lhs() ` print ' pcfg_prod.rhs() =>', ` pcfg_prod.rhs() ` print ' pcfg_prod.prob() =>', ` pcfg_prod.prob() ` print # Create and print a PCFG grammar = pcfg.Grammar(S, pcfg_prods) print 'A PCFG grammar:', ` grammar ` print ' grammar.start() =>', ` grammar.start() ` print ' grammar.productions() =>', # Use string.replace(...) is to line-wrap the output. print ` grammar.productions() `.replace(',', ',\n' + ' ' * 26) print # extract productions from three trees and induce the PCFG print "Induce PCFG grammar from treebank data:" productions = [] for tree in islice(treebank.parsed(), 3): # perform optional in-place tree transformations, e.g.: # treetransforms.collapseUnary(tree, collapsePOS = False) # treetransforms.chomskyNormalForm(tree, horzMarkov = 2) productions += tree.productions() grammar = pcfg.induce(S, productions) print grammar print print "Parse sentence using induced grammar:" parser = pchart.InsideParse(grammar) parser.trace(3) sent = extract(0, treebank.raw()) print sent for parse in parser.get_parse_list(sent): print parse
def demo(): """ A demonstration showing how PCFG C{Grammar}s can be created and used. """ from en.parser.nltk_lite.corpora import treebank, extract from en.parser.nltk_lite.parse import cfg, pcfg, pchart, treetransforms from itertools import islice # Create some probabilistic CFG Productions S, A, B, C = cfg.nonterminals("S A B C") pcfg_prods = [ pcfg.Production(A, [B, B], prob=0.3), pcfg.Production(A, [C, B, C], prob=0.7), pcfg.Production(B, [B, "b"], prob=0.5), pcfg.Production(B, [C], prob=0.5), pcfg.Production(C, ["a"], prob=0.1), pcfg.Production(C, ["b"], prob=0.9), ] pcfg_prod = pcfg_prods[2] print "A PCFG production:", ` pcfg_prod ` print " pcfg_prod.lhs() =>", ` pcfg_prod.lhs() ` print " pcfg_prod.rhs() =>", ` pcfg_prod.rhs() ` print " pcfg_prod.prob() =>", ` pcfg_prod.prob() ` print # Create and print a PCFG grammar = pcfg.Grammar(S, pcfg_prods) print "A PCFG grammar:", ` grammar ` print " grammar.start() =>", ` grammar.start() ` print " grammar.productions() =>", # Use string.replace(...) is to line-wrap the output. print ` grammar.productions() `.replace(",", ",\n" + " " * 26) print # extract productions from three trees and induce the PCFG print "Induce PCFG grammar from treebank data:" productions = [] for tree in islice(treebank.parsed(), 3): # perform optional in-place tree transformations, e.g.: # treetransforms.collapseUnary(tree, collapsePOS = False) # treetransforms.chomskyNormalForm(tree, horzMarkov = 2) productions += tree.productions() grammar = pcfg.induce(S, productions) print grammar print print "Parse sentence using induced grammar:" parser = pchart.InsideParse(grammar) parser.trace(3) sent = extract(0, treebank.raw()) print sent for parse in parser.get_parse_list(sent): print parse
def demo():
    """
    A demonstration of the treebank corpus readers, printing the first
    three items from the parsed, chunked, tagged and raw views.
    """
    from en.parser.nltk_lite.corpora import treebank
    from itertools import islice

    print "Parsed:"
    for tree in islice(treebank.parsed(), 3):
        print tree.pp()
    print

    print "Chunked:"
    for tree in islice(treebank.chunked(), 3):
        print tree.pp()
    print

    print "Tagged:"
    for sent in islice(treebank.tagged(), 3):
        print sent
    print

    print "Raw:"
    for sent in islice(treebank.raw(), 3):
        print sent
    print
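# The pcfg demo above pulls a single sentence out of a corpus view with
# extract(0, treebank.raw()).  The snippet below is a minimal sketch of the
# same idea applied to the views printed here; it assumes extract is
# available from en.parser.nltk_lite.corpora as in that demo, and is not
# part of the original module.
def show_first_items():
    from en.parser.nltk_lite.corpora import treebank, extract
    print extract(0, treebank.parsed()).pp()   # first parse tree
    print extract(0, treebank.tagged())        # first tagged sentence
    print extract(0, treebank.raw())           # first raw sentence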
def demo(): """ A demonstration of the porter stemmer on a sample from the Penn Treebank corpus. """ from en.parser.nltk_lite.corpora import treebank from en.parser.nltk_lite import stem stemmer = stem.Porter() i = 0 orig = [] stemmed = [] for sent in treebank.raw(): for word in sent: orig.append(word) sword = stemmer.stem(word) stemmed.append(sword) i += 1 if i > 3: break # Convert the results to a string, and word-wrap them. results = ' '.join(stemmed) results = re.sub(r"(.{,70})\s", r'\1\n', results + ' ').rstrip() # Convert the original to a string, and word wrap it. original = ' '.join(orig) original = re.sub(r"(.{,70})\s", r'\1\n', original + ' ').rstrip() # Print the results. print('-Original-'.center(70).replace(' ', '*').replace('-', ' ')) print(original) print('-Results-'.center(70).replace(' ', '*').replace('-', ' ')) print(results) print('*' * 70)
def demo(): """ A demonstration of the porter stemmer on a sample from the Penn Treebank corpus. """ from en.parser.nltk_lite.corpora import treebank from en.parser.nltk_lite import stem stemmer = stem.Porter() i = 0 orig = [] stemmed = [] for sent in treebank.raw(): for word in sent: orig.append(word) sword = stemmer.stem(word) stemmed.append(sword) i+=1 if i>3: break # Convert the results to a string, and word-wrap them. results = ' '.join(stemmed) results = re.sub(r"(.{,70})\s", r'\1\n', results+' ').rstrip() # Convert the original to a string, and word wrap it. original = ' '.join(orig) original = re.sub(r"(.{,70})\s", r'\1\n', original+' ').rstrip() # Print the results. print '-Original-'.center(70).replace(' ', '*').replace('-', ' ') print original print '-Results-'.center(70).replace(' ', '*').replace('-', ' ') print results print '*'*70