def bottom_up(self, grammar, text):
    # Bottom-up (shift-reduce) parse of *text* with the given CFG *grammar*.
    # Prints the raw text, every parse tree found, then a separator line.
    print("Text : {}".format(text))
    sent = text.split()  # naive whitespace tokenisation
    sr_parser = nltk.ShiftReduceParser(grammar)
    for tree in sr_parser.parse(sent):
        print(tree)
    # NOTE(review): indentation was lost in this source; the separator may
    # originally have been printed inside the loop — confirm against upstream.
    print("------------------")
def get_avg_depth_dict(document, grammar):
    """Compute the average parse-tree depth of every word in *document*.

    Parameters
    ----------
    document : iterable of token lists; each sentence is parsed separately.
    grammar : str -- a CFG in the format accepted by ``nltk.CFG.fromstring``.

    Returns
    -------
    dict mapping each word to the mean of ``get_depth`` over its occurrences.
    """
    parser = nltk.ShiftReduceParser(nltk.CFG.fromstring(grammar), trace=1)
    # word -> (occurrence count, sum of depths over those occurrences)
    depth_dict = {}
    for sentence in document:
        # NOTE(review): assumes get_depth accepts whatever parse() returns
        # (an iterator in NLTK 3) -- confirm against get_depth's definition.
        tree = parser.parse(sentence)
        for word in sentence:
            depth = get_depth(tree, word)
            if word in depth_dict:
                freq, depth_sum = depth_dict[word]
                # BUG FIX: the running total must grow by the *current* depth;
                # the original added a constant 1, so every repeated word's
                # "average" drifted toward 1 regardless of the actual trees.
                depth_dict[word] = (freq + 1, depth_sum + depth)
            else:
                depth_dict[word] = (1, depth)
    # Fold the (count, sum) pairs into averages.
    return {word: depth_sum / freq
            for word, (freq, depth_sum) in depth_dict.items()}
def parsing_types():
    """Parse one sentence top-down and bottom-up and print the trees.

    Modernised for Python 3 / NLTK 3, matching the rest of this file:
      * ``nltk.parse_cfg``     -> ``nltk.CFG.fromstring`` (removed in NLTK 3)
      * ``parser.nbest_parse`` -> ``parser.parse``        (removed in NLTK 3)
      * ``print`` statements   -> ``print()`` function
    """
    grammar = nltk.CFG.fromstring("""
    S -> NP VP
    VP -> V NP | V NP PP
    PP -> P NP
    V -> "saw" | "ate" | "walked"
    NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
    Det -> "a" | "an" | "the" | "my"
    N -> "man" | "dog" | "cat" | "telescope" | "park"
    P -> "in" | "on" | "by" | "with"
    """)
    sent = "Mary saw a dog".split()

    # Top-down strategy.
    rd_parser = nltk.RecursiveDescentParser(grammar)
    print("==== recursive descent ====")
    for t in rd_parser.parse(sent):
        print(t)

    # Bottom-up strategy.
    sr_parser = nltk.ShiftReduceParser(grammar)
    print("==== shift reduce ====")
    for t in sr_parser.parse(sent):
        print(t)
def main():
    """Walk through tokenisation, POS tagging, regexp chunking and CFG parsing."""
    entry = "This is an example sentence to tokenize. I wonder if it will work. It should, shouldn't it?"
    # entry = user_input()
    print(nltk.sent_tokenize(entry))
    print(nltk.word_tokenize(entry))
    pos_tagged = nltk.pos_tag(nltk.word_tokenize(entry))
    # Reuse the tagging computed above instead of tokenising+tagging again.
    print(pos_tagged)

    # Chunk optional adverbs/verbs followed by a proper noun.
    chunkGram = """Chunk: {<RB.?>*<VB.?>*<NNP><NP>?}"""
    # grammar1 = nltk.data.load('file:mygrammar.cfg')
    grammar1 = nltk.CFG.fromstring("""
    S -> NP VP
    VP -> V NP | V NP PP
    PP -> P NP
    V -> "saw" | "ate" | "walked"
    NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
    Det -> "a" | "an" | "the" | "my"
    N -> "man" | "dog" | "cat" | "telescope" | "park"
    P -> "in" | "on" | "by" | "with"
    """)
    grammar = nltk.CFG.fromstring("""
    S -> NP VP
    PP -> P NP
    NP -> Det N | NP PP
    VP -> V NP | VP PP
    Det -> 'a' | 'the'
    N -> 'dog' | 'cat'
    V -> 'chased' | 'sat'
    P -> 'on' | 'in'
    """)

    chunkParser = nltk.RegexpParser(chunkGram)
    sr_parser = nltk.ShiftReduceParser(grammar1)  # built for demonstration only
    chart_parser = nltk.ChartParser(grammar)

    chunked = chunkParser.parse(pos_tagged)
    print(chunked)

    # BUG FIX: the original called chart_parser.parse(pos_tagged) -- but a
    # ChartParser expects a list of word tokens covered by its grammar, not
    # (word, tag) pairs -- and then called .draw() on the iterator parse()
    # returns.  Parse a sentence the toy grammar covers and draw each tree.
    demo_sent = "the dog chased a cat".split()
    for tree in chart_parser.parse(demo_sent):
        tree.draw()
def parse(self, sentence):
    """Chunk *sentence*, then try to build recursive NPs on top of the chunks.

    Each top-level chunk (or token) is replaced by a placeholder "<i>" so a
    shift-reduce parser can re-parse the simplified sequence; afterwards the
    placeholders are mapped back to the original subtrees.  On any failure,
    fall back to the plain classifier-based chunker result.
    """
    res = self.chunker.parse(sentence)
    try:
        newres = []
        mem = {}  # placeholder index -> original chunk subtree / token
        idx = 0
        for thing in res:
            idx += 1
            mem[idx] = thing
            if isinstance(thing, Tree):
                # NOTE(review): .node is the pre-NLTK-3 label accessor
                # (.label() in NLTK 3) -- kept to match this module's NLTK.
                newres.append((("<%s>" % idx), thing.node))
            else:
                newres.append((("<%s>" % idx), thing[1]))
        grammar = generate_grammar(newres)
        self.recursive_np_chunker = nltk.ShiftReduceParser(grammar)
        justwords = [w for w, p in newres]
        print(justwords)
        res = self.recursive_np_chunker.parse(justwords)
        print(res)
        res = tree2iobplus(res)
        newres = []
        for w, p, t in res:
            # BUG FIX: mem is keyed by int, but w[1:-1] is the *string*
            # between the angle brackets; the str lookup always raised
            # KeyError, so the except branch silently discarded every
            # shift-reduce parse and this path never produced output.
            tree = mem[int(w[1:-1])]
            newres.append((tree, p, t))
        print("got something")
        return iobplus2tree(newres)
    except Exception as e:
        print(e)
        print("resorting to classifier")
        return self.chunker.parse(sentence)
N -> "man" | "dog" | "cat" | "telescope" | "park" P -> "in" | "on" | "by" | "with" """) sent = "Mary saw Bob".split() rd_parser = nltk.RecursiveDescentParser(grammar1) for tree in rd_parser.parse(sent): print(tree) # 递归下降分析 rd_parser = nltk.RecursiveDescentParser(grammar1) sent = 'Mary saw a dog'.split() for tree in rd_parser.parse(sent): print(tree) # 移进-归约分析 sr_parser = nltk.ShiftReduceParser(grammar1) sent = 'Mary saw a dog'.split() for tree in sr_parser.parse(sent): print(tree) # 动态规划句法分析 def init_wfst(tokens, grammar): numtokens = len(tokens) wfst = [[None for i in range(numtokens + 1)] for j in range(numtokens + 1)] for i in range(numtokens): productions = grammar.productions(rhs=tokens[i]) wfst[i][i + 1] = productions[0].lhs() return wfst
#Shift Reduce Parser for s6: s6_sr_grammar = nltk.CFG.fromstring(''' S -> Pronoun VP | S PP NP -> Det N | NP PP | NNS PP VP -> V NP PP -> P NP Pronoun -> 'He' P -> 'in' | 'with' Det -> 'the' | 'some' V -> 'eats' NNS -> 'pasta' N -> 'anchovies' | 'restaurant' ''') s6_sr = nltk.ShiftReduceParser(s6_sr_grammar, trace=2) for tree in s6_sr.parse(s6): print(tree) #Shift Reduce Parser for s7: s7_sr_grammar = nltk.CFG.fromstring(''' S -> Pronoun VP | S PP NP -> Det N | NP PP VP -> V NNS | VP PP PP -> P NP Pronoun -> 'He' P -> 'in' | 'with' Det -> 'the' | 'some' | 'a' V -> 'eats' NNS -> 'pasta' N -> 'fork' | 'restaurant'
... PP -> P NP ... NP -> Det N | Det N PP | 'I' ... VP -> V NP | VP PP ... Det -> 'an' | 'my' ... N -> 'elephant' | 'pajamas' ... V -> 'shot' ... P -> 'in' ... """) sent = ['I', 'shot', 'an', 'elephant', 'in', 'my', 'pajamas'] parser = nltk.ChartParser(groucho_grammar) trees = parser.nbest_parse(sent) for tree in trees: print(tree) sr_parse = nltk.ShiftReduceParser(groucho_grammar) sent = 'Mary saw a dog'.split() print(sr_parse.parse(sent)) #wfst:符合文法的子串链表 #8.6 文法开发 from nltk.corpus import treebank t = treebank.parsed_sents('wsj_0001.mrg')[0] print(t) def filter(tree): child_nodes = [ child.node for child in tree if isinstance(child, nltk.tree)
import nltk

grammar1 = nltk.CFG.fromstring("""
S -> NP VP
VP -> V NP | V NP PP
PP -> P NP
V -> "saw" | "ate" | "walked"
NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
Det -> "a" | "an" | "the" | "my"
N -> "man" | "dog" | "cat" | "telescope" | "park"
P -> "in" | "on" | "by" | "with"
""")

sent = "Mary saw Bob".split()

# BUG FIX: three of these parsers were built from an undefined name
# `grammar` (NameError at import time); the only grammar defined in this
# snippet is `grammar1`, which the traced parser already used.
parser = nltk.ChartParser(grammar1)
rd_parser = nltk.RecursiveDescentParser(grammar1)
sr_parser = nltk.ShiftReduceParser(grammar1)
trace_sr_parser = nltk.ShiftReduceParser(grammar1, trace=2)

for tree in sr_parser.parse(sent):
    print(tree)
    # NOTE(review): pretty_print's placement inside the loop is reconstructed
    # from collapsed source; it uses the loop variable, so it must live here
    # (or run once after the loop on the last tree) -- confirm upstream.
    tree.pretty_print()
PNNom -> PNDet Adj Adj -> 'heavy' PREP -> 'to' | 'with' |'on' SNDet -> 'A' | 'The' | 'a' | 'the' PNDet -> 'many' | 'The' | 'Some' | 'the' SN -> 'boy' | 'chair' | 'book' | 'man' | 'telescope' | 'hill' PN -> 'boys' IV -> 'left' TV -> 'eats' | 'loves' | 'love' | 'gave' | 'likes' | 'moves' | 'saw' | 'knows' | 'eat' CONJ -> 'and' """ g = nltk.CFG.fromstring(sg) # Bottom-up parser sr_parser = nltk.ShiftReduceParser(g, trace=2) # Parse sentences and observe the behavior of the parser def parse_sentence(sent): tokens = sent.split() trees = sr_parser.parse(tokens) for tree in trees: print(tree) ''' parse_sentence("John left") # should be: # (S (NP John) (VP (IV left))) parse_sentence("John eats bread")
def shiftParser():
    """Shift-reduce parse a fixed demo sentence with the module-level grammar1.

    Modernised: the Python 2 ``print x`` statement is now ``print()``, and
    NLTK 3's ``parse()`` returns an iterator of trees rather than a single
    tree, so we iterate and print each parse instead of the iterator object.
    """
    sr_parse = nltk.ShiftReduceParser(grammar1)
    sent = 'Mary saw a dog'.split()
    for tree in sr_parse.parse(sent):
        print(tree)
P -> 'on' """) sent = "the angry bear chased the frightened little squirrel".split() rd_parser = nltk.RecursiveDescentParser(grammar2) for tree in rd_parser.parse(sent): tree.draw() #递归下降解析器 rd_parser = nltk.RecursiveDescentParser(grammar2) sent = 'Joe saw a bear'.split() for t in rd_parser.parse(sent): print(t) #NP -> NP PP #移进-归约解析器 sr_parse = nltk.ShiftReduceParser(grammar2, trace=2) sent = 'Joe saw a bear'.split() for t in sr_parse.parse(sent): print(t) #图表分析 groucho_grammar = nltk.CFG.fromstring(""" S -> NP VP PP -> P NP NP -> Det N | Det N PP | 'I' VP -> V NP | VP PP Det -> 'an' | 'my' N -> 'elephant' | 'pajamas' V -> 'shot' P -> 'in' """)
import nltk

# Parsers for the English grammar; the Filipino counterparts below are still
# work in progress and intentionally left commented out.
enGrammar = nltk.data.load('grammars/test.cfg')
enRdParser = nltk.RecursiveDescentParser(enGrammar)
enSRParser = nltk.ShiftReduceParser(enGrammar)
#enLCParser = nltk
#filGrammar = nltk.data.load("")
#filRdParser = nltk.RecursiveDescentParser(filGrammar)
#filSRParser = nltk.ShiftReduceParser(filGrammar)

def rdParseEng(sents):
    """Recursive-descent (top-down) parse of the token list *sents*."""
    return enRdParser.parse(sents)

def srParseEng(sents):
    """Shift-reduce (bottom-up) parse of the token list *sents*."""
    return enSRParser.parse(sents)

def lcParseEng(sents):
    """Left-corner parse -- not wired up yet.

    BUG FIX: the original returned ``enLCParser.parse(sents)``, but
    ``enLCParser`` is commented out above, so every call died with a
    NameError.  Raise an explicit error until the parser is constructed.
    """
    raise NotImplementedError("left-corner parser is not configured yet")

#def srParseFil(sents):
#    return filSRParser.parse(sents)
# Toy CFG plus two demo sentences, parsed bottom-up with full tracing so the
# shift/reduce decisions (and where the greedy strategy goes wrong) are visible.
_GRAMMAR_SRC = """
S -> NP VP | VP PP
NP -> Det Nom | PropN | NP PP | PRP | N
Nom -> Adj Nom | N | NNS
VP -> V NP | V S | VP PP | VBZ NP PP
PP -> P NP
PropN -> "Bill" | "Bob"
Det -> "the" | "a" | "an" | "some"
N -> "bear" | "squirrel" | "park" | "river" | "dog" | "block" | "table" | "restaurant" | "pasta" | "fork"
Adj -> "angry" | "frightened" | "furry"
V -> "chased" | "saw" | "put" | "eats" | "eat" | "chase" | "Put"
P -> "on" | "in" | "along" | "with"
PRP -> "He"
VBZ -> "eats"
NNS -> "anchovies"
"""
grammar = nltk.CFG.fromstring(_GRAMMAR_SRC)

# trace=2 makes the parser narrate every shift and reduce step.
parser = nltk.ShiftReduceParser(grammar, trace=2)

S6 = nltk.word_tokenize("He eats pasta with some anchovies in the restaurant")
S7 = nltk.word_tokenize("He eats pasta with a fork in the restaurant")

print("!!!!!!!! S6: !!!!!!!! ")
trees_S6 = parser.parse(S6)
for tree in trees_S6:
    print(tree)

print("!!!!!!!! S7: !!!!!!!! ")
trees_S7 = parser.parse(S7)
for tree in trees_S7:
    print(tree)