Exemple #1
0
 def bottom_up(self, grammar, text):
     """Shift-reduce (bottom-up) parse *text* with *grammar* and print each tree."""
     print("Text : {}".format(text))
     tokens = text.split()
     parser = nltk.ShiftReduceParser(grammar)
     for parse_tree in parser.parse(tokens):
         print(parse_tree)
     print("------------------")
Exemple #2
0
def get_avg_depth_dict(document, grammar):
    """Return a dict mapping each word in *document* to its average parse-tree depth.

    document -- iterable of sentences, each a list of word tokens
    grammar  -- CFG source string accepted by nltk.CFG.fromstring
    """
    parser = nltk.ShiftReduceParser(nltk.CFG.fromstring(grammar), trace=1)
    depth_dict = dict()  # word -> (occurrence count, sum of depths)
    for sentence in document:
        # NOTE(review): in NLTK 3, parse() returns an iterator of trees —
        # confirm get_depth() accepts that (or consumes only the first tree).
        tree = parser.parse(sentence)
        for word in sentence:
            depth = get_depth(tree, word)
            if word in depth_dict:
                freq, depth_sum = depth_dict[word]
                # BUG FIX: accumulate the actual depth of this occurrence;
                # the original added a constant 1, so averages were wrong
                # for any word seen more than once.
                depth_dict[word] = (freq + 1, depth_sum + depth)
            else:
                depth_dict[word] = (1, depth)

    avg_depth_dict = dict()
    for word, (freq, depth_sum) in depth_dict.items():
        avg_depth_dict[word] = depth_sum / freq

    return avg_depth_dict
Exemple #3
0
def parsing_types():
    """Parse one sentence with both a recursive-descent and a shift-reduce parser.

    BUG FIX: nltk.parse_cfg() and nbest_parse() were removed in NLTK 3; use
    nltk.CFG.fromstring() and parse() (as the rest of this file already does),
    and the Python 3 print function.
    """
    grammar = nltk.CFG.fromstring("""
    S -> NP VP
    VP -> V NP | V NP PP
    PP -> P NP
    V -> "saw" | "ate" | "walked"
    NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
    Det -> "a" | "an" | "the" | "my"
    N -> "man" | "dog" | "cat" | "telescope" | "park"
    P -> "in" | "on" | "by" | "with"
  """)
    sent = "Mary saw a dog".split()
    rd_parser = nltk.RecursiveDescentParser(grammar)
    print("==== recursive descent ====")
    for t in rd_parser.parse(sent):
        print(t)
    sr_parser = nltk.ShiftReduceParser(grammar)
    print("==== shift reduce ====")
    for t in sr_parser.parse(sent):
        print(t)
def main():
    """Demo of NLTK sentence/word tokenisation, POS tagging, regex chunking,
    and chart parsing with a small CFG."""
    entry = "This is an example sentence to tokenize. I wonder if it will work. It should, shouldn't it?"
    # entry = user_input()
    print(nltk.sent_tokenize(entry))
    print(nltk.word_tokenize(entry))
    pos_tagged = nltk.pos_tag(nltk.word_tokenize(entry))
    # Reuse the tagging computed above instead of tokenising/tagging again.
    print(pos_tagged)
    chunkGram = """Chunk: {<RB.?>*<VB.?>*<NNP><NP>?}"""
    # grammar1 = nltk.data.load('file:mygrammar.cfg')
    grammar1 = nltk.CFG.fromstring("""
                                    S -> NP VP
                                    VP -> V NP | V NP PP
                                    PP -> P NP
                                    V -> "saw" | "ate" | "walked"
                                    NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
                                    Det -> "a" | "an" | "the" | "my"
                                    N -> "man" | "dog" | "cat" | "telescope" | "park"
                                    P -> "in" | "on" | "by" | "with"
                                    """)
    grammar = nltk.CFG.fromstring("""
                                  S -> NP VP
                                  PP -> P NP
                                  NP -> Det N | NP PP
                                  VP -> V NP | VP PP
                                  Det -> 'a' | 'the'
                                  N -> 'dog' | 'cat'
                                  V -> 'chased' | 'sat'
                                  P -> 'on' | 'in'
                                  """)

    chunkParser = nltk.RegexpParser(chunkGram)
    sr_parser = nltk.ShiftReduceParser(grammar1)
    chart_parser = nltk.ChartParser(grammar)

    chunked = chunkParser.parse(pos_tagged)

    # BUG FIX: the chart parser expects a list of word tokens covered by the
    # grammar, not (word, tag) tuples, and in NLTK 3 parse() returns an
    # iterator of trees (which has no .draw()).  Parse a sentence the grammar
    # actually covers and draw each resulting tree.
    for parse_tree in chart_parser.parse('the dog chased a cat'.split()):
        parse_tree.draw()
Exemple #5
0
    def parse(self, sentence):
        """Chunk *sentence*, then re-parse the chunk sequence with a generated
        grammar to recover recursive NP structure; fall back to the plain
        classifier chunk on any failure.
        """
        res = self.chunker.parse(sentence)
        try:
            newres = []
            mem = {}  # int index -> original chunk (Tree or (word, tag) pair)
            for idx, thing in enumerate(res, start=1):
                mem[idx] = thing
                if isinstance(thing, Tree):
                    # NOTE(review): .node is the NLTK-2 API (.label() in NLTK 3).
                    newres.append((("<%s>" % idx), thing.node))
                else:
                    newres.append((("<%s>" % idx), thing[1]))

            grammar = generate_grammar(newres)
            self.recursive_np_chunker = nltk.ShiftReduceParser(grammar)

            justwords = [w for w, p in newres]
            print(justwords)
            res = self.recursive_np_chunker.parse(justwords)
            print(res)
            res = tree2iobplus(res)

            newres = []
            for w, p, t in res:
                # BUG FIX: mem is keyed by int, but w[1:-1] (the text between
                # '<' and '>') is a string — the lookup always raised KeyError
                # and silently forced the fallback path below.
                tree = mem[int(w[1:-1])]
                newres.append((tree, p, t))
            print("got something")
            return iobplus2tree(newres)
        except Exception as e:
            print(e)
            print("resorting to classifier")
            return self.chunker.parse(sentence)
Exemple #6
0
    N -> "man" | "dog" | "cat" | "telescope" | "park"
    P -> "in" | "on" | "by" | "with"
    """)
sent = "Mary saw Bob".split()
rd_parser = nltk.RecursiveDescentParser(grammar1)
for parse_tree in rd_parser.parse(sent):
    print(parse_tree)

# Recursive-descent (top-down) parsing
rd_parser = nltk.RecursiveDescentParser(grammar1)
sent = 'Mary saw a dog'.split()
for parse_tree in rd_parser.parse(sent):
    print(parse_tree)

# Shift-reduce (bottom-up) parsing
sr_parser = nltk.ShiftReduceParser(grammar1)
sent = 'Mary saw a dog'.split()
for parse_tree in sr_parser.parse(sent):
    print(parse_tree)


# Dynamic-programming (chart / WFST) parsing
def init_wfst(tokens, grammar):
    """Build an (n+1) x (n+1) well-formed substring table for *tokens*.

    Cell [i][i+1] is seeded with the LHS category of the first grammar
    production whose RHS matches token i; every other cell starts as None.
    """
    n = len(tokens)
    wfst = [[None] * (n + 1) for _ in range(n + 1)]
    for i, token in enumerate(tokens):
        productions = grammar.productions(rhs=token)
        # Only the first matching production's LHS is recorded.
        wfst[i][i + 1] = productions[0].lhs()
    return wfst

Exemple #7
0
# Shift-reduce parser for sentence s6.
s6_sr_grammar = nltk.CFG.fromstring('''
S -> Pronoun VP | S PP
NP -> Det N | NP PP | NNS PP 
VP -> V NP
PP -> P NP 
Pronoun -> 'He'
P -> 'in' | 'with'
Det -> 'the' | 'some'
V -> 'eats'
NNS -> 'pasta' 
N ->  'anchovies' | 'restaurant'
''')

# trace=2 prints every shift/reduce step the parser takes.
s6_sr = nltk.ShiftReduceParser(s6_sr_grammar, trace=2)
for parse_tree in s6_sr.parse(s6):
    print(parse_tree)

#Shift Reduce Parser for s7:
s7_sr_grammar = nltk.CFG.fromstring('''
S -> Pronoun VP | S PP
NP -> Det N | NP PP 
VP -> V NNS | VP PP
PP -> P NP 
Pronoun -> 'He'
P -> 'in' | 'with'
Det -> 'the' | 'some' | 'a'
V -> 'eats'
NNS -> 'pasta' 
N ->  'fork' | 'restaurant'
Exemple #8
0
... PP -> P NP
... NP -> Det N | Det N PP | 'I'
... VP -> V NP | VP PP
... Det -> 'an' | 'my'
... N -> 'elephant' | 'pajamas'
... V -> 'shot'
... P -> 'in'
... """)

sent = ['I', 'shot', 'an', 'elephant', 'in', 'my', 'pajamas']
parser = nltk.ChartParser(groucho_grammar)
# BUG FIX: nbest_parse() was removed in NLTK 3; parse() yields the trees.
trees = parser.parse(sent)
for tree in trees:
    print(tree)

sr_parse = nltk.ShiftReduceParser(groucho_grammar)
sent = 'Mary saw a dog'.split()
# NOTE(review): groucho_grammar has no rules for these words, and in NLTK 3
# parse() returns an iterator — this prints the iterator object, not a tree.
print(sr_parse.parse(sent))

# wfst: table of grammar-conforming substrings

# 8.6 Grammar development
from nltk.corpus import treebank

t = treebank.parsed_sents('wsj_0001.mrg')[0]
print(t)


def filter(tree):
    child_nodes = [
        child.node for child in tree if isinstance(child, nltk.tree)
Exemple #9
0
import nltk

grammar1 = nltk.CFG.fromstring("""
  S -> NP VP
  VP -> V NP | V NP PP
  PP -> P NP
  V -> "saw" | "ate" | "walked"
  NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
  Det -> "a" | "an" | "the" | "my"
  N -> "man" | "dog" | "cat" | "telescope" | "park"
  P -> "in" | "on" | "by" | "with"
  """)

sent = "Mary saw Bob".split()
# BUG FIX: only grammar1 is defined in this example; `grammar` was an
# undefined name and raised NameError on the next three lines.
parser = nltk.ChartParser(grammar1)
rd_parser = nltk.RecursiveDescentParser(grammar1)
sr_parser = nltk.ShiftReduceParser(grammar1)
trace_sr_parser = nltk.ShiftReduceParser(grammar1, trace=2)

for tree in sr_parser.parse(sent):
    print(tree)
    tree.pretty_print()
Exemple #10
0
PNNom -> PNDet Adj
Adj -> 'heavy'
PREP -> 'to' | 'with' |'on'
SNDet -> 'A' | 'The' | 'a' | 'the'
PNDet -> 'many' | 'The' | 'Some' | 'the'
SN -> 'boy' | 'chair' | 'book' | 'man' | 'telescope' | 'hill'
PN -> 'boys'
IV -> 'left'
TV -> 'eats' | 'loves' | 'love' | 'gave' | 'likes' | 'moves' | 'saw' | 'knows' | 'eat'
CONJ -> 'and'
"""

# Build the CFG from the grammar string `sg` (defined above, outside this view).
g = nltk.CFG.fromstring(sg)

# Bottom-up  parser; trace=2 prints each shift/reduce step.
sr_parser = nltk.ShiftReduceParser(g, trace=2)


# Parse sentences and observe the behavior of the parser
def parse_sentence(sent):
    """Tokenise *sent* on whitespace, shift-reduce parse it, and print every tree."""
    words = sent.split()
    for parse_tree in sr_parser.parse(words):
        print(parse_tree)


'''
parse_sentence("John left")
# should be:
# (S (NP John) (VP (IV left)))
parse_sentence("John eats bread")
def shiftParser():
    """Shift-reduce parse a fixed demo sentence with the module-level grammar1.

    BUG FIX: converted the Python 2 print statement to the print() function
    used by the rest of this file.  Note that in NLTK 3 parse() returns an
    iterator, so this prints the iterator object rather than the trees.
    """
    sr_parse = nltk.ShiftReduceParser(grammar1)
    sent = 'Mary saw a dog'.split()
    print(sr_parse.parse(sent))
Exemple #12
0
P -> 'on'
""")
sent = "the angry bear chased the frightened little squirrel".split()
rd_parser = nltk.RecursiveDescentParser(grammar2)
for parse_tree in rd_parser.parse(sent):
    parse_tree.draw()

# Recursive-descent parser
rd_parser = nltk.RecursiveDescentParser(grammar2)
sent = 'Joe saw a bear'.split()
for parse_tree in rd_parser.parse(sent):
    print(parse_tree)
# NP -> NP PP

# Shift-reduce parser (trace=2 prints each shift/reduce step)
sr_parse = nltk.ShiftReduceParser(grammar2, trace=2)
sent = 'Joe saw a bear'.split()
for parse_tree in sr_parse.parse(sent):
    print(parse_tree)

# Chart parsing
groucho_grammar = nltk.CFG.fromstring("""
S -> NP VP
PP -> P NP
NP -> Det N | Det N PP | 'I'
VP -> V NP | VP PP
Det -> 'an' | 'my'
N -> 'elephant' | 'pajamas'
V -> 'shot'
P -> 'in'
""")
Exemple #13
0
import nltk

# English grammar (loaded from the NLTK data path) and the two parsers built on it.
enGrammar = nltk.data.load('grammars/test.cfg')
enRdParser = nltk.RecursiveDescentParser(enGrammar)
enSRParser = nltk.ShiftReduceParser(enGrammar)
# NOTE(review): the left-corner parser below is a stub that was never set up.
#enLCParser = nltk

# Filipino grammar/parsers: placeholders, never enabled.
#filGrammar = nltk.data.load("")
#filRdParser = nltk.RecursiveDescentParser(filGrammar)
#filSRParser = nltk.ShiftReduceParser(filGrammar)


def rdParseEng(sents):
    """Recursive-descent parse *sents* with the module-level English parser."""
    trees = enRdParser.parse(sents)
    return trees


def srParseEng(sents):
    """Shift-reduce parse *sents* with the module-level English parser."""
    trees = enSRParser.parse(sents)
    return trees


def lcParseEng(sents):
    # BUG(review): enLCParser is never defined (its setup is commented out
    # above), so calling this raises NameError — define the left-corner
    # parser before using this function.
    return enLCParser.parse(sents)


#def srParseFil(sents):
#	return filSRParser.parse(sents)
grammar = nltk.CFG.fromstring("""
    S -> NP VP | VP PP
    NP -> Det Nom | PropN | NP PP | PRP | N
    Nom -> Adj Nom | N | NNS
    VP -> V NP | V S | VP PP | VBZ NP PP
    PP -> P NP
    PropN -> "Bill" | "Bob"
    Det -> "the" | "a" | "an" | "some"
    N -> "bear" | "squirrel" | "park" | "river" | "dog" | "block" | "table" | "restaurant" | "pasta" | "fork"
    Adj -> "angry" | "frightened" | "furry"
    V -> "chased" | "saw" | "put" | "eats" | "eat" | "chase" | "Put"
    P -> "on" | "in" | "along" | "with" 
    PRP -> "He"
    VBZ -> "eats"
    NNS -> "anchovies"
  """)

# Tokenise the two demo sentences.
S6 = nltk.word_tokenize("He eats pasta with some anchovies in the restaurant")
S7 = nltk.word_tokenize("He eats pasta with a fork in the restaurant")

# Shift-reduce parser; trace=2 shows each shift/reduce step.
parser = nltk.ShiftReduceParser(grammar, trace=2)

print("!!!!!!!! S6: !!!!!!!! ")
trees_S6 = parser.parse(S6)
for parse_tree in trees_S6:
    print(parse_tree)

print("!!!!!!!! S7: !!!!!!!! ")
trees_S7 = parser.parse(S7)
for parse_tree in trees_S7:
    print(parse_tree)