예제 #1
0
파일: chapter8.py 프로젝트: hbdhj/python
def contextFreeGrammar():
    print "page 298 Context-Free Grammar"
    print "=============== A Simple Grammar ==============="
    grammar1 = nltk.parse_cfg("""
        S -> NP VP
        VP -> V NP | V NP PP  
        PP -> P NP  
        V -> "saw" | "ate" | "walked"  
        NP -> "John" | "Mary" | "Bob" | Det N | Det N PP  
        Det -> "a" | "an" | "the" | "my"  
        N -> "man" | "dog" | "cat" | "telescope" | "park"  
        P -> "in" | "on" | "by" | "with"  
        """)
    sent = "Mary saw Bob".split()
    rd_parser = nltk.RecursiveDescentParser(grammar1)
    for tree in rd_parser.nbest_parse(sent):
        print tree

    print "=============== Writing Your Own Grammars ==============="
    grammar1 = nltk.data.load('file:mygrammar.cfg')
    sent = "Mary saw Bob".split()
    rd_parser = nltk.RecursiveDescentParser(grammar1)
    for tree in rd_parser.nbest_parse(sent):
        print tree

    print "=============== Recursion in Syntactic Structure ==============="
    grammar2 = nltk.parse_cfg("""  S  -> NP VP  NP -> Det Nom | PropN  Nom -> Adj Nom | N  VP -> V Adj | V NP | V S | V NP PP  PP -> P NP  PropN -> 'Buster' | 'Chatterer' | 'Joe'  Det -> 'the' | 'a'  N -> 'bear' | 'squirrel' | 'tree' | 'fish' | 'log'  Adj  -> 'angry' | 'frightened' |  'little' | 'tall'  V ->  'chased'  | 'saw' | 'said' | 'thought' | 'was' | 'put'  P -> 'on'  """)
예제 #2
0
def generate_grammar(sentence):
    """Compile a CFG from the global frequent rules plus one lexical rule
    per (word, POS-tag) pair in *sentence*."""
    # NOTE(review): "\n".join leaves no trailing newline, so the first
    # lexical rule is appended onto the last frequent rule's line unless the
    # rule strings already end in "\n" -- confirm against frequent_rules.
    grammar = "\n".join(rule for rule, _freq in frequent_rules)
    for word, pos_tag in sentence:
        grammar += "%s -> '%s' \n" % (pos_tag, word)
    return nltk.parse_cfg(grammar)
 def __init__(self, blackboard, tense="present", person=1):
     """Register this expert on *blackboard* and build its sentence grammar.

     The grammar's terminals ('n', 'v', 'ej', ...) look like placeholder
     POS symbols substituted with real words elsewhere -- TODO confirm.
     """
     super(SentenceExpert, self).__init__(blackboard, "Sentence Expert",
                                          tense, person, 5)
     # Verb lexicons used by this expert (see legend below).
     self.eva = ["be", "look", "feel"]
     self.atv = ["like", "hate", "love", "know", "need", "see"]
     """ eva - emotional verb active
         evp - emotional verb passive
         ej - emotion adjective
         en - emotional noun
         atv - attitude verb
     """
     self.grammar = nltk.parse_cfg("""
         S -> P | EP | Person ATV NP
         P -> NP VP 
         EP -> Person EVA EJ | NP EVP Pron EJ | ENP VP
         ENP ->  EN OF NP 
         NP -> Det N | Det JJ N | Det EJ JJ N | Det EJ N | Det EN
         VP -> V | V ERB | ERB V
         Det -> 'the'
         N -> 'n'
         V -> 'v' 
         EVA -> 'eva'
         EVP -> 'makes' 
         EN -> 'en'
         EJ -> 'ej'
         JJ -> 'adj'
         ERB -> 'erb'
         ATV -> 'atv'
         Person -> 'person'
         Pron -> 'pron'
         OF -> 'of'
         CC -> 'and' | 'but' | 'because' | 'so'
         """)
예제 #4
0
    def build_grammar(self):
        '''Use the corpus data and return a NLTK grammar.'''
        # Assemble the grammar text from the corpus, then compile it.
        definition = self.build_grammar_text().getvalue()
        return nltk.parse_cfg(definition.encode('utf8'))
예제 #5
0
파일: cfg.py 프로젝트: Arttii/TextBlob
def demo():
    """Open a Tk window running the CFGEditor on a small toy grammar."""
    from nltk import Nonterminal, parse_cfg

    # Pre-build Nonterminal objects for each symbol name.
    symbol_names = 'S VP NP PP P N Name V Det'
    (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s)
                                           for s in symbol_names.split()]

    grammar = parse_cfg("""
    S -> NP VP
    PP -> P NP
    NP -> Det N
    NP -> NP PP
    VP -> V NP
    VP -> VP PP
    Det -> 'a'
    Det -> 'the'
    Det -> 'my'
    NP -> 'I'
    N -> 'dog'
    N -> 'man'
    N -> 'park'
    N -> 'statue'
    V -> 'saw'
    P -> 'in'
    P -> 'up'
    P -> 'over'
    P -> 'with'
    """)

    # Callback fired by the editor with the (possibly edited) grammar.
    def show(g):
        print(g)

    root = Tk()
    editor = CFGEditor(root, grammar, show)
    Label(root, text='\nTesting CFG Editor\n').pack()
    Button(root, text='Quit', command=root.destroy).pack()
    root.mainloop()
예제 #6
0
def demo():
    """
    A demonstration of the shift-reduce parser.
    """

    from nltk import parse, parse_cfg

    grammar = parse_cfg(
        """
    S -> NP VP
    NP -> Det N | Det N PP
    VP -> V NP | V NP PP
    PP -> P NP
    NP -> 'I'
    N -> 'man' | 'park' | 'telescope' | 'dog'
    Det -> 'the' | 'a'
    P -> 'in' | 'with'
    V -> 'saw'
    """
    )

    sent = "I saw a man in the park".split()

    # trace=2 prints each shift/reduce step as the parse proceeds.
    parser = parse.ShiftReduceParser(grammar, trace=2)
    for p in parser.nbest_parse(sent):
        print p
	def test_ctor(self):
		"""Cyk() should store the sentence and accept the grammar productions."""
		# arrange
		testGrammar = """
S -> NP VP

VP -> VP PP
VP -> V NP
VP -> 'eats'

PP -> P NP

NP -> Det N
NP -> 'she'

V -> 'eats'

P -> 'with'

N -> 'fish'
N -> 'fork'

Det -> 'a'
"""
		grammar = nltk.parse_cfg(testGrammar)

		sent = ['she', 'eats', 'a', 'fish', 'with', 'a', 'fork']

		# act
		inst = cyk.Cyk(sent, grammar.productions())

		# assert
		self.assertTrue(inst != None)
		self.assertTrue(inst.sentence == sent)
    def __init__(self, blackboard, tense = "present", person = 1):
        """Register this expert on *blackboard* and build its sentence grammar.

        Terminals ('n', 'v', 'ej', ...) appear to be placeholder POS symbols
        substituted with real words elsewhere -- TODO confirm.
        """
        super(SentenceExpert, self).__init__(blackboard, "Sentence Expert", tense, person,5)
        # Verb lexicons used by this expert (see legend below).
        self.eva = ["be", "look", "feel"]
        self.atv = ["like", "hate", "love", "know", "need", "see"]

        """ eva - emotional verb active
            evp - emotional verb passive
            ej - emotion adjective
            en - emotional noun
            atv - attitude verb
        """
        self.grammar = nltk.parse_cfg("""
            S -> P | EP | Person ATV NP
            P -> NP VP 
            EP -> Person EVA EJ | NP EVP Pron EJ | ENP VP
            ENP ->  EN OF NP 
            NP -> Det N | Det JJ N | Det EJ JJ N | Det EJ N | Det EN
            VP -> V | V ERB | ERB V
            Det -> 'the'
            N -> 'n'
            V -> 'v' 
            EVA -> 'eva'
            EVP -> 'makes' 
            EN -> 'en'
            EJ -> 'ej'
            JJ -> 'adj'
            ERB -> 'erb'
            ATV -> 'atv'
            Person -> 'person'
            Pron -> 'pron'
            OF -> 'of'
            CC -> 'and' | 'but' | 'because' | 'so'
            """)
	def __init__(self, cfgGrammar):
		"""Compile *cfgGrammar* (CFG text) and prepare empty production buckets."""
		self.pb = productionBuilder.ProductionBuilder()

		self.grammar = nltk.parse_cfg(cfgGrammar)
		# Productions are later partitioned into these transform groups.
		self.terminalTransformProductions = []
		self.nonTerminalTransformProductions = []
		self.singleNonTerminalTransformProductions = []
예제 #10
0
def Solution_parse(args):
  try:
    print "Parser option: %s " % args.parseOption
    gstring = open('solutiongrammar.cfg', 'r').read()
    grammar1 = nltk.parse_cfg(gstring)
    #print grammar1 , '\n'
    
    if (args.parseOption == 'rd'):
      parser = nltk.RecursiveDescentParser(grammar1)
    elif(args.parseOption == 'sr'):
      parser = nltk.ShiftReduceParser(grammar1)
    elif(args.parseOption == 'ec'):
      parser = nltk.parse.EarleyChartParser(grammar1)
    elif(args.parseOption == 'td'):
      parser = nltk.parse.TopDownChartParser(grammar1)
    elif(args.parseOption == 'bu'):
      parser = nltk.parse.BottomUpChartParser(grammar1)
    else:
      raise Exception("Unknown parseOption: %s" % args.parseOption)

    i = 0
    for line in open('inputfile.txt','r'):
      i += 1
      pass
      if i == 1:
        print line
        sent = wordpunct_tokenize(line)
        print sent , '\n'
        pp = parser.parse(sent)
        print pp, '\n'
        pass

  except Exception, err:
    sys.stderr.write('ERROR: %s\n' % str(err))
    raise
예제 #11
0
파일: parser.py 프로젝트: eberle1080/newman
def parse(wordlist, grammar, generator):
    """
    Parse this thang. Call off to nltk's chart parser (which is
    the only one fast enough to parse the massive grammar). Only
    use the top best tree. If no parse tree is found, the program
    dies. The pass along the tree for actual symantic analysis,
    and then print out the parse and we're done!
    """

    import nltk

    try:
        gr = nltk.parse_cfg(grammar)
        # reduced() presumably yields each word's token form -- TODO confirm
        # against the word class used by callers.
        parts = [w.reduced() for w in wordlist]

        parser = nltk.BottomUpChartParser(gr)
        trees = parser.nbest_parse(parts)

        classifiers = ClassifierCollection(generator)
        ct = 0
        # Only the first (best) tree is analysed; the loop exits immediately.
        for tree in trees:
            rparse(tree, classifiers, False)
            ct += 1
            break

        if ct == 0:
            raise ParserException('No parse trees found')

        classifiers.finish()
        classifiers.pprint()

    # Grammar/parsing errors surface as ValueError; wrap in our own type.
    except ValueError, e:
        raise ParserException(str(e))
예제 #12
0
def parseSimile(tokensWithIndices):
    """Scan (token, index) pairs for simile-shaped spans and return the
    indices of every token inside a matched simile."""
    #The grammar used to check a simile
    grammar = nltk.parse_cfg("""
    S -> NP "like" NP | "ADJ" "like" "NP" | NP V "like" NP | "EX" "like" "NP" | NP "as" "ADJ" "as" NP | V "as" "ADJ" "as" NP |OTH
    NP -> N | "ADJ" N | "DET" NP 
    N -> "NP" | "PRO" | "N"
    V -> "VD" | "V" | "VG"
    OTH -> "OTH" "PUNC" "FW" "WH" "TO" "NUM" "ADV" "VD" "VG" "L" "VN" "N" "P" "S" "EX" "V" "CNJ" "UH" "PRO" "MOD"  
    """)  
    tokens = map(lambda i: i[0], tokensWithIndices)
    indices = map(lambda i: i[1], tokensWithIndices)
    parser = nltk.ChartParser(grammar)
    simile_indices = []
    start_token = 0
    # Grow a window [start_token, end_token) until it parses as a simile.
    while (start_token < len(tokens) - 2):
        end_token = start_token + 2 #can't have simile smaller than 4 words
        simile = False
        while ( (not simile) and (end_token <= len(tokens))):
            if (len(parser.nbest_parse(tokens[start_token:end_token])) > 0): #If a parse tree was formed
                simile_indices.extend(indices[start_token:end_token])
                start_token = end_token
                simile = True            
            else:    
                end_token += 1
        # NOTE(review): after a match, start_token was already moved to
        # end_token and is incremented again here, skipping one token --
        # confirm that is intended.
        start_token += 1
    return simile_indices
예제 #13
0
파일: grammar.py 프로젝트: chfoo/CompFacts
    def build_grammar(self):
        '''Use the corpus data and return a NLTK grammar.'''
        # Build the grammar text from the corpus, then compile it in one go.
        text = self.build_grammar_text().getvalue()
        return nltk.parse_cfg(text.encode('utf8'))
예제 #14
0
def cfgMatch ( nlQuery ):
    """Parse *nlQuery* against a two-word command grammar (action + subject);
    prints "ERROR" and returns -1 if any word is outside the vocabulary."""
    terminalList = [ 'find','search','display','tell','faculty','student','staff','other' ]
    grammar = nltk.parse_cfg("""
                    S -> A B
                    A -> 'find'|'search'|'display'|'tell'
                    B -> 'faculty'|'student'|'staff'|'other'
                    """)
# Since grammar crashes if a non term not in grammar is used.
# We have to check and report error if such a word is used anywhere
##################################################################
# Check and errors reporting here
    tokenizedList = list( word_tokenize( nlQuery  ) )
    for word in tokenizedList:
        if word not in terminalList:
            print "ERROR"
            return -1
##################################################################
    parser = nltk.RecursiveDescentParser ( grammar )
    # Ask for at most one parse tree.
    parseTree = parser.nbest_parse ( tokenizedList, 1 )

    for tree in parseTree:
        print tree
        for elem in tree:
            for i in tree.node:
                print i
예제 #15
0
def demo():
    """Open a Tk window with a CFGEditor loaded with a toy grammar (Py2)."""
    from nltk import Nonterminal, parse_cfg
    nonterminals = 'S VP NP PP P N Name V Det'
    (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s)
                                           for s in nonterminals.split()]
    
    grammar = parse_cfg("""
    S -> NP VP
    PP -> P NP
    NP -> Det N
    NP -> NP PP
    VP -> V NP
    VP -> VP PP
    Det -> 'a'
    Det -> 'the'
    Det -> 'my'
    NP -> 'I'
    N -> 'dog'
    N -> 'man'
    N -> 'park'
    N -> 'statue'
    V -> 'saw'
    P -> 'in'
    P -> 'up'
    P -> 'over'
    P -> 'with'
    """)

    # Callback invoked by the editor with the edited grammar.
    def cb(grammar): print grammar
    top = Tk()
    editor = CFGEditor(top, grammar, cb)
    Label(top, text='\nTesting CFG Editor\n').pack()
    Button(top, text='Quit', command=top.destroy).pack()
    top.mainloop()
예제 #16
0
def build_grammar():
	"""Return a small English CFG (flight-booking style vocabulary).

	BUG FIX: the noun rule was written ``None -> ...`` while the phrase rule
	``NP -> Article Noun`` referenced a never-defined ``Noun`` (and ``None``
	was never used). The rule is renamed to ``Noun``.
	"""
	return nltk.parse_cfg('''
		S -> NP VP
		S -> S Conj S
		NP -> Pronoun
		NP -> Name
		NP -> Article Noun
		NP -> Number
		NP -> NP PP
		NP -> NP RelClause
		VP -> Verb
		VP -> Verb NP
		VP -> Verb Adj
		VP -> VP PP
		PP -> Prep NP
		RelClause -> 'that' VP
		Article -> 'the' | 'a' | 'an' | 'this' | 'that'
		Prep -> 'to' | 'in' | 'on' | 'near'
		Conj -> 'and' | 'or' | 'but'
		Pronoun -> 'I' | 'you' | 'he' | 'me' | 'him'
		Verb -> 'book' | 'include' | 'prefer' | 'walk'
		Noun -> 'book' | 'flight' | 'meal'
		Name -> 'John' | 'Mary' | 'Boston'
		Adj -> 'first' | 'earliest' | 'cheap'
		''')
예제 #17
0
def demo():
    """
    A demonstration of the recursive descent parser.
    """

    from nltk import parse, parse_cfg
    
    grammar = parse_cfg("""
    S -> NP VP
    NP -> Det N | Det N PP
    VP -> V NP | V NP PP
    PP -> P NP
    NP -> 'I'
    N -> 'man' | 'park' | 'telescope' | 'dog'
    Det -> 'the' | 'a'
    P -> 'in' | 'with'
    V -> 'saw'
    """)
    
    # Dump every production so the trace below is easy to follow.
    for prod in grammar.productions():
        print prod
    
    sent = 'I saw a man in the park'.split()
    # trace=2 prints each expand/match step of the recursive descent.
    parser = parse.RecursiveDescentParser(grammar, trace=2)
    for p in parser.nbest_parse(sent):
        print p
예제 #18
0
def generate_grammar(sentence):
    """Compile a CFG from the global frequent rules extended with one
    lexical rule per (word, POS-tag) pair in *sentence*."""
    rule_lines = [rule for rule, _count in frequent_rules]
    grammar = "\n".join(rule_lines)
    for (word, pos_tag) in sentence:
        grammar = grammar + "%s -> '%s' \n" % (pos_tag, word)
    return nltk.parse_cfg(grammar)
	def test_returnRelevantTuples_2(self):
		"""Running the full CYK algorithm should accept this grammatical sentence."""
		# arrange
		testGrammar = """
S -> NP VP

VP -> VP PP
VP -> V NP
VP -> 'eats'

PP -> P NP

NP -> Det N
NP -> 'she'

V -> 'eats'

P -> 'with'

N -> 'fish'
N -> 'fork'

Det -> 'a'
"""
		grammar = nltk.parse_cfg(testGrammar)

		sent = ['she', 'eats', 'a', 'fish', 'with', 'a', 'fork']

		inst = cyk.Cyk(sent, grammar.productions())

		# act		
		inst.executeAlgorithm()

		# assert
		self.assertTrue(inst.isInGrammar())
예제 #20
0
def test():
	"""Recursive-descent parsing demo (the grammar body was left blank).

	BUG FIX: ``nltk.Recursive Descent Parser`` and ``rd_parser.n best_parse``
	contained spaces inside identifiers (SyntaxError); corrected to
	``RecursiveDescentParser`` / ``nbest_parse``.
	"""
	import nltk
	# NOTE(review): the grammar text is empty, so parse_cfg will fail at
	# runtime until productions are filled in.
	grammar1 = nltk.parse_cfg("""
	
	""")
	rd_parser = nltk.RecursiveDescentParser(grammar1)

	sent = "Lee ran away home".split()
	t = rd_parser.nbest_parse(sent)
예제 #21
0
def test():
	"""Shift-reduce parsing demo (the grammar body was left blank).

	BUG FIX: the body mixed space and tab indentation (IndentationError) and
	``nltk.Shift Reduce Parser`` had spaces inside the identifier
	(SyntaxError); corrected to tabs and ``ShiftReduceParser``.
	"""
	import nltk
	# NOTE(review): the grammar text is empty, so parse_cfg will fail at
	# runtime until productions are filled in.
	grammar1 = nltk.parse_cfg("""
	
	""")
	sr_parse = nltk.ShiftReduceParser(grammar1)

	sent = "Lee ran away home".split()
	return sr_parse.parse(sent)
 def __init__(self, blackboard):
     """Register on *blackboard* (importance 3) and build the comparison
     grammar template ("as ADJ as ..." / "ADJ like ...")."""
     super(ComparisonExpert, self).__init__(blackboard,
                                            "Comparison Expert",
                                            importance=3)
     self.grammar = nltk.parse_cfg("""
         S -> AS JJ AS Det N | JJ LIKE Det N
         JJ -> 'adj'
         N -> 'n'
         Det -> 'det'
         LIKE -> 'like'
         AS -> 'as'
         """)
 def __init__(self, blackboard, tense = "present"):
     """Register on *blackboard* and build the rhetorical-question grammar
     ("what be the ..." / "why be the ... so ...")."""
     super(RhetoricalExpert, self).__init__(blackboard, "Rhetorical Expert", tense, 3)
     self.grammar = nltk.parse_cfg("""
         S -> WHAT BE Det NP | WHY BE Det N SO JJ
         NP -> JJ N | N
         JJ -> 'adj'
         N -> 'n'
         Det -> 'the'
         BE -> 'be'
         SO -> 'so'
         WHAT -> 'what'
         WHY -> 'why'
         """)
 def __init__(self, blackboard, tense="present", person=3):
     """Register on *blackboard* (importance 2) and build the metaphor
     grammar template ("person be like ...")."""
     super(MetaphoreExpert, self).__init__(blackboard, "Metaphore Expert", tense=tense, person=person, importance=2)
     self.grammar = nltk.parse_cfg(
         """
         S -> Person BE LIKE NP 
         NP -> Det JJ N | Det N
         Person -> 'person'
         JJ -> 'adj'
         N -> 'n'
         Det -> 'the'
         BE -> 'be'
         LIKE -> 'like'
         """
     )
예제 #25
0
 def __init__(self, blackboard, tense="present"):
     """Register on *blackboard* and build the rhetorical-question grammar
     ("what be the ..." / "why be the ... so ...")."""
     super(RhetoricalExpert, self).__init__(blackboard, "Rhetorical Expert",
                                            tense, 3)
     self.grammar = nltk.parse_cfg("""
         S -> WHAT BE Det NP | WHY BE Det N SO JJ
         NP -> JJ N | N
         JJ -> 'adj'
         N -> 'n'
         Det -> 'the'
         BE -> 'be'
         SO -> 'so'
         WHAT -> 'what'
         WHY -> 'why'
         """)
예제 #26
0
def sentence_parse_example():
    """Chart-parse the ambiguous Groucho Marx sentence and print all trees."""
    groucho_grammar = nltk.parse_cfg("""
    S -> NP VP
    PP -> P NP
    NP -> Det N | Det N PP | 'I'
    VP -> V NP | VP PP
    Det -> 'an' | 'my'
    N -> 'elephant' | 'pajamas'
    V -> 'shot'
    P -> 'in'
  """)
    sent = ["I", "shot", "an", "elephant", "in", "my", "pajamas"]
    parser = nltk.ChartParser(groucho_grammar)
    # The PP "in my pajamas" can attach to NP or VP, yielding two trees.
    trees = parser.nbest_parse(sent)
    for tree in trees:
        print tree
예제 #27
0
def chart_parsing():
    """Build and display a well-formed substring table (WFST) for the
    Groucho sentence, before and after the completion step."""
    groucho_grammar = nltk.parse_cfg("""
    S -> NP VP
    PP -> P NP
    NP -> Det N | Det N PP | 'I'
    VP -> V NP | VP PP
    Det -> 'an' | 'my'
    N -> 'elephant' | 'pajamas'
    V -> 'shot'
    P -> 'in'
  """)
    tokens = "I shot an elephant in my pajamas".split()
    wfst0 = _chart_init_wfst(tokens, groucho_grammar)
    _chart_display(wfst0, tokens)
    wfst1 = _chart_complete_wfst(wfst0, tokens, groucho_grammar, trace=True)
    _chart_display(wfst1, tokens)
예제 #28
0
 def __init__(self, blackboard, tense="present", person=3):
     """Register on *blackboard* (importance 2) and build the metaphor
     grammar template ("person be like ...")."""
     super(MetaphoreExpert, self).__init__(blackboard,
                                           "Metaphore Expert",
                                           tense=tense,
                                           person=person,
                                           importance=2)
     self.grammar = nltk.parse_cfg("""
         S -> Person BE LIKE NP 
         NP -> Det JJ N | Det N
         Person -> 'person'
         JJ -> 'adj'
         N -> 'n'
         Det -> 'the'
         BE -> 'be'
         LIKE -> 'like'
         """)
예제 #29
0
def chart_parse(in_file, grammar_file, out_file):
    """Chart-parse every sentence of *in_file* with the CFG in
    *grammar_file* and write the pretty-printed trees to *out_file*.

    Any failure (bad grammar, word not covered by the grammar, ...) prints
    a message to stderr and terminates the process.
    """
    text = unicode(open(in_file, 'r').read(), errors='ignore')
    output = open(out_file, 'w')
    grammar_string = unicode(open(grammar_file, 'r').read(), errors='ignore')
    try:
        grammar = nltk.parse_cfg(grammar_string)
        parser = nltk.ChartParser(grammar)
        sentences = nltk.sent_tokenize(text)
        for sentence in sentences:
            words = nltk.word_tokenize(sentence)
            tree = parser.parse(words)
            output.write(tree.pprint())
            output.write('\n')
    except Exception, e:
        message = "Error with parsing. Check the input files are correct and the grammar contains every word in the input sequence. \n----\n" + str(e)
        sys.stderr.write(message)
        sys.exit()
예제 #30
0
def another_test():
    """Parse a POS-tag sequence with a tiny CFG and print every parse tree."""
    pos_grammar = nltk.parse_cfg("""
S -> NP VP
NP -> 'DT' 'NN'
VP -> 'VB' | 'VBP'
VP -> 'VB' 'NN'
""")

    # The "sentence" here is a sequence of POS tags, not words.
    tokens = "DT NN VB NN".split(" ")

    # Chart-parse the tag sequence and show each n-best tree.
    chart_parser = nltk.ChartParser(pos_grammar)
    for parse in chart_parser.nbest_parse(tokens):
        print(parse)
def simpleGrammar():
    """Parse "Mary saw Bob" with a toy CFG via recursive descent."""

    grammar1 = nltk.parse_cfg("""
       S -> NP VP
       VP -> V NP | V NP PP
       PP -> P NP
       V -> "saw" | "ate" | "walked"
       NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
       Det -> "a" | "an" | "the" | "my"
       N -> "man" | "dog" | "cat" | "telescope" | "park"
       P -> "in" | "on" | "by" | "with"
       """)

    sent = "Mary saw Bob".split()
    rd_parser = nltk.RecursiveDescentParser(grammar1)
    for tree in rd_parser.nbest_parse(sent):
        print tree
예제 #32
0
def process2(s):
    """Tokenize sentence *s* and return its parse trees under the Groucho
    grammar."""
    tokens = nltk.word_tokenize(s)
    tagged = nltk.pos_tag(tokens)  # kept for parity with the original; unused

    grammar = nltk.parse_cfg("""
		S -> NP VP
		PP -> P NP
		NP -> Det N | Det N PP | 'I'
		VP -> V NP | VP PP
 		Det -> 'an' | 'my'
 		N -> 'elephant' | 'pajamas'
		V -> 'shot'
		P -> 'in'
	""")
    parser = nltk.ChartParser(grammar)
    # BUG FIX: the original parsed `tagged` -- a list of (word, tag) tuples --
    # but the grammar's terminals are plain words, so the parser's coverage
    # check failed. Parse the raw token list instead.
    trees = parser.nbest_parse(tokens)
    return trees
예제 #33
0
def chart_parsing():
    """Build and display a well-formed substring table (WFST) for the
    Groucho sentence, before and after the completion step."""
    groucho_grammar = nltk.parse_cfg(
        """
    S -> NP VP
    PP -> P NP
    NP -> Det N | Det N PP | 'I'
    VP -> V NP | VP PP
    Det -> 'an' | 'my'
    N -> 'elephant' | 'pajamas'
    V -> 'shot'
    P -> 'in'
  """
    )
    tokens = "I shot an elephant in my pajamas".split()
    wfst0 = _chart_init_wfst(tokens, groucho_grammar)
    _chart_display(wfst0, tokens)
    wfst1 = _chart_complete_wfst(wfst0, tokens, groucho_grammar, trace=True)
    _chart_display(wfst1, tokens)
def ambiguity():
    """Show structural ambiguity: the Groucho sentence gets two parse trees
    (PP attaches to either the NP or the VP)."""

    groucho_grammar = nltk.parse_cfg("""
    S -> NP VP
    PP -> P NP
    NP -> Det N | Det N PP | 'I'
    VP -> V NP | VP PP
    Det -> 'an' | 'my'
    N -> 'elephant' | 'pajamas'
    V -> 'shot'
    P -> 'in'
    """)

    sent = ['I', 'shot', 'an', 'elephant', 'in', 'my', 'pajamas']
    parser = nltk.ChartParser(groucho_grammar)
    trees = parser.nbest_parse(sent)
    for tree in trees:
        print tree
예제 #35
0
def sentence_parse_example():
    """Chart-parse the ambiguous Groucho Marx sentence and print all trees."""
    groucho_grammar = nltk.parse_cfg(
        """
    S -> NP VP
    PP -> P NP
    NP -> Det N | Det N PP | 'I'
    VP -> V NP | VP PP
    Det -> 'an' | 'my'
    N -> 'elephant' | 'pajamas'
    V -> 'shot'
    P -> 'in'
  """
    )
    sent = ["I", "shot", "an", "elephant", "in", "my", "pajamas"]
    parser = nltk.ChartParser(groucho_grammar)
    trees = parser.nbest_parse(sent)
    for tree in trees:
        print tree
def simpleGrammar():
    """Parse "Mary saw Bob" with a toy CFG via recursive descent."""

    grammar1 = nltk.parse_cfg(
        """
       S -> NP VP
       VP -> V NP | V NP PP
       PP -> P NP
       V -> "saw" | "ate" | "walked"
       NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
       Det -> "a" | "an" | "the" | "my"
       N -> "man" | "dog" | "cat" | "telescope" | "park"
       P -> "in" | "on" | "by" | "with"
       """
    )

    sent = "Mary saw Bob".split()
    rd_parser = nltk.RecursiveDescentParser(grammar1)
    for tree in rd_parser.nbest_parse(sent):
        print tree
def recursiveSyntacticStructure():

    grammar2 = nltk.parse_cfg("""
    S  -> NP VP
    NP -> Det Nom | PropN
    Nom -> Adj Nom | N
    VP -> V Adj | V NP | V S | V NP PP
    PP -> P NP
    PropN -> 'Buster' | 'Chatterer' | 'Joe'
    Det -> 'the' | 'a'
    N -> 'bear' | 'squirrel' | 'tree' | 'fish' | 'log'
    Adj  -> 'angry' | 'frightened' |  'little' | 'tall'
    V ->  'chased'  | 'saw' | 'said' | 'thought' | 'was' | 'put'
    P -> 'on'
    """)

    rd_parser = nltk.RecursiveDescentParser(grammar1)
    sent = 'Mary saw a dog'.split()
    for t in rd_parser.nbest_parse(sent):
        print t
예제 #38
0
def parsing_types():
    """Parse the same sentence with a recursive-descent parser and a
    shift-reduce parser, printing the trees each one finds."""
    grammar = nltk.parse_cfg("""
    S -> NP VP
    VP -> V NP | V NP PP
    PP -> P NP
    V -> "saw" | "ate" | "walked"
    NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
    Det -> "a" | "an" | "the" | "my"
    N -> "man" | "dog" | "cat" | "telescope" | "park"
    P -> "in" | "on" | "by" | "with"
  """)
    sent = "Mary saw a dog".split()
    rd_parser = nltk.RecursiveDescentParser(grammar)
    print "==== recursive descent ===="
    for t in rd_parser.nbest_parse(sent):
        print t
    sr_parser = nltk.ShiftReduceParser(grammar)
    print "==== shift reduce ===="
    for t in sr_parser.nbest_parse(sent):
        print t
예제 #39
0
파일: cky.py 프로젝트: dqd/heap
def cky(sentence, verbose):
    """
    Perform the syntactic analysis on the sentence. The function uses
    the Cocke–Kasami–Younger algorithm. If a word in the sentence is
    not recognized, the function terminate the program with an error
    message.

    @param sentence: any correctly formed Czech sentence.
    @type sentence: String
    @param verbose: verbose output.
    @type verbose: Bool
    """
    if verbose:
        print "Lexical analysis..."

    lexical = []

    for word in nltk.tokenize.WordPunctTokenizer().tokenize(utfize(sentence)):
        classes = morph(word)

        if not classes:
            print "Error: the word '%s' is not recognized by the morphological analyser Ajka." % word
            sys.exit(1)

        lexical.append((word, classes))

    if verbose:
        for l in lexical:
            print "%s: %s" % (l[0], u", ".join(l[1]))

        print "\nLoading the grammar..."

    try:
        f = open(GRAMMAR_FILE, "r")

        grammar = nltk.parse_cfg(f.read())

        f.close()
    except IOError, err:
        print "Error: %s." % err
        sys.exit(1)
예제 #40
0
파일: cfgtest.py 프로젝트: mwhite/compling
def test(grammarText, sentences):
	"""Test the coverage of a CFG grammar.
	
	grammarText -- the grammar string
	sentences -- a list of sentences to test, with invalid ones prefixed with '*'
	
	"""	
	
	valid_sentences = [s for s in sentences if s[0] != '*']
	invalid_sentences = [s[1:] for s in sentences if s[0] == '*']
	parser = ChartParser(parse_cfg(grammarText))

	# Every valid sentence must receive at least one parse tree...
	for sentence in valid_sentences:
		parses = parser.nbest_parse(sentence.split())
		print(sentence + "\n" + "\n".join(map(str, parses)) + "\n")
		assert parses, "Valid sentence failed to parse."

	# ...and every starred sentence must receive none.
	for sentence in invalid_sentences:
		print("*" + sentence)
		parses = parser.nbest_parse(sentence.split())
		assert parses == [], "Invalid sentence parsed successfully."
예제 #41
0
파일: run.py 프로젝트: myrlund/tdt4275-nlp
def ex_a():
    """Run four NLTK parser classes (with tracing) over the same ambiguous
    sentence and report how many trees each one finds."""
    from part2_cfg import simple_grammar
    grammar = nltk.parse_cfg(simple_grammar)

    parsers = (nltk.RecursiveDescentParser,
               nltk.ShiftReduceParser,
               nltk.EarleyChartParser,
               nltk.BottomUpChartParser)

    for parser_class in parsers:
        print "Testing parser:", parser_class.__name__
        # trace=2 prints every step the parser takes.
        parser = parser_class(grammar, trace=2)

        i = "a man saw a man with a cat"
        trees = parser.nbest_parse(i.split())

        print "\n%d trees matches input '%s':" % (len(trees), i)
        for tree in trees:
            print tree

        print ""
예제 #42
0
def simple_cfg():
    """Parse a sentence with a recursive CFG via recursive descent; earlier
    demo grammars/sentences are kept below as comments."""
    #  grammar = nltk.parse_cfg("""
    #    S -> NP VP
    #    VP -> V NP | V NP PP
    #    PP -> P NP
    #    V -> "saw" | "ate" | "walked"
    #    NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
    #    Det -> "a" | "an" | "the" | "my"
    #    N -> "man" | "dog" | "cat" | "telescope" | "park"
    #    P -> "in" | "on" | "by" | "with"
    #  """)
    # also can load grammar from text file
    # grammar = nltk.data.load("file:mygrammar.cfg")
    grammar = nltk.parse_cfg(
        """
    S -> NP VP
    NP -> Det Nom | PropN
    Nom -> Adj Nom | N
    VP -> V Adj | V NP | V S | V NP PP
    PP -> P NP
    PropN -> 'Buster' | 'Chatterer' | 'Joe'
    Det -> 'the' | 'a'
    N -> 'bear' | 'squirrel' | 'tree' | 'fish' | 'log'
    Adj -> 'angry' | 'frightened' | 'little' | 'tall'
    V -> 'chased' | 'saw' | 'said' | 'thought' | 'was' | 'put'
    P -> 'on'
  """
    )
    #  sent = "Mary saw Bob".split()
    # structural ambiguity - 2 parse trees for this.
    # prepositional phrase attach ambiguity.
    #  sent = "the dog saw a man in a park".split()
    # For second grammar
    #  sent = "the angry bear chased the frightened little squirrel".split()
    sent = "Chatterer said Buster thought the tree was tall".split()
    #  rd_parser = nltk.RecursiveDescentParser(grammar, trace=2) # for debug
    # NOTE: production rules need to be right-recursive, ie X -> Y X
    rd_parser = nltk.RecursiveDescentParser(grammar)
    for tree in rd_parser.nbest_parse(sent):
        print tree
예제 #43
0
def cfg_demo():
    """
    A demonstration showing how C{ContextFreeGrammar}s can be created and used.
    """

    from nltk import nonterminals, Production, parse_cfg

    # Create some nonterminals
    S, NP, VP, PP = nonterminals('S, NP, VP, PP')
    N, V, P, Det = nonterminals('N, V, P, Det')
    # The '/' operator builds a "slashed" nonterminal (e.g. VP/NP).
    VP_slash_NP = VP / NP

    # NOTE: `x` is Python 2 backtick shorthand for repr(x).
    print 'Some nonterminals:', [S, NP, VP, PP, N, V, P, Det, VP / NP]
    print '    S.symbol() =>', ` S.symbol() `
    print

    print Production(S, [NP])

    # Create some Grammar Productions
    grammar = parse_cfg("""
      S -> NP VP
      PP -> P NP
      NP -> Det N | NP PP
      VP -> V NP | VP PP
      Det -> 'a' | 'the'
      N -> 'dog' | 'cat'
      V -> 'chased' | 'sat'
      P -> 'on' | 'in'
    """)

    print 'A Grammar:', ` grammar `
    print '    grammar.start()       =>', ` grammar.start() `
    print '    grammar.productions() =>',
    # Use string.replace(...) is to line-wrap the output.
    print ` grammar.productions() `.replace(',', ',\n' + ' ' * 25)
    print

    print 'Coverage of input words by a grammar:'
    print grammar.covers(['a', 'dog'])
    print grammar.covers(['a', 'toy'])
예제 #44
0
def translate2(q, tname='T'):
    """Translate query *q* into SQL; returns (nested_sql, flat_sql).

    Timing checkpoints are stored in the globals T2..T6 and the generated
    grammar text in GR.
    """
    global T2, T3, T4, T5, T6, GR

    T2 = time.time()

    # tokenization
    l = tokenize(q)
    tokens = [a[1] for a in l]
    assert (tokens[0] == '//')
    T3 = time.time()

    # build grammar: extend the static grammar with one Qname rule per
    # string-typed token
    GR = grammar_text
    for typ, t in l:
        if typ == 's':
            GR += "Qname -> '" + t + "'\n"
    grammar = parse_cfg(GR)
    parser = parse.ChartParser(grammar, parse.TD_STRATEGY)
    T4 = time.time()

    # chart-parse the query; bail out early when no tree is found
    trees = parser.nbest_parse(tokens)
    if not trees:
        T5 = T6 = time.time()
        return None, None
    tree = trees[0]
    T5 = time.time()

    # translate the parse tree
    r = Trans(tree, SerialNumber(), tname=tname).getSql()
    T6 = time.time()

    # BUG FIX: the original repeated the TransFlat call unconditionally
    # after this try/except, which defeated the fallback to None.
    try:
        r1 = TransFlat(tree, SerialNumber(), tname=tname).getSql()
    except:
        r1 = None

    return r, r1
예제 #45
0
def translate2(q,tname='T'):
    """Translate query *q* into SQL; returns (nested_sql, flat_sql).

    Timing checkpoints are stored in the globals T2..T6 and the generated
    grammar text in GR.
    """
    global T2, T3, T4, T5, T6, GR
    
    T2 = time.time()

    # tokenization
    l = tokenize(q)
    tokens = [a[1] for a in l]
    assert(tokens[0] == '//')
    T3 = time.time()

    # build grammar: extend the static grammar with one Qname rule per
    # string-typed token
    GR = grammar_text
    for typ, t in l:
        if typ == 's':
            GR += "Qname -> '" + t + "'\n"
    grammar = parse_cfg(GR)
    parser = parse.ChartParser(grammar, parse.TD_STRATEGY)
    T4 = time.time()

    # chart-parse the query; bail out early when no tree is found
    trees = parser.nbest_parse(tokens)
    if not trees:
        T5 = T6 = time.time()
        return None, None
    tree = trees[0]
    T5 = time.time()

    # translate the parse tree
    r = Trans(tree,SerialNumber(),tname=tname).getSql()
    T6 = time.time()

    # BUG FIX: the original repeated the TransFlat call unconditionally
    # after this try/except, which defeated the fallback to None.
    try:
        r1 = TransFlat(tree,SerialNumber(),tname=tname).getSql()
    except:
        r1 = None

    return r, r1
예제 #46
0
def parsing_types():
    """Parse the same sentence with a recursive-descent parser and a
    shift-reduce parser, printing the trees each one finds."""
    grammar = nltk.parse_cfg(
        """
    S -> NP VP
    VP -> V NP | V NP PP
    PP -> P NP
    V -> "saw" | "ate" | "walked"
    NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
    Det -> "a" | "an" | "the" | "my"
    N -> "man" | "dog" | "cat" | "telescope" | "park"
    P -> "in" | "on" | "by" | "with"
  """
    )
    sent = "Mary saw a dog".split()
    rd_parser = nltk.RecursiveDescentParser(grammar)
    print "==== recursive descent ===="
    for t in rd_parser.nbest_parse(sent):
        print t
    sr_parser = nltk.ShiftReduceParser(grammar)
    print "==== shift reduce ===="
    for t in sr_parser.nbest_parse(sent):
        print t
	def test_returnRelevantTuples_1(self):
		"""getAcceptablePairs(0, 1) should yield the two rule pairs for the cell."""
		# arrange
		testGrammar = """
S -> NP VP

VP -> VP PP
VP -> V NP
VP -> 'eats'

PP -> P NP

NP -> Det N
NP -> 'she'

V -> 'eats'

P -> 'with'

N -> 'fish'
N -> 'fork'

Det -> 'a'
"""
		grammar = nltk.parse_cfg(testGrammar)

		sent = ['she', 'eats', 'a', 'fish', 'with', 'a', 'fork']

		inst = cyk.Cyk(sent, grammar.productions())

		# act		
		pairs = inst.getAcceptablePairs(0, 1)

		# assert
		self.assertTrue(len(pairs) == 2)
		self.assertTrue(pairs[0][0] == "NP")
		self.assertTrue(pairs[0][1] == "VP")

		self.assertTrue(pairs[1][0] == "NP")
		self.assertTrue(pairs[1][1] == "V")
def recursiveSyntacticStructure():

    grammar2 = nltk.parse_cfg(
        """
    S  -> NP VP
    NP -> Det Nom | PropN
    Nom -> Adj Nom | N
    VP -> V Adj | V NP | V S | V NP PP
    PP -> P NP
    PropN -> 'Buster' | 'Chatterer' | 'Joe'
    Det -> 'the' | 'a'
    N -> 'bear' | 'squirrel' | 'tree' | 'fish' | 'log'
    Adj  -> 'angry' | 'frightened' |  'little' | 'tall'
    V ->  'chased'  | 'saw' | 'said' | 'thought' | 'was' | 'put'
    P -> 'on'
    """
    )

    rd_parser = nltk.RecursiveDescentParser(grammar1)
    sent = "Mary saw a dog".split()
    for t in rd_parser.nbest_parse(sent):
        print t
예제 #49
0
def simple_cfg():
    #  grammar = nltk.parse_cfg("""
    #    S -> NP VP
    #    VP -> V NP | V NP PP
    #    PP -> P NP
    #    V -> "saw" | "ate" | "walked"
    #    NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
    #    Det -> "a" | "an" | "the" | "my"
    #    N -> "man" | "dog" | "cat" | "telescope" | "park"
    #    P -> "in" | "on" | "by" | "with"
    #  """)
    # also can load grammar from text file
    # grammar = nltk.data.load("file:mygrammar.cfg")
    grammar = nltk.parse_cfg("""
    S -> NP VP
    NP -> Det Nom | PropN
    Nom -> Adj Nom | N
    VP -> V Adj | V NP | V S | V NP PP
    PP -> P NP
    PropN -> 'Buster' | 'Chatterer' | 'Joe'
    Det -> 'the' | 'a'
    N -> 'bear' | 'squirrel' | 'tree' | 'fish' | 'log'
    Adj -> 'angry' | 'frightened' | 'little' | 'tall'
    V -> 'chased' | 'saw' | 'said' | 'thought' | 'was' | 'put'
    P -> 'on'
  """)
    #  sent = "Mary saw Bob".split()
    # structural ambiguity - 2 parse trees for this.
    # prepositional phrase attach ambiguity.
    #  sent = "the dog saw a man in a park".split()
    # For second grammar
    #  sent = "the angry bear chased the frightened little squirrel".split()
    sent = "Chatterer said Buster thought the tree was tall".split()
    #  rd_parser = nltk.RecursiveDescentParser(grammar, trace=2) # for debug
    # NOTE: production rules need to be right-recursive, ie X -> Y X
    rd_parser = nltk.RecursiveDescentParser(grammar)
    for tree in rd_parser.nbest_parse(sent):
        print tree
예제 #50
0
def app():
    """Launch the interactive recursive-descent parser demo on a small
    toy grammar and a fixed example sentence."""
    from nltk import parse_cfg

    demo_grammar = parse_cfg("""
    # Grammatical productions.
        S -> NP VP
        NP -> Det N PP | Det N
        VP -> V NP PP | V NP | V
        PP -> P NP
    # Lexical productions.
        NP -> 'I'
        Det -> 'the' | 'a'
        N -> 'man' | 'park' | 'dog' | 'telescope'
        V -> 'ate' | 'saw'
        P -> 'in' | 'under' | 'with'
    """)
    tokens = 'the dog saw a man in the park'.split()
    # Blocks in the Tk main loop until the demo window is closed.
    RecursiveDescentApp(demo_grammar, tokens).mainloop()
예제 #51
0
def demo():
    """Run the shift-reduce parser, with step tracing, on one sentence."""
    from nltk import parse, parse_cfg

    toy_grammar = parse_cfg("""
    S -> NP VP
    NP -> Det N | Det N PP
    VP -> V NP | V NP PP
    PP -> P NP
    NP -> 'I'
    N -> 'man' | 'park' | 'telescope' | 'dog'
    Det -> 'the' | 'a'
    P -> 'in' | 'with'
    V -> 'saw'
    """)
    tokens = 'I saw a man in the park'.split()
    # trace=2 makes every shift/reduce step print as it happens.
    sr = parse.ShiftReduceParser(toy_grammar, trace=2)
    for tree in sr.nbest_parse(tokens):
        print(tree)
예제 #52
0
# To run this script, use Python's interactive mode (so the Tk window
# stays alive), i.e.:  python -i nltk_chartparser_app.py

import nltk

# The classic PP-attachment ambiguity sentence.
words = ["I", "shot", "an", "elephant", "in", "my", "pajamas"]

# NP and VP are both recursive (NP -> NP PP, VP -> VP PP), which is what
# produces the two competing attachments for "in my pajamas".
grammar = nltk.parse_cfg("""
 S -> NP VP
 PP -> P NP
 NP -> Det N | 'I' | NP PP
 VP -> V NP | VP PP
 Det -> 'an' | 'my'
 N -> 'elephant' | 'pajamas'
 V -> 'shot'
 P -> 'in'
 """)

# Opens the interactive chart-parser GUI on the grammar and sentence.
nltk.app.chartparser_app.ChartParserApp(grammar, words)
예제 #53
0
@author: coco wang
@license: Apache Licence 
@contact: [email protected]
@site: 
@software: PyCharm
@file: D17.py
@time: 2018/1/17 0017 上午 9:37
"""
# Analyse sentence structure (the "elephant in my pajamas" example).
import nltk

# BUG FIX: the grammar string was pasted from an interactive session and
# still carried the "... " continuation prompts on every line, which
# nltk.parse_cfg cannot parse; the prompts are stripped here.
groucho_grammar = nltk.parse_cfg("""
S -> NP VP
PP -> P NP
NP -> Det N | Det N PP | 'I'
VP -> V NP | VP PP
Det -> 'an' | 'my'
N -> 'elephant' | 'pajamas'
V -> 'shot'
P -> 'in'
""")

sent = ['I', 'shot', 'an', 'elephant', 'in', 'my', 'pajamas']
parser = nltk.ChartParser(groucho_grammar)
trees = parser.nbest_parse(sent)
for tree in trees:
    print(tree)

sr_parse = nltk.ShiftReduceParser(groucho_grammar)
sent = 'Mary saw a dog'.split()
# NOTE(review): 'Mary', 'saw' and 'dog' are not terminals of this grammar,
# so this parse cannot succeed -- looks copied from another example; confirm.
print(sr_parse.parse(sent))
예제 #54
0
# NOTE(review): `tree` and `sent` are defined earlier in this script,
# outside this excerpt; `tree` is presumably a parse being displayed.
print tree

nltk.draw.tree.TreeView(tree)
raw_input()  # pause so the Tk tree window stays open

# --------------------------- OWN CFG -------------------------------- #

#sent = word_tokenize(sent)

# POS-tag the sentence (assumes `sent` is already tokenized -- verify).
tags = pos_tag(sent, 1)
print tags
raw_input()

# Grammar whose terminals are phrase labels themselves ('NP', 'VP', 'P'),
# i.e. it parses a sequence of chunk labels rather than actual words.
own_grammar = nltk.parse_cfg("""
S -> NP VP
PP -> P NP
NP -> Det N | Det N PP
VP -> V NP | VP PP
NP -> 'NP'
VP -> 'VP'
PP -> 'P'
""")

rd_parser = nltk.RecursiveDescentParser(own_grammar)

# Show each parse in turn, one Tk window at a time.
for p in rd_parser.nbest_parse(sent):
    print p
    nltk.draw.tree.TreeView(p)
    raw_input()
예제 #55
0
  element -> """ + _orjoin(elements) + """
  digit -> """ + _orjoin(digits) + """
  phase -> """ + _orjoin(phases) + """
  number -> digit | digit number
  group -> suffixed | suffixed group
  paren_group_round -> '(' group ')'
  paren_group_square -> '[' group ']'
  plus_minus -> '+' | '-'
  number_suffix -> number
  ion_suffix -> '^' number plus_minus | '^' plus_minus
  suffix -> number_suffix | number_suffix ion_suffix | ion_suffix
  unsuffixed -> element | paren_group_round | paren_group_square

  suffixed -> unsuffixed | unsuffixed suffix
"""
# Chart parser over the chemical-formula grammar assembled above
# (the `grammar` string is built from the element/digit/phase lists).
parser = nltk.ChartParser(nltk.parse_cfg(grammar))


def _clean_parse_tree(tree):
    ''' The parse tree contains a lot of redundant
    nodes. E.g. paren_groups have groups as children, etc. This will
    clean up the tree.
    '''
    def unparse_number(n):
        ''' Go from a number parse tree to a number '''
        # A `number` node is right-recursive: either a single digit, or
        # digit + number; concatenate the digit leaves left-to-right.
        if len(n) == 1:
            rv = n[0][0]
        else:
            rv = n[0][0] + unparse_number(n[1])
        return rv
예제 #56
0
import nltk

#a simple context-free grammar
grammar1 = nltk.parse_cfg("""
  S -> NP VP
  VP -> V NP | V NP PP
  PP -> P NP
  V -> "saw" | "ate" | "walked"
  NP -> "John" | "Mary" | "Bob" | Det N | DEt N PP
  Det -> "a" | "an" | "the" | "my"
  N -> "man" | "dog" | "cat" | "telescope" | "park"
  P -> "in" | "on" | "by" | "with"
""")
sent = "Mary saw Bob".split()
rd_parser = nltk.RecursiveDescentParser(grammar1)
for tree in rd_parser.nbest_parse(sent):
    print tree

#create my own grammar, grammar can be written on an independent file
#grammar1 = nltk.data.load('file:mygrammar.cfg')
#...
예제 #57
0
import nltk

# NOTE(review): assumes a "grammar.cfg" file exists in the working
# directory; the open file handle is handed straight to parse_cfg.
grammar = nltk.parse_cfg(open("grammar.cfg"))
parser = nltk.ChartParser(grammar)


def parse(sentence):
    """Tokenize *sentence* and return the best parse tree for it,
    using the module-level chart parser."""
    tokens = nltk.word_tokenize(sentence)
    trees = parser.nbest_parse(tokens)
    return trees[0]


if __name__ == "__main__":
    # Demo: the classic PP-attachment ambiguity sentence.
    print parse("the man saw the dog with the telescope")
예제 #58
0
from nltk import pos_tag
from nltk.tokenize import wordpunct_tokenize
from collections import OrderedDict
import random

# Registry of functions by name; presumably populated by the
# add_func_to_dict decorator defined below -- verify.
funct_dict = OrderedDict({})

# CFG whose terminals are Penn Treebank POS tags ('DT', 'NN', ...):
# sentences are parsed as sequences of tags rather than of words.
grammar1 = nltk.parse_cfg("""
    Sent  -> NP VP | NP VP END
    NP -> Det Nom | PropN | Det NP | N | PR | PR Nom
    Nom -> Adj Nom | N
    VP -> V Adj | V NP | V S | V NP PP | V Prep NP | V | V CC V
    PP -> Prep NP

    PropN -> 'NNP' | 'NNPS'
    Det -> 'DT'
    N -> 'NN' | 'NNS'
    Adj  -> 'JJ' | 'JJR' |  'JJS'
    V ->  'VB'  | 'VBD' | 'VBG' | 'VBN' | 'VBP' | 'VBZ'
    Prep -> 'TO' | 'IN'
    CC -> 'CC'
    PR -> 'PRP' | 'PRP$'
    RB -> 'RB' | 'RBR' | 'RBS'
    END -> '.' | '?' | '!'
    """)


def add_func_to_dict(name=None):
    def wrapper(func):
        function_name = name
        if function_name is None:
            function_name = func.__name__
예제 #59
0
from nltk import parse_cfg
from nltk import parse
from nltk import Tree

# CFG over pre-categorised tokens: the terminals are category codes, not
# words (e.g. "Ns"/"Np", "Is"/"Ip" -- presumably singular/plural noun and
# intransitive-verb markers; verify against the lexicon that feeds this).
grammar = parse_cfg('''
   S     -> WHO QP QM | WHICH Nom QP QM
   QP    -> VP | DO NP T
   VP    -> I | T NP | BE A | BE NP | VP AND VP
   NP    -> P | AR Nom | Nom
   Nom   -> AN | AN Rel
   AN    -> N | A AN
   Rel   -> WHO VP | NP T
   N     -> "Ns" | "Np"
   I    -> "Is" | "Ip"
   T    -> "Ts" | "Tp"
   A     -> "A"
   P     -> "P"
   BE    -> "BEs" | "BEp"
   DO    -> "DOs" | "DOp"
   AR    -> "AR"
   WHO   -> "WHO"
   WHICH -> "WHICH"
   AND   -> "AND"
   QM    -> "?"
   ''')

# Module-level chart parser over the question grammar.
chartpsr = parse.ChartParser(grammar)


def all_parses(wlist, lx):