Ejemplo n.º 1
0
def cfg():
    """Parse a two-token sentence with a toy CFG using recursive descent,
    printing every parse tree found.

    NOTE(review): the grammar's start symbol is ``n`` (first production),
    which cannot derive both tokens — presumably a top-level rule is
    missing; confirm against the original snippet.
    """
    g=nltk.CFG.fromstring("""
        n -> '旗下'|'下'
        o -> 'RESUMEDOCSSTARTFLAG'
    """)
    sent=['旗下','RESUMEDOCSSTARTFLAG']
    # Bug fix: the original built a second RecursiveDescentParser and
    # discarded the first one; construct the parser exactly once.
    rd_parser = nltk.RecursiveDescentParser(g)  # recursive-descent (top-down) parser
    for tree in rd_parser.parse(sent):
        print(tree)
Ejemplo n.º 2
0
def parse_maverick_command(command):
    """ Parse Maverick Command text."""
    # Collect the distinct alphabetic words and digit runs in the command.
    words = {m.group(0) for m in re.finditer(r"[a-zA-Z]+", command)}
    numbers = {m.group(0) for m in re.finditer(r"\d+", command)}

    # Work on a copy so the shared production list is never mutated.
    productions = list(maverickRecognizerProductions)

    # One literal production per word and per number.
    for word in words:
        productions.append(literal_production("WORD", word))
    for number in numbers:
        productions.append(literal_production("NUMBER", number))

    # Fresh grammar: original start symbol plus the extended productions.
    local_grammar = CFG(maverickRecognizerGrammar.start(), productions)

    # Hand the grammar to a recursive-descent NLU parser.
    nlu_parser = nltk.RecursiveDescentParser(local_grammar)

    return nlu_parser.parse(command.split())
Ejemplo n.º 3
0
def cfgMatch ( nlQuery ):
    """Match *nlQuery* against a tiny verb+role command grammar.

    Prints "ERROR" and returns -1 when the query contains any token
    outside the grammar's terminal vocabulary (the parser raises on
    unknown tokens, so they are rejected up front); otherwise prints the
    first parse tree and, for each child, the characters of the root label.
    """
    from itertools import islice  # local import keeps this snippet self-contained

    terminalList = [ 'find','search','display','tell','faculty','student','staff','other' ]
    # nltk.parse_cfg() was removed in NLTK 3; CFG.fromstring() replaces it.
    grammar = nltk.CFG.fromstring("""
                    S -> A B
                    A -> 'find'|'search'|'display'|'tell'
                    B -> 'faculty'|'student'|'staff'|'other'
                    """)
    # Check and report an error for any out-of-vocabulary word.
    tokenizedList = list( word_tokenize( nlQuery  ) )
    for word in tokenizedList:
        if word not in terminalList:
            print("ERROR")
            return -1
    parser = nltk.RecursiveDescentParser ( grammar )
    # nbest_parse(tokens, 1) was removed in NLTK 3; parse() yields trees
    # lazily, so islice reproduces the old "best 1" behaviour.
    for tree in islice(parser.parse(tokenizedList), 1):
        print(tree)
        for elem in tree:
            # tree.node was replaced by tree.label() in NLTK 3.
            for ch in tree.label():
                print(ch)
def customGrammar():
    """Load a CFG from mygrammar.cfg and print every recursive-descent
    parse of the sentence "Mary saw Bob"."""
    grammar1 = nltk.data.load('file:mygrammar.cfg')
    sent = "Mary saw Bob".split()
    rd_parser = nltk.RecursiveDescentParser(grammar1)
    # nbest_parse() was removed in NLTK 3; parse() yields all trees.
    for tree in rd_parser.parse(sent):
        print(tree)
Ejemplo n.º 5
0
def check_validity(sentence):
    """Return the string form of the first parse of *sentence* under the
    module-level ``load_grammar``, or False when no parse exists."""
    # Bug fix: the original split an undefined name ``sent`` instead of
    # the ``sentence`` parameter, raising NameError on every call.
    sent_split = sentence.split()
    rd_parser = nltk.RecursiveDescentParser(load_grammar)
    for tree_struc in rd_parser.parse(sent_split):
        return str(tree_struc)
    return False
Ejemplo n.º 6
0
def meth2():
    """Attempt to parse three example sentences with a recursive-descent parser.

    NOTE(review): ``nltk.parse`` is a package, not a callable — this call
    raises TypeError, so the function cannot work as written; a grammar
    object (e.g. from nltk.CFG.fromstring) is presumably intended.
    NOTE(review): ``nbest_parse`` was removed in NLTK 3; ``parse`` is the
    modern equivalent.
    """
    grammar = nltk.parse()
    sent = "My life is brilliant. I saw an angle of that I'm sure. She smiled at me on the subway.".split(
    )
    rd_parser = nltk.RecursiveDescentParser(grammar)
    for p in rd_parser.nbest_parse(sent):
        print(p)
Ejemplo n.º 7
0
def test_grammar(grammar, sentences):
    """Load the grammar file *grammar* and draw every recursive-descent
    parse tree for each whitespace-tokenized sentence in *sentences*."""
    loaded = nltk.data.load("file:{}".format(grammar))
    parser = nltk.RecursiveDescentParser(loaded)
    for i, sent in enumerate(sentences):
        print("Satz {}: {}".format(i, sent))
        tokens = sent.split()
        for tree in parser.parse(tokens):
            tree.draw()  # alternatively: tree.pretty_print()
Ejemplo n.º 8
0
def parse(text):
    """Tokenize *text*, parse it with the PSgrammar.cfg grammar, and both
    print and draw every resulting tree."""
    tokens = word_tokenize(text)
    ps_grammar = nltk.data.load('file:PSgrammar.cfg')
    parser = nltk.RecursiveDescentParser(ps_grammar)
    for parse_tree in parser.parse(tokens):
        print(parse_tree)
        parse_tree.draw()
Ejemplo n.º 9
0
def syntactic_filter_fast(sentences, bot_dict):
    u"""Keep only sentences whose tag sequence parses under the
    module-level ``grammar1``.

    Every token except the last is replaced by a tag drawn at random from
    ``bot_dict``; the final token is kept verbatim.  Sentences whose tag
    sequence yields at least one parse survive.

    Modernized: Python 2 print statements converted to calls, and
    ``nbest_parse`` (removed in NLTK 3) replaced by ``parse``.
    """
    output_sentences = []
    print("Before syntax filter there were " + str(
        len(sentences)) + " sentences.")
    # grammar1 never changes, so build the parser once instead of once
    # per sentence as the original did.
    rd_parser = nltk.RecursiveDescentParser(grammar1)
    for sentence in sentences:
        print("==================")
        print(str(sentence) + "\n")
        tokens = nltk.tokenize.wordpunct_tokenize(sentence)
        justTags = [random.choice(bot_dict[word]) for word in tokens[:-1]]
        justTags.append(tokens[-1])
        print(str(justTags) + "\n")
        try:
            # any() stops at the first parse instead of enumerating all.
            if any(True for _ in rd_parser.parse(justTags)):
                output_sentences.append(sentence)
        except ValueError:
            # Raised for tokens the grammar does not cover; drop sentence.
            pass
    print("After the syntax filter there were " + str(
        len(output_sentences)) + " sentences.")
    print(output_sentences)
    return output_sentences
Ejemplo n.º 10
0
def syntactic_filter(sentences, bot_dict):
    u"""Keep only sentences whose POS-tag sequence parses under the
    module-level ``grammar1``.

    Tags come from nltk.pos_tag, unlike syntactic_filter_fast which draws
    them at random from a lexicon.  ``bot_dict`` is unused here but kept
    for signature parity with syntactic_filter_fast.

    Modernized: Python 2 print statements converted to calls, and
    ``nbest_parse`` (removed in NLTK 3) replaced by ``parse``.
    """
    output_sentences = []
    print("Before syntax filter there were " + str(
        len(sentences)) + " sentences.")
    # The grammar is fixed, so build the parser once rather than per sentence.
    rd_parser = nltk.RecursiveDescentParser(grammar1)
    for sentence in sentences:
        print("==================")
        print(str(sentence) + "\n")
        tokens = nltk.tokenize.wordpunct_tokenize(sentence)
        justTags = [tag for _word, tag in nltk.pos_tag(tokens)]
        print(str(justTags) + "\n")
        try:
            # any() stops at the first successful parse.
            if any(True for _ in rd_parser.parse(justTags)):
                output_sentences.append(sentence)
        except ValueError:
            # Unknown tokens make the parser raise; drop the sentence.
            pass
    print("After the syntax filter there were " + str(
        len(output_sentences)) + " sentences.")
    print(output_sentences)
    return output_sentences
Ejemplo n.º 11
0
 def top_down(self, grammar, text):
     """Split *text* on whitespace and print every top-down
     (recursive-descent) parse of it under *grammar*."""
     print("Text : {}".format(text))
     tokens = text.split()
     parser = nltk.RecursiveDescentParser(grammar)
     for parse_tree in parser.parse(tokens):
         print(parse_tree)
     print("------------------")
Ejemplo n.º 12
0
def main():
    """Label a hard-coded sentence, build an NLTK grammar from the global
    grammar plus the sentence's own lexicon, and print every
    recursive-descent parse of the labeled tokens."""
    initialize()

    # sentence = "you give us information about your location"
    # sentence = "you give information honestly and immediately"
    sentence = "we collecting and storing information permanently"
    # sentence = "you thinking and we are collecting information honestly"
    # sentence = "All work and no play makes jack dull boy"
    # sentence = "we may also collect technical information to help us identify your device for fraud prevention and diagnostic purposes"

    # Replace the sentence's words with their labels, then derive a
    # per-sentence dictionary from the global label dictionary.
    labeled_sentence = do_labeling(sentence, labels.dictionary)
    sentence_dictionary = sentence_to_dictionary(
        labeled_sentence, dictionary_to_bag(labels.dictionary))
    # Merge the global grammar with the sentence's lexical productions
    # into one NLTK grammar string.
    nltk_grammar = combine_grammars((
        convert_grammar(grammar.glob),
        convert_dictionary(sentence_dictionary),
    ))

    print(f"\n # ==================== #\n"
          f" # NLTK GRAMMAR IS      #\n"
          f" # ==================== #\n\n"
          f"{nltk_grammar}")

    binary_grammar = nltk.CFG.fromstring(nltk_grammar)
    rd_parser = nltk.RecursiveDescentParser(binary_grammar)

    print("\n # ===================== #\n"
          " # SENTENCES TREES ARE   #\n"
          " # ===================== #\n")

    # Parse the labeled (not the raw) tokens — the grammar's terminals
    # are labels, not surface words.
    labeled_tokens = labeled_sentence.split()
    for tree in rd_parser.parse(labeled_tokens):
        print(tree)
Ejemplo n.º 13
0
def parse_maverick_command(command, i):
    """ Parse Maverick Command text against recognizer grammar level *i*.

    *i* selects one of the module-level production/grammar pairs (1-5);
    any other value now falls back to level 1 — the same fallback the
    original grammar selection used.  Returns the parse iterator on
    success, the string "Note solved" when the parser returns None, or a
    formatted traceback list when parsing raises.
    """
    # Extract word-like tokens (including punctuation runs and decorated
    # numbers) and plain signed/decimal numbers from the command.
    words = set([
        match.group(0) for match in re.finditer(
            r'''(['()""\w.]+|\.+|\?+|\,+|\!+|\:+|\;+|\$?\d+(\.\d+)?%?)''',
            command)
    ])
    numbers = set(
        [match.group(0) for match in re.finditer(r"[-+]?\d+[\.]?\d*", command)])

    # Bug fix: the original production if/elif chain had no fallback, so
    # any i outside 1-5 raised NameError further down.  Select the
    # production list and grammar together so they always agree.
    if i == 5:
        productions_src = maverickRecognizerProductions5
        grammar_src = maverickRecognizerGrammar5
    elif i == 4:
        productions_src = maverickRecognizerProductions4
        grammar_src = maverickRecognizerGrammar4
    elif i == 3:
        productions_src = maverickRecognizerProductions3
        grammar_src = maverickRecognizerGrammar3
    elif i == 2:
        productions_src = maverickRecognizerProductions2
        grammar_src = maverickRecognizerGrammar2
    else:
        productions_src = maverickRecognizerProductions1
        grammar_src = maverickRecognizerGrammar1

    # Make a local copy of the productions and add one literal
    # production per word and per number.
    local_maverick_productions = list(productions_src)
    local_maverick_productions.extend(
        [literal_production("WORD", word) for word in words])
    local_maverick_productions.extend(
        [literal_production("NUMBER", number) for number in numbers])

    # Local grammar: the level's start symbol + extended productions.
    local_maverick_grammar = CFG(grammar_src.start(),
                                 local_maverick_productions)

    # Load the grammar into a maverick NLU parser.
    maverick_nlu_parser = nltk.RecursiveDescentParser(local_maverick_grammar)

    command_tokens = command.split()

    try:
        ss = maverick_nlu_parser.parse(command_tokens)
        if ss is not None:
            return ss
        else:
            return "Note solved"
    except Exception:
        return traceback.format_exception(*sys.exc_info())
Ejemplo n.º 14
0
def validate_lexicon(grammar, lexicon, ignored_tags):
    """For each lexicon entry, print '+' when its marker sequence parses
    under *grammar* and '-' otherwise, followed by the colon-joined markers."""
    parser = nltk.RecursiveDescentParser(grammar)
    for entry in lexicon:
        markers = []
        for field in entry:
            if field.tag not in ignored_tags:
                markers.append(field.tag)
        status = '+' if list(parser.parse(markers)) else '-'
        print(status, ':'.join(markers))
Ejemplo n.º 15
0
def run():
    """Parse the sentence "John ate a cat" under the module-level
    ``grammar_cfg`` string and print each parse tree."""
    text = "John ate a cat"
    cfg = nltk.CFG.fromstring(grammar_cfg)
    rd = nltk.RecursiveDescentParser(cfg)
    tokens = text.split()
    for parse_tree in rd.parse(tokens):
        print(parse_tree)
Ejemplo n.º 16
0
def parse(s):
    """Parse *s* against the a^n b^n grammar and return a list holding
    the first parse tree, or an empty list when the string is rejected."""
    from itertools import islice  # local import keeps the snippet self-contained

    grammar = """
    S -> 'a' S 'b' | 'a' 'b' 
    """
    grammar = nltk.CFG.fromstring(grammar)
    s_tok = tokenize(s.strip())
    parser = nltk.RecursiveDescentParser(grammar)
    # The original materialized every parse before slicing to one; islice
    # stops the (potentially expensive) enumeration after the first tree.
    return list(islice(parser.parse(s_tok), 1))
Ejemplo n.º 17
0
def parse(tokens):
    """Parse *tokens* with grammar/mygrammar_v3.cfg, report the number of
    parse trees, then print and draw each one."""
    grammar1 = nltk.data.load('file:grammar/mygrammar_v3.cfg')
    rd_parser = nltk.RecursiveDescentParser(grammar1)

    # Bug fix: NLTK 3's parse() returns a generator — len() and indexing
    # on it raised TypeError.  Materialize the trees once.
    trees = list(rd_parser.parse(tokens))
    print("No. of parse trees = " + str(len(trees)))
    for query_tree in trees:
        print(query_tree)
        query_tree.draw()
Ejemplo n.º 18
0
 def make_tree_from_sent(self, sentence=None, grammar=None):
     """Return the first recursive-descent parse of *sentence* under
     *grammar* (each defaults to the instance's own attribute), or None
     when the sentence has no parse."""
     sentence = self.sentence if sentence is None else sentence
     grammar = self.grammar if grammar is None else grammar
     parser = nltk.RecursiveDescentParser(grammar)
     # First tree if any; None otherwise (same as the original loop).
     return next(parser.parse(sentence), None)
Ejemplo n.º 19
0
def validate_lexicon(grammar, lexicon, ignored_tags):
    """Print '+'/'-' and the colon-joined marker list for each lexicon
    entry according to whether it parses under *grammar*.

    Modernized to match the other validate_lexicon in this file:
    nbest_parse() was removed in NLTK 3 and the Python 2 print
    statements are now calls.
    """
    rd_parser = nltk.RecursiveDescentParser(grammar)
    for entry in lexicon:
        marker_list = [
            field.tag for field in entry if field.tag not in ignored_tags
        ]
        if list(rd_parser.parse(marker_list)):
            print("+", ':'.join(marker_list))  # [_accepted-entries]
        else:
            print("-", ':'.join(marker_list))  # [_rejected-entries]
Ejemplo n.º 20
0
def grammar_score(transcript):
    """Score the grammaticality of *transcript*.

    Sentences are POS-tagged, mapped onto a hand-written tag-level CFG,
    and parsed with recursive descent; the score is the fraction of
    sentences that have at least one parse.  Returns 0.0 for an empty
    transcript (the original raised ZeroDivisionError).
    """
    cfg_string1 = """
	S -> NPS VP | 'IN' NPS VP | 'DT' NPS VP | 'EX' NPS VP
	NPS -> NP | NP 'CC' NP | 'WRB' NP
	NP -> Pronoun | ProperNoun | Det| Det Nominal | Nominal
	Pronoun -> 'WP' | 'WP$' | 'PRP' | 'PRP$'
	ProperNoun -> 'NNP' | 'NNPS'
	Det -> 'CD' | 'DT' | 'WDT' | 'PDT' | 'TO'
	Nominal -> 'NN' | 'NNS'
	VP -> Verb | Verb NPS | Verb NPS PP | Verb PP | Verb VP | toVerb Adverb | Verb Adj | Verb NPS 'VBG'
	Verb -> 'MD' 'VB' | 'VBD' | 'VBN' | 'VBP' | 'VBZ' | 'VBG'
	Adverb -> 'RB'| 'RBS' | 'RBR'
	toVerb -> 'TO' Verb
	Adj -> 'JJ' | 'JJR' | Det 'JJS'
	PP -> Preposition NPS
	Preposition -> 'IN'
	"""
    grammar = nltk.CFG.fromstring(cfg_string1)

    rdp = nltk.RecursiveDescentParser(grammar)
    correct = 0
    incorrect = 0

    # Split the tag stream into sentences at '.' tags, dropping
    # punctuation tags and mapping particles (RP) to 'TO'.
    sentences = []
    sentence = []
    for tag in [
            word_tup[1]
            for word_tup in nltk.pos_tag(nltk.word_tokenize(transcript))
    ]:
        if tag in (',', "'", '!', '?', ':', '``', "''"):
            pass
        elif tag == 'RP':
            sentence.append('TO')
        elif tag != '.':
            sentence.append(tag)
        else:
            sentences.append(sentence)
            sentence = []
    if len(sentence) != 0:
        sentences.append(sentence)

    for sentence in sentences:
        # Parse once and reuse the result — the original re-parsed every
        # accepted sentence a second time just to print its trees.
        trees = list(rdp.parse(sentence))
        if trees:
            correct += 1
            for tree in trees:
                print(tree)
        else:
            incorrect += 1
    total = correct + incorrect
    return correct / total if total else 0.0
Ejemplo n.º 21
0
def recursive_descent_parser(grammar, sentence, trace=2):
    """Parse *sentence* (a whitespace-separated string) top-down under
    *grammar* and return the first parse tree.

    Raises StopIteration when the sentence has no parse under *grammar*.
    """
    # Build the parser with the requested trace verbosity.
    rdp = nltk.RecursiveDescentParser(grammar, trace=trace)
    tokens = sentence.split()
    # parse() yields trees lazily; hand back the first one.
    return next(rdp.parse(tokens))
Ejemplo n.º 22
0
def parse(s):
    """Parse *s* with the DFA-style binary-string grammar and return a
    list holding the first parse tree (empty when the string is rejected)."""
    from itertools import islice  # local import keeps the snippet self-contained

    grammar = """
    S -> '0' S |'1' S1 | '0'
    S1 -> '0' S2 |'1' S | '1'
    S2 -> '0' S1 |'1' S2
    """
    grammar = nltk.CFG.fromstring(grammar)
    s_tok = tokenize(s.strip())
    parser = nltk.RecursiveDescentParser(grammar)
    # Stop the enumeration after the first parse instead of materializing
    # every tree and then slicing, as the original did.
    return list(islice(parser.parse(s_tok), 1))
Ejemplo n.º 23
0
 def __init__(self, name, chars, max_length, grammar_str: str):
     """Build the CFG from *grammar_str*, create a recursive-descent
     parser for it, and index productions by left-hand side to drive
     random generation."""
     super().__init__(name, chars, max_length)
     self._grammar = CFG.fromstring(grammar_str)
     self._parser = nltk.RecursiveDescentParser(self._grammar)
     # mapping: non-terminal symbol --> list of right-hand sides
     self._prod = dict()
     for production in self._grammar.productions():
         # setdefault replaces the original if/else membership check.
         self._prod.setdefault(production.lhs(), []).append(production.rhs())
Ejemplo n.º 24
0
 def build_parser(self):
     """Construct a recursive-descent parser for the flat N V O grammar
     whose terminals come from this instance's nouns, verbs and objects."""
     def alternation(symbols):
         # '"x" | "y" | ...' right-hand side for one non-terminal.
         return " | ".join('"' + sym + '"' for sym in symbols)

     grammar_str = """
     S -> N V O
     N -> {}
     V -> {}
     O -> {}
     """.format(alternation(self.nouns),
                alternation(self.verbs),
                alternation(self.objects))
     grammar = nltk.CFG.fromstring(grammar_str)
     return nltk.RecursiveDescentParser(grammar)
Ejemplo n.º 25
0
def check(txt):
    """Return True when *txt* parses under the module-level ``grammar1``;
    return None (falsy) when it does not parse or parsing errors out.

    The numbered prints are debug traces kept from the original.
    """
    sent = txt.split()
    rd_parser = nltk.RecursiveDescentParser(grammar1)
    print("111")
    try:
        print("222")
        for tree in rd_parser.parse(sent):
            print("333")
            print(tree)
            return True
    # Narrowed from a bare except:, which would also have swallowed
    # KeyboardInterrupt and SystemExit.
    except Exception:
        print("444")
        pass
Ejemplo n.º 26
0
def checkGrammar(txt):
    """Return True when *txt* parses under the dynamically generated
    grammar; return None (falsy) otherwise.

    Grammar-checking approach adapted from https://www.nltk.org/book/ch08.html
    """
    grammarStr = generateMyGrammar()
    grammar2 = nltk.CFG.fromstring(grammarStr)
    # below cited from python nltk module https://www.nltk.org/book/ch08.html
    sent = txt.split()
    rd_parser = nltk.RecursiveDescentParser(grammar2)
    try:
        for tree in rd_parser.parse(sent):
            print(tree)
            return True
    # Narrowed from a bare except:, which would also have swallowed
    # KeyboardInterrupt and SystemExit.
    except Exception:
        pass
    def parse(self, sentence):
        """Iteratively chunk *sentence*: run the instance's chunker once,
        then re-apply a recursive-descent NP chunker until the result
        stops changing (a fixed point).

        NOTE(review): written for Python 2 (print statements); the
        surrounding file mixes Python 2 and 3 snippets.  Assumes
        ``self.chunker`` exists and its parse result supports equality
        comparison — confirm against the enclosing class.
        """
        grammar = generate_grammar(sentence)
        self.recursive_np_chunker = nltk.RecursiveDescentParser(grammar)

        # Initial coarse chunking pass.
        res = self.chunker.parse(sentence)
        print "initial: \n%s\n" % res.pprint()

        # Re-chunk until two consecutive passes agree.
        current = res
        last = None
        while current != last:
            last = current
            current = self.recursive_np_chunker.parse(last)
            print "intermediate: \n%s\n" % current.pprint()

        print "final: \n%s\n" % current.pprint()
        return current
Ejemplo n.º 28
0
def get_syntax_tree(text, tokens):
    """Merge *tokens* into the base RULES, build a grammar from them, and
    print the first recursive-descent parse tree of *text*."""
    grouped_tokens = group_tokens(tokens)
    rules = add_tokens_to_rules(grouped_tokens, RULES)

    # Uncomment the two lines below to debug when the program cannot
    # produce the syntax tree.
    # print(rules)
    # print(tokens)

    grammar = nltk.CFG.fromstring(rules)
    parser = nltk.RecursiveDescentParser(grammar)
    words = text.split()
    for parse_tree in parser.parse(words):
        print("Tree: ")
        print(parse_tree)
        break
Ejemplo n.º 29
0
    def structure(self):
        """
		:return: the grammatical structure of this sentence
		"""
        if True:
            rd_parser = nltk.RecursiveDescentParser(grammar1)
            for tree in rd_parser.parse([w.word for w in self.children]):
                print(tree)
        else:
            from nltk.data import find
            model_dir = find('models/bllip_wsj_no_aux').path
            print('Loading BLLIP Parsing models...')
            bllip = BllipParser(model_dir)
            print('Done.')
            tree = bllip.parse_one([w.word for w in self.children])
            print tree
Ejemplo n.º 30
0
def job2(words, depth):
	"""Parse the fixed sentence "the boy saw the dog with a rod" with a
	toy PP-attachment grammar (ambiguous between NP- and VP-attachment)
	and print each parse tree.

	NOTE(review): the *words* and *depth* parameters are ignored — the
	token list is overwritten with the hard-coded sentence; kept for
	caller compatibility.  An unused nltk.pos_tag call was removed.
	"""
	grammar = nltk.CFG.fromstring("""
		S -> NP VP
		VP -> VBD NP | VBD NP PP
		PP -> IN NP
		NP -> DT NN | DT NN PP
		DT -> "the" | "a"
		NN -> "boy" | "dog" | "rod"
		VBD -> "saw"
		IN -> "with"
		""")
	words = nltk.word_tokenize("the boy saw the dog with a rod")
	rd_parser = nltk.RecursiveDescentParser(grammar)
	for tree in rd_parser.parse(words):
		print(tree)