def cfg():
    """Demo: parse a short Chinese token sequence with a toy CFG using
    NLTK's recursive-descent parser.

    NOTE(review): the grammar's start symbol is ``n`` (first production),
    which derives a single token, so the two-token sentence below cannot
    actually parse — confirm the intended grammar.
    """
    g = nltk.CFG.fromstring("""
    n -> '旗下'|'下'
    o -> 'RESUMEDOCSSTARTFLAG'
    """)
    sent = ['旗下', 'RESUMEDOCSSTARTFLAG']
    # Build the parser once — the original constructed a second, unused
    # RecursiveDescentParser instance whose result was discarded.
    rd_parser = nltk.RecursiveDescentParser(g)  # recursive-descent parser
    for tree in rd_parser.parse(sent):
        print(tree)
def parse_maverick_command(command):
    """Parse Maverick Command text."""
    # Collect the distinct alphabetic words and digit runs in the command.
    words = {m.group(0) for m in re.finditer(r"[a-zA-Z]+", command)}
    numbers = {m.group(0) for m in re.finditer(r"\d+", command)}
    # Work on a copy so the shared module-level production list is untouched.
    productions = list(maverickRecognizerProductions)
    # One literal production per discovered word, then per number.
    for word in words:
        productions.append(literal_production("WORD", word))
    for number in numbers:
        productions.append(literal_production("NUMBER", number))
    # Local grammar = base grammar's start symbol + extended productions.
    grammar = CFG(maverickRecognizerGrammar.start(), productions)
    # Hand the extended grammar to a recursive-descent NLU parser.
    parser = nltk.RecursiveDescentParser(grammar)
    return parser.parse(command.split())
def cfgMatch ( nlQuery ):
    # Purpose: accept a two-word query of the form <action> <category>
    # by parsing it against a tiny CFG; prints the best parse tree and
    # the characters of each subtree label, or prints "ERROR" and
    # returns -1 on any out-of-vocabulary word.
    # (Python 2 code: print statements; nltk.parse_cfg and nbest_parse
    # are pre-NLTK-3 APIs.)
    terminalList = [ 'find','search','display','tell','faculty','student','staff','other' ]
    grammar = nltk.parse_cfg("""
    S -> A B
    A -> 'find'|'search'|'display'|'tell'
    B -> 'faculty'|'student'|'staff'|'other'
    """)
    # Since grammar crashes if a non term not in grammar is used.
    # We have to check and report error if such a word is used anywhere
    ##################################################################
    # Check and errors reporting here
    tokenizedList = list( word_tokenize( nlQuery ) )
    for word in tokenizedList:
        if word not in terminalList:
            print "ERROR"
            return -1
    ##################################################################
    parser = nltk.RecursiveDescentParser ( grammar )
    # Keep only the single best parse (second argument = 1).
    parseTree = parser.nbest_parse ( tokenizedList, 1 )
    for tree in parseTree:
        print tree
        for elem in tree:
            # NOTE(review): tree.node is the pre-NLTK-3 spelling of
            # tree.label(); this inner loop iterates the characters of
            # the root label — confirm that is intentional.
            for i in tree.node:
                print i
def customGrammar(): grammar1 = nltk.data.load('file:mygrammar.cfg') sent = "Mary saw Bob".split() rd_parser = nltk.RecursiveDescentParser(grammar1) for tree in rd_parser.nbest_parse(sent): print tree
def check_validity(sentence):
    """Return the first parse tree of *sentence* as a string, or False
    when the module-level grammar yields no parse.

    Bug fix: the original body referenced an undefined name ``sent``
    (NameError on every call); it now splits the ``sentence`` parameter
    it was given.
    """
    tokens = sentence.split()
    rd_parser = nltk.RecursiveDescentParser(load_grammar)
    # Return on the first successful parse; fall through to False when
    # the parser produces no trees.
    for tree_struc in rd_parser.parse(tokens):
        return str(tree_struc)
    return False
def meth2():
    # NOTE(review): nltk.parse is a package, not a callable — this call
    # raises TypeError, so the function is broken as written; a real
    # grammar object (e.g. nltk.CFG.fromstring(...)) is needed here.
    grammar = nltk.parse()
    # Demo text split into whitespace tokens.
    sent = "My life is brilliant. I saw an angle of that I'm sure. She smiled at me on the subway.".split(
    )
    rd_parser = nltk.RecursiveDescentParser(grammar)
    # nbest_parse is the pre-NLTK-3 API (removed in NLTK 3 — use parse()).
    for p in rd_parser.nbest_parse(sent):
        print(p)
def test_grammar(grammar, sentences):
    """Load the grammar file named *grammar* and draw every
    recursive-descent parse of each sentence in *sentences*."""
    loaded = nltk.data.load("file:{}".format(grammar))
    parser = nltk.RecursiveDescentParser(loaded)
    for index, sentence in enumerate(sentences):
        print("Satz {}: {}".format(index, sentence))
        for parse_tree in parser.parse(sentence.split()):
            parse_tree.draw()  # oder tree.pretty_print()
def parse(text):
    """Tokenize *text*, parse it with the phrase-structure grammar from
    disk, and print and draw every parse tree."""
    tokens = word_tokenize(text)
    ps_grammar = nltk.data.load('file:PSgrammar.cfg')
    parser = nltk.RecursiveDescentParser(ps_grammar)
    for parse_tree in parser.parse(tokens):
        print(parse_tree)
        parse_tree.draw()
def syntactic_filter_fast(sentences, bot_dict):
    u"""Keep only the sentences whose structure matches the module-level
    context-free grammar ``grammar1``.

    POS tags are looked up in the local lexicon *bot_dict* (one random
    choice per word) instead of running a tagger, hence "fast".
    Returns the filtered list.  (Python 2; nbest_parse is pre-NLTK-3.)
    """
    output_sentences = []
    print "Before syntax filter there were " + str(
        len(sentences)) + " sentences."
    for sentence in sentences:
        print "=================="
        print str(sentence) + "\n"
        tokens = nltk.tokenize.wordpunct_tokenize(sentence)
        justTags = []
        # print self.pos_lexicon_word_pos
        # Tag every token except the last by sampling a POS tag from the
        # bot's lexicon; the final token is appended verbatim
        # (presumably sentence-final punctuation — TODO confirm).
        for word in tokens[:-1]:
            tag = random.choice(bot_dict[word])
            justTags.append(tag)
        justTags.append(tokens[-1])
        print str(justTags) + "\n"
        rd_parser = nltk.RecursiveDescentParser(grammar1)
        try:
            # Keep the sentence only if at least one parse exists.
            if len(rd_parser.nbest_parse(justTags)) > 0:
                output_sentences.append(sentence)
        except ValueError:
            # Raised when a tag is not covered by the grammar — drop it.
            pass
    print "After the syntax filter there were " + str(
        len(output_sentences)) + " sentences."
    print output_sentences
    return output_sentences
def syntactic_filter(sentences, bot_dict):
    u"""Keep only the sentences whose structure matches the module-level
    context-free grammar ``grammar1``.

    Unlike syntactic_filter_fast, this version runs nltk.pos_tag on the
    tokens rather than sampling tags from *bot_dict* (which is unused
    here despite being a parameter).  Returns the filtered list.
    (Python 2; nbest_parse is pre-NLTK-3.)
    """
    output_sentences = []
    print "Before syntax filter there were " + str(
        len(sentences)) + " sentences."
    for sentence in sentences:
        print "=================="
        print str(sentence) + "\n"
        tokens = nltk.tokenize.wordpunct_tokenize(sentence)
        posTagged = nltk.pos_tag(tokens)
        # Discard the words, keep only the tag sequence for parsing.
        justTags = []
        for word, tag in posTagged:
            justTags.append(tag)
        print str(justTags) + "\n"
        rd_parser = nltk.RecursiveDescentParser(grammar1)
        try:
            # Keep the sentence only if at least one parse exists.
            if len(rd_parser.nbest_parse(justTags)) > 0:
                output_sentences.append(sentence)
        except ValueError:
            # Raised when a tag is not covered by the grammar — drop it.
            pass
    print "After the syntax filter there were " + str(
        len(output_sentences)) + " sentences."
    print output_sentences
    return output_sentences
def top_down(self, grammar, text):
    """Print every top-down (recursive-descent) parse of *text* under
    *grammar*, followed by a separator line."""
    print("Text : {}".format(text))
    tokens = text.split()
    parser = nltk.RecursiveDescentParser(grammar)
    for parse_tree in parser.parse(tokens):
        print(parse_tree)
    print("------------------")
def main(): initialize() # sentence = "you give us information about your location" # sentence = "you give information honestly and immediately" sentence = "we collecting and storing information permanently" # sentence = "you thinking and we are collecting information honestly" # sentence = "All work and no play makes jack dull boy" # sentence = "we may also collect technical information to help us identify your device for fraud prevention and diagnostic purposes" labeled_sentence = do_labeling(sentence, labels.dictionary) sentence_dictionary = sentence_to_dictionary( labeled_sentence, dictionary_to_bag(labels.dictionary)) nltk_grammar = combine_grammars(( convert_grammar(grammar.glob), convert_dictionary(sentence_dictionary), )) print(f"\n # ==================== #\n" f" # NLTK GRAMMAR IS #\n" f" # ==================== #\n\n" f"{nltk_grammar}") binary_grammar = nltk.CFG.fromstring(nltk_grammar) rd_parser = nltk.RecursiveDescentParser(binary_grammar) print("\n # ===================== #\n" " # SENTENCES TREES ARE #\n" " # ===================== #\n") labeled_tokens = labeled_sentence.split() for tree in rd_parser.parse(labeled_tokens): print(tree)
def parse_maverick_command(command, i):
    """Parse Maverick Command text against recognizer grammar set *i* (1-5).

    Returns the parse-tree iterator on success, the string "Note solved"
    when the parser returns None, or a formatted-traceback list when
    parsing raises.

    Bug fix: the original production-selection if/elif chain had no else
    branch, so any *i* outside 1-5 raised NameError; selection now falls
    back to set 1, matching the original grammar-selection else branch.
    """
    # Extract word-like tokens (including punctuation runs and numbers
    # with optional $ / % decoration) and plain signed numbers.
    words = set([
        match.group(0) for match in re.finditer(
            r'''(['()""\w.]+|\.+|\?+|\,+|\!+|\:+|\;+|\$?\d+(\.\d+)?%?)''',
            command)
    ])
    numbers = set([
        match.group(0)
        for match in re.finditer(r"[-+]?\d+[\.]?\d*", command)
    ])

    # Table-driven selection replaces two parallel if/elif chains.
    recognizers = {
        1: (maverickRecognizerProductions1, maverickRecognizerGrammar1),
        2: (maverickRecognizerProductions2, maverickRecognizerGrammar2),
        3: (maverickRecognizerProductions3, maverickRecognizerGrammar3),
        4: (maverickRecognizerProductions4, maverickRecognizerGrammar4),
        5: (maverickRecognizerProductions5, maverickRecognizerGrammar5),
    }
    base_productions, base_grammar = recognizers.get(i, recognizers[1])

    # Work on a copy; add one literal production per word and number.
    local_maverick_productions = list(base_productions)
    local_maverick_productions.extend(
        [literal_production("WORD", word) for word in words])
    local_maverick_productions.extend(
        [literal_production("NUMBER", number) for number in numbers])

    # Local grammar = selected grammar's start symbol + extended productions.
    local_maverick_grammar = CFG(base_grammar.start(),
                                 local_maverick_productions)

    # Load grammar into a maverick NLU parser.
    maverick_nlu_parser = nltk.RecursiveDescentParser(local_maverick_grammar)
    command_tokens = command.split()
    try:
        ss = maverick_nlu_parser.parse(command_tokens)
        if ss is not None:
            return ss
        else:
            return "Note solved"
    except Exception:
        return traceback.format_exception(*sys.exc_info())
def validate_lexicon(grammar, lexicon, ignored_tags):
    """For each lexicon entry, print '+' and the entry's marker sequence
    when the sequence parses under *grammar*, '-' otherwise."""
    parser = nltk.RecursiveDescentParser(grammar)
    for entry in lexicon:
        markers = [f.tag for f in entry if f.tag not in ignored_tags]
        verdict = '+' if list(parser.parse(markers)) else '-'
        print(verdict, ':'.join(markers))
def run():
    """Parse a fixed demo sentence with the module-level CFG string and
    print every parse tree."""
    parser = nltk.RecursiveDescentParser(nltk.CFG.fromstring(grammar_cfg))
    sentence = "John ate a cat"
    for parse_tree in parser.parse(sentence.split()):
        print(parse_tree)
def parse(s):
    """Parse *s* against the a^n b^n grammar.

    Returns a one-element list holding the first parse tree, or [] when
    *s* is not in the language.

    Improvement: the original materialized every parse tree only to
    slice off the first; we now stop after the first tree is produced.
    """
    grammar = """
    S -> 'a' S 'b' | 'a' 'b'
    """
    grammar = nltk.CFG.fromstring(grammar)
    s_tok = tokenize(s.strip())
    parser = nltk.RecursiveDescentParser(grammar)
    tree = []
    for t in parser.parse(s_tok):
        tree.append(t)
        break  # first tree only — no need to enumerate the rest
    return tree
def parse(tokens): grammar1 = nltk.data.load('file:grammar/mygrammar_v3.cfg') rd_parser = nltk.RecursiveDescentParser(grammar1) trees = rd_parser.parse(tokens) print "No. of parse trees = " + str(len(trees)) for i in range(len(trees)): query_tree = trees[i] print query_tree query_tree.draw()
def make_tree_from_sent(self, sentence=None, grammar=None):
    """Parse *sentence* (default: ``self.sentence``) with *grammar*
    (default: ``self.grammar``) and return the last parse tree, or
    None when the sentence has no parse.

    Bug fix: the original raised UnboundLocalError when the parser
    produced no trees; it now returns None in that case.
    """
    if sentence is None:
        sentence = self.sentence
    if grammar is None:
        grammar = self.grammar
    # Parse the sentence with a recursive-descent parser.
    rd_parser = nltk.RecursiveDescentParser(grammar)
    tree = None
    # Keep the LAST tree, matching the original loop's behavior.
    for p in rd_parser.parse(sentence):
        tree = p
    return tree
def validate_lexicon(grammar, lexicon, ignored_tags):
    # For each lexicon entry, collect its field tags (minus ignored
    # ones) and print "+" with the tag sequence if it parses under
    # *grammar*, "-" otherwise.
    # (Python 2; nbest_parse is the pre-NLTK-3 API, which returned a
    # list — truthy when at least one parse exists.)
    rd_parser = nltk.RecursiveDescentParser(grammar)
    for entry in lexicon:
        marker_list = [
            field.tag for field in entry if field.tag not in ignored_tags
        ]
        if rd_parser.nbest_parse(marker_list):
            print "+", ':'.join(marker_list)  # [_accepted-entries]
        else:
            print "-", ':'.join(marker_list)  # [_rejected-entries]
def grammar_score(transcript):
    """Score *transcript*: the fraction of its sentences whose POS-tag
    sequence parses under a hand-written English CFG.

    Returns a float in [0, 1]; returns 0.0 for a transcript with no
    sentences (the original raised ZeroDivisionError in that case).
    Also parses each sentence once instead of twice.
    """
    cfg_string1 = """
    S -> NPS VP | 'IN' NPS VP | 'DT' NPS VP | 'EX' NPS VP
    NPS -> NP | NP 'CC' NP | 'WRB' NP
    NP -> Pronoun | ProperNoun | Det| Det Nominal | Nominal
    Pronoun -> 'WP' | 'WP$' | 'PRP' | 'PRP$'
    ProperNoun -> 'NNP' | 'NNPS'
    Det -> 'CD' | 'DT' | 'WDT' | 'PDT' | 'TO'
    Nominal -> 'NN' | 'NNS'
    VP -> Verb | Verb NPS | Verb NPS PP | Verb PP | Verb VP | toVerb Adverb | Verb Adj | Verb NPS 'VBG'
    Verb -> 'MD' 'VB' | 'VBD' | 'VBN' | 'VBP' | 'VBZ' | 'VBG'
    Adverb -> 'RB'| 'RBS' | 'RBR'
    toVerb -> 'TO' Verb
    Adj -> 'JJ' | 'JJR' | Det 'JJS'
    PP -> Preposition NPS
    Preposition -> 'IN'
    """
    grammar = nltk.CFG.fromstring(cfg_string1)
    rdp = nltk.RecursiveDescentParser(grammar)
    correct = 0
    incorrect = 0
    # Split the POS-tag stream into sentences at '.' tags, dropping
    # other punctuation tags and mapping particle tags (RP) to 'TO'.
    sentences = []
    sentence = []
    for tag in [
            word_tup[1]
            for word_tup in nltk.pos_tag(nltk.word_tokenize(transcript))
    ]:
        if tag in [',', "'", '!', '?', ':', '``', "''"]:
            pass
        elif tag == 'RP':
            sentence.append('TO')
        elif tag != '.':
            sentence.append(tag)
        else:
            sentences.append(sentence)
            sentence = []
    if len(sentence) != 0:
        sentences.append(sentence)
    for sentence in sentences:
        # Parse once; the original re-parsed every accepted sentence
        # a second time just to print the trees.
        trees = list(rdp.parse(sentence))
        if trees:
            correct += 1
            for tree in trees:
                print(tree)
        else:
            incorrect += 1
    if correct + incorrect == 0:
        return 0.0  # empty transcript — nothing to score
    return correct / (correct + incorrect)
def recursive_descent_parser(grammar, sentence, trace=2):
    """Parse *sentence* (a whitespace-delimited string) with *grammar*
    using NLTK's recursive-descent parsing technique.

    Returns the first parse tree produced by the parser.  Raises
    StopIteration when the sentence has no parse under the grammar.

    Cleanup: removed dead commented-out code from the original body.
    """
    # Load the recursive-descent parser with the provided grammar.
    rdp = nltk.RecursiveDescentParser(grammar, trace=trace)
    # Tokenize on whitespace and return the first parse tree.
    return next(rdp.parse(sentence.split()))
def parse(s):
    """Parse *s* against a CFG encoding a 3-state 0/1 automaton.

    Returns a one-element list holding the first parse tree, or [] when
    *s* is not in the language.

    Improvement: the original materialized every parse tree only to
    slice off the first; we now stop after the first tree is produced.
    """
    grammar = """
    S -> '0' S |'1' S1 | '0'
    S1 -> '0' S2 |'1' S | '1'
    S2 -> '0' S1 |'1' S2
    """
    grammar = nltk.CFG.fromstring(grammar)
    s_tok = tokenize(s.strip())
    parser = nltk.RecursiveDescentParser(grammar)
    tree = []
    for t in parser.parse(s_tok):
        tree.append(t)
        break  # first tree only — no need to enumerate the rest
    return tree
def __init__(self, name, chars, max_length, grammar_str: str):
    """Build the CFG from *grammar_str*, its recursive-descent parser,
    and the production table used for random generation."""
    super().__init__(name, chars, max_length)
    self._grammar = CFG.fromstring(grammar_str)
    self._parser = nltk.RecursiveDescentParser(self._grammar)
    # Mapping: non-terminal symbol -> list of its right-hand sides.
    # setdefault replaces the original's explicit membership check
    # (same resulting plain dict, one lookup per production).
    self._prod = {}
    for production in self._grammar.productions():
        self._prod.setdefault(production.lhs(), []).append(production.rhs())
def build_parser(self):
    """Construct a recursive-descent parser for simple N-V-O sentences
    over this object's noun/verb/object vocabularies."""
    def alternatives(tokens):
        # Render a vocabulary as a quoted CFG alternative list.
        return " | ".join('"' + tok + '"' for tok in tokens)

    grammar_str = """
    S -> N V O
    N -> {}
    V -> {}
    O -> {}
    """.format(alternatives(self.nouns), alternatives(self.verbs),
               alternatives(self.objects))
    return nltk.RecursiveDescentParser(nltk.CFG.fromstring(grammar_str))
def check(txt):
    """Return True when *txt* parses under the module-level ``grammar1``;
    return None (falsy) otherwise.

    Fixes: removed leftover "111"/"222"/"333"/"444" debug prints;
    narrowed the bare ``except`` to ValueError, which NLTK raises when
    an input token is not covered by the grammar.
    """
    sent = txt.split()
    rd_parser = nltk.RecursiveDescentParser(grammar1)
    try:
        # First successful parse wins.
        for tree in rd_parser.parse(sent):
            print(tree)
            return True
    except ValueError:
        # Token not covered by the grammar — treat as "does not parse".
        pass
def checkGrammar(txt):
    """Return True when *txt* parses under the generated grammar;
    return None (falsy) otherwise, including when a word falls outside
    the grammar's vocabulary.

    Fix: the bare ``except`` is narrowed to ValueError, which NLTK
    raises for input tokens the grammar does not cover.
    """
    # this grammar checking function is adapted from https://www.nltk.org/book/ch08.html
    grammarStr = generateMyGrammar()
    grammar2 = nltk.CFG.fromstring(grammarStr)
    # below cited from python nltk module https://www.nltk.org/book/ch08.html
    sent = txt.split()
    rd_parser = nltk.RecursiveDescentParser(grammar2)
    try:
        for tree in rd_parser.parse(sent):
            print(tree)
            return True
    except ValueError:
        # Out-of-vocabulary token — treat as "does not parse".
        pass
def parse(self, sentence):
    # Iteratively chunk *sentence*: run the base chunker once, then
    # repeatedly apply a recursive NP chunker (built from a grammar
    # generated for this sentence) until the tree stops changing, i.e.
    # until a fixed point is reached.  Returns the final tree.
    # (Python 2 prints trace each pass.)
    grammar = generate_grammar(sentence)
    self.recursive_np_chunker = nltk.RecursiveDescentParser(grammar)
    res = self.chunker.parse(sentence)
    print "initial: \n%s\n" % res.pprint()
    current = res
    last = None
    # NOTE(review): the loop's termination relies on != between
    # successive parse results; NLTK Tree equality is structural, but
    # confirm the chunker's output type supports this as intended.
    while current != last:
        last = current
        current = self.recursive_np_chunker.parse(last)
        print "intermediate: \n%s\n" % current.pprint()
    print "final: \n%s\n" % current.pprint()
    return current
def get_syntax_tree(text, tokens):
    """Build a grammar from the grouped *tokens* plus the base RULES and
    print the first parse tree of *text* (nothing when no parse)."""
    grouped_tokens = group_tokens(tokens)
    rules = add_tokens_to_rules(grouped_tokens, RULES)
    # Uncomment the two lines below to debug when the program cannot
    # build the syntax tree.
    # print(rules)
    # print(tokens)
    parser = nltk.RecursiveDescentParser(nltk.CFG.fromstring(rules))
    first_tree = next(iter(parser.parse(text.split())), None)
    if first_tree is not None:
        print("Tree: ")
        print(first_tree)
def structure(self):
    """
    Print the grammatical structure (parse trees) of this sentence.

    Cleanup: the original wrapped this in ``if True:`` with an
    unreachable else-branch that loaded the BLLIP WSJ parser (and used
    Python 2 ``print`` syntax); the dead branch has been removed.
    """
    rd_parser = nltk.RecursiveDescentParser(grammar1)
    # Parse the sentence's word sequence and print every tree.
    for tree in rd_parser.parse([w.word for w in self.children]):
        print(tree)
def job2(words, depth):
    """Demo: parse a fixed sentence with a toy PP-attachment CFG and
    print each parse tree.

    NOTE(review): the *words* and *depth* parameters are unused — the
    local ``words`` is immediately overwritten with the demo sentence's
    tokens.  The signature is kept for caller compatibility.

    Cleanup: removed an unused ``nltk.pos_tag(words)`` call whose
    result was never read (dead work that also forced a tagger load).
    """
    grammar = nltk.CFG.fromstring("""
    S -> NP VP
    VP -> VBD NP | VBD NP PP
    PP -> IN NP
    NP -> DT NN | DT NN PP
    DT -> "the" | "a"
    NN -> "boy" | "dog" | "rod"
    VBD -> "saw"
    IN -> "with"
    """)
    words = nltk.word_tokenize("the boy saw the dog with a rod")
    rd_parser = nltk.RecursiveDescentParser(grammar)
    for tree in rd_parser.parse(words):
        print(tree)