def test1():
    """Print a short demonstration of ``Nonterminal`` and ``Production``.

    Builds a few nonterminals and two productions, then prints a symbol,
    the left- and right-hand side of the first production, and the result
    of two equality comparisons between productions.
    """
    nt1 = Nonterminal('NP')
    nt2 = Nonterminal('VP')
    # Single-argument print(...) calls are valid on both Python 2 and 3;
    # the former print statements were a SyntaxError on Python 3.
    print(nt1.symbol())

    S, NP, VP, PP = nonterminals('S, NP, VP, PP')
    N, V, P, DT = nonterminals('N, V, P, DT')

    prod1 = Production(S, [NP, VP])
    prod2 = Production(NP, [DT, NP])

    print(prod1.lhs())
    print(prod1.rhs())

    # Compare against a freshly built production with the same sides,
    # then against a production with different sides.
    print(prod1 == Production(S, [NP, VP]))
    print(prod1 == prod2)
def _expand_node(self, production: nltk.Production):
    """Apply ``production`` to the node on top of ``self.stack``.

    The top node is popped. When its label equals the production's
    left-hand side, the node and the production's right-hand side are
    handed to ``self._append`` and the production is recorded in
    ``self.actions``.

    Raises:
        ValueError: If the production's left-hand side does not equal the
            popped node's label. The node is pushed back onto the stack
            before the error is raised.
    """
    top = self.stack.pop()
    applicable = production.lhs() == top.label()

    if not applicable:
        # Restore the stack first so the message shows its original state.
        self.stack.append(top)
        raise ValueError(
            f'Rule is not applicable: {production}, stack: {self.stack}.')

    self._append(top, production.rhs())
    self.actions.append(production)
def build_tree(self, back, row, col, root):
    """Recursively build the most probable parse tree rooted at an entry.

    Given a back-pointer matrix, a row/column entry point into that
    matrix, and the root label, builds and returns the syntactic parse
    tree rooted at the entry point.

    Args:
        back: Back-pointer matrix, indexed as ``back[i, j, label_index]``.
        row: Row of the entry point.
        col: Column of the entry point.
        root: Label of the subtree root; looked up in ``self.index``.

    Returns:
        A ``Tree`` labelled ``root``. For a lexical label its only child
        is ``back[row + 1, row + 1, label_index]``; otherwise its two
        children are the recursively built left and right subtrees.
        ``Tree(None, [])`` is returned when reading and unpacking the
        back-pointer raises ``TypeError``.
    """
    label_index = self.index[root]

    # Base case - lexical productions
    if root in (Production.lhs(n) for n in self.terminals()):
        return Tree(root, [back[row + 1, row + 1, label_index]])

    # Recursive case - nonlexical productions
    # Only the back-pointer lookup/unpack is guarded, so a TypeError raised
    # for an unrelated reason deeper in the recursion is not mistaken for
    # an unlicensed input.
    try:
        split, left_label, right_label = back[row, col, label_index]
    except TypeError:
        # In case the input is unlicensed by the PCFG
        return Tree(None, [])

    left_subtree = self.build_tree(back, row, split, left_label)
    right_subtree = self.build_tree(back, split, col, right_label)
    return Tree(root, [left_subtree, right_subtree])
""" # 非终结符 nonterminal1 = Nonterminal('NP') nonterminal2 = Nonterminal('VP') nonterminal3 = Nonterminal('PP') print((nonterminal1 == nonterminal2)) print((nonterminal2 == nonterminal3)) print((nonterminal1 == nonterminal3)) S, NP, VP, PP = nonterminals('S, NP, VP, PP') N, V, P, DT = nonterminals('N, V, P, DT') # 产生式 production1 = Production(S, [NP, VP]) production2 = Production(NP, [DT, NP]) production3 = Production(VP, [V, NP, NP, PP]) print(production1.lhs(), production1.rhs()) print(production2.lhs(), production2.rhs()) print(production3.lhs(), production3.rhs()) # 语法解析 gram1 = nltk.data.load('grammars/large_grammars/atis.cfg') # print(gram1) sent = nltk.data.load('grammars/large_grammars/atis_sentences.txt') sent = nltk.parse.util.extract_test_sentences(sent) testingsent = sent[25] sent = testingsent[0] """FAQ. 递归下降分析 增量式 earley算法 通过保存增量解析步骤的结果和确保每一个解析函数在同一个输入位置只被调用一次,就可以把任意解析表达文法转化成一个Packrat Parser, 可以实现线性的时间复杂度解析,其代价是足够大量的空间占用。 形式语言->编译原理 https://zh.wikipedia.org/zh-cn/解析表达文法 人工智能NLP语法解析 https://www.evget.com/serializedetail/479
import nltk
from nltk import Nonterminal, nonterminals, Production, CFG


def _show_each(*values):
    """Print every value on its own line, in the order given."""
    for value in values:
        print(value)


# Individual nonterminal symbols.
nonterminal1 = Nonterminal('NP')
nonterminal2 = Nonterminal('VP')
nonterminal3 = Nonterminal('PP')
_show_each(
    nonterminal1.symbol(),
    nonterminal2.symbol(),
    nonterminal3.symbol(),
)

# Pairwise equality comparisons between the three symbols.
_show_each(
    nonterminal1 == nonterminal2,
    nonterminal2 == nonterminal3,
    nonterminal1 == nonterminal3,
)

# Several nonterminals created at once from comma-separated names.
S, NP, VP, PP = nonterminals('S, NP, VP, PP')
N, V, P, DT = nonterminals('N, V, P, DT')

# Productions built from the nonterminals above.
production1 = Production(S, [NP, VP])
production2 = Production(NP, [DT, NP])
production3 = Production(VP, [V, NP, NP, PP])
_show_each(
    production1.lhs(),
    production1.rhs(),
    production3.lhs(),
    production3.rhs(),
)

# Equality against a freshly built production, then against a different one.
_show_each(
    production3 == Production(VP, [V, NP, NP, PP]),
    production2 == production3,
)
def __init__(self, grammar):
    """Initialise the parser from a grammar specification.

    Args:
        grammar: Passed unchanged to ``self.load_grammar``; the value it
            returns is stored as ``self.grammar``.
    """
    super(PCKYParser, self).__init__()
    self.grammar = self.load_grammar(grammar)

    # Collect the distinct left-hand sides of the grammar's productions
    # and hand them to CodeBook to build ``self.index``.
    lhs_symbols = set()
    for rule in self.grammar.productions():
        lhs_symbols.add(Production.lhs(rule))
    self.index = CodeBook(lhs_symbols)