def binarize(cls, tree):
    """Parse a tree string, binarize it, and return its formatted form.

    The input is parsed with ``Tree.fromstring`` (presumably NLTK's ``Tree``),
    unary chains are collapsed, the tree is converted to Chomsky normal form,
    and the result is passed to ``cls._format``.

    Args:
        tree: String representation of a tree accepted by ``Tree.fromstring``.

    Returns:
        Whatever ``cls._format`` produces for the transformed tree.
    """
    parsed = Tree.fromstring(tree)

    # Collapse unary chains, including POS-level and root-level ones.
    Tree.collapse_unary(parsed, collapsePOS=True, collapseRoot=True)

    # Chomsky normal form transformation; ``parsed`` is modified in place.
    Tree.chomsky_normal_form(parsed)

    return cls._format(parsed)
def collectSymbols(sample, cnf=False):
    """Collect the right-hand-side symbols of the tree described by ``sample``.

    A tree is grown from the ``"ROOT"`` label using ``sample`` as a
    parent-to-children lookup, optionally converted to Chomsky normal form,
    and every symbol on the right-hand side of its productions is gathered
    after removing all ``-<digits>`` fragments from the symbol name.

    Args:
        sample: Mapping from a node label to an iterable of its child labels.
            Labels that are not keys of the mapping become childless nodes.
        cnf: When true, binarize the tree before reading its productions.

    Returns:
        A set of cleaned symbol strings. Only right-hand-side symbols are
        collected.
    """
    def build(label):
        # Labels missing from the mapping terminate the recursion.
        kids = [build(child) for child in sample[label]] if label in sample else []
        return Tree(label, kids)

    tree = build("ROOT")
    if cnf:
        Tree.chomsky_normal_form(tree)

    # An empty right-hand side contributes nothing, so no explicit emptiness
    # check is needed before iterating it.
    return {
        re.sub(r"-\d+", '', symbol.symbol())
        for production in tree.productions()
        for symbol in production.rhs()
    }
def extract_CNF_rules(self):
    """Accumulate the CNF productions of every tree in ``self.all_lines``.

    Each entry of ``self.all_lines`` is parsed with ``Tree.fromstring``,
    converted to Chomsky normal form in place, and its productions are added
    to ``self.CNF_rules`` with ``+=``. Nothing is returned.
    """
    for line in self.all_lines:
        parsed = Tree.fromstring(line)
        Tree.chomsky_normal_form(parsed)
        self.CNF_rules += parsed.productions()
def __binarize__(const_t_nltk: Tree):
    """Apply the Chomsky normal form transformation to ``const_t_nltk``.

    Args:
        const_t_nltk: Tree to binarize (presumably an NLTK constituency
            tree, going by the name). The transformation's return value is
            discarded and this function returns nothing, so the tree is
            expected to be modified in place.
    """
    Tree.chomsky_normal_form(const_t_nltk)