Beispiel #1
0
 def binarize(cls, tree):
     """Parse a bracketed tree string, binarize it, and return it formatted.

     The parsed tree first has its unary chains collapsed (with both the
     ``collapsePOS`` and ``collapseRoot`` options enabled), is then converted
     to Chomsky normal form, and is finally handed to ``cls._format``.

     :param tree: tree string in the bracketed notation ``Tree.fromstring``
         accepts.
     :return: the value produced by ``cls._format`` for the transformed tree.
     """
     parsed = Tree.fromstring(tree)
     # Collapse unary chains before binarizing.
     parsed.collapse_unary(collapsePOS=True, collapseRoot=True)
     # Chomsky normal form transformation.
     parsed.chomsky_normal_form()
     return cls._format(parsed)
Beispiel #2
0
def collectSymbols(sample, cnf=False):
    """Collect the symbols on the right-hand sides of a sample's tree.

    A tree is built starting at the label ``"ROOT"``: every label found as a
    key of ``sample`` gets one child per entry of ``sample[label]``, and every
    other label becomes a node without children. When ``cnf`` is true the tree
    is converted to Chomsky normal form before its productions are read.

    :param sample: mapping from a label to an iterable of its child labels.
    :param cnf: whether to apply the Chomsky normal form transformation first.
    :return: set of right-hand-side symbols, each with every ``-<digits>``
        occurrence removed. The root label is only included if it also
        appears on some right-hand side.
    """
    def build(label):
        # Labels that are not keys of the mapping become childless nodes.
        children = sample[label] if label in sample else ()
        return Tree(label, [build(child) for child in children])

    tree = build("ROOT")
    if cnf:
        tree.chomsky_normal_form()

    # Strips numeric suffixes such as "-1" from the symbol names.
    numeric_suffix = re.compile(r"-\d+")
    return {
        numeric_suffix.sub('', symbol.symbol())
        for production in tree.productions()
        for symbol in production.rhs()
    }
Beispiel #3
0
 def extract_CNF_rules(self):
     """Accumulate the CNF productions of every tree in ``self.all_lines``.

     Each entry of ``self.all_lines`` is parsed with ``Tree.fromstring``,
     converted to Chomsky normal form, and its productions are appended to
     ``self.CNF_rules`` (duplicates are kept).
     """
     for bracketed in self.all_lines:
         parsed = Tree.fromstring(bracketed)
         parsed.chomsky_normal_form()
         self.CNF_rules += parsed.productions()
 def __binarize__(const_t_nltk: Tree):
     """Apply ``Tree.chomsky_normal_form`` to the given constituency tree.

     The call's return value is not used here, so the transformation is
     presumably applied to ``const_t_nltk`` in place — confirm against the
     NLTK documentation.

     NOTE(review): the function takes no ``self``/``cls`` although it is
     indented like a method; presumably a static helper — verify against the
     enclosing scope, which is not visible here.
     """
     # chomsky normal form transformation
     Tree.chomsky_normal_form(const_t_nltk)