Esempio n. 1
0
 def make_sub_tree(span):
     """Return an "X" node for ``span``.

     A span of length one is delegated to ``wrap_word`` so the node carries
     its word as a child; any longer span yields a bare node that only has
     ``word_span`` set.
     """
     node = ConstTree("X")
     node.word_span = span
     covers_single_word = span[1] - span[0] == 1
     return wrap_word(span) if covers_single_word else node
Esempio n. 2
0
 def random_merge(node):
     """Reduce ``node`` and its descendants to at most two children each.

     Children are processed first (depth-first). Afterwards, while ``node``
     still has more than two children, a random adjacent pair is replaced in
     place by a new "X" node spanning both of them.
     """
     kids = node.child
     for kid in kids:
         if not isinstance(kid, ConstTree):
             # A non-tree child is expected to be an only child.
             assert len(kids) == 1
             continue
         random_merge(kid)

     while len(kids) > 2:
         # randint is inclusive on both ends, so left_pos + 1 is always valid.
         left_pos = random_obj.randint(0, len(kids) - 2)
         left, right = kids[left_pos], kids[left_pos + 1]
         merged = ConstTree("X")
         merged.word_span = (left.word_span[0], right.word_span[1])
         merged.child = [left, right]
         kids[left_pos] = merged
         del kids[left_pos + 1]
Esempio n. 3
0
 def wrap_word(span):
     """Return an "X" node over ``span`` with ``words[span[0]]`` as a child.

     ``words`` is looked up in the enclosing scope; only the start index of
     ``span`` is used to pick the word.
     """
     leaf = ConstTree("X")
     leaf.word_span = span
     start = span[0]
     leaf.child.append(words[start])
     return leaf
Esempio n. 4
0
def fuzzy_cfg(cfg, names):
    """Build a randomly binarized "X"-labelled tree over the words of ``cfg``.

    The first element of every item in ``names`` is taken as a
    ``(start, end)`` word span. One node is created per distinct span, nodes
    are nested under one another using ``span_overlap``, words not covered by
    any child are added as single-word nodes, and finally every node with
    more than two children is reduced to two by merging random adjacent
    pairs.

    Args:
        cfg: Object whose ``generate_words()`` yields the words of the
            sentence.
        names: Iterable of items whose first element is a ``(start, end)``
            span. ``end`` is used as an exclusive bound: a span with
            ``end - start == 1`` is treated as the single word
            ``words[start]``. May be empty, in which case the tree is built
            from the words alone.

    Returns:
        The root ``ConstTree`` with ``word_span == (0, len(words))``, after
        ``populate_spans_internal()`` has been called on it.
    """
    # Fixed seed: the same input always produces the same tree.
    random_obj = Random(45)
    spans = {i[0] for i in names}
    words = list(cfg.generate_words())

    def wrap_word(span):
        """Return an "X" node over ``span`` holding ``words[span[0]]``."""
        ret = ConstTree("X")
        ret.word_span = span
        ret.child.append(words[span[0]])
        return ret

    def make_sub_tree(span):
        """Return a word node for length-one spans, else a bare "X" node."""
        if span[1] - span[0] == 1:
            return wrap_word(span)
        ret = ConstTree("X")
        ret.word_span = span
        return ret

    # Longest spans first, so the shortest span is always at the tail.
    sub_trees = [make_sub_tree(i) for i in spans]
    sub_trees.sort(key=lambda x: x.word_span[1] - x.word_span[0], reverse=True)

    # Repeatedly take the shortest remaining span and hang it under one of
    # the longer ones. The loop stops when a single (longest) tree is left.
    while len(sub_trees) > 1:
        this_tree = sub_trees.pop()
        parent_tree = None
        # Candidates are visited from longest to shortest. A candidate
        # replaces the current pick only if it overlaps both this span and
        # the current pick, so the last such candidate wins (presumably the
        # tightest enclosing span -- depends on span_overlap's semantics).
        for other_tree in sub_trees:
            if span_overlap(this_tree.word_span, other_tree.word_span):
                if parent_tree is None or span_overlap(other_tree.word_span,
                                                       parent_tree.word_span):
                    parent_tree = other_tree
        if parent_tree is not None:
            parent_tree.child.append(this_tree)
        # NOTE(review): a span for which no parent is found is dropped from
        # the tree here (the original collected such trees in a list that
        # was never read). Its words are later re-inserted as plain word
        # nodes by sort_and_fill_blank. Confirm whether these spans should
        # instead be attached to the root.

    # sub_trees now holds at most one tree. Always wrap it in a fresh root
    # covering the whole sentence. The original guarded a "reuse the
    # remaining tree as root" branch with `len(sub_trees) == 0` and then
    # read sub_trees[0], so that branch could only ever raise IndexError
    # (when `names` was empty); every other input already took this path.
    root = ConstTree("X")
    root.word_span = (0, len(words))
    root.child = sub_trees

    def sort_and_fill_blank(node):
        """Order ``node``'s children and cover every gap with word nodes."""
        if not node.child:
            # No sub-spans at all: one word node per position in the span.
            node.child = [
                wrap_word((i, i + 1)) for i in range(*node.word_span)
            ]
        elif isinstance(node.child[0], ConstTree):
            node.child.sort(key=lambda x: x.word_span)
            # Words between the node's start and its first child.
            new_child_list = [
                wrap_word((i, i + 1))
                for i in range(node.word_span[0], node.child[0].word_span[0])
            ]
            # Each child, followed by the words up to the next child (or up
            # to the node's end after the last child, where zip_longest
            # pads the "next" slot with None).
            for child_node, next_child_node in zip_longest(
                    node.child, node.child[1:]):
                new_child_list.append(child_node)
                if next_child_node is not None:
                    end = next_child_node.word_span[0]
                else:
                    end = node.word_span[1]
                new_child_list.extend(
                    wrap_word((i, i + 1))
                    for i in range(child_node.word_span[1], end))
            origin_children = node.child
            node.child = new_child_list
            # Recurse only into the original children; the freshly created
            # word nodes are already complete.
            for child in origin_children:
                sort_and_fill_blank(child)

    sort_and_fill_blank(root)

    def random_merge(node):
        """Reduce every node to at most two children by random merging."""
        children = node.child
        for child_node in children:
            if isinstance(child_node, ConstTree):
                random_merge(child_node)
            else:
                # A word (non-tree child) is expected to be an only child.
                assert len(children) == 1
        while len(children) > 2:
            # randint is inclusive on both ends, so idx + 1 is always valid.
            idx = random_obj.randint(0, len(children) - 2)
            tree_a = children[idx]
            tree_b = children[idx + 1]
            new_tree = ConstTree("X")
            new_tree.word_span = (tree_a.word_span[0], tree_b.word_span[1])
            new_tree.child = [tree_a, tree_b]
            children[idx] = new_tree
            children.pop(idx + 1)

    random_merge(root)
    root.populate_spans_internal()
    return root