def make_sub_tree(span):
    """Create the tree node that stands for *span*.

    A span whose width (``span[1] - span[0]``) is exactly 1 is delegated to
    ``wrap_word``; any other span yields a bare ``"X"`` node carrying the
    span in ``word_span``.
    """
    node = ConstTree("X")
    node.word_span = span
    width = span[1] - span[0]
    # Single-word spans are built by wrap_word; the bare node is then unused.
    return wrap_word(span) if width == 1 else node
def random_merge(node):
    """Recursively binarize *node* in place by merging random adjacent children.

    Every ``ConstTree`` child is processed first.  A non-``ConstTree`` child
    is only allowed when it is the node's single child.  Afterwards, while
    the node has more than two children, a randomly chosen pair of
    neighbours is replaced by a new ``"X"`` node spanning both.
    """
    kids = node.child
    for kid in kids:
        if not isinstance(kid, ConstTree):
            assert len(kids) == 1
            continue
        random_merge(kid)

    while len(kids) > 2:
        # Pick the left member of an adjacent pair (left, left + 1).
        left_idx = random_obj.randint(0, len(kids) - 2)
        left = kids[left_idx]
        right = kids[left_idx + 1]
        merged = ConstTree("X")
        merged.word_span = (left.word_span[0], right.word_span[1])
        merged.child = [left, right]
        # Replace the pair with the merged node, mutating the list in place.
        kids[left_idx:left_idx + 2] = [merged]
def wrap_word(span):
    """Return a new ``"X"`` node for *span* holding the word at ``span[0]``.

    The node's ``word_span`` is set to *span* and ``words[span[0]]`` is
    appended to its ``child`` list.
    """
    leaf = ConstTree("X")
    leaf.word_span = span
    start = span[0]
    leaf.child.append(words[start])
    return leaf
def fuzzy_cfg(cfg, names):
    """Build a randomly binarized ``"X"``-labelled tree over the words of *cfg*.

    The spans found in *names* are nested into a bracketing, uncovered
    positions are filled with single-word leaves, and every node with more
    than two children is binarized by merging random adjacent children.  The
    random generator is seeded with a fixed value (45).

    Args:
        cfg: object providing ``generate_words()``, an iterable of the words.
        names: iterable of indexable items; ``item[0]`` of each is used as a
            ``(start, end)`` word span.  May be empty.

    Returns:
        The root ``ConstTree`` spanning ``(0, len(words))``, after
        ``populate_spans_internal()`` has been called on it.
    """
    random_obj = Random(45)
    spans = {i[0] for i in names}
    words = list(cfg.generate_words())

    def wrap_word(span):
        """Return an "X" node for *span* holding the word at ``span[0]``."""
        ret = ConstTree("X")
        ret.word_span = span
        ret.child.append(words[span[0]])
        return ret

    def make_sub_tree(span):
        """Return a word leaf for width-1 spans, else a bare "X" node."""
        if span[1] - span[0] == 1:
            return wrap_word(span)
        ret = ConstTree("X")
        ret.word_span = span
        return ret

    # Widest spans first, so the narrowest remaining span is always last.
    sub_trees = [make_sub_tree(i) for i in spans]
    sub_trees.sort(key=lambda x: x.word_span[1] - x.word_span[0],
                   reverse=True)

    # Repeatedly take the narrowest span and attach it to a parent.
    while len(sub_trees) > 1:
        this_tree = sub_trees[-1]
        parent_tree = None
        # Candidates are visited widest-first; a later (narrower) candidate
        # replaces the current one when it also overlaps that one.
        # presumably span_overlap tests nesting/intersection of two spans --
        # TODO confirm against its definition.
        for other_tree in sub_trees[:-1]:
            if span_overlap(this_tree.word_span, other_tree.word_span):
                if parent_tree is None or span_overlap(
                        other_tree.word_span, parent_tree.word_span):
                    parent_tree = other_tree
        if parent_tree is not None:
            parent_tree.child.append(this_tree)
        # NOTE(review): a span with no parent is not attached anywhere; its
        # words are re-added later as plain leaves.  This matches the
        # previous behavior -- confirm that dropping such spans is intended.
        sub_trees.pop()

    # At most one (the widest) tree is left.  The root always covers every
    # word; with no spans at all it starts childless and is filled with word
    # leaves below, instead of failing on an empty list.
    root = ConstTree("X")
    root.word_span = (0, len(words))
    root.child = sub_trees

    def sort_and_fill_blank(node):
        """Order children by span and add word leaves for uncovered gaps."""
        if not node.child:
            node.child = [
                wrap_word((i, i + 1)) for i in range(*node.word_span)
            ]
        elif isinstance(node.child[0], ConstTree):
            node.child.sort(key=lambda x: x.word_span)
            # Gap between the node's start and its first child.
            new_child_list = [
                wrap_word((i, i + 1))
                for i in range(node.word_span[0], node.child[0].word_span[0])
            ]
            for child_node, next_child_node in zip_longest(
                    node.child, node.child[1:]):
                new_child_list.append(child_node)
                # Gap up to the next sibling, or to the node's end after the
                # last child.
                if next_child_node is not None:
                    end = next_child_node.word_span[0]
                else:
                    end = node.word_span[1]
                new_child_list.extend(
                    wrap_word((i, i + 1))
                    for i in range(child_node.word_span[1], end))
            origin_children = node.child
            node.child = new_child_list
            # Only the original children can still have gaps of their own.
            for child in origin_children:
                sort_and_fill_blank(child)

    sort_and_fill_blank(root)

    def random_merge(node):
        """Binarize *node* in place by merging random adjacent children."""
        children = node.child
        for child_node in children:
            if isinstance(child_node, ConstTree):
                random_merge(child_node)
            else:
                # A word may only appear as the sole child of its node.
                assert len(children) == 1
        while len(children) > 2:
            idx = random_obj.randint(0, len(children) - 2)
            tree_a = children[idx]
            tree_b = children[idx + 1]
            new_tree = ConstTree("X")
            new_tree.word_span = (tree_a.word_span[0], tree_b.word_span[1])
            new_tree.child = [tree_a, tree_b]
            children[idx] = new_tree
            children.pop(idx + 1)

    random_merge(root)
    root.populate_spans_internal()
    return root