Example #1
0
    def __call__(self, x):
        """Run the flow on ``x`` and collect its explanation as a tree.

        ``x`` is wrapped in ``Results`` when it is not one already.

        * If the results carry an index (``x.idx is not None``), a single tree
          rooted at this task's node is built and handed to ``self.flow``.
        * Otherwise the flow is run once for every index in
          ``range(len(any_value(x.module_output.logits)))`` and each resulting
          tree is pasted under a shared ``batch`` node.

        Returns the bare tree when ``self.prefix`` is empty, otherwise a
        ``TreeExplanation`` wrapping it.
        """
        results = x if isinstance(x, Results) else Results(x)

        if results.idx is not None:
            task_tree = Tree()
            task_node = task_tree.create_node(
                tag=self.task_name,
                identifier=f'inp_{results.idx}.{self.prefix}.{self.task_name}',
            )
            explanation = TreeExplanation(
                task_tree,
                start_node=task_node,
                results=results,
                prefix=f'inp_{results.idx}.{self.prefix}',
            )
            explanation = self.flow(self, results, explanation)

            # When the flow added nothing besides its own start node, drop the
            # whole (empty) flow node by handing back an empty tree.
            only_start_node_left = (
                len(explanation.tree.nodes) == 1
                and explanation.start_node.identifier in explanation.tree
            )
            if only_start_node_left:
                explanation.tree = Tree()
            return explanation.tree if len(self.prefix) == 0 else explanation

        batch_size = len(any_value(results.module_output.logits))
        batch_tree = Tree()
        batch_node = batch_tree.create_node(tag='batch', identifier='batch')
        for position in range(batch_size):
            single_tree = Tree()
            single_node = single_tree.create_node(
                tag=f'inp {position}',
                identifier=f'inp_{position}.{self.prefix}',
            )
            single_results = Results(results.module_output, position)
            explanation = TreeExplanation(single_tree, single_node, single_results, f'inp_{position}.')
            explanation = self.flow(self, single_results, explanation)
            batch_tree.paste(batch_node.identifier, explanation.tree)

        if len(self.prefix) == 0:
            return batch_tree
        return TreeExplanation(batch_tree, start_node=batch_node, results=results, prefix=self.prefix)
Example #2
0
def DecitionTreeLearning(examples, attributes, pexamples):
    """Recursively build a decision tree (a treelib ``Tree``) from ``examples``.

    * No examples: return ``getPlurality(pexamples)`` (the parent's examples).
    * All examples share one classification: return a single-node tree tagged
      ``"Edible"`` when the last field of the first example is ``'e'``,
      ``"Poison"`` otherwise.
    * No attributes left: return ``getPlurality(examples)``.
    * Otherwise split on the attribute chosen by ``getImportant`` and paste one
      recursively built subtree under a node for each value of that attribute.

    NOTE: the chosen attribute is removed from ``attributes`` in place, so the
    caller's list (and the list seen by sibling branches) is modified.
    """
    if len(examples) == 0:
        return getPlurality(pexamples)

    if isSameClassification(examples):
        leaf_tree = Tree()
        label = "Edible" if examples[0][-1] == 'e' else "Poison"
        leaf_tree.create_node(label)
        return leaf_tree

    if len(attributes) == 0:
        return getPlurality(examples)

    best = getImportant(examples, attributes)
    root_id = str(best)  # identifiers are kept as strings
    decision_tree = Tree()
    decision_tree.create_node(str(getPropertyName(best)), root_id)

    partitions = getSubExamples(examples, best)
    attributes.remove(best)  # in-place: see NOTE in the docstring
    for value, subset in partitions.items():
        branch_id = getPropertyName(best) + "_" + value
        decision_tree.create_node(value, branch_id, root_id)
        decision_tree.paste(branch_id, DecitionTreeLearning(subset, attributes, examples))
    return decision_tree
Example #3
0
def merge_trees(t1, t2, tick):
  """Join ``t1`` and ``t2`` under a freshly created cluster root.

  The new root is tagged ``new_cluster_<tick>`` and identified by ``-tick``.
  Returns a ``(tree, name)`` pair: the merged tree and the new root's tag.
  """
  # Negative numbers identify merged clusters; positive numbers are leaf ids.
  cluster_id = -tick
  cluster_name = "new_cluster_%s" % tick
  merged = Tree()
  merged.create_node(cluster_name, cluster_id)
  for child_tree in (t1, t2):
    merged.paste(cluster_id, child_tree)
  return merged, cluster_name
Example #4
0
def create_tree(indexed_titles, root, children=None):
  """Create a tree whose root is tagged ``root`` and identified by ``indexed_titles[root]``.

  Every tree in ``children`` (if any) is pasted directly under that root.
  """
  root_id = indexed_titles[root]
  result = Tree()
  result.create_node(root, root_id)
  for child_tree in children or ():
    result.paste(root_id, child_tree)
  return result
Example #5
0
    def test_paste_duplicate_nodes(self):
        """Pasting a tree that reuses an identifier of the target raises ``ValueError``."""
        target = Tree()
        target.create_node(identifier='A')

        incoming = Tree()
        incoming.create_node(identifier='A')
        incoming.create_node(identifier='B', parent='A')

        with self.assertRaises(ValueError) as caught:
            target.paste('A', incoming)

        expected_args = ("Duplicated nodes ['A'] exists.", )
        self.assertEqual(caught.exception.args, expected_args)
Example #6
0
def absorb(t1: tl.Tree, t2: tl.Tree) -> tl.Tree:
    """Return a copy of ``t1`` with a copy of the whole of ``t2`` pasted under its root.

    Both arguments are deep-copied first, and both copies go through
    ``reset_ids`` before the paste, so the inputs are left untouched.
    """
    host = tl.Tree(tree=t1, deep=True)
    guest = tl.Tree(tree=t2, deep=True)
    host, guest = reset_ids(host), reset_ids(guest)

    host.paste(host.root, guest)
    return host
Example #7
0
def generateTree(rootFile):
    """Build a tree for ``rootFile`` and, recursively, for everything it loads.

    The root node is tagged ``rootFile.tag()`` and identified by
    ``rootFile.id``; the tree of each item returned by ``rootFile.load()`` is
    pasted directly beneath it.
    """
    loaded = rootFile.load()

    node_id = rootFile.id
    node_tag = rootFile.tag()
    result = Tree()
    result.create_node(node_tag, node_id)

    # Lazy generator: each subtree is built right before it is pasted.
    subtrees = (generateTree(item) for item in loaded)
    for subtree in subtrees:
        result.paste(node_id, subtree)
    return result
def branch(partition, vertex, partidx):
    """Return the partition subtree that grows from splitting ``partition`` at ``vertex``.

    The split result becomes the subtree's root (it is used as the node
    identifier, tagged with its ``PaintedVertices``). Unless that result is
    atomic, one sub-branch per entry of its next splitting part is built
    recursively and pasted under the root.
    """
    refined = partition.split(vertex, partidx)
    subtree = Tree()
    subtree.create_node(refined.PaintedVertices, refined)

    if not refined.isatomic():
        # Depth first: recurse into every vertex of the next part to split.
        for part_vertex in refined.Parts[refined.nextsplitting()]:
            subtree.paste(refined, branch(refined, part_vertex, refined.nextsplitting()))

    return subtree
Example #9
0
 def run(self, args):
     """Show the part of the login-info tree that hangs below ``args[0]``.

     Does nothing when ``args[0]`` is not in the tree. Otherwise a temporary
     tree is built whose root (identifier ``''``) is tagged ``args[0]`` and
     receives the subtree of every child of ``args[0]``. When there are no
     children, the root's tag is replaced by the tag of the original node.
     """
     full_tree = self._login_info_manager.tree()
     target = args[0]
     if target not in full_tree:
         return

     view = Tree()
     view.create_node(target, '')
     pasted_any = False
     for child in full_tree.children(target):
         view.paste('', full_tree.subtree(child.identifier))
         pasted_any = True
     if not pasted_any:
         view.get_node('').tag = full_tree.get_node(target).tag
     view.show()
Example #10
0
def help_func_return(grammar, tokens, function=None):
    """Build the subtree for a ``return`` statement.

    The root node is tagged with ``tokens[0][1]``. Unless the second token is
    ``;``, the remaining tokens are handed to ``help_func_expression`` and the
    expression tree it returns is pasted under the root.

    Returns ``[tree, n]`` where ``n`` is the expression's token count (0 when
    there is no expression) plus 2 - presumably for the leading keyword and
    the closing ``;``.
    """
    tree = Tree()
    return_node = Node(tag=tokens[0][1])
    tree.add_node(return_node, parent=None)

    if tokens[1][0] == ';':
        return [tree, 2]

    expression = help_func_expression(grammar, tokens[1:], function=function)
    tree.paste(return_node.identifier, expression[0])
    return [tree, expression[1] + 2]
Example #11
0
def collapse(t1: tl.Tree, t2: tl.Tree) -> tl.Tree:
    """Return a copy of ``t1`` whose root also holds the children of ``t2``'s root.

    Both arguments are deep-copied and passed through ``reset_ids`` first, so
    the inputs are left untouched. The root of ``t2`` itself is not carried
    over; only the subtrees of its direct children are pasted.
    """
    host = tl.Tree(tree=t1, deep=True)
    donor = tl.Tree(tree=t2, deep=True)
    host, donor = reset_ids(host), reset_ids(donor)

    top_level_ids = [child.identifier for child in donor.children(donor.root)]
    for child_id in top_level_ids:
        host.paste(host.root, donor.subtree(child_id))

    return host
Example #12
0
        def get_bonus_tree(self):
            """Build a "bonus" tree from the document tree returned by ``self.get_tree()``.

            Two first-level nodes are looked up among ``self.level_one``: a
            conditions node (first whose ``data[0]`` contains '条件') and a bonus
            node (whose ``data[0]`` contains one of ``self.keywords``). When both
            are found, the returned tree has a ``root`` node holding the joined
            text of the bonus subtree and a ``partition`` child under which the
            condition text and copies of the condition subtrees are placed.
            When either is missing, an empty tree is returned.
            """
            bonus_tree=Tree()
            doc_tree=self.get_tree()
            c_nid=''
            b_nid=''
            b_data=''
            # Find the conditions node: the first level-one node that matches.
            for nid in self.level_one:
                if '条件'in doc_tree.get_node(nid).data[0]:
                    c_nid=nid
                    break
            # Find the bonus node.
            # NOTE(review): the break only leaves the inner keyword loop, so the
            # LAST matching level-one node wins here (unlike the conditions
            # search above, which keeps the first) - confirm this is intended.
            for nid in self.level_one:
                for key in self.keywords:
                    if key in doc_tree.get_node(nid).data[0]:
                        b_nid=nid
                        break
            if b_nid!='' and c_nid!='':
                # Concatenate the text of every node in the bonus subtree, depth first.
                for node in doc_tree.expand_tree(nid=b_nid, mode=Tree.DEPTH):
                    if node==b_nid:
                        p_nodedata=doc_tree[node].data.copy()
                        del p_nodedata[0]
                        # Filter out the section heading (e.g. "4. Reward standards").
                        b_data += ','.join(p_nodedata)
                        continue
                    b_data+=','.join(doc_tree[node].data)
                # Create the bonus tree.
                bonus_tree.create_node(identifier='root',tag=b_data,data=b_data)
                bonus_tree.create_node('partition', 'partition', parent='root')


                # The parent node may carry information itself, because it can be
                # a plain paragraph without a sequence number.
                p_data=doc_tree.get_node(c_nid).data.copy()

                # Drop the section heading (e.g. "2. Application conditions") and
                # the lead-in sentence (e.g. "the following conditions are met").
                if len(p_data)>=2:
                    if len(p_data[0])<8 :
                        del p_data[0]
                    if ':' in p_data[0]:
                        del p_data[0]
                    p_data=','.join(p_data)
                    if p_data:
                        bonus_tree.create_node(identifier=c_nid, tag=p_data, data=p_data,parent='partition')
                # Could be improved: copying the tree here wastes resources.
                new_tree=self.copy_tree(doc_tree.subtree(c_nid),'')
                # The copied root is looked up as '_' + c_nid (empty prefix plus separator).
                for children in new_tree.children(''+'_'+c_nid):
                    bonus_tree.paste('partition', new_tree.subtree(children.identifier))
            return bonus_tree
    def _join_trees(self, subtree_1: Tree, subtree_2: Tree):
        """Put ``subtree_1`` and ``subtree_2`` under a new root spanning both.

        Root identifiers have the form ``'<start>-<end>'``. The new root runs
        from the start of ``subtree_1`` to the end of ``subtree_2`` and carries
        a ``Span`` (height 100, no constant) over the matching slice of
        ``self._tokens``.
        """
        left_parts = subtree_1.root.split('-')
        right_parts = subtree_2.root.split('-')
        bounds = (int(left_parts[0]), int(right_parts[1]))
        node_id = '{}-{}'.format(*bounds)

        joined = Tree()
        joined.create_node(
            identifier=node_id,
            data=Span(height=100,
                      span=bounds,
                      content=self._tokens[bounds[0]:bounds[1]],
                      constant=None),
        )
        for part in (subtree_1, subtree_2):
            joined.paste(nid=node_id, new_tree=part)
        return joined
Example #14
0
def run_parser(tokens,
               grammar,
               look_for_brace=False,
               root_name="program",
               clear_symbol_table=False):
    """Parse ``tokens`` into a syntax tree rooted at a node tagged ``root_name``.

    Tokens are accumulated one by one and checked against the "program" rules
    with ``check_rules``. As soon as exactly one rule matches, the matching
    helper (via ``help_func_manager``) builds a subtree that is pasted under the
    root, and the tokens that helper consumed beyond the accumulated ones are
    skipped.

    Arguments:
        tokens: sequence of tokens; ``token[0]`` is compared against text such
            as "}" and ``token[2]`` is reported as the line number in errors.
        grammar: grammar passed through to ``check_rules`` and the helpers.
        look_for_brace: when true, stop at the first "}" token.
        root_name: tag of the root node.
        clear_symbol_table: when true, reset the module-level symbol tables.

    Returns:
        ``[tree, num_tokens_to_skip, __symbol_tables]``.

    Raises:
        Exception: when the accumulated tokens match no rule.
    """
    # Create dictionary of symbol tables
    # NOTE(review): the global is only assigned here when clear_symbol_table is
    # true; otherwise it must already exist at module level - confirm.
    global __symbol_tables
    if (clear_symbol_table):
        __symbol_tables = {}
    # Create base abstract syntax tree
    tree = Tree()
    # create root node
    root = Node(tag=root_name)
    tree.add_node(root, parent=None)
    num_tokens_to_skip = 0
    list_of_tokens = []

    for i in range(0, len(tokens)):
        # Tokens already consumed by a helper are stepped over.
        if (num_tokens_to_skip > 0):
            num_tokens_to_skip -= 1
            continue

        if (look_for_brace and tokens[i][0] == "}"):
            break
        list_of_tokens.append(tokens[i])  # append token and metadata

        result = check_rules("program", list_of_tokens, grammar)
        if (result[0] > 1):  #matches more than one possible rule
            continue
        elif (result[0] == 1):  #matches one possible rule
            # The accumulated tokens are the contiguous run ending at index i,
            # so the helper receives everything from the first of them onwards.
            help_fun_tuple = help_func_manager(
                result, grammar, tokens[i - len(list_of_tokens) + 1:])
            sub_tree = help_fun_tuple[0]
            # Tokens the helper used beyond those already accumulated.
            num_tokens_to_skip = help_fun_tuple[1] - len(list_of_tokens)

            tree.paste(root.identifier, sub_tree)
            #call helper function
            list_of_tokens = []
        elif (result[0] == 0):
            #matches zero rules. parser crash
            # Dump the partial tree and the offending tokens before raising.
            tree.show(key=lambda x: x.identifier, line_type='ascii')
            print("ERRONEOUS RESULT:", result)
            print("ERRONEOUS TOKEN LIST:", list_of_tokens)
            raise Exception(errors.ERR_NO_RULE + " '" + tokens[i][0] +
                            "' on line " + str(tokens[i][2]))

    return [tree, num_tokens_to_skip, __symbol_tables]
Example #15
0
def _random(max_depth=5, min_width=1, max_width=2, offset=(0, )):
    """Build a random tree whose node identifiers are tuple paths.

    The root is identified by ``offset``; the i-th child of a node is
    identified by the parent's identifier extended with ``(i, )``. Every node
    above the deepest level gets between ``min_width`` and ``max_width``
    children (inclusive, drawn with ``random.randint``).

    Arguments:
        max_depth: number of levels below the root; values <= 0 yield a tree
            consisting of the root only.
        min_width: minimum number of children per inner node. It is applied at
            every level (it used to be dropped on recursion, silently falling
            back to 1 below the first level).
        max_width: maximum number of children per inner node.
        offset: identifier of the root node.

    Returns:
        The generated ``Tree``.
    """
    tree = Tree()
    tree.create_node(identifier=offset)
    # <= 0 rather than == 0 so that a negative depth cannot recurse forever.
    if max_depth <= 0:
        return tree

    for i in range(random.randint(min_width, max_width)):
        child_offset = offset + (i, )
        if max_depth == 1:
            # Last level: plain leaves, no further recursion needed.
            tree.create_node(identifier=child_offset, parent=offset)
        else:
            subtree = _random(max_depth=max_depth - 1,
                              min_width=min_width,
                              max_width=max_width,
                              offset=child_offset)
            tree.paste(offset, subtree)
    return tree
Example #16
0
    def test_shallow_paste(self):
        """Pasting single-node trees under a root appends them as children at level 1."""
        parent_tree, first_child, second_child = Tree(), Tree(), Tree()
        root_a = parent_tree.create_node(identifier='A')
        root_b = first_child.create_node(identifier='B')
        root_c = second_child.create_node(identifier='C')

        paste_steps = (
            (first_child, {'A': {'children': ['B']}}),
            (second_child, {'A': {'children': ['B', 'C']}}),
        )
        for pasted, expected_dict in paste_steps:
            parent_tree.paste(root_a.identifier, pasted)
            self.assertEqual(parent_tree.to_dict(), expected_dict)

        for node, expected_level in ((root_a, 0), (root_b, 1), (root_c, 1)):
            self.assertEqual(parent_tree.level(node.identifier), expected_level)
Example #17
0
def create_tree(adict):
    """Turn a nested dict/list structure into a tree of timestamped nodes.

    A value that is not exactly a ``dict`` becomes a single-node tree. For a
    dict, its first key becomes the root; then every value of the dict is
    attached under that root: dict values recursively as subtrees, each item of
    a list value as its own subtree, anything else as a plain child node. Nodes
    are tagged with ``timestamp_node(value)``, identified by the value itself
    and carry ``time()`` as data.
    """
    tree = Tree()
    if type(adict) is not dict:
        tree.create_node(timestamp_node(adict), adict, data=time())
        return tree

    root = list(adict)[0]
    tree.create_node(timestamp_node(root), root, data=time())
    for value in tuple(adict.values()):
        if type(value) is dict:
            nested = [value]
        elif type(value) is list:
            nested = value
        else:
            tree.create_node(timestamp_node(value), value, parent=root, data=time())
            continue
        for item in nested:
            tree.paste(root, create_tree(item))
    return tree
Example #18
0
  def crossOver(individualA, individualB):
    """Create a new individual by grafting a random subtree of B into A.

    Works on deep copies of both parents' trees (passed through
    ``regenerate_ids``), so the parents are not modified. A random node of A
    is replaced by the subtree rooted at a random node of B; when the node
    picked in A is its root, the grafted subtree becomes the whole result.
    The draw is repeated until the resulting tree is no deeper than
    ``TREE_MAX_DEPTH``.

    Returns:
        ``Individual`` wrapping the resulting tree.
    """
    tree = None

    # ``tree.depth()`` without an argument is the depth of the whole tree.
    # Passing the root node (as was done before) returns the root's level,
    # which is always 0, so the depth limit was never enforced.
    while tree is None or tree.depth() > TREE_MAX_DEPTH:
      treeA = Tree(tree = individualA.tree, deep=True)
      treeB = Tree(tree = individualB.tree, deep=True)
      regenerate_ids(treeA)
      regenerate_ids(treeB)
      removedNode = random.choice(treeA.all_nodes())
      addedNode = random.choice(treeB.all_nodes())

      addedSubtree = Tree(tree = treeB.subtree(addedNode.identifier), deep=True)

      if treeA.root == removedNode.identifier:
        # Replacing the root: the grafted subtree is the entire new tree.
        tree = addedSubtree

      else:
        # The parent must be looked up before the subtree is removed.
        parent = treeA.parent(removedNode.identifier)
        treeA.remove_subtree(removedNode.identifier)
        treeA.paste(parent.identifier, addedSubtree)
        tree = treeA

    return Individual(tree)
Example #19
0
def swap(tree):
    """Propose swapping the split rule of a random internal node with its parent's.

    The proposal is accepted with probability ``min(1, r)`` where ``r`` is the
    product of the values returned by ``get_like_ratio`` and ``get_struct``.
    Returns the modified copy when accepted, otherwise the original ``tree``.
    """
    # Internal nodes are those that carry a split variable.
    internalNodes = [n for n in tree.all_nodes_itr() if n.var != None]
    if len(internalNodes) == 1: return tree
    # Node 0 is excluded from the candidates - presumably the root, which has
    # no parent to swap with (TODO confirm).
    internalNodes.remove(tree[0])
    cNode = random.choice(internalNodes)
    tagc = (cNode.identifier, cNode.var, cNode.split)
    pid = cNode.bpointer  # identifier of the chosen node's parent
    # Work on a deep copy; detach the parent's subtree from it.
    tree1 = Tree(tree, deep=True)
    sub = tree1.remove_subtree(pid)
    # presumably a list of (identifier, var, split) tuples with the parent's
    # entry first - verify against recurTag
    tags = recurTag(sub, pid)
    tagp = tags[0]
    # Exchange (var, split) between the parent's and the child's entries while
    # each keeps its own identifier.
    tags[tags.index(tagc)] = (tagc[0], tagp[1], tagp[2])
    tags[0] = (tagp[0], tagc[1], tagc[2])
    # NOTE(review): `mi` and `t` are not defined in this function; they must be
    # module-level names.
    string = f'{mi} swap {t}: {tags[0]}; '
    try:
        sub1 = genTree(tree[pid], tags)
    except IndexError:
        # genTree could not build a subtree from the swapped rules.
        print(string + 'unswappable')
        return tree
    #rTransit = 1
    rLike = get_like_ratio(tree.R, sub.leaves(), sub1.leaves())
    rStruct = get_struct(sub.all_nodes_itr(), sub1.all_nodes_itr())
    r = rLike * rStruct
    print(string + f'{r.round(4)}')
    if random.uniform(0, 1) < r:
        if pid > 0:
            # Re-attach the rebuilt subtree under the grandparent and keep the
            # grandparent's child list sorted.
            gpid = tree[pid].bpointer
            tree1.paste(gpid, sub1)
            tree1[gpid].fpointer = sorted(tree1[gpid].fpointer)
        else:
            # The parent was node 0: the rebuilt subtree is the whole tree.
            tree1 = sub1
        # Carry the extra attributes over from the original tree.
        tree1.w2 = tree.w2
        tree1.R = tree.R
        tree1.leaf = [n.identifier for n in tree1.leaves() if len(n.xvar) > 0]
        tree1.show()
        return tree1
    return tree
 def _combine_trees(self, subtree_1: Tree, subtree_2: Tree):
     """Paste ``subtree_1`` under the root of ``subtree_2`` and return ``subtree_2``.

     ``subtree_2`` is modified in place.
     """
     host = subtree_2
     host.paste(nid=host.root, new_tree=subtree_1)
     return host
Example #21
0
#     print tree[node].tag

# Demo of common tree operations on the family tree `tree`; both `tree` and
# the heading prefix `sep` are defined earlier, outside this excerpt.

# Show only the subtree rooted at "diane".
print(sep + "Let me introduce Diane family only:")
sub_t = tree.subtree("diane")
sub_t.show()

# is_branch() yields the identifiers of the direct children of "diane".
print(sep + "Children of Diane")
for child in tree.is_branch("diane"):
    print(tree[child].tag)

# Build a small separate tree and paste it under "jill".
print(sep + "OOhh~ new members join Jill's family:")
new_tree = Tree()
new_tree.create_node("n1", 1)  # root node
new_tree.create_node("n2", 2, parent=1)
new_tree.create_node("n3", 3, parent=1)
tree.paste("jill", new_tree)
tree.show()

# Remove the pasted root (identifier 1) again.
print(sep + "They leave after a while:")
tree.remove_node(1)
tree.show()

# Re-parent "jill" under "harry".
print(sep + "Now Jill moves to live with Grand-x-father Harry:")
tree.move_node("jill", "harry")
tree.show()

# rsearch() walks from "george" upwards through its ancestors.
print(sep + "A big family for George to send message to the oldest Harry:")
for node in tree.rsearch("george"):
    print(tree[node].tag)
########NEW FILE########
__FILENAME__ = folder_tree
Example #22
0
    def _follow_bps(self,
                    derivation: Derivation,
                    sentence: List[str],
                    collected_spans: List[CollectedSpan] = None):
        """Transforms back-pointers collected in CKY into a span tree.

        Arguments:
            derivation: derivation to unfold; its ``left_bp``, ``right_bp`` and
                optional ``middle_bp`` back-pointers are followed recursively
                until a derivation without ``split`` is reached.
            sentence: the tokens; ``sentence[start:end]`` becomes the content
                of each span.
            collected_spans: optional list that is extended in place with one
                ``CollectedSpan`` per visited derivation (spans shifted by +1).

        Returns:
            A ``Tree`` whose node identifiers have the form ``'<start>-<end>'``
            and whose node data are ``Span`` objects.
        """
        if not derivation.split:  # Stopping criteria for unary derivations.
            tree = Tree()
            start = derivation.span[0]
            end = derivation.span[1]
            identifier = '{}-{}'.format(start, end)
            span = Span(span=derivation.span,
                        content=sentence[start:end],
                        constant=derivation.category)
            tree.create_node(identifier=identifier, data=span)
            if collected_spans is not None:
                # add 1 to match label spans
                collected_spans.append(
                    CollectedSpan(category=derivation.category,
                                  span=(start + 1, end + 1)))
            return tree
        # follow left back pointer
        left_tree = self._follow_bps(derivation=derivation.left_bp,
                                     sentence=sentence,
                                     collected_spans=collected_spans)
        # follow right back pointer
        right_tree = self._follow_bps(derivation=derivation.right_bp,
                                      sentence=sentence,
                                      collected_spans=collected_spans)

        # merge children to tree
        # The parent span runs from the start of the left child to the end of
        # the right child, both recovered from the children's root identifiers.
        top_tree = Tree()
        left_tree_span = left_tree.root.split('-')
        right_tree_span = right_tree.root.split('-')
        start = int(left_tree_span[0])
        end = int(right_tree_span[1])
        identifier = '{}-{}'.format(start, end)
        span = Span(span=(start, end),
                    content=sentence[start:end],
                    constant=derivation.category)
        top_tree.create_node(identifier=identifier, data=span)
        top_tree.paste(nid=identifier, new_tree=left_tree)
        # The middle child is pasted between left and right, but it is unfolded
        # only here, i.e. after the right child, so its collected spans are
        # appended after those of the right child.
        if derivation.middle_bp is not None:
            middle_tree = self._follow_bps(derivation=derivation.middle_bp,
                                           sentence=sentence,
                                           collected_spans=collected_spans)
            top_tree.paste(nid=identifier, new_tree=middle_tree)
        top_tree.paste(nid=identifier, new_tree=right_tree)

        if collected_spans is not None:
            if derivation.category == SPAN_LABEL:
                # Make sure both spans are not some combination of not_span
                if not derivation.middle_bp or derivation.middle_bp.category != NOT_SPAN_LABEL:
                    # add 1 to match label spans
                    collected_spans.append(
                        CollectedSpan(category=SPAN_LABEL,
                                      span=(derivation.left_bp.span[0] + 1,
                                            derivation.right_bp.span[1] + 1)))
            else:
                # add 1 to match label spans
                collected_spans.append(
                    CollectedSpan(category=derivation.category,
                                  span=(derivation.left_bp.span[0] + 1,
                                        derivation.right_bp.span[1] + 1)))
        return top_tree
    leaf = partition.split(vertex, partidx)
    subTree.create_node(leaf.PaintedVertices, leaf)
    if leaf.isatomic():
        return subTree

    # recurse onto children nodes to build partition tree depth first
    for v in leaf.Parts[leaf.nextsplitting()]:
        subTree.paste(leaf, branch(leaf, v, leaf.nextsplitting()))

    return subTree


from treelib import Node, Tree
# Build the full partition tree starting from P0 (defined outside this
# excerpt). The partition objects themselves serve as node identifiers.
tree = Tree()
tree.create_node(P0.PaintedVertices, P0)  # root node

# One branch per vertex of the next part to split, unless P0 is already atomic.
if not P0.isatomic():
    for v in P0.Parts[P0.nextsplitting()]:
        tree.paste(P0, branch(P0, v, P0.nextsplitting()))

tree.show()
# Each leaf's identifier is a partition: apply its automorphism and print the
# result of lexifyedges() for it.
for node in tree.leaves():
    # print(node.identifier.permutation())
    P = node.identifier
    sG = P.applyautomorphism()
    print(lexifyedges(sG))

# P1 = tree.leaves()[0].identifier
# p = P1.permutation()
# G1 = P1.applyautomorphism()
Example #24
0
# Demo of common tree operations on the family tree `tree`, which is defined
# earlier, outside this excerpt. NOTE: Python 2 only - the "Children of Diane"
# section uses the print statement.

# Show only the subtree rooted at 'diane'.
print("#"*4 + "Let me introduce Diane family only")
sub_t = tree.subtree('diane')
sub_t.show()
print('\n') 

# Print what is_branch() returns for 'diane'.
print("#"*4 + "Children of Diane")
print tree.is_branch('diane')
print('\n')

# Build a small separate tree and paste it under 'jill'.
print("#"*4 + "OOhh~ new members enter Jill's family")
new_tree = Tree()
new_tree.create_node("n1", 1)  # root node
new_tree.create_node("n2", 2, parent=1)
new_tree.create_node("n3", 3, parent=1)
tree.paste('jill', new_tree)
tree.show()
print('\n')

# Remove the pasted root (identifier 1) again.
print("#"*4 + "We are sorry they are gone accidently :(")
tree.remove_node(1)
tree.show()
print('\n')

# Re-parent 'jill' under 'harry'.
print("#"*4 + "Now Jill moves to live with Grand-x-father Harry")
tree.move_node('jill', 'harry')
tree.show()
print('\n')

print("#"*4 + "A big family for George to talk to Grand-x-father Harry")
for node in tree.rsearch('george', filter=lambda x: x != 'harry'):
    def create_ast(self, filename):
        """ Create an ast for a given file

            The tree has a "script" root with three children: "main_op_array",
            "function_table" and "class_table". Every opcode is wrapped in an
            ``OPcode`` and pasted under the op array, function or class method
            it belongs to. All opcodes, including those of class methods, are
            built with ``self.is_64_bit`` (class-method opcodes previously
            omitted the flag and fell back to ``OPcode``'s default).

            Arguments :
                filename : The name of the file to parse

            Returns :
                The syntax tree
        """

        # Create parser
        if self.is_64_bit:
            opcache = opcache_parser_64.OPcacheParser(filename)
        else:
            opcache = opcache_parser.OPcacheParser(filename)

        # Create syntax tree
        ast = Tree()
        ast.create_node("script", "script")
        ast.create_node("main_op_array", "main_op_array", parent="script")
        ast.create_node("function_table", "function_table", parent="script")
        ast.create_node("class_table", "class_table", parent="script")

        def paste_opcodes(parent_id, op_array):
            # Single place that builds OPcode trees, so the bitness flag is
            # passed consistently for every op array.
            for idx, opcode in enumerate(op_array['opcodes']):
                opcode = OPcode(str(idx), opcode, op_array, opcache, self.is_64_bit)
                ast.paste(parent_id, opcode)

        # Get main structures
        main_op_array = opcache['script']['main_op_array']
        functions = opcache['script']['function_table']['buckets']
        classes = opcache['script']['class_table']['buckets']

        # Main OP array
        paste_opcodes("main_op_array", main_op_array)

        # Function Table
        for function in functions:

            # Create function node
            function_name = function['key']['val']
            function_id = function_name + "_function"
            ast.create_node(function_name, function_id, parent="function_table")

            # Iterate over opcodes
            paste_opcodes(function_id, function['val']['op_array'])

        # Class Table
        for class_ in classes:

            # Check for real classes
            if class_['val']['u1']['type'] == IS_PTR:

                # Create class node
                class_name = class_['key']['val']
                class_id = class_name + "_class"
                ast.create_node(class_name, class_id, parent="class_table")

                # Function Table
                for function in class_['val']['class']['function_table']['buckets']:

                    # Create function node
                    function_name = function['key']['val']
                    class_function_id = function_name + "_class_function"
                    ast.create_node(function_name, class_function_id, parent=class_id)

                    # Iterate over opcodes
                    paste_opcodes(class_function_id, function['val']['op_array'])


        return ast
Example #26
0
    def create_ast(self, filename):
        """ Build the syntax tree of an OPcache file

            The tree has a "script" root with three children: "main_op_array",
            "function_table" and "class_table". Every opcode is wrapped in an
            ``OPcode`` and pasted under the op array, function or class method
            it belongs to.

            Arguments :
                filename : The name of the file to parse

            Returns :
                The syntax tree
        """

        parsed = OPcacheParser(filename)

        # Skeleton of the tree: the root first, then its three sections
        ast = Tree()
        ast.create_node("script", "script")
        for section in ("main_op_array", "function_table", "class_table"):
            ast.create_node(section, section, parent="script")

        def paste_opcodes(parent_id, op_array):
            # Wrap each opcode of op_array and hang it under parent_id
            for position, raw_opcode in enumerate(op_array['opcodes']):
                ast.paste(parent_id,
                          OPcode(str(position), raw_opcode, op_array, parsed))

        script = parsed['script']
        main_op_array = script['main_op_array']
        function_buckets = script['function_table']['buckets']
        class_buckets = script['class_table']['buckets']

        # Main OP array
        paste_opcodes("main_op_array", main_op_array)

        # Free functions
        for bucket in function_buckets:
            function_name = bucket['key']['val']
            function_id = function_name + "_function"
            ast.create_node(function_name,
                            function_id,
                            parent="function_table")
            paste_opcodes(function_id, bucket['val']['op_array'])

        # Classes and their methods
        for class_bucket in class_buckets:

            # Only real classes are added
            if class_bucket['val']['u1']['type'] != IS_PTR:
                continue

            class_name = class_bucket['key']['val']
            class_id = class_name + "_class"
            ast.create_node(class_name, class_id, parent="class_table")

            methods = class_bucket['val']['class']['function_table']['buckets']
            for method in methods:
                method_name = method['key']['val']
                method_id = method_name + "_class_function"
                ast.create_node(method_name, method_id, parent=class_id)
                paste_opcodes(method_id, method['val']['op_array'])

        return ast
Example #27
0
from treelib import Tree, Node
if __name__ == '__main__':
    # Create a tree. Every node has a unique identifier, which can be given
    # explicitly.
    tree = Tree()
    # Add nodes: the tag is what gets displayed, the identifier is the unique
    # key. The root node needs no parent.
    tree.create_node(tag='root', identifier='root', data=0)
    for child_tag, child_data, child_parent in (
            ('1_child', 1, 'root'),
            ('2_child', 2, 'root'),
            ('3_child', 3, '1_child'),
    ):
        tree.create_node(tag=child_tag,
                         identifier=child_tag,
                         data=child_data,
                         parent=child_parent)

    # Paste another tree. Note that nid is an identifier inside `tree` (the
    # target), not inside `tree2`.
    tree2 = Tree()
    tree2.create_node(tag='tutu', identifier='tutu', data=0)
    tree.paste(nid='root', new_tree=tree2)

    # Remove the pasted node, move '3_child' directly under the root, then
    # print the resulting structure.
    tree.remove_node('tutu')
    tree.move_node('3_child', 'root')
    tree.show()
Example #28
0
def analyze(token):
    """Run a table-driven shift/reduce parse over ``token`` and show the parse tree.

    ``token`` is an iterable of strings; only the part of each string before
    the first space is used as the grammar symbol. The action table is read
    from 'table.csv' and the grammar is the fixed rule set ``rr`` below.
    On acceptance the resulting tree is shown and ``None`` is returned;
    ``CompiledError`` is raised for an unknown symbol or an empty table cell.

    NOTE: Python 2 only (print statements, ``StandardError``).
    """
    def getNext():
        i = 0
        while True:
            yield i
            i += 1
    # NOTE(review): getNext() is called afresh at every use below, and the
    # generator OBJECT itself (not a number taken from it) is passed as the
    # node identifier. This looks unintended - confirm.

    import pandas as pd
    from collections import deque

    from treelib import Tree, Node
    import re

    # Keep the first space-separated field of every entry and append the
    # end-of-input marker.
    tokens = []
    for l in token:
        s = re.split(r' ', l)
        tokens.append(s[0])
    tokens.append('$')
    test = deque(tokens)
    # Reduction rules, keyed by the action name used in the table.
    rr = {
        'r1': 'E=E+T',
        'r2': 'E=T',
        'r3': 'T=T*F',
        'r4': 'T=F',
        'r5': 'F=(E)',
        'r6': 'F=i'
    }

    class CompiledError(StandardError):
        def __init__(self, arg):
            self.arg = arg

        def __str__(self):
            return self.arg

    state = [0]        # state stack
    symbolic = ['$']   # symbol stack
    ast = []           # one partial tree per symbol pushed after '$'
    table = pd.read_csv('table.csv', index_col=0, na_filter=False)
    ltest = len(test)
    while (len(test) != 0):
        if test[0] not in table.columns:
            raise CompiledError('%s, unrecognized token at %d' %
                                (test[0], ltest - len(test)))
        # Action for the current state and lookahead; an empty cell is an error.
        ins = table.loc[state[-1], test[0]]
        if len(ins) == 0:
            raise CompiledError('%s complie failed at %d, unexpected token' %
                                (test[0], ltest - len(test)))
        if ins[0] == 's':
            # Shift: push the target state and a single-node tree for the token.
            state.append(int(ins[1:]))
            tree = Tree()
            tree.create_node(test[0], getNext())
            ast.append(tree)
            symbolic.append(test.popleft())
        elif ins[0] == 'r':
            # Reduce: pop one symbol/state/tree per right-hand-side character
            # (the rule is walked backwards until '=' is reached).
            rule = rr[ins]
            print rule
            li = list(rule)
            li.reverse()
            temptree = []
            for i in li:
                if i != '=':
                    symbolic.pop()
                    state.pop()
                    temptree.append(ast.pop())
                else:
                    break
            # Push the rule's left-hand side and the state the table gives for it.
            symb = rule[:rule.find('=')]
            symbolic.append(symb)
            state.append(int(table.loc[state[-1], symbolic[-1]]))
            tree = Tree()
            ii = getNext()
            tree.create_node(symb, ii)
            # Children are pasted in pop order, i.e. rightmost symbol first.
            for tri in temptree:
                tree.paste(ii, tri)
            ast.append(tree)
        elif ins == 'AC':
            # Accept: show the finished tree.
            print 'succeed'
            tree = ast.pop()
            tree.show()
            return
    # NOTE(review): this point is only reached once `test` is empty, so test[0]
    # here would raise IndexError instead of the intended CompiledError.
    raise CompiledError('%s compiled failed at %d, unexpected token' %
                        (test[0], ltest - len(test)))
Example #29
0
def help_func_funDeclaration(grammar, tokens):
    """Build the subtree for a function declaration.

    Expected token layout: the return type, the function name, ``(``, the
    parameters, ``)`` and then the body. Each token is indexable: ``token[0]``
    is its text and ``token[2]`` is reported as its line number in errors.

    A function named ``main`` is renamed to ``_start`` (the token is modified
    in place); declaring ``_start`` directly is rejected. An empty symbol
    subtable is registered for the function in ``__symbol_tables``.

    The resulting tree is rooted at ``func:<name>`` with the children
    ``return_type`` (holding the type), ``params`` (one type node per
    parameter, each with a name node beneath it) and the body tree returned by
    ``help_func_block``.

    Returns:
        ``[tree, skip_tokens]`` where ``skip_tokens`` is the header length plus
        the count reported by ``help_func_block``.

    Raises:
        AssertionError: fewer than four tokens were given.
        Exception: the name ``_start`` is used, or a parameter is malformed.
    """
    assert (len(tokens) >= 4)

    tree = Tree()

    # organization nodes
    return_node = Node(tag="return_type")
    params_node = Node(tag="params")

    # create root node
    if (tokens[1][0] == "_start"):
        raise Exception("Function name _start is reserved for assembly")
    elif (tokens[1][0] == "main"):
        tokens[1][0] = "_start"
    func_name = tokens[1][0]
    func_root = Node(tag="func:" + func_name)
    return_type = Node(tag=tokens[0][0])
    # Create symbol subtable
    __symbol_tables[func_name] = {}

    # Assemble basic subtree
    tree.add_node(func_root, parent=None)
    tree.add_node(return_node, parent=func_root)
    tree.add_node(params_node, parent=func_root)

    tree.add_node(return_type, parent=return_node)

    # Create and add params nodes
    params = []
    var_case = 0  # 0 = empty, 1 = void, 2 = variables
    # Parameters come in groups of three tokens: type, name, separator.
    for i in range(3, len(tokens), 3):
        if (i == 3 and tokens[i][0] == 'void'):
            var_case = 1
            break
        elif (tokens[i][0] == ")"):
            break
        else:
            try:
                # i+0 = type, i+1 = name
                params.append((tokens[i][0], tokens[i + 1][0]))
                var_case = 2
            except (IndexError, TypeError):
                # Only the failures this indexing can produce are translated;
                # anything else (e.g. KeyboardInterrupt) propagates untouched.
                raise Exception(errors.ERR_BAD_FUNC_PAR + " '" + tokens[i][0] +
                                "' on line " + str(tokens[i][2]))
            # i+2 = comma if another parameter follows
            if (tokens[i + 2][0] != ','):
                break

    for param in params:
        type_node = Node(tag=param[0])
        name_node = Node(tag=param[1])

        tree.add_node(type_node, parent=params_node)
        tree.add_node(name_node, parent=type_node)

    # Number of header tokens in front of the body
    if var_case == 0:
        # Empty parameters
        skip_tokens = 5
    elif var_case == 1:
        # Void parameter
        skip_tokens = 6
    else:
        # Has parameters: three tokens for each of them
        skip_tokens = 4 + (3 * len(params))
    body_tokens = tokens[skip_tokens:]

    # Create and add body
    block_out = help_func_block(grammar,
                                body_tokens,
                                root_name="func_body",
                                function=func_name)
    body_tree = block_out[0]
    skip_tokens += block_out[1]
    tree.paste(func_root.identifier, body_tree)

    return [tree, skip_tokens]
Example #30
0
def help_func_block(grammar, tokens, root_name="block", function=None):
    """Parse the tokens of one brace-delimited block into a tree.

    The token list is scanned left to right until the '}' that closes the
    block is found.  Along the way:

    * a '{' starts a nested block, parsed recursively and pasted under the
      root node;
    * ``if`` / ``while`` produce a node with a "condition" child and a
      "condition_body" subtree;
    * ``return`` is delegated to ``help_func_return``;
    * a ';' terminates the pending statement, which is either a variable
      declaration (recorded in ``__symbol_tables[function]``) or an
      expression handed to ``help_func_expression``.

    Args:
        grammar: passed through unchanged to the helper parsers.
        tokens: list of tokens; each token is indexed as ``token[0]``
            (text), ``token[1]`` (token type) and ``token[2]`` (line number
            used in error messages).
        root_name: tag given to the root node of the returned tree.
        function: key into ``__symbol_tables`` under which declared
            variables are stored.

    Returns:
        A two-element list ``[tree, count]`` where ``count`` is the number
        of tokens consumed, including the closing '}'.

    Raises:
        Exception: if an ``if``/``while`` has no '{' body, or if the tokens
            run out before a closing '}' is seen.
    """
    tree = Tree()
    root_node = Node(tag=root_name)
    tree.add_node(root_node, parent=None)

    func_flag_no_init = 0  # 1 while handling "type name;" (no initialiser)
    func_flag = 0  # 1 while handling any variable declaration
    front_index = 0  # index of the first token of the pending statement
    num_tokens_to_skip = 0

    i = 0
    while i < len(tokens):
        if tokens[i][0] == "}":
            # Closing brace of this block: count it as consumed.
            return [tree, num_tokens_to_skip + 1]

        elif tokens[i][0] == "{":
            result = help_func_block(grammar,
                                     tokens[i + 1:],
                                     function=function)

            front_index += 1 + result[1]
            i += 1 + result[1]
            num_tokens_to_skip += 1 + result[1]

            tree.paste(root_node.identifier, result[0])

        elif tokens[i][0] in ["if", "while"]:
            if_node = Node(tag=tokens[i][0])
            tree.add_node(if_node, parent=root_node)

            if_cond = Node(tag="condition")
            tree.add_node(if_cond, parent=if_node)

            # Locate the '{' that opens the body.  Search by position from
            # i onwards: looking the token up by value (tokens.index) can
            # return an earlier, equal '{' token from the same source line.
            first_bracket = -1
            for j in range(i, len(tokens)):
                if tokens[j][0] == '{':
                    first_bracket = j
                    break
                elif tokens[j][0] == '}':
                    # Break to throw error if unmatched
                    break
            if first_bracket < 0:
                raise Exception(tokens[i][0] + " without body '{' on line " +
                                str(tokens[i][2]))

            # The condition sits between the '(' after the keyword and the
            # ')' just before the opening brace.
            cond_result = help_func_expression(grammar,
                                               tokens[i + 2:first_bracket - 1],
                                               function=function)
            body_result = help_func_block(grammar,
                                          tokens[first_bracket + 1:],
                                          root_name="condition_body",
                                          function=function)

            # Increment i, num_tokens_to_skip, and front_index
            if_skip = 1  # if/while
            if_skip += 1  # opening bracket
            if_skip += 2  # parens
            if_skip += cond_result[1]
            if_skip += body_result[1]

            num_tokens_to_skip += if_skip
            front_index += if_skip
            i += if_skip
            tree.paste(if_cond.identifier, cond_result[0])
            tree.paste(if_node.identifier, body_result[0])

        elif tokens[i][0] == "return":
            result = help_func_return(grammar, tokens[i:], function=function)
            front_index += result[1]
            i += result[1]
            num_tokens_to_skip += result[1]

            tree.paste(root_node.identifier, result[0])

        elif tokens[i][0] == ";":
            back_index = i

            expr_tokens = tokens[front_index:back_index]

            # Remove leading and trailing ( and )
            while (len(expr_tokens) > 0
                   and (expr_tokens[0][0] == '(' or expr_tokens[0][0] == ')')):
                expr_tokens.pop(0)
                num_tokens_to_skip += 1
            while (len(expr_tokens) > 0 and
                   (expr_tokens[-1][0] == '(' or expr_tokens[-1][0] == ')')):
                expr_tokens.pop(-1)
                num_tokens_to_skip += 1

            if len(expr_tokens) > 0:
                if (len(expr_tokens) == 2
                        and expr_tokens[0][1] == 'typeSpecifier'
                        and expr_tokens[1][1] == 'ID'):
                    # Variable declaration without initialisation.
                    func_flag = 1
                    func_flag_no_init = 1
                    var_type = expr_tokens[0][0]
                    var_name = expr_tokens[1][0]
                    __symbol_tables[function][var_name] = var_type
                    expr_tokens.pop(0)
                elif (len(expr_tokens) > 2
                      and expr_tokens[0][1] == 'typeSpecifier'
                      and expr_tokens[1][1] == 'ID'
                      and expr_tokens[2][1] == '='):
                    # Variable declaration with initialisation.
                    func_flag = 1
                    var_type = expr_tokens[0][0]
                    var_name = expr_tokens[1][0]
                    __symbol_tables[function][var_name] = var_type
                    expr_tokens.pop(0)
                if func_flag == 1:
                    # Small "type -> name" tree describing the declaration.
                    tmp_tree = Tree()
                    tmp_tree_root = Node(tag=var_type)
                    tmp_tree.add_node(tmp_tree_root, parent=None)
                    tmp_tree.add_node(Node(tag=var_name), parent=tmp_tree_root)

                result = help_func_expression(grammar,
                                              expr_tokens,
                                              function=function)

                if func_flag == 1:
                    # Account for the type specifier popped above.
                    result[1] += 1
                front_index = back_index + 1
                i += 1
                num_tokens_to_skip += 1 + result[1]
                if func_flag_no_init != 1:
                    tree.paste(root_node.identifier, result[0])
                if func_flag == 1:
                    tree.paste(root_node.identifier, tmp_tree)
                func_flag_no_init = 0
                func_flag = 0
                tmp_tree = None
            else:
                # Empty statement (e.g. a stray ';').  Consume the ';' so the
                # scan advances; without this the loop would never move past
                # this token.
                front_index = back_index + 1
                i += 1
                num_tokens_to_skip += 1

        else:
            i += 1

    # Iterated through tokens without closing '}'
    if not tokens:
        # No token is available to report a line number from.
        raise Exception(errors.ERR_NO_BLOCK_END)
    raise Exception(errors.ERR_NO_BLOCK_END + " on line " +
                    str(tokens[-1][2]))
Example #31
0
def help_func_expression(grammar, tokens, function=None):
    """Build an expression tree from a flat list of tokens.

    The expression is split at its lowest-precedence operator (preferring
    operators at the shallowest parenthesis depth); that operator becomes
    the root and the left/right token runs are parsed recursively.  When no
    operator is present the tokens are treated as a function call, a
    constant or an identifier.

    Side effects:
        * leading and trailing '(' / ')' tokens are removed from ``tokens``
          in place;
        * a call to ``main`` has its token text rewritten to ``_start``.

    Args:
        grammar: unused here; forwarded to recursive calls.
        tokens: list of tokens, each indexed as ``token[0]`` (text) and
            ``token[1]`` (token type).
        function: forwarded to recursive calls.

    Returns:
        A two-element list ``[tree, tokens_skip]`` where ``tokens_skip`` is
        the number of tokens accounted for by this expression.

    Raises:
        Exception: for an empty or unrecognised token sequence, or when the
            reserved function name ``_start`` is called.
    """
    tokens_skip = 0
    # Remove leading and trailing ( and )
    while len(tokens) > 0 and tokens[0][0] in ('(', ')'):
        tokens.pop(0)
        tokens_skip += 1
    while len(tokens) > 0 and tokens[-1][0] in ('(', ')'):
        tokens.pop()
        tokens_skip += 1

    # Parenthesis depth of every token up to the end of the expression.
    op_depth = []
    depth = 0
    for token in tokens:
        if token[0] == ';':
            break  # End of expression
        elif token[0] == '(':
            depth += 1
        elif token[0] == ')':
            depth -= 1
        op_depth.append(depth)

    # NOTE(review): "&&" and "||" rank above the arithmetic operators here,
    # unlike C -- confirm this ordering is intended.
    assignment_prec = 0
    op_precedence = {
        "&&": 50,
        "||": 40,
        "shiftop": 35,
        "mulop": 30,
        "sumop": 20,
        "relop": 10,
        "=": assignment_prec,
        "+=": assignment_prec,
        "-=": assignment_prec,
        "*=": assignment_prec,
        "/=": assignment_prec,
        # Original (mistyped) spelling, kept so existing token types that
        # match it are still recognised.
        "\\=": assignment_prec,
        "%=": assignment_prec
    }

    # Find the lowest precedence operator.  It is tracked by position, not
    # by value: equal operator tokens (same text, type and line) would
    # otherwise all resolve to the first occurrence.
    lowest_idx = -1
    for idx, token in enumerate(tokens):
        if token[0] == ';':
            break
        if token[1] not in op_precedence:
            continue
        if lowest_idx < 0:
            lowest_idx = idx
            continue

        cur_depth = op_depth[idx]
        low_depth = op_depth[lowest_idx]
        cur_prec = op_precedence[token[1]]
        low_prec = op_precedence[tokens[lowest_idx][1]]

        if cur_depth < low_depth:
            # Shallower depth guarantees replacement
            lowest_idx = idx
        elif cur_depth == low_depth:
            if cur_prec < low_prec:
                lowest_idx = idx
            elif cur_prec == low_prec and cur_prec != assignment_prec:
                # Equal precedence: take the right-most operator so binary
                # operators group left-to-right (a - b - c == (a - b) - c).
                # Assignments keep the left-most one so they group
                # right-to-left (a = b = c == a = (b = c)).
                lowest_idx = idx

    if lowest_idx < 0:
        if len(tokens) == 0:
            # Nothing left after stripping parentheses.
            raise Exception("Unknown token sequence: " + str(tokens))

        if len(tokens) > 1 and tokens[0][1] == "ID" and tokens[1][1] == "(":
            # Function call with parameters: node carries the function name.
            tree = Tree()
            if tokens[0][0] == "_start":
                raise Exception(
                    "Function name _start is reserved for assembly")
            elif tokens[0][0] == "main":
                tokens[0][0] = "_start"
            call_node = Node(tag="func:" + tokens[0][0])
            tree.add_node(call_node, parent=None)
            tokens_skip += 2

            # Children of node are function parameters
            # Iterate through tokens to find each parameter
            # Parameters split on ',' with depth=0
            depth = 0
            token_depth = []
            end_point = -1

            for i in range(2, len(tokens)):
                tokens_skip += 1
                if tokens[i][0] == "(":
                    depth += 1
                elif tokens[i][0] == ")":
                    depth -= 1

                token_depth.append(depth)

                if depth < 0:
                    # End ) found
                    end_point = i
                    break

            if end_point < 0:
                # The closing ')' was already stripped above.
                end_point = len(tokens)

            # Find split points for the expressions
            split_points = [2]
            for i in range(len(token_depth)):
                if token_depth[i] == 0 and tokens[i + 2][0] == ',':
                    split_points.append(i + 3)

            split_points.append(end_point)

            func_params = []
            for i in range(len(split_points) - 1):
                func_params.append(tokens[split_points[i]:split_points[i + 1]])

            # Add parameters to tree
            for p in func_params:
                # Needs to call expression handler to evaluate parameters
                # - Currently, operators in function calls are lower prec than the function for some reason
                # - Exceptions caused by nesting function calls
                param_node = Node(tag=p[0][0])
                tree.add_node(param_node, parent=call_node)

            return [tree, tokens_skip]
        elif tokens[0][1] in ("NUMCONST", "FLOATCONST", "CHARCONST",
                              "STRINGCONST", "true", "false", "ID"):
            # Check for no-parameter function
            if tokens[0][0] in __symbol_tables.keys():
                # Found a function call without parameters
                tokens_skip += 1
                tree = Tree()
                if tokens[0][0] == "_start":
                    raise Exception(
                        "Function name _start is reserved for assembly")
                elif tokens[0][0] == "main":
                    tokens[0][0] = "_start"
                value_node = Node(tag="func:" + tokens[0][0])
                tree.add_node(value_node, parent=None)
                return [tree, tokens_skip]
            else:
                # Expression is a constant or named variable
                tokens_skip += 1
                tree = Tree()
                value_node = Node(tag=tokens[0][0])
                tree.add_node(value_node, parent=None)
                return [tree, tokens_skip]
        else:
            raise Exception("Unknown token sequence: " + str(tokens))

    # Lowest precedence operator found; it becomes the root.
    lowest_prec_op = tokens[lowest_idx]
    tree = Tree()
    op_node = Node(tag=lowest_prec_op[0])
    tree.add_node(op_node, parent=None)

    # Recursive calls to make left and right subtrees
    tokens_skip += 1

    tokens_l = tokens[:lowest_idx]
    tokens_r = tokens[lowest_idx + 1:]

    # A side made up only of parentheses has nothing to parse.
    has_tokens_l = any(tok[0] != '(' and tok[0] != ')' for tok in tokens_l)
    has_tokens_r = any(tok[0] != '(' and tok[0] != ')' for tok in tokens_r)

    if has_tokens_l:
        expr_l = help_func_expression(grammar, tokens_l, function=function)
        tree.paste(op_node.identifier, expr_l[0])
        tokens_skip += expr_l[1]
    else:
        tokens_skip += len(tokens_l)

    if has_tokens_r:
        expr_r = help_func_expression(grammar, tokens_r, function=function)
        tree.paste(op_node.identifier, expr_r[0])
        tokens_skip += expr_r[1]
    else:
        tokens_skip += len(tokens_r)

    return [tree, tokens_skip]
Example #32
0
# Display the subtree rooted at 'diane'.
sub_t = tree.subtree('diane')
sub_t.show()
print('\n')


print("#"*4 + "Children of Diane")
# Call form of print: valid under Python 3 (and under Python 2 for a single
# argument), unlike the original print statement.
print(tree.is_branch('diane'))
print('\n')


print("#"*4 + "OOhh~ new members enter Jill's family")
# Build a small separate tree and graft it under 'jill'.
new_tree = Tree()
new_tree.create_node("n1", 1)  # root node
new_tree.create_node("n2", 2, parent=1)
new_tree.create_node("n3", 3, parent=1)
tree.paste('jill', new_tree)
tree.show()
print('\n')


print("#"*4 + "We are sorry they are gone accidently :(")
# Remove the node with identifier 1 (the root of the tree pasted above).
tree.remove_node(1)
tree.show()
print('\n')


print("#"*4 + "Now Jill moves to live with Grand-x-father Harry")
# Re-parent 'jill' under 'harry'.
tree.move_node('jill', 'harry')
tree.show()
print('\n')
Example #33
0
class Group(ElementWithAttributes):
    """Element that holds child elements in a ``Tree`` keyed by path.

    Children are addressed with "/"-separated keys.  A group may be linked
    to a location (``self.path``); when linked, items assigned through
    ``__setitem__`` are also written below that location, and unloaded
    datasets are read from it on access.
    """

    def __init__(self):
        super(Group, self).__init__()
        self.type = DATA_DIR_TYPES.GROUP

        # Location this group is linked to, set via ``link``; None while
        # unlinked.  Used with the ``/`` operator, so presumably a
        # pathlib.Path -- confirm against callers.
        self.path = None
        # Child elements; node identifiers are the item keys.
        self.tree = Tree()

    def __getitem__(self, item):
        """Return the element, or attribute value, stored under ``item``.

        If ``item`` is not a node of the tree, its last "/"-separated
        component is looked up as an attribute name on the parent node's
        data.

        Raises:
            KeyError: ``item`` is neither a node nor an attribute.
            GroupError: ``item`` is a dataset with an empty frame and this
                group is not linked to a path.
        """
        if item not in self.tree:
            rsplit = item.rsplit("/", maxsplit=1)
            if len(rsplit) == 1:
                # No "/" in the key: the parent is the tree root.
                item_0 = self.tree.root
                key = rsplit[0]
            else:
                item_0, key = rsplit
            if item_0 in self.tree:
                node = self.tree[item_0]
                if (isinstance(node.data, ElementWithAttributes)
                        and key in node.data.attrs):
                    return node.data.attrs[
                        key]  # ### RETURN attribute value ###

            raise KeyError(f"{item} is not a valid key")

        node = self.tree[item]

        if isinstance(node.data, Group):
            # rebuild tree with reduced identifiers
            stree = self.tree.subtree(item)
            for n in stree.all_nodes_itr():
                predecessor = n.predecessor(stree.identifier)
                if predecessor is None:
                    parent = None
                else:
                    # Drop the leading ``item`` prefix from the identifier.
                    parent = predecessor.split(item, maxsplit=1)[1]
                node.data.tree.create_node(n.tag,
                                           n.identifier.split(item,
                                                              maxsplit=1)[1],
                                           parent,
                                           data=n.data)

        elif isinstance(node.data, DataSet):
            if node.data.df.empty:
                # Frame not loaded yet: read it from the linked location.
                if self.path is None:
                    raise GroupError(
                        f"{item} is not loaded yet and this element is not linked to a File or Group"
                    )
                node.data.df = pd.read_parquet(self.path / item / DATA_FILE)

        return node.data

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``.

        ``Group`` and ``DataSet`` values become tree nodes; ``Raw`` and
        ``Attribute`` values are accepted but nothing is stored for them.
        When this group is linked, a directory for ``key`` is created and
        the element's metadata (and dataset frame) are written into it.

        Raises:
            KeyError: ``key`` already exists or its parent key does not.
            ValueError: ``value`` is not one of the supported types.
        """
        if key in self.tree:
            raise KeyError(f"{key} already exists")

        rsplit = key.rsplit("/", maxsplit=1)
        if len(rsplit) == 1:
            # No "/" in the key: attach below the current tree root.
            item_0 = self.tree.root
            key_1 = rsplit[0]
        else:
            item_0, key_1 = rsplit

        if item_0 is not None and item_0 not in self.tree:
            raise KeyError(f"Parent key {item_0} does not exist")

        dd_type = None
        if isinstance(value, Group):
            dd_type = value.type
            # Copy the group's nodes with identifiers prefixed by ``key``.
            new_tree = Tree()
            for node in value.tree.all_nodes_itr():
                # NOTE(review): __getitem__ reads the parent through
                # ``predecessor(tree_id)``; confirm ``Node.parent`` exists in
                # the treelib version in use.
                if node.parent is None:
                    parent = None
                else:
                    parent = key + "/" + node.parent
                new_tree.create_node(node.tag,
                                     key + "/" + node.identifier,
                                     parent=parent,
                                     data=node.data)
            # Swap in the re-keyed tree once, after the copy is complete.
            value.tree = new_tree
            self.tree.create_node(tag=key_1,
                                  identifier=key,
                                  parent=item_0,
                                  data=value)
            self.tree.paste(key, new_tree)

        elif isinstance(value, DataSet):
            dd_type = DATA_DIR_TYPES.DATASET
            self.tree.create_node(tag=key_1,
                                  identifier=key,
                                  parent=item_0,
                                  data=value)

        elif isinstance(value, Raw):
            pass
        elif isinstance(value, Attribute):
            pass
        else:
            raise ValueError(f"{value} is not a valid type for DataDir")

        # write ddir and attributes file if self is linked
        if isinstance(value, ElementWithAttributes) and self.path is not None:
            (self.path / key).mkdir()
            _write_ddir_json(self.path / key, dd_type=dd_type)
            # Context manager so the attributes file is flushed and closed.
            with (self.path / key / ATTRIBUTES_FILE).open("w") as attrs_file:
                json.dump(value.attrs, attrs_file, indent=4)

        # Write the frame only after the element directory has been created
        # above; writing it first targets a directory that does not exist.
        if isinstance(value, DataSet) and self.path is not None:
            value.df.to_parquet(self.path / key / DATA_FILE)

    def link(self, path):
        """Link this group to ``path`` for subsequent reads and writes."""
        self.path = path
Example #34
0
class TreeT(object):
    """Wraps a ``Tree`` and converts between bracketed (PTB-style) token
    lists, the tree itself, and per-leaf tag sequences."""

    def __init__(self, max_id=0):
        # ``max_id`` is accepted for interface compatibility; it is not used.
        self.tree = Tree()

    def from_ptb_to_tree(self, line, max_id=0, leaf_id=1, parent_id=None):
        """Recursively load a bracketed token list into ``self.tree``.

        ``line`` starts with ``['(', label, ...]``.  The first node created
        (no parent) gets identifier 0; other internal nodes take ``max_id``
        and leaves take ``leaf_id``.

        Returns:
            ``(tokens_consumed, max_id, leaf_id)`` with the updated counters.
        """
        label = line[1]
        if parent_id is not None:
            node_id = max_id
            max_id += 1
        else:
            node_id = 0

        self.tree.create_node(label, node_id, parent_id, TreeData())

        if line[2] != '(':
            # Leaf sub-tree: ['(', label, word, ')'] -> four tokens.
            self.tree.create_node(line[2], leaf_id, node_id, TreeData())
            return 4, max_id, leaf_id + 1

        consumed = 2  # the '(' and the label
        remaining = line[2:]
        while remaining[0] != ')':
            step, max_id, leaf_id = self.from_ptb_to_tree(
                remaining, max_id, leaf_id, node_id)
            consumed += step
            remaining = remaining[step:]

        # +1 for the closing ')'
        return consumed + 1, max_id, leaf_id

    def add_height(self, tree_dep):
        """Fill ``data.height`` on leaves and ``data.leaves`` on all nodes.

        ``tree_dep`` maps each leaf identifier to another identifier (the
        leaf's head, judging by the name -- confirm with callers).
        Always returns True.
        """
        for node in self.tree.all_nodes():
            node.data.leaves = []

        for leaf in self.tree.leaves():
            lid = leaf.identifier
            hid = tree_dep[lid]

            if hid == self.tree.root:
                # Leaf depends on the root: its height is its depth and it
                # is recorded on every node of its root-to-leaf path.
                self.tree[lid].data.height = self.tree.depth(self.tree[lid])
                containing_paths = [
                    p for p in self.tree.paths_to_leaves() if lid in p
                ]
                for cid in containing_paths[0]:
                    self.tree[cid].data.leaves += [lid]
                continue

            # Otherwise climb while the dependencies of the other leaves
            # under the current ancestor all stay inside that ancestor.
            height = -1
            cid = lid
            while True:
                self.tree[cid].data.leaves += [lid]
                height += 1
                cid = self.tree.parent(cid).identifier
                cid_leaves = [l.identifier for l in self.tree.leaves(cid)]
                cid_l_dep = [tree_dep[l] for l in cid_leaves if l != lid]
                if not set(cid_l_dep).issubset(set(cid_leaves)):
                    break
            self.tree[lid].data.height = height

        # Nodes that received no leaf borrow the first leaf of their
        # lowest-height child; that leaf's height grows by one.
        unassigned = [
            n.identifier for n in self.tree.all_nodes() if n.data.leaves == []
        ]
        for x_node in reversed(unassigned):
            lowest_child = min(self.tree.children(x_node),
                               key=lambda c: c.data.height)
            _lid = lowest_child.data.leaves[0]
            self.tree[_lid].data.height += 1
            self.tree[x_node].data.leaves += [_lid]

        return True

    def _from_tree_to_ptb(self, nid):
        """Return the bracketed string for the subtree rooted at ``nid``."""
        nid = self.tree.subtree(nid).root
        if self.tree[nid].is_leaf():
            leaf = self.tree[nid]
            return ' (' + leaf.tag + ' ' + leaf.data.word + ')'

        res = ' (' + self.tree[nid].tag
        # Children are emitted in increasing identifier order.
        ordered_children = sorted(self.tree.children(nid),
                                  key=lambda x: x.identifier)
        for child in ordered_children:
            res += self._from_tree_to_ptb(child.identifier)
        res += ')'
        return res

    def from_tree_to_ptb(self):
        """Return the bracketed string for the whole tree."""
        root_id = self.tree.root
        return self._from_tree_to_ptb(root_id)

    def from_tag_to_tree(self, tag, word, pos_id=0):
        """Build a chain of nodes from ``tag`` and attach ``word``.

        Each entry of ``tag`` yields one spine node (child of the previous
        spine node) plus one child per remaining item, whose first character
        is stored as ``miss_side``.  ``word`` is set on the first leaf whose
        ``miss_side`` is empty.

        Returns:
            The next unused identifier.
        """
        parent_id = None
        for tag_nodes in tag:
            starts_with_side = tag_nodes[0] in [CL, CR]
            if not starts_with_side:
                c_side = ''
                _tag_nodes = tag_nodes
            else:
                c_side = tag_nodes[0]
                if len(tag_nodes) > 1:
                    _tag_nodes = tag_nodes[1:]
                else:
                    _tag_nodes = ['']

            self.tree.create_node(_tag_nodes[0],
                                  pos_id,
                                  parent=parent_id,
                                  data=TreeData(comb_side=c_side))
            parent_id = pos_id
            pos_id += 1

            for tag_node in _tag_nodes[1:]:
                missing = TreeData(miss_side=tag_node[0])
                self.tree.create_node(tag_node[1:],
                                      pos_id,
                                      parent=parent_id,
                                      data=missing)
                pos_id += 1

        for l in self.tree.leaves():
            if l.data.miss_side == '':
                l.data.word = word
                break
        return pos_id

    @memoize
    def is_combine_to(self, side):
        """True when the root node's ``comb_side`` equals ``side``."""
        root_node = self.tree[self.tree.root]
        return root_node.data.comb_side == side

    @memoize
    def is_combine_right(self):
        """``is_combine_to`` for ``CR``."""
        return self.is_combine_to(CR)

    @memoize
    def is_combine_left(self):
        """``is_combine_to`` for ``CL``."""
        return self.is_combine_to(CL)

    @memoize
    def is_complete_tree(self):
        """True when no node has a non-empty ``miss_side``."""
        flags = [n.data.miss_side == '' for n in self.tree.all_nodes()]
        return all(flags)

    @memoize
    def get_missing_leaves_to(self, miss_val, side):
        """Identifiers of leaves tagged ``miss_val`` with ``miss_side == side``."""
        matches = []
        for l in self.tree.leaves(self.tree.root):
            if l.data.miss_side == side and l.tag == miss_val:
                matches.append(l.identifier)
        return matches

    @memoize
    def get_missing_leaves_left(self, miss_val):
        """``get_missing_leaves_to`` for side ``L``."""
        return self.get_missing_leaves_to(miss_val, L)

    @memoize
    def get_missing_leaves_right(self, miss_val):
        """``get_missing_leaves_to`` for side ``R``."""
        return self.get_missing_leaves_to(miss_val, R)

    @memoize
    def root_tag(self):
        """Tag of the root node."""
        root_node = self.tree[self.tree.root]
        return root_node.tag

    @memoize
    def is_no_missing_leaves(self):
        """True when every leaf has an empty ``miss_side``."""
        flags = [
            l.data.miss_side == '' for l in self.tree.leaves(self.tree.root)
        ]
        return all(flags)

    @memoize
    def combine_tree(self, _tree, comb_leaf):
        """Paste ``_tree`` under ``comb_leaf``, then link past that node.

        Returns ``self``.
        """
        self.tree.paste(comb_leaf, _tree.tree)
        self.tree.link_past_node(comb_leaf)
        return self

    def tree_to_path(self, nid, path):
        """Fill ``path`` (leaf id -> list) for the subtree rooted at ``nid``.

        Returns:
            ``(leaf_id, height)`` for the path that continues into the
            parent of ``nid``.
        """
        # Stop condition: a leaf starts an empty path.
        if self.tree[nid].is_leaf():
            path[nid] = []
            return nid, self.tree[nid].data.height

        # Recursion
        flag = CR
        for child in self.tree.children(nid):
            leaf_id, height = self.tree_to_path(child.identifier, path)

            if height == 0:
                # Path ends here: prefix it with the current side flag.
                path[leaf_id].insert(0, flag)
            elif height > 0:
                path[leaf_id].insert(0, nid)
                # Only a single child is expected to have height > 0; its
                # values are the ones handed back to the parent.
                ret_leaf_id, ret_height = leaf_id, height - 1

                # After the path through the parent is seen, later
                # finished paths are flagged with the other side.
                flag = CL

        return ret_leaf_id, ret_height

    def path_to_tags(self, path):
        """Convert each path (list of flag / node ids) to a list of tags."""
        tags = []
        for p in path:
            entry = []
            ids = copy.copy(p)
            if ids[0] in [CL, CR]:
                # Leading side flag is copied through unchanged.
                entry.append(ids[0])
                ids = ids[1:]
            while len(ids) > 1:
                el_p = ids.pop(0)
                entry.append(self.tree[el_p].tag)
                next_id = ids[0]
                for c in self.tree.children(el_p):
                    if c.identifier == next_id:
                        continue
                    # Siblings off the path are prefixed by the side they
                    # fall on relative to the next path node.
                    side = R if c.identifier > next_id else L
                    entry.append(side + c.tag)
            entry.append(self.tree[ids[0]].tag)
            tags.append(entry)
        return tags

    def path_to_words(self, path):
        """Tags of the nodes whose identifiers are listed in ``path``."""
        words = []
        for k in path:
            words.append(self.tree[k].tag)
        return words

    def from_tree_to_tag(self):
        """Return ``{'tags': ..., 'words': ...}`` computed from the tree."""
        path = {}
        self.tree_to_path(self.tree.root, path)
        tag_lists = self.path_to_tags(path.values())
        word_list = self.path_to_words(path.keys())
        return {'tags': tag_lists, 'words': word_list}

    def from_ptb_to_tag(self, line, max_id, depend):
        """Load ``line``, compute heights from ``depend``, return the tags."""
        self.from_ptb_to_tree(line, max_id)
        self.add_height(depend)
        path = {}
        self.tree_to_path(self.tree.root, path)
        path_values = path.values()
        return self.path_to_tags(path_values)
Example #35
0
def change(tree):
    """Propose replacing the split of one randomly chosen node of ``tree``.

    A node is drawn from those returned by ``nidValid`` that have more than
    one candidate split, a new (variable, split value) pair is drawn for it,
    and the subtree below it is rebuilt on a deep copy of the tree.  The
    copy is returned with probability ``r`` (product of the likelihood,
    transition and structure ratios); otherwise the original ``tree`` is
    returned unchanged.

    NOTE(review): this looks like a Metropolis-Hastings style "change" move
    for a tree sampler -- confirm against the calling code.
    NOTE(review): ``mi`` and ``t`` used in the log messages are not defined
    in this function; presumably module-level globals -- verify.
    """
    nidInternal = nidValid(tree)
    # Candidate splits per node, and the total number of candidates per node.
    choices = [getChoice(tree, n) for n in nidInternal]
    n_choices = map(lambda L: sum([len(i) for i in L]), choices)
    # Keep only nodes that have more than one candidate split.
    choiceDic = {
        a: b
        for (a, b, c) in zip(nidInternal, choices, n_choices) if c > 1
    }
    choices1 = list(choiceDic.keys())
    nid = random.choice(choices1)
    p = tree[nid].data.shape[1]
    # Current split of the chosen node (variable index is 1-based below).
    x0 = tree[nid].var
    s0 = tree[nid].split
    choices = choiceDic[nid]  # choose nid to split
    if s0 in choices[x0 - 1]:
        choices[x0 - 1].remove(s0)  # remove original split option
    choices2 = [i for i in range(p - 1)
                if len(choices[i]) > 0]  # choose var to split
    x = random.choice(choices2)
    choices3 = choices[x]  # choose value to split
    x += 1
    s = random.choice(choices3)
    # Work on a deep copy so the input tree is left untouched on rejection.
    tree1 = Tree(tree, deep=True)
    pid = tree1[nid].bpointer
    sub = tree1.remove_subtree(nid)
    tags = recurTag(sub, nid)
    # Replace the first tag with the newly drawn split for ``nid``.
    tags[0] = (nid, x, s)
    try:
        sub1 = genTree(sub[nid], tags)
    except IndexError:
        # The subtree could not be regenerated with the new split.
        print(f'{mi} change {t}: {tags[0]}; unchangable')
        return tree
    if pid is not None:
        # Re-attach the rebuilt subtree and keep the children sorted.
        tree1.paste(pid, sub1)
        tree1[pid].fpointer = sorted(tree1[pid].fpointer)
    else:
        # ``nid`` was the root: the rebuilt subtree is the whole tree.
        tree1 = sub1
    # Count the node choices available in the proposed tree (reverse move).
    nidInternal1 = set(nidValid(tree1))
    choices1 = set(choices1)
    choices11 = nidInternal1.intersection(choices1)
    extra = nidInternal1 - choices1
    n_choices = map(lambda L: sum([len(i) for i in L]),
                    [getChoice(tree1, n) for n in extra])
    choices11 = list(choices11) + [
        a for (a, b) in zip(extra, n_choices) if b > 1
    ]
    choices31 = getChoice(tree1, nid, x0)[x0 - 1]
    n31 = len(choices31)
    if (sub1[nid].var == sub[nid].var) and (s0 in choices31):
        n31 -= 1
    # Ratio of the numbers of available choices in the two trees.
    rTransit = len(choices1) * len(choices3) / (len(choices11) * n31)
    rLike = get_like_ratio(tree.R, sub.leaves(), sub1.leaves())
    rStruct = get_struct(sub.all_nodes_itr(), sub1.all_nodes_itr())
    r = rLike * rTransit * rStruct
    print(f'{mi} change {t}: {tags[0]}; r={r.round(4)}')
    # Accept the proposal with probability r.
    if random.uniform(0, 1) < r:
        # Carry over the attributes of the original tree onto the copy.
        tree1.w2 = tree.w2
        tree1.R = tree.R
        tree1.leaf = [n.identifier for n in tree1.leaves() if len(n.xvar) > 0]
        tree1.show()
        return tree1
    return tree