Example #1
0
def share_any_edges(G: nx.Graph, t1: tl.Tree, t2: tl.Tree) -> bool:
    """Return True if some edge of ``G`` joins a leaf of ``t1`` to a leaf of ``t2``.

    Leaves are matched against graph nodes through their ``tag``. An edge
    counts in either orientation: ``(u, v)`` with ``u`` in ``t1`` and ``v``
    in ``t2``, or the other way round.

    Args:
        G: graph whose edges are inspected.
        t1: first tree; only its leaf tags are used.
        t2: second tree; only its leaf tags are used.

    Returns:
        True as soon as one connecting edge is found, otherwise False.
    """
    # Sets give O(1) membership per edge endpoint instead of scanning a list
    # for every edge. Tags are compared against graph nodes, which must be
    # hashable, so building sets of tags is expected to be safe.
    leaves1 = {leaf.tag for leaf in t1.leaves()}
    leaves2 = {leaf.tag for leaf in t2.leaves()}
    return any(
        (u in leaves1 and v in leaves2) or (v in leaves1 and u in leaves2)
        for u, v in G.edges()
    )
Example #2
0
def endpoint_cal(swc_p, unit, sep=","):
    """Build a multi-branch tree from an SWC file and collect its key points.

    The file is read through ``coords_get``; every returned record becomes a
    tree node whose tag and identifier are the record id and whose ``data``
    is the record's coordinate. The first record is used as the root.

    Args:
        swc_p: path of the SWC file (passed straight to ``coords_get``).
        unit: passed straight to ``coords_get``.
        sep: field separator passed straight to ``coords_get``.

    Returns:
        A tuple ``(endpoint_coords, branch_coords, coords, ftree)`` where
        ``endpoint_coords`` holds the data of every leaf followed by the
        root coordinate, ``branch_coords`` holds the data of every node with
        at least two children, ``coords`` is all coordinates, and ``ftree``
        is the tree itself. The first three are numpy arrays.

    Raises:
        SystemExit: with a non-zero status when the file yields no
            coordinates.
    """
    print(unit, sep)
    coords, _labels, ids, pars = coords_get(swc_p, unit, sep)
    if len(coords) == 0:
        print("{} is something wrong".format(swc_p))
        # This is a failure path, so report it with a non-zero exit status
        # (the previous status 0 signalled success to the calling process).
        sys.exit(1)

    ftree = Tree()
    ftree.create_node(ids[0], ids[0], data=coords[0])

    # Remaining records are attached under the parent id given by the file.
    for coord_, id_, par_ in zip(coords[1:], ids[1:], pars[1:]):
        ftree.create_node(id_, id_, data=coord_, parent=par_)

    # The root is appended to the endpoints in addition to the leaves.
    endpoint_coords = [leaf.data for leaf in ftree.leaves()]
    endpoint_coords.append(coords[0])

    # Tag and identifier are the same value for every node created above.
    branch_coords = [
        node.data
        for node in ftree.all_nodes()
        if len(ftree.children(node.identifier)) >= 2
    ]

    return (
        np.array(endpoint_coords),
        np.array(branch_coords),
        np.array(coords),
        ftree,
    )
Example #3
0
def dasgupta_cost(G: nx.Graph, T: tl.Tree) -> float:
    """Sum, over every edge of ``G``, the leaf count under the edge's LCA in ``T``.

    For each edge the lowest common ancestor of its two endpoints is looked
    up with ``get_lca`` and the number of leaves below that ancestor is added
    to the total.
    """
    return sum(
        len(T.leaves(get_lca(T, edge[0], edge[1])))
        for edge in G.edges
    )
class EntityTree(object):
    """Wrapper around a ``Tree`` that is filled from a nested label dictionary."""

    def __init__(self):
        self.tree = Tree()

    def create_ent_tree(self,
                        ent_dct,
                        cls_name_map,
                        parent=None,
                        cls_data_map=None):
        """Recursively build the tree from the label hierarchy in ``ent_dct``.

        Args:
            ent_dct: dict with a ``'LabelName'`` key and, optionally, a
                ``'Subcategory'`` key holding a list of dicts of the same
                shape.
            cls_name_map: maps a label name to the tag used for the node.
            parent: identifier of the parent node, or None for the root.
            cls_data_map: optional map from label name to node data; labels
                missing from it get ``0``.

        Exits the process with status -1 when a label has no (truthy) entry
        in ``cls_name_map``.
        """
        # A fresh dict per call avoids sharing one mutable default object.
        if cls_data_map is None:
            cls_data_map = {}

        tag = ent_dct['LabelName']
        name = cls_name_map.get(tag)
        data = cls_data_map.get(tag, 0)
        if not name:
            print("Error: tag %s not found in cls_name_map!" % tag)
            sys.exit(-1)

        if DEBUG:
            print("# of tree nodes: %d, tree height: %d, # of leaves: %d" %
                  (len(self.tree), self.tree.depth(), len(self.tree.leaves())))
        nd = self.tree.create_node(tag=name, parent=parent, data=data)
        if 'Subcategory' in ent_dct:
            for dct in ent_dct['Subcategory']:
                self.create_ent_tree(dct,
                                     cls_name_map,
                                     parent=nd.identifier,
                                     cls_data_map=cls_data_map)
Example #5
0
    def get_attribute(self, ting, attribute_name=None):
        """Build the task tree for ``ting`` and gather its leaf tasks and resources.

        The tree is rooted at ``ting`` and expanded by ``self._build_tree``.
        Every leaf contributes its ``"task"`` entry, and the ``"resources"``
        mapping found under ``FRECKLET_KEY_NAME`` is merged into one dict of
        lists keyed by resource name.

        Returns a ``MultiCacheResult`` carrying ``task_tree``, ``task_leaves``
        and ``resources``.
        """
        counter = NodeCounter()

        task_tree = Tree()
        task_tree.create_node(
            identifier=counter.current_count, tag=ting.id, data=ting
        )
        self._build_tree(task_tree=task_tree, ting=ting, counter=counter)

        task_leaves = []
        requirements = {}
        for node in task_tree.leaves():
            task = node.data["task"]
            task_leaves.append(task)
            resources = task[FRECKLET_KEY_NAME].get("resources", {})
            for res_name, res_value in resources.items():
                # Strings and non-sequences are single values; wrap them so
                # that everything can be merged with ``extend``.
                is_multi = isinstance(res_value, Sequence) and not isinstance(
                    res_value, string_types
                )
                values = res_value if is_multi else [res_value]
                requirements.setdefault(res_name, []).extend(values)

        return MultiCacheResult(
            task_tree=task_tree,
            task_leaves=task_leaves,
            resources=requirements,
        )
Example #6
0
class Scansion(object):
    """
        .htree  : tree of Hypothesis objects, rooted at the node "root"
        .src    : list of strings
    """

    #///////////////////////////////////////////////////////////////////////////
    def __init__(self, source_file):
        """
                Scansion.__init__

                Create the root hypothesis and keep extending the tree until
                every leaf is marked dead.

                source_file     : (src) source file's name.
        """
        self.src = []
        self.htree = Tree()

        # root node (level 0) :
        root_hypothesis = Hypothesis(htree=self.htree,
                                     level=0,
                                     language=None,
                                     src=source_file)
        self.htree.create_node(tag="root",
                               identifier="root",
                               data=root_hypothesis)

        # calling root node :
        msg(0, "Calling the root node.")
        while True:
            # Only leaves whose hypothesis is not dead are extended.
            live_leaves = [leaf for leaf in self.htree.leaves()
                           if not leaf.data.dead]
            if not live_leaves:
                break

            for leaf in live_leaves:
                leaf.data.go_on()
Example #7
0
    def _build_tree(self, scores: ndarray, bin_edges: ndarray) -> Tree:
        """Build a tree whose leaves hold the indices of the scores in each bin.

        A full tree with ``self.depth`` levels is created first, each node of
        level ``l`` getting ``self.n_children[l]`` children. Scores are then
        binned with ``np.digitize`` and the k-th leaf receives the indices of
        the scores that fall into bin ``k``. Leaves without instances are
        removed, and nodes left with exactly one child are linked past.

        Args:
            scores: values to bin.
            bin_edges: bin boundaries handed to ``np.digitize``.

        Returns:
            The pruned and simplified tree.
        """

        # Build tree with specified number of children at each level
        tree = Tree()
        tree.add_node(Node())  # root node
        nodes_prev = [tree.get_node(tree.root)]
        for level in range(self.depth):
            nodes_current = []
            for node in nodes_prev:
                children = []
                for _ in range(self.n_children[level]):
                    child = Node()
                    tree.add_node(child, parent=node)
                    children.append(child)
                nodes_current.extend(children)
            nodes_prev = nodes_current

        # Zero-based bin index of every score.
        assignments = np.digitize(scores, bin_edges) - 1

        # Store instance ids in leaves
        # ``leaves`` is taken once before the loop, so removing nodes inside
        # the loop does not change which leaf is paired with which bin index.
        leaves = tree.leaves()
        for k, node in enumerate(leaves):
            instance_ids = np.where(assignments == k)[0]
            if instance_ids.size == 0:
                tree.remove_node(node.identifier)
            else:
                node.data = instance_ids

        # Prune empty leaves
        # Removing leaves can turn their parents into new, data-less leaves,
        # so the scan repeats until a pass removes nothing.
        check_for_empty_leaves = True
        while check_for_empty_leaves:
            check_for_empty_leaves = False
            leaves = tree.leaves()
            for node in leaves:
                # NOTE(review): ``successors`` looks like it returns the
                # node's children, not its siblings — confirm the intent of
                # the comment below against the treelib API.
                if node.data is None and len(node.successors(
                        tree.identifier)) == 0:
                    # Node is empty and has no siblings
                    tree.remove_node(node.identifier)
                    check_for_empty_leaves = True

        # Simplify tree: remove nodes that only have one child
        # NOTE(review): the tree is modified while ``expand_tree`` is being
        # iterated, and the root is not excluded — verify this is safe when
        # the root itself ends up with a single child.
        for nid in tree.expand_tree(mode=tree.WIDTH):
            children = tree.children(nid)
            if len(children) == 1:
                tree.link_past_node(nid)

        return tree
    def map_tree_to_program(self, tree: Tree) -> str:
        """Fold a binary tree bottom-up into a program and return its value.

        Each leaf is mapped through ``self._node_to_type``. An inner node is
        resolved once both children are resolved: a single truthy child is
        propagated, two falsy children fall back to the node's own type, and
        two truthy children are combined with ``apply``. Results are stored
        in ``self._node_to_subprog`` keyed by ``node.data.span``.

        Returns the ``get_value()`` of the entry stored for the root.
        """
        self._node_to_subprog = {}
        # Work queue: nodes are taken from the right end and (re)queued on
        # the left end.
        frontier = []

        for leaf in tree.leaves():
            span = leaf.data.span
            self._node_to_subprog[span] = self._node_to_type(leaf)
            leaf_parent = tree.parent(leaf.identifier)
            if leaf_parent and leaf_parent not in frontier:
                frontier.append(leaf_parent)

        while frontier:
            node = frontier.pop()
            children = tree.children(node.identifier)
            assert len(children) == 2
            # Postpone the node until every child has been resolved.
            unresolved = [
                child for child in children
                if child.data.span not in self._node_to_subprog
            ]
            if unresolved:
                frontier.insert(0, node)
                continue

            left = self._node_to_subprog[children[0].data.span]
            right = self._node_to_subprog[children[1].data.span]
            try:
                if left and right:
                    # make sure the right value can be formed
                    assert right.is_full()
                    combined = left.apply(right)
                elif left:  # right is 'NO_LABEL'
                    combined = left
                elif right:  # left is 'NO_LABEL'
                    combined = right
                else:  # both children are 'NO_LABEL': use the node itself
                    combined = self._node_to_type(node)
                self._node_to_subprog[node.data.span] = combined
            except Exception:
                # Any failure above triggers one retry with the operands
                # swapped before giving up.
                try:
                    self._node_to_subprog[node.data.span] = right.apply(left)
                except Exception as e:
                    raise Exception('final apply_exception: {}'.format(e))

            node_parent = tree.parent(node.identifier)
            if node_parent and node_parent not in frontier:
                frontier.insert(0, node_parent)

        root_span = tree.get_node(tree.root).data.span
        return self._node_to_subprog[root_span].get_value()
def our_cost(G: nx.Graph, T: tl.Tree) -> float:
    """Sum leaf depths below the LCA of every edge of ``G`` that lies inside ``T``.

    An edge is considered only when both endpoints are leaf tags of ``T``.
    For such an edge the subtree rooted at the endpoints' lowest common
    ancestor (from ``get_lca``) is taken, and the level of each of its leaves
    within that subtree is added to the cost.

    Args:
        G: graph whose edges are scored.
        T: tree whose leaf tags correspond to graph nodes.

    Returns:
        The accumulated cost (0 when no edge lies inside the tree).
    """
    # A set makes the two membership tests per edge O(1) rather than a scan
    # of the whole leaf list; tags are compared with (hashable) graph nodes.
    leaf_tags = {leaf.tag for leaf in T.leaves()}
    cost = 0
    for edge in G.edges:
        # only look at edges in this tree.
        if edge[0] in leaf_tags and edge[1] in leaf_tags:
            lca = get_lca(T, edge[0], edge[1])
            subtree = T.subtree(lca)
            cost += sum(
                subtree.level(leaf.identifier) for leaf in subtree.leaves()
            )
    return cost
Example #10
0
class Chain:
    """Block chain stored as a tree whose root is a genesis node (tag 0, id 0)."""

    def __init__(self):
        self.root = Tree()
        self.root.create_node(0, 0)  # Genesis block

    def add_block(self, block):
        """Add ``block`` to the chain.

        The node is tagged with ``block.epoch``, identified by
        ``hash(block)`` and attached under the node whose identifier is
        ``block.hash``; the block itself is stored as the node's data.
        """
        node = self.root.create_node(
            tag=block.epoch, identifier=hash(block), parent=block.hash
        )
        node.data = block

    def print_chain(self):
        """Print the chain as a tree."""
        self.root.show()

    def return_nodes_data(self):
        """Return the data of every node except the first one, or ``[0]`` if none.

        The first node returned by ``all_nodes()`` is skipped (presumably
        the genesis node — this relies on the tree's node ordering).
        """
        nodes_data = [node.data for node in self.root.all_nodes()[1:]]
        return nodes_data or [0]

    def leaves(self):
        """Return the identifiers of the leaves of the tree."""
        return [leaf.identifier for leaf in self.root.leaves()]

    def block_max_epoch(self):
        """Return the data of the node with the greatest tag above 0, else 0.

        On ties the first node encountered wins.
        """
        max_epoch, block_max = 0, 0
        for node in self.root.all_nodes():
            if node.tag > max_epoch:
                max_epoch, block_max = node.tag, node.data
        return block_max
Example #11
0
def build_parser_tree(org_str, parse_str):
    """Turn a bracketed constituency parse string into a ``Tree``.

    ``parse_str`` is split into tokens with every bracket isolated. Brackets
    only change the current nesting level; every other token becomes a node
    whose identifier is its position in the token list and whose parent is
    the most recent node recorded for the enclosing level.

    Returns ``(tree, token_list)``. The tree is ``None`` when any exception
    occurs while building it, or when a leaf ends up tagged with a
    constituency label.
    """
    nodes_at_level = defaultdict(list)  # str(level) -> node ids, in order
    punctuation = "',.!?[]()%#@&1234567890"
    parse_tree = Tree()
    parse_list = parse_str.replace("(", " ( ").replace(")", " ) ").strip().split()
    org_str_list = nltk.word_tokenize(org_str)

    def attach(tag, node_id, parent_level, own_level):
        # Hang the node under the latest node of ``parent_level`` and record
        # it as the latest node of ``own_level``.
        parent_id = nodes_at_level[str(parent_level)][-1]
        parse_tree.create_node(tag, node_id, parent=parent_id)
        nodes_at_level[str(own_level)].append(node_id)

    level = 0  # number of currently open brackets
    for index, item in enumerate(parse_list):
        if item == "(":
            level += 1
            continue
        if item == ")":
            level -= 1
            continue
        node_id = str(index)
        try:
            is_label = item in ConstituencyParse and item not in punctuation
            if is_label or (item not in org_str and item.isupper()):
                # Non-leaf node such as ROOT, S, VP, NP
                if item == "ROOT":
                    parse_tree.create_node(item, node_id)
                    nodes_at_level[str(level)].append(node_id)
                else:
                    attach(item, node_id, level - 1, level)
            if item in org_str_list and item not in punctuation:
                # Leaf node: one word of the sentence
                attach(item, node_id, level, level + 1)
            elif item in punctuation:
                # Punctuation node; its placement depends on whether it
                # directly follows an opening bracket
                if index > 0 and parse_list[index - 1] == "(":
                    attach(item, node_id, level - 1, level)
                else:
                    attach(item, node_id, level, level + 1)
        except Exception:
            return None, parse_list

    # A constituency label among the leaves means the tree was built wrongly
    if any(node.tag in ConstituencyParse for node in parse_tree.leaves()):
        print("建立句法树出错!")
        return None, parse_list
    return parse_tree, parse_list
Example #12
0
def generate(tree: Tree, parent: Node, token: Token, depth: int):
    """Recursively expand ``token`` into a random subtree under ``parent``.

    A node holding ``token`` is created first. Terminals stop there. For any
    other token one rule is picked at random among the rules of
    ``rule_table`` accepted by ``checkRuleDepth`` for the given ``depth``,
    and each right-hand-side token is generated with ``depth - 1``. Finally
    the node's ``data.text`` is set to the space-joined names of the leaves
    below it.
    """
    node = tree.create_node(parent=parent, data=token)
    if isinstance(token, Terminal):
        return

    candidate_rules = [
        rule for rule in rule_table.getAllRules(token)
        if checkRuleDepth(rule, depth)
    ]

    chosen_rule = random.choice(candidate_rules)
    for child_token in chosen_rule.rhs:
        generate(tree, node, child_token, depth - 1)

    leaf_names = [leaf.data.name for leaf in tree.leaves(node.identifier)]
    node.data.text = ' '.join(leaf_names)
Example #13
0
    def map_tree_to_program(self, tree: Tree) -> str:
        """Fold a tree bottom-up into a program string ``'answer ( ... )'``.

        Leaves are mapped through ``self._node_to_type``. An inner node with
        two children is the ``merge_children`` of both; with three children
        (ordered by the start of their span) the first and last are merged
        first and the middle one is then merged with that result. Results
        are stored in ``self._node_to_subprog`` keyed by ``node.data.span``.
        """
        self._node_to_subprog = {}

        # Work queue: nodes are taken from the right end and (re)queued on
        # the left end.
        frontier = []

        for leaf in tree.leaves():
            span = leaf.data.span
            self._node_to_subprog[span] = self._node_to_type(leaf)
            leaf_parent = tree.parent(leaf.identifier)
            if leaf_parent and leaf_parent not in frontier:
                frontier.append(leaf_parent)

        while frontier:
            node = frontier.pop()
            children = tree.children(node.identifier)
            assert len(children) in [2, 3]
            # Postpone the node until every child has been resolved.
            unresolved = [
                child for child in children
                if child.data.span not in self._node_to_subprog
            ]
            if unresolved:
                frontier.insert(0, node)
                continue

            if len(children) == 2:
                left = self._node_to_subprog[children[0].data.span]
                right = self._node_to_subprog[children[1].data.span]
                merged = self.merge_children(left, right, node)
            else:
                children.sort(key=lambda c: c.data.span[0])
                first = self._node_to_subprog[children[0].data.span]
                middle = self._node_to_subprog[children[1].data.span]
                last = self._node_to_subprog[children[2].data.span]
                outer = self.merge_children(first, last, node)
                merged = self.merge_children(middle, outer, node)
            self._node_to_subprog[node.data.span] = merged

            node_parent = tree.parent(node.identifier)
            if node_parent and node_parent not in frontier:
                frontier.insert(0, node_parent)

        root_span = tree.get_node(tree.root).data.span
        inner_program = self._node_to_subprog[root_span].get_value()
        return 'answer ( {} )'.format(inner_program)
Example #14
0
class BlockChain:
    """Blockchain kept as a tree rooted at a genesis node (tag 0, identifier 0)."""

    def __init__(self):
        self.root = Tree()
        self.root.create_node(0, 0)  # Genesis block

    def add_block(self, block):
        """Insert ``block`` into the tree.

        The new node uses ``block.epoch`` as tag and ``hash(block)`` as
        identifier, is placed under the node identified by ``block.hash``,
        and carries the block as its data.
        """
        new_node = self.root.create_node(
            tag=block.epoch, identifier=hash(block), parent=block.hash
        )
        new_node.data = block

    def print_chain(self):
        """Show the blockchain as a tree."""
        self.root.show()

    def return_nodes_data(self):
        """Return the data of all nodes but the first; ``[0]`` when there are none.

        Skipping index 0 presumably drops the genesis node — this depends on
        the order in which ``all_nodes()`` lists the nodes.
        """
        _first, *others = self.root.all_nodes()
        blocks = [node.data for node in others]
        if not blocks:
            blocks.append(0)
        return blocks

    def leaves(self):
        """Return the identifiers of the tree's leaves."""
        return [leaf.identifier for leaf in self.root.leaves()]

    def block_max_epoch(self):
        """Return the data of the node with the largest tag above 0, or 0.

        When several nodes share the largest tag, the first one listed is
        returned.
        """
        best_tag = 0
        best_block = 0
        for node in self.root.all_nodes():
            if node.tag > best_tag:
                best_tag = node.tag
                best_block = node.data
        return best_block
Example #15
0
    def get_invoke_tree(self, method: MethodId, search_depth=3):
        """Build a tree of the methods found above ``method``, level by level.

        The root node is ``method`` with an empty data list. For
        ``search_depth`` rounds, every current leaf is looked up with
        ``self.apkinfo.find_upper_methods`` and each returned method is added
        as a child of that leaf. Each node's data is the list of bytecodes
        found for it; a method already in the tree only gets the new
        bytecode appended.
        """
        tree = Tree(deep=search_depth, identifier=method.address)

        # Root: the queried method, with no bytecode recorded for it.
        tree.create_node(identifier=method, data=[])

        for _ in range(search_depth):
            # ``leaves()`` is evaluated once per round, so nodes added below
            # are only expanded in the next round.
            for leaf in tree.leaves():
                for offset, upper in self.apkinfo.find_upper_methods(
                        leaf.identifier):
                    bytecode = self.apkinfo.find_bytecode_by_addr(
                        upper.dexindex, offset)
                    if tree.contains(upper):
                        tree.get_node(upper).data.append(bytecode)
                        continue
                    tree.create_node(identifier=upper,
                                     data=[bytecode],
                                     parent=leaf)

        return tree
Example #16
0
def generatePool(N,  maxDepth, tree, rule_table, debug=False):
    """Create ``N`` variants of ``tree``, each with one random subtree regrown.

    For every variant a deep copy of ``tree`` is made, one node holding a
    non-terminal is chosen at random, and a new subtree for that node's
    token is produced by ``getProgTree`` within the remaining depth budget.
    If replacing the node raises, the new subtree is used as the whole
    variant. ``rule_table`` is accepted but not used here.

    Returns the list of generated trees.
    """
    # Identifiers of all nodes whose data is a non-terminal
    non_terminal_ids = [
        node_id
        for node_id in tree.expand_tree(mode=Tree.DEPTH)
        if isinstance(tree[node_id].data, PiRL.DataStructures.Token.NonTerminal)
    ]

    neighbours = []
    for _ in range(N):
        # Work on a deep copy so the input tree is left untouched
        candidate = Tree(tree=tree, deep=True)

        # Node to regrow
        target_id = random.choice(non_terminal_ids)

        # Fresh subtree for the same token, limited to the depth that is left
        target_token = candidate[target_id].data
        remaining_depth = maxDepth - candidate.depth(target_id)
        replacement = getProgTree(target_token, remaining_depth)

        try:
            parent_id = candidate[target_id].predecessor(candidate.identifier)
            candidate.replace_node(parent_id, target_id, replacement, deep=False)
        except Exception:
            # Any failure is treated as "the chosen node was the root"
            if debug:
                print("Root node replaced")
            candidate = replacement

        if debug:
            print("Generated neighbour:", end=' ')
            for leaf in candidate.leaves():
                print(leaf.data.name, end=' ')
            print("\n")

        neighbours.append(candidate)

    return neighbours
Example #17
0
def swap(tree):
    """Propose swapping the split rule of a random internal node with its parent's.

    A copy of ``tree`` has the parent's subtree cut out; the subtree's tags
    are listed with ``recurTag``, the ``(var, split)`` parts of the parent
    and the chosen child are exchanged, and a new subtree is built with
    ``genTree``. The proposal is accepted with probability given by the
    product of ``get_like_ratio`` and ``get_struct``.

    Returns the modified copy when the proposal is accepted, otherwise the
    original ``tree`` (also when there is only one internal node or the
    subtree cannot be rebuilt).
    """
    # ``!= None`` is kept as in the original comparison on purpose.
    candidates = [node for node in tree.all_nodes_itr() if node.var != None]
    if len(candidates) == 1:
        return tree
    candidates.remove(tree[0])

    child = random.choice(candidates)
    child_tag = (child.identifier, child.var, child.split)
    pid = child.bpointer

    tree1 = Tree(tree, deep=True)
    sub = tree1.remove_subtree(pid)
    tags = recurTag(sub, pid)
    parent_tag = tags[0]

    # Exchange (var, split) between parent and child, keeping identifiers.
    child_pos = tags.index(child_tag)
    tags[child_pos] = (child_tag[0], parent_tag[1], parent_tag[2])
    tags[0] = (parent_tag[0], child_tag[1], child_tag[2])

    prefix = f'{mi} swap {t}: {tags[0]}; '
    try:
        sub1 = genTree(tree[pid], tags)
    except IndexError:
        print(prefix + 'unswappable')
        return tree

    like_ratio = get_like_ratio(tree.R, sub.leaves(), sub1.leaves())
    struct_ratio = get_struct(sub.all_nodes_itr(), sub1.all_nodes_itr())
    r = like_ratio * struct_ratio
    print(prefix + f'{r.round(4)}')

    # Written as a negated ``<`` so every value of ``r`` takes the same
    # branch as before.
    if not (random.uniform(0, 1) < r):
        return tree

    if pid > 0:
        gpid = tree[pid].bpointer
        tree1.paste(gpid, sub1)
        tree1[gpid].fpointer = sorted(tree1[gpid].fpointer)
    else:
        # The cut was made at the root: the new subtree is the whole tree.
        tree1 = sub1
    tree1.w2 = tree.w2
    tree1.R = tree.R
    tree1.leaf = [leaf.identifier for leaf in tree1.leaves()
                  if len(leaf.xvar) > 0]
    tree1.show()
    return tree1
Example #18
0
    for bagIn in bagList:
        for i in range(rules[bag][bagIn]):
            cnt += 1
            t.create_node(bagIn, cnt, parent=nid)
    return cnt


# Part A: starting from shiny_gold, keep applying willCarry until the set
# stops growing (presumably the bags that can end up holding shiny_gold).
count = 0
e = {'shiny_gold'}
while count < len(e):
    count = len(e)
    e = willCarry(rules, e)

# Part B: build a tree of bags rooted at shiny_gold; every added bag gets the
# next value of the id counter. Repeat over the leaves until a full pass adds
# no node.
t = Tree()
t.create_node('shiny_gold', 0)
cnt = 0
cntLast = -1
while cntLast < cnt:
    print(f"Node Count {cnt}")
    cntLast = cnt
    for node in t.leaves():
        cnt = addBagSet(rules, node.tag, t, node.identifier, cnt)

# Call t.show() here to display the tree.

# Both answers exclude the shiny_gold entry itself.
print(f"The solution to part A is {len(e) - 1:d}")
print(f"The solution to part B is {len(t.nodes) - 1:d}")
Example #19
0
def hierarchy(fna_mapping, dist):
    """Agglomerate the clusters of ``fna_mapping`` into a binary tree.

    A square matrix of pairwise cluster scores is computed with
    ``cal_cls_dist``. The two clusters at the position of the largest matrix
    entry are then merged repeatedly into a new node until one node is left,
    and the recorded merges are turned into a ``Tree``.

    Args:
        fna_mapping: dict keyed by cluster id; the values are passed to
            ``cal_cls_dist`` (presumably the members of each cluster).
        dist: passed unchanged to ``cal_cls_dist``.

    Returns:
        A tuple ``(cls_dist_recls, mapping_recls, tree, depths,
        depths_mapping)``: the initial score matrix and the initial
        cluster-id -> matrix-index bidict (both copied before any merge), the
        merge tree, a dict from each tree node to its depth, and a dict from
        depth to the set of leaf nodes at that depth.

    Note:
        With a single cluster no merge is recorded, so the tree-building
        loop fails on ``tree_relationship[parent]`` (KeyError).
    """
    pending = list(fna_mapping.keys())
    # Identifiers for merged nodes start right after the largest cluster id.
    node_id = max(pending) + 1
    mapping = bidict.bidict()
    cls_dist = []
    cls_dist_temp = {}
    index = 0
    pending.sort()
    # Cluster ids, in ascending order, are assigned matrix indices 0..n-1.
    for i in pending:
        mapping[i] = index
        index += 1
    # Build the matrix row by row; entries left of the diagonal reuse the
    # value already computed for the mirrored position, so the matrix is
    # symmetric.
    for i in range(0, len(pending)):
        temp1 = []
        for j in range(0, i):
            temp1.append(cls_dist_temp[(mapping[pending[j]],
                                        mapping[pending[i]])])
        for j in range(i, len(pending)):
            temp = cal_cls_dist(dist, fna_mapping[pending[i]],
                                fna_mapping[pending[j]])
            temp1.append(temp)
            cls_dist_temp[(mapping[pending[i]], mapping[pending[j]])] = temp
        cls_dist.append([np.array(temp1)])
    cls_dist = np.concatenate(cls_dist)
    # Snapshots of the initial state, returned untouched to the caller.
    cls_dist_recls = cls_dist.copy()
    mapping_recls = mapping.copy()
    tree_relationship = {}  # merged node id -> (child id, child id)
    pending = set(pending)

    while (len(pending) > 1):
        # Row/column of the largest entry = the two clusters to merge.
        # NOTE(review): if that entry were on the diagonal, both indices
        # would be equal and the deletions below would fail — presumably
        # cal_cls_dist prevents this; confirm.
        (child_a, child_b) = divmod(np.argmax(cls_dist), cls_dist.shape[1])
        # Row of the merged node: element-wise maximum of the two rows.
        temp1 = [
            np.concatenate([[cls_dist[child_a]],
                            [cls_dist[child_b]]]).max(axis=0)
        ]
        cls_dist = np.concatenate([cls_dist, temp1], axis=0)
        # The same values form the new column, with -1 as the new node's
        # own (diagonal) entry.
        temp1 = np.append(temp1, -1)
        temp1 = np.vstack(temp1)
        cls_dist = np.concatenate([cls_dist, temp1], axis=1)
        # Drop the rows and columns of the two merged clusters.
        cls_dist = np.delete(cls_dist, [child_a, child_b], axis=0)
        cls_dist = np.delete(cls_dist, [child_a, child_b], axis=1)
        # change mapping
        cluster_a = mapping.inv[child_a]
        cluster_b = mapping.inv[child_b]  # cluster id
        tree_relationship[node_id] = (cluster_a, cluster_b)
        del mapping[cluster_a], mapping[cluster_b]
        pending.remove(cluster_a)
        pending.remove(cluster_b)
        pending = sorted(list(pending))
        # Shift the indices of the remaining clusters to account for the two
        # deleted rows/columns: -1 if between them, -2 if after both.
        for i in pending:
            if (mapping[i] > min([child_a, child_b])
                    and mapping[i] < max([child_a, child_b])):
                mapping[i] -= 1
            elif (mapping[i] > max([child_a, child_b])):
                mapping[i] -= 2
        # The merged node occupies the last row/column.
        mapping[node_id] = len(cls_dist) - 1
        pending = set(pending)
        pending.add(node_id)
        node_id += 1
    # build tree structure
    # ``pending`` now holds only the last merged node, which becomes the
    # root; children are added breadth-first from the recorded merges.
    pending = list(pending)
    tree = Tree()
    T0 = Node(identifier=pending[0])
    tree.add_node(T0)
    while (len(pending) > 0):
        parent = pending[0]
        for i in tree_relationship[parent]:
            tree.add_node(Node(identifier=i), parent=parent)
            # Only merged nodes have children of their own to expand.
            if (i in tree_relationship):
                pending.append(i)
        pending.remove(parent)

    # load depth info
    # Both dicts are keyed by / contain Node objects, not identifiers.
    depths = {}
    depths_mapping = defaultdict(set)
    leaves = set(tree.leaves())
    for i in tree.all_nodes():
        depths[i] = tree.depth(node=i)
        if (i in leaves):
            depths_mapping[depths[i]].add(i)

    return cls_dist_recls, mapping_recls, tree, depths, depths_mapping
Example #20
0
def build_tree(arg):
    # read parameters
    start = time.time()
    dist_matrix_file = arg[0]
    cls_file = arg[1]
    tree_dir = arg[2]
    ksize = arg[3]
    params = arg[4]
    alpha_ratio = params[0]
    minsize = params[1]
    maxsize = params[2]
    max_cls_size = params[3]

    # save genomes info
    fna_seq = bidict.bidict()  # : 1
    fna_path = {}

    # read dist matrix (represented by similarity: 1-dist)
    # output: dist, fna_path, fna_seq
    f = open(dist_matrix_file, "r")
    lines = f.readlines()
    f.close()
    index = 0
    d = lines[0].rstrip().split("\t")[1:]
    bac_label = 0
    for i in lines[0].rstrip().split("\t")[1:]:
        temp = i[i.rfind('/') + 1:].split(".")[0]
        fna_seq[temp] = index
        fna_path[index] = i
        index += 1
    dist = []
    for line in lines[1:]:
        dist.append(
            [np.array(list(map(float,
                               line.rstrip().split("\t")[1:])))])
    dist = np.concatenate(dist)

    # read initial clustering results. fna_mapping, from 1 for indexing
    f = open(cls_file, 'r')
    lines = f.readlines()
    f.close()
    fna_mapping = defaultdict(set)
    for line in lines:
        temp = line.rstrip().split("\t")
        for i in temp[2].split(","):
            fna_mapping[int(temp[0])].add(fna_seq[i])
    if (len(lines) == 1):
        tree = Tree()
        kmer_sta = defaultdict(int)
        T0 = Node(identifier=list(fna_mapping.keys())[0])
        tree.add_node(T0)
        kmer_sta = defaultdict(int)
        kmer_index_dict = bidict.bidict()
        kmer_index = 1
        alpha_ratio = 1
        Lv = set()
        for i in fna_mapping[T0.identifier]:
            for seq_record in SeqIO.parse(fna_path[i], "fasta"):
                temp = str(seq_record.seq)
                for k in range(0, len(temp) - ksize):
                    forward = temp[k:k + ksize]
                    reverse = seqpy.revcomp(forward)
                    for kmer in [forward, reverse]:
                        try:
                            kmer_sta[kmer_index_dict[kmer]] += 1
                        except KeyError:
                            kmer_index_dict[kmer] = kmer_index
                            kmer_sta[kmer_index] += 1
                            kmer_index += 1
        alpha = len(fna_mapping[T0.identifier]) * alpha_ratio
        for x in kmer_sta:
            if (kmer_sta[x] >= alpha):
                Lv.add(x)
        print(T0.identifier, len(Lv))
        # save2file
        kmerlist = set()
        pkl.dump(tree, open(tree_dir + '/tree.pkl', 'wb'))
        f = open(tree_dir + "/tree_structure.txt", "w")
        os.system("mkdir " + tree_dir + "/kmers")
        os.system("mkdir " + tree_dir + "/overlapping_info")
        f.write("%d\t" % T0.identifier)
        f.close()
        os.system(f'cp {cls_file} {tree_dir}/')
        f = open(tree_dir + "/reconstructed_nodes.txt", "w")
        f.close()
        if (len(Lv) > maxsize):
            Lv = set(random.sample(Lv, maxsize))
        kmerlist = Lv
        length = len(Lv)
        f = open(tree_dir + "/kmers/" + str(T0.identifier), "w")
        for j in Lv:
            f.write("%d " % j)
        f.close()
        f = open(tree_dir + "/node_length.txt", "w")
        f.write("%d\t%d\n" % (T0.identifier, length))
        kmer_mapping = {}
        index = 0
        f = open(tree_dir + "/kmer.fa", "w")
        for i in kmerlist:
            f.write(">1\n")
            f.write(kmer_index_dict.inv[i])
            kmer_mapping[i] = index
            index += 1
            f.write("\n")
        f.close()

        # change index
        files = os.listdir(tree_dir + "/kmers")
        for i in files:
            f = open(tree_dir + "/kmers/" + i, "r")
            lines = f.readlines()
            if (len(lines) == 0):
                continue
            d = lines[0].rstrip().split(" ")
            d = map(int, d)
            f = open(tree_dir + "/kmers/" + i, "w")
            for j in d:
                f.write("%d " % kmer_mapping[j])
            f.close()
        end = time.time()
        print(
            '- The total running time of tree-based indexing struture building is ',
            str(end - start), ' s\n')
        return
    # initially build tree
    cls_dist, mapping, tree, depths, depths_mapping = hierarchy(
        fna_mapping, dist)

    # initially extract k-mers
    kmer_index_dict = bidict.bidict()
    kmer_index = 1
    Lv = defaultdict(set)
    spec = defaultdict(set)  # k-mers <= alpha
    leaves = tree.leaves()
    for i in leaves:
        kmer_index = extract_kmers(fna_mapping[i.identifier], fna_path, ksize,
                                   kmer_index_dict, kmer_index, Lv, spec,
                                   tree_dir, alpha_ratio, i.identifier)
    end = time.time()
    print('- The total running time of k-mer extraction is ', str(end - start),
          ' s\n')
    start = time.time()

    # leaf nodes check
    recls_label = 0

    leaves_check = []
    check_waitlist = reversed(leaves)
    while (True):
        if (recls_label):
            cls_dist, mapping, tree, depths, depths_mapping = hierarchy(
                fna_mapping, dist)
            leaves = tree.leaves()
            temp = {}
            temp2 = []
            for i in check_waitlist:
                if (i in fna_mapping):
                    temp2.append(i)
            check_waitlist = temp2.copy()
            for i in check_waitlist:
                temp[tree.get_node(i)] = depths[tree.get_node(i)]
            check_waitlist = []
            a = sorted(temp.items(), key=lambda x: x[1], reverse=True)
            for i in a:
                check_waitlist.append(i[0])
            for i in fna_mapping:
                if (i not in Lv):
                    kmer_index = extract_kmers(fna_mapping[i], fna_path, ksize,
                                               kmer_index_dict, kmer_index, Lv,
                                               spec, tree_dir, alpha_ratio, i)
        higher_union = defaultdict(set)
        for i in check_waitlist:
            diff, diff_nodes = get_leaf_union(depths[i], higher_union,
                                              depths_mapping, Lv, spec, i)
            kmer_t = Lv[i.identifier] - diff
            for j in diff_nodes:
                kmer_t = kmer_t - Lv[j.identifier]
            for j in diff_nodes:
                kmer_t = kmer_t - spec[j.identifier]
            print(str(i.identifier) + " checking", end="\t")
            print(len(kmer_t))
            if (len(kmer_t) < minsize):
                leaves_check.append(i)
        if (len(leaves_check) > 0):
            recls_label = 1
        else:
            break
        # re-clustering
        check_waitlist = []
        while (recls_label == 1):
            cluster_id = max(list(fna_mapping.keys())) + 1
            check_waitlist.append(cluster_id)
            leaf_a = leaves_check[0].identifier
            row_index = mapping[leaf_a]
            column_index = cls_dist[row_index].argmax()
            leaf_b = mapping.inv[column_index]  # (leaf_a, leaf_b)
            temp2 = fna_mapping[leaf_a] | fna_mapping[leaf_b]
            print(cluster_id, leaf_a, leaf_b, temp2)
            del fna_mapping[leaf_a], fna_mapping[leaf_b]
            if (leaf_a in Lv):
                del Lv[leaf_a], spec[leaf_a]
            if (leaf_b in Lv):
                del Lv[leaf_b], spec[leaf_b]
            del leaves_check[0]
            if (tree.get_node(leaf_b) in leaves_check):
                leaves_check.remove(tree.get_node(leaf_b))
            temp1 = [
                np.concatenate([[cls_dist[row_index]],
                                [cls_dist[column_index]]]).max(axis=0)
            ]
            cls_dist = np.concatenate([cls_dist, temp1], axis=0)
            temp1 = np.append(temp1, -1)
            temp1 = np.vstack(temp1)
            cls_dist = np.concatenate([cls_dist, temp1], axis=1)
            cls_dist = np.delete(cls_dist, [row_index, column_index], axis=0)
            cls_dist = np.delete(cls_dist, [row_index, column_index], axis=1)
            # change mapping
            del mapping[leaf_a], mapping[leaf_b]
            pending = list(fna_mapping.keys())
            pending.sort()
            for i in pending:
                if (mapping[i] > min([row_index, column_index])
                        and mapping[i] < max([row_index, column_index])):
                    mapping[i] -= 1
                elif (mapping[i] > max([row_index, column_index])):
                    mapping[i] -= 2
            fna_mapping[cluster_id] = temp2
            mapping[cluster_id] = len(cls_dist) - 1
            if (len(leaves_check) == 0):
                break
    del higher_union

    # rebuild identifiers
    all_nodes = tree.all_nodes()
    all_leaves_id = set([])
    leaves = set(tree.leaves())
    for i in leaves:
        all_leaves_id.add(i.identifier)
    id_mapping = bidict.bidict()
    index = 1
    index_internal = len(leaves) + 1
    for i in all_nodes:
        if (recls_label == 0):
            id_mapping[i.identifier] = i.identifier
        elif (i in leaves):
            id_mapping[i.identifier] = index
            index += 1
        else:
            id_mapping[i.identifier] = index_internal
            index_internal += 1
    leaves_identifier = list(range(1, len(leaves) + 1))
    all_identifier = list(id_mapping.values())
    all_identifier.sort()

    # save2file
    f = open(tree_dir + "/tree_structure.txt", "w")
    os.system("mkdir " + tree_dir + "/kmers")
    os.system("mkdir " + tree_dir + "/overlapping_info")
    for nn in all_identifier:
        i = id_mapping.inv[nn]
        f.write("%d\t" % id_mapping[i])
        if (i == all_nodes[0].identifier):
            f.write("N\t")
        else:
            f.write("%d\t" % id_mapping[tree.parent(i).identifier])
        if (nn in leaves_identifier):
            f.write("N\t")
        else:
            [child_a, child_b] = tree.children(i)
            f.write("%d %d\t" % (id_mapping[child_a.identifier],
                                 id_mapping[child_b.identifier]))
        if (len(fna_mapping[i]) == 1):
            temp = list(fna_mapping[i])[0]
            temp = fna_seq.inv[temp]
            f.write("%s" % temp)
        f.write("\n")
    f.close()
    f = open(tree_dir + "/hclsMap_95_recls.txt", "w")
    for nn in leaves_identifier:
        i = id_mapping.inv[nn]
        f.write("%d\t%d\t" % (nn, len(fna_mapping[i])))
        temp1 = list(fna_mapping[i])
        for j in temp1:
            temp = fna_seq.inv[j]
            if (j == temp1[-1]):
                f.write("%s\n" % temp)
            else:
                f.write("%s," % temp)
    f.close()
    end = time.time()
    print('- The total running time of re-clustering is ', str(end - start),
          ' s\n')
    start = time.time()

    # build indexing structure
    kmerlist = set([])  # all kmers used
    length = {}
    overload_label = 0
    if (len(tree.leaves()) > max_cls_size):
        overload_label = 1
    # from bottom to top (unique k-mers)
    uniq_temp = defaultdict(set)
    rebuilt_nodes = []
    descendant = defaultdict(set)  # including itself
    ancestor = defaultdict(set)
    descendant_leaves = defaultdict(set)
    ancestor[all_nodes[0].identifier].add(all_nodes[0].identifier)
    for i in all_nodes[1:]:
        ancestor[i.identifier] = ancestor[tree.parent(
            i.identifier).identifier].copy()
        ancestor[i.identifier].add(i.identifier)
    for i in reversed(all_nodes):
        print(str(id_mapping[i.identifier]) + " k-mer removing...")
        if (i in leaves):
            uniq_temp[i.identifier] = Lv[i.identifier]
            descendant_leaves[i.identifier].add(i.identifier)
        else:
            (child_a, child_b) = tree.children(i.identifier)
            descendant[i.identifier] = descendant[
                child_a.identifier] | descendant[child_b.identifier]
            descendant_leaves[i.identifier] = descendant_leaves[
                child_a.identifier] | descendant_leaves[child_b.identifier]
            uniq_temp[i.identifier] = uniq_temp[
                child_a.identifier] & uniq_temp[child_b.identifier]
            uniq_temp[child_a.identifier] = uniq_temp[
                child_a.identifier] - uniq_temp[i.identifier]
            uniq_temp[child_b.identifier] = uniq_temp[
                child_b.identifier] - uniq_temp[i.identifier]
        descendant[i.identifier].add(i.identifier)
    all_nodes_id = set(id_mapping.keys())
    # remove overlapping
    for i in reversed(all_nodes):
        print(str(id_mapping[i.identifier]) + " k-mer set building...")
        # no difference with sibling, subtree and ancestors
        if (i == all_nodes[0]):
            kmer_t = uniq_temp[i.identifier]
        else:
            diff = {}
            temp = all_nodes_id - descendant[i.identifier] - set([
                tree.siblings(i.identifier)[0].identifier
            ]) - ancestor[i.identifier]
            for j in temp:
                diff[j] = len(uniq_temp[j])
            a = sorted(diff.items(), key=lambda x: x[1], reverse=True)
            kmer_t = uniq_temp[i.identifier]
            for j in a:
                k = j[0]
                kmer_t = kmer_t - uniq_temp[k]
            # remove special k-mers
            temp = all_leaves_id - descendant_leaves[i.identifier]
            diff = {}
            for j in temp:
                diff[j] = len(spec[j])
            a = sorted(diff.items(), key=lambda x: x[1], reverse=True)
            for j in a:
                k = j[0]
                kmer_t = kmer_t - spec[k]
        if (len(kmer_t) < minsize and overload_label == 0):
            rebuilt_nodes.append(i)
            print("%d waiting for reconstruction..." %
                  id_mapping[i.identifier])
        else:
            if (len(kmer_t) > maxsize):
                kmer_t = set(random.sample(kmer_t, maxsize))
            f = open(tree_dir + "/kmers/" + str(id_mapping[i.identifier]), "w")
            for j in kmer_t:
                f.write("%d " % j)
            f.close()
            length[i] = len(kmer_t)
            kmerlist = kmerlist | kmer_t
    del uniq_temp

    # rebuild nodes
    overlapping = defaultdict(dict)
    intersection = defaultdict(set)
    higher_union = defaultdict(set)
    del_label = {}
    for i in leaves:
        del_label[i.identifier] = [0, 0]
    for i in rebuilt_nodes:
        print(str(id_mapping[i.identifier]) + " k-mer set rebuilding...")
        kmer_t = get_intersect(intersection, descendant_leaves[i.identifier],
                               Lv, del_label, i.identifier)
        diff = get_diff(higher_union, descendant_leaves, depths, all_nodes, i,
                        Lv, spec, del_label)
        for j in diff:
            kmer_t = kmer_t - j
        lower_leaves = set([])
        for j in leaves:
            if (depths[j] < depths[i]):
                lower_leaves.add(j)
        if (len(kmer_t) > maxsize):
            kmer_overlapping_sta = defaultdict(int)
            for j in lower_leaves:
                kmer_o = Lv[j.identifier] & kmer_t
                for k in kmer_o:
                    kmer_overlapping_sta[k] += 1
            temp = sorted(kmer_overlapping_sta.items(),
                          key=lambda kv: (kv[1], kv[0]))
            kmer_t = set([])
            for j in range(0, maxsize):
                kmer_t.add(temp[j][0])
        nkmer = {}
        f = open(tree_dir + "/kmers/" + str(id_mapping[i.identifier]), "w")
        index = 0
        for j in kmer_t:
            f.write("%d " % j)
            nkmer[j] = index
            index += 1
        length[i] = len(kmer_t)
        kmerlist = kmerlist | kmer_t
        # save overlapping info
        for j in lower_leaves:
            temp = Lv[j.identifier] & kmer_t
            if (len(temp) > 0):
                ii = id_mapping[i.identifier]
                jj = id_mapping[j.identifier]
                overlapping[jj][ii] = set([])
                for k in temp:
                    overlapping[jj][ii].add(nkmer[k])
        delete(Lv, spec, del_label)

    for i in overlapping:
        f = open(tree_dir + "/overlapping_info/" + str(i), "w")
        f1 = open(tree_dir + "/overlapping_info/" + str(i) + "_supple", "w")
        count = -1
        for j in overlapping[i]:
            if (len(overlapping[i]) != 0):
                f.write("%d\n" % j)
                for k in overlapping[i][j]:
                    f.write("%d " % k)
                f.write("\n")
                count += 2
                f1.write("%d %d\n" % (j, count))
        f.close()
        f1.close()

    # final saving
    f = open(tree_dir + "/reconstructed_nodes.txt", "w")
    for i in rebuilt_nodes:
        f.write("%d\n" % id_mapping[i.identifier])
    f.close()

    f = open(tree_dir + "/node_length.txt", "w")
    for nn in all_identifier:
        i = id_mapping.inv[nn]
        f.write("%d\t%d\n" % (nn, length[tree[i]]))
    f.close()

    kmer_mapping = {}
    index = 0
    f = open(tree_dir + "/kmer.fa", "w")
    for i in kmerlist:
        f.write(">1\n")
        f.write(kmer_index_dict.inv[i])
        kmer_mapping[i] = index
        index += 1
        f.write("\n")
    f.close()

    # change index
    files = os.listdir(tree_dir + "/kmers")
    for i in files:
        f = open(tree_dir + "/kmers/" + i, "r")
        lines = f.readlines()
        if (len(lines) == 0):
            continue
        d = lines[0].rstrip().split(" ")
        d = map(int, d)
        f = open(tree_dir + "/kmers/" + i, "w")
        for j in d:
            f.write("%d " % kmer_mapping[j])
        f.close()

    end = time.time()
    print(
        '- The total running time of tree-based indexing struture building is ',
        str(end - start), ' s\n')
Example #21
0
class GradientBoostingTree(object):
    """Binary tree of neural-network stages trained on successive residuals.

    Every node of ``self.tree`` stores one fitted model.  A node's model is
    trained on the residual left by the models on the path from the root to
    its parent; predictions are the per-path sums averaged over all leaves.

    NOTE: ``secrets.SystemRandom`` ignores seeding, so ``seed`` does not make
    the optimizer choice reproducible.
    NOTE: the list defaults of ``metrics`` and ``optimizers`` are shared
    between instances; treat them as read-only.
    """

    def __init__(self,
                 X,
                 y,
                 k=4,
                 epochs=200,
                 loss='mse',
                 metrics=['mae'],
                 learning_rate=0.001,
                 layers=3,
                 ending_units=256,
                 optimizers=[Adam, Nadam, RMSprop],
                 early_stop=None,
                 seed=None):
        # training data
        self.X, self.y = X, y
        # tree depth below the root and per-stage training settings
        self.k, self.epochs = k, epochs
        self.loss, self.metrics = loss, metrics
        self.learning_rate = learning_rate
        # architecture knobs forwarded to network_builder
        self.layers, self.ending_units = layers, ending_units
        # optimizer classes, one of which is picked at random per stage
        self.optimizers = optimizers
        # optional callback handed to model.fit
        self.early_stop = early_stop
        self.generator = secrets.SystemRandom(seed)
        self.models = []
        self.tree = Tree()

    def fit(self):
        """Train the root model and then ``self.k`` levels of child models."""

        def train_stage(base_preds, targets, stage):
            # Build, compile and fit one model; return the accumulated
            # predictions, the new residual and the model itself.
            pick = self.generator.randint(0, len(self.optimizers) - 1)
            optimizer = self.optimizers[pick](
                learning_rate=self.learning_rate)
            model = network_builder(layers=self.layers,
                                    ending_units=self.ending_units)
            model.compile(loss=self.loss,
                          optimizer=optimizer,
                          metrics=self.metrics)
            callbacks = [self.early_stop] if self.early_stop else []
            # deeper stages are allowed proportionally more epochs
            model.fit(self.X,
                      targets,
                      epochs=self.epochs * (stage + 1),
                      callbacks=callbacks)
            stage_output = model.predict(self.X).reshape(-1)
            summed = base_preds + stage_output
            residual = self.y - summed
            return summed, residual, model

        def grow(levels_left, base_preds, targets, stage, parent=None):
            # Depth-first: fit the left child and its whole subtree before
            # the right child is fitted.
            if not levels_left > 0:
                return
            for side in ('l', 'r'):
                child_preds, child_targets, child_model = train_stage(
                    base_preds, targets, stage)
                child_id = parent + side + str(levels_left)
                self.tree.add_node(Node(identifier=child_id,
                                        data=child_model),
                                   parent=parent)
                grow(levels_left - 1, child_preds, child_targets, stage + 1,
                     child_id)

        start_preds = np.zeros(len(self.y))
        root_preds, root_residual, root_model = train_stage(
            start_preds, self.y, 0)
        self.tree.add_node(Node(identifier='root', data=root_model))
        grow(self.k, root_preds, root_residual, 1, parent='root')

    def predict(self, X):
        """Return the mean, over all root-to-leaf paths, of the summed model outputs."""
        leaf_count = len(self.tree.leaves('root'))
        path_totals = []
        for path in self.tree.paths_to_leaves():
            total = .0
            for node_id in path:
                total += self.tree.get_node(node_id).data.predict(X)
            path_totals.append(total)

        return sum(path_totals) / leaf_count
def qmaxrtc(q, leaf_set, triplets):
    """Greedily attach every leaf to a complete binary skeleton tree of ``q`` nodes.

    A skeleton tree is built first (root id ``0``; the remaining ids are drawn
    from a negative pool so they stay disjoint from ``leaf_set``).  Leaves are
    then fixed one at a time: for each skeleton leaf the conditional
    expectation is updated with the probabilities returned by ``prq`` and the
    leaf is assigned to the skeleton leaf with the largest value.

    Args:
        q: number of skeleton nodes; assumed to be ``2**k - 1`` with ``k > 1``.
        leaf_set: iterable of leaf identifiers to place.
        triplets: mapping ``z -> iterable of (x, y)`` pairs, one per triplet
            ``xy|z``.

    Returns:
        The treelib ``Tree`` holding the skeleton plus one node per leaf
        (tag ``"l<leaf>"``, identifier ``leaf``).
    """
    # With q = 2^k - 1 a complete binary tree of depth k - 1 has q nodes.
    depth_tree = int(math.log(q + 1, 2)) - 1
    # Negative labels keep the skeleton ids disjoint from leaf_set.
    node_ids = [-(i + 1) for i in range(0, q)]
    new_tr = Tree()
    new_tr.create_node("n0", 0)
    create_binary_tree(new_tr, 0, node_ids, depth_tree, 0)
    tree_leaf_set = [n.identifier for n in new_tr.leaves()]

    num_pairs_diff_subtree = {0: 0}
    num_pairs_same_tree = {0: 0}
    computer_diff_pairs_subtree(new_tr, 0, 0, num_pairs_diff_subtree,
                                num_pairs_same_tree, q)

    # assignments[leaf] is [] while the leaf is unplaced, else [skeleton_leaf].
    assignments = {leaf: [] for leaf in leaf_set}

    # Baseline expectation before any leaf is fixed.  The original expression
    # `1 / 3 - 4 / 3 * (q + 1) * (q + 1)` parsed as 1/3 - (4/3)(q+1)^2, which
    # is always negative; the denominator is now parenthesised.
    # NOTE(review): presumably the intended bound is 1/3 - 4/(3(q+1)^2) --
    # confirm.  It is a common offset of all candidates, so it does not
    # influence which parent is chosen.
    prev = (1 / 3 - 4 / (3 * (q + 1) * (q + 1))) * len(triplets)

    for leaf in leaf_set:
        # Collect every triplet xy|z in which `leaf` takes part, as ((x, y), z).
        leaf_triplets = []
        for t in triplets:
            if t == leaf:
                for f_set in triplets[t]:
                    leaf_triplets.append((f_set, leaf))
            else:
                for f_set in triplets[t]:
                    for elm in f_set:
                        if elm == leaf:
                            leaf_triplets.append((f_set, t))

        # vals[p] ends up as E[W | assignments so far, leaf assigned to p].
        vals = {potential_parent: prev for potential_parent in tree_leaf_set}

        for (f_set, z) in leaf_triplets:
            x, y = f_set
            # Remove the contribution computed with `leaf` still unassigned ...
            assignments[leaf] = []
            for potential_parent in tree_leaf_set:
                prob = prq(assignments[x],
                           assignments[y], assignments[z], new_tr,
                           len(tree_leaf_set), q, num_pairs_diff_subtree,
                           num_pairs_same_tree)
                vals[potential_parent] -= prob

            # ... and add the contribution with `leaf` placed at each candidate.
            for potential_parent in tree_leaf_set:
                assignments[leaf] = [potential_parent]
                prob = prq(assignments[x],
                           assignments[y], assignments[z], new_tr,
                           len(tree_leaf_set), q, num_pairs_diff_subtree,
                           num_pairs_same_tree)
                vals[potential_parent] += prob

        # Pick the candidate with the largest expectation.  The previous
        # sentinel (max_val = -1, max_parent = 0) silently attached the leaf
        # to the root whenever every value was <= -1; max() always returns a
        # real skeleton leaf, keeping the first one on ties as before.
        max_parent = max(tree_leaf_set, key=vals.__getitem__)
        max_val = vals[max_parent]
        assignments[leaf] = [max_parent]
        prev = max_val

    # Materialise the assignment: hang each leaf below its chosen parent.
    for leaf in leaf_set:
        parent_leaf = assignments[leaf][0]
        new_tr.create_node("l" + str(leaf), leaf, parent=parent_leaf)

    return new_tr
def fov_connect(fov_ins_array):
    """Skeletonize a labelled volume and collect the end points of each skeleton.

    For every label returned by ``kimimaro.skeletonize`` the skeleton edges are
    turned into a treelib ``Tree`` (breadth-first from ``edges[0][0]``) and the
    coordinates stored on the tree leaves, plus ``coords[0]``, are recorded.

    Args:
        fov_ins_array: 3-D integer label volume (indexed with three axes below).

    Returns:
        ``(fov_ins_skel_array, ends_array, ends_dict)``: the volume with
        skeleton voxels painted by label, a volume with end points set to 1,
        and a dict ``label -> list of end-point coordinates``.
    """
    # Physical voxel size; used for skeletonization and to map the skeleton
    # vertices back to voxel indices.
    anisotropy = (200, 200, 1000)

    skels = kimimaro.skeletonize(
        fov_ins_array,
        teasar_params={
            'scale': 4,
            'const': 500,  # physical units
            'pdrf_exponent': 4,
            'pdrf_scale': 100000,
            'soma_detection_threshold': 1100,  # physical units
            'soma_acceptance_threshold': 3500,  # physical units
            'soma_invalidation_scale': 1.0,
            'soma_invalidation_const': 300,  # physical units
            'max_paths': None,  # default None
        },
        dust_threshold=50,
        anisotropy=anisotropy,  # default True
        fix_branching=True,  # default True
        fix_borders=True,  # default True
        progress=True,  # default False
        parallel=2,  # <= 0 all cpu, 1 single process, 2+ multiprocess
    )
    ends_dict = {}

    fov_ins_skel_array = np.zeros_like(fov_ins_array)
    ends_array = np.zeros_like(fov_ins_array)
    for label_ in skels:
        skel = skels[label_]

        coords = (skel.vertices / np.array(anisotropy)).astype(int)
        fov_ins_skel_array[coords[:, 0], coords[:, 1], coords[:, 2]] = label_

        coords = coords.tolist()
        edges = skel.edges.tolist()

        ftree = Tree()
        root_id = edges[0][0]
        # The root carries its own vertex coordinate, like every other node
        # (the original stored coords[0] regardless of which vertex was root).
        ftree.create_node(root_id, root_id, data=coords[root_id])

        # Breadth-first expansion: consume every edge touching the current
        # frontier and attach the opposite vertex as a child.
        frontier = [root_id]
        while len(edges) > 0 and len(frontier) > 0:
            next_frontier = []
            remaining = edges[:]
            for cur_ in frontier:
                next_inds = np.where(np.array(remaining) == cur_)[0]
                if len(next_inds) == 0:
                    continue
                for next_ind in next_inds:
                    edge_ = remaining[next_ind]
                    edges.remove(edge_)

                    next_ = edge_[-1] if edge_[0] == cur_ else edge_[0]

                    next_frontier.append(next_)
                    ftree.create_node(next_,
                                      next_,
                                      data=coords[next_],
                                      parent=cur_)
                # refresh the snapshot so row indices match `edges` again
                remaining = edges[:]

            frontier = next_frontier

        ends = [x.data for x in ftree.leaves()]
        # NOTE(review): this appends vertex 0, which is the tree root only
        # when edges[0][0] == 0 -- confirm whether the root was intended.
        ends.append(coords[0])

        ends_dict[label_] = ends

        ends_ = np.array(ends)
        ends_array[ends_[:, 0], ends_[:, 1], ends_[:, 2]] = 1
        #ends_array = dilation(ends_array, ball(1))

    return fov_ins_skel_array, ends_array, ends_dict
Example #24
0
class TreeT(object):
    """Wrapper around a treelib ``Tree`` for converting between bracketed
    (PTB-style) token sequences, node trees and per-leaf tag sequences.

    NOTE(review): several methods are wrapped in ``memoize``, which is defined
    elsewhere; if it caches on arguments only, results may go stale once the
    tree is mutated -- confirm.
    """

    def __init__(self, max_id=0):
        # max_id is accepted but not used here.
        self.tree = Tree()

    def from_ptb_to_tree(self, line, max_id=0, leaf_id=1, parent_id=None):
        """Recursively build ``self.tree`` from a tokenised bracketed sequence.

        ``line`` must start with ``'('`` followed by the node tag.  The first
        call (``parent_id is None``) creates the root with id 0; every other
        internal node takes ``max_id`` as its id, and word leaves take
        ``leaf_id``.

        Returns a tuple ``(tokens consumed, next max_id, next leaf_id)``.

        NOTE(review): presumably callers pass a ``max_id`` larger than every
        leaf id so internal and leaf ids cannot collide -- confirm.
        """
        # starts by ['(', 'pos']
        pos_tag = line[1]
        if parent_id is None:
            pos_id = 0
        else:
            pos_id = max_id
            max_id += 1

        self.tree.create_node(pos_tag, pos_id, parent_id, TreeData())

        parent_id = pos_id
        # the opening '(' and the tag have been consumed so far
        total_offset = 2

        if line[2] != '(':
            # sub-tree is leaf
            # line[0:4] = ['(', 'pos', 'word', ')']
            word_tag = line[2]
            self.tree.create_node(word_tag, leaf_id, parent_id, TreeData())
            return 4, max_id, leaf_id + 1

        line = line[2:]

        # parse children one after another until this node's closing ')'
        while line[0] != ')':
            offset, max_id, leaf_id = self.from_ptb_to_tree(
                line, max_id, leaf_id, parent_id)
            total_offset += offset
            line = line[offset:]

        # +1 accounts for the closing ')'
        return total_offset + 1, max_id, leaf_id

    def add_height(self, tree_dep):
        """Fill ``data.height`` of every leaf and ``data.leaves`` of the nodes.

        ``tree_dep`` is indexed by leaf id and yields the id that leaf depends
        on.  Always returns ``True``.
        """

        # reset the per-node leaf lists
        for n in self.tree.all_nodes():
            n.data.leaves = []

        for leaf in self.tree.leaves():
            lid = leaf.identifier
            hid = tree_dep[lid]
            if hid == self.tree.root:
                # This leaf depends on the root id: its height is its depth
                # and it is recorded on every node of its root-to-leaf path.
                self.tree[lid].data.height = self.tree.depth(self.tree[lid])
                for cid in [
                        p for p in self.tree.paths_to_leaves() if lid in p
                ][0]:
                    self.tree[cid].data.leaves += [lid]
            else:
                # Climb from the leaf, recording it on each visited node.
                # Climbing continues while every other leaf below the new
                # ancestor depends on a leaf that is also below that ancestor.
                height = -1
                cid = lid
                cond = True
                while cond:
                    self.tree[cid].data.leaves += [lid]
                    height += 1
                    cid = self.tree.parent(cid).identifier
                    cid_leaves = [l.identifier for l in self.tree.leaves(cid)]
                    cid_l_dep = [tree_dep[l] for l in cid_leaves if l != lid]
                    cond = set(cid_l_dep).issubset(set(cid_leaves))
                self.tree[lid].data.height = height

        # Nodes that no leaf was recorded on are handled last, in reverse
        # order of all_nodes(): the first recorded leaf of the child with the
        # smallest data.height is extended by one and recorded on the node.
        x_nodes = [
            n.identifier for n in self.tree.all_nodes() if n.data.leaves == []
        ]
        for x_node in x_nodes[::-1]:
            min_id = min(self.tree.children(x_node),
                         key=lambda c: c.data.height)
            _lid = min_id.data.leaves[0]
            self.tree[_lid].data.height += 1
            self.tree[x_node].data.leaves += [_lid]

        return True

    def _from_tree_to_ptb(self, nid):
        """Return the bracketed string of the subtree rooted at ``nid``.

        Leaves render as `` (tag word)``; children are emitted in ascending
        identifier order.
        """
        nid = self.tree.subtree(nid).root
        if self.tree[nid].is_leaf():
            return ' (' + self.tree[nid].tag + ' ' + self.tree[
                nid].data.word + ')'

        res = ' (' + self.tree[nid].tag

        for c_nid in sorted(self.tree.children(nid),
                            key=lambda x: x.identifier):
            res += self._from_tree_to_ptb(c_nid.identifier)

        return res + ')'

    def from_tree_to_ptb(self):
        """Return the bracketed string of the whole tree."""
        return self._from_tree_to_ptb(self.tree.root)

    def from_tag_to_tree(self, tag, word, pos_id=0):
        """Build ``self.tree`` from one tag sequence and attach ``word``.

        Each element of ``tag`` contributes one spine node (child of the
        previous spine node) plus one child per remaining entry; for those
        entries the first character is stored as ``miss_side`` and the rest
        becomes the node tag.  An element starting with ``CL``/``CR`` stores
        that marker as ``comb_side``.  Ids are assigned consecutively from
        ``pos_id``.

        Returns the next unused id.
        """
        parent_id = None
        for tag_nodes in tag:
            if tag_nodes[0] in [CL, CR]:
                c_side = tag_nodes[0]
                # an element holding only the marker yields an empty-tag node
                _tag_nodes = tag_nodes[1:] if len(tag_nodes) > 1 else ['']
            else:
                c_side = ''
                _tag_nodes = tag_nodes
            self.tree.create_node(_tag_nodes[0],
                                  pos_id,
                                  parent=parent_id,
                                  data=TreeData(comb_side=c_side))

            parent_id = pos_id
            pos_id += 1
            for tag_node in _tag_nodes[1:]:
                self.tree.create_node(tag_node[1:],
                                      pos_id,
                                      parent=parent_id,
                                      data=TreeData(miss_side=tag_node[0]))
                pos_id += 1
        # the word goes on the first leaf that is not a missing-side node
        for l in self.tree.leaves():
            if l.data.miss_side == '':
                l.data.word = word
                break
        return pos_id

    @memoize
    def is_combine_to(self, side):
        """Return whether the root's ``comb_side`` equals ``side``."""
        return self.tree[self.tree.root].data.comb_side == side

    @memoize
    def is_combine_right(self):
        """Return whether the root's ``comb_side`` is ``CR``."""
        return self.is_combine_to(CR)

    @memoize
    def is_combine_left(self):
        """Return whether the root's ``comb_side`` is ``CL``."""
        return self.is_combine_to(CL)

    @memoize
    def is_complete_tree(self):
        """Return whether every node has an empty ``miss_side``."""
        return all([n.data.miss_side == '' for n in self.tree.all_nodes()])

    @memoize
    def get_missing_leaves_to(self, miss_val, side):
        """Return ids of leaves tagged ``miss_val`` whose ``miss_side`` is ``side``."""
        return [
            l.identifier for l in self.tree.leaves(self.tree.root)
            if l.data.miss_side == side and l.tag == miss_val
        ]

    @memoize
    def get_missing_leaves_left(self, miss_val):
        """Return ids of leaves tagged ``miss_val`` with ``miss_side == L``."""
        return self.get_missing_leaves_to(miss_val, L)

    @memoize
    def get_missing_leaves_right(self, miss_val):
        """Return ids of leaves tagged ``miss_val`` with ``miss_side == R``."""
        return self.get_missing_leaves_to(miss_val, R)

    @memoize
    def root_tag(self):
        """Return the tag of the root node."""
        return self.tree[self.tree.root].tag

    @memoize
    def is_no_missing_leaves(self):
        """Return whether every leaf has an empty ``miss_side``."""
        return all(
            [l.data.miss_side == '' for l in self.tree.leaves(self.tree.root)])

    @memoize
    def combine_tree(self, _tree, comb_leaf):
        """Paste ``_tree.tree`` below ``comb_leaf``, then link past ``comb_leaf``.

        Mutates ``self.tree`` and returns ``self``.
        """
        self.tree.paste(comb_leaf, _tree.tree)
        self.tree.link_past_node(comb_leaf)
        return self

    def tree_to_path(self, nid, path):
        """Fill ``path`` (leaf id -> list) for the subtree rooted at ``nid``.

        Returns ``(leaf id, remaining height)`` of the leaf whose path still
        continues through the parent of ``nid``.
        """

        # Stop condition
        if self.tree[nid].is_leaf():
            path[nid] = []
            return nid, self.tree[nid].data.height

        # Recursion
        # children visited before the continuing child receive CR, the ones
        # after it receive CL
        flag = CR
        for child in self.tree.children(nid):
            cid = child.identifier
            leaf_id, height = self.tree_to_path(cid, path)

            if (height == 0):
                # Reached end of path can add flag
                path[leaf_id].insert(0, flag)
                # path[leaf_id].append(flag)

            if height > 0:
                path[leaf_id].insert(0, nid)
                # only single child will have height>0
                # and its value will be the one that is returned
                # to the parent
                ret_leaf_id, ret_height = leaf_id, height - 1

                # once we reached a height>0, it means that
                # this path includes the parent, and thus flag
                # direction should flip
                flag = CL

        # NOTE(review): unbound if no child reported height > 0; the code
        # relies on add_height() having set heights so that one always does.
        return ret_leaf_id, ret_height

    def path_to_tags(self, path):
        """Turn each path list into a flat tag list.

        An optional leading ``CL``/``CR`` marker is copied through.  For every
        path node except the last, the node's tag is emitted followed by the
        tags of its off-path children, prefixed with ``R`` when the child id
        is greater than the next path id and ``L`` otherwise.  The tag of the
        last path node closes the list.
        """
        tags = []
        for p in path:
            _res = []
            # work on a shallow copy so the caller's list is not consumed
            _p = copy.copy(p)
            if _p[0] in [CL, CR]:
                _res.append(_p[0])
                _p = _p[1:]
            while _p[:-1]:
                el_p = _p.pop(0)
                _res.append(self.tree[el_p].tag)
                for c in self.tree.children(el_p):
                    if c.identifier != _p[0]:
                        _res.append(R + c.tag if c.identifier > _p[0] else L +
                                    c.tag)
            _res.append(self.tree[_p[0]].tag)
            tags.append(_res)
        return tags

    def path_to_words(self, path):
        """Return the tag of every node id in ``path``."""
        return [self.tree[k].tag for k in path]

    def from_tree_to_tag(self):
        """Return ``{'tags': ..., 'words': ...}`` derived from the current tree."""
        path = {}
        self.tree_to_path(self.tree.root, path)
        return {
            'tags': self.path_to_tags(path.values()),
            'words': self.path_to_words(path.keys())
        }

    def from_ptb_to_tag(self, line, max_id, depend):
        """Parse ``line``, compute heights from ``depend`` and return the tag lists."""
        self.from_ptb_to_tree(line, max_id)
        self.add_height(depend)
        path = {}
        self.tree_to_path(self.tree.root, path)
        return self.path_to_tags(path.values())
Example #25
0
def change(tree):
    """Propose re-splitting one internal node and accept it at random.

    A node with more than one available split option is drawn, a new
    (variable, value) pair is drawn for it, and its subtree is regenerated in
    a deep copy of ``tree``.  The copy is returned when a uniform draw falls
    below ``rLike * rTransit * rStruct``; otherwise, or when regeneration
    raises ``IndexError``, the original ``tree`` is returned.
    """
    internal_ids = nidValid(tree)
    per_node_options = [getChoice(tree, n) for n in internal_ids]
    option_totals = [
        sum([len(opts) for opts in node_opts])
        for node_opts in per_node_options
    ]
    # only nodes offering more than one option in total are eligible
    choiceDic = {}
    for node_id, node_opts, total in zip(internal_ids, per_node_options,
                                         option_totals):
        if total > 1:
            choiceDic[node_id] = node_opts
    candidates = list(choiceDic.keys())

    nid = random.choice(candidates)
    current = tree[nid]
    p = current.data.shape[1]
    x0 = current.var
    s0 = current.split

    # choose nid to split
    node_options = choiceDic[nid]
    # remove original split option
    if s0 in node_options[x0 - 1]:
        node_options[x0 - 1].remove(s0)
    # choose var to split
    usable_vars = [i for i in range(p - 1) if len(node_options[i]) > 0]
    x = random.choice(usable_vars)
    # choose value to split
    split_values = node_options[x]
    x += 1
    s = random.choice(split_values)

    # rebuild the subtree below nid inside a deep copy
    tree1 = Tree(tree, deep=True)
    pid = tree1[nid].bpointer
    sub = tree1.remove_subtree(nid)
    tags = recurTag(sub, nid)
    tags[0] = (nid, x, s)
    try:
        sub1 = genTree(sub[nid], tags)
    except IndexError:
        print(f'{mi} change {t}: {tags[0]}; unchangable')
        return tree
    if pid is None:
        # nid was the root: the regenerated subtree is the whole tree
        tree1 = sub1
    else:
        tree1.paste(pid, sub1)
        tree1[pid].fpointer = sorted(tree1[pid].fpointer)

    # count the nodes eligible for the reverse move
    new_internal = set(nidValid(tree1))
    old_candidates = set(candidates)
    still_eligible = new_internal.intersection(old_candidates)
    extra = new_internal - old_candidates
    extra_options = [getChoice(tree1, n) for n in extra]
    newly_eligible = []
    for node_id, node_opts in zip(extra, extra_options):
        if sum([len(opts) for opts in node_opts]) > 1:
            newly_eligible.append(node_id)
    reverse_candidates = list(still_eligible) + newly_eligible

    reverse_values = getChoice(tree1, nid, x0)[x0 - 1]
    n31 = len(reverse_values)
    if (sub1[nid].var == sub[nid].var) and (s0 in reverse_values):
        n31 -= 1

    forward = len(old_candidates) * len(split_values)
    backward = len(reverse_candidates) * n31
    rTransit = forward / backward
    rLike = get_like_ratio(tree.R, sub.leaves(), sub1.leaves())
    rStruct = get_struct(sub.all_nodes_itr(), sub1.all_nodes_itr())
    r = rLike * rTransit * rStruct
    print(f'{mi} change {t}: {tags[0]}; r={r.round(4)}')

    if not random.uniform(0, 1) < r:
        return tree
    # accepted: carry the bookkeeping attributes over to the new tree
    tree1.w2 = tree.w2
    tree1.R = tree.R
    tree1.leaf = [n.identifier for n in tree1.leaves() if len(n.xvar) > 0]
    tree1.show()
    return tree1
Example #26
0

#for i in range(m):
#    for l in trees[i].leaves():
#        idx = l.data.index
#        MM.loc[idx,i] = mumu
#%%
# Per-iteration diagnostics: log-likelihood and RMSE of the summed fit.
Like = zeros(T)
Rmse = zeros(T)
# = zeros(T)
# Main loop: T sweeps over the m trees.
# NOTE(review): looks like a backfitting sampler for a sum-of-trees model --
# confirm.
for t in range(T):
    var_ratio = var / var_mu
    for mi in range(m):
        # partial residual: y minus the contribution of every other tree
        trees[mi].R = y - (MM.sum(axis=1) - MM.iloc[:, mi])
        tree = drawTree(trees[mi])
        # drawM is applied to the leaves through any(), which stops at the
        # first truthy result.
        # NOTE(review): if drawM can return a truthy value, later leaves are
        # skipped -- confirm it always returns a falsy value.
        g = any(map(drawM, tree.leaves()))
        #for l in tree.leaves(): drawM(l)
        trees[mi] = tree
        # snapshot of the tree, keyed by (sweep, tree) flattened to one index
        tdic[t * m + mi] = tree2dic(tree)
    # current fit: row sums of MM (one column per tree)
    yhat = MM.sum(axis=1).values
    Yhat[:, t] = yhat
    e = (y - yhat).values
    sse = e @ e
    Rmse[t] = sqrt(sse / n0)
    Like[t] = log(norm.pdf(e, scale=sqrt(var))).sum()
    # redraw the noise variance: gamma draw with shape a and scale 1 / b,
    # then var is its reciprocal
    b = ig2 + 0.5 * sse
    lamda = np.random.gamma(a, 1 / b)
    var = 1 / lamda
    # mean depth over all trees for this sweep
    Depth_mu[t] = array([tr.depth() for tr in trees]).mean()

# final estimate: average of the stored fits from column `burn` onwards
yhat = Yhat[:, burn:].mean(axis=1)
    leaf = partition.split(vertex, partidx)
    subTree.create_node(leaf.PaintedVertices, leaf)
    if leaf.isatomic():
        return subTree

    # recurse onto children nodes to build partition tree depth first
    for v in leaf.Parts[leaf.nextsplitting()]:
        subTree.paste(leaf, branch(leaf, v, leaf.nextsplitting()))

    return subTree


from treelib import Node, Tree
# Build the partition tree: the node identifier is the partition object
# itself and the tag is its PaintedVertices attribute.
tree = Tree()
tree.create_node(P0.PaintedVertices, P0)  # root node

# Unless the root partition is already atomic, paste one branch(...) subtree
# below the root for every vertex of the part selected by nextsplitting().
if not P0.isatomic():
    for v in P0.Parts[P0.nextsplitting()]:
        tree.paste(P0, branch(P0, v, P0.nextsplitting()))

tree.show()
# For every leaf partition, apply its automorphism and print the edges
# as formatted by lexifyedges.
for node in tree.leaves():
    # print(node.identifier.permutation())
    P = node.identifier
    sG = P.applyautomorphism()
    print(lexifyedges(sG))

# P1 = tree.leaves()[0].identifier
# p = P1.permutation()
# G1 = P1.applyautomorphism()
Example #28
0
class StepParse:
    def __init__(self):
        pass

    def load_step(self, step_filename):

        self.nauo_lines = []
        self.prod_def_lines = []
        self.prod_def_form_lines = []
        self.prod_lines = []
        self.filename = os.path.splitext(step_filename)[0]

        line_hold = ''
        line_type = ''

        # Find all search lines
        with open(step_filename) as f:
            for line in f:
                # TH: read pointer of lines as they are read, so if the file has text wrap it will notice and add it to the following lines
                index = re.search("#(.*)=", line)
                if index:
                    # TH: if not none then it is the start of a line so read it
                    # want to hold line until it has checked next line
                    # if next line is a new indexed line then save previous line
                    if line_hold:
                        if line_type == 'nauo':
                            self.nauo_lines.append(line_hold)
                        elif line_type == 'prod_def':
                            self.prod_def_lines.append(line_hold)
                        elif line_type == 'prod_def_form':
                            self.prod_def_form_lines.append(line_hold)
                        elif line_type == 'prod':
                            self.prod_lines.append(line_hold)
                        line_hold = ''
                        line_type = ''

                    prev_index = True  # TH remember previous line had an index
                    if 'NEXT_ASSEMBLY_USAGE_OCCURRENCE' in line:
                        line_hold = line.rstrip()
                        line_type = 'nauo'
                    elif ('PRODUCT_DEFINITION ' in line
                          or 'PRODUCT_DEFINITION(' in line):
                        line_hold = line.rstrip()
                        line_type = 'prod_def'
                    elif 'PRODUCT_DEFINITION_FORMATION' in line:
                        line_hold = line.rstrip()
                        line_type = 'prod_def_form'
                    elif ('PRODUCT ' in line or 'PRODUCT(' in line):
                        line_hold = line.rstrip()
                        line_type = 'prod'
                else:
                    prev_index = False
                    #TH: if end of file and previous line was held
                    if 'ENDSEC;' in line:
                        if line_hold:
                            if line_type == 'nauo':
                                self.nauo_lines.append(line_hold)
                            elif line_type == 'prod_def':
                                self.prod_def_lines.append(line_hold)
                            elif line_type == 'prod_def_form':
                                self.prod_def_form_lines.append(line_hold)
                            elif line_type == 'prod':
                                self.prod_lines.append(line_hold)
                            line_hold = ''
                            line_type = ''
                    else:
                        #TH: if not end of file
                        line_hold = line_hold + line.rstrip()

        self.nauo_refs = []
        self.prod_def_refs = []
        self.prod_def_form_refs = []
        self.prod_refs = []

        # TH: added 'replace(","," ").' to replace ',' with a space to make the spilt easier if there are not spaces inbetween the words'
        # Find all (# hashed) line references and product names
        # TH: it might be worth finding a different way of extracting data we do want rather than fixes to get rid of the data we don't
        for j, el_ in enumerate(self.nauo_lines):
            self.nauo_refs.append([
                el.rstrip(',')
                for el in el_.replace(",", " ").replace("=", " ").split()
                if el.startswith('#')
            ])
        for j, el_ in enumerate(self.prod_def_lines):
            self.prod_def_refs.append([
                el.rstrip(',')
                for el in el_.replace(",", " ").replace("=", " ").split()
                if el.startswith('#')
            ])
        for j, el_ in enumerate(self.prod_def_form_lines):
            self.prod_def_form_refs.append([
                el.rstrip(',')
                for el in el_.replace(",", " ").replace("=", " ").split()
                if el.startswith('#')
            ])
        for j, el_ in enumerate(self.prod_lines):
            self.prod_refs.append([
                el.strip(',') for el in el_.replace(",", " ").replace(
                    "(", " ").replace("=", " ").split() if el.startswith('#')
            ])
            self.prod_refs[j].append(el_.split("'")[1])

        # Get first two items in each sublist (as third is shape ref)
        #
        # First item is 'PRODUCT_DEFINITION' ref
        # Second item is 'PRODUCT_DEFINITION_FORMATION <etc>' ref
        self.prod_all_refs = [el[:2] for el in self.prod_def_refs]

        # Match up all references down to level of product name
        for j, el_ in enumerate(self.prod_all_refs):

            # Add 'PRODUCT_DEFINITION' ref
            for i, el in enumerate(self.prod_def_form_refs):
                if el[0] == el_[1]:
                    el_.append(el[1])
                    break

            # Add names from 'PRODUCT_DEFINITION' lines
            for i, el in enumerate(self.prod_refs):
                if el[0] == el_[2]:
                    el_.append(el[2])
                    break

        # Find all parent and child relationships (3rd and 2nd item in each sublist)
        self.parent_refs = [el[1] for el in self.nauo_refs]
        self.child_refs = [el[2] for el in self.nauo_refs]

        # Find distinct parts and assemblies via set operations; returns list, so no repetition of items
        self.all_type_refs = set(self.child_refs) | set(self.parent_refs)
        self.ass_type_refs = set(self.parent_refs)
        self.part_type_refs = set(self.child_refs) - set(self.parent_refs)
        #TH: find root node
        self.root_type_refs = set(self.parent_refs) - set(self.child_refs)

        # Create simple parts dictionary (ref + label)
        self.part_dict = {el[0]: el[3] for el in self.prod_all_refs}
#        self.part_dict_inv = {el[3]:el[0] for el in self.prod_all_refs}

    def show_values(self):
        # TH: basic testing, if needed these could be spilt up
        print(self.nauo_lines)
        print(self.prod_def_lines)
        print(self.prod_def_form_lines)
        print(self.prod_lines)
        print(self.nauo_refs)
        print(self.prod_def_refs)
        print(self.prod_def_form_refs)
        print(self.prod_refs)

#    HR: "create_dict" replaced by list comprehension elsewhere
#
#    def create_dict(self):
#
#        # TH: links nauo number with a name and creates dict
#        self.part_dict  = {}
#        for part in self.all_type_refs:
#            for sublist in self.prod_def_refs:
#                if sublist[0] == part:
#                    prod_loc = '#' + re.findall('\d+',sublist[1])[0]
#                    pass
#            for sublist in self.prod_def_form_refs:
#                if sublist[0] == prod_loc:
#                    prod_loc = '#' + str(re.findall('\d+',sublist[1])[0])
#                    pass
#            for sublist in self.prod_refs:
#                if sublist[0] == prod_loc:
#                    part_name = sublist[2]
#
#            self.part_dict[part] = part_name

    def create_tree(self):
        """Build ``self.tree`` from the parsed assembly relationships.

        Nodes are numbered 0, 1, 2, ... in depth-first creation order, tagged
        with the product name from ``part_dict`` and carry the STEP reference
        in ``data['ref']``.  ``self.tree_dict`` maps each node number to its
        reference.  When ``part_dict`` is empty the tree is left empty and
        nothing else happens; otherwise ``get_levels`` is called at the end.
        """
        self.tree = Tree()
        # Nothing to build a tree from
        if not self.part_dict:
            return

        root_ref = list(self.root_type_refs)[0]
        # The part reference is stored as node data for later use
        self.tree.create_node(self.part_dict[root_ref],
                              0,
                              data={'ref': root_ref})

        # Node ids must be unique although a reference may occur many times,
        # so ids are simply handed out in creation order.
        self.tree_dict = {0: root_ref}

        def add_children(parent_id):
            """Attach every usage of the parent's reference, depth first."""
            parent_ref = self.tree_dict[parent_id]
            for usage in self.nauo_refs:
                if usage[1] != parent_ref:
                    continue
                # Ids are contiguous from 0, so the next free one is the
                # current number of entries.
                child_id = len(self.tree_dict)
                child_ref = str(usage[2])
                self.tree_dict[child_id] = child_ref
                self.tree.create_node(self.part_dict[usage[2]],
                                      child_id,
                                      parent=parent_id,
                                      data={'ref': child_ref})
                add_children(child_id)

        add_children(0)
        self.appended = False

        self.get_levels()

    def get_levels(self):
        """Count parts and assemblies below every node and build the lattice.

        For each node id ``self.levels[id]`` holds ``'n_p'`` and ``'n_a'``:
        a leaf gets ``part_level`` for both; any other node gets the sum of
        its children's ``n_p`` and the sum of their ``n_a`` plus one.  The
        distinct non-leaf values are collected in ``levels_set_p`` and
        ``levels_set_a``.  ``create_lattice`` is then called, after which the
        sorted level lists and the inverse maps (level value -> node ids) are
        stored.
        """
        self.levels = {}
        self.levels_set_p = set()
        self.levels_set_a = set()
        self.leaf_ids = [leaf.identifier for leaf in self.tree.leaves()]
        self.all_ids = list(self.tree.nodes)
        self.non_leaf_ids = set(self.all_ids) - set(self.leaf_ids)

        self.part_level = 1

        def fill_level(depth):
            """Fill ``self.levels`` for every node at the given tree depth."""
            at_depth = [
                nid for nid in self.tree.nodes
                if self.tree.level(nid) == depth
            ]
            for nid in at_depth:
                if nid in self.leaf_ids:
                    self.levels[nid] = {
                        'n_p': self.part_level,
                        'n_a': self.part_level,
                    }
                    continue
                # Assembly: children were filled in an earlier (deeper) pass
                kids = self.tree.is_branch(nid)
                n_parts = sum(self.levels[kid]['n_p'] for kid in kids)
                n_assemblies = sum(self.levels[kid]['n_a'] for kid in kids) + 1
                self.levels[nid] = {'n_p': n_parts, 'n_a': n_assemblies}
                self.levels_set_p.add(n_parts)
                self.levels_set_a.add(n_assemblies)

        # Deepest level first, so children are always done before parents
        for depth in reversed(range(self.tree.depth() + 1)):
            fill_level(depth)

        self.create_lattice()

        self.levels_p_sorted = sorted(self.levels_set_p)
        self.levels_a_sorted = sorted(self.levels_set_a)

        def invert(level_values, key):
            """Map each level value to the ids of the nodes having it."""
            # The part level always gets an entry, even if no node has it.
            inverse = {
                value: []
                for value in [self.part_level] + list(level_values)
            }
            for nid, counts in self.levels.items():
                inverse[counts[key]].append(nid)
            return inverse

        self.levels_p_inv = invert(self.levels_p_sorted, 'n_p')
        self.levels_a_inv = invert(self.levels_a_sorted, 'n_a')

    def get_all_children(self, id_):

        ancestors = [el.identifier for el in self.tree.children(id_)]
        parents = ancestors
        while parents:
            children = []
            for parent in parents:
                children = [el.identifier for el in self.tree.children(parent)]
                ancestors.extend(children)
                parents = children
        return ancestors

    def create_lattice(self):
        """Build the directed graph ``self.g`` mirroring ``self.tree``.

        Every tree node becomes a graph node with the attributes ``parent``
        (parent id, -1 for the root), ``label`` (the tree tag) and ``colour``;
        every non-root node gets an edge pointing to its parent.  Each node is
        then given a plotting position ``pos``: leaves are spread along x at
        height 1, and every other node sits at the mean x of its children at a
        height equal to its ``n_a`` value from ``self.levels``.

        Relies on ``self.leaf_ids``, ``self.levels`` and ``self.levels_set_a``
        as set by ``get_levels``.
        """
        # Create lattice
        self.g = nx.DiGraph()
        self.default_colour = 'r'
        # Get root node and set parent to -1 to maintain data type of "parent"
        root_id = self.tree.root
        self.g.add_node(root_id,
                        parent=-1,
                        label=self.tree.get_node(root_id).tag,
                        colour=self.default_colour)

        # Do nodes from treelib "nodes" dictionary (root already added)
        for key in self.tree.nodes:
            if key != self.tree.root:
                self.g.add_node(key,
                                parent=self.tree.parent(key).identifier,
                                label=self.tree.get_node(key).tag,
                                colour=self.default_colour)

        # Do edges from nodes, child -> parent.  Kept as a second pass so all
        # nodes are inserted in tree order before any edge is added.
        for key in self.tree.nodes:
            if key != self.tree.root:
                self.g.add_edge(key, self.tree.parent(key).identifier)

        # Escape if only one node
        # HR 6/3/20 QUICK BUG FIX: SINGLE-NODE TREE DOES NOT PLOT
        # IMPROVE LATER; SHOULD BE PART OF A GENERAL METHOD
        if self.tree.size() == 1:
            id_ = [el.identifier for el in self.tree.leaves()]
            self.g.nodes[id_[-1]]['pos'] = (0, 0)
            return

        # Get set of parents of leaf nodes
        leaf_parents = set(
            [self.tree.parent(el).identifier for el in self.leaf_ids])

        # The leaf list does not change while positions are assigned, so it
        # is fetched once here instead of once per child.
        leaves = self.tree.leaves()
        no_leaves = len(leaves)
        leaf_id_set = {leaf.identifier for leaf in leaves}

        # For each leaf parent, set position of its leaf children sequentially
        i = 0
        for parent_id in leaf_parents:
            for child_id in self.tree.is_branch(parent_id):
                if child_id in leaf_id_set:
                    self.g.nodes[child_id]['pos'] = ((i / (no_leaves)), 1)
                    i += 1

        # To set plot positions of nodes from lattice levels
        # ---
        # Traverse upwards from leaves; ascending n_a guarantees that the
        # children of a node have been positioned before the node itself.
        for level in sorted(list(self.levels_set_a)):
            # Get all nodes at that level
            node_ids = [k for k, v in self.levels.items() if v['n_a'] == level]
            # Position each node at the mean x of its children
            for node_id in node_ids:
                child_ids = self.tree.is_branch(node_id)
                pos_sum = 0
                for child_id in child_ids:
                    pos_sum += self.g.nodes[child_id]['pos'][0]
                pos_sum = pos_sum / len(child_ids)
                self.g.nodes[node_id]['pos'] = (pos_sum, level)

    def print_tree(self):

        try:
            self.tree.show()
        except:
            self.create_tree()
            self.tree.show()

    def tree_to_json(self, save_to_file=False, filename='file', path=''):

        #TH: return json format tree, can also save to file
        if self.tree.size() != 0:
            data = self.tree.to_json()
            j = json.loads(data)
            if save_to_file == True:
                if path:
                    file_path = os.path.join(path, filename)
                else:
                    file_path = filename

                with open(file_path + '.json', 'w') as outfile:
                    json.dump(j, outfile)

            return data
        else:
            print("no tree to print")
            return
Example #29
0
class DependencyReader:
    """DependencyReader object"""

    def __init__(self):
        """Create a scratch directory under the current directory and empty state."""
        # Scratch directory created inside the current working directory
        self.tempDirectoryPath = mkdtemp(dir=".")
        # Dependency tree, filled by getDependencies/relateDependencies
        self.tree = Tree()
        # Maps a reference id to its DependencyNode
        self.dependencies = dict()
        # Pending (parent id, child id) pairs
        self.graphRelationships = list()

    def getPom(self, pomPath):
        shutil.copy(pomPath, self.tempDirectoryPath)
        os.chdir(self.tempDirectoryPath)

    def getDependencies(self):
        """Run Maven's dependency tree goal and record nodes and relationships.

        The TGF output is read line by line until ``BUILD SUCCESS`` or end of
        output.  Each node line adds a ``DependencyNode`` to
        ``self.dependencies`` (keyed by its reference id) and has its jar
        fetched into the scratch directory; the first node seen becomes the
        root of ``self.tree``.  Each edge line is appended to
        ``self.graphRelationships`` as ``(parent id, child id)``.
        """
        # universal_newlines=True makes the pipe yield text, so the str
        # patterns below also work on Python 3 (bytes would raise TypeError).
        mavenTreeOutput = subprocess.Popen(
            'mvn org.apache.maven.plugins:maven-dependency-plugin:RELEASE:tree -DoutputType=tgf',
            stdout=subprocess.PIPE,
            shell=True,
            universal_newlines=True)

        try:
            # readline() returns '' only at end of output.  Testing for that
            # before stripping keeps a blank line from being mistaken for the
            # end of the stream.
            for rawLine in iter(mavenTreeOutput.stdout.readline, ''):
                line = rawLine.rstrip()

                if re.search(r"BUILD SUCCESS", line):
                    break

                # Node line: "[INFO] <id> <group>:<artifact>:<type>:<version>"
                match = re.match(r"\[INFO\]\s(\d*)\s*(.*):(.*):(\w+):([0-9\.]*)", line)

                if match:
                    refId = match.group(1)
                    if refId not in self.dependencies:
                        self.dependencies[refId] = DependencyNode(match.group(2), match.group(3), match.group(5), refId)

                    # The first node reported becomes the tree root
                    if not self.tree.leaves():
                        self.tree.create_node(refId, refId, data=self.dependencies[refId])

                    self.dependencies[refId].get('jar', self.tempDirectoryPath)

                # Edge line: "[INFO] <parent id> <child id>"
                match = re.match(r"\[INFO\]\s(\d*)\s(\d*)", line)

                if match and match.group(2):
                    self.graphRelationships.append((match.group(1), match.group(2)))
        finally:
            # Drain any remaining output and reap the child process.
            mavenTreeOutput.communicate()

    def relateDependencies(self):
        while self.graphRelationships:
            for item in self.graphRelationships:
                node = self.tree.get_node(item[0])

                if node is not None:
                    parent = self.dependencies[item[0]]
                    child = self.dependencies[item[1]]
                    self.tree.create_node(child.referenceId, child.referenceId, parent=parent.referenceId, data=child)
                    self.graphRelationships.remove(item)

    def scanDependencies(self):
        """Run ``dosocs2 oneshot`` on the jar of every tree node, depth first."""
        # Need to run on each package with oneshot to get identifiers
        # unless update dosocsv2 to create identifiers on scan
        # or fix up dosocsv2 to create identifiers on scan instead
        for nodeId in self.tree.expand_tree(mode=Tree.DEPTH):
            jarName = self.tree.get_node(nodeId).data.jarName
            command = 'dosocs2 oneshot ' + jarName
            subprocess.call(command, shell=True)

    def createRelationships(self):
        # Pass packages as relationships to new dosocsv2 command created
        self.recursiveRelationship(self.tree.root)

    def recursiveRelationship(self, parent):
        for node in self.tree.is_branch(parent):
            parentNode = self.tree.get_node(parent)
            childNode = self.tree.get_node(node)
            subprocess.call('dosocs2 packagerelate ' + parentNode.data.jarName + ' ' + childNode.data.jarName, shell=True)
            self.recursiveRelationship(node)

    def retrieve_dependencies(self, jarName):
        """Query ``dosocs2 dependencies`` for a package and print its tree.

        Args:
            jarName: Package to query, or ``None`` to use the jar of the root
                node of ``self.tree``.

        Returns:
            Always ``None``.  The dependency tree is printed with
            ``tree.show()``; if the output has no relationships a notice is
            printed instead.
        """
        if jarName is None:
            root = self.tree.get_node(self.tree.root).data.jarName
        else:
            root = jarName

        # NOTE(review): the command is built by string concatenation and run
        # through the shell; confirm that jar names are trusted.
        # universal_newlines=True makes the pipe yield text so the str
        # patterns below also work on Python 3.
        tgfOutput = subprocess.Popen('dosocs2 dependencies ' + root,
                                     stdout=subprocess.PIPE,
                                     shell=True,
                                     universal_newlines=True)
        tree = Tree()
        rootSeen = False
        dependencies = []   # (name, id) for every node after the first
        relationships = []  # (parent id, child id)
        for line in iter(tgfOutput.stdout.readline, ''):
            # Node line: "<id> - <name>"
            match = re.match(r"(\d+) - (.*)", line)
            if match:
                if not rootSeen:
                    # The first node listed is the root of the tree
                    rootSeen = True
                    tree.create_node(match.group(2), match.group(1))
                else:
                    dependencies.append((match.group(2), match.group(1)))

            # Edge line: "<parent id> <child id>"
            match = re.match(r"(\d+) (\d+)", line)

            if match:
                relationships.append((match.group(1), match.group(2)))
        # Reap the child process once all of its output has been read.
        tgfOutput.communicate()

        if not relationships:
            # `root` rather than `jarName`: the latter may be None here,
            # which used to raise TypeError in the concatenation.
            print("No child relationships for " + root)
            # NOTE(review): the working directory is not restored on this
            # path even when jarName is None -- confirm this is intended.
            return None

        # Unattached dependencies by numeric id; the first entry wins when an
        # id is listed more than once.
        unattached = {}
        for name, depId in dependencies:
            unattached.setdefault(int(depId), (name, depId))

        # Attach children in passes, since a child can only be attached once
        # its parent is in the tree.  Stop when a pass attaches nothing, so
        # relationships that can never be resolved (unknown parent, or a
        # child that is missing or already attached) no longer cause an
        # endless loop or an IndexError.
        pending = relationships
        while pending:
            unresolved = []
            for parentId, childId in pending:
                node = tree.get_node(parentId)
                dep = unattached.get(int(childId)) if node is not None else None
                if dep is None:
                    unresolved.append((parentId, childId))
                    continue
                del unattached[int(childId)]
                tree.create_node(dep[0], dep[1], parent=node.identifier)
            if len(unresolved) == len(pending):
                break
            pending = unresolved

        tree.show()
        if jarName is None:
            os.chdir(os.pardir)
Example #30
0
class RIAC(AbstractTeacher):
    def __init__(self,
                 mins,
                 maxs,
                 seed,
                 env_reward_lb,
                 env_reward_ub,
                 max_region_size=200,
                 alp_window_size=None,
                 nb_split_attempts=50,
                 sampling_in_leaves_only=False,
                 min_region_size=None,
                 min_dims_range_ratio=1 / 6,
                 discard_ratio=1 / 4):
        """Set up the region tree with a single root region covering the task space.

        Args:
            mins, maxs, seed, env_reward_lb, env_reward_ub: Forwarded to
                ``AbstractTeacher.__init__``; ``mins``/``maxs`` are also read
                back as ``self.mins``/``self.maxs`` to bound the root region.
            max_region_size: Maximal number of (task, reward) pairs a region
                can hold before splitting.
            alp_window_size: Window length used by ``compute_alp``; defaults
                to ``max_region_size``.
            nb_split_attempts: Number of candidate splits tried by ``split``.
            sampling_in_leaves_only: Whether task sampling uses only leaf
                regions (True) or all regions (False).
            min_region_size: Minimum population of each child when splitting;
                defaults to ``max_region_size / 20``.
            min_dims_range_ratio: Minimum child extent per dimension, as a
                fraction of the initial range of that dimension.
            discard_ratio: Fraction of ``max_region_size`` dropped from the
                oldest samples of a region when no valid split is found.
        """

        AbstractTeacher.__init__(self, mins, maxs, env_reward_lb,
                                 env_reward_ub, seed)

        # Maximal number of (task, reward) pairs a region can hold before splitting
        self.maxlen = max_region_size

        self.alp_window = self.maxlen if alp_window_size is None else alp_window_size

        # Initialize Regions' tree
        self.tree = Tree()
        self.regions_bounds = [Box(self.mins, self.maxs, dtype=np.float32)]
        self.regions_alp = [0.]
        # Root region: two bounded deques (index 0 is read as rewards and
        # index 1 as tasks elsewhere in this class), each able to hold one
        # element more than maxlen.
        self.tree.create_node('root',
                              'root',
                              data=Region(maxlen=self.maxlen,
                                          r_t_pairs=[
                                              deque(maxlen=self.maxlen + 1),
                                              deque(maxlen=self.maxlen + 1)
                                          ],
                                          bounds=self.regions_bounds[-1],
                                          alp=self.regions_alp[-1]))
        self.nb_dims = len(mins)
        self.nb_split_attempts = nb_split_attempts

        # Whether task sampling uses parent and child regions (False) or only child regions (True)
        self.sampling_in_leaves_only = sampling_in_leaves_only

        # Additional tricks to original RIAC, enforcing splitting rules

        # 1 - Minimum population required for both children when splitting --> set to 1 to cancel
        self.minlen = self.maxlen / 20 if min_region_size is None else min_region_size

        # 2 - minimum children region size (compared to initial range of each dimension)
        # Set min_dims_range_ratio to 1/np.inf to cancel
        self.dims_ranges = self.maxs - self.mins
        self.min_dims_range_ratio = min_dims_range_ratio

        # 3 - If after nb_split_attempts, no split is valid, flush oldest points of parent region
        # If 1- and 2- are canceled, this will be canceled since any split will be valid
        self.discard_ratio = discard_ratio

        # book-keeping
        self.sampled_tasks = []
        self.all_boxes = []
        self.all_alps = []
        self.update_nb = -1
        self.split_iterations = []

        # Snapshot of this constructor's local namespace (the arguments plus
        # `self`); adding or renaming locals above changes what is stored.
        self.hyperparams = locals()

    def compute_alp(self, sub_region):
        """Return the absolute learning progress of a [rewards, tasks] pair.

        Only ``sub_region[0]`` (the rewards) is read.  With more than two
        rewards, the most recent ``min(len, alp_window)`` of them are cut into
        an older and a newer part and the absolute difference of the two means
        is returned; otherwise the result is 0.
        """
        rewards = sub_region[0]
        if len(rewards) <= 2:
            return np.abs(0)

        window = min(len(rewards), self.alp_window)  # may be shorter than alp_window
        half = int(window / 2)
        reward_array = np.array(rewards)
        older_mean = reward_array[-window:-half].mean()
        newer_mean = reward_array[-half:].mean()
        return np.abs(older_mean - newer_mean)

    def split(self, nid):
        """Try to split the region stored at node ``nid`` into two sub-regions.

        ``nb_split_attempts`` random axis-aligned cuts are drawn.  Each cut is
        redrawn until both sides hold at least ``minlen`` samples, then scored;
        the best-scoring cut whose two boxes respect the minimum extent per
        dimension is kept.

        Returns:
            True if two child nodes were added to the tree.  False if no valid
            cut was found, in which case the oldest
            ``int(maxlen * discard_ratio)`` samples of the region are dropped.

        Raises:
            AssertionError: If no valid cut was found and the region does not
                hold exactly ``maxlen + 1`` rewards.
        """
        reg = self.tree.get_node(nid).data

        def empty_pairs():
            """Return fresh, bounded [rewards, tasks] deques for a sub-region."""
            return [
                deque(maxlen=self.maxlen + 1),
                deque(maxlen=self.maxlen + 1)
            ]

        best_split_score = 0
        best_bounds = None
        best_sub_regions = None
        best_alp = None
        is_split = False
        for _ in range(self.nb_split_attempts):
            sub_reg1 = empty_pairs()
            sub_reg2 = empty_pairs()

            # repeat until the two sub regions contain at least minlen of the mother region
            while len(sub_reg1[0]) < self.minlen or len(
                    sub_reg2[0]) < self.minlen:
                # decide on dimension, then on where to cut along it
                dim = self.random_state.choice(range(self.nb_dims))
                threshold = reg.bounds.sample()[dim]
                bounds1 = Box(reg.bounds.low,
                              reg.bounds.high,
                              dtype=np.float32)
                bounds1.high[dim] = threshold
                bounds2 = Box(reg.bounds.low,
                              reg.bounds.high,
                              dtype=np.float32)
                bounds2.low[dim] = threshold
                bounds = [bounds1, bounds2]

                # Both boxes must keep a minimum extent in every dimension
                min_extent = self.dims_ranges * self.min_dims_range_ratio
                valid_bounds = not (
                    np.any(bounds1.high - bounds1.low < min_extent)
                    or np.any(bounds2.high - bounds2.low < min_extent))

                # perform split in sub regions
                sub_reg1 = empty_pairs()
                sub_reg2 = empty_pairs()
                for idx, task in enumerate(reg.r_t_pairs[1]):
                    target = sub_reg1 if bounds1.contains(task) else sub_reg2
                    target[1].append(task)
                    target[0].append(reg.r_t_pairs[0][idx])
                sub_regions = [sub_reg1, sub_reg2]

            # compute alp
            alp = [self.compute_alp(sub_reg1), self.compute_alp(sub_reg2)]

            # compute score
            # NOTE(review): len(sub_reg1) and len(sub_reg2) are always 2 (the
            # [rewards, tasks] lists), so the score is a constant times the
            # ALP difference; presumably len(sub_reg1[0]) was meant -- confirm
            # before changing, as it alters which split is selected.
            split_score = len(sub_reg1) * len(sub_reg2) * np.abs(alp[0] -
                                                                 alp[1])
            if split_score >= best_split_score and valid_bounds:
                is_split = True
                best_split_score = split_score
                best_sub_regions = sub_regions
                best_bounds = bounds
                # Keep the ALPs that belong to this candidate; using `alp`
                # after the loop would take those of the last attempt.
                best_alp = alp

        if is_split:
            # add new nodes to tree
            for i, (r_t_pairs,
                    bounds) in enumerate(zip(best_sub_regions, best_bounds)):
                self.tree.create_node(identifier=self.tree.size(),
                                      parent=nid,
                                      data=Region(self.maxlen,
                                                  r_t_pairs=r_t_pairs,
                                                  bounds=bounds,
                                                  alp=best_alp[i]))
        else:
            # No valid split: flush the oldest samples of the region
            assert len(reg.r_t_pairs[0]) == (self.maxlen + 1)
            first_kept = int(self.maxlen * self.discard_ratio)
            reg.r_t_pairs[0] = deque(
                islice(reg.r_t_pairs[0], first_kept, self.maxlen + 1))
            reg.r_t_pairs[1] = deque(
                islice(reg.r_t_pairs[1], first_kept, self.maxlen + 1))

        return is_split

    def add_task_reward(self, node, task, reward):
        """Add a (task, reward) pair to ``node`` and to every sub-region containing it.

        Nodes whose bounds contain the task are recorded in
        ``self.nodes_to_recompute``; a node whose region reports that it needs
        splitting is recorded in ``self.nodes_to_split``.  Nodes that do not
        contain the task are left untouched.
        """
        region = node.data
        node_id = node.identifier
        if not region.bounds.contains(task):
            return

        self.nodes_to_recompute.append(node_id)
        sub_nodes = self.tree.children(node_id)
        # if task in region, task is in one sub-region
        for sub_node in sub_nodes:
            self.add_task_reward(sub_node, task, reward)

        has_no_children = sub_nodes == []
        if region.add(task, reward, has_no_children):  # COPY ALL MODE
            self.nodes_to_split.append(node_id)

    def episodic_update(self, task, reward, is_success):
        """Record one (task, reward) pair and refresh the region statistics.

        Args:
            task: Task that was just played.
            reward: Reward obtained on that task.
            is_success: Not used by this method.

        Returns:
            ``(new_split, None)`` where ``new_split`` tells whether a region
            was split during this update.
        """
        self.update_nb += 1

        # Add new (task, reward) to regions nodes
        self.nodes_to_split = []
        self.nodes_to_recompute = []
        root_node = self.tree.get_node('root')
        # Fills self.nodes_to_split / self.nodes_to_recompute as needed
        self.add_task_reward(root_node, task, reward)
        assert len(self.nodes_to_split) <= 1

        # Split a node if needed
        new_split = False
        if len(self.nodes_to_split) == 1:
            new_split = self.split(self.nodes_to_split[0])  # Execute the split
            if new_split:
                # Update list of regions_bounds
                if self.sampling_in_leaves_only:
                    bounded_nodes = self.tree.leaves()
                else:
                    bounded_nodes = self.tree.all_nodes()
                self.regions_bounds = [n.data.bounds for n in bounded_nodes]

        # Recompute ALPs of modified nodes
        for nid in self.nodes_to_recompute:
            region = self.tree.get_node(nid).data
            region.alp = self.compute_alp(region.r_t_pairs)

        # Collect regions data (regions' ALP and regions' (task, reward) pairs)
        if self.sampling_in_leaves_only:
            sampled_nodes = self.tree.leaves()
        else:
            sampled_nodes = self.tree.all_nodes()
        self.regions_alp = [n.data.alp for n in sampled_nodes]
        self.r_t_pairs = [n.data.r_t_pairs for n in sampled_nodes]

        # Book-keeping
        if new_split:
            self.all_boxes.append(copy.copy(self.regions_bounds))
            self.all_alps.append(copy.copy(self.regions_alp))
            self.split_iterations.append(self.update_nb)
        assert len(self.regions_alp) == len(self.regions_bounds)

        return new_split, None

    def sample_random_task(self):
        """Return a task sampled from the first stored region's bounds."""
        first_region = self.regions_bounds[0]  # First region is root region
        return first_region.sample()

    def sample_task(self):
        """Sample the next task, store it in ``sampled_tasks`` and return it as float32.

        One uniform draw selects the strategy: below 0.1 the lowest-reward
        task of an ALP-chosen region is mutated (or a random task is used
        when nothing has been sampled yet), below 0.3 a random task is drawn,
        and otherwise a task is sampled inside a region chosen proportionally
        to its ALP.
        """
        draw = self.random_state.rand()
        if draw < 0.1:
            # "mode 3" (10%) -> sample on regions and then mutate
            # lowest-performing task in region
            if len(self.sampled_tasks) == 0:
                new_task = self.sample_random_task()
            else:
                new_task = self.non_exploratory_task_sampling()["task"]
        elif draw < 0.3:
            # "mode 2" (20%) -> random task
            new_task = self.sample_random_task()
        else:
            # "mode 1" (70%) -> proportional sampling on regions based on ALP
            # and then random task in selected region
            region_id = proportional_choice(self.regions_alp,
                                            self.random_state,
                                            eps=0.0)
            new_task = self.regions_bounds[region_id].sample()

        self.sampled_tasks.append(new_task)
        return self.sampled_tasks[-1].astype(np.float32)

    def non_exploratory_task_sampling(self):
        """Mutate the lowest-reward task of a region chosen proportionally to ALP.

        Returns:
            A dict with the new ``"task"`` and an ``"infos"`` dict holding
            ``"bk_index"`` (index of the latest book-keeping entry) and
            ``"task_infos"`` (the chosen region index).
        """
        # 1 - Sample region proportionally to its ALP
        region_id = proportional_choice(self.regions_alp,
                                        self.random_state,
                                        eps=0.0)
        pairs = self.r_t_pairs[region_id]

        # 2 - Retrieve (task, reward) pair with lowest reward
        worst_task_idx = np.argmin(pairs[0])

        # 3 - Mutate task by a small amount (using Gaussian centered on task, with 0.1 std)
        mutated = self.random_state.normal(pairs[1][worst_task_idx].copy(),
                                           0.1)
        # clip to stay within region (add small epsilon to avoid falling in multiple regions)
        region_box = self.regions_bounds[region_id]
        mutated = np.clip(mutated, region_box.low + 1e-5,
                          region_box.high - 1e-5)

        infos = {
            "bk_index": len(self.all_boxes) - 1,
            "task_infos": region_id
        }
        return {"task": mutated, "infos": infos}

    def dump(self, dump_dict):
        """Store the book-keeping lists in ``dump_dict`` and return it."""
        for key in ('all_boxes', 'split_iterations', 'all_alps'):
            dump_dict[key] = getattr(self, key)
        # dump_dict['riac_params'] = self.hyperparams
        return dump_dict

    @property
    def nb_regions(self):
        """Number of entries currently held in ``regions_bounds``."""
        region_count = len(self.regions_bounds)
        return region_count

    @property
    def get_regions(self):
        """The current list of region bounds (the stored list, not a copy)."""
        current_bounds = self.regions_bounds
        return current_bounds
Example #31
0
                tree.create_node(key, key)
                added.add(key)
                node_dict.pop(key)
                break
    tree_list.append(tree)

# Every tree is written to the same file name.
# NOTE(review): presumably save2file appends rather than overwrites, so the
# file ends up holding all trees -- confirm against the treelib version used.
for tree in tree_list:
    tree.save2file("Processed_Skeleton_Trees.txt")

#######################################################################################################################

# Identify end nodes (leaves): collect the identifiers of the leaves of all
# trees into one flat list.
leaf_list = []
for i in range(len(tree_list)):
    tree = tree_list[i]
    leaves = tree.leaves(nid=None)
    for leaf in leaves:
        leaf = leaf.identifier
        leaf_list.append(leaf)

# Identify paths to leaves: one entry per tree, each being whatever
# paths_to_leaves() returns for that tree.
paths_list = []
for i in range(len(tree_list)):
    tree = tree_list[i]
    paths = tree.paths_to_leaves()
    paths_list.append(paths)

# Identify branch points: the distinct values occurring more than once in
# source_list. count() rescans the list for every element, so this is
# quadratic in len(source_list).
branch_list = list(set([x for x in source_list if source_list.count(x) > 1]))

# Remove somas from branch list: