Example #1
0
def render_vocabulary_word(tree: Tree, node: Node) -> str:
    """Render a single vocabulary entry as a '*'-prefixed heading with details.

    The node must be a direct child of a node whose identifier is
    'vocabulary' (case-insensitive) and must carry a non-empty ``data``
    mapping.  Recognised keys of ``node.data``:

    * ``audio`` -- if present and truthy, rendered through ``render_link``.
    * ``book_examples`` -- each item is rendered through ``render_quote``.
    * ``definitions`` -- a sequence of mappings with a ``definition`` text and
      an optional ``synonyms`` iterable; at most three synonyms are shown.

    Arguments:
        tree -- the tree that contains ``node``.
        node -- the vocabulary word node to render.

    Returns:
        The rendered text block for this word.

    Raises:
        AssertionError -- if the node has no parent, the parent is not the
            'vocabulary' node, or the node has no data.
    """
    parent = tree.parent(node.identifier)
    assert parent
    assert parent.identifier.lower() == 'vocabulary'
    assert node.data

    # The heading has one more star than the node's depth in the tree.
    depth = tree.depth(node) + 1
    parts = ['*' * depth + ' ' + node.tag + 2 * '\n']

    if 'audio' in node.data and node.data['audio']:
        parts.append(render_link(node.data['audio'], 'play') + '\n')

    if 'book_examples' in node.data:
        for example in node.data['book_examples']:
            parts.append(render_quote(example))
        parts.append('\n')

    if 'definitions' in node.data:
        parts.append('Definitions\n')
        for number, definition in enumerate(node.data['definitions'], 1):
            line = str(number) + '. ' + definition['definition']
            if 'synonyms' in definition:
                # Take at most the first three synonyms; zip() stops as soon
                # as range(3) is exhausted, so any iterable is accepted.
                shown = [syn for _, syn in zip(range(3),
                                               definition['synonyms'])]
                # An empty synonym list must not leave a dangling '/'.
                if shown:
                    line += ' /' + ', '.join(shown) + '/'
            parts.append(line + '\n')
        parts.append('\n')

    return ''.join(parts)
Example #2
0
    def configure_tree_topology(self, root, degree=2, remove=False):
        """Configures the cluster's network topology as a tree.

        The tree consists of the specified root node and the nodes
        which build the subtrees. The children are chosen incrementally,
        in other words, sequentially in the order of ``self.topology.nodes``.

        After the tree is built, every node receives one forwarding entry per
        other node: traffic for a node inside its own subtree goes to the
        child that leads there, everything else goes to its parent.

        Arguments:
            root {integer} -- The tree's root node.

        Keyword Arguments:
            degree {integer} -- The maximum number of children (default: {2})
            remove {boolean} -- Remove the configuration (default: {False})
        """

        self.logger.info("Configuring tree topology...")
        tree = Tree()
        root_node = self.topology.get_node(root)
        tree.create_node(root_node.name, root_node.node_id)
        parent_node = root
        for nodex in self.topology.nodes:
            if nodex.node_id == root_node.node_id:
                continue
            if len(tree.children(parent_node)) >= degree:
                # The current parent is full, move on to the next one.
                # NOTE(review): this arithmetic assumes node ids are the
                # consecutive integers 0..n-1 -- confirm against the config.
                if parent_node == root and root != 0:
                    parent_node = 0
                elif parent_node + 1 == root:
                    # Skip the root, it has already been filled.
                    parent_node += 2
                else:
                    parent_node += 1
            tree.create_node(nodex.name, nodex.node_id, parent_node)

        self.logger.info("The following tree will be configured:")
        tree.show()

        for nodex in self.topology.nodes:
            self.logger.debug("%s:", nodex.name)
            # These lookups only depend on nodex, so do them once per node
            # instead of once per (nodex, nodey) pair.
            subtree = tree.subtree(nodex.node_id)
            children = tree.children(nodex.node_id)
            parent = tree.parent(nodex.node_id)
            for nodey in self.topology.nodes:
                if nodex.node_id == nodey.node_id:
                    continue
                if subtree.contains(nodey.node_id):
                    # Destination is below us: forward via the child whose
                    # subtree contains it.
                    for child in children:
                        if (child.identifier == nodey.node_id
                                or tree.is_ancestor(child.identifier,
                                                    nodey.node_id)):
                            nodex.add_forwarding(
                                nodey,
                                self.topology.get_node(child.identifier))
                            break
                elif parent is not None:
                    # Destination is elsewhere: forward upwards.
                    nodex.add_forwarding(
                        nodey,
                        self.topology.get_node(parent.identifier))

        if not self.testing:
            self.topology.send_forwarding_tables(remove)
    def map_tree_to_program(self, tree: Tree) -> str:
        """Fold a binary tree bottom-up into a single program value.

        Every leaf is mapped through ``self._node_to_type``.  An inner node is
        resolved once both of its children are resolved:

        * if exactly one child has a truthy sub-program, that one is
          propagated unchanged;
        * if neither child has one, the node itself is mapped through
          ``self._node_to_type``;
        * otherwise ``child_1.apply(child_2)`` is used, falling back to
          ``child_2.apply(child_1)`` if the first attempt raises.

        Results are stored in ``self._node_to_subprog`` keyed by
        ``node.data.span``.

        Arguments:
            tree -- tree whose inner nodes all have exactly two children.

        Returns:
            ``get_value()`` of the sub-program computed for the root.

        Raises:
            AssertionError -- if an inner node does not have two children.
            Exception -- ('final apply_exception: ...') if both application
                orders fail; the original error is attached as ``__cause__``.
        """

        self._node_to_subprog = {}
        frontier = []  # Tree nodes that are left to be explored

        for leaf in tree.leaves():
            span = leaf.data.span
            self._node_to_subprog[span] = self._node_to_type(leaf)
            parent = tree.parent(leaf.identifier)
            if parent and parent not in frontier:
                frontier.append(parent)

        while frontier:
            node = frontier.pop()
            children = tree.children(node.identifier)
            assert len(children) == 2
            # A node can only be resolved after both children; otherwise put
            # it back at the far end of the queue and try another one.
            if not all(child.data.span in self._node_to_subprog
                       for child in children):
                frontier.insert(0, node)
                continue

            child_1 = self._node_to_subprog[children[0].data.span]
            child_2 = self._node_to_subprog[children[1].data.span]
            try:
                if child_1 and not child_2:  # child_2=='NO_LABEL'
                    self._node_to_subprog[node.data.span] = child_1
                elif not child_1 and child_2:  # child_1=='NO_LABEL'
                    self._node_to_subprog[node.data.span] = child_2
                elif not child_1 and not child_2:
                    # Both children are 'NO_LABEL': ignore them and
                    # propagate the parent's own type.
                    self._node_to_subprog[node.data.span] = self._node_to_type(
                        node)
                else:
                    # make sure child_2 value can be formed
                    assert child_2.is_full()
                    self._node_to_subprog[node.data.span] = child_1.apply(
                        child_2)
            except Exception:
                # Any failure above (including the is_full assertion) means
                # the argument order was wrong: try the reverse application.
                try:
                    self._node_to_subprog[node.data.span] = child_2.apply(
                        child_1)
                except Exception as e:
                    raise Exception(
                        'final apply_exception: {}'.format(e)) from e

            parent = tree.parent(node.identifier)
            if parent and parent not in frontier:
                frontier.insert(0, parent)

        # return the root's value
        inner_program = self._node_to_subprog[tree.get_node(
            tree.root).data.span].get_value()
        return inner_program
def get_lca(T: tl.Tree, x: int, y: int) -> int:
    """Return the identifier of the lowest common ancestor of x and y in T.

    The deeper of the two nodes is lifted until both sit on the same level,
    then both are lifted in lock-step until they meet.
    """

    def parent_id(node_id):
        return T.parent(node_id).identifier

    # Phase 1: equalise the levels by lifting whichever node is deeper.
    while True:
        level_x, level_y = T.level(x), T.level(y)
        if level_x > level_y:
            x = parent_id(x)
        elif level_x < level_y:
            y = parent_id(y)
        else:
            break

    # Phase 2: climb from both sides until the paths converge.
    while x != y:
        x, y = parent_id(x), parent_id(y)

    return x
Example #5
0
 def test_modify_node_identifier_root(self):
     """Changing the root's identifier must update the root and child links."""
     family = Tree()
     family.create_node("Harry", "harry")
     family.create_node("Jane", "jane", parent="harry")

     # Rename the root node (identifier and tag at the same time).
     old_root_id = family['harry'].identifier
     family.update_node(old_root_id, identifier='xyz', tag='XYZ')

     renamed_root = family['xyz']
     janes_parent = family.parent('jane')
     self.assertTrue(family.root == 'xyz')
     self.assertTrue(renamed_root.tag == 'XYZ')
     self.assertEqual(janes_parent.identifier, 'xyz')
Example #6
0
def get_path_to_santa(orbital_tree: Tree) -> list:
	"""Collect the node identifiers visited on the way from 'YOU' to 'SAN'.

	Starting at the parent of 'YOU', climb towards the root, recording each
	node, until the subtree under the current node contains 'SAN'.  From
	there, every root-to-leaf path of that subtree which contains 'SAN' is
	appended without its last element.
	"""
	hops = []
	cursor = orbital_tree.parent('YOU')

	while True:
		branch = orbital_tree.subtree(cursor.identifier)
		if not branch.contains('SAN'):
			# 'SAN' is not below this node: record it and climb one level.
			hops.append(cursor.identifier)
			cursor = orbital_tree.parent(cursor.identifier)
			continue

		# Common ancestor reached: descend along the path(s) holding 'SAN'.
		found = False
		for route in branch.paths_to_leaves():
			if 'SAN' in route:
				hops += route[:-1]
				found = True
		if found:
			return hops
Example #7
0
 def test_subtree(self):
     """A deep-copied subtree is independent of, and re-rooted from, the source."""
     jane_branch = Tree(self.tree.subtree("jane"), deep=True)

     # "jane" becomes the root of the copy, so it has no parent there.
     self.assertIsNone(jane_branch.parent("jane"))

     # Mutating the deep copy must leave the original tree untouched.
     jane_branch["jane"].tag = "Sweeti"
     self.assertEqual(self.tree["jane"].tag, "Jane")

     # Levels are relative to each tree's own root.
     self.assertEqual(jane_branch.level("diane"), 1)
     self.assertEqual(jane_branch.level("jane"), 0)
     self.assertEqual(self.tree.level("jane"), 1)
Example #8
0
    def map_tree_to_program(self, tree: Tree) -> str:
        """Fold a tree with 2- or 3-child inner nodes into one program string.

        Leaves are mapped through ``self._node_to_type``; inner nodes are
        combined with ``self.merge_children`` once all of their children are
        resolved.  For three children, the children are ordered by the start
        of their span, the two outer ones are merged first and the result is
        then merged with the middle one.  Results are stored in
        ``self._node_to_subprog`` keyed by ``node.data.span``.

        Returns the root's ``get_value()`` wrapped as ``'answer ( ... )'``.
        """
        self._node_to_subprog = {}

        pending = []  # inner nodes still waiting to be resolved

        for leaf in tree.leaves():
            self._node_to_subprog[leaf.data.span] = self._node_to_type(leaf)
            leaf_parent = tree.parent(leaf.identifier)
            if leaf_parent and leaf_parent not in pending:
                pending.append(leaf_parent)

        while pending:
            node = pending.pop()
            kids = tree.children(node.identifier)
            assert len(kids) in [2, 3]

            # Not every child is resolved yet: requeue at the far end.
            if any(kid.data.span not in self._node_to_subprog
                   for kid in kids):
                pending.insert(0, node)
                continue

            if len(kids) == 3:
                kids.sort(key=lambda c: c.data.span[0])
                first, middle, last = [
                    self._node_to_subprog[kid.data.span] for kid in kids
                ]
                outer = self.merge_children(first, last, node)
                merged = self.merge_children(middle, outer, node)
            else:
                left, right = [
                    self._node_to_subprog[kid.data.span] for kid in kids
                ]
                merged = self.merge_children(left, right, node)
            self._node_to_subprog[node.data.span] = merged

            grandparent = tree.parent(node.identifier)
            if grandparent and grandparent not in pending:
                pending.insert(0, grandparent)

        # The root's sub-program is the program for the whole tree.
        root_node = tree.get_node(tree.root)
        inner_program = self._node_to_subprog[root_node.data.span].get_value()
        return 'answer ( {} )'.format(inner_program)
Example #9
0
def render_org_tree(tree: Tree, node: Node, payload='') -> str:
    """Render ``node`` and all of its descendants as '*'-prefixed headings.

    A node whose parent's identifier is 'vocabulary' (case-insensitive) is
    rendered by ``render_vocabulary_word``; any other node becomes a single
    heading line with ``depth + 1`` stars followed by its tag.  Children are
    rendered recursively and appended in the order ``tree.children`` yields.

    ``payload`` is text to prepend to the rendered output.
    """
    parent = tree.parent(node.identifier)
    is_vocabulary_word = (bool(parent)
                          and parent.identifier.lower() == 'vocabulary')

    if is_vocabulary_word:
        own_text = render_vocabulary_word(tree, node)
    else:
        stars = '*' * (tree.depth(node) + 1)
        own_text = stars + ' ' + node.tag + '\n'

    rendered_children = [
        render_org_tree(tree, tree[child.identifier])
        for child in tree.children(node.identifier)
    ]
    return payload + own_text + ''.join(rendered_children)
Example #10
0
    def __init__(self, holes=0):
        """Fill a 3x3x3x3 integer grid by randomised backtracking, then blank cells.

        Cells are visited in the order given by ``direct_product``.  Every
        placement is recorded as a node in a search tree whose ``data`` is
        ``[cell, board snapshot]`` and whose tag is the 1-based position of
        that cell in the visiting order (0 for the root).

        NOTE(review): ``pool`` is presumably the set of values still allowed
        at a cell of the given board -- confirm against its definition.

        Keyword Arguments:
            holes -- number of cells to reset to 0 after filling (default 0).
        """
        self.data = np.zeros((3, 3, 3, 3), dtype='int')

        digits = range(3)
        order = direct_product(digits, digits, digits, digits)

        def placed_value(tree_node):
            # Value this node wrote into its own cell of its board snapshot.
            cell_index, snapshot = tree_node.data
            return snapshot[tuple(cell_index)]

        i = 0
        search_tree = Tree()
        root = Node(i, 'root', data=[order[0], self.data.copy()])
        search_tree.add_node(root)
        cursor = root
        while i < len(order):
            i += 1
            cell = order[i - 1]
            a, b, c, d = cell
            # Candidates for this cell minus the values already tried from
            # the current search position.
            candidates = pool(self.data, a, b, c, d) - {
                placed_value(child)
                for child in search_tree.children(cursor.identifier)
            }
            if not candidates:
                # Dead end: climb until an ancestor has fewer children than
                # its pool size, then resume from that ancestor's snapshot.
                ancestor = search_tree.parent(cursor.identifier)
                while len(search_tree.children(ancestor.identifier)) == len(
                        pool(ancestor.data[1], *(ancestor.data[0]))):
                    cursor = ancestor
                    ancestor = search_tree.parent(cursor.identifier)
                cursor = ancestor
                self.data = cursor.data[1].copy()
                i = cursor.tag
                continue

            self.data[a, b, c, d] = np.random.choice(list(candidates))
            child = Node(i, data=[cell, self.data.copy()])
            search_tree.add_node(child, cursor)
            cursor = child

        # Keep the full solution, then zero out `holes` distinct cells.
        h = np.random.choice(len(order), size=holes, replace=False)
        self._answer = self.data.copy()
        self.holes = np.array(order)[h]
        self.data[tuple(self.holes.T.tolist())] = 0
Example #11
0
def get_intersection_tree(T1, T2):
    """Return a copy of T1 reduced to the nodes whose leaf set also occurs in T2.

    A node of T1 is kept when some node of T2 has exactly the same set of
    leaf ids (as reported by ``get_leaf_node_ids_for_node``).  Every other
    node is spliced out: its children are re-attached to its parent and the
    node itself is removed.  T1 and T2 are not modified.

    The root is never spliced out, because its children would have no parent
    to move to; it is kept even when it has no match in T2.
    """
    T = Tree(tree=T1, deep=True)
    # Snapshot the traversal of T1 up front; T is mutated while we iterate.
    T1_order = list(T1.expand_tree(mode=1))

    # T2 never changes, so collect its leaf sets once instead of once per
    # T1 node.  frozenset makes them usable for O(1) membership tests.
    t2_leaf_sets = {
        frozenset(get_leaf_node_ids_for_node(T2, nid))
        for nid in T2.expand_tree(mode=1)
    }

    for nid in T1_order:
        if frozenset(get_leaf_node_ids_for_node(T, nid)) in t2_leaf_sets:
            continue
        parent = T.parent(nid)
        if parent is None:
            # Unmatched root: cannot be removed without orphaning the tree.
            continue
        for child in T.children(nid):
            T.move_node(child.identifier, parent.identifier)
        T.remove_subtree(nid)
    return T
Example #12
0
class StateMachine(object):
    """Bookkeeping for one state machine: its state tree, events and effects."""

    def __init__(self, name):
        """Create the machine with empty registries and an initial 'Root' state."""
        self.name = name
        self.events, self.effects = {}, {}
        self.state_tree = Tree()
        self.current_state = None

        # No current state exists yet, so this call installs 'Root' as the
        # root of the state tree and as the current state.
        self.add_state('Root')

    def add_state(self, name):
        """Add a state called ``name`` beneath the current state.

        The very first state added becomes the tree root and the current
        state; later states are attached as children of the current state.
        """
        assert isinstance(name, str)
        new_state = State(name)
        node = Node(identifier=name, data=new_state)

        if self.current_state is not None:
            self.state_tree.add_node(node, self.current_state.name)
            return

        self.state_tree.add_node(node)
        self.current_state = new_state

    def add_event(self, ev):
        """Register an event under its name, replacing any previous one."""
        assert isinstance(ev, Event)
        event_name = ev.name
        self.events[event_name] = ev

    def add_effect(self, eff):
        """Register an effect under its name, replacing any previous one."""
        assert isinstance(eff, Effect)
        effect_name = eff.name
        self.effects[effect_name] = eff

    def enter_state(self, state):
        """Make ``state`` the current state."""
        self.current_state = state

    def exit_state(self, state):
        """Leave ``state``: its parent in the state tree becomes current."""
        parent_node = self.state_tree.parent(state.name)
        self.current_state = parent_node.data

    def get_state_by_name(self, state_name):
        """Return the state object stored in the tree under ``state_name``."""
        node = self.state_tree.get_node(state_name)
        return node.data
Example #13
0
  def crossOver(individualA, individualB):
    """Create a child individual by subtree crossover of two parents.

    A random node of (a copy of) A's tree is replaced by a random subtree of
    (a copy of) B's tree.  If the node picked in A is its root, the child is
    just the subtree taken from B.  The attempt is repeated until the
    resulting tree is no deeper than TREE_MAX_DEPTH.  Neither parent is
    modified.
    """
    tree = None

    # tree.depth() without arguments is the maximum level of the whole tree.
    # Passing the root node would return the root's own level (always 0) and
    # the depth limit would never trigger.
    while tree is None or tree.depth() > TREE_MAX_DEPTH:
      # Work on deep copies with fresh ids so the pasted subtree cannot
      # clash with identifiers already present in the receiving tree.
      treeA = Tree(tree = individualA.tree, deep=True)
      treeB = Tree(tree = individualB.tree, deep=True)
      regenerate_ids(treeA)
      regenerate_ids(treeB)
      removedNode = random.choice(treeA.all_nodes())
      addedNode = random.choice(treeB.all_nodes())

      addedSubtree = Tree(tree = treeB.subtree(addedNode.identifier), deep=True)

      if treeA.root == removedNode.identifier:
        # Replacing the root replaces the whole tree.
        tree = addedSubtree

      else:
        parent = treeA.parent(removedNode.identifier)
        treeA.remove_subtree(removedNode.identifier)
        treeA.paste(parent.identifier, addedSubtree)
        tree = treeA

    return Individual(tree)
Example #14
0
class PathList:
    """In-memory mirror of a remote disk's directory tree.

    Every remote entry is a node of ``self._tree`` identified by its file id,
    tagged with its name and carrying a ``FileInfo`` as data.  The synthetic
    node 'root' is the top of the tree.  ``FileInfo.type`` is True for files
    and False for directories.
    """

    def __init__(self, disk):
        self._tree = Tree()
        self._disk = disk
        self._tree.create_node(tag='root', identifier='root', data=FileInfo(type=False))
        # Default recursion depth for update_path_list.
        self.depth = 3

    def update_path_list(self, file_id='root', depth=None, is_fid=True, **kwargs):
        """Refresh the cached children of ``file_id`` from the remote disk.

        Children that no longer exist remotely are removed, existing ones are
        updated, new ones are created; directories are descended into while
        ``depth`` is non-zero.

        Arguments:
            file_id -- id of the directory to refresh (or a path if
                ``is_fid`` is False).
            depth -- remaining recursion depth; defaults to ``self.depth``.
            is_fid -- whether ``file_id`` is an id rather than a path.
            **kwargs -- internal recursion state: ``max_depth``,
                ``get_file_list_bar`` (progress bar) and ``ratio``.

        Returns:
            False if the remote listing is empty, True otherwise.
        """
        if depth is None:
            depth = self.depth
        kwargs.setdefault('max_depth', depth)
        max_depth = kwargs['max_depth']
        # Only build a progress bar when the caller did not hand one in;
        # recursive calls always pass the outermost bar along.
        if 'get_file_list_bar' not in kwargs:
            kwargs['get_file_list_bar'] = GetFileListBar(max_depth)
        kwargs.setdefault('ratio', 0)
        get_file_list_bar = kwargs['get_file_list_bar']
        ratio = kwargs['ratio']
        get_file_list_bar.update(refresh_line=False)
        if not is_fid:
            file_id = self.get_path_fid(file_id, update=False)
        file_list = self._disk.get_file_list(file_id)
        if not file_list:
            if depth == max_depth:
                get_file_list_bar.refresh_line()
            return False
        # Drop cached children that have disappeared remotely.
        remote_ids = {j['file_id'] for j in file_list}
        for node in self._tree.children(file_id):
            if node.identifier not in remote_ids:
                self._tree.remove_node(node.identifier)
        for i, info in enumerate(file_list):
            if depth == max_depth:
                # Only the outermost level drives the progress ratio.
                ratio = (i + 1) / len(file_list)
            get_file_list_bar.update(depth=max_depth - depth, ratio=ratio, refresh_line=True)
            file_info = self.get_file_info(info)[0]
            if self._tree.get_node(file_info.id):
                self._tree.update_node(file_info.id, data=file_info)
            else:
                self._tree.create_node(tag=file_info.name, identifier=file_info.id, data=file_info, parent=file_id)
            if not file_info.type and depth:
                self.update_path_list(file_id=file_info.id, depth=depth - 1, max_depth=max_depth,
                                      get_file_list_bar=get_file_list_bar, ratio=ratio)
        if depth == max_depth:
            get_file_list_bar.refresh_line()
        return True

    def check_path_diff(self, local_path, disk_path_list):
        """Compare a local directory with a remote listing.

        Arguments:
            local_path -- local directory to inspect.
            disk_path_list -- list of single-entry dicts ``{name: file_info}``
                where ``file_info`` has a 'data' entry (an object with
                ``type`` and ``content_hash``, or a falsy value) and
                optionally 'children' (a nested list of the same shape).

        Returns:
            De-duplicated list of ``Path`` objects that differ between the
            two sides; the order is unspecified.
        """
        p = Path(local_path)
        # Scan the directory once instead of once per comparison.
        local_paths = list(p.iterdir())
        local_set = set(local_paths)
        change_file_list = []
        for path in local_paths:
            flag = False  # becomes True once a matching remote entry is found
            for i, path_ in enumerate(disk_path_list, 1):
                name, file_info = list(path_.items())[0]
                # Remote entry that has no local counterpart.
                if p / name not in local_set:
                    change_file_list.append(p / name)
                if Path(path) == p / name:
                    # Directory on both sides: compare contents.
                    if Path(path).is_dir() and file_info['data'] and path.is_dir() != file_info['data'].type:
                        if 'children' in file_info:
                            children = file_info['children']
                            change_file_list.extend(self.check_path_diff(p / name, children))
                        elif list(path.iterdir()):
                            change_file_list.extend(list(path.iterdir()))
                    # Same kind on both sides (file/file or dir/dir).
                    if file_info and file_info['data'] and path.is_file() == file_info['data'].type:
                        if path.is_file() and get_sha1(path).lower() != file_info['data'].content_hash.lower():
                            # Same name but different content.
                            if i == len(disk_path_list):
                                change_file_list.append(path)
                            continue
                        else:
                            flag = True
                # Local entry with no match after the last remote entry.
                if not flag and i == len(disk_path_list):
                    change_file_list.append(path)
        if not local_paths:
            # Nothing local: every remote entry is a difference.
            for path_ in disk_path_list:
                name, file_info = list(path_.items())[0]
                change_file_list.append(p / name)
        if not len(disk_path_list):
            # Nothing remote: every local entry is a difference.
            for path_ in local_paths:
                change_file_list.append(path_)
        return list(set(change_file_list))

    @staticmethod
    def get_file_info(info):
        """Convert one raw entry dict (or a list of them) into FileInfo objects.

        Timestamps are parsed with '%Y-%m-%dT%H:%M:%S.%fZ'.  A missing
        'created_at' falls back to the current local time as a
        ``struct_time`` for both files and directories, so ``ctime`` always
        has the same type.

        Returns:
            A list of FileInfo, one per input entry (also for a single dict).
        """
        time_format = '%Y-%m-%dT%H:%M:%S.%fZ'
        if not isinstance(info, list):
            info_list = [info]
        else:
            info_list = info
        file_info_list = []
        for item in info_list:
            if 'created_at' in item:
                ctime = time.strptime(item['created_at'], time_format)
            else:
                ctime = time.localtime()
            update_time = time.strptime(item['updated_at'], time_format)
            if item['type'] == 'file':
                file_info = FileInfo(name=item['name'], id=item['file_id'], pid=item['parent_file_id'], type=True,
                                     ctime=ctime,
                                     update_time=update_time,
                                     hidden=item.get('hidden'), category=item['category'],
                                     content_type=item.get('content_type'),
                                     size=item['size'], content_hash_name=item.get('content_hash_name'),
                                     content_hash=item.get('content_hash'),
                                     download_url=item['download_url'] if 'download_url' in item else '',
                                     video_media_metadata=item[
                                         'video_media_metadata'] if 'video_media_metadata' in item else None,
                                     video_preview_metadata=item[
                                         'video_preview_metadata'] if 'video_preview_metadata' in item else None)
            else:
                file_info = FileInfo(name=item['name'], id=item['file_id'], pid=item['parent_file_id'], type=False,
                                     ctime=ctime,
                                     update_time=update_time,
                                     hidden=item.get('hidden'))
            file_info_list.append(file_info)
        return file_info_list

    def tree(self, path='root', stdout=sys.stdout):
        """Refresh and display the subtree rooted at ``path``.

        Raises:
            FileNotFoundError -- if ``path`` cannot be resolved.
        """
        file_id = self.get_path_fid(path, update=False)
        # Validate before refreshing so an unknown path never reaches the disk API.
        if not file_id:
            raise FileNotFoundError(path)
        self.update_path_list(file_id)
        return self._tree.show(file_id, stdout=stdout)

    def get_path_list(self, path, update=True):
        """Return the FileInfo entries found at ``path``.

        Raises:
            FileNotFoundError -- if ``path`` cannot be resolved.
        """
        file_id = self.get_path_fid(path, update=update)
        try:
            return self.get_fid_list(file_id, update=update)
        except FileNotFoundError:
            raise FileNotFoundError(path)

    def get_fid_list(self, file_id, update=True):
        """Return the FileInfo entries for ``file_id``.

        For a file this is a one-element list with the file itself; for a
        directory it is the list of its cached children.  If the id is not in
        the cache tree, the remote listing is converted directly.

        Raises:
            FileNotFoundError -- if ``file_id`` is falsy.
        """
        if not file_id:
            raise FileNotFoundError
        try:
            self.auto_update_path_list(update, file_id)
        except NodeIDAbsentError:
            # get_file_info already accepts a list and returns a flat list;
            # mapping it per entry would yield a list of one-element lists.
            return self.get_file_info(self._disk.get_file_list(file_id) or [])
        if not self._tree.get_node(file_id):
            return []
        if file_id != 'root' and self._tree.get_node(file_id).data.type:
            return [self._tree.get_node(file_id).data]
        return [i.data for i in self._tree.children(file_id)]

    def get_path_fid(self, path, file_id='root', update=True):
        """Resolve ``path`` to a file id by walking the cache tree.

        Arguments:
            path -- remote path; '', '/', '\\', '.' and 'root' mean the root.
            file_id -- id of the node to start the walk from.
            update -- forwarded to ``auto_update_path_list`` when a node has
                no cached children yet.

        Returns:
            The file id, or False if any component cannot be found.
        """
        if str(path) in ('', '/', '\\', '.', 'root'):
            return 'root'
        path = AliyunpanPath(path)
        flag = False
        path_list = list(filter(None, path.split()))
        # Guard the index: the path may consist of separators only.
        if path_list and path_list[0] == 'root':
            path_list = path_list[1:]
        for i in path_list:
            flag = False
            node_list = self._tree.children(file_id)
            if not node_list:
                self.auto_update_path_list(update, file_id)
                node_list = self._tree.children(file_id)
            for j in node_list:
                if i == j.tag:
                    flag = True
                    file_id = j.identifier
                    break
            if not flag:
                return False
        if flag:
            return file_id
        return False

    def get_path_node(self, path, update=True):
        """Return the tree node for ``path``, or False if it cannot be resolved."""
        file_id = self.get_path_fid(path, update=update)
        if file_id:
            return self._tree.get_node(file_id)
        return False

    def get_path_parent_node(self, path, update=True):
        """Return the parent node of ``path``, or False if there is none."""
        file_id = self.get_path_fid(path, update=update)
        if file_id:
            node = self._tree.parent(file_id)
            if node:
                return node
        return False

    def auto_update_path_list(self, update=True, file_id=None):
        """Refresh the cache where needed.

        With ``update`` false and a ``file_id`` given, only that node's direct
        children are refreshed (depth 0).  With ``update`` true, a full
        refresh from the root runs only while the tree holds nothing but the
        root node.  In every other case nothing happens and None is returned.
        """
        if not update and file_id:
            return self.update_path_list(file_id, depth=0)
        elif update and len(self._tree) == 1:
            return self.update_path_list()
Example #15
0
class StepParse:
    def __init__(self):
        pass

    def load_step(self, step_filename):

        self.nauo_lines = []
        self.prod_def_lines = []
        self.prod_def_form_lines = []
        self.prod_lines = []
        self.filename = os.path.splitext(step_filename)[0]

        line_hold = ''
        line_type = ''

        # Find all search lines
        with open(step_filename) as f:
            for line in f:
                # TH: read pointer of lines as they are read, so if the file has text wrap it will notice and add it to the following lines
                index = re.search("#(.*)=", line)
                if index:
                    # TH: if not none then it is the start of a line so read it
                    # want to hold line until it has checked next line
                    # if next line is a new indexed line then save previous line
                    if line_hold:
                        if line_type == 'nauo':
                            self.nauo_lines.append(line_hold)
                        elif line_type == 'prod_def':
                            self.prod_def_lines.append(line_hold)
                        elif line_type == 'prod_def_form':
                            self.prod_def_form_lines.append(line_hold)
                        elif line_type == 'prod':
                            self.prod_lines.append(line_hold)
                        line_hold = ''
                        line_type = ''

                    prev_index = True  # TH remember previous line had an index
                    if 'NEXT_ASSEMBLY_USAGE_OCCURRENCE' in line:
                        line_hold = line.rstrip()
                        line_type = 'nauo'
                    elif ('PRODUCT_DEFINITION ' in line
                          or 'PRODUCT_DEFINITION(' in line):
                        line_hold = line.rstrip()
                        line_type = 'prod_def'
                    elif 'PRODUCT_DEFINITION_FORMATION' in line:
                        line_hold = line.rstrip()
                        line_type = 'prod_def_form'
                    elif ('PRODUCT ' in line or 'PRODUCT(' in line):
                        line_hold = line.rstrip()
                        line_type = 'prod'
                else:
                    prev_index = False
                    #TH: if end of file and previous line was held
                    if 'ENDSEC;' in line:
                        if line_hold:
                            if line_type == 'nauo':
                                self.nauo_lines.append(line_hold)
                            elif line_type == 'prod_def':
                                self.prod_def_lines.append(line_hold)
                            elif line_type == 'prod_def_form':
                                self.prod_def_form_lines.append(line_hold)
                            elif line_type == 'prod':
                                self.prod_lines.append(line_hold)
                            line_hold = ''
                            line_type = ''
                    else:
                        #TH: if not end of file
                        line_hold = line_hold + line.rstrip()

        self.nauo_refs = []
        self.prod_def_refs = []
        self.prod_def_form_refs = []
        self.prod_refs = []

        # TH: added 'replace(","," ").' to replace ',' with a space to make the spilt easier if there are not spaces inbetween the words'
        # Find all (# hashed) line references and product names
        # TH: it might be worth finding a different way of extracting data we do want rather than fixes to get rid of the data we don't
        for j, el_ in enumerate(self.nauo_lines):
            self.nauo_refs.append([
                el.rstrip(',')
                for el in el_.replace(",", " ").replace("=", " ").split()
                if el.startswith('#')
            ])
        for j, el_ in enumerate(self.prod_def_lines):
            self.prod_def_refs.append([
                el.rstrip(',')
                for el in el_.replace(",", " ").replace("=", " ").split()
                if el.startswith('#')
            ])
        for j, el_ in enumerate(self.prod_def_form_lines):
            self.prod_def_form_refs.append([
                el.rstrip(',')
                for el in el_.replace(",", " ").replace("=", " ").split()
                if el.startswith('#')
            ])
        for j, el_ in enumerate(self.prod_lines):
            self.prod_refs.append([
                el.strip(',') for el in el_.replace(",", " ").replace(
                    "(", " ").replace("=", " ").split() if el.startswith('#')
            ])
            self.prod_refs[j].append(el_.split("'")[1])

        # Get first two items in each sublist (as third is shape ref)
        #
        # First item is 'PRODUCT_DEFINITION' ref
        # Second item is 'PRODUCT_DEFINITION_FORMATION <etc>' ref
        self.prod_all_refs = [el[:2] for el in self.prod_def_refs]

        # Match up all references down to level of product name
        for j, el_ in enumerate(self.prod_all_refs):

            # Add 'PRODUCT_DEFINITION' ref
            for i, el in enumerate(self.prod_def_form_refs):
                if el[0] == el_[1]:
                    el_.append(el[1])
                    break

            # Add names from 'PRODUCT_DEFINITION' lines
            for i, el in enumerate(self.prod_refs):
                if el[0] == el_[2]:
                    el_.append(el[2])
                    break

        # Find all parent and child relationships (3rd and 2nd item in each sublist)
        self.parent_refs = [el[1] for el in self.nauo_refs]
        self.child_refs = [el[2] for el in self.nauo_refs]

        # Find distinct parts and assemblies via set operations; returns list, so no repetition of items
        self.all_type_refs = set(self.child_refs) | set(self.parent_refs)
        self.ass_type_refs = set(self.parent_refs)
        self.part_type_refs = set(self.child_refs) - set(self.parent_refs)
        #TH: find root node
        self.root_type_refs = set(self.parent_refs) - set(self.child_refs)

        # Create simple parts dictionary (ref + label)
        self.part_dict = {el[0]: el[3] for el in self.prod_all_refs}
#        self.part_dict_inv = {el[3]:el[0] for el in self.prod_all_refs}

    def show_values(self):
        # TH: basic testing, if needed these could be spilt up
        print(self.nauo_lines)
        print(self.prod_def_lines)
        print(self.prod_def_form_lines)
        print(self.prod_lines)
        print(self.nauo_refs)
        print(self.prod_def_refs)
        print(self.prod_def_form_refs)
        print(self.prod_refs)

#    HR: "create_dict" replaced by list comprehension elsewhere
#
#    def create_dict(self):
#
#        # TH: links nauo number with a name and creates dict
#        self.part_dict  = {}
#        for part in self.all_type_refs:
#            for sublist in self.prod_def_refs:
#                if sublist[0] == part:
#                    prod_loc = '#' + re.findall('\d+',sublist[1])[0]
#                    pass
#            for sublist in self.prod_def_form_refs:
#                if sublist[0] == prod_loc:
#                    prod_loc = '#' + str(re.findall('\d+',sublist[1])[0])
#                    pass
#            for sublist in self.prod_refs:
#                if sublist[0] == prod_loc:
#                    part_name = sublist[2]
#
#            self.part_dict[part] = part_name

    def create_tree(self):
        """Build ``self.tree`` from the assembly relations found by load_step.

        Nodes get consecutive integer identifiers starting at 0 for the root,
        assigned in depth-first order; ``self.tree_dict`` maps each identifier
        to the part reference it stands for.  Every node is tagged with the
        part name and carries ``{'ref': <part reference>}`` as data.

        If ``self.part_dict`` is empty, only the empty tree is created.
        Otherwise ``self.get_levels()`` is called once the tree is complete.
        """

        self.tree = Tree()
        #TH: check if there are any parts to make a tree from, if not don't bother
        if not self.part_dict:
            return

        #TH: find root node
        root_node_ref = list(self.root_type_refs)[0]
        # HR added part reference as data for later use
        self.tree.create_node(self.part_dict[root_node_ref],
                              0,
                              data={'ref': root_node_ref})

        #TH: create dict for tree, as each node needs a unique name
        self.tree_dict = {0: root_node_ref}

        def attach_children(parent_id):
            # Add every child of the given node, descending into each child
            # before moving on to its next sibling.
            parent_ref = self.tree_dict[parent_id]
            for line in self.nauo_refs:
                if line[1] != parent_ref:
                    continue
                # Identifiers are consecutive, so the next free one equals
                # the number of nodes registered so far.
                child_id = len(self.tree_dict)
                child_ref = str(line[2])
                self.tree_dict[child_id] = child_ref
                # HR added part reference as data for later use
                self.tree.create_node(self.part_dict[line[2]],
                                      child_id,
                                      parent=parent_id,
                                      data={'ref': child_ref})
                attach_children(child_id)

        attach_children(0)
        self.appended = False

        self.get_levels()

    def get_levels(self):
        """Compute lattice levels for every tree node and their inverse lookups.

        For each node ``self.levels[id]`` holds ``n_p`` and ``n_a``. A leaf gets
        ``self.part_level`` for both. A non-leaf gets the sum of its children's
        ``n_p`` and the sum of its children's ``n_a`` plus one. The distinct
        non-leaf values are collected in ``self.levels_set_p`` and
        ``self.levels_set_a``.

        Also sets ``leaf_ids``, ``all_ids``, ``non_leaf_ids``, calls
        ``self.create_lattice()``, and finally builds ``levels_p_sorted``,
        ``levels_a_sorted``, ``levels_p_inv`` and ``levels_a_inv`` (level value
        -> list of node identifiers).
        """
        tree = self.tree

        # Reset all level bookkeeping
        self.levels = {}
        self.levels_set_p = set()
        self.levels_set_a = set()
        self.leaf_ids = [node.identifier for node in tree.leaves()]
        self.all_ids = list(tree.nodes)
        self.non_leaf_ids = set(self.all_ids) - set(self.leaf_ids)

        self.part_level = 1

        def fill_level(depth):
            # Snapshot the nodes sitting at this tree depth before filling them in
            at_depth = [nid for nid in tree.nodes if tree.level(nid) == depth]
            for nid in at_depth:
                if nid in self.leaf_ids:
                    # Leaf: both counts are the base part level
                    self.levels[nid] = {'n_p': self.part_level,
                                        'n_a': self.part_level}
                    continue
                # Assembly: children are one level deeper, so already populated
                children = tree.is_branch(nid)
                n_parts = sum(self.levels[c]['n_p'] for c in children)
                n_assemblies = sum(self.levels[c]['n_a'] for c in children) + 1
                self.levels[nid] = {'n_p': n_parts, 'n_a': n_assemblies}
                self.levels_set_p.add(n_parts)
                self.levels_set_a.add(n_assemblies)

        # Work from the deepest level up to the root
        for depth in reversed(range(tree.depth() + 1)):
            fill_level(depth)

        self.create_lattice()

        self.levels_p_sorted = sorted(self.levels_set_p)
        self.levels_a_sorted = sorted(self.levels_set_a)

        def invert_levels(level_values, key):
            # Map each level value to the node identifiers holding that value
            inverse = {self.part_level: []}
            for value in level_values:
                inverse[value] = []
            for nid, counts in self.levels.items():
                inverse[counts[key]].append(nid)
            return inverse

        self.levels_p_inv = invert_levels(self.levels_p_sorted, 'n_p')
        self.levels_a_inv = invert_levels(self.levels_a_sorted, 'n_a')

    def get_all_children(self, id_):
        """Return the identifiers of every descendant of node ``id_``.

        The result is in breadth-first (level) order: direct children first,
        then grandchildren, and so on. A node without children yields ``[]``.

        The traversal is written as an explicit level-by-level walk rather than
        appending to the list that is being iterated, so the order no longer
        depends on list aliasing.
        """
        descendants = []
        frontier = [id_]
        while frontier:
            # Collect the next generation, keeping parents in their listed order
            next_frontier = []
            for parent in frontier:
                next_frontier.extend(
                    el.identifier for el in self.tree.children(parent))
            descendants.extend(next_frontier)
            frontier = next_frontier
        return descendants

    def create_lattice(self):
        """Build the directed graph ``self.g`` that mirrors ``self.tree``.

        Every tree node becomes a graph node with ``parent``, ``label`` and
        ``colour`` attributes (the root's ``parent`` is -1), and every non-root
        node gets an edge pointing to its parent. Each node also receives a
        ``pos`` attribute:

        * a single-node tree puts its only node at ``(0, 0)``;
        * otherwise leaves are spread along y = 1 at x = i / number_of_leaves,
          and each assembly is placed at the mean x of its children with
          y equal to its ``n_a`` level.

        Reads ``self.leaf_ids``, ``self.levels`` and ``self.levels_set_a``,
        which are populated by ``get_levels``.
        """
        tree = self.tree

        self.g = nx.DiGraph()
        self.default_colour = 'r'

        # Root first; its parent is -1 so the "parent" attribute stays an int
        root_id = tree.root
        self.g.add_node(root_id,
                        parent=-1,
                        label=tree.get_node(root_id).tag,
                        colour=self.default_colour)

        # Remaining nodes reuse the tree's identifiers
        non_root_ids = [key for key in tree.nodes if key != root_id]
        for key in non_root_ids:
            self.g.add_node(key,
                            parent=tree.parent(key).identifier,
                            label=tree.get_node(key).tag,
                            colour=self.default_colour)

        # Edges go in a second pass so that all nodes are inserted (with their
        # attributes) in tree order before any edge refers to them
        for key in non_root_ids:
            self.g.add_edge(key, tree.parent(key).identifier)

        # The leaf list is needed several times below; fetch it once instead of
        # rebuilding it for every candidate child
        leaf_nodes = tree.leaves()

        # HR 6/3/20 quick fix: a single-node tree has no levels to position
        # against, so place the node directly and stop
        if tree.size() == 1:
            self.g.nodes[leaf_nodes[-1].identifier]['pos'] = (0, 0)
            return

        leaf_id_set = {node.identifier for node in leaf_nodes}
        no_leaves = len(leaf_nodes)

        # Lay the leaves out left to right, grouped by their parent
        leaf_parents = {tree.parent(el).identifier for el in self.leaf_ids}
        i = 0
        for parent_id in leaf_parents:
            for child_id in tree.is_branch(parent_id):
                if child_id in leaf_id_set:
                    self.g.nodes[child_id]['pos'] = ((i / (no_leaves)), 1)
                    i += 1

        # Walk the assembly levels upwards; a parent's n_a is always larger
        # than its children's, so child positions exist when they are read
        for level in sorted(self.levels_set_a):
            node_ids = [k for k, v in self.levels.items() if v['n_a'] == level]
            for node_id in node_ids:
                child_ids = tree.is_branch(node_id)
                x_total = 0
                for child_id in child_ids:
                    x_total += self.g.nodes[child_id]['pos'][0]
                # Centre the assembly horizontally over its children
                self.g.nodes[node_id]['pos'] = (x_total / len(child_ids), level)

    def print_tree(self):
        """Print the tree, building it first if showing it fails.

        If ``self.tree.show()`` raises (for example because ``self.tree`` has
        not been created yet), ``create_tree`` is called and the tree is shown
        again. An error from the second attempt propagates to the caller.
        """
        try:
            self.tree.show()
        except Exception:
            # Was a bare ``except:``, which also swallowed KeyboardInterrupt
            # and SystemExit; ordinary errors still trigger the rebuild.
            self.create_tree()
            self.tree.show()

    def tree_to_json(self, save_to_file=False, filename='file', path=''):
        """Return the tree as a JSON string, optionally saving it to disk.

        Args:
            save_to_file: When truthy, also write the JSON to a file.
            filename: File name without extension; ``.json`` is appended.
            path: Directory to write into. Empty means ``filename`` is used
                as given (relative to the current directory).

        Returns:
            The JSON string produced by ``self.tree.to_json()``, or ``None``
            (after printing a message) when the tree has no nodes.
        """
        if self.tree.size() == 0:
            print("no tree to print")
            return None

        data = self.tree.to_json()
        # Truthiness instead of ``== True`` so any truthy flag is honoured
        if save_to_file:
            file_path = os.path.join(path, filename) if path else filename
            with open(file_path + '.json', 'w') as outfile:
                # Round-trip through the parser so the file holds the same
                # serialisation the original implementation wrote
                json.dump(json.loads(data), outfile)

        return data
Example #16
0
class PathList:
    """Local cache of a remote drive's directory listing, stored as a tree.

    Node identifiers are the drive's file ids, node tags are file names and
    node data are ``FileInfo`` records. The tree always contains a ``'root'``
    node. ``disk`` must provide ``get_file_list(file_id)`` returning a mapping
    whose ``'items'`` entry lists the children of ``file_id``.
    """

    # Layout of the ``created_at`` / ``updated_at`` strings in listing items
    _TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ'

    def __init__(self, disk):
        self._tree = Tree()
        self._disk = disk
        self._tree.create_node(tag='root', identifier='root')
        # Default number of directory levels fetched below the starting folder
        self.depth = 3

    def update_path_list(self, file_id='root', depth=None, is_fid=True):
        """Fetch the listing of ``file_id`` and merge it into the tree.

        Args:
            file_id: File id of the folder to list, or a path when
                ``is_fid`` is false.
            depth: How many further directory levels to recurse into;
                ``None`` means ``self.depth``.
            is_fid: Whether ``file_id`` is already a file id.

        Returns:
            ``False`` when the listing has no ``'items'`` entry, else ``True``.
        """
        if depth is None:
            depth = self.depth
        if not is_fid:
            file_id = self.get_path_fid(file_id, auto_update=False)
        file_list = self._disk.get_file_list(file_id)
        if 'items' not in file_list:
            return False
        for item in file_list['items']:
            is_file = item['type'] == 'file'
            # Fields shared by files and folders
            fields = dict(
                name=item['name'],
                id=item['file_id'],
                pid=item['parent_file_id'],
                type=is_file,
                ctime=time.strptime(item['created_at'], self._TIME_FORMAT),
                update_time=time.strptime(item['updated_at'], self._TIME_FORMAT),
                hidden=item['hidden'])
            if is_file:
                # Only files carry content metadata
                fields.update(
                    category=item['category'],
                    size=item['size'],
                    content_hash_name=item['content_hash_name'],
                    content_hash=item['content_hash'],
                    download_url=item['download_url'])
            file_info = FileInfo(**fields)
            if self._tree.get_node(file_info.id):
                # Refresh the entry itself. This used to pass the parent's id
                # (``file_id``), which overwrote the folder's data with its
                # child's record.
                self._tree.update_node(file_info.id, data=file_info)
            else:
                self._tree.create_node(tag=file_info.name,
                                       identifier=file_info.id,
                                       data=file_info,
                                       parent=file_id)
            # Descend into folders while there is depth budget left
            if not file_info.type and depth:
                self.update_path_list(file_id=file_info.id, depth=depth - 1)
        return True

    def tree(self, path='root', auto_update=True):
        """Print the cached subtree rooted at ``path``.

        Raises:
            Exception: If ``path`` cannot be resolved.
        """
        file_id = self.get_path_fid(path, auto_update=auto_update)
        if not file_id:
            raise Exception('No such file or directory')
        self._tree.show(file_id)

    def get_path_list(self, path, auto_update=True):
        """Return the ``FileInfo`` records found at ``path``."""
        file_id = self.get_path_fid(path, auto_update=auto_update)
        return self.get_fid_list(file_id, auto_update=auto_update)

    def get_fid_list(self, file_id, auto_update=True):
        """Return the records for ``file_id``.

        For a file this is a one-element list with its own record; for a
        folder (or ``'root'``) it is the records of its direct children.

        Raises:
            Exception: If ``file_id`` is falsy.
        """
        self.auto_update_path_list(auto_update)
        if not file_id:
            raise Exception('No such file or directory')
        if file_id != 'root' and self._tree.get_node(file_id).data.type:
            return [self._tree.get_node(file_id).data]
        return [i.data for i in self._tree.children(file_id)]

    def get_path_fid(self, path, file_id='root', auto_update=True):
        """Resolve ``path`` to a file id by walking tags from ``file_id``.

        Returns:
            ``'root'`` for root-like paths, the resolved file id otherwise, or
            ``False`` when any component of the path is not found.
        """
        self.auto_update_path_list(auto_update)
        path = Path(path)
        if str(path) in ('', '/', '\\', '.', 'root'):
            return 'root'
        parts = [part for part in path.as_posix().split('/') if part]
        if not parts:
            return False
        for part in parts:
            for child in self._tree.children(file_id):
                if child.tag == part:
                    file_id = child.identifier
                    break
            else:
                # Stop at the first missing component. Previously the walk
                # carried on from the same folder, so 'missing/foo' could
                # wrongly resolve to a sibling named 'foo'.
                return False
        return file_id

    def get_path_node(self, path, auto_update=True):
        """Return the tree node at ``path``, or ``False`` if it is unknown."""
        file_id = self.get_path_fid(path, auto_update=auto_update)
        if file_id:
            return self._tree.get_node(file_id)
        return False

    def get_path_parent_node(self, path, auto_update=True):
        """Return the parent node of ``path``, or ``False`` if there is none."""
        file_id = self.get_path_fid(path, auto_update=auto_update)
        if file_id:
            node = self._tree.parent(file_id)
            if node:
                return node
        return False

    def auto_update_path_list(self, auto_update=True):
        """Populate the tree on first use.

        Only fetches when ``auto_update`` is true and the tree holds nothing
        but the root node; returns the fetch result in that case, else ``None``.
        """
        if auto_update and len(self._tree) == 1:
            return self.update_path_list()
Example #17
0
                                  "depth": depth})
        previous = tree.get_node(str(ncbi_taxonomy_id))
    elif depth > tree.depth(previous):
        tree.create_node(tag = clade_name,
                         identifier = ncbi_taxonomy_id,
                         data = { "num_reads": number_reads_taxon,
                                  "rank_code": rank_code,
                                  "depth": depth},
                         parent = previous)
        previous = tree.get_node(str(ncbi_taxonomy_id))
    elif depth == tree.depth(previous):
        tree.create_node(tag = clade_name,
                         identifier = ncbi_taxonomy_id,
                         data = { "num_reads": number_reads_taxon,
                                  "rank_code": rank_code,
                                  "depth": depth},
                         parent = tree.parent(previous.identifier))
        previous = tree.get_node(str(ncbi_taxonomy_id))
    elif depth < tree.depth(previous):
        previous_search = previous
        while(tree.depth(previous_search) > depth):
            previous_search = tree.parent(previous_search.identifier)
        tree.create_node(tag = clade_name,
                         identifier = ncbi_taxonomy_id,
                         data = { "num_reads": number_reads_taxon,
                                  "rank_code": rank_code,
                                  "depth": depth},
                         parent = tree.parent(previous_search.identifier))
        previous = tree.get_node(str(ncbi_taxonomy_id))

Example #18
0
def build_tree(arg):
    """Build the tree-based k-mer index and write it under ``tree_dir``.

    ``arg`` is indexed positionally:

    * ``arg[0]`` -- path of the tab-separated matrix file (the header row
      lists genome paths; existing comments say values are similarities,
      i.e. 1 - distance),
    * ``arg[1]`` -- path of the initial clustering file (tab-separated;
      column 0 is the cluster id, column 2 a comma-separated member list),
    * ``arg[2]`` -- output directory,
    * ``arg[3]`` -- k-mer size,
    * ``arg[4]`` -- ``[alpha_ratio, minsize, maxsize, max_cls_size]``.

    When the clustering file has a single line, a one-node tree is written
    and the function returns early. Otherwise the hierarchy is built with
    ``hierarchy``, leaves with fewer than ``minsize`` remaining k-mers are
    merged with another cluster and the hierarchy rebuilt until none remain,
    and k-mer sets are then assigned to every node.

    Files written into ``tree_dir``: ``tree_structure.txt``, ``kmers/<id>``,
    ``overlapping_info/<id>`` (+ ``_supple``), ``reconstructed_nodes.txt``,
    ``node_length.txt``, ``kmer.fa``; additionally ``tree.pkl`` and a copy of
    the clustering file on the single-cluster path, or
    ``hclsMap_95_recls.txt`` on the general path.

    Returns ``None``. Progress and timings are printed to stdout.
    """
    # read parameters
    start = time.time()
    dist_matrix_file = arg[0]
    cls_file = arg[1]
    tree_dir = arg[2]
    ksize = arg[3]
    params = arg[4]
    alpha_ratio = params[0]
    minsize = params[1]
    maxsize = params[2]
    max_cls_size = params[3]

    # save genomes info
    # fna_seq: genome name (file stem) <-> column index; fna_path: index -> path
    fna_seq = bidict.bidict()  # : 1
    fna_path = {}

    # read dist matrix (represented by similarity: 1-dist)
    # output: dist, fna_path, fna_seq
    f = open(dist_matrix_file, "r")
    lines = f.readlines()
    f.close()
    index = 0
    # NOTE(review): `d` and `bac_label` are not read again before being
    # reassigned / at all in this function -- likely leftovers.
    d = lines[0].rstrip().split("\t")[1:]
    bac_label = 0
    for i in lines[0].rstrip().split("\t")[1:]:
        # Genome name = file name up to its first '.'
        temp = i[i.rfind('/') + 1:].split(".")[0]
        fna_seq[temp] = index
        fna_path[index] = i
        index += 1
    dist = []
    for line in lines[1:]:
        dist.append(
            [np.array(list(map(float,
                               line.rstrip().split("\t")[1:])))])
    dist = np.concatenate(dist)

    # read initial clustering results. fna_mapping, from 1 for indexing
    # fna_mapping: cluster id -> set of genome indices
    f = open(cls_file, 'r')
    lines = f.readlines()
    f.close()
    fna_mapping = defaultdict(set)
    for line in lines:
        temp = line.rstrip().split("\t")
        for i in temp[2].split(","):
            fna_mapping[int(temp[0])].add(fna_seq[i])
    # Special case: a single cluster gives a one-node tree; handle and return
    if (len(lines) == 1):
        tree = Tree()
        kmer_sta = defaultdict(int)
        T0 = Node(identifier=list(fna_mapping.keys())[0])
        tree.add_node(T0)
        kmer_sta = defaultdict(int)
        kmer_index_dict = bidict.bidict()
        kmer_index = 1
        # alpha_ratio from params is overridden to 1 on this path
        alpha_ratio = 1
        Lv = set()
        for i in fna_mapping[T0.identifier]:
            for seq_record in SeqIO.parse(fna_path[i], "fasta"):
                temp = str(seq_record.seq)
                # NOTE(review): the range stops before len(temp) - ksize, so the
                # last full-length window is not visited -- confirm this is intended.
                for k in range(0, len(temp) - ksize):
                    forward = temp[k:k + ksize]
                    reverse = seqpy.revcomp(forward)
                    # Count both strands; unseen k-mers get the next integer id
                    for kmer in [forward, reverse]:
                        try:
                            kmer_sta[kmer_index_dict[kmer]] += 1
                        except KeyError:
                            kmer_index_dict[kmer] = kmer_index
                            kmer_sta[kmer_index] += 1
                            kmer_index += 1
        # Keep k-mers counted at least `alpha` times
        alpha = len(fna_mapping[T0.identifier]) * alpha_ratio
        for x in kmer_sta:
            if (kmer_sta[x] >= alpha):
                Lv.add(x)
        print(T0.identifier, len(Lv))
        # save2file
        kmerlist = set()
        pkl.dump(tree, open(tree_dir + '/tree.pkl', 'wb'))
        f = open(tree_dir + "/tree_structure.txt", "w")
        # NOTE(review): tree_dir / cls_file are interpolated into shell commands
        # unquoted; paths with spaces or shell metacharacters will misbehave.
        os.system("mkdir " + tree_dir + "/kmers")
        os.system("mkdir " + tree_dir + "/overlapping_info")
        f.write("%d\t" % T0.identifier)
        f.close()
        os.system(f'cp {cls_file} {tree_dir}/')
        # Create an empty reconstructed_nodes.txt
        f = open(tree_dir + "/reconstructed_nodes.txt", "w")
        f.close()
        if (len(Lv) > maxsize):
            # NOTE(review): random.sample() on a set was deprecated in Python 3.9
            # and raises TypeError from 3.11 -- confirm the target version.
            Lv = set(random.sample(Lv, maxsize))
        kmerlist = Lv
        length = len(Lv)
        f = open(tree_dir + "/kmers/" + str(T0.identifier), "w")
        for j in Lv:
            f.write("%d " % j)
        f.close()
        # NOTE(review): this handle is rebound below without an explicit close().
        f = open(tree_dir + "/node_length.txt", "w")
        f.write("%d\t%d\n" % (T0.identifier, length))
        # kmer_mapping: original k-mer id -> position in kmer.fa
        kmer_mapping = {}
        index = 0
        f = open(tree_dir + "/kmer.fa", "w")
        for i in kmerlist:
            f.write(">1\n")
            f.write(kmer_index_dict.inv[i])
            kmer_mapping[i] = index
            index += 1
            f.write("\n")
        f.close()

        # change index
        # Rewrite every kmers/ file using the kmer.fa positions
        files = os.listdir(tree_dir + "/kmers")
        for i in files:
            # NOTE(review): the read handle is never explicitly closed.
            f = open(tree_dir + "/kmers/" + i, "r")
            lines = f.readlines()
            if (len(lines) == 0):
                continue
            d = lines[0].rstrip().split(" ")
            d = map(int, d)
            f = open(tree_dir + "/kmers/" + i, "w")
            for j in d:
                f.write("%d " % kmer_mapping[j])
            f.close()
        end = time.time()
        print(
            '- The total running time of tree-based indexing struture building is ',
            str(end - start), ' s\n')
        return
    # initially build tree
    cls_dist, mapping, tree, depths, depths_mapping = hierarchy(
        fna_mapping, dist)

    # initially extract k-mers
    kmer_index_dict = bidict.bidict()
    kmer_index = 1
    Lv = defaultdict(set)
    spec = defaultdict(set)  # k-mers <= alpha
    leaves = tree.leaves()
    for i in leaves:
        kmer_index = extract_kmers(fna_mapping[i.identifier], fna_path, ksize,
                                   kmer_index_dict, kmer_index, Lv, spec,
                                   tree_dir, alpha_ratio, i.identifier)
    end = time.time()
    print('- The total running time of k-mer extraction is ', str(end - start),
          ' s\n')
    start = time.time()

    # leaf nodes check
    # recls_label becomes 1 once any re-clustering has happened
    recls_label = 0

    leaves_check = []
    check_waitlist = reversed(leaves)
    # Repeat: check leaves -> merge undersized ones -> rebuild hierarchy,
    # until a check pass finds no undersized leaf
    while (True):
        if (recls_label):
            cls_dist, mapping, tree, depths, depths_mapping = hierarchy(
                fna_mapping, dist)
            leaves = tree.leaves()
            temp = {}
            temp2 = []
            # Keep only waitlisted cluster ids that still exist
            for i in check_waitlist:
                if (i in fna_mapping):
                    temp2.append(i)
            check_waitlist = temp2.copy()
            for i in check_waitlist:
                temp[tree.get_node(i)] = depths[tree.get_node(i)]
            check_waitlist = []
            # Re-order the waitlist (now nodes) by descending depth value
            a = sorted(temp.items(), key=lambda x: x[1], reverse=True)
            for i in a:
                check_waitlist.append(i[0])
            # Extract k-mers for clusters that do not have a k-mer set yet
            for i in fna_mapping:
                if (i not in Lv):
                    kmer_index = extract_kmers(fna_mapping[i], fna_path, ksize,
                                               kmer_index_dict, kmer_index, Lv,
                                               spec, tree_dir, alpha_ratio, i)
        higher_union = defaultdict(set)
        for i in check_waitlist:
            diff, diff_nodes = get_leaf_union(depths[i], higher_union,
                                              depths_mapping, Lv, spec, i)
            # Remove k-mers covered by `diff` and by the returned nodes' sets
            kmer_t = Lv[i.identifier] - diff
            for j in diff_nodes:
                kmer_t = kmer_t - Lv[j.identifier]
            for j in diff_nodes:
                kmer_t = kmer_t - spec[j.identifier]
            print(str(i.identifier) + " checking", end="\t")
            print(len(kmer_t))
            if (len(kmer_t) < minsize):
                leaves_check.append(i)
        if (len(leaves_check) > 0):
            recls_label = 1
        else:
            break
        # re-clustering
        check_waitlist = []
        while (recls_label == 1):
            # New cluster id = current max id + 1
            cluster_id = max(list(fna_mapping.keys())) + 1
            check_waitlist.append(cluster_id)
            leaf_a = leaves_check[0].identifier
            row_index = mapping[leaf_a]
            # Partner = column with the largest value in leaf_a's row
            column_index = cls_dist[row_index].argmax()
            leaf_b = mapping.inv[column_index]  # (leaf_a, leaf_b)
            temp2 = fna_mapping[leaf_a] | fna_mapping[leaf_b]
            print(cluster_id, leaf_a, leaf_b, temp2)
            del fna_mapping[leaf_a], fna_mapping[leaf_b]
            if (leaf_a in Lv):
                del Lv[leaf_a], spec[leaf_a]
            if (leaf_b in Lv):
                del Lv[leaf_b], spec[leaf_b]
            del leaves_check[0]
            if (tree.get_node(leaf_b) in leaves_check):
                leaves_check.remove(tree.get_node(leaf_b))
            # Merged row = element-wise max of the two rows; append it as a new
            # row and column (self entry -1), then drop the two old rows/columns
            temp1 = [
                np.concatenate([[cls_dist[row_index]],
                                [cls_dist[column_index]]]).max(axis=0)
            ]
            cls_dist = np.concatenate([cls_dist, temp1], axis=0)
            temp1 = np.append(temp1, -1)
            temp1 = np.vstack(temp1)
            cls_dist = np.concatenate([cls_dist, temp1], axis=1)
            cls_dist = np.delete(cls_dist, [row_index, column_index], axis=0)
            cls_dist = np.delete(cls_dist, [row_index, column_index], axis=1)
            # change mapping
            # Shift remaining indices down to account for the removed rows
            del mapping[leaf_a], mapping[leaf_b]
            pending = list(fna_mapping.keys())
            pending.sort()
            for i in pending:
                if (mapping[i] > min([row_index, column_index])
                        and mapping[i] < max([row_index, column_index])):
                    mapping[i] -= 1
                elif (mapping[i] > max([row_index, column_index])):
                    mapping[i] -= 2
            fna_mapping[cluster_id] = temp2
            mapping[cluster_id] = len(cls_dist) - 1
            if (len(leaves_check) == 0):
                break
    del higher_union

    # rebuild identifiers
    # Without re-clustering ids are kept; otherwise leaves are renumbered
    # 1..n_leaves and internal nodes continue from n_leaves + 1
    all_nodes = tree.all_nodes()
    all_leaves_id = set([])
    leaves = set(tree.leaves())
    for i in leaves:
        all_leaves_id.add(i.identifier)
    id_mapping = bidict.bidict()
    index = 1
    index_internal = len(leaves) + 1
    for i in all_nodes:
        if (recls_label == 0):
            id_mapping[i.identifier] = i.identifier
        elif (i in leaves):
            id_mapping[i.identifier] = index
            index += 1
        else:
            id_mapping[i.identifier] = index_internal
            index_internal += 1
    leaves_identifier = list(range(1, len(leaves) + 1))
    all_identifier = list(id_mapping.values())
    all_identifier.sort()

    # save2file
    # tree_structure.txt row: id, parent id (N for all_nodes[0]),
    # "childA childB" (N for leaves), genome name when the node has one genome
    f = open(tree_dir + "/tree_structure.txt", "w")
    os.system("mkdir " + tree_dir + "/kmers")
    os.system("mkdir " + tree_dir + "/overlapping_info")
    for nn in all_identifier:
        i = id_mapping.inv[nn]
        f.write("%d\t" % id_mapping[i])
        if (i == all_nodes[0].identifier):
            f.write("N\t")
        else:
            f.write("%d\t" % id_mapping[tree.parent(i).identifier])
        if (nn in leaves_identifier):
            f.write("N\t")
        else:
            # Unpacking requires exactly two children per internal node
            [child_a, child_b] = tree.children(i)
            f.write("%d %d\t" % (id_mapping[child_a.identifier],
                                 id_mapping[child_b.identifier]))
        if (len(fna_mapping[i]) == 1):
            temp = list(fna_mapping[i])[0]
            temp = fna_seq.inv[temp]
            f.write("%s" % temp)
        f.write("\n")
    f.close()
    # hclsMap_95_recls.txt row: leaf id, member count, comma-separated genome names
    f = open(tree_dir + "/hclsMap_95_recls.txt", "w")
    for nn in leaves_identifier:
        i = id_mapping.inv[nn]
        f.write("%d\t%d\t" % (nn, len(fna_mapping[i])))
        temp1 = list(fna_mapping[i])
        for j in temp1:
            temp = fna_seq.inv[j]
            if (j == temp1[-1]):
                f.write("%s\n" % temp)
            else:
                f.write("%s," % temp)
    f.close()
    end = time.time()
    print('- The total running time of re-clustering is ', str(end - start),
          ' s\n')
    start = time.time()

    # build indexing structure
    kmerlist = set([])  # all kmers used
    length = {}
    # With more leaves than max_cls_size, undersized nodes are written as-is
    # instead of being queued for reconstruction
    overload_label = 0
    if (len(tree.leaves()) > max_cls_size):
        overload_label = 1
    # from bottom to top (unique k-mers)
    uniq_temp = defaultdict(set)
    rebuilt_nodes = []
    descendant = defaultdict(set)  # including itself
    ancestor = defaultdict(set)
    descendant_leaves = defaultdict(set)
    # ancestor[x] = ids on the path from all_nodes[0] to x, x included
    ancestor[all_nodes[0].identifier].add(all_nodes[0].identifier)
    for i in all_nodes[1:]:
        ancestor[i.identifier] = ancestor[tree.parent(
            i.identifier).identifier].copy()
        ancestor[i.identifier].add(i.identifier)
    # presumably all_nodes lists parents before children, so the reversed
    # walk sees children first -- TODO confirm against hierarchy()
    for i in reversed(all_nodes):
        print(str(id_mapping[i.identifier]) + " k-mer removing...")
        if (i in leaves):
            uniq_temp[i.identifier] = Lv[i.identifier]
            descendant_leaves[i.identifier].add(i.identifier)
        else:
            (child_a, child_b) = tree.children(i.identifier)
            descendant[i.identifier] = descendant[
                child_a.identifier] | descendant[child_b.identifier]
            descendant_leaves[i.identifier] = descendant_leaves[
                child_a.identifier] | descendant_leaves[child_b.identifier]
            # Shared k-mers move up to the parent and are removed from both children
            uniq_temp[i.identifier] = uniq_temp[
                child_a.identifier] & uniq_temp[child_b.identifier]
            uniq_temp[child_a.identifier] = uniq_temp[
                child_a.identifier] - uniq_temp[i.identifier]
            uniq_temp[child_b.identifier] = uniq_temp[
                child_b.identifier] - uniq_temp[i.identifier]
        descendant[i.identifier].add(i.identifier)
    all_nodes_id = set(id_mapping.keys())
    # remove overlapping
    for i in reversed(all_nodes):
        print(str(id_mapping[i.identifier]) + " k-mer set building...")
        # no difference with sibling, subtree and ancestors
        if (i == all_nodes[0]):
            kmer_t = uniq_temp[i.identifier]
        else:
            diff = {}
            temp = all_nodes_id - descendant[i.identifier] - set([
                tree.siblings(i.identifier)[0].identifier
            ]) - ancestor[i.identifier]
            for j in temp:
                diff[j] = len(uniq_temp[j])
            # Subtract the other nodes' sets, largest first
            a = sorted(diff.items(), key=lambda x: x[1], reverse=True)
            kmer_t = uniq_temp[i.identifier]
            for j in a:
                k = j[0]
                kmer_t = kmer_t - uniq_temp[k]
            # remove special k-mers
            temp = all_leaves_id - descendant_leaves[i.identifier]
            diff = {}
            for j in temp:
                diff[j] = len(spec[j])
            a = sorted(diff.items(), key=lambda x: x[1], reverse=True)
            for j in a:
                k = j[0]
                kmer_t = kmer_t - spec[k]
        if (len(kmer_t) < minsize and overload_label == 0):
            rebuilt_nodes.append(i)
            print("%d waiting for reconstruction..." %
                  id_mapping[i.identifier])
        else:
            if (len(kmer_t) > maxsize):
                # NOTE(review): random.sample() on a set raises TypeError on
                # Python 3.11+ -- confirm the target version.
                kmer_t = set(random.sample(kmer_t, maxsize))
            f = open(tree_dir + "/kmers/" + str(id_mapping[i.identifier]), "w")
            for j in kmer_t:
                f.write("%d " % j)
            f.close()
            # `length` is keyed by the node object, not its identifier
            length[i] = len(kmer_t)
            kmerlist = kmerlist | kmer_t
    del uniq_temp

    # rebuild nodes
    # overlapping[leaf id][rebuilt node id] = positions (within the rebuilt
    # node's k-mer file) of k-mers that the leaf's Lv set also contains
    overlapping = defaultdict(dict)
    intersection = defaultdict(set)
    higher_union = defaultdict(set)
    del_label = {}
    for i in leaves:
        del_label[i.identifier] = [0, 0]
    for i in rebuilt_nodes:
        print(str(id_mapping[i.identifier]) + " k-mer set rebuilding...")
        kmer_t = get_intersect(intersection, descendant_leaves[i.identifier],
                               Lv, del_label, i.identifier)
        diff = get_diff(higher_union, descendant_leaves, depths, all_nodes, i,
                        Lv, spec, del_label)
        for j in diff:
            kmer_t = kmer_t - j
        # Leaves whose depth value is smaller than this node's
        lower_leaves = set([])
        for j in leaves:
            if (depths[j] < depths[i]):
                lower_leaves.add(j)
        if (len(kmer_t) > maxsize):
            # Keep the maxsize k-mers shared with the fewest lower leaves
            # (ties broken by k-mer id)
            kmer_overlapping_sta = defaultdict(int)
            for j in lower_leaves:
                kmer_o = Lv[j.identifier] & kmer_t
                for k in kmer_o:
                    kmer_overlapping_sta[k] += 1
            temp = sorted(kmer_overlapping_sta.items(),
                          key=lambda kv: (kv[1], kv[0]))
            kmer_t = set([])
            # NOTE(review): only k-mers present in at least one lower leaf are
            # in `temp`; if it has fewer than maxsize entries this raises
            # IndexError -- confirm.
            for j in range(0, maxsize):
                kmer_t.add(temp[j][0])
        nkmer = {}
        # NOTE(review): this handle is not explicitly closed inside the loop.
        f = open(tree_dir + "/kmers/" + str(id_mapping[i.identifier]), "w")
        index = 0
        for j in kmer_t:
            f.write("%d " % j)
            nkmer[j] = index
            index += 1
        length[i] = len(kmer_t)
        kmerlist = kmerlist | kmer_t
        # save overlapping info
        for j in lower_leaves:
            temp = Lv[j.identifier] & kmer_t
            if (len(temp) > 0):
                ii = id_mapping[i.identifier]
                jj = id_mapping[j.identifier]
                overlapping[jj][ii] = set([])
                for k in temp:
                    overlapping[jj][ii].add(nkmer[k])
        delete(Lv, spec, del_label)

    # One pair of files per leaf: <id> holds alternating "node id" / "positions"
    # lines, <id>_supple holds "node id, line offset" pairs
    for i in overlapping:
        f = open(tree_dir + "/overlapping_info/" + str(i), "w")
        f1 = open(tree_dir + "/overlapping_info/" + str(i) + "_supple", "w")
        count = -1
        for j in overlapping[i]:
            # NOTE(review): len(overlapping[i]) cannot be 0 inside this loop;
            # possibly overlapping[i][j] was intended -- confirm.
            if (len(overlapping[i]) != 0):
                f.write("%d\n" % j)
                for k in overlapping[i][j]:
                    f.write("%d " % k)
                f.write("\n")
                count += 2
                f1.write("%d %d\n" % (j, count))
        f.close()
        f1.close()

    # final saving
    f = open(tree_dir + "/reconstructed_nodes.txt", "w")
    for i in rebuilt_nodes:
        f.write("%d\n" % id_mapping[i.identifier])
    f.close()

    f = open(tree_dir + "/node_length.txt", "w")
    for nn in all_identifier:
        i = id_mapping.inv[nn]
        f.write("%d\t%d\n" % (nn, length[tree[i]]))
    f.close()

    # kmer_mapping: original k-mer id -> position in kmer.fa
    kmer_mapping = {}
    index = 0
    f = open(tree_dir + "/kmer.fa", "w")
    for i in kmerlist:
        f.write(">1\n")
        f.write(kmer_index_dict.inv[i])
        kmer_mapping[i] = index
        index += 1
        f.write("\n")
    f.close()

    # change index
    # Rewrite every kmers/ file using the kmer.fa positions
    files = os.listdir(tree_dir + "/kmers")
    for i in files:
        # NOTE(review): the read handle is never explicitly closed.
        f = open(tree_dir + "/kmers/" + i, "r")
        lines = f.readlines()
        if (len(lines) == 0):
            continue
        d = lines[0].rstrip().split(" ")
        d = map(int, d)
        f = open(tree_dir + "/kmers/" + i, "w")
        for j in d:
            f.write("%d " % kmer_mapping[j])
        f.close()

    end = time.time()
    print(
        '- The total running time of tree-based indexing struture building is ',
        str(end - start), ' s\n')
Example #19
0
class PathList:
    def __init__(self, disk):
        """Create an empty path cache on top of *disk*.

        The cache is a tree holding a single ``'root'`` node; ``depth`` is the
        default number of directory levels ``update_path_list`` descends.
        """
        self._disk = disk
        self.depth = 3
        self._tree = Tree()
        self._tree.create_node(identifier='root', tag='root')

    def update_path_list(self, file_id='root', depth=None, is_fid=True):
        """Refresh the cached children of one directory from the disk backend.

        Args:
            file_id: Identifier of the directory to list, or a path when
                ``is_fid`` is false.
            depth: Number of further directory levels to descend into.
                ``None`` means ``self.depth``; ``0`` lists this level only.
            is_fid: Whether ``file_id`` is already an identifier.

        Returns:
            ``True`` when the backend returned a listing, ``False`` when it
            returned nothing.
        """
        time_format = '%Y-%m-%dT%H:%M:%S.%fZ'
        if depth is None:
            depth = self.depth
        if not is_fid:
            file_id = self.get_path_fid(file_id, update=False)
        file_list = self._disk.get_file_list(file_id)
        if not file_list:
            return False
        for entry in file_list:
            is_file = entry['type'] == 'file'
            # Fields shared by files and directories.
            fields = dict(
                name=entry['name'],
                id=entry['file_id'],
                pid=entry['parent_file_id'],
                type=is_file,
                ctime=time.strptime(entry['created_at'], time_format),
                update_time=time.strptime(entry['updated_at'], time_format),
                hidden=entry['hidden'])
            if is_file:
                # Only files carry content metadata; the download URL is optional.
                fields.update(
                    category=entry['category'],
                    content_type=entry['content_type'],
                    size=entry['size'],
                    content_hash_name=entry['content_hash_name'],
                    content_hash=entry['content_hash'],
                    download_url=entry['download_url']
                    if 'download_url' in entry else '')
            file_info = FileInfo(**fields)
            if self._tree.get_node(file_info.id):
                # Refresh the entry's own node; updating ``file_id`` here would
                # overwrite the parent directory's data with its child's.
                self._tree.update_node(file_info.id, data=file_info)
            else:
                self._tree.create_node(tag=file_info.name,
                                       identifier=file_info.id,
                                       data=file_info,
                                       parent=file_id)
            if not file_info.type and depth:
                self.update_path_list(file_id=file_info.id, depth=depth - 1)
        return True

    def tree(self, path='root'):
        file_id = self.get_path_fid(path, update=False)
        self.update_path_list(file_id)
        if not file_id:
            raise FileNotFoundError(path)
        self._tree.show(file_id)

    def get_path_list(self, path, update=True):
        file_id = self.get_path_fid(path, update=update)
        return self.get_fid_list(file_id, update=update)

    def get_fid_list(self, file_id, update=True):
        if not file_id:
            raise FileNotFoundError(Path)
        self.auto_update_path_list(update, file_id)
        if file_id != 'root' and self._tree.get_node(file_id).data.type:
            return [self._tree.get_node(file_id).data]
        return [i.data for i in self._tree.children(file_id)]

    def get_path_fid(self, path, file_id='root', update=True):
        path = PurePosixPath(Path(path).as_posix())
        if str(path) in ('', '/', '\\', '.', 'root'):
            return 'root'
        flag = False
        path_list = list(filter(None, str(path).split('/')))
        if path_list[0] == 'root':
            path_list = path_list[1:]
        for i in path_list:
            flag = False
            node_list = self._tree.children(file_id)
            if not node_list:
                self.auto_update_path_list(update, file_id)
                node_list = self._tree.children(file_id)
            for j in node_list:
                if i == j.tag:
                    flag = True
                    file_id = j.identifier
                    break
            if not flag:
                return False
        if flag:
            return file_id
        return False

    def get_path_node(self, path, update=True):
        file_id = self.get_path_fid(path, update=update)
        if file_id:
            return self._tree.get_node(file_id)
        return False

    def get_path_parent_node(self, path, update=True):
        file_id = self.get_path_fid(path, update=update)
        if file_id:
            node = self._tree.parent(file_id)
            if node:
                return node
        return False

    def auto_update_path_list(self, update=True, file_id=None):
        if not update and file_id:
            return self.update_path_list(file_id, depth=0)
        elif update and len(self._tree) == 1:
            return self.update_path_list()
Example #20
0
class LuaDec:
    def __init__(self, fileName, format = "luadec"):
        """Load *fileName* and immediately disassemble it.

        The file is read into memory, its header is validated, and the
        top-level function (with everything nested in it) is parsed and
        printed. ``format`` selects the output layout; the code checks for
        ``"luadec"`` and ``"luaasm"``.
        """
        self.format = format
        self.ptr = self.pc = 0
        self.tree = Tree()
        # Order matters: the buffer must exist before the header check, and
        # the header check positions ``ptr`` for the first function.
        self.readFile(fileName)
        self.readHeader()
        self.readFunction()

    def readFile(self, fileName):
        f = open(fileName, "rb")
        self.fileBuf = f.read()
        f.close()

    def readUInt32(self):
        result = struct.unpack("<I", self.fileBuf[self.ptr:self.ptr + 4])[0]
        self.ptr += 4
        return result

    def readUInt64(self):
        result = struct.unpack("<Q", self.fileBuf[self.ptr:self.ptr + 8])[0]
        self.ptr += 8
        return result

    def formatValue(self, val):
        """Render a decoded constant the way it is shown in the listing.

        Args:
            val: A constant value: ``str``, ``bool``, ``None``, ``float`` or
                anything else.

        Returns:
            The string wrapped in double quotes, ``"true"``/``"false"`` for
            booleans, ``"nil"`` for ``None``, an ``int`` for floats with an
            integral value, and *val* unchanged otherwise.
        """
        if isinstance(val, str):
            return "\"{}\"".format(val)
        # bool is tested before the numeric case so True/False never print as numbers.
        if isinstance(val, bool):
            return "true" if val else "false"
        if val is None:
            return "nil"
        # is_integer() is False for inf/nan, where int(val) would raise.
        if isinstance(val, float) and val.is_integer():
            return int(val)
        return val

    def processUpvalue(self, i, funcName):
        if i[0] == 1:
            if funcName == "root":
                return "G"
            return "UR{}".format(i[1])
        elif i[0] == 0:
            pNode = self.tree.parent(funcName)
            result = self.processUpvalue(pNode.data['upvalues'][i[1]], pNode.identifier)
            if result[-1] != "G":
                return "U" + result
            else:
                return result
        else:
            raise Exception("Unexpected upvalue {}".format(i[0]))

    def readHeader(self):
        magic = self.fileBuf[:4]
        if magic != b"\x1bLua":
            raise Exception("Unknown magic: {0}".format(magic.hex()))

        version = self.fileBuf[4]
        if version != 82:
            raise Exception("This program support ONLY Lua 5.2")

        lua_tail = self.fileBuf[12:18]
        if lua_tail != b"\x19\x93\r\n\x1a\n":
            raise Exception("Unexcepted lua_tail value: {0}".format(lua_tail.hex()))
        self.ptr = 18

    def readFunction(self, parent=None):
        """Parse the function prototype at ``self.ptr``, print it, then recurse into its children.

        The prototype's instructions, constants and upvalues are stored as a
        node of ``self.tree`` (named ``"root"`` when ``parent`` is falsy,
        otherwise ``"function"`` plus index suffixes). Header information,
        instructions, constants and upvalues are then printed in the layout
        selected by ``self.format``. Nested prototypes are skipped on the
        first pass and parsed recursively at the end, after which
        ``self.ptr`` is left just past this prototype.

        Args:
            parent: Tree identifier of the enclosing function, or ``None``
                for the top-level function.

        Raises:
            Exception: on an unknown constant type tag, or a string constant
                whose final byte is not 0.
        """
        # Work out this function's node name in the tree
        if parent:
            funcName = "function"
            funcSuffix = []
            # (original author's remark: Python has no do...while, hence the
            # priming step before the loop)
            # (original author's remark: don't ask how this works, it just works)
            # One suffix per ancestor level: the parent's current child count
            # first, then (child count - 1) for every level above it.
            pNode = self.tree.get_node(parent).identifier
            funcSuffix.append("_{0}".format(len(self.tree.children(pNode))))
            while self.tree.parent(pNode):
                pNode = self.tree.parent(pNode).identifier
                funcSuffix.append("_{0}".format(len(self.tree.children(pNode)) - 1))
            
            # Suffixes were collected innermost-first; emit them outermost-first.
            funcSuffix.reverse()
            for i in funcSuffix:
                funcName += i
        else:
            funcName = "root"
        #self.tree.show()

        #ProtoHeader
        # Two little-endian uint32 values followed by three single bytes (11 bytes).
        protoheader = struct.unpack("<IIccc", self.fileBuf[self.ptr:self.ptr + 11])
        self.ptr += 11
        lineDefined     = protoheader[0]
        lastLineDefined = protoheader[1]
        numParams       = ord(protoheader[2])
        is_vararg       = ord(protoheader[3])
        maxStackSize    = ord(protoheader[4])
        
        #Code
        sizeCode = self.readUInt32()
        instructions = []
        #print("Code total size: {0}".format(sizeCode))
        for i in range(sizeCode):
            ins = self.readUInt32()
            instructions.append(ins)
            #self.processInstruction(ins)
            #print("Instruction: {0}".format(hex(ins)))

        #Constants
        sizeConstants = self.readUInt32()
        constants = []
        #print("Constants total size: {0}".format(sizeConstants))
        for i in range(sizeConstants):
            # Each constant starts with a one-byte type tag.
            const_type = self.fileBuf[self.ptr]
            self.ptr += 1
            if const_type == const.LUA_DATATYPE['LUA_TNIL']:
                const_val = None
                const_type = "nil"
            elif const_type == const.LUA_DATATYPE['LUA_TNUMBER']:
                # A Lua number is a double (8 bytes)
                const_val = struct.unpack("<d", self.fileBuf[self.ptr:self.ptr + 8])[0]
                self.ptr += 8
                const_type = "number"
            elif const_type == const.LUA_DATATYPE['LUA_TBOOLEAN']:
                const_val = bool(self.fileBuf[self.ptr])
                self.ptr += 1
                const_type = "bool"
            elif const_type == const.LUA_DATATYPE['LUA_TSTRING']:
                # The stored length includes the terminating 0 byte, which is
                # left out of the decoded value and checked below.
                str_len = self.readUInt32()
                buf = self.fileBuf[self.ptr:self.ptr + str_len - 1]
                try:
                    const_val = str(buf, encoding="utf8")
                except UnicodeDecodeError:
                    # Not valid UTF-8: render every byte as "\<decimal value>".
                    const_val = ""
                    for i in buf:
                        const_val += "\\{}".format(i)
                self.ptr += str_len
                const_type = "string"
                if self.fileBuf[self.ptr - 1] != 0:
                    raise Exception("Bad string")
            else:
                raise Exception("Undefined constant type {0}.".format(hex(const_type)))
            constants.append([const_val, const_type])
            #print("Constant: {0}".format(const_val))

        #Skip Protos
        ptrBackupStart = self.ptr # remember where the protos start; the upvalues etc. that follow them are handled first
        sizeProtos = self.readUInt32()
        for i in range(sizeProtos):
            self.skipFunction()

        #Upvalue
        sizeUpvalue = self.readUInt32()
        upvalues = []
        #print("Upvalue total size: {0}".format(sizeUpvalue))
        for i in range(sizeUpvalue):
            # Two bytes per upvalue: the instack flag, then the index.
            instack = self.fileBuf[self.ptr]
            idx     = self.fileBuf[self.ptr + 1]
            self.ptr += 2
            upvalues.append([instack, idx])
            #print("Upvalue: {0} {1}".format(instack, idx))

        #srcName
        sizeSrcName = self.readUInt32()
        #print("srcName size: {0}".format(sizeSrcName))
        if sizeSrcName > 0:
            # srcName is decoded but not used anywhere later in this method.
            srcName = str(self.fileBuf[self.ptr:self.ptr + sizeSrcName], encoding="utf8")
            self.ptr += sizeSrcName
            #print("srcName: " + srcName)

        #Lines
        # NOTE(review): the pointer advances by sizeLines bytes, i.e. one byte
        # per entry; confirm the entry width for chunks where this is non-zero.
        sizeLines = self.readUInt32()
        self.ptr += sizeLines

        #LocVars
        sizeLocVars = self.readUInt32()
        #for i in sizeLocVars:
        #    varname_size = 
        #TODO: handle sizeLocVars != 0 (chunk that was not stripped)

        #UpvalNames
        # Only the count is consumed; any names that follow are not skipped.
        sizeUpvalNames = self.readUInt32()

        # Store the parsed contents in the tree
        data = {
            "instructions": instructions,
            "constants":    constants,
            "upvalues":     upvalues,
        }
        self.tree.create_node(funcName, funcName, parent=parent, data=data)
        
        if self.format == "luaasm":
            print("\n.fn(R{}{})".format(numParams, ", __va_args__" if is_vararg else ""))
        print("; {:<20s}{}".format("Function", funcName))
        print("; {:<20s}{}".format("Defined from line", lineDefined))
        print("; {:<20s}{}".format("Defined to line", lastLineDefined))
        print("; {:<20s}{}".format("#Upvalues", sizeUpvalue))
        print("; {:<20s}{}".format("#Parameters", numParams))
        print("; {:<20s}{}".format("Is_vararg", is_vararg))
        if self.format == "luaasm":
            print("; {:<20s}{}".format("Max Stack Size", maxStackSize))
        else:
            print("; {:<20s}{}\n".format("Max Stack Size", maxStackSize))
        
        # Build a combined lookup table of upvalues and constants
        # (keys "K<n>" and "U<n>"), used when formatting instruction comments.
        fmtVals = {}
        count = 0
        for i in data['constants']:
            fmtVals["K{}".format(count)] = self.formatValue(i[0])
            count += 1
        count = 0
        for i in data['upvalues']:
            fmtVals["U{}".format(count)] = self.processUpvalue(i, funcName)
            count += 1

        # NOTE(review): with format == "luadec" the instructions are processed
        # here and again by the unconditional loop further down, so each
        # instruction line is printed twice -- confirm this is intended.
        if self.format == "luadec":
            # Process each instruction
            self.pc = 0
            self.currFunc = funcName
            self.fmtVals = fmtVals
            for i in data['instructions']:
                self.processInstruction(i)
                self.pc += 1
        
        if self.format == "luadec":
            print("\n")

        if self.format == "luaasm":
            print("\n.instruction")
        # Process each instruction
        self.pc = 0
        self.currFunc = funcName
        self.fmtVals = fmtVals
        for i in data['instructions']:
            self.processInstruction(i)
            self.pc += 1

        if self.format == "luaasm":
            print("\n.const")
        else:
            print("\n; Constants")
        count = 0
        for i in data['constants']:
            print("K{:<5s} = {}".format(str(count), self.formatValue(i[0])))
            count += 1

        if self.format == "luaasm":
            print("\n.upvalue")
        else:
            print("\n; Upvalues")
        count = 0
        for i in data['upvalues']:
            if self.format == "luaasm":
                print("U{:<5s} = L{} R{}".format(str(count), i[0], i[1]))
            else:
                print("{:>5s}\t{}\t{}".format(str(count), i[0], i[1]))
            count += 1

        #Proto
        # Rewind to the nested prototypes skipped earlier, parse them for
        # real, then restore the pointer to the end of this prototype.
        ptrBackupEnd = self.ptr
        self.ptr = ptrBackupStart
        sizeProtos = self.readUInt32()
        #print("Protos total size: {0}".format(sizeProtos))
        for i in range(sizeProtos):
            self.readFunction(parent=funcName)
        self.ptr = ptrBackupEnd

        if self.format == "luaasm":
            print(".endfn\n")


    # Skip over a function prototype; used when only the pointer position after it is needed
    def skipFunction(self):
        #print("Start skipping Proto, current ptr at {0}".format(hex(self.ptr)))
        #ProtoHeader
        self.ptr += 11

        #Code
        sizeCode = self.readUInt32()
        for i in range(sizeCode):
            self.ptr += 4

        #Constants
        sizeConstants = self.readUInt32()
        for i in range(sizeConstants):
            const_type = self.fileBuf[self.ptr]
            self.ptr += 1
            if const_type == const.LUA_DATATYPE['LUA_TNIL']:
                pass
            elif const_type == const.LUA_DATATYPE['LUA_TNUMBER']:
                self.ptr += 8
            elif const_type == const.LUA_DATATYPE['LUA_TBOOLEAN']:
                self.ptr += 1
            elif const_type == const.LUA_DATATYPE['LUA_TSTRING']:
                str_len = self.readUInt32()
                self.ptr += str_len
            else:
                raise Exception("Undefined constant type {0}.".format(hex(const_type)))

        #Protos
        sizeProtos = self.readUInt32()
        for i in range(sizeProtos):
            self.skipFunction()

        #Upvalue
        sizeUpvalue = self.readUInt32()
        for i in range(sizeUpvalue):
            self.ptr += 2

        #srcName
        sizeSrcName = self.readUInt32()
        if sizeSrcName > 0:
            self.ptr += sizeSrcName

        #Lines
        sizeLines = self.readUInt32()
        self.ptr += sizeLines

        #LocVars
        sizeLocVars = self.readUInt32()
        #for i in sizeLocVars:
        #    varname_size = 
        #TODO: sizeLocVars不为0的情况(未strip)

        #UpvalNames
        sizeUpvalNames = self.readUInt32()
        #print("End skipping Proto. Current ptr at {0}".format(hex(self.ptr)))
    def getExtraArg(self):
        """Fetch the argument carried by the instruction after the current one.

        Returns:
            ``(True, Ax)`` when the instruction at ``self.pc + 1`` of the
            current function is ``OP_EXTRAARG`` (``Ax`` being everything above
            its low 6 opcode bits), otherwise ``(False, <error text>)``.
        """
        instructions = self.tree.get_node(self.currFunc).data['instructions']
        following = instructions[self.pc + 1]
        if const.opCode[following % (1 << 6)] != "OP_EXTRAARG":
            return False, "ERROR: C == 0 but no OP_EXTRAARG followed."
        return True, (following >> 6)
            
    def processInstruction(self, ins):
        """Decode one instruction word and print its disassembly line.

        The low 6 bits select the opcode; ``const.opMode[opCode]`` supplies
        the A/B/C operand modes (indices 1..3) and the encoding name
        (index 4). Operands are rendered as ``R<n>``/``K<n>``/``U<n>`` or
        plain numbers, a pseudo-code comment is built from
        ``const.pseudoCode`` and ``self.fmtVals`` and then adjusted for a
        number of opcodes, and the line is printed in the layout selected by
        ``self.format``.

        Args:
            ins: The instruction as an integer.

        Raises:
            Exception: if the encoding name or an operand mode is not one of
                the handled values.
        """
        opCode = ins % (1 << 6)
        opMode = const.opMode[opCode]
        A = 0
        B = 0
        C = 0

        # Split the word into fields according to the encoding: A is bits
        # 6..13; iABC puts C in bits 14..22 and B in the bits from 23 up.
        if opMode[4] == "iABC":
            A   = (ins >> 6 ) % (1 << 8)
            B   = (ins >> 23)#% (1 << 9)
            C   = (ins >> 14) % (1 << 9)
        elif opMode[4] == "iABx":
            A   = (ins >> 6 ) % (1 << 8)
            B   = (ins >> 14)#% (1 << 18)
        elif opMode[4] == "iAsBx":
            # Signed operand: stored with a bias of (1 << 17) - 1.
            A   = (ins >> 6 ) % (1 << 8)
            B   = (ins >> 14) - (1 << 17) + 1
        elif opMode[4] == "iAx":
            A   = (ins >> 6 )#% (1 << 26)
        else:
            raise Exception("Unknown opMode {0}".format(opMode[4]))

        #format A
        if opMode[1] == 1:
            parsedA = "R{0}".format(A)
        elif opMode[1] == 0:
            if const.opCode[opCode] == "OP_SETTABUP":
                parsedA = "U{0}".format(A)
            elif const.opCode[opCode] in ["OP_EQ", "OP_LT", "OP_LE"]:
                # For the comparisons A is shown as a bare number.
                parsedA = A
            else:
                parsedA = "R{0}".format(A)
        else:
            raise Exception("Unknown A Mode {0}".format(opMode[1]))

        #format B
        if opMode[2] == 1:
            if const.opCode[opCode].find("UP") >= 0:
                parsedB = "U{0}".format(B)
            else:
                parsedB = "{0}".format(B)
        elif opMode[2] == 0:
            parsedB = ""
        elif opMode[2] == 2 or opMode[2] == 3:
            if opMode[4] == "iAsBx":
                # when B is an sBx it can only be an immediate, never a register
                parsedB = "{0}".format(B)
            elif const.opCode[opCode] == "OP_LOADK":
                # LOADK always reads Kx, never Rx
                parsedB = "K{0}".format(B)
            elif B < 0x100:
                parsedB = "R{0}".format(B)
            else:
                # Values from 0x100 up denote constant index B - 0x100.
                parsedB = "K{0}".format(B - 0x100)
                B -= 0x100
        else:
            raise Exception("Unknown B Mode {0}".format(opMode[2]))

        #format C
        if opMode[3] == 1:
            if const.opCode[opCode].find("UP") >= 0:
                parsedC = "U{0}".format(C)
            else:
                parsedC = "{0}".format(C)
        elif opMode[3] == 0:
            parsedC = ""
        elif opMode[3] == 2 or opMode[3] == 3:
            if C < 0x100:
                parsedC = "R{0}".format(C)
            else:
                # Same register/constant split as for B.
                parsedC = "K{0}".format(C - 0x100)
                C -= 0x100
        else:
            raise Exception("Unknown C Mode {0}".format(opMode[3]))

        # parse comment
        # first fill in the pseudo-code template; K/U operands are wrapped in
        # braces so the second format() pass below can substitute their values
        if len(parsedB) > 0 and (parsedB[0] == 'K' or parsedB[0] == 'U'):
            parsedB_ = "{{{}}}".format(parsedB)
        else:
            parsedB_ = parsedB
        if len(parsedC) > 0 and (parsedC[0] == 'K' or parsedC[0] == 'U'):
            parsedC_ = "{{{}}}".format(parsedC)
        else:
            parsedC_ = parsedC
        comment = const.pseudoCode[opCode].format(A=A,B=B,C=C,PB=parsedB_,PC=parsedC_)

        # pre-processing
        #if BForceK:
        #    comment = comment.replace("R{}".format(B), "K{}".format(B))
        #if const.opCode[opCode] == "OP_SETTABLE" and CForceK:
        #    comment = comment.replace("R{}".format(C), "{{K{}}}".format(C))

        # then substitute upvalues and constants
        comment = comment.format(**self.fmtVals)

        # special-case the opcodes that need extra handling
        if const.opCode[opCode] == "OP_LOADBOOL":
            # turn 0/1 into false/true
            comment = comment[:-1]
            if B:
                comment += "true"
            else:
                comment += "false"
            # handle the jump
            if C:
                comment += "; goto {0}".format(self.pc + 2)
        elif const.opCode[opCode] == "OP_LOADNIL":
            # Lists the B + 1 registers starting at A.
            comment = ""
            for i in range(B + 1):
                comment += "R{0}, ".format(A + i)
            comment = comment[:-2]
            comment += " := nil"
        elif const.opCode[opCode] == "OP_SELF":
            # Replace the first two characters of the template text with R(A+1).
            comment = "R{}".format(A+1) + comment[2:]
        elif const.opCode[opCode] == "OP_JMP":
            comment += " (goto {0})".format(self.pc + 1 + B)
        elif const.opCode[opCode] in ["OP_EQ", "OP_LT", "OP_LE", "OP_TEST", "OP_TESTSET"]:
            if A:
                # Non-zero A: show the negated comparison operator.
                if const.opCode[opCode] == "OP_EQ":
                    comment = comment.replace("==", "~=")
                elif const.opCode[opCode] == "OP_LT":
                    comment = comment.replace("<", ">=")
                elif const.opCode[opCode] == "OP_LE":
                    comment = comment.replace("<=", ">")
            comment += " goto {0} else goto {1}".format(self.pc + 2, self.pc + 1)
            if C == 0:
                comment = comment.replace("not ", "")
        elif const.opCode[opCode] == "OP_CALL":
            # Left-hand side: C - 1 result registers from A; C == 0 is shown as "to top".
            comment = ""
            for i in range(C - 1):
                comment += "R{}, ".format(A + i)
            if C > 1:
                comment = comment[:-2] + " := R{}(".format(A)
            elif C == 1:
                comment += " := R{}(".format(A)
            else:
                comment = "R{} to top := R{}(".format(A, A)
            
            # Arguments: B - 1 registers from A + 1; B == 0 is shown as "to top".
            for i in range(B - 1):
                comment += "R{}, ".format(A + i + 1)
            if B > 1:
                comment = comment[:-2] + ")"
            elif B == 1:
                comment += ")"
            else:
                comment += "R{} to top)".format(C)
        elif const.opCode[opCode] == "OP_TAILCALL":
            comment = "R{} to top := R{}(".format(A, A)
            for i in range(B - 1):
                comment += "R{}, ".format(A + i + 1)
            if B > 1:
                comment = comment[:-2] + ")"
            else:
                comment = comment + ")"
        elif const.opCode[opCode] == "OP_RETURN":
            # Appends B - 1 registers from A; B == 0 is shown as "to top".
            for i in range(B - 1):
                comment += "R{}, ".format(A + i)
            if B > 1:
                comment = comment[:-2]
            elif B == 0:
                comment += "R{} to top".format(A)
        elif const.opCode[opCode] == "OP_FORLOOP":
            # RD/RE/RF in the template are placeholders for registers A+1..A+3.
            comment = comment.replace("RD", "R{}".format(A + 1))
            comment = comment.replace("RE", "R{}".format(A + 2))
            comment = comment.replace("RF", "R{}".format(A + 3))
            comment += "goto {} end".format(self.pc + B + 1)
        elif const.opCode[opCode] == "OP_FORPREP":
            comment = comment.replace("RD", "R{}".format(A + 2))
            comment += "(goto {})".format(self.pc + B + 1)
        elif const.opCode[opCode] == "OP_TFORCALL":
            comment = comment.replace("RD", "R{}".format(A + 1))
            comment = comment.replace("RE", "R{}".format(A + 2))
            comment = comment.replace("RF", "R{}".format(A + 3))
            comment = comment.replace("RG", "R{}".format(A + 4))
        elif const.opCode[opCode] == "OP_TFORLOOP":
            comment = comment.replace("RD", "R{}".format(A + 1))
            comment += " (goto {}))".format(self.pc + B + 1)
        elif const.opCode[opCode] == "OP_CLOSURE":
            # Name the closure after the tree node of the B-th nested function.
            if self.currFunc == "root":
                comment += "function_{})".format(B)
            else:
                comment += self.currFunc + "_{})".format(B)
        elif const.opCode[opCode] == "OP_SETLIST":
            real_c = C
            err = False
            if C == 0:
                # C == 0: the real value is taken from the following instruction.
                success, result = self.getExtraArg()
                if success:
                    real_c = result
                else:
                    comment += result
                    err = True
                
            if not err:
                LFIELDS_PER_FLUSH = 50
                start_index = (real_c - 1) * LFIELDS_PER_FLUSH
                if B == 0:
                    comment += "R{}[{}] to R{}[top] := R{} to top".format(A, start_index, A, A + 1)
                elif B == 1:
                    comment += "R{}[{}] := R{}".format(A, start_index, A + 1)
                else:
                    comment += "R{}[{}] to R{}[{}] := R{} to R{}".format(A, start_index, A, start_index + B - 1, A + 1, A + B)
                if C == 0:
                    comment += "; CONTAINS EXTRAARG"
        elif const.opCode[opCode] == "OP_LOADKX":
            success, result = self.getExtraArg()
            if success:
                Ax = result
                comment += "R{} := {{K{}}}".format(A, Ax).format(**self.fmtVals)
            else:
                comment += result

        # Join the non-empty operands and print the line in the chosen layout.
        seq = []
        for i in [parsedA, parsedB, parsedC]:
            if i != "":
                seq.append(str(i))
        regsFmt = " ".join(seq)
        if self.format == "luaasm":
            print("{:<10s}{:<13s} ; {:>5s} {}".format(const.opCode[opCode][3:], regsFmt, "[{}]".format(str(self.pc)), comment))
        else:
            print("{:>5s} [-]: {:<10s}{:<13s}; {}".format(str(self.pc), const.opCode[opCode][3:], regsFmt, comment))
    kraken_data['name'] = line[5]
    kraken_data['depth'] = calculate_depth(kraken_data['name'])
    kraken_data['name'] = kraken_data['name'].lstrip(' ')
    
    if kraken_data['name'] == "unclassified":
        unclassified = Node(tag = kraken_data['name'],
                            identifier = kraken_data['ncbi_taxonomy_id'],
                            data = kraken_data)
    elif kraken_data['name'] == "root":
        add_node(tree, None, kraken_data)
        previous = tree.get_node(kraken_data['ncbi_taxonomy_id'])
    elif kraken_data['depth'] > tree.depth(previous):
        add_node(tree, previous, kraken_data)
        previous = tree.get_node(kraken_data['ncbi_taxonomy_id'])
    elif kraken_data['depth'] == tree.depth(previous):
        add_node(tree, tree.parent(previous.identifier), kraken_data)
        previous = tree.get_node(kraken_data['ncbi_taxonomy_id'])
    elif kraken_data['depth'] < tree.depth(previous):
        previous_search = previous
        while(tree.depth(previous_search) > kraken_data['depth']):
            previous_search = tree.parent(previous_search.identifier)
        add_node(tree, tree.parent(previous_search.identifier), kraken_data)
        previous = tree.get_node(kraken_data['ncbi_taxonomy_id'])

tree_dict = tree.to_dict(with_data=True)

def transform(tree_dict):
    """Flatten each node's ``'data'`` mapping into the node's own dict, in place.

    For every top-level entry of *tree_dict*, the key/value pairs found under
    its ``'data'`` key are copied onto the entry itself and the ``'data'`` key
    is removed. Entries without ``'data'`` are left untouched.

    Args:
        tree_dict: Mapping of node name to a node dict that may hold a
            ``'data'`` mapping.

    Returns:
        None; *tree_dict* is modified in place.
    """
    for node in tree_dict.values():
        # Write onto the node, not onto tree_dict: adding keys to the dict
        # being iterated raises RuntimeError and pollutes the top level.
        data = node.pop('data', None)
        if data:
            node.update(data)
Example #22
0
# Build a small family tree; every node is created with an explicit id.
tree = Tree()
tree.create_node(tag="Harry", identifier="h")  # root
tree.create_node(tag="Jane", identifier="j", parent="h")
tree.create_node(tag="Bill", identifier="b", parent="h")
tree.create_node(tag="Diane", identifier="d", parent="j")
tree.create_node(tag="Mary", identifier="m", parent="d")
tree.create_node(tag="Harry", identifier="h2", parent="j")

tree.show()

# Each value is followed by one blank line.
x = tree.get_node("m")
print(x.tag, end="\n\n")
print(x.identifier, end="\n\n")
y = tree.parent("m")
print(y.tag, end="\n\n")
print(y.identifier, end="\n\n")
z = tree.get_node("h2")
print(z.tag, end="\n\n")
print(z.is_root(), end="\n\n")
print(z.is_leaf(), end="\n\n")
print(tree.paths_to_leaves())


def duplicate_node_path_check(tree, node):
class IOTeqDBBuilder():
    def __init__(self, configFile):
        """Load the tag definitions from *configFile* and reset all build state.

        The JSON document must contain ``database.tags``; everything else
        starts empty and is filled in by the build steps.
        """
        with open(configFile, 'r') as f:
            config = json.load(f)
        self.databaseTags = config["database"]["tags"]

        self.IOTEQ_TAG_BYTE_SIZE = 44
        self.currentTagAddress = 0

        self.tree = Tree()
        self.tagList = []
        self.constPtrChar = []
        self.constPtrTree = []
        self.dataPtr = []
        self.persistentPtr = []

    def totalNumberOfTags(self):
        return len(self.tagList)

    def addTag(self, tag):
        self.tagList.append(tag)

    def addNameToCharPtr(self, name):
        charList = list(name)
        for char in charList:
            self.constPtrChar.append(ord(char))
        self.constPtrChar.append(0)  # escape char for string

    def setRoot(self, rootName):
        """Create the root tag named *rootName* and register it everywhere.

        The tag becomes the root node of the tree, is appended to ``tagList``
        and has its name added to the character table at offset 0.
        """
        # NOTE(review): the length passed here is len(rootName), while
        # createTree passes len(name) + 1 for the terminator -- confirm.
        rootTag = IOTeqTag(rootName, 0x00, len(rootName))
        self.tree.create_node(tag=rootName,
                              identifier=rootName,
                              parent=None,
                              data=rootTag)
        self.tagList.append(rootTag)
        self.addNameToCharPtr(rootName)

    def addValueToDataPtr(self, tag):
        datatype = tag["datatype"]
        if (datatype == "Number"):
            value = tag["value"]

            if ("numtype" in tag["config"]):
                if (tag["config"]["numtype"] == "float"):
                    ba = bytearray(struct.pack("<f", value))
            else:
                ba = bytearray(struct.pack("<L", value))

            for b in ba:
                self.dataPtr.append(hex(b))

        elif (datatype == "Text"):
            value = tag["value"]
            for char in value:
                self.dataPtr.append(hex(ord(char)))
            # for i in range(len(self.dataPtr), 40):
            self.dataPtr.append(hex(0))

    def addValueToPersistentPtr(self, tag):
        datatype = tag["datatype"]
        if (datatype == "Number"):
            value = tag["value"]

            if ("numtype" in tag["config"]):
                if (tag["config"]["numtype"] == "float"):
                    ba = bytearray(struct.pack("<f", value))
            else:
                ba = bytearray(struct.pack("<L", value))

            for b in ba:
                self.persistentPtr.append(hex(b))

        elif (datatype == "Text"):
            value = tag["value"]
            for char in value:
                self.persistentPtr.append(hex(ord(char)))
            # for i in range(len(self.dataPtr), 40):
            self.persistentPtr.append(hex(0))

    def createTree(self, tags, parent=None):
        """Create tree nodes and tag objects for *tags* beneath *parent*.

        Args:
            tags: Mapping of tag name to its definition (``arraydim``,
                ``datatype`` and, depending on the type, ``value``,
                ``config`` and ``children``).
            parent: Identifier of the tree node the new nodes hang under.

        A definition with ``arraydim`` greater than 1 is expanded into
        ``name[0]``, ``name[1]``, ...; folders recurse into their children.
        """
        for baseName, spec in tags.items():
            for i in range(spec["arraydim"]):
                # Array elements get an index suffix, scalars keep their name.
                if spec["arraydim"] > 1:
                    tagName = "{}[{}]".format(baseName, i)
                else:
                    tagName = baseName

                # The name goes into the character table; the tag records where.
                charIndex = len(self.constPtrChar)
                self.addNameToCharPtr(tagName)
                newTag = IOTeqTag(tagName, charIndex,
                                  len(tagName) + 1)  # plus 1 for \0

                if spec["datatype"] != "Folder":
                    # Value size: numbers take 4, text takes its own length.
                    if spec["datatype"] == "Number":
                        newTag.valueSize = 4
                    elif spec["datatype"] == "Text":
                        newTag.valueSize = len(spec["value"])

                    config = spec["config"]
                    if "persistent" in config and config["persistent"] == True:
                        newTag.persistentValuePtr = len(self.persistentPtr)
                        self.addValueToPersistentPtr(spec)
                        newTag.isPersistent = 1

                    # The default value is stored in the data table.
                    newTag.valuePtr = len(self.dataPtr)
                    self.addValueToDataPtr(spec)

                    # numType is only set for a missing or "float" numtype.
                    if "numtype" not in config:
                        newTag.numType = "integer"
                    elif config["numtype"] == "float":
                        newTag.numType = "float"

                    newTag.value = spec['value']

                self.tree.create_node(tagName, tagName, parent, newTag)
                self.tagList.append(newTag)

                if spec["datatype"] == "Folder":
                    self.createTree(spec["children"], tagName)

    def setTagAddresses(self):
        for level in range(self.tree.depth() + 1):
            for node in dict(
                    filter(lambda elem: self.tree.level(elem[0]) == level,
                           self.tree.nodes.items())):
                tag = self.tree.get_node(node).data
                tag.address = self.currentTagAddress
                self.currentTagAddress += self.IOTEQ_TAG_BYTE_SIZE

    def setTagParentChildrenPtrs(self):
        for node in self.tree.nodes:
            # tagIndex = None
            for tag in self.tagList:
                if (tag.tagName == node):
                    tagIndex = self.tagList.index(tag)
                    # Get IOTeq Tag
                    treeNode = self.tree.get_node(node)
                    self.tagList[tagIndex] = treeNode.data

                    # If node has a parent, i.e. not root
                    if (treeNode.is_root() != True):
                        # Get IOTeq Parent Tag
                        parentTag = self.tree.parent(node).data
                        # Set Parent Ptr of current tag
                        self.tagList[tagIndex].parentPtr = parentTag.address
                        if (parentTag.tagName != "tags"):
                            self.tagList[tagIndex].parentTag = parentTag

                    # If node has children
                    if (self.tree.get_node(node).is_leaf() != True):
                        childrenNodes = self.tree.children(node)
                        childrenNodes.sort(key=lambda x: x.data.address)
                        self.tagList[tagIndex].childPtr = childrenNodes[
                            0].data.address
                        self.tagList[tagIndex].numOfChildren = len(
                            childrenNodes)

                        if (tag.tagName != "tags"):
                            for i in range(0, len(childrenNodes)):
                                tagIndex = self.tagList.index(
                                    childrenNodes[i].data)
                                if (i == 0):
                                    self.tagList[
                                        tagIndex].nextSibling = childrenNodes[
                                            i + 1].data.address
                                elif (i == len(childrenNodes) - 1):
                                    self.tagList[
                                        tagIndex].prevSibling = childrenNodes[
                                            i - 1].data.address
                                else:
                                    self.tagList[
                                        tagIndex].nextSibling = childrenNodes[
                                            i + 1].data.address
                                    self.tagList[
                                        tagIndex].prevSibling = childrenNodes[
                                            i - 1].data.address

    def createConstPtrTree(self):
        sortedTags = sorted(self.tagList,
                            key=lambda x: x.address,
                            reverse=False)
        for tag in sortedTags:
            self.constPtrTree.extend(tag.getStruct())

    def build(self):
        self.setRoot("tags")
        self.createTree(ioteqDBBuilder.databaseTags, "tags")
        self.setTagAddresses()
        self.setTagParentChildrenPtrs()
        self.createConstPtrTree()
Example #24
0
class TreeT(object):
    """Wrap a ``Tree`` and convert between bracketed token lists, the tree
    itself, and per-leaf tag sequences.

    Node payloads are ``TreeData`` instances; the methods below read and
    write their ``leaves``, ``height``, ``word``, ``comb_side`` and
    ``miss_side`` attributes.
    """

    def __init__(self, max_id=0):
        """Create an empty tree.

        Args:
            max_id: Not used by the constructor; kept so existing callers
                that pass it keep working.
        """
        self.tree = Tree()

    def from_ptb_to_tree(self, line, max_id=0, leaf_id=1, parent_id=None):
        """Recursively build ``self.tree`` from a tokenized bracketed line.

        Args:
            line: Token sequence starting with ``'('`` followed by the node
                tag, e.g. ``['(', 'pos', 'word', ')']``.
            max_id: Identifier given to the next non-root internal node.
            leaf_id: Identifier given to the next leaf (word) node.
            parent_id: Identifier of the parent node; ``None`` for the
                top-level call, in which case the node gets identifier 0.

        Returns:
            A tuple ``(consumed, max_id, leaf_id)``: the number of tokens
            consumed for this sub-tree and the next free internal/leaf ids.
        """
        # starts by ['(', 'pos']
        pos_tag = line[1]
        if parent_id is None:
            pos_id = 0
        else:
            pos_id = max_id
            max_id += 1

        self.tree.create_node(pos_tag, pos_id, parent_id, TreeData())

        parent_id = pos_id
        total_offset = 2

        if line[2] != '(':
            # sub-tree is leaf
            # line[0:4] = ['(', 'pos', 'word', ')']
            word_tag = line[2]
            self.tree.create_node(word_tag, leaf_id, parent_id, TreeData())
            return 4, max_id, leaf_id + 1

        line = line[2:]

        # Consume child sub-trees until this node's closing bracket.
        while line[0] != ')':
            offset, max_id, leaf_id = self.from_ptb_to_tree(
                line, max_id, leaf_id, parent_id)
            total_offset += offset
            line = line[offset:]

        # +1 accounts for the closing ')'.
        return total_offset + 1, max_id, leaf_id

    def add_height(self, tree_dep):
        """Fill ``data.height`` on leaves and ``data.leaves`` on all nodes.

        Args:
            tree_dep: Mapping indexed by leaf identifier. Its values are
                compared against the root identifier and against leaf
                identifiers (presumably the dependency head of each leaf;
                confirm against the caller).

        Returns:
            Always ``True``.
        """
        for n in self.tree.all_nodes():
            n.data.leaves = []

        for leaf in self.tree.leaves():
            lid = leaf.identifier
            hid = tree_dep[lid]
            if hid == self.tree.root:
                # A leaf whose entry is the root gets its depth as height and
                # is recorded on every node of its root-to-leaf path.
                self.tree[lid].data.height = self.tree.depth(self.tree[lid])
                for cid in [
                        p for p in self.tree.paths_to_leaves() if lid in p
                ][0]:
                    self.tree[cid].data.leaves += [lid]
            else:
                # Climb from the leaf while every other leaf below the
                # current ancestor has its tree_dep entry among that
                # ancestor's leaves; the number of climbs is the height.
                height = -1
                cid = lid
                cond = True
                while cond:
                    self.tree[cid].data.leaves += [lid]
                    height += 1
                    cid = self.tree.parent(cid).identifier
                    cid_leaves = [x.identifier for x in self.tree.leaves(cid)]
                    cid_l_dep = [tree_dep[x] for x in cid_leaves if x != lid]
                    cond = set(cid_l_dep).issubset(set(cid_leaves))
                self.tree[lid].data.height = height

        # Nodes that no leaf was recorded on in the pass above.
        x_nodes = [
            n.identifier for n in self.tree.all_nodes() if n.data.leaves == []
        ]
        for x_node in x_nodes[::-1]:
            # Extend the first leaf of the child with the smallest height so
            # that it also covers this node.
            min_id = min(self.tree.children(x_node),
                         key=lambda c: c.data.height)
            _lid = min_id.data.leaves[0]
            self.tree[_lid].data.height += 1
            self.tree[x_node].data.leaves += [_lid]

        return True

    def _from_tree_to_ptb(self, nid):
        """Render the sub-tree rooted at ``nid`` as a bracketed string.

        Leaves render as ``' (tag word)'`` using ``data.word``; internal
        nodes render their tag followed by their children ordered by
        identifier.
        """
        # The node is looked up directly: building a subtree copy per
        # recursive call only to read back its root (which equals ``nid``)
        # made the rendering quadratic.
        node = self.tree[nid]
        if node.is_leaf():
            return ' (' + node.tag + ' ' + node.data.word + ')'

        ordered_children = sorted(self.tree.children(nid),
                                  key=lambda x: x.identifier)
        rendered = ''.join(
            self._from_tree_to_ptb(child.identifier)
            for child in ordered_children)
        return ' (' + node.tag + rendered + ')'

    def from_tree_to_ptb(self):
        """Render the whole tree as a bracketed string."""
        return self._from_tree_to_ptb(self.tree.root)

    def from_tag_to_tree(self, tag, word, pos_id=0):
        """Build a chain of nodes (with side children) from a tag sequence.

        Args:
            tag: Iterable of ``tag_nodes`` sequences. A leading ``CL``/``CR``
                element is stored as the node's ``comb_side``; the first
                remaining element is the node tag and every further element
                is a child whose first character is its ``miss_side`` and
                whose remainder is its tag.
            word: Stored as ``data.word`` on the first leaf whose
                ``miss_side`` is empty.
            pos_id: Identifier for the first created node; later nodes get
                consecutive identifiers.

        Returns:
            The next unused identifier.
        """
        parent_id = None
        for tag_nodes in tag:
            if tag_nodes[0] in [CL, CR]:
                c_side = tag_nodes[0]
                _tag_nodes = tag_nodes[1:] if len(tag_nodes) > 1 else ['']
            else:
                c_side = ''
                _tag_nodes = tag_nodes
            self.tree.create_node(_tag_nodes[0],
                                  pos_id,
                                  parent=parent_id,
                                  data=TreeData(comb_side=c_side))

            # Each entry of ``tag`` hangs below the previous entry's node.
            parent_id = pos_id
            pos_id += 1
            for tag_node in _tag_nodes[1:]:
                self.tree.create_node(tag_node[1:],
                                      pos_id,
                                      parent=parent_id,
                                      data=TreeData(miss_side=tag_node[0]))
                pos_id += 1
        for leaf in self.tree.leaves():
            if leaf.data.miss_side == '':
                leaf.data.word = word
                break
        return pos_id

    @memoize
    def is_combine_to(self, side):
        """Return whether the root node's ``comb_side`` equals ``side``."""
        return self.tree[self.tree.root].data.comb_side == side

    @memoize
    def is_combine_right(self):
        """Return whether the root's ``comb_side`` is ``CR``."""
        return self.is_combine_to(CR)

    @memoize
    def is_combine_left(self):
        """Return whether the root's ``comb_side`` is ``CL``."""
        return self.is_combine_to(CL)

    @memoize
    def is_complete_tree(self):
        """Return whether every node has an empty ``miss_side``."""
        return all(n.data.miss_side == '' for n in self.tree.all_nodes())

    @memoize
    def get_missing_leaves_to(self, miss_val, side):
        """Return identifiers of leaves tagged ``miss_val`` with ``miss_side == side``."""
        return [
            leaf.identifier for leaf in self.tree.leaves(self.tree.root)
            if leaf.data.miss_side == side and leaf.tag == miss_val
        ]

    @memoize
    def get_missing_leaves_left(self, miss_val):
        """Return identifiers of leaves tagged ``miss_val`` whose ``miss_side`` is ``L``."""
        return self.get_missing_leaves_to(miss_val, L)

    @memoize
    def get_missing_leaves_right(self, miss_val):
        """Return identifiers of leaves tagged ``miss_val`` whose ``miss_side`` is ``R``."""
        return self.get_missing_leaves_to(miss_val, R)

    @memoize
    def root_tag(self):
        """Return the tag of the root node."""
        return self.tree[self.tree.root].tag

    @memoize
    def is_no_missing_leaves(self):
        """Return whether every leaf has an empty ``miss_side``."""
        return all(
            leaf.data.miss_side == ''
            for leaf in self.tree.leaves(self.tree.root))

    # NOTE(review): this method mutates ``self.tree`` yet is memoized; whether
    # a repeated call with the same arguments re-runs depends on ``memoize``,
    # which is defined elsewhere -- confirm this is intended.
    @memoize
    def combine_tree(self, _tree, comb_leaf):
        """Paste ``_tree.tree`` under ``comb_leaf`` and link past that node.

        Returns:
            ``self``, to allow chaining.
        """
        self.tree.paste(comb_leaf, _tree.tree)
        self.tree.link_past_node(comb_leaf)
        return self

    def tree_to_path(self, nid, path):
        """Collect, per leaf, the chain of ancestors it is responsible for.

        Args:
            nid: Identifier of the node to process.
            path: Dict filled in place; maps each leaf identifier to a list
                of ancestor identifiers, prefixed with ``CR``/``CL`` once the
                leaf's height is used up.

        Returns:
            ``(leaf_id, remaining_height)`` of the leaf whose path continues
            through ``nid``.

        Raises:
            ValueError: If no child of an internal node reports a positive
                remaining height, i.e. no path continues through ``nid``.
        """
        # Stop condition
        if self.tree[nid].is_leaf():
            path[nid] = []
            return nid, self.tree[nid].data.height

        # Recursion
        flag = CR
        ret_leaf_id = ret_height = None
        for child in self.tree.children(nid):
            cid = child.identifier
            leaf_id, height = self.tree_to_path(cid, path)

            if height == 0:
                # Reached end of path can add flag
                path[leaf_id].insert(0, flag)

            if height > 0:
                path[leaf_id].insert(0, nid)
                # only single child will have height>0
                # and its value will be the one that is returned
                # to the parent
                ret_leaf_id, ret_height = leaf_id, height - 1

                # once we reached a height>0, it means that
                # this path includes the parent, and thus flag
                # direction should flip
                flag = CL

        if ret_height is None:
            # Previously this surfaced as an UnboundLocalError on the return.
            raise ValueError(
                'no child of node %r has height > 0; heights are inconsistent'
                % (nid,))
        return ret_leaf_id, ret_height

    def path_to_tags(self, path):
        """Translate identifier paths into tag sequences.

        Args:
            path: Iterable of lists as produced by ``tree_to_path``: an
                optional leading ``CL``/``CR`` flag followed by node
                identifiers.

        Returns:
            One list per input path: the flag (if present), then for every
            node but the last its tag followed by the tags of its off-path
            children (prefixed ``R`` when the child's identifier is greater
            than the on-path child's, else ``L``), then the last node's tag.
        """
        tags = []
        for p in path:
            _res = []
            # Work on a copy so the caller's path lists are left intact.
            _p = copy.copy(p)
            if _p[0] in [CL, CR]:
                _res.append(_p[0])
                _p = _p[1:]
            while _p[:-1]:
                el_p = _p.pop(0)
                _res.append(self.tree[el_p].tag)
                for c in self.tree.children(el_p):
                    if c.identifier != _p[0]:
                        _res.append(R + c.tag if c.identifier > _p[0] else L +
                                    c.tag)
            _res.append(self.tree[_p[0]].tag)
            tags.append(_res)
        return tags

    def path_to_words(self, path):
        """Return the tag of each node identifier in ``path``."""
        return [self.tree[k].tag for k in path]

    def from_tree_to_tag(self):
        """Return ``{'tags': ..., 'words': ...}`` derived from the tree.

        ``tags`` comes from ``path_to_tags`` over the per-leaf paths and
        ``words`` holds the tags of the corresponding leaf nodes.
        """
        path = {}
        self.tree_to_path(self.tree.root, path)
        return {
            'tags': self.path_to_tags(path.values()),
            'words': self.path_to_words(path.keys())
        }

    def from_ptb_to_tag(self, line, max_id, depend):
        """Build the tree from ``line``, add heights, and return the tags.

        Args:
            line: Token sequence passed to ``from_ptb_to_tree``.
            max_id: First identifier for non-root internal nodes.
            depend: Mapping passed to ``add_height``.

        Returns:
            The list produced by ``path_to_tags`` for the per-leaf paths.
        """
        self.from_ptb_to_tree(line, max_id)
        self.add_height(depend)
        path = {}
        self.tree_to_path(self.tree.root, path)
        return self.path_to_tags(path.values())
Example #25
0
    for z in d:
        path = walkTree(tree, z, path + z)

    return path


# Read the instructions. The context manager closes the file handle, and blank
# lines are dropped because the fixed-column parsing below would raise
# IndexError on them.
# NOTE: ``input`` shadows the builtin; the name is kept because it is a
# module-level binding of this script.
with open("test_input.txt") as handle:
    input = [row.strip() for row in handle if row.strip()]
tree = Tree()
tree.create_node("root", "root")

# Each line names two steps at fixed columns (index 5 and index 36); the
# second step is placed underneath the first one in the tree.
for line in input:
    before, after = line[5], line[36]
    print(line)
    has_before = tree.contains(before)
    has_after = tree.contains(after)
    if has_before and has_after:
        tree.move_node(after, before)
    elif has_before:
        tree.create_node(after, after, parent=before)
    elif has_after:
        # Insert ``before`` under the current parent of ``after``,
        # then move ``after`` below it.
        tree.create_node(before, before, parent=tree.parent(after))
        tree.move_node(after, before)
    else:
        # Neither step is known yet: hang the pair off the root.
        tree.create_node(before, before, parent="root")
        tree.create_node(after, after, parent=before)

tree.show()
print(walkTree(tree, 'root', ''))
Example #26
-1
 def test_subtree(self):
     """Check that a deep copy of the "jane" subtree is independent of self.tree."""
     # Build a new Tree from the subtree rooted at "jane", deep-copying it.
     subtree_copy = Tree(self.tree.subtree("jane"), deep=True)
     # "jane" is expected to have no parent inside the copy.
     self.assertEqual(subtree_copy.parent("jane") is None, True)
     # Changing the tag on the copy must leave the original node's tag intact.
     subtree_copy["jane"].tag = "Sweeti"
     self.assertEqual(self.tree["jane"].tag == "Jane", True)
     # Levels are relative to each tree's own root: "jane" is expected at
     # level 0 in the copy and level 1 in self.tree.
     self.assertEqual(subtree_copy.level("diane"), 1)
     self.assertEqual(subtree_copy.level("jane"), 0)
     self.assertEqual(self.tree.level("jane"), 1)