def render_vocabulary_word(tree: Tree, node: Node) -> str:
    """Render a single vocabulary word as an org-mode subtree.

    The node must be a direct child of the ``vocabulary`` node and must carry
    a non-empty ``data`` mapping.  Recognised keys in ``node.data``:

    * ``audio`` -- rendered through ``render_link`` with the label ``play``.
    * ``book_examples`` -- each entry is rendered through ``render_quote``.
    * ``definitions`` -- a list of mappings with a ``definition`` text and an
      optional ``synonyms`` list; at most three synonyms are shown.

    Returns:
        The org-mode text for the word, headline included.
    """
    parent = tree.parent(node.identifier)
    assert parent
    assert parent.identifier.lower() == 'vocabulary'
    assert node.data

    depth = tree.depth(node) + 1
    parts = ['*' * depth + ' ' + node.tag + 2 * '\n']

    if 'audio' in node.data and node.data['audio']:
        parts.append(render_link(node.data['audio'], 'play') + '\n')

    if 'book_examples' in node.data:
        for example in node.data['book_examples']:
            parts.append(render_quote(example))
        parts.append('\n')

    if 'definitions' in node.data:
        parts.append('Definitions\n')
        for number, definition in enumerate(node.data['definitions'], 1):
            parts.append(str(number) + '. ' + definition['definition'])
            if 'synonyms' in definition:
                shown = list(definition['synonyms'])[:3]
                # An empty synonym list used to leave a dangling "/" behind
                # the definition; emit the "/.../" group only when there is
                # something to show.
                if shown:
                    parts.append(' /' + ', '.join(shown) + '/')
            parts.append('\n')
        parts.append('\n')

    return ''.join(parts)
def configure_tree_topology(self, root, degree=2, remove=False):
    """Configures the cluster's network topology as a tree.

    The tree consists of the specified root node and the nodes which build
    the subtrees. The children are chosen incrementally, in other words,
    sequentially as they appear in ``self.topology.nodes``.

    Arguments:
        root {integer} -- The tree's root node.

    Keyword Arguments:
        degree {integer} -- The maximum number of children (default: {2})
        remove {boolean} -- Remove the configuration (default: {False})
    """
    self.logger.info("Configuring tree topology...")
    tree = Tree()
    root_node = self.topology.get_node(root)
    tree.create_node(root_node.name, root_node.node_id)

    # Fill the tree level by level: once the current parent has ``degree``
    # children, move on to the next node id, skipping the root's own id.
    parent_node = root
    for nodex in self.topology.nodes:
        if nodex.node_id == root_node.node_id:
            continue
        if len(tree.children(parent_node)) >= degree:
            if parent_node == root and root != 0:
                parent_node = 0
            elif parent_node + 1 == root:
                parent_node += 2
            else:
                parent_node += 1
        tree.create_node(nodex.name, nodex.node_id, parent_node)

    self.logger.info("The following tree will be configured:")
    tree.show()

    for nodex in self.topology.nodes:
        self.logger.debug("%s:", nodex.name)
        # These lookups depend only on nodex, so do them once per node
        # instead of once per (nodex, nodey) pair.
        subtree = tree.subtree(nodex.node_id)
        children = tree.children(nodex.node_id)
        parent = tree.parent(nodex.node_id)
        for nodey in self.topology.nodes:
            if nodex.node_id == nodey.node_id:
                continue
            if subtree.contains(nodey.node_id):
                # Destination is below nodex: forward via the child whose
                # subtree holds it.
                for child in children:
                    if (child.identifier == nodey.node_id
                            or tree.is_ancestor(child.identifier,
                                                nodey.node_id)):
                        nodex.add_forwarding(
                            nodey, self.topology.get_node(child.identifier))
                        break
            elif parent is not None:
                # Destination is elsewhere in the tree: forward upwards.
                nodex.add_forwarding(
                    nodey, self.topology.get_node(parent.identifier))

    if not self.testing:
        self.topology.send_forwarding_tables(remove)
def map_tree_to_program(self, tree: Tree) -> str:
    """Fold a binary parse tree bottom-up into a single program value.

    Every leaf is mapped through ``self._node_to_type``.  Inner nodes are
    processed once both children are resolved: a falsy child (``NO_LABEL``)
    is ignored, otherwise the first child is applied to the second; if that
    fails the application is retried in the opposite order.

    Returns:
        The value of the sub-program stored for the root's span.

    Raises:
        Exception: when neither application order succeeds; the original
            error is chained as ``__cause__``.
    """
    self._node_to_subprog = {}
    frontier = []  # Tree nodes that are left to be explored

    for leaf in tree.leaves():
        self._node_to_subprog[leaf.data.span] = self._node_to_type(leaf)
        parent = tree.parent(leaf.identifier)
        if parent and parent not in frontier:
            frontier.append(parent)

    while frontier:
        node = frontier.pop()
        children = tree.children(node.identifier)
        assert len(children) == 2

        # A node is ready only when both children were already resolved;
        # otherwise push it to the back of the queue and try again later.
        if not all(child.data.span in self._node_to_subprog
                   for child in children):
            frontier.insert(0, node)
            continue

        child_1 = self._node_to_subprog[children[0].data.span]
        child_2 = self._node_to_subprog[children[1].data.span]
        try:
            if child_1 and not child_2:  # child_2=='NO_LABEL'
                self._node_to_subprog[node.data.span] = child_1
            elif not child_1 and child_2:  # child_1=='NO_LABEL'
                self._node_to_subprog[node.data.span] = child_2
            elif not child_1 and not child_2:
                # Both children are 'NO_LABEL': ignore them and use the
                # node's own type.
                self._node_to_subprog[node.data.span] = self._node_to_type(
                    node)
            else:
                # make sure child_2 value can be formed
                assert child_2.is_full()
                self._node_to_subprog[node.data.span] = child_1.apply(
                    child_2)
        except Exception:
            # Best effort: retry with the operands swapped.
            try:
                self._node_to_subprog[node.data.span] = child_2.apply(
                    child_1)
            except Exception as e:
                raise Exception(
                    'final apply_exception: {}'.format(e)) from e

        parent = tree.parent(node.identifier)
        if parent and parent not in frontier:
            frontier.insert(0, parent)

    # return the root's value
    root_span = tree.get_node(tree.root).data.span
    return self._node_to_subprog[root_span].get_value()
def get_lca(T: tl.Tree, x: int, y: int) -> int:
    """Return the identifier of the lowest common ancestor of x and y in T."""

    def step_up(nid):
        return T.parent(nid).identifier

    # Lift the deeper of the two nodes until both sit on the same level.
    level_x, level_y = T.level(x), T.level(y)
    for _ in range(level_x - level_y):
        x = step_up(x)
    for _ in range(level_y - level_x):
        y = step_up(y)

    # Climb in lockstep; the first identifier they share is the LCA.
    while x != y:
        x, y = step_up(x), step_up(y)
    return x
def test_modify_node_identifier_root(self):
    """Changing the root's identifier must update the root pointer and the
    parent link of its children."""
    tree = Tree()
    tree.create_node("Harry", "harry")
    tree.create_node("Jane", "jane", parent="harry")

    old_root_id = tree['harry'].identifier
    tree.update_node(old_root_id, identifier='xyz', tag='XYZ')

    root_was_renamed = tree.root == 'xyz'
    tag_was_updated = tree['xyz'].tag == 'XYZ'
    self.assertTrue(root_was_renamed)
    self.assertTrue(tag_was_updated)
    self.assertEqual(tree.parent('jane').identifier, 'xyz')
def get_path_to_santa(orbital_tree: Tree) -> list:
    """Return the node identifiers visited between 'YOU' and 'SAN'.

    Walks upwards from the parent of 'YOU', recording each node, until it
    reaches the first ancestor whose subtree contains 'SAN'; from there the
    downward path to 'SAN' (excluding 'SAN' itself) is appended.
    """
    route = []
    cursor = orbital_tree.parent('YOU')
    while True:
        below = orbital_tree.subtree(cursor.identifier)
        if below.contains('SAN'):
            # Common ancestor found: take the branch that leads to SAN.
            for branch in below.paths_to_leaves():
                if 'SAN' in branch:
                    route += branch[:-1]
            return route
        route.append(cursor.identifier)
        cursor = orbital_tree.parent(cursor.identifier)
def test_subtree(self):
    """A deep-copied subtree is rooted at its top node and is independent
    of the tree it was taken from."""
    subtree_copy = Tree(self.tree.subtree("jane"), deep=True)

    # "jane" is the root of the copy, so it has no parent there.
    jane_is_root = subtree_copy.parent("jane") is None
    self.assertEqual(jane_is_root, True)

    # Mutating the copy must not leak into the original tree.
    subtree_copy["jane"].tag = "Sweeti"
    original_untouched = self.tree["jane"].tag == "Jane"
    self.assertEqual(original_untouched, True)

    # Levels are relative to each tree's own root.
    self.assertEqual(subtree_copy.level("diane"), 1)
    self.assertEqual(subtree_copy.level("jane"), 0)
    self.assertEqual(self.tree.level("jane"), 1)
def map_tree_to_program(self, tree: Tree) -> str:
    """Fold a parse tree with 2- or 3-ary inner nodes into a program string.

    Leaves are mapped through ``self._node_to_type``; inner nodes are merged
    with ``self.merge_children`` once all their children are resolved.  The
    root's value is wrapped as ``answer ( ... )``.
    """
    self._node_to_subprog = {}
    resolved = self._node_to_subprog
    frontier = []  # Tree nodes that are left to be explored

    for leaf in tree.leaves():
        resolved[leaf.data.span] = self._node_to_type(leaf)
        leaf_parent = tree.parent(leaf.identifier)
        if leaf_parent and leaf_parent not in frontier:
            frontier.append(tree.parent(leaf.identifier))

    while frontier:
        node = frontier.pop()
        children = tree.children(node.identifier)
        assert len(children) in [2, 3]

        # Not every child is resolved yet: requeue the node at the back.
        pending = [c for c in children if c.data.span not in resolved]
        if pending:
            frontier.insert(0, node)
            continue

        if len(children) == 2:
            first, second = (resolved[c.data.span] for c in children)
            merged = self.merge_children(first, second, node)
        else:
            # Order the three children by span start, merge the outer two
            # first and then combine the middle one with that result.
            children.sort(key=lambda c: c.data.span[0])
            left, middle, right = (resolved[c.data.span] for c in children)
            outer = self.merge_children(left, right, node)
            merged = self.merge_children(middle, outer, node)
        resolved[node.data.span] = merged

        parent = tree.parent(node.identifier)
        if parent and parent not in frontier:
            frontier.insert(0, parent)

    # return the root's value
    root_span = tree.get_node(tree.root).data.span
    inner_program = resolved[root_span].get_value()
    return 'answer ( {} )'.format(inner_program)
def render_org_tree(tree: Tree, node: Node, payload='') -> str:
    """Render ``node`` and all of its descendants as org-mode text.

    Direct children of the ``vocabulary`` node are delegated to
    ``render_vocabulary_word``; every other node becomes a plain headline
    whose star count is its depth plus one.  The rendered text is appended
    to ``payload`` and returned.
    """
    pieces = [payload]

    parent = tree.parent(node.identifier)
    under_vocabulary = bool(parent) and (
        parent.identifier.lower() == 'vocabulary')
    if under_vocabulary:
        pieces.append(render_vocabulary_word(tree, node))
    else:
        stars = '*' * (tree.depth(node) + 1)
        pieces.append(stars + ' ' + node.tag + '\n')

    # Children are rendered independently and concatenated in order.
    for child in tree.children(node.identifier):
        pieces.append(render_org_tree(tree, tree[child.identifier]))

    return ''.join(pieces)
def __init__(self, holes=0):
    """Generate a filled 3x3x3x3 grid by backtracking, then blank out cells.

    Cells are visited in the order produced by ``direct_product``; for each
    cell a value is drawn at random from ``pool(...)``.  Every accepted
    placement is recorded as a node of ``genTree`` (holding the cell index
    and a snapshot of the grid) so that the search can back up to an earlier
    state when a cell has no candidate left.

    Args:
        holes: number of cells that are reset to 0 after the grid is
            complete.  The complete grid is kept in ``self._answer``.
    """
    self.data = np.zeros((3, 3, 3, 3), dtype='int')
    element = range(3)
    order = direct_product(element, element, element, element)
    i = 0
    genTree = Tree()
    # Node data is [cell index, grid snapshot]; the first positional
    # argument (the step counter) is read back later through ``.tag``.
    root = Node(i, 'root', data=[order[0], self.data.copy()])
    genTree.add_node(root)
    currentNode = root
    # Value that a search node placed into its own cell.
    getData = lambda node: node.data[1][tuple(node.data[0])]
    while i < len(order):
        i += 1
        a, b, c, d = order[i - 1]
        # Candidates for this cell, minus the values already tried from the
        # current search node (i.e. the values held by its children).
        numPool = pool(self.data, a, b, c, d) - set(
            map(getData, genTree.children(currentNode.identifier)))
        if numPool:
            self.data[a, b, c, d] = np.random.choice(list(numPool))
            node = Node(i, data=[order[i - 1], self.data.copy()])
            genTree.add_node(node, currentNode)
            currentNode = node
        else:
            # Dead end: climb while the ancestor has as many children as
            # its pool has candidates (nothing left to try there).
            prev = genTree.parent(currentNode.identifier)
            while len(genTree.children(prev.identifier)) == len(
                    pool(prev.data[1], *(prev.data[0]))):
                currentNode = prev
                prev = genTree.parent(currentNode.identifier)
            else:
                # The loop has no ``break``, so this always runs once the
                # condition fails: resume from the first ancestor that still
                # has untried candidates.
                currentNode = prev
            # Restore that ancestor's grid snapshot and step counter.
            self.data = currentNode.data[1].copy()
            i = currentNode.tag
            continue
    # Pick ``holes`` distinct cells and blank them in the published grid.
    h = np.random.choice(len(order), size=holes, replace=False)
    self._answer = self.data.copy()
    self.holes = np.array(order)[h]
    self.data[tuple(self.holes.T.tolist())] = 0
def get_intersection_tree(T1, T2):
    """Return a copy of T1 reduced to the nodes whose leaf set occurs in T2.

    T1 is deep-copied and traversed breadth-first.  Every node whose set of
    leaf ids (as reported by ``get_leaf_node_ids_for_node``) is not exactly
    the leaf set of some T2 node is collapsed: its children are re-attached
    to its parent and the node itself is removed.

    NOTE(review): if T1's root has no counterpart in T2 it has no parent to
    re-attach to and ``T.parent(nid)`` is ``None`` -- confirm that callers
    always pass trees over the same leaf set.
    """
    T = Tree(tree=T1, deep=True)
    T1_bfs = list(T1.expand_tree(mode=1))

    # T2 is never modified, so its leaf sets can be computed once instead
    # of once per T1 node.  "Minimum symmetric difference is 0" is the same
    # as "some T2 leaf set equals this one", i.e. plain set membership.
    t2_leaf_sets = {
        frozenset(get_leaf_node_ids_for_node(T2, nid))
        for nid in T2.expand_tree(mode=1)
    }

    for nid in T1_bfs:
        leaf_ids = frozenset(get_leaf_node_ids_for_node(T, nid))
        if leaf_ids in t2_leaf_sets:
            continue
        par = T.parent(nid).identifier
        for c in T.children(nid):
            T.move_node(c.identifier, par)
        T.remove_subtree(nid)
    return T
class StateMachine(object):
    """A class to track information about a state machine"""

    def __init__(self, name):
        self.name = name
        self.events = {}
        self.effects = {}
        self.state_tree = Tree()
        self.current_state = None
        # The Root state always exists; it becomes the initial current state.
        self.add_state('Root')

    def add_state(self, name):
        """Add a state below the current state (or as root if there is none)."""
        assert isinstance(name, str)
        new_node = Node(identifier=name, data=State(name))
        if self.current_state is not None:
            self.state_tree.add_node(new_node, self.current_state.name)
            return
        # First state ever added: it is the tree root and the current state.
        self.state_tree.add_node(new_node)
        self.current_state = new_node.data

    def add_event(self, ev):
        """Register an event under its name."""
        assert isinstance(ev, Event)
        self.events[ev.name] = ev

    def add_effect(self, eff):
        """Register an effect under its name."""
        assert isinstance(eff, Effect)
        self.effects[eff.name] = eff

    def enter_state(self, state):
        """Make ``state`` the current state."""
        self.current_state = state

    def exit_state(self, state):
        """Leave ``state``: its parent state becomes the current state."""
        parent_node = self.state_tree.parent(state.name)
        self.current_state = parent_node.data

    def get_state_by_name(self, state_name):
        """Return the state object stored for ``state_name``."""
        node = self.state_tree.get_node(state_name)
        return node.data
def crossOver(individualA, individualB):
    """Create a child individual by swapping a random subtree of A for one of B.

    Both parent trees are deep-copied and given fresh node ids.  A random
    node of A's copy is replaced (together with its subtree) by a copy of a
    random subtree of B.  The operation is repeated until the resulting tree
    is no deeper than ``TREE_MAX_DEPTH``.
    """
    tree = None
    # ``tree.depth()`` without an argument is the maximum depth of the tree.
    # The previous ``tree.depth(root_node)`` asked for the level of the root,
    # which is always 0, so the depth limit was never enforced.
    while tree is None or tree.depth() > TREE_MAX_DEPTH:
        treeA = Tree(tree=individualA.tree, deep=True)
        treeB = Tree(tree=individualB.tree, deep=True)
        regenerate_ids(treeA)
        regenerate_ids(treeB)

        removedNode = random.choice(treeA.all_nodes())
        addedNode = random.choice(treeB.all_nodes())
        addedSubtree = Tree(tree=treeB.subtree(addedNode.identifier),
                            deep=True)

        if treeA.root == removedNode.identifier:
            # Replacing the root means the child is just B's subtree.
            tree = addedSubtree
        else:
            parent = treeA.parent(removedNode.identifier)
            treeA.remove_subtree(removedNode.identifier)
            treeA.paste(parent.identifier, addedSubtree)
            tree = treeA
    return Individual(tree)
class PathList:
    """Local cache of the remote directory tree of a disk.

    Nodes are keyed by remote file id; the synthetic ``'root'`` node is the
    top of the tree.  Each node's ``data`` is a ``FileInfo``.
    """

    # Timestamp layout used by the 'created_at' / 'updated_at' fields.
    _TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ'

    def __init__(self, disk):
        self._tree = Tree()
        self._disk = disk
        self._tree.create_node(tag='root', identifier='root',
                               data=FileInfo(type=False))
        # Default recursion depth for update_path_list.
        self.depth = 3

    def update_path_list(self, file_id='root', depth=None, is_fid=True,
                         **kwargs):
        """Refresh the cached children of ``file_id`` from the remote disk.

        Args:
            file_id: remote id (or a path when ``is_fid`` is false).
            depth: how many directory levels below ``file_id`` to descend;
                defaults to ``self.depth``.
            is_fid: whether ``file_id`` already is an id.
            **kwargs: internal recursion state -- ``max_depth``,
                ``get_file_list_bar`` (progress bar) and ``ratio``.

        Returns:
            ``False`` when the remote listing is empty, ``True`` otherwise.
        """
        if depth is None:
            depth = self.depth
        max_depth = kwargs.setdefault('max_depth', depth)
        # Build the progress bar only when the caller did not hand one in;
        # recursive calls always pass the top-level bar along.
        get_file_list_bar = kwargs.get('get_file_list_bar')
        if get_file_list_bar is None:
            get_file_list_bar = GetFileListBar(max_depth)
        ratio = kwargs.get('ratio', 0)
        get_file_list_bar.update(refresh_line=False)

        if not is_fid:
            file_id = self.get_path_fid(file_id, update=False)
        file_list = self._disk.get_file_list(file_id)
        if not file_list:
            if depth == max_depth:
                get_file_list_bar.refresh_line()
            return False

        # Drop cached children that no longer exist remotely.
        remote_ids = {j['file_id'] for j in file_list}
        for cached in self._tree.children(file_id):
            if cached.identifier not in remote_ids:
                self._tree.remove_node(cached.identifier)

        for i, info in enumerate(file_list):
            if depth == max_depth:
                # Only the top level advances the progress ratio.
                ratio = (i + 1) / len(file_list)
            get_file_list_bar.update(depth=max_depth - depth, ratio=ratio,
                                     refresh_line=True)
            file_info = self.get_file_info(info)[0]
            if self._tree.get_node(file_info.id):
                self._tree.update_node(file_info.id, data=file_info)
            else:
                self._tree.create_node(tag=file_info.name,
                                       identifier=file_info.id,
                                       data=file_info, parent=file_id)
            # Descend into directories while depth remains.
            if not file_info.type and depth:
                self.update_path_list(file_id=file_info.id, depth=depth - 1,
                                      max_depth=max_depth,
                                      get_file_list_bar=get_file_list_bar,
                                      ratio=ratio)
        if depth == max_depth:
            get_file_list_bar.refresh_line()
        return True

    def check_path_diff(self, local_path, disk_path_list):
        """Return the local paths that differ from the remote listing.

        ``disk_path_list`` is a list of single-entry dicts mapping a name to
        a mapping with a ``'data'`` entry (``FileInfo``) and, for
        directories, optional ``'children'`` in the same format.
        """
        p = Path(local_path)
        change_file_list = []
        for path in p.iterdir():
            flag = False
            for i, path_ in enumerate(disk_path_list, 1):
                name, file_info = list(path_.items())[0]
                # Remote entry with no local counterpart.
                if p / name not in p.iterdir():
                    change_file_list.append(p / name)
                if Path(path) == p / name:
                    if (Path(path).is_dir() and file_info['data']
                            and path.is_dir() != file_info['data'].type):
                        if 'children' in file_info:
                            children = file_info['children']
                            change_file_list.extend(
                                self.check_path_diff(p / name, children))
                        elif list(path.iterdir()):
                            change_file_list.extend(list(path.iterdir()))
                    if (file_info and file_info['data']
                            and path.is_file() == file_info['data'].type):
                        if (path.is_file() and get_sha1(path).lower()
                                != file_info['data'].content_hash.lower()):
                            # Same name but different content.
                            if i == len(disk_path_list):
                                change_file_list.append(path)
                            continue
                        else:
                            flag = True
                # Local entry that matched nothing in the remote listing.
                if not flag and i == len(disk_path_list):
                    change_file_list.append(path)
        if not len(list(p.iterdir())):
            for path_ in disk_path_list:
                name, file_info = list(path_.items())[0]
                change_file_list.append(p / name)
        if not len(disk_path_list):
            for path_ in p.iterdir():
                change_file_list.append(path_)
        return list(set(change_file_list))

    @staticmethod
    def get_file_info(info):
        """Convert one raw listing entry, or a list of them, to ``FileInfo``.

        Always returns a list, even for a single entry.
        """
        time_format = PathList._TIME_FORMAT
        info_list = info if isinstance(info, list) else [info]
        file_info_list = []
        for info in info_list:
            # Fall back to the current local time when the entry carries no
            # creation timestamp.  Directories used to fall back to
            # ``time.time()`` (a float), unlike files and unlike parsed
            # timestamps, which are ``struct_time``.
            if 'created_at' in info:
                ctime = time.strptime(info['created_at'], time_format)
            else:
                ctime = time.localtime()
            update_time = time.strptime(info['updated_at'], time_format)
            if info['type'] == 'file':
                file_info = FileInfo(
                    name=info['name'], id=info['file_id'],
                    pid=info['parent_file_id'], type=True,
                    ctime=ctime, update_time=update_time,
                    hidden=info.get('hidden'), category=info['category'],
                    content_type=info.get('content_type'),
                    size=info['size'],
                    content_hash_name=info.get('content_hash_name'),
                    content_hash=info.get('content_hash'),
                    download_url=info.get('download_url', ''),
                    video_media_metadata=info.get('video_media_metadata'),
                    video_preview_metadata=info.get(
                        'video_preview_metadata'))
            else:
                file_info = FileInfo(
                    name=info['name'], id=info['file_id'],
                    pid=info['parent_file_id'], type=False,
                    ctime=ctime, update_time=update_time,
                    hidden=info.get('hidden'))
            file_info_list.append(file_info)
        return file_info_list

    def tree(self, path='root', stdout=sys.stdout):
        """Refresh and print the cached tree below ``path``.

        Raises:
            FileNotFoundError: when ``path`` cannot be resolved.
        """
        file_id = self.get_path_fid(path, update=False)
        # Validate before refreshing; an unresolved path used to be sent to
        # the remote listing call first.
        if not file_id:
            raise FileNotFoundError(path)
        self.update_path_list(file_id)
        return self._tree.show(file_id, stdout=stdout)

    def get_path_list(self, path, update=True):
        """Return the ``FileInfo`` entries found at ``path``."""
        file_id = self.get_path_fid(path, update=update)
        try:
            return self.get_fid_list(file_id, update=update)
        except FileNotFoundError:
            raise FileNotFoundError(path)

    def get_fid_list(self, file_id, update=True):
        """Return the ``FileInfo`` entries for ``file_id``.

        A file yields a one-element list, a directory yields its children.
        """
        if not file_id:
            raise FileNotFoundError
        try:
            self.auto_update_path_list(update, file_id)
        except NodeIDAbsentError:
            # NOTE(review): get_file_info returns a list per entry, so this
            # is a list of one-element lists -- confirm callers expect that.
            return list(map(self.get_file_info,
                            self._disk.get_file_list(file_id)))
        node = self._tree.get_node(file_id)
        if not node:
            return []
        if file_id != 'root' and node.data.type:
            return [node.data]
        return [i.data for i in self._tree.children(file_id)]

    def get_path_fid(self, path, file_id='root', update=True):
        """Resolve ``path`` to a file id, or ``False`` when it is unknown."""
        if str(path) in ('', '/', '\\', '.', 'root'):
            return 'root'
        path = AliyunpanPath(path)
        flag = False
        path_list = list(filter(None, path.split()))
        # Guard against an empty component list before peeking at it.
        if path_list and path_list[0] == 'root':
            path_list = path_list[1:]
        for i in path_list:
            flag = False
            node_list = self._tree.children(file_id)
            if not node_list:
                # Nothing cached below this node yet: fetch and retry.
                self.auto_update_path_list(update, file_id)
                node_list = self._tree.children(file_id)
            for j in node_list:
                if i == j.tag:
                    flag = True
                    file_id = j.identifier
                    break
            if not flag:
                return False
        if flag:
            return file_id
        return False

    def get_path_node(self, path, update=True):
        """Return the tree node for ``path``, or ``False``."""
        file_id = self.get_path_fid(path, update=update)
        if file_id:
            return self._tree.get_node(file_id)
        return False

    def get_path_parent_node(self, path, update=True):
        """Return the parent node of ``path``, or ``False``."""
        file_id = self.get_path_fid(path, update=update)
        if file_id:
            node = self._tree.parent(file_id)
            if node:
                return node
        return False

    def auto_update_path_list(self, update=True, file_id=None):
        """Refresh the cache when needed.

        With ``update`` false only the direct children of ``file_id`` are
        fetched; with ``update`` true a full refresh happens, but only while
        the tree still consists of the root node alone.
        """
        if not update and file_id:
            return self.update_path_list(file_id, depth=0)
        elif update and len(self._tree) == 1:
            return self.update_path_list()
class StepParse:
    """Extract the assembly structure of a STEP file.

    ``load_step`` collects the product and assembly-usage records,
    ``create_tree`` turns them into a ``Tree`` and ``create_lattice`` into a
    networkx graph with plot positions.
    """

    def __init__(self):
        pass

    @staticmethod
    def _classify_line(line):
        """Return the record type of an indexed line, or '' if irrelevant.

        The tests are ordered: 'PRODUCT_DEFINITION' must be tried before the
        plain 'PRODUCT' test, which would otherwise never be reached for it.
        """
        if 'NEXT_ASSEMBLY_USAGE_OCCURRENCE' in line:
            return 'nauo'
        if 'PRODUCT_DEFINITION ' in line or 'PRODUCT_DEFINITION(' in line:
            return 'prod_def'
        if 'PRODUCT_DEFINITION_FORMATION' in line:
            return 'prod_def_form'
        if 'PRODUCT ' in line or 'PRODUCT(' in line:
            return 'prod'
        return ''

    def _store_held_line(self, line_hold, line_type):
        """Append a completed record to the list matching its type."""
        target = {
            'nauo': self.nauo_lines,
            'prod_def': self.prod_def_lines,
            'prod_def_form': self.prod_def_form_lines,
            'prod': self.prod_lines,
        }.get(line_type)
        if target is not None:
            target.append(line_hold)

    @staticmethod
    def _extract_refs(line, split_parens=False):
        """Return the '#n' references of a record in order of appearance.

        Commas and '=' are turned into spaces so that the references become
        separate tokens.  Trailing ')' and ';' are stripped too, so records
        written without spaces (``...,#12);``) give clean references.
        """
        text = line.replace(",", " ").replace("=", " ")
        if split_parens:
            text = text.replace("(", " ")
        return [tok.rstrip(',);') for tok in text.split()
                if tok.startswith('#')]

    def load_step(self, step_filename):
        """Read ``step_filename`` and build the reference tables.

        Sets, among others, ``nauo_refs``, ``prod_all_refs``,
        ``parent_refs``, ``child_refs``, ``root_type_refs`` and ``part_dict``
        (PRODUCT_DEFINITION reference -> product name).
        """
        self.nauo_lines = []
        self.prod_def_lines = []
        self.prod_def_form_lines = []
        self.prod_lines = []
        self.filename = os.path.splitext(step_filename)[0]

        line_hold = ''
        line_type = ''

        # A record may be wrapped over several physical lines.  A record is
        # held until the next indexed line ("#n = ...") or 'ENDSEC;' shows
        # that it is complete; unindexed lines are appended to it.
        with open(step_filename) as f:
            for line in f:
                if re.search("#(.*)=", line):
                    if line_hold:
                        self._store_held_line(line_hold, line_type)
                    line_type = self._classify_line(line)
                    line_hold = line.rstrip() if line_type else ''
                elif 'ENDSEC;' in line:
                    if line_hold:
                        self._store_held_line(line_hold, line_type)
                    line_hold = ''
                    line_type = ''
                else:
                    line_hold = line_hold + line.rstrip()

        # Find all (# hashed) line references and product names
        self.nauo_refs = [self._extract_refs(el)
                          for el in self.nauo_lines]
        self.prod_def_refs = [self._extract_refs(el)
                              for el in self.prod_def_lines]
        self.prod_def_form_refs = [self._extract_refs(el)
                                   for el in self.prod_def_form_lines]
        self.prod_refs = []
        for el_ in self.prod_lines:
            refs = self._extract_refs(el_, split_parens=True)
            # The product name is the first quoted string of the record.
            refs.append(el_.split("'")[1])
            self.prod_refs.append(refs)

        # Get first two items in each sublist (as third is shape ref)
        #
        # First item is 'PRODUCT_DEFINITION' ref
        # Second item is 'PRODUCT_DEFINITION_FORMATION <etc>' ref
        self.prod_all_refs = [el[:2] for el in self.prod_def_refs]

        # Match up all references down to level of product name
        for el_ in self.prod_all_refs:
            # Add 'PRODUCT' ref, found via the formation record
            for el in self.prod_def_form_refs:
                if el[0] == el_[1]:
                    el_.append(el[1])
                    break
            # Add the product name.  It is always the last item of a
            # prod_refs entry, however many references precede it.
            for el in self.prod_refs:
                if el[0] == el_[2]:
                    el_.append(el[-1])
                    break

        # Find all parent and child relationships
        # (2nd and 3rd item in each sublist)
        self.parent_refs = [el[1] for el in self.nauo_refs]
        self.child_refs = [el[2] for el in self.nauo_refs]

        # Find distinct parts and assemblies via set operations
        self.all_type_refs = set(self.child_refs) | set(self.parent_refs)
        self.ass_type_refs = set(self.parent_refs)
        self.part_type_refs = set(self.child_refs) - set(self.parent_refs)
        # TH: find root node
        self.root_type_refs = set(self.parent_refs) - set(self.child_refs)

        # Create simple parts dictionary (ref + label)
        self.part_dict = {el[0]: el[3] for el in self.prod_all_refs}

    def show_values(self):
        """Print the raw record lines and the extracted reference lists."""
        # TH: basic testing, if needed these could be split up
        print(self.nauo_lines)
        print(self.prod_def_lines)
        print(self.prod_def_form_lines)
        print(self.prod_lines)
        print(self.nauo_refs)
        print(self.prod_def_refs)
        print(self.prod_def_form_refs)
        print(self.prod_refs)

    def create_tree(self):
        """Build ``self.tree`` from the loaded references.

        Node ids are consecutive integers in creation order (root is 0);
        ``self.tree_dict`` maps each id to its part reference, which is also
        stored in the node data under ``'ref'``.
        """
        self.tree = Tree()

        # TH: check if there are any parts to make a tree from
        if self.part_dict == {}:
            return

        root_node_ref = list(self.root_type_refs)[0]
        # HR added part reference as data for later use
        self.tree.create_node(self.part_dict[root_node_ref], 0,
                              data={'ref': root_node_ref})

        # Each occurrence of a part needs its own unique node id.
        self.tree_dict = {0: root_node_ref}
        last_id = 0

        def add_children(parent_id):
            """Depth-first: attach every child occurrence of parent_id."""
            nonlocal last_id
            parent_ref = self.tree_dict[parent_id]
            for line in self.nauo_refs:
                if line[1] == parent_ref:
                    last_id += 1
                    child_id = last_id
                    self.tree_dict[child_id] = str(line[2])
                    self.tree.create_node(self.part_dict[line[2]], child_id,
                                          parent=parent_id,
                                          data={'ref': str(line[2])})
                    add_children(child_id)

        add_children(0)
        self.appended = False
        self.get_levels()

    def _do_level(self, tree_level):
        """Fill ``self.levels`` for every node on the given tree level."""
        node_ids = [el for el in self.tree.nodes
                    if self.tree.level(el) == tree_level]
        for el in node_ids:
            if el in self.leaf_ids:
                # A leaf counts as one part and one assembly level.
                self.levels[el] = {'n_p': self.part_level,
                                   'n_a': self.part_level}
            else:
                # An assembly sums the counts of its direct children.
                child_ids = self.tree.is_branch(el)
                child_sum_p = sum(self.levels[c]['n_p'] for c in child_ids)
                child_sum_a = sum(self.levels[c]['n_a'] for c in child_ids)
                self.levels[el] = {'n_p': child_sum_p,
                                   'n_a': child_sum_a + 1}
                self.levels_set_p.add(child_sum_p)
                self.levels_set_a.add(child_sum_a + 1)

    def get_levels(self):
        """Compute per-node counts, build the lattice and the inverse maps."""
        # Initialise dict and get first level (leaves)
        self.levels = {}
        self.levels_set_p = set()
        self.levels_set_a = set()
        self.leaf_ids = [el.identifier for el in self.tree.leaves()]
        self.all_ids = list(self.tree.nodes)
        self.non_leaf_ids = set(self.all_ids) - set(self.leaf_ids)

        self.part_level = 1

        # Go up through the tree levels, deepest first, so that children
        # are always done before their parent.
        for tree_level in range(self.tree.depth(), -1, -1):
            self._do_level(tree_level)

        self.create_lattice()

        self.levels_p_sorted = sorted(self.levels_set_p)
        self.levels_a_sorted = sorted(self.levels_set_a)

        def get_levels_inv(list_in, key):
            """Map each lattice level to the ids of the nodes on it."""
            levels_inv = {self.part_level: []}
            for el in list_in:
                levels_inv[el] = []
            for k, v in self.levels.items():
                levels_inv[v[key]].append(k)
            return levels_inv

        self.levels_p_inv = get_levels_inv(self.levels_p_sorted, 'n_p')
        self.levels_a_inv = get_levels_inv(self.levels_a_sorted, 'n_a')

    def get_all_children(self, id_):
        """Return the ids of all descendants of ``id_`` in breadth-first order."""
        descendants = [el.identifier for el in self.tree.children(id_)]
        # ``descendants`` doubles as the work queue: it grows while being
        # walked, so each node is expanded exactly once.
        position = 0
        while position < len(descendants):
            current = descendants[position]
            descendants.extend(
                el.identifier for el in self.tree.children(current))
            position += 1
        return descendants

    def create_lattice(self):
        """Build ``self.g``: one graph node per tree node, edges child -> parent.

        Each node gets a ``pos`` attribute: leaves are spread along level 1,
        every assembly sits at the mean x of its children and at a y equal
        to its ``n_a`` value.
        """
        self.g = nx.DiGraph()
        self.default_colour = 'r'

        # Get root node and set parent to -1 to maintain data type of "parent"
        node_id = self.tree.root
        label_text = self.tree.get_node(node_id).tag
        self.g.add_node(node_id, parent=-1, label=label_text,
                        colour=self.default_colour)

        # Do nodes from treelib "nodes" dictionary; IDs same as for tree
        for key in self.tree.nodes:
            if key != self.tree.root:
                parent_id = self.tree.parent(key).identifier
                label_text = self.tree.get_node(key).tag
                self.g.add_node(key, parent=parent_id, label=label_text,
                                colour=self.default_colour)

        # Do edges from nodes
        for key in self.tree.nodes:
            if key != self.tree.root:
                parent_id = self.tree.parent(key).identifier
                self.g.add_edge(key, parent_id)

        # Escape if only one node
        # HR 6/3/20 QUICK BUG FIX: SINGLE-NODE TREE DOES NOT PLOT
        # IMPROVE LATER; SHOULD BE PART OF A GENERAL METHOD
        if self.tree.size() == 1:
            id_ = [el.identifier for el in self.tree.leaves()]
            self.g.nodes[id_[-1]]['pos'] = (0, 0)
            return

        # Get set of parents of leaf nodes
        leaf_parents = {self.tree.parent(el).identifier
                        for el in self.leaf_ids}

        # For each leaf_parent, set position of leaf nodes sequentially
        leaf_id_set = {el.identifier for el in self.tree.leaves()}
        no_leaves = len(leaf_id_set)
        i = 0
        for el in leaf_parents:
            for el_ in self.tree.is_branch(el):
                if el_ in leaf_id_set:
                    self.g.nodes[el_]['pos'] = ((i / (no_leaves)), 1)
                    i += 1

        # Traverse upwards from leaves: levels are visited in ascending
        # order so that all children are placed before their parent.
        for el in sorted(self.levels_set_a):
            # Get all nodes at that level
            node_ids = [k for k, v in self.levels.items() if v['n_a'] == el]
            # Set position as mean x of the children
            for el_ in node_ids:
                child_ids = self.tree.is_branch(el_)
                pos_sum = sum(self.g.nodes[c]['pos'][0] for c in child_ids)
                self.g.nodes[el_]['pos'] = (pos_sum / len(child_ids), el)

    def print_tree(self):
        """Show the tree, building it first if that has not happened yet."""
        try:
            self.tree.show()
        except Exception:
            # Best effort: typically the tree does not exist yet.
            self.create_tree()
            self.tree.show()

    def tree_to_json(self, save_to_file=False, filename='file', path=''):
        """Return the tree as a JSON string, optionally saving it to a file.

        The file is written to ``<path>/<filename>.json``.  Returns ``None``
        (after printing a notice) when the tree is empty.
        """
        if self.tree.size() != 0:
            data = self.tree.to_json()
            j = json.loads(data)
            if save_to_file:
                if path:
                    file_path = os.path.join(path, filename)
                else:
                    file_path = filename
                with open(file_path + '.json', 'w') as outfile:
                    json.dump(j, outfile)
            return data
        else:
            print("no tree to print")
            return
class PathList:
    """Local cache of the remote directory tree of a disk.

    Nodes are keyed by remote file id below a synthetic ``'root'`` node;
    each node's ``data`` is a ``FileInfo``.
    """

    def __init__(self, disk):
        self._tree = Tree()
        self._disk = disk
        self._tree.create_node(tag='root', identifier='root')
        # Default recursion depth for update_path_list.
        self.depth = 3

    def update_path_list(self, file_id='root', depth=None, is_fid=True):
        """Refresh the cached children of ``file_id`` from the remote disk.

        Args:
            file_id: remote id (or a path when ``is_fid`` is false).
            depth: how many directory levels to descend; defaults to
                ``self.depth``.
            is_fid: whether ``file_id`` already is an id.

        Returns:
            ``False`` when the remote answer has no ``'items'``, else ``True``.
        """
        if depth is None:
            depth = self.depth
        if not is_fid:
            file_id = self.get_path_fid(file_id, auto_update=False)
        file_list = self._disk.get_file_list(file_id)
        if 'items' not in file_list:
            return False
        time_format = '%Y-%m-%dT%H:%M:%S.%fZ'
        for i in file_list['items']:
            ctime = time.strptime(i['created_at'], time_format)
            update_time = time.strptime(i['updated_at'], time_format)
            if i['type'] == 'file':
                file_info = FileInfo(name=i['name'], id=i['file_id'],
                                     pid=i['parent_file_id'], type=True,
                                     ctime=ctime, update_time=update_time,
                                     hidden=i['hidden'],
                                     category=i['category'], size=i['size'],
                                     content_hash_name=i['content_hash_name'],
                                     content_hash=i['content_hash'],
                                     download_url=i['download_url'])
            else:
                file_info = FileInfo(name=i['name'], id=i['file_id'],
                                     pid=i['parent_file_id'], type=False,
                                     ctime=ctime, update_time=update_time,
                                     hidden=i['hidden'])
            if self._tree.get_node(file_info.id):
                # Refresh the entry itself.  This used to update the node of
                # ``file_id`` (the parent directory), overwriting the
                # parent's data with that of its child.
                self._tree.update_node(file_info.id, data=file_info)
            else:
                self._tree.create_node(tag=file_info.name,
                                       identifier=file_info.id,
                                       data=file_info, parent=file_id)
            # Descend into directories while depth remains.
            if not file_info.type and depth:
                self.update_path_list(file_id=file_info.id, depth=depth - 1)
        return True

    def tree(self, path='root', auto_update=True):
        """Print the cached tree below ``path``."""
        file_id = self.get_path_fid(path, auto_update=auto_update)
        if not file_id:
            raise Exception('No such file or directory')
        self._tree.show(file_id)

    def get_path_list(self, path, auto_update=True):
        """Return the ``FileInfo`` entries found at ``path``."""
        file_id = self.get_path_fid(path, auto_update=auto_update)
        return self.get_fid_list(file_id, auto_update=auto_update)

    def get_fid_list(self, file_id, auto_update=True):
        """Return the ``FileInfo`` entries for ``file_id``.

        A file yields a one-element list, a directory yields its children.
        """
        self.auto_update_path_list(auto_update)
        if not file_id:
            raise Exception('No such file or directory')
        if file_id != 'root' and self._tree.get_node(file_id).data.type:
            return [self._tree.get_node(file_id).data]
        return [i.data for i in self._tree.children(file_id)]

    def get_path_fid(self, path, file_id='root', auto_update=True):
        """Resolve ``path`` to a file id, or ``False`` when it is unknown."""
        self.auto_update_path_list(auto_update)
        path = Path(path)
        if str(path) in ('', '/', '\\', '.', 'root'):
            return 'root'
        flag = False
        for i in filter(None, path.as_posix().split('/')):
            flag = False
            for j in self._tree.children(file_id):
                if i == j.tag:
                    flag = True
                    file_id = j.identifier
                    break
            if not flag:
                # A missing component makes the whole path invalid.  Without
                # this the search continued from the last matched directory
                # and could resolve 'a/missing/b' to 'a/b'.
                return False
        if flag:
            return file_id
        return False

    def get_path_node(self, path, auto_update=True):
        """Return the tree node for ``path``, or ``False``."""
        file_id = self.get_path_fid(path, auto_update=auto_update)
        if file_id:
            return self._tree.get_node(file_id)
        return False

    def get_path_parent_node(self, path, auto_update=True):
        """Return the parent node of ``path``, or ``False``."""
        file_id = self.get_path_fid(path, auto_update=auto_update)
        if file_id:
            node = self._tree.parent(file_id)
            if node:
                return node
        return False

    def auto_update_path_list(self, auto_update=True):
        """Populate the cache once, while it only holds the root node."""
        if auto_update and len(self._tree) == 1:
            return self.update_path_list()
"depth": depth}) previous = tree.get_node(str(ncbi_taxonomy_id)) elif depth > tree.depth(previous): tree.create_node(tag = clade_name, identifier = ncbi_taxonomy_id, data = { "num_reads": number_reads_taxon, "rank_code": rank_code, "depth": depth}, parent = previous) previous = tree.get_node(str(ncbi_taxonomy_id)) elif depth == tree.depth(previous): tree.create_node(tag = clade_name, identifier = ncbi_taxonomy_id, data = { "num_reads": number_reads_taxon, "rank_code": rank_code, "depth": depth}, parent = tree.parent(previous.identifier)) previous = tree.get_node(str(ncbi_taxonomy_id)) elif depth < tree.depth(previous): previous_search = previous while(tree.depth(previous_search) > depth): previous_search = tree.parent(previous_search.identifier) tree.create_node(tag = clade_name, identifier = ncbi_taxonomy_id, data = { "num_reads": number_reads_taxon, "rank_code": rank_code, "depth": depth}, parent = tree.parent(previous_search.identifier)) previous = tree.get_node(str(ncbi_taxonomy_id))
def _write_kmer_index(tree_dir, kmerlist, kmer_index_dict):
    """Write ``kmer.fa`` and renumber every ``kmers/*`` file to match it.

    Each k-mer id in *kmerlist* gets a 0-based position in ``kmer.fa``;
    the ids stored in the per-node files under ``kmers/`` are then replaced
    by those positions.
    """
    kmer_mapping = {}
    with open(tree_dir + "/kmer.fa", "w") as f:
        for index, kmer_id in enumerate(kmerlist):
            f.write(">1\n")
            f.write(kmer_index_dict.inv[kmer_id])
            f.write("\n")
            kmer_mapping[kmer_id] = index

    # change index
    for name in os.listdir(tree_dir + "/kmers"):
        node_file = tree_dir + "/kmers/" + name
        with open(node_file, "r") as f:
            lines = f.readlines()
        if (len(lines) == 0):
            continue
        old_ids = map(int, lines[0].rstrip().split(" "))
        with open(node_file, "w") as f:
            for j in old_ids:
                f.write("%d " % kmer_mapping[j])


def build_tree(arg):
    """Build the tree-based k-mer index for a set of genome clusters.

    Args:
        arg: Sequence ``(dist_matrix_file, cls_file, tree_dir, ksize,
            params)`` where ``params`` is ``(alpha_ratio, minsize, maxsize,
            max_cls_size)``.

    Writes into ``tree_dir``: ``tree_structure.txt``, ``kmers/<node>``,
    ``overlapping_info/<node>`` (+ ``_supple``), ``reconstructed_nodes.txt``,
    ``node_length.txt`` and ``kmer.fa``; additionally ``tree.pkl`` and a copy
    of ``cls_file`` when there is a single cluster, or
    ``hclsMap_95_recls.txt`` otherwise. Returns ``None``.
    """
    import shutil

    # read parameters
    start = time.time()
    dist_matrix_file = arg[0]
    cls_file = arg[1]
    tree_dir = arg[2]
    ksize = arg[3]
    params = arg[4]
    alpha_ratio = params[0]
    minsize = params[1]
    maxsize = params[2]
    max_cls_size = params[3]

    # save genomes info
    fna_seq = bidict.bidict()  # genome name <-> column index
    fna_path = {}  # column index -> genome path as written in the header

    # read dist matrix (represented by similarity: 1-dist)
    # output: dist, fna_path, fna_seq
    with open(dist_matrix_file, "r") as f:
        lines = f.readlines()
    for index, path in enumerate(lines[0].rstrip().split("\t")[1:]):
        # genome name = file name up to its first dot
        temp = path[path.rfind('/') + 1:].split(".")[0]
        fna_seq[temp] = index
        fna_path[index] = path
    dist = []
    for line in lines[1:]:
        dist.append(
            [np.array(list(map(float, line.rstrip().split("\t")[1:])))])
    dist = np.concatenate(dist)

    # read initial clustering results. fna_mapping, from 1 for indexing
    with open(cls_file, 'r') as f:
        lines = f.readlines()
    fna_mapping = defaultdict(set)
    for line in lines:
        temp = line.rstrip().split("\t")
        for i in temp[2].split(","):
            fna_mapping[int(temp[0])].add(fna_seq[i])

    if (len(lines) == 1):
        # Single cluster: the tree is one node, no hierarchy is needed.
        tree = Tree()
        T0 = Node(identifier=list(fna_mapping.keys())[0])
        tree.add_node(T0)
        kmer_sta = defaultdict(int)
        kmer_index_dict = bidict.bidict()
        kmer_index = 1
        alpha_ratio = 1
        Lv = set()
        for i in fna_mapping[T0.identifier]:
            for seq_record in SeqIO.parse(fna_path[i], "fasta"):
                temp = str(seq_record.seq)
                # NOTE(review): this range stops one short of the final
                # k-mer (start index len - ksize); left as is because the
                # sibling extract_kmers() is not visible here -- confirm.
                for k in range(0, len(temp) - ksize):
                    forward = temp[k:k + ksize]
                    reverse = seqpy.revcomp(forward)
                    for kmer in [forward, reverse]:
                        try:
                            kmer_sta[kmer_index_dict[kmer]] += 1
                        except KeyError:
                            kmer_index_dict[kmer] = kmer_index
                            kmer_sta[kmer_index] += 1
                            kmer_index += 1
        alpha = len(fna_mapping[T0.identifier]) * alpha_ratio
        for x in kmer_sta:
            if (kmer_sta[x] >= alpha):
                Lv.add(x)
        print(T0.identifier, len(Lv))

        # save2file
        with open(tree_dir + '/tree.pkl', 'wb') as f:
            pkl.dump(tree, f)
        with open(tree_dir + "/tree_structure.txt", "w") as f:
            f.write("%d\t" % T0.identifier)
        os.makedirs(tree_dir + "/kmers", exist_ok=True)
        os.makedirs(tree_dir + "/overlapping_info", exist_ok=True)
        try:
            shutil.copy(cls_file, tree_dir)
        except shutil.SameFileError:
            # cls_file already lives in tree_dir; nothing to copy.
            pass
        # No node is ever reconstructed here: write an empty list.
        with open(tree_dir + "/reconstructed_nodes.txt", "w") as f:
            pass
        if (len(Lv) > maxsize):
            # random.sample() no longer accepts a set (Python 3.11+).
            Lv = set(random.sample(list(Lv), maxsize))
        kmerlist = Lv
        length = len(Lv)
        with open(tree_dir + "/kmers/" + str(T0.identifier), "w") as f:
            for j in Lv:
                f.write("%d " % j)
        with open(tree_dir + "/node_length.txt", "w") as f:
            f.write("%d\t%d\n" % (T0.identifier, length))
        _write_kmer_index(tree_dir, kmerlist, kmer_index_dict)
        end = time.time()
        print(
            '- The total running time of tree-based indexing struture building is ',
            str(end - start), ' s\n')
        return

    # initially build tree
    cls_dist, mapping, tree, depths, depths_mapping = hierarchy(
        fna_mapping, dist)

    # initially extract k-mers
    kmer_index_dict = bidict.bidict()
    kmer_index = 1
    Lv = defaultdict(set)
    spec = defaultdict(set)  # k-mers <= alpha
    leaves = tree.leaves()
    for i in leaves:
        kmer_index = extract_kmers(fna_mapping[i.identifier], fna_path, ksize,
                                   kmer_index_dict, kmer_index, Lv, spec,
                                   tree_dir, alpha_ratio, i.identifier)
    end = time.time()
    print('- The total running time of k-mer extraction is ',
          str(end - start), ' s\n')
    start = time.time()

    # leaf nodes check: merge leaves until each keeps >= minsize own k-mers
    recls_label = 0
    leaves_check = []
    check_waitlist = reversed(leaves)
    while (True):
        if (recls_label):
            # Clusters changed in the previous round: rebuild the hierarchy
            # and turn the pending cluster ids into nodes, deepest first.
            cls_dist, mapping, tree, depths, depths_mapping = hierarchy(
                fna_mapping, dist)
            leaves = tree.leaves()
            temp = {}
            temp2 = []
            for i in check_waitlist:
                if (i in fna_mapping):
                    temp2.append(i)
            check_waitlist = temp2.copy()
            for i in check_waitlist:
                temp[tree.get_node(i)] = depths[tree.get_node(i)]
            check_waitlist = []
            a = sorted(temp.items(), key=lambda x: x[1], reverse=True)
            for i in a:
                check_waitlist.append(i[0])
            for i in fna_mapping:
                if (i not in Lv):
                    kmer_index = extract_kmers(fna_mapping[i], fna_path,
                                               ksize, kmer_index_dict,
                                               kmer_index, Lv, spec, tree_dir,
                                               alpha_ratio, i)
        higher_union = defaultdict(set)
        for i in check_waitlist:
            diff, diff_nodes = get_leaf_union(depths[i], higher_union,
                                              depths_mapping, Lv, spec, i)
            kmer_t = Lv[i.identifier] - diff
            for j in diff_nodes:
                kmer_t = kmer_t - Lv[j.identifier]
            for j in diff_nodes:
                kmer_t = kmer_t - spec[j.identifier]
            print(str(i.identifier) + " checking", end="\t")
            print(len(kmer_t))
            if (len(kmer_t) < minsize):
                leaves_check.append(i)
        if (len(leaves_check) > 0):
            recls_label = 1
        else:
            break

        # re-clustering
        check_waitlist = []
        while (recls_label == 1):
            cluster_id = max(list(fna_mapping.keys())) + 1
            check_waitlist.append(cluster_id)
            leaf_a = leaves_check[0].identifier
            row_index = mapping[leaf_a]
            column_index = cls_dist[row_index].argmax()
            leaf_b = mapping.inv[column_index]  # (leaf_a, leaf_b)
            temp2 = fna_mapping[leaf_a] | fna_mapping[leaf_b]
            print(cluster_id, leaf_a, leaf_b, temp2)
            del fna_mapping[leaf_a], fna_mapping[leaf_b]
            if (leaf_a in Lv):
                del Lv[leaf_a], spec[leaf_a]
            if (leaf_b in Lv):
                del Lv[leaf_b], spec[leaf_b]
            del leaves_check[0]
            if (tree.get_node(leaf_b) in leaves_check):
                leaves_check.remove(tree.get_node(leaf_b))
            # Append the merged cluster as a new last row/column (element-wise
            # max of the two merged rows, -1 on the diagonal), then drop the
            # two old rows/columns.
            temp1 = [
                np.concatenate([[cls_dist[row_index]],
                                [cls_dist[column_index]]]).max(axis=0)
            ]
            cls_dist = np.concatenate([cls_dist, temp1], axis=0)
            temp1 = np.append(temp1, -1)
            temp1 = np.vstack(temp1)
            cls_dist = np.concatenate([cls_dist, temp1], axis=1)
            cls_dist = np.delete(cls_dist, [row_index, column_index], axis=0)
            cls_dist = np.delete(cls_dist, [row_index, column_index], axis=1)
            # change mapping
            del mapping[leaf_a], mapping[leaf_b]
            pending = list(fna_mapping.keys())
            pending.sort()
            low = min([row_index, column_index])
            high = max([row_index, column_index])
            for i in pending:
                if (mapping[i] > low and mapping[i] < high):
                    mapping[i] -= 1
                elif (mapping[i] > high):
                    mapping[i] -= 2
            fna_mapping[cluster_id] = temp2
            mapping[cluster_id] = len(cls_dist) - 1
            if (len(leaves_check) == 0):
                break
    del higher_union

    # rebuild identifiers: leaves become 1..n, internal nodes follow
    all_nodes = tree.all_nodes()
    all_leaves_id = set([])
    leaves = set(tree.leaves())
    for i in leaves:
        all_leaves_id.add(i.identifier)
    id_mapping = bidict.bidict()
    index = 1
    index_internal = len(leaves) + 1
    for i in all_nodes:
        if (recls_label == 0):
            id_mapping[i.identifier] = i.identifier
        elif (i in leaves):
            id_mapping[i.identifier] = index
            index += 1
        else:
            id_mapping[i.identifier] = index_internal
            index_internal += 1
    leaves_identifier = list(range(1, len(leaves) + 1))
    all_identifier = list(id_mapping.values())
    all_identifier.sort()

    # save2file
    os.makedirs(tree_dir + "/kmers", exist_ok=True)
    os.makedirs(tree_dir + "/overlapping_info", exist_ok=True)
    with open(tree_dir + "/tree_structure.txt", "w") as f:
        # One line per node: id, parent ('N' for the first node), children
        # ('N' for leaves), and the genome name for single-genome clusters.
        for nn in all_identifier:
            i = id_mapping.inv[nn]
            f.write("%d\t" % id_mapping[i])
            if (i == all_nodes[0].identifier):
                f.write("N\t")
            else:
                f.write("%d\t" % id_mapping[tree.parent(i).identifier])
            if (nn in leaves_identifier):
                f.write("N\t")
            else:
                [child_a, child_b] = tree.children(i)
                f.write("%d %d\t" % (id_mapping[child_a.identifier],
                                     id_mapping[child_b.identifier]))
            if (len(fna_mapping[i]) == 1):
                temp = list(fna_mapping[i])[0]
                temp = fna_seq.inv[temp]
                f.write("%s" % temp)
            f.write("\n")
    with open(tree_dir + "/hclsMap_95_recls.txt", "w") as f:
        for nn in leaves_identifier:
            i = id_mapping.inv[nn]
            f.write("%d\t%d\t" % (nn, len(fna_mapping[i])))
            temp1 = list(fna_mapping[i])
            for j in temp1:
                temp = fna_seq.inv[j]
                if (j == temp1[-1]):
                    f.write("%s\n" % temp)
                else:
                    f.write("%s," % temp)
    end = time.time()
    print('- The total running time of re-clustering is ', str(end - start),
          ' s\n')
    start = time.time()

    # build indexing structure
    kmerlist = set([])  # all kmers used
    length = {}
    overload_label = 0
    if (len(tree.leaves()) > max_cls_size):
        overload_label = 1

    # from bottom to top (unique k-mers)
    uniq_temp = defaultdict(set)
    rebuilt_nodes = []
    descendant = defaultdict(set)  # including itself
    ancestor = defaultdict(set)
    descendant_leaves = defaultdict(set)
    ancestor[all_nodes[0].identifier].add(all_nodes[0].identifier)
    for i in all_nodes[1:]:
        ancestor[i.identifier] = ancestor[tree.parent(
            i.identifier).identifier].copy()
        ancestor[i.identifier].add(i.identifier)
    for i in reversed(all_nodes):
        print(str(id_mapping[i.identifier]) + " k-mer removing...")
        if (i in leaves):
            uniq_temp[i.identifier] = Lv[i.identifier]
            descendant_leaves[i.identifier].add(i.identifier)
        else:
            (child_a, child_b) = tree.children(i.identifier)
            descendant[i.identifier] = descendant[
                child_a.identifier] | descendant[child_b.identifier]
            descendant_leaves[i.identifier] = descendant_leaves[
                child_a.identifier] | descendant_leaves[child_b.identifier]
            # k-mers shared by both children move up to the parent.
            uniq_temp[i.identifier] = uniq_temp[
                child_a.identifier] & uniq_temp[child_b.identifier]
            uniq_temp[child_a.identifier] = uniq_temp[
                child_a.identifier] - uniq_temp[i.identifier]
            uniq_temp[child_b.identifier] = uniq_temp[
                child_b.identifier] - uniq_temp[i.identifier]
        descendant[i.identifier].add(i.identifier)
    all_nodes_id = set(id_mapping.keys())

    # remove overlapping
    for i in reversed(all_nodes):
        print(str(id_mapping[i.identifier]) + " k-mer set building...")
        # no difference with sibling, subtree and ancestors
        if (i == all_nodes[0]):
            kmer_t = uniq_temp[i.identifier]
        else:
            diff = {}
            temp = all_nodes_id - descendant[i.identifier] - set([
                tree.siblings(i.identifier)[0].identifier
            ]) - ancestor[i.identifier]
            for j in temp:
                diff[j] = len(uniq_temp[j])
            a = sorted(diff.items(), key=lambda x: x[1], reverse=True)
            kmer_t = uniq_temp[i.identifier]
            for j in a:
                k = j[0]
                kmer_t = kmer_t - uniq_temp[k]
            # remove special k-mers
            temp = all_leaves_id - descendant_leaves[i.identifier]
            diff = {}
            for j in temp:
                diff[j] = len(spec[j])
            a = sorted(diff.items(), key=lambda x: x[1], reverse=True)
            for j in a:
                k = j[0]
                kmer_t = kmer_t - spec[k]
        if (len(kmer_t) < minsize and overload_label == 0):
            rebuilt_nodes.append(i)
            print("%d waiting for reconstruction..." %
                  id_mapping[i.identifier])
        else:
            if (len(kmer_t) > maxsize):
                # random.sample() no longer accepts a set (Python 3.11+).
                kmer_t = set(random.sample(list(kmer_t), maxsize))
            with open(tree_dir + "/kmers/" + str(id_mapping[i.identifier]),
                      "w") as f:
                for j in kmer_t:
                    f.write("%d " % j)
            length[i] = len(kmer_t)
            kmerlist = kmerlist | kmer_t
    del uniq_temp

    # rebuild nodes
    overlapping = defaultdict(dict)
    intersection = defaultdict(set)
    higher_union = defaultdict(set)
    del_label = {}
    for i in leaves:
        del_label[i.identifier] = [0, 0]
    for i in rebuilt_nodes:
        print(str(id_mapping[i.identifier]) + " k-mer set rebuilding...")
        kmer_t = get_intersect(intersection, descendant_leaves[i.identifier],
                               Lv, del_label, i.identifier)
        diff = get_diff(higher_union, descendant_leaves, depths, all_nodes, i,
                        Lv, spec, del_label)
        for j in diff:
            kmer_t = kmer_t - j
        lower_leaves = set([])
        for j in leaves:
            if (depths[j] < depths[i]):
                lower_leaves.add(j)
        if (len(kmer_t) > maxsize):
            # Keep the maxsize k-mers shared with the fewest lower leaves.
            # NOTE(review): k-mers shared with no lower leaf never enter
            # kmer_overlapping_sta, so temp may hold fewer than maxsize
            # entries and temp[j] would raise IndexError -- confirm intent.
            kmer_overlapping_sta = defaultdict(int)
            for j in lower_leaves:
                kmer_o = Lv[j.identifier] & kmer_t
                for k in kmer_o:
                    kmer_overlapping_sta[k] += 1
            temp = sorted(kmer_overlapping_sta.items(),
                          key=lambda kv: (kv[1], kv[0]))
            kmer_t = set([])
            for j in range(0, maxsize):
                kmer_t.add(temp[j][0])
        nkmer = {}  # k-mer id -> position inside this node's k-mer file
        with open(tree_dir + "/kmers/" + str(id_mapping[i.identifier]),
                  "w") as f:
            for index, j in enumerate(kmer_t):
                f.write("%d " % j)
                nkmer[j] = index
        length[i] = len(kmer_t)
        kmerlist = kmerlist | kmer_t
        # save overlapping info
        for j in lower_leaves:
            temp = Lv[j.identifier] & kmer_t
            if (len(temp) > 0):
                ii = id_mapping[i.identifier]
                jj = id_mapping[j.identifier]
                overlapping[jj][ii] = set([])
                for k in temp:
                    overlapping[jj][ii].add(nkmer[k])
        delete(Lv, spec, del_label)
    for i in overlapping:
        with open(tree_dir + "/overlapping_info/" + str(i), "w") as f, \
                open(tree_dir + "/overlapping_info/" + str(i) + "_supple",
                     "w") as f1:
            # The _supple file stores, per rebuilt node, the 1-based line
            # number of its position list in the main file.
            count = -1
            for j in overlapping[i]:
                if (len(overlapping[i]) != 0):
                    f.write("%d\n" % j)
                    for k in overlapping[i][j]:
                        f.write("%d " % k)
                    f.write("\n")
                    count += 2
                    f1.write("%d %d\n" % (j, count))

    # final saving
    with open(tree_dir + "/reconstructed_nodes.txt", "w") as f:
        for i in rebuilt_nodes:
            f.write("%d\n" % id_mapping[i.identifier])
    with open(tree_dir + "/node_length.txt", "w") as f:
        for nn in all_identifier:
            i = id_mapping.inv[nn]
            f.write("%d\t%d\n" % (nn, length[tree[i]]))
    _write_kmer_index(tree_dir, kmerlist, kmer_index_dict)
    end = time.time()
    print(
        '- The total running time of tree-based indexing struture building is ',
        str(end - start), ' s\n')
class PathList:
    """Cache of a remote disk's directory structure as a tree keyed by file id.

    The tree always contains a synthetic ``'root'`` node. Every other node
    carries a ``FileInfo`` in ``data``, uses the remote file id as its
    identifier and the file name as its tag.
    """

    # Timestamp layout used for the 'created_at' / 'updated_at' fields.
    _TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ'

    def __init__(self, disk):
        """Create an empty cache backed by *disk* (must provide ``get_file_list``)."""
        self._tree = Tree()
        self._disk = disk
        self._tree.create_node(tag='root', identifier='root')
        # Default number of directory levels fetched below the starting node.
        self.depth = 3

    def _make_file_info(self, item):
        """Build a ``FileInfo`` from one entry of a file listing."""
        common = dict(
            name=item['name'],
            id=item['file_id'],
            pid=item['parent_file_id'],
            ctime=time.strptime(item['created_at'], self._TIME_FORMAT),
            update_time=time.strptime(item['updated_at'], self._TIME_FORMAT),
            hidden=item['hidden'],
        )
        if item['type'] == 'file':
            return FileInfo(type=True,
                            category=item['category'],
                            content_type=item['content_type'],
                            size=item['size'],
                            content_hash_name=item['content_hash_name'],
                            content_hash=item['content_hash'],
                            # Not every listing entry carries a URL.
                            download_url=item.get('download_url', ''),
                            **common)
        return FileInfo(type=False, **common)

    def update_path_list(self, file_id='root', depth=None, is_fid=True):
        """Fetch the listing of a directory and merge it into the tree.

        Args:
            file_id: Directory id, or a path when ``is_fid`` is false.
            depth: How many further directory levels to descend into;
                defaults to ``self.depth``.
            is_fid: Whether ``file_id`` is already an id.

        Returns:
            ``False`` when the disk returns an empty listing, else ``True``.
        """
        if depth is None:
            depth = self.depth
        if not is_fid:
            file_id = self.get_path_fid(file_id, update=False)
        file_list = self._disk.get_file_list(file_id)
        if not file_list:
            return False
        for item in file_list:
            file_info = self._make_file_info(item)
            if self._tree.get_node(file_info.id):
                # Refresh the entry itself. The previous code passed the
                # parent's id here and clobbered the parent's data.
                self._tree.update_node(file_info.id, data=file_info)
            else:
                self._tree.create_node(tag=file_info.name,
                                       identifier=file_info.id,
                                       data=file_info,
                                       parent=file_id)
            if not file_info.type and depth:
                self.update_path_list(file_id=file_info.id, depth=depth - 1)
        return True

    def tree(self, path='root'):
        """Refresh and print the subtree rooted at *path*.

        Raises:
            FileNotFoundError: If *path* cannot be resolved.
        """
        file_id = self.get_path_fid(path, update=False)
        # Validate before refreshing: an unresolved path yields False, which
        # must never be sent to the disk as a file id.
        if not file_id:
            raise FileNotFoundError(path)
        self.update_path_list(file_id)
        self._tree.show(file_id)

    def get_path_list(self, path, update=True):
        """Return the ``FileInfo`` entries for *path* (see ``get_fid_list``).

        Raises:
            FileNotFoundError: If *path* cannot be resolved.
        """
        file_id = self.get_path_fid(path, update=update)
        if not file_id:
            raise FileNotFoundError(path)
        return self.get_fid_list(file_id, update=update)

    def get_fid_list(self, file_id, update=True):
        """Return the ``FileInfo`` list for a node id.

        A file yields a one-element list holding its own info; a directory
        (or the root) yields the infos of its direct children.

        Raises:
            FileNotFoundError: If *file_id* is falsy.
        """
        if not file_id:
            raise FileNotFoundError(file_id)
        self.auto_update_path_list(update, file_id)
        if file_id != 'root' and self._tree.get_node(file_id).data.type:
            return [self._tree.get_node(file_id).data]
        return [node.data for node in self._tree.children(file_id)]

    def get_path_fid(self, path, file_id='root', update=True):
        """Resolve a slash-separated *path* to a node id.

        A leading ``root`` component is ignored. When a directory on the way
        has no cached children, ``auto_update_path_list`` is given a chance
        to load them before the lookup.

        Returns:
            The id of the final component, ``'root'`` for the root
            spellings, or ``False`` when any component is missing.
        """
        path = PurePosixPath(Path(path).as_posix())
        if str(path) in ('', '/', '\\', '.', 'root'):
            return 'root'
        path_list = [part for part in str(path).split('/') if part]
        # Guard the index: a path such as '//' has no components at all.
        if path_list and path_list[0] == 'root':
            path_list = path_list[1:]
        if not path_list:
            return False
        for part in path_list:
            node_list = self._tree.children(file_id)
            if not node_list:
                self.auto_update_path_list(update, file_id)
                node_list = self._tree.children(file_id)
            match = next((node for node in node_list if node.tag == part),
                         None)
            if match is None:
                return False
            file_id = match.identifier
        return file_id

    def get_path_node(self, path, update=True):
        """Return the tree node for *path*, or ``False`` if it is unknown."""
        file_id = self.get_path_fid(path, update=update)
        if file_id:
            return self._tree.get_node(file_id)
        return False

    def get_path_parent_node(self, path, update=True):
        """Return the parent node of *path*, or ``False`` if there is none."""
        file_id = self.get_path_fid(path, update=update)
        if file_id:
            node = self._tree.parent(file_id)
            if node:
                return node
        return False

    def auto_update_path_list(self, update=True, file_id=None):
        """Refresh the cache according to the *update* policy.

        With ``update`` false and a ``file_id`` given, only that directory
        is re-listed (``depth=0``). With ``update`` true, the whole tree is
        loaded, but only while it still holds nothing but the root node.
        """
        if not update and file_id:
            return self.update_path_list(file_id, depth=0)
        elif update and len(self._tree) == 1:
            return self.update_path_list()
class LuaDec:
    """Disassembler for Lua 5.2 bytecode that prints a listing to stdout.

    Constructing an instance reads the file, validates the header and dumps
    every function prototype. Size fields are read as 4-byte little-endian
    integers. ``self.tree`` holds one node per function (``root``,
    ``function_0``, ``function_0_1``, ...) whose ``data`` contains the
    ``instructions``, ``constants`` and ``upvalues`` lists.
    """

    def __init__(self, fileName, format = "luadec"):
        """Disassemble *fileName*; *format* is ``"luadec"`` or ``"luaasm"``."""
        self.format = format
        self.ptr = 0   # read offset into self.fileBuf
        self.pc = 0    # index of the instruction currently being printed
        self.tree = Tree()
        self.readFile(fileName)
        self.readHeader()
        self.readFunction()

    def readFile(self, fileName):
        """Load the whole file into ``self.fileBuf``."""
        with open(fileName, "rb") as f:
            self.fileBuf = f.read()

    def readUInt32(self):
        """Read a little-endian uint32 at ``self.ptr`` and advance past it."""
        result = struct.unpack("<I", self.fileBuf[self.ptr:self.ptr + 4])[0]
        self.ptr += 4
        return result

    def readUInt64(self):
        """Read a little-endian uint64 at ``self.ptr`` and advance past it."""
        result = struct.unpack("<Q", self.fileBuf[self.ptr:self.ptr + 8])[0]
        self.ptr += 8
        return result

    def formatValue(self, val):
        """Render a constant the way it is shown in the listing.

        Strings are double-quoted, booleans become ``true``/``false``,
        ``None`` becomes ``nil`` and integral floats are returned as ``int``.
        Any other value is returned unchanged.
        """
        if isinstance(val, str):
            return "\"{}\"".format(val)
        elif isinstance(val, bool):
            return "true" if val else "false"
        elif val is None:
            return "nil"
        elif isinstance(val, float) and val.is_integer():
            # is_integer() is False for inf/nan, where int(val) would raise.
            return int(val)
        else:
            return val

    def processUpvalue(self, i, funcName):
        """Describe upvalue *i* (``[instack, idx]``) of function *funcName*.

        An in-stack upvalue is ``G`` in the root function and ``UR<idx>``
        elsewhere. Otherwise the parent's upvalue ``idx`` is resolved
        recursively and prefixed with ``U`` unless it resolves to ``G``.

        Raises:
            Exception: If ``instack`` is neither 0 nor 1.
        """
        if i[0] == 1:
            if funcName == "root":
                return "G"
            return "UR{}".format(i[1])
        elif i[0] == 0:
            pNode = self.tree.parent(funcName)
            result = self.processUpvalue(pNode.data['upvalues'][i[1]], pNode.identifier)
            if result[-1] != "G":
                return "U" + result
            else:
                return result
        else:
            raise Exception("Unexpected upvalue {}".format(i[0]))

    def readHeader(self):
        """Validate the 18-byte header and position ``self.ptr`` after it.

        Raises:
            Exception: On a wrong signature, version or tail bytes.
        """
        magic = self.fileBuf[:4]
        if magic != b"\x1bLua":
            raise Exception("Unknown magic: {0}".format(magic.hex()))
        version = self.fileBuf[4]
        if version != 82:  # 0x52
            raise Exception("This program support ONLY Lua 5.2")
        lua_tail = self.fileBuf[12:18]
        if lua_tail != b"\x19\x93\r\n\x1a\n":
            raise Exception("Unexcepted lua_tail value: {0}".format(lua_tail.hex()))
        self.ptr = 18

    def _processInstructions(self, funcName, fmtVals, instructions):
        """Print every instruction of *funcName*, tracking ``self.pc``."""
        self.pc = 0
        self.currFunc = funcName
        self.fmtVals = fmtVals
        for ins in instructions:
            self.processInstruction(ins)
            self.pc += 1

    def readFunction(self, parent=None):
        """Parse the prototype at ``self.ptr``, print it, recurse into children.

        Args:
            parent: Tree identifier of the enclosing function; ``None`` for
                the main chunk, which is named ``root``.
        """
        # Derive this function's name from its position in the tree.
        if parent:
            funcName = "function"
            funcSuffix = []
            pNode = self.tree.get_node(parent).identifier
            # This function becomes the next child of its parent ...
            funcSuffix.append("_{0}".format(len(self.tree.children(pNode))))
            # ... and each ancestor is the latest child of its own parent.
            while self.tree.parent(pNode):
                pNode = self.tree.parent(pNode).identifier
                funcSuffix.append("_{0}".format(len(self.tree.children(pNode)) - 1))
            funcSuffix.reverse()
            for suffix in funcSuffix:
                funcName += suffix
        else:
            funcName = "root"

        #ProtoHeader
        protoheader = struct.unpack("<IIccc", self.fileBuf[self.ptr:self.ptr + 11])
        self.ptr += 11
        lineDefined = protoheader[0]
        lastLineDefined = protoheader[1]
        numParams = ord(protoheader[2])
        is_vararg = ord(protoheader[3])
        maxStackSize = ord(protoheader[4])

        #Code
        sizeCode = self.readUInt32()
        instructions = []
        for i in range(sizeCode):
            instructions.append(self.readUInt32())

        #Constants
        sizeConstants = self.readUInt32()
        constants = []
        for i in range(sizeConstants):
            const_type = self.fileBuf[self.ptr]
            self.ptr += 1
            if const_type == const.LUA_DATATYPE['LUA_TNIL']:
                const_val = None
                const_type = "nil"
            elif const_type == const.LUA_DATATYPE['LUA_TNUMBER']:
                # A Lua number is a double (8 bytes).
                const_val = struct.unpack("<d", self.fileBuf[self.ptr:self.ptr + 8])[0]
                self.ptr += 8
                const_type = "number"
            elif const_type == const.LUA_DATATYPE['LUA_TBOOLEAN']:
                const_val = bool(self.fileBuf[self.ptr])
                self.ptr += 1
                const_type = "bool"
            elif const_type == const.LUA_DATATYPE['LUA_TSTRING']:
                str_len = self.readUInt32()
                # str_len counts the trailing NUL, which is left out of buf.
                buf = self.fileBuf[self.ptr:self.ptr + str_len - 1]
                try:
                    const_val = str(buf, encoding="utf8")
                except UnicodeDecodeError:
                    # Not valid UTF-8: show each byte as a decimal escape.
                    const_val = ""
                    for byte in buf:
                        const_val += "\\{}".format(byte)
                self.ptr += str_len
                const_type = "string"
                if self.fileBuf[self.ptr - 1] != 0:
                    raise Exception("Bad string")
            else:
                raise Exception("Undefined constant type {0}.".format(hex(const_type)))
            constants.append([const_val, const_type])

        #Skip Protos
        # Remember where the nested prototypes start; the upvalue table that
        # follows them is needed before they can be printed.
        ptrBackupStart = self.ptr
        sizeProtos = self.readUInt32()
        for i in range(sizeProtos):
            self.skipFunction()

        #Upvalue
        sizeUpvalue = self.readUInt32()
        upvalues = []
        for i in range(sizeUpvalue):
            instack = self.fileBuf[self.ptr]
            idx = self.fileBuf[self.ptr + 1]
            self.ptr += 2
            upvalues.append([instack, idx])

        #srcName (not used in the listing, only skipped)
        sizeSrcName = self.readUInt32()
        if sizeSrcName > 0:
            self.ptr += sizeSrcName

        #Lines
        sizeLines = self.readUInt32()
        self.ptr += sizeLines

        #LocVars
        sizeLocVars = self.readUInt32()
        #TODO: handle sizeLocVars != 0 (bytecode that was not stripped)

        #UpvalNames
        sizeUpvalNames = self.readUInt32()

        # Store the parsed content in the tree.
        data = {
            "instructions": instructions,
            "constants": constants,
            "upvalues": upvalues,
        }
        self.tree.create_node(funcName, funcName, parent=parent, data=data)

        if self.format == "luaasm":
            print("\n.fn(R{}{})".format(numParams, ", __va_args__" if is_vararg else ""))
        print("; {:<20s}{}".format("Function", funcName))
        print("; {:<20s}{}".format("Defined from line", lineDefined))
        print("; {:<20s}{}".format("Defined to line", lastLineDefined))
        print("; {:<20s}{}".format("#Upvalues", sizeUpvalue))
        print("; {:<20s}{}".format("#Parameters", numParams))
        print("; {:<20s}{}".format("Is_vararg", is_vararg))
        if self.format == "luaasm":
            print("; {:<20s}{}".format("Max Stack Size", maxStackSize))
        else:
            print("; {:<20s}{}\n".format("Max Stack Size", maxStackSize))

        # Lookup table (K<n> / U<n> -> text) used to fill in the comments.
        fmtVals = {}
        for count, entry in enumerate(data['constants']):
            fmtVals["K{}".format(count)] = self.formatValue(entry[0])
        for count, entry in enumerate(data['upvalues']):
            fmtVals["U{}".format(count)] = self.processUpvalue(entry, funcName)

        if self.format == "luadec":
            self._processInstructions(funcName, fmtVals, data['instructions'])
            print("\n")
        if self.format == "luaasm":
            print("\n.instruction")
            self._processInstructions(funcName, fmtVals, data['instructions'])

        if self.format == "luaasm":
            print("\n.const")
        else:
            print("\n; Constants")
        for count, entry in enumerate(data['constants']):
            print("K{:<5s} = {}".format(str(count), self.formatValue(entry[0])))

        if self.format == "luaasm":
            print("\n.upvalue")
        else:
            print("\n; Upvalues")
        for count, entry in enumerate(data['upvalues']):
            if self.format == "luaasm":
                print("U{:<5s} = L{} R{}".format(str(count), entry[0], entry[1]))
            else:
                print("{:>5s}\t{}\t{}".format(str(count), entry[0], entry[1]))

        #Proto
        # Go back and really parse the nested prototypes, then restore the
        # position reached after this function's trailing sections.
        ptrBackupEnd = self.ptr
        self.ptr = ptrBackupStart
        sizeProtos = self.readUInt32()
        for i in range(sizeProtos):
            self.readFunction(parent=funcName)
        self.ptr = ptrBackupEnd

        if self.format == "luaasm":
            print(".endfn\n")

    def skipFunction(self):
        """Advance ``self.ptr`` past one prototype without recording anything.

        Used to reach the sections that follow the nested prototypes.
        """
        #ProtoHeader
        self.ptr += 11
        #Code
        sizeCode = self.readUInt32()
        self.ptr += 4 * sizeCode
        #Constants
        sizeConstants = self.readUInt32()
        for i in range(sizeConstants):
            const_type = self.fileBuf[self.ptr]
            self.ptr += 1
            if const_type == const.LUA_DATATYPE['LUA_TNIL']:
                pass
            elif const_type == const.LUA_DATATYPE['LUA_TNUMBER']:
                self.ptr += 8
            elif const_type == const.LUA_DATATYPE['LUA_TBOOLEAN']:
                self.ptr += 1
            elif const_type == const.LUA_DATATYPE['LUA_TSTRING']:
                str_len = self.readUInt32()
                self.ptr += str_len
            else:
                raise Exception("Undefined constant type {0}.".format(hex(const_type)))
        #Protos
        sizeProtos = self.readUInt32()
        for i in range(sizeProtos):
            self.skipFunction()
        #Upvalue
        sizeUpvalue = self.readUInt32()
        self.ptr += 2 * sizeUpvalue
        #srcName
        sizeSrcName = self.readUInt32()
        if sizeSrcName > 0:
            self.ptr += sizeSrcName
        #Lines
        sizeLines = self.readUInt32()
        self.ptr += sizeLines
        #LocVars
        sizeLocVars = self.readUInt32()
        #TODO: handle sizeLocVars != 0 (bytecode that was not stripped)
        #UpvalNames
        sizeUpvalNames = self.readUInt32()

    def getExtraArg(self):
        """Fetch the Ax operand of the instruction following the current one.

        Returns:
            ``(True, Ax)`` when the next instruction is ``OP_EXTRAARG``,
            otherwise ``(False, <error message>)`` -- including when the
            current instruction is the last one of the function.
        """
        instructions = self.tree.get_node(self.currFunc).data['instructions']
        if self.pc + 1 >= len(instructions):
            return False, "ERROR: C == 0 but no OP_EXTRAARG followed."
        next_ins = instructions[self.pc + 1]
        opCode = next_ins % (1 << 6)
        if const.opCode[opCode] == "OP_EXTRAARG":
            Ax = (next_ins >> 6)
            return True, Ax
        else:
            return False, "ERROR: C == 0 but no OP_EXTRAARG followed."

    def processInstruction(self, ins):
        """Decode one 32-bit instruction and print its listing line.

        The low 6 bits select the opcode; ``const.opMode`` gives the operand
        modes for A, B, C (indices 1-3) and the layout name (index 4), and
        ``const.pseudoCode`` the comment template.

        Raises:
            Exception: On an operand mode or layout this method does not know.
        """
        opCode = ins % (1 << 6)
        opMode = const.opMode[opCode]
        opName = const.opCode[opCode]
        A = 0
        B = 0
        C = 0
        if opMode[4] == "iABC":
            A = (ins >> 6 ) % (1 << 8)
            B = (ins >> 23)  # top 9 bits, no mask needed
            C = (ins >> 14) % (1 << 9)
        elif opMode[4] == "iABx":
            A = (ins >> 6 ) % (1 << 8)
            B = (ins >> 14)  # top 18 bits
        elif opMode[4] == "iAsBx":
            A = (ins >> 6 ) % (1 << 8)
            B = (ins >> 14) - (1 << 17) + 1  # remove the sBx bias
        elif opMode[4] == "iAx":
            A = (ins >> 6 )  # top 26 bits
        else:
            raise Exception("Unknown opMode {0}".format(opMode[4]))

        #format A
        if opMode[1] == 1:
            parsedA = "R{0}".format(A)
        elif opMode[1] == 0:
            if opName == "OP_SETTABUP":
                parsedA = "U{0}".format(A)
            elif opName in ["OP_EQ", "OP_LT", "OP_LE"]:
                parsedA = A
            else:
                parsedA = "R{0}".format(A)
        else:
            raise Exception("Unknown A Mode {0}".format(opMode[1]))

        #format B
        if opMode[2] == 1:
            if "UP" in opName:
                parsedB = "U{0}".format(B)
            else:
                parsedB = "{0}".format(B)
        elif opMode[2] == 0:
            parsedB = ""
        elif opMode[2] == 2 or opMode[2] == 3:
            if opMode[4] == "iAsBx":
                # An sBx operand can only be an immediate, never a register.
                parsedB = "{0}".format(B)
            elif opName == "OP_LOADK":
                # LOADK always reads a constant (Kx), never a register.
                parsedB = "K{0}".format(B)
            elif B < 0x100:
                parsedB = "R{0}".format(B)
            else:
                parsedB = "K{0}".format(B - 0x100)
                B -= 0x100
        else:
            raise Exception("Unknown B Mode {0}".format(opMode[2]))

        #format C
        if opMode[3] == 1:
            if "UP" in opName:
                parsedC = "U{0}".format(C)
            else:
                parsedC = "{0}".format(C)
        elif opMode[3] == 0:
            parsedC = ""
        elif opMode[3] == 2 or opMode[3] == 3:
            if C < 0x100:
                parsedC = "R{0}".format(C)
            else:
                parsedC = "K{0}".format(C - 0x100)
                C -= 0x100
        else:
            raise Exception("Unknown C Mode {0}".format(opMode[3]))

        # parse comment
        # First pass: fill the template; K<n>/U<n> operands are wrapped in
        # braces so the second pass can substitute their values.
        if parsedB.startswith(('K', 'U')):
            parsedB_ = "{{{}}}".format(parsedB)
        else:
            parsedB_ = parsedB
        if parsedC.startswith(('K', 'U')):
            parsedC_ = "{{{}}}".format(parsedC)
        else:
            parsedC_ = parsedC
        comment = const.pseudoCode[opCode].format(A=A,B=B,C=C,PB=parsedB_,PC=parsedC_)
        # Second pass: resolve upvalues and constants.
        comment = comment.format(**self.fmtVals)

        # Opcodes whose comment needs more than the template.
        if opName == "OP_LOADBOOL":
            # Turn the trailing 0/1 into false/true.
            comment = comment[:-1]
            if B:
                comment += "true"
            else:
                comment += "false"
            # A non-zero C skips the next instruction.
            if C:
                comment += "; goto {0}".format(self.pc + 2)
        elif opName == "OP_LOADNIL":
            comment = ""
            for i in range(B + 1):
                comment += "R{0}, ".format(A + i)
            comment = comment[:-2]
            comment += " := nil"
        elif opName == "OP_SELF":
            comment = "R{}".format(A+1) + comment[2:]
        elif opName == "OP_JMP":
            comment += " (goto {0})".format(self.pc + 1 + B)
        elif opName in ["OP_EQ", "OP_LT", "OP_LE", "OP_TEST", "OP_TESTSET"]:
            if A:
                if opName == "OP_EQ":
                    comment = comment.replace("==", "~=")
                elif opName == "OP_LT":
                    comment = comment.replace("<", ">=")
                elif opName == "OP_LE":
                    comment = comment.replace("<=", ">")
            comment += " goto {0} else goto {1}".format(self.pc + 2, self.pc + 1)
            if C == 0:
                comment = comment.replace("not ", "")
        elif opName == "OP_CALL":
            comment = ""
            for i in range(C - 1):
                comment += "R{}, ".format(A + i)
            if C > 1:
                comment = comment[:-2] + " := R{}(".format(A)
            elif C == 1:
                comment += " := R{}(".format(A)
            else:
                comment = "R{} to top := R{}(".format(A, A)
            for i in range(B - 1):
                comment += "R{}, ".format(A + i + 1)
            if B > 1:
                comment = comment[:-2] + ")"
            elif B == 1:
                comment += ")"
            else:
                comment += "R{} to top)".format(C)
        elif opName == "OP_TAILCALL":
            comment = "R{} to top := R{}(".format(A, A)
            for i in range(B - 1):
                comment += "R{}, ".format(A + i + 1)
            if B > 1:
                comment = comment[:-2] + ")"
            else:
                comment = comment + ")"
        elif opName == "OP_RETURN":
            for i in range(B - 1):
                comment += "R{}, ".format(A + i)
            if B > 1:
                comment = comment[:-2]
            elif B == 0:
                comment += "R{} to top".format(A)
        elif opName == "OP_FORLOOP":
            comment = comment.replace("RD", "R{}".format(A + 1))
            comment = comment.replace("RE", "R{}".format(A + 2))
            comment = comment.replace("RF", "R{}".format(A + 3))
            comment += "goto {} end".format(self.pc + B + 1)
        elif opName == "OP_FORPREP":
            comment = comment.replace("RD", "R{}".format(A + 2))
            comment += "(goto {})".format(self.pc + B + 1)
        elif opName == "OP_TFORCALL":
            comment = comment.replace("RD", "R{}".format(A + 1))
            comment = comment.replace("RE", "R{}".format(A + 2))
            comment = comment.replace("RF", "R{}".format(A + 3))
            comment = comment.replace("RG", "R{}".format(A + 4))
        elif opName == "OP_TFORLOOP":
            comment = comment.replace("RD", "R{}".format(A + 1))
            comment += " (goto {}))".format(self.pc + B + 1)
        elif opName == "OP_CLOSURE":
            if self.currFunc == "root":
                comment += "function_{})".format(B)
            else:
                comment += self.currFunc + "_{})".format(B)
        elif opName == "OP_SETLIST":
            real_c = C
            err = False
            if C == 0:
                # The real C is carried by the following OP_EXTRAARG.
                success, result = self.getExtraArg()
                if success:
                    real_c = result
                else:
                    comment += result
                    err = True
            if not err:
                LFIELDS_PER_FLUSH = 50
                start_index = (real_c - 1) * LFIELDS_PER_FLUSH
                if B == 0:
                    comment += "R{}[{}] to R{}[top] := R{} to top".format(A, start_index, A, A + 1)
                elif B == 1:
                    comment += "R{}[{}] := R{}".format(A, start_index, A + 1)
                else:
                    comment += "R{}[{}] to R{}[{}] := R{} to R{}".format(A, start_index, A, start_index + B - 1, A + 1, A + B)
                if C == 0:
                    comment += "; CONTAINS EXTRAARG"
        elif opName == "OP_LOADKX":
            success, result = self.getExtraArg()
            if success:
                Ax = result
                comment += "R{} := {{K{}}}".format(A, Ax).format(**self.fmtVals)
            else:
                comment += result

        seq = [str(part) for part in [parsedA, parsedB, parsedC] if part != ""]
        regsFmt = " ".join(seq)
        if self.format == "luaasm":
            print("{:<10s}{:<13s} ; {:>5s} {}".format(opName[3:], regsFmt, "[{}]".format(str(self.pc)), comment))
        else:
            print("{:>5s} [-]: {:<10s}{:<13s}; {}".format(str(self.pc), opName[3:], regsFmt, comment))
kraken_data['name'] = line[5] kraken_data['depth'] = calculate_depth(kraken_data['name']) kraken_data['name'] = kraken_data['name'].lstrip(' ') if kraken_data['name'] == "unclassified": unclassified = Node(tag = kraken_data['name'], identifier = kraken_data['ncbi_taxonomy_id'], data = kraken_data) elif kraken_data['name'] == "root": add_node(tree, None, kraken_data) previous = tree.get_node(kraken_data['ncbi_taxonomy_id']) elif kraken_data['depth'] > tree.depth(previous): add_node(tree, previous, kraken_data) previous = tree.get_node(kraken_data['ncbi_taxonomy_id']) elif kraken_data['depth'] == tree.depth(previous): add_node(tree, tree.parent(previous.identifier), kraken_data) previous = tree.get_node(kraken_data['ncbi_taxonomy_id']) elif kraken_data['depth'] < tree.depth(previous): previous_search = previous while(tree.depth(previous_search) > kraken_data['depth']): previous_search = tree.parent(previous_search.identifier) add_node(tree, tree.parent(previous_search.identifier), kraken_data) previous = tree.get_node(kraken_data['ncbi_taxonomy_id']) tree_dict = tree.to_dict(with_data=True) def transform(tree_dict): for key in tree_dict.keys(): for data_key in tree_dict[key]['data'].keys(): tree_dict[data_key] = tree_dict[key]['data'][data_key] tree_dict[key].pop('data', None)
tree = Tree() tree.create_node("Harry", "h") # korzen tree.create_node("Jane", "j", parent="h") tree.create_node("Bill", "b", parent="h") tree.create_node("Diane", "d", parent="j") tree.create_node("Mary", "m", parent="d") tree.create_node("Harry", "h2", parent="j") tree.show() x = tree.get_node("m") print(x.tag) print() print(x.identifier) print() y = tree.parent("m") print(y.tag) print() print(y.identifier) print() z = tree.get_node("h2") print(z.tag) print() print(z.is_root()) print() print(z.is_leaf()) print() print(tree.paths_to_leaves()) def duplicate_node_path_check(tree, node):
class IOTeqDBBuilder():
    """Build the flat tables of an IOTeq tag database from a JSON config.

    The config's ``["database"]["tags"]`` mapping is turned into a
    ``treelib`` tree of ``IOTeqTag`` objects. Alongside the tree the
    builder fills several flat lists:

    * ``constPtrChar``  -- tag names as character codes, each name
      followed by a ``0`` terminator.
    * ``dataPtr``       -- default values as ``hex()`` strings.
    * ``persistentPtr`` -- values of persistent tags as ``hex()`` strings.
    * ``constPtrTree``  -- concatenated ``getStruct()`` output of all tags,
      ordered by address.
    * ``tagList``       -- every ``IOTeqTag`` created so far.
    """

    def __init__(self, configFile):
        """Load the tag definitions from ``configFile`` and reset all tables.

        Raises whatever ``open``/``json.load`` raise for an unreadable or
        malformed file, and ``KeyError`` if ``database.tags`` is missing.
        """
        with open(configFile, 'r') as f:
            jsonOutput = json.load(f)
        self.databaseTags = jsonOutput["database"]["tags"]

        # Address stride between two consecutive tags (see setTagAddresses).
        self.IOTEQ_TAG_BYTE_SIZE = 44
        self.tree = Tree()
        self.constPtrChar = []
        self.constPtrTree = []
        self.tagList = []
        self.dataPtr = []
        self.persistentPtr = []
        self.currentTagAddress = 0

    def totalNumberOfTags(self):
        """Return how many tags have been registered in ``tagList``."""
        return len(self.tagList)

    def addTag(self, tag):
        """Append ``tag`` to ``tagList`` (the tree is not touched)."""
        self.tagList.append(tag)

    def addNameToCharPtr(self, name):
        """Append the character codes of ``name`` plus a 0 terminator."""
        self.constPtrChar.extend(ord(char) for char in name)
        self.constPtrChar.append(0)  # string terminator

    def setRoot(self, rootName):
        """Create the root tag/node named ``rootName`` and register it."""
        rootTag = IOTeqTag(rootName, 0x00, len(rootName))
        self.tree.create_node(rootName, rootName, None, rootTag)
        self.tagList.append(rootTag)
        self.addNameToCharPtr(rootName)

    @staticmethod
    def _encodeValue(tag):
        """Return the value of a tag definition as a list of ``hex()`` strings.

        * ``"Number"``: 4 little-endian bytes, packed as a float when
          ``config["numtype"] == "float"`` and as an unsigned 32-bit
          integer otherwise. A missing ``numtype`` used to fail on an
          unbound local; it is now treated like any other non-float
          numtype.
        * ``"Text"``: one entry per character followed by a single
          ``hex(0)`` terminator.
        * any other datatype: nothing.
        """
        datatype = tag["datatype"]
        if datatype == "Number":
            isFloat = tag["config"].get("numtype") == "float"
            packed = struct.pack("<f" if isFloat else "<L", tag["value"])
            return [hex(b) for b in packed]
        if datatype == "Text":
            encoded = [hex(ord(char)) for char in tag["value"]]
            encoded.append(hex(0))
            return encoded
        return []

    def addValueToDataPtr(self, tag):
        """Append the encoded value of the tag definition to ``dataPtr``."""
        self.dataPtr.extend(self._encodeValue(tag))

    def addValueToPersistentPtr(self, tag):
        """Append the encoded value of the tag definition to ``persistentPtr``."""
        self.persistentPtr.extend(self._encodeValue(tag))

    def createTree(self, tags, parent=None):
        """Recursively create tags/nodes for the ``tags`` mapping.

        ``tags`` maps a tag name to its definition (``arraydim``,
        ``datatype`` and, for non-folders, ``config`` and ``value``;
        folders carry ``children``). Every created node is attached to
        the tree node whose identifier is ``parent``.
        """
        for tag, definition in tags.items():
            arrayDim = definition["arraydim"]
            datatype = definition["datatype"]
            for i in range(arrayDim):
                # Array tags get an index suffix, scalars keep their name.
                if arrayDim > 1:
                    tagName = tag + "[" + str(i) + "]"
                else:
                    tagName = tag

                # Record where the name starts in the character table.
                charIndex = len(self.constPtrChar)
                self.addNameToCharPtr(tagName)

                # Name length includes the terminating 0.
                newTag = IOTeqTag(tagName, charIndex, len(tagName) + 1)

                if datatype != "Folder":
                    # Numbers occupy 4 bytes, text its character count.
                    if datatype == "Number":
                        newTag.valueSize = 4
                    elif datatype == "Text":
                        newTag.valueSize = len(definition["value"])

                    config = definition["config"]
                    # `== True` kept on purpose: only True/1 enable it.
                    if "persistent" in config and config["persistent"] == True:  # noqa: E712
                        newTag.persistentValuePtr = len(self.persistentPtr)
                        self.addValueToPersistentPtr(definition)
                        newTag.isPersistent = 1

                    # Default value goes into the data table.
                    newTag.valuePtr = len(self.dataPtr)
                    self.addValueToDataPtr(definition)

                    # numType is only set when the config names one.
                    if "numtype" in config:
                        if config["numtype"] == "float":
                            newTag.numType = "float"
                        else:
                            newTag.numType = "integer"

                    newTag.value = definition['value']

                # Register the tag in the tree and in the flat list.
                self.tree.create_node(tagName, tagName, parent, newTag)
                self.tagList.append(newTag)

                # Folders recurse into their children.
                if datatype == "Folder":
                    self.createTree(definition["children"], tagName)

    def setTagAddresses(self):
        """Assign addresses level by level (root first), in node-dict order.

        Each tag takes ``IOTEQ_TAG_BYTE_SIZE`` address units, continuing
        from ``currentTagAddress``.
        """
        for level in range(self.tree.depth() + 1):
            for nodeId in self.tree.nodes:
                if self.tree.level(nodeId) != level:
                    continue
                tag = self.tree.get_node(nodeId).data
                tag.address = self.currentTagAddress
                self.currentTagAddress += self.IOTEQ_TAG_BYTE_SIZE

    def setTagParentChildrenPtrs(self):
        """Fill parent, first-child and sibling pointers of every tag.

        Must run after ``setTagAddresses`` because it copies addresses.
        A node identifier without a matching ``tagName`` in ``tagList``
        raises ``KeyError``.
        """
        # Built once instead of scanning tagList for every node; the last
        # entry wins for duplicate names, as in the original scan.
        indexByName = {
            tag.tagName: index for index, tag in enumerate(self.tagList)
        }

        for node in self.tree.nodes:
            tagIndex = indexByName[node]
            treeNode = self.tree.get_node(node)
            currentTag = treeNode.data
            self.tagList[tagIndex] = currentTag

            # Everything except the root points back at its parent.
            if not treeNode.is_root():
                parentTag = self.tree.parent(node).data
                currentTag.parentPtr = parentTag.address
                if parentTag.tagName != "tags":
                    currentTag.parentTag = parentTag

            if treeNode.is_leaf():
                continue

            childrenNodes = sorted(self.tree.children(node),
                                   key=lambda child: child.data.address)
            currentTag.childPtr = childrenNodes[0].data.address
            currentTag.numOfChildren = len(childrenNodes)

            # NOTE(review): the original tested the leftover variable of
            # its lookup loop here, i.e. the LAST entry of tagList rather
            # than the current tag. That behavior is kept unchanged;
            # confirm whether the root's children were meant to be
            # excluded from sibling linking.
            if self.tagList[-1].tagName != "tags":
                lastIndex = len(childrenNodes) - 1
                for i, child in enumerate(childrenNodes):
                    childTag = self.tagList[self.tagList.index(child.data)]
                    # An only child has neither neighbor; the original
                    # raised IndexError by reading childrenNodes[1].
                    if i < lastIndex:
                        childTag.nextSibling = childrenNodes[i + 1].data.address
                    if i > 0:
                        childTag.prevSibling = childrenNodes[i - 1].data.address

    def createConstPtrTree(self):
        """Concatenate ``getStruct()`` of all tags in ascending address order."""
        sortedTags = sorted(self.tagList, key=lambda x: x.address)
        for tag in sortedTags:
            self.constPtrTree.extend(tag.getStruct())

    def build(self):
        """Run the full pipeline: root, tree, addresses, pointers, struct table."""
        self.setRoot("tags")
        # Use this instance's config; the original read a module-level
        # ``ioteqDBBuilder`` global and failed when it was absent.
        self.createTree(self.databaseTags, "tags")
        self.setTagAddresses()
        self.setTagParentChildrenPtrs()
        self.createConstPtrTree()
class TreeT(object):
    """Wrapper around a ``Tree`` that converts between bracketed (PTB-style)
    token sequences, the tree itself, and per-leaf tag sequences.

    Node payloads are ``TreeData`` objects; this class reads/writes their
    ``leaves``, ``height``, ``word``, ``comb_side`` and ``miss_side``
    attributes. ``CL``/``CR`` and ``L``/``R`` are markers defined elsewhere
    in the file (presumably combine-left/right and missing-left/right --
    confirm against their definitions).
    """

    def __init__(self, max_id=0):
        # NOTE(review): max_id is accepted but not used here.
        self.tree = Tree()

    def from_ptb_to_tree(self, line, max_id=0, leaf_id=1, parent_id=None):
        """Recursively add the sub-tree that starts at ``line[0]`` to ``self.tree``.

        ``line`` is an indexable token sequence whose first tokens are
        ``'('`` and a label. The top-level call (``parent_id is None``)
        creates the node with id 0; every other labelled node takes id
        ``max_id``; word nodes take id ``leaf_id``.

        Returns ``(tokens consumed, next max_id, next leaf_id)``.
        """
        # starts by ['(', 'pos']
        pos_tag = line[1]
        if parent_id is None:
            pos_id = 0
        else:
            pos_id = max_id
            max_id += 1
        self.tree.create_node(pos_tag, pos_id, parent_id, TreeData())
        parent_id = pos_id
        total_offset = 2
        if line[2] != '(':
            # sub-tree is leaf
            # line[0:3] = ['(', 'pos', 'word', ')']
            word_tag = line[2]
            self.tree.create_node(word_tag, leaf_id, parent_id, TreeData())
            return 4, max_id, leaf_id + 1
        # Skip '(' and the label, then consume children until the closing ')'.
        line = line[2:]
        while line[0] != ')':
            offset, max_id, leaf_id = self.from_ptb_to_tree(
                line, max_id, leaf_id, parent_id)
            total_offset += offset
            line = line[offset:]
        # +1 accounts for the closing ')'.
        return total_offset + 1, max_id, leaf_id

    def add_height(self, tree_dep):
        """Fill ``data.leaves`` on every node and ``data.height`` on every leaf.

        ``tree_dep`` is indexed by leaf identifier (presumably the leaf's
        dependency head -- confirm with the caller). Always returns True.
        """
        for n in self.tree.all_nodes():
            n.data.leaves = []

        for leaf in self.tree.leaves():
            lid = leaf.identifier
            hid = tree_dep[lid]
            if hid == self.tree.root:
                # Leaf whose tree_dep entry is the root id: height is its
                # depth, and it is recorded on every node of its
                # root-to-leaf path.
                self.tree[lid].data.height = self.tree.depth(self.tree[lid])
                for cid in [
                        p for p in self.tree.paths_to_leaves() if lid in p
                ][0]:
                    self.tree[cid].data.leaves += [lid]
            else:
                # Climb from the leaf, recording it on each visited node.
                # Keep climbing while the tree_dep entries of the other
                # leaves under the new ancestor all lie among that
                # ancestor's own leaves.
                height = -1
                cid = lid
                cond = True
                while cond:
                    self.tree[cid].data.leaves += [lid]
                    height += 1
                    cid = self.tree.parent(cid).identifier
                    cid_leaves = [l.identifier for l in self.tree.leaves(cid)]
                    cid_l_dep = [tree_dep[l] for l in cid_leaves if l != lid]
                    cond = set(cid_l_dep).issubset(set(cid_leaves))
                self.tree[lid].data.height = height

        # Nodes that received no leaf above: visited in reverse
        # all_nodes() order, each takes the first leaf of its child with
        # the smallest data.height and raises that leaf's height by one.
        x_nodes = [
            n.identifier for n in self.tree.all_nodes() if n.data.leaves == []
        ]
        for x_node in x_nodes[::-1]:
            min_id = min(self.tree.children(x_node),
                         key=lambda c: c.data.height)
            _lid = min_id.data.leaves[0]
            self.tree[_lid].data.height += 1
            self.tree[x_node].data.leaves += [_lid]
        return True

    def _from_tree_to_ptb(self, nid):
        """Return the bracketed string for the sub-tree rooted at ``nid``.

        Leaves render as `` (tag word)``; children are emitted in ascending
        identifier order. Every group starts with a leading space.
        """
        nid = self.tree.subtree(nid).root
        if self.tree[nid].is_leaf():
            return ' (' + self.tree[nid].tag + ' ' + self.tree[
                nid].data.word + ')'

        res = ' (' + self.tree[nid].tag

        for c_nid in sorted(self.tree.children(nid),
                            key=lambda x: x.identifier):
            res += self._from_tree_to_ptb(c_nid.identifier)

        return res + ')'

    def from_tree_to_ptb(self):
        """Return the bracketed string for the whole tree."""
        return self._from_tree_to_ptb(self.tree.root)

    def from_tag_to_tree(self, tag, word, pos_id=0):
        """Build a chain of nodes from ``tag`` and attach ``word`` to it.

        ``tag`` is a sequence of node groups. A group may start with
        ``CL``/``CR`` (stored as ``comb_side``); its next element becomes a
        node chained under the previous group's node, and each remaining
        element ``x`` becomes a child with ``miss_side=x[0]`` and tag
        ``x[1:]``. ``word`` is stored on the first leaf whose
        ``miss_side`` is empty. Ids are assigned sequentially from
        ``pos_id``.

        Returns the next unused id.
        """
        parent_id = None
        for tag_nodes in tag:
            if tag_nodes[0] in [CL, CR]:
                c_side = tag_nodes[0]
                # A group holding only the side marker yields an empty tag.
                _tag_nodes = tag_nodes[1:] if len(tag_nodes) > 1 else ['']
            else:
                c_side = ''
                _tag_nodes = tag_nodes
            self.tree.create_node(_tag_nodes[0],
                                  pos_id,
                                  parent=parent_id,
                                  data=TreeData(comb_side=c_side))

            parent_id = pos_id
            pos_id += 1
            for tag_node in _tag_nodes[1:]:
                self.tree.create_node(tag_node[1:],
                                      pos_id,
                                      parent=parent_id,
                                      data=TreeData(miss_side=tag_node[0]))
                pos_id += 1
        for l in self.tree.leaves():
            if l.data.miss_side == '':
                l.data.word = word
                break
        return pos_id

    # NOTE(review): the methods below are wrapped in `memoize`, which is
    # defined elsewhere; how it keys or invalidates results after the tree
    # is mutated (e.g. by combine_tree) cannot be told from here.

    @memoize
    def is_combine_to(self, side):
        """Return whether the root's ``comb_side`` equals ``side``."""
        return self.tree[self.tree.root].data.comb_side == side

    @memoize
    def is_combine_right(self):
        """Return whether the root's ``comb_side`` is ``CR``."""
        return self.is_combine_to(CR)

    @memoize
    def is_combine_left(self):
        """Return whether the root's ``comb_side`` is ``CL``."""
        return self.is_combine_to(CL)

    @memoize
    def is_complete_tree(self):
        """Return whether every node has an empty ``miss_side``."""
        return all([n.data.miss_side == '' for n in self.tree.all_nodes()])

    @memoize
    def get_missing_leaves_to(self, miss_val, side):
        """Return ids of leaves tagged ``miss_val`` whose ``miss_side`` is ``side``."""
        return [
            l.identifier for l in self.tree.leaves(self.tree.root)
            if l.data.miss_side == side and l.tag == miss_val
        ]

    @memoize
    def get_missing_leaves_left(self, miss_val):
        """Return ids of leaves tagged ``miss_val`` with ``miss_side == L``."""
        return self.get_missing_leaves_to(miss_val, L)

    @memoize
    def get_missing_leaves_right(self, miss_val):
        """Return ids of leaves tagged ``miss_val`` with ``miss_side == R``."""
        return self.get_missing_leaves_to(miss_val, R)

    @memoize
    def root_tag(self):
        """Return the tag of the root node."""
        return self.tree[self.tree.root].tag

    @memoize
    def is_no_missing_leaves(self):
        """Return whether every leaf has an empty ``miss_side``."""
        return all(
            [l.data.miss_side == '' for l in self.tree.leaves(self.tree.root)])

    @memoize
    def combine_tree(self, _tree, comb_leaf):
        """Paste ``_tree.tree`` under ``comb_leaf``, then link past that node.

        Mutates ``self.tree`` and returns ``self``.
        """
        self.tree.paste(comb_leaf, _tree.tree)
        self.tree.link_past_node(comb_leaf)
        return self

    def tree_to_path(self, nid, path):
        """Fill ``path`` (leaf id -> list) for the sub-tree rooted at ``nid``.

        Each leaf's list receives, front-inserted while unwinding the
        recursion, the ids of ancestors still within that leaf's
        ``data.height`` and one ``CR``/``CL`` flag at the node where its
        remaining height reaches 0.

        Returns ``(leaf id, remaining height)`` of the path passing
        through ``nid``.
        """
        # Stop condition
        if self.tree[nid].is_leaf():
            path[nid] = []
            return nid, self.tree[nid].data.height

        # Recursion
        flag = CR
        for child in self.tree.children(nid):
            cid = child.identifier
            leaf_id, height = self.tree_to_path(cid, path)

            if (height == 0):
                # Reached end of path can add flag
                path[leaf_id].insert(0, flag)
                # path[leaf_id].append(flag)

            if height > 0:
                path[leaf_id].insert(0, nid)
                # only single child will have height>0
                # and its value will be the one that is returned
                # to the parent
                ret_leaf_id, ret_height = leaf_id, height - 1

                # once we reached a height>0, it means that
                # this path includes the parent, and thus flag
                # direction should flip
                flag = CL

        # NOTE(review): ret_leaf_id/ret_height are only bound when some
        # child reported height > 0.
        return ret_leaf_id, ret_height

    def path_to_tags(self, path):
        """Convert each path (as built by ``tree_to_path``) into a tag list.

        An optional leading ``CL``/``CR`` flag is copied through. For
        every node id except the last, the node's tag is emitted followed
        by one entry per child that is not the next path element:
        ``R + tag`` if the child's id is greater than the next element's,
        else ``L + tag``. The last element's tag closes the list.
        """
        tags = []
        for p in path:
            _res = []
            # Shallow copy so pop() below does not alter the caller's list.
            _p = copy.copy(p)
            if _p[0] in [CL, CR]:
                _res.append(_p[0])
                _p = _p[1:]
            while _p[:-1]:
                el_p = _p.pop(0)
                _res.append(self.tree[el_p].tag)
                for c in self.tree.children(el_p):
                    if c.identifier != _p[0]:
                        _res.append(R + c.tag if c.identifier > _p[0] else L +
                                    c.tag)
            _res.append(self.tree[_p[0]].tag)
            tags.append(_res)
        return tags

    def path_to_words(self, path):
        """Return the tag of each node id in ``path``."""
        return [self.tree[k].tag for k in path]

    def from_tree_to_tag(self):
        """Return ``{'tags': ..., 'words': ...}`` derived from the current tree."""
        path = {}
        self.tree_to_path(self.tree.root, path)
        return {
            'tags': self.path_to_tags(path.values()),
            'words': self.path_to_words(path.keys())
        }

    def from_ptb_to_tag(self, line, max_id, depend):
        """Parse ``line`` into the tree, add heights from ``depend``, return tags."""
        self.from_ptb_to_tree(line, max_id)
        self.add_height(depend)
        path = {}
        self.tree_to_path(self.tree.root, path)
        return self.path_to_tags(path.values())
for z in d: path = walkTree(tree, z, path + z) return path input = list(map(lambda x: x.strip(), open("test_input.txt").readlines())) tree = Tree() tree.create_node("root", "root") # first figure out how many steps there are and then sort them # by their name for lines in input: (l1, l2) = (lines[5], lines[36]) print(lines) if tree.contains(l1) and tree.contains(l2): tree.move_node(l2, l1) elif tree.contains(l1) and not tree.contains(l2): tree.create_node(l2, l2, parent=l1) elif not tree.contains(l1) and tree.contains(l2): # get the root for l2 and make that the root for l1 # then move l2 under l1 tree.create_node(l1, l1, parent=tree.parent(l2)) tree.move_node(l2, l1) else: tree.create_node(l1, l1, parent="root") tree.create_node(l2, l2, parent=l1) tree.show() print(walkTree(tree, 'root', ''))