def et_define_branch(self, xml_node, tree, tree_parent, a_dict):
    """
    Build a stand-alone branch tree from ``xml_node`` and register it.

    Every child element of ``xml_node`` is handed to ``parse_et_fork``
    together with a fresh ``Tree`` and that tree's root. The populated
    tree is stored in ``a_dict`` under the node's ``name`` attribute.

    ``tree`` and ``tree_parent`` are accepted but not used by this method.

    :raises RuntimeError: if the branch tree is still empty after parsing.
    """
    branch = Tree()
    branch_root = branch.root

    for child in xml_node:
        self.parse_et_fork(child, branch, branch_root, a_dict)

    if branch.size() == 0:
        raise RuntimeError('Event tree branch contains no data')

    branch_name = xml_node.get('name')
    a_dict[branch_name] = branch
class RIAC(AbstractTeacher):
    """Teacher that keeps a tree of task-space regions and samples tasks
    from them according to their absolute learning progress (ALP).

    Each tree node holds a ``Region`` whose ``r_t_pairs`` is a two-element
    list ``[rewards, tasks]`` of deques. When a region signals that it needs
    splitting, ``split`` tries random axis-aligned cuts and keeps the best one.

    NOTE(review): ``self.mins``, ``self.maxs`` and ``self.random_state`` are
    presumably set by ``AbstractTeacher.__init__`` -- confirm against the base.
    """

    def __init__(self, mins, maxs, seed, env_reward_lb, env_reward_ub,
                 max_region_size=200, alp_window_size=None,
                 nb_split_attempts=50, sampling_in_leaves_only=False,
                 min_region_size=None, min_dims_range_ratio=1 / 6,
                 discard_ratio=1 / 4):
        AbstractTeacher.__init__(self, mins, maxs, env_reward_lb, env_reward_ub, seed)

        # Maximal number of (task, reward) pairs a region can hold before splitting
        self.maxlen = max_region_size

        self.alp_window = self.maxlen if alp_window_size is None else alp_window_size

        # Initialize Regions' tree
        self.tree = Tree()
        self.regions_bounds = [Box(self.mins, self.maxs, dtype=np.float32)]
        self.regions_alp = [0.]
        self.tree.create_node('root', 'root',
                              data=Region(maxlen=self.maxlen,
                                          r_t_pairs=self._new_pairs(),
                                          bounds=self.regions_bounds[-1],
                                          alp=self.regions_alp[-1]))
        self.nb_dims = len(mins)
        self.nb_split_attempts = nb_split_attempts

        # Whether task sampling uses parent and child regions (False) or only child regions (True)
        self.sampling_in_leaves_only = sampling_in_leaves_only

        # Additional tricks to original RIAC, enforcing splitting rules

        # 1 - Minimum population required for both children when splitting --> set to 1 to cancel
        self.minlen = self.maxlen / 20 if min_region_size is None else min_region_size

        # 2 - minimum children region size (compared to initial range of each dimension)
        # Set min_dims_range_ratio to 1/np.inf to cancel
        self.dims_ranges = self.maxs - self.mins
        self.min_dims_range_ratio = min_dims_range_ratio

        # 3 - If after nb_split_attempts, no split is valid, flush oldest points of parent region
        # If 1- and 2- are canceled, this will be canceled since any split will be valid
        self.discard_ratio = discard_ratio

        # book-keeping
        self.sampled_tasks = []
        self.all_boxes = []
        self.all_alps = []
        self.update_nb = -1
        self.split_iterations = []

        # Snapshot of the constructor arguments (also contains ``self``)
        self.hyperparams = locals()

    def _new_pairs(self):
        """Return an empty ``[rewards, tasks]`` pair of bounded deques."""
        return [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)]

    def compute_alp(self, sub_region):
        """Return the ALP of ``sub_region`` (a ``[rewards, tasks]`` pair).

        The last ``min(len(rewards), alp_window)`` rewards are cut in two
        halves; the ALP is the absolute difference of the halves' means.
        Regions with two rewards or fewer have an ALP of 0.
        """
        if len(sub_region[0]) > 2:
            cp_window = min(len(sub_region[0]), self.alp_window)  # not completely window
            half = int(cp_window / 2)
            rewards = np.array(sub_region[0])
            first_half = rewards[-cp_window:-half]
            snd_half = rewards[-half:]
            diff = first_half.mean() - snd_half.mean()
            cp = np.abs(diff)
        else:
            cp = 0
        alp = np.abs(cp)
        return alp

    def split(self, nid):
        """Try ``nb_split_attempts`` random splits of the region at node ``nid``.

        If at least one attempt yields valid bounds, the best-scoring split is
        added to the tree as two child nodes and ``True`` is returned.
        Otherwise the oldest ``discard_ratio`` share of the region's pairs is
        flushed and ``False`` is returned.
        """
        reg = self.tree.get_node(nid).data
        best_split_score = 0
        best_bounds = None
        best_sub_regions = None
        best_alps = None
        is_split = False
        # Smallest extent a child region may have along each dimension
        min_dims_sizes = self.dims_ranges * self.min_dims_range_ratio

        for _ in range(self.nb_split_attempts):
            sub_reg1 = self._new_pairs()
            sub_reg2 = self._new_pairs()

            # repeat until the two sub regions contain at least minlen of the mother region
            # NOTE(review): this never terminates if no threshold can put
            # minlen pairs on both sides (e.g. all stored tasks identical).
            while len(sub_reg1[0]) < self.minlen or len(sub_reg2[0]) < self.minlen:
                # decide on dimension
                dim = self.random_state.choice(range(self.nb_dims))
                threshold = reg.bounds.sample()[dim]
                bounds1 = Box(reg.bounds.low, reg.bounds.high, dtype=np.float32)
                bounds1.high[dim] = threshold
                bounds2 = Box(reg.bounds.low, reg.bounds.high, dtype=np.float32)
                bounds2.low[dim] = threshold
                bounds = [bounds1, bounds2]
                valid_bounds = not (
                    np.any(bounds1.high - bounds1.low < min_dims_sizes)
                    or np.any(bounds2.high - bounds2.low < min_dims_sizes))

                # perform split in sub regions
                sub_reg1 = self._new_pairs()
                sub_reg2 = self._new_pairs()
                for idx, task in enumerate(reg.r_t_pairs[1]):
                    target = sub_reg1 if bounds1.contains(task) else sub_reg2
                    target[1].append(task)
                    target[0].append(reg.r_t_pairs[0][idx])
                sub_regions = [sub_reg1, sub_reg2]

            # compute alp
            alp = [self.compute_alp(sub_reg1), self.compute_alp(sub_reg2)]

            # compute score: populations of both children times their ALP gap.
            # len(sub_reg) is always 2 (the [rewards, tasks] pair), so the
            # population has to be read from the reward deque instead.
            split_score = (len(sub_reg1[0]) * len(sub_reg2[0])
                           * np.abs(alp[0] - alp[1]))
            if split_score >= best_split_score and valid_bounds:
                is_split = True
                best_split_score = split_score
                best_sub_regions = sub_regions
                best_bounds = bounds
                # Keep the ALPs of the retained split, not of the last attempt
                best_alps = alp

        if is_split:
            # add new nodes to tree
            for r_t_pairs, bounds, region_alp in zip(best_sub_regions, best_bounds, best_alps):
                self.tree.create_node(identifier=self.tree.size(), parent=nid,
                                      data=Region(self.maxlen, r_t_pairs=r_t_pairs,
                                                  bounds=bounds, alp=region_alp))
        else:
            assert len(reg.r_t_pairs[0]) == (self.maxlen + 1)
            nb_discarded = int(self.maxlen * self.discard_ratio)
            # Preserve the deque bound so the region keeps the same capacity
            reg.r_t_pairs[0] = deque(
                islice(reg.r_t_pairs[0], nb_discarded, self.maxlen + 1),
                maxlen=self.maxlen + 1)
            reg.r_t_pairs[1] = deque(
                islice(reg.r_t_pairs[1], nb_discarded, self.maxlen + 1),
                maxlen=self.maxlen + 1)

        return is_split

    def add_task_reward(self, node, task, reward):
        """Recursively add ``(task, reward)`` to ``node`` and its children.

        Nodes whose bounds contain the task are recorded in
        ``nodes_to_recompute``; nodes whose region asks for a split are
        recorded in ``nodes_to_split``.
        """
        reg = node.data
        nid = node.identifier
        if reg.bounds.contains(task):  # task falls within region
            self.nodes_to_recompute.append(nid)
            children = self.tree.children(nid)
            for n in children:  # if task in region, task is in one sub-region
                self.add_task_reward(n, task, reward)

            need_split = reg.add(task, reward, children == [])  # COPY ALL MODE
            if need_split:
                self.nodes_to_split.append(nid)

    def episodic_update(self, task, reward, is_success):
        """Register one ``(task, reward)`` outcome and split a region if needed.

        ``is_success`` is not used. Returns ``(new_split, None)`` where
        ``new_split`` tells whether a region was split during this update.
        """
        self.update_nb += 1

        # Add new (task, reward) to regions nodes
        self.nodes_to_split = []
        self.nodes_to_recompute = []
        new_split = False
        root = self.tree.get_node('root')
        self.add_task_reward(root, task, reward)  # Will update self.nodes_to_split if needed
        assert len(self.nodes_to_split) <= 1

        # Split a node if needed
        need_split = len(self.nodes_to_split) == 1
        if need_split:
            new_split = self.split(self.nodes_to_split[0])  # Execute the split
            if new_split:
                # Update list of regions_bounds
                if self.sampling_in_leaves_only:
                    self.regions_bounds = [n.data.bounds for n in self.tree.leaves()]
                else:
                    self.regions_bounds = [n.data.bounds for n in self.tree.all_nodes()]

        # Recompute ALPs of modified nodes
        for nid in self.nodes_to_recompute:
            node = self.tree.get_node(nid)
            reg = node.data
            reg.alp = self.compute_alp(reg.r_t_pairs)

        # Collect regions data (regions' ALP and regions' (task, reward) pairs)
        all_nodes = self.tree.all_nodes() if not self.sampling_in_leaves_only else self.tree.leaves()
        self.regions_alp = []
        self.r_t_pairs = []
        for n in all_nodes:
            self.regions_alp.append(n.data.alp)
            self.r_t_pairs.append(n.data.r_t_pairs)

        # Book-keeping
        if new_split:
            self.all_boxes.append(copy.copy(self.regions_bounds))
            self.all_alps.append(copy.copy(self.regions_alp))
            self.split_iterations.append(self.update_nb)
        assert len(self.regions_alp) == len(self.regions_bounds)

        return new_split, None

    def sample_random_task(self):
        """Sample a task uniformly from the first entry of ``regions_bounds``."""
        return self.regions_bounds[0].sample()  # First region is root region

    def sample_task(self):
        """Sample the next task, store it in ``sampled_tasks`` and return it
        as ``np.float32``."""
        mode = self.random_state.rand()
        if mode < 0.1:  # "mode 3" (10%) -> sample on regions and then mutate lowest-performing task in region
            if len(self.sampled_tasks) == 0:
                self.sampled_tasks.append(self.sample_random_task())
            else:
                self.sampled_tasks.append(self.non_exploratory_task_sampling()["task"])

        elif mode < 0.3:  # "mode 2" (20%) -> random task
            self.sampled_tasks.append(self.sample_random_task())

        else:  # "mode 1" (70%) -> proportional sampling on regions based on ALP and then random task in selected region
            region_id = proportional_choice(self.regions_alp, self.random_state, eps=0.0)
            self.sampled_tasks.append(self.regions_bounds[region_id].sample())

        return self.sampled_tasks[-1].astype(np.float32)

    def non_exploratory_task_sampling(self):
        """Pick a region proportionally to ALP and mutate its lowest-reward task.

        Returns a dict with the new ``"task"`` and an ``"infos"`` dict holding
        the book-keeping index and the selected region id.
        """
        # 1 - Sample region proportionally to its ALP
        region_id = proportional_choice(self.regions_alp, self.random_state, eps=0.0)

        # 2 - Retrieve (task, reward) pair with lowest reward
        worst_task_idx = np.argmin(self.r_t_pairs[region_id][0])

        # 3 - Mutate task by a small amount (using Gaussian centered on task, with 0.1 std)
        task = self.random_state.normal(self.r_t_pairs[region_id][1][worst_task_idx].copy(), 0.1)
        # clip to stay within region (add small epsilon to avoid falling in multiple regions)
        task = np.clip(task,
                       self.regions_bounds[region_id].low + 1e-5,
                       self.regions_bounds[region_id].high - 1e-5)
        return {
            "task": task,
            "infos": {
                "bk_index": len(self.all_boxes) - 1,
                "task_infos": region_id
            }
        }

    def dump(self, dump_dict):
        """Store the book-keeping lists in ``dump_dict`` and return it."""
        dump_dict['all_boxes'] = self.all_boxes
        dump_dict['split_iterations'] = self.split_iterations
        dump_dict['all_alps'] = self.all_alps
        # dump_dict['riac_params'] = self.hyperparams
        return dump_dict

    @property
    def nb_regions(self):
        """Number of regions currently available for sampling."""
        return len(self.regions_bounds)

    @property
    def get_regions(self):
        """Bounds of the regions currently available for sampling."""
        return self.regions_bounds
class Blockchain(object):
    """Tree of blocks keyed by proof-of-work, rooted at a genesis block.

    Blocks are stored as nodes of a ``Tree``; each node's identifier is the
    block's ``proofOfWork`` and its parent is the block's ``prevBlockHash``.
    """

    def __init__(self, genesis):
        # TODO: figure out if genesis should be passed in or created here
        # self.tinput = tinput
        self.blockCount = 0
        self.blockchain = Tree()
        self.genesis = genesis
        self.addGenesisBlock(genesis)

    def addGenesisBlock(self, genesis):
        """Add the genesis block as the root node of the chain."""
        self.blockchain.create_node(
            "Genesis Block" + " ID: " + genesis.proofOfWork[:12],
            genesis.proofOfWork,
            data=genesis)

    def printBlockchain(self):
        """Print the whole block tree."""
        self.blockchain.show()

    def addBlock(self, block):
        """Attach ``block`` under the node identified by ``block.prevBlockHash``."""
        # TODO: run proof of work verification before adding block
        # Add block to chain & return true if POW valid
        # Else return false
        next_count = self.blockCount + 1
        self.blockchain.create_node(
            "Block " + str(next_count) + " ID: " + block.proofOfWork[:12],
            block.proofOfWork,
            parent=block.prevBlockHash,
            data=block)
        # Only count the block once the insertion has actually succeeded
        self.blockCount = next_count

    def getGenesisID(self):
        """Return the identifier of the tree's root."""
        return self.blockchain.root

    def getLongestChainBlocks(self):
        """Return ``(count, nodes)`` for the nodes sitting at the tree's depth,
        i.e. the tips of the longest branch(es)."""
        treeDepth = self.blockchain.depth()
        longestPathLeaves = [
            node for node in self.blockchain.all_nodes()
            if self.blockchain.depth(node) == treeDepth
        ]
        return len(longestPathLeaves), longestPathLeaves

    def blockchainLength(self):
        """Return the depth of the tree, i.e. the length of the longest chain."""
        return self.blockchain.depth()

    def numBlocks(self):
        """Return the number of nodes in the tree."""
        return self.blockchain.size()

    def printChain(self, chain):
        """Print ``chain`` showing each node's ``humanID`` data property."""
        chain.show(data_property="humanID")

    def tailBlocks(self, chain):
        """Print the number of leaves of ``chain`` and the leaves themselves."""
        leaves = chain.leaves()
        print("Num leaves" + str(len(leaves)))
        print(leaves)

    def checkBlock(self):
        """Placeholder for proof-of-work validation; currently only prints."""
        # Check the proof of work
        # return true if proof of work is valid
        # else return false
        print("printing block")

    def createBlockchainGraph(self, outfilename):
        """Write ``<outfilename>.gv`` and render it with Graphviz defaults."""
        print("creating graph")
        self.blockchain.to_graphviz(filename=outfilename + '.gv',
                                    shape=u'box',
                                    graph=u'digraph')
        g = Source.from_file(outfilename + '.gv')
        g.render()

    def createBlockchainImg(self, outfilename):
        """Write ``<outfilename>.gv`` and render it as a PNG image.

        The DOT source lives in the ``.gv`` file just written, so that is the
        file to load; the output type is selected with ``format='png'``.
        """
        print("creating graph")
        self.blockchain.to_graphviz(filename=outfilename + '.gv',
                                    shape=u'box',
                                    graph=u'digraph')
        g = Source.from_file(outfilename + '.gv', format='png')
        g.render()
class StepParse:
    """Extract the product/assembly structure from a STEP file and expose it
    as a tree plus a lattice graph."""

    def __init__(self):
        pass

    @staticmethod
    def _hash_refs(line, extra_separators=()):
        """Return the whitespace-separated tokens of ``line`` starting with '#'.

        Commas, '=' and any ``extra_separators`` are turned into spaces first,
        which makes the split work when there are no spaces between words.
        """
        for sep in (",",) + tuple(extra_separators) + ("=",):
            line = line.replace(sep, " ")
        return [el for el in line.split() if el.startswith('#')]

    def load_step(self, step_filename):
        """Read ``step_filename`` and populate the reference tables.

        Collects the NEXT_ASSEMBLY_USAGE_OCCURRENCE, PRODUCT_DEFINITION,
        PRODUCT_DEFINITION_FORMATION and PRODUCT statements (joining statements
        wrapped over several lines), extracts their '#' references, and
        derives parent/child references, part/assembly/root sets and
        ``part_dict`` (product definition ref -> product name).
        """
        self.nauo_lines = []
        self.prod_def_lines = []
        self.prod_def_form_lines = []
        self.prod_lines = []
        self.filename = os.path.splitext(step_filename)[0]

        # Destination list for each kind of statement that is being held
        line_buckets = {
            'nauo': self.nauo_lines,
            'prod_def': self.prod_def_lines,
            'prod_def_form': self.prod_def_form_lines,
            'prod': self.prod_lines,
        }
        line_hold = ''
        line_type = ''

        # Find all search lines
        with open(step_filename) as f:
            for line in f:
                # TH: read pointer of lines as they are read, so if the file has text wrap
                # it will notice and add it to the following lines
                index = re.search("#(.*)=", line)
                if index:
                    # TH: start of a new statement; the previously held
                    # statement is complete, so save it before moving on
                    if line_hold and line_type in line_buckets:
                        line_buckets[line_type].append(line_hold)
                    line_hold = ''
                    line_type = ''

                    if 'NEXT_ASSEMBLY_USAGE_OCCURRENCE' in line:
                        line_hold = line.rstrip()
                        line_type = 'nauo'
                    elif ('PRODUCT_DEFINITION ' in line or 'PRODUCT_DEFINITION(' in line):
                        line_hold = line.rstrip()
                        line_type = 'prod_def'
                    elif 'PRODUCT_DEFINITION_FORMATION' in line:
                        line_hold = line.rstrip()
                        line_type = 'prod_def_form'
                    elif ('PRODUCT ' in line or 'PRODUCT(' in line):
                        line_hold = line.rstrip()
                        line_type = 'prod'
                elif 'ENDSEC;' in line:
                    # TH: end of section; save the statement that was held
                    if line_hold and line_type in line_buckets:
                        line_buckets[line_type].append(line_hold)
                    line_hold = ''
                    line_type = ''
                else:
                    # TH: continuation of a wrapped statement
                    line_hold = line_hold + line.rstrip()

        # Find all (# hashed) line references and product names
        # TH: it might be worth finding a different way of extracting data we do want
        # rather than fixes to get rid of the data we don't
        self.nauo_refs = [self._hash_refs(el_) for el_ in self.nauo_lines]
        self.prod_def_refs = [self._hash_refs(el_) for el_ in self.prod_def_lines]
        self.prod_def_form_refs = [self._hash_refs(el_) for el_ in self.prod_def_form_lines]
        self.prod_refs = []
        for el_ in self.prod_lines:
            refs = self._hash_refs(el_, extra_separators=("(",))
            # Product name is the text between the first pair of single quotes
            refs.append(el_.split("'")[1])
            self.prod_refs.append(refs)

        # Get first two items in each sublist (as third is shape ref)
        #
        # First item is 'PRODUCT_DEFINITION' ref
        # Second item is 'PRODUCT_DEFINITION_FORMATION <etc>' ref
        self.prod_all_refs = [el[:2] for el in self.prod_def_refs]

        # Match up all references down to level of product name
        for el_ in self.prod_all_refs:
            # Add 'PRODUCT' ref from the matching formation statement
            for el in self.prod_def_form_refs:
                if el[0] == el_[1]:
                    el_.append(el[1])
                    break

            # Add name from the matching 'PRODUCT' statement
            for el in self.prod_refs:
                if el[0] == el_[2]:
                    el_.append(el[2])
                    break

        # Find all parent and child relationships (2nd and 3rd item in each sublist)
        self.parent_refs = [el[1] for el in self.nauo_refs]
        self.child_refs = [el[2] for el in self.nauo_refs]

        # Find distinct parts and assemblies via set operations, so no repetition of items
        self.all_type_refs = set(self.child_refs) | set(self.parent_refs)
        self.ass_type_refs = set(self.parent_refs)
        self.part_type_refs = set(self.child_refs) - set(self.parent_refs)
        # TH: find root node
        self.root_type_refs = set(self.parent_refs) - set(self.child_refs)

        # Create simple parts dictionary (ref + label)
        self.part_dict = {el[0]: el[3] for el in self.prod_all_refs}
        # self.part_dict_inv = {el[3]:el[0] for el in self.prod_all_refs}

    def show_values(self):
        """Print the raw statement lists and the reference lists."""
        # TH: basic testing, if needed these could be split up
        print(self.nauo_lines)
        print(self.prod_def_lines)
        print(self.prod_def_form_lines)
        print(self.prod_lines)
        print(self.nauo_refs)
        print(self.prod_def_refs)
        print(self.prod_def_form_refs)
        print(self.prod_refs)

    # HR: the former "create_dict" method was replaced by the dict
    # comprehension that builds ``part_dict`` in ``load_step``.

    def create_tree(self):
        """Build ``self.tree`` from the assembly relations and compute levels.

        Node identifiers are consecutive integers starting at 0 for the root;
        ``tree_dict`` maps each identifier to its part reference. Nothing but
        an empty tree is created when ``part_dict`` is empty.
        """
        self.tree = Tree()

        # TH: check if there are any parts to make a tree from, if not don't bother
        if self.part_dict == {}:
            return

        root_node_ref = list(self.root_type_refs)[0]

        # HR added part reference as data for later use
        self.tree.create_node(self.part_dict[root_node_ref], 0,
                              data={'ref': root_node_ref})

        # TH: created root node now fill in next layer
        # TH: create dict for tree, as each node needs a unique name
        counter = [0]  # Last node id handed out; a list so the closure can update it
        self.tree_dict = {}
        self.tree_dict[counter[0]] = root_node_ref

        def tree_next_layer(self, parent):
            # ``parent`` is always the id that was created last, so it indexes
            # the reference whose children are being searched for
            root_node = self.tree_dict[parent]
            for line in self.nauo_refs:
                if line[1] == root_node:
                    counter[0] += 1
                    self.tree_dict[counter[0]] = str(line[2])
                    # HR added part reference as data for later use
                    self.tree.create_node(self.part_dict[line[2]], counter[0],
                                          parent=parent,
                                          data={'ref': str(line[2])})
                    tree_next_layer(self, counter[0])

        tree_next_layer(self, 0)
        self.appended = False

        self.get_levels()

    def get_levels(self):
        """Compute, for every tree node, the number of parts (``n_p``) and of
        parts plus assemblies (``n_a``) it contains, then build the lattice
        and the inverse level dictionaries."""
        # Initialise dict and get first level (leaves)
        self.levels = {}
        self.levels_set_p = set()
        self.levels_set_a = set()
        self.leaf_ids = [el.identifier for el in self.tree.leaves()]
        self.all_ids = list(self.tree.nodes)
        self.non_leaf_ids = set(self.all_ids) - set(self.leaf_ids)

        self.part_level = 1

        def do_level(self, tree_level):
            # Get all nodes within this level
            node_ids = [el for el in self.tree.nodes if self.tree.level(el) == tree_level]
            for el in node_ids:
                # If leaf, then n_p = 1 and n_a = 1
                if el in self.leaf_ids:
                    self.levels[el] = {}
                    self.levels[el]['n_p'] = self.part_level
                    self.levels[el]['n_a'] = self.part_level
                # If assembly, then get all children and sum all parts + assemblies
                else:
                    # Get all children of node and sum levels
                    child_ids = self.tree.is_branch(el)
                    child_sum_p = 0
                    child_sum_a = 0
                    for el_ in child_ids:
                        child_sum_p += self.levels[el_]['n_p']
                        child_sum_a += self.levels[el_]['n_a']
                    self.levels[el] = {}
                    self.levels[el]['n_p'] = child_sum_p
                    self.levels[el]['n_a'] = child_sum_a + 1
                    self.levels_set_p.add(child_sum_p)
                    self.levels_set_a.add(child_sum_a + 1)

        # Go up through tree levels and populate lattice level dict
        for i in range(self.tree.depth(), -1, -1):
            do_level(self, i)

        self.create_lattice()

        self.levels_p_sorted = sorted(self.levels_set_p)
        self.levels_a_sorted = sorted(self.levels_set_a)

        # Function to return dictionary of item IDs for each lattice level
        def get_levels_inv(list_in, key):
            # Initialise
            levels_inv = {}
            levels_inv[self.part_level] = []
            for el in list_in:
                levels_inv[el] = []
            for k, v in self.levels.items():
                levels_inv[v[key]].append(k)
            return levels_inv

        self.levels_p_inv = get_levels_inv(self.levels_p_sorted, 'n_p')
        self.levels_a_inv = get_levels_inv(self.levels_a_sorted, 'n_a')

    def get_all_children(self, id_):
        """Return the identifiers of every descendant of ``id_`` in
        breadth-first order (direct children first)."""
        descendants = []
        frontier = [id_]
        while frontier:
            next_frontier = []
            for parent in frontier:
                children = [el.identifier for el in self.tree.children(parent)]
                descendants.extend(children)
                next_frontier.extend(children)
            frontier = next_frontier
        return descendants

    def create_lattice(self):
        """Build ``self.g``, a directed graph mirroring the tree (edges point
        from child to parent) with a plot position on every node."""
        # Create lattice
        self.g = nx.DiGraph()
        self.default_colour = 'r'

        # Get root node and set parent to -1 to maintain data type of "parent"
        node_id = self.tree.root
        label_text = self.tree.get_node(node_id).tag
        self.g.add_node(node_id, parent=-1, label=label_text,
                        colour=self.default_colour)

        # Do nodes from treelib "nodes" dictionary
        for key in self.tree.nodes:
            # Exclude root
            if key != self.tree.root:
                parent_id = self.tree.parent(key).identifier
                label_text = self.tree.get_node(key).tag
                # Node IDs same as for tree
                self.g.add_node(key, parent=parent_id, label=label_text,
                                colour=self.default_colour)

        # Do edges from nodes
        for key in self.tree.nodes:
            # Exclude root
            if key != self.tree.root:
                parent_id = self.tree.parent(key).identifier
                self.g.add_edge(key, parent_id)

        # Escape if only one node
        # HR 6/3/20 QUICK BUG FIX: SINGLE-NODE TREE DOES NOT PLOT
        # IMPROVE LATER; SHOULD BE PART OF A GENERAL METHOD
        if self.tree.size() == 1:
            id_ = [el.identifier for el in self.tree.leaves()]
            self.g.nodes[id_[-1]]['pos'] = (0, 0)
            return

        # Get set of parents of leaf nodes
        leaf_parents = {self.tree.parent(el).identifier for el in self.leaf_ids}

        # For each leaf_parent, set position of leaf nodes sequentially
        i = 0
        # The tree is not modified here, so the leaves are looked up once
        current_leaf_ids = {el.identifier for el in self.tree.leaves()}
        no_leaves = len(current_leaf_ids)
        for el in leaf_parents:
            for el_ in self.tree.is_branch(el):
                if el_ in current_leaf_ids:
                    self.g.nodes[el_]['pos'] = ((i / (no_leaves)), 1)
                    i += 1

        # To set plot positions of nodes from lattice levels
        # ---
        # Traverse upwards from leaves
        for el in sorted(self.levels_set_a):
            # Get all nodes at that level
            node_ids = [k for k, v in self.levels.items() if v['n_a'] == el]
            # Get all positions of children of that node
            # and set position as mean value of them
            for el_ in node_ids:
                child_ids = self.tree.is_branch(el_)
                pos_sum = 0
                for el__ in child_ids:
                    pos_sum += self.g.nodes[el__]['pos'][0]
                pos_sum = pos_sum / len(child_ids)
                self.g.nodes[el_]['pos'] = (pos_sum, el)

    def print_tree(self):
        """Print the tree, building it first if it does not exist yet."""
        try:
            self.tree.show()
        except AttributeError:
            # No ``self.tree`` yet: build it, then show it
            self.create_tree()
            self.tree.show()

    def tree_to_json(self, save_to_file=False, filename='file', path=''):
        """Return the tree as a JSON string, optionally saving it to
        ``<path>/<filename>.json``. Prints a message and returns ``None``
        when the tree is empty."""
        # TH: return json format tree, can also save to file
        if self.tree.size() != 0:
            data = self.tree.to_json()
            j = json.loads(data)
            if save_to_file:
                if path:
                    file_path = os.path.join(path, filename)
                else:
                    file_path = filename

                with open(file_path + '.json', 'w') as outfile:
                    json.dump(j, outfile)

            return data
        else:
            print("no tree to print")
            return
def pytree(start_path: str = '.',
           include_files: bool = True,
           include_sizes: bool = False,
           include_counts: bool = False,
           specific_extension: str or None = None,
           force_absolute_ids: bool = True
           ) -> None:
    """
    Prints a `treelib.Tree` representing the filesystem under `start_path`,
    followed by a summary line with directory and file totals.
    Nothing is returned. If `start_path` yields no directory, an error
    message is printed instead.
    :param start_path: String. Represents an absolute or relative path.
    :param include_files: Boolean. Indicates whether to also include the files in the tree.
    Files are only counted in the summary when they are included.
    :param include_sizes: Boolean. Indicates whether or not tree should display file and folder sizes.
    :param include_counts: Boolean. Indicates whether or not tree should display file and folder counts.
    :param specific_extension: String. Represents a specific file extension to be searched.
    :param force_absolute_ids: Boolean. Indicates whether ids should be absolute. Otherwise
    they follow `start_path` (relative if it is relative, absolute if it is absolute).
    """
    # creating tree instance
    tree = Tree()

    # starting dirs and files count
    total_dirs_num = 0
    total_files_num = 0

    # the first directory yielded by os.walk is the tree root (no parent)
    is_root = True

    # iterating over dirs and files
    for root, _, files in os.walk(start_path):
        p_root = Path(root)
        if is_root:
            parent_id = None
            is_root = False
        else:
            parent = p_root.parent
            parent_id = parent.absolute() if force_absolute_ids else parent

        # getting absolute path
        abs_path = p_root.absolute()

        # getting root id
        p_root_id = abs_path if force_absolute_ids else p_root

        # getting dir name
        dir_name = (p_root.name if p_root.name != "" else ".")
        dir_name += '/'

        # coloring dir string
        colored_text_string = f"\033[0;34;42m{dir_name}"

        # recoloring to white so that it doesn't affect other nodes
        colored_text_string += "\033[0;37;40m"

        # adding count to dir name (only queried when it is going to be shown)
        if include_counts:
            current_dir_file_and_folder_count = get_number_of_files_inside_folder(path_to_folder=abs_path)
            colored_text_string += f' [{current_dir_file_and_folder_count}]'

        # adding dir size to name
        if include_sizes:
            dir_size_in_bytes = get_folder_size_in_bytes(path_to_folder=abs_path)
            adjusted_dir_size = get_adjusted_file_size(file_size_in_bytes=dir_size_in_bytes)
            colored_text_string += f' ({adjusted_dir_size})'

        # creating folder node
        tree.create_node(tag=colored_text_string,
                         identifier=p_root_id,
                         parent=parent_id)

        # increasing total dirs count
        total_dirs_num += 1

        # iterating over files
        for file in files:
            # getting file id and name
            f_id = p_root_id / file
            file_name = f_id.name

            # skipping files that are not of the requested extension
            if specific_extension is not None and not file.endswith(specific_extension):
                continue

            # adding file size to name
            if include_sizes:
                file_size_in_bytes = get_file_size_in_bytes(file_path=f_id)
                adjusted_file_size = get_adjusted_file_size(file_size_in_bytes=file_size_in_bytes)
                file_name += f' ({adjusted_file_size})'

            # creating file node
            if include_files:
                tree.create_node(tag=file_name,
                                 identifier=f_id,
                                 parent=p_root_id)

                # increasing total files count
                total_files_num += 1

    # checking if tree is empty before touching start_path any further
    if tree.size() == 0:
        # printing invalid input message
        print('Invalid input. Must be a directory.\nPlease check input and try again.')
        return

    # the start directory itself is not part of the reported total, so the
    # singular/plural form has to follow the number that is displayed
    shown_dirs_num = total_dirs_num - 1
    dirs_string = 'directory' if shown_dirs_num == 1 else 'directories'
    files_string = 'file' if total_files_num == 1 else 'files'

    # defining dirs and files string
    dirs_and_files_string = f'{shown_dirs_num} {dirs_string}, {total_files_num} {files_string}'

    # adding full size
    if include_sizes:
        full_size = get_folder_size_in_bytes(path_to_folder=start_path)
        adjusted_full_size = get_adjusted_file_size(file_size_in_bytes=full_size)
        dirs_and_files_string += f', {adjusted_full_size}'

    # displaying tree
    print(tree)
    print(dirs_and_files_string)
class Match_base:
    """Base class for token matchers that build a parse tree while walking a
    token list. Subclasses are expected to override ``func_main``."""

    def __init__(self):
        self.token_list = None
        self.index = 0          # position of the current token in token_list
        self.token = ''         # tag of the current token
        self.token_node = None  # current token object
        self.tree = Tree()
        self.anls_proc = []     # tags of the tokens consumed so far
        self.res = True
        # self.info = []
        self.info = ''

    def set_tokenList(self, token_list):
        """Install ``token_list`` and reset all matching state to its start."""
        self.token_list = token_list
        self.index = 0
        self.token = self.token_list[self.index].tag
        self.token_node = self.token_list[self.index]
        self.tree = Tree()
        self.anls_proc = []
        self.res = True
        # self.info = []
        self.info = ''

    def get_next(self, parent):
        """Consume the current token and move on to the next one.

        The consumed token is recorded in ``anls_proc`` and added to the tree
        under ``parent``. Returns the tag of the new current token, or '#'
        once the token list is exhausted.
        """
        # Number of tokens that anls_proc is lagging behind the current index
        tmp = self.index - len(self.anls_proc)
        if tmp < 0:
            tmp = 0
        # Record every token up to and including the current one. The index
        # must not be advanced before this point: doing so recorded the wrong
        # token and made the advance below skip every other token.
        for i in range(tmp + 1):
            if self.index - tmp + i < len(self.token_list):
                self.anls_proc.append(self.token_list[self.index - tmp + i].tag)
        if self.token is not None:
            self.tree.create_node(tag=self.token,
                                  identifier=str(uuid.uuid1()),
                                  parent=parent)
        if self.index >= len(self.token_list) - 1:
            # Past the last token: '#' marks the end of input
            self.index += 1
            self.token = '#'
            self.anls_proc.append(self.token)
            return self.token
        else:
            self.index += 1
            self.token = self.token_list[self.index].tag
            self.token_node = self.token_list[self.index]
            return self.token

    def reset_token(self, re_num=-1):
        """Rewind the matcher.

        With the default ``re_num=-1`` everything is rewound to the first
        token; otherwise the index goes back ``re_num`` tokens and as many
        entries are dropped from ``anls_proc``.
        """
        if re_num == -1:
            self.index = 0
            self.anls_proc.clear()
        else:
            self.index -= re_num
            for _ in range(re_num):
                self.anls_proc.pop()
        self.token = self.token_list[self.index].tag
        self.token_node = self.token_list[self.index]

    def creat_node(self, name, parent):
        """Create a tree node tagged ``name`` and return its new identifier.

        The node becomes the root when the tree is empty, otherwise it is
        attached under ``parent``.
        """
        iid = str(uuid.uuid1())
        if self.tree.size() == 0:
            self.tree.create_node(tag='{}'.format(name), identifier=iid)
        else:
            self.tree.create_node(tag='{}'.format(name),
                                  identifier=iid,
                                  parent=parent)
        return iid

    def func_main(self, parent):
        """Entry point of the match; this base implementation returns False."""
        return False

    def is_var(self):
        """Tell whether the current token is an identifier that is not one of
        the reserved words listed below."""
        res = self.token.isidentifier()
        if self.token in {
                "void", "main", "short", "long", "int", "double", "float",
                "while", "if", "else", "for", "break", "return"
        }:
            res = False
        return res

    def is_const(self):
        """Tell whether the current token consists of digits only."""
        return self.token.isdigit()

    def run(self, flag):
        """Run ``func_main`` from 'root' and report the outcome.

        Returns ``(res, index - 1, tree, info)``. When the match succeeded
        but fewer tokens were consumed than exist, ``info`` describes the
        first unmatched token and, if ``flag`` is set, ``res`` becomes False.
        """
        self.res = self.func_main('root')
        if self.res is True:
            if len(self.token_list) > len(self.anls_proc):
                self.info = 'error: {}, token: {}, row: {}, col: {}\n'.format(
                    'unmatched char', self.token_node.tag,
                    self.token_node.row, self.token_node.col)
                if flag:
                    self.res = False
        if self.index == 0:
            self.index += 1
        if len(self.info) == 0:
            self.info = 'all ok'
        return self.res, self.index - 1, self.tree, self.info

    def create_dotPic(self, root_dir):
        """Write the tree to ``<root_dir>/tree.dot`` and render it to PNG."""
        os.makedirs(root_dir, exist_ok=True)
        self.tree.to_graphviz(filename='{}/tree.dot'.format(root_dir))
        # Read through a context manager so the handle is always closed
        with open('{}/tree.dot'.format(root_dir)) as dot_file:
            string = dot_file.read()
        dot = graphviz.Source(string)
        dot.render('{}/tree'.format(root_dir), format='png')
class StepParse:
    """Extract the assembly structure from a STEP text file.

    ``load_step`` collects the NEXT_ASSEMBLY_USAGE_OCCURRENCE,
    PRODUCT_DEFINITION, PRODUCT_DEFINITION_FORMATION and PRODUCT entities,
    resolves their ``#`` references and builds ``part_dict`` (definition
    reference -> product name). ``create_tree`` turns the parent/child
    relations into a tree.
    """

    # Internal line type -> attribute collecting the lines of that type.
    _LINE_STORES = {
        'nauo': 'nauo_lines',
        'prod_def': 'prod_def_lines',
        'prod_def_form': 'prod_def_form_lines',
        'prod': 'prod_lines',
    }

    def __init__(self):
        pass

    @staticmethod
    def _classify(line):
        """Return the line type of an indexed entity line, or '' if unused."""
        if 'NEXT_ASSEMBLY_USAGE_OCCURRENCE' in line:
            return 'nauo'
        if 'PRODUCT_DEFINITION ' in line or 'PRODUCT_DEFINITION(' in line:
            return 'prod_def'
        if 'PRODUCT_DEFINITION_FORMATION' in line:
            return 'prod_def_form'
        if 'PRODUCT ' in line or 'PRODUCT(' in line:
            return 'prod'
        return ''

    def _store_line(self, line_type, line_hold):
        """Append a completed entity to the list matching ``line_type``."""
        store = self._LINE_STORES.get(line_type)
        if store is not None:
            getattr(self, store).append(line_hold)

    @staticmethod
    def _hash_refs(line, extra_separators=''):
        """Return the whitespace-separated items of ``line`` starting with '#'.

        ',' and '=' (plus ``extra_separators``) are turned into spaces first
        so references split cleanly even without surrounding blanks.
        """
        for separator in ',=' + extra_separators:
            line = line.replace(separator, ' ')
        return [el for el in line.split() if el.startswith('#')]

    @staticmethod
    def _ref_id(ref):
        """Reduce ``ref`` to '#<digits>', dropping punctuation such as ');'."""
        found = re.search(r'\d+', ref)
        return '#' + found.group() if found else ref

    def load_step(self, step_filename):
        """Parse ``step_filename`` and populate all reference attributes.

        Raises:
            IndexError: if a PRODUCT line contains no quoted name.
        """
        self.nauo_lines = []
        self.prod_def_lines = []
        self.prod_def_form_lines = []
        self.prod_lines = []
        self.filename = os.path.splitext(step_filename)[0]

        line_hold = ''
        line_type = ''
        with open(step_filename) as f:
            for line in f:
                if re.search("#(.*)=", line):
                    # A new indexed entity starts: the held one is complete.
                    self._store_line(line_type, line_hold)
                    line_type = self._classify(line)
                    line_hold = line.rstrip() if line_type else ''
                elif 'ENDSEC;' in line:
                    # End of section: flush whatever is still held.
                    self._store_line(line_type, line_hold)
                    line_hold = ''
                    line_type = ''
                else:
                    # Wrapped continuation of the entity being held.
                    line_hold = line_hold + line.rstrip()
        # A file that ends without ENDSEC; must not lose its last entity.
        self._store_line(line_type, line_hold)

        # All '#' references per line; PRODUCT lines also get the product
        # name appended as their last item.
        self.nauo_refs = [self._hash_refs(l) for l in self.nauo_lines]
        self.prod_def_refs = [self._hash_refs(l) for l in self.prod_def_lines]
        self.prod_def_form_refs = [
            self._hash_refs(l) for l in self.prod_def_form_lines
        ]
        self.prod_refs = []
        for prod_line in self.prod_lines:
            refs = self._hash_refs(prod_line, '(')
            refs.append(prod_line.split("'")[1])
            self.prod_refs.append(refs)

        # First two items of each PRODUCT_DEFINITION: its own reference and
        # the PRODUCT_DEFINITION_FORMATION reference (the third is the
        # shape reference).
        self.prod_all_refs = [el[:2] for el in self.prod_def_refs]
        for refs in self.prod_all_refs:
            for form in self.prod_def_form_refs:
                if form[0] == refs[1]:
                    refs.append(form[1])
                    break
            else:
                # No formation found; indexing refs[2] would raise.
                continue
            # The formation's product reference may still carry ');', so
            # compare on the bare '#<digits>' form.
            prod_ref = self._ref_id(refs[2])
            for prod in self.prod_refs:
                if prod[0] == prod_ref:
                    # The name is always the last item of a PRODUCT entry.
                    refs.append(prod[-1])
                    break

        # Parent and child are the 2nd and 3rd reference of each NAUO line.
        self.parent_refs = [el[1] for el in self.nauo_refs]
        self.child_refs = [el[2] for el in self.nauo_refs]

        # Distinct parts and assemblies via set operations.
        self.all_type_refs = set(self.child_refs) | set(self.parent_refs)
        self.ass_type_refs = set(self.parent_refs)
        self.part_type_refs = set(self.child_refs) - set(self.parent_refs)
        # Roots are parents that never appear as a child.
        self.root_type_refs = set(self.parent_refs) - set(self.child_refs)

        self.create_dict()

    def show_values(self):
        """Print the collected lines and references (basic debugging aid)."""
        print(self.nauo_lines)
        print(self.prod_def_lines)
        print(self.prod_def_form_lines)
        print(self.prod_lines)
        print(self.nauo_refs)
        print(self.prod_def_refs)
        print(self.prod_def_form_refs)
        print(self.prod_refs)

    def create_dict(self):
        """Build ``part_dict``: definition reference -> product name.

        A reference that cannot be followed through definition ->
        formation -> product is left out instead of inheriting the name
        resolved for a previous part.
        """
        form_of_def = {
            refs[0]: self._ref_id(refs[1]) for refs in self.prod_def_refs
        }
        prod_of_form = {
            refs[0]: self._ref_id(refs[1]) for refs in self.prod_def_form_refs
        }
        name_of_prod = {refs[0]: refs[-1] for refs in self.prod_refs}

        self.part_dict = {}
        for part in self.all_type_refs:
            prod_loc = prod_of_form.get(form_of_def.get(part))
            part_name = name_of_prod.get(prod_loc)
            if part_name is not None:
                self.part_dict[part] = part_name

    def create_tree(self):
        """Build ``self.tree`` from the NAUO parent/child relations.

        Node identifiers are consecutive integers starting at 0 for the
        root; ``tree_dict`` maps them back to STEP references. Nothing is
        added when ``part_dict`` is empty.
        """
        self.tree = Tree()
        if not self.part_dict:
            return

        root_node_ref = list(self.root_type_refs)[0]
        # Fall back to the raw reference when no name could be resolved.
        self.tree.create_node(
            self.part_dict.get(root_node_ref, root_node_ref), 0)
        self.tree_dict = {0: root_node_ref}

        def add_children(parent_id):
            parent_ref = self.tree_dict[parent_id]
            for refs in self.nauo_refs:
                if refs[1] == parent_ref:
                    # Keys are contiguous from 0, so the size is the next id.
                    node_id = len(self.tree_dict)
                    self.tree_dict[node_id] = str(refs[2])
                    self.tree.create_node(
                        self.part_dict.get(refs[2], refs[2]),
                        node_id,
                        parent=parent_id)
                    add_children(node_id)

        add_children(0)

    def print_tree(self):
        """Show the tree, building it first if that has not happened yet."""
        if getattr(self, 'tree', None) is None:
            self.create_tree()
        self.tree.show()

    def tree_to_json(self, save_to_file=False, filename='file', path=''):
        """Return the tree as a JSON string, optionally saving it.

        Args:
            save_to_file: when truthy, also write ``<path>/<filename>.json``.
            filename: file name without extension.
            path: target directory; empty means the current directory.

        Returns:
            The JSON string, or ``None`` (after printing a notice) when the
            tree is empty.
        """
        if self.tree.size() == 0:
            print("no tree to print")
            return
        data = self.tree.to_json()
        if save_to_file:
            file_path = os.path.join(path, filename) if path else filename
            with open(file_path + '.json', 'w') as outfile:
                json.dump(json.loads(data), outfile)
        return data
"Select extra knowledge to load mirror infomation") if filename is not None: with open(filename, 'r') as load_f: load_dict = json.load(load_f) mirror = get_mirror(root.startEA) procs = get_all_procs() tree.create_node(fname, hex(root.startEA), data=Xref_node(fname, hex(root.startEA), XType.code, mirror)) if mirror == fname: add_xrefs(root.startEA, XType.code) Message("Reference Tree:\n\n") tree.show(line_type="ascii-em", idhidden=False, data_property='mirror') Message("Unique references:\n") for node in tree.all_nodes_itr(): if type(node.data.mirror) is str: print node.identifier #hierarchical output for level in range(1, tree.depth()): Message("\nLevel %d: %d\n" % (level, tree.size(level))) for node in tree.all_nodes(): if tree.level(node.identifier) == level and type( node.data.mirror) is str: print node.identifier Message("\n%d subroutines in routine %s need transplanting.\n" % (Xref_node.xrefTrans - 1, fname)) conn.close() else: Warning("No function found at location %x" % here())
class Parser_analyzer:
    """Table-driven LL(1) parser for statements that builds an AST.

    NEED: expr and the various terminator symbols.
    NOTE: ``int_t`` is a special workaround for the backtracking problem of
    the form ``A -> B int``, ``B -> int b | eps``, which cannot otherwise be
    resolved; it appears at the ``int_t main()`` position.
    """

    def __init__(self):
        self.Vn = []  # non-terminals
        self.Vt = []  # terminals
        self.table = None  # predictive parsing table, rows = Vn, cols = Vt
        self.stack_anls = []
        self.stack_toke = []
        self.err_info = []
        self.AST_Tree = Tree()
        self.AST_Tree_root = None
        self.parent_uid = None
        self.node_parent_dict = None
        self.current_anal_scope = 0

    @staticmethod
    def _read_lines(path):
        """Return all lines of the UTF-8 text file ``path``."""
        with open(path, 'r', encoding='utf-8') as handle:
            return handle.readlines()

    def _add_terminal(self, terminal):
        """Register ``terminal`` and give every table row a column for it."""
        if terminal not in self.Vt:
            self.Vt.append(terminal)
            for row in self.table:
                row.append('')

    def load_analyzer(self, prod_path, ff_path):
        """Build ``Vn``, ``Vt`` and the parsing table from two text files.

        Args:
            prod_path: productions, one per line as ``A -> x y z``; a line
                starting with ``|`` adds an alternative to the previous
                non-terminal.
            ff_path: tab-separated lines ``symbol, eps_flag, FIRST set,
                [FOLLOW set]`` with set members separated by blanks.

        Blank lines in either file are ignored.
        """
        prod_set = {}
        temp_prod = ''
        for raw in self._read_lines(prod_path):
            item = raw.strip()
            if not item:
                continue
            parts = item.split(' ')
            if item[0] != '|':
                # "A -> rhs": parts[1] is the arrow.
                temp_prod = parts[0]
                prod_set[temp_prod] = [' '.join(parts[2:]).strip()]
                if temp_prod not in self.Vn:
                    self.Vn.append(temp_prod)
            else:
                prod_set[temp_prod].append(' '.join(parts[1:]).strip())

        ff_set = {}
        for raw in self._read_lines(ff_path):
            line = raw.replace('\n', '')
            if not line:
                continue
            fields = line.split('\t')
            ff_set[fields[0]] = {
                'eps_flag': fields[1],
                'fi_set': fields[2].split(' '),
                'fo_set': fields[3].split(' ') if len(fields) == 4 else []
            }

        self.table = [[] for _ in range(len(self.Vn))]
        for row_idx, item in enumerate(self.Vn):
            item_prod = prod_set[item]
            item_ff = ff_set[item]
            nullable = item_ff['eps_flag'] == 'true'
            if nullable:
                item_ff['fi_set'].remove('eps')
            for non in item_ff['fi_set']:
                self._add_terminal(non)
                aim_prod = None
                aim2_prod = None  # fallback: production of the form "eps x ..."
                for temp_prod in item_prod:
                    symbols = temp_prod.split(' ')
                    temp_first = symbols[0]
                    if temp_first == 'eps' and len(symbols) > 1:
                        aim2_prod = temp_prod
                    if non == temp_first:
                        aim_prod = temp_prod
                        break
                    elif temp_first in ff_set:
                        if (non in ff_set[temp_first]['fi_set']
                                or ff_set[temp_first]['eps_flag'] == 'true'):
                            aim_prod = temp_prod
                            break
                if aim_prod is None:
                    aim_prod = aim2_prod
                self.table[row_idx][self.Vt.index(non)] = aim_prod
            if nullable:
                # A nullable non-terminal derives eps on every FOLLOW symbol.
                for non in item_ff['fo_set']:
                    self._add_terminal(non)
                    self.table[row_idx][self.Vt.index(non)] = 'eps'

    def load_stack(self, token_list, start):
        """Reset both stacks for parsing ``token_list`` from symbol ``start``."""
        self.stack_anls = ['#', start]
        # Tokens are pushed reversed so the first token is on top.
        self.stack_toke = ['#']
        self.stack_toke.extend(reversed(token_list))
        self.err_info = []
        self.node_parent_dict = {start: [None]}

    def table_show(self):
        """Return the parsing table rendered as text, one row per non-terminal."""
        rendered = ["{}\n".format(str(self.Vt))]
        for name, row in zip(self.Vn, self.table):
            rendered.append("{}\t".format(name))
            for cell, terminal in zip(row, self.Vt):
                rendered.append("'{}'({}) ".format(cell, terminal))
            rendered.append('\n')
        return ''.join(rendered)

    def ans_show(self):
        """Print both stacks followed by an empty line."""
        print(self.stack_anls)
        print(self.stack_toke)
        print()

    def creat_node(self, tag, parent, data):
        """Add an AST node and return its identifier.

        The first node ever created becomes the root (``parent`` is ignored
        for it) and is remembered in ``AST_Tree_root``.
        """
        if self.AST_Tree.size() == 0:
            node = self.AST_Tree.create_node(tag='{}'.format(tag), data=data)
            self.AST_Tree_root = node
        else:
            node = self.AST_Tree.create_node(tag='{}'.format(tag),
                                             parent=parent,
                                             data=data)
        return node.identifier

    def create_dotPic(self, root_dir):
        """Write ``tree.dot`` into ``root_dir`` and render it to ``tree.png``."""
        dot_path = '{}/tree.dot'.format(root_dir)
        self.AST_Tree.to_graphviz(filename=dot_path)
        # Read through a context manager so the handle is closed again.
        with open(dot_path) as dot_file:
            dot_source = dot_file.read()
        dot = graphviz.Source(dot_source)
        dot.render('{}/tree'.format(root_dir), format='png')

    def _table_entry(self, symbol, toke):
        """Return the table cell for (``symbol``, ``toke``) as a string.

        ``var``/``num`` tokens are looked up by type, all others by their
        literal tag. An unknown terminal or an unset (``None``) cell yields
        '' so ``run`` reports a syntax error instead of crashing.
        """
        key = toke.type if toke.type in ['var', 'num'] else toke.tag
        if key not in self.Vt:
            return ''
        return self.table[self.Vn.index(symbol)][self.Vt.index(key)] or ''

    def _attach_node(self, symbol, data):
        """Create the AST node for ``symbol`` under its pending parent."""
        parents = self.node_parent_dict[symbol]
        uid = self.creat_node(symbol, parents[-1], data)
        parents.pop(-1)
        if len(parents) == 0:
            self.node_parent_dict.pop(symbol)
        return uid

    def _register_children(self, symbols):
        """Remember ``parent_uid`` as the pending parent of each pushed symbol."""
        for item in symbols:
            self.node_parent_dict.setdefault(item, []).append(self.parent_uid)

    def run(self, log=False):
        """Parse the loaded token stack and build the AST.

        Args:
            log: when truthy, record a step-by-step trace.

        Returns:
            Tuple ``(anlsRes, anlsLog)``: the result text (success message
            or the collected errors) and the trace. Error lines are added
            to the trace even when ``log`` is false.
        """
        log_parts = []
        toke = self.stack_toke.pop(-1)
        symbol = self.stack_anls.pop(-1)
        while symbol != '#':
            if symbol in [toke.tag, toke.type]:
                # Terminal matched: keep the brace-nesting scope up to date.
                if symbol == '{':
                    self.current_anal_scope += 1
                elif symbol == '}':
                    self.current_anal_scope -= 1
                else:
                    toke.set_scope(self.current_anal_scope)
                # Numeric literals carry their own text as value.
                if toke.type == 'num':
                    toke.set_value(toke.tag)
                self._attach_node(symbol, toke)
                toke = self.stack_toke.pop(-1)
                if log:
                    log_parts.append("\t*HIT: {}\t<-\t{}\n".format(
                        symbol, toke))
                if toke == '#':
                    break
            elif symbol in self.Vn:
                table_item = self._table_entry(symbol, toke).split(' ')
                if table_item[0] == '':
                    # Empty cell: syntax error for this symbol/token pair.
                    log_parts.append("\t*ERROR: {}\t<-\t{}\n".format(
                        symbol, toke))
                    self.err_info.append(
                        "row: {}, col: {}, token: '{}' cont match '{}'\n".
                        format(toke.row, toke.col, toke, symbol))
                elif table_item[0] == 'eps':
                    # Plain "eps" derives nothing; "eps x ..." pushes the
                    # tail without creating a node for ``symbol`` itself.
                    if len(table_item) > 1:
                        temp = table_item[:0:-1]
                        self.stack_anls.extend(temp)
                        self._register_children(temp)
                else:
                    # Regular production: push the right-hand side reversed
                    # and hang its symbols below the new node.
                    temp = table_item[::-1]
                    self.stack_anls.extend(temp)
                    self.parent_uid = self._attach_node(symbol, symbol)
                    self._register_children(temp)
            if log:
                log_parts.append("\n")
                log_parts.append("symb:'{}'----stack:{}\n".format(
                    symbol, list(reversed(self.stack_anls))))
                log_parts.append("toke:{}----stack:{}\n".format(
                    toke, list(reversed(self.stack_toke))))
            symbol = self.stack_anls.pop(-1)
        self.node_parent_dict.clear()

        anlsRes = "match compete!\n" if len(self.err_info) == 0 else ''
        anlsRes += ''.join("{}".format(item) for item in self.err_info)
        return anlsRes, ''.join(log_parts)
def use_hyp(word2syn, output, data):
    """Label word pairs using the hypernym tree read from ``wn_hyp.pl``.

    Two passes are made. The first emits ``a > b`` / ``a < b`` lines for
    pairs where one word's synset is a descendant of the other's; the second
    emits ``a | b`` lines for the remaining pairs whose synsets are
    siblings. A ``"\\n"`` entry is appended to ``output`` after each pass.

    Args:
        word2syn: mapping from a (singularised) word to its synset ids.
        output: list of result lines; extended in place.
        data: iterable of hashable word pairs ``(d[0], d[1])``.

    Returns:
        Tuple ``(output, un_change)`` where ``un_change`` lists the pairs
        neither pass could label.
    """

    def singular(word):
        # singular_noun returns False when the word is not a plural noun.
        single = p.singular_noun(word)
        return word if single is False else single

    # Read the (child, parent) pairs once; the context manager closes the
    # file, which the per-pass reopen never did.
    pending = []
    with open(datapath + "wn_hyp.pl", "r") as f:
        for line in f:
            child, parent = re.findall(r'\d+', line)
            pending.append((child, parent))

    dic = Tree()
    dic.create_node("100001740", "100001740")
    # A node can only be attached once its parent exists, so keep retrying
    # the failed pairs until a full pass adds nothing.
    add = -1
    while add != 0:
        add = 0
        retry = []
        for child, parent in pending:
            try:
                dic.create_node(child, child, parent=parent)
                add += 1
            except Exception:
                # Parent not in the tree yet, or child already present.
                retry.append((child, parent))
        pending = retry
    print(dic.size())

    # Pass 1: hypernym / hyponym relations.
    entail = defaultdict(set)
    for n in dic.all_nodes():
        for m in dic.subtree(n.tag).all_nodes():
            if m.tag != n.tag:
                entail[n.tag].add(m.tag)

    un_change = []
    label = set()
    for d in data:
        d0 = singular(d[0])
        d1 = singular(d[1])
        forward = d[0] + "\t" + ">" + "\t" + d[1]
        backward = d[0] + "\t" + "<" + "\t" + d[1]
        for i in word2syn[d0]:
            for j in word2syn[d1]:
                # NOTE(review): a pair is labelled only when its line is
                # newly added - confirm against the original layout.
                if j in entail[i]:
                    if forward not in output:
                        output.append(forward)
                        label.add(d)
                elif i in entail[j]:
                    if backward not in output:
                        output.append(backward)
                        label.add(d)
        if d not in un_change and d not in label:
            un_change.append(d)
    print("before single: " + str(len(data)) + " after: " +
          str(len(un_change)))
    output.append("\n")
    del entail

    # Pass 2: sibling relations, only for pairs still unlabelled.
    data = un_change
    un_change = []
    alter = defaultdict(set)
    for n in dic.all_nodes():
        for m in dic.siblings(n.tag):
            if m.tag != n.tag and n.bpointer != m.tag:
                alter[n.tag].add(m.tag)

    label = set()
    for d in data:
        d0 = singular(d[0])
        d1 = singular(d[1])
        sibling = d[0] + "\t" + "|" + "\t" + d[1]
        for i in word2syn[d0]:
            for j in word2syn[d1]:
                if j in alter[i] or i in alter[j]:
                    if sibling not in output:
                        output.append(sibling)
                        label.add(d)
        if d not in un_change and d not in label:
            un_change.append(d)
    del alter
    print("before single: " + str(len(data)) + " after: " +
          str(len(un_change)))
    output.append("\n")
    return output, un_change
def count_of_all_distributions_of_linux(data):
    """Return how many nodes ``build_tree`` adds for ``data["Linux"]``.

    A tree holding only a ``'root'`` node is handed to ``build_tree``; the
    result is the size of the tree it returns minus that root.
    """
    linux_tree = Tree()
    root_node = linux_tree.create_node('root', 'root')
    populated = build_tree(data=data["Linux"],
                           tree=linux_tree,
                           parent=root_node)
    # The synthetic root is not counted.
    node_total = populated.size()
    return node_total - 1
# Count how often each distinct name occurs in kimTables_tables.
# NOTE(review): table_counts, depth, depth1 and sheet are appended to but not
# created in this excerpt - presumably initialised (empty) earlier; the zip
# below only pairs up correctly if table_counts started empty. TODO confirm.
tablestocount=list(set(kimTables_tables))
for i in tablestocount:
    table_counts.append(kimTables_tables.count(i))
kimcount=dict(zip(tablestocount,table_counts))
count_list=list()
sheetFrame_new=list()
sheetFrame_new = list(dicttree.keys())
sheet_do=list()
# For every key of dicttree: build its tree, record the tree size and the
# number of added entries, and sum the occurrence counts of the added
# entries that are known table names.
for k in sheetFrame_new:
    addedTree = list()
    count=0
    d_list=dicttree[k]
    # This fresh Tree is replaced by the one treeBuildParent returns.
    tree=Tree()
    tree,addedTree=treeBuildParent(k,fieldsIdentifierdict,addedTree)
    tree,addedTree,newAdded=treeBuild(k,dicttree,fieldsIdentifierdict,tree,addedTree)
    depth.append(tree.size())
    depth1.append(len(addedTree))
    sheet.append(k)
    for i in addedTree:
        if i in tablestocount:
            count+=kimcount[i]
    count_list.append(count)
    sheet_do.append(k)
# Write one row per key to tree.csv; the 'Depth' column holds tree.size().
treedataframe= {'Name': sheet_do,'Count':count_list,'Depth':depth}
treedataframe = pd.DataFrame(data=treedataframe)
treedataframe.to_csv('tree.csv',index=None)
# Position of the largest recorded tree size.
max_value = max(depth)
max_index = depth.index(max_value)
addedTree=list()
k='ANADJP'
class RST_DT:
    """Discourse tree read from an indented text file.

    ``load`` fills two parallel trees that share node ids (the line numbers
    of the file): ``tree`` holds the node contents and ``treeNS`` the
    nuclearity tag ('N' or 'S') of each node. ``toDEP`` converts them into a
    dependency tree over the leaves (EDUs).
    """

    @staticmethod
    def _leading_spaces(line):
        """Return the number of space characters at the start of ``line``."""
        return len(line) - len(line.lstrip(" "))

    def load(self, path2file):
        """Read ``path2file`` and build ``tree``, ``treeNS``, ``EDU``, ``id_EDUs``.

        Two leading spaces correspond to one level of depth. A line holds
        either an inner node ``(label[N][S]`` or a leaf ``_!text!_``.
        """
        self.id_EDUs = []
        self.EDU = {}
        self.treeNS = Tree()
        self.tree = Tree()

        with open(path2file, "r") as f:
            lines = f.readlines()

        # The deepest indentation sizes the per-level bookkeeping lists; at
        # least one slot is kept so a single unindented line still works.
        max_space = max([0] + [self._leading_spaces(line) for line in lines])
        id_parents = [0] * max(max_space, 1)
        NS_parents = [0] * max(max_space, 1)

        for i, line in enumerate(lines):
            # The indentation determines the parent. Floor division keeps
            # the level an int on Python 3 as well.
            space = self._leading_spaces(line) // 2
            id_parents[space] = i
            parent = id_parents[space - 1]

            # Extract the content: group 0 = inner node, group 1 = leaf.
            match = re.findall(r"\(([\w\-\[\]]+)|(_!.+!_)", line)[0]
            if match[0] == "":
                # EDU leaf
                content = match[1]
                self.id_EDUs.append(i)
                self.EDU[i] = re.findall("_!(.*)!_", content)
            else:
                content = match[0]
                # Nuclearity of the children, in order, e.g. ['N', 'S'].
                NS_parents[space] = re.findall(r"\[(N|S)\]", content)

            # Create the node in both trees.
            if i == 0:
                self.tree.create_node(content, 0)
                self.treeNS.create_node("Root", 0)
            else:
                # 0 or 1 because the tree is binary: the number of children
                # the parent already has selects this node's N/S tag.
                id_NS = len(self.tree.is_branch(parent))
                self.tree.create_node(content, i, parent=parent)
                self.treeNS.create_node(NS_parents[space - 1][id_NS],
                                        i,
                                        parent=parent)

    def toDEP(self):
        """Return the dependency tree derived from the loaded trees.

        Step 1 computes, for every inner node, its head EDU (``head_tree``);
        step 2 attaches every other EDU below the head it depends on.
        """
        ###############################
        # Step 1: build head_tree.
        # Breadth-first walk of ``tree`` to get the depth (root = 0) of
        # every node, NOT counting the leaves (EDUs), which stay at -1.
        nodes_depth = [-1] * self.tree.size()
        for i in range(self.tree.size()):
            id_nodes = [0]
            depth = [999] * self.tree.size()
            while id_nodes:
                id_node = id_nodes.pop(0)
                node = self.tree.get_node(id_node)
                if node.bpointer is not None:
                    node_parent = self.tree.get_node(node.bpointer)
                    depth[node.identifier] = depth[node_parent.identifier] + 1
                else:
                    depth[node.identifier] = 0
                if id_node == i:
                    if node.fpointer:
                        nodes_depth[i] = depth[i]
                    break
                if node.fpointer:
                    id_nodes.append(node.fpointer[0])
                    id_nodes.append(node.fpointer[1])

        # Group the inner nodes by depth.
        id_nodes_depth = []
        for d in range(self.tree.depth()):
            id_nodes_depth.append([])
            for i in range(self.tree.size()):
                if nodes_depth[i] == d:
                    id_nodes_depth[d].append(i)

        # For every inner node, starting from the deepest level: the head
        # is the head of its nucleus child (left if tagged 'N', else right),
        # or that child itself when it is an EDU.
        head_tree = [-1] * self.treeNS.size()
        for d in range(len(id_nodes_depth) - 1, -1, -1):
            for id_node in id_nodes_depth[d]:
                node = self.treeNS.get_node(id_node)
                node_left = self.treeNS.get_node(node.fpointer[0])
                node_right = self.treeNS.get_node(node.fpointer[1])
                nucleus = node_left if node_left.tag == "N" else node_right
                if head_tree[nucleus.identifier] == -1:
                    identifier = nucleus.identifier
                else:
                    identifier = head_tree[nucleus.identifier]
                head_tree[id_node] = identifier

        ###############################
        # Step 2: build the dependency tree.
        # The root is the head of the first node; every other EDU starts
        # out as a child of that root.
        dep_tree = Tree()
        id_root = head_tree[0]
        root = self.tree.get_node(id_root)
        dep_tree.create_node(root.tag, root.identifier)
        for id_EDU in range(len(head_tree)):
            if head_tree[id_EDU] == -1 and id_EDU != id_root:
                node = self.tree.get_node(id_EDU)
                dep_tree.create_node(node.tag, node.identifier, parent=id_root)

        # Then move each EDU below its actual head.
        for id_EDU in range(len(head_tree)):
            if head_tree[id_EDU] == -1 and id_EDU != id_root:
                EDU_NS = self.treeNS.get_node(id_EDU)
                if EDU_NS.tag == "N":
                    # Breadth-first search from the EDU until a node tagged
                    # 'S' is found. The original also computed whether that
                    # node is itself an EDU but immediately overrode the
                    # result with False, so only the tag decides.
                    id_nodes = [EDU_NS.identifier]
                    visited = [False] * self.treeNS.size()
                    while id_nodes:
                        id_node = id_nodes.pop(0)
                        EDU = self.tree.get_node(id_node)
                        visited[EDU.identifier] = True
                        node_tag = self.treeNS.get_node(EDU.identifier).tag
                        if node_tag == "S":
                            break
                        if EDU.bpointer:
                            if not visited[EDU.bpointer]:
                                id_nodes.append(EDU.bpointer)
                        if EDU.fpointer:  # guard: leaves have no children
                            if not visited[EDU.fpointer[0]]:
                                id_nodes.append(EDU.fpointer[0])
                            if not visited[EDU.fpointer[1]]:
                                id_nodes.append(EDU.fpointer[1])
                    # Attach below the head of the parent of that 'S' node.
                    id_head = head_tree[EDU.bpointer]
                else:
                    # Satellite EDU: attach below the head of its parent.
                    parent = self.treeNS.get_node(EDU_NS.bpointer)
                    id_head = head_tree[parent.identifier]
                if id_EDU != id_head:
                    dep_tree.move_node(id_EDU, id_head)
        return dep_tree

    def toString(self):
        """Display the tree in the same form as Hilda's output."""
        showDepth(self.tree, 0)