def act_header(cc, sources, args):
    htree, headers = cc.includes(sources)
    # Time headers that are referenced often enough and have not been timed yet
    for node in anytree.LevelOrderIter(htree):
        if node.header.count < args.min_refs or node.header.time:
            continue
        if filter_up(node, lambda x: x.header.ok() and x.header.time['cpu'] < args.min_duration):
            continue
        node.header.time = cc.time_header(node.name)

    # full
    content = ''
    for node in anytree.PreOrderIter(htree):
        if not node.parent:
            continue
        if not node.parent.parent:
            content += node.name + '\n'
            continue
        if not node.header.ok():
            continue
        # LevelOrderGroupIter yields the node itself first, then its direct children
        groups = anytree.LevelOrderGroupIter(node)
        children = []
        try:
            next(groups)
            children = next(groups)
        except StopIteration:
            pass
        time_self = max(
            0, node.header.time['cpu'] - sum(
                child.header.time['cpu'] if child.header.ok() else 0
                for child in children))
        time_total = node.header.time['cpu']
        content += '%s%0.1f %0.1f %s\n' % (' ' * (node.depth - 1), time_total,
                                           time_self, node.name)
    atomic_write('header.full.txt', content)

    # common header
    common = set()
    common_min = args.common_pct * len(sources) / 100
    for node in anytree.LevelOrderIter(htree):
        if node.header.ok() \
                and node.header.count >= common_min \
                and node.header.time['cpu'] >= args.min_duration \
                and not filter_up(node, lambda x: x.name in common):
            common.add(node.name)
    write_header_csv('common', headers, common)

    # top headers
    write_header_csv('top', headers, [x for x in headers if headers[x].ok()])
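# A minimal, self-contained sketch (illustrative tree, not from act_header) of the
# LevelOrderGroupIter pattern used above to fetch a node's direct children: the
# first yielded group is the node itself, the second is its children.
import anytree

root = anytree.Node("root")
a = anytree.Node("a", parent=root)
b = anytree.Node("b", parent=root)
anytree.Node("a1", parent=a)

groups = anytree.LevelOrderGroupIter(root)
print([n.name for n in next(groups)])  # ['root']
print([n.name for n in next(groups)])  # ['a', 'b']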
def write_outputs(args, logger, tree):
    """Write outputs"""
    logger.info("Begin writing outputs")
    # Export JSON using anytree
    with open("%s/%s.json" % (args.output_dir, constants.HIERARCHICAL_FDR_OUTPUTS),
              "w") as output_file:
        JsonExporter(indent=2).write(tree, output_file)
    # Write CSV with additional column for rejected or not
    with open("%s/%s.csv" % (args.output_dir, constants.HIERARCHICAL_FDR_OUTPUTS),
              "w", newline="") as output_file:
        writer = csv.writer(output_file)
        writer.writerow([
            constants.NODE_NAME, constants.PARENT_NAME, constants.PVALUE_LOSSES,
            constants.REJECTED_STATUS, constants.ADJUSTED_PVALUE
        ])
        for node in anytree.LevelOrderIter(tree):
            parent_name = ""
            if node.parent:
                parent_name = node.parent.name
            writer.writerow([
                node.name, parent_name, node.pvalue, int(node.rejected),
                node.adjusted_pvalue
            ])
    # Generate tree of rejected hypotheses with colour grading based on adjusted p-value
    generate_tree_of_rejected_hypotheses(args, logger, tree)
    logger.info("End writing outputs")
def color_nodes(args, tree):
    """Add fill and font color to nodes based on partition in sorted list"""
    def differentiator(node):
        """Differentiates between nodes"""
        return (node.adjusted_pvalue
                if args.sorting_param == constants.ADJUSTED_PVALUE
                else node.effect_size)

    nodes_sorted = sorted(anytree.LevelOrderIter(tree),
                          key=differentiator,
                          reverse=True)  # sort nodes for color grading
    num_nodes = len(nodes_sorted)
    lower, upper = args.color_range
    num_colors = upper - lower + 1
    assert 1 <= lower <= upper <= 9
    for idx, node in enumerate(nodes_sorted):
        node.color = idx + lower
        if num_nodes > num_colors:
            node.color = lower + (idx * num_colors) // num_nodes
        assert node.color in range(lower, upper + 1)
    # Non-differentiated nodes should have the same color
    prev_node = None
    for node in nodes_sorted:
        if prev_node and differentiator(node) == differentiator(prev_node):
            node.color = prev_node.color
        prev_node = node
        node.fontcolor = "black" if node.color <= 5 else "white"
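# A minimal, self-contained sketch (not part of the functions above) of the
# proportional color bucketing used in color_nodes: when there are more nodes
# than colors, rank idx maps to lower + (idx * num_colors) // num_nodes, which
# spreads the sorted nodes evenly over the color range.
def bucket_colors(num_nodes, lower, upper):
    num_colors = upper - lower + 1
    return [lower + (idx * num_colors) // num_nodes for idx in range(num_nodes)]

print(bucket_colors(7, 1, 3))  # [1, 1, 1, 2, 2, 3, 3]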
def expand(self, t=INFINITY):
    """
    Increments 'self.fully_expanded', then expands the entire tree to a depth of
    'self.fully_expanded' in the time allotted. If incomplete, just leaves it as is.
    :param t: int <- time limit in seconds
    :return: None
    """
    stop_time = time.time() + t
    if self.caught_up:
        self.fully_expanded += 1
        self.tree_depth = max(self.tree_depth, self.fully_expanded)
        self.caught_up = False
    for node in anytree.LevelOrderIter(self.game_tree):
        if node.depth >= self.fully_expanded:
            break
        if time.time() > stop_time:
            return
        if node.is_leaf:
            self.expand_node(node)
    self.caught_up = True
def visualize_hierarchical(args, features):
    """Visualize hierarchical feature importance results"""
    tree = features[0].root
    opts = SimpleNamespace(output_dir=args.output_dir,
                           effect_name="Importance Score",
                           color_scheme="ylorrd9",
                           color_range=[1, 9],
                           sorting_param=constants.EFFECT_SIZE,
                           minimal_labels=False,
                           rectangle_leaves=True)
    nodes = {}
    for node in anytree.LevelOrderIter(tree):
        if node.important or node.name == constants.DUMMY_ROOT:
            parent = nodes[node.parent.name] if node.parent else None
            newnode = anytree.Node(node.name,
                                   parent=parent,
                                   description=node.description,
                                   effect_size=node.effect_size,
                                   was_leaf=node.is_leaf)
            nodes[newnode.name] = newnode
    if len(nodes) <= 1:
        print("No important features identified, skipping feature importance hierarchy visualization.")
        return
    newtree = next(iter(nodes.values())).root  # identify root
    if newtree.name == constants.DUMMY_ROOT and len(newtree.children) == 1:
        # Get rid of dummy node if not required to maintain tree
        newtree = newtree.children[0]
        newtree.parent = None
    color_nodes(opts, newtree)
    render_tree(opts, newtree)
def get_non_continuous_min_dep(root):
    """
    Finds minimal LHS combinations on a tree of a non-continuous RHS.

    All node.name on the tree need to be lists for this function to work
    properly, excluding the root.

    Keyword Arguments:
    root -- root of a tree where minimal LHS combinations are to be searched.
            All children's names need to be lists. root.search_strategy must be
            either 'greedy' or 'complete', the strategy with which the tree was
            created.
    """
    candidates = {}
    if root.search_strategy == 'complete':
        # 'tree' is assumed to be the anytree module (or an alias for it) in the
        # original source; this is a plain level-order traversal.
        for node in tree.LevelOrderIter(root.children[0]):
            if node.score >= node.parent.score * 0.98:
                candidates[tuple(node.name)] = node.score
                parent = node.parent
                if not parent.is_root:
                    try:
                        del candidates[tuple(node.parent.name)]
                    except KeyError:
                        pass
    elif root.search_strategy == 'greedy':
        parent_is_minimal = True
        newest_children = root.get_newest_children()
        for child in newest_children:
            if child.score >= 0.98 * child.parent.score:
                candidates[tuple(child.name)] = child.score
                parent_is_minimal = False
        parent = newest_children[0].parent
        if parent_is_minimal and not parent.is_root:
            candidates[tuple(parent.name)] = parent.score
    return candidates
def _make_leafs_lvl(self):
    leafs_list = []
    leafs_lvl = []
    for node in anytree.LevelOrderIter(self.get_root()):
        if node.is_leaf:
            leafs_list.append(node.name.get_leaf()[:2])
            leafs_lvl.append(node.depth + 1)
    return leafs_list, leafs_lvl
def prune_tree_on_effect_size(args, tree):
    """Prune tree by thresholding on effect size"""
    if not tree.effect_size:
        return  # No effect_size column in input file
    effect_size_threshold = tree.effect_size * (1 + args.tree_effect_size_threshold)
    for node in anytree.LevelOrderIter(tree):
        if node.effect_size > effect_size_threshold:
            node.parent = None
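# A minimal, self-contained sketch (illustrative nodes and threshold) of the
# detach-by-parent pattern used in prune_tree_on_effect_size: in anytree,
# setting node.parent = None removes the node and its entire subtree.
import anytree

root = anytree.Node("root", effect_size=1.0)
keep = anytree.Node("keep", parent=root, effect_size=0.5)
drop = anytree.Node("drop", parent=root, effect_size=3.0)   # above threshold
anytree.Node("drop_child", parent=drop, effect_size=0.1)    # detached with it

drop.parent = None
print([n.name for n in anytree.LevelOrderIter(root)])  # ['root', 'keep']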
def compute_link_transformations(
        self, joint_values: Optional[Dict[str, float]] = None) -> Dict[str, tuple]:
    """
    Compute transformations of all links for given joint values and return them
    in a dictionary in which link name serves as a key and link pose is a value.
    """
    if joint_values is None:
        joint_values = {}
    link_transforms = {}
    for link in anytree.LevelOrderIter(self.root_node):  # type: Link
        if link.is_root:
            link_transforms[link.name] = self.root_pose
            continue
        # Level-order traversal guarantees a parent's transform is computed before
        # any of its children are visited.
        parent_transform = link_transforms[link.parent.name]
        joint_value = joint_values.get(link.joint_from_parent.name, None)
        relative_pose = link.joint_from_parent.transformation_from_parent_to_child_link(
            joint_value)
        link_transforms[link.name] = multiply_transformations(
            parent_transform, relative_pose)
    return link_transforms
def color_nodes(opts, tree):
    """Add fill and font color to nodes based on partition in sorted list"""
    nodes_sorted = sorted(anytree.LevelOrderIter(tree),
                          key=lambda node: node.effect_size)  # sort nodes for color grading
    num_nodes = len(nodes_sorted)
    lower, upper = opts.color_range
    num_colors = upper - lower + 1
    assert 1 <= lower <= upper <= 9
    for idx, node in enumerate(nodes_sorted):
        node.color = idx + lower
        if num_nodes > num_colors:
            node.color = lower + (idx * num_colors) // num_nodes
        assert node.color in range(lower, upper + 1)
    # Non-differentiated nodes should have the same color
    prev_node = None
    for node in nodes_sorted:
        if prev_node and node.effect_size == prev_node.effect_size:
            node.color = prev_node.color
        prev_node = node
        node.fontcolor = "black" if node.color <= 5 else "white"
def generate_tree_of_rejected_hypotheses(args, logger, tree):
    """Generate tree of rejected hypotheses with colour grading based on adjusted p-value"""
    if not tree.rejected:
        logger.warn("No hypothesis rejected, so no tree will be generated. "
                    "If this is unexpected, check your input p-values")
        return
    nodes = {}
    for node in anytree.LevelOrderIter(tree):
        if node.rejected:
            parent = nodes[node.parent.name] if node.parent else None
            newnode = anytree.Node(node.name,
                                   parent=parent,
                                   adjusted_pvalue=node.adjusted_pvalue,
                                   description=node.description,
                                   effect_size=node.effect_size,
                                   was_leaf=node.is_leaf)
            nodes[newnode.name] = newnode
    newtree = next(iter(nodes.values())).root  # identify root
    prune_tree_on_effect_size(args, newtree)
    color_nodes(args, newtree)
    render_tree(args, newtree)
def nodes_by_bfs(self):
    if self.is_empty:
        return
    for node in anytree.LevelOrderIter(self.root_node):  # breadth-first
        yield node
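# A minimal, self-contained sketch (illustrative names) contrasting the
# breadth-first order produced by LevelOrderIter (which nodes_by_bfs relies on)
# with a depth-first PreOrderIter traversal of the same tree.
import anytree

root = anytree.Node("root")
a = anytree.Node("a", parent=root)
anytree.Node("a1", parent=a)
anytree.Node("b", parent=root)

print([n.name for n in anytree.LevelOrderIter(root)])  # ['root', 'a', 'b', 'a1']
print([n.name for n in anytree.PreOrderIter(root)])    # ['root', 'a', 'a1', 'b']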
df_types = df_types[['short_name', 'coretype', 'primary', 'secondary', 'cre',
                     'major_dissection', 'layer_dissectoin']]

###############################################################################
# create breadth-first binary search tree with tasic clusters
tree_dict = {'label': 'root'}  # base dictionary

# parses binary node positions into a dictionary with tree structure
for label, bin_str in zip(dendro['cluster'], dendro['position']):
    parse_binary(tree_dict, bin_str, label)

importer = DictImporter()
tree = importer.import_(tree_dict)

for ind, node in enumerate(anytree.LevelOrderIter(tree)):
    node.pos = ind + 1  # node index starting at one
    if node.is_leaf:
        node.name = str(ind + 1) + " " + node.label
    else:
        node.name = str(ind + 1)

DotExporter(tree).to_picture("dendro.png")

for leaf in tree.leaves:
    # TODO: phenotypes & deal with mismatching
    # edge cases for the last two endothelial cells
    if leaf.label == 'Endo Tbc1d4':
        leaf.cluster_id = 'f48'
    elif leaf.label == 'Endo Myl9':
        leaf.cluster_id = 'f49'
    else:
def run_task(self, current_task):
    # We ALWAYS have maxlevel=1, even if there are nested things, because run_task
    # handles all nested children recursively and we don't want the tree iterator to
    # find them. This is sorta stupid instead of just giving the tree itself at the
    # top node.
    for task in anytree.LevelOrderIter(current_task, maxlevel=1):
        # If the function is not the root execute function, go ahead and run it.
        # Can't run execute this way because it doesn't have a parent.
        if not task.function.__name__ == 'execute':
            # Set task_dirs and cur_dirs based on tree position
            if task.parent.type == 'task':
                if task.parent is not None and getattr(task.parent, 'task_dir', None):
                    if getattr(task, 'task_dir', None):
                        L.critical('Logic of task_dirs does not make sense here. '
                                   'In particular, a parent was given an explicit task_dir '
                                   'AND its child task was too instead of being derived')
                    else:
                        task.task_dir = os.path.join(task.parent.task_dir,
                                                     task.function.__name__)
                elif isinstance(task, (InputTask, GeneratedInputTask)):
                    pass  # Skip, because it should have been set in init of InputTask.
                else:
                    if getattr(task, 'task_dir_override', None):
                        # Expected outcome if given an override. This case is for when you
                        # want to specify that a task look somewhere else. Though note it is
                        # then hard to make the override programmatically determined.
                        pass
                    else:
                        task.task_dir = os.path.join(self.intermediate_dir,
                                                     task.function.__name__)
                self.cur_dir = task.task_dir
            elif task.parent.type == 'iterator':
                task.task_dir = os.path.join(self.cur_dir_parent_dir, task.name)
                self.cur_dir = task.task_dir
            else:
                raise NameError('Unknown Node type')

            # Set the project-level task_dirs object to have an attribute equal to the
            # current name. This makes it possible for functions later in the analysis
            # script to have access to previous task_dir locations.
            setattr(self, task.name + '_dir', task.task_dir)

            # In addition to self.cur_dir, there are also these two project-level
            # convenience attributes.
            self.cur_task = task
            self.run_this = task.run  # NYI, task skipping enabled here.

            if isinstance(task, (hb.InputTask, GeneratedInputTask)):
                # Don't want to skip InputTasks because these have internal logic for
                # what to skip.
                self.skip_existing = 0
            else:
                self.skip_existing = task.skip_existing

            if self.skip_existing:
                if os.path.exists(self.cur_dir):
                    self.run_this = 0

            if not os.path.exists(self.cur_dir) and task.creates_dir and task.run:
                hb.create_directories(self.cur_dir)

            # # NYI, but I want to implement task-level logging conditionals.
            # L.setLevel(task.logging_level)

            if task.type == 'task':
                if self.run_this:
                    if task.creates_dir:
                        hb.create_directories(self.cur_dir)
                        assert os.path.exists(self.cur_dir)

                    # If the task's parent is an iterator, we want to report different
                    # info, otherwise these are the same.
                    if task.parent.type == 'iterator':
                        r = task.function(self)
                    elif isinstance(task, InputTask):
                        self.prepend = ''
                        L.info(self.prepend + 'Running InputTask ' + str(task.name) +
                               ' in dir ' + str(self.cur_dir))
                        task.function()  # Running the Task including anything in p.run_this
                    else:
                        self.prepend = ''
                        L.info(self.prepend + 'Running task ' + str(task.name) +
                               ' in dir ' + str(self.cur_dir))
                        task.function()  # Running the Task including anything in p.run_this

                # NYI, task skipping enabled here.
                else:
                    if os.path.isdir(self.cur_dir):
                        if task.run:
                            if task.parent.type == 'iterator':
                                L.info('Skipping task ' + str(task.name) +
                                       ' because dir existed at ' + str(self.cur_dir))
                                r = task.function(self)
                            elif isinstance(task, InputTask):
                                self.prepend = ''
                                L.info(self.prepend + 'Running InputTask ' + str(task.name) +
                                       ' in dir ' + str(self.cur_dir))
                                task.function()  # Running the Task including anything in p.run_this
                            else:
                                self.prepend = ''
                                L.info(self.prepend + 'Skipping task ' + str(task.name) +
                                       ' in dir ' + str(self.cur_dir))
                                task.function()  # Running the Task including anything in p.run_this
                        else:
                            L.info('Instructed to skip task ' + str(task.name) +
                                   ' and loading from ' + str(self.cur_dir))
                            if task.parent.type == 'iterator':
                                r = task.function(self)
                            else:
                                self.prepend = ''
                                task.function()  # Running the Task including anything in p.run_this
                            # # CALL THE TASK FUNCTION
                            # task.function()  # Running the Task EXCLUDING anything in p.run_this
                    else:
                        L.info('Running task ' + str(task.name) +
                               ' and loading from ' + str(self.cur_dir))
                        if task.parent.type == 'iterator':
                            r = task.function(self)
                        elif isinstance(task, InputTask):
                            self.prepend = ''
                            L.info(self.prepend + 'Running InputTask ' + str(task.name) +
                                   ' in dir ' + str(self.cur_dir))
                            task.function()  # Running the Task including anything in p.run_this
                        else:
                            self.prepend = ''
                            task.function()  # Running the Task including anything in p.run_this

            elif task.type == 'iterator':
                self.prepend += ' '
                L.info('Creating iterator ' + str(task.name))

                # Run the function for defining the iterator
                if task.run:
                    # HACK: I failed to understand why sometimes the dirs weren't created
                    # in time, thus I force it here.
                    hb.create_directories(self.cur_dir)
                    assert os.path.exists(self.cur_dir)
                    task.function()
                else:
                    # NYI, task skipping enabled here.
                    L.info('Skipping running Iterator.')
                    task.function()

        # Whether run or not, search for children
        if len(task.children) > 0:
            # If the current task is an iterator, then check for replacements before
            # calling the child task. Definition of the project's
            # self.iterator_replacements is the one part of ProjectFlow that the analysis
            # script needs to be aware of, creating a dict of key-value pairs that are
            # replaced with each step in the iterator.
            if task.type == 'iterator' and task.run:
                # First check dimensions of iterator_replacements:
                replacement_lengths = []
                for replacement_attribute_name, replacement_attribute_value in self.iterator_replacements.items():
                    replacement_lengths.append(len(replacement_attribute_value))
                assert len(set(replacement_lengths)) == 1  # Check that all are the same size.
                num_iterations = replacement_lengths[0]

                self.run_in_parallel = True  # TODOO Connect to UI
                if not getattr(self, 'num_workers', None):
                    self.num_workers = 11

                if self.run_in_parallel:
                    # OPTIMIZATION NOTE: It's slow to spawn 460 processes when they are just
                    # going to be skipped, thus run_this for iterators needs to be improved.
                    # NOTE: worker pool and results are LOCAL variables so that they aren't
                    # pickled when we pass the project object.
                    worker_pool = multiprocessing.Pool(self.num_workers)
                results = []

                # Once all the iterations are done, iterate through the stored results and
                # call their get functions, which blocks running past this point until all
                # are done. SUPER CONFUSING POINT: the project object will be modified
                # independently by all tasks. Can't think of a good way to rejoin them.
                returns_from_parallel_tasks = []

                iterations_start = 0
                total_iteration_counter = 0
                num_processes_to_spawn_simultaneously = 2000  # Memory issue after ~10,000?

                # for thread_counter in range(1000):
                for iteration_outer_counter in range(0, num_iterations):
                    if iterations_start + num_processes_to_spawn_simultaneously < num_iterations:
                        iterations_stop = iterations_start + num_processes_to_spawn_simultaneously
                    else:
                        iterations_stop = num_iterations

                    # Iterate through all children of the iterator with new replacement values.
                    for iteration_counter in range(iterations_start, iterations_stop):
                        # NOTICE strange dimensionality here: even within a single iteration,
                        # we have to iterate through self.iterator_replacements because we
                        # might have more than 1 var that needs replacing.
                        replacements = OrderedDict()
                        for replacement_attribute_name, replacement_attribute_values in self.iterator_replacements.items():
                            current_replacement_value = self.iterator_replacements[replacement_attribute_name][iteration_counter]
                            replacements[replacement_attribute_name] = replacement_attribute_values
                            setattr(self, replacement_attribute_name, current_replacement_value)
                            if replacement_attribute_name == 'cur_dir_parent_dir':
                                setattr(self, 'cur_dir', current_replacement_value)

                        project_copy = copy.copy(self)  # Freeze it in place (necessary for parallelizing)

                        # For multiprocessing, you cannot pickle a GDAL DS or Band, so I
                        # manually unset them here. For some reason, using k.close_data
                        # corrupted the geotiff headers.
                        for i, k in project_copy.__dict__.items():
                            if type(k) in [hb.GlobalPyramidFrame, hb.ArrayFrame]:
                                k.band = None
                                k.ds = None
                                # k.close_data()

                        if self.run_in_parallel:
                            L.info('Initializing PARALLEL task ' + str(iteration_counter) +
                                   ' ' + task.name + ' with replacements: ' + str(replacements))
                            # We use apply_async, which immediately lets the next line
                            # calculate. It is blocked below with results.get().
                            result = worker_pool.apply_async(
                                func=run_iterator_in_parallel,
                                args=(project_copy, task, iteration_counter))
                            results.append(result)
                        else:
                            print('bork')
                            raise
                            # L.info('Starting task ' + str(iteration_counter) + ' ' + task.name +
                            #        ' while replacing ' + str(replacement_attribute_name))
                            # for child in task.children:
                            #     self.run_task(child)

                    iterations_start = iterations_stop

                # worker_pool.close()
                # worker_pool.join()

                for i in results:
                    for j in i.get():
                        if j is not None:
                            returns_from_parallel_tasks.append(j)

                for i in returns_from_parallel_tasks:
                    if i[1] == 'append_to_list':
                        if isinstance(i[2], list):
                            getattr(self, i[0]).extend(i[2])
                        else:
                            getattr(self, i[0]).append(i[2])
                        # p.layers_to_stitch.append(5)

            # Task is an iterator's child
            else:
                if task.run:
                    for child in task.children:
                        # Run the child found by iterating the task-node's children
                        self.run_task(child)
                # Task is not an iterator, thus we just call its child directly
                elif task.parent is not None:
                    if task.parent.type == 'iterator':
                        for child in task.children:
                            # Run the child found by iterating the task-node's children
                            self.run_task(child)
                    else:
                        for child in task.children:
                            # Run the child found by iterating the task-node's children
                            self.run_task(child)

    # raise NameError('wtf')
    try:
        if len(r) > 0:
            return r
    except:
        pass  # nothing needed returning
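# A minimal, self-contained sketch (illustrative nodes) of the maxlevel=1
# pattern commented on in run_task above: LevelOrderIter(node, maxlevel=1)
# yields only the given node, so the loop body executes once for current_task
# and descent into children happens explicitly via self.run_task(child).
import anytree

execute = anytree.Node("execute")
anytree.Node("child_task", parent=execute)

print([n.name for n in anytree.LevelOrderIter(execute, maxlevel=1)])  # ['execute']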