Example #1
def act_header(cc, sources, args):
    htree, headers = cc.includes(sources)
    for node in anytree.LevelOrderIter(htree):
        if node.header.count < args.min_refs or \
           node.header.time:
            continue

        if filter_up(node, lambda x: x.header.ok()
                     and x.header.time['cpu'] < args.min_duration):
            continue
        node.header.time = cc.time_header(node.name)

    # full
    content = ''
    for node in anytree.PreOrderIter(htree):
        if not node.parent:
            continue
        if not node.parent.parent:
            content += node.name + '\n'
            continue

        if not node.header.ok():
            continue

        level_iter = anytree.LevelOrderGroupIter(node)
        children = []
        try:
            next(level_iter)  # skip the node's own level
            children = next(level_iter)
        except StopIteration:
            pass
        time_self = max(
            0, node.header.time['cpu'] - sum([
                child.header.time['cpu'] if child.header.ok() else 0
                for child in children
            ]))
        time_total = node.header.time['cpu']

        content += '%s%0.1f %0.1f %s\n' % (' ' * (node.depth - 1), time_total,
                                           time_self, node.name)
    atomic_write('header.full.txt', content)

    # common header
    common = set()
    common_min = args.common_pct * len(sources) / 100
    for node in anytree.LevelOrderIter(htree):
        if node.header.ok() \
           and node.header.count >= common_min \
           and node.header.time['cpu'] >= args.min_duration \
           and not filter_up(node, lambda x: x.name in common):
            common.add(node.name)
    write_header_csv('common', headers, common)

    # top headers
    write_header_csv('top', headers, [x for x in headers if headers[x].ok()])
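
The filter_up helper called above is not defined in this snippet; a minimal sketch of plausible semantics (an assumption, not the original implementation) tests the predicate against the node and each of its ancestors:

def filter_up(node, predicate):
    """Hypothetical: True if `predicate` holds for `node` or any ancestor."""
    current = node
    while current is not None:
        if predicate(current):
            return True
        current = current.parent
    return False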
Example #2
def write_outputs(args, logger, tree):
    """Write outputs"""
    logger.info("Begin writing outputs")
    # Export JSON using anytree
    with open("%s/%s.json" % (args.output_dir,
                              constants.HIERARCHICAL_FDR_OUTPUTS),
              "w") as output_file:
        JsonExporter(indent=2).write(tree, output_file)
    # Write CSV with an additional column for rejected status
    with open("%s/%s.csv" %
              (args.output_dir, constants.HIERARCHICAL_FDR_OUTPUTS),
              "w",
              newline="") as output_file:
        writer = csv.writer(output_file)
        writer.writerow([
            constants.NODE_NAME, constants.PARENT_NAME,
            constants.PVALUE_LOSSES, constants.REJECTED_STATUS,
            constants.ADJUSTED_PVALUE
        ])
        for node in anytree.LevelOrderIter(tree):
            parent_name = ""
            if node.parent:
                parent_name = node.parent.name
            writer.writerow([
                node.name, parent_name, node.pvalue,
                int(node.rejected), node.adjusted_pvalue
            ])
    # Generate tree of rejected hypotheses with colour grading based on adjusted p-value
    generate_tree_of_rejected_hypotheses(args, logger, tree)
    logger.info("End writing outputs")
Example #3
def color_nodes(args, tree):
    """Add fill and font color to nodes based on partition in sorted list"""
    def differentiator(node):
        """Differentiates between nodes"""
        return node.adjusted_pvalue if args.sorting_param == constants.ADJUSTED_PVALUE else node.effect_size

    nodes_sorted = sorted(anytree.LevelOrderIter(tree),
                          key=differentiator,
                          reverse=True)  # sort nodes for color grading
    num_nodes = len(nodes_sorted)
    lower, upper = args.color_range
    num_colors = upper - lower + 1
    assert 1 <= lower <= upper <= 9
    for idx, node in enumerate(nodes_sorted):
        node.color = idx + lower
        if num_nodes > num_colors:
            node.color = lower + (idx * num_colors) // num_nodes
        assert node.color in range(lower, upper + 1)
    # Non-differentiated nodes should have the same color
    prev_node = None
    for node in nodes_sorted:
        if prev_node and differentiator(node) == differentiator(prev_node):
            node.color = prev_node.color
        prev_node = node
        node.fontcolor = "black" if node.color <= 5 else "white"
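
A quick worked check of the bucketing formula above, under assumed inputs (12 nodes, color range [1, 9]):

lower, upper, num_nodes = 1, 9, 12
num_colors = upper - lower + 1  # 9 buckets for 12 nodes
colors = [lower + (idx * num_colors) // num_nodes for idx in range(num_nodes)]
print(colors)  # [1, 1, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9] -- always within [lower, upper]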
Example #4
    def expand(self, t=INFINITY):
        """
        Increments 'self.fully_expanded', then expands the entire tree to a
        depth of 'self.fully_expanded' in the time allotted. If incomplete,
        just leaves the tree as is.
        :param t: int <- time limit in seconds
        :return: None
        """

        stop_time = time.time() + t

        if self.caught_up:
            self.fully_expanded += 1
            self.tree_depth = max(self.tree_depth, self.fully_expanded)
            self.caught_up = False

        for node in anytree.LevelOrderIter(self.game_tree):
            if node.depth >= self.fully_expanded:
                break
            if time.time() > stop_time:
                return
            if node.is_leaf:
                self.expand_node(node)

        self.caught_up = True
Example #5
def visualize_hierarchical(args, features):
    """Visualize hierarchical feature importance results"""
    tree = features[0].root
    opts = SimpleNamespace(output_dir=args.output_dir,
                           effect_name="Importance Score",
                           color_scheme="ylorrd9",
                           color_range=[1, 9],
                           sorting_param=constants.EFFECT_SIZE,
                           minimal_labels=False,
                           rectangle_leaves=True)
    nodes = {}
    for node in anytree.LevelOrderIter(tree):
        if node.important or node.name == constants.DUMMY_ROOT:
            parent = nodes[node.parent.name] if node.parent else None
            newnode = anytree.Node(node.name,
                                   parent=parent,
                                   description=node.description,
                                   effect_size=node.effect_size,
                                   was_leaf=node.is_leaf)
            nodes[newnode.name] = newnode
    if len(nodes) <= 1:
        print(
            "No important features identified, skipping feature importance hierarchy visualization."
        )
        return
    newtree = next(iter(nodes.values())).root  # identify root
    if newtree.name == constants.DUMMY_ROOT and len(newtree.children) == 1:
        # Get rid of dummy node if not required to maintain tree
        newtree = newtree.children[0]
        newtree.parent = None
    color_nodes(opts, newtree)
    render_tree(opts, newtree)
Example #6
def get_non_continuous_min_dep(root):
    """ Finds minimal LHS combinations on a tree of a non-continuous
    RHS. All node.name on the tree need to be lists for this function
    to work properly, excluding the root.

    Keyword Arguments:
    root -- root of a tree where minimal LHS combinations are to be
    searched. all children's names need to be a list.
    strategy -- 'greedy' or 'complete'. Strategy with which tree has been
    created. """
    candidates = {}
    if root.search_strategy == 'complete':
        for node in anytree.LevelOrderIter(root.children[0]):
            if node.score >= node.parent.score * 0.98:
                candidates[tuple(node.name)] = node.score
                parent = node.parent
                if not parent.is_root:
                    candidates.pop(tuple(node.parent.name), None)
    elif root.search_strategy == 'greedy':
        parent_is_minimal = True
        newest_children = root.get_newest_children()
        for child in newest_children:
            if child.score >= 0.98 * child.parent.score:
                candidates[tuple(child.name)] = child.score
                parent_is_minimal = False
        parent = newest_children[0].parent
        if parent_is_minimal and (not parent.is_root):
            candidates[tuple(parent.name)] = parent.score
    return candidates
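
A toy invocation sketch for the 'complete' branch; the tree construction is hypothetical and only assumes the list-valued names and score/search_strategy attributes the docstring describes:

import anytree

root = anytree.Node("root", search_strategy='complete', score=0.0)
a = anytree.Node(['A'], parent=root, score=0.90)
anytree.Node(['A', 'B'], parent=a, score=0.95)
print(get_non_continuous_min_dep(root))  # {('A', 'B'): 0.95} -- ('A',) was removed as non-minimal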
Example #7
    def _make_leafs_lvl(self):
        leafs_list = []
        leafs_lvl = []
        for node in anytree.LevelOrderIter(self.get_root()):
            if node.is_leaf:
                leafs_list.append(node.name.get_leaf()[:2])
                leafs_lvl.append(node.depth + 1)
        return leafs_list, leafs_lvl
Example #8
def prune_tree_on_effect_size(args, tree):
    """Prune tree by thresholding on effect size"""
    if not tree.effect_size:
        return  # No effect_size column in input file
    effect_size_threshold = tree.effect_size * (
        1 + args.tree_effect_size_threshold)
    for node in anytree.LevelOrderIter(tree):
        if node.effect_size > effect_size_threshold:
            node.parent = None
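
Detaching nodes while the LevelOrderIter is still being consumed relies on anytree's lazy traversal; a more defensive variant (an alternative sketch, not from the source) snapshots the traversal before mutating the tree:

import anytree

def prune_tree_on_effect_size_snapshot(args, tree):
    """Variant that materializes the node list before detaching (hypothetical)."""
    if not tree.effect_size:
        return
    threshold = tree.effect_size * (1 + args.tree_effect_size_threshold)
    for node in list(anytree.LevelOrderIter(tree)):
        if node.effect_size > threshold:
            node.parent = None  # detach the node and its subtree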
Example #9
    def compute_link_transformations(
            self, joint_values: Optional[Dict[str, float]] = None
    ) -> Dict[str, tuple]:
        """ Compute the transformations of all links for the given joint
        values and return them in a dictionary keyed by link name, with
        the link pose as the value. """
        if joint_values is None:
            joint_values = {}
        link_transforms = {}
        for link in anytree.LevelOrderIter(self.root_node):  # type: Link
            if link.is_root:
                link_transforms[link.name] = self.root_pose
                continue
            parent_transform = link_transforms[link.parent.name]
            joint_value = joint_values.get(link.joint_from_parent.name, None)
            relative_pose = link.joint_from_parent.transformation_from_parent_to_child_link(
                joint_value)
            link_transforms[link.name] = multiply_transformations(
                parent_transform, relative_pose)
        return link_transforms
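
multiply_transformations is not shown in this snippet; if link poses are plain 4x4 homogeneous matrices, it reduces to a matrix product (a sketch under that assumption only):

import numpy as np

def multiply_transformations(parent_transform: np.ndarray,
                             relative_pose: np.ndarray) -> np.ndarray:
    """Compose two 4x4 homogeneous transforms (assumed representation)."""
    return parent_transform @ relative_pose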
Example #10
def color_nodes(opts, tree):
    """Add fill and font color to nodes based on partition in sorted list"""
    nodes_sorted = sorted(
        anytree.LevelOrderIter(tree),
        key=lambda node: node.effect_size)  # sort nodes for color grading
    num_nodes = len(nodes_sorted)
    lower, upper = opts.color_range
    num_colors = upper - lower + 1
    assert 1 <= lower <= upper <= 9
    for idx, node in enumerate(nodes_sorted):
        node.color = idx + lower
        if num_nodes > num_colors:
            node.color = lower + (idx * num_colors) // num_nodes
        assert node.color in range(lower, upper + 1)
    # Non-differentiated nodes should have the same color
    prev_node = None
    for node in nodes_sorted:
        if prev_node and node.effect_size == prev_node.effect_size:
            node.color = prev_node.color
        prev_node = node
        node.fontcolor = "black" if node.color <= 5 else "white"
Example #11
def generate_tree_of_rejected_hypotheses(args, logger, tree):
    """Generate tree of rejected hypotheses with colour grading based on adjusted p-value"""
    # Generate tree of rejected hypotheses
    if not tree.rejected:
        logger.warning(
            "No hypothesis rejected, so no tree will be generated. If this is unexpected, check your input p-values"
        )
        return
    nodes = {}
    for node in anytree.LevelOrderIter(tree):
        if node.rejected:
            parent = nodes[node.parent.name] if node.parent else None
            newnode = anytree.Node(node.name,
                                   parent=parent,
                                   adjusted_pvalue=node.adjusted_pvalue,
                                   description=node.description,
                                   effect_size=node.effect_size,
                                   was_leaf=node.is_leaf)
            nodes[newnode.name] = newnode
    newtree = next(iter(nodes.values())).root  # identify root
    prune_tree_on_effect_size(args, newtree)
    color_nodes(args, newtree)
    render_tree(args, newtree)
Example #12
    def nodes_by_bfs(self):
        if self.is_empty:
            return
        for node in anytree.LevelOrderIter(self.root_node):  # breadth-first
            yield node
Example #13
df_types = df_types[['short_name', 'coretype', 'primary', 'secondary',
                     'cre', 'major_dissection', 'layer_dissectoin']]

###############################################################################

# create breadth-first binary search tree with Tasic clusters
tree_dict = {'label': 'root'}  # base dictionary

# parses binary node positions into a dictionary with tree structure
for label, bin_str in zip(dendro['cluster'], dendro['position']):
    parse_binary(tree_dict, bin_str, label)

importer = DictImporter()
tree = importer.import_(tree_dict)

for ind, node in enumerate(anytree.LevelOrderIter(tree)):
    node.pos = ind + 1  # node index starting at one
    if node.is_leaf:
        node.name = str(ind + 1) + " " + node.label
    else:
        node.name = str(ind + 1)

DotExporter(tree).to_picture("dendro.png")

for leaf in tree.leaves:  # TODO: phenotypes & deal with mismatching
    # edge cases for the last two endothelial cells
    if leaf.label == 'Endo Tbc1d4':
        leaf.cluster_id = 'f48'
    elif leaf.label == 'Endo Myl9':
        leaf.cluster_id = 'f49'
    else:
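
parse_binary, called near the top of this example, is not shown either; a plausible sketch (entirely hypothetical) walks the '0'/'1' position string and fills the nested-dict shape that DictImporter expects:

def parse_binary(tree_dict, bin_str, label):
    """Insert `label` at the position encoded by `bin_str` ('0' = left, '1' = right)."""
    node = tree_dict
    for bit in bin_str:
        children = node.setdefault('children', [{}, {}])
        node = children[int(bit)]
    node['label'] = label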
Example #14
    def run_task(self, current_task):
        for task in anytree.LevelOrderIter(
                current_task, maxlevel=1
        ):  # We ALWAYS have maxlevel = 1 even if there are nested things because it handles all nested children recursively and we don't want the tree iterator to find them. This is sorta stupid instead of just giving the tree itself at the top node.
            # If the function is not the root execute function, go ahead and run it. Can't run execute this way because it doesn't have a parent.
            if not task.function.__name__ == 'execute':
                # Set task_dirs and cur_dirs based on tree position
                if task.parent.type == 'task':
                    if task.parent is not None and getattr(
                            task.parent, 'task_dir', None):
                        if getattr(task, 'task_dir', None):
                            L.critical(
                                'Logic of task_dirs does not make sense here. In particular, a parent was given an explicit task_dir AND its child task was too instead of being derived'
                            )
                        else:
                            task.task_dir = os.path.join(
                                task.parent.task_dir, task.function.__name__)
                    elif isinstance(task, (InputTask, GeneratedInputTask)):
                        pass  # Skip, because it should have been set in the init of InputTask.
                    else:
                        if getattr(task, 'task_dir_override', None):
                            pass  # Expected outcome if given an override. This case is for when you want a task to look somewhere else. Though note it is then hard to make the override programmatically determined.
                        else:
                            task.task_dir = os.path.join(
                                self.intermediate_dir, task.function.__name__)
                    self.cur_dir = task.task_dir
                elif task.parent.type == 'iterator':
                    task.task_dir = os.path.join(self.cur_dir_parent_dir,
                                                 task.name)
                    self.cur_dir = task.task_dir
                else:
                    raise NameError('Unknown Node type')

                # Set the project level task_dirs object to have an attribute equal to the current name. This makes it possible for functions later in the analysis script to have access to
                # previous task_dir locations.
                setattr(self, task.name + '_dir', task.task_dir)

                # In addition to self.cur_dir, there are also these two project-level convenience functions.
                self.cur_task = task
                self.run_this = task.run  # NYI, task skipping enabled here.
                if isinstance(task, (hb.InputTask, GeneratedInputTask)):
                    self.skip_existing = 0  # Don't want to skip InputTasks because these have internal logic for what to skip.
                else:
                    self.skip_existing = task.skip_existing

                if self.skip_existing:
                    if os.path.exists(self.cur_dir):
                        self.run_this = 0

                if not os.path.exists(
                        self.cur_dir) and task.creates_dir and task.run:
                    hb.create_directories(self.cur_dir)

                # # NYI, but I want to implement task-level logging conditionals.
                # L.setLevel(task.logging_level)
                if task.type == 'task':
                    if self.run_this:
                        if task.creates_dir:
                            hb.create_directories(self.cur_dir)
                            assert os.path.exists(self.cur_dir)

                        # If the task's parent is an iterator, we want to report different info, otherwise these are the same.
                        if task.parent.type == 'iterator':
                            r = task.function(self)
                        elif isinstance(task, InputTask):
                            self.prepend = ''
                            L.info(self.prepend + 'Running InputTask ' +
                                   str(task.name) + ' in dir ' +
                                   str(self.cur_dir))
                            task.function()  # Running the Task, including anything in p.run_this
                        else:
                            self.prepend = ''
                            L.info(self.prepend + 'Running task ' +
                                   str(task.name) + ' in dir ' +
                                   str(self.cur_dir))
                            task.function()  # Running the Task, including anything in p.run_this

                    # NYI, task skipping enabled here.
                    else:

                        if os.path.isdir(self.cur_dir):
                            if task.run:
                                if task.parent.type == 'iterator':
                                    L.info('Skipping task ' + str(task.name) +
                                           ' because dir existed at ' +
                                           str(self.cur_dir))
                                    r = task.function(self)
                                elif isinstance(task, InputTask):
                                    self.prepend = ''
                                    L.info(self.prepend +
                                           'Running InputTask ' +
                                           str(task.name) + ' in dir ' +
                                           str(self.cur_dir))
                                    task.function()  # Running the Task, including anything in p.run_this
                                else:
                                    self.prepend = ''
                                    L.info(self.prepend + 'Skipping task ' +
                                           str(task.name) + ' in dir ' +
                                           str(self.cur_dir))
                                    task.function()  # Running the Task, including anything in p.run_this

                            else:
                                L.info('Instructed to skip task ' +
                                       str(task.name) + ' and loading from ' +
                                       str(self.cur_dir))
                                if task.parent.type == 'iterator':
                                    r = task.function(self)
                                else:
                                    self.prepend = ''
                                    task.function()  # Running the Task, including anything in p.run_this

                            # # CALL THE TASK FUNCTION
                            # task.function()  # Running the Task EXCLUDING anyting in p.run_this

                        else:
                            L.info('Running task ' + str(task.name) +
                                   ' and loading from ' + str(self.cur_dir))
                            if task.parent.type == 'iterator':
                                r = task.function(self)
                            elif isinstance(task, InputTask):
                                self.prepend = ''
                                L.info(self.prepend + 'Running InputTask ' +
                                       str(task.name) + ' in dir ' +
                                       str(self.cur_dir))
                                task.function()  # Running the Task, including anything in p.run_this

                            else:
                                self.prepend = ''
                                task.function()  # Running the Task, including anything in p.run_this

                elif task.type == 'iterator':
                    self.prepend += '    '
                    L.info('Creating iterator ' + str(task.name))

                    # Run the function for defining the iterator
                    if task.run:

                        # HACK, I failed to understand why sometimes the dirs weren't created in time. Thus I force it here.
                        hb.create_directories(self.cur_dir)
                        assert os.path.exists(self.cur_dir)

                        task.function()

                    else:
                        # NYI, task skipping enabled here.
                        L.info('Skipping running Iterator.')
                        task.function()

            # Whether run or not, search for children
            if len(task.children) > 0:

                # If the current task is an iterator, then check for replacements before calling the child task.
                # Definition of the project's self.iterator_replacements is the one part of ProjectFlow that the analysis script needs to be aware of,
                # creating a dict of key-value pairs that are replaced with each step in the iterator.
                if task.type == 'iterator' and task.run:

                    # First check dimensions of iterator_replacements:
                    replacement_lengths = []
                    for (replacement_attribute_name,
                         replacement_attribute_value) in self.iterator_replacements.items():
                        replacement_lengths.append(
                            len(replacement_attribute_value))
                        assert len(set(replacement_lengths)) == 1  # Check that all are the same size.
                    num_iterations = replacement_lengths[0]

                    self.run_in_parallel = True  # TODO Connect to UI
                    if not getattr(self, 'num_workers', None):
                        self.num_workers = 11
                    if self.run_in_parallel:
                        # OPTIMIZATION NOTE: It's slow to spawn 460 processes when they are just going to be skipped, thus run_this for iterators needs to be improved.
                        worker_pool = multiprocessing.Pool(
                            self.num_workers
                        )  # NOTE, worker pool and results are LOCAL variables so that they aren't pickled when we pass the project object.
                    results = []

                    # Once all the iterations are done, iterate through the stored results and call their get functions, which blocks running past this point until all are done.
                    # SUPER CONFUSING POINT. The project object will be modified independently by all tasks. Can't think of a good way to rejoin them.
                    returns_from_parallel_tasks = []

                    iterations_start = 0
                    total_iteration_counter = 0
                    num_processes_to_spawn_simultaneously = 2000  # Memory issue after ~10,000?
                    # for thread_counter in range(1000):
                    for iteration_outer_counter in range(0, num_iterations):
                        if iterations_start + num_processes_to_spawn_simultaneously < num_iterations:
                            iterations_stop = iterations_start + num_processes_to_spawn_simultaneously
                        else:
                            iterations_stop = num_iterations
                        # Iterate through all children of the iterator with new replacement values.
                        for iteration_counter in range(iterations_start,
                                                       iterations_stop):
                            # for iteration_counter in range(num_iterations):

                            # NOTICE strange dimensionality here: even within a single iteration, we have to iterate through self.iterator_replacements because we might have more than 1 var that needs replacing
                            replacements = OrderedDict()
                            for (replacement_attribute_name,
                                 replacement_attribute_values) in self.iterator_replacements.items():
                                current_replacement_value = self.iterator_replacements[
                                    replacement_attribute_name][
                                        iteration_counter]
                                replacements[
                                    replacement_attribute_name] = replacement_attribute_values
                                setattr(self, replacement_attribute_name,
                                        current_replacement_value)
                                if replacement_attribute_name == 'cur_dir_parent_dir':
                                    setattr(self, 'cur_dir',
                                            current_replacement_value)
                                project_copy = copy.copy(
                                    self
                                )  # Freeze it in place (necessary for parallelizing)

                                # For multiprocessing, you cannot pickle a Gdal DS or Band, so I manually unset them here. For some reason, using the k.close_data corrupted the geotiff headers
                                for i, k in project_copy.__dict__.items():
                                    if type(k) in [
                                            hb.GlobalPyramidFrame,
                                            hb.ArrayFrame
                                    ]:
                                        k.band = None
                                        k.ds = None
                                        # k.close_data()
                            if self.run_in_parallel:
                                L.info('Initializing PARALLEL task ' +
                                       str(iteration_counter) + ' ' +
                                       task.name + ' with replacements: ' +
                                       str(replacements))

                                # We use apply_async, which immediately lets the next line calculate. It is blocked below with results.get()
                                result = worker_pool.apply_async(
                                    func=run_iterator_in_parallel,
                                    args=(project_copy, task,
                                          iteration_counter))

                                results.append(result)

                            else:
                                raise NotImplementedError(
                                    'Non-parallel iterator execution is not implemented.')
                                # L.info('Starting task ' + str(iteration_counter) + ' ' + task.name + ' while replacing ' + str(replacement_attribute_name))
                                # for child in task.children:
                                #     self.run_task(child)
                            iterations_start = iterations_stop
                            # worker_pool.close()
                            # worker_pool.join()

                        for i in results:
                            for j in i.get():
                                if j is not None:
                                    returns_from_parallel_tasks.append(j)
                        for i in returns_from_parallel_tasks:
                            if i[1] == 'append_to_list':
                                if isinstance(i[2], list):
                                    getattr(self, i[0]).extend(i[2])
                                else:
                                    getattr(self, i[0]).append(i[2])
                            # p.layers_to_stitch.append(5)

                    # Task is an iterator's child
                    else:
                        if task.run:
                            for child in task.children:
                                self.run_task(
                                    child
                                )  # Run the child found by iterating the task-node's children

                # Task is not an iterator, thus we just call its child directly
                elif task.parent is not None:
                    if task.parent.type == 'iterator':
                        for child in task.children:
                            self.run_task(
                                child
                            )  # Run the child found by iterating the task-node's children
                else:
                    for child in task.children:
                        self.run_task(
                            child
                        )  # Run the child found by iterating the task-node's children

                    # raise NameError('wtf')
        try:
            if len(r) > 0:
                return r
        except NameError:
            pass  # 'r' was never set; nothing needed returning
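
The apply_async/get pattern used by the iterator branch above, reduced to a self-contained sketch (the worker function and pool size are toy values):

import multiprocessing

def _square(x):
    return x * x

if __name__ == '__main__':
    with multiprocessing.Pool(4) as pool:
        # apply_async returns immediately; .get() blocks until each worker finishes
        results = [pool.apply_async(_square, (i,)) for i in range(8)]
        print([r.get() for r in results])  # [0, 1, 4, 9, 16, 25, 36, 49]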