Ejemplo n.º 1
0
    def get_obj_ref_graph(self,
                          *obj_ids: IdT,
                          direction=archives.OUTGOING,
                          max_dist: int = None) -> networkx.DiGraph:
        """Return the object reference graph, taking any open transaction into account.

        The graph is first fetched from the archive.  If the historian reports a current
        transaction, the graph is updated from it and then pruned so that only ``obj_ids``
        and the nodes connected to them in the requested direction remain.

        :param obj_ids: ids of the objects the graph is built around
        :param direction: when equal to ``archives.OUTGOING`` the pruning keeps descendants of
            ``obj_ids``; for any other value it keeps their ancestors
        :param max_dist: forwarded unchanged to the archive query
        :return: the (possibly updated and pruned) reference graph
        """
        roots = set(obj_ids)
        ref_graph = self._archive.get_obj_ref_graph(*roots,
                                                    direction=direction,
                                                    max_dist=max_dist)

        transaction = self._historian.current_transaction()  # type: transactions.Transaction
        if transaction is None:
            # Nothing pending: the archive's answer is used as it is
            return ref_graph

        # Fold the transaction's information into the graph
        _update_from_transaction(ref_graph, transaction)

        # Collect everything connected to the requested objects in the chosen direction
        walk = dag.descendants if direction == archives.OUTGOING else dag.ancestors
        connected = set()
        for root in roots:
            connected.update(walk(ref_graph, root))

        # The requested ids themselves stay even when they have no edges at all
        keep = roots | connected
        ref_graph.remove_nodes_from(set(ref_graph.nodes) - keep)
        return ref_graph
Ejemplo n.º 2
0
def _run(execution, session, task_queue):
    """
    Do the execution!
    """
    execution.log.info('Executing TaskGraph')

    available_cores = True
    while len(task_queue) > 0:
        if available_cores:
            _run_queued_and_ready_tasks(task_queue, execution)
            available_cores = False

        for task in _process_finished_tasks(execution.jobmanager):
            if task.status == TaskStatus.failed and task.must_succeed:
                # pop all descendents when a task fails
                task_queue.remove_nodes_from(descendants(task_queue, task))
                task_queue.remove_node(task)
                execution.status = ExecutionStatus.failed_but_running
                execution.log.info('%s tasks left in the queue' %
                                   len(task_queue))
            elif task.status == TaskStatus.successful:
                # just pop this task
                task_queue.remove_node(task)
            elif task.status == TaskStatus.no_attempt:
                # the task must have failed, and is being reattempted
                pass
            else:
                raise AssertionError(
                    'Unexpected finished task status %s for %s' %
                    (task.status, task))
            available_cores = True

        # only commit Task changes after processing a batch of finished ones
        session.commit()
        time.sleep(.3)
Ejemplo n.º 3
0
def network_to_echart(write_to_file: bool = False, layout: bool = False):
    """Convert ``LANGS_NETWORK`` into lists of node and edge dictionaries.

    Each node's ``symbolSize`` is the percentage of the network that is an ancestor of it
    plus the percentage that is a descendant of it, clamped to the range 2..20.  Its
    ``category`` is the part of the node name before the first ``-``.  Nodes are returned
    sorted by name.

    :param write_to_file: when true, also dump the result as JSON to
        ``languages-network.json`` next to ``static_file``.
    :param layout: not used by this function.
    :return: ``(nodes, edges)``
    """
    total = len(LANGS_NETWORK.nodes)
    nodes = []
    for name in LANGS_NETWORK.nodes:
        category = name.split('-')[0]
        upstream_share = (len(ancestors(LANGS_NETWORK, name)) / total) * 100
        downstream_share = (len(descendants(LANGS_NETWORK, name)) / total) * 100
        nodes.append({
            'name': name,
            'symbolSize': min(20, max(2, upstream_share + downstream_share)),
            'id': name,
            'category': category,
        })
    nodes.sort(key=lambda entry: entry['name'])

    edges = [{'source': edge[0], 'target': edge[1]}
             for edge in LANGS_NETWORK.edges]

    if write_to_file:
        target = os.path.join(os.path.dirname(static_file),
                              'languages-network.json')
        payload = json.dumps({'nodes': nodes, 'edges': edges})
        with open(target, 'w') as f:
            f.write(payload)
        LOGGER.info(f'Wrote network nodes and edges to static file.')
    return nodes, edges
def outer_in_graph_iter(
    g: nx.DiGraph,
    c: Optional[nx.DiGraph] = None
) -> Generator[Tuple[Set[nxGraphNodeID], Set[nxGraphNodeID]], None, None]:
    """For a directed graph with unique node IDs with type int, iterates
    from outer / leafmost / least depended upon nodes to inner nodes.
    Optionally, takes a precomputed condensed DAG of g.

    Each item yielded is a pair ``(node_ids, descendant_node_ids)``: the
    graph node IDs of the strongly connected components visited in this
    step, and the graph node IDs of every component reachable from them
    in the condensation.

    Properties:

    * yields each node ID once
    * successive node ID sets only depend on/point to previously visited
    nodes or other nodes within their set
    * an empty graph yields nothing
    """
    if len(g.nodes) == 0:
        # End the generator with a plain return. Raising StopIteration inside
        # a generator body is turned into RuntimeError (PEP 479), so the
        # previous `raise StopIteration(...)` crashed callers on empty graphs.
        return

    # > C – The condensation graph C of G. The node labels are integers
    # > corresponding to the index of the component in the list of strongly
    # > connected components of G. C has a graph attribute named ‘mapping’ with
    # > a dictionary mapping the original nodes to the nodes in C to which they
    # > belong. Each node in C also has a node attribute ‘members’ with the set
    # > of original nodes in G that form the SCC that the node in C represents.
    #
    # https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.components.condensation.html#networkx.algorithms.components.condensation
    if not c:
        c = condensation(g)
    assert is_directed_acyclic_graph(c)
    for scc_ids in outer_in_dag_iter(c):
        # everything reachable from any component visited in this step
        descendant_scc_ids: Set[int] = set()
        descendant_scc_ids.update(
            *[descendants(c, scc_id) for scc_id in scc_ids])
        yield scc_ids_to_graph_node_ids(c, scc_ids), scc_ids_to_graph_node_ids(
            c, descendant_scc_ids)
Ejemplo n.º 5
0
def _run(execution, session, task_queue):
    """
    Do the execution!
    """
    execution.log.info('Executing TaskGraph')

    available_cores = True
    while len(task_queue) > 0:
        if available_cores:
            _run_queued_and_ready_tasks(task_queue, execution)
            available_cores = False

        for task in _process_finished_tasks(execution.jobmanager):
            if task.status == TaskStatus.failed and task.must_succeed:
                # pop all descendents when a task fails
                task_queue.remove_nodes_from(descendants(task_queue, task))
                task_queue.remove_node(task)
                execution.status = ExecutionStatus.failed_but_running
                execution.log.info('%s tasks left in the queue' % len(task_queue))
            elif task.status == TaskStatus.successful:
                # just pop this task
                task_queue.remove_node(task)
            elif task.status == TaskStatus.no_attempt:
                # the task must have failed, and is being reattempted
                pass
            else:
                raise AssertionError('Unexpected finished task status %s for %s' % (task.status, task))
            available_cores = True

        # only commit Task changes after processing a batch of finished ones
        session.commit()
        time.sleep(.3)
Ejemplo n.º 6
0
def build_hierarchy_matrix(go_dag, goids, h=None):
    """
    Build a sparse adjacency matrix for the part of *go_dag* reachable from *goids*.

    *go_dag*: directed graph of GO terms.
    *goids*: the leaf terms to use to get a sub-graph of the DAG.
        Every term returned by ``descendants(go_dag, goid)`` is included as well.
        NOTE(review): these are the GO "ancestor" terms only if edges point from a
        term to its parent - confirm against how go_dag is built.
    *h*: optional hierarchy name, used only in the progress messages.

    Returns ``(dag_matrix, goids_list)`` where *goids_list* is the sorted list of
    included terms and gives the row/column order of *dag_matrix*.
    """

    # UPDATE: limit to only the GO terms in R
    print(
        "Limiting DAG to only the %d %s GO terms that have at least 1 annotation (assuming annotations already propagated up the DAG)"
        % (len(goids), h))
    ancestor_goids = set()
    for goid in goids:
        # if this goid was already reached from an earlier one, everything it
        # reaches has been collected too, so skip it
        if goid in ancestor_goids:
            continue
        ancestor_goids.update(descendants(go_dag, goid))
    ancestor_goids.update(goids)
    goids_list = sorted(ancestor_goids)

    G = nx.subgraph(go_dag, ancestor_goids)
    # The former `h is None` branch passed three values to a two-placeholder
    # format string and raised TypeError; use one format with a fallback label.
    label = h if h is not None else "hierarchy"
    print("\t%s DAG has %d nodes and %d edges" %
          (label, G.number_of_nodes(), G.number_of_edges()))

    # convert the GO DAG to a sparse matrix, while maintaining the order of goids so it matches with the annotation matrix
    # NOTE(review): to_scipy_sparse_matrix is no longer available in NetworkX 3.x
    # (to_scipy_sparse_array replaces it, with a different return type) - confirm
    # the pinned NetworkX version before upgrading.
    dag_matrix = nx.to_scipy_sparse_matrix(G, nodelist=goids_list, weight=None)

    return dag_matrix, goids_list
Ejemplo n.º 7
0
def _run(workflow, session, task_queue):
    """
    Do the workflow!
    """
    workflow.log.info('Executing TaskGraph')
    available_cores = True

    while len(task_queue) > 0:
        if available_cores:
            _run_queued_and_ready_tasks(task_queue, workflow)
            available_cores = False

        for task in _process_finished_tasks(workflow.jobmanager):
            if task.status == TaskStatus.failed and not task.must_succeed:
                pass  # it's ok if the task failed

            elif task.status == TaskStatus.failed and task.must_succeed:

                if workflow.info['fail_fast']:
                    workflow.log.info(
                        '%s Exiting run loop at first Task failure, exit_status: %s: %s',
                        workflow, task.exit_status, task)
                    workflow.terminate(due_to_failure=True)
                    return

                # pop all descendents when a task fails; the rest of the graph can still execute
                remove_nodes = descendants(task_queue, task).union({
                    task,
                })
                # graph_failed.add_edges(task_queue.subgraph(remove_nodes).edges())

                task_queue.remove_nodes_from(remove_nodes)
                workflow.status = WorkflowStatus.failed_but_running
                workflow.log.info('%s tasks left in the queue' %
                                  len(task_queue))
            elif task.status == TaskStatus.successful:
                # just pop this task
                task_queue.remove_node(task)
            elif task.status == TaskStatus.no_attempt:
                # the task must have failed, and is being reattempted
                pass
            else:
                raise AssertionError(
                    'Unexpected finished task status %s for %s' %
                    (task.status, task))
            available_cores = True

        # only commit Task changes after processing a batch of finished ones
        session.commit()

        # conveniently, this returns early if we catch a signal
        time.sleep(workflow.jobmanager.poll_interval)

        if workflow.termination_signal:
            workflow.log.info(
                '%s Early termination requested (%d): stopping workflow',
                workflow, workflow.termination_signal)
            workflow.terminate(due_to_failure=False)
            return
Ejemplo n.º 8
0
def centrality_ancestor(dg):
    """
    Map every node of ``dg`` to ``len(descendants(dg, node)) / number_of_nodes``.

    Original author's note, kept for context: in dmargo's paper this is the total
    number of descendants, possibly with edges swapped; in this graph it is meant
    as the total number of ancestors.
    """
    node_count = float(len(dg.nodes()))
    scores = {}
    for node in dg.nodes():
        scores[node] = len(descendants(dg, node)) / node_count
    return scores
Ejemplo n.º 9
0
def outgraph(g, source, distance=None):
    """Return the subgraph of ``g`` made of ``source`` and its descendants.

    When ``distance`` is given, only descendants whose shortest path length from
    ``source`` is at most ``distance`` are kept.
    """
    reachable = descendants(g, source)
    if distance is None:
        return g.subgraph(reachable | {source})

    nearby = set()
    for node in reachable:
        if shortest_path_length(g, source=source, target=node) <= distance:
            nearby.add(node)
    return g.subgraph(nearby | {source})
Ejemplo n.º 10
0
def G2intervention(G):
    """Build a node-by-node DataFrame marking which nodes descend from which.

    Rows and columns are both labelled with the nodes of ``G``.  Entry ``[j, i]`` is 1
    when ``j`` is a descendant of ``i`` and 0 otherwise.
    """
    labels = list(G.nodes())
    size = len(G.nodes())
    matrix = pd.DataFrame(np.zeros([size, size]), index=labels, columns=labels)
    for origin in labels:
        for reached in descendants(G, origin):
            matrix.loc[reached, origin] = 1
    return matrix
Ejemplo n.º 11
0
    def _check_valid_adjustment_set_(self, graph, adjustment_set):
        """Decide whether ``adjustment_set`` is a valid adjustment set for exposure -> outcome.

        Works on a copy of ``graph`` and applies six steps:
        1) reject the set if it contains a descendant of the exposure
        2) drop nodes that are ancestors of neither the exposure, the outcome, nor any
           member of the adjustment set
        3) drop every edge that leaves the exposure
        4) connect nodes that point to a common endpoint (collider assessment)
        5) forget edge directions
        6) remove the adjustment set and test whether exposure and outcome are still connected

        Returns True when no path remains, False otherwise.
        """
        exposure, outcome = self.exposure, self.outcome
        work = graph.copy()

        # The list itself is not used afterwards; the two removals raise ValueError when
        # the exposure or the outcome is not a node of the graph.
        candidates = list(work.nodes())
        candidates.remove(exposure)
        candidates.remove(outcome)

        # Step 1) a descendant of the exposure inside the set rules it out immediately
        downstream = descendants(work, exposure)
        if downstream & set(adjustment_set):
            return False

        # Step 2) keep only nodes that are an ancestor of at least one anchor
        #         (exposure, outcome or a member of the adjustment set)
        anchors = set(adjustment_set).union([exposure, outcome])
        removable = set(work.nodes)
        for anchor in anchors:
            removable &= work.nodes - ancestors(work, anchor)
        removable = removable - {exposure, outcome} - set(adjustment_set)
        work.remove_nodes_from(removable)

        # Step 3) cut every arrow that starts at the exposure
        for child in list(work.successors(exposure)):
            work.remove_edge(exposure, child)

        # Step 4) link every pair of nodes that point to the same endpoint
        for endpoint in work:
            parents = list(work.predecessors(endpoint))
            if len(parents) <= 1:
                continue
            for first, second in combinations(parents, 2):
                if not work.has_edge(second, first) and not work.has_edge(first, second):
                    work.add_edge(first, second)

        # Step 5) drop directionality
        skeleton = work.to_undirected()

        # Step 6) take the adjustment set out and see whether a path survives
        skeleton.remove_nodes_from(adjustment_set)
        return not nx.has_path(skeleton, exposure, outcome)
Ejemplo n.º 12
0
    def backprop_iterator(self, start_tensor_name):
        """Yield the node producing ``start_tensor_name``, then the nodes it depends on.

        After the start node, the remaining nodes are yielded in topological order of the
        reversed attribute-free graph, restricted to the nodes reachable from the start
        node in that reversed graph.
        """
        self._finalise_graph()
        origin = self._get_node_by_ouput_tensor(start_tensor_name)
        yield self.nodes[origin]

        reversed_graph = self._get_graph_without_attributes().reverse()
        upstream = descendants(reversed_graph, origin)

        for candidate in nx.topological_sort(reversed_graph):
            if candidate not in upstream:
                continue
            yield self.nodes[candidate]
Ejemplo n.º 13
0
def get_marker_prev(dom_clade, clade_genome, tax_graph):
    """
    Compute the markers' prevalence across the genomes of the dominant clade.

    The result is ``len(clade_genome[dom_clade])`` divided by the number of
    descendants of ``dom_clade[-1]`` in ``tax_graph``.

    Args:
      dom_clade : key into ``clade_genome``; its last element is the node looked up
        in ``tax_graph`` (presumably a tax_id - verify against the caller)
      clade_genome : mapping from clade to the cluster's genomes in that clade
      tax_graph : taxonomy graph
    """
    clade_root = dom_clade[-1]
    # everything below the clade's node in the taxonomy graph
    n_clade_genomes = len(list(descendants(tax_graph, clade_root)))
    # genomes recorded for the cluster in this clade
    n_cluster_genomes = len(clade_genome[dom_clade])
    return n_cluster_genomes / float(n_clade_genomes)
Ejemplo n.º 14
0
def satisfies_backdoor_criteria(dag, X, outcome, S):
    """Check whether the covariate set ``S`` passes the six-step back-door test.

    Follows https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/1471-2288-8-70#Fig4
    and works on a copy of ``dag``.  Returns True when, after the six steps, no path
    connects ``X`` and ``outcome``.
    """
    covariates = set(S)
    work = dag.copy()

    # Step 1: covariates must not be descendants of X
    if descendants(work, X) & covariates:
        return False

    # Step 2: delete every variable that is a non-ancestor of X, of the outcome and of
    # each covariate all at once; X, the outcome and the covariates themselves stay
    protected = covariates.union([X, outcome])
    prunable = set(work.nodes)
    for anchor in protected:
        prunable &= work.nodes - ancestors(work, anchor)
    prunable -= protected
    work.remove_nodes_from(prunable)

    # Step 3: delete all lines emanating from X
    for successor in list(work.successors(X)):
        work.remove_edge(X, successor)

    # Step 4: connect any two parents sharing a common child
    for child in work:
        direct_causes = list(work.predecessors(child))
        if len(direct_causes) <= 1:
            continue
        for left, right in combinations(direct_causes, 2):
            already_linked = work.has_edge(left, right) or work.has_edge(right, left)
            if not already_linked:
                # orientation is irrelevant, direction is stripped in the next step
                work.add_edge(left, right)

    # Step 5: strip all arrowheads
    skeleton = work.to_undirected()

    # Step 6: delete the covariates together with all of their lines
    skeleton.remove_nodes_from(covariates)

    # A surviving path between X and the outcome means the criteria are not met
    still_connected = nx.has_path(skeleton, X, outcome)
    return not still_connected
Ejemplo n.º 15
0
def get_descendant(nodes, parent_name):
    """Recursively compute a weighted count over ``nodes`` below ``parent_name``.

    Starts at 1 and, for every node, adds the number of edges from ``parent_name`` to
    that node, multiplied by the node's own count when it has descendants in the
    module-level ``graph``.
    """
    total = 1
    for child in nodes:
        multiplicity = graph.number_of_edges(parent_name, child)
        below = list(dag.descendants(graph, child))
        total += (multiplicity * get_descendant(below, child)) if below else multiplicity
    return total
Ejemplo n.º 16
0
def _run(workflow, session, task_queue):
    """
    Do the workflow!
    """
    workflow.log.info('Executing TaskGraph')

    # graph_failed = nx.DiGraph()
    #
    # def handler(signal, frame):
    #     task_queue.add_edges(graph_failed.edges())
    #     for task in graph_failed.nodes():
    #         task.attempt +=1
    #         task.status = TaskStatus.no_attempt
    #     graph_failed.remove_nodes_from(graph_failed.nodes())

    # signal.signal(signal.SIGUSR1, handler)

    available_cores = True
    while len(task_queue) > 0:
        if available_cores:
            _run_queued_and_ready_tasks(task_queue, workflow)
            available_cores = False

        for task in _process_finished_tasks(workflow.jobmanager):
            if task.status == TaskStatus.failed and task.must_succeed:
                # pop all descendents when a task fails; the rest of the graph can still execute
                remove_nodes = descendants(task_queue, task).union({
                    task,
                })
                # graph_failed.add_edges(task_queue.subgraph(remove_nodes).edges())

                task_queue.remove_nodes_from(remove_nodes)
                workflow.status = WorkflowStatus.failed_but_running
                workflow.log.info('%s tasks left in the queue' %
                                  len(task_queue))
            elif task.status == TaskStatus.successful:
                # just pop this task
                task_queue.remove_node(task)
            elif task.status == TaskStatus.no_attempt:
                # the task must have failed, and is being reattempted
                pass
            else:
                raise AssertionError(
                    'Unexpected finished task status %s for %s' %
                    (task.status, task))
            available_cores = True

        # only commit Task changes after processing a batch of finished ones
        session.commit()
        time.sleep(.3)
Ejemplo n.º 17
0
def _run(workflow, session, task_queue):
    """
    Do the workflow!
    """
    workflow.log.info('Executing TaskGraph')
    available_cores = True

    while len(task_queue) > 0:
        if available_cores:
            _run_queued_and_ready_tasks(task_queue, workflow)
            available_cores = False

        for task in _process_finished_tasks(workflow.jobmanager):
            if task.status == TaskStatus.failed and task.must_succeed:

                if workflow.info['fail_fast']:
                    workflow.log.info('%s Exiting run loop at first Task failure, exit_status: %s: %s',
                                      workflow, task.exit_status, task)
                    workflow.terminate(due_to_failure=True)
                    return

                # pop all descendents when a task fails; the rest of the graph can still execute
                remove_nodes = descendants(task_queue, task).union({task, })
                # graph_failed.add_edges(task_queue.subgraph(remove_nodes).edges())

                task_queue.remove_nodes_from(remove_nodes)
                workflow.status = WorkflowStatus.failed_but_running
                workflow.log.info('%s tasks left in the queue' % len(task_queue))
            elif task.status == TaskStatus.successful:
                # just pop this task
                task_queue.remove_node(task)
            elif task.status == TaskStatus.no_attempt:
                # the task must have failed, and is being reattempted
                pass
            else:
                raise AssertionError('Unexpected finished task status %s for %s' % (task.status, task))
            available_cores = True

        # only commit Task changes after processing a batch of finished ones
        session.commit()

        # conveniently, this returns early if we catch a signal
        time.sleep(workflow.jobmanager.poll_interval)

        if workflow.termination_signal:
            workflow.log.info('%s Early termination requested (%d): stopping workflow',
                              workflow, workflow.termination_signal)
            workflow.terminate(due_to_failure=False)
            return
Ejemplo n.º 18
0
    def connected_subgraph(self, node):
        """Returns the subgraph connected to the given node.

        Starting from the node, its ancestors and its descendants, the
        ancestors and descendants of every collected node are added
        repeatedly until the collection stops growing.

        Parameters
        ----------
        node: str
            We want to create the subgraph containing this node.

        Returns
        -------
        subgraph: networkx.DiGraph
            The subgraph containing the specified node.
        """
        G = self.G

        members = {node}
        members.update(dag.ancestors(G, node))
        members.update(dag.descendants(G, node))

        # Expand until a full pass adds nothing new
        while True:
            size_before = len(members)
            for member in tuple(members):
                members.update(dag.ancestors(G, member))
                members.update(dag.descendants(G, member))
            if len(members) <= size_before:
                break

        return G.subgraph(members)
Ejemplo n.º 19
0
    def connected_subgraph(self, node):
        """Returns the subgraph reachable from the given node through any
        chain of ancestor and descendant relations.

        Parameters
        ----------
        node : str
            We want to create the subgraph containing this node.

        Returns
        -------
        subgraph : networkx.DiGraph
            The subgraph containing the specified node.
        """
        G = self.G

        collected = set()
        collected.add(node)
        for related in (dag.ancestors(G, node), dag.descendants(G, node)):
            collected.update(related)

        # Keep pulling in ancestors and descendants of everything collected
        # so far; stop after the first pass that finds nothing new
        grew = True
        while grew:
            snapshot = frozenset(collected)
            for current in snapshot:
                collected.update(dag.ancestors(G, current))
                collected.update(dag.descendants(G, current))
            grew = len(collected) > len(snapshot)

        return G.subgraph(collected)
Ejemplo n.º 20
0
def find_earliest_ancester(fsa, nd_set):
    """Return the first node of ``nd_set`` that descends from no other node of the set.

    Returns None when every node is a descendant of some other node in the set.
    """
    for candidate in nd_set:
        others = (other for other in nd_set if not candidate == other)
        if not any(candidate in descendants(fsa, other) for other in others):
            return candidate

    return None  # there is a loop
def get_descendants(T, node):
    """
    Return all descendants of ``node`` in the tree ``T``.

    ``T`` no longer has a default value: a defaulted parameter followed by the
    required ``node`` is a syntax error, and the default was a graph instance
    shared between calls.

    Parameters
    ----------
    T: nx.DiGraph
      The directed graph
    node: str
         The node whose descendants are looked up
    Returns
    -------
    list
        The list of descendants
    """

    return list(dag.descendants(T, node))
Ejemplo n.º 22
0
	def build_graph(self):
		"""Build the data-lineage graph and store the filtered subgraph in ``self.G``.

		For every data row, the first column flagged 'O' becomes the row's output node and
		every column flagged 'I' gets an edge from that output node.  Nodes fully matching
		``self.filter_pattern`` are added to ``self.target_nodes``; ``self.G`` is the subgraph
		made of those nodes plus all of their ancestors and descendants.  The node and edge
		counts are printed and the subgraph is written to ``data_lineage.graphml``.
		"""
		lineage = nx.DiGraph()

		for row in self.list_data[DATA_ROW_START_INDEX:]:
			flags = row[BQ_TABLE_COLUMN_START_INDEX:]

			# find the output: the first column flagged 'O'
			output_node = None
			for offset, flag in enumerate(flags):
				if flag != 'O':
					continue
				output_node = self.list_data[0][BQ_TABLE_COLUMN_START_INDEX+offset]
				is_full_match = bool(
					self.filter_raw != '(.*?)'
					and self.filter_pattern.fullmatch(output_node.split(':')[-1]))
				lineage.add_node(output_node, id=output_node, entity="node", value=1, size=10,
					filter_full_match=is_full_match)
				break

			# without an output there is nothing to attach the inputs to
			if not output_node:
				continue

			# find the inputs: every column flagged 'I'
			for offset, flag in enumerate(flags):
				if flag == 'I':
					lineage.add_edge(output_node, self.list_data[0][BQ_TABLE_COLUMN_START_INDEX+offset],
						relationship="consumed_by", entity="link", value=1)

		# get nodes that match the pattern
		for name in lineage.nodes():
			if self.filter_pattern.fullmatch(name):
				self.target_nodes.add(name)

		# collect the matched nodes with all of their ancestors and descendants
		keep = set()
		for name in self.target_nodes:
			keep.add(name)
			keep.update(ancestors(lineage, name))
			keep.update(descendants(lineage, name))

		self.G = lineage.subgraph(list(keep))

		print('Node count: %s' % self.G.number_of_nodes())
		print('Edge count: %s' % self.G.number_of_edges())

		nx.write_graphml(self.G, "data_lineage.graphml")
Ejemplo n.º 23
0
def _query_tax(tax_queries,
               G,
               qtax,
               ttax,
               lca_frac=1.0,
               max_tips=100,
               verbose=False):
    """
    Querying list of taxonomic names
    """
    pid = os.getpid()
    idx = {}
    status = {'hit': 0, 'no hit': 0}
    # iterating queries
    for Q in tax_queries:
        tips = []
        try:
            # getting descendents of the node
            tips = [desc for desc in descendants(G[qtax], Q[0]) if \
                    G[qtax].nodes[desc]['taxonomy'] == 'strain']
            status['hit'] += 1
        except nx.exception.NetworkXError:
            status['no hit'] += 1
        # if tips, getting LCA in target-taxonomy
        n_tips = len(tips)
        if n_tips > 0:
            if n_tips > max_tips:
                tips = random.sample(tips, k=max_tips)
            LCA = lca_many_nodes(G[ttax], tips, lca_frac=lca_frac)
            idx[Q[1]] = LCA
        else:
            idx[Q[1]] = ['unclassified', 'NA', 'NA']
        # status
        x = status['hit'] + status['no hit']
        if verbose and x % 1000 == 0:
            frac = round(float(x) / len(tax_queries) * 100, 2)
            logging.info('PID{}: Queries processed: {} ({}%)'.format(
                pid, x, frac))
    # status
    msg = 'PID{}: Finished! Queries={}, Hits={}, No-Hits={}'
    logging.info(
        msg.format(pid, status['hit'] + status['no hit'], status['hit'],
                   status['no hit']))
    # return
    return idx
Ejemplo n.º 24
0
    def connected_subgraph(self, node):
        """Returns the subgraph made of the given node, its ancestors, and
        its descendants.

        Parameters
        ----------
        node: str
            We want to create the subgraph containing this node.

        Returns
        -------
        subgraph: networkx.DiGraph
            The subgraph containing the specified node.
        """
        G = self.G

        members = {node}
        for related in (dag.ancestors(G, node), dag.descendants(G, node)):
            members.update(related)
        return G.subgraph(members)
Ejemplo n.º 25
0
def branch_separation(G, pos):
    """Split node coordinates into one dict per branch, for coloring purposes.

    A branch is a child of the root together with all of its descendants; the root forms
    a branch of its own.  Returns the list of ``{node: position}`` dicts sorted by the
    first coordinate of each branch's head node, with the root's branch first.
    """

    def get_branch(node):
        # climb to the ancestor at depth 1, i.e. the first node of this branch
        if is_root(G, node):
            return node
        parent = get_parent(G, node)
        while not is_root(G, parent):
            node = parent
            parent = get_parent(G, node)
        return node

    def sort_key(branch):
        head = get_branch(next(iter(branch)))
        # the root's own branch always sorts first
        return -9999999 if is_root(G, head) else branch[head][0]

    root = get_root(G)

    branches = {}
    for child in list(get_children(G, root)):
        members = {child: pos[child]}
        for node in descendants(G, child):
            members[node] = pos[node]
        branches[child] = members
    branches[root] = {root: pos[root]}

    pos_list = list(branches.values())
    pos_list.sort(key=sort_key)
    return pos_list
Ejemplo n.º 26
0
def _run(workflow, session, task_queue, lethal_signals):
    """
    Do the workflow!
    """
    def signal_handler(signum, frame):
        workflow.log.critical(
            f"caught signal: {signum}, shutdown procedure will initiate shortly"
        )
        workflow.termination_signal = signum

    for sig in lethal_signals:
        # catch lethal signals (like a ctrl+c)
        signal.signal(sig, signal_handler)

    workflow.log.info("Executing TaskGraph")
    available_cores = True
    last_log_timestamp = time.time()

    while len(task_queue) > 0:
        if available_cores:
            _run_queued_and_ready_tasks(task_queue, workflow)
            available_cores = False

        for task in _process_finished_tasks(workflow.jobmanager):
            if task.status == TaskStatus.failed and not task.must_succeed:
                pass  # it's ok if the task failed

            elif task.status == TaskStatus.failed and task.must_succeed:

                if workflow.info["fail_fast"]:
                    workflow.log.info(
                        "%s Exiting run loop at first Task failure, exit_status: %s: %s",
                        workflow,
                        task.exit_status,
                        task,
                    )
                    workflow.terminate(due_to_failure=True)
                    return

                # pop all descendents when a task fails; the rest of the graph can still execute
                remove_nodes = descendants(task_queue, task).union({
                    task,
                })
                # graph_failed.add_edges(task_queue.subgraph(remove_nodes).edges())

                task_queue.remove_nodes_from(remove_nodes)
                workflow.status = WorkflowStatus.failed_but_running
                workflow.log.info("%s tasks left in the queue" %
                                  len(task_queue))
            elif task.status == TaskStatus.successful:
                # just pop this task
                task_queue.remove_node(task)
            elif task.status == TaskStatus.no_attempt:
                # the task must have failed, and is being reattempted
                pass
            else:
                raise AssertionError(
                    "Unexpected finished task status %s for %s" %
                    (task.status, task))
            available_cores = True
            last_log_timestamp = time.time()

        # only commit Task changes after processing a batch of finished ones
        session.commit()

        if last_log_timestamp + WORKFLOW_LOG_AWKWARD_SILENCE_INTERVAL < time.time(
        ):
            num_running = len(list(workflow.jobmanager.running_tasks))
            workflow.log.info(
                "Cosmos is still alive, just waiting on %d running_tasks, task_queue is len %d",
                num_running,
                len(task_queue),
            )

            last_log_timestamp = time.time()

        # conveniently, this returns early if we catch a signal
        time.sleep(workflow.jobmanager.poll_interval)

        if workflow.termination_signal:
            workflow.log.info(
                "%s Early termination requested (%d): stopping workflow",
                workflow,
                workflow.termination_signal,
            )
            workflow.terminate(due_to_failure=False)
            return
Ejemplo n.º 27
0
def fetch_parent_nodes(G, nodes):
    """Return the neighbors of a node that are also among its descendants.

    Despite the plural name, ``nodes`` is handed as a single node to both
    ``G.neighbors`` and ``descendants``.

    Args:
        G: Graph to query.
        nodes: The node whose directly connected offspring are wanted.

    Returns:
        A set holding every neighbor of ``nodes`` that ``descendants`` also
        reports for it.
    """
    direct = set(G.neighbors(nodes))
    # Keep only the neighbors that are reachable according to descendants().
    direct.intersection_update(descendants(G, nodes))
    return direct
Ejemplo n.º 28
0
                   0], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 1, 0, 1, 0,
                   0], [0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0,
                   0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
# One shared label list names the rows, the columns and the nodes visited below.
node_labels = list('abcdefghij')
G_mat = pd.DataFrame(G_mat, index=node_labels, columns=node_labels)
G = nx.DiGraph(G_mat)

for i in node_labels:
    print(i)
    # descendants(G, i) gives the descendants of source node i in G.
    reachable = descendants(G, i)
    print(reachable)
    print(len(reachable))

nx.draw_networkx(G)


def fetch_parent_nodes(G, nodes):
    """Return the neighbors of a node that are also among its descendants.

    Despite the plural name, ``nodes`` is handed as a single node to both
    ``G.neighbors`` and ``descendants``.

    Args:
        G: Graph to query.
        nodes: The node whose directly connected offspring are wanted.

    Returns:
        A set holding every neighbor of ``nodes`` that ``descendants`` also
        reports for it.
    """
    direct = set(G.neighbors(nodes))
    # Keep only the neighbors that are reachable according to descendants().
    direct.intersection_update(descendants(G, nodes))
    return direct


def fetch_parent_step(G, nodes, step=2):
    #在食物链上一定步长上的子代
Ejemplo n.º 29
0
def return_descendant_nodes(node: str):
    """Return the possible outputs for a given input.

    The outputs are the descendants of ``node`` in ``LANGS_NETWORK``,
    collected into a list.
    """
    reachable = descendants(LANGS_NETWORK, node)
    return list(reachable)
Ejemplo n.º 30
0
 def get_full_dependencies(self, obj):
     """Return the descendants of ``obj`` in the graph stored on ``self.g``."""
     source_graph = self.g
     return descendants(source_graph, obj)
Ejemplo n.º 31
0
        for text in descendants_text:
            descendant_name = get_match_from_string(text, descendant_regex, 1)
            descendant_count = int(get_match_from_string(text, descendant_regex, 0))
            descendants.append((descendant_name, descendant_count))
    for descendant_name, descendant_count in descendants:
        for i in range(descendant_count):
            edges.append((parent, descendant_name))

# Build a directed multigraph from the collected edges. The loop above appends
# one (parent, descendant_name) pair per counted item, so repeated pairs become
# parallel edges whose multiplicity carries the count.
graph = nx.MultiDiGraph()
graph.add_edges_from(edges)
# Every node from which "shiny gold" can be reached.
ancestors = dag.ancestors(graph, "shiny gold")

print(f"Part 1: {len(ancestors)}")


# Every node reachable from "shiny gold".
# NOTE(review): this rebinds `descendants`, which the parsing code above uses
# as a list of (name, count) tuples -- confirm nothing later needs the list.
descendants = dag.descendants(graph, "shiny gold")

def get_descendant(nodes, parent_name):
    """Count ``parent_name`` itself plus everything nested beneath it.

    Reads the module-level ``graph``, in which the number of parallel edges
    from a parent to a child encodes how many copies of the child it holds.

    Args:
        nodes: Candidate nodes to inspect. Only those with at least one edge
            coming from ``parent_name`` contribute to the total.
        parent_name: Node whose nested contents are being counted.

    Returns:
        ``1`` (for ``parent_name`` itself) plus, for every direct child, the
        edge multiplicity times the child's own count.
    """
    contain_count = 1
    for descendant in nodes:
        edge_count = graph.number_of_edges(parent_name, descendant)
        if not edge_count:
            # Not a direct child: its term would be multiplied by zero, so
            # skip the (potentially very expensive) recursion entirely.
            continue

        # A child with nothing nested yields 1 from the recursive call, which
        # makes a separate leaf branch unnecessary.
        nested_descendants = dag.descendants(graph, descendant)
        contain_count += edge_count * get_descendant(nested_descendants,
                                                     descendant)

    return contain_count
Ejemplo n.º 32
0
 def _get_upper_closure(self, a):
     """Return the upper closure of ``a`` as a set.

     The closure is ``a`` together with all of its descendants in the graph
     produced by ``self._get_graph_closure_no_cycles()``.
     """
     closure_graph = self._get_graph_closure_no_cycles()
     return set(descendants(closure_graph, a)) | {a}
Ejemplo n.º 33
0
def load_ontology(file_name, gene2id_mapping):
    """Load an ontology file into a term graph plus gene annotations.

    Every non-blank line must hold at least three whitespace-separated
    fields. When the third field is ``'default'`` the line adds a directed
    edge from the first field to the second. Otherwise the line annotates the
    term in the first field with the gene in the second field; genes missing
    from ``gene2id_mapping`` are ignored.

    Args:
        file_name: Path of the ontology file to read.
        gene2id_mapping: Mapping from gene name to the id stored in the
            returned annotation sets.

    Returns:
        A tuple ``(dG, root, term_size_map, term_direct_gene_map)``:
        the directed term graph, its single node with in-degree 0, a mapping
        from each graph term to the number of distinct gene ids annotated to
        it or any of its descendants, and a mapping from term to the set of
        gene ids annotated directly to it.

    Raises:
        SystemExit: With status 1 (after printing a message) when a term has
            no genes at all, when the graph does not have exactly one root,
            or when it has more than one connected component.
        IndexError: When a non-blank line has fewer than three fields.
    """
    dG = nx.DiGraph()
    term_direct_gene_map = {}
    term_size_map = {}
    gene_set = set()

    # The context manager closes the file even if a malformed line raises.
    with open(file_name) as file_handle:
        for line in file_handle:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue

            if fields[2] == 'default':
                dG.add_edge(fields[0], fields[1])
                continue

            if fields[1] not in gene2id_mapping:
                continue

            term_direct_gene_map.setdefault(fields[0], set()).add(
                gene2id_mapping[fields[1]])
            gene_set.add(fields[1])

    print('There are', len(gene_set), 'genes')

    for term in dG.nodes():
        # Work on a copy so the direct-annotation sets are never mutated.
        term_gene_set = set(term_direct_gene_map.get(term, ()))
        for child in nxadag.descendants(dG, term):
            term_gene_set.update(term_direct_gene_map.get(child, ()))

        if not term_gene_set:
            print('There is empty terms, please delete term:', term)
            sys.exit(1)
        term_size_map[term] = len(term_gene_set)

    # Nodes without incoming edges are the roots of the hierarchy.
    roots = [n for n in dG.nodes if dG.in_degree(n) == 0]

    if not roots:
        # Without this guard the report below would fail with an IndexError
        # on roots[0] for an empty or fully cyclic graph.
        print('There is no root of ontology. Please use only one root.')
        sys.exit(1)

    uG = dG.to_undirected()
    connected_subG_list = list(nxacc.connected_components(uG))

    print('There are', len(roots), 'roots:', roots[0])
    print('There are', len(dG.nodes()), 'terms')
    print('There are', len(connected_subG_list), 'connected componenets')

    if len(roots) > 1:
        print(
            'There are more than 1 root of ontology. Please use only one root.'
        )
        sys.exit(1)
    if len(connected_subG_list) > 1:
        print('There are more than connected components. Please connect them.')
        sys.exit(1)

    return dG, roots[0], term_size_map, term_direct_gene_map
Ejemplo n.º 34
0
 def descendants(self, type: str):
     """Return the descendants of `type` in ``self.type_tree`` as a list."""
     # The bare name below resolves to the module-level ``descendants``
     # function, not to this method.
     found = descendants(self.type_tree, type)
     return list(found)
Ejemplo n.º 35
0
 def _get_upper_closure(self, a):
     """Return the upper closure of ``a`` as a set.

     The closure is ``a`` together with all of its descendants in the graph
     produced by ``self._get_graph_closure_no_cycles()``.
     """
     closure_graph = self._get_graph_closure_no_cycles()
     return set(descendants(closure_graph, a)) | {a}