Example #1
def generate_test_graph(original_graph, state_type_node_count, chosen_state, state_wise_node_list,
                        no_of_types):
    global available_type, node_latitude, node_longitude, node_state, node_type
    test_graph_nodes = list()
    node_queue = list()
    print state_type_node_count
    for i in range(len(chosen_state)):
        for j in range(no_of_types):
            print 'Running for state', chosen_state[i]
            print state_type_node_count[i, j]
            while state_type_node_count[i, j] != 0:
                node_index = random.randint(1, len(state_wise_node_list[chosen_state[i]]))
                node = state_wise_node_list[chosen_state[i]][node_index-1]
                state_wise_node_list[chosen_state[i]].remove(node)
                if not node_type[node] in available_type[:no_of_types] or node in node_queue or node in test_graph_nodes:
                    continue
                node_queue.append(node)
                state_type_node_count[i, j] -= 1

                expand_graph(node_queue, test_graph_nodes, original_graph, chosen_state, no_of_types, state_type_node_count)

    test_graph = original_graph.subgraph(test_graph_nodes)


    components = networkx.weakly_connected_component_subgraphs(test_graph)
    i = 1
    print 'Components Before:'
    print '******************'
    for component in components:
        print 'Component: ' + str(i) + '- ' + str(networkx.number_of_nodes(component))
        i += 1


    # Check connectivity
    if i > 1:
        resolve_connectivity_issue(test_graph)
        components = networkx.weakly_connected_component_subgraphs(test_graph)
        i = 1
        print 'Components After:'
        print '******************'
        for component in components:
            print 'Component: ' + str(i) + '- ' + str(networkx.number_of_nodes(component))
            i += 1


    node_mapping = dict()
    test_graph_node_state_assgn = list()
    for i in range(len(test_graph_nodes)):
        node_mapping[i] = test_graph_nodes[i]
        test_graph_node_state_assgn.append(node_state[test_graph_nodes[i]])
    print 'No of nodes: ', len(test_graph_nodes)

    return test_graph, node_mapping, test_graph_node_state_assgn
Example #2
def decompose(paths, args):
    """ runs decomposition
    Parameters
    ----------
    paths.bundle_file       : file
    paths.tmp1_file         : file
    paths.tmp2_file         : file
    paths.decomp_file       : file
    args.msize              : integer
    """

    # load the bundle graph.
    logging.info("loading info")
    BG = nx.read_gpickle(paths.bundle_file)
    #BG = test_bi()
    #BG = test_tri()

    # run decomposition until satisfied.
    BG.graph['redo'] = False
    while True:

        # decomposition.
        DC = decomp0(BG, paths.tmp1_file, paths.tmp2_file, msize=args.msize)

        # check if only once.
        if args.msize is None or not BG.graph['redo']:
            break
        BG.graph['redo'] = False

        # remove temp files.
        if os.path.isfile(paths.tmp1_file):
            subprocess.call(["rm", "-f", paths.tmp1_file])
        if os.path.isfile(paths.tmp2_file):
            subprocess.call(["rm", "-f", paths.tmp2_file])

    # compact decomposition.
    _compact_outter(DC)
    for subcc in nx.weakly_connected_component_subgraphs(DC):

        # call recursive compaction on each component.
        _compact_inner(subcc)

    # verify decomposition.
    for subcc in nx.weakly_connected_component_subgraphs(DC):

        # check its consistency.
        _validate_comp(subcc)

    # write to disk.
    nx.write_gpickle(DC, paths.decomp_file)
    nx.write_gpickle(BG, paths.bundle_file)
Example #3
def __cut(graph):
    ''' param:
            graph : a nx.DiGraph obj
        return:
            cs : edge cut set of the graph
            g1, g2 : subgraphs induced by cs
    '''
    assert isinstance(graph, nx.DiGraph), "graph class: %s " % graph.__class__
    assert graph.number_of_nodes() > 1,   "Number of nodes: %d" % graph.number_of_nodes()
    unigraph = nx.Graph(graph)
    cs = nx.minimum_edge_cut(unigraph)
    if not cs:
        raise Exception("Cut set of this graph is empty")

    # Edges in cs may not exist in the original digraph, so the direction of
    # such edges must be flipped; all real edges are stored in rcs.
    rcs = []
    for eachEdge in cs:
        if not graph.has_edge( eachEdge[0], eachEdge[1] ):
            eachEdge = (eachEdge[1], eachEdge[0]) # flip the direction
        rcs.append(eachEdge)
    graph.remove_edges_from(rcs)
    glist = []
    for eachCntComp in nx.weakly_connected_component_subgraphs(graph, copy=False):
        glist.append(eachCntComp)
    assert len(glist) == 2
    return rcs, glist[0], glist[1]
def longestPath(g, dict_seq):
    paths = []
    if g.number_of_nodes() == 0:
        return paths
    if g.number_of_nodes() == 1:
        paths.append(g.nodes())
        return paths
    if is_linear_graph(g)[0]:
        p = get_path_linear_graph(g)
        return [p]
    for c in nx.weakly_connected_component_subgraphs(g):
        if c.number_of_nodes() == 1:
            paths.append(c.nodes())
            continue
        dist = {}
        for node in nx.topological_sort(c):
            pairs = [(dist[v][0]+len(dict_seq[node])- g[v][node]['weight'], v) for v in c.pred[node]]
            if pairs:
                dist[node] = max(pairs)
            else:
                dist[node] = (len(dict_seq[node]), node)
        node, (length, _) = max(dist.items(), key=lambda x:x[1])
        path = []
        while length > len(dict_seq[node]):
            path.append(node)
            length, node = dist[node]
        paths.append(list(reversed(path)))
    return paths
def simplify_graph(g):
    for e in g.selfloop_edges():
        g.remove_edge(e[0], e[1])
    for node in g.nodes():
        neighbors = list(nx.all_neighbors(g, node))
        edges = g.in_edges(node, data=True)
        edges.extend(g.out_edges(node, data=True))
        plus = []
        minus = []
        for e in edges:
            if e[2][node] == '+':
                plus.append(e)
            else:
                minus.append(e)
        # decide only after classifying every edge; pruning inside the loop
        # above would act on incomplete counts
        if not plus or not minus:
            continue
        if len(plus) >= len(minus):
            for e in minus:
                if g.has_edge(e[0], e[1]):
                    g.remove_edge(e[0], e[1])
        if len(plus) <= len(minus):
            for e in plus:
                if g.has_edge(e[0], e[1]):
                    g.remove_edge(e[0], e[1])
    remove_out_tips(g)
    remove_in_tips(g)
    for c in nx.weakly_connected_component_subgraphs(g):
        if c.number_of_nodes() <= 2:
            continue
        isLinear, ends, source, sink = is_linear_graph(c)
        if isLinear:
            if sink == 1 and source == 1:
                continue
            adjust_edge_di(g, c, ends[0], ends[1])
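A minimal usage sketch for __cut, assuming it is called from inside the same module (the leading double underscore makes the name module-private) and a NetworkX 1.x release where weakly_connected_component_subgraphs still exists:

import networkx as nx

# A directed path 1 -> 2 -> 3 -> 4: any minimum edge cut is a single edge,
# and removing it leaves exactly two weakly connected components.
g = nx.DiGraph([(1, 2), (2, 3), (3, 4)])
rcs, g1, g2 = __cut(g)
print rcs                      # e.g. [(2, 3)]
print g1.nodes(), g2.nodes()   # the two halves of the split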
Example #6
def layer_layout(g, level_attribute="t"):
    '''Lay out a directed graph by layer
    
    g - a NetworkX directed graph with the layer defined as the node's "t"
        attribute. The graph must be acyclic - a restriction that's guaranteed
        by TrackObjects since edges are always going forward in time.
        
    level_attribute - the attribute in the node attribute dictionary that
        specifies the level of the node
        
    on exit, each node will have a y attribute that can be used to place
    the node vertically on a display. "t" can be used for the horizontal
    display.
    
    The algorithm is a partial implementation of 
    Sugiyama, Kozo, Tagawa, Shojiro; Toda, Mitsuhiko (1981), 
    "Methods for visual understanding of hierarchical system structures", 
    IEEE Transactions on Systems, Man, and Cybernetics SMC-11 (2):109-125,
    doi:10.1109/TSMC.1981.4308636
	
    as described by sydney.edu.au/engineering/it/~visual/comp4048/slides03.ppt
    '''
    
    subgraphs = nx.weakly_connected_component_subgraphs(g)
    y = 0
    for subgraph in subgraphs:
        y = layer_layout_subgraph(g, subgraph, y, level_attribute)
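A minimal driver for layer_layout, assuming layer_layout_subgraph is defined alongside it as in the source project:

import networkx as nx

g = nx.DiGraph()
g.add_node("a", t=0)
g.add_node("b", t=1)
g.add_node("c", t=1)
g.add_edges_from([("a", "b"), ("a", "c")])
layer_layout(g)
# each node now carries a "y" attribute; plot nodes at (t, y)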
Example #7
def prune_transcript_graph(G, strand, transcript_map,
                           min_trim_length=0, 
                           trim_utr_fraction=0.0,
                           trim_intron_fraction=0.0):
    '''
    trim_utr_fraction: float specifying the fraction of the average UTR
    coverage below which the ends of the UTR will be trimmed

    trim_intron_fraction: float specifying the fraction of the average 
    intron coverage below which intronic nodes will be removed
    '''
    # trim utrs and intron retentions
    trim_nodes = trim_graph(G, strand, min_trim_length, 
                            trim_utr_fraction, 
                            trim_intron_fraction)
    G.remove_nodes_from(trim_nodes)
    # collapse consecutive nodes in graph
    H = collapse_strand_specific_graph(G, transcript_map, introns=True)
    # get connected components of graph which represent independent genes
    # unconnected components are considered different genes
    Gsubs = nx.weakly_connected_component_subgraphs(H)
    for Gsub in Gsubs:
        # get partial path data supporting graph
        transcript_node_map = get_transcript_node_map(Gsub)
        path_score_dict = collections.defaultdict(lambda: 0)
        for t_id, nodes in transcript_node_map.iteritems():
            # reverse path for negative strand transcripts
            if strand == NEG_STRAND:
                nodes.reverse()
            # get transcript scores
            t = transcript_map[t_id]
            path_score_dict[tuple(nodes)] += t.score
        yield Gsub, strand, path_score_dict.items()
Example #8
def get_alternative_paths(subg, path):
	paths = []
	subg1 = subg.copy()
	for node in path:
		subg1.remove_node(node)

	for comp in nx.weakly_connected_component_subgraphs(subg1):
		if len(comp.nodes()) == 1:
			paths.append(comp.nodes())
		else:
			p = []
			for node in comp.nodes():
				if comp.out_degree(node) == 1 and comp.in_degree(node) == 0:
					p.append(node)
			for node in comp.nodes():
				if comp.out_degree(node) == 0 and comp.in_degree(node) == 1:
					p.append(node)

			if len(p) == 2:
				try:
					paths.append(nx.shortest_path(comp, p[0], p[1]))
				except nx.NetworkXNoPath:
					continue

	return paths
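A small worked example, assuming get_alternative_paths is in scope: removing the main path 1 -> 2 -> 3 from a diamond-shaped graph leaves the detour node as a single-node component:

import networkx as nx

subg = nx.DiGraph([(1, 2), (2, 3), (1, 4), (4, 3)])
print get_alternative_paths(subg, [1, 2, 3])   # [[4]]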
Example #9
def find_reach_topsort(dags, c2n):
    node_reach = dict()
    cluster_reach = dict()

    wccs = nx.weakly_connected_component_subgraphs(dags)

    for hub in wccs:
        # treat hubs of size 1 and 2 specially
        if len(hub) == 1:
            cluster = hub.nodes()[0]
            cluster_reach[cluster] = c2n[cluster]

            node_reach.update(dict(zip(c2n[cluster], [len(c2n[cluster])]*len(c2n[cluster]))))
        elif len(hub) == 2:
            cluster1, cluster2 = hub.edges()[0]

            cluster_reach[cluster2] = c2n[cluster2]
            cluster_reach[cluster1] = c2n[cluster1] + c2n[cluster2]

            node_reach.update(dict(zip(c2n[cluster1], [len(cluster_reach[cluster1])]*len(c2n[cluster1]))))
            node_reach.update(dict(zip(c2n[cluster2], [len(cluster_reach[cluster2])]*len(c2n[cluster2]))))
        else:
            hub_ts = nx.topological_sort(hub, reverse=True)
            for cluster in hub_ts:
                reach = set()
                for _, out_cluster in dags.out_edges(cluster):
                    reach.update(cluster_reach[out_cluster])
                reach.update(c2n[cluster])
                cluster_reach[cluster] = reach

                node_reach.update(dict(zip(c2n[cluster], [len(reach)]*len(c2n[cluster]))))
    return node_reach
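A small worked example with a hypothetical cluster DAG, assuming NetworkX 1.x (topological_sort(..., reverse=True) was removed in 2.0); c2n maps each cluster to the nodes it contains:

import networkx as nx

dags = nx.DiGraph([("A", "B"), ("B", "C")])
c2n = {"A": [1, 2], "B": [3], "C": [4, 5]}
print find_reach_topsort(dags, c2n)
# {1: 5, 2: 5, 3: 3, 4: 2, 5: 2} : for each node, how many nodes it can reach, itself included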
Example #10
def connect_digraph(D):
	""" Take a DiGraph with weakly connected components, and coalesce into one component."""

	s = nx.weakly_connected_component_subgraphs(D)
	#s is sorted by the size of the subgraph

	if len(s) > 1:
		largest = s[0]
		remaining = s[1:]

		largest_edges = largest.edges()

		#Let's filter out the one degree edges (otherwise we'll disconnect 
		#the graph when we swap edges around).
		candidates = []
		for u,v in largest_edges:
			if D.degree(u) > 1 and D.degree(v) > 1:
				candidates.append((u,v))

		if len(candidates) < len(remaining):
			raise Exception("There are not enough candidates for swapping.")

		#Connect the largest subgraph to the remaining.
		for G in remaining:
			u,v = random.choice(candidates)
			x,y = random.choice(G.edges())

			D.remove_edge(u, v)
			D.remove_edge(x, y)
			D.add_edge(u, y)
			D.add_edge(x, v)

			#Remove the used edge from candidates so it can't be chosen again.
			candidates.remove((u,v))
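A hedged usage sketch, assuming a NetworkX release old enough (<= 1.8) that weakly_connected_component_subgraphs returns a list sorted by component size, as the comment above relies on:

import networkx as nx

D = nx.DiGraph([(1, 2), (2, 3), (3, 1), (4, 5)])   # a 3-cycle plus a separate edge
connect_digraph(D)
print nx.is_weakly_connected(D)                    # True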
Example #11
    def compute_dependent_cohorts(self, objects, deletion):
        model_map = defaultdict(list)
        n = len(objects)
        r = range(n)
        indexed_objects = zip(r, objects)

        mG = self.model_dependency_graph[deletion]

        oG = DiGraph()

        for i in r:
            oG.add_node(i)

        for v0, v1 in mG.edges():
            try:
                for i0 in range(n):
                    for i1 in range(n):
                        if i0 != i1:
                            if not deletion and self.concrete_path_exists(
                                    objects[i0], objects[i1]):
                                oG.add_edge(i0, i1)
                            elif deletion and self.concrete_path_exists(
                                    objects[i1], objects[i0]):
                                oG.add_edge(i0, i1)
            except KeyError:
                pass

        components = weakly_connected_component_subgraphs(oG)
        cohort_indexes = [reversed(topological_sort(g)) for g in components]
        cohorts = [[objects[i] for i in cohort_index]
                   for cohort_index in cohort_indexes]

        return cohorts
Example #12
def draw_graphs(G, folder_name):
    domain_name = G.graph['domain']
    dir = folder_name + '/' + domain_name
    if not os.path.exists(dir):
        os.makedirs(dir)
    subgraphs = nx.weakly_connected_component_subgraphs(G)
    add_root_to_subgraphs(subgraphs)
    subgraphs.sort(key=lambda subgraph: subgraph.number_of_nodes())
    for i in xrange(len(subgraphs)): 
        subgraph = subgraphs[i]
        pos = nx.spring_layout(subgraph)
        node_labels = get_node_labels(subgraph)
        positive_nodes = node_labels['positive'].keys()
        negative_nodes = node_labels['negative'].keys()
        labels = dict(node_labels['positive'], **(node_labels['negative']))
        edge_labels = get_edge_labels(subgraph)
        pl.figure(figsize=(16, 12))
        nx.draw_networkx_nodes(subgraph, pos, positive_nodes, alpha=0.5, node_color='w')
        nx.draw_networkx_nodes(subgraph, pos, negative_nodes, alpha=0.5, node_color='b')
        nx.draw_networkx_nodes(subgraph, pos, ['root'], node_color='g')
        nx.draw_networkx_edges(subgraph, pos, color='k')
        nx.draw_networkx_labels(subgraph, pos, labels, font_size=20)
        nx.draw_networkx_edge_labels(subgraph, pos, edge_labels, font_size=20)
        pl.axis('off')
        pl.savefig('%s/%s_subgraph_%d.png' % (dir, domain_name, i+1))
Example #13
def main():
    file_path = sys.argv[1]
    global user_graph

    # Constructs the graph based on the dataset
    make_graph(file_path)

    # Get the weakly connected graph components. HITS is to be run on the largest of such components.
    weakly_connected_graph_components = nx.weakly_connected_component_subgraphs(user_graph)

    # Get the largest weakly connected graph component
    largest_weakly_connected_graph = weakly_connected_graph_components[0]

    (hub_score_counter, authority_score_counter) = run_hits_algorithm(largest_weakly_connected_graph)

    # Sort the lists
    sorted_hub_score_list = sorted(hub_score_counter.items(), key=lambda item: item[1], reverse=True)
    sorted_authority_score_list = sorted(authority_score_counter.items(), key=lambda item: item[1], reverse=True)

    # Print top 20 hubs
    print "Top 20 Hubs"
    print "==========="
    for i in range(0, 20):
        if sorted_hub_score_list[i] is not None:
            print sorted_hub_score_list[i][0]

    print ""

    # Print top 20 authorities
    print "Top 20 Authorities"
    print "=================="
    for i in range(0, 20):
        if sorted_authority_score_list[i] is not None:
            print sorted_authority_score_list[i][0]
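run_hits_algorithm is not shown above; a minimal sketch of the same pattern using NetworkX's built-in nx.hits (again assuming a 1.x release for weakly_connected_component_subgraphs):

import networkx as nx

g = nx.DiGraph([(1, 2), (2, 3), (1, 3), (4, 1), (5, 6)])
largest = max(nx.weakly_connected_component_subgraphs(g), key=len)
hubs, authorities = nx.hits(largest)
top_hubs = sorted(hubs.items(), key=lambda item: item[1], reverse=True)
print top_hubs[:20]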
Example #14
    def weakly_connected_subgraphs(self):
        """
        Yields weakly connected subgraphs and their topological sort.

        """
        for subgraph in nx.weakly_connected_component_subgraphs(self.G):
            yield (subgraph, nx.topological_sort(subgraph))
Example #15
 def split(self):
     '''splits into weakly connected component subgraphs'''
     # get connected components of graph which represent independent genes
     # unconnected components are considered different genes
     Gsubs = list(nx.weakly_connected_component_subgraphs(self.G))
     if len(Gsubs) == 1:
         yield self
         return
     # map nodes to components
     node_subgraph_map = {}
     subgraph_transfrag_map = collections.defaultdict(list)
     for i, Gsub in enumerate(Gsubs):
         for n_id in Gsub:
             n = self.get_node_interval(n_id)
             node_subgraph_map[n] = i
     # assign transfrags to components
     for t in self.itertransfrags():
         for n in split_transfrag(t, self.node_bounds):
             subgraph_id = node_subgraph_map[n]
             subgraph_transfrag_map[subgraph_id].append(t)
             break
     # create new graphs using the separate components
     for subgraph_transfrags in subgraph_transfrag_map.itervalues():
         yield SpliceGraph.create(subgraph_transfrags,
                                  guided_ends=self.guided_ends,
                                  guided_assembly=self.guided_assembly)
Example #16
File: net.py Project: mkotov/habran
def main():
    G, karmas = read_data("karma.txt")
    cs = nx.weakly_connected_component_subgraphs(G)
    cs.sort(key=lambda c: c.number_of_nodes(), reverse=True)

    plt.clf()
    draw(cs[126], karmas)
    plt.show()
Example #17
 def __init__(self, scaffold_graph):
     print "Entering PathFinder module:", str(datetime.now())
     self.G = scaffold_graph.copy()
     #Build strandless list of sequences
     sequences = set([n for n in self.G.nodes() if n > 0])
     #Define weakly connected components
     print "1... Defining weakly connected components"
     component_graphs = set([g for g in nx.weakly_connected_component_subgraphs(self.G)])
     single_node_graphs = set([g for g in component_graphs if len(g.nodes()) == 1])
     multi_node_graphs = set([g for g in component_graphs if len(g.nodes()) > 1])
     print "Number of single-node components:", len(single_node_graphs)
     print "Number of multi-node components:", len(multi_node_graphs)
     #Consolidate unscaffolded nodes, discard reverse strand
     print "2... Consolidating single-node components"
     unscaffolded = set([g.nodes()[0] for g in single_node_graphs])
     discard_nodes = set([n for n in unscaffolded if n < 0])
     for g in iter(single_node_graphs.copy()):
         if g.nodes()[0] in discard_nodes:
             single_node_graphs.discard(g)
     print "Number of unscaffolded sequences:", len(single_node_graphs)
     #Classify multi-node graphs
     print "3... Classifying multi-node components"
     DAG = set([])
     Euler = set([])
     for g in multi_node_graphs:
         if nx.is_directed_acyclic_graph(g):
             DAG.add(g)
         elif nx.is_eulerian(g):
             Euler.add(g)
         else:
             sys.exit("FATAL ERROR: Unknown multi-node graph type!")
     print "Number of directed acyclic graphs:",  len(DAG)
     print "Number of Eulerian graphs:", len(Euler)
     #Build scaffolds from DAGs
     print "4... Building scaffolds from directed acyclic graphs"
     self.scaffolds = set([])
     for g in DAG:
         self.build_dag_scaffold(g)
     #Consolidate complementary scaffolds, keeping the first found
     print "5... Consolidating complementary scaffolds"
     consolidated_scaff = set([])
     for seq in iter(self.scaffolds):
         comp = self.revc(seq)
         if comp in self.scaffolds:
             if comp not in consolidated_scaff:
                 consolidated_scaff.add(seq)
         else:
             print "WARNING: non-complemented scaffold"
     self.scaffolds = consolidated_scaff
     print "Number of scaffolds assembled:", len(self.scaffolds)
     #Build scaffolds from Eulerian graphs
     
     #Add unscaffolded seqs to scaffolds list
     print "6... Adding unscaffolded sequences to output"
     for g in single_node_graphs:
         seq = self.G.node[g.nodes()[0]]['seq']
         self.scaffolds.add(seq)
     print "Leaving PathFinder module:", str(datetime.now())
Example #18
    def find_largest_component(self):
        G = self.graph
        list_Graphs = nx.weakly_connected_component_subgraphs(G)
        max_component = list_Graphs[0]
        for g in list_Graphs:
            if nx.number_of_nodes(g) > nx.number_of_nodes(max_component):
                max_component = g

        return max_component
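The loop above can be written as a one-liner with max, which also works on NetworkX versions where the call returns a generator rather than a sized list:

    def find_largest_component(self):
        return max(nx.weakly_connected_component_subgraphs(self.graph),
                   key=nx.number_of_nodes)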
Example #19
    def keep_weakly_connected(self):
        '''This method filters out exons (nodes) not involved in AS events'''
        # find weakly connected subgraphs
        weakly_connected_list = nx.weakly_connected_component_subgraphs(self.sub_graph)

        # iterate to find which subgraph has the target exon
        for subgraph in weakly_connected_list:
            if self.target in subgraph.nodes():
                self.sub_graph = subgraph  # assign subgraph that actually connects to target exon
Example #20
def check_connected_balanced(graph):
    """
    :type graph: nx.DiGraph
    """
    for v in graph.nodes():
        assert graph.in_degree(v) == graph.out_degree(v)
    sub_graph_list = nx.weakly_connected_component_subgraphs(graph, copy=True)
    for sub_graph in sub_graph_list:
        print 'connected component:', sub_graph.edges()
    print
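A quick sanity check, assuming check_connected_balanced is in scope; a directed cycle is balanced and weakly connected, so the assertions pass and a single component is printed:

import networkx as nx

g = nx.DiGraph([(1, 2), (2, 3), (3, 1)])   # every node has in-degree == out-degree
check_connected_balanced(g)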
Example #21
def crgraph(g):
    # Every node of the incoming graph is a string; its 'label' attribute holds the cellref.
    assert isinstance(g, nx.DiGraph)
    gg = g.copy()
    label = nx.get_node_attributes(gg, 'label')
    fds = [node for node in gg.nodes() if label[node] == dffkeyword]
    gg.remove_nodes_from( fds )
   
    cr = nx.DiGraph()
    cr.name = gg.name+"_crgraph"
    clouds = []
    ccnt = 0
    for cloud in nx.weakly_connected_component_subgraphs(gg):
        assert isinstance(cloud, nx.DiGraph )
        ccnt += 1
        cloud.name = "cloud%d" % ccnt 
        clouds.append( cloud )
        cr.add_node(cloud, label = cloud.name)
    cr.add_nodes_from(fds, label = dffkeyword)

    empty_cnt = 0
    for edge in g.edges_iter():
        pre = edge[0]
        succ = edge[1]
        credge = ()
        if  label[pre] == dffkeyword and label[succ] == dffkeyword:
            empty = nx.DiGraph(name = 'empty%d'% empty_cnt )
            empty_cnt += 1
            cr.add_edge( pre, empty )
            cr.add_edge( empty, succ )
            continue
        elif label[pre] != dffkeyword and label[succ] == dffkeyword:
            for cloud in clouds:
                if cloud.has_node(pre):
                    credge = (cloud, succ)
            if not credge:
                print "None of the clouds has prim: %s %s. in edge:%s" % (label[pre], pre, str(edge))
                print "that node in original graph:",
                print "precs:%s succs:%s" % (str(g.predecessors(pre)), str(g.successors(pre)))
                raise AssertionError
        elif label[pre] == dffkeyword and label[succ] != dffkeyword:
            for cloud in clouds:
                if cloud.has_node( succ):
                    credge = (pre, cloud)
            if not credge:
                print "None of the clouds has prim: %s %s. in edge:%s" % (label[succ], succ, str(edge))
                print "that node in original graph:"
                print "precs:%s succs:%s" % (str(g.predecessors(succ)), str(g.successors(succ)))
                raise AssertionError
        else:
            continue
        cr.add_edge(credge[0], credge[1] )
    cr.fds = fds
    cr.clouds = clouds
    return clouds, fds, cr 
def find_intermittent_nodes_directed(g):
    tree = []
    node_cri = find_critical_nodes_directed(g)    
    for gi in nx.weakly_connected_component_subgraphs(g):
        tree += get_tree(gi,[])
    for i in xrange(len(tree)):
        node,nb,_ = tree[i]
        if node in node_cri:
            tree[i] = (node, nb, 'c')
    node_int = get_ni(tree)
    return node_int
Example #23
def _handle_contained_in(ctx):
    # for each 'contained' tree, recursively build new trees based on
    # scaling groups with generated ids
    for contained_tree in nx.weakly_connected_component_subgraphs(
            ctx.plan_contained_graph.reverse(copy=True)):
        # extract tree root node id
        node_id = nx.topological_sort(contained_tree)[0]
        _build_multi_instance_node_tree_rec(
            node_id=node_id,
            contained_tree=contained_tree,
            ctx=ctx)
    ctx.deployment_contained_graph = ctx.deployment_node_graph.copy()
Example #24
def output_domain_data(G, domain_read_dict, folder_name):
    domain = G.graph['domain']
    dir = folder_name + '/' + domain
    if not os.path.exists(dir):
        os.makedirs(dir)
    out_file_name = dir + '/' + domain + '.data'
    subgraphs = nx.weakly_connected_component_subgraphs(G)
    subgraphs.sort(key=lambda subgraph:subgraph.number_of_nodes())
    with open(out_file_name, 'w') as f:
        for i in xrange(len(subgraphs)):
            subgraph = subgraphs[i]
            output_subgraph_data(subgraph, i+1, domain_read_dict, f)
Example #25
def processNetwork():
    try:
        mydata = request.json
        nodes = mydata['nodes']
        edges = mydata['edges']
        networks = mydata['networks']
        commands = mydata['commands']
        commands_networks = mydata['commands_networks']
        commands_distances = mydata['distances']
        distances_methods = {d: getattr(distances,d) for d in commands_distances}
        #print distances_methods
        net = pn()
        #print 'nodes',nodes
        #print 'networks',networks
        for node in nodes:
            #print node
            net.add_node(str(node['id']))
            if ('label' in node) and (node['label']!=None):
                #print node['id'],node['label']
                net._labels[str(node['id'])] = str(node['label'])
        for edge in edges:
            net.add_edge(str(edge['source']),str(edge['target']))
    
        subnetworks = nx.weakly_connected_component_subgraphs(net)
        for subnetwork in subnetworks:
            #print 'sbn:'+subnetwork.eNewick()
            for node in subnetwork.nodes():
                if node in net._labels:
                    subnetwork._labels[node] = net._labels[node]
            #print 'sbn.'+subnetwork.eNewick()
        #print subnetworks;
        for network in networks:
            #print network['name']
            #print network['nodes']
            onenote = network['nodes'][0]
            for subnetwork in subnetworks:
                if onenote in subnetwork.nodes():
                    subnetwork.name = network['name']
        # use a fresh loop variable; rebinding net would break the lookups below
        for subnetwork in subnetworks:
            print subnetwork.eNewick()
        fs = {command:getattr(net,str(command)) for command in commands}
        data = {}
        data['nodes'] = {u:applyAll(fs,u) for u in net.nodes()}
    
        #fsnets = {command_network:getattr(net,str(command_network)) for command_network in commands_networks}
        #print 'aqui'
        data['networks'] = {n.name:applyAllNets(commands_networks,n,subnetworks,distances_methods) for n in subnetworks}
        #print datanetworks
        
        return jsonify(response=data)
    except Exception, err:
        print err
        return jsonify(response={'error': 'Some error occurred. Please check your data. If you think this is a bug, please contact us (see About section).<br> Error message: %s' % err})
Example #26
File: graph.py Project: kpj/Bioto
    def __init__(self, graph, largest=False):
        """ Only considers largest weakly connected component if needed
        """
        self.graph = graph
        if largest:
            self.graph = max(nx.weakly_connected_component_subgraphs(self.graph), key=len)

        self.io = IOComponent(self)
        self.system = DynamicalSystem(self)
        self.math = Math(self)

        self.setup()
Example #27
def author_interaction_weighted_graph(discussion_graph, json_data, limit=10):
    niter = 0
    for conn_subgraph in nx.weakly_connected_component_subgraphs(discussion_graph):
        interaction_graph = nx.DiGraph()
        origin = min(int(x) for x in conn_subgraph.nodes())
        add_to_weighted_graph(interaction_graph, discussion_graph, json_data, [origin], [])
        # print(json_data[origin])
        g1 = nx.to_agraph(interaction_graph)
        g1.draw("author_weighted/"+str(origin)+'.png', prog='circo')
        niter += 1
        if limit == niter and limit > 0:
            break
Example #28
def output_graph_stat(G, mapped_read_lookup_dict):  
    domain = G.graph['domain']                  
    subgraphs = nx.weakly_connected_component_subgraphs(G)
    subgraph_num = len(subgraphs)
    subgraph_size_list = get_subgraph_size_list(subgraphs)
    mapped_read_num = len(mapped_read_lookup_dict[domain].keys())
    aligned_read_num = G.number_of_nodes()
    sys.stdout.write('%s:%d:%d:%d' % (domain, mapped_read_num, 
                                      aligned_read_num, subgraph_num))
    for positive_num, negative_num in subgraph_size_list:
        sys.stdout.write(' %d:%d' % (positive_num, negative_num)) 
    sys.stdout.write('\n')
Example #29
    def read_network_file(self, networkfile):
        # Read the network from file
        net = nx.DiGraph()

        for line in networkfile:
            line = line.decode('UTF-8')
            items = [x.strip() for x in line.rstrip().split('\t')]

            # Skip empty lines or those beginning with '#' comments
            if line=='':
                continue
            if line[0]=='#':
                continue

            id1 = items[0]
            id2 = items[1]

            # Ignore self-edges
            if id1==id2:
                continue

            # Possibly use an edge weight
            eWeight = 1
            if len(items) > 2:
                eWeight = float(items[2])
            elif not self.page_rank:
                raise PathLinkerError('ERROR: All edges must have a weight, unless --PageRank is used. Edge (%s --> %s) does not have a weight entry.'%(id1, id2))

            # Assign the weight. Note in the PageRank case, "weight" is
            # interpreted as running PageRank and edgeflux on a weighted
            # graph.
            net.add_edge(id1, id2, ksp_weight=eWeight, weight=eWeight)

        # Operate on only the largest connected component
        if self.largest_connected_component:

            conn_comps = nx.weakly_connected_component_subgraphs(net)

            # This is the only portion of the program which prevents
            # compatibility between Python 2 & 3. In 2, this object is a
            # generator, but in 3 it is a list. Just check the type and
            # handle accordingly to provide cross-compatibility.
            if isinstance(conn_comps, types.GeneratorType):
                net = next(conn_comps)
            elif isinstance(conn_comps, list):
                net = conn_comps[0]
            else:
                raise PathLinkerError('Compatibility error between NetworkX and Python versions. Connected components object from NetworkX does not have acceptable type.')

            print("\n Using only the largest weakly connected component:\n" + nx.info(net))

        self.set_network(net)
        return net
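Note that when the call returns a generator (the Python 3 branch above), next() yields an arbitrary first component, which is not guaranteed to be the largest; a version-agnostic way to pick the largest is max with key=len:

import networkx as nx

def largest_weak_component(net):
    # max() consumes either a list or a generator and always picks the biggest component
    return max(nx.weakly_connected_component_subgraphs(net), key=len)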
Example #30
	def filter_graph_for_weakly_connected_components(self, min_nodes=2): 
		"""
		Get weakly connected components in graph. 
		min_nodes : int 
			Return only connected components with a minimal number of min_nodes
		"""
		edges = [] 
		for g in nx.weakly_connected_component_subgraphs(self.graph): 
			if len(g.nodes(data=True)) >= min_nodes: 
				for e in g.edges(data=True): 
					edges.append(e)

		self.graph = nx.DiGraph(edges)
Example #31
def compartmentalize_skeletongroup(skeleton_id_list, project_id, **kwargs):

    skelgroup = SkeletonGroup(skeleton_id_list, project_id)

    compartment_graph_of_skeletons = {}
    resultgraph = nx.DiGraph()

    for skeleton_id, skeleton in skelgroup.skeletons.items():
        if 'confidence_threshold' in kwargs:
            confidence_filtering(skeleton, kwargs['confidence_threshold'])
        elif 'edgecount' in kwargs:
            edgecount_filtering(skeleton, kwargs['edgecount'])

        subgraphs = nx.weakly_connected_component_subgraphs(skeleton.graph)
        compartment_graph_of_skeletons[skeleton_id] = subgraphs

        for i, subg in enumerate(subgraphs):
            for nodeid, d in subg.nodes_iter(data=True):
                d['compartment_index'] = i
                skeleton.graph.node[nodeid]['compartment_index'] = i

            if len(skeleton.neuron.name) > 30:
                neuronname = skeleton.neuron.name[:30] + '...' + ' [{0}]'.format(i)
            else:
                neuronname = skeleton.neuron.name + ' [{0}]'.format(i)

            resultgraph.add_node(
                '{0}_{1}'.format(skeleton_id, i), {
                    'neuronname': neuronname,
                    'skeletonid': str(skeleton_id),
                    'compartment_index': i,
                    'node_count': subg.number_of_nodes(),
                })

    connectors = {}
    for skeleton_id, skeleton in skelgroup.skeletons.items():
        for connector_id, v in skeleton.connected_connectors.items():
            if connector_id not in connectors:
                connectors[connector_id] = {'pre': [], 'post': []}

            if len(v['presynaptic_to']):
                # add the skeleton id for each treenode that is in v['presynaptic_to']
                # This can duplicate skeleton id entries which is correct
                for e in v['presynaptic_to']:
                    skeleton_compartment_id = '{0}_{1}'.format(
                        skeleton_id,
                        skeleton.graph.node[e]['compartment_index'])
                    connectors[connector_id]['pre'].append(
                        skeleton_compartment_id)

            if len(v['postsynaptic_to']):
                for e in v['postsynaptic_to']:
                    skeleton_compartment_id = '{0}_{1}'.format(
                        skeleton_id,
                        skeleton.graph.node[e]['compartment_index'])
                    connectors[connector_id]['post'].append(
                        skeleton_compartment_id)

    # merge connectors into graph
    for connector_id, v in connectors.items():
        for from_skeleton in v['pre']:
            for to_skeleton in v['post']:

                if not resultgraph.has_edge(from_skeleton, to_skeleton):
                    resultgraph.add_edge(from_skeleton, to_skeleton, {
                        'count': 0,
                        'connector_ids': set()
                    })

                resultgraph.edge[from_skeleton][to_skeleton]['count'] += 1
                resultgraph.edge[from_skeleton][to_skeleton][
                    'connector_ids'].add(connector_id)

    return resultgraph
Example #32
def scoreERS(codes, iterations=25):

    #Utility to read importance scores generated by BONITA, and calculate total ancestor overlap in preparation for plotting

    allRes = {}
    for code in codes:
        print(code)
        temp_df2 = pd.DataFrame()
        allRes[str(code)] = {}
        originalGraph = nx.read_gpickle("gpickles/" + code + ".gpickle")
        scoreFunction6 = ruleScore6(originalGraph)
        graph = max(nx.weakly_connected_component_subgraphs(originalGraph),
                    key=len)  # get around the problem of disconnected graphs

        if len(graph) >= 3:

            for iteration in range(1, iterations + 1):

                allRes[str(code)][str(iteration)] = {}
                pickleFile = str('pickles/' + code + '_' + str(iteration) +
                                 '_local1.pickle')
                outputList = pickle.load(
                    open(pickleFile, 'rb')
                )  #python2 version #outputList=pickle.load(open(pickleFile, 'rb'), encoding='latin1') = python3 version
                bruteOut1, dev, storeModel, storeModel3, equivalents, dev2 = [
                    outputList[k] for k in range(len(outputList))
                ]
                model1 = modelHolder(storeModel3)

                scores_pickle = ('pickles/' + code + '_' + str(iteration) +
                                 '_scores1.pickle')
                if os.path.isfile(scores_pickle):
                    pathVals = pickle.Unpickler(open(scores_pickle, "rb")).load()
                    ImportanceVals = {}
                else:
                    print(
                        "Importance scores not found, setting all values to 0")
                    isGeneric4 = True
                    pathVals = [0] * len(model1.nodeList)

                for node in range(0, len(model1.nodeList)):
                    allRes[str(code)][str(iteration)][str(
                        model1.nodeList[node])] = []
                    start1, end1 = findEnds2(model1, model1.nodeList[node],
                                             equivalents[node])
                    ers = equivalents[
                        node]  # find the bitstring for just this node
                    inEdges = findInEdges(
                        model1, model1.nodeList.index(model1.nodeList[node]))
                    plainRules = []
                    for rule in ers:
                        plainRules.append(
                            writeNode(
                                model1.nodeList.index(model1.nodeList[node]),
                                rule, model1))
                    ruleLengths = len(ers)
                    ersAllNodes = plainRules
                    rnAllNodes = [pr.count("or") + 1 for pr in plainRules]
                    ImportanceVals = pathVals[node]
                    inDegree = originalGraph.in_degree(model1.nodeList[node])
                    #remember that we have just selected the largest component of the graph for graph theoretic analysis
                    if model1.nodeList[node] in graph.nodes():
                        allRes[str(code)][str(iteration)][str(
                            model1.nodeList[node])] = [
                                ruleLengths, rnAllNodes, ImportanceVals,
                                inDegree, scoreFunction6[model1.nodeList[node]]
                            ]
                    else:
                        allRes[str(code)][str(iteration)][str(
                            model1.nodeList[node])] = [
                                ruleLengths, rnAllNodes, ImportanceVals,
                                float('NaN'),
                                scoreFunction6[model1.nodeList[node]]
                            ]

        else:
            continue

    return (allRes)
Example #33
import sys
import networkx as nx

G=nx.DiGraph()

filename = sys.argv[1]
out = sys.argv[2]


output = open(out, 'w')
with open(filename) as fp:
	for line in fp:
		var = line.split()
		if(var[0] == 'S'):
			G.add_node(var[1])
		else:
			G.add_edge(var[1],var[3])


#print(G.edges())
for x in list(nx.weakly_connected_component_subgraphs(G)):
	output.write(">")
	output.write(' '.join(nx.topological_sort(x)) + "\n")
output.close()

#print(x[0].nodes())

Example #34
 def trim_graph(self):
     self.graph = max(weakly_connected_component_subgraphs(self.graph),
                      key=len)
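weakly_connected_component_subgraphs was removed in NetworkX 2.4; an equivalent trim on current releases builds the subgraph from weakly_connected_components, roughly:

import networkx as nx

def trim_graph(g):
    # keep only the largest weakly connected component (NetworkX >= 2.4 API)
    largest = max(nx.weakly_connected_components(g), key=len)
    return g.subgraph(largest).copy()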
Example #35
def rand_split_train_test(G, train_frac=0.51):
    """
    Splits the edges of the input graph in sets of train and test and returns the results. Split is performed using the
    random split approach (see Notes). The resulting train edge set has the following properties: spans a graph
    (digraph) with a single connected (weakly connected) component.

    Parameters
    ----------
    G : graph
        A NetworkX graph or digraph.
    train_frac : float, optional
        The proportion of train edges w.r.t. the total number of edges in the input graph (range (0.0, 1.0]).
        Default is 0.51.

    Returns
    -------
    train_E : set
        The set of train edges.
    test_E : set
        The set of test edges.

    Raises
    ------
    ValueError
        If the train_frac parameter is not in range (0, 1].

    Notes
    -----
    The method proceeds as follows: (1) randomly remove 1-train_frac percent of edges from the input graph.
    (2) from the remaining edges compute the main connected component and these will be the train edges. (3) from the
    set of removed edges, those such that both end nodes exist in the train edge set computed in the previous step,
    are added to the final test set.
    """
    if train_frac <= 0.0 or train_frac > 1.0:
        raise ValueError('The train_frac parameter needs to be in range: (0.0, 1.0]')
    if train_frac == 1.0:
        return set(G.edges()), set()

    # Create a set of all edges in G
    E = set(G.edges)
    num_E = len(E)

    # Compute the potential number of train and test edges which corresponds to the fraction given
    num_train_E = int(np.ceil(train_frac * num_E))
    num_test_E = int(num_E - num_train_E)

    # Randomly remove 1-train_frac edges from the graph and store them as potential test edges
    pte_edges = set(random.sample(E, num_test_E))

    # The remaining edges are potential train edges
    ptr_edges = E - pte_edges

    # Create a graph containing all ptr_edges and compute the mainCC
    if G.is_directed():
        H = nx.DiGraph()
        H.add_edges_from(ptr_edges)
        maincc = max(nx.weakly_connected_component_subgraphs(H), key=len)
    else:
        H = nx.Graph()
        H.add_edges_from(ptr_edges)
        maincc = max(nx.connected_component_subgraphs(H), key=len)

    # The edges in the mainCC graph are the actual train edges
    train_E = set(maincc.edges)

    # Remove potential test edges for which the end nodes do not exist in the train_E
    test_E = set()
    for (src, dst) in pte_edges:
        if src in maincc.nodes and dst in maincc.nodes:
            test_E.add((src, dst))

    # Return the sets of edges
    return train_E, test_E
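A usage sketch, assuming the function's module imports numpy as np, random, and networkx as nx as the body requires:

import networkx as nx

g = nx.gnp_random_graph(50, 0.2, seed=0, directed=True)
train_E, test_E = rand_split_train_test(g, train_frac=0.8)
# train_E spans a single weakly connected component; every test edge's
# endpoints are guaranteed to appear among the train nodes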
Example #36
    donations_train["Project ID"].values.tolist())]
donors = donors[donors["Donor ID"].isin(
    donations_train["Donor ID"].values.tolist())]
#donG=nx.from_pandas_edgelist(donations_train,source="Donor ID",target="Project ID",edge_attr=True,create_using=nx.DiGraph(),)
donB = nx.DiGraph()
donB.add_nodes_from(donors["Donor ID"].values.tolist(), project=0)
donB.add_nodes_from(projects["Project ID"].values.tolist(), project=1)
donB.add_weighted_edges_from(
    donations_train[["Donor ID", "Project ID",
                     "Donation Amount"]].values.tolist())
projectlabel = nx.get_node_attributes(donB, "project")
donornodes = {n for n, d in donB.nodes(data=True) if d['project'] == 0}
donorG = bipartite.projected_graph(donB, donornodes)
#remove = [node for node,degree in list(donortodonor.degree()) if degree < 3]
#donortodonor.remove_nodes_from(remove)
wccs = list(nx.weakly_connected_component_subgraphs(donB))
sortedwc = sorted(wccs, key=lambda x: len(x.nodes()), reverse=True)
largestwcc = sortedwc[0]
len(donB.nodes())
len(list(largestwcc.nodes()))
comdf = pd.DataFrame()
for i in range(len(communities)):
    print(len(communities[i]))
start = time.time()
community_generator = community.girvan_newman(largestwcc)
for i in range(29):
    communities = next(community_generator)
    print("number of comm:" + str(len(communities)))
    for j in range(len(communities)):
        print("size:" + str(len(donB.subgraph(communities[j]).nodes())))
with open('communities30.txt', 'w') as filehandle:
Example #37
def answer_four():
    G = answer_one()

    return len(
        max(nx.weakly_connected_component_subgraphs(G), key=len).nodes())
Example #38
def get_lcc(di_graph):
    di_graph = max(nx.weakly_connected_component_subgraphs(di_graph), key=len)
    tdl_nodes = di_graph.nodes()
    nodeListMap = dict(zip(tdl_nodes, range(len(tdl_nodes))))
    nx.relabel_nodes(di_graph, nodeListMap, copy=False)
    return di_graph, nodeListMap
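A usage sketch for get_lcc; the returned map goes from the original labels to the new consecutive integer ids:

import networkx as nx

g = nx.DiGraph([("a", "b"), ("b", "c"), ("x", "y")])
lcc, node_map = get_lcc(g)
print lcc.nodes()   # [0, 1, 2]
print node_map      # e.g. {'a': 0, 'b': 1, 'c': 2}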
Example #39
def generate_haplotigs_for_ctg(input_):
   
    ctg_id, out_dir = input_
    global p_asm_G
    global h_asm_G
    global all_rid_to_phase
    global seqs
    arid_to_phase = all_rid_to_phase[ctg_id]

    mkdir( out_dir )

    ctg_G = p_asm_G.get_sg_for_ctg(ctg_id) 

    ctg_nodes = set(ctg_G.nodes())

    sg = nx.DiGraph()
    
    for v, w in ctg_G.edges():
        
        vrid = v[:9]
        wrid = w[:9]
            
        edge_data = p_asm_G.sg_edges[ (v, w) ]
        if edge_data[-1] != "G":
            continue

        vphase = arid_to_phase.get(vrid, (-1,0))
        wphase = arid_to_phase.get(wrid, (-1,0))
        if vphase[0] == wphase[0] and vphase[1] != wphase[1]:
            cross_phase = "Y"
        else:
            cross_phase = "N"

        sg.add_node( v, label= "%d_%d" % vphase, 
                        phase="%d_%d" % vphase, 
                        src="P" )

        sg.add_node( w, label= "%d_%d" % wphase, 
                        phase="%d_%d" % wphase, 
                        src="P" )

        sg.add_edge(v, w, src="OP", cross_phase = cross_phase)

        # we need to add the complementary edges as the ctg_graph does not contain the dual edges
        rv = reverse_end(v)
        rw = reverse_end(w)
        sg.add_node( rv, label= "%d_%d" % vphase, 
                         phase="%d_%d" % vphase, 
                         src="P" )

        sg.add_node( rw, label= "%d_%d" % wphase, 
                         phase="%d_%d" % wphase, 
                         src="P" )

        sg.add_edge(rw, rv, src="OP", cross_phase = cross_phase)

    PG_nodes = set(sg.nodes())
    PG_edges = set(sg.edges())

    for v, w in h_asm_G.sg_edges:
        
        vrid = v[:9]
        wrid = w[:9]

        if vrid not in arid_to_phase:
            continue
        if wrid not in arid_to_phase:
            continue
        
        if (v, w) in PG_edges:
            if p_asm_G.sg_edges[(v,w)][-1] == "G":
                continue

        edge_data = h_asm_G.sg_edges[ (v, w) ]

        if edge_data[-1] != "G":
            continue

        cross_phase = "N"
        if v not in PG_nodes:
            sg.add_node( v, label= "%d_%d" % arid_to_phase[vrid], 
                            phase="%d_%d" % arid_to_phase[vrid], 
                            src="H" )

        if w not in PG_nodes:
            sg.add_node( w, label= "%d_%d" % arid_to_phase[wrid], 
                            phase="%d_%d" % arid_to_phase[wrid], 
                            src="H" )

        sg.add_edge(v, w, src="H", cross_phase = cross_phase)

        rv = reverse_end(v)
        rw = reverse_end(w)
        if rv not in PG_nodes:
            sg.add_node( rv, label= "%d_%d" % arid_to_phase[vrid], 
                             phase="%d_%d" % arid_to_phase[vrid], 
                             src="H" )

        if rw not in PG_nodes:
            sg.add_node( rw, label= "%d_%d" % arid_to_phase[wrid], 
                             phase="%d_%d" % arid_to_phase[wrid], 
                             src="H" )

        sg.add_edge(rw, rv, src="H", cross_phase = cross_phase)

    sg0 = sg.copy()
    for v, w in h_asm_G.sg_edges:
        vrid = v[:9]
        wrid = w[:9]
        if vrid not in arid_to_phase:
            continue
        if wrid not in arid_to_phase:
            continue
        
        if (v, w) in PG_edges:
            if p_asm_G.sg_edges[(v,w)][-1] == "G":
                continue

        edge_data = h_asm_G.sg_edges[ (v, w) ]

        if sg0.in_degree(w) == 0:
            cross_phase = "Y"
            if v not in PG_nodes:
                sg.add_node( v, label= "%d_%d" % arid_to_phase[vrid], 
                                phase="%d_%d" % arid_to_phase[vrid], 
                                src="H" )

            if w not in PG_nodes:
                sg.add_node( w, label= "%d_%d" % arid_to_phase[wrid], 
                                phase="%d_%d" % arid_to_phase[wrid], 
                                src="H" )

            sg.add_edge(v, w, src="ext", cross_phase = cross_phase)

            rv = reverse_end(v)
            rw = reverse_end(w)
            if rv not in PG_nodes:
                sg.add_node( rv, label= "%d_%d" % arid_to_phase[vrid], 
                                 phase="%d_%d" % arid_to_phase[vrid], 
                                 src="H" )

            if rw not in PG_nodes:
                sg.add_node( rw, label= "%d_%d" % arid_to_phase[wrid], 
                                 phase="%d_%d" % arid_to_phase[wrid], 
                                 src="H" )

            sg.add_edge(rw, rv, src="ext", cross_phase = cross_phase)

        if sg0.out_degree(v) == 0:
            cross_phase = "Y"
            if v not in PG_nodes:
                sg.add_node( v, label= "%d_%d" % arid_to_phase[vrid], 
                                phase="%d_%d" % arid_to_phase[vrid], 
                                src="H" )

            if w not in PG_nodes:
                sg.add_node( w, label= "%d_%d" % arid_to_phase[wrid], 
                                phase="%d_%d" % arid_to_phase[wrid], 
                                src="H" )

            sg.add_edge(v, w, src="ext", cross_phase = cross_phase)

            rv = reverse_end(v)
            rw = reverse_end(w)
            if rv not in PG_nodes:
                sg.add_node( rv, label= "%d_%d" % arid_to_phase[vrid], 
                                 phase="%d_%d" % arid_to_phase[vrid], 
                                 src="H" )

            if rw not in PG_nodes:
                sg.add_node( rw, label= "%d_%d" % arid_to_phase[wrid], 
                                 phase="%d_%d" % arid_to_phase[wrid], 
                                 src="H" )

            sg.add_edge(rw, rv, src="ext", cross_phase = cross_phase)

    sg2 = sg.copy()
    ctg_nodes_r = set([ reverse_end(v) for v in list(ctg_nodes) ])
    for v, w in ctg_G.edges():
        sg2.remove_edge(v, w)
        rv, rw = reverse_end(v), reverse_end(w)
        sg2.remove_edge(rw, rv)
    for v in sg2.nodes():
        if sg2.out_degree(v) == 0 and sg2.in_degree(v) == 0:
            sg2.remove_node(v)

    nodes_to_remove = set()
    edges_to_remove = set()
    for sub_g in nx.weakly_connected_component_subgraphs(sg2):
        sub_g_nodes = set(sub_g.nodes())
        if len(sub_g_nodes & ctg_nodes_r) > 0 and len(sub_g_nodes & ctg_nodes) > 0:
            # remove cross edge
            sources = [n for n in sub_g.nodes() if sub_g.in_degree(n) == 0 or n in ctg_nodes or n in ctg_nodes_r ]
            sinks = [n for n in sub_g.nodes() if sub_g.out_degree(n) == 0 or n in ctg_nodes or n in ctg_nodes_r ]
            edges_to_keep = set()
            for v in sources:
                for w in sinks:
                    path = []
                    if v in ctg_nodes and w not in ctg_nodes_r:
                        try:
                            path = nx.shortest_path( sub_g, v, w ) 
                        except nx.exception.NetworkXNoPath:
                            path = []
                    elif v not in ctg_nodes and w in ctg_nodes_r:
                        try:
                            path = nx.shortest_path( sub_g, v, w )
                        except nx.exception.NetworkXNoPath:
                            path = []

                    if len(path) >= 2:
                        v1 = path[0]
                        for w1 in path[1:]:
                            edges_to_keep.add( (v1, w1) )
                            rv1, rw1 = reverse_end(v1), reverse_end(w1)
                            edges_to_keep.add( (rw1, rv1) )
                            v1 = w1
            for v, w in sub_g.edges():
                if (v, w) not in edges_to_keep:
                    edges_to_remove.add( (v, w) )
                    rv, rw = reverse_end(v), reverse_end(w)
                    edges_to_remove.add( (rw, rv) )


        if len(sub_g_nodes & ctg_nodes_r) == 0 and len(sub_g_nodes & ctg_nodes) == 0:
            nodes_to_remove.update( sub_g_nodes )
            nodes_to_remove.update( set( [reverse_end(v) for v in list(sub_g_nodes)] ) )

    for v, w in list(edges_to_remove):
        sg.remove_edge(v, w)

    for v in nodes_to_remove:
        sg.remove_node(v)

    for v in sg.nodes():
        if sg.out_degree(v) == 0 and sg.in_degree(v) == 0:
            sg.remove_node(v)

    #nx.write_gexf(sg, "full_g.gexf")
    
    s_node = p_asm_G.ctg_data[ctg_id][5][0][0]
    t_node = p_asm_G.ctg_data[ctg_id][5][-1][-1]

    for v, w in sg.edges():
        phase0 = sg.node[v]["phase"].split("_")
        phase1 = sg.node[w]["phase"].split("_")
        if phase0 == phase1:
            sg[v][w]["weight"] = 10
            sg[v][w]["score"] = 1
            sg[v][w]["label"] = "type0" 
        else:
            if phase0[0] == phase1[0]:
                sg[v][w]["weight"] = 1
                sg[v][w]["score"] = 100000
                sg[v][w]["label"] = "type1"
            else:
                sg[v][w]["weight"] = 5
                sg[v][w]["score"] = 50
                sg[v][w]["label"] = "type2"


    sg2 = sg.copy()
    edge_to_remove = set()
    for v, w in sg2.edges():
        if sg2[v][w]["src"] == "ext":
            edge_to_remove.add( (v, w) )
            rv, rw = reverse_end(v), reverse_end(w)
            edge_to_remove.add( (rw, rv) )

        if sg2.node[v]["phase"] ==  sg2.node[w]["phase"]:
            continue
        flag1 = 0
        flag2 = 0
        for e in sg2.out_edges(v):
            if sg2.node[e[0]]["phase"] ==  sg2.node[e[1]]["phase"]:
                flag1 = 1
                break
        if flag1 == 1:
            for e in sg2.in_edges(w):
                if sg2.node[e[0]]["phase"] ==  sg2.node[e[1]]["phase"]:
                    flag2 = 1
                    break
        if flag2 == 1:
            edge_to_remove.add( (v, w) )
            rv, rw = reverse_end(v), reverse_end(w)
            edge_to_remove.add( (rw, rv) )


    for v, w in list(edge_to_remove):
        sg2.remove_edge(v, w)
    try: 
        s_path = nx.shortest_path(sg2, source=s_node, target=t_node, weight="score")
    except nx.exception.NetworkXNoPath:
        s_path = nx.shortest_path(sg, source=s_node, target=t_node, weight="score")

    s_path_edges = [] 
    for i in xrange(len(s_path)-1):
        v = s_path[i]
        w = s_path[i+1]
        sg[v][w]["weight"] = 15
        s_path_edges.append( (v,w) )

    s_path_edge_set = set(s_path_edges)


    
    #output the updated primary contig
    p_tig_path = open(os.path.join(out_dir, "p_ctg_path.%s" % ctg_id),"w")
    p_tig_fa = open(os.path.join(out_dir, "p_ctg.%s.fa" % ctg_id),"w")
    edges_to_remove1 = set()
    edges_to_remove2 = set()
    with open(os.path.join(out_dir, "p_ctg_edges.%s" % ctg_id), "w") as f:
        seq = []
        for v, w in s_path_edges:
            sg[v][w]["h_edge"] = 1
            vrid = v.split(":")[0]
            wrid = w.split(":")[0]
            vphase = arid_to_phase.get(vrid, (-1,0))
            wphase = arid_to_phase.get(wrid, (-1,0))
            print >>f, "%s" % ctg_id, v, w, sg[v][w]["cross_phase"], sg[v][w]["src"], vphase[0], vphase[1], wphase[0], wphase[1]

            if sg.edge[v][w]["src"] == "OP":
                edge_data = p_asm_G.sg_edges[ (v,w) ]
            else:
                edge_data = h_asm_G.sg_edges[ (v,w) ]

            seq_id, s, t = edge_data[0]
            if s < t:
                seq.append(seqs[ seq_id ][ s:t ])
            else:
                seq.append("".join([ RCMAP[c] for c in seqs[ seq_id ][ s:t:-1 ] ]))
            print >>p_tig_path, "%s" % ctg_id, v, w, seq_id, s, t, edge_data[1], edge_data[2], "%d %d" % arid_to_phase.get(seq_id, (-1,0))
            sg[v][w]["tig_id"] = "%s" % ctg_id

            rv, rw = reverse_end(v), reverse_end(w)
            edges_to_remove1.add( (v, w) )
            edges_to_remove2.add( (rw, rv) )

        print >> p_tig_fa, ">%s" % ctg_id
        print >> p_tig_fa, "".join(seq)

    p_tig_fa.close()
    p_tig_path.close()



    sg2 = sg.copy()
    reachable1 = nx.descendants(sg2, s_node)
    sg2_r = sg2.reverse()
    reachable2 = nx.descendants(sg2_r, t_node)

    reachable_all = reachable1 | reachable2
    reachable_both = reachable1 & reachable2
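    # reachable1 holds nodes reachable from s_node; reachable2 holds nodes that can
    # reach t_node (descendants in the reversed graph). Anything outside their union
    # cannot lie on an s->t walk and is pruned from sg2 below.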


    for v, w in list(edges_to_remove2 | edges_to_remove1):
        sg2.remove_edge( v, w )

    for v, w in sg2.edges():
        if sg2[v][w]["cross_phase"] == "Y":
            sg2.remove_edge( v, w )

    for v in sg2.nodes():
        if v not in reachable_all:
            sg2.remove_node(v)

    for v in sg2.nodes():
        if sg2.out_degree(v) == 0 and sg2.in_degree(v) == 0:
            sg2.remove_node(v)
            continue
        if v in reachable_both:
            sg2.node[v]["reachable"] = 1
        else:
            sg2.node[v]["reachable"] = 0
        
    dump_graph = False # the code segment below is useful for showing the graph
    if dump_graph == True:
        nx.write_gexf(sg2, "%s_1.gexf" % ctg_id)
    
    p_path_nodes = set(s_path)
    p_path_rc_nodes = set( [reverse_end(v) for v in s_path] )

    sg2_nodes = set(sg2.nodes())
    for v in p_asm_G.get_sg_for_ctg(ctg_id).nodes():
        rv = reverse_end(v)
        p_path_rc_nodes.add( rv )
        if rv in sg2_nodes:
            sg2.remove_node(rv)
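    # Removing the reverse-complement twin of every primary-contig node keeps each
    # remaining component of sg2 in a single orientation, so a candidate haplotig
    # is considered only once rather than once per strand.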

    
    h_tig_path = open(os.path.join(out_dir, "h_ctg_path.%s" % ctg_id),"w")
    h_tig_fa = open(os.path.join(out_dir, "h_ctg_all.%s.fa" % ctg_id),"w")
    edges_to_remove = set()

    labelled_node = set()
    with open(os.path.join(out_dir, "h_ctg_edges.%s" % ctg_id),"w") as f:
        h_tig_id = 1
        h_paths = {}
        #print "number of components:", len([tmp for tmp in nx.weakly_connected_component_subgraphs(sg2)])
        for sub_hg_0 in nx.weakly_connected_component_subgraphs(sg2):
            sub_hg = sub_hg_0.copy()
            while sub_hg.size() > 5:
                #print "sub_hg size:", len(sub_hg.nodes())
                sources = [n for n in sub_hg.nodes() if sub_hg.in_degree(n) != 1 ]
                sinks = [n for n in sub_hg.nodes() if sub_hg.out_degree(n) != 1 ]
                

                #print "number of sources", len(sources),  sources
                #print "number of sinks", len(sinks), sinks
                if len(sources) == 0 and len(sinks) == 0: #TODO: the rest of the sub-graph consists of cycles; we need to break them and print a warning message
                    break

                longest = [] 

                eliminated_sinks = set()
                s_longest = {}
                for s in sources:
                    #print "test source",s, len(eliminated_sinks)
                    if s in labelled_node:
                        continue
                    s_path = []
                    for t in sinks:
                        if t in eliminated_sinks:
                            continue
                        try:
                            path = nx.shortest_path(sub_hg, s, t, weight="score")
                            #print "test path len:", len(path), s, t
                        except nx.exception.NetworkXNoPath:
                            path = []
                            continue
                        s_path.append( [ path, t ] )
                    s_path.sort(key = lambda x: -len(x[0]))
                    if len(s_path) == 0:
                        continue
                    s_longest[s] = s_path[0][0]
                    if len(s_longest[s]) > len(longest):
                        longest = s_longest[s]
                        #print "s longest", longest[0], longest[-1], len(longest)
                    for path, t in s_path[1:]:
                        eliminated_sinks.add(t)
                        #print "elimated t", t
                            

                if len(longest) == 0:
                    break

                s = longest[0]
                t = longest[-1]
                h_paths[ ( s, t ) ] = longest
                
                labelled_node.add(s)
                rs = reverse_end(s)
                labelled_node.add(rs)

                for v in longest:
                    sub_hg.remove_node(v)
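        # The while-loop above greedily peels off the longest remaining
        # source-to-sink path of each component as a haplotig candidate, deleting
        # its nodes until only small remnants or cycles are left; the recorded
        # paths are written out below.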

        for s, t in h_paths:
            longest = h_paths[ (s, t) ]
            #print "number of node in path", s,t,len(longest) 
            seq = []
            for v, w in zip(longest[:-1], longest[1:]):
                sg[v][w]["h_edge"] = 1
                if sg.edge[v][w]["src"] == "OP":
                    edge_data = p_asm_G.sg_edges[ (v,w) ]
                else:
                    edge_data = h_asm_G.sg_edges[ (v,w) ]
                vrid = v.split(":")[0]
                wrid = w.split(":")[0]
                vphase = arid_to_phase.get(vrid, (-1,0))
                wphase = arid_to_phase.get(wrid, (-1,0))
                print >>f, "%s_%03d" % (ctg_id, h_tig_id), v, w, sg[v][w]["cross_phase"], sg[v][w]["src"], vphase[0], vphase[1], wphase[0], wphase[1]

                if sg.edge[v][w]["src"] == "OP":
                    edge_data = p_asm_G.sg_edges[ (v,w) ]
                else:
                    edge_data = h_asm_G.sg_edges[ (v,w) ]

                seq_id, sp, tp = edge_data[0]
                if sp < tp:
                    seq.append(seqs[ seq_id ][ sp:tp ])
                else:
                    seq.append("".join([ RCMAP[c] for c in seqs[ seq_id ][ sp:tp:-1 ] ]))
                print >> h_tig_path, "%s_%03d" % (ctg_id, h_tig_id), v, w, seq_id, sp, tp, edge_data[1], edge_data[2], "%d %d" % arid_to_phase.get(seq_id, (-1,0))
                sg[v][w]["tig_id"] = "%s_%03d" % (ctg_id, h_tig_id)

                rv, rw = reverse_end(v), reverse_end(w)
                edges_to_remove.add( (v, w) )
                edges_to_remove.add( (rw, rv) )

            print >> h_tig_fa, ">%s_%03d" % (ctg_id, h_tig_id)
            print >> h_tig_fa, "".join(seq)
            h_tig_id += 1


    h_tig_fa.close()
    h_tig_path.close()

    dump_graph = False  # the code segment below is useful for showing the graph
    if dump_graph == True:
        for v, w in sg.edges():
            if "h_edge" not in sg[v][w]:
                sg[v][w]["h_edge"] = 0
            if v in reachable_all:
                sg.node[v]["reachable"] = 1
            else:
                sg.node[v]["reachable"] = 0
            if w in reachable_all:
                sg.node[w]["reachable"] = 1
            else:
                sg.node[w]["reachable"] = 0

        nx.write_gexf(sg, "%s_0.gexf" % ctg_id)
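
# reverse_end() and RCMAP are helpers assumed by the code above but not shown in
# this excerpt; a minimal sketch, assuming FALCON's node naming convention
# "<read_id>:B" / "<read_id>:E" for the two ends of a read:
RCMAP = dict(zip("ACGTacgtNn-", "TGCAtgcaNn-"))

def reverse_end(node_id):
    # flip the end marker, e.g. "000123:B" <-> "000123:E"
    rid, end = node_id.split(":")
    return rid + ":" + ("B" if end == "E" else "E")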
Ejemplo n.º 40
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-a',
                        '--assembly',
                        help='Contig assembly',
                        required=True)
    parser.add_argument('-g',
                        '--oriented_graph',
                        help='Oriented Graph of Contigs',
                        required=True)
    parser.add_argument('-s',
                        '--seppairs',
                        help='Separation pairs detected in the graph',
                        required=True)
    parser.add_argument('-o',
                        '--output',
                        help='Output file for scaffold sequences',
                        required=True)
    parser.add_argument('-e',
                        '--gfa',
                        help='Output file for graph in GFA format',
                        required=True)
    parser.add_argument('-f',
                        '--agp',
                        help='Output agp file for scaffolds',
                        required=True)
    parser.add_argument('-b', '--bub', help='Output bubbles', required=True)

    args = parser.parse_args()
    bub_output = open(args.bub, 'w')
    G = nx.read_gml(args.oriented_graph)
    write_GFA(G, args.gfa)
    #sys.exit()
    #G = nx.read_gml("small.gml")
    #nx.write_gexf(G,'original.gexf')
    pairmap = {}
    pair_list = []
    with open(args.seppairs, 'r') as f:
        for line in f:
            attrs = line.split()
            if attrs[0] <= attrs[1]:
                key = attrs[0] + '$' + attrs[1]
            else:
                key = attrs[1] + '$' + attrs[0]
            pairmap[key] = attrs[2:]
            pair_list.append(key)

    validated = {}
    contig2id = {}
    cnt = 0
    #write_dot(G,'graph.dot')

    # for key in pairmap:
    # 	print len(pairmap[key])
    '''
	1. Validate the bubbles first and store them in a map, keeping track of the source and sink of each bubble.
	'''
    valid_sources = {}  #valid source nodes
    valid_sink = {}  #valid sink nodes
    valid_bubble_id = 1  #valid bubble number, to be used in the new graph
    members = {}  #members of all the bubbles
    component_id_counter = 1
    valid_bubbles = {}  #store the subgraphs for the bubbles
    bubble_id_to_source = {}  #bubble to its source
    bubble_id_to_sink = {}  #bubble to its sink
    source_to_bubble = {}
    sink_to_bubble = {}
    member_to_bubble = {}
    bubble_to_graph = {}
    for key in pair_list:
        comp = pairmap[key]
        subg = G.subgraph(comp)
        contigs = key.split('$')
        to_check = True
        for each in comp:
            if each in members:
                to_check = False
                break

        if to_check:
            res = test_pair(subg, contigs[0], contigs[1], comp)
            #component is a valid bubble
            if res:
                #add valid members to the members:
                for each in comp:
                    members[each] = 1
                    member_to_bubble[each] = str(valid_bubble_id)

                #store the source and sink of the bubble
                valid_sources[contigs[0]] = 1
                valid_sink[contigs[1]] = 1
                valid_bubbles[valid_bubble_id] = subg
                bubble_id_to_sink[valid_bubble_id] = contigs[1]
                bubble_id_to_source[valid_bubble_id] = contigs[0]
                source_to_bubble[contigs[0]] = str(valid_bubble_id)
                sink_to_bubble[contigs[1]] = str(valid_bubble_id)
                bubble_to_graph[str(valid_bubble_id)] = subg
                valid_bubble_id += 1
                line = ''
                for each in subg.nodes():
                    line += str(each) + '\t'
                bub_output.write(line + '\n')

            else:

                res = test_pair(subg, contigs[1], contigs[0], comp)
                if res:
                    #add valid members to the members:
                    for each in comp:
                        members[each] = 1
                        member_to_bubble[each] = str(valid_bubble_id)
                    #store the source and sink of the bubble
                    valid_sources[contigs[1]] = 1
                    valid_sink[contigs[0]] = 1
                    valid_bubbles[valid_bubble_id] = subg
                    bubble_id_to_sink[valid_bubble_id] = contigs[0]
                    bubble_id_to_source[valid_bubble_id] = contigs[1]
                    source_to_bubble[contigs[1]] = str(valid_bubble_id)
                    sink_to_bubble[contigs[0]] = str(valid_bubble_id)
                    bubble_to_graph[str(valid_bubble_id)] = subg
                    valid_bubble_id += 1
                    line = ''
                    for each in subg.nodes():
                        line += str(each) + '\t'
                    bub_output.write(line + '\n')
    '''
	2. Now that we have all the valid bubbles, create a new graph and first add the edges that do not touch
	any bubble, then handle the bubble boundaries.
	'''
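    # Each validated bubble becomes a single collapsed node named by its bubble id;
    # member_to_bubble redirects edges that touched any member contig to that node.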

    G_new = nx.DiGraph()
    '''
	Now add nodes for the collapsed bubbles
	'''
    for key in valid_bubbles:
        G_new.add_node(str(key))

    for u, v, data in G.edges(data=True):
        if u not in members and v not in members:
            G_new.add_edge(u, v, data)

        if u not in members and v in members:
            G_new.add_edge(u, member_to_bubble[v], data)

        if v not in members and u in members:
            G_new.add_edge(member_to_bubble[u], v, data)
    '''
	Now add edges from all other nodes to the sources and sinks, if such edges exist.
	'''
    for node in G.nodes():
        if node not in valid_sources and node not in valid_sink:
            for source in valid_sources:
                if G.has_edge(node, source):
                    data = G.get_edge_data(node, source)
                    data['orientation'] = data['orientation'][0] + 'B'
                    G_new.add_edge(node, source_to_bubble[source], data)

                # if G.has_edge(source,node):
                # 	data = G.get_edge_data(source,node)
                # 	G_new.add_edge(source_to_bubble[source],node,data)

            for sink in valid_sink:
                if G.has_edge(sink, node):
                    data = G.get_edge_data(sink, node)
                    data['orientation'] = 'E' + data['orientation'][1]
                    G_new.add_edge(sink_to_bubble[sink], node, data)
                # if G.has_edge(node,sink):
                # 	data = G.get_edge_data(node,sink)
                # 	G_new.add_edge(node,sink_to_bubble[sink],data)
    '''
	Finally, add edges between sources and sinks if they exist in the original graph.
	'''

    for source in source_to_bubble:
        for sink in sink_to_bubble:
            if source_to_bubble[source] != sink_to_bubble[sink]:
                if G.has_edge(source, sink):
                    data = G.get_edge_data(source, sink)
                    data['orientation'] = 'BE'
                    G_new.add_edge(source_to_bubble[source],
                                   sink_to_bubble[sink], data)

                if G.has_edge(sink, source):
                    data = G.get_edge_data(sink, source)
                    data['orientation'] = 'EB'
                    G_new.add_edge(sink_to_bubble[sink],
                                   source_to_bubble[source], data)
    '''
	Add node attributes now
	'''
    node_info = {}
    for node in G.nodes(data=True):
        node_info[node[0]] = node[1]

    for node in G_new.nodes(data=True):
        if node[0] in node_info:
            info = node_info[node[0]]
            for each in info:
                node[1][each] = info[each]
            node[1]['type'] = 'contig'
        else:
            node[1]['type'] = 'bubble'

    '''
	In this simplified graph, for each weakly connected component, find the heaviest linear path. If the
	path goes through a bubble, choose the heaviest path within the bubble and continue.
	'''
    alternative_contigs = []  #stores all variants; tag these as variants while writing to file
    primary_contigs = []
    for subg in nx.weakly_connected_component_subgraphs(G_new):
        #print subg.nodes()
        # print 'here'
        #First get all edges
        edges = subg.edges(data=True)
        #sort edges by weights
        sorted_edges = sorted(edges,
                              key=lambda tup: tup[2]['bsize'],
                              reverse=True)
        #print sorted_edges
        #create a new graph
        G_sorted = nx.Graph()
        #add edges to this graph as long as no fork is created; this will be an
        #undirected graph with 'B' and 'E' nodes
        nodes = set()
        for edge in sorted_edges:
            u = edge[0]
            v = edge[1]
            data = edge[2]
            orientation = data['orientation']

            u = u + '$' + orientation[0]
            v = v + '$' + orientation[1]
            if u not in G_sorted.nodes() and v not in G_sorted.nodes():
                G_sorted.add_edge(u, v, data)
                nodes.add(u.split('$')[0])
                nodes.add(v.split('$')[0])
        #add edges between B and E nodes of same contig
        for node in nodes:
            G_sorted.add_edge(node + '$B', node + '$E')
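        # Every contig contributes two nodes, '<contig>$B' and '<contig>$E', joined
        # by an internal edge; since each node accepts at most one external edge,
        # every component of G_sorted has maximum degree 2 and is therefore a simple
        # path (or a cycle), which is what makes the path tracing below possible.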

        #print len(G_sorted.edges())

        #now trace out all linear paths in this, each will be a scaffold
        for small_subg in nx.connected_component_subgraphs(G_sorted):
            #print small_subg.edges()
            p = []
            for node in small_subg.nodes():
                if small_subg.degree(node) == 1:
                    p.append(node)

            if len(p) == 2:
                path = nx.shortest_path(small_subg, p[0], p[1])

                #print path
                #if path has a bubble node, insert the contigs on the heaviest path on the bubble
                new_path = []
                new_path_ind = 0
                for i in xrange(1, len(path), 2):
                    node = path[i].split('$')[0]
                    if node not in bubble_to_graph:
                        new_path.append(path[i - 1])
                        new_path.append(path[i])
                        new_path_ind += 2
                        continue

                    bubble_graph = bubble_to_graph[node]
                    #print node
                    curr_source = ''
                    curr_sink = ''
                    for node1 in bubble_graph.nodes():
                        if node1 in source_to_bubble:
                            curr_source = node1
                        if node1 in sink_to_bubble:
                            curr_sink = node1
                    try:
                        bubble_paths = get_variants(bubble_graph, curr_source,
                                                    curr_sink)
                    except:
                        continue

                    heaviest = bubble_paths[0]

                    #print "HEAVIEST: " + str(heaviest)
                    # if len(heaviest) == 1:
                    # 	continue

                    ori = path[i - 1].split('$')[1] + path[i].split('$')[1]
                    if ori == "EB":
                        heaviest.reverse()

                    for each in heaviest:
                        #print 'appending heaviest'
                        # print each
                        orient = G.node[each]['orientation']
                        if orient == 'FOW':
                            new_path.append(each + '$B')
                            new_path.append(each + '$E')
                            new_path_ind += 2

                        if orient == 'REV':
                            new_path.append(each + '$E')
                            new_path.append(each + '$B')
                            new_path_ind += 2

                    alt_paths = get_alternative_paths(bubble_graph, heaviest)
                    if len(alt_paths) > 0:
                        # use a separate index so as not to shadow the outer loop variable i
                        for j in xrange(0, len(alt_paths)):
                            #print 'in alternate path'
                            alt_path = []
                            curr_path = alt_paths[j]
                            for each in curr_path:
                                if G.node[each]['orientation'] == 'FOW':
                                    alt_path.append(each + '$B')
                                    alt_path.append(each + '$E')

                                if G.node[each]['orientation'] == 'REV':
                                    alt_path.append(each + '$E')
                                    alt_path.append(each + '$B')

                            alternative_contigs.append(alt_path)
                primary_contigs.append(new_path)
                #print new_path

    # print len(primary_contigs)
    # print alternative_contigs
    assembly = open(args.assembly, 'r')
    sequences = parse_fasta(assembly.readlines())
    ofile = open(args.output, 'w')
    scaffolded = {}
    agpfile = open(args.agp, 'w')
    scaffold_id = 1
    for scaffold in primary_contigs:
        scaff_string = ''
        line = ''
        scaff_len = 0
        begin = 1
        local_comp = 0
        curr_contig = ''
        for i in xrange(0, len(scaffold) - 1, 2):
            line += 'scaffold_' + str(scaffold_id)
            line += '\t'
            line += str(begin) + '\t'
            curr = scaffold[i]
            next = scaffold[i + 1]
            curr_len = len(sequences[curr.split('$')[0]])
            scaff_len += curr_len
            last = curr_len + begin - 1
            line += str(last) + '\t'
            begin = last + 1
            line += str(local_comp) + '\t'
            local_comp += 1
            scaffolded[curr.split('$')[0]] = True
            scaffolded[next.split('$')[0]] = True
            contig = curr.split('$')[0]
            line += ('W\t' + contig + '\t1\t' + str(curr_len) + '\t')
            start = curr.split('$')[1]
            end = next.split('$')[1]
            if start == 'B' and end == 'E':
                scaff_string += sequences[contig]
                line += '+'
            else:
                scaff_string += revcompl(sequences[contig])
                line += '-'
            agpfile.write(line + '\n')
            line = ''
            if i != len(scaffold) - 2:
                for j in xrange(0, 100):
                    scaff_string += 'N'

        chunks = [
            scaff_string[i:i + 80] for i in xrange(0, len(scaff_string), 80)
        ]
        ofile.write('>scaffold_' + str(scaffold_id) + '\n')
        for chunk in chunks:
            ofile.write(chunk + '\n')
        scaffold_id += 1

    for scaffold in alternative_contigs:
        scaff_string = ''
        line = ''
        scaff_len = 0
        begin = 1
        local_comp = 0
        curr_contig = ''

        for i in xrange(0, len(scaffold) - 1, 2):
            line += 'scaffold_' + str(scaffold_id) + '_variant'
            line += '\t'
            line += str(begin) + '\t'
            curr = scaffold[i]
            next = scaffold[i + 1]
            curr_len = len(sequences[curr.split('$')[0]])
            scaff_len += curr_len
            last = curr_len + begin - 1
            line += str(last) + '\t'
            begin = last + 1
            line += str(local_comp) + '\t'
            local_comp += 1
            scaffolded[curr.split('$')[0]] = True
            scaffolded[next.split('$')[0]] = True
            contig = curr.split('$')[0]
            line += ('W\t' + contig + '\t1\t' + str(curr_len) + '\t')
            start = curr.split('$')[1]
            end = next.split('$')[1]
            if start == 'B' and end == 'E':
                scaff_string += sequences[contig]
                line += '+'
            else:
                scaff_string += revcompl(sequences[contig])
                line += '-'
            agpfile.write(line + '\n')
            line = ''
            if i != len(scaffold) - 2:
                for j in xrange(0, 100):
                    scaff_string += 'N'
        chunks = [
            scaff_string[i:i + 80] for i in xrange(0, len(scaff_string), 80)
        ]
        ofile.write('>scaffold_' + str(scaffold_id) + '_variant\n')
        for chunk in chunks:
            ofile.write(chunk + '\n')
        scaffold_id += 1

    for contig in sequences:
        if contig not in scaffolded:
            scaff_string = sequences[contig]
            chunks = [
                scaff_string[i:i + 80]
                for i in xrange(0, len(scaff_string), 80)
            ]
            line = ''
            line += 'scaffold_' + str(scaffold_id) + '\t'
            line += '1\t'  #AGP object coordinates are 1-based
            line += str(len(scaff_string)) + '\t'
            line += '1\t'
            line += 'W\t' + contig + '\t1\t' + str(len(scaff_string)) + '\t+'
            agpfile.write(line + '\n')
            ofile.write('>scaffold_' + str(scaffold_id) + '\n')
            for chunk in chunks:
                ofile.write(chunk + '\n')
            scaffold_id += 1

    ofile.close()
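
# revcompl() and parse_fasta() are assumed helpers not shown in this excerpt;
# minimal sketches consistent with how they are used above:
def revcompl(seq):
    # reverse complement; unexpected characters become 'N'
    comp = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
    return ''.join(comp.get(c, 'N') for c in reversed(seq.upper()))

def parse_fasta(lines):
    # map each FASTA header (up to the first whitespace) to its sequence
    seqs, name = {}, None
    for line in lines:
        line = line.strip()
        if line.startswith('>'):
            name = line[1:].split()[0]
            seqs[name] = ''
        elif name is not None:
            seqs[name] += line
    return seqs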
Ejemplo n.º 41
0
def get_largest_wcc(G):
    print("Getting largest WCC...")
    largest_weakly = max(nx.weakly_connected_component_subgraphs(G), key=len)
    return largest_weakly
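
# nx.weakly_connected_component_subgraphs() was deprecated in networkx 2.1 and
# removed in 2.4; on current networkx the same result can be obtained with:
#
# def get_largest_wcc(G):
#     print("Getting largest WCC...")
#     largest_cc = max(nx.weakly_connected_components(G), key=len)
#     return G.subgraph(largest_cc).copy()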
Ejemplo n.º 42
0
def compartmentalize_skeletongroup( skeleton_id_list, project_id, **kwargs ):

    skelgroup = SkeletonGroup( skeleton_id_list, project_id )

    compartment_graph_of_skeletons = {}
    resultgraph = nx.DiGraph()

    for skeleton_id, skeleton in skelgroup.skeletons.items():
        if 'confidence_threshold' in kwargs:
            confidence_filtering( skeleton, kwargs['confidence_threshold'] )
        elif 'edgecount' in kwargs:
            edgecount_filtering( skeleton, kwargs['edgecount'] )

        subgraphs = list(nx.weakly_connected_component_subgraphs( skeleton.graph))
        compartment_graph_of_skeletons[ skeleton_id ] = subgraphs

        for i,subg in enumerate(subgraphs):
            for nodeid, d in subg.nodes(data=True):
                d['compartment_index'] = i
                skeleton.graph.nodes[nodeid]['compartment_index'] = i

            if len(skeleton.neuron.name) > 30:
                neuronname = f'{skeleton.neuron.name[:30]}... [{i}]'
            else:
                neuronname = f'{skeleton.neuron.name} [{i}]'

            resultgraph.add_node(f'{skeleton_id}_{i}', **{
                'neuronname': neuronname,
                'skeletonid': str(skeleton_id),
                'compartment_index': i,
                'node_count': subg.number_of_nodes(),
            })

    connectors:Dict = {}
    for skeleton_id, skeleton in skelgroup.skeletons.items():
        for connector_id, v in skeleton.connected_connectors.items():
            if connector_id not in connectors:
                connectors[connector_id] = {
                    'pre': [], 'post': []
                }

            if len(v['presynaptic_to']):
                # add the skeleton id for each treenode that is in v['presynaptic_to']
                # This can duplicate skeleton id entries which is correct
                for e in v['presynaptic_to']:
                    skeleton_compartment_id = f'{skeleton_id}_{skeleton.graph.nodes[e]["compartment_index"]}'
                    connectors[connector_id]['pre'].append( skeleton_compartment_id )

            if len(v['postsynaptic_to']):
                for e in v['postsynaptic_to']:
                    skeleton_compartment_id = f'{skeleton_id}_{skeleton.graph.nodes[e]["compartment_index"]}'
                    connectors[connector_id]['post'].append( skeleton_compartment_id )

    # merge connectors into graph
    for connector_id, v in connectors.items():
        for from_skeleton in v['pre']:
            for to_skeleton in v['post']:

                if not resultgraph.has_edge( from_skeleton, to_skeleton ):
                    resultgraph.add_edge(from_skeleton, to_skeleton, **{
                        'count': 0,
                        'connector_ids': set(),
                    })

                # Graph.edge was removed in networkx 2.x; plain indexing works in both 1.x and 2.x
                resultgraph[from_skeleton][to_skeleton]['count'] += 1
                resultgraph[from_skeleton][to_skeleton]['connector_ids'].add( connector_id )


    return resultgraph
Ejemplo n.º 43
0
    ######################################################################
    # Store figures of call graph
    ######################################################################

    if options['output']:
        name2URL = lambda name: name2URLheader(name, file_list)

        # Draw the whole call graph
        A = graphBrownie.to_abigraph(options['styles'],
                                     type=defs_type,
                                     max_label=40,
                                     name2URL=name2URL)
        A.layout("dot")
        nameOut, extOut = os.path.splitext(options['output'])
        A.draw("%s%s" % (nameOut, extOut))

        # Draw subgraphs if needed
        if options['subgraphs']:
            from networkx import to_agraph, weakly_connected_component_subgraphs
            i = 0
            for subgraphBrownie in weakly_connected_component_subgraphs(\
                graphBrownie.clone(type=defs_type)):
                if len(subgraphBrownie.nodes()) > 1:
                    i += 1
                    # A = subgraphBrownie.to_agraph()
                    A = subgraphBrownie.to_abigraph(options['styles'],
                                                    max_label=40)
                    A.layout('dot')
                    A.draw("%s%i%s" % (nameOut, i, extOut))
Ejemplo n.º 44
0
    def _parse_loop_graph(self, subg, bigg):
        """
        Create a Loop object for a strongly connected graph, and any strongly
        connected subgraphs, if possible.

        :param subg:    A strongly connected subgraph.
        :param bigg:    The graph which subg is a subgraph of.

        :return:        A list of Loop objects, some of which may be inside others,
                        but all need to be documented.
        """
        loop_body_nodes = subg.nodes()[:]
        entry_edges = []
        break_edges = []
        continue_edges = []
        entry_node = None
        for node in loop_body_nodes:
            for pred_node in bigg.predecessors(node):
                if pred_node not in loop_body_nodes:
                    if entry_node is not None and entry_node != node:
                        l.warning("Bad loop: more than one entry point (%s, %s)", entry_node, node)
                        return None, []
                    entry_node = node
                    entry_edges.append((pred_node, node))
                    subg.add_edge(pred_node, node)
            for succ_node in bigg.successors(node):
                if succ_node not in loop_body_nodes:
                    break_edges.append((node, succ_node))
                    subg.add_edge(node, succ_node)
        if entry_node is None:
            entry_node = min(loop_body_nodes, key=lambda n: n.addr)
            l.info("Couldn't find entry point, assuming it's the first by address (%s)", entry_node)

        acyclic_subg = subg.copy()
        for pred_node in subg.predecessors(entry_node):
            if pred_node in loop_body_nodes:
                continue_edge = (pred_node, entry_node)
                acyclic_subg.remove_edge(*continue_edge)
                continue_edges.append(continue_edge)
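        # Removing the back-edges into the entry leaves the loop body acyclic, so
        # any nested loops can be recovered by the recursive call below.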

        removed_exits = {}
        removed_entries = {}
        tops, alls = self._parse_loops_from_graph(acyclic_subg)
        for subloop in tops:
            if subloop.entry in loop_body_nodes:
                # break existing entry edges, exit edges
                # re-link in loop object
                # the exception logic is to handle when you have two loops adjacent to each other
                # you gotta link the two loops together and remove the dangling edge
                for entry_edge in subloop.entry_edges:
                    try:
                        subg.remove_edge(*entry_edge)
                    except networkx.NetworkXError:
                        if entry_edge in removed_entries:
                            subg.add_edge(removed_entries[entry_edge], subloop)
                            try:
                                subg.remove_edge(removed_entries[entry_edge], entry_edge[1])
                            except networkx.NetworkXError:
                                pass
                        else:
                            raise
                    else:
                        subg.add_edge(entry_edge[0], subloop)
                        removed_entries[entry_edge] = subloop
                for exit_edge in subloop.break_edges:
                    try:
                        subg.remove_edge(*exit_edge)
                    except networkx.NetworkXError:
                        if exit_edge in removed_entries:
                            subg.add_edge(subloop, removed_entries[exit_edge])
                            try:
                                subg.remove_edge(exit_edge[0], removed_entries[exit_edge])
                            except networkx.NetworkXError:
                                pass
                        else:
                            raise
                    else:
                        subg.add_edge(subloop, exit_edge[1])
                        removed_exits[exit_edge] = subloop
                # next() over a generator works on both Python 2 and 3 (filter()
                # returns a non-indexable iterator on Python 3)
                subg = next(g for g in networkx.weakly_connected_component_subgraphs(subg)
                            if entry_node in g.nodes())

        me = Loop(entry_node,
             entry_edges,
             break_edges,
             continue_edges,
             loop_body_nodes,
             subg,
             tops[:])
        return me, [me] + alls
Ejemplo n.º 45
0
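# This excerpt begins mid-loop; a plausible enclosing context (assumed, not part
# of the original source) reads a GFA file line by line:
#
# G = nx.DiGraph()
# with open("assembly.gfa") as gfa:
#     for raw in gfa:
#         line = raw.strip().split("\t")
#         line_type = line[0]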
        # S 238024 ACCAATTAT KC:i:37210
        if line_type == "S":
            v_name = int(line[1])
            v_length = len(line[2])
            G.add_node(v_name, length=v_length)

        # L 238322 + 19590 - 55M
        if line_type == "L":
            v1 = int(line[1])
            v2 = int(line[3])
            G.add_edge(v1, v2)

# keep only the largest component
new_G = nx.DiGraph()
for g in nx.weakly_connected_component_subgraphs(G):
    #print(g.number_of_nodes())
    if new_G.number_of_nodes() < g.number_of_nodes():
        new_G = g.copy()
G = new_G.copy()

# Table with references
# Read the reference file as-is
df_ref = pd.read_csv("refs/refs_edges.txt", header=None, names=["e"])
df_ref = df_ref["e"].str.split('\t', 1, expand=True)
df_ref.columns = ["e_id", "strains"]
df_ref = df_ref.set_index("e_id")
df_ref.index = df_ref.index.astype("int")

# Keep only the edges from the big component:
df_ref = df_ref.loc[list(G.nodes)]
Ejemplo n.º 46
0
def supplementary4():

    #Correlation of BONITA's node impact score with graph theoretical measures

    codes = getCodes()
    maxReps = 6
    ersAllNodes = {}
    rnAllNodes = {}
    ruleLengths = {}
    allRes = {}  #three-layered dictionary to store results

    for code in codes:
        temp_df2 = pd.DataFrame()
        allRes[str(code)] = {}
        originalGraph = nx.read_gpickle("gpickles/" + code + ".gpickle")
        graph = originalGraph

        #Graph theoretic measures
        graph = max(
            nx.weakly_connected_component_subgraphs(originalGraph), key=len
        )  # get around the problem of disconnected graphs # Refs: https://stackoverflow.com/questions/26637644/in-r-how-do-igraph-and-statnet-handle-disconnected-graphs-in-measuring-network, http://reports-archive.adm.cs.cmu.edu/anon/isr2011/CMU-ISR-11-113.pdf,

        if len(graph) >= 3:
            eigenCentrality = nx.eigenvector_centrality_numpy(
                graph
            )  # get around the problem of failing when there are multiple eigenvalues with the same (largest) magnitude, perhaps when there are few peripheral nodes (star-like graph). See: https://stackoverflow.com/questions/43208737/using-networkx-to-calculate-eigenvector-centrality?rq=1
            hubs, authorities = nx.hits(
                graph, max_iter=10000, tol=1.0e-7, normalized=True
            )  #changed parameters so that calculation convergences. For alternative approach see: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.415.843&rep=rep1&type=pdf. Now running into divide by zero error, not sure how to fix that apart from remove normalization; this would be very wrong though. Edit: increased max_iter to 10000 and this seems to have fixed the problem. NB: the original publication suggests a max_iter of 20
            degreeCentrality = nx.degree_centrality(graph)
            cfCentrality = nx.current_flow_closeness_centrality(
                graph.to_undirected())
            eccentCentrality = nx.eccentricity(graph.to_undirected())
            betweenCentrality = nx.betweenness_centrality(graph)

            for iteration in range(1, maxReps):
                allRes[str(code)][str(iteration)] = {}
                pickleFile = str('pickles/' + code + '_' + str(iteration) +
                                 '_local1.pickle')
                outputList = pickle.load(
                    open(pickleFile, 'rb')
                )  #python2 version #outputList=pickle.load(open(pickleFile, 'rb'), encoding='latin1') #python3 version
                bruteOut1, dev, storeModel, storeModel3, equivalents, dev2 = [
                    outputList[k] for k in range(len(outputList))
                ]
                model1 = modelHolder(storeModel3)

                if os.path.isfile(
                        'pickles/' + code + '_' + str(iteration) + '_scores1.pickle'):
                    pathVals = pickle.Unpickler(
                        open(
                            'pickles/' + code + '_' + str(iteration) +
                            '_scores1.pickle', "rb")).load()
                    ImportanceVals = {}
                else:
                    print(
                        "Importance scores not found, setting all values to 0")
                    isGeneric4 = True
                    pathVals = [0] * len(model1.nodeList)

                for node in range(0, len(model1.nodeList)):
                    allRes[str(code)][str(iteration)][str(
                        model1.nodeList[node])] = []
                    ImportanceVals = pathVals[node]
                    inDegree = originalGraph.in_degree(model1.nodeList[node])
                    if model1.nodeList[node] in graph.nodes(
                    ):  #remember that we have just selected the largest component of the graph for graph theoretic analysis
                        allRes[str(code)][str(iteration)][str(
                            model1.nodeList[node])] = [
                                ImportanceVals,
                                degreeCentrality[model1.nodeList[node]],
                                eigenCentrality[model1.nodeList[node]],
                                hubs[model1.nodeList[node]],
                                authorities[model1.nodeList[node]], inDegree,
                                cfCentrality[model1.nodeList[node]],
                                eccentCentrality[model1.nodeList[node]],
                                betweenCentrality[model1.nodeList[node]]
                            ]
                    else:
                        allRes[str(code)][str(iteration)][str(
                            model1.nodeList[node])] = [
                                ImportanceVals,
                                float('NaN'),
                                float('NaN'),
                                float('NaN'),
                                float('NaN'), inDegree,
                                float('NaN'),
                                float('NaN'),
                                float('NaN')
                            ]
        else:
            continue

    allRes_flat = flatdict.FlatDict(allRes)
    allRes_df = pd.DataFrame(allRes_flat.items())

    allRes_df[[
        "ImportanceVals", "degreeCentrality", "eigenCentrality", "hubs",
        "auth", "inDegree", "cfCentrality", "eccentCentrality",
        "betweenCentrality"
    ]] = pd.DataFrame(
        [item for sublist in allRes_df[[1]].values for item in sublist],
        index=allRes_df.index)
    allRes_df[["Pathway", "Iteration", "Node"]] = pd.DataFrame(
        [x[0].split(":", 2) for x in allRes_df[[0]].values],
        index=allRes_df.index)
    allRes_df[[
        "ImportanceVals", "degreeCentrality", "eigenCentrality", "hubs",
        "auth", "inDegree", "cfCentrality", "eccentCentrality",
        "betweenCentrality"
    ]] = allRes_df[[
        "ImportanceVals", "degreeCentrality", "eigenCentrality", "hubs",
        "auth", "inDegree", "cfCentrality", "eccentCentrality",
        "betweenCentrality"
    ]].apply(pd.to_numeric, axis=1)

    # Aggregate results by iteration
    allRes_df = allRes_df.groupby(['Node', 'Pathway'])
    allRes_df = allRes_df["ImportanceVals", "degreeCentrality",
                          "eigenCentrality", "hubs", "auth", "cfCentrality",
                          "eccentCentrality", "betweenCentrality"].agg(np.mean)
    # Overall Pearson correlation between importance metrics
    sns.set_context(context='paper', font_scale=1.1)
    sns.set_style("ticks")
    #fig, ax = plt.subplots(figsize=[5.2,4])
    temp_correl = allRes_df.loc[:, [
        "ImportanceVals", "degreeCentrality", "eigenCentrality", "hubs",
        "auth", "cfCentrality", "eccentCentrality", "betweenCentrality"
    ]].corr(method='pearson')
    mask = np.triu(temp_correl, k=1)
    figTemp = sns.heatmap(
        temp_correl,
        xticklabels=[
            "BONITA Score", "Degree Centrality", "Eigenvector Centrality",
            "Hub Score", "Authority", "Current Flow Centrality",
            "Eccentricity Centrality", "Betweenness Centrality"
        ],
        yticklabels=[
            "BONITA Score", "Degree Centrality", "Eigenvector Centrality",
            "Hub Score", "Authority", "Current Flow Centrality",
            "Eccentricity Centrality", "Betweenness Centrality"
        ],
        mask=mask,
        square=True,
        vmax=1,
        vmin=-1,
        center=0,
        cmap='RdBu_r',
        linewidths=.5,
        cbar_kws={
            "shrink": .5,
            'label': 'Pearson Correlation'
        },
        annot=True)  #, annot_kws={'fontsize': 'large'})

    plt.xticks(rotation=90)
    figTemp.figure.tight_layout()
    figTemp = figTemp.get_figure()
    figTemp.savefig("Overall_Pearson_correlation.svg")
    plt.close()

    # Overall Spearman correlation between importance metrics
    sns.set_context(context='paper', font_scale=1.1)
    sns.set_style("ticks")
    #fig, ax = plt.subplots(figsize=[5.2,4])
    temp_correl = allRes_df.loc[:, [
        "ImportanceVals", "degreeCentrality", "eigenCentrality", "hubs",
        "auth", "cfCentrality", "eccentCentrality", "betweenCentrality"
    ]].corr(method='spearman')
    mask = np.triu(temp_correl, k=1)
    figTemp = sns.heatmap(
        temp_correl,
        xticklabels=[
            "BONITA Score", "Degree Centrality", "Eigenvector Centrality",
            "Hub Score", "Authority", "Current Flow Centrality",
            "Eccentricity Centrality", "Betweenness Centrality"
        ],
        yticklabels=[
            "BONITA Score", "Degree Centrality", "Eigenvector Centrality",
            "Hub Score", "Authority", "Current Flow Centrality",
            "Eccentricity Centrality", "Betweenness Centrality"
        ],
        mask=mask,
        square=True,
        vmax=1,
        vmin=-1,
        center=0,
        cmap='RdBu_r',
        linewidths=.5,
        cbar_kws={
            "shrink": .5,
            'label': 'Spearman Correlation'
        },
        annot=True)  #, annot_kws={'fontsize': 'large'})

    plt.xticks(rotation=90)
    figTemp.figure.tight_layout()
    figTemp = figTemp.get_figure()
    figTemp.savefig("Overall_Spearman_correlation.svg")
    plt.close()