Code Example #1
File: utils.py Project: russelljjarvis/3Drodent
 def dumpjson_graph(self):
     assert self.COMM.rank==0        
     import json
     import networkx as nx
     from networkx.readwrite import json_graph
     h=self.h
     #import pickle
     #json_graph.node_link_graph
     #Create a whole network of both transmitter types.
     self.global_whole_net=nx.compose(self.global_ecg, self.global_icg)
     self.global_whole_net.remove_nodes_from(nx.isolates(self.global_whole_net))
     self.global_icg.remove_nodes_from(nx.isolates(self.global_icg))
     self.global_ecg.remove_nodes_from(nx.isolates(self.global_ecg))
     
     d =[]
     whole=nx.to_numpy_matrix(self.global_whole_net)  
     #TODO sort whole (network) here in Python, as Python is arguably easier to understand than JS. 
     d.append(whole.tolist()) 
     #d.append(self.global_whole_net.tolist())
     #d.append(json_graph.node_link_data(self.global_whole_net))                 
     d.append(self.global_namedict)
     json.dump(d, open('web/js/global_whole_network.json','w'))
     d=json.load(open('web/js/global_whole_network.json','r'))
     #read the object just to prove that is readable.
     d=None #destroy the object.    
     print('Wrote node-link JSON data to web/js/global_whole_network.json')
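A minimal standalone sketch of the same compose-and-dump pattern, written against current NetworkX (2.x or later, where nx.isolates returns an iterator and nx.to_numpy_array replaces to_numpy_matrix); the graphs and output path here are illustrative stand-ins for self.global_ecg / self.global_icg:

import json
import networkx as nx

ecg = nx.erdos_renyi_graph(10, 0.2)   # stand-in for one transmitter-type graph
icg = nx.erdos_renyi_graph(10, 0.2)   # stand-in for the other transmitter-type graph
whole = nx.compose(ecg, icg)
whole.remove_nodes_from(list(nx.isolates(whole)))   # materialize the iterator before mutating
data = [nx.to_numpy_array(whole).tolist(), {n: str(n) for n in whole.nodes()}]
with open('whole_network.json', 'w') as fh:
    json.dump(data, fh)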
Code Example #2
def create_3comms_bipartite(n,m,p,No_isolates=True):
    
    import community as comm

    from networkx.algorithms import bipartite as bip
    u=0
    while  True:
        G=nx.bipartite_random_graph(n,m,p)
        list_of_isolates=nx.isolates(G)
        if No_isolates:
            G.remove_nodes_from(nx.isolates(G))
        partition=comm.best_partition(G)
        sel=max(partition.values())
        if sel==2 and nx.is_connected(G):
            break
        u+=1
        print u,sel
    ndlss=bip.sets(G)
    ndls=[list(i) for i in ndlss]
    slayer1=ndls[0]
    slayer2=ndls[1]
    layer1=[i for i,v in partition.items() if v==0]
    layer2=[i for i,v in partition.items() if v==1]
    layer3=[i for i,v in partition.items() if v==2]
    edgeList=[]
    for e in G.edges():
        if (e[0] in slayer1 and e[1] in slayer2) or (e[0] in slayer2 and e[1] in slayer1):
            edgeList.append(e)
    return G,layer1,layer2,layer3,slayer1,slayer2,edgeList,partition
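A hedged usage sketch, assuming the snippet's original environment (Python 2, NetworkX 1.x, and the python-louvain package imported as community); the sizes and probability are arbitrary, and the loop may take several attempts before Louvain finds exactly three communities on a connected sample:

G, layer1, layer2, layer3, slayer1, slayer2, edgeList, partition = create_3comms_bipartite(20, 20, 0.1)
print(len(layer1) + len(layer2) + len(layer3))   # every remaining node falls into one of the 3 communities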
Code Example #3
def synthetic_three_level(n,p1,p2,p3,J_isolates=False,F_isolates=False,D_isolates=False):#,isolate_up=True,isolate_down=True):
    
    k=n

    J=nx.erdos_renyi_graph(n,p1) #The first layer graph
    Jis = nx.isolates(J)
    F=nx.erdos_renyi_graph(n,p2) #The second layer graph
    Fis = nx.isolates(F)
    D=nx.erdos_renyi_graph(n,p3) #The third layer graph
    Dis = nx.isolates(D)

    def translation_graph(J,F,D):
        H1=nx.Graph()
        H2=nx.Graph()
        for i in range(n):
            H1.add_edges_from([(J.nodes()[i],F.nodes()[i])])
            H2.add_edges_from([(F.nodes()[i],D.nodes()[i])])
        return H1, H2

    Jed = set(J.edges())
    Fed = set(F.edges())
    Ded = set(D.edges())
    l=[Jed,Fed,Ded]
    lu = list(set.union(*l))
    JFD=nx.Graph()
    JFD.add_edges_from(lu)

    G=nx.Graph()  #The synthetic three-layer graph
    
    # Relabeling node maps
    
    mappingF={}
    for i in range(2*n):
        mappingF[i]=n+i
    FF=nx.relabel_nodes(F,mappingF,copy=True)
    
    mappingD={}
    for i in range(2*n):
        if i >n-1:
            mappingD[i]=i-n
        else:
            mappingD[i]=2*n+i
    DD=nx.relabel_nodes(D,mappingD,copy=True)
    
    H1, HH2 = translation_graph(J,FF,DD)
    
    G.add_edges_from(J.edges())
    G.add_edges_from(H1.edges())
    G.add_edges_from(DD.edges())
    G.add_edges_from(HH2.edges())
    G.add_edges_from(FF.edges())

    edgeList = []
    for e in H1.edges():
        edgeList.append(e)
    for e in HH2.edges():
        edgeList.append(e)
    
    return G, J, FF, DD, JFD, edgeList  
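An illustrative call, assuming NetworkX 1.x semantics (translation_graph indexes G.nodes() as a list, which 2.x no longer allows); the size and probabilities are arbitrary:

G, J, FF, DD, JFD, edgeList = synthetic_three_level(10, 0.3, 0.3, 0.3)
print(G.number_of_nodes())   # 3*n nodes: one relabeled copy of each layer
print(len(edgeList))         # 2*n inter-layer translation edges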
Code Example #4
File: test.py Project: michaly/Risk_Ranking_System
def test_dim_error():
    import sys
    authority_dict={}
    graph_file = '/home/michal/SALSA_files/tmp/real_run/middle_graph_authority'
    G_new = gm.read_graph_from_file(graph_file)
    isolates = nx.isolates(G_new)
    print 'num of isolates: '+str(len(isolates)); sys.stdout.flush()
    num_of_not_isolates = G_new.number_of_nodes() - len(isolates)
    authority_dict = {}
    classes = nx.strongly_connected_component_subgraphs(G_new)
    print 'num of classes including isolates: '+str(len(classes)); sys.stdout.flush()
    #remove classes of isolated nodes:   
    classes[:] = [ c for idx,c in enumerate(classes) if c.nodes()[0] not in isolates ]
    
    print 'num of classes NOT including isolates: '+str(len(classes)); sys.stdout.flush()
    for subG in classes:
        #print type(subG)
        out_file = ''.join(['/home/michal/SALSA_files/tmp/real_run/graph_',str(classes.index(subG))])
        gm.write_graph_to_file(subG, out_file)
        tmp_d = salsa.eig_calc(subG, normalize=num_of_not_isolates)    #power_iteration(subG)
    '''    
        for k,v in tmp_d.items():
            authority_dict[G.nodes()[k]] = v
        #print power_iteration(subG, tol=1.0e-10)
    for i in isolates:
        authority_dict[G.nodes()[i]] = 0
    #print authority_dict
    print '\n--- calc_salsa_per_class took: '+str(datetime.now()-startTime); sys.stdout.flush()'''
    return
Code Example #5
def correlation_betweenness_degree_on_ErdosNetwork():
    G = nx.read_pajek("dataset/Erdos971.net")
    isolated_nodes = nx.isolates(G)
    G.remove_nodes_from(isolated_nodes)

    print nx.info(G)
    ND, ND_lambda = ECT.get_number_of_driver_nodes(G)
    print "ND = ", ND
    print "ND lambda:", ND_lambda
    ND, driverNodes = ECT.get_driver_nodes(G)
    print "ND =", ND

    degrees = []
    betweenness = []
    tot_degree = nx.degree_centrality(G)
    tot_betweenness = nx.betweenness_centrality(G,weight=None)

    for node in driverNodes:
        degrees.append(tot_degree[node])
        betweenness.append(tot_betweenness[node])

    with open("results/driver_degree_Erdos.txt", "w") as f:
        for x in degrees:
            print >> f, x
    with open("results/driver_betweenness_Erdos.txt", "w") as f:
        for x in betweenness:
            print >> f, x
    with open("results/tot_degree_Erdos.txt", "w") as f:
        for key, value in tot_degree.iteritems():
            print >> f, value

    with open("results/tot_betweenness_Erdos.txt", "w") as f:
        for key, value in tot_betweenness.iteritems():
            print >> f, value
Code Example #6
File: helper.py Project: Aurite/twitterlyzer
def reciprocated_graph(D):
	G=D.to_undirected() # copy 
	for (u,v) in D.edges(): 
		if not D.has_edge(v,u): 
			G.remove_edge(u,v)
	G.remove_nodes_from(nx.isolates(G))
	return G
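A small usage sketch, assuming NetworkX 1.x where nx.isolates returns a list (on 2.x the call inside reciprocated_graph would need wrapping in list()):

import networkx as nx

D = nx.DiGraph([(1, 2), (2, 1), (2, 3)])   # only the 1<->2 pair is reciprocated
R = reciprocated_graph(D)
print(sorted(R.edges()))                   # [(1, 2)]; node 3 is dropped as an isolate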
Code Example #7
def make_shared_user_editing_network(alter_revisions_dict,threshold):
    # Make the graph
    net = nx.DiGraph()
    for editor,revisions in alter_revisions_dict.iteritems():
        articles = [r['title'] for r in revisions]
        for num,article in enumerate(articles[:-1]):
            if net.has_edge(article,articles[num+1]):
                net[article][articles[num+1]]['weight'] += 1
            else:
                net.add_edge(article,articles[num+1],weight=1)
                
    # If edge is below threshold, remove it            
    for i,j,d in net.edges_iter(data=True):
        if d['weight'] < threshold:
            net.remove_edge(i,j)
            
    # Remove self-loops
    for i,j,d in net.edges_iter(data=True):
        if i == j:
            net.remove_edge(i,j)
    
    # Remove resulting isolates
    isolates = nx.isolates(net)
    for isolate in isolates:
        net.remove_node(isolate)
    
    return net
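A hedged sketch of the expected input shape (editor name -> list of revision dicts carrying a 'title' key); the names are made up, and the snippet assumes the Python 2 / NetworkX 1.x API it was written against (iteritems, edges_iter, isolates as a list):

alter_revisions = {
    'editor_a': [{'title': 'Page1'}, {'title': 'Page2'}, {'title': 'Page1'}],
    'editor_b': [{'title': 'Page2'}, {'title': 'Page1'}],
}
net = make_shared_user_editing_network(alter_revisions, threshold=1)
print(net.edges(data=True))   # Page1->Page2 (weight 1) and Page2->Page1 (weight 2)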
Code Example #8
def graph_preprocessing_with_counts(G_input=None, save_file=None):

    if not G_input:
        graph_file = os.path.join(work_dir, "adj_graph.p")
        G = nx.read_gpickle(graph_file)
    else:
        G = G_input.copy()

    print "Raw graph size:", G.size()
    print "Raw graph nodes", G.number_of_nodes()

    profile2prob = {l.split()[0]: float(l.split()[1]) for l in open(os.path.join(work_dir, 'profile_weight.txt'))}

    for edge in G.edges(data=True):
        nodes = edge[:2]
        _weight = edge[2]['weight']
        _count = edge[2]['count']
        
        if _count < 3:
            G.remove_edge(*nodes)

    print "Pre-processed graph size", G.size()
    print "Pre-processed graph nodes", G.number_of_nodes()

    G.remove_nodes_from(nx.isolates(G))

    print "Pre-processed graph size", G.size()
    print "Pre-processed graph nodes", G.number_of_nodes()
    
    if save_file:
        print "Saving to", save_file
        nx.write_gpickle(G,save_file)

    return G
Code Example #9
def getRandomPageRanks(filename):
	Ga=nx.read_graphml(filename)

	# create a copy of the graph and extract giant component
	# get component size distribution
	cc=nx.connected_components(Ga)
	cc_dict={}
	for x in range(0,len(cc)):
		try:
			cc_dict[len(cc[x])].append(x)
		except KeyError:
			cc_dict[len(cc[x])]=[]
			cc_dict[len(cc[x])].append(x)

	isolates=nx.isolates(Ga)

	rg=nx.fast_gnp_random_graph(Ga.number_of_nodes(),2.0*Ga.number_of_edges()/(Ga.number_of_nodes()*(Ga.number_of_nodes()-1)))
	c_rg=nx.average_clustering(rg)
	rg_cc=nx.connected_component_subgraphs(rg)[0]
	rg_asp=nx.algorithms.shortest_paths.generic.average_shortest_path_length(rg_cc)

	p_rg=community.best_partition(rg_cc)
	m_rg=community.modularity(p_rg,rg_cc)

	pageranks = nx.pagerank_numpy(rg)
	return pageranks
Code Example #10
def make_shared_page_editing_network(alter_revisions_dict,threshold):
    
    inverted_alter_revisions_dict = invert_alter_revisions(alter_revisions_dict)
    
    # Make the graph
    g = nx.DiGraph()
    for page,users in inverted_alter_revisions_dict.iteritems():
        user_list = users.keys()
        for num,user in enumerate(user_list[:-1]):
            next_user = user_list[num+1]
            if g.has_edge(user,next_user):
                g[user][next_user]['weight'] += 1
            else:
                g.add_edge(user,next_user,weight=1)
                
    # If edge is below threshold, remove it            
    for i,j,d in g.edges_iter(data=True):
        if d['weight'] < threshold:
            g.remove_edge(i,j)
            
    # Remove self-loops
    for i,j,d in g.edges_iter(data=True):
        if i == j:
            g.remove_edge(i,j)
    
    # Remove resulting isolates
    isolates = nx.isolates(g)
    for isolate in isolates:
        g.remove_node(isolate)
    
    return g
Code Example #11
File: zachary_regen.py Project: drewconway/GMM
def rand_delete(G, num_nodes):
    G=nx.convert_node_labels_to_integers(G,first_label=0)
    nodes_to_delete=list(random_integers(low=0,high=len(G.nodes()),size=num_nodes))
    G.remove_nodes_from(nodes_to_delete)
    isos=nx.isolates(G)
    G.remove_nodes_from(isos)
    return(G)
Code Example #12
    def _proba(self, G):
        """
        [TO BE TESTED]
        Compute transition probabilities. Only available when feature_type is 'fisher'.
        Parameters
        -------
        :param G: DAG of Fisher features.
            Attribute 'proba_': edge attribute, float
            Transition probability that one node transfers to another.
        :return: G, DAG with edge attribute 'proba_' assigned.
        """
        for node in G.nodes():
            s = (np.sum(G[node][x]['kern_unnorm_']) for x in G.successors(node))
            s = sum(s)
            for successor_ in G.successors(node):
                if s == 0:
                    G[node][successor_]['proba_'] = 0.
                else:
                    G[node][successor_]['proba_'] = np.sum(G[node][successor_]['kern_unnorm_'])/s
                if G[node][successor_]['proba_'] < self.proba_threshold:
                    G.remove_edge(node, successor_)

        isolated_ = nx.isolates(G)
        G.remove_nodes_from(isolated_)

        return G
Code Example #13
def residual_graph(G,v):
    # Input, G, the original graph
    # v, the vertex added to the vertex cover
    # degreeQ, the priority queue with node degrees
    #    from the original graph
    # Output: G', the graph consisting of edges not
    #    convered by C and the nodes not in C

    G1 = nx.Graph()
    for node in G.nodes():
        G1.add_node(node)
    for edge in G.edges():
        G1.add_edge(edge[0],edge[1])
    
    # Remove all edges in G that are covered by v
    neighbors = G1.neighbors(v)
    for u in neighbors:
        G1.remove_edge(v,u)
    # Remove v from G
    G1.remove_node(v)
        
    # Remove isolated nodes from G (this will include v)
    isolates = nx.isolates(G1)
    for node in isolates:
        G1.remove_node(node)
    #    degreeQ.remove_node(node)  
    return G1 
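A quick usage sketch; it assumes NetworkX 1.x, where neighbors() and isolates() return lists that can be iterated while the graph is modified:

import networkx as nx

G = nx.path_graph(4)           # edges 0-1, 1-2, 2-3
G1 = residual_graph(G, 1)      # add vertex 1 to the cover
print(sorted(G1.edges()))      # [(2, 3)]; node 0 becomes isolated and is removed along with 1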
Code Example #14
File: network_compute.py Project: sidh0/dbw
def whole_graph_metrics(graph, weighted=False):
    graph_metrics = {}

    # Shortest average path length
    graph_metrics['avg_shortest_path'] = \
        nx.average_shortest_path_length(graph, weight=weighted)

    # Average eccentricity
    ecc_dict = nx.eccentricity(graph)
    graph_metrics['avg_eccentricity'] = np.mean(np.array(ecc_dict.values()))

    # Average clustering coefficient
    # NOTE: Option to include or exclude zeros
    graph_metrics['avg_ccoeff'] = \
        nx.average_clustering(graph, weight=weighted, count_zeros=True)

    # Average node betweeness
    avg_node_btwn_dict = nx.betweenness_centrality(graph, normalized=True)
    graph_metrics['avg_node_btwn'] = \
        np.mean(np.array(avg_node_btwn_dict.values()))

    # Average edge betweeness
    avg_edge_btwn_dict = nx.edge_betweenness_centrality(graph, normalized=True)
    graph_metrics['avg_edge_btwn'] = \
        np.mean(np.array(avg_edge_btwn_dict.values()))

    # Number of isolates
    graph_metrics['isolates'] = len(nx.isolates(graph))

    return graph_metrics
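An illustrative call on a small connected graph (average_shortest_path_length and eccentricity require connectivity); it also assumes the Python 2 environment of the original, where dict.values() returns a list that np.array can consume directly:

import networkx as nx

metrics = whole_graph_metrics(nx.karate_club_graph())
print(sorted(metrics.keys()))   # ['avg_ccoeff', 'avg_eccentricity', ..., 'isolates']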
Code Example #15
File: helper.py Project: changwu-tw/accusation-graph
def vehicle_accusation_graph(n, p, seed=None, directed=True):
    """Return a random vehicle accusation graph G_{n,p}.
    Chooses each of the possible edges with accusation probability p.
    Parameters
    ----------
    n : int
        The number of vehicles.
    p : float
        Probability for accusation.
    seed : int, optional
        Seed for random number generator (default=None).
    directed : bool, optional (default=True)
        If True return a directed graph
    """

    if directed:
        G=nx.DiGraph()
    else:
        G=nx.Graph()
    G.add_nodes_from(range(n))
    G.name='Vehicle_accusation_graph({}, {})'.format(n, p)
    if p<=0:
        return G
    if p>=1:
        return complete_graph(n,create_using=G)

    if not seed is None:
        random.seed(seed)

    if G.is_directed():
        edges=itertools.permutations(range(n),2)
    else:
        edges=itertools.combinations(range(n),2)

    for e in edges:
        if random.random() < p:
            G.add_edge(*e)

    """
    Remove all isolates in the graph & relabel the nodes of the graph
    """
    if nx.isolates(G):
        G.remove_nodes_from(nx.isolates(G))
        mapping = dict(zip(G.nodes(), range(G.number_of_nodes())))
        G = nx.relabel_nodes(G, mapping)

    return G
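A hedged usage sketch; the function body expects random, itertools, and networkx's complete_graph to be importable in its defining module, so they are listed explicitly here:

import random
import itertools
import networkx as nx
from networkx import complete_graph

G = vehicle_accusation_graph(20, 0.1, seed=42)
print(G.name)                                      # Vehicle_accusation_graph(20, 0.1)
print(G.number_of_nodes(), G.number_of_edges())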
Code Example #16
File: chAs.py Project: mboudour/LIterature_Networks
def create_conn_random_graph(nodes,p):
    while  True:
        # G=nx.connected_watts_strogatz_graph(25, 2, 0.8, tries=100)
        G=nx.erdos_renyi_graph(nodes,p)
        if nx.is_connected(G):
            break
    G.remove_nodes_from(nx.isolates(G))
    return G
Code Example #17
File: utils.py Project: candsvincent/edgesense
def set_isolated(nodes_list, mdg):
    ts = int(datetime.now().strftime("%s"))
    dsg = extract_dpsg(mdg, ts, True)
    usg = dsg.to_undirected()
    isolated_nodes = set(nx.isolates(usg))
    for node in nodes_list:
        if node['id'] in isolated_nodes:
            node['isolated'] = True
Code Example #18
File: utils.py Project: Wikitalia/edgesense
def set_isolated(nodes_list, mdg):
    ts = int(time.mktime(datetime.now().timetuple()))   # Windows-compatible
    dsg = extract_dpsg(mdg, ts, True)
    usg = dsg.to_undirected()
    isolated_nodes = set(nx.isolates(usg))
    for node in nodes_list:
        if node['id'] in isolated_nodes:
            node['isolated'] = True
Code Example #19
File: utils.py Project: mboudour/GraphMultilayerity
def create_conn_random_graph(nodes,p):
    while  True:
        # G=nx.connected_watts_strogatz_graph(25, 2, 0.8, tries=100)
        G=nx.erdos_renyi_graph(nodes,p)
        if nx.is_connected(G):
            break
    G.remove_nodes_from(nx.isolates(G))
    sstt="Erdos-Renyi Random Graph with %i nodes and probability %.02f" %(nodes,p)
    return G, sstt
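An illustrative call (the isolate removal is a no-op here, since the loop only exits once the sample is connected); the parameters are arbitrary:

import networkx as nx

G, title = create_conn_random_graph(30, 0.1)
print(title)               # "Erdos-Renyi Random Graph with 30 nodes and probability 0.10"
print(nx.is_connected(G))  # True by construction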
Code Example #20
 def _generate_nlist():
     G = self.graph
     # TODO: imaginative, but shit. revise.
     isolates = set(nx.isolates(G))
     independent = set(nx.maximal_independent_set(G)) - isolates
     dominating = set(nx.dominating_set(G)) - independent - isolates
     rest = set(G.nodes()) - dominating - independent - isolates
     nlist = list(map(sorted, filter(None, (isolates, independent, dominating, rest))))
     return nlist
Code Example #21
File: network.py Project: ryanbanderson/autocnet
    def island_nodes(self):
        """
        Finds single nodes that are completely disconnected from the rest of the graph

        Returns
        -------
        : list
          A list of disconnected nodes, nodes of degree zero, island nodes, etc.
        """
        return nx.isolates(self)
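The docstring promises a list, but on NetworkX >= 2.0 nx.isolates returns an iterator; a version-tolerant variant might simply materialize it (a sketch, not the autocnet implementation):

    def island_nodes(self):
        """Return the disconnected (degree-zero) nodes as a list."""
        return list(nx.isolates(self))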
Code Example #22
def synthetic_multi_level(k,n,p=[],No_isolates=True):

    list_of_Graphs=[]
    list_of_isolates=[]
    list_of_Graphs_final=[]
    for ij in range(k):
        list_of_Graphs.append(nx.erdos_renyi_graph(n,p[ij]))
        list_of_isolates.append(nx.isolates(list_of_Graphs[ij]))

    Gagr=nx.Graph()
    for i in list_of_Graphs:
        Gagr.add_edges_from(i.edges())
        Gagr.add_nodes_from(i.nodes())

    G=nx.Graph()  #The synthetic multi-layer graph
    
    # Relabeling node maps
    mapping={}
    for i in range(k):
        mapping[i]={}
        for ij in range(n):
            mapping[i][ij]=ij+i*n

        list_of_Graphs_final.append(nx.relabel_nodes(list_of_Graphs[i],mapping[i],copy=True))

    list_of_translation_graphs=[]
    for ij in range(k-1):
        H1=nx.Graph()
        #### Sort node lists so that corresponding nodes pair up across layers
        g1=sorted(list_of_Graphs_final[ij].nodes())
        g2=sorted(list_of_Graphs_final[ij+1].nodes())
        #######

        for ji in range(n):

            H1.add_edge(g1[ji],g2[ji]) #a small fix

        list_of_translation_graphs.append(H1)

    luf=set()
    for i in list_of_Graphs_final:
        luf=luf.union(set(i.edges()))
    luf=list(luf)
    G.add_edges_from(luf)
    luf=set()
    for i in list_of_translation_graphs:
        luf=luf.union(set(i.edges()))
    edgeList=list(luf)
    G.add_edges_from(luf)
    nmap={}
    for i  in mapping:
        for j in mapping[i]:
            nmap[mapping[i][j]]=j

    return G, list_of_Graphs_final, Gagr, edgeList ,nmap ,mapping#F
Code Example #23
File: utils.py Project: mboudour/GraphMultilayerity
def draw_network(G,sstt,pos={},with_edgewidth=False,withLabels=True,pernode_dict={},labfs=10,valpha=0.4,ealpha=0.4):


# GI = graph_dic[ract_dic[cnum[3]]]
# print "The number of actors in Macbeth's Act IV is", len(GI.nodes())
# print "The number of conversational relationships in Macbeth's Act IV is", len(GI.edges())

    G.remove_nodes_from(nx.isolates(G))
    # if with_weights:
    #     weights={(i[0],i[1]):i[2]['weight'] for i in G.edges(data=True) }#if all((i[0],i[1])) in G.nodes() }
    plt.figure(figsize=(12,12))
    # try:
    #     f=open('positions_of_Mc_Shake.dmp')
    #     pos_dict=pickle.load(f)
    #     pos =pos_dict[3]
    # except:
        
    #     pos=nx.spring_layout(G,scale=50)
    #     pos_dict[3]=pos
    if len(pos)==0:
        pos=nx.spring_layout(G,scale=50)


# pos=nx.spring_layout(G,scale=50)
# pos_dict[3]=pos
    # if:
    #     labels={i:v for v,i in pernode_dict.items() if i in G.nodes()}
    # else:
    #     labels={i:v for v,i in pernode_dict.items() if i in G.nodes()}
    if with_edgewidth:
        edgewidth=[]
        for (u,v,d) in G.edges(data=True):
            edgewidth.append(d['weight'])
    else:
        edgewidth=[1 for i in G.edges()]
    nx.draw_networkx_nodes(G,pos=pos,with_labels=False,alpha=0.4)
    if withLabels:
        if len(pernode_dict)>0:
            labels={i:v for v,i in pernode_dict.items() if i in G.nodes()}
            labe=nx.draw_networkx_labels(G,pos=pos,labels=labels,font_size=20)
        else:
            labe=nx.draw_networkx_labels(G,pos=pos,font_size=labfs)
    nx.draw_networkx_edges(G,pos=pos,edge_color='b',width=edgewidth, alpha=0.5)#,edge_labels=weights,label_pos=0.2)


    # pos=nx.spring_layout(G,scale=50)
    # plt.figure(figsize=(12,12))
    # nx.draw_networkx_nodes(G,pos=pos,with_labels=withLabels,alpha=valpha)
    # if withLabels:
    #     labe=nx.draw_networkx_labels(G,pos=pos,font_size=labfs)
    # # nx.draw_networkx_edges(G,pos=pos,edge_color='b',alpha=ealpha)
    plt.title(sstt,fontsize=20)
    kk=plt.axis('off')
    return pos
Code Example #24
 def test_edgelist_integers(self):
     G = nx.convert_node_labels_to_integers(self.G)
     (fd, fname) = tempfile.mkstemp()
     nx.write_edgelist(G, fname)
     H = nx.read_edgelist(fname, nodetype=int)
     # isolated nodes are not written in edgelist
     G.remove_nodes_from(list(nx.isolates(G)))
     assert_nodes_equal(list(H), list(G))
     assert_edges_equal(list(H.edges()), list(G.edges()))
     os.close(fd)
     os.unlink(fname)
Code Example #25
File: app.py Project: msmexplorer/msmexplorer-d3
def make_json_graph(msm, request):
    c = float(request.get_argument('cutoff'))
    e = str(request.get_argument('resize'))
    t = sparse.csr_matrix(msm.transmat_.copy())
    t.data[t.data < c] = 0.0
    t.eliminate_zeros()
    G = nx.from_scipy_sparse_matrix(t, create_using=nx.DiGraph())
    metric = resize[e](G, msm, t)
    nx.set_node_attributes(G, 'size', metric)
    G.remove_nodes_from(nx.isolates(G))
    return json_graph.node_link_data(G)
Code Example #26
File: chAs.py Project: mboudour/LIterature_Networks
def create_conn_random_graph_chrom(nodes,p,x):
    while  True:
        # G=nx.connected_watts_strogatz_graph(25, 2, 0.8, tries=100)
        G=nx.erdos_renyi_graph(nodes,p)
        if nx.is_connected(G):
            g=Graph(G)
            cn=vertex_coloring(g, value_only=True)
            if cn==x:
                break
    G.remove_nodes_from(nx.isolates(G))
    return G
Code Example #27
File: basic.py Project: adrianco/networkx
def color(G):
    """Returns a two-coloring of the graph.

    Raises an exception if the graph is not bipartite.

    Parameters
    ----------
    G : NetworkX graph 

    Returns
    -------
    color : dictionary
       A dictionary keyed by node with a 1 or 0 as data for each node color.

    Raises
    ------
    NetworkXError if the graph is not two-colorable.

    Examples
    --------
    >>> from networkx.algorithms import bipartite
    >>> G = nx.path_graph(4)
    >>> c = bipartite.color(G)
    >>> print(c)
    {0: 1, 1: 0, 2: 1, 3: 0}

    You can use this to set a node attribute indicating the biparite set:
    
    >>> nx.set_node_attributes(G, 'bipartite', c)
    >>> print(G.node[0]['bipartite'])
    1
    >>> print(G.node[1]['bipartite'])
    0
    """
    color = {}
    for n in G: # handle disconnected graphs
        if n in color or len(G[n])==0: # skip isolates
            continue
        queue = [n]  
        color[n] = 1 # nodes seen with color (1 or 0)
        while queue:
            v = queue.pop()
            c = 1 - color[v] # opposite color of node v
            for w in G[v]: 
                if w in color: 
                    if color[w] == color[v]:
                        raise nx.NetworkXError("Graph is not bipartite.")
                else:
                    color[w] = c
                    queue.append(w)
    # color isolates with 0
    color.update(dict.fromkeys(nx.isolates(G),0))
    return color
Code Example #28
File: algorithm.py Project: vvanirudh/Pixel-Art
	def get_boundaries(self):
		# Remove internal edges from a copy of our pixgrid graph and just get the boundaries
		self.outlines_graph = networkx.Graph(self.grid_graph)
		for pixel, attrs in self.pixel_graph.nodes_iter(data=True):
			corners = attrs['corners']
			for neighbor in self.pixel_graph.neighbors(pixel):
				edge = corners & self.pixel_graph.node[neighbor]['corners']
				if len(edge) != 2: # If the number of edges is not 2
					print edge
				elif self.outlines_graph.has_edge(*edge): # Remove the internal edges in the outlines graph
					self.outlines_graph.remove_edge(*edge)
		for node in networkx.isolates(self.outlines_graph):
			self.outlines_graph.remove_node(node) # Remove the nodes from the outline graph too
Code Example #29
File: depixeler.py Project: NKCSS/depixel
 def isolate_outlines(self):
     # Remove internal edges from a copy of our pixgrid graph.
     self.outlines_graph = nx.Graph(self.grid_graph)
     for pixel, attrs in self.pixel_graph.nodes_iter(data=True):
         corners = attrs['corners']
         for neighbor in self.pixel_graph.neighbors(pixel):
             edge = corners & self.pixel_graph.node[neighbor]['corners']
             if len(edge) != 2:
                 print edge
             if self.outlines_graph.has_edge(*edge):
                 self.outlines_graph.remove_edge(*edge)
     for node in nx.isolates(self.outlines_graph):
         self.outlines_graph.remove_node(node)
Code Example #30
File: Organism.py Project: brunopace/metaevo
    def clean_met_net(self, MetNet, genes_list, food_list):
        # Check more carefully that deleting targets does not affect anything...
        chemis = deepcopy(MetNet)
        # Will this object inherit the methods of the MetabolicNetwork class?

        for r in [x for x in MetNet.nodes() if MetNet.node[x]['Type'] == 'R']:
            if r not in [reac for reac in genes_list if reac not in food_list]:
                chemis.remove_node(r)
        # Some targets may be deleted in this step:
        chemis.remove_nodes_from([isol for isol in nx.isolates(chemis) if isol not in food_list])

        #chemis.remove_nodes_from([n for n in nx.isolates(chemis) if n not in food_list])????
        # May need to check whether food or target molecules were removed?
        return chemis
Code Example #31
def has_isolated_nodes(G):
    """Returns if the graph `G` has isolated nodes."""
    if len(list(nx.isolates(G))) > 0:
        return True
    else:
        return False
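A quick check of the helper (works on any NetworkX version, since the iterator is materialized with list()):

import networkx as nx

G = nx.path_graph(3)
print(has_isolated_nodes(G))   # False: every node has degree >= 1
G.add_node(99)                 # append a degree-zero node
print(has_isolated_nodes(G))   # True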
Code Example #32
	print("Do you want to add any edges?")
	keyadd = input("y/n? ")
	if (keyadd == "y"):
		val = int(input("How many edges need to add? "))
		for i in range(val):
			startV = int(input("Enter start vertex "))
			endV = int(input("Enter end vertex "))
			G.add_edges_from([(startV, endV)]) #Add an edge from vertex startV to vertex endV.
	print("==========================================")
	
	#Shortest paths: from the entered vertex to all the others, and from vertex 0 to the entered vertex
	end = 1
	eccentricity = dict()
	lens = []

	isolate = list(nx.isolates(G))
	if isolate:
		for i in isolate:
			G.add_edge(i, end)

	for i in range(n-1):
		if end in isolate:
			end+=1
		else:
			start = 0
			p = nx.shortest_path(G,source = 0, target = end)
			end+=1
			if len(p) in eccentricity:
				eccentricity[len(p)].append(p)
			else:
				eccentricity[len(p)] = []
Code Example #33
# To Run this script: python run_generate_adjlist_largestcomponent.py

import networkx as nx
import matplotlib.pyplot as plt
import sys
from matplotlib.legend_handler import HandlerLine2D
from matplotlib.font_manager import FontProperties

x = int(sys.argv[1])
year = []
largestcomponent = []

fh=open("../data/adjlistfile_till_year_"+str(x))
G = nx.read_adjlist(fh, create_using=nx.DiGraph())
G = G.to_undirected()
#print "Year "+str(x)+":"
#print "Number of nodes:", G.number_of_nodes()
#print "Number of isolates:", len(nx.isolates(G))
G.remove_nodes_from(nx.isolates(G))
#print "Number of nodes after removing isolates:", G.number_of_nodes()
components = sorted(nx.connected_components(G), key = len, reverse=True)
largestcomponent = G.subgraph(components[0])
year.append(x)

for line in nx.generate_adjlist(largestcomponent):
	print(line)
Code Example #34
File: basic.py Project: Adeilsoara/LearnPython
def color(G):
    """Returns a two-coloring of the graph.

    Raises an exception if the graph is not bipartite.

    Parameters
    ----------
    G : NetworkX graph

    Returns
    -------
    color : dictionary
        A dictionary keyed by node with a 1 or 0 as data for each node color.

    Raises
    ------
    NetworkXError
        If the graph is not two-colorable.

    Examples
    --------
    >>> from networkx.algorithms import bipartite
    >>> G = nx.path_graph(4)
    >>> c = bipartite.color(G)
    >>> print(c)
    {0: 1, 1: 0, 2: 1, 3: 0}

    You can use this to set a node attribute indicating the biparite set:

    >>> nx.set_node_attributes(G, c, "bipartite")
    >>> print(G.nodes[0]["bipartite"])
    1
    >>> print(G.nodes[1]["bipartite"])
    0
    """
    if G.is_directed():
        import itertools

        def neighbors(v):
            return itertools.chain.from_iterable(
                [G.predecessors(v), G.successors(v)])

    else:
        neighbors = G.neighbors

    color = {}
    for n in G:  # handle disconnected graphs
        if n in color or len(G[n]) == 0:  # skip isolates
            continue
        queue = [n]
        color[n] = 1  # nodes seen with color (1 or 0)
        while queue:
            v = queue.pop()
            c = 1 - color[v]  # opposite color of node v
            for w in neighbors(v):
                if w in color:
                    if color[w] == color[v]:
                        raise nx.NetworkXError("Graph is not bipartite.")
                else:
                    color[w] = c
                    queue.append(w)
    # color isolates with 0
    color.update(dict.fromkeys(nx.isolates(G), 0))
    return color
Code Example #35
def find_roots(G):
    dfs_tree = nx.dfs_tree(G, depth_limit=0)
    return set(list([n1 for n1, n2 in dfs_tree.edges]) + list(nx.isolates(G)))
Code Example #36
File: ofpfinal.py Project: ritallopes/OFPFINAL
G = nx.DiGraph()
G.add_nodes_from(nodes.name)

G.add_edges_from([(s,t) for s,t in zip(edges.name_source, edges.name_target)]) # adding the edges

nx.write_graphml(G, "dependencies_py.graphml")

nx.set_node_attributes(G, pd.Series(list(nodes.position.str.split(",")), index=nodes.name).to_dict(), 'pos')

print(nx.number_of_nodes(G))
print(nx.number_of_edges(G))
print(G.nodes.data())

print(nx.number_of_isolates(G)) # isolated nodes
G.remove_nodes_from(list(nx.isolates(G))) # removing isolated nodes

print(nx.number_of_nodes(G))
print(nx.number_of_edges(G))
print(G.nodes)

"""# ANÁLISES, MÉTRICAS, GRAU

##Matrizes e mais
"""

print(list(G.adj['labkit'])) # or list(G.neighbors(1))
print(nx.center(nx.Graph(G))) # ['beautifulsoup4', 'requests', 'six', 'docopt', 'docutils', 'gevent', 'pycrypto', 'distribute', 'lxml', 'argparse', 'pyyaml', 'jinja2', 'simplejson', 'mock', 'numpy', 'sphinx', 'python-dateutil', 'flake8', 'sqlalchemy', 'twisted', 'babel', 'psycopg2', 'click', 'flask', 'pillow', 'pytz', 'pep8']

"""##Densidade"""
Code Example #37
def load_graph(data_dir, min_num_nodes, max_num_nodes, node_labels,
               graph_labels):

    #Each file should contain the datasetname at the front of the file
    name = data_dir.split('/')[-1]

    #(node_x, node_y)
    data_adj = np.loadtxt(fname=os.path.join(data_dir,
                                             '{}_A.txt'.format(name)),
                          delimiter='|').astype(int)

    if node_labels:
        #(node_id, **info)
        data_node_label = np.loadtxt(
            fname=os.path.join(data_dir, '{}_node_labels.txt'.format(name)),
            delimiter='|',
            dtype={
                'names':
                ('node_id', 'tree_id', 'node_type', 'node_name', 'node_path'),
                'formats': ('i4', 'i4', 'S4', 'S100', 'S250')
            })

    else:
        #(node_id, graph_id)
        data_node_label = np.loadtxt(fname=os.path.join(
            data_dir, '{}_graph_indicators.txt'.format(name)),
                                     delimiter='|').astype(int)

    #(graph_id, **info)
    if graph_labels:
        data_graph_label = np.loadtxt(
            fname=os.path.join(data_dir, '{}_graph_labels.txt'.format(name)),
            delimiter='|',
            dtype={
                'names': ('tree_id', 'tree_name', 'language', 'stars',
                          'git_uri', 'last_update'),
                'formats': ('i4', 'S100', 'S100', 'i4', 'S250', 'S100')
            })
    else:
        #(graph_id)
        data_graph_label = np.loadtxt(fname=os.path.join(
            data_dir, '{}_graph_labels.txt'.format(name)),
                                     delimiter=',',
                                     usecols=(0)).astype(int)

    DG = nx.DiGraph()

    # Add Edges
    data_tuple = list(map(tuple, data_adj))
    DG.add_edges_from(data_tuple)

    # Add Nodes
    node_bar = tqdm(range(data_node_label.shape[0]))

    for i in node_bar:
        #node_bar.set_description("Processing node {}".format(i))

        if node_labels:
            DG.add_node(
                data_node_label[i][0],
                label=data_node_label[i][0],
                tree_id=data_node_label[i][1],
                node_type=data_node_label[i][2],
                node_name=data_node_label[i][3],
                node_path=data_node_label[i][4],
            )
        else:
            DG.add_node(data_node_label[i][0],
                        label=data_node_label[i][0],
                        tree_id=data_node_label[i][1])

    isolates = list(nx.isolates(DG))
    selfloops = list(nx.selfloop_edges(DG))
    if len(isolates) or len(selfloops):
        print("Removing isolates ({}) and selfloops ({})".format(
            len(isolates), len(selfloops)))
        DG.remove_nodes_from(isolates)
        DG.remove_edges_from(selfloops)

    tree_id_node_list = dict()
    tree_id_lang = dict()
    for n in DG.nodes.data():
        tree_id = n[1]['tree_id']

        if tree_id not in tree_id_node_list:
            tree_id_node_list[tree_id] = []
            tree_id_lang[tree_id] = False

        tree_id_node_list[tree_id].append(n[0])
        #check if .jl extension exists
        if ext(name) in n[1]['node_name'].decode("utf-8"):
            tree_id_lang[tree_id] = True

    graphs = []
    graph_bar = tqdm(range(data_graph_label.shape[0]))
    for i in graph_bar:
        #graph_bar.set_description("Processing graph {}".format(i))

        tree_id = data_graph_label[i][0]
        #Search for nodes with same tree-id
        nodes = tree_id_node_list[tree_id]
        #Language file exist
        lang = tree_id_lang[tree_id]

        #Create sub-graph
        G_sub = DG.subgraph(nodes).copy()
        G_sub.graph['label'] = tree_id

        #lang node reduces the number of additional steps
        if graph_labels:
            G_sub.graph['tree_id'] = tree_id
            G_sub.graph['tree_name'] = data_graph_label[i][1]
            G_sub.graph['language'] = data_graph_label[i][2]
            G_sub.graph['stars'] = data_graph_label[i][3]
            G_sub.graph['git_uri'] = data_graph_label[i][4]
            G_sub.graph['last_update'] = data_graph_label[i][5]

        if G_sub.number_of_nodes() >= min_num_nodes \
          and G_sub.number_of_nodes() <= max_num_nodes \
          and lang and nx.is_arborescence(G_sub):
            graphs.append(G_sub)

            #print(G_sub.graph['tree_name'], G_sub.graph['tree_id'])

    return graphs
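An illustrative call with a placeholder dataset directory; it assumes the module-level work_dir, ext and tqdm helpers referenced by the snippet are available, and that the directory contains the DATASET_A.txt, DATASET_node_labels.txt and DATASET_graph_labels.txt files with '|' delimiters described in the comments above:

graphs = load_graph('/path/to/DATASET', min_num_nodes=10, max_num_nodes=500,
                    node_labels=True, graph_labels=True)
print(len(graphs))   # number of arborescence subgraphs that passed the size and language filters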
Code Example #38
def fast_consensus(G, algorithm='louvain', n_p=20, thresh=0.2, delta=0.02):
    graph = G.copy()
    L = G.number_of_edges()
    N = G.number_of_nodes()

    for u, v in graph.edges():
        graph[u][v]['weight'] = 1.0

    while (True):

        if (algorithm == 'louvain'):

            nextgraph = graph.copy()
            L = G.number_of_edges()
            for u, v in nextgraph.edges():
                nextgraph[u][v]['weight'] = 0.0

            with mp.Pool(processes=mp.cpu_count()) as pool:
                communities_all = pool.map(louvain_community_detection,
                                           get_yielded_graph(graph, n_p))

            for node, nbr in graph.edges():

                if (node, nbr) in graph.edges() or (nbr,
                                                    node) in graph.edges():
                    if graph[node][nbr]['weight'] not in (0, n_p):
                        for i in range(n_p):
                            communities = communities_all[i]
                            if communities[node] == communities[nbr]:
                                nextgraph[node][nbr]['weight'] += 1
                            else:
                                nextgraph[node][nbr]['weight'] = graph[node][
                                    nbr]['weight']

            remove_edges = []
            for u, v in nextgraph.edges():
                if nextgraph[u][v]['weight'] < thresh * n_p:
                    remove_edges.append((u, v))

            nextgraph.remove_edges_from(remove_edges)

            if check_consensus_graph(nextgraph, n_p=n_p, delta=delta):
                break

            for _ in range(L):

                node = np.random.choice(nextgraph.nodes())
                neighbors = [a[1] for a in nextgraph.edges(node)]

                if (len(neighbors) >= 2):
                    a, b = random.sample(set(neighbors), 2)

                    if not nextgraph.has_edge(a, b):
                        nextgraph.add_edge(a, b, weight=0)

                        for i in range(n_p):
                            communities = communities_all[i]

                            if communities[a] == communities[b]:
                                nextgraph[a][b]['weight'] += 1

            for node in nx.isolates(nextgraph):
                nbr, weight = sorted(graph[node].items(),
                                     key=lambda edge: edge[1]['weight'])[0]
                nextgraph.add_edge(node, nbr, weight=weight['weight'])

            graph = nextgraph.copy()

            if check_consensus_graph(nextgraph, n_p=n_p, delta=delta):
                break

        elif (algorithm in ('infomap', 'lpm')):

            nextgraph = graph.copy()

            for u, v in nextgraph.edges():
                nextgraph[u][v]['weight'] = 0.0

            if algorithm == 'infomap':
                communities = [{
                    frozenset(c)
                    for c in nx_to_igraph(
                        graph).community_infomap().as_cover()
                } for _ in range(n_p)]
            if algorithm == 'lpm':
                communities = [{
                    frozenset(c)
                    for c in nx_to_igraph(
                        graph).community_label_propagation().as_cover()
                } for _ in range(n_p)]

            for node, nbr in graph.edges():

                for i in range(n_p):
                    for c in communities[i]:
                        if node in c and nbr in c:
                            if not nextgraph.has_edge(node, nbr):
                                nextgraph.add_edge(node, nbr, weight=0)
                            nextgraph[node][nbr]['weight'] += 1

            remove_edges = []
            for u, v in nextgraph.edges():
                if nextgraph[u][v]['weight'] < thresh * n_p:
                    remove_edges.append((u, v))
            nextgraph.remove_edges_from(remove_edges)

            for _ in range(L):
                node = np.random.choice(nextgraph.nodes())
                neighbors = [a[1] for a in nextgraph.edges(node)]

                if (len(neighbors) >= 2):
                    a, b = random.sample(set(neighbors), 2)

                    if not nextgraph.has_edge(a, b):
                        nextgraph.add_edge(a, b, weight=0)

                        for i in range(n_p):
                            if a in communities[i] and b in communities[i]:
                                nextgraph[a][b]['weight'] += 1

            graph = nextgraph.copy()

            if check_consensus_graph(nextgraph, n_p=n_p, delta=delta):
                break

        elif (algorithm == 'cnm'):

            nextgraph = graph.copy()

            for u, v in nextgraph.edges():
                nextgraph[u][v]['weight'] = 0.0

            communities = []
            mapping = []
            inv_map = []

            for _ in range(n_p):

                order = list(range(N))
                random.shuffle(order)
                maps = dict(zip(range(N), order))

                mapping.append(maps)
                inv_map.append({v: k for k, v in maps.items()})
                G_c = nx.relabel_nodes(graph, mapping=maps, copy=True)
                G_igraph = nx_to_igraph(G_c)

                communities.append(
                    G_igraph.community_fastgreedy(
                        weights='weight').as_clustering())

            for i in range(n_p):

                edge_list = [(mapping[i][j], mapping[i][k])
                             for j, k in graph.edges()]

                for node, nbr in edge_list:
                    a, b = inv_map[i][node], inv_map[i][nbr]

                    if graph[a][b]['weight'] not in (0, n_p):
                        for c in communities[i]:
                            if node in c and nbr in c:
                                nextgraph[a][b]['weight'] += 1

                    else:
                        nextgraph[a][b]['weight'] = graph[a][b]['weight']

            remove_edges = []
            for u, v in nextgraph.edges():
                if nextgraph[u][v]['weight'] < thresh * n_p:
                    remove_edges.append((u, v))

            nextgraph.remove_edges_from(remove_edges)

            for _ in range(L):
                node = np.random.choice(nextgraph.nodes())
                neighbors = [a[1] for a in nextgraph.edges(node)]

                if (len(neighbors) >= 2):
                    a, b = random.sample(set(neighbors), 2)
                    if not nextgraph.has_edge(a, b):
                        nextgraph.add_edge(a, b, weight=0)

                        for i in range(n_p):
                            for c in communities[i]:
                                if mapping[i][a] in c and mapping[i][b] in c:

                                    nextgraph[a][b]['weight'] += 1

            if check_consensus_graph(nextgraph, n_p, delta):
                break

        else:
            break

    if (algorithm == 'louvain'):
        with mp.Pool(processes=mp.cpu_count()) as pool:
            communities_all = pool.map(louvain_community_detection,
                                       get_yielded_graph(graph, n_p))
        return communities_all
    if algorithm == 'infomap':
        return [{
            frozenset(c)
            for c in nx_to_igraph(graph).community_infomap().as_cover()
        } for _ in range(n_p)]
    if algorithm == 'lpm':
        return [{
            frozenset(c)
            for c in nx_to_igraph(
                graph).community_label_propagation().as_cover()
        } for _ in range(n_p)]
    if algorithm == 'cnm':

        communities = []
        mapping = []
        inv_map = []

        for _ in range(n_p):
            order = list(range(N))
            random.shuffle(order)
            maps = dict(zip(range(N), order))

            mapping.append(maps)
            inv_map.append({v: k for k, v in maps.items()})
            G_c = nx.relabel_nodes(graph, mapping=maps, copy=True)
            G_igraph = nx_to_igraph(G_c)

            communities.append(
                G_igraph.community_fastgreedy(
                    weights='weight').as_clustering())

        return communities
Code Example #39
def drawNetwork(path1, path2, sele=None, sele1=None, sele2=None, top1=None, top2=None,
                r=1, edge_norm=None, alpha=0.5, mutations=False, align_with = None, 
                node_color=(0.6, 0.6, 0.6), edge_color1 = (0, 0, 1), palette="colorblind",
                edge_color2 = (1, 0, 0), labeling='0', norm_expected=False,
                threshold=0, topk=None, max_compo=None, mean_vp=None, strong_compo=None, 
                around=None, keep_previous=False, compo_size=None, save_cc=None, load_cc=None,
                compos_to_excel = None, force_binary_color=False, compo_radius=None, compo_diam=None,
                label_compo='', auto_patch=True, printall=False, sum=False, n_clusters=None,
                color_by_compo=False, color_by_group=False, show_top_group=None,
                name1 = None, name2 = None, name_nodes='nodes', userSelection='all',
                fromstruct=None, color_by_contact_type=False, standard_and_expected=None):
    '''
    Draws a NetworkX network on the PyMol structure
    '''

    #Initialization of labeling variables and retrieving residue XYZ positions
    if not keep_previous:
        cmd.delete('*nodes *edges Component* Group*')
        cmd.label(selection=userSelection, expression="")
        cmd.hide("licorice", "?mutations")
    # Building position -- name correspondance
    stored.posCA = []
    stored.names = []
    stored.ss = []
    userSelection = userSelection + " and ((n. CA) or n. C)"
    cmd.iterate_state(1, selector.process(userSelection), "stored.posCA.append([x,y,z])")
    cmd.iterate(userSelection, "stored.ss.append(ss)")
    cmd.iterate(userSelection, 'stored.names.append(resn+resi+chain)')
    stored.labels = list(map(relabel, stored.names))
    stored.resid = list(map(selection, stored.names))
    node2id = dict(zip(stored.labels, stored.resid))
    node2CA = dict(zip(stored.labels, stored.posCA))

    #Secondary Structure labels
    prevSS, prevChain = None, None
    counters = {'': 0, 'H': 0, 'S': 0, 'L': 0}
    node2SS = dict(zip(stored.labels, stored.ss))
    SS2nodelist = {}
    putflag = lambda X: 'U' if X in ['', 'L'] else X
    for label in node2SS:
        ss = node2SS[label]
        chain = label[-1]
        if prevChain != chain:
            for counter in counters: counters[counter] = 0
        if prevSS != ss:
            counters[ss] +=1
        labss = putflag(ss)+str(counters[ss])+':'+chain
        if labss in SS2nodelist:
            SS2nodelist[labss].append(label)
        else:
            SS2nodelist[labss] = [label]
        prevSS = ss
        prevChain = chain

    prevkey, prevChain = None, None
    order = []
    keys = list(SS2nodelist.keys())

    for key in keys:
        if prevChain != key.split(':')[-1]:
            prevkey = None
        if key[0] == 'U':
            if prevkey == None:
                newkey = 'Head:'+key.split(':')[-1]
            else:
                newkey = 'U'+prevkey
            SS2nodelist[newkey] = SS2nodelist.pop(key)
            order.append(newkey)
        else:
            order.append(key)
        prevkey = key
        prevChain = key.split(':')[-1]
    prevkey = None
    final = []
    for key in order[::-1]:
        if prevChain != key.split(':')[-1]:
            prevkey = None
        if key[0] == 'U':
            if prevkey == None:
                newkey = 'Tail:'+key.split(':')[-1]
            else:
                newkey = '{}-{}'.format(key[1:], prevkey)
            SS2nodelist[newkey] = SS2nodelist.pop(key)
            final.append(newkey)
        else:
            final.append(key)
        prevkey = key
        prevChain = key.split(':')[-1]
    # ss_dict = dict(zip(keys, final[::-1]))
    mapss = {}
    for key in final:
        newkey = key.replace('S', 'β').replace('H', 'α').replace('αead', 'Head')
        if 'IGPS' in str(label_compo):
            _ = []
            for elt in newkey.split('-'):    
                if elt.split(':')[1] in ['A', 'C', 'E']:
                    _.append('𝘧{}'.format(elt.split(':')[0]))
                elif elt.split(':')[1] in ['B', 'D', 'F']:
                    _.append('𝘩{}'.format(elt.split(':')[0]))
            newkey = '-'.join(_)
            mapss[key] = IGPS_mapping[newkey]      
        else:
            mapss[key] = newkey     

    for ss in SS2nodelist:
        for node in SS2nodelist[ss]:
            node2SS[node] = mapss[ss]


    #Loading external data
    atom_mat1, atom_mat2 = list(map(load, [path1, path2]))
    get_ext = lambda X: X.split('.')[-1]
    ext1, ext2 = list(map(get_ext, [path1, path2]))
    top1 = load(path1.split('_')[0].split('.')[0]+'.topy') if top1 == None else load(top1)
    top2 = load(path2.split('_')[0].split('.')[0]+'.topy') if top2 == None else load(top2)

    #Handling selections
    if sele != None:
        sele1, sele2 = [sele]*2
    if sele == None and sele1 == None and sele2 == None:
        sele1, sele2 = ['protein && not hydrogen']*2
        print('Default selection protein without hydrogens')
    
    sels = [sele1, sele2]

    #Creating topology matrices for each selection
    topg1, topd1 = [create_top(sel, top1, fromstruct) for sel in sels]
    topg2, topd2 = [create_top(sel, top2, fromstruct) for sel in sels]
    #From atomic to residual contacts and perturbation network computation
    mat1 = (atom_mat1 @ topd1).transpose() @ topg1
    mat2 = (atom_mat2 @ topd2).transpose() @ topg2
    #Apply expected norm if necessary
    if norm_expected:
        exp1 = (topd1.sum(axis=1).transpose() @ topd1).transpose() @ (topg1.sum(axis=1).transpose() @ topg1)
        exp2 = (topd2.sum(axis=1).transpose() @ topd2).transpose() @ (topg2.sum(axis=1).transpose() @ topg2)
        mat1 = divide_expected(mat1, exp1)
        mat2 = divide_expected(mat2, exp2)
        mat1, mat2 = list(map(csr_matrix, [mat1, mat2]))

    if align_with != None:
        cmd.align(align_with, userSelection, object='aln')
        raw_aln = cmd.get_raw_alignment('aln')
        cmd.hide('cgo', 'aln')
        order_string = [idx[0] for idx in raw_aln[-1]][::-1]
        trans_mat = dok_matrix(tuple([cmd.count_atoms(X) for X in order_string]))
        for idx1, idx2 in raw_aln:
            trans_mat[idx2[1]-1, idx1[1]-1] = 1
        trans_mat = csr_matrix(trans_mat)
        top_t1, top_t2 = [create_top('name CA', top) for top in [top1, top2]]
        trans_res = (trans_mat @ top_t1).transpose() @ top_t2
        mat2 = trans_res @ (mat2 @ trans_res.transpose())

    pertmat = mat2 - mat1

    pertmat.setdiag(0)
    pertmat.eliminate_zeros()
    
    net = nx.from_scipy_sparse_matrix(pertmat)

    #Creating labeling dictionary
    if str(next(top1.residues))[-1] == '0':
        offset = 1
    else:
        offset = 0

    chain_names = [chr(ord('A') + i) for i in range(26)]

    t2o = lambda X: three2one[X] if X in three2one else X[0]
    get_chain = lambda X: chain_names[(X.chain.index % len(chain_names))]
    res2str = lambda X: t2o(X.name)+str(X.resSeq+offset)+':'+get_chain(X)
    id2label = {i: res2str(res) for i, res in enumerate(top1.residues)}
    # if 'IGPS' in label_compo:
    #     igps_label = {}
    #     for elt in id2label.items():
    #         if elt.split(':')[1] in ['A', 'C', 'E']:
    #             rerelabel[elt] = '𝘧{}'.format(elt.split(':')[0])
    #         elif elt.split(':')[1] in ['B', 'D', 'F']:
    #             rerelabel[elt] = '𝘩{}'.format(elt.split(':')[0])
    #Relabeling network
    net = nx.relabel_nodes(net, id2label)

    label2id = {res2str(res): i for i, res in enumerate(top1.residues)}



    #Auto_patching network labels
    if not all(elem in node2CA for elem in net.nodes()):
        print('PDB structure and topology labeling not matching.')
        if auto_patch:
            print('Attempting to auto-patch residue names. (this can be disabled with auto_patch=False)')
            if len(node2CA.keys()) == len(net.nodes()):
                remap = dict(zip(net.nodes(), node2CA.keys()))
                net = nx.relabel_nodes(net, remap)
                label2id = dict(zip(node2CA.keys(), range(top1.n_residues)))
            else:
                print("Auto-patching not working, please try on different PDB file")


    #Output topK if necessary
    if type(topk) == int:
        limit_weight = np.sort([abs(net.edges[(u, v)]['weight']) for u, v in net.edges])[::-1][topk] 
        threshold = limit_weight

    if type(standard_and_expected) == int:
        limit_weight = np.sort([abs(net.edges[(u, v)]['weight']) for u, v in net.edges])[::-1][standard_and_expected]
        relabel_net2 = dict(enumerate(net.nodes()))
        threshold = limit_weight


    if max_compo or mean_vp or any(np.array([compo_size, compo_diam, compo_radius, strong_compo])!= None): 
        color_by_compo = True
        if load_cc != None:
            cc = np.load(load_cc)
        else:
            cc = get_connected_components(pertmat)
            if save_cc != None:
                np.save(save_cc, cc)
        if max_compo:
            threshold = np.sort(np.abs(pertmat.data))[::-1][np.argmax(cc[::-1])]
        else:
            lastmax = np.sort(np.abs(pertmat.data))[::-1][np.argmax(cc[::-1])]
            print('last maximum: {}'.format(np.round(lastmax, 2)))
            net.remove_edges_from([(u, v) for u, v in net.edges() if abs(net[u][v]['weight']) < lastmax])
            net.remove_nodes_from(list(nx.isolates(net)))
            components_list = [net.subgraph(c).copy() for c in nx.connected_components(net)] 
            if mean_vp:
                vanishing_points = [np.max([abs(net[u][v]['weight']) for u, v in c.edges()]) for c in components_list]
                threshold = np.median(vanishing_points)
            elif compo_size !=None:
                robust = [list(c.nodes()) for c in components_list if len(c.edges())>=float(compo_size)]
                net = net.subgraph([x for robust in list(robust) for x in robust])
                threshold = 0
            elif compo_diam !=None:
                robust = [list(c.nodes()) for c in components_list if nx.diameter(c)>=float(compo_diam)]
                net = net.subgraph([x for robust in list(robust) for x in robust])
                threshold = 0
            elif compo_radius !=None:
                robust = [list(c.nodes()) for c in components_list if nx.radius(c)>=float(compo_radius)]
                net = net.subgraph([x for robust in list(robust) for x in robust])
                threshold = 0
            elif strong_compo !=None:
                vanishing_points = [np.max([abs(net[u][v]['weight']) for u, v in c.edges()]) for c in components_list]
                edges_len = [len(c.edges()) for c in components_list]
                percentile = float(strong_compo)*len(components_list)/100
                vani_ranks = len(vanishing_points)+1-rankdata(vanishing_points, method='max')
                size_ranks = len(edges_len)+1-rankdata(edges_len, method='max')
                vani_nodes = [list(c.nodes()) for i, c in enumerate(components_list) if vani_ranks[i]<percentile]
                size_nodes = [list(c.nodes()) for i, c in enumerate(components_list) if size_ranks[i]<percentile]
                vani_nodes = [x for vani_nodes in list(vani_nodes) for x in vani_nodes]
                size_nodes = [x for size_nodes in list(size_nodes) for x in size_nodes]
                strong = list(set(vani_nodes) & set(size_nodes))
                net = net.subgraph(strong)


   #Detect mutations
    if mutations:
        cmd.show_as(representation="cartoon", selection="?mutations")
        cmd.color(color="grey80", selection="?mutations")
        cmd.delete("?mutations")
        mutations_list = []
        y = {j: res2str(res) for j, res in enumerate(top2.residues)}
        for resid in id2label:
            if resid in y:
                if id2label[resid] != y[resid]:
                    mutations_list.append((resid, (y[resid][0]+':').join(id2label[resid].split(':'))))
                    cmd.select("mutations", 'resi '+str(id2label[resid].split(':')[0][1:])+ ' and chain '+id2label[resid][-1], merge=1)
            else:
                print('Deletion of ', id2label[resid])
        print('List of mutations: ', ', '.join([elt[1] for elt in mutations_list]))
        cmd.show_as(representation="licorice", selection="?mutations")
        cmd.color(color="magenta", selection="?mutations")


    #Apply threshold
    if threshold !=0:
        print('Applying threshold {}'.format(threshold))
        net.remove_edges_from([(u, v) for u, v in net.edges() if abs(net[u][v]['weight']) < threshold])
        net.remove_nodes_from(list(nx.isolates(net)))

    #Induced perturbation network if needed

    if around !=None:
        net = net.subgraph(nx.node_connected_component(net, around))

    #Setting Pymol parameters
    cmd.set('auto_zoom', 0)
    cmd.set("cgo_sphere_quality", 4)
    if len(net.edges()) == 0:    
        raise ValueError('Computations give empty network')

    #Norm edges
    if edge_norm == None:
        edge_norm = max([net.edges()[(u, v)]['weight'] for u, v in net.edges()])/r

    elif edge_norm == True:
        tot_atoms_in_sel = np.sum([np.sum(elt) for elt in [topd1, topd2, topg1, topg2]])
        tot_atoms = np.sum([max(elt.shape) for elt in [topd1, topd2, topg1, topg2]])
        norm_fact = tot_atoms_in_sel**2/tot_atoms**2
        edge_norm = norm_fact*30
        print('Global normalization factor: {}'.format(1/norm_fact))


    #Function to name edges
    def name_edges(name, path):
        if name == None:
            return '.'.join(basename(path).split('.')[:-1])
        return name

    if type(standard_and_expected) == int:
        exp1 = (topd1.sum(axis=1).transpose() @ topd1).transpose() @ (topg1.sum(axis=1).transpose() @ topg1)
        exp2 = (topd2.sum(axis=1).transpose() @ topd2).transpose() @ (topg2.sum(axis=1).transpose() @ topg2)
        mat1 = divide_expected(mat1, exp1)
        mat2 = divide_expected(mat2, exp2)
        mat1, mat2 = list(map(csr_matrix, [mat1, mat2]))
        net2 = nx.from_scipy_sparse_matrix(mat2-mat1)
        net2 = nx.relabel_nodes(net2, relabel_net2)
        limit_weight = np.sort([abs(net2.edges[(u, v)]['weight']) for u, v in net2.edges])[::-1][standard_and_expected] 
        net2.remove_edges_from([(u, v) for u, v in net2.edges() if abs(net2[u][v]['weight']) < limit_weight])
        net2.remove_nodes_from(list(nx.isolates(net2)))
        colors = [(1, 1, 0), (0, 1, 1), (1, 0, 1)]
        objs_inboth = []
        objs_instd = []
        objs_inexp = []
        nodes = []
        for u, v in net.edges():
            radius = net[u][v]['weight']/edge_norm
            if (u, v) in list(net2.edges()):
                objs_inboth += [CYLINDER, *node2CA[u], *node2CA[v], radius, *colors[0], *colors[0]]
            else:
                objs_instd += [CYLINDER, *node2CA[u], *node2CA[v], radius, *colors[1], *colors[1]]
            nodes += [u, v]
        edge_norm2 = max([net2.edges()[(u, v)]['weight'] for u, v in net2.edges()])/r
        for u, v in net2.edges():
            radius = net2[u][v]['weight']/edge_norm2
            if (u, v) not in list(net.edges()):
                objs_inexp += [CYLINDER, *node2CA[u], *node2CA[v], radius, *colors[2], *colors[2]]
            nodes += [u, v]

        nodelist = set(nodes)
        objs_nodes = [COLOR, *node_color]
        for u in nodelist:
                x, y, z = node2CA[u]
                objs_nodes += [SPHERE, x, y, z, r]
        selnodes = ''.join([node2id[u] for u in nodelist])[4:]
        cmd.load_cgo(objs_inboth, 'in_both_edges') 
        cmd.load_cgo(objs_instd, 'in_std_edges')
        cmd.load_cgo(objs_inexp, 'in_exp_edges')
        cmd.load_cgo(objs_nodes, 'nodes') 



    elif color_by_contact_type:
        expected_matrices = get_expected_type(atom_mat1, atom_mat2, top1, top2, fromstruct)
        name1, name2 = list(map(name_edges, [name1, name2], [path1, path2]))
        names = ['{0}_{1}'.format(name1, sel) for sel in ['hydro', 'polar', 'mixed']] + ['{0}_{1}'.format(name2, sel) for sel in ['hydro', 'polar', 'mixed']]
        nodes_dict = {i: [] for i in range(len(names))}
        objs_dict = {i: [] for i in range(len(names))}
        colors = [(1, 0.86, 0.73), (0.68, 0.85, 0.90), (0.60, 0.98, 0.60), (1, 0.86, 0), (0.25, 0.41, 0.88), (0, 0.50, 0)]
        for u, v in net.edges():
            radius = net[u][v]['weight']/edge_norm
            id_u, id_v = label2id[u], label2id[v]
            values = list(map(lambda _mat: _mat[id_v, id_u], expected_matrices))
            type_of_contact = np.argmax(values)
            objs_dict[type_of_contact] += [CYLINDER, *node2CA[u], *node2CA[v], radius, *colors[type_of_contact], *colors[type_of_contact]]
            nodes_dict[type_of_contact] += [u, v]
        selnodes = ''
        for toc in nodes_dict:
            nodelist = set(nodes_dict[toc])
            objs_dict[toc]+=[COLOR, *node_color]
            for u in nodelist:
                x, y, z = node2CA[u]
                objs_dict[toc]+=[SPHERE, x, y, z, r]
            selnodes += ''.join([node2id[u] for u in nodelist])[4:]

        for i, name in zip(objs_dict.keys(), names):
            cmd.load_cgo(objs_dict[i], '{}_edges'.format(name))         
    
    #Coloring by components
    elif color_by_compo:
        components_list = [net.subgraph(c).copy() for c in nx.connected_components(net)]
        diameters = [nx.diameter(c) for c in components_list]
        ranking = np.argsort(diameters)[::-1]
        colors = sns.color_palette(palette, n_colors=len(components_list)+1)
        for i, c in enumerate(colors):
            if c[0] == c[1] == c[2]:
                print(c)
                colors.pop(i)
                break
        selnodes = ''
        for i, rank in enumerate(ranking):
            color, compo = colors[rank], components_list[rank]
            _obj, nodelist = [], []
            for u, v in compo.edges():
                radius = net[u][v]['weight']/edge_norm
                if abs(net[u][v]['weight']) >= threshold:
                    if not force_binary_color:
                        _obj+=[CYLINDER, *node2CA[u], *node2CA[v], radius, *color, *color]
                    else:
                        if net[u][v]['weight'] <= 0:
                            _obj+=[CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color1, *edge_color1]
                        else:
                            _obj+=[CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color2, *edge_color2]
                    nodelist += [u, v]
#            cmd.load_cgo(_obj, 'Component{}_edges'.format(i+1))
            _obj+=[COLOR, *node_color]
            nodelist = set(nodelist)
            selnodes += ''.join([node2id[u] for u in nodelist])[4:]
            for u in nodelist:
                x, y, z = node2CA[u]
                _obj+=[SPHERE, x, y, z, r]
            cmd.load_cgo(_obj, 'Component{}'.format(i+1)) 

    #Color by group of relevance  
    elif color_by_group:
        weights = np.array([abs(net[u][v]['weight']) for u, v in net.edges()]).reshape(-1, 1)
        birch = Birch(n_clusters=n_clusters).fit(weights)
        labels = birch.predict(weights)
        ordered_labels = labels[np.argsort(pertmat.data)]
        _, idx = np.unique(ordered_labels, return_index=True)
        mapping = dict(zip(ordered_labels[np.sort(idx)], np.sort(np.unique(ordered_labels))))
        i2color =  dict(zip(ordered_labels[np.sort(idx)], sns.color_palette(palette, len(np.unique(ordered_labels)))[::-1]))
        selnodes = ''
        if show_top_group == None:
            show_top_group = len(mapping.keys())
        
        for j, i in enumerate(list(mapping.keys())[:show_top_group]):
            _obj, nodelist = [], []
            _net = net.copy()
            to_remove_edges = [(u, v) for k, (u, v) in enumerate(net.edges()) if labels[k] != i]
            _net.remove_edges_from(to_remove_edges)
            _net.remove_nodes_from(list(nx.isolates(_net)))
            for u, v in _net.edges():
                radius = net[u][v]['weight']/edge_norm
                if abs(net[u][v]['weight']) >= threshold:
                    _obj+=[CYLINDER, *node2CA[u], *node2CA[v], radius, *i2color[j], *i2color[j]]
                    nodelist += [u, v]
#            cmd.load_cgo(_obj, 'Component{}_edges'.format(i+1))
            _obj+=[COLOR, *node_color]
            nodelist = set(nodelist)
            selnodes += ''.join([node2id[u] for u in nodelist])[4:]
            for u in nodelist:
                x, y, z = node2CA[u]
                _obj+=[SPHERE, x, y, z, r]
            cmd.load_cgo(_obj, 'Group{}'.format(j+1)) 

    #Default edge coloring   
    else:
        obj1, obj2, nodelist = [], [], []
        for u, v in net.edges():
            radius = net[u][v]['weight']/edge_norm
            if abs(net[u][v]['weight']) >= threshold:
                if 'color' in net[u][v]: 
                    if net[u][v]['color'] == 'r':
                        obj1+=[CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color1, *edge_color1]
                    else:
                        obj2+=[CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color2, *edge_color2]
                else:
                    if net[u][v]['weight'] <= 0:
                        obj1+=[CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color1, *edge_color1]
                    else:
                        obj2+=[CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color2, *edge_color2]
                nodelist+=[u, v]
        name1, name2 = map(name_edges, [name1, name2], [path1, path2])
        cmd.load_cgo(obj1, name1+'_edges')
        cmd.load_cgo(obj2, name2+'_edges')

        #Drawing nodes 
        obj=[COLOR, *node_color]
        nodelist = set(nodelist)
        selnodes = ''.join([node2id[u] for u in nodelist])[4:]
        for u in nodelist:
            x, y, z = node2CA[u]
            obj+=[SPHERE, x, y, z, r]

        cmd.load_cgo(obj, name_nodes)


    #Creating text for labeling components
    if label_compo != '' or compos_to_excel !=None:
        if compos_to_excel != None:
            rows_list = []
        objtxt = []
        axes = -np.array(cmd.get_view()[:9]).reshape(3,3)
        components_list = [net.subgraph(c).copy() for c in nx.connected_components(net)]
        diameters = [nx.diameter(c) for c in components_list]
        for i, j in enumerate(np.argsort(diameters)[::-1]):
            row_dict = {}
            c = components_list[j]
            sses = sorted(list(set([node2SS[node] for node in c])))
            if compos_to_excel !=None:
                row_dict['Secondary structure elements'] = ','.join(sses)
                row_dict['Vanishing point'] = np.max([abs(net[u][v]['weight']) for u, v in c.edges()])
                row_dict['Diameter'] = nx.diameter(c)
                row_dict['Size'] = len(c.edges())
                row_dict['Size rank'] = i+1

            else:
                print('Component {}\n'.format(i+1), ', '.join(sses))
                print('Size (number of edges) {}'.format(len(c.edges())))
                print('Vanishing point: {}'.format(np.max([abs(net[u][v]['weight']) for u, v in c.edges()])))
            if 'h' in str(label_compo):
                methods = ['eigenvector', 'hits_hub', 'hits_authority', 'pagerank', 'betweenness', 'katz']
                hubs = [get_hubs(c, method) for method in methods]
                if compos_to_excel !=None:
                    row_dict.update(dict(zip(methods, hubs)))
                else:
                    print(dict(zip(methods, hubs)))
            if 'c' in str(label_compo):
                pos = np.array(node2CA[next(c.__iter__())]) + (axes[0])
                cyl_text(objtxt, plain, pos, 'Component {}'.format(i+1), radius=0.1, color=[0, 0, 0], axes=axes)
            if compos_to_excel:
                rows_list.append(row_dict)
        if compos_to_excel:
            df = pd.DataFrame(rows_list)
            df.to_excel(compos_to_excel)
        if 's' in str(label_compo):
            for ss in SS2nodelist:
                nodelist = SS2nodelist[ss] 
                print(mapss[ss], ': ', ('{}--{}'.format(nodelist[0], nodelist[-1]) if len(nodelist)>1 else nodelist[0]))

#        print(objtxt)
        cmd.set("cgo_line_radius", 0.03)
        cmd.load_cgo(objtxt, 'txt')

    #labeling
    if labeling==1:
        cmd.label(selection=selnodes, expression="t2o(resn)+resi")
    if labeling==3:
        cmd.label(selection=selnodes, expression="resn+resi")

    #Summing
    if sum:
        print('Sum of contacts lost: ', np.sum(pertmat))

    if printall:
        print([(u,v, net[u][v]) for u, v in net.edges()])
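# --- Illustrative aside (not from the original project) ---
# A minimal, self-contained sketch of the threshold-then-prune idiom used
# above: edges whose |weight| falls below a cutoff are removed, and any node
# left without neighbours is dropped via nx.isolates. The toy graph and the
# cutoff value are assumptions made purely for demonstration.
import networkx as nx

toy = nx.Graph()
toy.add_weighted_edges_from([('A', 'B', 0.9), ('B', 'C', 0.1),
                             ('C', 'D', 0.2), ('D', 'E', 0.8)])
cutoff = 0.5
toy.remove_edges_from([(u, v) for u, v in toy.edges() if abs(toy[u][v]['weight']) < cutoff])
toy.remove_nodes_from(list(nx.isolates(toy)))
print(sorted(toy.nodes()))  # ['A', 'B', 'D', 'E'] -- 'C' was pruned as an isolate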
コード例 #40
0
ファイル: gph_uno_bipartite.py プロジェクト: wdong5/psi4
def _enumMaximumMatchingIter(g, match, all_matches, add_e=None):
    """Recurively search maximum matchings.

    Parameters
    ----------
    g : networkx.Graph
        Undirected bipartite graph. Nodes are separated by their
        'bipartite' attribute.
    match : list
        List of edges forming one maximum matching of `g`.
    all_matches : list
        List in which each element is a list of edges forming a maximum
        matching of `g`. Newly found matchings will be appended to this list.
    add_e : tuple, optional
        Edge used to form subproblems. If not `None`, it will be added to each
        newly found matching.

    Returns
    -------
    list
        Updated list of all maximum matchings.

    Author
    ------
    guangzhi XU ([email protected]; [email protected])
    Update time: 2017-05-21 20:09:06.

    """
    #---------------Form directed graph D---------------
    d = _formDirected(g, match)

    #-----------------Find cycles in D-----------------
    cycles = list(nx.simple_cycles(d))

    if len(cycles) == 0:

        #---------If no cycle, find a feasible path---------
        all_uncovered = set(g.node).difference(set([ii[0] for ii in match]))
        all_uncovered = all_uncovered.difference(set([ii[1] for ii in match]))
        all_uncovered = list(all_uncovered)

        #--------------If no path, terminate--------------
        if len(all_uncovered) == 0:
            return all_matches

        #----------Find a length 2 feasible path----------
        idx = 0
        uncovered = all_uncovered[idx]
        while True:

            if uncovered not in nx.isolates(g):
                paths = nx.single_source_shortest_path(d, uncovered, cutoff=2)
                len2paths = [vv for kk, vv in paths.items() if len(vv) == 3]

                if len(len2paths) > 0:
                    reversed = False
                    break

                #----------------Try reversed path----------------
                paths_rev = nx.single_source_shortest_path(d.reverse(),
                                                           uncovered,
                                                           cutoff=2)
                len2paths = [
                    vv for kk, vv in paths_rev.items() if len(vv) == 3
                ]

                if len(len2paths) > 0:
                    reversed = True
                    break

            idx += 1
            if idx > len(all_uncovered) - 1:
                return all_matches

            uncovered = all_uncovered[idx]

        #-------------Create a new matching M'-------------
        len2path = len2paths[0]
        if reversed:
            len2path = len2path[::-1]
        len2path = list(zip(len2path[:-1], len2path[1:]))

        new_match = []
        for ee in d.edges():
            if ee in len2path:
                if g.node[ee[1]]['bipartite'] == 0:
                    new_match.append((ee[1], ee[0]))
            else:
                if g.node[ee[0]]['bipartite'] == 0:
                    new_match.append(ee)

        if add_e is not None:
            for ii in add_e:
                new_match.append(ii)

        all_matches.append(new_match)

        #---------------------Select e---------------------
        e = set(len2path).difference(set(match))
        e = list(e)[0]

        #-----------------Form subproblems-----------------
        g_plus = g.copy()
        g_minus = g.copy()
        g_plus.remove_node(e[0])
        g_plus.remove_node(e[1])

        g_minus.remove_edge(e[0], e[1])

        add_e_new = [
            e,
        ]
        if add_e is not None:
            add_e_new.extend(add_e)

        all_matches = _enumMaximumMatchingIter(g_minus, match, all_matches,
                                               add_e)
        all_matches = _enumMaximumMatchingIter(g_plus, new_match, all_matches,
                                               add_e_new)

    else:
        #----------------Find a cycle in D----------------
        cycle = cycles[0]
        cycle.append(cycle[0])
        cycle = list(zip(cycle[:-1], cycle[1:]))

        #-------------Create a new matching M'-------------
        new_match = []
        for ee in d.edges():
            if ee in cycle:
                if g.node[ee[1]]['bipartite'] == 0:
                    new_match.append((ee[1], ee[0]))
            else:
                if g.node[ee[0]]['bipartite'] == 0:
                    new_match.append(ee)

        if add_e is not None:
            for ii in add_e:
                new_match.append(ii)

        all_matches.append(new_match)

        #-----------------Choose an edge E-----------------
        e = set(match).intersection(set(cycle))
        e = list(e)[0]

        #-----------------Form subproblems-----------------
        g_plus = g.copy()
        g_minus = g.copy()
        g_plus.remove_node(e[0])
        g_plus.remove_node(e[1])
        g_minus.remove_edge(e[0], e[1])

        add_e_new = [
            e,
        ]
        if add_e is not None:
            add_e_new.extend(add_e)

        all_matches = _enumMaximumMatchingIter(g_minus, new_match, all_matches,
                                               add_e)
        all_matches = _enumMaximumMatchingIter(g_plus, match, all_matches,
                                               add_e_new)

    return all_matches
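# --- Hedged usage sketch (not part of the original psi4 source) ---
# _enumMaximumMatchingIter expects a bipartite graph whose nodes carry a
# 'bipartite' attribute, plus one initial maximum matching; the driver that
# seeds the recursion (and _formDirected) is not shown in this excerpt, so the
# snippet below only prepares such inputs with standard networkx calls.
import networkx as nx

g = nx.Graph()
g.add_nodes_from([0, 1, 2], bipartite=0)
g.add_nodes_from(['a', 'b', 'c'], bipartite=1)
g.add_edges_from([(0, 'a'), (0, 'b'), (1, 'b'), (2, 'c')])

# Hopcroft-Karp returns a dict mapping matched nodes in both directions;
# keep only the (left, right) pairs to obtain one matching as an edge list.
match_dict = nx.bipartite.maximum_matching(g, top_nodes=[0, 1, 2])
match = [(u, v) for u, v in match_dict.items() if g.nodes[u]['bipartite'] == 0]
print(match)  # e.g. [(0, 'a'), (1, 'b'), (2, 'c')]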
コード例 #41
0
ファイル: physarum_metrics.py プロジェクト: diegoabt/Img2net
def fault_tolerance(G,
                    elem_type,
                    removing_flag,
                    removal_percentage,
                    weight_flag='weight',
                    rseed=0,
                    deg=None,
                    bn=None):

    rng = np.random.RandomState(seed=rseed)

    dg_flag = False

    if deg is not None:
        dg_flag = True
        #print('Not None')

    bn_flag = False
    if bn is not None:
        bn_flag = True

    G_copy = G.copy()
    #print('number of nodes',len(G_copy.nodes))

    # defining the 'bucket' (i.e. set of options to remove)
    if elem_type == 'node':
        bucket = list(G.nodes())
    elif elem_type == 'edge':
        bucket = list(G.edges())
    else:
        print('elem_type not defined!')

    bucket = np.array(bucket)
    N = len(bucket)

    # defining the number of removals (a % of the things in the bucket)

    if removal_percentage > 1:

        rem_bound = removal_percentage

    else:

        rem_bound = int(removal_percentage * N)

    if rem_bound == 0:  # for very small percentages

        rem_bound += 1

    No = len(G.nodes())

    if No - rem_bound != 0:

        # removing by cases:

        ## random: we take 'rem_bound' elements from the bucket

        if removing_flag == 'random':

            idx_to_be_removed = rng.choice(len(bucket), rem_bound)

            to_be_removed = bucket[idx_to_be_removed]

        ## targeted: we remove the 'most important' elements

        elif removing_flag == 'targeted':

            if elem_type == 'node':  # we remove the nodes with highest degree

                if 'betweenness' in weight_flag or 'degree' in weight_flag:

                    if not bn_flag:
                        bn = nx.betweenness_centrality(G)
                    if not dg_flag:
                        deg = nx.degree_centrality(G)

                    deg_sorted_rem_bound = {
                        key: deg[key]
                        for key in sorted(deg, key=deg.get, reverse=True)
                        [:rem_bound]
                    }

                    bn_sorted_rem_bound = {
                        key: bn[key]
                        for key in sorted(bn, key=bn.get, reverse=True)
                        [:rem_bound]
                    }

                    #print(deg_sorted_rem_bound)

                    if weight_flag == 'degree':

                        to_be_removed = list(deg_sorted_rem_bound.keys())

                        #print('different-value elements:',len(set(deg.values())),'/',len(deg.keys()))

                    elif weight_flag == 'degree+betweenness':

                        # getting the min degree of the candidates (why? so we don't just take these rem_bound nodes sorted by their labels)

                        min_deg = min(deg_sorted_rem_bound.values())

                        #print('len before adding the equal-degree ones',len(deg_sorted_rem_bound),'of',rem_bound)

                        equal_degree = {
                            node: deg[node]
                            for node in deg.keys() if deg[node] == min_deg
                        }

                        top_sorted_without_equal_degree = [
                            node for node in deg_sorted_rem_bound.keys()
                            if node not in equal_degree.keys()
                        ]

                        equal_degree_sorted_by_bn = [
                            key for key in sorted(bn, key=bn.get, reverse=True)
                            [:rem_bound - len(top_sorted_without_equal_degree)]
                        ]  # the remaining rem_bound - len(taken) nodes

                        to_be_removed = top_sorted_without_equal_degree + equal_degree_sorted_by_bn

                    elif weight_flag == 'betweenness':

                        to_be_removed = list(bn_sorted_rem_bound.keys())

                        #print('different-value elements:',len(bn.values()),'/',len(bn.keys()))

                    else:

                        print('weight_flag not defined')

                elif 'clustering' in weight_flag:

                    cl = nx.clustering(G)

                    cl_sorted_rem_bound = {
                        key: cl[key]
                        for key in sorted(cl, key=cl.get, reverse=True)
                        [:rem_bound]
                    }

                    to_be_removed = list(cl_sorted_rem_bound.keys())

                    #print('different-value elements:',len(cl.values()),'/',len(cl.keys()))

                else:
                    print('weight_flag not defined')

                #to_be_removed = rem

            elif elem_type == 'edge':  # we remove the edges with largest weight (either 'weight' or 'length')

                weights = {edge: G.edges[edge][weight_flag] for edge in bucket}

                weights_sorted_rem_bound = {
                    key: weights[key]
                    for key in sorted(weights, key=weights.get, reverse=True)
                    [:rem_bound]
                }

                to_be_removed = list(weights_sorted_rem_bound.keys())

        else:

            print('removing_flag not defined!')

        #print('tbr',to_be_removed)

        if elem_type == 'node':

            G_copy.remove_nodes_from(to_be_removed)

        elif elem_type == 'edge':

            G_copy.remove_edges_from(to_be_removed)

        else:
            print('wrong input')

        G_copy.remove_nodes_from(list(nx.isolates(G_copy)))

        N1 = len(G_copy.nodes())

        try:
            largest_cc = max(nx.connected_components(G_copy),
                             key=len)  # this is a set of nodes
        except:
            print('graph is too small')
            largest_cc = []
            N1 = 1
        # if elem_type == 'edge': #using the same removal_percentage given for the edges causes inconsistencies

        # 	removal_percentage = (No-len(G_copy.nodes()))/No
        # 	print('No',No, 'removed',No-len(G_copy.nodes()), '%',removal_percentage)
        # 	print(removal_percentage)

        # denominator: the remaining number of NODES after removing

        # denom =int((1-removal_percentage)*No)
        denom = N1

        #print('number of elements removed:',len(to_be_removed))

        # debugging:

        cc = list(nx.connected_components(G_copy))
        len_cc = [len(c) for c in cc]

        #assert No >= sum(len_cc) + round(removal_percentage*No) - 1

        gcc = len(largest_cc) / denom

        #print('lcc',len(largest_cc),'den',denom)

    else:
        print('no nodes left.')
        gcc = 0

    #print('gcc',gcc)

    return gcc, G_copy
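# --- Hedged usage sketch (graph and parameters below are assumptions) ---
# fault_tolerance returns the fraction of surviving nodes that end up in the
# largest connected component after the requested removals, together with the
# pruned copy of the graph.
import networkx as nx

G_demo = nx.erdos_renyi_graph(200, 0.05, seed=1)
gcc_fraction, G_pruned = fault_tolerance(G_demo,
                                         elem_type='node',
                                         removing_flag='random',
                                         removal_percentage=0.10,
                                         rseed=42)
print(gcc_fraction, G_pruned.number_of_nodes())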
コード例 #42
0
ファイル: core.py プロジェクト: Adeilsoara/LearnPython
def k_truss(G, k):
    """Returns the k-truss of `G`.

    The k-truss is the maximal induced subgraph of `G` which contains at least
    three vertices where every edge is incident to at least `k-2` triangles.

    Parameters
    ----------
    G : NetworkX graph
      An undirected graph
    k : int
      The order of the truss

    Returns
    -------
    H : NetworkX graph
      The k-truss subgraph

    Raises
    ------
    NetworkXError

      The k-truss is not defined for graphs with self loops or parallel edges
      or directed graphs.

    Notes
    -----
    A k-clique is a (k-2)-truss and a k-truss is a (k+1)-core.

    Not implemented for digraphs or graphs with parallel edges or self loops.

    Graph, node, and edge attributes are copied to the subgraph.

    K-trusses were originally defined in [2] which states that the k-truss
    is the maximal induced subgraph where each edge belongs to at least
    `k-2` triangles. A more recent paper, [1], uses a slightly different
    definition requiring that each edge belong to at least `k` triangles.
    This implementation uses the original definition of `k-2` triangles.

    References
    ----------
    .. [1] Bounds and Algorithms for k-truss. Paul Burkhardt, Vance Faber,
       David G. Harris, 2018. https://arxiv.org/abs/1806.05523v2
    .. [2] Trusses: Cohesive Subgraphs for Social Network Analysis. Jonathan
       Cohen, 2005.
    """
    H = G.copy()

    n_dropped = 1
    while n_dropped > 0:
        n_dropped = 0
        to_drop = []
        seen = set()
        for u in H:
            nbrs_u = set(H[u])
            seen.add(u)
            new_nbrs = [v for v in nbrs_u if v not in seen]
            for v in new_nbrs:
                if len(nbrs_u & set(H[v])) < (k - 2):
                    to_drop.append((u, v))
        H.remove_edges_from(to_drop)
        n_dropped = len(to_drop)
        H.remove_nodes_from(list(nx.isolates(H)))

    return H
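# --- Hedged usage sketch (the toy graph is an assumption for illustration) ---
# For k=3 every surviving edge must lie in at least one triangle, so the K4
# below is kept, while the pendant edge (3, 4), and node 4 left isolated by
# its removal, are dropped.
import networkx as nx

G_demo = nx.complete_graph(4)   # K4: every edge sits in two triangles
G_demo.add_edge(3, 4)           # pendant edge belonging to no triangle
H = k_truss(G_demo, 3)
print(sorted(H.nodes()))        # [0, 1, 2, 3]
print(H.number_of_edges())      # 6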
コード例 #43
0
ファイル: read_DNC.py プロジェクト: zhh0998/NetEmb-Datasets
    tmp = df['time'][0]  # time is in ascending order
    for i in range(len(df['time'])):
        if tmp == df['time'][i]:  # if is in current day
            g.add_edge(str(df['from'][i]), str(df['to'][i]))
            if i == len(df['time']) - 1:  # EOF ---
                cnt_graphs += 1
                # graphs.append(g.copy())  # ignore the last day
                print('processed graphs ', cnt_graphs, '/', all_days,
                      'ALL done......\n')
        elif tmp < df['time'][i]:  # if goes to next day
            cnt_graphs += 1
            if (cnt_graphs // gap) >= (
                    all_days // gap - 70
            ) and cnt_graphs % gap == 0:  # the last 50 graphs 'and' the gap
                g.remove_edges_from(g.selfloop_edges())
                g.remove_nodes_from(list(nx.isolates(g)))
                graphs.append(g.copy(
                ))  # append previous g; for a part of graphs to reduce ROM
                # g = nx.Graph()            # reset graph, based on the real-world application
            if cnt_graphs % 50 == 0:
                print('processed graphs ', cnt_graphs, '/', all_days)
            tmp = df['time'][i]
            g.add_edge(str(df['from'][i]), str(df['to'][i]))
        else:
            print(
                'ERROR -- EXIT -- please double check if time is in ascending order!'
            )
            exit(0)

    # --- take out and save part of graphs ----
    print('total graphs: ', len(graphs))
コード例 #44
0
def network_generate():
    import numpy as np
    import math as math
    import networkx as nx
    from scipy.stats import bernoulli

    ### We may have real world networks or synthetic graphs following the DCSBM

    ## Choice of the network to generate
    #network='real_world'   ## Choose either 'real_world' or 'DCSBM'
    network = 'DCSBM'

    if network == 'real_world':
        ##Get the adjacency matrix
        #Given the network, find the corresponding adjacency matrix
        scenario = 'dolphins.gml'
        G = nx.read_gml(scenario)
        G_0 = G

        ##Remove nodes without neighbors
        isolated_nodes = list(nx.isolates(G))  # materialise before mutating G
        G.remove_nodes_from(isolated_nodes)

        ## Adjacency matrix
        A = nx.adjacency_matrix(G, nodelist=None, weight=None)
        A = A.todense()

        ##New number of instances after removing isolated nodes
        n = len(A[:, 1])

        ## Ground truth
        ground_truth = np.zeros((n, 1))
        for i in range(int(0), int(n)):
            if (0 in G_0.nodes()):
                ground_truth[i] = G_0.node[i]['value']
            else:
                ground_truth[i] = G_0.node[i + 1]['value']

        ## Remove the label of the node with no neighbor
        np.delete(ground_truth, isolated_nodes)
        ##Check whether the first element of the ground truth starts from 0 or from 1
        if (min(ground_truth) == 0):
            startGround_truth = 0
        else:
            startGround_truth = 1

    if network == 'DCSBM':
        n_init = 1000
        ##Class proportions
        cs = [0.25, 0.25, 0.5]
        ## Number of classes
        K = len(cs)
        ## Number of instances per class
        ns = np.array(cs) * n_init

        ##Setting of the model parameters
        ## Average connectivities q's
        #bs = [0.25,0.75]
        bs = [0.75, 0.25]
        q1 = 0.4
        q2 = 0.8
        q = np.repeat(
            np.array([q1, q2]),
            [int(n_init * bs[0]), int(n_init * bs[1])])

        # Choice of affinity matrix M
        #M = 10 * (-1 * np.ones(K) + 2 * np.identity(K))
        M = 5 * np.identity(K)
        # Construction of C
        C = np.ones((int(K), int(K))) + M / math.sqrt(n_init)

        ##Extension of C into an n x n block matrix
        large_C = np.zeros((int(n_init), int(n_init)))
        for i in range(int(0), int(K)):
            for j in range(int(0), int(K)):
                large_C[int(np.sum(ns[int(0):i])):int(np.sum(ns[int(0):i +
                                                                1])),
                        int(np.sum(ns[int(0):j])):int(np.sum(ns[int(0):j + 1])
                                                      )] = C[i, j] * np.ones(
                                                          (ns[i], ns[j]))

        ## Construction of matrix of DCSBM edge probabilities
        P = np.minimum(((np.diag(q)).dot(large_C)).dot(np.diag(q)),
                       np.ones((n_init, n_init)))
        ## Generation of the adjacency matrix A
        A = np.zeros((int(n_init), int(n_init)))
        for i in range(int(0), int(n_init)):
            #A[i,]=np.random.binomial(1,P[i,], size=n)
            A[i, ] = bernoulli.rvs(P[i, :], size=n_init)

        ## Ground_truth
        ground_truth = np.zeros((n_init, 1))
        for i in range(int(0), int(K)):
            ground_truth[int(np.sum(ns[int(0):i])):int(np.sum(ns[int(0):i + 1])
                                                       )] = i * np.ones(
                                                           (ns[i], 1))

        ##Construct graph from adjacency matrix
        G = nx.from_numpy_matrix(A)
        ##Remove nodes without neighbors
        isolated_nodes = list(nx.isolates(G))  # materialise before mutating G
        G.remove_nodes_from(isolated_nodes)
        ## Remove the label of the node with no neighbor
        np.delete(ground_truth, isolated_nodes)

        A = nx.adjacency_matrix(G, nodelist=None, weight=None)
        A = A.todense()

        ## Symmetrization of the adjacency matrix in order to have an undirected unweighted graph
        A = np.triu(A) + np.transpose(np.triu(A))
        startGround_truth = 0

    return A, ground_truth, startGround_truth
コード例 #45
0
 def get_num_isolates(self):
     """ return the number of isolated nodes """
     return len(list(nx.isolates(self.G)))
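# --- Hedged mini example (standalone, assumes only networkx) ---
# nx.isolates yields the degree-zero nodes, so counting them is just the
# length of the materialised generator, exactly as the method above does.
import networkx as nx

G_demo = nx.Graph()
G_demo.add_edge(1, 2)
G_demo.add_node(3)   # isolated node
print(len(list(nx.isolates(G_demo))))   # 1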
コード例 #46
0
            final_mat.iloc[i, j] = 0

print(final_mat)
for column in CONFIG.column_names:
    final_mat[column] = np.where(np.abs(final_mat[column]) < .5, 0, 1)

# Save final binary adjacency matrix
final_mat.to_csv("results/final_adjacency_matrix.csv", index=True)

# Draw the DAG
final_DAG = from_numpy_matrix(final_mat.to_numpy(), create_using=nx.DiGraph)
final_DAG = nx.relabel_nodes(
    final_DAG,
    dict(zip(list(range(CONFIG.data_variable_size)), CONFIG.column_names)))

final_DAG.remove_nodes_from(list(nx.isolates(final_DAG)))

nx.draw(
    final_DAG,
    node_color="lightcoral",
    node_size=75,
    font_size=3,
    width=0.5,
    arrowsize=4,
    with_labels=True,
    pos=nx.spring_layout(final_DAG),
)
plt.draw()
plt.savefig(os.path.expanduser("results/DAG_plot_alarm.png"),
            format="PNG",
            dpi=500)
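# --- Hedged mini example (variable names and values are assumptions) ---
# The same binarise-then-prune recipe on a tiny hand-made matrix: entries
# below the 0.5 cutoff become 0, the rest 1, and any variable left without
# edges would be removed as an isolate before drawing.
import numpy as np
import pandas as pd
import networkx as nx

cols = ['X', 'Y', 'Z']
demo = pd.DataFrame([[0.0, 0.7, 0.1],
                     [0.0, 0.0, 0.9],
                     [0.0, 0.0, 0.0]], columns=cols)
for column in cols:
    demo[column] = np.where(np.abs(demo[column]) < .5, 0, 1)
dag = nx.from_numpy_array(demo.to_numpy(), create_using=nx.DiGraph)
dag = nx.relabel_nodes(dag, dict(zip(range(len(cols)), cols)))
dag.remove_nodes_from(list(nx.isolates(dag)))
print(sorted(dag.edges()))   # [('X', 'Y'), ('Y', 'Z')]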
コード例 #47
0
def graph_load_batch(data_dir,
                     min_num_nodes=20,
                     max_num_nodes=1000,
                     name='ENZYMES',
                     node_attributes=True,
                     graph_labels=True):
  '''
    load many graphs, e.g. enzymes
    :return: a list of graphs
    '''
  print('Loading graph dataset: ' + str(name))
  G = nx.Graph()
  # load data
  path = os.path.join(data_dir, name)
  data_adj = np.loadtxt(
      os.path.join(path, '{}_A.txt'.format(name)), delimiter=',').astype(int)
  if node_attributes:
    data_node_att = np.loadtxt(
        os.path.join(path, '{}_node_attributes.txt'.format(name)),
        delimiter=',')
  data_node_label = np.loadtxt(
      os.path.join(path, '{}_node_labels.txt'.format(name)),
      delimiter=',').astype(int)
  data_graph_indicator = np.loadtxt(
      os.path.join(path, '{}_graph_indicator.txt'.format(name)),
      delimiter=',').astype(int)
  if graph_labels:
    data_graph_labels = np.loadtxt(
        os.path.join(path, '{}_graph_labels.txt'.format(name)),
        delimiter=',').astype(int)

  data_tuple = list(map(tuple, data_adj))
  # print(len(data_tuple))
  # print(data_tuple[0])

  # add edges
  G.add_edges_from(data_tuple)
  # add node attributes
  for i in range(data_node_label.shape[0]):
    if node_attributes:
      G.add_node(i + 1, feature=data_node_att[i])
    G.add_node(i + 1, label=data_node_label[i])
  G.remove_nodes_from(list(nx.isolates(G)))

  # remove self-loop
  G.remove_edges_from(nx.selfloop_edges(G))

  # print(G.number_of_nodes())
  # print(G.number_of_edges())

  # split into graphs
  graph_num = data_graph_indicator.max()
  node_list = np.arange(data_graph_indicator.shape[0]) + 1
  graphs = []
  max_nodes = 0
  for i in range(graph_num):
    # find the nodes for each graph
    nodes = node_list[data_graph_indicator == i + 1]
    G_sub = G.subgraph(nodes)
    if graph_labels:
      G_sub.graph['label'] = data_graph_labels[i]
    # print('nodes', G_sub.number_of_nodes())
    # print('edges', G_sub.number_of_edges())
    # print('label', G_sub.graph)
    if G_sub.number_of_nodes() >= min_num_nodes and G_sub.number_of_nodes(
    ) <= max_num_nodes:
      graphs.append(G_sub)
      if G_sub.number_of_nodes() > max_nodes:
        max_nodes = G_sub.number_of_nodes()
      # print(G_sub.number_of_nodes(), 'i', i)
      # print('Graph dataset name: {}, total graph num: {}'.format(name, len(graphs)))
      # logging.warning('Graphs loaded, total num: {}'.format(len(graphs)))
  print('Loaded')
  return graphs
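# --- Hedged usage sketch (the data directory below is an assumption) ---
# graph_load_batch expects the standard TU-format files <name>_A.txt,
# <name>_graph_indicator.txt, <name>_node_labels.txt, etc. under
# data_dir/<name>; with the ENZYMES dataset unpacked there, it returns one
# networkx graph per enzyme.
graphs = graph_load_batch('data', name='ENZYMES',
                          min_num_nodes=20, max_num_nodes=1000)
print(len(graphs), max(g.number_of_nodes() for g in graphs))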
コード例 #48
0
ファイル: network.py プロジェクト: resourcesbookvisual/code
    nx.draw_networkx_labels(net,pos=pos,
                            labels={user:nodeLabel},
                            font_size=sizeLabel**8)

plt.legend(markerscale=1, loc="best")
plt.show()




#%%

# to indirected
unet=net.to_undirected(reciprocal=True)
# removing isolates
unet.remove_nodes_from(list(nx.isolates(unet)))


#%%

# bring algorithm
from cdlib import algorithms

# Find the communities
modCommunity = algorithms.greedy_modularity(unet).communities


#%%
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import rgb2hex
コード例 #49
0
 def _prune_nodes(self, network_graph):
     """ Remove all nonzero nodes if threshold == 0 """
     nodes_to_remove = list(nx.isolates(network_graph))
     network_graph.remove_nodes_from(nodes_to_remove)
     return network_graph
コード例 #50
0
'''
print 'Number of nodes (before preprocessing) = %d' % len(G.nodes())
print 'Number of edges (before preprocessing) = %d' % len(G.edges())
deg_list = nx.degree(G, G.nodes()).values()
avg_degree = sum(deg_list)/float(len(deg_list))
print 'Average degree (before preprocessing) = %f' % avg_degree

# self loops removal

print 'Number of self-loops = %d' % len(G.selfloop_edges())
G.remove_edges_from(G.selfloop_edges())
print 'Self-loops removed!'

# isolated nodes removal

print 'Number of isolated nodes = %d' % len(nx.isolates(G))
G.remove_nodes_from(nx.isolates(G))
print 'Isolated nodes removed!'

# multiple parallel edges test

no_of_nodes_with_multiP_edges = np.count_nonzero(
        nx.adjacency_matrix(G).data > 1)/2
if (no_of_nodes_with_multiP_edges == 0):
    print 'No multiple parallel edges in the graph.'
else:
    print 'Multiple parallel edges found in the graph!'
    print 'Number of nodes with multiple parallel edges = %d' % no_of_nodes_with_multiP_edges

# summary of the graph (after preprocessing)
コード例 #51
0

df, df_label = processed_data()

all_userids = set(df['user_id'].unique()).union(
    set(df['dest_user_id_if_known'].unique()))

userids_to_newids, newids_to_userids = get_contiguous_ids(
    all_userids, df_label)  #for relabling

GT0 = construct_graph_sequence(df, userids_to_newids, newids_to_userids)

#Remove the last three months because they have lots of isolates
isolated = []
for G in GT0:
    isolated.append([x for x in nx.isolates(G)])
# print([len(x) for x in isolated])
isolated = isolated[:6]
# print([len(x) for x in isolated])

#Remove nodes that are isolated in at least one snapshot
beta_nodes = set()
for x in isolated:
    beta_nodes = set().union(beta_nodes, x)
print('beta_nodes', beta_nodes)
# print('len(beta_nodes)',len(beta_nodes))

beta_userids = [newids_to_userids[x] for x in beta_nodes]
all_userids_filtered = [x for x in all_userids if x not in beta_userids]
userids_to_newids_filtered, newids_to_userids_filtered = get_contiguous_ids(
    all_userids_filtered, df_label)  #for relabling
コード例 #52
0
ファイル: graph_reading.py プロジェクト: tzw28/GraphEmbedding
def read_cites_contents_graph(graph_name):
    G = nx.Graph()
    node_class = {}
    cite_file = "data/{}.cites".format(graph_name)
    content_file = "data/{}.content".format(graph_name)
    feature_dict = {}
    with open(content_file, "r") as f:
        lines = f.readlines()
        for line in lines:
            strs = line.split("\t")
            node = strs[0]
            G.add_node(node)
            paper_class = strs[-1].strip()
            node_class[node] = paper_class
            # G.nodes[node]["class"] = paper_class
            features = strs[1:-1]
            for i, feat in enumerate(features):
                # continue
                if feat == "0":
                    continue
                feat_name = "feat{}".format(i)
                if feat_name not in feature_dict.keys():
                    feature_dict[feat_name] = 1
                else:
                    feature_dict[feat_name] += 1
                G.nodes[node][feat_name] = 1
    with open(cite_file, "r") as f:
        lines = f.readlines()
        for line in lines:
            strs = line.split("\t")
            s = strs[0]
            t = strs[1].strip()
            if s not in node_class.keys():
                continue
            if t not in node_class.keys():
                continue
            G.add_edge(s, t)
    self_edge = []
    for n, nbrs in G.adjacency():
        for nbr in nbrs.keys():
            if n == nbr:
                self_edge.append(n)
    for s in self_edge:
        G.remove_edge(s, s)
        # print("remove {} to {}".format(s, s))
    G.remove_nodes_from(list(nx.isolates(G)))
    sorted_items = sorted(feature_dict.items(), key=lambda d: (d[1]), reverse=True)
    key_features = [feat for feat, num in sorted_items]
    for node in list(G.nodes(data=True)):
        node_name = node[0]
        attr_dict = node[1]
        for feat in list(attr_dict.keys()):
            if feat in key_features:
                continue
            else:
                G.nodes[node_name].pop(feat)
    # G = largest_connected_subgraph(G)
    print("Read {}, {} nodes, {} edges.".format(
        graph_name,
        len(list(G.nodes)),
        len(list(G.edges))
    ))
    class_idx = {}
    count = 0
    for node, clas in node_class.items():
        if clas in class_idx.keys():
            continue
        class_idx[clas] = count
        count += 1
    for node in node_class.keys():
        node_class[node] = class_idx[node_class[node]]
    return G, node_class
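# --- Hedged usage sketch (the dataset name is an assumption) ---
# read_cites_contents_graph expects data/<name>.cites and data/<name>.content
# in the Cora/Citeseer citation format; with e.g. the Cora files in place it
# returns the cleaned citation graph and an integer class index per node.
G_cites, node_class = read_cites_contents_graph("cora")
print(len(node_class), max(node_class.values()) + 1)   # labelled nodes, number of classes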
コード例 #53
0
ファイル: filtering.py プロジェクト: diegoabt/Img2net
def filtering(
    Gpe,
    sources=None,
    sinks=None,
    beta_d=1.5,
    threshold=1e-3,
    tdens0=None,
    BPweights="tdens",
    stopping_threshold_f=1e-3,
    weight_flag="unit",
    rhs=None,
    MaxNumIter=100,
    verbose=False,
):

    inputs = {}

    if sources is None and sinks is None and rhs is None:

        raise ValueError(
            "Either rhs or sources/sinks need to be passed as inputs.")

    ### relabeling

    # todo: add an if for the case in which nodes are already relabeled

    mapping = {}
    k = -1
    for node in Gpe.nodes():
        k += 1
        mapping[node] = k
    Gpe_rel = nx.relabel_nodes(Gpe, mapping, copy=True)

    edges = Gpe_rel.edges()
    nedges = len(edges)
    nodes = Gpe_rel.nodes()
    nnodes = len(nodes)

    # tdens0

    if tdens0 != None:
        try:
            tdens0 = np.array([(Gpe_rel.edges[edge]["tdens"])
                               for edge in edges])
        except:
            tdens0 = np.array([(Gpe_rel.edges[edge]["flux"])
                               for edge in edges])

    # topol

    topol = np.zeros((nedges, 2))
    k = -1
    for edge in edges:
        k += 1
        topol[k, :] = edge

    # weight (uniform)

    weight = np.empty(nedges, dtype=object)

    k = -1
    for edge in edges:
        k += 1
        if weight_flag == "unit":
            weight[k] = 1
        elif weight_flag == "length":
            weight[k] = distance.euclidean(Gpe_rel.nodes[edge[0]]["pos"],
                                           Gpe_rel.nodes[edge[1]]["pos"])
        else:
            weight[k] = Gpe_rel.edges[edge][weight_flag]

    # rhs (f+ and f-)

    if (
            sinks is not None and sources is not None
    ):  # sources and sinks are given as lists of nodes from which rhs is built;
        # otherwise (else branch below) the rhs vector must be passed directly.

        rhs = np.zeros(nnodes)
        sources_rel = [mapping[node] for node in sources]
        sinks_rel = [mapping[node] for node in sinks]

        number_sources = len(sources_rel)
        number_sinks = len(sinks_rel)

        for node in nodes:
            if node in sources_rel:
                rhs[node] = 1 / number_sources
            elif node in sinks_rel:
                rhs[node] = -1 / number_sinks
            else:
                rhs[node] = 0
    else:
        sources_rel = [i for i in range(len(rhs)) if rhs[i] > 0]
        sinks_rel = [i for i in range(len(rhs)) if rhs[i] < 0]

    assert sum(rhs) < 0.01
    assert len(rhs) == nnodes
    # init and set controls
    ctrl = Dmkcontrols.DmkCtrl()
    Dmkcontrols.get_from_file(ctrl, root + "/nextrout_core/dmk_discr.ctrl")
    # if and where save data
    ctrl.id_save_dat = 1
    ctrl.fn_tdens = "tdens.dat"
    ctrl.fn_pot = "pot.dat"
    ctrl.max_time_iterations = MaxNumIter
    # if and where save log
    ctrl.id_save_statistics = 1
    ctrl.fn_statistics = "dmk.log"
    # if print info
    #
    if verbose:
        ctrl.info_state = 3
        ctrl.info_update = 3
        print(ctrl.outer_solver_approach)
    else:
        ctrl.info_state = 0
        ctrl.info_update = 0

    [info, tdens, pot, flux, timefun] = dmk_graph.dmk_graph(
        topol,
        rhs,
        pflux=beta_d,
        tdens0=tdens0,
        tolerance=stopping_threshold_f,
        weight=weight,
        ctrl=ctrl,
    )

    tdens = list(tdens)
    flux = list(flux)

    if (info == 0) and verbose:
        print("Convergence achieved")

    max_flux = max(flux)
    max_tdens = max(tdens)
    Gf = nx.Graph()
    ed_count = -1
    weights_in_Gf = []
    for edge in Gpe_rel.edges():
        ed_count += 1
        if BPweights == "flux":
            if abs(flux[ed_count]) > max_flux * threshold:
                Gf.add_edge(*edge, flux=flux[ed_count])
                weights_in_Gf.append(flux[ed_count])

        elif BPweights == "tdens":
            if abs(tdens[ed_count]) > max_tdens * threshold:
                Gf.add_edge(*edge, tdens=tdens[ed_count])
                weights_in_Gf.append(tdens[ed_count])

        else:
            raise ValueError("BPweights flag not defined!.")
        try:
            Gf.add_node(
                edge[0], weight=Gpe_rel.nodes[edge[0]]["tdens"]
            )  # todo: this needs to be fixed once the flux is working again (BPweights)
            Gf.add_node(edge[1], weight=Gpe_rel.nodes[edge[1]]["tdens"])
        except:
            pass

    Gf.remove_nodes_from(list(nx.isolates(Gf)))

    weights_in_Gf = np.array(weights_in_Gf)
    colors = []

    for node in Gf.nodes():

        Gf.nodes[node]["pos"] = Gpe_rel.nodes[node]["pos"]

        if node in sources_rel:
            colors.append("g")
        elif node in sinks_rel:
            colors.append("r")
        else:
            colors.append("k")

    inputs["topol"] = topol
    inputs["rhs"] = rhs
    inputs["pflux"] = beta_d
    inputs["tdens0"] = tdens0

    return Gf, weights_in_Gf, colors, inputs
コード例 #54
0
def clusterByDistances( cooc_matrix
               , field1=None, field2=None
               , distance=None):
    '''
    clusterByDistance :: Coocs[nga, ngb => ccweight] -> (Graph, Partition, {ids}, {weight})
    '''

    # implicit global session

    authorized = ['conditional', 'distributional', 'cosine']
    if distance not in authorized:
        raise ValueError("Distance must be in %s" % str(authorized))

    matrix = defaultdict(lambda : defaultdict(float))
    ids    = defaultdict(lambda : defaultdict(int))
    labels = dict()
    weight = dict()

    for cooc in cooc_matrix.items:
        ngram1_id = cooc[0]
        ngram2_id = cooc[1]
        ccweight = cooc_matrix.items[cooc]

        matrix[ngram1_id][ngram2_id] = ccweight
        matrix[ngram2_id][ngram1_id] = ccweight

        ids[ngram1_id] = (field1, ngram1_id)
        ids[ngram2_id] = (field2, ngram2_id)

        weight[ngram1_id] = weight.get(ngram1_id, 0) + ccweight
        weight[ngram2_id] = weight.get(ngram2_id, 0) + ccweight

    x = pd.DataFrame(matrix).fillna(0)

    if distance == 'conditional':
        x = x / x.sum(axis=1)
        #y = y / y.sum(axis=0)

        xs = x.sum(axis=1) - x
        ys = x.sum(axis=0) - x

        # top included or excluded
        n = ( xs + ys) / (2 * (x.shape[0] - 1))
        # top generic or specific
        m = ( xs - ys) / (2 * (x.shape[0] - 1))

        n = n.sort_index(inplace=False)
        m = m.sort_index(inplace=False)

        nodes_included = 10000 #int(round(size/20,0))
        #nodes_excluded = int(round(size/10,0))

        nodes_specific = 10000 #int(round(size/10,0))
        #nodes_generic = int(round(size/10,0))

        # TODO use the included score for the node size
        n_index = pd.Index.intersection(x.index, n.index[:nodes_included])
        # Generic:
        #m_index = pd.Index.intersection(x.index, m.index[:nodes_generic])
        # Specific:
        m_index = pd.Index.intersection(x.index, m.index[-nodes_specific:])
        #m_index = pd.Index.intersection(x.index, n.index[:nodes_included])

        x_index = pd.Index.union(n_index, m_index)
        xx = x[list(x_index)].T[list(x_index)]

        # Removing unconnected nodes
        xxx = xx.values
        threshold = min(xxx.max(axis=1))
        matrix_filtered = np.where(xxx >= threshold, xxx, 0)
        #matrix_filtered = matrix_filtered.resize((90,90))

        G = nx.from_numpy_matrix(np.matrix(matrix_filtered))
        G = nx.relabel_nodes(G, dict(enumerate([ ids[id_][1] for id_ in list(xx.columns)])))

    elif distance == 'cosine':
        scd = defaultdict(lambda : defaultdict(int))

        for i in matrix.keys():
            for j in matrix.keys():
                numerator = sum(
                                [
                                matrix[i][k] * matrix[j][k]
                                    for k in matrix.keys()
                                    if i != j and k != i and k != j
                                ]
                            )

                denominator  = sqrt(
                                    sum([
                                    matrix[i][k]
                                        for k in matrix.keys()
                                        if k != i and k != j #and matrix[i][k] > 0
                                       ])
                                    *
                                    sum([
                                    matrix[i][k]
                                        for k in matrix.keys()
                                        if k != i and k != j #and matrix[i][k] > 0
                                       ])

                               )

                try:
                    scd[i][j] = numerator / denominator
                except Exception as error:
                    scd[i][j] = 0

        minmax = min([ max([ scd[i][j] for i in scd.keys()]) for j in scd.keys()])

        G = nx.DiGraph()
        G.add_edges_from(
                          [
                            (i, j, {'weight': scd[i][j]})
                                for i in scd.keys() for j in scd.keys()
                                if i != j and scd[i][j] > minmax and scd[i][j] > scd[j][i]
                          ]
                        )



    elif distance == 'distributional':
        mi = defaultdict(lambda : defaultdict(int))
        total_cooc = x.sum().sum()

        for i in matrix.keys():
            si = sum([matrix[i][j] for j in matrix[i].keys() if i != j])
            for j in matrix[i].keys():
                sj = sum([matrix[j][k] for k in matrix[j].keys() if j != k])
                if i!=j :
                    mi[i][j] = log( matrix[i][j] / ((si * sj) / total_cooc) )

        r = defaultdict(lambda : defaultdict(int))

        for i in matrix.keys():
            for j in matrix.keys():
                sumMin = sum(
                                [
                                min(mi[i][k], mi[j][k])
                                    for k in matrix.keys()
                                    if i != j and k != i and k != j and mi[i][k] > 0
                                ]
                            )

                sumMi  = sum(
                                [
                                mi[i][k]
                                    for k in matrix.keys()
                                    if k != i and k != j and mi[i][k] > 0
                                ]
                            )

                try:
                    r[i][j] = sumMin / sumMi
                except Exception as error:
                    r[i][j] = 0

        # Need to filter the weak links, automatic threshold here
        minmax = min([ max([ r[i][j] for i in r.keys()]) for j in r.keys()])

        G = nx.DiGraph()
        G.add_edges_from(
                          [
                            (i, j, {'weight': r[i][j]})
                                for i in r.keys() for j in r.keys()
                                if i != j and r[i][j] > minmax and r[i][j] > r[j][i]
                          ]
                        )

#        degree_max = max([(n, d) for n,d in G.degree().items()], key=itemgetter(1))[1]
#        nodes_to_remove = [n for (n,d) in G.degree().items() if d <= round(degree_max/2)]
#        G.remove_nodes_from(nodes_to_remove)

    # Removing too connected nodes (find automatic way to do it)
    #edges_to_remove = [ e for e in G.edges_iter() if

    #   nodes_to_remove = [n for n in degree if degree[n] <= 1]
    #   G.remove_nodes_from(nodes_to_remove)



    def getWeight(item):
        return item[1]
#
#    node_degree = sorted(G.degree().items(), key=getWeight, reverse=True)
#    #print(node_degree)
#    nodes_too_connected = [n[0] for n in node_degree[0:(round(len(node_degree)/5))]]
#
#    for n in nodes_too_connected:
#        n_edges = list()
#        for v in nx.neighbors(G,n):
#            #print((n, v), G[n][v]['weight'], ":", (v,n), G[v][n]['weight'])
#            n_edges.append(((n, v), G[n][v]['weight']))
#
#        n_edges_sorted = sorted(n_edges, key=getWeight, reverse=True)
#        #G.remove_edges_from([ e[0] for e in n_edges_sorted[round(len(n_edges_sorted)/2):]])
#        #G.remove_edges_from([ e[0] for e in n_edges_sorted[(round(len(nx.neighbors(G,n))/3)):]])
#        G.remove_edges_from([ e[0] for e in n_edges_sorted[10:]])

    G.remove_nodes_from(list(nx.isolates(G)))
    partition = best_partition(G.to_undirected())

    return(G,partition,ids,weight)
コード例 #55
0
def preprocessing():
    '''removing isolated nodes'''
    isolated = nx.isolates(G)
    for j in isolated:
        for key in nodes_data.keys():
            if j == key:
                del nodes_data[key]
                G.remove_node(j)
                #print G.number_of_nodes(), len(nodes_data)
                continue
    for k in edges_data.keys():
        '''deleting all edges from edges_data which no longer exist'''
        if not G.has_edge(edges_data[k][1], edges_data[k][2], key=k):
            #print len(edges_data)
            del edges_data[k]
    gen = []
    indegree = G.in_degree(G.nodes())
    '''nodes with no in-degree'''
    '''joint and merge nodes are skipped when categorising zero in-degree nodes as generators'''
    t = 0
    for i in indegree:
        if indegree[i] == 0:
            if nodes_data[i][3] == 'merge' or nodes_data[i][3] == 'joint':
                continue
            gen.append(i)
    for node, nd in nodes_data.iteritems():
        '''adding plants to generators'''
        if nd[3] == 'plant' and nd[0] not in gen:
            gen.append(node)
    voltages = {}
    z = 0
    for aa, val in edges_data.iteritems():
        '''adding edge attributes'''
        z += 1
        #print z
        if edges_data[aa][3].find(';') != -1:
            xx = map(float, edges_data[aa][3].split(';'))
            edges_data[aa][3] = max(xx)
            del xx[:]
        elif edges_data[aa][3] == '':
            edges_data[aa][3] = np.nan
        else:
            edges_data[aa][3] = float(edges_data[aa][3])
        if val[4].find(';') != -1:
            xy = map(float, val[4].split(';'))
            val[4] = sum(xy)
            del xy[:]
        else:
            val[4] = float(val[4])
        voltages[(val[1], val[2], val[0])] = val[3]
    nx.set_edge_attributes(G, 'voltages', voltages)
    distr = distributers()
    for f in gen:
        if f in distr:
            distr.remove(f)
            #G.remove_edges_from(G.in_edges(f))
        if node in gen:
            nodes_data[node][3] = 'generators'
        elif node in distr:
            nodes_data[node][3] = 'distributors'
        else:
            nodes_data[node][3] = 'transmitters'
    for ab, v in nodes_data.iteritems():
        '''adding nodes attributes'''
        if v[4].find(';') != -1:
            ss = map(float, v[4].split(';'))
            if v[3] == 'generator' or v[3] == 'distributor':
                v[4] = min(ss)
            else:
                v[4] = max(ss)
            del ss[:]
        elif v[4] == '':
            v[4] = np.nan
        else:
            v[4] = float(v[4])
    '''removing multi edges'''
    for x in G.nodes():
        for y in G.nodes():
            if x == y:
                continue
            if G.number_of_edges(x, y) > 1:
                kkk = []
                for ee in list(G.edges(x, keys=True)):
                    if ee[0] == x and ee[1] == y:
                        kkk.append(ee[2])
                for l in range(1, len(kkk)):
                    edges_data[kkk[0]][4] += edges_data[kkk[l]][4]
                    edges_data[kkk[0]][3] = max(edges_data[kkk[0]][3],
                                                edges_data[kkk[l]][3])
                    del edges_data[kkk[l]]
                    G.remove_edge(x, y, key=kkk[l])
                G[x][y][kkk[0]]['cable'] = edges_data[kkk[0]][4]
                G[x][y][kkk[0]]['voltages'] = edges_data[kkk[0]][3]
    #return (G,nodes_data,edges_data)
    o = []
    with open('../../NS_project/Data/Vertices_new.csv', 'wb') as csvfile:
        nodewriter = csv.writer(csvfile, delimiter=',')
        header = ['v_id', 'lon', 'lat', 'typ', 'voltage']
        nodewriter.writerow(header)
        for da, it in nodes_data.iteritems():
            for l in range(0, 5):
                o.append(str(it[l]))
            nodewriter.writerow(o)
            del o[:]
    with open('../../NS_project/Data/Edges_new.csv', 'wb') as csvfile1:
        edgewriter = csv.writer(csvfile1, delimiter=',')
        header1 = ['l_id', 'v_id_1', 'v_id_2', 'voltage', 'cables', 'length_m']
        edgewriter.writerow(header1)
        for db, itt in edges_data.iteritems():
            for l in range(0, 5):
                o.append(str(itt[l]))
            o.append(str(itt[10]))
            edgewriter.writerow(o)
            del o[:]
    return G
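
The parallel-edge collapsing step above can be hard to follow in place; the following is a standalone sketch of the same idea on a toy undirected MultiGraph (the function itself works on a directed multigraph built from the grid data), merging duplicate edges by summing cable counts and keeping the maximum voltage:

import networkx as nx

M = nx.MultiGraph()
M.add_edge('a', 'b', key=0, cable=3, voltages=220.0)
M.add_edge('a', 'b', key=1, cable=2, voltages=380.0)

for u, v in set((min(x, y), max(x, y)) for x, y, _ in M.edges(keys=True)):
    keys = list(M[u][v])
    if len(keys) > 1:
        keep, rest = keys[0], keys[1:]
        for k in rest:
            # accumulate cables on the surviving edge, keep the highest voltage
            M[u][v][keep]['cable'] += M[u][v][k]['cable']
            M[u][v][keep]['voltages'] = max(M[u][v][keep]['voltages'],
                                            M[u][v][k]['voltages'])
            M.remove_edge(u, v, key=k)

print(dict(M['a']['b']))   # {0: {'cable': 5, 'voltages': 380.0}}
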
Code example #56
import networkx as nx

# 'c' is assumed to be a mapping of ( source, target ) pairs to co-occurrence
# counts, built further up in the original script.
edge_list = [ ]
for ( source, target ), weight in c.items( ):
  edge = ( source, target, { 'weight' : weight } )
  edge_list.append( edge )

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#

G = nx.Graph( )
G.add_edges_from( edge_list )

nx.write_gexf( G, OUTPUT_GEXF_FULL )

weights = nx.get_edge_attributes( G, 'weight' )
edges_to_remove = [ k for k, v in weights.items( ) if v < WEIGHT_THRESHOLD ]

G.remove_edges_from( edges_to_remove )
G.remove_nodes_from( list( nx.isolates( G ) ) )

nodes_to_remove = [ ]
for node in G:
  if len( list( nx.neighbors( G, node ) ) ) < NEIGHBOR_THRESHOLD:
    nodes_to_remove.append( node )

G.remove_nodes_from( nodes_to_remove )
G.remove_nodes_from( BOT_LIST )
G.remove_nodes_from( list( nx.isolates( G ) ) )

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++#

nx.write_gexf( G, OUTPUT_GEXF_FILTERED )

###############################################################################
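
The script above also references names that are defined elsewhere in the original file (the threshold constants, the bot list, and the output paths). A purely illustrative setup, with hypothetical values, might look like this:

from collections import Counter

# Hypothetical values; the real script defines these elsewhere.
OUTPUT_GEXF_FULL = 'network_full.gexf'
OUTPUT_GEXF_FILTERED = 'network_filtered.gexf'
WEIGHT_THRESHOLD = 3        # edges seen fewer than 3 times are dropped
NEIGHBOR_THRESHOLD = 2      # nodes with fewer than 2 neighbours are dropped
BOT_LIST = ['bot_account']  # accounts to exclude from the network

# Co-occurrence counts keyed by ( source, target ) pairs.
c = Counter({('alice', 'bob'): 5, ('bob', 'carol'): 1, ('alice', 'carol'): 4})
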
Code example #57
def drop_entities(triples_file,
                  train_size=0.8,
                  valid_size=0.1,
                  test_size=0.1,
                  seed=0,
                  types_file=None):
    """Drop entities from a graph, to create training, validation and test
    splits.
    Entities are dropped so that no disconnected nodes are left in the training
    graph. Dropped entities are distributed between disjoint validation
    and test sets.
    """
    splits_sum = train_size + valid_size + test_size
    if splits_sum <= 0 or splits_sum > 1:
        raise ValueError('Sum of split sizes must be greater than 0'
                         ' and less than or equal to 1.')

    use_types = types_file is not None
    if use_types:
        type2entities = read_entity_types(types_file)
        types = list(type2entities.keys())

    random.seed(seed)

    graph = nx.MultiDiGraph()
    triples, rel_counts = parse_triples(triples_file)
    graph.add_weighted_edges_from(triples)
    original_num_edges = graph.number_of_edges()
    original_num_nodes = graph.number_of_nodes()

    print(f'Loaded graph with {graph.number_of_nodes():,} entities '
          f'and {graph.number_of_edges():,} edges')

    dropped_entities = []
    dropped_edges = dict()
    num_to_drop = int(original_num_nodes * (1 - train_size))
    num_val = int(original_num_nodes * valid_size)
    num_test = int(original_num_nodes * test_size)

    print(f'Removing {num_to_drop:,} entities...')
    progress = tqdm(total=num_to_drop, file=sys.stdout)
    while len(dropped_entities) < num_to_drop:
        if use_types:
            # Sample an entity with probability proportional to its type count
            # (minus 1 to keep at least one entity of any type)
            weights = [len(type2entities[t]) - 1 for t in types]
            rand_type = random.choices(types, weights, k=1)[0]
            rand_ent = random.choice(list(type2entities[rand_type]))
        else:
            # Sample an entity uniformly at random
            rand_ent = random.choice(list(graph.nodes))

        removed_tuple = get_safely_removed_edges(graph, rand_ent, rel_counts)

        if removed_tuple is not None:
            removed_edges, removed_counts = removed_tuple
            dropped_edges[rand_ent] = removed_edges
            graph.remove_node(rand_ent)
            dropped_entities.append(rand_ent)
            rel_counts.subtract(removed_counts)

            if use_types:
                type2entities[rand_type].remove(rand_ent)

            progress.update(1)

    progress.close()

    # Are there indeed no disconnected nodes?
    assert len(list(nx.isolates(graph))) == 0

    # Did we keep track of removed edges correctly?
    num_removed_edges = sum(map(len, dropped_edges.values()))
    assert num_removed_edges + graph.number_of_edges() == original_num_edges

    # Test entities MUST come from the first slice of dropped entities: since
    # nodes were removed in sequence, this guarantees that validation entities
    # have no edges to test entities.
    test_ents = set(dropped_entities[:num_test])
    val_ents = set(dropped_entities[num_test:num_test + num_val])
    train_ents = set(graph.nodes())

    # Check that entity sets are disjoint
    assert len(train_ents.intersection(val_ents)) == 0
    assert len(train_ents.intersection(test_ents)) == 0
    assert len(val_ents.intersection(test_ents)) == 0

    # Check that validation graph does not contain test entities
    val_graph = nx.MultiDiGraph()
    val_edges = []
    for entity in val_ents:
        val_edges += dropped_edges[entity]
    val_graph.add_weighted_edges_from(val_edges)
    assert len(set(val_graph.nodes()).intersection(test_ents)) == 0

    names = ('train', 'dev', 'test')

    dirname = osp.dirname(triples_file)
    prefix = 'ind-'

    for entity_set, set_name in zip((train_ents, val_ents, test_ents), names):
        # Save file with entities for set
        with open(osp.join(dirname, f'{set_name}-ents.txt'), 'w') as file:
            file.writelines('\n'.join(entity_set))

        if set_name == 'train':
            # Triples for train split are saved later
            continue

        # Save file with triples for entities in set
        with open(osp.join(dirname, f'{prefix}{set_name}.tsv'), 'w') as file:
            for entity in entity_set:
                triples = dropped_edges[entity]
                for head, tail, rel in triples:
                    file.write(f'{head}\t{rel}\t{tail}\n')

    with open(osp.join(dirname, f'{prefix}train.tsv'), 'w') as train_file:
        for head, tail, data in graph.edges(data=True):
            train_file.write(f'{head}\t{data["weight"]}\t{tail}\n')

    print(f'Dropped {len(val_ents):,} entities for validation'
          f' and {len(test_ents):,} for test.')
    print(f'{graph.number_of_nodes():,} entities are left for training.')
    print(f'Saved output files to {dirname}/')
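
A minimal invocation sketch; the path below is hypothetical, and the helpers used inside the function (`parse_triples`, `read_entity_types`, `get_safely_removed_edges`) are assumed to be defined elsewhere in the same module:

# Splits the triples into ~80/10/10 train/dev/test by dropping entities, and
# writes ind-train.tsv, ind-dev.tsv, ind-test.tsv and *-ents.txt next to the input.
drop_entities('data/graph.tsv',
              train_size=0.8, valid_size=0.1, test_size=0.1,
              seed=0, types_file=None)
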
Code example #58
def smith_waterman_filter(graph, flanking_reads, params):

    fasta_filename = params['fasta_filename']
    paf_filename = params['paf_filename']
    window_size = params['sw_window_size']
    fasta_dict = get_fasta_dict(fasta_filename)
    paf_dict = get_paf_dict(paf_filename)

    score_threshold = compute_sw_threshold(flanking_reads, paf_dict,
                                           fasta_dict, window_size)

    # Generate scores dictionary
    scores = {}
    num_good_scores = 0
    num_bad_scores = 0
    edges_to_remove = set()
    for query, target in nx.edges(graph):

        # Get overlap info from the paf dictionary
        if str(query + target) in paf_dict:
            # get the info
            overlap_info = paf_dict[query + target]
        elif str(target + query) in paf_dict:
            # get info and swap them
            overlap_info = paf_dict[target + query]
            query, target = target, query
        else:
            # No overlap recorded for this edge in the PAF file; skip it
            # rather than failing on the field lookups below.
            continue

        query_start = overlap_info['query_start']
        query_end = overlap_info['query_end']
        target_start = overlap_info['target_start']
        target_end = overlap_info['target_end']

        query_seq = fasta_dict[query][query_start:query_end]
        target_seq = fasta_dict[target][target_start:target_end]

        # Align the sequences using the rolling method
        min_len = min(len(query_seq), len(target_seq))

        # Get scores for this pair; store in cur_scores
        cur_scores = []
        if window_size:
            # Use rolling window
            for start, end in utils.pairwise(range(0, min_len, window_size)):
                qs = query_seq[start:end]
                ts = target_seq[start:end]
                score = smith_waterman.smith_waterman(qs, ts)
                cur_scores.append(score)
        else:
            # No rolling window
            score = smith_waterman.smith_waterman(query_seq, target_seq)
            cur_scores = [score]

        # Save data to scores dictionary
        # Sometimes the scores dictionary is never used
        # Other times it's extremely useful for plotting data
        scores[str(query + target)] = cur_scores

        # Analyze scores
        score = max(cur_scores)
        if score < score_threshold:
            num_good_scores += 1
        else:
            num_bad_scores += 1
            edges_to_remove.add((query, target))

    # remove edges and isolated nodes
    graph.remove_edges_from(list(edges_to_remove))
    isolates = list(nx.isolates(graph))
    graph.remove_nodes_from(isolates)

    # the histogram of the data
    plt.subplot(2, 3, 3)
    all_scores = list(utils.flatten(list(scores.values())))
    plt.hist(all_scores)
    plt.title(
        "histogram of num_gaps / len(aligned_sequence)\n{} bad_scores {} good_scores\nthreshold = {}\nwindow_size = {}"
        .format(num_bad_scores, num_good_scores, score_threshold, window_size))
    return graph
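
The rolling-window indexing above relies on a project-specific `utils.pairwise` helper; a self-contained sketch of the same windowing, with a stand-in for the scoring call (since `smith_waterman.smith_waterman` is also project-specific), might look like this. Note that, as in the code above, a trailing partial window is left unscored:

from itertools import tee

def pairwise(iterable):
    # (a, b, c, d) -> (a, b), (b, c), (c, d)
    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)

query_seq = 'ACGTACGTAC'
target_seq = 'ACGTTCGTAC'
window_size = 4
min_len = min(len(query_seq), len(target_seq))
for start, end in pairwise(range(0, min_len, window_size)):
    # windows produced: (0, 4) then (4, 8); the final 2 bases are not scored
    print(start, end, query_seq[start:end], target_seq[start:end])
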
Code example #59
File: core.py  Project: Adeilsoara/LearnPython
def onion_layers(G):
    """Returns the layer of each vertex in an onion decomposition of the graph.

    The onion decomposition refines the k-core decomposition by providing
    information on the internal organization of each k-shell. It is usually
    used alongside the `core numbers`.

    Parameters
    ----------
    G : NetworkX graph
        A simple graph without self loops or parallel edges

    Returns
    -------
    od_layers : dictionary
        A dictionary keyed by vertex to the onion layer. The layers are
        contiguous integers starting at 1.

    Raises
    ------
    NetworkXError
        The onion decomposition is not implemented for graphs with self loops
        or parallel edges or for directed graphs.

    Notes
    -----
    Not implemented for graphs with parallel edges or self loops.

    Not implemented for directed graphs.

    See Also
    --------
    core_number

    References
    ----------
    .. [1] Multi-scale structure and topological anomaly detection via a new
       network statistic: The onion decomposition
       L. Hébert-Dufresne, J. A. Grochow, and A. Allard
       Scientific Reports 6, 31708 (2016)
       http://doi.org/10.1038/srep31708
    .. [2] Percolation and the effective structure of complex networks
       A. Allard and L. Hébert-Dufresne
       Physical Review X 9, 011023 (2019)
       http://doi.org/10.1103/PhysRevX.9.011023
    """
    if nx.number_of_selfloops(G) > 0:
        msg = ("Input graph contains self loops which is not permitted; "
               "Consider using G.remove_edges_from(nx.selfloop_edges(G)).")
        raise NetworkXError(msg)
    # Dictionary to register the onion decomposition.
    od_layers = {}
    # Adjacency list
    neighbors = {v: list(nx.all_neighbors(G, v)) for v in G}
    # Effective degree of nodes.
    degrees = dict(G.degree())
    # Performs the onion decomposition.
    current_core = 1
    current_layer = 1
    # Sets vertices of degree 0 to layer 1, if any.
    isolated_nodes = [v for v in nx.isolates(G)]
    if len(isolated_nodes) > 0:
        for v in isolated_nodes:
            od_layers[v] = current_layer
            degrees.pop(v)
        current_layer = 2
    # Finds the layer for the remaining nodes.
    while len(degrees) > 0:
        # Sets the order for looking at nodes.
        nodes = sorted(degrees, key=degrees.get)
        # Sets properly the current core.
        min_degree = degrees[nodes[0]]
        if min_degree > current_core:
            current_core = min_degree
        # Identifies vertices in the current layer.
        this_layer = []
        for n in nodes:
            if degrees[n] > current_core:
                break
            this_layer.append(n)
        # Identifies the core/layer of the vertices in the current layer.
        for v in this_layer:
            od_layers[v] = current_layer
            for n in neighbors[v]:
                neighbors[n].remove(v)
                degrees[n] = degrees[n] - 1
            degrees.pop(v)
        # Updates the layer count.
        current_layer = current_layer + 1
    # Returns the dictionary containing the onion layer of each vertex.
    return od_layers
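
A small usage sketch (the same function is also exposed as nx.onion_layers in recent NetworkX releases):

import networkx as nx

# Triangle with one pendant vertex.
G = nx.Graph([(1, 2), (2, 3), (3, 1), (3, 4)])
print(onion_layers(G))
# {4: 1, 1: 2, 2: 2, 3: 2}: the pendant vertex is peeled off first (layer 1),
# and the triangle vertices share the next layer.
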
Code example #60
def remove_isolates(G, node_list, option, basic, to_b_nodes, from_a_nodes,
                    a_to_b_edges, net):
    '''Removes any isolated nodes in the given network along with any
    associated edges. Returns the edited network and a number of lists which
    require updating as a result of the removal.
    Input: 
    Return: '''

    #remove any isolated nodes and associated edges
    try:
        isolatednodes = list(nx.isolates(G))
    except:
        return 4010
    if G.number_of_edges() == 0:
        #print 'The number of nodes left is:', G.number_of_nodes()
        raise error_classes.GraphError(
            'Error: the network is disconnected; there are no edges left in the network.'
        )
    else:
        #remove all nodes which are in the isolated list
        G.remove_nodes_from(isolatednodes)

    j = 0
    #loop through the isolated nodes and remove from the node list
    try:
        while j < len(isolatednodes):
            k = 0
            while k < len(node_list):
                if isolatednodes[j] == node_list[k]:
                    node_list.remove(node_list[k])
                    k -= 1
                k += 1
            j += 1
    except:
        return 4012

    #update some of the lists to record the simulation process

    tot = 0
    if net == 'B':
        for nd in isolatednodes:
            v = 0
            found = False
            try:
                while v < len(a_to_b_edges):
                    if int(nd) == int(a_to_b_edges[v][1]):
                        a_to_b_edges.pop(v)
                        found = True
                        tot += 1
                        v -= 1
                    v += 1
            except:
                return 4013
            #report isolated nodes in network B with no matching A-to-B edge
            if found == False:
                print('node is:', nd)
                for item in a_to_b_edges:
                    print(item[1])
                exit()

    var = G, node_list, basic, option, isolatednodes, to_b_nodes, from_a_nodes, a_to_b_edges
    return var