def smallWorldness(graph):
	return_values = []
	#Small-worldness criteria
	n = len(nx.nodes(graph))
	e = len(nx.edges(graph))
	#probability of edges: (number of edges in real graph)/possible edges
	p = e/float((n*(n-1)/2.0))	
	##
	#generate random graph using probability
	rand_graph = nx.fast_gnp_random_graph(n, p, seed=1)
	#calculate values for real graph and random graph
	Creal = nx.transitivity(graph) #float
	Crand = nx.transitivity(rand_graph) #float
	Lreal = 0
	Lrand = 0
	real_sum = 0
	rand_sum = 0
	splReal = shortest_path_lengths(graph)
	splRand = shortest_path_lengths(rand_graph)
	for i in range(len(splReal)):
		real_sum += splReal[i]
		rand_sum += splRand[i]
	Lreal = real_sum / len(splReal)
	Lrand = rand_sum / len(splRand)		
	#compare with actual graph
	if(Lreal != 0 and Lrand !=0 and Crand !=0):
		S = (Creal)/(Crand) / (float(Lreal)/(Lrand))
	else:
		S = 0
	return_values.append(S)
	return return_values
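For reference, a minimal self-contained sketch of the same small-worldness ratio S = (C/C_rand)/(L/L_rand) built only from NetworkX calls; `shortest_path_lengths` above is assumed to be a project helper, so nx.average_shortest_path_length stands in for it here, and both graphs are assumed to be connected. The name small_worldness_sketch is illustrative, not from the original code.

import networkx as nx

def small_worldness_sketch(graph, seed=1):
    # Erdos-Renyi graph matched on node count and edge probability
    n, e = graph.number_of_nodes(), graph.number_of_edges()
    p = e / float(n * (n - 1) / 2.0)
    rand_graph = nx.fast_gnp_random_graph(n, p, seed=seed)
    C_real, C_rand = nx.transitivity(graph), nx.transitivity(rand_graph)
    # average_shortest_path_length raises if a graph is disconnected
    L_real = nx.average_shortest_path_length(graph)
    L_rand = nx.average_shortest_path_length(rand_graph)
    if L_real and L_rand and C_rand:
        return (C_real / C_rand) / (L_real / L_rand)
    return 0.0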
Example 2
def draw_graph(label_flag=True, remove_isolated=True, different_size=True, iso_level=10, node_size=40):
    G=build_graph(fb.get_friends_network())
    betweenness=nx.betweenness_centrality(G)
    degree=nx.degree_centrality(G)
    degree_num=[ degree[v] for v in G]
    maxdegree=max(degree_num);mindegree=min(degree_num);
    print maxdegree,mindegree
    clustering=nx.clustering(G)
    print nx.transitivity(G)
    # Decide whether to remove isolated nodes from the graph
    if remove_isolated is True:
        H = nx.empty_graph()
        for SG in nx.connected_component_subgraphs(G):
            if SG.number_of_nodes() > iso_level:
                H = nx.union(SG, H)
        G = H
    # Adjust graph for better presentation
    if different_size is True:
        L = nx.degree(G)
        G.dot_size = {}
        for k, v in L.items():
            G.dot_size[k] = v
        #node_size = [betweenness[v] *1000 for v in G]
        node_size = [G.dot_size[v] * 10 for v in G]
        node_color= [((degree[v]-mindegree))/(maxdegree-mindegree) for v in G]
        #edge_width = [getcommonfriends(u,v) for u,v in G.edges()]
    pos = nx.spring_layout(G, iterations=15)
    nx.draw_networkx_edges(G, pos, alpha=0.05)
    nx.draw_networkx_nodes(G, pos, node_size=node_size, node_color=node_color, vmin=0.0,vmax=1.0, alpha=0.3)
    # Decide whether to show labels
    if label_flag is True:
        nx.draw_networkx_labels(G, pos, font_size=6,alpha=0.1)
    #nx.draw_graphviz(G)
    plt.show()
    return G
Example 3
def gen_graph_stats (graph):
	G = nx.read_graphml(graph)
	stats = {}

	edges, nodes = 0,0
	for e in G.edges_iter(): edges += 1
	for n in G.nodes_iter(): nodes += 1
	stats['Edges'] = (edges,'The number of edges within the Graph')
	stats['Nodes'] = (nodes, 'The number of nodes within the Graph')
	print "%i edges, %i nodes" % (edges, nodes)


	# Accessing the highest degree node
	center, degree = sorted(G.degree().items(), key=itemgetter(1), reverse=True)[0]
	stats['Center Node'] = ('%s: %0.5f' % (center,degree),'The most central node in the graph, i.e. the node with the highest degree')


	hairball = nx.subgraph(G, [x for x in nx.connected_components(G)][0])
	print "Average shortest path: %0.4f" % nx.average_shortest_path_length(hairball)
	stats['Average Shortest Path Length'] = (nx.average_shortest_path_length(hairball), '')
	# print "Center: %s" % G[center]

	# print "Shortest Path to Center: %s" % p


	print "Degree: %0.5f" % degree
	stats['Degree'] = (degree,'The node degree is the number of edges adjacent to that node.')

	print "Order: %i" % G.number_of_nodes()
	stats['Order'] = (G.number_of_nodes(),'The number of nodes in the graph.')

	print "Size: %i" % G.number_of_edges()
	stats['Size'] = (G.number_of_edges(),'The number of edges in the graph.')

	print "Clustering: %0.5f" % nx.average_clustering(G)
	stats['Average Clustering'] = (nx.average_clustering(G),'The average clustering coefficient for the graph.')

	print "Transitivity: %0.5f" % nx.transitivity(G)
	stats['Transitivity'] = (nx.transitivity(G),'The fraction of all possible triangles present in the graph.')

	part = community.best_partition(G)
	# values = [part.get(node) for node in G.nodes()]

	# nx.draw_spring(G, cmap = plt.get_cmap('jet'), node_color = values, node_size=30, with_labels=False)
	# plt.show()

	mod = community.modularity(part,G)
	print "modularity: %0.5f" % mod
	stats['Modularity'] = (mod,'The modularity of a partition of a graph.')

	knn = nx.k_nearest_neighbors(G)
	print knn
	stats['K Nearest Neighbors'] = (knn,'The average degree connectivity of the graph.\nThe average degree connectivity is the average nearest neighbor degree of nodes with degree k. For weighted graphs, an analogous measure can be computed using the weighted average neighbors degree')


	return G, stats
Example 4
def get_small_worldness(filename):
  import networkx as nx
  threshold = 0
  f = open(filename[:-4]+'_small_worldness.dat','w')
  for i in range(0,101):
    threshold = float(i)/100
    G = get_threshold_matrix(filename, threshold)
    ER_graph = nx.erdos_renyi_graph(nx.number_of_nodes(G), nx.density(G))

    cluster = nx.average_clustering(G)
    ER_cluster = nx.average_clustering(ER_graph)
    
    transi = nx.transitivity(G)
    ER_transi = nx.transitivity(ER_graph)

    print 'threshold: %f, average cluster coefficient: %f, random nw: %f, transitivity: %f, random nw: %f' %(threshold, cluster, ER_cluster, transi, ER_transi)

    f.write("%f\t%f\t%f" % (threshold, cluster, ER_cluster))
    components = nx.connected_component_subgraphs(G)
    ER_components = nx.connected_component_subgraphs(ER_graph)

    values = []
    ER_values = []
    for i in range(len(components)):
      if nx.number_of_nodes(components[i]) > 1:
        values.append(nx.average_shortest_path_length(components[i]))
    for i in range(len(ER_components)):
      if nx.number_of_nodes(ER_components[i]) > 1:
        ER_values.append(nx.average_shortest_path_length(ER_components[i]))
    if len(values) == 0:
      f.write("\t0.")
    else:
      f.write("\t%f" % (sum(values)/len(values)))

    if len(ER_values) == 0:
      f.write("\t0.")
    else:
      f.write("\t%f" % (sum(ER_values)/len(ER_values)))
    
    f.write("\t%f\t%f" % (transi, ER_transi))  
    
    if (ER_cluster*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 :
      S_WS = (cluster/ER_cluster) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values)))
    else:
      S_WS = 0.
    if (ER_transi*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 :
      S_Delta = (transi/ER_transi) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values)))
    else:
      S_Delta = 0.
    
    f.write("\t%f\t%f" % (S_WS, S_Delta))  
    f.write("\n")
    
  f.close()  
  print "1:threshold 2:cluster-coefficient 3:random-cluster-coefficient 4:shortest-pathlength 5:random-shortest-pathlength 6:transitivity 7:random-transitivity 8:S-Watts-Strogatz 9:S-transitivity" 
Example 5
def get_small_worldness(G, thr):
	f = open(out_prfx + 'small_worldness.dat', 'a')
	g = open(out_prfx + 'cc_trans_ER.dat', 'a')
	#g.write('r(thre.)\t\cc_A\tcc_ER\ttran_A\ttran_ER\n')
	ER_graph = nx.erdos_renyi_graph(nx.number_of_nodes(G), nx.density(G))
	# erdos-renyi, binomial random graph generator ...(N,D:density)	
	cluster = nx.average_clustering(G)   # clustering coef. of whole network
	ER_cluster = nx.average_clustering(ER_graph)	#cc of random graph
	
	transi = nx.transitivity(G)
	ER_transi = nx.transitivity(ER_graph)

	g.write("%f\t%f\t%f\t%f\t%f\n" % (thr, cluster,ER_cluster,transi,ER_transi ))
	
	f.write("%f\t%f\t%f" % (thr, cluster, ER_cluster))
	components = nx.connected_component_subgraphs(G)
	ER_components = nx.connected_component_subgraphs(ER_graph)

	values = []
	ER_values = []
	for i in range(len(components)):
		if nx.number_of_nodes(components[i]) > 1:
			values.append(nx.average_shortest_path_length(components[i]))
	for i in range(len(ER_components)):
		if nx.number_of_nodes(ER_components[i]) > 1:
			ER_values.append(nx.average_shortest_path_length(ER_components[i]))
	if len(values) == 0:
		f.write("\t0.")
	else:
		f.write("\t%f" % (sum(values)/len(values))) # pathlenght

	if len(ER_values) == 0:
		f.write("\t0.")
	else:
		f.write("\t%f" % (sum(ER_values)/len(ER_values)))

	f.write("\t%f\t%f" % (transi, ER_transi))  

	if (ER_cluster*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 :
		S_WS = (cluster/ER_cluster) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values)))  
	else:
		S_WS = 0.
	if (ER_transi*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 :
		S_Delta = (transi/ER_transi) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values)))
	else:
		S_Delta = 0.

	f.write("\t%f\t%f" % (S_WS, S_Delta)) # S_WS ~ small worldness 
	f.write("\n")

	f.close() 
	g.close()	 
Example 6
def compute_singlevalued_measures(ntwk, weighted=True, calculate_cliques=False):
    """
    Returns a single value per network
    """
    iflogger.info("Computing single valued measures:")
    measures = {}
    iflogger.info("...Computing degree assortativity (pearson number) ...")
    try:
        measures["degree_pearsonr"] = nx.degree_pearsonr(ntwk)
    except AttributeError:  # For NetworkX 1.6
        measures["degree_pearsonr"] = nx.degree_pearson_correlation_coefficient(ntwk)
    iflogger.info("...Computing degree assortativity...")
    try:
        measures["degree_assortativity"] = nx.degree_assortativity(ntwk)
    except AttributeError:
        measures["degree_assortativity"] = nx.degree_assortativity_coefficient(ntwk)
    iflogger.info("...Computing transitivity...")
    measures["transitivity"] = nx.transitivity(ntwk)
    iflogger.info("...Computing number of connected_components...")
    measures["number_connected_components"] = nx.number_connected_components(ntwk)
    iflogger.info("...Computing average clustering...")
    measures["average_clustering"] = nx.average_clustering(ntwk)
    if nx.is_connected(ntwk):
        iflogger.info("...Calculating average shortest path length...")
        measures["average_shortest_path_length"] = nx.average_shortest_path_length(ntwk, weighted)
    if calculate_cliques:
        iflogger.info("...Computing graph clique number...")
        measures["graph_clique_number"] = nx.graph_clique_number(ntwk)  # out of memory error
    return measures
Example 7
def printStats(filename):
	'''
	Converts a JSON adjacency list into a networkx graph to calculate and print the
	graph's
	  - average clustering coefficient
	  - overall clustering coefficient
	  - maximum diameter
	  - average diameter
	  - number of partitions found by community.best_partition
	  - modularity of community.best_partition
	'''
	g = makeGraphFromJSON(filename)
	
	print "Average Clustering Coefficient: %f" % nx.average_clustering(g)
	print "Overall Clustering Coefficient: %f" % nx.transitivity(g)
	
	connected_subgraphs = list(nx.connected_component_subgraphs(g))
	largest = max(nx.connected_component_subgraphs(g), key=len)
	print "# Connected Components: %d" % len(connected_subgraphs)
	print "    Maximal Diameter: %d" % nx.diameter(largest)
	print "    Average Diameter: %f" % nx.average_shortest_path_length(largest)

	# Find partition that maximizes modularity using Louvain's algorithm
	part = community.best_partition(g)	
	print "# Paritions: %d" % (max(part.values()) + 1)
	print "Louvain Modularity: %f" % community.modularity(part, g)
Example 8
def get_network_property(graph):
    """Returns various property of the graph.

    It calculates the richness coefficient, triangles and transitivity
    coefficient. To do so, it removes self-loops *in-place*. So, there
    is a possibility that the graph passed as parameter has been
    changed.
    """

    remove_self_loop(graph)

    # If the number of nodes is less than three,
    # there is no point in calculating these properties.
    if len(graph.nodes()) < 3:
        return ({0: 0.0}, 0, 0)

    try:
        richness = nx.rich_club_coefficient(graph)
    except nx.NetworkXAlgorithmError:
        # NetworkXAlgorithmError is raised when
        # it fails achieve desired swaps after
        # maximum number of attempts. It happened
        # for a really small graph. But, just to
        # guard against those cases.
        richness = nx.rich_club_coefficient(graph, False)

    triangle = nx.triangles(graph)
    transitivity = nx.transitivity(graph)

    return (richness, triangle, transitivity)
def plot_distribution(distribution_type,legend,graph,list_communities,out=None):
	x = [i for i in range(0,len(list_communities[0]))]
	for communities in list_communities:
		if distribution_type.lower() == "nodes":
			y = list(map(len,communities))
		else:
			y = []
			for l in communities:
				H = graph.subgraph(l)
				if distribution_type.lower() == "density":
					y.append(nx.density(H))
				elif distribution_type.lower() == "transitivity":
					y.append(nx.transitivity(H))
				else:
					return None
		plt.plot(x,y,linewidth=2,alpha=0.8)
		#plt.yscale("log")

	plt.legend(legend, loc='upper left')
	plt.xlabel("Comunity ID")
	plt.ylabel(distribution_type)

	if out == None:
		plt.show()
	else:
		plt.savefig(out+".svg",bbox_inches="tight")
	plt.close()
Example 10
    def connected_components(self):
        """
        Returns basic statistics about the connected components of the
        graph. This includes their number, order, size, diameter, radius,
        average clustering coefficient, transitivity, in addition to basic
        info about the largest and smallest connected components.
        """
        cc_stats = {}
        cc = nx.connected_components(self.graph.structure)

        for index, component in enumerate(cc):
            cc_stats[index] = {}
            this_cc = cc_stats[index]

            this_cc["order"] = len(component)
            this_cc["size"] = len(self.graph.structure.edges(component))

            subgraph = self.graph.structure.subgraph(component)
            this_cc["avg_cluster"] = nx.average_clustering(subgraph)
            this_cc["transitivity"] = nx.transitivity(subgraph)

            eccentricity = nx.eccentricity(subgraph)
            ecc_values = eccentricity.values()
            this_cc["diameter"] = max(ecc_values)
            this_cc["radius"] = min(ecc_values)

        return cc_stats
Example 11
def get_motifs(filename):
  import networkx as nx
  from math import factorial
  threshold = 0
  f = open(filename[:-4]+'_motifs.dat','w')
  for i in range(0,101):
    threshold = float(i)/100
    G = get_threshold_matrix(filename, threshold)
    tri_dict = nx.triangles(G)
    summe = 0
    for node in tri_dict:
      summe += tri_dict[node]
    
    N = nx.number_of_nodes(G)
    ratio = summe / (3. * binomialCoefficient(N,3))
    
    transi = nx.transitivity(G)
    if transi > 0:
      triads = summe / transi 
      ratio_triads = triads / (3 * binomialCoefficient(N,3))
    else:
      triads = 0.
      ratio_triads = 0.
    
    print 'threshold: %f, number of triangles: %f, ratio: %f, triads: %f, ratio: %f' %(threshold, summe/3, ratio, triads, ratio_triads)
    f.write("%f\t%d\t%f\t%f\t%f\n" % (threshold, summe/3, ratio, triads, ratio_triads))
  f.close()
  print "1:threshold 2:#triangles 3:ratio-to-potential-triangles 4:triads 5:ratio-to-potential-triads"
Example 12
def netstats_simple(graph):
    G = graph
    if nx.is_connected(G): 
        d = nx.diameter(G)
        r = nx.radius(G)
    else: 
        d = 'NA - graph is not connected' #should be calculable on a disconnected graph - see example code for a workaround
        r = 'NA - graph is not connected'
   
# using a dictionary to pack values and variables
    result = {#"""single value measures"""  
              'nn': G.number_of_nodes(),
              'ne': G.number_of_edges(),
              'd': d,
              'r': r,
              'conn': nx.number_connected_components(G),
              'asp': nx.average_shortest_path_length(G), 
#              """number of the largest clique"""
              'cn': nx.graph_clique_number(G),
#              """number of maximal cliques"""
              'mcn': nx.graph_number_of_cliques(G),
#              """transitivity - """
              'tr': nx.transitivity(G),
              #cc = nx.clustering(G) """clustering coefficient"""
              'avgcc': nx.average_clustering(G) } 
#    result['d'] = nx.diameter(G)
    print result
    return result
Example 13
def degree_statistics(G):
    n_nodes = G.number_of_nodes()
    
    start = time.clock()
    # list of sampled graphs
    g_list[:] = []
    for i in range(N_SAMPLES):
        g_list.append(generate_sample(G))
    print "Sampling graphs - Elapsed ", (time.clock() - start)
    
    #####
    # number of edges s_NE
    s_NE = sum(e[2]['p'] for e in G.edges_iter(data=True))
    
    # average degree s_AD
    s_AD = 2*s_NE /n_nodes
    
    # maximal degree s_MD
    sum_MD = 0.0
    for aG in g_list:
        max_deg = max(aG.degree().itervalues())
        sum_MD += max_deg
        
    s_MD = sum_MD/N_SAMPLES
    
    # degree variance s_DV
    sum_DV = 0.0
    for aG in g_list:
        deg_var = 1.0/n_nodes * sum((d - s_AD)*(d-s_AD) for d in aG.degree().itervalues())
        sum_DV += deg_var
    
    s_DV = sum_DV/N_SAMPLES
    
    # clustering coefficient s_CC
    sum_CC = 0.0
    for aG in g_list:
        cc = nx.transitivity(aG)
        sum_CC += cc
    
    s_CC = sum_CC/N_SAMPLES
    
    # degree distribution
    deg_list = [0 for i in range(MAX_DEG)]
    for aG in g_list:
        for d in aG.degree().itervalues():
            deg_list[d] += 1
            
    i = MAX_DEG-1
    while deg_list[i] == 0:
        i = i-1
    deg_list = deg_list[:i+1]
    print "len(deg_list) =", len(deg_list)
    print deg_list
    
    for i in range(len(deg_list)):
        deg_list[i] = float(deg_list[i])/N_SAMPLES
    
    #
    return s_NE, s_AD, s_MD, s_DV, s_CC, deg_list
Example 14
def cluster():
	if created == 0:
		print 'No graph created!'
	elif created == 1:
		try:
			print 'The clustering coefficient for the whole graph is %0.4f.'%(nx.transitivity(G))
		except nx.NetworkXError, e:
			print e
Example 15
def preferentialAttachment(G):
    n = G.number_of_nodes()
    m =  random.randrange(15,20)
    PG = nx.barabasi_albert_graph(n,m)
    plot(PG)
    l =  math.log(n)/math.log(math.log(n))
    print 'Global Clustering: {0}\t'.format(str(nx.transitivity(PG))),
    print 'Average path length : {0}\n'.format(str(l))
Example 16
def get_my_small_worldness(filename) :
	threshold = 0
	f = open(filename[:-4]+'_small_worldness.dat','w')
	f.write('thresh\t\taver_clus\t\tave_ER_clus\t\tcoup_coeff\t\tchar_path\t\ttransi\t\tER_transi\t\tS_WS\t\tS_delta\n')
	print f
	for i in range(0,101):
		threshold = float(i)/100
		G = get_my_threshold_matrix(filename, threshold)
		ER_graph = nx.erdos_renyi_graph(nx.number_of_nodes(G), nx.density(G)) # random graph
		cluster = nx.average_clustering(G)
		ER_cluster = nx.average_clustering(ER_graph)
		transi = nx.transitivity(G)
		ER_transi = nx.transitivity(ER_graph)
		f.write('%f\t%f\t%f'% (threshold,cluster,ER_cluster))
		components = nx.connected_component_subgraphs(G)
		ER_components = nx.connected_component_subgraphs(ER_graph)
		values = []
		ER_values = []
		for i in range(len(components)) :	
			if nx.number_of_nodes(components[i]) > 1:
				values.append(nx.average_shortest_path_length(components[i]))
		for i in range(len(ER_components)) :	
			if nx.number_of_nodes(ER_components[i]) > 1:
				ER_values.append(nx.average_shortest_path_length(ER_components[i]))
		if len(values) == 0 :
			f.write("\t0.")
		else : 
			f.write("\t%f" % (sum(values)/len(values)))
		if len(ER_values) == 0:
			f.write("\t0.")
		else:
			f.write("\t%f" % (sum(ER_values)/len(ER_values)))
		f.write("\t%f\t%f" % (transi, ER_transi))
		if (ER_cluster*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 :
			S_WS = (cluster/ER_cluster) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values)))
		else:
			S_WS = 0.
		if (ER_transi*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 :
			S_Delta = (transi/ER_transi) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values)))
		else:
			S_Delta = 0.
		f.write("\t%f\t%f" % (S_WS, S_Delta))  
		f.write("\n")
	f.close()
Example 17
 def test_clustering_transitivity(self):
     # check that weighted average of clustering is transitivity
     G = nx.complete_graph(5)
     G.remove_edge(1,2)
     t1=nx.transitivity(G)
     (cluster_d2,weights)=nx.clustering(G,weights=True)
     trans=[]
     for v in G.nodes():
         trans.append(cluster_d2[v]*weights[v])
     t2=sum(trans)
     assert_almost_equal(abs(t1-t2),0)
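The weights keyword used above comes from an old NetworkX API. A sketch of the same identity against the current API, where transitivity equals the average of the local clustering coefficients weighted by the number of possible triangles deg*(deg-1)/2 around each node:

import networkx as nx

G = nx.complete_graph(5)
G.remove_edge(1, 2)
w = {v: d * (d - 1) / 2.0 for v, d in G.degree()}   # possible triangles around each node
total = sum(w.values())
t2 = sum(nx.clustering(G, v) * w[v] / total for v in G)
assert abs(nx.transitivity(G) - t2) < 1e-12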
Example 18
def myglobalclust(corr):
    if not isinstance(corr,nx.DiGraph):
        return [nx.transitivity(corr)]  #3*triangles/triads
    corr = np.array(nx.to_numpy_matrix(corr))
    mat = np.dot(corr,corr)
    paths = np.sum(mat) - np.trace(mat)
    mat = np.dot(mat,corr)
    loops = np.trace(mat)
    if paths == 0:
        return [0]
    else:
        return [float(loops)/paths]
def calGraph(infile, mode = 1):
	#init Parameter
	inputpath = 'edge_list/'
	n = mode
	Data_G = inputpath+infile+'_'+str(n)+'.edgelist'
	
	#init Graph
	G = nx.read_edgelist(Data_G, create_using=nx.DiGraph())
	GU = nx.read_edgelist(Data_G)
	average_clustering = nx.average_clustering(GU)
	transitivity = nx.transitivity(G)
	return [average_clustering, transitivity]
def graphAnalysis(graph, top_number, save_file_path):
    """
        Do the essential analysis to the final combined graph
    """
    with io.open(save_file_path, 'w') as save_file:

        # centrality
        # degree centrality
        deg_central = nx.degree_centrality(graph)
        deg_central_sort = sorted(deg_central.items(), key = lambda x: x[1], reverse = True)
        top_deg_central_sort = deg_central_sort[:top_number]
        save_file.write('top %d degree centrality items,' % top_number)
        save_file.write(','.join('%s %s' % x for x in top_deg_central_sort))

        # clustering

        # number of triangles: triangles() is not defined for directed graphs
        triangle_num = nx.triangles(graph)
        triangle_num_sort = sorted(triangle_num.items(), key = lambda x: x[1], reverse = True)
        top_triangle_num_sort = triangle_num_sort[:top_number]
        save_file.write('\ntop %d number of triangles including a node as one vertex,' % top_number)
        save_file.write(','.join('%s %s' % x for x in top_triangle_num_sort))

        # clustering coefficient of node in the graph
        cluster_coefficient = nx.clustering(graph)
        cluster_coefficient_sort = sorted(cluster_coefficient.items(), key = lambda x: x[1], reverse = True)
        top_cluster_coefficient_sort = cluster_coefficient_sort[:top_number]
        save_file.write('\ntop %d clustering coefficient items,' % top_number)
        save_file.write(','.join('%s %s' % x for x in top_cluster_coefficient_sort))

        # transitivity of the graph
        triangle_transitivity = nx.transitivity(graph)
        save_file.write('\ntransitivity of the graph,%f' % triangle_transitivity)

        # average clustering coefficient of the graph
        avg_cluster = nx.average_clustering(graph)
        save_file.write('\naverage clustering coefficient of the graph,%f' % avg_cluster)

        # clique
        # size of the largest clique in the graph
        size_largest_clique = nx.graph_clique_number(graph)
        save_file.write('\nsize of the largest clique in the graph,%d' % size_largest_clique)
        
        # all the cliques in the graph
        
        all_clique = nx.find_cliques(graph) # a generator
        list_all_clique = list(all_clique)
        list_all_clique_sort = sorted(list_all_clique, key = lambda x: len(x), reverse = True)
        list_all_clique_sort = [' '.join(clique) for clique in list_all_clique_sort]
        # print list_all_clique_sort
        save_file.write('\ncliques,')
        save_file.write(','.join(x for x in list_all_clique_sort))
def ClusteringCoefficientCentralityExperiment(G, min_target, max_target, filename):
    print nx.info(G)
    print 'Global Clustering Coefficient:', nx.transitivity(G.to_undirected())

    X_Clustering_Coefficient = []
    Y_nD = []
    target = min_target
    while target <= max_target:       
        copyG = G.copy()
        new_G = SimulatedAnnealing(copyG, target, test_cost_function)
        clustering_coeff = nx.transitivity(new_G.to_undirected())
        nD = SCT.controllability(new_G)
        X_Clustering_Coefficient.append(clustering_coeff)
        Y_nD.append(nD)
        print "target = ", target, " CC = ", clustering_coeff, 'nD = ', nD      
        target += 0.05
    
    s = 'results/' + filename;
    with open(s, "w") as f:
        for i in range(len(Y_nD)):
            print >> f, "%f %f"%(X_Clustering_Coefficient[i], Y_nD[i])
    return (X_Clustering_Coefficient, Y_nD)
Example 22
def compute_singlevalued_measures(ntwk, weighted=True,
                                  calculate_cliques=False):
    """
    Returns a single value per network
    """
    iflogger.info('Computing single valued measures:')
    measures = {}
    iflogger.info('...Computing degree assortativity (pearson number) ...')
    try:
        measures['degree_pearsonr'] = nx.degree_pearsonr(ntwk)
    except AttributeError:  # For NetworkX 1.6
        measures[
            'degree_pearsonr'] = nx.degree_pearson_correlation_coefficient(
                ntwk)
    iflogger.info('...Computing degree assortativity...')
    try:
        measures['degree_assortativity'] = nx.degree_assortativity(ntwk)
    except AttributeError:
        measures['degree_assortativity'] = nx.degree_assortativity_coefficient(
            ntwk)
    iflogger.info('...Computing transitivity...')
    measures['transitivity'] = nx.transitivity(ntwk)
    iflogger.info('...Computing number of connected_components...')
    measures['number_connected_components'] = nx.number_connected_components(
        ntwk)
    iflogger.info('...Computing graph density...')
    measures['graph_density'] = nx.density(ntwk)
    iflogger.info('...Recording number of edges...')
    measures['number_of_edges'] = nx.number_of_edges(ntwk)
    iflogger.info('...Recording number of nodes...')
    measures['number_of_nodes'] = nx.number_of_nodes(ntwk)
    iflogger.info('...Computing average clustering...')
    measures['average_clustering'] = nx.average_clustering(ntwk)
    if nx.is_connected(ntwk):
        iflogger.info('...Calculating average shortest path length...')
        measures[
            'average_shortest_path_length'] = nx.average_shortest_path_length(
                ntwk, weighted)
    else:
        iflogger.info('...Calculating average shortest path length...')
        measures[
            'average_shortest_path_length'] = nx.average_shortest_path_length(
                nx.connected_component_subgraphs(ntwk)[0], weighted)
    if calculate_cliques:
        iflogger.info('...Computing graph clique number...')
        measures['graph_clique_number'] = nx.graph_clique_number(
            ntwk)  # out of memory error
    return measures
Example 23
def main(filename):
    G, UG = loadEdgeList(filename)
    print 'Original Graph'
    print '--------------\n'
    directedgraph_plot(G)
    print 'Global Clustering: {0}\t'.format(str(nx.transitivity(G))), 
    print 'Average path length : {0}\n'.format(str(nx.average_shortest_path_length(UG)))
    print '\nRandom Graph Model'
    print '------------------\n'
    randomGraph(G)
    print '\nSmall World Model'
    print '-----------------\n' 
    smallworld(G)
    print '\nPreferential Attachement Model'
    print '------------------------------\n'
    preferentialAttachment(G)
Example 24
def get_network_statistics(G):
    '''
    Compute key network statistics for the given graph.
    '''
    size = len(G)
    density = nx.density(G)
    #diameter = nx.diameter(G)

    clustering = nx.average_clustering(G.to_undirected())
    transitivity = nx.transitivity(G.to_undirected())
    grc = global_reaching_centrality(G)
    return {"size": size,
            "density": density,
            #"diameter": diameter,
            "clustering": clustering,
            "transitivity": transitivity,
            "grc": grc}
Example 25
def test_networkx(file_name):     
    
    start = time.clock()
    g = nx.read_edgelist("../data/" + file_name + ".gr", '#', '\t', None, nodetype=int, data=False)
    print "elapsed ", time.clock() - start 
    
    print "#nodes =", g.number_of_nodes()
    print "#edges =", g.number_of_edges()
    
    deg_list = nx.degree(g) 
    max_deg = max(deg_list.itervalues())
    print "max_deg =", max_deg
    
    #
    start = time.clock()
    clustering_coeff = nx.transitivity(g)
    print "clustering_coeff =", clustering_coeff
    print "elapsed ", time.clock() - start    
def subgraphProperty(H):
  nnodes = nx.number_of_nodes(H)
  if nnodes < 2:
    return (2, 0, 0, 0, 0)
  nedges = nx.number_of_edges(H)
  dens = edge_density(nedges, nnodes)
  average_score = 0
  score = 0
  for (u, v, d) in H.edges(data=True):
    score += d['score']

  # average over all possible edges
  # average_score = 2 * score / (nnodes * (nnodes - 1))
  # average over all present edges
  average_score = score / nedges
  trans = nx.transitivity(H)

  return (nnodes, nedges, dens, average_score, trans)
Example 27
def get_motifs(G, thr):
	f = open(out_prfx + 'motifs.dat', 'a')
	tri_dict = nx.triangles(G)   #number of triangles around nodes in G
	summe = 0
	for node in tri_dict:
		summe += tri_dict[node] # summing up all triangle numbers over nodes

	N = nx.number_of_nodes(G)
	ratio = summe / (3. * binomialCoefficient(N,3)) # ratio to potential triangles

	transi = nx.transitivity(G)
	if transi > 0:
		triads = summe / transi 	# triads
		ratio_triads = triads / (3 * binomialCoefficient(N,3)) #ratio to pot.
	else:
		triads = 0.
		ratio_triads = 0.
	f.write("%f\t%d\t%f\t%f\t%f\n" % (thr, summe/3, ratio, triads, ratio_triads))
	f.close()
def save_network_statistics(g):
    stats = {}
    stats['num_weakly_connected_components'] = nx.number_weakly_connected_components(g)
    stats['num_strongly_connected_components'] = nx.number_strongly_connected_components(g)
    stats['num_nodes'] = nx.number_of_nodes(g)
    stats['num_edges'] = nx.number_of_edges(g)
    stats['density'] = nx.density(g)
    try:
        stats['avg_clustering_coef'] = nx.average_clustering(g)
    except:
        stats['avg_clustering_coef'] = None # not defined for directed graphs
    stats['avg_degree'] = sum(g.degree().values()) / float(stats['num_nodes'])
    stats['transitivity'] = nx.transitivity(g)
    try:
        stats['diameter'] = nx.diameter(g)
    except:
        stats['diameter'] = None # unconnected --> infinite path length between connected components

    with open('./network-statistics/twitter-combined-statistics.txt', 'wb') as f:
        for stat_name, stat_value in stats.iteritems():
            f.write(stat_name + ': ' + str(stat_value) + '\n')
Example 29
def compute_local_clustering(genelist, T250, commGraphs):
    '''
    First part computes clustering coefficient of a subset of nodes delta
    in the set
    Second part computes communities in T250 and calculates the third term
    '''
    vertexList = {k: [] for k in range(0, len(commGraphs.keys()))}
    for comm in commGraphs:
        for gene in genelist:
            if gene in commGraphs[comm].nodes():
                vertexList[comm].append(gene)

    k = 0
    delta = 0
    for comm in vertexList.keys():
        if len(vertexList[comm]) > 0:
            delta += NX.transitivity(NX.subgraph(commGraphs[comm],
                                                 vertexList[comm]))
            k += 1

    return delta / float(k)
Example 30
def loadEdgeList(anomymizedEdges):
    G = nx.DiGraph()
    UG = nx.Graph()
    with open(anomymizedEdges,'rb') as file:
        content = csv.reader(file)
        for row in content:
            G.add_edge(row[0],row[1])
    UG = G.to_undirected()



    print 'Average Local Clustering : {0}\n'.format(str(nx.average_clustering(UG)))

    print 'Global Clustering: {0}\n'.format(str(nx.transitivity(G)))

    print 'Page Rank Centrality:'
    pageRank = sorted(nx.pagerank_numpy(G).items(),key=lambda x:x[1])

    for i in pageRank[-10:]:
        print '{0}       {1}'.format(i[0],i[1])
    print ''
    print 'Eigenvector Centrality:'
    eigenVector = sorted(nx.centrality.eigenvector_centrality(G).items(),key=lambda x:x[1] )
    for i in eigenVector[-10:]:
        print '{0}       {1}'.format(i[0],i[1])
    print ''
    print 'Degree Centrality:'
    degreeCentrality = sorted(nx.centrality.in_degree_centrality(G).items(),key=lambda x:x[1])
    for i in degreeCentrality[-10:]:
        print '{0}       {1}'.format(i[0],i[1])
    print ''

    print 'Rank correlation between Pagerank Centrality and Eigenvector Centrality: ' ,st.spearmanr([i[1] for i in pageRank],[i[1] for i in eigenVector])[0]

    print 'Rank correlation between Pagerank Centrality and Degree Centrality: ',st.spearmanr([i[1] for i in pageRank],[i[1] for i in degreeCentrality])[0]

    print 'Rank correlation between Degree Centrality and Eigenvector Centrality: ',st.spearmanr([i[1] for i in degreeCentrality],[i[1] for i in eigenVector])[0]

    js = max([i for i in nx.algorithms.link_prediction.jaccard_coefficient(UG)], key = lambda x:x[2])
    print '\nNodes with max Jaccard Similarity : {0} {1}\n'.format(str(js[0]),str(js[1]))
Example 31
def degree_statistics_one(G):
    n_nodes = G.number_of_nodes()
    
    #####
    # number of edges s_NE
    s_NE = G.number_of_edges()
    
    # average degree s_AD
    s_AD = 2*float(s_NE) /n_nodes
    
    # maximal degree s_MD
    s_MD = max(G.degree().itervalues())
    
    # degree variance s_DV
    s_DV = 1.0/n_nodes * sum((d - s_AD)*(d-s_AD) for d in G.degree().itervalues())
    
    # clustering coefficient s_CC
    s_CC = nx.transitivity(G)

    
    # degree distribution --> HISTOGRAM count
#    deg_list = [0 for i in range(MAX_DEG)]
#    for d in G.degree().itervalues():
#        deg_list[d] += 1
#            
#    i = MAX_DEG-1
#    while deg_list[i] == 0:
#        i = i-1
#    deg_list = deg_list[:i+1]
#    print "len(deg_list) =", len(deg_list)
#    print deg_list

    # degree distribution --> keep it as (multi) SET
    deg_list = list(G.degree().itervalues())
    
    #
    return s_NE, s_AD, s_MD, s_DV, s_CC, deg_list
Example 32
def analyze_graph(graph):
    # https://networkx.github.io/documentation/latest/reference/algorithms/generated/networkx.algorithms.cluster.triangles.html
    # Triangles per node; we analyse the average over the graph
    triangles = np.average(list(nx.triangles(graph).values()))
    # https://networkx.github.io/documentation/latest/reference/algorithms/generated/networkx.algorithms.cluster.transitivity.html
    transitivity = nx.transitivity(graph)
    # https://networkx.github.io/documentation/latest/reference/algorithms/generated/networkx.algorithms.cluster.clustering.html
    # clustering = nx.clustering(graph, weight='weight').values()
    # https://networkx.github.io/documentation/latest/reference/algorithms/generated/networkx.algorithms.cluster.average_clustering.html
    average_clustering = nx.average_clustering(graph,
                                               weight='weight',
                                               count_zeros=False)
    # https://networkx.github.io/documentation/latest/reference/algorithms/generated/networkx.algorithms.bipartite.centrality.closeness_centrality.html
    closeness = nx.closeness_centrality(graph).values()
    # https://networkx.github.io/documentation/latest/reference/algorithms/generated/networkx.algorithms.bipartite.centrality.betweenness_centrality.html
    betweenness = nx.betweenness_centrality(graph).values()
    # https://networkx.github.io/documentation/latest/reference/algorithms/generated/networkx.algorithms.assortativity.degree_assortativity_coefficient.html
    homophily = nx.degree_assortativity_coefficient(graph, weight='weight')
    # https://networkx.github.io/documentation/networkx-1.9.1/reference/generated/networkx.algorithms.assortativity.attribute_assortativity_coefficient.html
    # Homophily by citations
    homophily_citations = nx.attribute_assortativity_coefficient(
        graph, 'citations')
    # Homophily by university
    homophily_university = nx.attribute_assortativity_coefficient(
        graph, 'university')

    return {
        'triangles': np.round(triangles, 2),
        'transitivity': transitivity,
        # 'clustering': clustering,
        'average_clustering': average_clustering,
        'closeness': list(closeness),
        'betweenness': list(betweenness),
        'homophily': homophily,
        'homophily_citations': homophily_citations,
        'homophily_university': homophily_university
    }
Example 33
def create_graph(red_G):
    pos = nx.spring_layout(red_G)
    nx.draw_networkx_nodes(red_G, pos, node_color='black', node_size=30)
    nx.draw_networkx_edges(red_G, pos, edge_color='purple')
    nx.draw_networkx_labels(red_G, pos, font_size=10, font_family='Arial')
    plt.axis('off')
    plt.show()

    # most central words
    centr_words = []
    deg_centr_words = nx.degree_centrality(red_G)
    for nodeid in sorted(deg_centr_words,
                         key=deg_centr_words.get,
                         reverse=True):
        centr_words.append(nodeid)
    print('Most central words of the graph: ' + str(centr_words[0]) + ', ' +
          str(centr_words[1]) + '.')

    # graph radius
    print('Graph radius: ' + str(nx.radius(red_G)))

    # clustering coefficient
    print('Clustering coefficient: ' + str(nx.average_clustering(red_G)))
    print(nx.transitivity(red_G))
Example 34
    opr_nodes = operon_nodes(crs_f, similarity_cutoff, zscore_cutoff)
    #pprint(opr_nodes)
    G = motif_graph(crs_f, similarity_cutoff, zscore_cutoff)

    loo_f = "../../data/LOO_per_matrix_site.tsv"
    regulon_f = "../../data/regulon_by_first_gene.txt"
    LOO = read_LOO(loo_f)
    regulon = read_regulon(regulon_f)

    print("#graph has %d nodes with %d edges, edges to nodes ratio: %f, edge average zscore: %f, %f transitivity"\
            %(nx.number_of_nodes(G),
                nx.number_of_edges(G),
                edge_density(nx.number_of_edges(G), nx.number_of_nodes(G)),
                edge_average_zscore(G),
                nx.transitivity(G)))
    print("#", nx.number_connected_components(G), "connected components")

    print(
        "reg\tLOO\tsize\tnodes\tedges\tratio\tavg_zscore\tnumber_connected_components\tlargest_comp_size\ttransitivity"
    )

    for reg in regulon.keys():
        if len(regulon[reg]) > 2:
            nodes = list()
            for gi in regulon[reg]:
                for n in opr_nodes[gi]:
                    nodes.append(n)

            H = G.subgraph(nodes)
            if nx.number_of_nodes(H) < 2:
Example 35
 def test_cubical(self):
     G = nx.cubical_graph()
     assert nx.transitivity(G) == 0.0
Example 36
 def test_transitivity(self):
     G = nx.Graph()
     assert nx.transitivity(G) == 0.0
Example 37
def sigma(G, niter=100, nrand=10, seed=None):
    """Returns the small-world coefficient (sigma) of the given graph.

    The small-world coefficient is defined as:
    sigma = C/Cr / L/Lr
    where C and L are respectively the average clustering coefficient and
    average shortest path length of G. Cr and Lr are respectively the average
    clustering coefficient and average shortest path length of an equivalent
    random graph.

    A graph is commonly classified as small-world if sigma>1.

    Parameters
    ----------
    G : NetworkX graph
        An undirected graph.
    niter : integer (optional, default=100)
        Approximate number of rewiring per edge to compute the equivalent
        random graph.
    nrand : integer (optional, default=10)
        Number of random graphs generated to compute the average clustering
        coefficient (Cr) and average shortest path length (Lr).
    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    Returns
    -------
    sigma : float
        The small-world coefficient of G.

    Notes
    -----
    The implementation is adapted from Humphries et al. [1]_ [2]_.

    References
    ----------
    .. [1] The brainstem reticular formation is a small-world, not scale-free,
           network M. D. Humphries, K. Gurney and T. J. Prescott,
           Proc. Roy. Soc. B 2006 273, 503-511, doi:10.1098/rspb.2005.3354.
    .. [2] Humphries and Gurney (2008).
           "Network 'Small-World-Ness': A Quantitative Method for Determining
           Canonical Network Equivalence".
           PLoS One. 3 (4). PMID 18446219. doi:10.1371/journal.pone.0002051.
    """
    import numpy as np

    # Compute the mean clustering coefficient and average shortest path length
    # for an equivalent random graph
    randMetrics = {"C": [], "L": []}
    for i in range(nrand):
        Gr = random_reference(G, niter=niter, seed=seed)
        randMetrics["C"].append(nx.transitivity(Gr))
        randMetrics["L"].append(nx.average_shortest_path_length(Gr))

    C = nx.transitivity(G)
    L = nx.average_shortest_path_length(G)
    Cr = np.mean(randMetrics["C"])
    Lr = np.mean(randMetrics["L"])

    sigma = (C / Cr) / (L / Lr)

    return sigma
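Illustrative usage (not part of the original): a Watts-Strogatz graph with a small rewiring probability is expected to score sigma > 1. `random_reference` above is the rewiring helper from the same NetworkX small-world module, and recent NetworkX releases expose this function directly as nx.sigma.

import networkx as nx

G = nx.watts_strogatz_graph(100, 6, 0.1, seed=42)
# small niter/nrand keep the demo fast; the defaults are niter=100, nrand=10
print(sigma(G, niter=5, nrand=2, seed=42))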
Example 38
path = '../data/karate/karate.gml'
nodes, edges = gml_data.load_gml_data(path)

G = nx.Graph()
G.add_nodes_from(nodes)
G.add_edges_from(edges)


# print node information
print(G.nodes(data=True))

# print edge information
print(G.edges)

# compute the transitivity of the graph (network)
print(nx.transitivity(G))

# number of nodes
print(G.number_of_nodes())

# number of edges
print(G.number_of_edges())

# neighbors of a node
print(G.neighbors(1))

import igraph

g = igraph.Graph([(0,1), (0,2), (2,3), (3,4), (4,2), (2,5), (5,0), (6,3), (5,6)])
igraph.plot(g, target="/tmp/igraph_demo.png")
        degree_cen = nx.degree_centrality(G_r[repo])
        cens[repo] = (max(cns) * len(G_r[repo]) - sum(cns)) / (
            len(G_r[repo]) - 2) if len(G_r[repo]) > 2 else 0
        sizes[repo] = len(G_r[repo].nodes)
    if not nx.is_connected(G):
        # print(tm)
        pass
        # continue
    degree_cen = nx.degree_centrality(G).items()
    degree_cen = sorted(degree_cen, key=lambda x: x[1])
    nodes = [d_c[0] for d_c in degree_cen]
    cns = [d_c[1] for d_c in degree_cen]
    centers['all'] = nodes[cns.index(cns[-1]):]
    aspls['all'] = nx.average_shortest_path_length(G) if nx.is_connected(
        G) else -1
    acs['all'] = nx.transitivity(G) if nx.is_connected(G) else -1
    cens['all'] = (cns[-1] * len(G) - sum(cns)) / (len(G) -
                                                   2) if len(G) > 2 else 0
    team_centers.append(centers)
    team_acs.append(acs)
    team_aspls.append(aspls)
    team_cens.append(cens)
    team_sizes.append(sizes)
    # print(len(team_cens))

print("Computing Existing Duration...")
network_time = {}
with open(time_filename) as tf:
    for line in tf.readlines():
        m1, m2, t1, t2 = line.strip().split('\t')
        if m1 in network_time:
print(kmin_mul)
print(kmean_mul)
print(avDegree_mul)
"""

Results.write("k max\t%.2f\t%.2f\t%.2f\n" % (kmax_lit, kmax_bin, kmax_mul))
Results.write("k min\t%.2f\t%.2f\t%.2f\n" % (kmin_lit, kmin_bin, kmin_mul))

densidad_bin = nx.density(Gbin)
densidad_lit = nx.density(Glit)
densidad_mul = nx.density(Gmul)

Results.write("density\t%.4f\t%.4f\t%.4f\n" %
              (densidad_lit, densidad_bin, densidad_mul))

c_global_lit = nx.transitivity(Glit)
c_global_bin = nx.transitivity(Gbin)
c_global_mul = nx.transitivity(Gmul)

ci_lit = nx.average_clustering(Glit)
ci_bin = nx.average_clustering(Gbin)
ci_mul = nx.average_clustering(Gmul)

Results.write("C glob.\t%.4f\t%.4f\t%.4f\n" %
              (c_global_lit, c_global_bin, c_global_mul))
Results.write("C_i\t\t%.4f\t%.4f\t%.4f\n" % (ci_lit, ci_bin, ci_mul))

####### lit
componentes_lit = nx.connected_component_subgraphs(Glit)
diametros_lit = []
for c in componentes_lit:
def printGraphProperty(G, score_name):
  total_nodes = G.number_of_nodes()
  total_edges = G.number_of_edges()
  trans = nx.transitivity(G)
  print ("score\tnodes\tedges\ttransitivity")
  print ("%s\t%d\t%d\t%.3f" % (score_name, total_nodes, total_edges, trans))
Example 42
                       node_size=5,
                       label="Students")
plt.legend(loc="lower right", facecolor='grey')
nx.draw_networkx_edges(P,
                       pos=nx.random_layout(P),
                       with_labels=False,
                       edge_color="red",
                       width=0.03,
                       label="Student-Student")
plt.title("Projected bipartite graph of students", size=20)
ax2 = plt.axes()
ax2.set_facecolor("lavender")
plt.show()

clustering = nx.average_clustering(P)
transitivity = nx.transitivity(P)
print("\n\n\nclustering:\n", clustering, file=f)
print("\n\n\ntransitivity:\n", transitivity, file=f)

degCent = nx.degree_centrality(G)
degCent_classes = dict(
    (key, value) for key, value in degCent.items() if not re.match("\d+", key))
sorted_degCent = sorted(degCent_classes.items(),
                        key=operator.itemgetter(1),
                        reverse=True)
# # print(sorted_degCent)
#
closeCent = nx.closeness_centrality(G, wf_improved=True)
closeCent_classes = dict((key, value) for key, value in closeCent.items()
                         if not re.match("\d+", key))
sorted_closeCent = sorted(closeCent_classes.items(),
Example 43
 def get_transitivity(self):
     try:
         return nx.transitivity(self.graph)
     except Exception, e:
         print traceback.print_exc()
Example 44
def print_basic_graph_properties(G, file_path="graph/graph_properties.txt"):

    output_string = ""
    if type(G) != nx.classes.digraph.DiGraph:
        raise Exception("NetworkX directed graph expected")
    output_string += " Type of object " + str(type(G)) + "\n"
    output_string += " It has  " + str(len(G.nodes())) + " nodes and " +\
     str(len(G.edges()))+ " edges \n"

    pathlengths = []
    output_string += "source vertex {target:length, } for some nodes \ n "
    count = 0
    for v in G.nodes():
        # Compute the shortest path lengths from source to all reachable nodes
        spl = nx.single_source_shortest_path_length(G, v)
        count += 1
        if count < 20:
            output_string += '%s %s' % (v, spl)
            output_string += "\n"
        for p in spl.values():
            pathlengths.append(p)
    # histogram of lengths of paths
    histogram_graph(pathlengths,
                    "Distribución de la menor longitud de los caminos",
                    oyellow, "images/pathlengths_distribution.png")

    output_string += "    \n"
    output_string += " ******  average shortest path length %s" % (
        sum(pathlengths) / len(pathlengths)) + " \n"

    # Strongly connected component
    is_wk_connected = nx.is_weakly_connected(G)
    output_string += " Is the graph strongly connected? -> " + str(
        nx.is_strongly_connected(G)) + "   \n"
    n = nx.number_strongly_connected_components(G)
    output_string += "It has " + str(n) + " strongly connected components  \n"
    # time consuming
    largest = max(nx.strongly_connected_component_subgraphs(G), key=len)
    output_string += "the largest strongly connected component has  " + str(
        len(largest)) + " nodes, which are a " + str(
            len(largest) / len(G) * 100) + "% of total nodes  \n"
    output_string += "for the largest component, the descriptive measures are: \n"
    output_string += basic_measures(largest)

    # Weakly connected component
    output_string += " Is the graph weakly connected? -> " + str(
        nx.is_weakly_connected(G)) + "     \n"
    n = nx.number_weakly_connected_components(G)
    output_string += "It has " + str(n) + " weakly connected components  \n"
    # time consuming
    largest = max(nx.weakly_connected_component_subgraphs(G), key=len)
    output_string += "the largest weakly connected component has  " + str(
        len(largest)) + " nodes, which are a " + str(
            len(largest) / len(G) * 100) + "% of total nodes  \n"

    degree_sequence = [d for n, d in G.degree()]
    histogram_graph(degree_sequence, "Distribucion del grado", oyellow,
                    "images/degree_distribution.png")
    degree_sequence = [d for n, d in G.in_degree()]
    histogram_graph(degree_sequence, "Distribución del in-degree", oyellow,
                    "images/indegree_distribution.png")
    degree_sequence = [d for n, d in G.out_degree()]
    histogram_graph(degree_sequence, "Distribución del out-degree", oyellow,
                    "images/outdegree_distribution.png")

    output_string += "El coeficiente de transitividad del grafo es " + str(
        nx.transitivity(G))

    print(output_string)
    f = open(file_path, 'w')
    f.write(output_string)
    f.close()
    return
Example 45
for grado in deg.values():
    a.append(grado)
print("iv.2) el grado màximo es", max(a))
print("iv.3) el grado mìnimo es", min(a))

## Density

print("v) la densidad de la red 1 es", nx.density(G1))
print("v) la densidad de la red 2 es", nx.density(G2))
print("v) la densidad de la red 2 es", nx.density(G3))

# Average clustering coefficient

CC1 = nx.average_clustering(G1)
print("the average clustering coefficient <C> of network 1 is", CC1)
CC2 = nx.average_clustering(G2)
print("the average clustering coefficient <C> of network 2 is", CC2)
CC3 = nx.average_clustering(G3)
print("the average clustering coefficient <C> of network 3 is", CC3)

# Global clustering coefficient

CCG1 = nx.transitivity(G1)
print("the global clustering coefficient of network 1 is", CCG1)

CCG2 = nx.transitivity(G2)
print("the global clustering coefficient of network 2 is", CCG2)

CCG3 = nx.transitivity(G3)
print("the global clustering coefficient of network 3 is", CCG3)
Example 46
def spearman(G_times,
             anomaly_ranks,
             directed,
             window,
             initial_period,
             plot=False):

    max_time = len(G_times)
    t = list(range(0, max_time))
    avg_clustering = []

    avg_weight = []
    total_edges = []
    avg_clustering = []
    avg_degree = []
    transitivity = []

    if (directed):
        num_strong = []
        num_weak = []
    else:
        num_connected_components = []

    for G in G_times:
        weights = list(nx.get_edge_attributes(G, 'weight').values())
        degrees = list(G.degree)
        sum_degree = 0
        for (v, d) in degrees:
            sum_degree = sum_degree + d

        total_edges.append(G.number_of_edges())
        avg_degree.append(sum_degree / len(degrees))
        if (len(weights) > 0):
            avg_weight.append(sum(weights) / len(weights))
        avg_clustering.append(nx.average_clustering(G))
        transitivity.append(nx.transitivity(G))

        if (directed):
            num_strong.append(nx.number_strongly_connected_components(G))
            num_weak.append(nx.number_weakly_connected_components(G))
        else:
            num_connected_components.append(nx.number_connected_components(G))

    if (len(avg_weight) > 0):
        ranks = rank_outliers(avg_weight,
                              window=window,
                              initial_period=initial_period)
        (corr, p_test) = spearmanr(anomaly_ranks, ranks)
        if (plot):
            normal_util.plot_ranks(anomaly_ranks, ranks, "avg_weight")
        print("spearman rank correlation with avg edge weight is " + str(corr))
        print("p-test with avg edge weight is " + str(p_test))
        print()

    ranks = rank_outliers(avg_clustering,
                          window=window,
                          initial_period=initial_period)
    (corr, p_test) = spearmanr(anomaly_ranks, ranks)

    # if (plot):
    #     normal_util.plot_ranks(anomaly_ranks, ranks, "avg_clustering")
    # print ("spearman rank correlation with avg clustering coefficient is " + str(corr))
    # print ("p-test with avg clustering coefficient is " + str(p_test))
    # print ()

    if (directed):
        ranks = rank_outliers(num_weak,
                              window=window,
                              initial_period=initial_period)
        (corr, p_test) = spearmanr(anomaly_ranks, ranks)
        if (plot):
            normal_util.plot_ranks(anomaly_ranks, ranks, "weak_connected")
        print(
            "spearman rank correlation with number of weakly connected components is "
            + str(corr))
        print("p-test with number of weakly connected components is " +
              str(p_test))
        print()

        ranks = rank_outliers(num_strong,
                              window=window,
                              initial_period=initial_period)
        (corr, p_test) = spearmanr(anomaly_ranks, ranks)
        if (plot):
            normal_util.plot_ranks(anomaly_ranks, ranks, "strong_connected")
        print(
            "spearman rank correlation with number of strongly connected components is "
            + str(corr))
        print("p-test with number of strongly connected components is " +
              str(p_test))
        print()

    else:
        ranks = rank_outliers(num_connected_components,
                              window=window,
                              initial_period=initial_period)
        (corr, p_test) = spearmanr(anomaly_ranks, ranks)
        if (plot):
            normal_util.plot_ranks(anomaly_ranks, ranks, "num_connected")
        print(
            "spearman rank correlation with number of connected components is "
            + str(corr))
        print("p-test with number of connected components is " + str(p_test))
        print()

    ranks = rank_outliers(transitivity,
                          window=window,
                          initial_period=initial_period)
    (corr, p_test) = spearmanr(anomaly_ranks, ranks)
    if (plot):
        normal_util.plot_ranks(anomaly_ranks, ranks, "transitivity")
    print("spearman rank correlation with transitivity is " + str(corr))
    print("p-test with transitivity is " + str(p_test))
    print()

    ranks = rank_outliers(total_edges,
                          window=window,
                          initial_period=initial_period)
    (corr, p_test) = spearmanr(anomaly_ranks, ranks)
    if (plot):
        normal_util.plot_ranks(anomaly_ranks, ranks, "num_edges")
    print("spearman rank correlation with total number of edges is " +
          str(corr))
    print("p-test with total number of edges is " + str(p_test))
    print()

    ranks = rank_outliers(avg_degree,
                          window=window,
                          initial_period=initial_period)
    (corr, p_test) = spearmanr(anomaly_ranks, ranks)
    if (plot):
        normal_util.plot_ranks(anomaly_ranks, ranks, "average_degree")
    print("spearman rank correlation with average degree is " + str(corr))
    print("p-test with average degree is " + str(p_test))
    print()
Example 47
def compute_summaries(G):
    """ Compute network features, computational times and their nature.
    
    Evaluate 54 summary statistics of a network G, plus 4 noise variables,
    store the computational time to evaluate each summary statistic, and keep
    track of their nature (discrete or not).
        
        Args:
            G (networkx.classes.graph.Graph):
                an undirected networkx graph.
        
        Returns:
            resDicts (tuple): 
                a tuple containing the elements:
                - dictSums (dict): a dictionary with the name of the summaries
                as keys and the summary statistic values as values;
                - dictTimes (dict): a dictionary with the name of the summaries
                as keys and the time to compute each one as values;
                - dictIsDisc (dict): a dictionary indicating if the summary is
                discrete (True) or not (False).
                
    """

    dictSums = dict()  # Will store the summary statistic values
    dictTimes = dict()  # Will store the evaluation times
    dictIsDisc = dict()  # Will store the summary statistic nature

    # Extract the largest connected component
    Gcc = sorted(nx.connected_components(G), key=len, reverse=True)
    G_lcc = G.subgraph(Gcc[0])

    # Number of edges
    start = time.time()
    dictSums["num_edges"] = G.number_of_edges()
    dictTimes["num_edges"] = time.time() - start
    dictIsDisc["num_edges"] = True

    # Number of connected components
    start = time.time()
    dictSums["num_of_CC"] = nx.number_connected_components(G)
    dictTimes["num_of_CC"] = time.time() - start
    dictIsDisc["num_of_CC"] = True

    # Number of nodes in the largest connected component
    start = time.time()
    dictSums["num_nodes_LCC"] = nx.number_of_nodes(G_lcc)
    dictTimes["num_nodes_LCC"] = time.time() - start
    dictIsDisc["num_nodes_LCC"] = True

    # Number of edges in the largest connected component
    start = time.time()
    dictSums["num_edges_LCC"] = G_lcc.number_of_edges()
    dictTimes["num_edges_LCC"] = time.time() - start
    dictIsDisc["num_edges_LCC"] = True

    # Diameter of the largest connected component
    start = time.time()
    dictSums["diameter_LCC"] = nx.diameter(G_lcc)
    dictTimes["diameter_LCC"] = time.time() - start
    dictIsDisc["diameter_LCC"] = True

    # Average geodesic distance (shortest path length in the LCC)
    start = time.time()
    dictSums["avg_geodesic_dist_LCC"] = nx.average_shortest_path_length(G_lcc)
    dictTimes["avg_geodesic_dist_LCC"] = time.time() - start
    dictIsDisc["avg_geodesic_dist_LCC"] = False

    # Average degree of the neighborhood of each node
    start = time.time()
    dictSums["avg_deg_connectivity"] = np.mean(
        list(nx.average_degree_connectivity(G).values()))
    dictTimes["avg_deg_connectivity"] = time.time() - start
    dictIsDisc["avg_deg_connectivity"] = False

    # Average degree of the neighbors of each node in the LCC
    start = time.time()
    dictSums["avg_deg_connectivity_LCC"] = np.mean(
        list(nx.average_degree_connectivity(G_lcc).values()))
    dictTimes["avg_deg_connectivity_LCC"] = time.time() - start
    dictIsDisc["avg_deg_connectivity_LCC"] = False

    # Recover the degree distribution
    start_degree_extract = time.time()
    degree_vals = list(dict(G.degree()).values())
    degree_extract_time = time.time() - start_degree_extract

    # Entropy of the degree distribution
    start = time.time()
    dictSums["degree_entropy"] = ss.entropy(degree_vals)
    dictTimes["degree_entropy"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_entropy"] = False

    # Maximum degree
    start = time.time()
    dictSums["degree_max"] = max(degree_vals)
    dictTimes["degree_max"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_max"] = True

    # Average degree
    start = time.time()
    dictSums["degree_mean"] = np.mean(degree_vals)
    dictTimes["degree_mean"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_mean"] = False

    # Median degree
    start = time.time()
    dictSums["degree_median"] = np.median(degree_vals)
    dictTimes["degree_median"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_median"] = False

    # Standard deviation of the degree distribution
    start = time.time()
    dictSums["degree_std"] = np.std(degree_vals)
    dictTimes["degree_std"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_std"] = False

    # Quantile 25%
    start = time.time()
    dictSums["degree_q025"] = np.quantile(degree_vals, 0.25)
    dictTimes["degree_q025"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_q025"] = False

    # Quantile 75%
    start = time.time()
    dictSums["degree_q075"] = np.quantile(degree_vals, 0.75)
    dictTimes["degree_q075"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_q075"] = False

    # Average geodesic distance
    start = time.time()
    dictSums["avg_shortest_path_length_LCC"] = nx.average_shortest_path_length(
        G_lcc)
    dictTimes["avg_shortest_path_length_LCC"] = time.time() - start
    dictIsDisc["avg_shortest_path_length_LCC"] = False

    # Average global efficiency:
    # The efficiency of a pair of nodes in a graph is the multiplicative
    # inverse of the shortest path distance between the nodes.
    # The average global efficiency of a graph is the average efficiency of
    # all pairs of nodes.
    start = time.time()
    dictSums["avg_global_efficiency"] = nx.global_efficiency(G)
    dictTimes["avg_global_efficiency"] = time.time() - start
    dictIsDisc["avg_global_efficiency"] = False

    # Harmonic mean of the shortest-path distances, i.e. 1/avg_global_efficiency
    start = time.time()
    global_eff = nx.global_efficiency(G)
    dictSums["harmonic_mean"] = 1.0 / global_eff if global_eff > 0 else float("inf")
    dictTimes["harmonic_mean"] = time.time() - start
    dictIsDisc["harmonic_mean"] = False

    # Average local efficiency
    # The local efficiency of a node in the graph is the average global
    # efficiency of the subgraph induced by the neighbors of the node.
    # The average local efficiency is the average of the
    # local efficiencies of each node.
    start = time.time()
    dictSums["avg_local_efficiency_LCC"] = nx.local_efficiency(G_lcc)
    dictTimes["avg_local_efficiency_LCC"] = time.time() - start
    dictIsDisc["avg_local_efficiency_LCC"] = False

    # Node connectivity
    # The node connectivity is equal to the minimum number of nodes that
    # must be removed to disconnect G or render it trivial.
    # Only on the largest connected component here.
    start = time.time()
    dictSums["node_connectivity_LCC"] = nx.node_connectivity(G_lcc)
    dictTimes["node_connectivity_LCC"] = time.time() - start
    dictIsDisc["node_connectivity_LCC"] = True

    # Edge connectivity
    # The edge connectivity is equal to the minimum number of edges that
    # must be removed to disconnect G or render it trivial.
    # Only on the largest connected component here.
    start = time.time()
    dictSums["edge_connectivity_LCC"] = nx.edge_connectivity(G_lcc)
    dictTimes["edge_connectivity_LCC"] = time.time() - start
    dictIsDisc["edge_connectivity_LCC"] = True

    # Graph transitivity:
    # 3 times the number of triangles divided by the number of connected triples (triads)
    start = time.time()
    dictSums["transitivity"] = nx.transitivity(G)
    dictTimes["transitivity"] = time.time() - start
    dictIsDisc["transitivity"] = False

    # Number of triangles
    start = time.time()
    dictSums["num_triangles"] = np.sum(list(nx.triangles(G).values())) / 3
    dictTimes["num_triangles"] = time.time() - start
    dictIsDisc["num_triangles"] = True

    # Estimate of the average clustering coefficient of G:
    # Average local clustering coefficient, with local clustering coefficient
    # defined as C_i = (nbr of pairs of neighbors of i that are connected)/(nbr of pairs of neighbors of i)
    start = time.time()
    dictSums["avg_clustering_coef"] = nx.average_clustering(G)
    dictTimes["avg_clustering_coef"] = time.time() - start
    dictIsDisc["avg_clustering_coef"] = False

    # Square clustering (averaged over nodes):
    # the fraction of possible squares that exist at the node.

    # We average it over nodes
    start = time.time()
    dictSums["square_clustering_mean"] = np.mean(
        list(nx.square_clustering(G).values()))
    dictTimes["square_clustering_mean"] = time.time() - start
    dictIsDisc["square_clustering_mean"] = False

    # We compute the median
    start = time.time()
    dictSums["square_clustering_median"] = np.median(
        list(nx.square_clustering(G).values()))
    dictTimes["square_clustering_median"] = time.time() - start
    dictIsDisc["square_clustering_median"] = False

    # We compute the standard deviation
    start = time.time()
    dictSums["square_clustering_std"] = np.std(
        list(nx.square_clustering(G).values()))
    dictTimes["square_clustering_std"] = time.time() - start
    dictIsDisc["square_clustering_std"] = False

    # Number of nodes in the 2-core
    start = time.time()
    dictSums["num_2cores"] = len(nx.k_core(G, k=2))
    dictTimes["num_2cores"] = time.time() - start
    dictIsDisc["num_2cores"] = True

    # Number of nodes in the 3-core
    start = time.time()
    dictSums["num_3cores"] = len(nx.k_core(G, k=3))
    dictTimes["num_3cores"] = time.time() - start
    dictIsDisc["num_3cores"] = True

    # Number of nodes in the 4-core
    start = time.time()
    dictSums["num_4cores"] = len(nx.k_core(G, k=4))
    dictTimes["num_4cores"] = time.time() - start
    dictIsDisc["num_4cores"] = True

    # Number of nodes in the 5-core
    start = time.time()
    dictSums["num_5cores"] = len(nx.k_core(G, k=5))
    dictTimes["num_5cores"] = time.time() - start
    dictIsDisc["num_5cores"] = True

    # Number of nodes in the 6-core
    start = time.time()
    dictSums["num_6cores"] = len(nx.k_core(G, k=6))
    dictTimes["num_6cores"] = time.time() - start
    dictIsDisc["num_6cores"] = True

    # Size of the k-shells (number of nodes in each)
    # The k-shell is the subgraph induced by nodes with core number exactly k,
    # i.e. nodes that are in the k-core but not in the (k+1)-core.

    # Number of nodes in the 2-shell
    start = time.time()
    dictSums["num_2shells"] = len(nx.k_shell(G, 2))
    dictTimes["num_2shells"] = time.time() - start
    dictIsDisc["num_2shells"] = True

    # Number of nodes in the 3-shell
    start = time.time()
    dictSums["num_3shells"] = len(nx.k_shell(G, 3))
    dictTimes["num_3shells"] = time.time() - start
    dictIsDisc["num_3shells"] = True

    # Number of nodes in the 4-shell
    start = time.time()
    dictSums["num_4shells"] = len(nx.k_shell(G, 4))
    dictTimes["num_4shells"] = time.time() - start
    dictIsDisc["num_4shells"] = True

    # Number of nodes in the 5-shell
    start = time.time()
    dictSums["num_5shells"] = len(nx.k_shell(G, 5))
    dictTimes["num_5shells"] = time.time() - start
    dictIsDisc["num_5shells"] = True

    # Number of nodes in the 6-shell
    start = time.time()
    dictSums["num_6shells"] = len(nx.k_shell(G, 6))
    dictTimes["num_6shells"] = time.time() - start
    dictIsDisc["num_6shells"] = True

    start = time.time()
    listOfCliques = list(nx.enumerate_all_cliques(G))
    enum_all_cliques_time = time.time() - start

    # Number of 4-cliques
    start = time.time()
    n4Clique = 0
    for li in listOfCliques:
        if len(li) == 4:
            n4Clique += 1
    dictSums["num_4cliques"] = n4Clique
    dictTimes["num_4cliques"] = time.time() - start + enum_all_cliques_time
    dictIsDisc["num_4cliques"] = True

    # Number of 5-cliques
    start = time.time()
    n5Clique = 0
    for li in listOfCliques:
        if len(li) == 5:
            n5Clique += 1
    dictSums["num_5cliques"] = n5Clique
    dictTimes["num_5cliques"] = time.time() - start + enum_all_cliques_time
    dictIsDisc["num_5cliques"] = True

    # Approximate size of a maximum clique in the graph
    start = time.time()
    dictSums["max_clique_size"] = len(approximation.clique.max_clique(G))
    dictTimes["max_clique_size"] = time.time() - start
    dictIsDisc["max_clique_size"] = True

    # Approximated size of a large clique in the graph
    start = time.time()
    dictSums["large_clique_size"] = approximation.large_clique_size(G)
    dictTimes["large_clique_size"] = time.time() - start
    dictIsDisc["large_clique_size"] = True

    # Number of shortest paths of length k
    start = time.time()
    listOfPLength = list(nx.shortest_path_length(G))
    path_length_time = time.time() - start

    # when k = 3
    start = time.time()
    n3Paths = 0
    for node in G.nodes():
        tmp = list(listOfPLength[node][1].values())
        n3Paths += tmp.count(3)
    dictSums["num_shortest_3paths"] = n3Paths / 2
    dictTimes["num_shortest_3paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_3paths"] = True

    # when k = 4
    start = time.time()
    n4Paths = 0
    for node in G.nodes():
        tmp = list(listOfPLength[node][1].values())
        n4Paths += tmp.count(4)
    dictSums["num_shortest_4paths"] = n4Paths / 2
    dictTimes["num_shortest_4paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_4paths"] = True

    # when k = 5
    start = time.time()
    n5Paths = 0
    for node in G.nodes():
        tmp = list(listOfPLength[node][1].values())
        n5Paths += tmp.count(5)
    dictSums["num_shortest_5paths"] = n5Paths / 2
    dictTimes["num_shortest_5paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_5paths"] = True

    # when k = 6
    start = time.time()
    n6Paths = 0
    for node in G.nodes():
        tmp = list(listOfPLength[node][1].values())
        n6Paths += tmp.count(6)
    dictSums["num_shortest_6paths"] = n6Paths / 2
    dictTimes["num_shortest_6paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_6paths"] = True

    # Size of the minimum (weight) node dominating set:
    # a subset of nodes such that every node not in the subset has at least
    # one direct neighbor inside the subset.
    start = time.time()
    T = approximation.min_weighted_dominating_set(G)
    dictSums["size_min_node_dom_set"] = len(T)
    dictTimes["size_min_node_dom_set"] = time.time() - start
    dictIsDisc["size_min_node_dom_set"] = True

    # Idem but with the edge dominating set
    start = time.time()
    T = approximation.min_edge_dominating_set(G)
    dictSums["size_min_edge_dom_set"] = 2 * len(
        T)  # times 2 to have a number of nodes
    dictTimes["size_min_edge_dom_set"] = time.time() - start
    dictIsDisc["size_min_edge_dom_set"] = True

    # The Wiener index of a graph is the sum of the shortest-path distances
    # between each pair of reachable nodes. For pairs of nodes in undirected graphs,
    # only one orientation of the pair is counted.
    # (On LCC otherwise inf)
    start = time.time()
    dictSums["wiener_index_LCC"] = nx.wiener_index(G_lcc)
    dictTimes["wiener_index_LCC"] = time.time() - start
    dictIsDisc["wiener_index_LCC"] = True

    # Betweenness node centrality (averaged over nodes):
    # at node u it is defined as B_u = sum_{i,j} sigma(i,u,j)/sigma(i,j),
    # where sigma(i,j) is the number of shortest paths between i and j,
    # and sigma(i,u,j) is the number of those paths that pass through u

    start = time.time()
    betweenness = list(nx.betweenness_centrality(G).values())
    time_betweenness = time.time() - start

    # Averaged across nodes
    start = time.time()
    dictSums["betweenness_centrality_mean"] = np.mean(betweenness)
    dictTimes["betweenness_centrality_mean"] = time.time(
    ) - start + time_betweenness
    dictIsDisc["betweenness_centrality_mean"] = False

    # Maximum across nodes
    start = time.time()
    dictSums["betweenness_centrality_max"] = max(betweenness)
    dictTimes["betweenness_centrality_max"] = time.time(
    ) - start + time_betweenness
    dictIsDisc["betweenness_centrality_max"] = False

    # Central point dominance
    # CPD = sum_u(B_max - B_u)/(N-1)
    start = time.time()
    dictSums["central_point_dominance"] = sum(
        max(betweenness) - np.array(betweenness)) / (len(betweenness) - 1)
    dictTimes["central_point_dominance"] = time.time(
    ) - start + time_betweenness
    dictIsDisc["central_point_dominance"] = False

    # Estrada index: sum_i^n exp(lambda_i),
    # with n the number of nodes and lambda_i the i-th eigenvalue of the adjacency matrix of G
    start = time.time()
    dictSums["Estrata_index"] = nx.estrada_index(G)
    dictTimes["Estrata_index"] = time.time() - start
    dictIsDisc["Estrata_index"] = False

    # Eigenvector centrality
    # The centrality of node i is the i-th coordinate of the eigenvector x
    # associated with the largest eigenvalue of the adjacency matrix A,
    # i.e. the solution of Ax = lambda*x, so a node is central when its
    # neighbors are themselves central

    # Averaged
    start = time.time()
    dictSums["avg_eigenvec_centrality"] = np.mean(
        list(nx.eigenvector_centrality_numpy(G).values()))
    dictTimes["avg_eigenvec_centrality"] = time.time() - start
    dictIsDisc["avg_eigenvec_centrality"] = False

    # Maximum
    start = time.time()
    dictSums["max_eigenvec_centrality"] = max(
        list(nx.eigenvector_centrality_numpy(G).values()))
    dictTimes["max_eigenvec_centrality"] = time.time() - start
    dictIsDisc["max_eigenvec_centrality"] = False

    ### Noise generation ###

    # Noise simulated from a Normal(0,1) distribution
    start = time.time()
    dictSums["noise_Gauss"] = ss.norm.rvs(0, 1)
    dictTimes["noise_Gauss"] = time.time() - start
    dictIsDisc["noise_Gauss"] = False

    # Noise simulated from a Uniform distribution [0-50]
    start = time.time()
    dictSums["noise_Unif"] = ss.uniform.rvs(0, 50)
    dictTimes["noise_Unif"] = time.time() - start
    dictIsDisc["noise_Unif"] = False

    # Noise simulated from a Bernoulli B(0.5) distribution
    start = time.time()
    dictSums["noise_Bern"] = ss.bernoulli.rvs(0.5)
    dictTimes["noise_Bern"] = time.time() - start
    dictIsDisc["noise_Bern"] = True

    # Noise simulated from a discrete uniform distribution on [0, 50)
    start = time.time()
    dictSums["noise_disc_Unif"] = ss.randint.rvs(0, 50)
    dictTimes["noise_disc_Unif"] = time.time() - start
    dictIsDisc["noise_disc_Unif"] = True

    resDicts = (dictSums, dictTimes, dictIsDisc)

    return resDicts
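
# Usage sketch (illustrative only, not part of the original code). It assumes the
# module-level imports compute_summaries relies on: networkx as nx, numpy as np,
# scipy.stats as ss, time, and networkx.algorithms.approximation. A graph with
# integer labels 0..n-1 is used so the shortest-path bookkeeping above indexes
# correctly.
if __name__ == "__main__":
    G_demo = nx.erdos_renyi_graph(30, 0.2, seed=0)
    sums_demo, times_demo, _ = compute_summaries(G_demo)
    # show the five most expensive summaries together with their values
    for name in sorted(times_demo, key=times_demo.get, reverse=True)[:5]:
        print(name, "=", sums_demo[name], "in", round(times_demo[name], 4), "s")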
Ejemplo n.º 48
0
# -*- coding: utf-8 -*-

import networkx as nx
import community
import matplotlib.pyplot as plt

#path="/PycharmProjects/untitled/polbooks.gml"
Graph = nx.read_gml("dolphins.gml")

print Graph.graph
# print node information
print Graph.nodes(data=True)
# print edge information
print Graph.edges()
# compute the transitivity of the network
print nx.transitivity(Graph)
# number of nodes
print Graph.number_of_nodes()
# number of edges
print Graph.number_of_edges()
# number of neighbors of a node
#print Graph.neighbors(1)
# graph partition (community detection)
part = community.best_partition(Graph)
print part
# compute the modularity of the partition
mod = community.modularity(part, Graph)
print mod
# plotting: map each node to its community id
values = [part.get(node) for node in Graph.nodes()]
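
# One possible way to finish the truncated plotting step above (an assumption,
# not the original author's code): color each node by its community id.
pos = nx.spring_layout(Graph)
nx.draw_networkx_nodes(Graph, pos, node_size=40, node_color=values, cmap=plt.cm.jet)
nx.draw_networkx_edges(Graph, pos, alpha=0.3)
plt.axis('off')
plt.show()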
Ejemplo n.º 49
0
f.write('\nEdges ' + str(G.number_of_edges()))
print('Radius:')
f.write('\nRadius of the connected components:\n')
components = list(nx.connected_component_subgraphs(G))
for comp in sorted(components, key=lambda c: c.size(), reverse=True):
    print(nx.radius(comp))
    f.write(str(nx.radius(comp)) + '\n')
print('Central vertices:')
f.write('Central vertices:\n')
deg = nx.degree_centrality(G)
for nodeid in sorted(deg, key=deg.get, reverse=True):
    if nodeid <= len(labels):
        print(nodeid, labels[nodeid])
        f.write(str(nodeid) + ' ' + labels[nodeid])
        f.write('\n')
print('Clustering coefficient:')
print(nx.average_clustering(G))
f.write('Clustering coefficient:\n' + str(nx.average_clustering(G)))
print(nx.transitivity(G))
f.write('\n' + str(nx.transitivity(G)))
f.close()

import matplotlib.pyplot as plt

pos = nx.spring_layout(G)
nx.draw_networkx_nodes(G, pos, node_color='red', node_size=10)
nx.draw_networkx_edges(G, pos, edge_color='blue')
nx.draw_networkx_labels(G, pos, font_size=14, font_family='Arial')
plt.axis('off')
plt.show()
Ejemplo n.º 50
0
def processa(nomeEntrada, toStdOut=False):
    """Process the network file, writing the outputs to the folder named 'out/<network>'.
    Outputs:
        - ?
    """
    pastaSaída = criaPastaSaída(nomeEntrada)
    with open(pastaSaída + '/stats.txt', 'w') as arq:

        def printa(*args):
            """Write the output to the statistics file"""
            if toStdOut:
                print(' ', *args)
            else:
                arq.write(' '.join(map(str, args)))
                arq.write('\n')
        # read the network from the input file
        G = nx.read_weighted_edgelist(nomeEntrada, nodetype=int, comments='%')
        # and find the largest connected component
        maiorComponente = max(nx.connected_component_subgraphs(G), key=len)

        distribuiçãoDeGraus = list(map(lambda t: t[1], G.degree()))
        histograma = nx.degree_histogram(G)
        n_nós = len(G)
        probabilidadeGraus = list(map(lambda x: x / n_nós, histograma))
        grausAcumulados = np.flipud(np.cumsum(np.flipud(probabilidadeGraus)))

        # power-law fit of the degree distribution
        leiPotência = powerlaw.Fit(distribuiçãoDeGraus, fit_method='KS')
        printa('Power law - Alpha:', leiPotência.power_law.alpha,
               '\txmin:', leiPotência.power_law.xmin)

        # global measures
        printa('Average degree:', sum(distribuiçãoDeGraus) / n_nós)
        printa(
            'Second moment of the degree distribution:',
            reduce(lambda acc, ki: acc + ki**2, distribuiçãoDeGraus, 0) /
            n_nós)
        printa(
            'Shannon entropy:',
            -reduce(lambda acc, prob: acc + prob * math.log(prob),
                    grausAcumulados))
        printa('Average local clustering coefficient:',
               nx.average_clustering(G))
        printa('Clustering coefficient from the transitivity formula:',
               nx.transitivity(G))
        printa('Average shortest path length:',
               nx.average_shortest_path_length(maiorComponente))
        printa('Efficiency:', eficiência(G))
        printa('Diameter:', nx.diameter(maiorComponente))

        # clustering computation
        distribuiçãoAglomeração = list(nx.clustering(G).values())
        pirso = np.array(
            list(
                filter(lambda x: x[0] != 0 and x[1] != 0,
                       zip(distribuiçãoDeGraus, distribuiçãoAglomeração))))
        printa('Pearson correlation of k(i) X cc(i):',
               stats.pearsonr(np.log10(pirso[:, 0]), np.log10(pirso[:, 1]))[0])

    ##  Plots  ##
    # degree distribution plot
    plt.figure('Degree distribution')
    plt.clf()
    plt.plot(probabilidadeGraus, 'r-', label='Probability')
    plt.plot(grausAcumulados, 'b-', label='Complementary cumulative prob.')
    plt.title('Degree distribution')
    plt.yscale('log')
    plt.xscale('log')
    plt.legend(loc='lower left')
    plt.savefig(pastaSaída + '/dist-grau.png')
    # k(i) vs cc(i) plot
    plt.figure('k(i) X cc(i)')
    plt.clf()
    plt.plot(distribuiçãoDeGraus, distribuiçãoAglomeração, 'bo')
    plt.title('Degree distribution X clustering coefficient')
    plt.xlabel('k(i)')
    plt.ylabel('cc(i)')
    plt.yscale('log')
    plt.xscale('log')
    plt.savefig(pastaSaída + '/kXcc.png')
    # cumulative local clustering coefficient plot
    plt.figure('Clustering coefficient')
    plt.clf()
    plt.hist(distribuiçãoAglomeração,
             bins=100,
             histtype='step',
             normed=True,
             cumulative=True)
    plt.xlabel('cc')
    plt.ylabel('P (X < x)')
    plt.title(
        'Cumulative probability distribution of the local clustering coefficient'
    )
    plt.savefig(pastaSaída + '/aglomeração.png')
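
# The helpers criaPastaSaída and eficiência are defined elsewhere and not shown
# here. A minimal sketch of eficiência (an assumption about its intent) is the
# average global efficiency, i.e. the mean of 1/d(u, v) over all node pairs,
# which networkx provides directly:
def eficiência(G):
    return nx.global_efficiency(G)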
Ejemplo n.º 51
0
def get_clustering_values(gcc, ego) -> tuple:
    clustering_coefficient = nx.clustering(gcc, ego)
    avg_clustering_coefficient = nx.average_clustering(gcc)
    transitivity = nx.transitivity(gcc)

    return clustering_coefficient, avg_clustering_coefficient, transitivity
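
# Usage sketch (an assumption about the calling context): gcc is expected to be
# a (giant) connected component and ego one of its nodes.
import networkx as nx

G_demo = nx.karate_club_graph()
gcc_demo = G_demo.subgraph(max(nx.connected_components(G_demo), key=len))
cc, avg_cc, trans = get_clustering_values(gcc_demo, ego=0)
print(cc, avg_cc, trans)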
Ejemplo n.º 52
0
def get_small_worldness(filename):
    import networkx as nx
    threshold = 0
    f = open(filename[:-4] + '_small_worldness.dat', 'w')
    for i in range(0, 101):
        threshold = float(i) / 100
        G = get_threshold_matrix(filename, threshold)
        ER_graph = nx.erdos_renyi_graph(nx.number_of_nodes(G), nx.density(G))

        cluster = nx.average_clustering(G)
        ER_cluster = nx.average_clustering(ER_graph)

        transi = nx.transitivity(G)
        ER_transi = nx.transitivity(ER_graph)

        print 'threshold: %f, average cluster coefficient: %f, random nw: %f, transitivity: %f, random nw: %f' % (
            threshold, cluster, ER_cluster, transi, ER_transi)

        f.write("%f\t%f\t%f" % (threshold, cluster, ER_cluster))
        components = nx.connected_component_subgraphs(G)
        ER_components = nx.connected_component_subgraphs(ER_graph)

        values = []
        ER_values = []
        for i in range(len(components)):
            if nx.number_of_nodes(components[i]) > 1:
                values.append(nx.average_shortest_path_length(components[i]))
        for i in range(len(ER_components)):
            if nx.number_of_nodes(ER_components[i]) > 1:
                ER_values.append(
                    nx.average_shortest_path_length(ER_components[i]))
        if len(values) == 0:
            f.write("\t0.")
        else:
            f.write("\t%f" % (sum(values) / len(values)))

        if len(ER_values) == 0:
            f.write("\t0.")
        else:
            f.write("\t%f" % (sum(ER_values) / len(ER_values)))

        f.write("\t%f\t%f" % (transi, ER_transi))

        if (ER_cluster * sum(values) * len(values) * sum(ER_values) *
                len(ER_values)) > 0:
            S_WS = (cluster / ER_cluster) / ((sum(values) / len(values)) /
                                             (sum(ER_values) / len(ER_values)))
        else:
            S_WS = 0.
        if (ER_transi * sum(values) * len(values) * sum(ER_values) *
                len(ER_values)) > 0:
            S_Delta = (transi / ER_transi) / (
                (sum(values) / len(values)) /
                (sum(ER_values) / len(ER_values)))
        else:
            S_Delta = 0.

        f.write("\t%f\t%f" % (S_WS, S_Delta))
        f.write("\n")

    f.close()
    print "1:threshold 2:cluster-coefficient 3:random-cluster-coefficient 4:shortest-pathlength 5:random-shortest-pathlength 6:transitivity 7:random-transitivity 8:S-Watts-Strogatz 9:S-transitivity"
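
# The helper get_threshold_matrix is not shown above. A minimal sketch (an
# assumption about its behaviour): load a square weight/correlation matrix from a
# text file and keep only the edges whose weight exceeds the given threshold.
def get_threshold_matrix(filename, threshold):
    import numpy as np
    import networkx as nx
    A = np.loadtxt(filename)              # square matrix of edge weights
    G = nx.Graph()
    G.add_nodes_from(range(A.shape[0]))
    rows, cols = np.where(A > threshold)  # indices of supra-threshold entries
    for u, v in zip(rows, cols):
        if u < v:                         # undirected graph: keep each pair once
            G.add_edge(u, v)
    return G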
Ejemplo n.º 53
0
def answer_fourteen():
    G = answer_thirteen()
    return nx.transitivity(G), nx.average_clustering(G)
Ejemplo n.º 54
0
G.add_edge("B", "D")

# report
print(nx.info(G))  # reports basic info about network
print("########### nodes #############")
# print(G.number_of_nodes())
print(G.nodes())
print("########### edges #############")
# print(G.number_of_edges())
print(G.edges())

# report adjacencies
print("########### adjacencies ########")
for node in G.nodes:
    # wrapping the native nx adjacency view in list() before printing turns it
    # into a more human-readable list
    print(node + ": " + str(list(G.adj[node])))
print(list(G["A"]))  # list form
# print(G["jus"]) # dictionary form

# density
density = nx.density(G)
print("########### density ###########")
print("Network density:", density)

# triads
print("########### triads ############")
triadic_closure = nx.transitivity(G)
print("Triadic closure:", triadic_closure)

# Transitivity here is 3 * (number of triangles) / (number of connected triples
# of nodes), i.e. the fraction of open triads that are closed.
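
# A quick sanity check of that formula on a throwaway graph (an illustration,
# not the graph built above): a triangle plus one pendant edge has 1 triangle
# and 5 connected triples, so transitivity = 3 * 1 / 5 = 0.6.
H = nx.Graph([("A", "B"), ("B", "C"), ("A", "C"), ("C", "D")])
print("Transitivity check:", nx.transitivity(H))  # 0.6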
Ejemplo n.º 55
0
def omega(G, niter=100, nrand=10, seed=None):
    """Returns the small-world coefficient (omega) of a graph

    The small-world coefficient of a graph G is:

    omega = Lr/L - C/Cl

    where C and L are respectively the average clustering coefficient and
    average shortest path length of G. Lr is the average shortest path length
    of an equivalent random graph and Cl is the average clustering coefficient
    of an equivalent lattice graph.

    The small-world coefficient (omega) ranges between -1 and 1. Values close
    to 0 mean that G features small-world characteristics. Values close to -1
    mean that G has a lattice-like shape, whereas values close to 1 mean that
    G is a random graph.

    Parameters
    ----------
    G : NetworkX graph
        An undirected graph.

    niter: integer (optional, default=100)
        Approximate number of rewiring per edge to compute the equivalent
        random graph.

    nrand: integer (optional, default=10)
        Number of random graphs generated to compute the average clustering
        coefficient (Cr) and average shortest path length (Lr).

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.


    Returns
    -------
    omega : float
        The small-world coefficient (omega)

    Notes
    -----
    The implementation is adapted from the algorithm by Telesford et al. [1]_.

    References
    ----------
    .. [1] Telesford, Joyce, Hayasaka, Burdette, and Laurienti (2011).
           "The Ubiquity of Small-World Networks".
           Brain Connectivity. 1 (0038): 367-75.  PMC 3604768. PMID 22432451.
           doi:10.1089/brain.2011.0038.
    """
    import numpy as np

    # Compute the mean clustering coefficient and average shortest path length
    # for an equivalent random graph
    randMetrics = {"C": [], "L": []}
    for i in range(nrand):
        Gr = random_reference(G, niter=niter, seed=seed)
        Gl = lattice_reference(G, niter=niter, seed=seed)
        randMetrics["C"].append(nx.transitivity(Gl))
        randMetrics["L"].append(nx.average_shortest_path_length(Gr))

    C = nx.transitivity(G)
    L = nx.average_shortest_path_length(G)
    Cl = np.mean(randMetrics["C"])
    Lr = np.mean(randMetrics["L"])

    omega = (Lr / L) - (C / Cl)

    return omega
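
# Usage sketch (illustrative parameters, not from the original source). It assumes
# random_reference and lattice_reference are importable, e.g. from
# networkx.algorithms.smallworld. For a Watts-Strogatz graph omega should be close
# to 0; the small niter/nrand values just keep the demo fast.
if __name__ == "__main__":
    G_demo = nx.watts_strogatz_graph(60, 6, 0.1, seed=1)
    print(omega(G_demo, niter=5, nrand=2, seed=1))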
Ejemplo n.º 56
0
import networkx as nx
from scipy import stats
from operator import mul  # or mul=lambda x,y:x*y
from fractions import Fraction
from functools import reduce  # reduce is a builtin only on Python 2
import sys


# Calculates binomial coefficient (n over k)
def nCk(n, k):
    return int(reduce(mul, (Fraction(n - i, i + 1) for i in range(k)), 1))


# Read the network in form of edge list, unweighted and undirected
net = nx.read_edgelist(sys.argv[1], nodetype=int)

# calculate the transitivity of the network
C = nx.transitivity(net)
# Make dictionary nodeID:degree
d = dict(nx.degree(net))

# The branching is calculated as P2/P1
# The intermodular connectivity as P3/P2
suma1 = 0
P2 = 0
for key in d:
    suma1 += int(d[key])
    P2 += nCk(int(d[key]), 2)
P1 = suma1 * 0.5
C3 = C * P2 / 3.0
suma = 0
for u, v in net.edges():
    suma = suma + (d[u] - 1) * (d[v] - 1)
Ejemplo n.º 57
0
 def test_path(self):
     G = nx.path_graph(10)
     assert nx.transitivity(G) == 0.0
Ejemplo n.º 58
0
def transitivity_score(A, beta):
    G = nx.from_numpy_matrix(A)
    return nx.transitivity(G)
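
# Usage sketch (an assumption about the expected input): A is a symmetric 0/1
# adjacency matrix, and the beta argument is unused by transitivity_score, so any
# placeholder value works here.
import numpy as np

A_demo = np.array([[0, 1, 1],
                   [1, 0, 1],
                   [1, 1, 0]])
print(transitivity_score(A_demo, beta=None))  # 1.0 for a triangle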
Ejemplo n.º 59
0
 def test_k5(self):
     G = nx.complete_graph(5)
     assert nx.transitivity(G) == 1.0
     G.remove_edge(1, 2)
     assert nx.transitivity(G) == 0.875
Ejemplo n.º 60
0
def createGraph(selfGraph):
    """
        Create a graph (a networkx network entity) from selfGraph (a dictionary),
        then examine its attributes from a network-theory point of view.
    """
    #(0) Create network (empty)
    G = nx.Graph()

    #(1) Add Nodes
    G.add_nodes_from(list(selfGraph.keys()))

    #(2) Add edges with the weight specified directly
    #First, build the edge list:
    edgesList=[]
    for node1 in selfGraph.keys():
        for node2 in selfGraph[node1][1]:
            weight12=selfGraph[node1][1][node2]  #weight of the edge, i.e. relatedness of the 2 nodes
            if not [node2, node1, weight12] in edgesList:  #skip the symmetric duplicate of an edge already added
                edgesList.append([node1, node2, weight12])
    G.add_weighted_edges_from(edgesList)    #G.add_edges_from()   #G.add_edge(2, 3, weight=0.9)

    #(3) Add Attributes Nodes:  attribute data to be in the form dictionary: keys: nodes name, values: attributes.
    #NB: can have different type attributes: nx.set_node_attributes(G, att_dic, 'name_att')
    weightNode={w: selfGraph[w][0] for w in selfGraph.keys()}
    nx.set_node_attributes(G, weightNode, 'relevancy')   #To access them: G.nodes[node]['relevancy']

    #(4) Add Attributes Edges. Dont need for now, as already added the weight in the edges. But could add other attributes here.
    #nx.set_edge_attributes(G, weightEdge, 'relatedness')

    #(5) Look at properties related to self Graph
    descriptionSelf=nx.info(G) + "\n"
    descriptionSelf+='Density of Self: {}'.format(nx.density(G)) + "\n"
    descriptionSelf+='Am I connected? '+ str(nx.is_connected(G)) + "\n"
    components = nx.connected_components(G)
    descriptionSelf+='I have {} connected components'.format(nx.number_connected_components(G)) + "\n"
    largest_component = max(components, key=len)
    subSelf = G.subgraph(largest_component) # Create a "subgraph" of just the largest component
    diameter = nx.diameter(subSelf)
    descriptionSelf+='The diameter of my largest Connected Component is: '+ str(diameter)  + "\n"
    #Transitivity, like density, expresses how interconnected a graph is, as a ratio of actual over possible connections.
    #Transitivity is the ratio of observed triangles to all possible triangles (connected triples of nodes).
    descriptionSelf+="My transitivity coefficient is "+ str(nx.transitivity(G))  + "\n"
    #Centrality node: Find which nodes are the most important ones in your network.
    degree_dict = dict(G.degree(G.nodes())) #degree is connectivity of each node: how many egde
    nx.set_node_attributes(G, degree_dict, 'degree') #First add degree each nodes as extra attribute
    sorted_degree = sorted(degree_dict.items(), key=operator.itemgetter(1), reverse=True) #sort this degree list
    #print(sorted_degree[:3])
    descriptionSelf+= "The three bigger hubs in me are: " + ', '.join(elt[0] for elt in sorted_degree[:3]) + "\n"
    #Other centralities than just hubs:
    #Eigenvector centrality is a kind of extension of degree: it combines a node's edges with the edges of that node's neighbors.
    #It cares whether you are a hub, but also how many hubs you are connected to (a kind of second-order connectivity).
    #Betweenness centrality looks at all the shortest paths that pass through a particular node (see above).
    betweenness_dict = nx.betweenness_centrality(G)
    #eigenvector_dict = nx.eigenvector_centrality(G) #too computationally heavy for VB?
    nx.set_node_attributes(G, betweenness_dict, 'betweenness')     # Assign each to an attribute in your network
    #nx.set_node_attributes(G, eigenvector_dict, 'eigenvector')
    sorted_betweenness = sorted(betweenness_dict.items(), key=operator.itemgetter(1), reverse=True)
    descriptionSelf+="The three most central concepts in me are: "+ ', '.join(elt[0] for elt in sorted_betweenness[:3])+ "\n"
    #Could add other properties >>>
    #Community detection within Self: modularity, different clusters, a clustered Self, etc. >>>
    print(descriptionSelf)

    return G, descriptionSelf
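
# Usage sketch. The input layout below is inferred from how selfGraph is indexed
# in createGraph (selfGraph[node][0] = relevancy, selfGraph[node][1] = dict of
# neighbour -> relatedness), so treat the structure and the concept names as
# assumptions; it also assumes the module-level imports createGraph relies on
# (networkx as nx and operator).
if __name__ == "__main__":
    demo_self = {
        "music":   (0.9, {"dance": 0.7, "emotion": 0.4}),
        "dance":   (0.6, {"music": 0.7}),
        "emotion": (0.8, {"music": 0.4}),
    }
    G_demo, description_demo = createGraph(demo_self)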