Example #1
def can_combine_cluster2(cl1, cl2):
    combine = False
    temp_graph1 = G.subgraph(cl1)
    temp_graph2 = G.subgraph(cl2)
    temp_graph_all = G.subgraph(cl1 + cl2)
    
    if len(cl1) >= len(cl2):
        common_elements = list(set(cl1).intersection(set(cl2)))
        if len(common_elements) > 0.8*len(cl2):
            combine = True
        #print common_elements
    else:
        common_elements = list(set(cl2).intersection(set(cl1)))
        if len(common_elements) > 0.8*len(cl1): 
            combine = True


    clustering_coeff_1   = nx.average_clustering(temp_graph1)
    clustering_coeff_2   = nx.average_clustering(temp_graph2)
    clustering_coeff_all = nx.average_clustering(temp_graph_all)
    #print cl1
    #print cl2
    #print (str)(clustering_coeff_1) + " " + (str)(clustering_coeff_2) +" "+ (str)(clustering_coeff_all)
    #print " "
    
    if combine:
        if (clustering_coeff_all >= .8*clustering_coeff_1) and (clustering_coeff_all >= 0.8*clustering_coeff_2):
            return True
    else:
        if (clustering_coeff_all >= clustering_coeff_1) and (clustering_coeff_all >= clustering_coeff_2):
            return True
    return False 
def compare_graphs(graph):
    n = nx.number_of_nodes(graph)
    m = nx.number_of_edges(graph)
    k = np.mean(list(nx.degree(graph).values()))
    erdos = nx.erdos_renyi_graph(n, p=m/float(n*(n-1)/2))
    barabasi = nx.barabasi_albert_graph(n, m=int(k)-7)
    small_world = nx.watts_strogatz_graph(n, int(k), p=0.04)
    print(' ')
    print('Compare the number of edges')
    print(' ')
    print('My network: ' + str(nx.number_of_edges(graph)))
    print('Erdos: ' + str(nx.number_of_edges(erdos)))
    print('Barabasi: ' + str(nx.number_of_edges(barabasi)))
    print('SW: ' + str(nx.number_of_edges(small_world)))
    print(' ')
    print('Compare average clustering coefficients')
    print(' ')
    print('My network: ' + str(nx.average_clustering(graph)))
    print('Erdos: ' + str(nx.average_clustering(erdos)))
    print('Barabasi: ' + str(nx.average_clustering(barabasi)))
    print('SW: ' + str(nx.average_clustering(small_world)))
    print(' ')
    print('Compare average path length')
    print(' ')
    print('My network: ' + str(nx.average_shortest_path_length(graph)))
    print('Erdos: ' + str(nx.average_shortest_path_length(erdos)))
    print('Barabasi: ' + str(nx.average_shortest_path_length(barabasi)))
    print('SW: ' + str(nx.average_shortest_path_length(small_world)))
    print(' ')
    print('Compare graph diameter')
    print(' ')
    print('My network: ' + str(nx.diameter(graph)))
    print('Erdos: ' + str(nx.diameter(erdos)))
    print('Barabasi: ' + str(nx.diameter(barabasi)))
    print('SW: ' + str(nx.diameter(small_world)))
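A hedged usage sketch (not part of the original example): it assumes the NetworkX 1.x API that compare_graphs relies on (dict-returning nx.degree) and uses a connected, fairly dense test graph so the diameter and path-length calls on the generated reference graphs are very likely to succeed.

import networkx as nx
import numpy as np   # used inside compare_graphs

# Connected Watts-Strogatz test network; mean degree 18 keeps the int(k) - 7
# attachment parameter of the Barabasi-Albert reference inside compare_graphs valid.
my_network = nx.connected_watts_strogatz_graph(200, 18, 0.1, seed=1)
compare_graphs(my_network)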
Example #3
def MvsD(A, Au, M, D):
    """docstring for MvsD"""
    #Calculate the number of nodes
    print("Number of nodes in A  : " + str(len(A.nodes())))
    print("Number of nodes in Au : " + str(len(Au.nodes())))
    #Calculate the number of links
    print("Number of links in A  : " + str(len(A.edges())))
    print("Number of links in Au : " + str(len(Au.edges())))
    t = nx.average_clustering(Au)
    print("network clustering coefficient for Au : " + str(t))
    print("")
    #Calculate the number of nodes
    print("Number of nodes in M  : " + str(len(M.nodes())))
    print("Number of nodes in D : " + str(len(D.nodes())))
    t = nx.average_clustering(M)
    print("network clustering coefficient for M  : " + str(t))
    t = nx.average_clustering(D)
    print("network clustering coefficient for D : " + str(t))
    
    
    MavgD = float(sum(M.degree().values()))/float(len(M.nodes()))
    print("Connectivity M : " + str(MavgD))
    DavgD = float(sum(D.degree().values()))/float(len(D.nodes()))
    print("Connectivity D : " + str(DavgD))
    
    pass
Example #4
def t_t_cc(path=r"d:\data\9.txt"):
    rstr = ''
    g = nx.Graph()
    g = read_file_txt(g, path)
    w = [14,13,12,6]
    print nx.average_clustering(g)
    for each in w:
        R=gRa(g,each)
        pg=r_perturbR(g, R)
        rstr=rstr+'{0:8},{1:10.4}'.format(each,nx.average_clustering(pg))
        rstr=rstr+'\n'

    try:
        path=path.replace('9','9_cc')
        f=open(path, 'w')
    except:
        print "int Create File error"

    p = np.array(w)/14.0
    for each in p:
        pg=r_perturbS(g, each)
        rstr=rstr+'{0:8},{1:10.4}'.format(each,nx.average_clustering(pg))
        rstr=rstr+'\n'

    f.write(rstr)
    f.close()
Example #5
def Type2AlmostCompleteGraph(n, m):
    if (BinomialCoefficient(n - 2, 2) + 4 <= m) and (m <= BinomialCoefficient(n - 1, 2) + 1):
        first_candidate = nx.complete_graph(n - 2)
        remaining_edges = m - BinomialCoefficient(n - 2, 2)
        first_candidate.add_edge(n - 2, 0)
        first_candidate.add_edge(n - 2, 1)
        for vertex_index in range(remaining_edges - 2):
            first_candidate.add_edge(n - 1, vertex_index)
        first_coefficient = nx.average_clustering(first_candidate)

        second_candidate = nx.complete_graph(n - 2)
        second_candidate.add_edge(n - 2, n - 1)
        remaining_edges = m - BinomialCoefficient(n - 2, 2) - 1
        number_of_common_neighbors = remaining_edges // 2  # floor division, matching the original Python 2 behaviour
        for vertex_index in range(number_of_common_neighbors):
            second_candidate.add_edge(vertex_index, n - 2)
            second_candidate.add_edge(vertex_index, n - 1)
        if (remaining_edges - 2 * number_of_common_neighbors) == 1:
            second_candidate.add_edge(vertex_index + 1, n - 2)
        second_coefficient = nx.average_clustering(second_candidate)

        if first_coefficient > second_coefficient:
            G = first_candidate.copy()
        else:
            G = second_candidate.copy()
        return G
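The helper BinomialCoefficient is not shown in the source, so the sketch below assumes a plain n-choose-k implementation; (n, m) = (10, 34) is chosen to satisfy the guard condition, and the returned graph can then be inspected directly.

from math import factorial

def BinomialCoefficient(n, k):
    # assumed implementation of the missing helper: n choose k
    if k < 0 or k > n:
        return 0
    return factorial(n) // (factorial(k) * factorial(n - k))

# BinomialCoefficient(8, 2) + 4 = 32 <= 34 <= BinomialCoefficient(9, 2) + 1 = 37
G = Type2AlmostCompleteGraph(10, 34)
print('%d nodes, %d edges' % (G.number_of_nodes(), G.number_of_edges()))   # 10 nodes, 34 edges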
Example #6
def can_combine_cluster(cl1, cl2):
    global G
    cl1_int = []
    cl2_int = []
    for string in cl1:
        cl1_int.append(int(string))
    for string in cl2:
        cl2_int.append(int(string))

    temp_graph1 = G.subgraph(cl1_int)
    temp_graph2 = G.subgraph(cl2_int)
    temp_graph_all = G.subgraph(cl1_int + cl2_int)

    clustering_coeff_1 = nx.average_clustering(temp_graph1)
    clustering_coeff_2 = nx.average_clustering(temp_graph2)
    clustering_coeff_all = nx.average_clustering(temp_graph_all)
    # print (str)(clustering_coeff_1) + " " + (str)(clustering_coeff_2) +" "+ (str)(clustering_coeff_all)

    if clustering_coeff_1 == 1:
        clustering_coeff_1 = 0.96

    if clustering_coeff_2 == 1:
        clustering_coeff_2 = 0.96

    if (clustering_coeff_1 == 0) and (clustering_coeff_2 == 0):
        return False

    fraction = 0.95
    if (clustering_coeff_all > fraction * clustering_coeff_1) and (
        clustering_coeff_all > fraction * clustering_coeff_2
    ):
        # print "combine"
        return True
    return False
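A minimal usage sketch for the two merge tests above (an illustration, not from the source): both functions read a module-level graph G, so a stand-in graph is assigned here; can_combine_cluster expects node ids as strings, can_combine_cluster2 takes the graph's own node type.

import networkx as nx

G = nx.karate_club_graph()                       # stand-in for the real global graph
cluster_a = ['0', '1', '2', '3', '7', '13']      # overlapping candidate clusters
cluster_b = ['0', '1', '2', '3', '12', '17']
print(can_combine_cluster(cluster_a, cluster_b))
print(can_combine_cluster2([0, 1, 2, 3, 7, 13], [0, 1, 2, 3, 12, 17]))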
Example #7
def t_GrQc_cc(path=r"d:\data\CA-GrQc.txt"):
    rstr = ''
    g = nx.Graph()
    g = read_file_txt(g, path)
    w = [14496,13454,12394,9782]
    for each in w:
        R=gRa(g,each)
        pg=r_perturbR(g, R)
        rstr=rstr+'{0:8},{1:10.4}'.format(each,nx.average_clustering(pg))
        rstr=rstr+'\n'

    try:
        path=path.replace('GrQc','GrQc_cc')
        f=open(path, 'w')
    except:
        print "int readFileTxt open error"

    p = np.array(w)/14496.0
    for each in p:
        pg=r_perturbS(g, each)
        rstr=rstr+'{0:8},{1:10.4}'.format(each,nx.average_clustering(pg))
        rstr=rstr+'\n'

    f.write(rstr)
    f.close()
Example #8
def algorithm(w1,w2,w3,w4,G1,G2,G3,G4):
	try:
		cc=np.array([nx.average_clustering(G1,weight='weight'),nx.average_clustering(G2,weight='weight'),nx.average_clustering(G3,weight='weight'),nx.average_clustering(G4,weight='weight')])
		spl=np.array([nx.average_shortest_path_length(G1,weight='weight'),nx.average_shortest_path_length(G2,weight='weight'),nx.average_shortest_path_length(G3,weight='weight'),nx.average_shortest_path_length(G4,weight='weight')])
		nds=np.array([nx.number_of_nodes(G1),nx.number_of_nodes(G2),nx.number_of_nodes(G3),nx.number_of_nodes(G4)])
		edgs= np.array([nx.number_of_edges(G1),nx.number_of_edges(G2),nx.number_of_edges(G3),nx.number_of_edges(G4)])
		if valid(cc):
			cc=stats.zscore(cc)
		else:
			cc=np.array([.1,.1,.1,.1])
		cc= cc-min(cc)+.1
		if valid(spl):
			spl=stats.zscore(spl)
		else:
			spl=np.array([.1,.1,.1,.1])
		spl= spl-min(spl)+.1
		if valid(nds):
			nds=stats.zscore(nds)
		else:
			nds=np.array([.1,.1,.1,.1])
		nds = nds-min(nds)+.1
		if valid(edgs):
			edgs=stats.zscore(edgs)
		else:
			edgs=np.array([.1,.1,.1,.1])
		edgs=edgs-min(edgs)+.1
		r1=(w1*cc[0]+w2*spl[0]+w3*nds[0]+w4*edgs[0])*1000
		r2=(w1*cc[1]+w2*spl[1]+w3*nds[1]+w4*edgs[1])*1000
		r3=(w1*cc[2]+w2*spl[2]+w3*nds[2]+w4*edgs[2])*1000
		r4=(w1*cc[3]+w2*spl[3]+w3*nds[3]+w4*edgs[3])*1000
		d={'Player 1:': r1, 'Player 2:': r2,'Player 3:': r3, 'Player 4:': r4}
		rank = sorted(d.items(), key=lambda x: x[1], reverse=True)
		return ["USAU RANKINGS",str(rank[0][0])+ " " + str(int(rank[0][1])),str(rank[1][0])+" "+ str(int(rank[1][1])),str(rank[2][0])+" "+ str(int(rank[2][1])),str(rank[3][0])+" "+str(int(rank[3][1]))]
	except:
		return ["Unable to compute rankings!  Need data","Player 1","Player 2","Player 3","Player 4"]
def getCoherenceMeasure(essay):
	graph = makeWordGraph(essay)
	# obtain clustering coefficient			
	clustCoeffList=nx.clustering(graph)
	#####
	print getScore(clustCoeffList,graph)
	print nx.average_clustering(graph)
Example #10
def t_facebook_cc(path=r"d:\data\facebook1.txt"):
    rstr = ''
    g = nx.Graph()
    g = read_file_txt(g, path)
    w = [1945, 1294, 860, 643]
    for each in w:
        R=gRa(g,each)
        pg=r_perturbR(g, R)
        rstr=rstr+'{0:8},{1:10.4}'.format(each,nx.average_clustering(pg))
        rstr=rstr+'\n'

    try:
        path=path.replace('book1','book1_cc')
        f=open(path, 'w')
    except:
        print "int readFileTxt open error"

    p = np.array(w)/4813.0
    for each in p:
        pg=r_perturbS(g, each)
        rstr=rstr+'{0:8},{1:10.4}'.format(each,nx.average_clustering(pg))
        rstr=rstr+'\n'

    f.write(rstr)
    f.close()
Example #11
def main():
    tempo_dir = "../corpus-local/tempo-txt"
    file_regex = ".*\.txt"

    G = build_graph(tempo_dir, file_regex)
    """
  ccs = nx.clustering(G)
  avg_clust = sum(ccs.values()) / len(ccs)
  """
    print tempo_dir
    print "\tAda " + str(len(G.nodes())) + " node."
    print "\tAda " + str(len(G.edges())) + " edge."
    print "\tClustering coefficient      : " + str(nx.average_clustering(G))
    print "\tAverage shortest path length"
    for g in nx.connected_component_subgraphs(G):
        print "\t\t" + str(nx.average_shortest_path_length(g))

    kompas_dir = "../corpus-local/kompas-txt"
    G = build_graph(kompas_dir, file_regex)
    print kompas_dir
    print "\tAda " + str(len(G.nodes())) + " node."
    print "\tAda " + str(len(G.edges())) + " edge."
    print "\tClustering coefficient      : " + str(nx.average_clustering(G))
    print "\tAverage shortest path length"
    for g in nx.connected_component_subgraphs(G):
        print "\t\t" + str(nx.average_shortest_path_length(g))
Example #12
def t_Gnutella_cc(path=r"d:\data\p2p-Gnutella08.txt"):
    rstr = ''
    g = nx.Graph()
    g = read_file_txt(g, path)
    w = [20777,18700,17995,17023]
    for each in w:
        R=gRa(g,each)
        pg=r_perturbR(g, R)
        rstr=rstr+'{0:8},{1:10.4}'.format(each,nx.average_clustering(pg))
        rstr=rstr+'\n'

    try:
        path=path.replace('p2p-Gnutella','GrQcp2p-Gnutella_cc')
        f=open(path, 'w')
    except:
        print "int Create File error"

    p = np.array(w)/20777.0
    for each in p:
        pg=r_perturbS(g, each)
        rstr=rstr+'{0:8},{1:10.4}'.format(each,nx.average_clustering(pg))
        rstr=rstr+'\n'

    f.write(rstr)
    f.close()
Example #13
def check_and_merge_clusters(index):
    global clusters
    global G
        
    given_cluster = []
    total_clusters = len(clusters)
    cluster_coeff_all = [0]*total_clusters
    cluster_coeff_temp = [0]*total_clusters
    for string in clusters[index]:
        given_cluster.append(int(string))
    given_graph = G.subgraph(given_cluster)
    clustering_coeff_given   = nx.average_clustering(given_graph)
    
    temp_index = 0
    while temp_index < total_clusters:
        temp_cluster = []
        for string in clusters[temp_index]:
            temp_cluster.append(int(string))
        temp_graph = G.subgraph(temp_cluster)
        temp_graph_all = G.subgraph(temp_cluster + given_cluster)

        clustering_coeff_all = nx.average_clustering(temp_graph_all)
        clustering_coeff_temp = nx.average_clustering(temp_graph)
        cluster_coeff_all[temp_index] = clustering_coeff_all
        cluster_coeff_temp[temp_index] = clustering_coeff_temp        
        temp_index = temp_index + 1

    # Find the index with highest coefficient and combine them
    max_index = cluster_coeff_all.index(max(cluster_coeff_all))
    if clustering_coeff_given > .94:
        clustering_coeff_given = 0.94
    if cluster_coeff_temp[max_index] > .94:
        cluster_coeff_temp[max_index] =0.94
    if (cluster_coeff_all[max_index] >= .95*clustering_coeff_given) and (cluster_coeff_all[max_index] >= .95*cluster_coeff_temp[max_index]):
        combine_cluster(index, max_index)
Example #14
def test_clustering(size):
    print("Barabasi-Albert:")
    ba = networkx.barabasi_albert_graph(1000, 4)
    print("Clustering: ", networkx.average_clustering(ba))
    print("Average length: ", networkx.average_shortest_path_length(ba))
    print("Watts-Strogatz:")
    ws = networkx.watts_strogatz_graph(size, 4, 0.001)
    print("Clustering: ", networkx.average_clustering(ws))
    print("Average length: ", networkx.average_shortest_path_length(ws))
Example #15
def gen_graph_stats (graph):
	G = nx.read_graphml(graph)
	stats = {}

	edges, nodes = 0,0
	for e in G.edges_iter(): edges += 1
	for n in G.nodes_iter(): nodes += 1
	stats['Edges'] = (edges,'The number of edges within the Graph')
	stats['Nodes'] = (nodes, 'The number of nodes within the Graph')
	print "%i edges, %i nodes" % (edges, nodes)


	# Accessing the highest degree node
	center, degree = sorted(G.degree().items(), key=itemgetter(1), reverse=True)[0]
	stats['Center Node'] = ('%s: %0.5f' % (center,degree),'The center most node in the graph. Which has the highest degree')


	hairball = nx.subgraph(G, [x for x in nx.connected_components(G)][0])
	print "Average shortest path: %0.4f" % nx.average_shortest_path_length(hairball)
	stats['Average Shortest Path Length'] = (nx.average_shortest_path_length(hairball), '')
	# print "Center: %s" % G[center]

	# print "Shortest Path to Center: %s" % p


	print "Degree: %0.5f" % degree
	stats['Degree'] = (degree,'The node degree is the number of edges adjacent to that node.')

	print "Order: %i" % G.number_of_nodes()
	stats['Order'] = (G.number_of_nodes(),'The number of nodes in the graph.')

	print "Size: %i" % G.number_of_edges()
	stats['Size'] = (G.number_of_edges(),'The number of edges in the graph.')

	print "Clustering: %0.5f" % nx.average_clustering(G)
	stats['Average Clustering'] = (nx.average_clustering(G),'The average clustering coefficient for the graph.')

	print "Transitivity: %0.5f" % nx.transitivity(G)
	stats['Transitivity'] = (nx.transitivity(G),'The fraction of all possible triangles present in the graph.')

	part = community.best_partition(G)
	# values = [part.get(node) for node in G.nodes()]

	# nx.draw_spring(G, cmap = plt.get_cmap('jet'), node_color = values, node_size=30, with_labels=False)
	# plt.show()

	mod = community.modularity(part,G)
	print "modularity: %0.5f" % mod
	stats['Modularity'] = (mod,'The modularity of a partition of a graph.')

	knn = nx.k_nearest_neighbors(G)
	print knn
	stats['K Nearest Neighbors'] = (knn,'the average degree connectivity of graph.\nThe average degree connectivity is the average nearest neighbor degree of nodes with degree k. For weighted graphs, an analogous measure can be computed using the weighted average neighbors degre')


	return G, stats
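A usage sketch under stated assumptions: the python-louvain community module and the NetworkX 1.x iterator/dict APIs that gen_graph_stats uses are available, and a small GraphML file is written first so there is something to load.

import networkx as nx

# Write a small connected test graph to GraphML, then collect its statistics.
nx.write_graphml(nx.connected_watts_strogatz_graph(60, 6, 0.1, seed=7), 'test.graphml')
G, stats = gen_graph_stats('test.graphml')
for name, (value, description) in stats.items():
    print('%s -> %s' % (name, value))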
Example #16
def get_average_cluster_coefficient(filename):
  import networkx as nx
  threshold = 0
  f = open(filename[:-4]+'_average_cc.dat','w')
  for i in range(0,101):
    threshold = float(i)/100
    G = get_threshold_matrix(filename, threshold)
    print 'threshold: %f, average cluster coefficient: %f' %(threshold, nx.average_clustering(G))
    f.write("%f\t%f\n" % (threshold, nx.average_clustering(G)))
  f.close()
Example #17
def get_small_worldness(filename):
  import networkx as nx
  threshold = 0
  f = open(filename[:-4]+'_small_worldness.dat','w')
  for i in range(0,101):
    threshold = float(i)/100
    G = get_threshold_matrix(filename, threshold)
    ER_graph = nx.erdos_renyi_graph(nx.number_of_nodes(G), nx.density(G))

    cluster = nx.average_clustering(G)
    ER_cluster = nx.average_clustering(ER_graph)
    
    transi = nx.transitivity(G)
    ER_transi = nx.transitivity(ER_graph)

    print 'threshold: %f, average cluster coefficient: %f, random nw: %f, transitivity: %f, random nw: %f' %(threshold, cluster, ER_cluster, transi, ER_transi)

    f.write("%f\t%f\t%f" % (threshold, cluster, ER_cluster))
    components = nx.connected_component_subgraphs(G)
    ER_components = nx.connected_component_subgraphs(ER_graph)

    values = []
    ER_values = []
    for i in range(len(components)):
      if nx.number_of_nodes(components[i]) > 1:
        values.append(nx.average_shortest_path_length(components[i]))
    for i in range(len(ER_components)):
      if nx.number_of_nodes(ER_components[i]) > 1:
        ER_values.append(nx.average_shortest_path_length(ER_components[i]))
    if len(values) == 0:
      f.write("\t0.")
    else:
      f.write("\t%f" % (sum(values)/len(values)))

    if len(ER_values) == 0:
      f.write("\t0.")
    else:
      f.write("\t%f" % (sum(ER_values)/len(ER_values)))
    
    f.write("\t%f\t%f" % (transi, ER_transi))  
    
    if (ER_cluster*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 :
      S_WS = (cluster/ER_cluster) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values)))
    else:
      S_WS = 0.
    if (ER_transi*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 :
      S_Delta = (transi/ER_transi) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values)))
    else:
      S_Delta = 0.
    
    f.write("\t%f\t%f" % (S_WS, S_Delta))  
    f.write("\n")
    
  f.close()  
  print "1:threshold 2:cluster-coefficient 3:random-cluster-coefficient 4:shortest-pathlength 5:random-shortest-pathlength 6:transitivity 7:random-transitivity 8:S-Watts-Strogatz 9:S-transitivity" 
Example #18
def test_random_reference():
    G = nx.connected_watts_strogatz_graph(50, 6, 0.1, seed=rng)
    Gr = random_reference(G, niter=1, seed=rng)
    C = nx.average_clustering(G)
    Cr = nx.average_clustering(Gr)
    assert_true(C > Cr)

    assert_raises(nx.NetworkXError, random_reference, nx.Graph())
    assert_raises(nx.NetworkXNotImplemented, random_reference, nx.DiGraph())

    H = nx.Graph(((0, 1), (2, 3)))
    Hl = random_reference(H, niter=1, seed=rng)
Example #19
def get_small_worldness(G, thr):
	f = open(out_prfx + 'small_worldness.dat', 'a')
	g = open(out_prfx + 'cc_trans_ER.dat', 'a')
	#g.write('r(thre.)\t\cc_A\tcc_ER\ttran_A\ttran_ER\n')
	ER_graph = nx.erdos_renyi_graph(nx.number_of_nodes(G), nx.density(G))
	# erdos-renyi, binomial random graph generator ...(N,D:density)	
	cluster = nx.average_clustering(G)   # clustering coef. of whole network
	ER_cluster = nx.average_clustering(ER_graph)	#cc of random graph
	
	transi = nx.transitivity(G)
	ER_transi = nx.transitivity(ER_graph)

	g.write("%f\t%f\t%f\t%f\t%f\n" % (thr, cluster,ER_cluster,transi,ER_transi ))
	
	f.write("%f\t%f\t%f" % (thr, cluster, ER_cluster))
	components = nx.connected_component_subgraphs(G)
	ER_components = nx.connected_component_subgraphs(ER_graph)

	values = []
	ER_values = []
	for i in range(len(components)):
		if nx.number_of_nodes(components[i]) > 1:
			values.append(nx.average_shortest_path_length(components[i]))
	for i in range(len(ER_components)):
		if nx.number_of_nodes(ER_components[i]) > 1:
			ER_values.append(nx.average_shortest_path_length(ER_components[i]))
	if len(values) == 0:
		f.write("\t0.")
	else:
		f.write("\t%f" % (sum(values)/len(values))) # pathlenght

	if len(ER_values) == 0:
		f.write("\t0.")
	else:
		f.write("\t%f" % (sum(ER_values)/len(ER_values)))

	f.write("\t%f\t%f" % (transi, ER_transi))  

	if (ER_cluster*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 :
		S_WS = (cluster/ER_cluster) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values)))  
	else:
		S_WS = 0.
	if (ER_transi*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 :
		S_Delta = (transi/ER_transi) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values)))
	else:
		S_Delta = 0.

	f.write("\t%f\t%f" % (S_WS, S_Delta)) # S_WS ~ small worldness 
	f.write("\n")

	f.close() 
	g.close()	 
def run(G, cut_pct, iterations=10):
  print nx.average_clustering(G)
  nodes = G.nodes()
  edges = G.edges()
  for i in range(iterations):
    np.random.shuffle(nodes)
    cut_count = int(cut_pct*len(nodes))
    selected_nodes = nodes[0:-cut_count]
    not_selected_nodes = set(nodes)-set(selected_nodes)
    not_selected_edges = G.subgraph(not_selected_nodes).edges()
    H = G.subgraph(nodes)
    H.remove_edges_from(not_selected_edges)
    H.remove_nodes_from(list(set(not_selected_nodes)&set(nx.isolates(H))))
    print nx.average_clustering(H)
Example #21
def random_25kc(G0, nswap=1, max_tries=100):
    """
    Only check the degree-dependent clustering coefficients of the four swapped nodes and their neighbour nodes.
    """
    if nswap>max_tries:
        raise nx.NetworkXError("Number of swaps > number of tries allowed.")
    if len(G0) < 4:
        raise nx.NetworkXError("Graph has less than four nodes.")
    G = copy.deepcopy(G0)
    n=0
    swapcount=0
    while swapcount < nswap:
        (u,x)=random.sample(G.nodes(),2)    
        v=random.choice(list(G[u]))
        y=random.choice(list(G[x]))
        if G.degree(v)!=G.degree(y) or v==y or len([u,v,x,y])<4:
            continue                    # if nodes v and y have different degrees, pick again
        n+=1
        if (y not in G.neighbors(u)) and (v not in G.neighbors(x)) and ((u,v)in G.edges()) and ((x,y) in G.edges()): 
            G.add_edge(u,y)
            G.add_edge(x,v)
            G.remove_edge(u,v)
            G.remove_edge(x,y)
            swapcount+=1
            if not nx.is_connected(G):
                G.add_edge(u,v)
                G.add_edge(x,y)
                G.remove_edge(u,y)
                G.remove_edge(x,v)
                swapcount -= 1
                continue
            l = map(lambda t:(t[1],t[0]), G0.degree([u,v,x,y]+list(G[u])+list(G[v])+list(G[x])+list(G[y])).items())  # list of (degree, node) pairs
            D = dict_degree_nodeslist(l)
            for i in range(len(D)):
                avcG0 = nx.average_clustering(G0, nodes=D.values()[i], weight=None, count_zeros=True)
                avcG = nx.average_clustering(G, nodes=D.values()[i], weight=None, count_zeros=True)
                i += 1
                if avcG0 != avcG:   # if the degree-dependent clustering coefficient changed after the swap, undo this swap
                    G.add_edge(u,v)
                    G.add_edge(x,y)
                    G.remove_edge(u,y)
                    G.remove_edge(x,v)
                    swapcount -= 1
                    break    
        if n >= max_tries:
            e=('Maximum number of swap attempts (%s) exceeded '%n +
            'before desired swaps achieved (%s).'%nswap)
            print e
            break
    return G
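random_25kc relies on a dict_degree_nodeslist helper that is not shown; the sketch below assumes it groups nodes by degree, and it assumes the Python 2 / NetworkX 1.x semantics the swap code depends on (list-returning nodes() and map, dict-returning degree()).

import copy      # needed by random_25kc above
import random    # needed by random_25kc above
import networkx as nx

def dict_degree_nodeslist(pairs):
    # assumed helper: build {degree: [nodes with that degree]} from (degree, node) pairs
    D = {}
    for degree, node in pairs:
        D.setdefault(degree, []).append(node)
    return D

G0 = nx.barabasi_albert_graph(200, 3)
G_rewired = random_25kc(G0, nswap=20, max_tries=2000)
print('%f -> %f' % (nx.average_clustering(G0), nx.average_clustering(G_rewired)))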
def compareAvgClusteringCoeff(masterGraph,wordGraph,worksheet,row):
    avgClusteringMaster = nx.average_clustering(masterGraph)
    avgClusteringWord = nx.average_clustering(wordGraph)
    #worksheet.write(row,1,avgClusteringMaster)
    #worksheet.write(row,2,avgClusteringWord)

    result = False    
    if(avgClusteringMaster >= avgClusteringWord):
        result = True
    #worksheet.write(row,3,result)
    
    if result == True:
        return 1
    else:
        return -1
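The worksheet and row arguments only feed the commented-out spreadsheet writes, so placeholders are enough for a quick check (a usage sketch, not from the source):

import networkx as nx

master_graph = nx.erdos_renyi_graph(100, 0.10, seed=3)
word_graph = nx.erdos_renyi_graph(100, 0.05, seed=3)
print(compareAvgClusteringCoeff(master_graph, word_graph, None, 0))   # 1 or -1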
Example #23
def Attributes_of_Graph(G):
    print "*Statistic attributes of graphs:"
    print "N", nx.number_of_nodes(G)
    print "M", nx.number_of_edges(G)

    print "C", nx.average_clustering(G)
    #print "<d>", nx.average_shortest_path_length(G)
    print "r", nx.degree_assortativity_coefficient(G)

    degree_list = list(G.degree_iter())
    max_degree = 0
    min_degree = float("inf")  # start high so the first node's degree becomes the initial minimum
    avg_degree_1 = 0.0
    avg_degree_2 = 0.0
    for node in degree_list:
        avg_degree_1 = avg_degree_1 + node[1]
        avg_degree_2 = avg_degree_2 + node[1]*node[1]
        if node[1] > max_degree:
            max_degree = node[1]
        if node[1] < min_degree:
            min_degree = node[1]
    #end for
    avg_degree = avg_degree_1/len(degree_list)
    avg_degree_square = (avg_degree_2/len(degree_list)) / (avg_degree*avg_degree)
    print "<k>", avg_degree
    print "k_max", max_degree
    print "H", avg_degree_square
    print "DH", float(max_degree-min_degree)/G.number_of_nodes()
Example #24
def calc_clustering_coefficient(g, dest_file):
    """
    calc_clustering_coefficient(g)
    Calculate & plot clustering coefficient of the graph g and writes data to the created data output file
    :param g:   graph as source
    :return:    ---
    """
    func_intro = "\n\nClustering Co-Efficient ..."
    logging.info(cs_ref, func_intro)
    print func_intro
    with open(dest_file, "a") as dat_file:
        dat_file.write(func_intro)

    cce = nx.clustering(g)  # calculate clustering co-efficient
    with open(dest_file, "a") as dat_file:
        dat_file.write("\n\tClustering Coefficients for nodes in graph = \t" + str(cce))
    average_cce = nx.average_clustering(g)
    with open(dest_file, "a") as dat_file:
        dat_file.write("\n\tAverage Clustering Coefficient for graph = \t" + str(average_cce))

    for edge in g.edges():  # plot clustering co-efficient
        if floor(edge[0] / 5.) != floor(edge[1] / 5.):
            if random.random() < 0.95:
                g.remove_edge(edge[0], edge[1])
    plt.figure(3)
    fixed_pos = {1: (0, 0), 10: (1, 1), 30: (1, 0), 50: (0, 1)}
    pos = nx.spring_layout(g, fixed=fixed_pos.keys(), pos=fixed_pos)
    nx.draw_networkx(g, pos=pos)
    plt.title("Clustering Co-efficient" + src_file)
    plt.savefig("plots/cs1_clustering_coefficient.png")
    plt.show()
Example #25
def analyze_first_level_panels():
    results = {}
    
    for d in first_level_topic_list:
        print "\n*********DESCRIPTOR: " + first_level_topic_list[d] + "(" + str(d) + ")"
        G = build_panel_network_by_descriptor(d)
        print "\nDESCRIPTOR: " + first_level_topic_list[d] + "(" + str(d) + ")"
        print "Nodes:", G.number_of_nodes()
        print "Edges:", G.number_of_edges()
        res_clique = analize_cliques(G)
        res_degree = analize_degrees(G)
        res_weight = analize_edges(G)
        d_final = dict(res_clique)
        d_final.update(res_degree)
        d_final.update(res_weight)
        d_final['id'] = d
        d_final['avg_clustering'] = nx.average_clustering(G)
        results[first_level_topic_list[d]] = d_final
        
    print "Writing json..."
    json.dump(results, open('./networks/first_level_panels_analysis.json','w'), indent = 2)
    print "Writing csvs..."
    df = DataFrame(results)
    df.to_csv('./networks/first_level_panels_analysis.csv')
    dfinv = df.transpose()
    dfinv.to_csv('./networks/first_level_panels_analysis_inv.csv')
Example #26
def printStats(filename):
	'''
	Converts json adjacency list into networkx to calculate and print the
	graph's
	  - average clustering coefficient
	  - overall clustering coefficient
	  - maximum diameter
	  - average diameter
	  - number of partitions using community.best_partition
	  - modularity of community.best_partition
	'''
	g = makeGraphFromJSON(filename)
	
	print "Average Clustering Coefficient: %f" % nx.average_clustering(g)
	print "Overall Clustering Coefficient: %f" % nx.transitivity(g)
	
	connected_subgraphs = list(nx.connected_component_subgraphs(g))
	largest = max(nx.connected_component_subgraphs(g), key=len)
	print "# Connected Components: %d" % len(connected_subgraphs)
	print "    Maximal Diameter: %d" % nx.diameter(largest)
	print "    Average Diameter: %f" % nx.average_shortest_path_length(largest)

	# Find partition that maximizes modularity using Louvain's algorithm
	part = community.best_partition(g)	
	print "# Paritions: %d" % (max(part.values()) + 1)
	print "Louvain Modularity: %f" % community.modularity(part, g)
Example #27
def ws_calc(path):
    """
    Given a path to a graph file generated by the GMM, calculate C(p) and L(p)
    """
    G=nx.read_graphml(path)
    file_split=path.split('_')
    return({'p':float(file_split[4]), 'cc':nx.average_clustering(G), 'avg.pl':nx.average_shortest_path_length(G)})
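ws_calc recovers the rewiring probability from the file name (the fifth underscore-separated field), so this sketch writes a graph under a hypothetical name that follows that convention; a connected graph is used so the path-length call succeeds.

import networkx as nx

path = 'gmm_graph_n100_k6_0.1_run0.graphml'   # hypothetical naming; field index 4 must parse as float
nx.write_graphml(nx.connected_watts_strogatz_graph(100, 6, 0.1, seed=0), path)
print(ws_calc(path))   # {'p': 0.1, 'cc': ..., 'avg.pl': ...}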
Example #28
    def connected_components(self):
        """
        Returns basic statistics about the connected components of the
        graph. This includes their number, order, size, diameter, radius,
        average clustering coefficient, transitivity, in addition to basic
        info about the largest and smallest connected components.
        """
        cc_stats = {}
        cc = nx.connected_components(self.graph.structure)

        for index, component in enumerate(cc):
            cc_stats[index] = {}
            this_cc = cc_stats[index]

            this_cc["order"] = len(component)
            this_cc["size"] = len(self.graph.structure.edges(component))

            subgraph = self.graph.structure.subgraph(component)
            this_cc["avg_cluster"] = nx.average_clustering(subgraph)
            this_cc["transitivity"] = nx.transitivity(subgraph)

            eccentricity = nx.eccentricity(subgraph)
            ecc_values = eccentricity.values()
            this_cc["diameter"] = max(ecc_values)
            this_cc["radius"] = min(ecc_values)

        return cc_stats
Example #29
def get_characteristics(G, filename):
  import networkx as nx
  print 'calculating characteristics'
    
  n_nodes = nx.number_of_nodes(G)
  n_edges = nx.number_of_edges(G)
  n_components = nx.number_connected_components(G)
  print 'number of nodes:', n_nodes
  print 'number of edges:', n_edges
  print 'number of components:', n_components
 
  print 'degree histogram'
  check_sum = 0.
  degree_hist = {}
  for node in G:
    if G.degree(node) not in degree_hist:
      degree_hist[G.degree(node)] = 1
    else:
      degree_hist[G.degree(node)] += 1
    
  keys = degree_hist.keys()
  keys.sort()
  for item in keys:
    print item, degree_hist[item]
    check_sum += float(degree_hist[item])/float(n_nodes)
    
  print "check sum: %f" % check_sum
            
  #print 'clustering coefficient'
  print 'clustering coefficient of full network', nx.average_clustering(G)
  return 0
Example #30
def whole_graph_metrics(graph, weighted=False):
    graph_metrics = {}

    # Shortest average path length
    graph_metrics['avg_shortest_path'] = \
        nx.average_shortest_path_length(graph, weight=weighted)

    # Average eccentricity
    ecc_dict = nx.eccentricity(graph)
    graph_metrics['avg_eccentricity'] = np.mean(np.array(ecc_dict.values()))

    # Average clustering coefficient
    # NOTE: Option to include or exclude zeros
    graph_metrics['avg_ccoeff'] = \
        nx.average_clustering(graph, weight=weighted, count_zeros=True)

    # Average node betweeness
    avg_node_btwn_dict = nx.betweenness_centrality(graph, normalized=True)
    graph_metrics['avg_node_btwn'] = \
        np.mean(np.array(avg_node_btwn_dict.values()))

    # Average edge betweeness
    avg_edge_btwn_dict = nx.edge_betweenness_centrality(graph, normalized=True)
    graph_metrics['avg_edge_btwn'] = \
        np.mean(np.array(avg_edge_btwn_dict.values()))

    # Number of isolates
    graph_metrics['isolates'] = len(nx.isolates(graph))

    return graph_metrics
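A usage sketch under the NetworkX 1.x / Python 2 conventions whole_graph_metrics assumes (dict-returning eccentricity and centrality, list-returning nx.isolates); the input must be connected for the path-length and eccentricity calls.

import networkx as nx
import numpy as np   # used inside whole_graph_metrics

G = nx.connected_watts_strogatz_graph(80, 6, 0.1, seed=5)
for name, value in whole_graph_metrics(G).items():
    print('%s: %s' % (name, value))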
Example #31
    print(nx.in_degree_centrality(G))
    print(nx.out_degree_centrality(G))

# Global clustering coefficient or transitivity of a graph
# Remember to convert to undirected graph
print('\nClustering coefficients of Ego users')

# Getting average clustering for the starting users
starting_users_clustering = []
for users in starting_users:
    x = nx.clustering(G.to_undirected(), users)
    starting_users_clustering.append(x)
    print(x)

print('Average clustering for G is {}'.format(
    nx.average_clustering(G.to_undirected())))
print('\nTransitivity')
print(nx.transitivity(G.to_undirected()))

# Distances: short path between userName1 and userName2
# print(nx.shortest_path(G, userName1, userName2))
# print(nx.shortest_path_length(G, userName1, userName2))

# Diameter and eccentricity are applicable when the graph is strongly connected
# number of strongly/weakly connected components
if nx.is_strongly_connected(G):
    print('G is strongly connected')
    print(sorted(nx.strongly_connected_components(G)))
    print(nx.number_connected_components(G))
    print(
        nx.eccentricity(G)
Example #32
def master(struct_save_name="ProteinDict_ten_thousand",
           edge_type="ligands",
           edge_comm_num=3,
           property="processes",
           graph_filename="Protein-Protein_Graph_Default_Name",
           load_graph=False,
           print_dict_props=False,
           bipart_graph=False,
           bipartite_filename="Bipartite_Default_Name",
           show_plots=False,
           avg_clust=False,
           print_graph_props=False,
           degree_dist=False,
           k_clique=False,
           mod_max=False,
           fluid=False,
           louv=False,
           k_property=20,
           num_k_cliques=7,
           num_fluid_comms=100,
           std_val=-0.5,
           k_clique_opt=False,
           start_k_clique_opt=3,
           end_k_clique_opt=10,
           num_trials_k=3,
           opt_fluid=False,
           start_fluid_comms=100,
           end_fluid_comms=300,
           fluid_step_size=20,
           fluid_num_trials=3):
    Structure_Dict = {}
    Structure_Dict = hf.readDict(struct_save_name, Structure_Dict)

    #Here we print out some helpful information about the dataset we are using
    if print_dict_props == True:
        avg_ligands = hf.get_mean_property(Structure_Dict, "ligands")
        print("Average Number of Ligands:", avg_ligands)

        avg_subunits = hf.get_mean_property(Structure_Dict, "subunits")
        print("Average Number of Subunits:", avg_subunits)

        avg_functions = hf.get_mean_property(Structure_Dict, "functions")
        print("Average Number of Functions:", avg_functions)

        avg_processes = hf.get_mean_property(Structure_Dict, "processes")
        print("Average Number of Processes:", avg_processes)

        # Get Total Number of Ligands, Functions, Processes and Subunits
        num_ligands = len(hf.get_all_property(Structure_Dict, "ligands"))
        print("Number of Ligands:", num_ligands)

        num_subunits = len(hf.get_all_property(Structure_Dict, "subunits"))
        print("Number of Subunits:", num_subunits)

        num_functions = len(hf.get_all_property(Structure_Dict, "functions"))
        print("Number of Functions:", num_functions)

        num_processes = len(hf.get_all_property(Structure_Dict, "processes"))
        print("Number of Processes:", num_processes)

    #Here we create a bipartite graph of ligands and proteins, which can be analyzed on its own, or used
    #to create a projected graph.
    if bipart_graph == True:
        Protein_Bipartite_Graph = nx.Graph()
        struct_name_set = set()
        # Create a bipartite graph in which there are structure nodes and ligand nodes
        for (struct_name, struct) in Structure_Dict.items():
            struct_name_set.add(struct_name)
            hf.create_Edge(struct, Protein_Bipartite_Graph, property)

        print('Bipartite Nodes:', len(Protein_Bipartite_Graph.nodes()))
        print('Bipartite Edges:', len(Protein_Bipartite_Graph.edges()))
        nx.write_gml(Protein_Bipartite_Graph, bipartite_filename)

    #Here we create a new projected graph
    if load_graph == False:
        # Create a projected graph from the bipartite
        Protein_Graph = hf.create_projected_graph(Structure_Dict,
                                                  edge_comm_num, edge_type)
        # Get the Giant Component of graph
        Protein_Graph_GC = Protein_Graph.subgraph(
            sorted(nx.connected_components(Protein_Graph),
                   key=len,
                   reverse=True)[0])
        nx.write_gml(Protein_Graph, graph_filename)

    #If the graph has already been created, load in the graph to save time
    if load_graph == True:
        Protein_Graph = nx.read_gml(graph_filename)
        Protein_Graph_GC = Protein_Graph.subgraph(
            sorted(nx.connected_components(Protein_Graph),
                   key=len,
                   reverse=True)[0])

    #Print out some useful information about the graph
    if print_graph_props == True:
        print('Protein_Graph Nodes:', len(Protein_Graph.nodes()))
        print('Protein_Graph Edges:', len(Protein_Graph.edges()))
        print('Protein_Graph Num connected Components:',
              nx.number_connected_components(Protein_Graph))
        print('Protein_Graph Num edges in largest Components:',
              len(Protein_Graph_GC.edges()))
        print('Protein_Graph Num nodes in largest Components:',
              len(Protein_Graph_GC.nodes()))

    # K-Clique Implementation
    if k_clique == True:
        print('Begin K_Clique')
        #Create a copy of the graph, which will be used when we label nodes by community
        k_clique_graph = Protein_Graph_GC.copy()

        #You can use a predetermined k, or optimize the k for the graph
        if k_clique_opt == False:
            k_clique_comms_pre_del = nx.algorithms.community.k_clique_communities(
                Protein_Graph_GC, num_k_cliques)
            k_clique_comms_pre_del = list(list(k_clique_comms_pre_del))
        else:
            k_clique_comms_pre_del = hf.opt_k_clique(Protein_Graph_GC,
                                                     start_k_clique_opt,
                                                     end_k_clique_opt,
                                                     num_trials_k)

        # Get the average size of found communities
        avg_comm_pre_del = sum([len(comm) for comm in k_clique_comms_pre_del
                                ]) / len(k_clique_comms_pre_del)

        #Get the graph similarity score
        K_clique_score_pre_del = hf.score_graph(k_clique_comms_pre_del,
                                                Structure_Dict,
                                                k_property,
                                                property,
                                                already_list=True)
        print(K_clique_score_pre_del, len(k_clique_comms_pre_del),
              avg_comm_pre_del, hf.num_nodes(k_clique_comms_pre_del))

        #Create a plot of each community's similarity score vs. a random community's similarity score
        hf.plot_vs_random(k_clique_comms_pre_del,
                          Structure_Dict,
                          k_property,
                          property,
                          "K_Clique_" + str(k_property) + "_" + property +
                          "_Pre_Del_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)

        #Delete some communities based on their modularity score and the standard deviation of community scores in the graph
        k_clique_comms = hf.delete_comms(Protein_Graph_GC,
                                         k_clique_comms_pre_del, std_val)

        #Get the graph similarity score after deletion
        k_clique_score = hf.score_graph(k_clique_comms,
                                        Structure_Dict,
                                        k_property,
                                        property,
                                        already_list=True)

        # Get the average size of found communities after deleting 'bad' communities
        avg_comm = sum([len(comm)
                        for comm in k_clique_comms]) / len(k_clique_comms)
        print(k_clique_score, len(k_clique_comms), avg_comm,
              hf.num_nodes(k_clique_comms))

        # Create a plot of each community's similarity score vs. a random community's similarity score
        hf.plot_vs_random(k_clique_comms,
                          Structure_Dict,
                          k_property,
                          property,
                          "K_Clique_" + str(k_property) + "_" + property +
                          "_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)

        # Label nodes by community
        nx.set_node_attributes(k_clique_graph,
                               hf.list_to_dict(k_clique_comms_pre_del),
                               "Community")

        #Save the graph with nodes labeled by community
        nx.write_gml(
            k_clique_graph, "K_Clique_Protein_Protein_" + edge_type +
            "_edges_Network_" + str(k_property) + "_" + property + ".gml")
        print('End K_Clique')

    # Modularity Maximization Implementation
    if mod_max == True:
        print('Begin Modularity Maximization')
        # Create a copy of the graph, which will be used when we label nodes by community
        mod_graph = Protein_Graph_GC.copy()

        #Find communities using modularity maximization
        mod_max_comms_pre_del = nx.algorithms.community.modularity_max.greedy_modularity_communities(
            Protein_Graph_GC)
        mod_max_comms_pre_del = list(list(mod_max_comms_pre_del))

        # Get the average size of found communities
        avg_comm_pre_del = sum([len(comm) for comm in mod_max_comms_pre_del
                                ]) / len(mod_max_comms_pre_del)

        # Get the graph similarity score
        mod_max_score_pre_del = hf.score_graph(mod_max_comms_pre_del,
                                               Structure_Dict,
                                               k_property,
                                               property,
                                               already_list=True)
        print(mod_max_score_pre_del, len(mod_max_comms_pre_del),
              avg_comm_pre_del, hf.num_nodes(mod_max_comms_pre_del))

        # Create a plot of each community's similarity score vs. a random community's similarity score
        hf.plot_vs_random(mod_max_comms_pre_del,
                          Structure_Dict,
                          k_property,
                          property,
                          "Mod_Max" + str(k_property) + "_" + property +
                          "_Pre_Del_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)

        # Delete some communities based on their modularity score and the standard deviation of community scores in the graph
        mod_max_comms = hf.delete_comms(Protein_Graph_GC,
                                        mod_max_comms_pre_del, std_val)

        # Get the graph similarity score after deletion
        mod_max_score = hf.score_graph(mod_max_comms,
                                       Structure_Dict,
                                       k_property,
                                       property,
                                       already_list=True)

        # Get the average size of found communities after deleting 'bad' communities
        avg_comm = sum([len(comm)
                        for comm in mod_max_comms]) / len(mod_max_comms)
        print(mod_max_score, len(mod_max_comms), avg_comm,
              hf.num_nodes(mod_max_comms))

        # Create a plot of each community's similarity score vs. a random community's similarity score
        hf.plot_vs_random(mod_max_comms,
                          Structure_Dict,
                          k_property,
                          property,
                          "Mod_Max" + str(k_property) + "_" + property +
                          "_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)

        # Label nodes by community
        nx.set_node_attributes(mod_graph,
                               hf.list_to_dict(mod_max_comms_pre_del),
                               "Community")

        # Save the graph with nodes labeled by community
        nx.write_gml(
            mod_graph, "Mod_Max_Protein_Protein_" + edge_type +
            "_edges_Network_" + str(k_property) + "_" + property + ".gml")
        print('End Modularity Maximization')

    # Fluid Implementation
    if fluid == True:
        print('Begin Fluid')
        # Create a copy of the graph, which will be used when we label nodes by community
        fluid_graph = Protein_Graph_GC.copy()

        # You can use a predetermined number of communities, or optimize the number of communities for the graph
        if opt_fluid == False:
            fluid_comms_pre_del = nx.algorithms.community.asyn_fluid.asyn_fluidc(
                Protein_Graph_GC, num_fluid_comms)
            fluid_comms_pre_del = list(list(fluid_comms_pre_del))
        else:
            fluid_comms_pre_del = hf.opt_fluid(Protein_Graph_GC,
                                               start_fluid_comms,
                                               end_fluid_comms,
                                               fluid_step_size,
                                               fluid_num_trials)

        # Get the average size of found communities
        avg_comm_pre_del = sum([len(comm) for comm in fluid_comms_pre_del
                                ]) / len(fluid_comms_pre_del)

        # Get the graph similarity score
        fluid_score_pre_del = hf.score_graph(fluid_comms_pre_del,
                                             Structure_Dict,
                                             k_property,
                                             property,
                                             already_list=True)
        print(fluid_score_pre_del, len(fluid_comms_pre_del), avg_comm_pre_del,
              hf.num_nodes(fluid_comms_pre_del))

        # Create a plot of each community's similarity score vs. a random community's similarity score
        hf.plot_vs_random(fluid_comms_pre_del,
                          Structure_Dict,
                          k_property,
                          property,
                          "Fluid" + str(k_property) + "_" + property +
                          "_Pre_Del_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)

        # Delete some communities based on their modularity score and the standard deviation of community scores in
        # the graph
        fluid_comms = hf.delete_comms(Protein_Graph_GC, fluid_comms_pre_del,
                                      std_val)

        # Get the graph similarity score after deletion
        fluid_score = hf.score_graph(fluid_comms,
                                     Structure_Dict,
                                     k_property,
                                     property,
                                     already_list=True)

        # Get the average size of found communities after deleting 'bad' communities
        avg_comm = sum([len(comm) for comm in fluid_comms]) / len(fluid_comms)
        print(fluid_score, len(fluid_comms), avg_comm,
              hf.num_nodes(fluid_comms))

        # Create a plot of each community's similarity score vs. a random community's similarity score
        hf.plot_vs_random(fluid_comms,
                          Structure_Dict,
                          k_property,
                          property,
                          "Fluid" + str(k_property) + "_" + property +
                          "_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)

        # Label nodes by community
        nx.set_node_attributes(fluid_graph,
                               hf.list_to_dict(fluid_comms_pre_del),
                               "Community")

        # Save the graph with nodes labeled by community
        nx.write_gml(
            fluid_graph, "Fluid_Protein_Protein_" + edge_type +
            "_edges_Network_" + str(k_property) + "_" + property + ".gml")
        print('End Fluid')

    # Louvain implementation
    if louv == True:
        print('Begin Louvain')
        # Create a copy of the graph, which will be used when we label nodes by community
        louv_graph = Protein_Graph_GC.copy()

        #Create communities using the Louvain method
        opt_louv = hf.optimize_louv(Protein_Graph_GC, Structure_Dict, 100, 1,
                                    property, k_property)
        louv_comm_pre_del = hf.Get_Community(opt_louv[0])

        # Get the average size of found communities
        avg_comm_pre_del = sum([len(comm) for comm in louv_comm_pre_del
                                ]) / len(louv_comm_pre_del)

        # Get the graph similarity score
        louv_score_pre_del = hf.score_graph(louv_comm_pre_del,
                                            Structure_Dict,
                                            k_property,
                                            property,
                                            already_list=True)
        print(louv_score_pre_del, len(louv_comm_pre_del), avg_comm_pre_del,
              hf.num_nodes(louv_comm_pre_del))

        # Create a plot of each community's similarity score vs. a random community's similarity score
        hf.plot_vs_random(louv_comm_pre_del,
                          Structure_Dict,
                          k_property,
                          property,
                          "Louv" + str(k_property) + "_" + property +
                          "_Pre_Del_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)

        # Delete some communities based on their modularity score and the standard deviation of community scores in the graph
        louv_comms = hf.delete_comms(Protein_Graph_GC, louv_comm_pre_del,
                                     std_val)

        # Get the graph similarity score after deletion
        louv_score = hf.score_graph(louv_comms,
                                    Structure_Dict,
                                    k_property,
                                    property,
                                    already_list=True)

        # Get the average size of found communities after deleting 'bad' communities
        avg_comm = sum([len(comm) for comm in louv_comms]) / len(louv_comms)
        print(louv_score, len(louv_comms), avg_comm, hf.num_nodes(louv_comms))

        # Create a plot of each community's similarity score vs. a random community's similarity score
        hf.plot_vs_random(louv_comms,
                          Structure_Dict,
                          k_property,
                          property,
                          "Louv" + str(k_property) + "_" + property +
                          "_Comms_" + edge_type + "_edges",
                          show_plots=show_plots)

        # Label nodes by community
        nx.set_node_attributes(louv_graph, hf.list_to_dict(louv_comm_pre_del),
                               "Community")

        # Save the graph with nodes labeled by community
        nx.write_gml(
            louv_graph, "Louv_Protein_Protein_" + edge_type +
            "_edges_Network_" + str(k_property) + "_" + property + ".gml")
        print('End Louvain')

    # Create Degree Distribution Plot and print out the expected degree of the node
    if degree_dist == True:
        x, y, expected_degree = hf.degree_dist(Protein_Graph_GC)
        print("Expected Degree:", expected_degree)
        plt.figure()
        plt.loglog(x, y, 'bo')
        plt.title("Degree distribution")
        plt.xlabel("log(degree values)")
        plt.ylabel("log(degree frequencies)")
        plt.savefig('degree_dist_' + edge_type + '.png')
        plt.show()

    #Find the average clustering coefficient of the graph
    if avg_clust == True:
        average_clustering = nx.average_clustering(Protein_Graph_GC)
        print("Average Clustering Coefficient:", average_clustering)
Example #33
def network_models():
    erdos = []
    watts = []
    barabasi = []
    # generate 30 networks of each model
    print("Generating networks...")
    bar = progressbar.ProgressBar(max_value=30)
    for i in range(30):
        bar.update(i)
        erdos.append(nx.erdos_renyi_graph(500, 0.1))
        watts.append(nx.watts_strogatz_graph(1000, 10, 0.1))
        barabasi.append(nx.barabasi_albert_graph(2000, 10))
    bar.finish()

    # degree distribution (one of each)
    print("Finding degree distributions...")
    dists = {}
    dists["Erdös-Rényi"] = degree_distribution(erdos[0])
    dists["Watts-Strogatz"] = degree_distribution(watts[0])
    dists["Barabási-Albert"] = degree_distribution(barabasi[0])

    # plot
    print("Plotting...")
    sns.set()

    pp.title("Erdös-Rényi - Degree Distribution")
    pp.hist(list(erdos[0].degree().values()), dists["Erdös-Rényi"], color=colors[0])
    pp.ylabel("Frequency")
    pp.xlabel("Degree (k)")
    pp.grid(False)
    pp.savefig('plots/erdos-degree-dist.png')
    pp.clf()

    pp.title("Watts-Strogatz - Degree Distribution")
    pp.hist(list(watts[0].degree().values()), dists["Watts-Strogatz"], color=colors[1])
    pp.ylabel("Frequency")
    pp.xlabel("Degree (k)")
    pp.grid(False)
    pp.savefig('plots/watts-degree-dist.png')
    pp.clf()

    pp.title("Barabási-Albert - Degree Distribution")
    pp.hist(list(barabasi[0].degree().values()), dists["Barabási-Albert"], color=colors[2])
    pp.ylabel("Frequency")
    pp.xlabel("Degree (k)")
    pp.grid(False)
    pp.savefig('plots/barabasi-degree-dist.png')
    pp.clf()
    print("Done plotting.")

    # table
    print("Taking measures...")
    lens = {}
    lens["erdos"] = []
    lens["watts"] = []
    lens["barabasi"] = []

    degrees = {}
    degrees["erdos"] = []
    degrees["watts"] = []
    degrees["barabasi"] = []

    clusterings = {}
    clusterings["erdos"] = []
    clusterings["watts"] = []
    clusterings["barabasi"] = []

    assortativities = {}
    assortativities["erdos"] = []
    assortativities["watts"] = []
    assortativities["barabasi"] = []

    shortest_paths = {}
    shortest_paths["erdos"] = []
    shortest_paths["watts"] = []
    shortest_paths["barabasi"] = []

    entropies = {}
    entropies["erdos"] = []
    entropies["watts"] = []
    entropies["barabasi"] = []

    moments = {}
    moments["erdos"] = []
    moments["watts"] = []
    moments["barabasi"] = []

    print("Calculating Erdös-Rényi measurements...")
    for graph in erdos:
        lens["erdos"].append(len(graph))
        degrees["erdos"].append(average_degree(graph))
        clusterings["erdos"].append(nx.average_clustering(graph))
        assortativities["erdos"].append(nx.degree_assortativity_coefficient(graph))
        shortest_paths["erdos"].append(nx.average_shortest_path_length(graph))
        entropies["erdos"].append(entropy(graph))
        moments["erdos"].append(stat_moment(graph, 2))

    print("Calculating Watts-Strogatz measurements...")
    for graph in watts:
        lens["watts"].append(len(graph))
        degrees["watts"].append(average_degree(graph))
        clusterings["watts"].append(nx.average_clustering(graph))
        assortativities["watts"].append(nx.degree_assortativity_coefficient(graph))
        shortest_paths["watts"].append(nx.average_shortest_path_length(graph))
        entropies["watts"].append(entropy(graph))
        moments["watts"].append(stat_moment(graph, 2))

    print("Calculating Barabási-Albert measurements...")
    for graph in barabasi:
        lens["barabasi"].append(len(graph))
        degrees["barabasi"].append(average_degree(graph))
        clusterings["barabasi"].append(nx.average_clustering(graph))
        assortativities["barabasi"].append(nx.degree_assortativity_coefficient(graph))
        shortest_paths["barabasi"].append(nx.average_shortest_path_length(graph))
        entropies["barabasi"].append(entropy(graph))
        moments["barabasi"].append(stat_moment(graph, 2))

    print("Measurements for Erdös-Rényi networks")
    # median
    print("Median of...")
    print("Number of nodes = %d" % np.median((lens["erdos"])))
    print("Degrees = %.4f" % np.median((degrees["erdos"])))
    print("Clustering coefficient = %.4f" % np.median((clusterings["erdos"])))
    print("Assortativity = %.4f" % np.median((assortativities["erdos"])))
    print("Shortest paths = %.4f" % np.median((shortest_paths["erdos"])))
    print("Shannon entropies = %.4f" % np.median((entropies["erdos"])))
    print("Second stat moments = %.4f" % np.median((moments["erdos"])))

    # deviation
    print("Standard Deviation of...")
    print("Number of nodes = %d" % np.std((lens["erdos"]), ddof=1))
    print("Degrees = %.4f" % np.std((degrees["erdos"]), ddof=1))
    print("Clustering coefficient = %.4f" % np.std((clusterings["erdos"]), ddof=1))
    print("Assortativity = %.4f" % np.std((assortativities["erdos"]), ddof=1))
    print("Shortest paths = %.4f" % np.std((shortest_paths["erdos"]), ddof=1))
    print("Shannon entropies = %.4f" % np.std((entropies["erdos"]), ddof=1))
    print("Second stat moments = %.4f" % np.std((moments["erdos"]), ddof=1))

    print("Measurements for Watts-Strogatz networks")
    # median
    print("Median of...")
    print("Number of nodes = %d" % np.median((lens["watts"])))
    print("Degrees = %.4f" % np.median((degrees["watts"])))
    print("Clustering coefficient = %.4f" % np.median((clusterings["watts"])))
    print("Assortativity = %.4f" % np.median((assortativities["watts"])))
    print("Shortest paths = %.4f" % np.median((shortest_paths["watts"])))
    print("Shannon entropies = %.4f" % np.median((entropies["watts"])))
    print("Second stat moments = %.4f" % np.median((moments["watts"])))

    # deviation
    print("Standard Deviation of...")
    print("Number of nodes = %d" % np.std((lens["watts"]), ddof=1))
    print("Degrees = %.4f" % np.std((degrees["watts"]), ddof=1))
    print("Clustering coefficient = %.4f" % np.std((clusterings["watts"]), ddof=1))
    print("Assortativity = %.4f" % np.std((assortativities["watts"]), ddof=1))
    print("Shortest paths = %.4f" % np.std((shortest_paths["watts"]), ddof=1))
    print("Shannon entropies = %.4f" % np.std((entropies["watts"]), ddof=1))
    print("Second stat moments = %.4f" % np.std((moments["watts"]), ddof=1))

    print("Measurements for Barabási-Albert networks")
    # median
    print("Median of...")
    print("Number of nodes = %d" % np.median((lens["barabasi"])))
    print("Degrees = %.4f" % np.median((degrees["barabasi"])))
    print("Clustering coefficient = %.4f" % np.median((clusterings["barabasi"])))
    print("Assortativity = %.4f" % np.median((assortativities["barabasi"])))
    print("Shortest paths = %.4f" % np.median((shortest_paths["barabasi"])))
    print("Shannon entropies = %.4f" % np.median((entropies["barabasi"])))
    print("Second stat moments = %.4f" % np.median((moments["barabasi"])))

    # deviation
    print("Standard Deviation of...")
    print("Number of nodes = %d" % np.std((lens["barabasi"]), ddof=1))
    print("Degrees = %.4f" % np.std((degrees["barabasi"]), ddof=1))
    print("Clustering coefficient = %.4f" % np.std((clusterings["barabasi"]), ddof=1))
    print("Assortativity = %.4f" % np.std((assortativities["barabasi"]), ddof=1))
    print("Shortest paths = %.4f" % np.std((shortest_paths["barabasi"]), ddof=1))
    print("Shannon entropies = %.4f" % np.std((entropies["barabasi"]), ddof=1))
    print("Second stat moments = %.4f" % np.std((moments["barabasi"]), ddof=1))
Ejemplo n.º 34
0
    if not node1 in senses:
        continue
    adjacent = [n for n in senses[node1] if n in G and n != node1]
    for node2 in adjacent:
        if node2 not in G[node1]:
            G.add_edge(node1, node2, weight=senses[node1][node2])

logging.info('Graph for %s has %d nodes and %d edges', cid, len(G.nodes()),
             len(G.edges()))

table_rows = ''.join([
    '<TR><TD>{:s}</TD><TD>{:f}</TD></TR>'.format(*h)
    for h in hypernyms[cid].items()
])
table = '<TABLE><TR><TD COLSPAN="2"><B>Hypernyms</B></TD></TR>%s</TABLE>' % table_rows
avg_C = nx.average_clustering(G, weight='weight')

gv = Graph(comment='Cluster {:s} for {:s}'.format(cid,
                                                  ', '.join(hypernyms[cid])),
           encoding='utf-8',
           engine='sfdp',
           format='svg')
gv.body.append('label="Graph for {:s}, average C={:.4f}"'.format(cid, avg_C))
gv.body.append('size="10,10"')
gv.body.append('outputorder=edgesfirst')
gv.body.append('overlap=false')
gv.body.append('splines=true')
gv.node_attr.update(color='#ffffff', margin='0')
gv.edge_attr.update(color='#666666')

gv.node('Legend', label='<{:s}>'.format(table), shape='none', margin='0')
Ejemplo n.º 35
0
    """
    # True iff every node in the network has a positive value for this feature
    return len([
        x for x in network.nodes()
        if network.nodes[x]['feautures'][feature_index] > 0
    ]) == network.order()


if __name__ == '__main__':
    print("Running tests.")
    print("Loading network...")
    load_network()
    print("done.")

    failures = 0

    def test(actual, expected, test_name):
        global failures  #lol python scope
        try:
            print("testing %s..." % (test_name, ))
            assert actual == expected, "%s failed (%s != %s)!" % (
                test_name, actual, expected)
            print("%s passed (%s == %s)." % (test_name, actual, expected))
        except AssertionError as e:
            print(e)
            failures += 1

    test(network.order(), 4039, "order")
    test(network.size(), 88234, "size")
    test(round(nx.average_clustering(network), 4), 0.6055, "clustering")
    print("%d tests failed." % (failures, ))
Ejemplo n.º 36
0
        fundamental = set(np.where(ib - newib > 0)[0]) # set of fundamentally defaulting banks
    while delta.sum() > 0.01:
        ib = newib
        newib = -maxVector(-IB,-maxVector(np.dot(ib,pi) + E,np.zeros(N)))
        delta = abs(ib - newib)
    #     sum = sum + 1
    # print(sum)
    contagion = set(np.where(ib - newib > 0)[0])
    contagion = contagion.difference(fundamental) # set of contagion-defaulting banks
    return newib,fundamental,contagion

global N # number of banks
N = 80

G = nx.barabasi_albert_graph(N,1)                               # build a scale-free (Barabasi-Albert) network
average_clustering = nx.average_clustering(G)                   # average clustering coefficient
average_degree_connectivity = nx.average_degree_connectivity(G) # average degree connectivity
degree = G.degree()                                             # degree of every node
degree_histogram = nx.degree_histogram(G)                       # degree distribution of the network
g = nx.to_numpy_array(G)                                        # credit-relation matrix (without lending direction)
r,R = relation(g,0,0)                                           # credit-relation matrix (with lending direction)
L = balanceSheet(r,100,2)                                       # interbank loan-size matrix
IL = sum(L)                                                     # interbank loans (assets)
L = L.T
IB = sum(L)                                                     # interbank borrowing (liabilities)
p = 0.4                                                         # leverage
data = generateData(IL,IB,p)                                    # balance-sheet data
temp = np.array(data < 0)
if True in temp:
    print('error')
shock = abs(np.random.normal(0,0.3,(N)))                        # draw the shock
Ejemplo n.º 37
0
plt.title("Degree rank plot of duplication divergence model")
plt.ylabel("degree")
plt.xlabel("rank")
plt.savefig('degree_rank_duplication')

degree_sequence = nx.degree_histogram(G_part)
plt.figure()
plt.loglog(degree_sequence, 'b-', marker='o')
plt.title("Degree rank plot of partial model")
plt.ylabel("degree")
plt.xlabel("rank")
plt.savefig('degree_rank_partial_model')

# Compute the clustering coefficient for each network. Is the clustering coefficient maintained as the networks become larger?

print('The average clustering coefficient of the duplication divergence model is:',
      nx.average_clustering(G_dupli))
print('The average clustering coefficient of the partial duplication model is:',
      nx.average_clustering(G_part))

# Is the clustering coefficient maintained as the networks become larger?
for i in range(3000, 7000, 1000):
    G_dupli = partial_dupli(1, 0.2, 0.3, i)
    print('The number of nodes of the network is: %d' % (i))
    print('The average clustering coefficient is:',
          nx.average_clustering(G_dupli))

# For the Yeast protein interaction network:
yeast = pd.read_csv(argv[1], sep='\s+')
print(' The head of dataframe before filtering:\n')
print(yeast.head())
print(' The head of dataframe after filtering:\n')
Ejemplo n.º 38
0
dataset = 'youtube'
names = ['feature', 'label', 'graph', 'idx_train', 'idx_eval', 'idx_test']
objects = []
for i in range(len(names)):
    f = open("./data/{}/{}.bin".format(dataset, names[i]), 'rb')
    if sys.version_info > (3, 0):  # if python==3.x
        objects.append(pkl.load(f, encoding='latin1'))
    else:  # if python==2.x
        objects.append(pkl.load(f))
feature, label, graph, idx_train, idx_eval, idx_test = objects

print("Below shows the type of the stored objects:")
print("-- feature: type={}, shape={}".format(type(feature), feature.shape))
print("-- label: type={}, shape={}, entry_type={}".format(
    type(label), label.shape, type(label[0][0])))
print("-- graph: type={}, node num={}".format(type(graph), len(graph)))
print("-- idx_train: type={}, size={}".format(type(idx_train), len(idx_train)))
print("-- idx_eval: type={}, size={}".format(type(idx_eval), len(idx_eval)))
print("-- idx_test: type={}, size={}".format(type(idx_test), len(idx_test)))

G = nx.from_dict_of_lists(graph)
density = nx.density(G)
print('density: ', density)
clustering_coefficient = nx.average_clustering(G)
print('clustering coefficient: ', clustering_coefficient)
triadic_closure = nx.transitivity(G)
print("Triadic closure:", triadic_closure)
eigenvector_dict = nx.eigenvector_centrality(G)
avg_eigenvector = np.mean(np.array([v for k, v in eigenvector_dict.items()]))
print('avg eigenvector centrality: ', avg_eigenvector)
Ejemplo n.º 39
0
def clustering(G):
    clustering_coeff = nx.average_clustering(G)
    print "clustering coeff : " + str(clustering_coeff)
Ejemplo n.º 40
0
    with open(comPath + f,
              'rb') as fp, open('../usedCsv/replyDict_' + f[12:14] + '.pickle',
                                'rb') as fReply:
        comObj = pickle.load(fp)
        replyDict = pickle.load(fReply)

        community_size_dict = {}
        community_link_dict = {}

        loadCommunity()

        g = nx.Graph()
        community_node_size = []
        for community in sorted(community_size_dict.keys()):
            community_node_size.append(len(community_size_dict[community]))
            g.add_node(community, size=len(community_size_dict[community]))
            community_node_size_normalise = [
                float(i) / sum(community_node_size)
                for i in community_node_size
            ]

        for node in sorted(community_link_dict.keys()):
            for edge in community_link_dict[node]:
                g.add_edge(node, edge, weight=community_link_dict[node][edge])

    edgewidth = [d['weight'] for (u, v, d) in g.edges(data=True)]
    nx.write_gexf(g, 'community_graph_' + f[12:14] + '.gexf')

    print('Nodes:', len(g.nodes()), 'Links:', len(g.edges()),
          'Average cluster:', nx.average_clustering(g))
Ejemplo n.º 41
0
def write_lex_stats(b, num, f, f2, d_pos, lang, Graph=False):
    """Use Levenshtein package to calcualte lev and count up mps, neighbors, etc"""
    total = 0.
    total_diff = nltk.defaultdict(int)
    total_same = nltk.defaultdict(int)
    mps = 0
    neighbors = 0
    homophones = 0
    lev_total = 0.0
    lev_total_diff = 0.0
    lev_total_same = 0.0
    diff = 0
    init = 0
    last = 0
    specific_mps = defaultdict(int)
    specific_mps_init = defaultdict(int)
    ndict = nltk.defaultdict(int)
    mdict = nltk.defaultdict(int)
    hdict = nltk.defaultdict(int)
    uniq = nltk.defaultdict(int)
    avg_lev = nltk.defaultdict(list)

    tot = len(b) * 1.0
    g = nx.Graph()
    g.l = {}
    lengths_all = nltk.defaultdict(int)
    for item in b:
        g.add_node(item)
        length = len(item)
        lengths_all[len(item)] += 1
    for item in itertools.combinations(b, 2):
        lev = Levenshtein.distance(item[0], item[1])
        if len(item[0]) == len(item[1]):
            avg_lev[item[0]].append(lev)
            avg_lev[item[1]].append(lev)
        if lev == 0:
            homophones += 1
            hdict[item[0]] += 1
        elif lev == 1:
            g.add_edge(item[0], item[1])
            neighbors += 1
            ndict[item[0]] += 1
            ndict[item[1]] += 1
            if (d_pos[len(item[0])][item[0]] != d_pos[len(item[1])][item[1]]):
                diff += 1

            if len(item[0]) == len(item[1]):  #if it's a minimal pair
                l = len(item[0])
                pair_ph = find_minimal_pair_diff(item[0], item[1])
                specific_mps["_".join(sorted(pair_ph))] += 1
                pos1 = item[0].index(pair_ph[0])
                pos2 = item[1].index(pair_ph[1])
                if (d_pos[len(item[0])][item[0]] != d_pos[len(
                        item[1])][item[1]]):
                    lev_total_diff += lev
                    total_diff[len(item[0])] += 1
                else:
                    lev_total_same += lev
                    total_same[len(item[0])] += 1
                if pos1 == pos2 and pos1 == 0:
                    specific_mps_init["_".join(sorted(pair_ph))] += 1
                    init += 1
                if pos1 == pos2 and pos1 == len(item[0]) - 1:
                    last += 1
                mps += 1
                mdict[item[0]] += 1  #*log(dict_b[item[1]])
                mdict[item[1]] += 1  #*log(dict_b[item[0]])

        uniq[item[0]] = 1
        total += 1
        lev_total += lev

    poss_same = nltk.defaultdict(int)
    poss_diff = nltk.defaultdict(int)
    for l in d_pos.keys():
        count = nltk.defaultdict(int)
        for cat in [
                'A', 'ADV', 'C', 'ART', 'N', 'PRON', 'NUM', 'EXP', 'V', 'PREP',
                'NOM', 'VER', 'PRO', 'PRE', 'AUX', 'ADJ', 'CON'
        ]:
            count[cat] = len(
                [i for i in d_pos[l].keys() if d_pos[l][i] == cat])
            poss_same[l] += (count[cat] * (count[cat] - 1)) / 2
        for p in itertools.combinations(count.keys(), 2):
            poss_diff[l] += count[p[0]] * count[p[1]]
        if poss_diff[l] == 0: poss_diff[l] = 1
        if poss_same[l] == 0: poss_same[l] = 1


#        total_diff[l] = 1.0*total_diff[l]/ poss_diff[l]
#        total_same[l] = 1.0*total_same[l]/ poss_same[l]
#        print l, total_diff[l], total_same[l]
    total_d = 1.0 * sum(total_diff.values()) / sum(poss_diff.values())
    total_s = 1.0 * sum(total_same.values()) / sum(poss_same.values())
    Gcc = sorted(nx.connected_components(g), key=len, reverse=True)  # components as node sets, largest first
    #print num, len(Gcc[0]),  len(Gcc[1])
    #    print "neighbors", neighbors
    #    print "average clustering", average_clustering(g)
    if Graph:
        plt.figure(figsize=(50, 50))
        pos = nx.spring_layout(g)
        nx.draw_networkx(g,
                         pos,
                         with_labels=False,
                         node_size=40,
                         edge_color='0.8',
                         node_color='k')
        plt.savefig('graph/' + str(num))
    conf = specific_mps["b_p"] + specific_mps["d_t"] + specific_mps[
        "g_k"] + specific_mps["f_v"] + specific_mps["s_z"] + specific_mps["S_Z"]
    dist = specific_mps["t_Z"] + specific_mps["d_S"] + specific_mps[
        "g_f"] + specific_mps["p_z"] + specific_mps["k_v"] + specific_mps["b_s"]
    conf_init = specific_mps_init["b_p"] + specific_mps_init["d_t"] + specific_mps_init[
        "g_k"] + specific_mps_init["f_v"] + specific_mps_init["s_z"] + specific_mps_init["S_Z"]
    dist_init = specific_mps_init["t_Z"] + specific_mps_init["d_S"] + specific_mps_init[
        "g_f"] + specific_mps_init["p_z"] + specific_mps_init["k_v"] + specific_mps_init["b_s"]
    f.write(",".join([
        str(x) for x in [
            num,
            len(hdict),
            len(b) -
            (len(uniq) - len(hdict)) - 1, mps, neighbors, lev_total / total,
            len(b),
            nx.average_clustering(g),
            nx.transitivity(g),
            len(max(nx.connected_components(g), key=len)) /
            tot, specific_mps["b_p"], specific_mps["d_t"], specific_mps["g_k"],
            total_d, total_s, conf, dist, conf_init, dist_init, diff /
            neighbors, init, last
        ]
    ]) + "\n")

    for item in b:
        if len(item) < 15:
            f2.write(",".join([
                str(num),
                str(item),
                str(hdict[item]),
                str(mdict[item] / (hdict[item] + 1.)),
                str(ndict[item] / (hdict[item] + 1.)),
                str(1.0 * sum(avg_lev[item]) / len(avg_lev[item])),
                str(len(item))
            ]) + "\n")
    return
Ejemplo n.º 42
0
def weakenClu(G0, nswap=1, max_tries=100, connected=1):
    # Randomly rewire edges while keeping the degree distribution unchanged

    if not nx.is_connected(G0):
        raise nx.NetworkXError("The graph is not connected; a connected graph is required")
    if G0.is_directed():
        raise nx.NetworkXError("Only undirected graphs are supported")
    if nswap > max_tries:
        raise nx.NetworkXError("The number of swaps exceeds the maximum number of attempts")
    if len(G0) < 4:
        raise nx.NetworkXError("Too few nodes; the graph must contain at least four nodes")

    tn = 0  # number of swap attempts
    swapcount = 0  # number of successful swaps

    G = copy.deepcopy(G0)
    # keys,deges =zip(*G.degree().items())
    keys, degrees = zip(*dict(G.degree()).items())
    cdf = nx.utils.cumulative_distribution(degrees)  # cumulative distribution of the degrees
    path = nx.average_shortest_path_length(G)
    print(path)
    while swapcount < nswap:
        if tn >= max_tries:
            e = ('Number of attempts (%s) exceeded the allowed maximum; ' % tn + 'successful swaps: %s' % swapcount)
            print(e)
            break
        tn += 1
        oldG = copy.deepcopy(G)
        avcOldG = nx.average_clustering(oldG)
        # keep the degree distribution fixed and pick two edges u-v, x-y at random
        (ui, xi) = nx.utils.discrete_sequence(2,
                                              cdistribution=cdf)  # returns a sample sequence of length 2
        if ui == xi:
            continue
        u = keys[ui]
        x = keys[xi]
        v = random.choice(list(G[u]))
        y = random.choice(list(G[x]))

        if len(set([u, v, x, y])) == 4:
            if (y not in G[u]) and (v not in G[x]):
                G.add_edge(u, y)
                G.add_edge(v, x)
                G.remove_edge(u, v)
                G.remove_edge(x, y)
                avcNewG = nx.average_clustering(G)
                if avcOldG < avcNewG:
                    G.add_edge(u, v)
                    G.add_edge(x, y)
                    G.remove_edge(u, y)
                    G.remove_edge(x, v)
                    continue
        if connected == 1:
            if not nx.is_connected(G):
                G.add_edge(u, v)
                G.add_edge(x, y)
                G.remove_edge(u, y)
                G.remove_edge(x, v)
                continue
        new_path = nx.average_shortest_path_length(G)
        ret = abs(path - new_path)
        if ret > 0.1:
            G.add_edge(u, v)
            G.add_edge(x, y)
            G.remove_edge(u, y)
            G.remove_edge(x, v)
            continue
        swapcount = swapcount + 1
    return G
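# Usage sketch (my addition, not from the original source): start from a small-world graph,
# whose clustering is high, and let weakenClu lower it while preserving the degree sequence
# and (approximately) the average shortest path length.
G_ws = nx.connected_watts_strogatz_graph(100, 6, 0.1, seed=1)  # guaranteed connected
G_weak = weakenClu(G_ws, nswap=20, max_tries=2000)
print(nx.average_clustering(G_ws), nx.average_clustering(G_weak))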
Ejemplo n.º 43
0
verbs = ['бежать_VERB', 'идти_VERB', 'ускоряться_VERB', '_VERB', 'перемещаться_VERB', 'двигаться_VERB', 'шагать_VERB', 'нестись_VERB', 'лететь_VERB', 'скакать_VERB', 'ехать_VERB']

G = nx.Graph()
G.add_nodes_from(verbs)
for word1 in verbs:
  if word1 not in model:
    print('There is no such word in the model:', word1)
    continue
  for word2 in verbs:
    if word2 in model:
      Cos = model.similarity(word1, word2)
      if 0.5 < Cos < 0.9999:  # skip self-similarity (~1.0) and weakly similar pairs
        G.add_edge(word1, word2)
print('nodes:', G.nodes())
print('edges:', G.edges())

pos = nx.spring_layout(G)
nx.draw_networkx_nodes(G, pos, node_color='black', node_size=25)
nx.draw_networkx_edges(G, pos, edge_color='red')
nx.draw_networkx_labels(G, pos, font_size=10, font_family='Arial')
plt.axis('off') 
plt.show()

central_words = []
deg = nx.degree_centrality(G)
for nodeid in sorted(deg, key=deg.get, reverse=True):
  central_words.append(nodeid)
print('Central words in the graph:', ", ".join(central_words[:3]))

print('Graph radius:', nx.radius(G))

print('Clustering coefficient:', nx.average_clustering(G))
Ejemplo n.º 44
0
NUM_NODES = 4158

if __name__ == "__main__":
    G = nx.Graph()
    with open("gr_qc_coauthorships.txt", "r") as f:
        for line in f:
            lst = line.strip('\n').split(' ')
            G.add_node(lst[0])
            G.add_node(lst[1])
            G.add_edge(lst[0], lst[1])
    degrees = []
    for node in G.nodes():
        degrees.append(G.degree(node))
    degrees = sorted(degrees)
    avg_cluster = nx.average_clustering(G)
    overall_cluster = nx.transitivity(G)  # transitivity already includes the factor 3 (3*triangles/triads)
    '''max_diam = nx.diameter(G)
	avg_diam = nx.average_shortest_path_length(G)
	print("Average clustering coefficient is: " + str(avg_cluster))
	print("Overall clustering coefficient is: " + str(overall_cluster))
	print("Maximal diameter is: " + str(max_diam))
	print("Average diameter is: " + str(avg_diam))
	plt.hist(degrees, bins = 'auto')
	plt.xlabel("Degree of node")
	plt.ylabel("Number of nodes")'''
    plt.figure()
    values, base = np.histogram(degrees, bins=40)
    ccdf = np.ones(len(values)) - np.cumsum(values / sum(values))
    plt.plot(ccdf)
    plt.title("CCDF of Degree Nodes")
Ejemplo n.º 45
0
randomNetwork=nx.to_networkx_graph(randomArray>1-randomProb)
nx.draw_networkx(randomNetwork)

#Degree distribution
degree_sequence = [d for n, d in randomNetwork.degree()]
plt.hist(degree_sequence,bins='auto',density=1)
#Calculate number of edges (L) for random networks
randomNum=1000
Nedges=np.zeros(randomNum)
ClusteringCoeff=np.zeros(randomNum)
for i in range(randomNum):
    randomArray=np.tril(np.random.random_sample([nodeSize,nodeSize]))
    np.fill_diagonal(randomArray,0)
    randomNetwork=nx.to_networkx_graph(randomArray>1-randomProb)
    Nedges[i]=randomNetwork.number_of_edges()
    ClusteringCoeff[i]=nx.average_clustering(randomNetwork)
plt.hist(Nedges,bins='auto',density=1)
plt.hist(ClusteringCoeff,bins='auto',density=1)

#Evolution of random networks
nodeSize=100
randomProbRange=np.arange(0.001,0.031,0.001)
randomProbLccSize=np.zeros(len(randomProbRange))
for i in range(len(randomProbRange)):
    randomProb=randomProbRange[i]
    randomArray=np.tril(np.random.random_sample([nodeSize,nodeSize]))
    np.fill_diagonal(randomArray,0)
    randomNetwork=nx.to_networkx_graph(randomArray>1-randomProb)
    lcc_node = max(nx.connected_components(randomNetwork),key=len)
    randomNetworkLcc=randomNetwork.subgraph(lcc_node)
    randomProbLccSize[i]=randomNetworkLcc.number_of_nodes()
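# For comparison (my addition): thresholding the lower-triangular uniform matrix at
# 1 - randomProb is an Erdos-Renyi draw, so nx.gnp_random_graph gives the same ensemble directly.
G_er = nx.gnp_random_graph(nodeSize, randomProb)
print(G_er.number_of_edges(), nx.average_clustering(G_er))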
Ejemplo n.º 46
0
def average_clustering_coefficient(G):
    # average_clustering is not defined for multigraphs, so collapse parallel edges first
    if G.is_multigraph():
        return nx.average_clustering(nx.Graph(G))
    return nx.average_clustering(G)
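# Usage sketch (my addition): nx.average_clustering rejects multigraphs, which is why the
# helper above collapses parallel edges into a simple Graph before measuring.
MG = nx.MultiGraph()
MG.add_edges_from([(0, 1), (0, 1), (1, 2), (0, 2)])  # one parallel edge between 0 and 1
print(average_clustering_coefficient(MG))  # clustering of the simple projection: 1.0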
Ejemplo n.º 47
0
def task_2_6(file, density, freq, f) :
    # creating labels list
    table = pd.read_csv("data/channel_locations.txt", delimiter='\s+', )
    labels = list(table.label)
    for i in range(len(labels)) :
        # clean labels name
        labels[i] = labels[i].replace('..', '')
        labels[i] = labels[i].replace('.', '')
    labels_dic = dict(zip([x for x in range(len(labels))], labels))
    pdc = PDC(file, freq)
    # adjacency matrix
    a_matrix = pdc.adj_matrix(density)
    # directed graph
    G = nx.from_numpy_matrix(a_matrix, create_using=nx.DiGraph)

    ### GLOBAL INDICES
    # average clustering coefficient
    avg_clustering_coeff = nx.average_clustering(G)
    # average shortest path length
    avg_shortest_path_length = nx.average_shortest_path_length(G)

    ### LOCAL INDICES
    # degree dictionaries
    # for each node (0-63) return:
    # the number of edges adjacent to the node
    degree = dict(nx.degree(G))
    nodes_list = list(degree.keys())
    degree_df = pd.DataFrame.from_dict(degree, orient='index', columns=["Degree"])
    # the number of edges pointing to the node
    in_degree = dict(G.in_degree())
    in_degree_df = pd.DataFrame.from_dict(in_degree, orient='index', columns=["In-Degree"])
    # the number of edges pointing out of the node
    out_degree = dict(G.out_degree())
    out_degree_df = pd.DataFrame.from_dict(out_degree, orient='index', columns=["Out-Degree"])

    ### LIST THE FIRST 10 CHANNELS for local indices
    top_10_degree = []
    top_10_in = []
    top_10_out = []
    for i in range(10) :
        # add first i channel to list
        top_10_degree.append(labels[max(degree, key=degree.get)])
        # put to zero the value
        degree[max(degree, key=degree.get)] = 0
        top_10_in.append(labels[max(in_degree, key=in_degree.get)])
        in_degree[max(in_degree, key=in_degree.get)] = 0
        top_10_out.append(labels[max(out_degree, key=out_degree.get)])
        out_degree[max(out_degree, key=out_degree.get)] = 0
    indeces = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    top_10_degree_df = pd.DataFrame(top_10_degree, index=indeces, columns=['Degree'])
    top_10_in_df = pd.DataFrame(top_10_in, index=indeces, columns=['In-Degree'])
    top_10_out_df = pd.DataFrame(top_10_out, index=indeces, columns=['Out-Degree'])

    # save top 10 to file csv
    top_10_df = pd.concat([top_10_degree_df, top_10_in_df, top_10_out_df], axis=1)
    try :
        os.remove('results/task_2_6_ %s' % f + '_top_10.csv')
        top_10_df.to_csv('results/task_2_6_ %s' % f + '_top_10.csv')
    except :
        top_10_df.to_csv('results/task_2_6_ %s' % f + '_top_10.csv')

    # save all degree on csv
    all_degree_df = pd.concat([degree_df, in_degree_df, out_degree_df], axis=1)
    all_degree_df = all_degree_df.rename(index=labels_dic)
    try :
        os.remove('results/task_2_6_%s' % f + '_all_degree.csv')
        all_degree_df.to_csv('results/task_2_6_%s' % f + '_all_degree.csv')
    except :
        all_degree_df.to_csv('results/task_2_6_%s' % f + '_all_degree.csv')

    # save global indeces to file txt
    try :
        os.remove('results/task_2_6_%s' % f + '_global.txt')
        text_file = open('results/task_2_6_%s' % f + '_global.txt', "w")
        text_file.write("Avarage Clustering Coefficient: %f \n \n" % avg_clustering_coeff)
        text_file.write("Avarage Shortest Path: %f" % avg_shortest_path_length)
        text_file.close()
    except :
        text_file = open('results/task_2_6_%s' % f + '_global.txt', "w")
        text_file.write("Avarage Clustering Coefficient: %f \n \n" % avg_clustering_coeff)
        text_file.write("Avarage Shortest Path: %f" % avg_shortest_path_length)
        text_file.close()
    print("Done!")
Ejemplo n.º 48
0
def inter_random_25k(G0, node_community, n_swap=1, max_tries=100, connected=1):
    """Returns a 2.5K null model beased on random reconnection algorithm inter communities

    Parameters
    ----------
    G0 : undirected and unweighted graph
    node_community : list
        nodes and the communities they belong to
    n_swap : int (default = 1)
        Number of double-edge swaps to perform
    max_tries : int (default = 100)
        Maximum number of attempts to swap edges
    connected : int
        keep the connectivity of the graph or not.
        1 : keep,    0 : not keep

    Notes
    -----
    Keep the 2.5K characteristics unchanged and keep the graph connected.
    Swap only edges that run between different communities.

    """
    judge_error(G0, n_swap, max_tries, connected)

    # Number of attempts to swap
    n_try = 0
    # Number of effective swaps
    swapcount = 0
    G = copy.deepcopy(G0)
    keys, degrees = zip(*dict(G.degree()).items())
    cdf = nx.utils.cumulative_distribution(degrees)

    while swapcount < n_swap:
        if n_try >= max_tries:
            print('Maximum number of swap attempts (%s) exceeded ' % n_try +
                  'before desired swaps achieved (%s)' % swapcount)
            break
        n_try += 1

        # Keep the degree distribution unchanged, choose two edges (u-v, x-y)
        # at random
        (ui, xi) = nx.utils.discrete_sequence(2, cdistribution=cdf)
        if ui == xi:
            continue
        u = keys[ui]
        x = keys[xi]
        v = random.choice(list(G[u]))
        y = random.choice(list(G[x]))
        # Make sure the four nodes are not repeated.
        if len(set([u, v, x, y])) == 4:
            # Make sure the chosen edges run between communities (inter-community).
            if edge_in_community(node_community,
                                 (u, v)) == 0 and edge_in_community(
                                     node_community, (x, y)) == 0:
                # Make sure the newly created edges are also inter-community.
                if edge_in_community(node_community,
                                     (u, y)) == 0 and edge_in_community(
                                         node_community, (v, x)) == 0:
                    # Keep the degree matching characteristic of nodes
                    # unchanged.
                    if G.degree(v) == G.degree(y):
                        # Make sure the new edges do not already exist in the
                        # original graph.
                        if (y not in G[u]) and (v not in G[x]):
                            G.add_edge(u, y)
                            G.add_edge(v, x)

                            G.remove_edge(u, v)
                            G.remove_edge(x, y)

                            degree_node_list = [
                                (d, n) for n, d in G0.degree(
                                    [u, v, x, y] + list(G[u]) + list(G[v]) +
                                    list(G[x]) + list(G[y]))
                            ]

                            dict_degree = count_degree_nodes(degree_node_list)
                            for i in range(len(dict_degree)):
                                avcG0 = nx.average_clustering(
                                    G0,
                                    nodes=list(dict_degree.values())[i],
                                    weight=None,
                                    count_zeros=True)
                                avcG = nx.average_clustering(
                                    G,
                                    nodes=list(dict_degree.values())[i],
                                    weight=None,
                                    count_zeros=True)
                                i += 1
                                # If the degree-related clustering coefficient changed after scrambling
                                # withdraw this operation about scrambling.
                                if avcG0 != avcG:
                                    G.add_edge(u, v)
                                    G.add_edge(x, y)
                                    G.remove_edge(u, y)
                                    G.remove_edge(x, v)
                                    break
                                # if connected = 1 but the original graph is not connected fully,
                                # withdraw the operation about the swap of
                                # edges.
                                if connected == 1:
                                    if not nx.is_connected(G):
                                        G.add_edge(u, v)
                                        G.add_edge(x, y)
                                        G.remove_edge(u, y)
                                        G.remove_edge(x, v)
                                        continue
                                swapcount += 1

    return G
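# The helpers judge_error, edge_in_community and count_degree_nodes are referenced above but not
# included in this excerpt; judge_error presumably performs the same argument checks as the ones
# at the top of Ejemplo n.º 42. Plausible sketches of the other two (my additions, assuming
# node_community is a list of node lists, one per community; the real implementations may differ):
def edge_in_community(node_community, edge):
    # 1 if both endpoints of the edge lie inside the same community, else 0
    u, v = edge
    for community in node_community:
        if u in community and v in community:
            return 1
    return 0

def count_degree_nodes(degree_node_list):
    # group nodes by degree: {degree: [nodes with that degree]}
    grouped = {}
    for degree, node in degree_node_list:
        grouped.setdefault(degree, []).append(node)
    return grouped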
Ejemplo n.º 49
0
import networkx as nx
import matplotlib.pyplot as plt
NUM = 282
p = 0.055
c = 14

G = nx.random_graphs.watts_strogatz_graph(NUM, c, p)
nx.draw(G, pos=nx.circular_layout(G))
plt.show()
degree = nx.degree_histogram(G)
x = range(len(degree))
y = [z / float(sum(degree)) for z in degree]
s = 0
for i in range(len(y)):
    s += y[i] * i  # index i of degree_histogram is the degree itself
print("Average Degree:", s)
print("Average Clustering:", nx.average_clustering(G))
try:
    print("Average Path Length:", nx.average_shortest_path_length(G))
except nx.exception.NetworkXError:
    print("Graph is not connected.")
plt.figure()
plt.scatter(x, y, marker='.')
# log
plt.figure()
plt.loglog(x, y, linewidth=0, marker='.')
plt.show()
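# Cross-check (my addition): the average degree also follows directly from the edge count as 2m/n.
print("Average Degree (direct):", 2 * G.number_of_edges() / G.number_of_nodes())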
Ejemplo n.º 50
0
print(maximum_clique_size)

maximum_cliques = [x for x in cliques if len(x) == maximum_clique_size]
print(maximum_cliques)

n_maximum_cliques = len(maximum_cliques)
print(n_maximum_cliques)

average_clique_size = sum(sizes_of_cliques) / n_cliques
print(average_clique_size)

maximum_clique_sets = [set(x) for x in maximum_cliques]
print(maximum_clique_sets)

# Clustering Coefficient

nx.clustering(g)

nx.average_clustering(g)

ego_net = nx.ego_graph(g, "53")
len(ego_net)

nx.average_clustering(ego_net)

plt.figure(figsize=(8, 8))
pos = nx.random_layout(ego_net)
nx.draw_networkx(ego_net, pos, node_size=300, with_labels=True)

nx.clustering(ego_net)
Ejemplo n.º 51
0
def fc_metrics_subareas(fcfile_pickle,
                        subareas=['M1', 'STN', 'GP'],
                        subtitle='M1DBS'):
    """
        cc: average Clustering Coefficient

        nbc: Node Betweenness centrality ()
    """

    with open(fcfile_pickle, 'rb') as handle:
        fc = pickle.load(handle)

    imcohs = fc['imcohs']
    pvals = fc['pvals']
    chnAreas = fc['chnAreas']

    idxs_remain = []
    chnAreas_new = []
    for ci, carea in enumerate(chnAreas):
        for sarea in subareas:
            if sarea.lower() in carea.lower():
                idxs_remain.append(ci)
                chnAreas_new.append(carea)

    idxs_remain = np.array(idxs_remain)

    tmp = imcohs[idxs_remain, :]
    tmp = tmp[:, idxs_remain]
    imcohs = tmp

    tmp = pvals[idxs_remain, :]
    tmp = tmp[:, idxs_remain]
    pvals = tmp

    chnAreas = chnAreas_new

    # multiple comparison correction, get weights
    reject, pval_corr = fdr_correction(pvals, alpha=0.05, method='indep')
    [rows, cols] = np.where(reject == True)
    weight = np.zeros(imcohs.shape)
    if len(rows) > 0:
        weight[rows, cols] = imcohs[rows, cols]

    weight = abs(weight)

    G = nx.Graph()
    G.add_nodes_from(np.arange(0, weight.shape[0]))

    for i in range(0, weight.shape[0] - 1):
        for j in range(i + 1, weight.shape[0]):
            if weight[i, j] > 0:
                G.add_edge(i, j, weight=weight[i, j])

    cc = nx.average_clustering(G)
    nbcs = nx.degree_centrality(G)

    folder, filename = os.path.split(fcfile_pickle)[0], os.path.split(
        fcfile_pickle)[1]
    metricfile = os.path.join(folder, 'metric_' + subtitle + '_' + filename)

    metrics = dict()
    metrics['cc'] = cc
    metrics['nbcs'] = nbcs
    metrics['chnAreas'] = fc['chnAreas']

    with open(metricfile, 'wb') as f:
        pickle.dump(metrics, f)
Ejemplo n.º 52
0
def BA_model():
    barabasi05 = []
    barabasi10 = []
    barabasi15 = []
    barabasi20 = []
    # generate 10 networks for each power and treat them the same way as a single network
    for i in range(10):
        #generate Barabasi network with p = power
        barabasi05.append(ig_to_nx(ig.Graph.Barabasi(500, 10, power=0.5)))
        barabasi10.append(ig_to_nx(ig.Graph.Barabasi(500, 10, power=1)))
        barabasi15.append(ig_to_nx(ig.Graph.Barabasi(500, 10, power=1.5)))
        barabasi20.append(ig_to_nx(ig.Graph.Barabasi(500, 10, power=2)))

    print("Finding degree distributions...")
    dists = {}
    dists["barabasi05"] = degree_distribution(barabasi05[0])
    dists["barabasi10"] = degree_distribution(barabasi10[0])
    dists["barabasi15"] = degree_distribution(barabasi15[0])
    dists["barabasi20"] = degree_distribution(barabasi20[0])

    # table
    print("Taking measures...")

    lens = {}
    lens["barabasi05"] = []
    lens["barabasi10"] = []
    lens["barabasi15"] = []
    lens["barabasi20"] = []

    degrees = {}
    degrees["barabasi05"] = []
    degrees["barabasi10"] = []
    degrees["barabasi15"] = []
    degrees["barabasi20"] = []

    clusterings = {}
    clusterings["barabasi05"] = []
    clusterings["barabasi10"] = []
    clusterings["barabasi15"] = []
    clusterings["barabasi20"] = []

    assortativities = {}
    assortativities["barabasi05"] = []
    assortativities["barabasi10"] = []
    assortativities["barabasi15"] = []
    assortativities["barabasi20"] = []

    shortest_paths = {}
    shortest_paths["barabasi05"] = []
    shortest_paths["barabasi10"] = []
    shortest_paths["barabasi15"] = []
    shortest_paths["barabasi20"] = []

    entropies = {}
    entropies["barabasi05"] = []
    entropies["barabasi10"] = []
    entropies["barabasi15"] = []
    entropies["barabasi20"] = []

    moments = {}
    moments["barabasi05"] = []
    moments["barabasi10"] = []
    moments["barabasi15"] = []
    moments["barabasi20"] = []

    for graph in barabasi05:
        lens["barabasi05"].append(len(graph))
        degrees["barabasi05"].append(average_degree(graph))
        clusterings["barabasi05"].append(nx.average_clustering(graph))
        assortativities["barabasi05"].append(nx.degree_assortativity_coefficient(graph))
        shortest_paths["barabasi05"].append(nx.average_shortest_path_length(graph))
        entropies["barabasi05"].append(entropy(graph))
        moments["barabasi05"].append(stat_moment(graph, 2))

    for graph in barabasi10:
        lens["barabasi10"].append(len(graph))
        degrees["barabasi10"].append(average_degree(graph))
        clusterings["barabasi10"].append(nx.average_clustering(graph))
        assortativities["barabasi10"].append(nx.degree_assortativity_coefficient(graph))
        shortest_paths["barabasi10"].append(nx.average_shortest_path_length(graph))
        entropies["barabasi10"].append(entropy(graph))
        moments["barabasi10"].append(stat_moment(graph, 2))

    for graph in barabasi15:
        lens["barabasi15"].append(len(graph))
        degrees["barabasi15"].append(average_degree(graph))
        clusterings["barabasi15"].append(nx.average_clustering(graph))
        assortativities["barabasi15"].append(nx.degree_assortativity_coefficient(graph))
        shortest_paths["barabasi15"].append(nx.average_shortest_path_length(graph))
        entropies["barabasi15"].append(entropy(graph))
        moments["barabasi15"].append(stat_moment(graph, 2))

    for graph in barabasi20:
        lens["barabasi20"].append(len(graph))
        degrees["barabasi20"].append(average_degree(graph))
        clusterings["barabasi20"].append(nx.average_clustering(graph))
        assortativities["barabasi20"].append(nx.degree_assortativity_coefficient(graph))
        shortest_paths["barabasi20"].append(nx.average_shortest_path_length(graph))
        entropies["barabasi20"].append(entropy(graph))
        moments["barabasi20"].append(stat_moment(graph, 2))

    print("Calculating Barabási-Albert measurements for alfa = 0.5...")
    # median
    print("Median of Barabasi alfa = 0.5")
    print("Number of nodes = %d" % np.median((lens["barabasi05"])))
    print("Degrees = %.4f" % np.median((degrees["barabasi05"])))
    print("Clustering coefficient = %.4f" % np.median((clusterings["barabasi05"])))
    print("Assortativity = %.4f" % np.median((assortativities["barabasi05"])))
    print("Shortest paths = %.4f" % np.median((shortest_paths["barabasi05"])))
    print("Shannon entropies = %.4f" % np.median((entropies["barabasi05"])))
    print("Second stat moments = %.4f" % np.median((moments["barabasi05"])))

    # deviation
    print("Standard Deviation of Barabasi alfa = 0.5")
    print("Number of nodes = %d" % np.std((lens["barabasi05"]), ddof=1))
    print("Degrees = %.4f" % np.std((degrees["barabasi05"]), ddof=1))
    print("Clustering coefficient = %.4f" % np.std((clusterings["barabasi05"]), ddof=1))
    print("Assortativity = %.4f" % np.std((assortativities["barabasi05"]), ddof=1))
    print("Shortest paths = %.4f" % np.std((shortest_paths["barabasi05"]), ddof=1))
    print("Shannon entropies = %.4f" % np.std((entropies["barabasi05"]), ddof=1))
    print("Second stat moments = %.4f" % np.std((moments["barabasi05"]), ddof=1))

    print("Calculating Barabási-Albert measurements for alfa = 1.0...")
    # median
    print("Median of Barabasi alfa = 1.0")
    print("Number of nodes = %d" % np.median((lens["barabasi10"])))
    print("Degrees = %.4f" % np.median((degrees["barabasi10"])))
    print("Clustering coefficient = %.4f" % np.median((clusterings["barabasi10"])))
    print("Assortativity = %.4f" % np.median((assortativities["barabasi10"])))
    print("Shortest paths = %.4f" % np.median((shortest_paths["barabasi10"])))
    print("Shannon entropies = %.4f" % np.median((entropies["barabasi10"])))
    print("Second stat moments = %.4f" % np.median((moments["barabasi10"])))

    # deviation
    print("Standard Deviation of Barabasi alfa = 1.0")
    print("Number of nodes = %d" % np.std((lens["barabasi10"]), ddof=1))
    print("Degrees = %.4f" % np.std((degrees["barabasi10"]), ddof=1))
    print("Clustering coefficient = %.4f" % np.std((clusterings["barabasi10"]), ddof=1))
    print("Assortativity = %.4f" % np.std((assortativities["barabasi10"]), ddof=1))
    print("Shortest paths = %.4f" % np.std((shortest_paths["barabasi10"]), ddof=1))
    print("Shannon entropies = %.4f" % np.std((entropies["barabasi10"]), ddof=1))
    print("Second stat moments = %.4f" % np.std((moments["barabasi10"]), ddof=1))

    print("Calculating Barabási-Albert measurements for alfa = 1.5...")
    # median
    print("Median of Barabasi alfa = 1.5")
    print("Number of nodes = %d" % np.median((lens["barabasi15"])))
    print("Degrees = %.4f" % np.median((degrees["barabasi15"])))
    print("Clustering coefficient = %.4f" % np.median((clusterings["barabasi15"])))
    print("Assortativity = %.4f" % np.median((assortativities["barabasi15"])))
    print("Shortest paths = %.4f" % np.median((shortest_paths["barabasi15"])))
    print("Shannon entropies = %.4f" % np.median((entropies["barabasi15"])))
    print("Second stat moments = %.4f" % np.median((moments["barabasi15"])))

    # deviation
    print("Standard Deviation of Barabasi alfa = 1.5")
    print("Number of nodes = %d" % np.std((lens["barabasi15"]), ddof=1))
    print("Degrees = %.4f" % np.std((degrees["barabasi15"]), ddof=1))
    print("Clustering coefficient = %.4f" % np.std((clusterings["barabasi15"]), ddof=1))
    print("Assortativity = %.4f" % np.std((assortativities["barabasi15"]), ddof=1))
    print("Shortest paths = %.4f" % np.std((shortest_paths["barabasi15"]), ddof=1))
    print("Shannon entropies = %.4f" % np.std((entropies["barabasi15"]), ddof=1))
    print("Second stat moments = %.4f" % np.std((moments["barabasi15"]), ddof=1))

    print("Calculating Barabási-Albert measurements for alfa = 2.0...")
    # median
    print("Median of Barabasi alfa = 2.0")
    print("Number of nodes = %d" % np.median((lens["barabasi20"])))
    print("Degrees = %.4f" % np.median((degrees["barabasi20"])))
    print("Clustering coefficient = %.4f" % np.median((clusterings["barabasi20"])))
    print("Assortativity = %.4f" % np.median((assortativities["barabasi20"])))
    print("Shortest paths = %.4f" % np.median((shortest_paths["barabasi20"])))
    print("Shannon entropies = %.4f" % np.median((entropies["barabasi20"])))
    print("Second stat moments = %.4f" % np.median((moments["barabasi20"])))

    # deviation
    print("Standard Deviation of Barabasi alfa = 2.0")
    print("Number of nodes = %d" % np.std((lens["barabasi20"]), ddof=1))
    print("Degrees = %.4f" % np.std((degrees["barabasi20"]), ddof=1))
    print("Clustering coefficient = %.4f" % np.std((clusterings["barabasi20"]), ddof=1))
    print("Assortativity = %.4f" % np.std((assortativities["barabasi20"]), ddof=1))
    print("Shortest paths = %.4f" % np.std((shortest_paths["barabasi20"]), ddof=1))
    print("Shannon entropies = %.4f" % np.std((entropies["barabasi20"]), ddof=1))
    print("Second stat moments = %.4f" % np.std((moments["barabasi20"]), ddof=1))
Ejemplo n.º 53
0
def smallworldstats(graph):
    avgclustering = nx.average_clustering(graph)
    avgpathlength = nx.average_shortest_path_length(graph)
    return avgclustering, avgpathlength
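# Usage sketch (my addition): the small-world signature is high clustering at a near-random
# path length, so the pair returned above is usually read against a density-matched
# Erdos-Renyi baseline.
ws = nx.connected_watts_strogatz_graph(200, 8, 0.1, seed=42)
er = nx.gnp_random_graph(200, 8 / 199, seed=42)
print("Watts-Strogatz:", smallworldstats(ws))
if nx.is_connected(er):  # average_shortest_path_length requires a connected graph
    print("Erdos-Renyi:  ", smallworldstats(er))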
Ejemplo n.º 54
0
def boxStats(boxNet):  # TODO: are the other three measures calculated here?
    ## matrices    
    boxNodes = len(boxNet)
    boxMat = nx.to_numpy_matrix(boxNet)
    boxSparse = csgraph_from_dense(boxMat)
    boxMatPath = shortest_path(boxSparse, method='auto', directed=False, return_predecessors=False, unweighted=True, overwrite=False)    
    boxPathList = []
    pairsNumBox = len(list(combinations(range(boxNodes), 2)))
    for i in range(boxNodes-1):
        for j in range(i+1, boxNodes):
            tempDist = boxMatPath[i][j]
            if tempDist > 0 and np.isfinite(tempDist):
                boxPathList.append(tempDist)
    
    ##boxNet characteristics
    degreeRaw = list(boxNet.degree())
    degreeBox = []
    for i in degreeRaw:
        degreeBox.append(i)
    degreeNormBox = np.divide(degreeBox, np.sum(degreeBox), dtype = float)
    
    diameterPathBox = np.max(boxPathList)
    avgPathDistBox = np.mean(boxPathList)
    nEdgesBox = np.divide(np.sum(degreeBox), 2, dtype = float)
    edgePBox = nx.density(boxNet)
    globalEfficiencyBox = np.divide(sum(np.divide(1, boxPathList, dtype = float)),pairsNumBox , dtype = float)
    radiusBox = nx.radius(boxNet)
    kCoreBox = max(list(nx.core_number(boxNet).values()))
    degreeAssortBox = nx.degree_assortativity_coefficient(boxNet)
    avgDegreeBox = np.mean(degreeBox)
    maxDegreeBox = max(degreeBox)
    eValsBox = np.linalg.eigvals(boxMat)
    spectralRadiusAdjBox = max(abs(eValsBox))
    eigenCentDictBox = nx.eigenvector_centrality_numpy(boxNet, weight=None)
    eigenCentRawBox = list(eigenCentDictBox.values())
    eigenCentBox = np.divide(eigenCentRawBox, sum(eigenCentRawBox), dtype = float)
    colorsBox = nx.coloring.greedy_color(boxNet, strategy=nx.coloring.strategy_connected_sequential_bfs)
    colorNumBox = len(list(set(list(colorsBox.values()))))
    avgClustCoeffBox = nx.average_clustering(boxNet)                        
    scaledSpectralRadiusBox = np.divide(spectralRadiusAdjBox, avgDegreeBox, dtype = float)
    if motifChoice == 1:
        freqMBox = motifCalc4(boxNet)
    else:
        freqMBox =  [0.166666667, 0.166666667, 0.166666667, 0.166666667, 0.166666667, 0.166666667]
    # network entropy
    lapMatBox= np.asarray(nx.to_numpy_matrix(nx.from_scipy_sparse_matrix(nx.laplacian_matrix(boxNet))))
    eValsLapBox = np.linalg.eigvals(lapMatBox)
    eValsLapBoxSorted = sorted(np.real(eValsLapBox))
    spectralGapBox = eValsLapBoxSorted[1]
    degreeSumBox = np.sum(degreeBox)
    lapMatBoxNorm =  np.divide(lapMatBox, degreeSumBox, dtype = float)
    eValsLapBoxNorm = np.linalg.eigvals(lapMatBoxNorm)
    eValsLapNonZeroBoxNorm = []
    for i in eValsLapBoxNorm:
        j = abs(i)
        if j > 0:
            eValsLapNonZeroBoxNorm.append(j)
    vonEntropyBox = np.divide(entropyCalc(eValsLapNonZeroBoxNorm), math.log(boxNodes,2), dtype = float)
    degreeEntropyBox = np.divide(entropyCalc(degreeNormBox), math.log(boxNodes,2), dtype = float)
    KSEntropyBox = np.divide(math.log(spectralRadiusAdjBox, 2), math.log(boxNodes-1,2), dtype = float)
    motifEntropyBox = np.divide(entropyCalc(freqMBox), math.log(len(freqMBox),2), dtype = float)
    popEntropyBox = np.divide(entropyCalc(eigenCentBox), math.log(boxNodes,2), dtype = float)
    graphEntropyBox = np.divide(graphEntropyCalc(colorsBox), math.log(boxNodes,2), dtype = float)
    
    return edgePBox, radiusBox, kCoreBox, degreeAssortBox, diameterPathBox, avgPathDistBox, nEdgesBox, globalEfficiencyBox, avgDegreeBox, maxDegreeBox, spectralRadiusAdjBox, spectralGapBox, scaledSpectralRadiusBox, colorNumBox, avgClustCoeffBox, freqMBox, motifEntropyBox, vonEntropyBox, graphEntropyBox, popEntropyBox, KSEntropyBox, degreeEntropyBox
Ejemplo n.º 55
0
#BG       1359   ZYL           3074             DAC           3076            0       313 772      1
#4H       8463   ZYL           3074             DAC           3076            0       313          1

# All of ZYL's destinations were DAC airport, i.e. this count is its out-degree

conn1[conn1['main Airport'] == 'ZYL'].count()  # 4

## Betweeness Centrality
b = nx.betweenness_centrality(g)  # betweenness centrality
print(max(b, key=b.get))  # node with the highest betweenness centrality
#ZYL has the maximum betweenness centrality

## Eigen-Vector Centrality
evg = nx.eigenvector_centrality(g)  # eigenvector centrality
print(max(evg, key=evg.get))  # node with the highest eigenvector centrality
#ZYL has maximum influence.

# cluster coefficient
cluster_coeff = nx.clustering(g)
cluster_zyl = nx.clustering(g, 'ZYL')  #0.8333333333333334
# If cluster coefficient is closer to 1 it is clique.
print(cluster_coeff)

#ZYL
#clustering coefficient is a measure of the degree to which nodes in a graph tend to cluster together
# two nodes that are connected are likely to be a part of a larger cluster
# Average clustering
cc = nx.average_clustering(g)
print(cc)
#0.4870933566129556
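# To make the comments above concrete (my addition): a clique has average clustering 1,
# while a hub-and-spoke star has 0.
print(nx.average_clustering(nx.complete_graph(5)))  # 1.0 -- every neighbourhood is fully connected
print(nx.average_clustering(nx.star_graph(5)))      # 0.0 -- the leaves share no edges with each other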
Ejemplo n.º 56
0
df = pd.concat(li, axis=0, ignore_index=True)

df = df[['Source', 'Target']]
df.drop_duplicates(subset=['Source', 'Target'], inplace=True)

#print(df.head(20))

# create the ASOIAF networkx object
G = nx.from_pandas_edgelist(df, source='Source', target='Target')

# create a random graph networkx object
#G = nx.erdos_renyi_graph(len(G.nodes()), 2*len(G.edges())/(len(G.nodes())*(len(G.nodes())-1)))
#G = nx.watts_strogatz_graph(len(G.nodes()), 7, 0.3)

print("\n\nNumber of nodes: %d" % len(G.nodes()))

print("\n\nNumber of edges: %d" % len(G.edges()))

graphs = sorted((G.subgraph(c).copy() for c in nx.connected_components(G)),
                key=len, reverse=True)
print("\n\nConnected components: %d (size: %d)" %
      (len(graphs), len(graphs[0].nodes())))

print("\n\nAverage shortest path: %f" %
      nx.average_shortest_path_length(graphs[0]))

print("\n\nDiameter: %d" % nx.diameter(graphs[0]))

print("\n\nAverage clustering coefficient: %f" % nx.average_clustering(G))

degree_distribution.plot_degree_distribution(graphs[0])
Ejemplo n.º 57
0
def group_evaluate_trace(xnr_user_no,
                         nodes,
                         all_influence,
                         all_sensitive,
                         date_time,
                         G=None):
    result = {}
    result['xnr_user_no'] = xnr_user_no
    result['nodes'] = nodes
    result['num'] = len(nodes)

    # fetch the community retweet network from redis
    count = 0
    scan_cursor = 1
    now_ts = time.time()
    now_date_ts = datetime2ts(ts2datetime(now_ts))
    #get redis db number
    db_number = get_db_num(now_date_ts)
    print('db_number:', str(db_number))
    #get redis db
    print('retweet_dict::', retweet_redis_dict)
    retweet_redis = retweet_redis_dict[str(db_number)]
    comment_redis = comment_redis_dict[str(db_number)]

    retweet_result = []
    for uid in nodes:
        item_1 = str('retweet_' + uid)
        # print 'item_lookup::',item_1,type(item_1)
        re_result = retweet_redis.hgetall(item_1)
        if re_result:
            save_dict = dict()
            save_dict['uid'] = uid
            save_dict['uid_retweet'] = re_result
            retweet_result.append(save_dict)
# print 'test_result::',retweet_result
# print 'aaa:::', retweet_redis.hgetall('retweet_'+str(nodes[-1]))

#print 'retweet_redis::',retweet_redis
#print 'comment_redis::',comment_redis
    ''' 
    re_scan = retweet_redis.scan(scan_cursor,count=10)
    for item in re_scan[1]:
       # item_list = item.split('_')
        print 'item::',item,type(item)
        item_result = retweet_redis.hgetall(item)
        print 'item_result::',item_result
   # print 'hlen::',retweet_redis.hlen()
   # print 'hgetall::',retweet_redis.hgetall()
    retweet_result = retweet_redis.hgetall(nodes)
    comment_result = comment_redis.hgetall(nodes)
    '''
    # print 'retweet_result:::',retweet_result
    #print 'comment_result:::',comment_result

    G_i = nx.Graph()
    for i in retweet_result:
        # print 'i:',i
        # if not i['found']:
        #     continue
        uid_retweet = i['uid_retweet']
        max_count = max([int(n) for n in uid_retweet.values()])
        G_i.add_weighted_edges_from([
            (i['uid'], j, float(uid_retweet[j]) / max_count)
            for j in uid_retweet.keys() if j != i['uid'] and j and i['uid']
        ])
    '''
    for i in comment_result:
        # print 'comment_i:',i
        if not i['found']:
            continue
        uid_comment = json.loads(i['_source']['uid_comment'])
        max_count = max([int(n) for n in uid_comment.values()])
        G_i.add_weighted_edges_from([(i['_source']['uid'],j,float(uid_comment[j])/max_count) for j in uid_comment.keys() if j != i['_source']['uid'] and j and i['_source']['uid']])
    '''

    sub_g = G_i.subgraph(nodes)

    result['density'] = round(nx.density(sub_g), 4)
    #print 'ave_cluster::',nx.average_clustering(sub_g)
    try:
        result['cluster'] = round(nx.average_clustering(sub_g), 4)
    except:
        result['cluster'] = 0
    result['transitivity'] = round(nx.transitivity(sub_g), 4)

    ## replace the results with the values computed for the given day
    influence_field = 'user_index'
    sensitive_field = 'sensitive'
    influence_result = get_influence_value(date_time, influence_field, nodes)
    sensitive_result = get_sensitive_value(date_time, sensitive_field, nodes)

    result['max_influence'] = round(
        (max(influence_result) / float(all_influence)) * 100, 4)
    result['mean_influence'] = round(
        ((sum(influence_result) / len(influence_result)) /
         float(all_influence)) * 100, 4)

    max_sensitive = round((max(sensitive_result) / float(all_sensitive)) * 1,
                          4)
    if max_sensitive > 100:
        result['max_sensitive'] = 100.0000
    else:
        result['max_sensitive'] = max_sensitive
    result['mean_sensitive'] = round(
        ((sum(sensitive_result) / len(sensitive_result)) /
         float(all_sensitive)) * 1, 4)

    return result
Ejemplo n.º 58
0
    "Branch Avenue", "Suitland", "Naylor Road", "Southern Avenue",
    "Congress Heights", "Anacostia", "Navy Yard–Ballpark", "Waterfront",
    "L'Enfant Plaza", "Archives", "Gallery Place", "Mount Vernon Square",
    "Shaw – Howard University", "U Street", "Columbia Heights",
    "Georgia Avenue–Petworth", "Fort Totten", "West Hyattsville",
    "Prince George's Plaza", "College Park–University of Maryland", "Greenbelt"
]
add_edges(metro, green_line)
silver_line = [
    "Wiehle–Reston East", "Spring Hill", "Greensboro", "Tysons Corner",
    "McLean", "East Falls Church", "Ballston–MU", "Virginia Square–GMU",
    "Clarendon", "Court House", "Rosslyn", "Foggy Bottom – GWU",
    "Farragut West", "McPherson Square", "Metro Center", "Federal Triangle",
    "Smithsonian", "L'Enfant Plaza", "Federal Center SW", "Capitol South",
    "Eastern Market", "Potomac Avenue", "Stadium–Armory", "Benning Road",
    "Capitol Heights", "Addison Road", "Morgan Boulevard", "Largo Town Center"
]
add_edges(metro, silver_line)
#nx.draw(metro_distance, with_labels = True, node_size = 100)
nx.write_graphml(metro, os.getcwd() + "\\test.graphml")
print("clossness centality")
print(nx.closeness_centrality(metro))
print("degree centality")
print(nx.degree_centrality(metro))
print("betweenness centrality")
print(nx.betweenness_centrality(metro))
print("circuit rank")
print(nx.number_of_edges(metro) - nx.number_of_nodes(metro) + 1)
print("Average clustering")
print(nx.average_clustering(metro))
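# More general form (my addition): circuit rank = E - N + C, where C is the number of connected
# components; the line above assumes the metro graph is connected (C = 1).
print(nx.number_of_edges(metro) - nx.number_of_nodes(metro) + nx.number_connected_components(metro))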
Ejemplo n.º 59
0
        n1 = set((amazonBooks[asin]['Categories']).split())
        n2 = set((amazonBooks[a]['Categories']).split())
        n1In2 = n1 & n2
        n1Un2 = n1 | n2
        if (len(n1Un2)) > 0:
            similarity = round(len(n1In2)/len(n1Un2),2)
        copurchaseGraph.add_edge(asin, a.strip(), weight=similarity)

# get degree centrality and clustering coefficients 
# of each ASIN and add it to amazonBooks metadata
dc = networkx.degree(copurchaseGraph)
for asin in networkx.nodes(copurchaseGraph):
    metadata = amazonBooks[asin]
    metadata['DegreeCentrality'] = int(dc[asin])
    ego = networkx.ego_graph(copurchaseGraph, asin, radius=1)
    metadata['ClusteringCoeff'] = round(networkx.average_clustering(ego),2)
    amazonBooks[asin] = metadata

# write amazonBooks data to file
# (all except copurchase data - because that data is now in the graph)
fhw = open('./amazon-books.txt', 'w', encoding='utf-8', errors='ignore')
fhw.write("Id\t" + "ASIN\t" + "Title\t" + 
        "Categories\t" + "Group\t" #+ "Copurchased\t" + 
        "SalesRank\t" + "TotalReviews\t" + "AvgRating\t"
        "DegreeCentrality\t" + "ClusteringCoeff\n")
for asin,metadata in amazonBooks.items():
    fhw.write(metadata['Id'] + "\t" + \
            asin + "\t" + \
            metadata['Title'] + "\t" + \
            metadata['Categories'] + "\t" + \
            metadata['Group'] + "\t" + \
Ejemplo n.º 60
0
karateclub = nx.read_gml("karate.gml")

#analyzing dataset

# calculating average degree

N, K = karateclub.order(), karateclub.size()
print("degree of node 1: ", karateclub.degree(1))
avg_deg = float(K) / N
print("Nodes: ", N)
print("Edges: ", K)
print("Average degree: ", avg_deg)

#clustering coefficients
print("clustering coefficient node 1: ", nx.clustering(karateclub, 1))
print("average clustering coefficient", nx.average_clustering(karateclub))
#Betweenness centrality
print("betweenness centrality of node 1: ",
      nx.betweenness_centrality(karateclub)[1])


def avg_betw_centr(graph):
    # compute betweenness centrality once and average it over all nodes
    bc = nx.betweenness_centrality(graph)
    avg = sum(bc.values()) / float(graph.order())
    print("average betweenness centrality: ", avg)