def main():
    """Report bipartite clustering and density for each stored graph.

    For every GML file named in the module-level ``files`` list, builds a
    bipartite graph ``B``: side 0 holds ``relevant_subreddits``, side 1
    holds every other node of the loaded graph, and only edges incident
    to a relevant subreddit are kept.  Prints the bipartite clustering
    coefficient and density over a random sample of side-1 nodes.
    """
    for path in files:
        print(path)
        B = nx.Graph()
        G = nx.read_gml('graphs/' + path)
        B.add_nodes_from(relevant_subreddits, bipartite=0)
        # Side-1 node set: everything in G that is not a relevant subreddit.
        # Set difference replaces the original build-then-remove loop.
        nodeList = set(G.nodes()) - set(relevant_subreddits)
        B.add_nodes_from(nodeList, bipartite=1)
        # Keep only edges touching at least one relevant subreddit; any
        # other edge would connect two side-1 nodes and break bipartiteness.
        edgeList = [e for e in G.edges()
                    if e[0] in relevant_subreddits or e[1] in relevant_subreddits]
        B.add_edges_from(edgeList)
        # Guard against small graphs: random.sample raises ValueError when
        # the population is smaller than the requested size (was a hard 500).
        result = random.sample(list(nodeList), min(500, len(nodeList)))
        print('Clustering Coefficient')
        print(bipartite.average_clustering(B, result))
        print('Density')
        print(bipartite.density(B, result))
def graph_metrics(m):
    """Return connectivity statistics of the internal representation ``m`` (n by L).

    Returns a 2-tuple:
    - average bipartite clustering coefficient of the graph whose
      biadjacency matrix is ``m`` — a measure of local density of the
      connectivity.
      ref: https://networkx.org/documentation/stable//reference/algorithms/generated/networkx.algorithms.bipartite.cluster.clustering.html#networkx.algorithms.bipartite.cluster.clustering
    - modularity of the best Louvain partition — relative density of edges
      inside communities with respect to edges outside communities.
      ref: https://python-louvain.readthedocs.io/en/latest/api.html#community.modularity
    """
    graph = bipartite.matrix.from_biadjacency_matrix(scipy.sparse.csr_matrix(m))
    avg_clustering = bipartite.average_clustering(graph, mode="dot")
    best_partition = community_louvain.best_partition(graph)
    return avg_clustering, community.modularity(best_partition, graph)
# --- tail of a graph-sampling helper; its def line lies outside this chunk ---
# NOTE(review): layout reconstructed from collapsed text.  B, edges, bi0, bi1,
# bi0_sample, bi1_sample and percentage are defined before this point — verify
# against the full function.
    print("sa")
    B.add_edges_from(edges)
    print("sa")
    bi0 = list(bi0)
    bi1 = list(bi1)
    random.seed()
    # Draw ~percentage% of side-0 nodes; duplicates are skipped, so the sample
    # may end up smaller than requested.  randint starts at 1, so index 0 can
    # never be drawn — presumably unintentional; TODO confirm.
    for i in range((int)(percentage / 100 * len(bi0))):
        r = random.randint(1, len(bi0) - 1)
        print(i)
        if bi0[r] not in bi0_sample:
            bi0_sample.append(bi0[r])
    random.seed()
    # Same sampling procedure for the side-1 nodes.
    for i in range((int)(percentage / 100 * len(bi1))):
        r = random.randint(1, len(bi1) - 1)
        print(i)
        if bi1[r] not in bi1_sample:
            bi1_sample.append(bi1[r])
    # Concatenate both samples and return the induced subgraph of B.
    sample_node = sum([bi0_sample, bi1_sample], [])
    print("salam")
    g = B.subgraph(sample_node)
    return g

# Module-level driver: load the actor-movie dataset, sample 20 percent of it,
# and print the average bipartite clustering coefficient of the sample.
data = convert_txt_to_numpy('/Users/amirhossein/Desktop/term8/cpmplex/projects/HW2/out.actor-movie')
g=create_geaph(data,20)
a=bipartite.average_clustering(g)
print(a)
# Node colour legend keyed by bipartite side label: '0' -> blue, '1' -> red.
color_dict = {'0':'b','1':'r'}

# let's look at some numbers
print("The overall clustering coefficient is", bipartite.average_clustering(my_graph))
print("The clustering coefficient of the segment nodes is", bipartite.average_clustering(my_graph, segment_list))
print("The clustering coefficient of the dates is", bipartite.average_clustering(my_graph, dates[0:num_runs]))
print("The clustering coefficient for each connected component is")
# BUG FIX: nx.connected_component_subgraphs() was removed in networkx 2.4.
# Build each component subgraph explicitly — same iteration, same output.
for g in (my_graph.subgraph(c) for c in nx.connected_components(my_graph)):
    print(bipartite.average_clustering(g))
# size of the largest component
print("The size of the largest connected components is", len(nx.node_connected_component(my_graph, 'Sheridan Road Climb')))
def test_average_path_graph():
    """P4 path graph: bipartite clustering is 0.5 in dot/max modes, 1 in min mode."""
    G = nx.path_graph(4)
    # Plain asserts replace nose's assert_equal, which is no longer available
    # (nose is unmaintained; networkx's test suite migrated to pytest).
    assert bipartite.average_clustering(G, mode='dot') == 0.5
    assert bipartite.average_clustering(G, mode='max') == 0.5
    assert bipartite.average_clustering(G, mode='min') == 1
# Degree-distribution percentiles: consignee side first, then shipper side
# (the shipper lines carry a trailing 's' marker in the label).
for q in (10, 20, 25, 90, 80, 75):
    print ("The %d percentile is " % q, np.percentile(List_consignees_degrees, q))
for q in (10, 20, 25, 90, 80, 75):
    print ("The %d percentile is s" % q, np.percentile(List_shippers_degrees, q))

# Global structure of the shipper/consignee bipartite graph.
n_nodes = nx.number_of_nodes(G)
n_consignees = len(List_consignees_degrees)
n_shippers = n_nodes - n_consignees
print ("The number of connected components is", nx.number_connected_components(G))
print ("The ratio of shippers by consignees is", n_shippers/n_consignees)
print ("The ratio of edges by nodes is", nx.number_of_edges(G)/n_nodes)
print ("The average bipartie clustering coefficient is", bipartite.average_clustering(G))
print ("The number of consignees working with one shipper is", nb_consignees_degree_one(G))
print ("The size of the max connected component is", nx.number_of_nodes(giant))

# Summary spreadsheet: labels on row 0, matching values on row 1.
workbook = xlsxwriter.Workbook('summary.xlsx')
worksheet = workbook.add_worksheet()
summary_columns = [
    ("Number of nodes", n_nodes),
    ("Number of consignees", n_consignees),
    ("Number of shippers", n_shippers),
    ("Number of edges", nx.number_of_edges(G)),
]
for col, (label, value) in enumerate(summary_columns, start=1):
    worksheet.write(0, col, label)
    worksheet.write(1, col, value)
worksheet.write(0, 5, "Is the graph bipartite ?")
def test_average_path_graph():
    """Bipartite clustering of the 4-node path graph under each mode."""
    G = nx.path_graph(4)
    expected = {"dot": 0.5, "max": 0.5, "min": 1}
    for mode, value in expected.items():
        assert bipartite.average_clustering(G, mode=mode) == value
pd.Series(student_counts).describe()
pd.Series(video_counts).describe()

#########################
###  Build a network  ###
#########################

# Bipartite graph: one side is student IDs, the other is video IDs.
G = nx.Graph()
G.add_nodes_from(students, bipartite=0)
G.add_nodes_from(videos, bipartite=1)
# add_edges_from accepts any iterable of pairs; no need to materialize a list.
G.add_edges_from(
    zip(combined['useraccount_id'].values, combined['video_id'].values))

cluster_all = bipartite.average_clustering(G)
# BUG FIX: bipartite.density(B, nodes) requires the node set of one side;
# calling it with the graph alone raises TypeError.  Using the student side
# here — NOTE(review): confirm that is the intended denominator.
density_all = bipartite.density(G, students)

# Degree distribution of the student side in the unprojected graph.
student_degree = G.degree(students)
list_student_degree = [val for (node, val) in student_degree]
plt.hist(list_student_degree)
plt.xlabel('Number of Unique Videos')
plt.ylabel('Number of Students')
plt.show()
pd.Series(list_student_degree).describe()

# Degree distribution of the video side.
video_degree = G.degree(videos)
list_video_degree = [val for (node, val) in video_degree]
plt.hist(list_video_degree)
plt.xlabel('Number of Unique Accessing Students')
def test_average_path_graph():
    """average_clustering on path_graph(4): 0.5 for dot and max, 1 for min."""
    G = nx.path_graph(4)
    modes = ('dot', 'max', 'min')
    results = [bipartite.average_clustering(G, mode=m) for m in modes]
    assert results == [0.5, 0.5, 1]