예제 #1
0
def main():
    """Print bipartite clustering and density figures for every graph file.

    For each entry of the module-level ``files`` the GML file
    ``graphs/<path>`` is read and a second graph ``B`` is built from it:

    * every entry of ``relevant_subreddits`` is added with ``bipartite=0``;
    * every other node of the loaded graph is added with ``bipartite=1``;
    * only edges with at least one endpoint in ``relevant_subreddits`` are
      copied over.

    The average bipartite clustering coefficient is then printed for a
    random sample of at most 500 ``bipartite=1`` nodes, followed by the
    bipartite density of ``B``.
    """
    # Built once so the per-edge membership tests below are O(1).
    relevant = set(relevant_subreddits)
    sample_size = 500

    for path in files:
        print(path)
        G = nx.read_gml('graphs/' + path)

        B = nx.Graph()
        B.add_nodes_from(relevant_subreddits, bipartite=0)
        other_nodes = set(G.nodes()) - relevant
        B.add_nodes_from(other_nodes, bipartite=1)

        # NOTE(review): an edge joining two relevant subreddits is kept as
        # well, which would make B non-bipartite — confirm this is intended.
        B.add_edges_from(
            (u, v) for u, v in G.edges() if u in relevant or v in relevant
        )

        if not other_nodes:
            # Nothing to sample from; the statistics below would divide by
            # zero on an empty node set.
            print('No nodes outside relevant_subreddits; skipping statistics')
            continue

        # random.sample raises ValueError when asked for more items than the
        # population holds, so cap the sample at the population size.
        candidates = list(other_nodes)
        result = random.sample(candidates, min(sample_size, len(candidates)))

        print('Clustering Coefficient')
        print(bipartite.average_clustering(B, result))
        print('Density')
        # bipartite.density expects ALL nodes of one bipartite set (it uses
        # len(nodes) as that set's size), so pass the full set, not the sample.
        print(bipartite.density(B, other_nodes))
예제 #2
0
def graph_metrics(m):
    """Compute clustering and modularity of the bipartite graph built from ``m``.

    ``m`` (described by the original author as the "internal representation,
    n by L") is converted to a SciPy CSR matrix and interpreted as a
    biadjacency matrix.

    Returns a 2-tuple ``(avg_c, modularity)``:

    - ``avg_c``: average bipartite clustering coefficient of the graph,
      computed with ``mode="dot"`` -- a measure of the local density of the
      connectivity.
      ref: https://networkx.org/documentation/stable//reference/algorithms/generated/networkx.algorithms.bipartite.cluster.clustering.html#networkx.algorithms.bipartite.cluster.clustering
    - ``modularity``: modularity of the Louvain best partition -- relative
      density of edges inside communities with respect to edges outside
      communities.
      ref: https://python-louvain.readthedocs.io/en/latest/api.html#community.modularity
    """
    biadjacency = scipy.sparse.csr_matrix(m)
    graph = bipartite.matrix.from_biadjacency_matrix(biadjacency)

    mean_clustering = bipartite.average_clustering(graph, mode="dot")

    # Louvain community detection, then score the partition it found.
    communities = community_louvain.best_partition(graph)
    return mean_clustering, community.modularity(communities, graph)
    print("sa")
    B.add_edges_from(edges)
    print("sa")
    bi0 = list(bi0)
    bi1 = list(bi1)
    random.seed()
    for i in range((int)(percentage / 100 * len(bi0))):
        r = random.randint(1, len(bi0) - 1)
        print(i)
        if bi0[r] not in bi0_sample:
            bi0_sample.append(bi0[r])

    random.seed()
    for i in range((int)(percentage / 100 * len(bi1))):
        r = random.randint(1, len(bi1) - 1)
        print(i)
        if bi1[r] not in bi1_sample:
            bi1_sample.append(bi1[r])
    sample_node = sum([bi0_sample, bi1_sample], [])
    print("salam")
    g = B.subgraph(sample_node)
    return g




# Driver: load the `out.actor-movie` file, build a graph from it and print the
# graph's average bipartite clustering coefficient.
DATA_FILE = '/Users/amirhossein/Desktop/term8/cpmplex/projects/HW2/out.actor-movie'

data = convert_txt_to_numpy(DATA_FILE)
# NOTE(review): the second argument (20) is presumably the sampling
# percentage used by create_geaph — confirm against its definition.
g = create_geaph(data, 20)
a = bipartite.average_clustering(g)
print(a)
예제 #4
0
# Maps a category key to a single-letter colour code
# (presumably matplotlib colour codes — confirm where it is consumed).
color_dict = {'0': 'b', '1': 'r'}
# print(my_graph.nodes(data=True))  # which includes the category data

# Disabled: build one colour per node from its 'cat' attribute.
# color_list = []
# for node in my_graph.nodes(data=True):
#     if node[1]['cat'] == 'segment':
#         color_list.append('r')
#     elif node[1]['cat'] == 'date':
#         color_list.append('b')  # node[1]['ncolor']

# Let's look at some numbers.
# X, Y = bipartite.sets(my_graph)
print("The overall clustering coefficient is", bipartite.average_clustering(my_graph))
print("The clustering coefficient of the segment nodes is", bipartite.average_clustering(my_graph, segment_list))
print("The clustering coefficient of the dates is", bipartite.average_clustering(my_graph, dates[0:num_runs]))
print("The clustering coefficient for each connected component is")
# nx.connected_component_subgraphs() was removed in networkx 2.4; building the
# subgraphs from connected_components() is the documented replacement.
for g in (my_graph.subgraph(c) for c in nx.connected_components(my_graph)):
    print(bipartite.average_clustering(g))

    # Disabled: per-category clustering inside each component.
    # nodes = g.nodes(data=True)
    # print(type(nodes))
    # these_nodes = [node for node in nodes if node[1]['cat'] == 'date']
    # print("Date avg clust coeff is ", bipartite.average_clustering(g, these_nodes))
    # those_nodes = [node for node in nodes if node[1]['cat'] == 'segment']
    # print(bipartite.average_clustering(g, those_nodes))

# Size of the largest component: take the biggest component directly instead
# of assuming one hard-coded node happens to live in it.
largest_component = max(nx.connected_components(my_graph), key=len, default=())
print("The size of the largest connected components is", len(largest_component))
예제 #5
0
def test_average_path_graph():
    """Check bipartite average clustering of the 4-node path graph per mode."""
    G = nx.path_graph(4)
    # Plain asserts instead of nose's assert_equal: nose is unmaintained and
    # pytest reports the compared values for a bare assert anyway.
    assert bipartite.average_clustering(G, mode='dot') == 0.5
    assert bipartite.average_clustering(G, mode='max') == 0.5
    assert bipartite.average_clustering(G, mode='min') == 1
예제 #6
0
def test_average_path_graph():
    """Check bipartite average clustering of the 4-node path graph per mode."""
    G = nx.path_graph(4)
    # Expected value for each clustering mode; plain asserts replace nose's
    # assert_equal so the test no longer depends on the unmaintained nose.
    expected_by_mode = (('dot', 0.5), ('max', 0.5), ('min', 1))
    for mode, expected in expected_by_mode:
        assert bipartite.average_clustering(G, mode=mode) == expected
예제 #7
0
# Degree percentiles, printed in the same (non-monotonic) order as before:
# first for the consignee degrees, then for the shipper degrees.
for pct in (10, 20, 25, 90, 80, 75):
    print("The {} percentile is ".format(pct),
          np.percentile(List_consignees_degrees, pct))
for pct in (10, 20, 25, 90, 80, 75):
    print("The {} percentile is s".format(pct),
          np.percentile(List_shippers_degrees, pct))

# Whole-graph figures.
print("The number of connected components is",
      nx.number_connected_components(G))
print("The ratio of shippers by consignees is",
      (nx.number_of_nodes(G) - len(List_consignees_degrees)) / len(List_consignees_degrees))
print("The ratio of edges by nodes is",
      nx.number_of_edges(G) / nx.number_of_nodes(G))
print("The average bipartie clustering coefficient is",
      bipartite.average_clustering(G))
print("The number of consignees working with one shipper is",
      nb_consignees_degree_one(G))
print("The size of the max connected component is",
      nx.number_of_nodes(giant))

# Spreadsheet summary: row 0 holds the column headers, row 1 the values.
workbook = xlsxwriter.Workbook('summary.xlsx')
worksheet = workbook.add_worksheet()

summary_columns = [
    (1, "Number of nodes", nx.number_of_nodes(G)),
    (2, "Number of consignees", len(List_consignees_degrees)),
    (3, "Number of shippers", nx.number_of_nodes(G) - len(List_consignees_degrees)),
    (4, "Number of edges", nx.number_of_edges(G)),
]
for column, header, value in summary_columns:
    worksheet.write(0, column, header)
    worksheet.write(1, column, value)
worksheet.write(0, 5, "Is the graph bipartite ?")
예제 #8
0
def test_average_path_graph():
    """Average bipartite clustering of the 4-node path graph, for each mode."""
    path = nx.path_graph(4)
    expected_by_mode = {"dot": 0.5, "max": 0.5, "min": 1}
    for mode, expected in expected_by_mode.items():
        assert bipartite.average_clustering(path, mode=mode) == expected
예제 #9
0
# Summary statistics of the per-student and per-video counts.  These are bare
# expressions: the result is only shown in an interactive session/notebook.
pd.Series(student_counts).describe()
pd.Series(video_counts).describe()

#########################
###  Build a network  ###
#########################

# One group is student IDs
# The other group is video IDs
# NOTE(review): assumes student IDs and video IDs never share a value;
# otherwise the two node sets would collide in G — confirm upstream.
G = nx.Graph()
G.add_nodes_from(students, bipartite=0)
G.add_nodes_from(videos, bipartite=1)
G.add_edges_from(
    zip(combined['useraccount_id'].values, combined['video_id'].values))

cluster_all = bipartite.average_clustering(G)
# bipartite.density() requires the nodes of one bipartite set as its second
# argument; calling it with the graph alone raises TypeError.
density_all = bipartite.density(G, students)

# Get degrees for unprojected graph
student_degree = G.degree(students)
list_student_degree = [val for (node, val) in student_degree]
plt.hist(list_student_degree)
plt.xlabel('Number of Unique Videos')
plt.ylabel('Number of Students')
plt.show()
pd.Series(list_student_degree).describe()

video_degree = G.degree(videos)
list_video_degree = [val for (node, val) in video_degree]
plt.hist(list_video_degree)
plt.xlabel('Number of Unique Accessing Students')
예제 #10
0
def test_average_path_graph():
    """Verify the average bipartite clustering of a 4-node path in all modes."""
    graph = nx.path_graph(4)
    dot_value = bipartite.average_clustering(graph, mode='dot')
    max_value = bipartite.average_clustering(graph, mode='max')
    min_value = bipartite.average_clustering(graph, mode='min')
    assert dot_value == 0.5
    assert max_value == 0.5
    assert min_value == 1