Ejemplo n.º 1
0
def detectBetweenness(G, numClusters, sites, bipartite):
	"""Cluster a graph by repeatedly removing its highest-betweenness edges.

	All edge removals happen on a deep copy of ``G``; the caller's graph is
	left untouched. Edge betweenness is computed with the ``'capacity'`` edge
	attribute as weight. In every round all edges tied for the maximum are
	removed, until the copy has at least ``numClusters`` connected components
	or no edges remain.

	Side effect: the betweenness of the unmodified graph is pickled to
	``betweennessUnipartite.p`` in the current working directory.

	Args:
		G: graph to cluster.
		numClusters: number of connected components to reach.
		sites: container of nodes to report; nodes not in it are skipped.
		bipartite: unused; kept so existing callers keep working.

	Returns:
		dict mapping every node that is in ``sites`` to its component index.
	"""
	Gnew = copy.deepcopy(G)
	numComponents = nx.number_connected_components(Gnew)

	betweenness = nx.edge_betweenness_centrality(Gnew, weight='capacity')
	with open("betweennessUnipartite.p", "wb") as handle:
		pickle.dump(betweenness, handle)

	while numComponents < numClusters:
		print("num components is now  %s" % numComponents)
		if not betweenness:
			# No edges left to remove: the target cannot be reached.
			break

		# Remove every edge tied for the highest betweenness from the working
		# copy (the same graph the scores and components are computed on).
		max_ = max(betweenness.values())
		for edge, value in betweenness.items():
			if value == max_:
				Gnew.remove_edge(edge[0], edge[1])
		numComponents = nx.number_connected_components(Gnew)

		# Scores change after a removal; only recompute if another round runs.
		if numComponents < numClusters:
			betweenness = nx.edge_betweenness_centrality(Gnew, weight='capacity')

	clusters = {}
	for index, component in enumerate(nx.connected_components(Gnew)):
		members = [node for node in component if node in sites]
		for node in members:
			clusters[node] = index
		print("%s Nodes in cluster  %s" % (len(members), index))

	return clusters
Ejemplo n.º 2
0
	def find_groups_girvan_newman(self, num_groups):
		"""Partition ``self.G`` into ``num_groups`` subgraphs by edge betweenness.

		The partition for ``num_groups - 1`` is fetched through
		``self.find_groups`` and refined: the edge with the highest betweenness
		across all subgraphs is removed, repeatedly, until one subgraph falls
		apart into two connected components. Every edge removed along the way is
		put back (with its attributes) into the subgraph it came from, and the
		result is memoised in ``self.groupCache``.

		Args:
			num_groups: desired number of subgraphs.

		Returns:
			A set of graphs; a copy of the cached set, so callers may mutate it.
		"""
		if num_groups == 1:
			return set([self.G])
		elif num_groups in self.groupCache:
			# return a copy of the stored set
			return self.groupCache[num_groups].copy()
		elif num_groups > len(self.G.nodes()):
			return self.find_groups(len(self.G.nodes()))
		# returns set of subgraphs
		previous_partition = self.find_groups(num_groups - 1)

		# map subgraph -> {edge: betweenness}
		betweenness_map = {}
		for subgraph in previous_partition:
			betweenness_map[subgraph] = nx.edge_betweenness_centrality(subgraph)
		# map subgraph -> (edge, betweenness) of its highest-betweenness edge;
		# single-node subgraphs have no edges and are skipped
		betweenness_max_map = {}
		for subgraph, scores in betweenness_map.items():
			if len(subgraph.nodes()) > 1:
				betweenness_max_map[subgraph] = max(scores.items(), key=lambda item: item[1])

		# (subgraph, u, v, attributes) of every edge removed, so each edge can
		# be restored to the graph it was taken from
		removed_edges = []

		# Loop until a subgraph is split
		while True:
			print("Removing edge")
			# Identify the subgraph and edge with max betweenness
			target_subgraph, best = max(betweenness_max_map.items(), key=lambda entry: entry[1][1])
			u, v = best[0][0], best[0][1]
			# Remove the edge (temporarily), remembering its attributes
			edge_data = dict(target_subgraph.get_edge_data(u, v) or {})
			target_subgraph.remove_edge(u, v)
			removed_edges.append((target_subgraph, u, v, edge_data))
			# list() so len()/indexing work whether a list or a generator is returned
			connected_components = list(nx.connected_components(target_subgraph))
			if len(connected_components) > 1:
				# Removing one edge from a connected component yields at most 2 components
				# NOTE(review): subgraphs are taken before the repair below, as in the
				# original; whether they should regain earlier removed edges is unclear.
				new_subgraph_1 = target_subgraph.subgraph(connected_components[0])
				new_subgraph_2 = target_subgraph.subgraph(connected_components[1])
				# Repair every graph that lost an edge
				for owner, a, b, data in removed_edges:
					owner.add_edges_from([(a, b, data)])
				# Replace the target subgraph by its two halves
				previous_partition.discard(target_subgraph)
				previous_partition.add(new_subgraph_1)
				previous_partition.add(new_subgraph_2)
				# Store result
				self.groupCache[num_groups] = previous_partition
				return previous_partition.copy()
			# Not split yet: refresh betweenness data for the modified subgraph
			target_betweenness = nx.edge_betweenness_centrality(target_subgraph)
			betweenness_map[target_subgraph] = target_betweenness
			betweenness_max_map[target_subgraph] = max(target_betweenness.items(), key=lambda item: item[1])
Ejemplo n.º 3
0
    def run_n(self, n):
        """Strip edges from ``self.G`` by betweenness, scoring partitions of at most ``n`` parts.

        The loop runs until ``self.G`` has no edges left. After each removal
        the connected components are scored with ``self.calculateQ`` against
        ``self.G_copy`` as long as there are no more than ``n`` of them; the
        best-scoring component list is kept in ``self.partition``.

        Returns:
            Tuple ``(self.partition, self.all_Q, self.max_Q)``.
        """
        while len(self.G.edges()) != 0:
            # Edge carrying the highest betweenness in the current graph
            scores = nx.edge_betweenness_centrality(self.G)
            busiest = max(scores, key=scores.get)
            self.G.remove_edge(busiest[0], busiest[1])

            components = [list(nodes) for nodes in nx.connected_components(self.G)]
            if len(components) > n:
                # Too many parts to be a candidate; keep removing edges.
                continue

            currentQ = self.calculateQ(components, self.G_copy)
            if currentQ not in self.all_Q:
                self.all_Q.append(currentQ)
            if currentQ > self.max_Q:
                self.max_Q = currentQ
                self.partition = components

        print('The number of communities:', len(self.partition))
        print('Max_Q:', self.max_Q)
        print(self.partition)
        return self.partition, self.all_Q, self.max_Q
Ejemplo n.º 4
0
def plot_edge_btwn(G, bins=20):
    """
    Draw the edge-betweenness distribution of a graph on a two-row figure.

    The top axes show a histogram of the values; the bottom axes show the
    values, in the order returned by ``network_compute.get_ranked``,
    scattered against their position.

    Args:
        G: networkx graph object
        bins: bin specification forwarded to the histogram call
    Returns:
        figure handle & axes array.
    """
    # Edge -> betweenness mapping, then ranked labels/values from the helper
    scores = nx.edge_betweenness_centrality(G)
    edge_btwn_labels_sorted, values = network_compute.get_ranked(scores)

    fig, axs = plt.subplots(2, 1)
    hist_ax = axs[0]
    rank_ax = axs[1]

    # Histogram of the betweenness values
    hist_ax.hist(values, bins)
    hist_ax.set_ylabel('Occurrences')
    hist_ax.set_xlabel('Edge-betweenness')

    # Values plotted against their rank position
    positions = np.arange(len(values))
    rank_ax.scatter(positions, values, s=20, c='r')
    rank_ax.set_xlabel('Area')
    rank_ax.set_ylabel('Edge-betweenness')

    return fig, axs
 def f28(self):
     """Return the sum of all edge betweenness centralities of ``self.G``."""
     return sum(nx.edge_betweenness_centrality(self.G).values())
 def test_C4(self):
     """Edge betweenness centrality: C4"""
     # Every edge of the 4-cycle has raw betweenness 2; normalization divides by 6.
     expected = {(0, 1): 2, (0, 3): 2, (1, 2): 2, (2, 3): 2}
     graph = nx.cycle_graph(4)
     computed = nx.edge_betweenness_centrality(graph, weight=None, normalized=True)
     for edge in sorted(graph.edges()):
         assert_almost_equal(computed[edge], expected[edge] / 6.0)
    def run(self):
        """Remove edges from ``self.G`` by weight-scaled betweenness until none remain.

        Each round scores every edge as its betweenness divided by its
        ``'weight'`` attribute and removes the top-scoring edge. Whenever the
        number of connected components differs from ``len(self.partition)``,
        the components are scored with ``self.cal_Q`` against ``self.G_copy``
        and the best-scoring component list is kept.

        Returns:
            Tuple ``(self.partition, self.all_Q, self.max_Q)``.
        """
        while len(self.G.edges()) != 0:
            centrality = nx.edge_betweenness_centrality(self.G)
            # betweenness / weight for every edge
            scaled = {
                e: ebc / self.G.get_edge_data(e[0], e[1])['weight']
                for e, ebc in centrality.items()
            }

            target = max(scaled, key=scaled.get)
            self.G.remove_edge(target[0], target[1])

            components = [list(nodes) for nodes in nx.connected_components(self.G)]
            if len(components) == len(self.partition):
                continue

            cur_Q = self.cal_Q(components, self.G_copy)
            if cur_Q not in self.all_Q:
                self.all_Q.append(cur_Q)
            if cur_Q > self.max_Q:
                self.max_Q = cur_Q
                self.partition = components

        print('-----------the divided communities and the Max Q------------')
        print('The number of Communites:', len(self.partition))
        print('Max_Q:', self.max_Q)
        print(self.partition)
        return self.partition, self.all_Q, self.max_Q
Ejemplo n.º 8
0
def divisive_approach(graph):
    """
    Divisive community detection: delete the highest-betweenness edge of a
    clone of the graph until no edges remain, and keep the component split
    with the best score from ``cal_Q`` (evaluated against the input graph).

    :param graph: a nx.Graph
    :return: list of labels; ``labels[node]`` is the community index of node
    """
    time_start = time.time()
    print("Calculating communities with DA...")
    g = clone_graph(graph)
    partitions = [list(g.nodes())]
    # Indexed by node below, so this assumes nodes are the integers
    # 0..n-1 -- TODO confirm against callers.
    labels = list(g.nodes())
    max_q = 0.0
    while len(g.edges()) > 0:
        scores = nx.edge_betweenness_centrality(g)
        weakest = max(scores, key=scores.get)
        g.remove_edge(weakest[0], weakest[1])
        components = [list(nodes) for nodes in nx.connected_components(g)]
        if len(components) == len(partitions):
            continue
        q = cal_Q(components, graph)
        if q > max_q:
            max_q = q
            partitions = components
    for index, members in enumerate(partitions):
        for node in members:
            labels[node] = index
    time_end = time.time()
    print("Calculation time:", time_end - time_start, "seconds")
    return labels
Ejemplo n.º 9
0
    def run(self):
        """Remove the highest-betweenness edge of ``self.G`` until no edges remain.

        After each removal, if the number of connected components differs from
        ``len(self.partition)``, the components are scored with ``self.cal_Q``
        against ``self.G_copy``; new scores are appended to ``self.all_Q`` and
        the best-scoring component list becomes ``self.partition``.

        Returns:
            Tuple ``(self.partition, self.all_Q, self.max_Q)``.
        """
        while len(self.G.edges()) != 0:
            centrality = nx.edge_betweenness_centrality(self.G)
            busiest = max(centrality, key=centrality.get)
            self.G.remove_edge(busiest[0], busiest[1])

            components = []
            for nodes in nx.connected_components(self.G):
                components.append(list(nodes))

            if len(components) == len(self.partition):
                # Same number of parts as the stored partition: nothing to score.
                continue

            cur_Q = self.cal_Q(components, self.G_copy)
            if cur_Q not in self.all_Q:
                self.all_Q.append(cur_Q)
            if cur_Q > self.max_Q:
                self.max_Q = cur_Q
                self.partition = components

        print('-----------the Max Q and divided communities-----------')
        print('The number of Communites:', len(self.partition))
        print("Communites:", self.partition)
        print('Max_Q:', self.max_Q)
        return self.partition, self.all_Q, self.max_Q
 def test_K5(self):
     """Edge betweenness centrality: K5"""
     graph = nx.complete_graph(5)
     # Expected unnormalized value is 1 for every edge of K5.
     expected = dict.fromkeys(graph.edges(), 1)
     computed = nx.edge_betweenness_centrality(graph, weight='weight', normalized=False)
     for edge in sorted(graph.edges()):
         assert_almost_equal(computed[edge], expected[edge])
def cluster_edge_betweenness(iterations, G):
    """Remove up to ``iterations`` highest-betweenness edges from ``G`` in place.

    Betweenness is estimated from a sample of at most 10 source nodes per
    round (the ``k`` argument of ``nx.edge_betweenness_centrality``). The
    sample size is capped at the node count so small graphs do not fail, and
    the loop stops early once the graph has no edges left.

    Args:
        iterations: maximum number of edges to remove.
        G: graph to modify.

    Returns:
        The same graph object ``G``.
    """
    for i in range(iterations):
        if G.number_of_edges() == 0:
            # Nothing left to remove.
            break
        print('Iteration ', i + 1, ' of ', iterations)
        sample_size = min(10, G.number_of_nodes())
        eb = nx.edge_betweenness_centrality(G, k=sample_size)
        max_eb = max(eb, key=eb.get)
        G.remove_edge(max_eb[0], max_eb[1])
    return G
    def run(self):
        """Remove the highest-betweenness edge of ``self.G`` until no edges remain.

        Whenever a removal changes the component count relative to
        ``len(self.partition)``, the components are scored with ``self.cal_Q``
        against ``self.G_copy`` and the best-scoring split is remembered.

        Returns:
            Tuple ``(self.partition, self.all_Q, self.max_Q)``.
        """
        while len(self.G.edges()) != 0:
            # nx.edge_betweenness_centrality returns a mapping such as
            # {('C', 'F'): 0.4}; pick the edge with the largest value.
            betweenness = nx.edge_betweenness_centrality(self.G)
            top_edge = max(betweenness, key=betweenness.get)
            # Remove that edge from the working graph
            self.G.remove_edge(top_edge[0], top_edge[1])
            # Node lists of the connected components after the removal
            components = [list(nodes) for nodes in nx.connected_components(self.G)]
            if len(components) == len(self.partition):
                continue
            # Score the split.
            # NOTE(review): the original comment says
            # nx.algorithms.community.modularity(self.G_copy, components)
            # is equivalent to cal_Q -- not verifiable from here.
            cur_Q = self.cal_Q(components, self.G_copy)
            if cur_Q not in self.all_Q:
                # A score -> components mapping could also be recorded here.
                self.all_Q.append(cur_Q)
            if cur_Q > self.max_Q:
                self.max_Q = cur_Q
                self.partition = components

        print('-----------the Max Q and divided communities-----------')
        print('The number of Communites:', len(self.partition))
        print("Communites:", self.partition)
        print('Max_Q:', self.max_Q)
        return self.partition, self.all_Q, self.max_Q
 def test_P4(self):
     """Edge betweenness centrality: P4"""
     # Unnormalized expectations for the 4-node path: the middle edge scores highest.
     expected = {(0, 1): 3, (1, 2): 4, (2, 3): 3}
     graph = nx.path_graph(4)
     computed = nx.edge_betweenness_centrality(graph, weight='weight', normalized=False)
     for edge in sorted(graph.edges()):
         assert_almost_equal(computed[edge], expected[edge])
def CalculateBetweeness(graph):
    """Remove every edge tied for the highest betweenness from ``graph`` in place.

    Betweenness is normalized, unweighted and computed from all nodes. A graph
    without edges is returned unchanged.

    Args:
        graph: graph to modify.

    Returns:
        The same graph object, after the removal.
    """
    BetweenValue = nx.edge_betweenness_centrality(graph, normalized=True, k=None, weight=None, seed=None)
    if not BetweenValue:
        return graph

    # Compute the maximum once instead of once per edge.
    highest = max(BetweenValue.values())
    graph.remove_edges_from([edge for edge, score in BetweenValue.items() if score == highest])

    return graph
Ejemplo n.º 15
0
 def test_normalized_P4(self):
     """Edge betweenness centrality: P4"""
     # Raw path-graph scores; the normalized result is each of them divided by 6.
     raw_expected = {(0, 1): 3, (1, 2): 4, (2, 3): 3}
     graph = nx.path_graph(4)
     computed = nx.edge_betweenness_centrality(graph, weight=None, normalized=True)
     for edge in sorted(graph.edges()):
         assert_almost_equal(computed[edge], raw_expected[edge] / 6.0)
Ejemplo n.º 16
0
def file_to_dot(infile):
    """Build a betweenness-based spanning tree from ``infile`` and return it as DOT text.

    Rows ``(a, b, users)`` read through ``edges(infile)`` feed
    ``compute_ratios``; accepted pairs become edges of a graph whose
    ``'connection'`` attribute (the inverse of the averaged ratio) is the
    weight for edge betweenness. A second graph weighted by that betweenness
    is reduced to a minimum spanning tree and converted with pydot.

    Returns:
        str: the DOT representation of the spanning tree.
    """
    rows = list(edges(infile))
    interactions = defaultdict(lambda: 0)
    for a, b, users in rows:
        interactions[a, b] = users
    keys, ratio = compute_ratios(interactions,
                                 lambda k: interactions[k, k] > 5)

    G = nx.Graph()
    for a, b, users in rows:
        # Each unordered pair is taken once (a > b); both endpoints must be
        # accepted keys and the pair must have positive users and ratio.
        if not (a > b and a in keys and b in keys and users > 0 and ratio[a, b] > 0):
            continue
        rat = (ratio[a, b] + ratio[b, a]) / 2
        G.add_edge(a, b, {
            'ratio': rat,
            'users': users,
            'connection': rat**-1
        })

    btwn = nx.edge_betweenness_centrality(G, weight='connection')
    weighted_edges = []
    for (a, b), val in btwn.items():
        weighted_edges.append((a, b, {'weight': val}))
    GG = nx.Graph()
    GG.add_edges_from(weighted_edges)

    # Display attributes come from the helper functions size() and color().
    for node in GG.nodes_iter():
        attrs = GG.node[node]
        dimension = size(G, node)
        attrs['height'] = dimension
        attrs['width'] = dimension
        attrs['color'] = color(G, node)

    spanning_tree = nx.minimum_spanning_tree(GG)
    return nx.to_pydot(spanning_tree).to_string()
Ejemplo n.º 17
0
def list_edge_betweenness(G):
    """Return a new dict mapping each edge of ``G`` to its betweenness.

    Betweenness is normalized and uses the ``'weight'`` edge attribute.
    """
    return dict(nx.edge_betweenness_centrality(G, normalized=True, weight='weight'))
Ejemplo n.º 18
0
def _calc_bc_subset(G, Gnx, normalized, weight, k, seed, result_dtype):
    """Compare cugraph and networkx edge betweenness on ``k`` sampled sources.

    networkx cannot be given an explicit list of source vertices; it samples
    ``k`` nodes itself after seeding ``random``. The same seeding and sampling
    is replayed here so cugraph can be called with the same sources.

    Returns:
        DataFrame merged on ``src``/``dst`` with the cugraph result in column
        ``cu_bc`` and the networkx reference in ``ref_bc``.
    """
    # Seed and sample the same way networkx will (it reseeds inside its call).
    random.seed(seed)
    # random.sample needs a real sequence on Python >= 3.11; list() keeps the
    # node iteration order, so the sampled sources are unchanged.
    sources = random.sample(list(Gnx.nodes()), k)

    # NOTE: the sources were sampled from the networkx graph, so they are
    # already external ids and need no translation for cugraph.
    df = cugraph.edge_betweenness_centrality(
        G,
        k=sources,
        normalized=normalized,
        weight=weight,
        result_dtype=result_dtype,
    )

    nx_bc_dict = nx.edge_betweenness_centrality(
        Gnx, k=k, normalized=normalized, weight=weight, seed=seed
    )

    nx_df = generate_nx_result(nx_bc_dict, type(Gnx) is nx.DiGraph).rename(
        columns={"betweenness_centrality": "ref_bc"}, copy=False
    )

    merged_df = df.merge(nx_df, on=['src', 'dst']).rename(
        columns={"betweenness_centrality": "cu_bc"}, copy=False
    ).reset_index(drop=True)

    return merged_df
Ejemplo n.º 19
0
def get_communities(graph):
	"""Find communities by removing edges in descending betweenness order and plot them.

	Betweenness is computed once on the input graph. Edges are removed from a
	copy in that fixed order while the modularity of the copy's connected
	components keeps improving. The best component lists are printed, and the
	input graph is drawn with one random colour value per community and saved
	to the path in ``sys.argv[2]``.
	"""
	betweenness = nx.edge_betweenness_centrality(graph)
	ranked = sorted(betweenness.items(), key = lambda x : x[1], reverse = True)
	sorted_betweeness = [edge for edge, _score in ranked]
	best_partitions = []
	max_modularity = -1.0
	graph_copy = graph.copy()
	while sorted_betweeness:
		communities = [list(x) for x in nx.connected_components(graph_copy)]
		partitions = {}
		for index, members in enumerate(communities):
			for node in members:
				partitions[node] = index
		# NOTE(review): modularity is evaluated on the pruned copy rather than
		# on the input graph -- confirm this is intended.
		modularity = community.modularity(partitions, graph_copy)
		if modularity > max_modularity:
			best_partitions = communities
			max_modularity = modularity
		elif modularity <= max_modularity:
			break
		graph_copy.remove_edge(*sorted_betweeness.pop(0))
	for partition in best_partitions:
		print(sorted(partition))
	# One distinct random value per community, used as its node colour.
	val_map = {}
	for partition in best_partitions:
		value = random.random()
		while value in val_map.values():
			value = random.random()
		for node in partition:
			val_map[node] = value
	values = [val_map.get(node) for node in graph.nodes()]
	nx.draw_spring(graph, node_color = values, node_size = 500, with_labels = True)
	plt.savefig(sys.argv[2])
Ejemplo n.º 20
0
def _calc_bc_full(G, Gnx, normalized, weight, k, seed, result_dtype):
    """Compute edge betweenness with cugraph and networkx and merge the results.

    Both libraries receive the same ``k``, ``normalized``, ``weight`` and
    ``seed``. The cugraph result column must have dtype ``result_dtype``.

    Returns:
        DataFrame merged on ``src``/``dst`` with the cugraph values in
        ``cu_bc`` and the networkx reference values in ``ref_bc``.
    """
    score_column = "betweenness_centrality"

    cu_df = cugraph.edge_betweenness_centrality(
        G,
        k=k,
        normalized=normalized,
        weight=weight,
        seed=seed,
        result_dtype=result_dtype,
    )
    assert (
        cu_df[score_column].dtype == result_dtype
    ), "'betweenness_centrality' column has not the expected type"

    reference = nx.edge_betweenness_centrality(
        Gnx, k=k, normalized=normalized, seed=seed, weight=weight
    )
    directed = type(Gnx) is nx.DiGraph
    nx_df = generate_nx_result(reference, directed)
    nx_df = nx_df.rename(columns={score_column: "ref_bc"}, copy=False)

    # After the networkx column is renamed, the remaining score column is cugraph's.
    merged_df = cu_df.merge(nx_df, on=['src', 'dst'])
    merged_df = merged_df.rename(columns={score_column: "cu_bc"}, copy=False)
    return merged_df.reset_index(drop=True)
Ejemplo n.º 21
0
def girvan_newman_algorithm(G, weight):
    """Run Girvan-Newman edge removal and log the modularity at every step.

    ``G`` is the original network and is never modified; edges are removed
    one at a time from a copy. At each step the connected components of the
    copy are scored with ``community.modularity`` against ``G``, then the edge
    with the highest betweenness is removed. The loop ends when the copy has
    no edges; a graph without edges yields empty logs.

    Args:
        G: the original network.
        weight: edge attribute name passed to both the modularity and the
            betweenness computation.

    Returns:
        Tuple ``(log_step, log_modularity, max_step)``: the step numbers, the
        modularity at each step, and the step with the highest modularity
        (0 if no step scored above 0).
    """
    g = G.copy()  # working copy that loses one edge per step
    log_step = []  # step numbers
    log_modularity = []  # modularity at each step
    old_max_m = 0  # best modularity seen so far
    max_step = 0  # step at which the best modularity was seen
    step = 0

    while len(g.edges()) > 0:
        # Current communities: connected components, largest first
        k = sorted(nx.connected_components(g), key=len, reverse=True)
        m = community.modularity(G, communities=k, weight=weight)
        if m > old_max_m:
            max_step = step
            old_max_m = m
        log_step.append(step)
        log_modularity.append(m)
        print("step: ", step, "  modularity: ", m)

        # Remove the edge with the highest betweenness centrality
        step = step + 1
        betweenness = nx.edge_betweenness_centrality(g, weight=weight)
        max_edge = max(betweenness, key=betweenness.get)
        g.remove_edge(max_edge[0], max_edge[1])

    return log_step, log_modularity, max_step
Ejemplo n.º 22
0
def test_edge_betweenness_centrality_nx(
        graph_file,
        directed,
        edgevals
):
    """Check that cugraph matches networkx when given a networkx graph.

    Both results are sorted by edge key and compared position by position.
    The test passes when fewer than 1% of the edges differ by more than 0.01.
    """
    Gnx = utils.generate_nx_graph_from_file(graph_file, directed, edgevals)
    assert nx.is_directed(Gnx) == directed

    nx_bc = nx.edge_betweenness_centrality(Gnx)
    cu_bc = cugraph.edge_betweenness_centrality(Gnx)

    # Calculating mismatch
    networkx_bc = sorted(nx_bc.items(), key=lambda x: x[0])
    cugraph_bc = sorted(cu_bc.items(), key=lambda x: x[0])
    err = 0

    assert len(networkx_bc) == len(cugraph_bc)
    for (cu_edge, cu_value), (nx_edge, nx_value) in zip(cugraph_bc, networkx_bc):
        # NOTE(review): positions whose edge keys differ are not counted as
        # mismatches -- confirm this is intended.
        if abs(cu_value - nx_value) > 0.01 and cu_edge == nx_edge:
            err = err + 1
            # Report both values (the cugraph value used to be printed twice).
            print(f"{cu_value} and {nx_value}")
    print("Mismatches:", err)
    assert err < (0.01 * len(cugraph_bc))
Ejemplo n.º 23
0
 def test_K5(self):
     """Edge betweenness centrality: K5"""
     graph = nx.complete_graph(5)
     computed = nx.edge_betweenness_centrality(graph, weight=None, normalized=False)
     # Every edge of the complete graph is expected to score exactly 1.
     expected = dict.fromkeys(graph.edges(), 1)
     for edge in sorted(graph.edges()):
         assert_almost_equal(computed[edge], expected[edge])
Ejemplo n.º 24
0
def EBGirvanNewman(G):
    """Remove top-betweenness edges from ``G`` in place until it gains a component.

    Betweenness uses the ``'weight'`` edge attribute. In each round every edge
    tied for the maximum is removed. The endpoints of the most recently
    removed edge are published in the module globals ``k0`` and ``k1``.
    """
    global k0, k1

    print("Inside EBGirvanNewman")
    initcomp = nx.number_connected_components(G)
    ncomp = initcomp
    print("Initial ncomp, ", ncomp)
    while ncomp <= initcomp:
        bw = nx.edge_betweenness_centrality(G, weight='weight')
        # Highest centrality value of this round
        max_ = max(bw.values())
        print(".....................", max_)
        # Remove all edges that reach it -- there can be more than one
        for edge, score in bw.items():
            if float(score) != max_:
                continue
            G.remove_edge(edge[0], edge[1])
            k1 = edge[1]
            k0 = edge[0]
        # Recount the components after the removals
        ncomp = nx.number_connected_components(G)
        print("No. of components", ncomp)
    print("Over............")
Ejemplo n.º 25
0
def gRa(g, w):
    '''Build a dense matrix from edge betweenness, filled with a constant off the edges.

    ``w`` is an edge count: per the original note, the number of edges that
    remain in the data after the edge-removal perturbation.

    Side effect: every edge of ``g`` gets its unnormalized betweenness stored
    in its ``'weight'`` attribute.
    NOTE(review): the original made an unused copy of ``g``; the in-place
    mutation may be unintended -- confirm with callers.

    Returns:
        2-D array: betweenness weights rescaled to sum to ``2 * w``; every
        position that was 0 in the input adjacency matrix is set to
        ``q = (e_num - w) / (N - e_num)``; the diagonal is 0.
    '''
    # Adjacency of the input, taken before the weights are overwritten
    Rq = nx.to_scipy_sparse_matrix(g).toarray()

    bw = nx.edge_betweenness_centrality(g, normalized=False)
    e_num = len(g.edges())

    n = len(g)
    N = (n * (n - 1)) / 2  # number of node pairs
    for edge, score in bw.items():
        g.add_edge(*edge, weight=score)
    Rp = nx.to_scipy_sparse_matrix(g, weight='weight').toarray()

    Rp = w * Rp * 2.0 / Rp.sum()

    q = float(e_num - w) / (N - e_num)

    # Wherever the input adjacency has no entry, put q into Rp
    for i, row in enumerate(Rq):
        for j, entry in enumerate(row):
            if entry == 0:
                Rp[i, j] = q
    # Clear the q values that landed on the diagonal
    for i in range(n):
        Rp[i, i] = 0
    return Rp
Ejemplo n.º 26
0
    def run(self):
        """Remove the highest-betweenness edge of ``self.G`` until no edges remain.

        Each time the component count differs from ``len(self.partition)``,
        the components are scored with ``self.calculateQ`` against
        ``self.G_copy``; the best score and its component list are stored in
        ``self.max_Q`` and ``self.partition``. Results are printed, not returned.
        """
        while len(self.G.edges()) != 0:
            # Locate and drop the edge with the largest betweenness
            centrality = nx.edge_betweenness_centrality(self.G)
            heaviest = max(centrality, key=centrality.get)
            self.G.remove_edge(heaviest[0], heaviest[1])

            # Node lists of the components that remain
            components = [list(nodes) for nodes in nx.connected_components(self.G)]
            if len(components) == len(self.partition):
                continue

            # A different number of parts: score it and keep the best one
            currentQ = self.calculateQ(components, self.G_copy)
            if currentQ not in self.all_Q:
                self.all_Q.append(currentQ)
            if currentQ > self.max_Q:
                self.max_Q = currentQ
                self.partition = components

        print('The number of communities:', len(self.partition))
        print('Max_Q:', self.max_Q)
        print(self.partition)
Ejemplo n.º 27
0
def find_disjoint_graphs(my_graph):
    """Remove the highest-betweenness edge from ``my_graph`` and display the result.

    The removed edge is appended to the module-level ``edges_removed`` list,
    the number of connected components is printed, and the graph is drawn
    and shown with matplotlib.

    Raises:
        IndexError: if the graph has no edges (same exception type as before,
            now with an explicit message).
    """
    # Dictionary of edges with the calculated value of betweenness centrality
    edgeList = nx.edge_betweenness_centrality(my_graph)
    if not edgeList:
        raise IndexError("graph has no edges to remove")

    # First edge with the highest betweenness
    edgeNodes = max(edgeList, key=edgeList.get)
    maxEdgeBetweenness = edgeList[edgeNodes]
    print("Highest betweenness is %f - for the edge %s" % (maxEdgeBetweenness, edgeNodes,))

    # Remove the edge with highest betweenness
    my_graph.remove_edge(edgeNodes[0], edgeNodes[1])
    print("Removed edge %s" % (edgeNodes,))
    # Add the removed edge to the edges_removed list
    edges_removed.append(edgeNodes)

    num_of_connected_components = nx.number_connected_components(my_graph)
    print("Number of connected components(sub-graphs/communities) after removing edge %s = %d" % (edgeNodes,num_of_connected_components))
    # Draw and show the graph, with labels
    nx.draw_networkx(my_graph, pos=None, with_labels=True)
    plt.show()
Ejemplo n.º 28
0
def EBC_weights(g):
    """Return the adjacency array of ``g`` with edge betweenness as edge weights.

    The weights are written to a copy; ``g`` itself is not modified.
    """
    scores = nx.edge_betweenness_centrality(g)
    weighted = g.copy()
    weighted.add_weighted_edges_from(
        [(u, v, scores[(u, v)]) for u, v in scores]
    )
    return nx.to_numpy_array(weighted)
Ejemplo n.º 29
0
def girvan_newman_step(graph):
    '''
    INPUT: Graph G
    OUTPUT: None

    Run one step of the Girvan-Newman community detection algorithm:
    remove the edge with the highest betweenness (weighted by the 'sim'
    attribute) until the graph has more connected components than it
    started with. One line is printed per removed edge: both endpoints,
    the initial component count, the initial edge count, the size of the
    initially largest component, and a timestamp.
    '''
    size = nx.number_connected_components(graph)
    edges = graph.number_of_edges()
    biggroup = len(max(nx.connected_components(graph), key=len))
    cur = 0
    while cur <= size:
        scores = Counter(nx.edge_betweenness_centrality(graph, weight='sim'))
        most_connected = scores.most_common(1)[0][0]
        node1, node2 = most_connected[0], most_connected[1]
        outs = '%d, %d, %d, %d, %d, %f\n' % (
            node1, node2, size, edges, biggroup, time())
        print(outs)
        sys.stdout.flush()
        graph.remove_edge(*most_connected)
        cur = nx.number_connected_components(graph)
Ejemplo n.º 30
0
def whole_graph_metrics(graph, weighted=False):
    """Compute a set of whole-graph summary metrics.

    Args:
        graph: networkx graph.
        weighted: forwarded as the ``weight`` argument of the path-length and
            clustering calls.
            NOTE(review): networkx documents ``weight`` as an attribute name
            or None, not a bool -- confirm the intended value.

    Returns:
        dict with keys ``avg_shortest_path``, ``avg_eccentricity``,
        ``avg_ccoeff``, ``avg_node_btwn``, ``avg_edge_btwn`` and ``isolates``.
    """
    graph_metrics = {}

    # Shortest average path length
    graph_metrics['avg_shortest_path'] = \
        nx.average_shortest_path_length(graph, weight=weighted)

    # Average eccentricity. list() is needed because a dict view is not
    # converted element-wise by numpy on Python 3.
    ecc_dict = nx.eccentricity(graph)
    graph_metrics['avg_eccentricity'] = np.mean(list(ecc_dict.values()))

    # Average clustering coefficient
    # NOTE: Option to include or exclude zeros
    graph_metrics['avg_ccoeff'] = \
        nx.average_clustering(graph, weight=weighted, count_zeros=True)

    # Average node betweenness
    avg_node_btwn_dict = nx.betweenness_centrality(graph, normalized=True)
    graph_metrics['avg_node_btwn'] = \
        np.mean(list(avg_node_btwn_dict.values()))

    # Average edge betweenness
    avg_edge_btwn_dict = nx.edge_betweenness_centrality(graph, normalized=True)
    graph_metrics['avg_edge_btwn'] = \
        np.mean(list(avg_edge_btwn_dict.values()))

    # Number of isolates; nx.isolates may be a generator, so materialize it
    graph_metrics['isolates'] = len(list(nx.isolates(graph)))

    return graph_metrics
Ejemplo n.º 31
0
 def test_balanced_tree(self):
     """Edge betweenness centrality: balanced tree"""
     # Root edges score 12, leaf edges score 6 (unnormalized).
     expected = {(0, 1): 12, (0, 2): 12, (1, 3): 6, (1, 4): 6, (2, 5): 6, (2, 6): 6}
     tree = nx.balanced_tree(r=2, h=2)
     computed = nx.edge_betweenness_centrality(tree, weight="weight", normalized=False)
     for edge in sorted(tree.edges()):
         assert_almost_equal(computed[edge], expected[edge])
Ejemplo n.º 32
0
	def communitySplits(self, graph):
		"""
		Compute the splits for the formation of communities.

		Removes, in place, all edges tied for the highest betweenness, round
		after round, until the number of connected components exceeds the
		initial count or the graph has no edges left.

		Arguments:
			graph -  A networkx graph.

		Returns:
			The same graph object with the removed edges missing.
		"""
		nConnComp = nx.number_connected_components(graph)
		nComm = nConnComp

		while nComm <= nConnComp:
			betweenness = nx.edge_betweenness_centrality(graph)
			if not betweenness:
				# No edges left to remove.
				break
			max_betweenness = max(betweenness.values())
			for edge, score in betweenness.items():
				if float(score) == max_betweenness:
					graph.remove_edge(edge[0], edge[1])
			nComm = nx.number_connected_components(graph)
		return graph
Ejemplo n.º 33
0
def edge_betweeness_centrality(X):
    """
    Replace each flattened matrix in X by its edge betweenness centralities.

    Every row of X is reshaped to a matrix, min-max normalized, inverted
    (1 - value), thresholded through ``percentage_removed`` and turned into a
    graph. The normalized, weighted edge betweenness is written back into a
    matrix of the same shape at position [u, v] of each edge and flattened.
    Based on the networkx function edge_betweenness_centrality.

    Returns:
        Array with the shape of X holding the flattened betweenness matrices.
    """
    XX = np.zeros(X.shape)
    for i, value in enumerate(X):
        # Array dimensions must be integers; np.sqrt returns a float.
        # assumes each row is a flattened square matrix -- TODO confirm
        side = int(round(np.sqrt(len(value))))
        adj_mat = value.reshape((side, -1))
        # NOTE(review): a constant row makes max == min and divides by zero.
        adj_mat = (adj_mat - np.min(adj_mat)) / (np.max(adj_mat) - np.min(adj_mat))
        adj_mat = 1 - adj_mat

        percent, th, adj_mat, triu = percentage_removed(adj_mat, 0.43)
        print("percent = {0}, threshold position = {1}, threshold = {2}\n".format(percent, th, triu[th]))

        g = nx.from_numpy_matrix(adj_mat)
        n_nodes = g.number_of_nodes()
        n_edges = g.number_of_edges()
        print("Graph Nodes = {0}, Graph Edges = {1} ".format(n_nodes, n_edges))
        print("\nEdge kept ratio, {0}".format(float(n_edges) / ((n_nodes * (n_nodes - 1)) / 2)))

        bet_cent = nx.edge_betweenness_centrality(g, weight='weight', normalized=True)
        edge_cent = np.zeros(adj_mat.shape)

        for edge, score in bet_cent.items():
            edge_cent[edge[0], edge[1]] = score
        XX[i] = edge_cent.reshape(-1)
        print("graph {0} => mean {1}, min {2}, max {3}".format(i, np.mean(XX[i]), np.min(XX[i]), np.max(XX[i])))

    return XX
Ejemplo n.º 34
0
 def test_C4(self):
     """Edge betweenness centrality: C4"""
     cycle = nx.cycle_graph(4)
     computed = nx.edge_betweenness_centrality(cycle, weight=None, normalized=True)
     # Raw score 2 on every edge of the cycle, divided by 6 for normalization.
     raw_expected = {(0, 1): 2, (0, 3): 2, (1, 2): 2, (2, 3): 2}
     for edge in sorted(cycle.edges()):
         assert_almost_equal(computed[edge], raw_expected[edge] / 6)
Ejemplo n.º 35
0
 def most_valuable_edge(G):
     """Pick the edge of `G` whose betweenness centrality is largest.

     The caller is expected to pass a graph with at least one edge, so
     the score mapping is never empty.
     """
     scores = nx.edge_betweenness_centrality(G)
     best_edge, _best_score = max(scores.items(), key=lambda pair: pair[1])
     return best_edge
Ejemplo n.º 36
0
def edge_betweenness(graph):
    """Return the largest normalized edge betweenness in ``graph``.

    Returns 0 when the graph has no edges.
    """
    # ``normalized`` is passed by keyword: on networkx 2+ the second
    # positional parameter is ``k`` (sample size), not ``normalized``.
    scores = nx.edge_betweenness_centrality(graph, normalized=True)
    largest = 0
    for score in scores.values():
        if largest < score:
            largest = score
    return largest
def edge_betweenness(edge_list=path+'connected-component-analysis/network-profiling-data/cid6_analysis/cid6-edge-list',
                     out_file=path+'connected-component-analysis/network-profiling-data/cid6_analysis/cid6-edge-betweenness-10000.json'):
    """Estimate edge betweenness for a tab-separated edge list and save it as JSON.

    Betweenness is estimated from a sample of 10000 source nodes. The JSON
    keys are the two endpoint labels of each edge joined by a tab.

    Args:
        edge_list: path of the tab-delimited edge list to read.
        out_file: path of the JSON file to write.
    """
    G = nx.read_edgelist(edge_list, delimiter='\t')
    M = nx.edge_betweenness_centrality(G, k=10000)
    M_new = {k[0] + '\t' + k[1]: v for k, v in M.items()}
    # Context manager so the output is flushed and closed deterministically.
    with open(out_file, 'w') as handle:
        json.dump(M_new, handle)
Ejemplo n.º 38
0
def edge_betweenness_centrality(f, ft, gnx):
    """Compute edge betweenness of ``gnx``, time it, and write one line per edge.

    Args:
        f: writable text file object; receives ``<edge>,<value>`` lines.
        ft: handle passed through to ``timer.start`` / ``timer.stop``.
        gnx: networkx graph.

    Returns:
        The edge -> betweenness mapping.
    """
    start = timer.start(ft, 'Edge Betweenness Centrality')
    result = nx.edge_betweenness_centrality(gnx)
    timer.stop(ft, start)

    # One complete line per edge; a bare string given to writelines would be
    # written one character at a time.
    f.writelines(str(edge) + ',' + str(value) + '\n' for edge, value in result.items())
    return result
def edge_remove(G):
    """Return the edge of ``G`` with the highest betweenness centrality."""
    scores = nx.edge_betweenness_centrality(G)
    # Rank (edge, score) pairs from highest to lowest score; take the top edge.
    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    top_edge = ranked[0][0]
    return top_edge
Ejemplo n.º 40
0
def split_graph(G):
    """Remove highest-betweenness edges from ``G`` in place until it gains a component.

    The edge is taken straight from the betweenness mapping, so node labels
    keep their original Python types (a numpy array of edge tuples would
    coerce them).
    """
    initial_communities = nx.number_connected_components(G)
    while initial_communities == nx.number_connected_components(G):
        betweenness = nx.edge_betweenness_centrality(G)
        most_important_edge = max(betweenness, key=betweenness.get)
        G.remove_edge(most_important_edge[0], most_important_edge[1])
Ejemplo n.º 41
0
def _calc_edges_betweenness(G):
    """Map every edge key of ``G`` to its betweenness.

    The lookup uses only the first two elements of each edge key (the node
    pair), so longer edge keys are reduced to their endpoints.
    """
    by_node_pair = nx.edge_betweenness_centrality(G)
    return {edge: by_node_pair[edge[:2]] for edge in G.edges}
Ejemplo n.º 42
0
def bc_filter(G):
    """Return a copy of ``G`` with edge betweenness stored in the ``'BC'`` attribute.

    Betweenness is weighted by ``'inverse_weight'``, which is set on every
    edge of the copy to 1 / ``'weight'``. ``G`` itself is not modified.
    """
    annotated = G.copy()
    for u, v, data in annotated.edges(data=True):
        annotated[u][v]['inverse_weight'] = 1/data['weight']

    scores = nx.edge_betweenness_centrality(annotated, weight='inverse_weight')
    for (u, v), score in scores.items():
        annotated[u][v]['BC'] = score
    return annotated
Ejemplo n.º 43
0
def Betweenness_Edge_centrality(G, fn):
	'''
	Return the first int(fn) entries of the edge betweenness of G after it
	has been passed through SortDictionary.

	G: Graph
	fn: function of n used to delimit the number of hotspots
	'''
	ranked = SortDictionary(nx.edge_betweenness_centrality(G))
	limit = int(fn)
	return ranked[:limit]
Ejemplo n.º 44
0
def setCapacity(G, factor):
	"""Store a betweenness-derived 'capacity' on every edge of G.

	The capacity of an edge is its normalized edge betweenness (computed
	with the 'weight' attribute) multiplied by factor. A graph without
	edges is returned untouched. G is modified in place and returned.
	"""
	if not G.number_of_edges() > 0:
		return G

	scores = nx.edge_betweenness_centrality(G, normalized=True, weight='weight')
	for source, target, attrs in G.edges(data=True):
		attrs['capacity'] = (scores[(source, target)]*factor)
	return G
 def test_balanced_tree(self):
     """Edge betweenness centrality: balanced tree"""
     # Binary tree of height 2: the two root edges are expected to score 12,
     # the four leaf edges 6 (unnormalized).
     tree = nx.balanced_tree(r=2, h=2)
     computed = nx.edge_betweenness_centrality(tree, weight='weight', normalized=False)
     expected = dict.fromkeys([(0, 1), (0, 2)], 12)
     expected.update(dict.fromkeys([(1, 3), (1, 4), (2, 5), (2, 6)], 6))
     for edge in sorted(tree.edges()):
         assert_almost_equal(computed[edge], expected[edge])
Ejemplo n.º 46
0
def get_betweenness_dictionary(edges, costs):
    """Return unnormalized, cost-weighted edge betweenness for ``edges``.

    Builds an undirected ``nx.Graph`` from ``edges``, stores
    ``costs[u][v]`` as the ``'cost'`` attribute of each edge ``(u, v)`` and
    delegates to ``nx.edge_betweenness_centrality``.

    This is slow and relies on a graph library; it is close to what the
    programming assignment asks you to implement yourself.
    """
    graph = nx.Graph()
    graph.add_edges_from(edges)
    for u, v in graph.edges:
        graph.edges[u, v]['cost'] = costs[u][v]
    return nx.edge_betweenness_centrality(graph, normalized=False, weight='cost')
Ejemplo n.º 47
0
# Simulation written directly in a class body: everything below runs once,
# while the class statement is being executed. For each (N, delta) pair it
# builds five directed graphs, loads their edges with random traffic routed
# along weighted shortest paths, and prints the mean of the largest edge
# weight observed.
# NOTE(review): listing `object` before `nx.Graph` in the bases looks like it
# cannot be linearized (MRO) if nx.Graph itself derives from object, which
# would raise TypeError after the body has run -- confirm.
class Topology(object, nx.Graph):

    for N in [20, 30, 40]:
        for delta in [2, 4, 8]:
            # Largest edge weight of each of the five repetitions.
            fmax_vector = []
            for i in range(5):
                nodes = range(N)
                # The seed is reset on every repetition.
                # NOTE(review): presumably this makes all five repetitions
                # draw the same random numbers -- confirm that is intended.
                np.random.seed(5)
                # Same in- and out-degree sequence: delta for every node.
                degree = [delta for i in xrange(N)]
                G = nx.directed_havel_hakimi_graph(degree, degree)
                G = nx.DiGraph(G)

                # Initialise 'weight' and 'capacity' of every edge with its
                # unnormalized edge betweenness.
                bb = nx.edge_betweenness_centrality(G, normalized=False)
                nx.set_edge_attributes(G, 'weight', bb)
                nx.set_edge_attributes(G, 'capacity', bb)

                # Traffic matrix: T_matrix[s, d] is the demand from s to d.
                T_matrix = np.zeros((N, N))

                for s in nodes:
                    for d in nodes:
                        if s != d:
                            flow = np.random.uniform(0.5, 1.5)
                            T_matrix[s, d] = flow
                            # Where a direct edge exists, overwrite its
                            # weight with the demand and draw a new capacity.
                            if G.has_edge(s, d):
                                G.edge[s][d]['weight'] = flow
                                G.edge[s][d]['capacity'] = np.random.randint(
                                    8, 12)

                f_value = 0
                (p, a) = (0, 0)
                for i in range(N):
                    for j in range(N):
                        # Route the (i, j) demand along the current weighted
                        # shortest path and add it to every edge on the path.
                        edges = nx.shortest_path(G, i, j, weight='weight')
                        for k in range(len(edges) - 1):
                            G.edge[edges[k]][edges[
                                k + 1]]['weight'] += T_matrix[i][j]
                    # This check sits outside the inner `for j` loop, so `j`
                    # is the last value that loop produced.
                    # NOTE(review): looks like it was meant to run per (i, j)
                    # pair -- confirm the indentation.
                    if i != j:
                        flow_value = nx.maximum_flow_value(G, i, j)
                        if flow_value > f_value:
                            f_value = flow_value
                            (p, a) = (i, j)

                # Find the most heavily loaded edge of this repetition.
                fmax = 0
                (s_f, d_f) = (0, 0)

                for s in G.edge:
                    for d in G.edge[s]:
                        if G.edge[s][d]['weight'] > fmax:
                            fmax = G.edge[s][d]['weight']
                            (s_f, d_f) = (s, d)
                fmax_vector.append(fmax)
                # p, a, s_f, d_f and tot_edges are assigned but not read
                # anywhere in this block.
                tot_edges = G.number_of_edges()

            np.set_printoptions(precision=3)
            #print T_matrix
            #print tot_edges
            # Report the mean of the per-repetition maxima for this (N, delta).
            print 'N = ' + str(N) + ' D = ' + str(delta) + 'fmax = ' + str(
                np.mean(fmax_vector))  #+ ' flow = ' + str(flow_value)
Ejemplo n.º 48
0
def centralize_graph(graph, epb='lgth', efb='capa', ndg='capa', nec='capa', npr='capa'):
    """Compute edge centralities.

    Works on a copy of ``graph``. The ``'capa'`` attribute of every edge of
    the copy is first rescaled so that all capacities sum to one, and
    ``'lgth'`` is set to the reciprocal of the rescaled capacity. Degree,
    eigenvector and PageRank centrality are evaluated on the line graph of
    the copy, whose nodes are the edges of the copy.

    Parameters
    ----------
    graph : original graph; every edge must carry a ``'capa'`` attribute
    epb : edge property used for computation of edge path betweenness
    efb : "                                          flow betweenness
    ndg : "                                          degree centrality
    nec : "                                          eigenvector centrality
    npr : "                                          page rank

    ``nec`` and ``npr`` are accepted but never read: eigenvector centrality
    and PageRank are both weighted by ``ndg``.

    Returns
    -------
    graphCentralities : copy of ``graph`` whose edges carry the attributes
        ``'epb'``, ``'efb'``, ``'ndg'``, ``'nec'`` and ``'npr'``

    """
    def _edge_score(scores, edge):
        # A result dictionary may key an edge as (u, v) or as (v, u).
        # Membership is tested on the dict itself, not on .keys(), so the
        # check stays a hash lookup on every Python version.
        if edge in scores:
            return scores[edge]
        return scores[edge[::-1]]

    graphCentralities = graph.copy()
    edges = graphCentralities.edges(data=True)

    # Normalize capacities to sum to one; length is the inverse capacity.
    edgeCapacity = 1.0 * np.array([attrs['capa'] for _, _, attrs in edges])
    edgeCapacity /= edgeCapacity.sum()
    edgeLength = 1.0 / edgeCapacity
    for index, (_, _, attrs) in enumerate(edges):
        attrs['capa'] = edgeCapacity[index]
        attrs['lgth'] = edgeLength[index]

    edgeBetweenCentrality = nx.edge_betweenness_centrality(graphCentralities, weight=epb)
    edgeFlowBetweennessCentrality = nx.edge_current_flow_betweenness_centrality(graphCentralities, weight=efb)

    lineGraph = nx.line_graph(graphCentralities)
    degree = graphCentralities.degree(weight=ndg)
    for node1, node2, attrs in lineGraph.edges(data=True):
        # Line-graph nodes are edges of the copy; weight each line-graph
        # edge by the weighted degree of an endpoint the two edges share.
        sharedNode = list(set(node1).intersection(node2))[0]
        attrs[ndg] = degree[sharedNode]
    eigenvectorCentrality = nx.eigenvector_centrality_numpy(lineGraph, weight=ndg)
    pageRank = nx.pagerank(lineGraph, weight=ndg)
    degreeCentrality = dict(lineGraph.degree(weight=ndg))

    # Attribute name -> result table, in the order the attributes are written.
    centralities = (
        ('epb', edgeBetweenCentrality),
        ('efb', edgeFlowBetweennessCentrality),
        ('ndg', degreeCentrality),
        ('nec', eigenvectorCentrality),
        ('npr', pageRank),
    )
    for node1, node2, attrs in edges:
        edge = (node1, node2)
        for name, scores in centralities:
            attrs[name] = _edge_score(scores, edge)
    return graphCentralities
Ejemplo n.º 49
0
        def most_valuable_edge(G):
            """Return the edge of `G` whose betweenness centrality is largest.

            If several edges tie, the one that comes first in the
            betweenness dictionary is returned.

            """
            # max() would raise on an empty dictionary; the surrounding code
            # is expected to call this only for graphs that have edges.
            scores = nx.edge_betweenness_centrality(G)
            best_edge, _ = max(scores.items(), key=lambda item: item[1])
            return best_edge
Ejemplo n.º 50
0
def compute_edge_betweenness(g):
    """Weight ``g`` and return its edge betweenness plus the largest score.

    ``mp.weight_graph(g)`` is called first; betweenness is then computed
    with the ``'weight'`` edge attribute.

    Returns
    -------
    (ebc, m) : ``ebc`` is the dictionary returned by
        ``nx.edge_betweenness_centrality``; ``m`` is the largest value in it,
        or 0 when no value exceeds 0 (including when there are no edges).
    """
    mp.weight_graph(g)
    # The attribute name must be passed by keyword: the second positional
    # parameter of edge_betweenness_centrality is not `weight`, so a
    # positional 'weight' never selected the edge attribute.
    ebc = nx.edge_betweenness_centrality(g, weight='weight')

    # The leading 0 is the floor of the maximum search.
    m = max([0] + list(ebc.values()))

    return ebc, m
Ejemplo n.º 51
0
 def edge_betweenness_centrality(self, k=None):
     """ Compute edge betweenness on ``self.graph`` and re-key it through
         ``self.coordmapr``. Segments are weighted by their `distance`
         attribute and ``k`` is forwarded to networkx unchanged.

         Edges with an endpoint missing from ``self.coordmapr`` are left out
         of the returned dictionary.
     """
     road_bw = nx.edge_betweenness_centrality(self.graph, k=k, weight='distance')
     return {
         (self.coordmapr[source], self.coordmapr[target]): betweenness
         for (source, target), betweenness in road_bw.items()
         if source in self.coordmapr and target in self.coordmapr
     }
Ejemplo n.º 52
0
 def find_best_edge(G0):
     """
     Return the edge of G0 with the highest betweenness.

     networkx's edge_betweenness_centrality returns a dictionary keyed by
     edge; the key with the largest value is returned. If several edges
     tie, the one that comes first in that dictionary wins.

     Raises IndexError if G0 has no edges.
     """
     eb = nx.edge_betweenness_centrality(G0)
     if not eb:
         # Keep the exception type that indexing an empty ranking produced.
         raise IndexError("graph has no edges")
     # max() works on every Python version (dict.items() has no .sort() on
     # Python 3) and avoids sorting all edges to find a single one.
     return max(eb, key=eb.get)
def Girvannewman(G):
    """Remove, in place, every edge of ``G`` that has the top betweenness.

    Performs a single pass: betweenness is computed once and all edges tied
    for the maximum are removed. The function does not loop until the graph
    splits, and it returns ``None``. A graph without edges is left as is.
    """
    bw = nx.edge_betweenness_centrality(G)
    if not bw:
        # max() on an empty sequence would raise; there is nothing to cut.
        return
    maximum_value = max(bw.values())
    # items() rather than iteritems(): works on Python 2 and Python 3.
    for key, value in bw.items():
        if float(value) == maximum_value:
            G.remove_edge(key[0], key[1])
Ejemplo n.º 54
0
def process_data(denom=100000, round=0):
	f = csv.reader(open("../applab_new_6.csv", 'rb'), delimiter=',')
	db = nx.DiGraph()
	full_users = set()
	i = 0
	uniquect = 0
	for line in f:
		if i % 100000 == 0 : print "processed", i, "lines"
		if i == 1000: break
		sender, receiver, date, time, duration, cost, location, region = map(lambda x: x.strip(), line)
		if sender not in full_users:
			uniquect += 1
			full_users.add(sender)
			if uniquect <= 2: #% denom - round == 0:
				db.add_node(sender)
				if db.has_node(receiver) == False:
					db.add_node(receiver)
		else:
			if db.has_node(receiver) == False:
				db.add_node(receiver)

		if db.has_edge(sender, receiver):
			db[sender][receiver]['weight'] += int(duration)
		else:
			db.add_edge(sender, receiver, weight=int(duration))
		i+=1
	#pickle.dump(db, open("users_networkx.p" % str(round), "wb"))
	#print "degree assortativity coeff:", nx.degree_assortativity_coefficient(db)
	#print "average degree connectivity:", nx.average_degree_connectivity(db)
	#	print "k nearest neighbors:", nx.k_nearest_neighbors(db)
	print "calculating deg cent"
	deg_cent = nx.degree_centrality(db) #sorted(nx.degree_centrality(db).items(), key=lambda x: x[1])
	print "calculating in deg cent"
	in_deg_cent = nx.in_degree_centrality(db) #sorted(nx.in_degree_centrality(db).items(), key=lambda x: x[1])
	print "calculating out deg cent"
	out_deg_cent = nx.out_degree_centrality(db) #sorted(nx.out_degree_centrality(db).items(), key=lambda x: x[1])
	print "closeness cent"
	closeness_cent = nx.closeness_centrality(db) #sorted(nx.closeness_centrality(db).items(), key=lambda x: x[1])
	#print "betweenness cent"
	#btwn_cent = nx.betweenness_centrality(db) #sorted(nx.betweenness_centrality(db).items(), key=lambda x: x[1])
	print "done"
	w = open("../output/user_network_stats.csv", 'w')
	w.write("uid,deg_cent,in_deg_cent,out_deg_cent,closeness_cent,btwn_cent\n")
	for user in deg_cent.keys():
		try:
			w.write("%s,%s,%s,%s,%s\n" % (user, deg_cent[user], in_deg_cent[user], out_deg_cent[user], closeness_cent[user]))
		except: pass
	w.close()
	print "drawing..."
	nx.draw(db)
	plt.savefig("path.pdf")
	print "done!"
	print "edge betweenness centrality:", nx.edge_betweenness_centrality(db)
	print "communicability:", nx.communicability(db)
	print "communicability centrality:", nx.communicability_centrality(db)
Ejemplo n.º 55
0
def d3_graph(graph):
    """Annotate ``graph`` with centralities and return it as node-link JSON.

    The graph is modified in place: every edge gets a ``'betweenness'``
    attribute (weighted edge betweenness) and every node gets ``"bw"``
    (weighted betweenness centrality) and ``"dc"`` (degree centrality).
    The result is the JSON string of ``json_graph.node_link_data(graph)``.
    """
    betweenness = nx.betweenness_centrality(graph, weight="weight")
    degree = nx.degree_centrality(graph)
    edge_scores = nx.edge_betweenness_centrality(graph, weight="weight")

    nx.set_edge_attributes(graph, 'betweenness', edge_scores)
    for node in graph.nodes():
        attrs = graph.node[node]
        attrs["bw"] = betweenness[node]
        attrs["dc"] = degree[node]

    return json.dumps(json_graph.node_link_data(graph))
Ejemplo n.º 56
0
def CmtyGirvanNewmanStep(G):
    """Remove top-betweenness edges from ``G`` until it gains a component.

    ``G`` is modified in place. On every round the weighted edge
    betweenness (attribute ``'weight'``) is recomputed and all edges tied
    for the maximum are removed. The loop ends once the number of connected
    components exceeds the initial count, or when no edges remain.
    Returns ``None``.
    """
    init_ncomp = nx.number_connected_components(G)
    ncomp = init_ncomp
    while ncomp <= init_ncomp:
        bw = nx.edge_betweenness_centrality(G, weight='weight')
        if not bw:
            # No edges left: max() would raise and the component count
            # cannot grow any further.
            break
        max_ = max(bw.values())
        # Remove every edge that shares the maximum, not just one of them.
        # items() rather than iteritems(): works on Python 2 and Python 3.
        for k, v in bw.items():
            if float(v) == max_:
                G.remove_edge(k[0], k[1])
        ncomp = nx.number_connected_components(G)
Ejemplo n.º 57
0
    def calculte_betweenness(self, G, bonus=True):
        """
        Return the edge betweenness of G.

        input:
        - G: graph
        - bonus: when truthy (the default) delegate to
          self.my_betweenness_calculation; otherwise use networkx's
          edge_betweenness_centrality with its default-style arguments.

        """
        if not bonus:
            return nx.edge_betweenness_centrality(G, k=None, normalized=True, weight=None, seed=None)
        return self.my_betweenness_calculation(G)
Ejemplo n.º 58
0
def _remove_max_edge(G, weight=None):
    """
    Strip the highest-betweenness edges from G, in place.

    Each round recomputes edge betweenness (using `weight`) and removes all
    edges that share the maximum value. Rounds continue until G has more
    connected components than it started with, or until no edges are left.
    """
    baseline = nx.number_connected_components(G)
    while True:
        if nx.number_connected_components(G) > baseline:
            break
        if not G.number_of_edges():
            break
        scores = nx.edge_betweenness_centrality(G, weight=weight)
        top = max(scores.values())
        # Walk a snapshot of the edges because G shrinks during the loop.
        for edge in tuple(G.edges()):
            if scores[edge] != top:
                continue
            G.remove_edge(*edge)
Ejemplo n.º 59
0
def disintegrate(gr):
    """Yield the connected components of ``gr`` each time their count grows.

    The initial components are yielded first. After that the edge with the
    highest betweenness is removed from ``gr`` (in place), one at a time,
    and a new list of components is yielded whenever a removal increased
    the number of components. Iteration ends when there are as many
    components as nodes.
    """
    parts = list(nx.connected_components(gr))
    known = len(parts)
    total_nodes = nx.number_of_nodes(gr)
    yield parts
    while known < total_nodes:
        scores = nx.edge_betweenness_centrality(gr)
        # Drop the single most central edge, then re-inspect the graph.
        gr.remove_edge(*max(scores, key=scores.get))
        parts = list(nx.connected_components(gr))
        if len(parts) <= known:
            continue
        known = len(parts)
        yield parts
Ejemplo n.º 60
-1
def gnewman(club, splitTo=2):
    """Split ``club`` in place until it has at least ``splitTo`` components.

    A connected component of an undirected graph is a subgraph in which any
    two vertices are joined by a path, so the component count tells how
    many groups the club has been split into. On every round the
    unnormalized edge betweenness is recomputed and every edge tied for the
    maximum is removed. Progress is printed; nothing is returned.
    """
    rounds = 0
    while True:
        if not nx.number_connected_components(club) < splitTo:
            break
        # Maps each edge (u, v) to its betweenness score.
        scores = nx.edge_betweenness_centrality(club, normalized=False)
        # There may be ties, so work with the top score rather than a
        # single edge.
        top = max(scores.values())
        for (u, v), score in scores.items():
            if score != top:
                continue
            club.remove_edge(u, v)
            print("removed edge %s--%s with betweenness score of %f"%(u,v,top))
        rounds += 1

        print("-------------------------")
    print("total iterations %d for splitting into %d"%(rounds,splitTo))