def betweenness_removal(g, recalculate=False):
    """
    Performs robustness analysis based on betweenness centrality,
    on the network specified by infile using sequential (recalculate = True)
    or simultaneous (recalculate = False) approach. Returns a list
    with fraction of nodes removed, a list with the corresponding sizes of
    the largest component of the network, and the overall vulnerability
    of the network.
    """

    m = nx.betweenness_centrality(g)
    l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)
    x = []
    y = []

    dimension = fd.fractal_dimension(g, iterations=100, debug=False)

    n = len(g.nodes())
    x.append(0)
    y.append(dimension)

    for i in range(1, n-1):
        g.remove_node(l.pop(0)[0])
        if recalculate:
            m = nx.betweenness_centrality(g)
            l = sorted(m.items(), key=operator.itemgetter(1),
                       reverse=True)

        dimension = fd.fractal_dimension(g, iterations=100, debug=False)
        x.append(i * 1. / n)
        y.append(dimension)

    return x, y
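A minimal driver for the function above, as a sketch: it assumes the same `fd` module (providing `fractal_dimension(g, iterations, debug)`) that the snippet itself relies on, and uses a Barabási–Albert graph purely as a stand-in input. Note the function mutates the graph, so pass a copy.

import operator
import networkx as nx
import fd  # assumed helper module, as used inside betweenness_removal

g = nx.barabasi_albert_graph(100, 2, seed=1)
fractions, dimensions = betweenness_removal(g.copy(), recalculate=False)
print(fractions[:5], dimensions[:5])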
Example #2
def recalculated_betweenness(ex):
    # Copy the graph
    ex = ex.copy()
    # Calculate betweenness of full graph
    between = nx.betweenness_centrality(ex, weight='distance', normalized=False)
    # Create a copy to track the recalculated betweenness
    rebetween = dict(between)
    while len(ex.edges()) > 0:
        # Recalculate betweenness
        between = nx.betweenness_centrality(ex, weight='distance', normalized=False)
        # Store recalculated values if they're higher
        for node, value in between.items():
            if value > rebetween[node]:
                rebetween[node] = value
        # Remove all edges from most central node
        node, value = sorted(between.items(), key=lambda x: x[1], reverse=True)[0]
        if (value == 0):
            # All remaining edges are trivial shortest paths
            break
        for tail, head in ex.edges(node):
            ex.remove_edge(tail, head)
        sys.stdout.write('.')
        sys.stdout.flush()
    print()
    return rebetween
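A small usage sketch for recalculated_betweenness: since the call passes weight='distance', every edge needs a 'distance' attribute; Zachary's karate club with uniform distances is used here for illustration only.

import sys
import networkx as nx

ex = nx.karate_club_graph()
nx.set_edge_attributes(ex, 1.0, 'distance')  # uniform distances, illustration only
rebetween = recalculated_betweenness(ex)
print(max(rebetween, key=rebetween.get))  # node with the highest recalculated value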
Example #3
def betweenness(infile, recalculate = False):
    """
    Performs robustness analysis based on betweenness centrality,  
    on the network specified by infile using sequential (recalculate = True) 
    or simultaneous (recalculate = False) approach. Returns a list 
    with fraction of nodes removed, a list with the corresponding sizes of 
    the largest component of the network, and the overall vulnerability 
    of the network.
    """

    g = networkx.read_gml(infile)
    m = networkx.betweenness_centrality(g)
    l = sorted(m.items(), key = operator.itemgetter(1), reverse = True)
    x = []
    y = []
    largest_component = max(networkx.connected_components(g), key = len)
    n = len(g.nodes())
    x.append(0)
    y.append(len(largest_component) * 1. / n)
    R = 0.0
    for i in range(1, n):
        g.remove_node(l.pop(0)[0])
        if recalculate:
            m = networkx.betweenness_centrality(g)
            l = sorted(m.items(), key = operator.itemgetter(1), 
                       reverse = True)
        largest_component = max(networkx.connected_components(g), key = len)
        x.append(i * 1. / n)
        R += len(largest_component) * 1. / n
        y.append(len(largest_component) * 1. / n)
    return x, y, 0.5 - R / n
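Since this variant reads its network from a GML file, a quick way to exercise it is to write a small test graph first; the karate club graph and the file name here are stand-ins.

import operator
import networkx

networkx.write_gml(networkx.karate_club_graph(), 'karate.gml')
x, y, vulnerability = betweenness('karate.gml', recalculate=True)
print('vulnerability:', vulnerability)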
def show_network_metrics(G):
    '''
    Print the local and global metrics of the network
    '''
    print(G)  # nx.info was removed in NetworkX 3.0; printing the graph gives a one-line summary

    # density
    print("Density of the network")
    print(nx.density(G))    
    
    # average betweenness
    print("Average betweenness of the network")
    bc = nx.betweenness_centrality(G)
    print(np.mean(list(bc.values())))

    # Average clustering coefficient
    print("Average clustering coefficient:")
    print(nx.average_clustering(G))


    # create metrics dataframe
    by_node_metrics = pd.DataFrame({"Betweenness_Centrality": bc,
                                    "Degree_Centrality": nx.degree_centrality(G),
                                    "Clustering_Coefficient": nx.clustering(G),
                                    "Triangles": nx.algorithms.cluster.triangles(G)})
    print(by_node_metrics)

    by_node_metrics.to_excel("metrics.xlsx")
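Any NetworkX graph can be passed in; note that the to_excel call at the end needs an Excel writer such as openpyxl installed. A sketch:

import networkx as nx
import numpy as np
import pandas as pd

show_network_metrics(nx.karate_club_graph())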
Example #5
def betweenness_fracture(infile, outfile, fraction, recalculate = False):
    """
    Removes given fraction of nodes from infile network in reverse order of 
    betweenness centrality (with or without recalculation of centrality values 
    after each node removal) and saves the network in outfile.
    """

    g = networkx.read_gml(infile)
    m = networkx.betweenness_centrality(g)
    l = sorted(m.items(), key = operator.itemgetter(1), reverse = True)
    largest_component = max(networkx.connected_components(g), key = len)
    n = len(g.nodes())
    for i in range(1, n):
        g.remove_node(l.pop(0)[0])
        if recalculate:
            m = networkx.betweenness_centrality(g)
            l = sorted(m.items(), key = operator.itemgetter(1), 
                       reverse = True)
        largest_component = max(networkx.connected_components(g), key = len)
        if i * 1. / n >= fraction:
            break
    components = networkx.connected_components(g)
    component_id = 1
    for component in components:
        for node in component:
            g.node[node]["component"] = component_id
        component_id += 1
    networkx.write_gml(g, outfile)
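A throwaway GML file again makes the fracturing routine easy to try; the random graph and the 20% fraction are arbitrary choices for illustration.

import operator
import networkx

networkx.write_gml(networkx.erdos_renyi_graph(50, 0.1, seed=7), 'random.gml')
betweenness_fracture('random.gml', 'fractured.gml', 0.2, recalculate=False)
print(len(networkx.read_gml('fractured.gml')))  # surviving nodes, tagged by component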
def compute_static_graph_statistics(G,start_time,end_time):
    verts = G.vertices
    n = len(verts)
    m = float(end_time - start_time)
    agg_statistics = [dict.fromkeys(verts, 0), dict.fromkeys(verts, 0), dict.fromkeys(verts, 0)]
    avg_statistics = [dict.fromkeys(verts, 0), dict.fromkeys(verts, 0), dict.fromkeys(verts, 0)]

    aggregated_graph = nx.Graph()
    aggregated_graph.add_nodes_from(verts)
    start_time = max(1,start_time)
    for t in range(start_time, end_time + 1):
        aggregated_graph.add_edges_from(G.snapshots[t].edges())
         
        dc = G.snapshots[t].degree()
        cc = nx.closeness_centrality(G.snapshots[t])
        bc = nx.betweenness_centrality(G.snapshots[t])
        for v in verts:
            avg_statistics[0][v] += dc[v]/(n-1.0)
            avg_statistics[1][v] += cc[v]
            avg_statistics[2][v] += bc[v]
    for v in verts:
        avg_statistics[0][v] = avg_statistics[0][v]/m
        avg_statistics[1][v] = avg_statistics[1][v]/m
        avg_statistics[2][v] = avg_statistics[2][v]/m
    
    dc = nx.degree_centrality(aggregated_graph)
    cc = nx.closeness_centrality(aggregated_graph)
    bc = nx.betweenness_centrality(aggregated_graph)
    for v in verts:
        agg_statistics[0][v] = dc[v]
        agg_statistics[1][v] = cc[v]
        agg_statistics[2][v] = bc[v]
    return (agg_statistics, avg_statistics)
def compareGraphs(g1, g2):

    """Compares the quantitative properties of two graphs, as a check on the coarse graining."""

    # Nodes and edges
    print('Graph1: #(Nodes, Edges) = (' + str(len(g1.nodes())) + ', ' + str(len(g1.edges())) + ')')
    print('Graph2: #(Nodes, Edges) = (' + str(len(g2.nodes())) + ', ' + str(len(g2.edges())) + ')')

    # Connected components
    #print('\n#CCs for graph 1: ' + str(len(list(nx.connected_components(g1)))))
    #print('#CCs for graph 2: ' + str(len(list(nx.connected_components(g2)))))

    plt.hist([len(i) for i in nx.connected_components(g1)])
    plt.hist([len(i) for i in nx.connected_components(g2)])
    plt.title('Cluster Size')
    plt.xlabel('Cluster Size')
    plt.ylabel('#Clusters')
    plt.show()

    # Degree distribution
    plt.hist(nx.degree_histogram(g1))
    plt.hist(nx.degree_histogram(g2))
    plt.title('Degree Distribution')
    plt.xlabel('Degree')
    plt.ylabel('#Nodes')
    plt.show()

    # Betweenness --- this is by far the most computationally demanding.
    plt.hist(list(nx.betweenness_centrality(g1, normalized=False).values()))
    plt.hist(list(nx.betweenness_centrality(g2, normalized=False).values()))
    plt.title('Distribution of Betweenness')
    plt.xlabel('Betweenness')
    plt.ylabel('#Nodes')
    plt.show()
def sna_calculations(g, play_file):
    """
    :param g: a NetworkX graph object
    :type g: object
    :param play_file: the location of a play in .txt format
    :type play_file: string
    :return: returns a dictionary containing various network related figures
    :rtype: dict
    :note: also writes into results/file_name-snaCalculations.csv and results/allCharacters.csv
    """
    file_name = os.path.splitext(os.path.basename(play_file))[0]
    sna_calculations_list = dict()
    sna_calculations_list['playType'] = file_name[0]
    sna_calculations_list['avDegreeCentrality'] = numpy.mean(numpy.fromiter(iter(nx.degree_centrality(g).values()),
                                                                            dtype=float))
    sna_calculations_list['avDegreeCentralityStd'] = numpy.std(
        numpy.fromiter(iter(nx.degree_centrality(g).values()), dtype=float))
    sna_calculations_list['avInDegreeCentrality'] = numpy.mean(
        numpy.fromiter(iter(nx.in_degree_centrality(g).values()), dtype=float))
    sna_calculations_list['avOutDegreeCentrality'] = numpy.mean(
        numpy.fromiter(iter(nx.out_degree_centrality(g).values()), dtype=float))

    try:
        sna_calculations_list['avShortestPathLength'] = nx.average_shortest_path_length(g)
    except nx.NetworkXError:
        sna_calculations_list['avShortestPathLength'] = 'not connected'

    sna_calculations_list['density'] = nx.density(g)
    sna_calculations_list['avEigenvectorCentrality'] = numpy.mean(
        numpy.fromiter(iter(nx.eigenvector_centrality(g).values()), dtype=float))
    sna_calculations_list['avBetweennessCentrality'] = numpy.mean(
        numpy.fromiter(iter(nx.betweenness_centrality(g).values()), dtype=float))
    sna_calculations_list['DegreeCentrality'] = nx.degree_centrality(g)
    sna_calculations_list['EigenvectorCentrality'] = nx.eigenvector_centrality(g)
    sna_calculations_list['BetweennessCentrality'] = nx.betweenness_centrality(g)

    # sna_calculations.csv file
    sna_calc_file = csv.writer(open('results/' + file_name + '-snaCalculations.csv', 'w', newline=''),
                               quoting=csv.QUOTE_ALL, delimiter=';')
    for key, value in sna_calculations_list.items():
        sna_calc_file.writerow([key, value])

    # all_characters.csv file
    if not os.path.isfile('results/allCharacters.csv'):
        with open('results/allCharacters.csv', 'w') as f:
            f.write(
                'Name;PlayType;play_file;DegreeCentrality;EigenvectorCentrality;BetweennessCentrality;speech_amount;AverageUtteranceLength\n')

    all_characters = open('results/allCharacters.csv', 'a')
    character_speech_amount = speech_amount(play_file)
    for character in sna_calculations_list['DegreeCentrality']:
        all_characters.write(character + ';' + str(sna_calculations_list['playType']) + ';' + file_name + ';' + str(
            sna_calculations_list['DegreeCentrality'][character]) + ';' + str(
            sna_calculations_list['EigenvectorCentrality'][character]) + ';' + str(
            sna_calculations_list['BetweennessCentrality'][character]) + ';' + str(
            character_speech_amount[0][character]) + ';' + str(character_speech_amount[1][character]) + '\n')
    all_characters.close()

    return sna_calculations_list
 def __init__(self, view, controller, use_ego_betw=False, **kwargs):
     super(CacheLessForMore, self).__init__(view, controller)
     topology = view.topology()
     if use_ego_betw:
         self.betw = dict((v, nx.betweenness_centrality(nx.ego_graph(topology, v))[v])
                          for v in topology.nodes())
     else:
         self.betw = nx.betweenness_centrality(topology)
Example #10
def weighted_betweenness_centrality_distribution(G, return_dictionary=False):
    """Return a distribution of weighted betweenness centralities.
    If return_dictionary is specified, we return a dictionary indexed by
    vertex name, rather than just the values (as returned by default).
    """
    if return_dictionary:
        return nx.betweenness_centrality(G, weight='weight')
    else:
        # weight='weight' is the modern NetworkX spelling of the old weighted_edges=True
        return list(nx.betweenness_centrality(G, weight='weight').values())
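A short check of both return modes on a toy weighted graph; the 'weight' edge attribute is what the modernized call above reads.

import networkx as nx

G = nx.Graph()
G.add_weighted_edges_from([('a', 'b', 1.0), ('b', 'c', 2.0), ('a', 'c', 4.0)])
print(weighted_betweenness_centrality_distribution(G, return_dictionary=True))
print(weighted_betweenness_centrality_distribution(G))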
Example #11
def betweenness_centrality_distribution(G, return_dictionary=False):
    """Return a distribution of unweighted betweenness centralities, 
    as used in Borges, Coppersmith, Meyer, and Priebe 2011.
    If return_dictionary is specified, we return a dictionary indexed by
    vertex name, rather than just the values (as returned by default).
    """
    if return_dictionary:
        return nx.betweenness_centrality(G)
    else:
        return list(nx.betweenness_centrality(G).values())
Example #12
    def centrality_measures(self):

        centrality_measures = []
        txt = ''
        
        # betweenness
        # unweighted
        self.unweighted_betweenness_distribution = nx.betweenness_centrality(self.G)
        statistics = self.Stats.get_distribution_info(self.unweighted_betweenness_distribution)
        centrality_measures.extend(statistics[:5])
        centrality_measures.extend(statistics[5])
        txt += ',average betweenness centrality (unweighted)' + self.standard_text_distribution

        # weighted
        self.weighted_betweenness_distribution = nx.betweenness_centrality(self.G, weight=self.weight_id)
        # statistics		= self.Stats.get_distribution_info(self.weighted_betweenness_distribution)
        # centrality_measures.extend(statistics[:5])
        # centrality_measures.extend(statistics[5])
        # txt += ',average betweenness centrality (weighted)' + self.standard_text_distribution
        
        # closeness
        # unweighted
        self.unweighted_closeness_distribution = nx.closeness_centrality(self.G)
        statistics = self.Stats.get_distribution_info(self.unweighted_closeness_distribution)
        centrality_measures.extend(statistics[:5])
        centrality_measures.extend(statistics[5])
        txt += ',average closeness centrality (unweighted)' + self.standard_text_distribution        
        
        # eigenvector
        # right
        try:
            self.right_eigenvector_distribution = nx.eigenvector_centrality(self.G)
            statistics = self.Stats.get_distribution_info(self.right_eigenvector_distribution)
            centrality_measures.extend(statistics[:5])
            centrality_measures.extend(statistics[5])
        except nx.NetworkXException:
            centrality_measures.extend([0, 0, 0, 0, 0])
            centrality_measures.extend([0] * len(statistics[5]))
        txt += ',average right eigenvector' + self.standard_text_distribution

        # left
        try:
            G_rev = self.G.reverse()
            self.left_eigenvector_distribution = nx.eigenvector_centrality(G_rev)
            statistics = self.Stats.get_distribution_info(self.left_eigenvector_distribution)
            centrality_measures.extend(statistics[:5])
            centrality_measures.extend(statistics[5])
        except nx.NetworkXException:
            centrality_measures.extend([0, 0, 0, 0, 0])
            centrality_measures.extend([0] * len(statistics[5]))
        txt += ',average left eigenvector' + self.standard_text_distribution

        return [centrality_measures, txt]
Example #13
    def genSeedsMaxDegree(self,p,bwness):
        """Generate seeds based on maximum degree. Also handles Betweenness.
        Optional input argument sets randomization. 0<p<1"""

        numSeeds = self.numSeeds

        if bwness:
            numSeeds = int(numSeeds * 1.5)
            k_val = int(2000 / math.sqrt(len(self.adj)))
            if k_val > len(self.adj):
                bw_node = nx.betweenness_centrality(self.nxgraph)
            else:
                bw_node = nx.betweenness_centrality(self.nxgraph, k=k_val)


        numMax=int(self.numSeeds/(1.0*p))

        seeds=[None]*numMax
        deg=[0]*numMax

        for key, value in self.adj.items():
            #fill seeds
            curr_deg=len(value)
            for j in range(numMax):
                if curr_deg>deg[j]:
                    deg.insert(j,curr_deg)
                    seeds.insert(j,key)
                    break

            seeds=seeds[:numMax]
            deg=deg[:numMax]

        if bwness:
            numMax=int(self.numSeeds/(1.0*p))
            dict_bw = bw_node
            seeds_degree = seeds
            seeds = dict()
            for node in seeds_degree:
                value = dict_bw.get(node)
                key = node
                seeds[key] = value
            seeds_fin = dict(sorted(seeds.items(), key=operator.itemgetter(1), reverse=True)[:numMax])
            seeds = list(seeds_fin.keys())


        #shuffle
        if p!=1:
            random.shuffle(seeds)

        return seeds[:self.numSeeds]
def node_graph(tup):
    h = nx.Graph()
    h.add_edges_from(tup)
    print("edges:", h.edges())
    #%matplotlib inline
    BLUE = "#99CCFF"
    nx.draw(h, node_color=BLUE, with_labels=True)
    print("Degree Distribution:", h.degree())
    print("Degree Centrality:", nx.degree_centrality(h))
    print("Betweenness Centrality:", nx.betweenness_centrality(h))
    print("Betweenness Centrality Non-Normalized:", nx.betweenness_centrality(h, normalized=False))
    print("Closeness Centrality:", nx.closeness_centrality(h))
    pyplot.show()
def betweenness_apl(g, recalculate=False):
    """
    Performs robustness analysis based on betweenness centrality,
    on the network specified by infile using sequential (recalculate = True)
    or simultaneous (recalculate = False) approach. Returns a list
    with fraction of nodes removed, a list with the corresponding sizes of
    the largest component of the network, and the overall vulnerability
    of the network.
    """

    m = networkx.betweenness_centrality(g)
    l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)
    x = []
    y = []

    average_path_length = 0.0
    number_of_components = 0
    n = len(g.nodes())

    # networkx.connected_component_subgraphs was removed in NetworkX 2.4
    for sg in (g.subgraph(c) for c in networkx.connected_components(g)):
        average_path_length += networkx.average_shortest_path_length(sg)
        number_of_components += 1

    average_path_length = average_path_length / number_of_components
    initial_apl = average_path_length

    x.append(0)
    y.append(average_path_length * 1. / initial_apl)
    r = 0.0
    for i in range(1, n):
        g.remove_node(l.pop(0)[0])
        if recalculate:
            m = networkx.betweenness_centrality(g)
            l = sorted(m.items(), key=operator.itemgetter(1),
                       reverse=True)

        average_path_length = 0.0
        number_of_components = 0

        for sg in (g.subgraph(c) for c in networkx.connected_components(g)):
            if len(sg.nodes()) > 1:
                average_path_length += networkx.average_shortest_path_length(sg)
            number_of_components += 1

        average_path_length = average_path_length / number_of_components

        x.append(i * 1. / n)
        r += average_path_length
        y.append(average_path_length / initial_apl)
    return x, y, r / initial_apl
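A minimal run of the average-path-length variant; as above, the function consumes the graph it is given, so pass a copy. The Watts–Strogatz graph is a stand-in.

import operator
import networkx

g = networkx.watts_strogatz_graph(60, 4, 0.1, seed=3)
x, y, r = betweenness_apl(g.copy(), recalculate=False)
print(x[:3], y[:3], r)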
Example #16
	def betweenness_centrality(self, withme=False, node=None,average=False):
		if node==None:
			if withme:
				my_dict = nx.betweenness_centrality(self.mynet)
				new = {}
				new2={}
				for i in my_dict:
					new[self.id_to_name(i)] = my_dict[i]
					new2[i] = my_dict[i]
				if average:
					print("The average is " + str(round(sum(new.values()) / float(len(new)), 4)))
				else:
					for i, j in new.items():
						print(i, round(j, 4))
					return new2
			else:
				my_dict = nx.betweenness_centrality(self.no_ego_net)

				new = {}
				new2={}
				for i in my_dict:
					new[self.id_to_name(i)] = my_dict[i]
					new2[i] = my_dict[i]
				if average:
					print("The average is " + str(round(sum(new.values()) / float(len(new)), 4)))
				else:
					for i, j in new.items():
						print(i, round(j, 4))
					return new2

		else:
			if withme:
				my_dict = nx.betweenness_centrality(self.mynet)
				try:
					print("The coefficient for node " + str(node) + " is " + str(round(my_dict[node], 4)))
				except Exception:
					try:
						return my_dict[self.name_to_id(node)]
					except Exception:
						print("Invalid node name")
			else:
				my_dict = nx.betweenness_centrality(self.no_ego_net)
				try:
					print("The coefficient for node " + str(node) + " is " + str(round(my_dict[node], 4)))
				except Exception:
					try:
						print("The coefficient for node " + str(node) + " is " + str(round(my_dict[self.name_to_id(node)], 4)))
					except Exception:
						print("Invalid node name")
Example #17
def betweenValue(charList, graphFile, bookNetworksPath):
    # Compute betweenness for all characters in the current chapter graph.
    g = nx.read_gexf(graphFile)
    betCentrality = nx.betweenness_centrality(g, k=None, normalized=True, weight="Weight", endpoints=False, seed=None)
    betweenValues = betCentrality.values()

    # NORMALISE betweenness values
    d = decimal.Decimal
    maxBetween = max(betweenValues)
    minBetween = min(betweenValues)
    maxMinusMin = d(maxBetween) - d(minBetween)

    if not charList:
        # Get top 10 overall characters from overall.gexf graph
        overallGraphFile = bookNetworksPath + "overall.gexf"
        overall_g = nx.read_gexf(overallGraphFile)
        overallBetweenCent = nx.betweenness_centrality(
            overall_g, k=None, normalized=True, weight="Weight", endpoints=False, seed=None
        )

        # Quick fix for getting all characters.

        # sortedCentrality = dict(sorted(overallBetweenCent.items(), key=itemgetter(1), reverse=True)[:10])
        sortedCentrality = sorted(overallBetweenCent.items(), key=itemgetter(1), reverse=True)

        charList = [seq[0] for seq in sortedCentrality]
        return charList

    else:
        charList = list(charList)

        for index, item in enumerate(charList):
            currentChar = None
            for key, value in betCentrality.items():
                if key == item:
                    numerator = d(value) - d(minBetween)
                    if numerator == 0:
                        charList[index] = (key, str(0))
                    else:
                        norm_value = (d(value) - d(minBetween)) / d(maxMinusMin)
                        charList[index] = (key, str(norm_value))
                    currentChar = key
            # If current character is not present in the current chapter assign 0 influence.
            if not currentChar:
                charList[index] = (item, 0)

        return charList
Example #18
def build_graph():
    pair_list = TwitterUser.get_top_100_pair()
    DG = nx.DiGraph()
    DG.add_edges_from([(foer, twitter_user) for twitter_user, foer in
        pair_list])
    betweenness = nx.betweenness_centrality(DG)
    closeness = nx.closeness_centrality(DG)
    edge_betweenness = nx.edge_betweenness_centrality(DG)
    clustering_co = nx.clustering(nx.Graph(DG))
    page_rank = nx.pagerank(DG)
    for twitter_id in DG.nodes():
        t = TwitterUser.get_by_id(twitter_id)
        node = DG.nodes[twitter_id]
        node['user_id'] = t.user_id
        node['label'] = t.scrn_name
        node['follower_count'] = t.foer_cnt
        node['friend_count'] = t.friend_cnt
        node['status_count'] = t.status_cnt
        node['location'] = t.location
        node['verified'] = t.verified
        node['twitter_age'] = (date.today() - t.created_at).days
        node['daily_tweet'] = t.status_cnt*1.0/node['twitter_age']
        node['indegree'] = len([(id, foer) for id, foer 
            in pair_list if id == twitter_id])
        node['outdegree'] = len([(id, foer) for id, foer 
            in pair_list if foer == twitter_id])
        node['cluster'] = clustering_co[twitter_id]
        node['betweenness'] = betweenness[twitter_id]
        node['closeness'] = closeness[twitter_id]
        node['page_rank'] = page_rank[twitter_id]
    for out_n, in_n in DG.edges():
        DG[out_n][in_n]['edge_betweenness'] = edge_betweenness[(out_n,in_n)]

    return DG
Example #19
	def __init__(self, graph, node_1=None, node_2=None):
		self.graph = graph
		self.node_1 = node_1
		self.node_2 = node_2
		self.clustering_dict = nx.clustering(graph)
		self.betweenness_dict = nx.betweenness_centrality(graph)
		self.average_neighbor_degree_dict = nx.average_neighbor_degree(graph)
		
		self.attributes_map = {
			"adamic_adar_similarity": self.adamic_adar_similarity,	
			"average_clustering_coefficient": self.average_clustering_coefficient,	
			"average_neighbor_degree_sum": self.average_neighbor_degree_sum,	
			"betweenness_centrality": self.betweenness_centrality,	
			"closeness_centrality_sum": self.closeness_centrality_sum,	
			"clustering_coefficient_sum": self.clustering_coefficient_sum,	
			"common_neighbors": self.common_neighbors,	
			"cosine": self.cosine,	
			"jaccard_coefficient": self.jaccard_coefficient,	
			"katz_measure": self.katz_measure,	
			"preferential_attachment": self.preferential_attachment,		
			"square_clustering_coefficient_sum": self.square_clustering_coefficient_sum,	
			"sum_of_neighbors": self.sum_of_neighbors,	
			"sum_of_papers": self.sum_of_papers,
			"get_shortest_path_length": self.get_shortest_path_length,
			"get_second_shortest_path_length": self.get_second_shortest_path_length				
		}
		
		if self.node_1 is not None and self.node_2 is not None:
			self.neighbors_1 = self.all_neighbors(self.node_1)
			self.neighbors_2 = self.all_neighbors(self.node_2)
Example #20
def __nfur_func(topology, edges, betweenness):
    """
    Calculate NFUR on a specific set of edges
    
    Parameters
    ----------
    topology : Topology
        The topology
    edges : list
        The list of edges (subset of topology edges)
    betweenness : dict
        The betweenness centrality of the topology, keyed by node
        
    Returns
    -------
    nfur : dict
        NFUR values keyed by node, only relative to failures of the specified
        edges 
    """
    nfur = betweenness.copy()
    topology = topology.copy()
    for u, v in edges:
        edge_attr = topology.edges[u, v]  # topology.edge was removed in NetworkX 2.x
        topology.remove_edge(u, v)
        betw = nx.betweenness_centrality(topology, normalized=False, 
                                         weight='weight')
        for node in betw.keys():
            if betw[node] > nfur[node]:
                nfur[node] = betw[node]
        topology.add_edge(u, v, **edge_attr)
    return nfur
Example #21
    def test_fast_versions_properties_threshold_graphs(self):
        cs='ddiiddid'
        G=nxt.threshold_graph(cs)
        assert_equal(nxt.density('ddiiddid'), nx.density(G))
        assert_equal(sorted(nxt.degree_sequence(cs)),
                     sorted(G.degree().values()))

        ts=nxt.triangle_sequence(cs)
        assert_equal(ts, list(nx.triangles(G).values()))
        assert_equal(sum(ts) // 3, nxt.triangles(cs))

        c1=nxt.cluster_sequence(cs)
        c2=list(nx.clustering(G).values())
        assert_almost_equal(sum([abs(c-d) for c,d in zip(c1,c2)]), 0)

        b1=nx.betweenness_centrality(G).values()
        b2=nxt.betweenness_sequence(cs)
        assert_true(sum([abs(c-d) for c,d in zip(b1,b2)]) < 1e-14)

        assert_equal(nxt.eigenvalues(cs), [0, 1, 3, 3, 5, 7, 7, 8])

        # Degree Correlation
        assert_true(abs(nxt.degree_correlation(cs)+0.593038821954) < 1e-12)
        assert_equal(nxt.degree_correlation('diiiddi'), -0.8)
        assert_equal(nxt.degree_correlation('did'), -1.0)
        assert_equal(nxt.degree_correlation('ddd'), 1.0)
        assert_equal(nxt.eigenvalues('dddiii'), [0, 0, 0, 0, 3, 3])
        assert_equal(nxt.eigenvalues('dddiiid'), [0, 1, 1, 1, 4, 4, 7])
Example #22
def relevant_stats(G):
	cloC = nx.closeness_centrality(G, distance='distance')
	betC = nx.betweenness_centrality(G, weight='distance')
	katC = nx.katz_centrality(G)
	eigC = nx.eigenvector_centrality(G)

	return cloC, betC, katC, eigC
Example #23
    def __init__(self, n=1000, k=10, p=0.02947368):
        self.n = n
        self.k = k
        self.p = p
        self.ws = nx.watts_strogatz_graph(self.n, self.k, self.p, seed='nsll')
        nx.set_node_attributes(self.ws, 'S', 'SIR')  # modern NetworkX argument order: (G, values, name)
        self.clustering = nx.clustering(self.ws)
        self.betweenness = nx.betweenness_centrality(self.ws)
        p_r_0 = 0.001
        r_0 = int(self.n * p_r_0)
        if r_0 < 1:
            r_0 = 1
        random.seed('nsll')
        self.r = random.sample(list(self.ws.nodes()), r_0)

        i_0 = 4
        if i_0 < r_0:
            i_0 += 1
        random.seed('nsll')
        self.infected = random.sample(list(self.ws.nodes()), i_0)
        for n in self.infected:
            self.ws.nodes[n]['SIR'] = 'I'
        for n in self.r:
            self.ws.nodes[n]['SIR'] = 'R'
        self.s = self.n - len(self.infected) - len(self.r)
        print(self.r)
        print(self.infected)
def print_top_betweenness(component, size=10):
	bc = nx.betweenness_centrality(component, weight='weight', normalized=True)
	for node in sorted(bc, key=bc.get, reverse=True)[0:size]:
		query = {'spec': {'user.id': int(node)}, 'fields': {'_id': 0, 'user.screen_name': 1}}
		this_data = bf.query_mongo_get_list(query, limit=1)
		print(this_data['user']['screen_name'], '&', "{0:.4f}".format(bc[node]), '\\\\')
	return bc
def plot_betweenness_dist(graph, path):
    """Plot the distribution of betweenness centrality of the graph and save the
       figure at the given path. On the X-axis we have betweenness centrality
       values and on the Y-axis the percentage of nodes that have that
       betweenness value."""

    N = float(graph.order())
    node_to_betweenness = nx.betweenness_centrality(graph)
    betweenness_to_percent = {}

    # calculate percentages of nodes with certain betweeness value
    for node in node_to_betweenness:
        betweenness_to_percent[node_to_betweenness[node]] = 1 + \
                betweenness_to_percent.get(node_to_betweenness[node], 0)
    for c in betweenness_to_percent:
        betweenness_to_percent[c] = betweenness_to_percent[c] / N * 100

    x = sorted(betweenness_to_percent.keys(), reverse = True)
    y = [betweenness_to_percent[i] for i in x]

    plt.loglog(x, y, 'b-', marker = '.')
    plt.title("Betweenness Centrality Distribution")
    plt.ylabel("Percentage")
    plt.xlabel("Betweenness value")
    plt.axis('tight')
    plt.savefig(path)
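For example, with a scale-free test graph (any path writable by your matplotlib backend will do):

import networkx as nx
import matplotlib.pyplot as plt

plot_betweenness_dist(nx.barabasi_albert_graph(200, 2, seed=0), 'betweenness_dist.png')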
    def test_florentine_families_graph(self):
        """Weighted betweenness centrality: 
        Florentine families graph"""
        G=nx.florentine_families_graph()
        b_answer=\
             {'Acciaiuoli':    0.000,
              'Albizzi':       0.212,
              'Barbadori':     0.093,
              'Bischeri':      0.104,
              'Castellani':    0.055,
              'Ginori':        0.000,
              'Guadagni':      0.255,
              'Lamberteschi':  0.000,
              'Medici':        0.522,
              'Pazzi':         0.000,
              'Peruzzi':       0.022,
              'Ridolfi':       0.114,
              'Salviati':      0.143,
              'Strozzi':       0.103,
              'Tornabuoni':    0.092}

        b=nx.betweenness_centrality(G,
                                          weight='weight',
                                          normalized=True)
        for n in sorted(G):
            assert_almost_equal(b[n],b_answer[n],places=3)
Example #27
def return_average_betweenness_centralities(path):
    f = open(path, 'r')
    dct = json.loads(f.read())
    f.close()
    ct_avg = -1
    whole_avg = -1
    try:
        dg = json_dag.JsonToDag(path)
        dg.add_nodes()
        dg.add_dependencies()
        G = dg.G
        critical_path, stats_result = dg.findCriticalPath()
        G_undirected = G.to_undirected()  # undirected copy (not used below; betweenness is computed on G)
        bt = nx.betweenness_centrality(G)
        #bt_edge = nx.edge_betweenness_centrality(G)
        sm = 0
        for element in critical_path:
            sm += bt[element]

        ct_avg = sm/ float(len(critical_path))
        whole_avg = sum(bt.values())/(float(len(bt)));
    except Exception:
        print("error")
    return ct_avg, whole_avg
def calculate_betweenness(graph):
	''' Calculate betweenness centrality of each node and set the value on the node
	    as an attribute; returns the graph and a dict of the betweenness centrality values.
	'''
	g = graph
	bc = nx.betweenness_centrality(g)
	nx.set_node_attributes(g, bc, 'betweenness')  # modern NetworkX argument order: (G, values, name)
	return g, bc
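After the call, each node carries a 'betweenness' attribute mirroring the returned dict; a quick check on a path graph:

import networkx as nx

g, bc = calculate_betweenness(nx.path_graph(5))
print(g.nodes[2]['betweenness'], bc[2])  # the middle node has the highest value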
        def btw_centrality_month_airports(data):    
            df = data.copy()
            df['DateOfDeparture'] = pd.to_datetime(df['DateOfDeparture'])
            df['month'] = df['DateOfDeparture'].dt.month.astype(str)
            df['year'] = df['DateOfDeparture'].dt.year.astype(str)
            df['year_month'] = df[['month','year']].apply(lambda x: '-'.join(x),axis=1)
            df['year_month_dep'] = df[['Departure','month','year']].apply(lambda x: '-'.join(x),axis=1)
            df['year_month_arr'] = df[['Arrival','month','year']].apply(lambda x: '-'.join(x),axis=1)
            year_month = pd.unique(df['year_month'])
            G = nx.Graph()
            btw_centrality = {}

            for i, item in enumerate(year_month):
                sub_df = df[df['year_month'] == item][['Departure','Arrival']]
                list_dep_arr = zip(sub_df['Departure'], sub_df['Arrival'])
                G.add_edges_from(list_dep_arr)
                #G.number_of_nodes()
                #G.number_of_edges()
                centrality_month = nx.betweenness_centrality(G)
                centrality_month = pd.DataFrame(list(centrality_month.items()))
                centrality_month['year_month'] = [item] * centrality_month.shape[0]
                centrality_month['airport_year_month'] = centrality_month[centrality_month.columns[[0,2]]].apply(lambda x: '-'.join(x),axis=1)
                centrality_month =dict(zip(centrality_month['airport_year_month'], centrality_month[1]))

                btw_centrality.update(centrality_month)
            df['btw_centrality_month_dep'] = df['year_month_dep'].map(btw_centrality)
            df['btw_centrality_month_arr'] = df['year_month_arr'].map(btw_centrality)
            return df
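The function expects DateOfDeparture, Departure and Arrival columns. If it is lifted to module level, a two-row frame of synthetic data is enough to see the added centrality columns:

import pandas as pd
import networkx as nx

data = pd.DataFrame({
    'DateOfDeparture': ['2013-01-05', '2013-01-20'],
    'Departure': ['ORD', 'LAX'],
    'Arrival': ['LAX', 'JFK'],
})
print(btw_centrality_month_airports(data).filter(like='btw_'))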
Example #30
File: epidemic.py  Project: vslovik/ARS
 def get_center_ego(graph):
     bt = nx.betweenness_centrality(graph)
     print(bt)
     for (node, betweenness) in sorted(bt.items(), key=lambda x: x[1], reverse=True):
         nodes = nx.ego_graph(graph, node).nodes()
         print(nodes)
         return nodes
Example #31
if __name__ == '__main__':
    # Load the networkx.csv file that holds the word-pair co-occurrence counts.
    dataset = pd.read_csv('D:\crawling\\networkx.csv')

    # Build the Graph used to compute the centrality measures.
    G_centrality = nx.Graph()

    # Add an edge only for word pairs whose frequency is at least 19700.
    for ind in range((len(np.where(dataset['freq'] >= 19700)[0]))):
        G_centrality.add_edge(dataset['word1'][ind],
                              dataset['word2'][ind],
                              weight=int(dataset['freq'][ind]))

    dgr = nx.degree_centrality(G_centrality)  # degree centrality
    btw = nx.betweenness_centrality(G_centrality)  # betweenness centrality
    cls = nx.closeness_centrality(G_centrality)  # closeness centrality
    egv = nx.eigenvector_centrality(G_centrality)  # eigenvector centrality
    pgr = nx.pagerank(G_centrality)  # PageRank

    # Sort each measure in decreasing order of centrality.
    sorted_dgr = sorted(dgr.items(), key=operator.itemgetter(1), reverse=True)
    sorted_btw = sorted(btw.items(), key=operator.itemgetter(1), reverse=True)
    sorted_cls = sorted(cls.items(), key=operator.itemgetter(1), reverse=True)
    sorted_egv = sorted(egv.items(), key=operator.itemgetter(1), reverse=True)
    sorted_pgr = sorted(pgr.items(), key=operator.itemgetter(1), reverse=True)

    # Declare the Graph used to draw the word network
    G = nx.Graph()

    # Determine the relatedness between two nodes according to PageRank (word-pair relatedness).
# nx.write_gexf(subgraph, 'beehive-sub.gexf')

triadic_closure = nx.transitivity(G)
print('Triadic closure:', triadic_closure)

degree_dict = dict(G.degree(G.nodes()))
nx.set_node_attributes(G, degree_dict, 'degree')
print(G.nodes['poverty'])

sorted_degree = sorted(degree_dict.items(), key=itemgetter(1), reverse=True)

print('Top 20 nodes by degree:')
for d in sorted_degree[:20]:
    print(d)

betweenness_dict = nx.betweenness_centrality(G)
eigenvector_dict = nx.eigenvector_centrality(G)

nx.set_node_attributes(G, betweenness_dict, 'betweenness')
nx.set_node_attributes(G, eigenvector_dict, 'eigenvector')

sorted_betweenness = sorted(betweenness_dict.items(),
                            key=itemgetter(1),
                            reverse=True)

print('Top 20 nodes by betweenness centrality:')
for b in sorted_betweenness[:20]:
    print(b)

sorted_eigenvector = sorted(eigenvector_dict.items(),
                            key=itemgetter(1),
Example #33
def betweenness_centrality(G, nodes):
    r"""Compute betweenness centrality for nodes in a bipartite network.

    Betweenness centrality of a node `v` is the sum of the
    fraction of all-pairs shortest paths that pass through `v`. 

    Values of betweenness are normalized by the maximum possible
    value which for bipartite graphs is limited by the relative size 
    of the two node sets [1]_.

    Let `n` be the number of nodes in the node set `U` and
    `m` be the number of nodes in the node set `V`, then
    nodes in `U` are normalized by dividing by 

    .. math::

       \frac{1}{2} [m^2 (s + 1)^2 + m (s + 1)(2t - s - 1) - t (2s - t + 3)] ,

    where
    
    .. math::
        
        s = (n - 1) \div m , t = (n - 1) \mod m ,
    
    and nodes in `V` are normalized by dividing by

    .. math::    

        \frac{1}{2} [n^2 (p + 1)^2 + n (p + 1)(2r - p - 1) - r (2p - r + 3)] ,

    where,
    
    .. math::

        p = (m - 1) \div n , r = (m - 1) \mod n .

    Parameters
    ----------
    G : graph
        A bipartite graph

    nodes : list or container
        Container with all nodes in one bipartite node set.

    Returns
    -------
    betweenness : dictionary
        Dictionary keyed by node with bipartite betweenness centrality 
        as the value.

    See Also
    --------
    degree_centrality,
    closeness_centrality,
    networkx.algorithms.bipartite.basic.sets,
    networkx.algorithms.bipartite.basic.is_bipartite

    Notes
    -----
    The nodes input parameter must contain all nodes in one bipartite node set,
    but the dictionary returned contains all nodes from both node sets.

    References
    ----------
    .. [1] Borgatti, S.P. and Halgin, D. In press. "Analyzing Affiliation 
        Networks". In Carrington, P. and Scott, J. (eds) The Sage Handbook 
        of Social Network Analysis. Sage Publications.
        http://www.steveborgatti.com/papers/bhaffiliations.pdf
    """
    top = set(nodes)
    bottom = set(G) - top
    n = float(len(top))
    m = float(len(bottom))
    s = (n - 1) // m
    t = (n - 1) % m
    bet_max_top = (((m**2) * ((s + 1)**2)) + (m * (s + 1) * (2 * t - s - 1)) -
                   (t * ((2 * s) - t + 3))) / 2.0
    p = (m - 1) // n
    r = (m - 1) % n
    bet_max_bot = (((n**2) * ((p + 1)**2)) + (n * (p + 1) * (2 * r - p - 1)) -
                   (r * ((2 * p) - r + 3))) / 2.0
    betweenness = nx.betweenness_centrality(G, normalized=False, weight=None)
    for node in top:
        betweenness[node] /= bet_max_top
    for node in bottom:
        betweenness[node] /= bet_max_bot
    return betweenness
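For instance, on a complete bipartite graph every node within a set gets the same normalized score:

import networkx as nx

G = nx.complete_bipartite_graph(3, 4)
print(betweenness_centrality(G, {0, 1, 2}))  # {0, 1, 2} is one bipartite node set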
                 node_size=node_size,
                 node_color=node_color,
                 alpha=0.7,
                 with_labels=True,
                 width=edge_width,
                 edge_color='.4',
                 cmap=plt.cm.Blues)
plt.savefig("DevelopersWeightedCircular.png")

#colored
import networkx as nx
G_fb = nx.read_edgelist('partnerDevelopers.txt',
                        create_using=nx.Graph(),
                        nodetype=str)
pos = nx.spring_layout(G_fb)
betCent = nx.betweenness_centrality(G_fb, normalized=True, endpoints=True)
node_color = [100 * G_fb.degree(v) for v in G_fb]
node_size = [v * 10000 for v in betCent.values()]
plt.figure(figsize=(20, 20))
nx.draw_networkx(G_fb,
                 pos=pos,
                 with_labels=True,
                 node_color=node_color,
                 node_size=node_size)

plt.savefig("DevelopersColored.png")

Example #35
def Decomposition(value, timepass, q):
    global masti, n, G, mymap
    a = 0
    if (masti[value] == -1 or G.has_node(value)):
        mymap[value] = 0
        for nbr in G[value]:  #Not necessary but keeping it
            mymap[nbr] = mymap[nbr] - 1
        G.remove_node(value)
        visited = defaultdict(lambda: 0)
        for i in range(1, n + 1):
            visited[i] = False

        for i in range(1, n + 1):
            if (mymap[i] == 0 and masti[i] == -1):
                masti[i] = value
            store = defaultdict(lambda: 0)
            if ((visited[i] == False) and timepass[i] == value
                    and masti[i] == -1 and mymap[i] > 0):
                DFSUtil(i, visited, timepass, value)
                baby = 0
                for j in range(1, n + 1):
                    store[j] = -1
                for h in range(1, n + 1):
                    if (timepass[h] == -10):
                        store[h] = 1
                        baby = baby + 1
                        baby1 = h
                if (baby == 1):
                    masti[baby1] = value
                    break

                gr = nx.Graph()
                for g in range(1, n + 1):
                    if (store[g] == 1):
                        for f in range(g + 1, n + 1):
                            if (store[f] == 1):
                                for d in G[g]:
                                    if (d == f):
                                        gr.add_node(g)
                                        gr.add_node(f)
                                        gr.add_edge(g, f)
                                        gr.add_edge(f, g)
                if (gr.number_of_edges() < 4 * gr.number_of_nodes()
                        and gr.number_of_nodes() < 800):
                    pr = nx.betweenness_centrality(gr)
                elif (gr.number_of_nodes() < 2000
                      and 4 * gr.number_of_nodes() > gr.number_of_edges()):
                    pr = nx.betweenness_centrality(
                        gr, k=max(1,
                                  gr.number_of_nodes() // 8))
                elif (gr.number_of_nodes() < 5000
                      and 10 * gr.number_of_nodes() > gr.number_of_edges()):
                    pr = nx.betweenness_centrality(
                        gr, k=max(1,
                                  gr.number_of_nodes() // 32))
                elif (gr.number_of_nodes() < 20000
                      and 10 * gr.number_of_nodes() > gr.number_of_edges()):
                    pr = nx.betweenness_centrality(
                        gr, k=max(1,
                                  gr.number_of_nodes() // 2000))
                elif (gr.number_of_nodes() < 50000
                      and 10 * gr.number_of_nodes() > gr.number_of_edges()):
                    pr = nx.betweenness_centrality(
                        gr, k=max(1,
                                  gr.number_of_nodes() // 20000))
                elif (gr.number_of_nodes() < 200000
                      and 1.5 * gr.number_of_nodes() > gr.number_of_edges()):
                    pr = nx.betweenness_centrality(
                        gr, k=max(1,
                                  gr.number_of_nodes() // 80000))
                elif (gr.number_of_nodes() < 600000
                      and 1.2 * gr.number_of_nodes() > gr.number_of_edges()):
                    pr = nx.betweenness_centrality(
                        gr, k=max(1,
                                  gr.number_of_nodes() // 128000))
                elif (gr.number_of_nodes() < 1200000
                      and 1.1 * gr.number_of_nodes() > gr.number_of_edges()):
                    pr = nx.betweenness_centrality(
                        gr, k=max(1,
                                  gr.number_of_nodes() // 320000))
                else:
                    pr = nx.betweenness_centrality(
                        gr, k=max(1,
                                  gr.number_of_nodes() // 400000))

                nextNode = max(pr, key=pr.get)

                for m in range(1, n + 1):
                    if (timepass[m] == -10):
                        timepass[m] = nextNode
                masti[nextNode] = value
                gr.clear()
                store.clear()
                if (mymap[nextNode] > 0):
                    q.append(nextNode)
        visited.clear()
        while (q):
            if (value == 0):
                break
            aese = q[0]
            q.popleft()
            Decomposition(aese, timepass, q)
def centralityAnalysis(repo: git.Repo, commits: List[git.Commit],
                       outputDir: str):

    allRelatedAuthors = {}
    authorCommits = Counter({})

    # for all commits...
    print("Analyzing centrality")
    for commit in Bar('Processing').iter(commits):
        author = commit.author.email

        # increase author commit count
        authorCommits.update({author: 1})

        # initialize dates for related author analysis
        commitDate = datetime.fromtimestamp(commit.committed_date)
        earliestDate = commitDate + relativedelta(months=-1)
        latestDate = commitDate + relativedelta(months=+1)

        # find authors related to this commit
        #        commitRelatedCommits = commit.iter_items(
        #                repo, 'master',
        #                after=earliestDate.strftime('%Y-%m-%d'),
        #                before=latestDate.strftime('%Y-%m-%d'))

        commitRelatedCommits = filter(
            lambda c: findRelatedCommits(author, earliestDate, latestDate, c),
            commits)

        commitRelatedAuthors = set(
            list(map(lambda c: c.author.email, commitRelatedCommits)))

        # get current related authors collection and update it
        authorRelatedAuthors = allRelatedAuthors.setdefault(author, set())
        authorRelatedAuthors.update(commitRelatedAuthors)

    # prepare graph
    print("Preparing NX graph")
    G = nx.Graph()

    for author in allRelatedAuthors:
        for relatedAuthor in allRelatedAuthors[author]:
            G.add_edge(author.strip(), relatedAuthor.strip())

    # analyze graph
    closeness = dict(nx.closeness_centrality(G))
    betweenness = dict(nx.betweenness_centrality(G))
    centrality = dict(nx.degree_centrality(G))
    density = nx.density(G)
    modularity = list(greedy_modularity_communities(G))

    print("Outputting CSVs")

    # output non-tabular results
    with open(os.path.join(outputDir, 'project.csv'), 'a', newline='') as f:
        w = csv.writer(f, delimiter=',')
        w.writerow(['Density', density])
        w.writerow(['Community Count', len(modularity)])

    # output community information
    with open(os.path.join(outputDir, 'community.csv'), 'a', newline='') as f:
        w = csv.writer(f, delimiter=',')
        w.writerow(['Community Index', 'Author Count', 'Commit Count'])
        for idx, community in enumerate(modularity):
            communityCommitCount = sum(authorCommits[author]
                                       for author in community)
            w.writerow([idx + 1, len(modularity[idx]), communityCommitCount])

    # combine centrality results
    combined = {}
    for key in closeness:
        single = {
            'Author': key,
            'Closeness': closeness[key],
            'Betweenness': betweenness[key],
            'Centrality': centrality[key]
        }

        combined[key] = single

    # output tabular results
    with open(os.path.join(outputDir, 'centrality.csv'), 'w', newline='') as f:
        w = csv.DictWriter(
            f, ['Author', 'Closeness', 'Betweenness', 'Centrality'])
        w.writeheader()

        for key in combined:
            w.writerow(combined[key])

    # output graph to PNG
    print("Outputting graph to PNG")
    graphFigure = plt.figure(5, figsize=(30, 30))
    nx.draw(G,
            with_labels=True,
            node_color='orange',
            node_size=4000,
            edge_color='black',
            linewidths=2,
            font_size=20)
    graphFigure.savefig(os.path.join(outputDir, 'graph.png'))
Example #37
def main(simulated_time):

    random.seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)
    """
    TOPOLOGY from a json
    """

    t = Topology()
    t.G = nx.read_graphml("Euclidean.graphml")

    ls = list(t.G.nodes)
    li = {x: int(x) for x in ls}
    nx.relabel_nodes(t.G, li, False)  #Transform str-labels to int-labels

    print "Nodes: %i" % len(t.G.nodes())
    print "Edges: %i" % len(t.G.edges())
    #MANDATORY fields of a link
    # Default values =  {"BW": 1, "PR": 1}
    valuesOne = dict(zip(t.G.edges(), np.ones(len(t.G.edges()))))

    nx.set_edge_attributes(t.G, name='BW', values=valuesOne)
    nx.set_edge_attributes(t.G, name='PR', values=valuesOne)

    centrality = nx.betweenness_centrality(t.G)
    nx.set_node_attributes(t.G, name="centrality", values=centrality)

    sorted_clustMeasure = sorted(centrality.items(),
                                 key=operator.itemgetter(1),
                                 reverse=True)

    top20_devices = sorted_clustMeasure[0:20]
    main_fog_device = copy.copy(top20_devices[0][0])

    # df = pd.read_csv("pos_network.csv")
    # pos = {}
    # for r in df.iterrows():
    #     lat = r[1].x
    #     lng = r[1].y
    #     pos[r[0]] = (lat, lng)

    # fig = plt.figure(figsize=(10, 8), dpi=100)
    # nx.draw(t.G, with_labels=True,pos=pos,node_size=60,node_color="orange", font_size=8)
    # plt.savefig('labels.png')
    # exit()

    print "-" * 20
    print "Best top centralized device: ", main_fog_device
    print "-" * 20
    """
    APPLICATION
    """
    app1 = create_application("app1")
    """
    PLACEMENT algorithm
    """
    #There are not modules to place.
    placement = NoPlacementOfModules("NoPlacement")
    """
    POPULATION algorithm
    """
    number_generators = int(len(t.G) * 0.1)
    print "Number of generators %i" % number_generators

    # you can use whatever function to change the topology
    dStart = deterministicDistributionStartPoint(500,
                                                 400,
                                                 name="Deterministic")
    pop = Population_Move(name="mttf-nodes",
                          srcs=number_generators,
                          node_dst=main_fog_device,
                          activation_dist=dStart)
    pop.set_sink_control({
        "id": main_fog_device,
        "number": number_generators,
        "module": app1.get_sink_modules()
    })

    dDistribution = deterministicDistribution(name="Deterministic", time=100)
    pop.set_src_control({
        "number": 1,
        "message": app1.get_message("M.Action"),
        "distribution": dDistribution
    })

    #In addition, a source includes a distribution function:
    """--
    SELECTOR algorithm
    """
    selectorPath = CloudPath_RR()
    """
    SIMULATION ENGINE
    """
    s = Sim(t, default_results_path="Results_%s" % (simulated_time))
    s.deploy_app(app1, placement, pop, selectorPath)

    s.run(simulated_time,
          test_initial_deploy=False,
          show_progress_monitor=False)

    # s.draw_allocated_topology() # for debugging
    s.print_debug_assignaments()
import networkx as nx
import json
G = nx.Graph()
fp = open("Cit-HepPh - Copy.txt", 'r')
fp.readline()
fp.readline()
fp.readline()
fp.readline()
while True:
    line = fp.readline()
    if not line:
        break
    tk = line.split('\t')
    G.add_edge(int(tk[0]), int(tk[1]))
#dg=nx.degree_centrality(G)
#cc=nx.closeness_centrality(G, normalized=True)
bc = nx.betweenness_centrality(G,
                               k=None,
                               normalized=True,
                               weight=None,
                               endpoints=False,
                               seed=None)
#ec=nx.edge_betweenness_centrality(G, normalized=True, weight=None)
#eg=nx.eigenvector_centrality_numpy(G)
#json.dump(dg,open("degree_centrality.txt",'w'))
#json.dump(cc,open("closeness.txt",'w'))
json.dump(bc, open("betweeness.txt", 'w'))
#json.dump(ec,open("edge_betweeness.txt",'w'))
#json.dump(eg,open("eigenvector.txt",'w'))
fp.close()
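On a citation graph of this size, exact betweenness is expensive; the k parameter left at None above (and leaned on heavily in the Decomposition example earlier) trades accuracy for speed by sampling source nodes. A sketch on a stand-in graph, with an arbitrary sample size:

import networkx as nx

G = nx.barabasi_albert_graph(1000, 3, seed=1)  # stand-in for the citation graph
bc_exact = nx.betweenness_centrality(G, normalized=True)
bc_approx = nx.betweenness_centrality(G, k=256, normalized=True, seed=42)
print(max(abs(bc_approx[v] - bc_exact[v]) for v in G))  # sampling error, typically small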
def get_centralities(compare):

    params = [5000, 2000, 1000, 500, 100, 50, 40, 30, 20, 10, 5, 4, 3, 2, 1, 0]
    #[300000, 150000, 100000, 50000, 35000, 20000, 14000, 10000, 5000, 2000, 1000, 500, 100, 50, 30, 20, 10, 5, 1]
    folderout = 'networks/backboning_centralities/'
    if not os.path.exists(folderout):
        os.makedirs(folderout)

    time_nx = []
    time_ig = []
    ftimes = open(folderout + 'compare_comp_time.dat', 'w')

    ftimes.write('nc\tt_nx\tt_ig\n')

    for nc in params:
        ''' NETWORKX '''

        edges_nx = []
        t1 = time.time()

        print('Parse edges')
        for ind, line in enumerate(
                open('networks/backboning/nc_backboned_' + str(nc))):
            if 'nij' not in line:
                e1, e2, w, sign = line.strip().split('\t')
                edges_nx.append((e1, e2, {'weight': float(w)}))

        G_nx = nx.Graph()
        G_nx.add_edges_from(edges_nx)
        GC_nx = [
            c for c in sorted(
                nx.connected_components(G_nx), key=len, reverse=True)
        ][0]

        print(nc, '\tGet NC degrees')
        degrees_nx = add_df_meas(nx.degree_centrality(G_nx), 'degree_nx')

        print(nc, '\tGet NC clustering')
        clusterings_nx = add_df_meas(nx.clustering(G_nx), 'clustering_nx')

        print(nc, '\tGet NC pageranks')
        pageranks_nx = add_df_meas(nx.pagerank(G_nx), 'pagerank_nx')

        print(nc, '\tGet NC betweenness')
        betweennesses_nx = add_df_meas(nx.betweenness_centrality(G_nx),
                                       'betweenness_nx')

        print(nc, '\tGet NC closeness')
        closenesses_nx = add_df_meas(nx.closeness_centrality(G_nx),
                                     'closeness_nx')

        #print 'Get eigenvector'
        #eigenvectors_nx   = add_df_meas(nx.eigenvector_centrality(G_nx), 'eigenvector_mx')

        print(nc, '\tGet NC constraint')
        constraints_nx = add_df_meas(nx.constraint(G_nx), 'constraint_nx')

        df_nx = degrees_nx.merge(clusterings_nx,
                                 left_index=True,
                                 right_index=True)
        df_nx = df_nx.merge(pageranks_nx, left_index=True, right_index=True)
        df_nx = df_nx.merge(betweennesses_nx,
                            left_index=True,
                            right_index=True)
        df_nx = df_nx.merge(closenesses_nx, left_index=True, right_index=True)
        df_nx = df_nx.merge(constraints_nx, left_index=True, right_index=True)

        t2 = time.time()
        t_nx = t2 - t1
        time_nx.append(t_nx)

        print('Time for NX:  ', round(t_nx, 2), ' s')
        ''' IGRAPH '''

        # get the igraph network
        t1 = time.time()
        ftempname = 'tempfile_nc_backboned' + str(nc)
        ftemp = open(ftempname, 'w')
        for line in open('networks/backboning/nc_backboned_' + str(nc)):
            if 'src' not in line:
                ftemp.write('\t'.join(line.strip().split('\t')[0:3]) + '\n')
        ftemp.close()
        G_ig = Graph.Read_Ncol(ftempname, weights=True, directed=False)
        os.remove(ftempname)

        # get a degree measure that matches
        # nx computes degree centrality, which is k/(N-1), while ig computes k
        # https://networkx.github.io/documentation/networkx-1.9/reference/generated/networkx.algorithms.centrality.degree_centrality.html
        print('\n', nc, '\tGet IG degrees')
        degrees_ig = {}
        G_ig.vs['degree_ig'] = G_ig.degree()
        N = len(G_ig.vs['degree_ig'])
        for v in G_ig.vs():
            degrees_ig[v['name']] = v['degree_ig'] / float(N - 1)

        # get the matching clustering
        # when nw gives 0 for clustering, ig gives nan
        print(nc, '\tGet IG clustering')
        clusterings_ig = {}
        G_ig.vs['clustering_ig'] = G_ig.transitivity_local_undirected(
            weights=None)
        for v in G_ig.vs():
            if np.isnan(v['clustering_ig']):
                v['clustering_ig'] = 0
            clusterings_ig[v['name']] = v['clustering_ig']

        # match betweenness
        # nx gives the normalized betweenness, while igraph gives the raw value. The normalization factor is
        # Bnorm = (n*n - 3*n + 2) / 2.0                      http://igraph.org/r/doc/betweenness.html
        print(nc, '\tGet IG betweenness')
        G_ig.vs['betweenness_ig'] = G_ig.betweenness(weights=None)
        betweennesses_ig = {}
        n = len(G_ig.vs())
        for v in G_ig.vs():
            Bnormalizer = (n * n - 3 * n + 2) / 2.0
            betweennesses_ig[v['name']] = v['betweenness_ig'] / Bnormalizer

        # comparing closeness:
        # NX: if the graph is not completely connected, the closeness centrality is computed for each connected part separately.
        #     https://networkx.github.io/documentation/networkx-1.10/reference/generated/networkx.algorithms.centrality.closeness_centrality.html
        # IG: if the graph is not connected and there is no path between two vertices, the number of vertices is used instead of the length of the geodesic; this is always longer than the longest possible geodesic.
        #     http://igraph.org/python/doc/igraph.GraphBase-class.html#closeness
        print(nc, '\tGet IG closeness')
        closenesses_ig = {}
        G_ig.vs['closeness_ig'] = G_ig.closeness(weights=None,
                                                 normalized=False)
        for v in G_ig.vs():
            closenesses_ig[v['name']] = v['closeness_ig']

        # get matching pagerank values
        # they match, besides some numerical things
        print(nc, '\tGet IG pageranks')
        pageranks_ig = {}
        G_ig.vs['pagerank_ig'] = G_ig.pagerank(weights=None)
        for v in G_ig.vs():
            pageranks_ig[v['name']] = v['pagerank_ig']

        # constraint values match well
        print(nc, '\tGet IG constraint')
        constraints_ig = {}
        G_ig.vs['constraint_ig'] = G_ig.constraint(weights=None)
        for v in G_ig.vs():
            constraints_ig[v['name']] = v['constraint_ig']

        # G_ig.vs['eigenvector_ig']  = G_ig.eigenvector_centrality( weights = None )

        degrees_ig = add_df_meas(degrees_ig, 'degree_ig')
        clusterings_ig = add_df_meas(clusterings_ig, 'clustering_ig')
        betweennesses_ig = add_df_meas(betweennesses_ig, 'betweenness_ig')
        pageranks_ig = add_df_meas(pageranks_ig, 'pagerank_ig')
        constraints_ig = add_df_meas(constraints_ig, 'constraint_ig')
        closenesses_ig = add_df_meas(closenesses_ig, 'closeness_ig')

        df_ig = degrees_ig.merge(clusterings_ig,
                                 left_index=True,
                                 right_index=True)
        df_ig = df_ig.merge(pageranks_ig, left_index=True, right_index=True)
        df_ig = df_ig.merge(betweennesses_ig,
                            left_index=True,
                            right_index=True)
        df_ig = df_ig.merge(closenesses_ig, left_index=True, right_index=True)
        df_ig = df_ig.merge(constraints_ig, left_index=True, right_index=True)

        t2 = time.time()
        t_ig = t2 - t1
        time_ig.append(t_ig)  # assumed counterpart of time_nx for the igraph timings

        print('Time for IG:  ', round(t_ig, 2), ' s\n\n')

        df_nx.to_csv(folderout + 'nc_backboned_centralities_NX_' + str(nc),
                     na_rep='nan')
        df_ig.to_csv(folderout + 'nc_backboned_centralities_IG_' + str(nc),
                     na_rep='nan')

        if compare:
            compare('degree    ', dict(degrees_nx.degree_nx), degrees_ig,
                    GC_nx)
            compare('clustering', dict(clusterings_nx.clustering_nx),
                    clusterings_ig, GC_nx)
            compare('pagerank   ', dict(pageranks_nx.pagerank_nx),
                    pageranks_ig, GC_nx)
            compare('betweenness', dict(betweennesses_nx.betweenness_nx),
                    betweennesses_ig, GC_nx)
            compare('closeness', dict(closenesses_nx.closeness_nx),
                    closenesses_ig, GC_nx)
            compare('constraint', dict(constraints_nx.constraint_nx),
                    constraints_ig, GC_nx)

        ftimes.write(str(nc) + '\t' + str(t_nx) + '\t' + str(t_ig) + '\n')
    ftimes.close()
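# A quick, self-contained check of the normalization conventions noted in the
# comments above (illustrative sketch; the graph and tolerance are arbitrary):
# nx degree centrality should equal k/(N-1), and nx normalized betweenness
# should equal the raw pair count divided by (n-1)*(n-2)/2 on undirected graphs.
import networkx as nx

G_check = nx.karate_club_graph()
n_check = G_check.number_of_nodes()

for v, dc in nx.degree_centrality(G_check).items():
    assert abs(dc - G_check.degree(v) / (n_check - 1)) < 1e-9

raw = nx.betweenness_centrality(G_check, normalized=False)
norm = nx.betweenness_centrality(G_check, normalized=True)
scale = (n_check - 1) * (n_check - 2) / 2.0
for v in G_check:
    assert abs(norm[v] - raw[v] / scale) < 1e-9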
Example #40
0
    def betweenness_centrality(self):
        self.betweenness_centrality_dict = nx.betweenness_centrality(self.G)
Example #41
0
from typing import Hashable

import networkx as nx


def get_center(graph: nx.Graph) -> Hashable:
    centralities = nx.betweenness_centrality(graph)
    return max(centralities, key=centralities.get)
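# Illustrative use of get_center (assumes networkx imported as nx, as above):
# in a star graph the hub lies on every shortest path, so it has the highest
# betweenness and is returned as the center.
print(get_center(nx.star_graph(5)))  # -> 0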
import numpy

G = nx.Graph()
G.add_edges_from([(1, 2), (3, 11), (4, 5), (5, 6), (5, 7), (5, 8), (5, 9),
                  (5, 10), (10, 11), (10, 13), (11, 13), (12, 14), (12, 15),
                  (13, 14), (13, 15), (13, 16), (13, 17), (14, 15), (14, 16),
                  (15, 16)])

dict_degree = {}
dict_closeness = {}
dict_betweenness = {}
dict_coreness = {}

# compute the whole-graph measures once, then look up each node
betweenness = nx.betweenness_centrality(G)
coreness = nx.core_number(G)

for each in G.nodes():
    dict_degree[each] = G.degree(each)
    dict_closeness[each] = nx.closeness_centrality(G, each)
    dict_betweenness[each] = betweenness[each]
    dict_coreness[each] = coreness[each]

dict_cascade = {}  # holds the cascading power of each node

for each in G.nodes():
    c = []
    # the cascade is random, so average the total number of infected
    # nodes over 1000 iterations
    for num in range(0, 1000):
        seed = [each]
        i = independentcascade(G, seed)
        c.append(len(i))
    dict_cascade[each] = numpy.average(c)
sorted_dict_cascade = sorted(dict_cascade, key=dict_cascade.get, reverse=True)
sorted_dict_deg = sorted(dict_degree, key=dict_degree.get, reverse=True)
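# The snippet above assumes an independentcascade(G, seed) helper that is not
# shown. A minimal sketch of the independent cascade model (hypothetical
# implementation; the propagation probability p is an assumed parameter):
import random

def independentcascade(G, seed, p=0.1):
    """Run one independent-cascade simulation and return the set of infected nodes."""
    infected = set(seed)
    frontier = list(seed)
    while frontier:
        new_frontier = []
        for u in frontier:
            for v in G.neighbors(u):
                # each newly activated node gets one chance to infect each neighbor
                if v not in infected and random.random() < p:
                    infected.add(v)
                    new_frontier.append(v)
        frontier = new_frontier
    return infected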
def get_graph_properties(edges):
    # Set up graph
    connections = np.array([int(x) for x in edges.split(';')])

    nodes = sorted(list(set(connections)))
    # Calculate Properties
    properties = []
    timings = {}

    if connections[0] > 0:
        edges = connections.reshape(int(connections.size / 2), 2)
        timeS = time.time()

        # directed graph
        G = nx.DiGraph()
        G.add_edges_from(edges)

        # undirected graph
        U = nx.Graph()
        U.add_edges_from(edges)
        # graph generated

        # property 1: number of components
        num_comp = nx.number_connected_components(U)
        properties.append(num_comp)

        # property 2: number of strongly connected components
        num_strong_comp = nx.number_strongly_connected_components(G)
        properties.append(num_strong_comp)

        # property 3: average in/out degree
        indeg = []
        outdeg = []
        indeg_ls = list(G.in_degree())
        outdeg_ls = list(G.out_degree())

        for x in np.arange(len(nodes)):
            indeg.append(indeg_ls[x][1])
            outdeg.append(outdeg_ls[x][1])
        av_deg = np.mean(indeg)
        properties.append(av_deg)

        # property 4: link density
        linkden = connections.size / (len(nodes) * len(nodes))
        properties.append(linkden)

        # property 5: number of self loops
        numloop = list(nx.selfloop_edges(G))
        numloop = len(numloop)
        properties.append(numloop)
        #       # property 6: number of simple cycles (excluding self loops)
        #       numcyc = list(nx.simple_cycles(G))
        #       numcyc = len(numcyc) - numloop
        #       properties.append(numcyc)

        #       timings.update({'p6':time.time()-timeS})
        #       print('p6')
        #       print(timings['p6'])
        #       timeS = time.time()

        # find all components
        components = list(nx.connected_components(U))

        ischain = [None] * len(components)
        istree = [None] * len(components)
        isdag = [None] * len(components)
        unicel = [None] * len(components)
        isscc = [None] * len(components)
        iscyc = [None] * len(components)
        iseul = [None] * len(components)
        indeg_by_comp = []
        outdeg_by_comp = []
        node_conn = [0] * len(components)
        av_clust = [0.] * len(components)
        assort = [0.] * len(components)
        indeg_cen_av = [0.] * len(components)
        indeg_cen_max = [0.] * len(components)
        indeg_cen_min = [0.] * len(components)
        outdeg_cen_av = [0.] * len(components)
        outdeg_cen_max = [0.] * len(components)
        outdeg_cen_min = [0.] * len(components)
        bet_cen_av = [0.] * len(components)
        bet_cen_max = [0.] * len(components)
        bet_cen_min = [0.] * len(components)
        eig_cen_av = [0.] * len(components)
        eig_cen_max = [0.] * len(components)
        eig_cen_min = [0.] * len(components)
        triangles_av = [0.] * len(components)
        triangles_max = [0.] * len(components)
        triangles_min = [0.] * len(components)
        squares_av = [0.] * len(components)
        squares_max = [0.] * len(components)
        squares_min = [0.] * len(components)
        transitivity = [0.] * len(components)
        rc = [0.] * len(components)
        loopnumber = [0] * len(components)

        for compnum in np.arange(len(components)):
            # property 6: is it a chain? (remove self-loops, then test)
            # we want to know how many chains the graph contains, so look at each
            # component rather than the whole graph in one go; most graphs are
            # single components.
            G1 = G.subgraph(list(components[compnum]))
            Gnoself = G1.copy()
            Gnoself.remove_edges_from(list(nx.selfloop_edges(Gnoself)))
            Unoself = nx.Graph()
            Unoself.add_edges_from(Gnoself.edges)

            # if all in and out degrees are 1, graph is a chain..do not include in trees
            indeg2 = []
            outdeg2 = []
            indeg_ls2 = list(Gnoself.in_degree())
            outdeg_ls2 = list(Gnoself.out_degree())
            # nx gives indeg and outdeg as tuples (nodename, in/out deg), which is why the for loop below is needed
            for x in np.arange(len(G1.nodes())):
                indeg2.append(indeg_ls2[x][1])
                outdeg2.append(outdeg_ls2[x][1])
            indeg_by_comp.append(int_arr_to_str(indeg2, delim=';'))
            outdeg_by_comp.append(int_arr_to_str(outdeg2, delim=';'))

            indeg2 = np.array(indeg2)
            outdeg2 = np.array(outdeg2)
            in_min_out = indeg2 - outdeg2
            ischain[compnum] = int((np.sum(in_min_out) == 0)
                                   & (np.sum(np.abs(in_min_out)) == 2)
                                   & (np.all(indeg2 <= 1))
                                   & (np.all(outdeg2 <= 1)))
            # property 7: istree(remove chains first)
            istree[compnum] = int((nx.is_tree(Gnoself) - ischain[compnum]) > 0)
            # property 8: isdag(only looking at DAGs other than trees and chains)
            isdag[compnum] = int((int(nx.is_directed_acyclic_graph(Gnoself)) -
                                  istree[compnum] - ischain[compnum]) > 0)
            if isdag[compnum] > 0:
                loopnumber[compnum] = len(list(
                    Gnoself.edges)) - (len(list(Gnoself.nodes)) - 1)
            # property 9: single celled
            unicel[compnum] = int(len(Gnoself.nodes) == 1)
            istree[compnum] = int(istree[compnum]) - int(
                unicel[compnum]
            )  # nx counts single node with no self-edge as a tree
            # property 10: isscc (excluding unicellular)
            num_strong_comp2 = nx.number_strongly_connected_components(Gnoself)
            isscc[compnum] = int(num_strong_comp2 == 1)
            isscc[compnum] = int((isscc[compnum] - unicel[compnum]) > 0)
            # property 11: iscyc(cyclic graphs other than those with a single scc and single celled graphs)
            iscyc[compnum] = int((isdag[compnum] + istree[compnum] +
                                  ischain[compnum] + isscc[compnum] +
                                  unicel[compnum]) == 0)
            # property 12: is eulerian
            iseul[compnum] = int(nx.is_eulerian(Gnoself))
            # property 13: node connectivity
            node_conn[compnum] = approx.node_connectivity(Gnoself)
            # property 14: clustering coefficient
            av_clust[compnum] = nx.average_clustering(Gnoself)
            # property 15: assortativity(pearson's coefficient)
            try:
                # may be nan or raise for degenerate degree sequences
                assort[compnum] = nx.degree_pearson_correlation_coefficient(
                    Gnoself)
            except Exception:
                assort[compnum] = 0.0
            # property 16,17,18: in degree centrality (average, maximum and minimum)
            indeg_cen = []
            dict1 = nx.in_degree_centrality(Gnoself)
            for a1 in dict1:
                indeg_cen.append(dict1[a1])
            indeg_cen_av[compnum] = np.average(indeg_cen)
            indeg_cen_max[compnum] = max(indeg_cen)
            indeg_cen_min[compnum] = min(indeg_cen)
            # property 19,20,21: out degree centrality (average, maximum, minimum)
            outdeg_cen = []
            dict1 = nx.out_degree_centrality(Gnoself)
            for a1 in dict1:
                outdeg_cen.append(dict1[a1])
            outdeg_cen_av[compnum] = np.average(outdeg_cen)
            outdeg_cen_max[compnum] = max(outdeg_cen)
            outdeg_cen_min[compnum] = min(outdeg_cen)
            # property 22,23,24: betweenness centrality (average,maximum, minimum)
            bet_cen = []
            dict1 = nx.betweenness_centrality(Gnoself)
            for a1 in dict1:
                bet_cen.append(dict1[a1])
            bet_cen_av[compnum] = np.average(bet_cen)
            bet_cen_max[compnum] = max(bet_cen)
            bet_cen_min[compnum] = min(bet_cen)
            # property 25,26,27: eigen vector centrality (average,maximum, minimum)
            eig_cen = []
            try:
                dict1 = nx.eigenvector_centrality(Gnoself)
                for a1 in dict1:
                    eig_cen.append(dict1[a1])
                eig_cen_av[compnum] = np.average(eig_cen)
                eig_cen_max[compnum] = max(eig_cen)
                eig_cen_min[compnum] = min(eig_cen)
            except nx.PowerIterationFailedConvergence:
                pass
            # property 28,29,30: number of triangles for each node (average,maximum, minimum)
            triangles = []
            dict1 = nx.triangles(Unoself)
            for a1 in dict1:
                triangles.append(dict1[a1])
            if len(triangles):
                triangles_av[compnum] = np.average(triangles)
                triangles_max[compnum] = max(triangles)
                triangles_min[compnum] = min(triangles)
            # property 31: transitivity (fraction of all possible triangles present in the graph)
            transitivity[compnum] = nx.transitivity(Gnoself)
            # property 32,33,34: square clustering for each node(fraction of all possible squares present at a node)
            squares = []
            dict1 = nx.square_clustering(Gnoself)
            for a1 in dict1:
                squares.append(dict1[a1])
            if len(squares):
                squares_av[compnum] = np.average(squares)
                squares_max[compnum] = max(squares)
                squares_min[compnum] = min(squares)
            # property 35: rich club coefficient
            if len(list(Unoself.nodes())) > 3:
                rc[compnum] = 0.0
                # rc[compnum] = nx.rich_club_coefficient(Unoself).values()
                # (only works if the graph has 4 or more edges)

        # property 36 and 37: number of source and target nodes (not computed here)
        iseul = sum(iseul)
        iscyc = sum(iscyc)
        isscc = sum(isscc)
        unicel = sum(unicel)
        isdag = sum(isdag)
        istree = sum(istree)
        ischain = sum(ischain)
        indeg_by_comp = ';'.join([str(x) for x in indeg_by_comp])
        outdeg_by_comp = ';'.join([str(x) for x in outdeg_by_comp])
        node_conn = ';'.join([str(x) for x in node_conn
                              ])  # node connectivity for each component
        avav_clust = np.average(
            av_clust)  # average clustering coefficient over all components
        av_clust = ';'.join([
            str(round(x, 2)) for x in av_clust
        ])  # average clustering coefficients for each component
        av_assort = np.average(
            assort)  # average assortativity over all components
        assort = ';'.join([str(round(x, 2)) for x in assort
                           ])  # assortativity for each component
        indeg_cen_avav = np.average(
            indeg_cen_av)  # average indeg centrality over all components
        indeg_cen_av = ';'.join([
            str(round(x, 2)) for x in indeg_cen_av
        ])  # average indeg centrality for each component
        indeg_cen_maxmax = max(
            indeg_cen_max)  # maximum indeg centrality across all components
        indeg_cen_max = ';'.join([
            str(round(x, 2)) for x in indeg_cen_max
        ])  # maximum indeg centrality for each component
        indeg_cen_minmin = min(
            indeg_cen_min)  # minimum indeg centrality across all components
        indeg_cen_min = ';'.join([
            str(round(x, 2)) for x in indeg_cen_min
        ])  # minimum indeg centrality for each component

        outdeg_cen_avav = np.average(outdeg_cen_av)
        outdeg_cen_av = ';'.join([str(round(x, 2)) for x in outdeg_cen_av])
        outdeg_cen_maxmax = max(outdeg_cen_max)
        outdeg_cen_max = ';'.join([str(round(x, 2)) for x in outdeg_cen_max])
        outdeg_cen_minmin = min(outdeg_cen_min)
        outdeg_cen_min = ';'.join([str(round(x, 2)) for x in outdeg_cen_min])
        bet_cen_avav = np.average(bet_cen_av)
        bet_cen_av = ';'.join([str(round(x, 2)) for x in bet_cen_av])
        bet_cen_maxmax = max(bet_cen_max)
        bet_cen_max = ';'.join([str(round(x, 2)) for x in bet_cen_max])
        bet_cen_minmin = min(bet_cen_min)
        bet_cen_min = ';'.join([str(round(x, 2)) for x in bet_cen_min])
        eig_cen_avav = np.average(eig_cen_av)
        eig_cen_av = ';'.join([str(round(x, 2)) for x in eig_cen_av])
        eig_cen_maxmax = max(eig_cen_max)
        eig_cen_max = ';'.join([str(round(x, 2)) for x in eig_cen_max])
        eig_cen_minmin = min(eig_cen_min)
        eig_cen_min = ';'.join([str(round(x, 2)) for x in eig_cen_min])
        triangles_avav = np.average(triangles_av)
        triangles_av = ';'.join([str(x) for x in triangles_av])
        triangles_maxmax = max(triangles_max)
        triangles_max = ';'.join([str(x) for x in triangles_max])
        triangles_minmin = min(triangles_min)
        triangles_min = ';'.join([str(x) for x in triangles_min])
        transitivity_av = np.average(transitivity)
        transitivity_max = max(transitivity)
        transitivity_min = min(transitivity)
        transitivity = ';'.join([str(x) for x in transitivity])
        squares_avav = np.average(squares_av)
        squares_maxmax = max(squares_max)
        squares_minmin = min(squares_min)
        squares_av = ';'.join([str(x) for x in squares_av])
        squares_max = ';'.join([str(x) for x in squares_max])
        squares_min = ';'.join([str(x) for x in squares_min])
        rc_av = np.average(rc)
        rc_max = max(rc)
        rc_min = min(rc)
        rc = ';'.join([str(x) for x in rc])
        ln = [loopnumber[x] for x in np.nonzero(loopnumber)[0]]
        if any(ln):
            loopnumber_av = np.average(ln)
        else:
            loopnumber_av = 0.0
        loopnumber = ';'.join([str(x) for x in loopnumber])

        # check: the sum of iscyc, isscc, unicel, isdag, istree and ischain should equal the total number of components
        if num_comp != (iscyc + isscc + unicel + isdag + istree + ischain):
            print('Number of components is wrong!!!!!!')
            print(num_comp)
            print([iscyc, isscc, unicel, isdag, istree, ischain])
            sys.exit()

        properties.append(indeg_by_comp)  # string
        properties.append(outdeg_by_comp)  #string
        properties.append(ischain)  #int
        properties.append(istree)  #int
        properties.append(isdag)  #int
        properties.append(unicel)  #int
        properties.append(isscc)  #int
        properties.append(iscyc)  #int
        properties.append(iseul)  #int
        properties.append(loopnumber_av)  #float
        properties.append(loopnumber)  #string
        properties.append(node_conn)  #string
        properties.append(avav_clust)  #float
        properties.append(av_clust)  #string
        properties.append(av_assort)  #float
        properties.append(assort)  #string
        properties.append(indeg_cen_avav)  #float
        properties.append(indeg_cen_av)  #string
        properties.append(indeg_cen_maxmax)  #float
        properties.append(indeg_cen_max)  #string
        properties.append(indeg_cen_minmin)  #float
        properties.append(indeg_cen_min)  #string
        properties.append(outdeg_cen_avav)  #float
        properties.append(outdeg_cen_av)  #string
        properties.append(outdeg_cen_maxmax)  #float
        properties.append(outdeg_cen_max)  #string
        properties.append(outdeg_cen_minmin)  #float
        properties.append(outdeg_cen_min)  #string
        properties.append(bet_cen_avav)  #float
        properties.append(bet_cen_av)  #string
        properties.append(bet_cen_maxmax)  #float
        properties.append(bet_cen_max)  #string
        properties.append(bet_cen_minmin)  #float
        properties.append(bet_cen_min)  #string
        properties.append(eig_cen_avav)  #float
        properties.append(eig_cen_av)  #string
        properties.append(eig_cen_maxmax)  #float
        properties.append(eig_cen_max)  #string
        properties.append(eig_cen_minmin)  #float
        properties.append(eig_cen_min)  #string
        properties.append(triangles_avav)  #float
        properties.append(triangles_av)  #string
        properties.append(triangles_maxmax)  #float
        properties.append(triangles_max)  #string
        properties.append(triangles_minmin)  #float
        properties.append(triangles_min)  #string
        properties.append(transitivity_av)  # float
        properties.append(transitivity_max)  #float
        properties.append(transitivity_min)  #float
        properties.append(transitivity)  #string
        properties.append(squares_avav)  #float
        properties.append(squares_av)  #string
        properties.append(squares_maxmax)  #float
        properties.append(squares_max)  #string
        properties.append(squares_minmin)  #float
        properties.append(squares_min)  #string
        properties.append(rc_av)  # float
        properties.append(rc_max)  #float
        properties.append(rc_min)  #float
        properties.append(rc)  #string

        # append more properties.....
        # property 14:

        # property x: in-degree sequence
        #indeg = # list(G.in_degree())[iterate over number of nodes][1]
        # property y: out-degree sequence
        #outdeg = # list(G.in_degree())[iterate over number of nodes][1]
        #.....
    else:
        properties = [0] * 2 + [0.] * 2 + [0] + [''] * 2 + [0] * 7 + [
            0.
        ] + [''] * 2 + [0., ''] * 17 + [0.] * 3 + [''] + [0., ''] * 3 + [
            0., 0., 0., ''
        ]

    # return list of properties
    return properties
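# get_graph_properties relies on an int_arr_to_str helper that is not shown;
# a plausible stand-in (assumption) and an illustrative call follow. The edge
# string format is inferred from the parsing above: a ';'-separated flat list
# of node pairs. The usual imports (numpy as np, networkx as nx, time, sys,
# and networkx.algorithms.approximation as approx) are assumed.
def int_arr_to_str(arr, delim=';'):
    return delim.join(str(int(x)) for x in arr)

props = get_graph_properties('1;2;2;3;3;1')  # the directed triangle 1->2->3->1
print(len(props))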
Example #44
0
time.sleep(10)

########## CENTRALITY MEASURES ##########

print(colored('\n \n ##########  CENTRALITY MEASURES ##########', 'red'))

time.sleep(3)

# Degree centrality (the number of connections from a node to all the others)
print(colored('\n \n DEGREE CENTRALITY \n', 'red'))
time.sleep(3)
print(net.degree_centrality(g))
time.sleep(0.5)

# Eigenvector centrality (how important a node is, given how well connected it is)
print(colored('\n \n EIGENVECTOR CENTRALITY \n', 'red'))
time.sleep(3)
print(net.eigenvector_centrality(g))
time.sleep(0.5)

# Closeness centrality (a node's importance, given its proximity to the other nodes of the network)
print(colored('\n \n CLOSENESS CENTRALITY \n', 'red'))
time.sleep(3)
print(net.closeness_centrality(g))
time.sleep(0.5)

# Betweenness centrality (counts how many times a node appears on the shortest paths between two nodes)
print(colored('\n \n BETWEENNESS CENTRALITY \n', 'red'))
time.sleep(3)
print(net.betweenness_centrality(g))
Example #45
0
def graphAnalysis():

    if request.headers.get('tga-key') != tga_key:
        return jsonify({
            'message': 'Not authorized for twina graph api.'
        })

    body = request.get_json()

    graph = http_client.get(fb_db_base_url + body['graph_path'] + ".json")

    graph = graph.json()

    G = nx.Graph()

    G.add_nodes_from([(screen_name, graph['nodes'][screen_name])
                      for screen_name in graph['nodes']])

    G.add_edges_from([
        (
            graph['edges'][source_target]['source'],
            graph['edges'][source_target]['target'],
            graph['edges'][source_target]
        )
        for source_target in graph['edges']
    ])

    analysis = {
        # Single-Result
        'number_of_nodes': G.number_of_nodes(),
        'number_of_edges': G.number_of_edges(),
        'average_clustering': nx.average_clustering(G),

        # Nodes Analysis
        'clustering': nx.clustering(G),
        'square_clustering': nx.square_clustering(G),
        'degree_centrality': nx.degree_centrality(G),
        'closeness_centrality': nx.closeness_centrality(G),
        'betweenness_centrality': nx.betweenness_centrality(G),
    }

    for nodes_analysis in [
        'clustering',
        'square_clustering',
        'degree_centrality',
        'closeness_centrality',
        'betweenness_centrality'
    ]:
        print(analysis[nodes_analysis].keys())
        for node in analysis[nodes_analysis].keys():
            if 'analysis' not in graph['nodes'][node].keys():
                graph['nodes'][node]['analysis'] = {}

            graph['nodes'][node]['analysis'][nodes_analysis] = analysis[nodes_analysis][node]

    try:
        # post analysis
        http_client.put(fb_db_base_url +
                        body['analysis_path'] + ".json", data=json.dumps(analysis))

        # modify graph with analysis
        http_client.put(fb_db_base_url +
                        body['graph_path'] + ".json", data=json.dumps({
                            'nodes': graph['nodes'],
                            'edges': graph['edges']
                        }))

    except Exception as e:
        current_app.logger.error('Failed to post analysis: ' + str(e))

    return jsonify({
        'message': 'Graph analyzed',
        # 'data': analysis,
    })
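# Illustrative client call for the handler above (endpoint path and key value
# are hypothetical; only the 'tga-key' header and the graph_path/analysis_path
# body fields are taken from the code):
import requests

resp = requests.post(
    'http://localhost:5000/graphAnalysis',
    headers={'tga-key': '<your-key>'},
    json={'graph_path': '/graphs/my_graph',
          'analysis_path': '/analysis/my_graph'},
)
print(resp.json())  # -> {'message': 'Graph analyzed'}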
Example #46
0
def computeComponent(size):
    randomCompMean = []
    degreeCompMean = []
    closenessCompMean = []
    betweennessCompMean = []

    for r in range(1, 2):
        print("step: ", r)
        graph = nx.erdos_renyi_graph(1000, 0.1)
        graph1 = graph.copy()
        graph2 = graph.copy()
        graph3 = graph.copy()

        # Remove high-degree nodes, create a list of connected-component sizes (attack)
        listDegree = [
            x[0] for x in sorted(
                dict(graph.degree()).items(), reverse=True, key=lambda x: x[1])
        ]
        degreeCompMean.append(connectedComponentsList(graph, listDegree, size))
        print("degree")

        # Remove randomly sampled nodes, create a list of connected-component sizes
        listRandomNodes = random.sample(list(graph1.nodes()), len(graph1.nodes()))
        randomCompMean.append(
            connectedComponentsList(graph1, listRandomNodes, size))
        print("random")
        # Remove high closeness_centrality nodes, create a list of connected-component sizes
        listClosenessCentrality = [
            x[0] for x in sorted(dict(nx.closeness_centrality(graph2)).items(),
                                 reverse=True,
                                 key=lambda x: x[1])
        ]
        closenessCompMean.append(
            connectedComponentsList(graph2, listClosenessCentrality, size))
        # Remove high betweenness_centrality nodes, create a list of connected-component sizes
        listBetweennessCentrality = [
            x[0]
            for x in sorted(dict(nx.betweenness_centrality(graph3)).items(),
                            reverse=True,
                            key=lambda x: x[1])
        ]
        betweennessCompMean.append(
            connectedComponentsList(graph3, listBetweennessCentrality, size))
        #print("bet")
    degreeCompMean = computeMean(degreeCompMean)
    randomCompMean = computeMean(randomCompMean)
    closenessCompMean = computeMean(closenessCompMean)
    betweennessCompMean = computeMean(betweennessCompMean)

    # plotting:
    x = [x * 0.01 for x in range(1, 100)]
    print("x: ", x)
    degree, = plt.plot(x, degreeCompMean, label="degree (attack)", color='b')
    randoms, = plt.plot(x, randomCompMean, label="random", color='y')
    closeness, = plt.plot(x, closenessCompMean, label="closeness", color='g')
    betweenness, = plt.plot(x,
                            betweennessCompMean,
                            label="betweenness",
                            color='r')

    # drawing legend and titles:
    legend = plt.legend(bbox_to_anchor=(0.96, 0.94),
                        loc="upper right",
                        borderaxespad=0.)
    plt.gca().add_artist(legend)
    #plt.title("Robustness of networks" + "\n" + "Watts, N = 1000, k = 8")
    plt.xlabel("Removed nodes")
    plt.ylabel("connected components coefficient")
    plt.savefig("ER_robustness1.jpg")
Example #47
0
def complex_network_mapping(graph):
    """
    Compute the vectorial mapping of a graph based on the computation of
    several complex-network analysis indexes.
    """
    vect = []

    n = nx.number_of_nodes(graph)
    e = nx.number_of_edges(graph)
    print(n, e)

#    adj = nx.adjacency_matrix(graph).toarray()
#    adj_bin = np.where(adj > 0, 1., 0.)
#    adj_conn = 1 - adj
    adj_bin = nx.adjacency_matrix(graph).toarray()
    adj_bin = np.array(adj_bin, dtype=float)

    # Node betweenness (binary)
    bt_bin = list(nx.betweenness_centrality(graph).values())
    avg_btb = np.mean(bt_bin)
    vect.append(avg_btb)

    # Edge betweenness
    ebt = np.array(list(nx.edge_betweenness_centrality(graph).values()))
    vect.append(np.mean(ebt))

    # Eigen vector centrality binary
    evc_bin = eigenvector_centrality_und(adj_bin)
    avg_evcb = np.mean(evc_bin)
    vect.append(avg_evcb)

    # Flow coefficient
    _, flow_bin, _ = flow_coef_bd(adj_bin)
    avg_flow = np.mean(flow_bin)
    vect.append(avg_flow)

    # Kcoreness centrality
    kcor_bin, _ = kcoreness_centrality_bu(adj_bin)
    avg_kcor = np.mean(kcor_bin)
    vect.append(avg_kcor)

    # Degree assortativity
    dac = nx.degree_assortativity_coefficient(graph)
    vect.append(dac)

    # Page rank centrality
#    pgr_wei = pagerank_centrality(adj_bin, d=0.85)
#    avg_pgr = np.mean(pgr_wei)
#    vect.append(avg_pgr)

    # Rich club coefficient
#    rcc = nx.rich_club_coefficient(graph).values()
#    avg_rcc = np.mean(rcc)
#    vect.append(avg_rcc)

    # Transitivity
    tr = nx.transitivity(graph)
    vect.append(tr)

    # average clustering
    avg_clst = nx.average_clustering(graph)
    vect.append(avg_clst)

    glb_ef = efficiency_bin(adj_bin)
    vect.append(glb_ef)

    return vect
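# The helpers eigenvector_centrality_und, flow_coef_bd, kcoreness_centrality_bu
# and efficiency_bin are not defined in this snippet; the names match the bctpy
# (Brain Connectivity Toolbox) package, so the file presumably starts with
# something like the import below (an assumption). An illustrative call follows.
from bct import (eigenvector_centrality_und, flow_coef_bd,
                 kcoreness_centrality_bu, efficiency_bin)

vect = complex_network_mapping(nx.karate_club_graph())
print(len(vect))  # one scalar index per measure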
Example #48
0
#Write out initial graph data in JSON file
jsonData = json_graph.node_link_data(M)
with open('evo_0.json', 'w') as outfile:
    json.dump(jsonData, outfile, indent=4)

#Eigenvector centrality criteria
Meigen = nx.eigenvector_centrality(M)
normeigen = [float(i) / max(Meigen.values()) for i in Meigen.values()]

#Closeness centrality
Mclose = nx.closeness_centrality(M)
normclose = list(Mclose.values())

#Betweenness centrality
Mbetween = nx.betweenness_centrality(M)
normbetween = list(Mbetween.values())

N = len(M.nodes())
labels = [i[1]['name'] for i in M.nodes(data=True)]

# ###################### Evolution ####################

import operator

# Common Neighbors
CN = [(e[0], e[1], len(list(nx.common_neighbors(M, e[0], e[1]))))
      for e in nx.non_edges(M)]
CN.sort(key=operator.itemgetter(2), reverse=True)

# Jaccard coef
jaccard = list(nx.jaccard_coefficient(M))
jaccard.sort(key=operator.itemgetter(2), reverse=True)
Example #49
0
nodes = [d for _, d in G1.degree()]  # degree sequence
plt.hist(nodes, bins=25)
plt.xlim(0,200)
plt.show()

Counter(nx.degree_centrality(G)).most_common(5)

len(list(nx.connected_components(G1)))


size = [len(c) for c in nx.connected_components(G1)]

plt.hist(size[1:])

G2 = nx.read_edgelist('data/small_actor_edges.tsv', delimiter='\t')

len(list(nx.connected_components(G2)))

Counter(nx.degree_centrality(G2)).most_common(5)

Counter(nx.betweenness_centrality(G2)).most_common(5)


karateG = nx.karate_club_graph()

# betweenness= nx.edge_betweenness_centrality(karateG)
#
# u,v = sorted(betweenness.items(), key=lambda x: x[1])[-1][0]
#
# karateG.remove_edge(u,v)
Example #50
0
def set_up_hash_distr(net_p2p, centrality_measure, hash_distribution, number_selfish_nodes, number_honest_nodes, alpha):
    # make sure alpha is 0 whenever there are no selfish nodes (covers the case of simulating only honest nodes)
    assert not (number_selfish_nodes == 0 and alpha !=
                0), "Alpha unequal 0 with no selfish nodes"

    if hash_distribution == "UNIFORM":
        hashing_power_selfish = np.random.random(number_selfish_nodes)
        hashing_power_honest = np.random.random(number_honest_nodes)

    elif hash_distribution == "POWERLAW":
        power_distrib = pl.Power_Law(parameters=[pl_alpha], discrete=False)
        hashing_power_selfish = power_distrib.generate_random(
            number_selfish_nodes)
        hashing_power_honest = power_distrib.generate_random(
            number_honest_nodes)

    elif hash_distribution == "EXPONENTIAL":
        exp_distrib = pl.Exponential(parameters=[exp_lambda])
        hashing_power_selfish = exp_distrib.generate_random(
            number_selfish_nodes)
        hashing_power_honest = exp_distrib.generate_random(
            number_honest_nodes)

    # normalize vector so that sum of selfish hashing power equals alpha & honest hashing power equals 1-alpha.
    if number_selfish_nodes != 0:
        hashing_power_selfish /= sum(hashing_power_selfish)
        hashing_power_selfish *= alpha
    hashing_power_honest /= sum(hashing_power_honest) / (1 - alpha)

    # combine selfish and honest hashing power vectors together
    hashing_power_unsorted = np.append(
        hashing_power_selfish, hashing_power_honest)

    if centrality_measure == "RANDOM":
        # create an is_selfish vector that corresponds to the order of the hashing_power vector
        is_selfish = np.append(np.ones(number_selfish_nodes),
                               np.zeros(number_honest_nodes))

        # finally, randomize is_selfish and hashing_power arrays in unison
        randomize = np.arange(len(hashing_power_unsorted))
        np.random.shuffle(randomize)
        hashing_power = hashing_power_unsorted[randomize]
        is_selfish = is_selfish[randomize]

    elif centrality_measure == "BETWEENNESS":
        # compute betweenness centrality and sort it
        btwn = nx.betweenness_centrality(net_p2p)
        btwn_sorted = {k: v for k, v in sorted(
            btwn.items(), key=lambda item: item[1], reverse=True)}
        # node indices sorted by betweenness centrality, highest first
        btwn_sorted_indices = list(btwn_sorted.keys())

        selfish_indices = btwn_sorted_indices[:number_selfish_nodes]
        honest_indices = btwn_sorted_indices[number_selfish_nodes:]

        # set selfish nodes according to betweenness centrality
        is_selfish = np.zeros(number_honest_nodes+number_selfish_nodes)
        for i in selfish_indices:
            is_selfish[i] = 1

        # sort hashing power vector so that selfish nodes are assigned correct hashing power
        hashing_power = hashing_power_unsorted.copy()
        for (index, value) in enumerate(btwn_sorted):
            hashing_power[value] = hashing_power_unsorted[index]

    return hashing_power, is_selfish
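# Illustrative call (assumes numpy as np and networkx as nx are imported; the
# topology and parameter values are arbitrary). The returned hashing power
# should sum to 1, split alpha / (1 - alpha) between selfish and honest nodes.
net_p2p_demo = nx.random_regular_graph(4, 20, seed=1)
hashing_power, is_selfish = set_up_hash_distr(
    net_p2p_demo, centrality_measure="BETWEENNESS",
    hash_distribution="UNIFORM", number_selfish_nodes=2,
    number_honest_nodes=18, alpha=0.3)
assert abs(hashing_power.sum() - 1) < 1e-9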
Example #51
0
def function():
    mapbox_access_token = 'pk.eyJ1IjoiY2xlaXR1cyIsImEiOiJjamgwZ2c1a3Yxc3dtMnFtb2ptdDR5ZWs0In0.sjZdn45v32AojmWGWIN9Tg'
    pt.set_credentials_file(username='******', api_key='9LICBZ681YiPTiSZCuFX')

    # ########################### Reading Initial Data ###################################
    with open('fb_nodes.json') as f:
        nodes = json.load(f)

    with open('fb_edges.json') as f:
        links = json.load(f)

    for i in links:
        i['value'] = 'init'

    # ########################### Reading Initial Data ###################################

    #nodes = data['nodes']
    #links = data['edges']

    M = nx.Graph(
        [(i['source'], i['target'], {'value': i['value']}) for i in links])
    for i in range(len(M.nodes)):
        node = nodes[i]['id']
        M.add_node(node, group=nodes[i]['group'], name=nodes[i]['name'],
                   istrain=nodes[i]['istrain'], lat=nodes[i]['lat'],
                   lon=nodes[i]['lon'], id=nodes[i]['id'])


    # ###################### Evolution ####################


    # Common Neighbors
    CN = [(e[0], e[1], len(list(nx.common_neighbors(M, e[0], e[1]))))
          for e in nx.non_edges(M)]
    CN.sort(key=operator.itemgetter(2), reverse=True)

    # Jaccard coef
    jaccard = list(nx.jaccard_coefficient(M))
    jaccard.sort(key=operator.itemgetter(2), reverse=True)

    # Resource Allocation index
    RA = list(nx.resource_allocation_index(M))
    RA.sort(key=operator.itemgetter(2), reverse=True)

    # Adamic-Adar index
    AA = list(nx.adamic_adar_index(M))
    AA.sort(key=operator.itemgetter(2), reverse=True)

    # Preferential Attachement
    PA = list(nx.preferential_attachment(M))
    PA.sort(key=operator.itemgetter(2), reverse=True)

    # ###################### Prediction on Future Edge Linkage ####################

    FM = M.copy()  # work on a copy so M keeps only the original (observed) edges
    for i in PA[0:int(0.1*len(M.edges()))]:
        FM.add_edge(i[0], i[1], value='new')

    for i in CN[0:int(0.1*len(M.edges()))]:
        FM.add_edge(i[0], i[1], value='new')

    # Layout
    pos = nx.fruchterman_reingold_layout(FM, dim=3)
    lay = list()
    for i in pos.values():
        lay.append(list(i))
    N = len(FM.nodes())

    ulti = {}
    for i in pos.keys():
        ulti[i] = list(pos[i])

    # Eigenvector centrality criteria (normalised)
    Geigen = nx.eigenvector_centrality(FM)
    for i in Geigen:
        ulti[i].append(float(Geigen[i]) / max(Geigen.values()))

    # Closeness centrality
    Gclose = nx.closeness_centrality(FM)
    for i in Gclose:
        ulti[i].append(Gclose[i])

    # Betweenness centrality
    Gbetween = nx.betweenness_centrality(FM)
    for i in Gbetween:
        ulti[i].append(Gbetween[i])

    # ###################### Plot ####################

    # Node and edge coordinates
    Xv = [lay[k][0] for k in range(N)]  # x-coordinates of nodes
    Yv = [lay[k][1] for k in range(N)]  # y-coordinates
    Zv = [lay[k][2] for k in range(N)]  # z-coordinates
    Xed = []
    Yed = []
    Zed = []
    Xned = []
    Yned = []
    Zned = []
    for edge in M.edges():
        Xed += [pos[edge[0]][0], pos[edge[1]][0], None]
        Yed += [pos[edge[0]][1], pos[edge[1]][1], None]
        Zed += [pos[edge[0]][2], pos[edge[1]][2], None]

    for edge in [(i[0], i[1]) for i in list(FM.edges(data=True))
                 if i[2]['value'] == 'new']:
        Xned += [pos[edge[0]][0], pos[edge[1]][0], None]
        Yned += [pos[edge[0]][1], pos[edge[1]][1], None]
        Zned += [pos[edge[0]][2], pos[edge[1]][2], None]


    trace1 = Scatter3d(x=Xed,
                       y=Yed,
                       z=Zed,
                       mode='lines',
                       line=Line(color='rgb(125,125,125)', width=1),
                       hoverinfo='none'
                       )

    trace2 = Scatter3d(x=Xv,
                       y=Yv,
                       z=Zv,
                       mode='markers',
                       name='actors',
                       marker=Marker(symbol='dot',
                                     color=[i[-3] for i in ulti.values()],  # Eigenvector centrality
                                     # color=[i[-2] for i in ulti.values()],  # Closeness centrality
                                     # color=[i[-1] for i in ulti.values()],  # Betweenness centrality
                                     # color=[data['nodes'][k]['group'] for k in range(len(data['nodes']))],
                                     size=6,
                                     colorbar=ColorBar(title=''),
                                     colorscale='Viridis',
                                     line=Line(color='rgb(158,18,130)', width=0.5)
                                     ),
                       text=list(ulti.keys()),  # node labels
                       hoverinfo='text'
                       )

    data = Data([trace1, trace2])
    py.plot(data, filename='fb-3d')
    return
Example #52
0
import networkx as nx
import numpy as np
import pandas as pd
from bokeh.palettes import YlOrRd

df = pd.read_csv(
    'C:/Users/Meenu/PycharmProjects/CS590/CS590-Yelp/usernetwork1.csv')
df['distance'] = 1 / df['strength']
df_user = pd.read_csv(
    'C:/Users/Meenu/PycharmProjects/CS590/CS590-Yelp/userdetails1.csv')
del df_user['Unnamed: 0']

G = nx.from_pandas_edgelist(df, 'user1', 'user2', ['strength', 'distance'])
print(nx.number_connected_components(G))
nx.set_node_attributes(G, df_user.set_index('user_id').to_dict('index'))
nx.set_node_attributes(G, dict(G.degree(weight='strength')), 'WDegree')
nx.set_node_attributes(G, nx.betweenness_centrality(G, weight='distance'),
                       'bwcentral')
nx.set_node_attributes(G, nx.communicability_betweenness_centrality(G),
                       'ccentral')

# col = ['#FFFFFF', '#93CCB9', '#4D9980', '#24745A', '#074A34', '#002217']
col = YlOrRd[8]

for u in G.nodes():
    if G.nodes[u]['friend'] < 730:
        G.nodes[u]['friend'] = col[7]
    elif G.nodes[u]['friend'] < (730 * 2):
        G.nodes[u]['friend'] = col[6]
    elif G.nodes[u]['friend'] < (730 * 3):
        G.nodes[u]['friend'] = col[5]
    elif G.nodes[u]['friend'] < (730 * 4):
Example #53
0
def Between_Centrality(G):
    Bet_Centrality = nx.betweenness_centrality(G)
    #print "Bet_Centrality:", sorted(Bet_Centrality.iteritems(), key=lambda d:d[1], reverse = True)
    return Bet_Centrality
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap

data = pd.read_csv('erl_14_8_084011_sd_3.csv')
G = nx.from_pandas_edgelist(df=data,
                            source='ori',
                            target='des',
                            edge_attr='total',
                            create_using=nx.DiGraph())
connectivity = list(G.degree())
connectivity_values = [n[1] for n in connectivity]
centrality = list(nx.betweenness_centrality(G).values())

plt.figure(figsize=(12, 8))
plt.plot(centrality, connectivity_values, 'ro')
plt.xlabel('Node centrality', fontsize='large')
plt.ylabel('Node connectivity', fontsize='large')
plt.savefig("node_connectivity.png", dpi=300)
plt.show()

#Get 95th percentile of largest flows
threshold = np.percentile(data['total'], 95)
data = data.loc[(data['total'] > threshold)]

pos_data = pd.read_csv('counties.csv', delimiter=',')

G = nx.from_pandas_edgelist(df=data,
                            source='ori',
def centrality_fun(graph, feature_dim):
    nodes = list(graph.G.nodes)
    centrality = nx.betweenness_centrality(graph.G)
    graph.betweenness_centrality = torch.tensor(
        [centrality[x] for x in nodes]).unsqueeze(1)
    return graph
Example #56
0
def a_avg_between(G):
    return np.average(list(nx.betweenness_centrality(G, normalized=True).values()))
Example #57
0
for i in nodes:
    if i[1] == 1:  # hateful node
        for j in nx_graph.neighbors(i[0]):
            hateful_neighbors[j] = True
    if i[1] == 0:
        for j in nx_graph.neighbors(i[0]):
            normal_neighbors[j] = True

nx.set_node_attributes(nx_graph, name="hateful_neighbors", values=False)
nx.set_node_attributes(nx_graph,
                       name="hateful_neighbors",
                       values=hateful_neighbors)
nx.set_node_attributes(nx_graph, name="normal_neighbors", values=False)
nx.set_node_attributes(nx_graph,
                       name="normal_neighbors",
                       values=normal_neighbors)

# Set node network-based attributes, such as betweenness and eigenvector

betweenness = nx.betweenness_centrality(nx_graph, k=16258, normalized=False)
eigenvector = nx.eigenvector_centrality(nx_graph)
in_degree = nx.in_degree_centrality(nx_graph)
out_degree = nx.out_degree_centrality(nx_graph)

nx.set_node_attributes(nx_graph, name="betweenness", values=betweenness)
nx.set_node_attributes(nx_graph, name="eigenvector", values=eigenvector)
nx.set_node_attributes(nx_graph, name="in_degree", values=in_degree)
nx.set_node_attributes(nx_graph, name="out_degree", values=out_degree)

nx.write_graphml(nx_graph, "../data/features/users_hate.graphml")
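# Note on the k=16258 argument above: betweenness_centrality samples k source
# nodes to estimate the exact (k = n) computation; presumably 16258 matches the
# node count here, so the sampling code path yields the exact values. A small
# illustration on an arbitrary graph of how a sampled estimate tracks the exact one:
G_demo = nx.gnp_random_graph(200, 0.05, seed=42)
exact = nx.betweenness_centrality(G_demo, normalized=False)
sampled = nx.betweenness_centrality(G_demo, k=50, normalized=False, seed=42)
print(max(exact, key=exact.get), max(sampled, key=sampled.get))  # usually agree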
Example #58
0
def extended_stats(G,
                   connectivity=False,
                   anc=False,
                   ecc=False,
                   bc=False,
                   cc=False):
    """
    Calculate extended topological stats and metrics for a graph.

    Many of these algorithms have an inherently high time complexity. Global
    topological analysis of large complex networks is extremely time consuming
    and may exhaust computer memory. Consider using function arguments to not
    run metrics that require computation of a full matrix of paths if they
    will not be needed.

    Parameters
    ----------
    G : networkx multidigraph
    connectivity : bool
        if True, calculate node and edge connectivity
    anc : bool
        if True, calculate average node connectivity
    ecc : bool
        if True, calculate shortest paths, eccentricity, and topological metrics
        that use eccentricity
    bc : bool
        if True, calculate node betweenness centrality
    cc : bool
        if True, calculate node closeness centrality

    Returns
    -------
    stats : dict
        dictionary of network measures containing the following elements (some
        only calculated/returned optionally, based on passed parameters):

          - avg_neighbor_degree
          - avg_neighbor_degree_avg
          - avg_weighted_neighbor_degree
          - avg_weighted_neighbor_degree_avg
          - degree_centrality
          - degree_centrality_avg
          - clustering_coefficient
          - clustering_coefficient_avg
          - clustering_coefficient_weighted
          - clustering_coefficient_weighted_avg
          - pagerank
          - pagerank_max_node
          - pagerank_max
          - pagerank_min_node
          - pagerank_min
          - node_connectivity
          - node_connectivity_avg
          - edge_connectivity
          - eccentricity
          - diameter
          - radius
          - center
          - periphery
          - closeness_centrality
          - closeness_centrality_avg
          - betweenness_centrality
          - betweenness_centrality_avg

    """

    stats = {}
    full_start_time = time.time()

    # create a DiGraph from the MultiDiGraph, for those metrics that require it
    G_dir = nx.DiGraph(G)

    # create an undirected Graph from the MultiDiGraph, for those metrics that
    # require it
    G_undir = nx.Graph(G)

    # get the largest strongly connected component, for those metrics that
    # require strongly connected graphs
    G_strong = get_largest_component(G, strongly=True)

    # average degree of the neighborhood of each node, and average for the graph
    avg_neighbor_degree = nx.average_neighbor_degree(G)
    stats['avg_neighbor_degree'] = avg_neighbor_degree
    stats['avg_neighbor_degree_avg'] = sum(
        avg_neighbor_degree.values()) / len(avg_neighbor_degree)

    # average weighted degree of the neighborhood of each node, and average for
    # the graph
    avg_weighted_neighbor_degree = nx.average_neighbor_degree(G,
                                                              weight='length')
    stats['avg_weighted_neighbor_degree'] = avg_weighted_neighbor_degree
    stats['avg_weighted_neighbor_degree_avg'] = sum(
        avg_weighted_neighbor_degree.values()) / len(
            avg_weighted_neighbor_degree)

    # degree centrality for a node is the fraction of nodes it is connected to
    degree_centrality = nx.degree_centrality(G)
    stats['degree_centrality'] = degree_centrality
    stats['degree_centrality_avg'] = sum(
        degree_centrality.values()) / len(degree_centrality)

    # calculate clustering coefficient for the nodes
    stats['clustering_coefficient'] = nx.clustering(G_undir)

    # average clustering coefficient for the graph
    stats['clustering_coefficient_avg'] = nx.average_clustering(G_undir)

    # calculate weighted clustering coefficient for the nodes
    stats['clustering_coefficient_weighted'] = nx.clustering(G_undir,
                                                             weight='length')

    # average clustering coefficient (weighted) for the graph
    stats['clustering_coefficient_weighted_avg'] = nx.average_clustering(
        G_undir, weight='length')

    # pagerank: a ranking of the nodes in the graph based on the structure of
    # the incoming links
    pagerank = nx.pagerank(G_dir, weight='length')
    stats['pagerank'] = pagerank

    # node with the highest page rank, and its value
    pagerank_max_node = max(pagerank, key=lambda x: pagerank[x])
    stats['pagerank_max_node'] = pagerank_max_node
    stats['pagerank_max'] = pagerank[pagerank_max_node]

    # node with the lowest page rank, and its value
    pagerank_min_node = min(pagerank, key=lambda x: pagerank[x])
    stats['pagerank_min_node'] = pagerank_min_node
    stats['pagerank_min'] = pagerank[pagerank_min_node]

    # if True, calculate node and edge connectivity
    if connectivity:
        start_time = time.time()

        # node connectivity is the minimum number of nodes that must be removed
        # to disconnect G or render it trivial
        stats['node_connectivity'] = nx.node_connectivity(G_strong)

        # edge connectivity is equal to the minimum number of edges that must be
        # removed to disconnect G or render it trivial
        stats['edge_connectivity'] = nx.edge_connectivity(G_strong)
        log('Calculated node and edge connectivity in {:,.2f} seconds'.format(
            time.time() - start_time))

    # if True, calculate average node connectivity
    if anc:
        # mean number of internally node-disjoint paths between each pair of
        # nodes in G, i.e., the expected number of nodes that must be removed to
        # disconnect a randomly selected pair of non-adjacent nodes
        start_time = time.time()
        stats['node_connectivity_avg'] = nx.average_node_connectivity(G)
        log('Calculated average node connectivity in {:,.2f} seconds'.format(
            time.time() - start_time))

    # if True, calculate shortest paths, eccentricity, and topological metrics
    # that use eccentricity
    if ecc:
        # precompute shortest paths between all nodes for eccentricity-based
        # stats
        start_time = time.time()
        sp = {
            source: dict(
                nx.single_source_dijkstra_path_length(G_strong,
                                                      source,
                                                      weight='length'))
            for source in G_strong.nodes()
        }

        log('Calculated shortest path lengths in {:,.2f} seconds'.format(
            time.time() - start_time))

        # eccentricity of a node v is the maximum distance from v to all other
        # nodes in G
        eccentricity = nx.eccentricity(G_strong, sp=sp)
        stats['eccentricity'] = eccentricity

        # diameter is the maximum eccentricity
        diameter = nx.diameter(G_strong, e=eccentricity)
        stats['diameter'] = diameter

        # radius is the minimum eccentricity
        radius = nx.radius(G_strong, e=eccentricity)
        stats['radius'] = radius

        # center is the set of nodes with eccentricity equal to radius
        center = nx.center(G_strong, e=eccentricity)
        stats['center'] = center

        # periphery is the set of nodes with eccentricity equal to the diameter
        periphery = nx.periphery(G_strong, e=eccentricity)
        stats['periphery'] = periphery

    # if True, calculate node closeness centrality
    if cc:
        # closeness centrality of a node is the reciprocal of the sum of the
        # shortest path distances from u to all other nodes
        start_time = time.time()
        closeness_centrality = nx.closeness_centrality(G, distance='length')
        stats['closeness_centrality'] = closeness_centrality
        stats['closeness_centrality_avg'] = sum(
            closeness_centrality.values()) / len(closeness_centrality)
        log('Calculated closeness centrality in {:,.2f} seconds'.format(
            time.time() - start_time))

    # if True, calculate node betweenness centrality
    if bc:
        # betweenness centrality of a node is the sum of the fraction of
        # all-pairs shortest paths that pass through node
        # networkx 2.4+ implementation cannot run on Multi(Di)Graphs, so use DiGraph
        start_time = time.time()
        betweenness_centrality = nx.betweenness_centrality(G_dir,
                                                           weight='length')
        stats['betweenness_centrality'] = betweenness_centrality
        stats['betweenness_centrality_avg'] = sum(
            betweenness_centrality.values()) / len(betweenness_centrality)
        log('Calculated betweenness centrality in {:,.2f} seconds'.format(
            time.time() - start_time))

    log('Calculated extended stats in {:,.2f} seconds'.format(time.time() -
                                                              full_start_time))
    return stats
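# Illustrative call of extended_stats (assumes G is an osmnx-style street
# network MultiDiGraph whose edges carry a 'length' attribute, and that the
# helpers used above, e.g. get_largest_component and log, are in scope):
stats = extended_stats(G, bc=True, cc=True)
print(stats['betweenness_centrality_avg'], stats['closeness_centrality_avg'])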
def construct_ccig(sentences, concepts, title=None, use_cd=True, betweenness_threshold_coef=1.0, max_c_size=10,
                   min_c_size=3, IDF=None):
    """
     Given a segmented text and a list of concepts,
     construct concept community interaction graph.
     :param sentences: a list of sentences.
     :param concepts: a list of concepts.
     :return: a concept community interaction graph.
     """
    g = nx.Graph()

    concepts = list(set(concepts))
    concepts = remove_values_from_list(concepts, EMPTY_VERTEX_NAME)

    if len(sentences) == 0 or len(concepts) == 0:
        print("No concept in concepts list.")
        return None
    if len(concepts) > 70:
        print("Too many concepts.")
        return None

    # get concept communities
    if use_cd:
        concept_communities = get_concept_communities(sentences, concepts, betweenness_threshold_coef, max_c_size,
                                                      min_c_size)

    else:
        concept_communities = [[c] for c in concepts]

    if use_cd:
        cname_sentidxs = assign_sentences_to_concept_communities(
            sentences, concept_communities, IDF)
    else:
        cname_sentidxs = assign_sentences_to_concepts(sentences, concepts)

    # initialize vertex properties
    concept_vertexidxs_map = {}

    for c in concepts:
        concept_vertexidxs_map[c] = []

    g.add_node(0, name=EMPTY_VERTEX_NAME, concepts=[], sentidxs=cname_sentidxs[EMPTY_VERTEX_NAME])
    # g.add_node(0)
    # g.node[0]['name'] = EMPTY_VERTEX_NAME
    # g.node[0]['concepts'] = []
    # g.node[0]['sentidxs'] = cname_sentidxs[EMPTY_VERTEX_NAME]

    # print(g.node[0])
    i = 1

    for community in concept_communities:
        cname = community2name(community)

        if len(cname_sentidxs[cname]) == 0:
            continue

        g.add_node(i, name=cname, concepts=community, sentidxs=cname_sentidxs[cname])

        for concept in community:
            concept_vertexidxs_map[concept].append(i)
        i = i + 1

    # edges induced by connecting sentences
    # edge property dicts
    eprop_name = {}
    eprop_concepts = {}
    eprop_sentidxs = {}
    eprop_weight_numsent = {}
    eprop_weight_tfidf = {}

    for sent_idx in range(len(sentences)):
        sent = sentences[sent_idx]
        words = str(sent).split()
        intersect = set(words).intersection(set(concepts))

        if len(intersect) == 0:
            continue

        related_vertexidxs = []

        for c in intersect:
            related_vertexidxs.extend(concept_vertexidxs_map[c])
        related_vertexidxs = list(set(related_vertexidxs))

        # print("related_vertex_idx:")
        # print(related_vertexidxs)

        num_related_v = len(related_vertexidxs)

        if num_related_v < 2:
            continue

        for j in range(num_related_v):
            v1_idx = related_vertexidxs[j]
            for k in range(j, num_related_v):
                if j == k:
                    continue
                v2_idx = related_vertexidxs[k]

                source_idx = min(v1_idx, v2_idx)
                target_idx = max(v1_idx, v2_idx)

                e = (source_idx, target_idx)
                if not g.has_edge(source_idx, target_idx):
                    # first connecting sentence for this vertex pair
                    eprop_sentidxs[e] = [sent_idx]
                    eprop_concepts[e] = list(intersect)
                    g.add_edge(source_idx, target_idx)
                else:
                    # accumulate further connecting sentences and concepts
                    eprop_sentidxs[e].append(sent_idx)
                    eprop_concepts[e] = list(set(eprop_concepts[e]) | intersect)

                g[source_idx][target_idx]['sentidxs'] = eprop_sentidxs[e]
                g[source_idx][target_idx]['concepts'] = eprop_concepts[e]

    # assign edge names and weights
    for e in g.edges():
        eprop_name[e] = " ".join(eprop_concepts[e])
        eprop_weight_numsent[e] = float(len(eprop_sentidxs[e]))
        eprop_weight_tfidf[e] = 0.0

        g[e[0]][e[1]]['name'] = eprop_name[e]
        g[e[0]][e[1]]['weight_numsent'] = eprop_weight_numsent[e]
        g[e[0]][e[1]]['weight_tfidf'] = eprop_weight_tfidf[e]

    # edges by node text similarity
    WEIGHT_THRESHOLD = 0.001  # NOTICE: smaller threshold leads to more edges

    numv = g.number_of_nodes()

    for i in range(numv):
        for j in range(i + 1, numv):
            v1 = g.nodes[i]
            v2 = g.nodes[j]
            idxs1 = list(set(v1['sentidxs']))
            idxs2 = list(set(v2['sentidxs']))

            text1 = [sentences[s] for s in idxs1]
            text1 = " ".join(text1)
            text2 = [sentences[s] for s in idxs2]
            text2 = " ".join(text2)

            w = tfidf_cos_sim(text1, text2, IDF)

            if w >= WEIGHT_THRESHOLD:
                e = (i, j)
                if not g.has_edge(i, j):
                    # similarity-only edge: no connecting sentences yet
                    eprop_sentidxs[e] = []
                    eprop_concepts[e] = []
                    eprop_weight_numsent[e] = 0.0
                    eprop_name[e] = ""
                    g.add_edge(i, j, sentidxs=eprop_sentidxs[e],
                               concepts=eprop_concepts[e],
                               weight_numsent=eprop_weight_numsent[e],
                               name=eprop_name[e])
                eprop_weight_tfidf[e] = w
                g[i][j]['weight_tfidf'] = eprop_weight_tfidf[e]
    if title is not None:
        # add_node, not add_nodes_from: the latter would iterate the string
        # and add one vertex per character
        g.add_node('TITLE', name=TITLE_VERTEX_NAME, sentidxs=[], concepts=[])
    # calculate vertex scores
    pr = nx.pagerank(g, weight='weight_tfidf')
    bt = nx.betweenness_centrality(g, weight='weight_tfidf')
    try:
        katz = nx.katz_centrality(g, weight='weight_tfidf')
    except Exception:
        # katz_centrality may fail to converge; fall back to zero scores
        katz = {v: 0.0 for v in g.nodes()}
    for i in g.nodes():
        g.nodes[i]['pagerank'] = pr[i]
        g.nodes[i]['betweenness'] = bt[i]
        g.nodes[i]['katz'] = katz[i]

    ebt = nx.edge_betweenness_centrality(g, weight='weight_tfidf')
    for (u, v), value in ebt.items():
        g[u][v]['betweenness'] = value

    return g
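A minimal usage sketch for construct_ccig, assuming the helpers it calls (assign_sentences_to_concepts, tfidf_cos_sim, etc.) and the vertex-name constants are defined elsewhere in this module; the sentences, concepts, and IDF values below are illustrative only:

sentences = [
    "the network has high betweenness centrality",
    "centrality identifies bridge nodes in the network",
    "community detection groups related concepts",
]
concepts = ["network", "centrality", "community"]
IDF = {"network": 1.2, "centrality": 1.5, "community": 1.8}  # toy values

# use_cd=False sidesteps community detection: each concept is its own community
ccig = construct_ccig(sentences, concepts, use_cd=False, IDF=IDF)
if ccig is not None:
    for v in ccig.nodes():
        print(v, ccig.nodes[v]['name'], ccig.nodes[v]['betweenness'])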
예제 #60
0
        res = max(res, current)
    print(res)


if __name__ == '__main__':
    # mymap, n, m, G, and masti are module-level globals
    for i in range(1, n + 1):
        mymap[i] = 0
    getGraph()
    timepass = defaultdict(lambda: 0)
    q = deque()
    # Exact betweenness (Brandes' algorithm) costs O(VE), so on larger graphs
    # it is approximated by sampling k pivot nodes: smaller k means a faster
    # but noisier estimate.
    if (G.number_of_edges() < 4 * G.number_of_nodes()
            and G.number_of_nodes() < 800):
        pr = nx.betweenness_centrality(G)
    elif (G.number_of_nodes() < 2000
          and 4 * G.number_of_nodes() > G.number_of_edges()):
        pr = nx.betweenness_centrality(G, k=max(1, G.number_of_nodes() // 8))
    elif (G.number_of_nodes() < 5000
          and 10 * G.number_of_nodes() > G.number_of_edges()):
        pr = nx.betweenness_centrality(G, k=max(1, G.number_of_nodes() // 32))
    elif (G.number_of_nodes() < 20000
          and 10 * G.number_of_nodes() > G.number_of_edges()):
        pr = nx.betweenness_centrality(G,
                                       k=max(1,
                                             G.number_of_nodes() // 2000))
    elif (G.number_of_nodes() < 50000
          and 10 * G.number_of_nodes() > G.number_of_edges()):
        pr = nx.betweenness_centrality(G,
                                       k=max(1,