def degree_fracture(infile, outfile, fraction, recalculate=False):
    """
    Removes the given fraction of nodes from the infile network in decreasing
    order of degree centrality (with or without recalculation of centrality
    values after each node removal) and saves the network in outfile.
    """
    g = networkx.read_gml(infile)
    m = networkx.degree_centrality(g)
    l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)
    largest_component = max(networkx.connected_components(g), key=len)
    n = len(g.nodes())
    for i in range(1, n - 1):
        g.remove_node(l.pop(0)[0])
        if recalculate:
            m = networkx.degree_centrality(g)
            l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)
        largest_component = max(networkx.connected_components(g), key=len)
        if i * 1. / n >= fraction:
            break
    components = networkx.connected_components(g)
    component_id = 1
    for component in components:
        for node in component:
            # g.node[] was removed in networkx 2.4; use g.nodes[]
            g.nodes[node]["component"] = component_id
        component_id += 1
    networkx.write_gml(g, outfile)
def degree(infile, recalculate=False):
    """
    Performs robustness analysis based on degree centrality on the network
    specified by infile, using a sequential (recalculate = True) or
    simultaneous (recalculate = False) approach. Returns a list with the
    fraction of nodes removed, a list with the corresponding sizes of the
    largest component of the network, and the overall vulnerability of the
    network.
    """
    g = networkx.read_gml(infile)
    m = networkx.degree_centrality(g)
    l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)
    x = []
    y = []
    largest_component = max(networkx.connected_components(g), key=len)
    n = len(g.nodes())
    x.append(0)
    y.append(len(largest_component) * 1. / n)
    R = 0.0
    for i in range(1, n - 1):
        g.remove_node(l.pop(0)[0])
        if recalculate:
            m = networkx.degree_centrality(g)
            l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)
        largest_component = max(networkx.connected_components(g), key=len)
        x.append(i * 1. / n)
        R += len(largest_component) * 1. / n
        y.append(len(largest_component) * 1. / n)
    return x, y, 0.5 - R / n
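To make the behaviour of degree() above concrete, here is a minimal, self-contained sketch of the same simultaneous-removal robustness curve. The choice of NetworkX's built-in karate club graph (instead of a GML file) and the final print are illustrative assumptions, not part of the original code.

import operator
import networkx as nx

g = nx.karate_club_graph()
n = g.number_of_nodes()
ranking = sorted(nx.degree_centrality(g).items(), key=operator.itemgetter(1), reverse=True)

x, y = [0.0], [len(max(nx.connected_components(g), key=len)) / n]
R = 0.0
for i in range(1, n - 1):
    g.remove_node(ranking.pop(0)[0])          # remove next-highest-degree node
    giant = len(max(nx.connected_components(g), key=len)) / n
    x.append(i / n)
    y.append(giant)
    R += giant

print("vulnerability (0.5 - R/n):", 0.5 - R / n)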
def degree_component(seed_num, graph=None, graph_json_filename=None, graph_json_str=None):
    if graph_json_filename is None and graph_json_str is None and graph is None:
        return []

    G = None
    if graph is not None:
        G = graph
    elif graph_json_str is None:
        G = util.load_graph(graph_json_filename=graph_json_filename)
    else:
        G = util.load_graph(graph_json_str=graph_json_str)

    components = list(nx.connected_components(G))
    # keep only components holding more than 10% of the nodes
    components = [c for c in components if len(c) > 0.1 * len(G)]
    total_size = sum(len(c) for c in components)
    total_nodes = 0
    rtn = []
    for comp in components[1:]:
        num_nodes = int(float(len(comp)) / total_size * seed_num)
        component = G.subgraph(list(comp))
        clse_cent = nx.degree_centrality(component)
        collector = collections.Counter(clse_cent)
        clse_cent = collector.most_common(num_nodes)
        rtn += [node for node, _ in clse_cent]
        total_nodes += num_nodes

    # assign the remaining seeds to the first retained component
    num_nodes = seed_num - total_nodes
    component = G.subgraph(list(components[0]))
    clse_cent = nx.degree_centrality(component)
    collector = collections.Counter(clse_cent)
    clse_cent = collector.most_common(num_nodes)
    rtn += [node for node, _ in clse_cent]
    return rtn
def degree_removal(g, recalculate=False):
    """
    Performs robustness analysis based on degree centrality on the network g,
    using a sequential (recalculate = True) or simultaneous
    (recalculate = False) approach. Returns a list with the fraction of nodes
    removed and a list with the corresponding fractal dimensions of the
    network.
    """
    m = nx.degree_centrality(g)
    l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)
    x = []
    y = []
    dimension = fd.fractal_dimension(g, iterations=100, debug=False)
    n = len(g.nodes())
    x.append(0)
    y.append(dimension)
    for i in range(1, n - 1):
        g.remove_node(l.pop(0)[0])
        if recalculate:
            m = nx.degree_centrality(g)
            l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)
        dimension = fd.fractal_dimension(g, iterations=100, debug=False)
        x.append(i * 1. / n)
        y.append(dimension)
    return x, y
def sna_calculations(g, play_file):
    """
    :param g: a NetworkX graph object
    :type g: object
    :param play_file: the location of a play in .txt format
    :type play_file: string
    :return: returns a dictionary containing various network related figures
    :rtype: dict
    :note: also writes into results/file_name-snaCalculations.csv and results/allCharacters.csv
    """
    file_name = os.path.splitext(os.path.basename(play_file))[0]
    sna_calculations_list = dict()
    sna_calculations_list['playType'] = file_name[0]
    sna_calculations_list['avDegreeCentrality'] = numpy.mean(
        numpy.fromiter(iter(nx.degree_centrality(g).values()), dtype=float))
    sna_calculations_list['avDegreeCentralityStd'] = numpy.std(
        numpy.fromiter(iter(nx.degree_centrality(g).values()), dtype=float))
    sna_calculations_list['avInDegreeCentrality'] = numpy.mean(
        numpy.fromiter(iter(nx.in_degree_centrality(g).values()), dtype=float))
    sna_calculations_list['avOutDegreeCentrality'] = numpy.mean(
        numpy.fromiter(iter(nx.out_degree_centrality(g).values()), dtype=float))

    try:
        sna_calculations_list['avShortestPathLength'] = nx.average_shortest_path_length(g)
    except nx.NetworkXError:
        sna_calculations_list['avShortestPathLength'] = 'not connected'
    sna_calculations_list['density'] = nx.density(g)
    sna_calculations_list['avEigenvectorCentrality'] = numpy.mean(
        numpy.fromiter(iter(nx.eigenvector_centrality(g).values()), dtype=float))
    sna_calculations_list['avBetweennessCentrality'] = numpy.mean(
        numpy.fromiter(iter(nx.betweenness_centrality(g).values()), dtype=float))
    sna_calculations_list['DegreeCentrality'] = nx.degree_centrality(g)
    sna_calculations_list['EigenvectorCentrality'] = nx.eigenvector_centrality(g)
    sna_calculations_list['BetweennessCentrality'] = nx.betweenness_centrality(g)

    # sna_calculations.csv file ('w' with newline='' replaces the Python 2 'wb' mode)
    sna_calc_file = csv.writer(open('results/' + file_name + '-snaCalculations.csv', 'w', newline=''),
                               quoting=csv.QUOTE_ALL, delimiter=';')
    for key, value in sna_calculations_list.items():
        sna_calc_file.writerow([key, value])

    # all_characters.csv file
    if not os.path.isfile('results/allCharacters.csv'):
        with open('results/allCharacters.csv', 'w') as f:
            f.write('Name;PlayType;play_file;DegreeCentrality;EigenvectorCentrality;BetweennessCentrality;'
                    'speech_amount;AverageUtteranceLength\n')

    all_characters = open('results/allCharacters.csv', 'a')
    character_speech_amount = speech_amount(play_file)
    for character in sna_calculations_list['DegreeCentrality']:
        all_characters.write(character + ';' + str(sna_calculations_list['playType']) + ';' + file_name + ';' +
                             str(sna_calculations_list['DegreeCentrality'][character]) + ';' +
                             str(sna_calculations_list['EigenvectorCentrality'][character]) + ';' +
                             str(sna_calculations_list['BetweennessCentrality'][character]) + ';' +
                             str(character_speech_amount[0][character]) + ';' +
                             str(character_speech_amount[1][character]) + '\n')
    all_characters.close()

    return sna_calculations_list
def __init__(self, time, voteomat): self.foldername = voteomat.network_func_name + voteomat.distribution_func_name self.foldertime = time self.path = "Statistics//"+self.foldername+"//" self.path += g_candidates_affecting_nodes + "=" + str(voteomat.candidates_affecting) + "_" self.path += g_candidates_affected_by_median + "=" + str(voteomat.candidates_affected) + "_" self.path += g_neighbours_affecting_each_other + "=" + str(voteomat.affecting_neighbours) + "_" self.path += g_counterforce_affecting_candidates + "=" + str(voteomat.counter_force_affecting) + "_" self.path += "counterforce_left="+str(voteomat.counter_force_left)+"_"+"counterforce_right="+str(voteomat.counter_force_right)+ "_" + time self.make_sure_path_exists(self.path) self.file = open(self.path + "//statistic.csv", 'w') self.statistic = {} self.statistic["networkfunc"] = voteomat.network_func_name self.statistic["distributionfunc"] = voteomat.distribution_func_name self.statistic["acceptance"] = voteomat.acceptance median, avg, std = voteomat.get_statistic() self.statistic["median"] = [] self.statistic["median"].append(median) self.statistic["avg"] = [] self.statistic["avg"].append(avg) self.statistic["std"] = [] self.statistic["std"].append(std) self.statistic["node_with_highest_degree_centrality"] = [] self.max_degree_node = max( nx.degree_centrality(voteomat.get_network()).items(),key = lambda x: x[1])[0] self.statistic["node_with_highest_degree_centrality"].append(voteomat.get_network().nodes(data = True)[self.max_degree_node][1]["orientation"]) self.statistic["node_with_minimum_degree_centrality"] = [] self.min_degree_node = min(nx.degree_centrality(voteomat.get_network()).items(), key = lambda x: x[1])[0] self.statistic["node_with_minimum_degree_centrality"].append(voteomat.get_network().nodes(data = True)[self.min_degree_node][1]["orientation"]) self.statistic["node_with_highest_closeness_centrality"] = [] self.max_closeness_node = max( nx.closeness_centrality(voteomat.get_network()).items(),key = lambda x: x[1])[0] self.statistic["node_with_highest_closeness_centrality"].append(voteomat.get_network().nodes(data = True)[self.max_closeness_node][1]["orientation"]) self.statistic["node_with_highest_betweenness_centrality"] = [] self.max_betweenness_node = max(nx.betweenness_centrality(voteomat.get_network()).items() ,key = lambda x: x[1])[0] self.statistic["node_with_highest_betweenness_centrality"].append(voteomat.get_network().nodes(data = True)[self.max_betweenness_node][1]["orientation"]) try: self.statistic["node_with_highest_eigenvector_centrality"] = [] self.max_eigenvector_node = max( nx.eigenvector_centrality(voteomat.get_network(), max_iter = 1000).items(),key = lambda x: x[1])[0] self.statistic["node_with_highest_eigenvector_centrality"].append(voteomat.get_network().nodes(data = True)[self.max_eigenvector_node][1]["orientation"]) except nx.NetworkXError: print "Eigenvector centrality not possible." 
freeman = self.freeman_centrality(
    [x[1] for x in nx.degree_centrality(voteomat.get_network()).items()],
    max(nx.degree_centrality(voteomat.get_network()).items(), key=lambda x: x[1])[1])
self.statistic["freeman_centrality"] = round(freeman, 2)

self.statistic["affecting_neighbours"] = voteomat.affecting_neighbours
self.statistic["affecting_candidates"] = voteomat.candidates_affecting
self.statistic["affected_candidates"] = voteomat.candidates_affected
self.statistic["affecting_counter_force"] = voteomat.counter_force_affecting
self.statistic["affecting_counter_force_left"] = voteomat.counter_force_left
self.statistic["affecting_counter_force_right"] = voteomat.counter_force_right

self.statistic["candidates"] = []
for candidate in voteomat.candidates:
    self.statistic["candidates"].append(candidate.to_save())

self.statistic["network"] = voteomat.get_network().nodes(data=True)
def degree_apl(g, recalculate=False):
    """
    Performs robustness analysis based on degree centrality on the network g,
    using a sequential (recalculate = True) or simultaneous
    (recalculate = False) approach. Returns a list with the fraction of nodes
    removed, a list with the corresponding average path lengths (normalized
    by the initial average path length), and an overall vulnerability measure.
    """
    m = networkx.degree_centrality(g)
    l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)
    x = []
    y = []

    average_path_length = 0.0
    number_of_components = 0
    n = len(g.nodes())

    # connected_component_subgraphs() was removed in networkx 2.4;
    # build the subgraphs from connected_components() instead
    for c in networkx.connected_components(g):
        sg = g.subgraph(c)
        average_path_length += networkx.average_shortest_path_length(sg)
        number_of_components += 1

    average_path_length = average_path_length / number_of_components
    initial_apl = average_path_length

    x.append(0)
    y.append(average_path_length * 1. / initial_apl)

    r = 0.0
    for i in range(1, n - 2):
        g.remove_node(l.pop(0)[0])
        if recalculate:
            m = networkx.degree_centrality(g)
            l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)

        average_path_length = 0.0
        number_of_components = 0
        for c in networkx.connected_components(g):
            sg = g.subgraph(c)
            if len(sg.nodes()) > 1:
                average_path_length += networkx.average_shortest_path_length(sg)
                number_of_components += 1
        average_path_length = average_path_length / number_of_components

        x.append(i * 1. / n)  # fraction of nodes removed
        r += average_path_length * 1. / initial_apl
        y.append(average_path_length * 1. / initial_apl)
    return x, y, r / initial_apl
def labels(G, threshhold=95):
    '''Return labels (dictionary) for nodes whose degree centrality is at or
    above the given percentile.'''
    labels = {}
    # create the cutoff based on the given percentile
    cen_cutoff = np.percentile(list(nx.degree_centrality(G).values()), threshhold)
    # put the node's label in the dictionary if its centrality passes the threshold
    for key, value in nx.degree_centrality(G).items():
        if value >= cen_cutoff:
            labels[key] = key
    return labels
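A hedged usage sketch of the same percentile idea, labelling only the hubs of a synthetic graph; the Barabási–Albert graph, the 95th percentile, and the drawing calls are illustrative assumptions (numpy and matplotlib assumed available).

import networkx as nx
import numpy as np
import matplotlib.pyplot as plt

G = nx.barabasi_albert_graph(200, 2, seed=1)
pos = nx.spring_layout(G, seed=1)

# label only nodes at or above the 95th percentile of degree centrality
cutoff = np.percentile(list(nx.degree_centrality(G).values()), 95)
hub_labels = {n: n for n, c in nx.degree_centrality(G).items() if c >= cutoff}

nx.draw_networkx_nodes(G, pos, node_size=20)
nx.draw_networkx_edges(G, pos, alpha=0.2)
nx.draw_networkx_labels(G, pos, labels=hub_labels, font_size=8)
plt.axis("off")
plt.show()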
def __init__(self):
    self.g = nx.barabasi_albert_graph(random.randint(100, 1000), random.randint(2, 7))
    self.degree_centrality = nx.degree_centrality(self.g)
    self.deg = nx.degree_centrality(self.g)
    self.sorted_deg = sorted(self.deg.items(), key=operator.itemgetter(1))
    self.nodes = len(self.g.nodes())
    self.edges = len(self.g.edges())
    self.degree_rank()
    # G.degree() returns a DegreeView in networkx 2.x, so wrap it in dict()
    self.degree_dict = dict(self.g.degree())
    self.avg_deg = sum(self.degree_dict.values()) / float(len(self.g.nodes()))
    # print self.rank
    # print self.degree_dict
    self.form_dataset()
def degree_centrality(self, withme=True, node=None, average=False): if node==None: if withme: my_dict = nx.degree_centrality(self.mynet) new = {} new2={} for i in my_dict: new[self.id_to_name(i)] = my_dict[i] new2[i] = my_dict[i] if average: print "The average is " + str(round(sum(new.values())/float(len(new.values())),4)) else: for i,j in new.items(): print i, round(j,4) return new2 else: my_dict = nx.degree_centrality(self.no_ego_net) new = {} new2={} for i in my_dict: new[self.id_to_name(i)] = my_dict[i] new2[i] = my_dict[i] if average: print "The average is " + str(round(sum(new.values())/float(len(new.values())),4)) else: for i,j in new.items(): print i, round(j,4) return new2 else: if withme: my_dict = nx.degree_centrality(self.mynet) try: print "The coefficient for node "+str(node)+ "is "+ str(round(my_dict[node],4)) except: try: return my_dict [self.name_to_id(node)] except: print "Invalid node name" else: my_dict = nx.degree_centrality(self.no_ego_net) try: print "The coefficient for node "+str(node)+ "is "+ str(round(my_dict[node],4)) except: try: print "The coefficient for node "+str(node)+ "is "+ str(round(my_dict[[self.name_to_id(node)]],4)) except: print "Invalid node name"
def set_capacities_degree_gravity(topology, capacities, capacity_unit='Mbps'): """ Set link capacities proportionally to the product of the degrees of the two end-points of the link Parameters ---------- topology : Topology The topology to which link capacities will be set capacities : list A list of all possible capacity values capacity_unit : str, optional The unit in which capacity value is expressed (e.g. Mbps, Gbps etc..) """ if topology.is_directed(): in_degree = nx.in_degree_centrality(topology) out_degree = nx.out_degree_centrality(topology) gravity = {(u, v): out_degree[u] * in_degree[v] for (u, v) in topology.edges()} else: degree = nx.degree_centrality(topology) gravity = {(u, v): degree[u] * degree[v] for (u, v) in topology.edges()} _set_capacities_proportionally(topology, capacities, gravity, capacity_unit=capacity_unit)
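The helper _set_capacities_proportionally belongs to the surrounding library, so a standalone call is not possible here. The sketch below only shows how the degree-gravity weights come out and maps them onto capacity tiers by rank; the random topology, the three-tier rule, and the capacity values are illustrative assumptions, not the library's actual proportional allocation.

import networkx as nx

topology = nx.erdos_renyi_graph(20, 0.2, seed=42)
degree = nx.degree_centrality(topology)
gravity = {(u, v): degree[u] * degree[v] for u, v in topology.edges()}

# illustrative stand-in for the proportional-allocation step:
# assign each link to one of three capacity tiers by its gravity rank
capacities = [10, 100, 1000]  # Mbps
ranked = sorted(gravity, key=gravity.get)
link_capacity = {e: capacities[i * len(capacities) // len(ranked)]
                 for i, e in enumerate(ranked)}
print(list(link_capacity.items())[:3])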
def high_degrees_fast(seed_num, graph=None, graph_json_filename=None, graph_json_str=None):
    """
    Find the high-degree nodes of the given graph by sorting on the
    adjacency list lengths and slicing.

    Parameters:
        seed_num: Number of nodes to choose.
        graph_json_filename: Filename where the adjacency list lives as JSON.
        graph_json_str: Graph as an adjacency list string in JSON.

    Return: List of 'seed_num' highest degree nodes.
    """
    if graph_json_filename is None and graph_json_str is None and graph is None:
        return []

    G = None
    if graph is not None:
        G = graph
    elif graph_json_str is None:
        G = util.load_graph(graph_json_filename=graph_json_filename)
    else:
        G = util.load_graph(graph_json_str=graph_json_str)

    clse_cent = nx.get_node_attributes(G, "centrality")
    if len(clse_cent) == 0:
        clse_cent = nx.degree_centrality(G)
        # networkx 2.x signature: set_node_attributes(G, values, name)
        nx.set_node_attributes(G, clse_cent, "centrality")
        print("hi high-degree")

    collector = collections.Counter(clse_cent)
    clse_cent = collector.most_common(seed_num)
    return [node for node, _ in clse_cent]
def centrality_month_airports(data):
    df = data.copy()
    df['DateOfDeparture'] = pd.to_datetime(df['DateOfDeparture'])
    # note: despite the column name, this is the ISO week number, not the calendar month
    df['month'] = df['DateOfDeparture'].dt.isocalendar().week.astype(str)
    df['year'] = df['DateOfDeparture'].dt.year.astype(str)
    df['year_month'] = df[['month', 'year']].apply(lambda x: '-'.join(x), axis=1)
    df['year_month_dep'] = df[['Departure', 'month', 'year']].apply(lambda x: '-'.join(x), axis=1)
    df['year_month_arr'] = df[['Arrival', 'month', 'year']].apply(lambda x: '-'.join(x), axis=1)

    year_month = pd.unique(df['year_month'])
    G = nx.Graph()
    centrality = {}
    for i, item in enumerate(year_month):
        sub_df = df[df['year_month'] == item][['Departure', 'Arrival']]
        list_dep_arr = zip(sub_df['Departure'], sub_df['Arrival'])
        G.add_edges_from(list_dep_arr)
        # G.number_of_nodes()
        # G.number_of_edges()
        centrality_month = nx.degree_centrality(G)
        centrality_month = pd.DataFrame(list(centrality_month.items()))
        centrality_month['year_month'] = [item] * centrality_month.shape[0]
        centrality_month['airport_year_month'] = centrality_month[
            centrality_month.columns[[0, 2]]].apply(lambda x: '-'.join(x), axis=1)
        centrality_month = dict(zip(centrality_month['airport_year_month'], centrality_month[1]))
        z = centrality.copy()
        z.update(centrality_month)
        centrality = z

    df['centrality_month_dep'] = df['year_month_dep'].map(centrality)
    df['centrality_month_arr'] = df['year_month_arr'].map(centrality)
    return df
def degree_centrality(graph, records):
    """ Reports on the most central individuals in the graph """
    dc = nx.degree_centrality(graph)
    nodes = sorted(dc.items(), key=operator.itemgetter(1), reverse=True)[:records]
    print("Degree Centrality - top {} individuals".format(records))
    for n in nodes:
        print("    {:30}:\t{}".format(n[0], n[1]))
def centralities(self): ''' Get info on centralities of data Params: None Returns: dictionary of centrality metrics with keys(centralities supported): degree - degree centrality betweeness - betweeness centrality eigenvector - eigenvector centrality hub - hub scores - not implemented authority - authority scores - not implemented katz - katz centrality with params X Y pagerank - pagerank centrality with params X Y ''' output = {} output['degree'] = nx.degree_centrality(self.G) output['betweeness'] = nx.betweenness_centrality(self.G) try: output['eigenvector'] = nx.eigenvector_centrality(self.G) output['katz'] = nx.katz_centrality(self.G) except: output['eigenvector'] = 'empty or exception' output['katz'] = 'empty or exception' # output['hub'] = 'Not implemented' # output['authority'] = 'Not implemented' # output['pagerank'] = 'Not implemented' return output
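A self-contained check of the same centrality bundle on a toy graph; the karate club graph and the Katz alpha value are illustrative assumptions, and the try/except mirrors the fallback in the method above.

import networkx as nx

G = nx.karate_club_graph()
output = {'degree': nx.degree_centrality(G),
          'betweeness': nx.betweenness_centrality(G)}
try:
    output['eigenvector'] = nx.eigenvector_centrality(G)
    output['katz'] = nx.katz_centrality(G, alpha=0.005)  # alpha must stay below 1/lambda_max
except nx.NetworkXException:
    output['eigenvector'] = 'empty or exception'
    output['katz'] = 'empty or exception'

# largest value of each metric, rounded
print({k: round(max(v.values()), 3) for k, v in output.items() if isinstance(v, dict)})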
def fast_approximate_solution_two(graph): """ Given a graph, construct a solution greedily using approximation methods. Performs bad. """ new_graph = nx.Graph() degrees = nx.degree_centrality(graph) largest = argmax(degrees) new_graph.add_node(largest) while new_graph.number_of_edges() < graph.number_of_nodes() - 1: degrees = {n: count_uncovered_degree(graph, new_graph, n) for n in nx.nodes(graph)} neighbor_list = [nx.neighbors(graph, n) for n in new_graph.nodes()] neighbors = set() for lst in neighbor_list: neighbors = neighbors.union(lst) if not neighbors: break next_largest = argmax_in(degrees, neighbors, exceptions = new_graph.nodes()) possible_edge_ends = [n for n in nx.neighbors(graph, next_largest) if graph.has_edge(n, next_largest) and n in new_graph.nodes()] new_graph.add_node(next_largest) edge_end = argmax_in(degrees, possible_edge_ends) new_graph.add_edge(edge_end, next_largest) return new_graph
def modularity(self):
    """
    Compute the modularity.

    Returns:
        Numerical value of the modularity of the graph.
    """
    g = self.gr
    A = nx.adjacency_matrix(g)
    # degree_centrality returns degree / (n - 1), i.e. a normalized degree, so the
    # "random edges" term below is a rescaled variant of Newman's formula
    degDict = nx.degree_centrality(g)

    adjDict = {}
    n = A.shape[0]
    B = A.sum(axis=1)
    nodes = list(g.nodes())  # NodeView is not positionally indexable in networkx 2.x
    for i in range(n):
        adjDict[nodes[i]] = B[i, 0]

    m = len(g.edges())
    connComponents = nx.connected_components(g)

    mod = 0
    for c in connComponents:
        edgesWithinCommunity = 0
        randomEdges = 0
        for u in c:
            edgesWithinCommunity += adjDict[u]
            randomEdges += degDict[u]
        mod += (float(edgesWithinCommunity) - float(randomEdges * randomEdges) / float(2 * m))
    mod = mod / float(2 * m)
    return mod
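For a sanity check on small graphs, NetworkX ships its own modularity routine; a minimal sketch, assuming (as the method above does) that the communities are the connected components:

import networkx as nx
from networkx.algorithms.community import modularity

# two disjoint triangles: each community contributes 3/6 - (6/12)^2 = 0.25
G = nx.Graph([(0, 1), (1, 2), (2, 0), (3, 4), (4, 5), (5, 3)])
communities = list(nx.connected_components(G))
print(modularity(G, communities))   # 0.5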
def run_main(file): NumberOfStations=465 print file adjmatrix = np.loadtxt(file,delimiter=' ',dtype=np.dtype('int32')) # for i in range (0,NumberOfStations): # if(adjmatrix[i,i]==1): # print "posicion: ["+str(i)+","+str(i)+"]" g = nx.from_numpy_matrix(adjmatrix, create_using = nx.MultiGraph()) degree = g.degree() density = nx.density(g) degree_centrality = nx.degree_centrality(g) clossness_centrality = nx.closeness_centrality(g) betweenless_centrality = nx.betweenness_centrality(g) print degree print density print degree_centrality print clossness_centrality print betweenless_centrality #nx.draw(g) # np.savetxt(OutputFile, Matrix, delimiter=' ',newline='\n',fmt='%i')
def draw_graph(label_flag=True, remove_isolated=True, different_size=True, iso_level=10, node_size=40): G=build_graph(fb.get_friends_network()) betweenness=nx.betweenness_centrality(G) degree=nx.degree_centrality(G) degree_num=[ degree[v] for v in G] maxdegree=max(degree_num);mindegree=min(degree_num); print maxdegree,mindegree clustering=nx.clustering(G) print nx.transitivity(G) # Judge whether remove the isolated point from graph if remove_isolated is True: H = nx.empty_graph() for SG in nx.connected_component_subgraphs(G): if SG.number_of_nodes() > iso_level: H = nx.union(SG, H) G = H # Ajust graph for better presentation if different_size is True: L = nx.degree(G) G.dot_size = {} for k, v in L.items(): G.dot_size[k] = v #node_size = [betweenness[v] *1000 for v in G] node_size = [G.dot_size[v] * 10 for v in G] node_color= [((degree[v]-mindegree))/(maxdegree-mindegree) for v in G] #edge_width = [getcommonfriends(u,v) for u,v in G.edges()] pos = nx.spring_layout(G, iterations=15) nx.draw_networkx_edges(G, pos, alpha=0.05) nx.draw_networkx_nodes(G, pos, node_size=node_size, node_color=node_color, vmin=0.0,vmax=1.0, alpha=0.3) # Judge whether shows label if label_flag is True: nx.draw_networkx_labels(G, pos, font_size=6,alpha=0.1) #nx.draw_graphviz(G) plt.show() return G
def mean_degree_centrality(pg, normalize=0):
    """
    mean_degree_centrality(pg) calculates mean in- and out-degree
    centralities for directed graphs and simple degree-centralities
    for undirected graphs. If the normalize flag is set, each node's
    centralities are weighted by the number of edges in the (di)graph.
    """
    centrality = {}
    try:
        # was is_directed_acyclic_graph(), which tests acyclicity, not directedness
        if pg.is_directed():
            cent_sum_in, cent_sum_out = 0, 0
            for n in pg.nodes():
                n_cent_in = pg.in_degree(n)
                n_cent_out = pg.out_degree(n)
                if normalize:
                    n_cent_in = float(n_cent_in) / float(pg.size() - 1)
                    n_cent_out = float(n_cent_out) / float(pg.size() - 1)
                cent_sum_in = cent_sum_in + n_cent_in
                cent_sum_out = cent_sum_out + n_cent_out
            centrality['in'] = cent_sum_in / float(pg.order())
            centrality['out'] = cent_sum_out / float(pg.order())
        else:
            cent_sum = 0
            # degree_centrality() takes only the graph and returns a dict keyed by node
            dc = networkx.degree_centrality(pg)
            for n in pg.nodes():
                if not normalize:
                    n_cent = pg.degree(n)
                else:
                    n_cent = dc[n]
                cent_sum = cent_sum + n_cent
            centrality['all'] = cent_sum / float(pg.order())
    except Exception:
        logging.error('pyp_network.mean_degree_centrality() failed!')
    return centrality
def plotGraph(G, figsize=(8, 8), filename=None): """ Plots an individual graph, node size by degree centrality, edge size by edge weight. """ labels = {n:n for n in G.nodes()} d = nx.degree_centrality(G) layout=nx.spring_layout pos=layout(G) plt.figure(figsize=figsize) plt.subplots_adjust(left=0,right=1,bottom=0,top=0.95,wspace=0.01,hspace=0.01) # nodes nx.draw_networkx_nodes(G,pos, nodelist=G.nodes(), node_color="steelblue", node_size=[v * 250 for v in d.values()], alpha=0.8) try: weights = [G[u][v]['weight'] for u,v in G.edges()] except: weights = [1 for u,v in G.edges()] nx.draw_networkx_edges(G,pos, with_labels=False, edge_color="grey", width=weights ) if G.order() < 1000: nx.draw_networkx_labels(G,pos, labels) plt.savefig(filename) plt.close("all")
def compute_static_graph_statistics(G,start_time,end_time): verts = G.vertices n = len(verts) m = float(end_time - start_time) agg_statistics = [dict.fromkeys(verts,0),dict.fromkeys(verts,0),dict.fromkeys(verts,0)]*3 avg_statistics = [dict.fromkeys(verts,0),dict.fromkeys(verts,0),dict.fromkeys(verts,0)]*3 aggregated_graph = nx.Graph() aggregated_graph.add_nodes_from(verts) start_time = max(1,start_time) for t in xrange(start_time,end_time+1): aggregated_graph.add_edges_from(G.snapshots[t].edges_iter()) dc = G.snapshots[t].degree() cc = nx.closeness_centrality(G.snapshots[t]) bc = nx.betweenness_centrality(G.snapshots[t]) for v in verts: avg_statistics[0][v] += dc[v]/(n-1.0) avg_statistics[1][v] += cc[v] avg_statistics[2][v] += bc[v] for v in verts: avg_statistics[0][v] = avg_statistics[0][v]/m avg_statistics[1][v] = avg_statistics[1][v]/m avg_statistics[2][v] = avg_statistics[2][v]/m dc = nx.degree_centrality(aggregated_graph) cc = nx.closeness_centrality(aggregated_graph) bc = nx.betweenness_centrality(aggregated_graph) for v in verts: agg_statistics[0][v] = dc[v] agg_statistics[1][v] = cc[v] agg_statistics[2][v] = bc[v] return (agg_statistics, avg_statistics)
def degree_centrality(graph, outfile, records=10):
    """ Perform a degree centrality analysis on graph """
    ranking = nx.degree_centrality(graph)
    ordering = sorted(ranking.items(), key=operator.itemgetter(1), reverse=True)[:records]
    print("Employee,Degree Centrality", file=outfile)
    for employee, rank in ordering:
        print("{},{}".format(employee, rank), file=outfile)
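A hedged usage sketch for the reporting helper above (assumed to be in scope along with networkx and operator); the karate club graph stands in for the real employee graph and "degree_report.csv" is an illustrative filename.

import networkx as nx

graph = nx.karate_club_graph()
with open("degree_report.csv", "w") as outfile:
    degree_centrality(graph, outfile, records=5)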
def degree_centrality_report(graph, n):
    """ Reports on the top n most central individuals on the graph """
    pr = nx.degree_centrality(graph)
    nodes = sorted(pr.items(), key=operator.itemgetter(1), reverse=True)[:n]
    print("Degree Centrality - top {} individuals".format(n))
    for node in nodes:  # renamed from n to avoid shadowing the parameter
        print("    {:30}:\t{}".format(node[0], node[1]))
def degree_centrality(graph):
    centrality = nx.degree_centrality(graph)
    # networkx 2.x signature: set_node_attributes(G, values, name)
    nx.set_node_attributes(graph, centrality, 'centrality')
    degrees = sorted(centrality.items(), key=itemgetter(1), reverse=True)
    for idx, item in enumerate(degrees[0:10]):
        item = (idx + 1,) + item + (graph.degree(item[0]),)
        print("%i. %s: %0.3f (%i)" % item)
def print_info(G): #info prints name, type, number of nodes and edges, and average degree already print(nx.info(G)) print "Density: ", nx.density(G) print "Number of connected components: ", nx.number_connected_components(G) all_degree_cent = nx.degree_centrality(G) all_bet_cent = nx.betweenness_centrality(G) all_close_cent = nx.closeness_centrality(G) oldest = [] agerank = 0 names = [] print ("Node, Degree Centrality, Betweenness Centrality, Closeness Centrality:") for x in range(G.number_of_nodes()): names.append(G.nodes(data=True)[x][1]['label']) if G.nodes(data=True)[x][1]['agerank'] >= agerank: if G.nodes(data=True)[x][1]['agerank'] != agerank: oldest = [] agerank = G.nodes(data=True)[x][1]['agerank'] oldest.append(G.nodes(data=True)[x][1]) print G.nodes(data=True)[x][1]['label'],' %.2f' % all_degree_cent.get(x),\ ' %.2f' % all_bet_cent.get(x),\ ' %.2f' % all_close_cent.get(x) print "Oldest facebook(s): ", ', '.join([x['label'] for x in oldest]) return names
def most_central(self,F=1,cent_type='betweenness'): if cent_type == 'betweenness': ranking = nx.betweenness_centrality(self.G).items() elif cent_type == 'closeness': ranking = nx.closeness_centrality(self.G).items() elif cent_type == 'eigenvector': ranking = nx.eigenvector_centrality(self.G).items() elif cent_type == 'harmonic': ranking = nx.harmonic_centrality(self.G).items() elif cent_type == 'katz': ranking = nx.katz_centrality(self.G).items() elif cent_type == 'load': ranking = nx.load_centrality(self.G).items() elif cent_type == 'degree': ranking = nx.degree_centrality(self.G).items() ranks = [r for n,r in ranking] cent_dict = dict([(self.lab[n],r) for n,r in ranking]) m_centrality = sum(ranks) if len(ranks) > 0: m_centrality = m_centrality/len(ranks) #Create a graph with the nodes above the cutoff centrality- remove the low centrality nodes thresh = F*m_centrality lab = {} for k in self.lab: lab[k] = self.lab[k] g = Graph(self.adj.copy(),self.char_list) for n,r in ranking: if r < thresh: g.G.remove_node(n) del g.lab[n] return (cent_dict,thresh,g)
def calculate_network_measures(net, analyser):
    deg = nx.degree_centrality(net)
    clust = []

    if net.is_multigraph():
        net = analyser.flatGraph(net)

    if nx.is_directed(net):
        tmp_net = net.to_undirected()
        clust = nx.clustering(tmp_net)
    else:
        clust = nx.clustering(net)

    if nx.is_directed(net):
        tmp_net = net.to_undirected()
        paths = nx.shortest_path(tmp_net, source=None, target=None, weight=None)
    else:
        paths = nx.shortest_path(net, source=None, target=None, weight=None)

    # for each source, skip the first entry (the trivial path to the source itself)
    # and collect the lengths of the remaining paths
    lengths = [[len(p) for p in list(targets.values())[1:]] for source, targets in paths.items()]
    all_lengths = []
    for a in lengths:
        all_lengths.extend(a)
    max_value = max(all_lengths)
    # all_lengths = [x / float(max_value) for x in all_lengths]

    return list(deg.values()), list(clust.values()), all_lengths
def allocate(G_phy, G_bgp):
    log.info("Allocating route reflectors")
    graph_phy = G_phy._graph
    for asn, devices in G_phy.groupby("asn").items():
        routers = [d for d in devices if d.is_router]
        router_ids = ank_utils.unwrap_nodes(routers)
        subgraph_phy = graph_phy.subgraph(router_ids)
        if len(subgraph_phy) == 1:
            continue  # single node in graph, no ibgp

        deg_cen = nx.degree_centrality(subgraph_phy)
        ordered = sorted(subgraph_phy.nodes(), key=lambda x: deg_cen[x], reverse=True)

        rr_count = len(subgraph_phy) // 5  # take the top 20% to be route reflectors
        route_reflectors = ordered[:rr_count]  # most connected 20%
        rr_clients = ordered[rr_count:]        # the other routers

        route_reflectors = list(ank_utils.wrap_nodes(G_bgp, route_reflectors))
        rr_clients = list(ank_utils.wrap_nodes(G_bgp, rr_clients))
        G_bgp.update(route_reflectors, route_reflector=True)  # mark as route reflector

        # rr <-> rr
        over_links = [(rr1, rr2) for rr1 in route_reflectors
                      for rr2 in route_reflectors if rr1 != rr2]
        G_bgp.add_edges_from(over_links, type='ibgp', direction='over')

        # client -> rr
        up_links = [(client, rr) for (client, rr)
                    in itertools.product(rr_clients, route_reflectors)]
        G_bgp.add_edges_from(up_links, type='ibgp', direction='up')

        # rr -> client
        down_links = [(rr, client) for (client, rr) in up_links]  # opposite of up
        G_bgp.add_edges_from(down_links, type='ibgp', direction='down')

    log.debug("iBGP done")
def plotGraph(graph, color="r", figsize=(12, 8)): labels = {n:n for n in graph.nodes()} d = nx.degree_centrality(graph) layout=nx.spring_layout pos=layout(graph) plt.figure(figsize=figsize) plt.subplots_adjust(left=0,right=1,bottom=0,top=0.95,wspace=0.01,hspace=0.01) # nodes nx.draw_networkx_nodes(graph,pos, nodelist=graph.nodes(), node_color=color, node_size=[v * 250 for v in d.values()], alpha=0.8) nx.draw_networkx_edges(graph,pos, with_labels=False, edge_color=color, width=0.50 ) if graph.order() < 1000: nx.draw_networkx_labels(graph,pos, labels) return plt
def centrality(G2): print("Running Centrality Module") # Check the type of centrality, and calculate for each node if myargs.centrality == "degree": cent = nx.degree_centrality(G2) cent_size = numpy.fromiter(cent.values(), float) print(cent) if myargs.centrality == "eigen": cent = nx.eigenvector_centrality(G2) cent_size = numpy.fromiter(cent.values(), float) print(cent) if myargs.centrality == "betweenness": cent = nx.betweenness_centrality(G2) cent_size = numpy.fromiter(cent.values(), float) print(cent) if myargs.centrality == "closeness": cent = nx.closeness_centrality(G2) cent_size = numpy.fromiter(cent.values(), float) print(cent) # This gives a degree frequency index (useful to compare to Power Law) degree_G2 = nx.degree(G2) degree_df = pd.DataFrame(degree_G2, columns=["Node", "Degree"]) degree_list = degree_df["Degree"].to_numpy() degree_freq_G2 = nx.degree_histogram(G2) degree_freq_df = pd.DataFrame(degree_freq_G2, columns=["Frequency"]) degree_freq_df["Degree"] = degree_freq_df.index degree_freq_df = degree_freq_df[["Degree", "Frequency"]] degree_df.to_csv(f"{edge_file}_Freq.txt", sep="\t") # This allows us to compare centrality between two different Cents1 = [] Cents0 = [] for v in G2.nodes: if v in group: G2.nodes[v]["subgroup"] = 1 G2.nodes[v]["centrality"] = cent[v] Cents1.append(cent[v]) else: G2.nodes[v]["subgroup"] = 0 G2.nodes[v]["centrality"] = cent[v] Cents0.append(cent[v]) node_color = [get_color(G2.nodes[v]["subgroup"]) for v in G2.nodes] # print(G2.nodes) # print(Cents1) # print(Cents0) # Output1: Graph, Highlight High Centrality & Groups. plt.figure() nx.draw( G2, pos=None, with_labels=True, node_color=node_color, node_size=cent_size * 2000, width=1, ) # ,ax=fig.subplot(111)) plt.savefig(f"{myargs.centrality}_{myargs.thresh}_{edge_file}_Network.png") # plt.show() # Output2: Degree Histogram fig = plt.figure("Degree of Graph", figsize=(8, 8)) # Create a gridspec for adding subplots of different sizes # axgrid = fig.add_gridspec(5, 4) # ax2 = fig.add_subplot(axgrid[:, :]) # ax2.bar(*numpy.unique(degree_sequence, return_counts=True)) # ax2.set_title("Degree histogram") # ax2.set_xlabel("Degree") # ax2.set_ylabel("# of Nodes") histoimage = plt.hist(cent.values(), range=[0, 0.15], color="skyblue") fig.tight_layout() fig.savefig(f"{myargs.centrality}_{myargs.thresh}_{edge_file}_Histo.png")
def central_characters(graph, n=10):
    res = Counter(nx.degree_centrality(graph)).most_common(n)
    return res
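A quick self-contained check of the same Counter.most_common idea, using NetworkX's built-in Les Misérables co-occurrence graph as an illustrative stand-in:

from collections import Counter
import networkx as nx

# five most central characters by degree centrality
print(Counter(nx.degree_centrality(nx.les_miserables_graph())).most_common(5))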
import pickle
import networkx as nx
import matplotlib.pyplot as plt
from nxviz import MatrixPlot, CircosPlot
from nxviz.plots import ArcPlot
from itertools import combinations
from collections import defaultdict

graph = pickle.load(open('github_users.p', 'rb'))
print("no. of users: " + str(len(graph.nodes())))
print("no. of user-collaborations(p2p) : " + str(len(graph.edges())))

plt.hist(list(nx.degree_centrality(graph).values()))
plt.show()

# Calculate the largest connected component subgraph
# (connected_component_subgraphs() was removed in networkx 2.4)
largest_ccs = graph.subgraph(max(nx.connected_components(graph), key=len))
h = MatrixPlot(largest_ccs)
h.draw()
plt.show()

for n, d in graph.nodes(data=True):
    graph.nodes[n]['degree'] = nx.degree(graph, n)  # graph.node[] is graph.nodes[] in networkx 2.x

# a = ArcPlot(graph=graph, node_order='degree')
# a.draw()
def getSCV(v, g):
    # note: this recomputes degree centrality for the whole graph on every call
    return nx.degree_centrality(g)[v]
    # (tail of a graph-loading helper; `path` is the file path it received)
    G = nx.DiGraph()
    df = pd.read_csv(path, sep="\t")
    nodes = df.iloc[:, 1].unique().tolist()
    edges = [(f[0], f[1]) for f in df.to_numpy()]  # as_matrix() was removed in pandas
    G.add_nodes_from(nodes)
    G.add_edges_from(edges)
    return G


# 2
# A
hits = nx.hits(g)            # hits_scipy() was removed in networkx 3.0
pagerank = nx.pagerank(g)    # default alpha = .85
eigen = nx.eigenvector_centrality(g)
degree = nx.degree_centrality(g)

get_top_hubs = lambda hits: get_top_nodes(hits[0])
get_top_auths = lambda hits: get_top_nodes(hits[1])


def get_top_nodes(d, n=20):
    # sort descending so the *highest*-scoring nodes come first
    return [node for node, _ in sorted(d.items(), key=lambda x: x[1], reverse=True)][0:n]


get_top_nodes(degree)
get_top_nodes(eigen)
get_top_nodes(pagerank)
get_top_hubs(hits)
get_top_auths(hits)
nodes.set_norm(mcolors.SymLogNorm(linthresh=0.01, linscale=1)) # labels = nx.draw_networkx_labels(G, pos) edges = nx.draw_networkx_edges(G, pos) plt.title(measure_name) plt.colorbar(nodes) plt.axis('off') plt.show() pos = nx.spring_layout(G) my_graph = nx.DiGraph() my_graph.add_edges_from(G.edges()) d = nx.degree_centrality(G) print(d) #draw(my_graph, pos=none, nx.degree_centrality(my_graph), 'Degree of Centrality') d = nx.in_degree_centrality(my_graph) print(d) draw(my_graph, pos, d, 'Degree of Incentrality') d = nx.out_degree_centrality(my_graph) print(d) draw(my_graph, pos, d, 'Degree of Outcentrality') """### Eigen Vector Centrality indegree and outdegree ka different """ e = nx.eigenvector_centrality(G)
def main(): qid = sys.argv[1] for cdo in CDO.objects.values('doc', 'citation').distinct()[:5]: print(cdo) #CDO.objects.filter(pk__in=CDO.objects.filter(doc=).values_list('id', flat=True)[1:]).delete() sys.exit() #time.sleep(14400) q = Query.objects.get(pk=qid) mdocs = Doc.objects.filter(query=q, wosarticle__cr__isnull=False) cdos = CDO.objects.filter(doc__query=q) # cdos = CDO.objects.filter( # doc__in=mdocs.values_list('UT',flat=True) # ) m = mdocs.count() m_dict = dict(zip(list(mdocs.values_list('UT', flat=True)), list(range(m)))) rev_m_dict = dict( zip(list(range(m)), list(mdocs.values_list('UT', flat=True)))) del mdocs n = Citation.objects.count() n_dict = dict( zip(list(Citation.objects.all().values_list('id', flat=True)), list(range(n)))) print("ROWIDS") row_ids = list(cdos.values_list('doc__UT', flat=True)) rows = np.array([m_dict[x] for x in row_ids]) print("colids") col_ids = list(cdos.values_list('citation__id', flat=True)) cols = np.array([n_dict[x] for x in col_ids]) print("data") data = np.array([1] * cdos.count()) print("matrix") Scoo = coo_matrix((data, (rows, cols)), shape=(m, n)) del cdos del row_ids del rows del col_ids del cols del data del n_dict gc.collect() S = Scoo.tocsr() del Scoo gc.collect() print("transpose") St = S.transpose() print("multiply") Cmat = S * St del S del St gc.collect() ltri = tril(Cmat, k=-1) G = nx.from_scipy_sparse_matrix(ltri) cnode = m_dict[Doc.objects.get(UT='WOS:000297683800015').UT] paths = nx.single_source_shortest_path(G, cnode) deg = nx.degree_centrality(G) ecent = nx.eigenvector_centrality(G) x = nx.core_number(G) for i in range(G.number_of_nodes()): d = Doc.objects.get(pk=rev_m_dict[i]) d.k = x[i] d.degree = deg[i] d.eigen_cent = ecent[i] try: d.distance = len(paths[i]) except: pass d.save() del x del G del deg del ecent gc.collect() bcmatrix = find(tril(Cmat, k=-1)) N = len(bcmatrix[0]) bcrange = list(range(N)) print(N) chunk_size = 5000 BibCouple.objects.all().delete() for i in range(N // chunk_size + 1): f = i * chunk_size print(f) l = (i + 1) * chunk_size - 1 if l > N: l = N - 1 bcs = [] chunk = bcrange[f:l] pool = Pool(processes=5) bcs.append( pool.map( partial(bib_couple, bc_matrix=bcmatrix, rev_m_dict=rev_m_dict), chunk)) pool.terminate() gc.collect() django.db.connections.close_all() bcs = flatten(bcs) BibCouple.objects.bulk_create(bcs)
def diminish_community(sbm_graph, community_id, nodes_to_purturb, criteria, criteria_r): """Function to diminsh the SBM community Attributes: sbm_graph (Object): Networkx Graph Object community_id (int): Community to diminish criteria (str): Criteria used to diminish the community criteria_r (bool): Used to sort the nodes in reverse once order based on criteria nodes_to_purturb (int): Number of nodes to perturb """ n = sbm_graph._node_num community_nodes = [ i for i in range(n) if sbm_graph._node_community[i] == community_id ] nodes_to_purturb = min(len(community_nodes), nodes_to_purturb) labels = {} try: function = function_mapping[criteria] if criteria == 'katz': G_cen = function(sbm_graph._graph, alpha=0.01) else: G_cen = function(sbm_graph._graph) except KeyError: print(criteria, 'is an invalid input! Using degree_centrality instead.') G_cen = nx.degree_centrality(sbm_graph._graph) pass G_cen = sorted(G_cen.items(), key=operator.itemgetter(1), reverse=criteria_r) perturb_nodes = [] count = 0 i = 0 while count < nodes_to_purturb: if sbm_graph._node_community[G_cen[i][0]] == community_id: perturb_nodes.append(G_cen[i][0]) count += 1 i += 1 node_plot = [] count = 0 i = 0 while count < 20: if sbm_graph._node_community[G_cen[i][0]] == community_id: node_plot.append(G_cen[i][0]) count += 1 i += 1 node_plot_reverse = [] count = 0 i = len(G_cen) - 1 while count < 20: if sbm_graph._node_community[G_cen[i][0]] == community_id: node_plot_reverse.append(G_cen[i][0]) count += 1 i -= 1 for i, nid in enumerate(perturb_nodes): labels[nid] = str("{0:.2f}".format(G_cen[i][1])) del G_cen # perturb_nodes = random.sample(community_nodes, nodes_to_purturb) left_communitis = [ i for i in range(sbm_graph._community_num) if i != community_id ] for node_id in perturb_nodes: new_community = random.sample(left_communitis, 1)[0] print('Node %d change from community %d to %d' % (node_id, sbm_graph._node_community[node_id], new_community)) sbm_graph._node_community[node_id] = new_community for node_id in perturb_nodes: _resample_egde_for_node(sbm_graph, node_id) return perturb_nodes, labels, node_plot, node_plot_reverse
if nx.is_connected(G_mentions): print("graph is connected") else: print("graph is not connected") print( f"Number of connected components: {nx.number_connected_components(G_mentions)}" ) print( f"Average clustering coefficient: {nx.average_clustering(G_mentions):.5f}") print(f"Transitivity: {nx.transitivity(G_mentions):.5f}") # Takes 7 minutes start = time.time() graph_centrality = nx.degree_centrality(G_mentions) max_de = max(graph_centrality.items(), key=itemgetter(1)) sorted_centrality = sorted(graph_centrality.items(), key=itemgetter(1), reverse=True) graph_closeness = nx.closeness_centrality(G_mentions) sorted_closeness = sorted(graph_closeness.items(), key=itemgetter(1), reverse=True) max_clo = max(graph_closeness.items(), key=itemgetter(1)) graph_betweenness = nx.betweenness_centrality(G_mentions, normalized=True, endpoints=False) sorted_betweeness = sorted(graph_betweenness.items(), key=itemgetter(1), reverse=True)
def graph_json(): if len(request.args)==0: return jsonify(json_graph) graph_={'nodes':[],'links':[]} nodes_=set() graph_nodes=[] min_nodes=5 exp_arts=False court_cases=False if request.args.get('min'): min_nodes=int(request.args.get('min')) if request.args.get('exp_arts'): if request.args.get('exp_arts')=="true": exp_arts=True if request.args.get('court_cases'): if request.args.get('court_cases')=="true": court_cases=True if request.args.get('include'): re_include=re.compile(request.args.get('include')) if request.args.get('include_doc'): re_include_doc=re.compile(request.args.get('include_doc')) for node in json_graph['nodes']: if node['type']==1: if request.args.get('include'): if re_include.search(node['name'].lower()): if not node['id'] in nodes_: nodes_.add(node['id']) graph_nodes.append(node) else: if not node['id'] in nodes_: graph_nodes.append(node) nodes_.add(node['id']) elif not court_cases and not exp_arts and node['type']==2: if request.args.get('include_doc'): if re_include_doc.search(node['name'].lower()): if not node['id'] in nodes_: graph_nodes.append(node) nodes_.add(node['id']) else: if not node['id'] in nodes_: graph_nodes.append(node) nodes_.add(node['id']) elif not court_cases and exp_arts and node['type']==3: if request.args.get('include_doc'): if re_include_doc.search(node['name'].lower()): if not node['id'] in nodes_: graph_nodes.append(node) nodes_.add(node['id']) else: if not node['id'] in nodes_: graph_nodes.append(node) nodes_.add(node['id']) graph_nodes_=[] nodes_=set() if request.args.get('exclude'): re_exclude=re.compile(request.args.get('exclude')) if request.args.get('exclude_doc'): re_exclude_doc=re.compile(request.args.get('exclude_doc')) for node in graph_nodes: if node['type']==1: if request.args.get('exclude'): if not re_exclude.search(node['name'].lower()): if not node['id'] in nodes_: graph_nodes_.append(node) nodes_.add(node['id']) else: if not node['id'] in nodes_: graph_nodes_.append(node) nodes_.add(node['id']) elif not court_cases and not exp_arts and node['type']==2: if request.args.get('exclude_doc'): if not re_exclude_doc.search(node['name'].lower()): if not node['id'] in nodes_: graph_nodes_.append(node) nodes_.add(node['id']) else: if not node['id'] in nodes_: graph_nodes_.append(node) nodes_.add(node['id']) elif not court_cases and exp_arts and node['type']==3: if request.args.get('exclude_doc'): if not re_exclude_doc.search(node['name'].lower()): if not node['id'] in nodes_: graph_nodes_.append(node) nodes_.add(node['id']) else: if not node['id'] in nodes_: graph_nodes_.append(node) nodes_.add(node['id']) targets_=set() sources_=set() for edge in json_graph['links']: if edge['source'] in nodes_ and edge['target'] in nodes_: if int(edge['ori_val'])>= min_nodes: graph_['links'].append(edge) targets_.add(edge['target']) sources_.add(edge['source']) for node in graph_nodes_: if node['type']==1: if node['id'] in sources_ or node['id'] in targets_: graph_['nodes'].append(node) if node['type']>1: if node['id'] in targets_: graph_['nodes'].append(node) G = nx.DiGraph() G.add_nodes_from([ n['id'] for n in graph_['nodes']]) G.add_weighted_edges_from([ (e['source'],e['target'],e['ori_val']) for e in graph_['links']]) graph_['stats']={} graph_['stats']['Density']=nx.density(G) dc=nx.degree_centrality(G) m,nm=get_max(dc) if m>0.0: graph_['stats']['avg Degree Centrality']=sum([v for v in dc.values()])/len(dc) graph_['stats']['max Degree Centrality']=m graph_['stats']['Node Degree Centrality']=nm for i,node in enumerate(graph_['nodes']): 
        graph_['nodes'][i]['dc'] = dc[node['id']]
    return jsonify(graph_)
def Degree_Centrality(G):
    Degree_Centrality = nx.degree_centrality(G)
    # print("Degree_Centrality:", sorted(Degree_Centrality.items(), key=lambda d: d[1], reverse=True))
    return Degree_Centrality
def extended_stats(G, connectivity=False, anc=False, ecc=False, bc=False, cc=False): """ Calculate extended topological stats and metrics for a graph. Many of these algorithms have an inherently high time complexity. Global topological analysis of large complex networks is extremely time consuming and may exhaust computer memory. Consider using function arguments to not run metrics that require computation of a full matrix of paths if they will not be needed. Parameters ---------- G : networkx.MultiDiGraph input graph connectivity : bool if True, calculate node and edge connectivity anc : bool if True, calculate average node connectivity ecc : bool if True, calculate shortest paths, eccentricity, and topological metrics that use eccentricity bc : bool if True, calculate node betweenness centrality cc : bool if True, calculate node closeness centrality Returns ------- stats : dict dictionary of network measures containing the following elements (some only calculated/returned optionally, based on passed parameters): - avg_neighbor_degree - avg_neighbor_degree_avg - avg_weighted_neighbor_degree - avg_weighted_neighbor_degree_avg - degree_centrality - degree_centrality_avg - clustering_coefficient - clustering_coefficient_avg - clustering_coefficient_weighted - clustering_coefficient_weighted_avg - pagerank - pagerank_max_node - pagerank_max - pagerank_min_node - pagerank_min - node_connectivity - node_connectivity_avg - edge_connectivity - eccentricity - diameter - radius - center - periphery - closeness_centrality - closeness_centrality_avg - betweenness_centrality - betweenness_centrality_avg """ stats = {} # create a DiGraph from the MultiDiGraph, for those metrics that require it G_dir = nx.DiGraph(G) # create an undirected Graph from the MultiDiGraph, for those metrics that # require it G_undir = nx.Graph(G) # get the largest strongly connected component, for those metrics that # require strongly connected graphs G_strong = utils_graph.get_largest_component(G, strongly=True) # average degree of the neighborhood of each node, and average for the graph avg_neighbor_degree = nx.average_neighbor_degree(G) stats["avg_neighbor_degree"] = avg_neighbor_degree stats["avg_neighbor_degree_avg"] = sum( avg_neighbor_degree.values()) / len(avg_neighbor_degree) # average weighted degree of the neighborhood of each node, and average for # the graph avg_wtd_nbr_deg = nx.average_neighbor_degree(G, weight="length") stats["avg_weighted_neighbor_degree"] = avg_wtd_nbr_deg stats["avg_weighted_neighbor_degree_avg"] = sum( avg_wtd_nbr_deg.values()) / len(avg_wtd_nbr_deg) # degree centrality for a node is the fraction of nodes it is connected to degree_centrality = nx.degree_centrality(G) stats["degree_centrality"] = degree_centrality stats["degree_centrality_avg"] = sum( degree_centrality.values()) / len(degree_centrality) # calculate clustering coefficient for the nodes stats["clustering_coefficient"] = nx.clustering(G_undir) # average clustering coefficient for the graph stats["clustering_coefficient_avg"] = nx.average_clustering(G_undir) # calculate weighted clustering coefficient for the nodes stats["clustering_coefficient_weighted"] = nx.clustering(G_undir, weight="length") # average clustering coefficient (weighted) for the graph stats["clustering_coefficient_weighted_avg"] = nx.average_clustering( G_undir, weight="length") # pagerank: a ranking of the nodes in the graph based on the structure of # the incoming links pagerank = nx.pagerank(G_dir, weight="length") stats["pagerank"] = pagerank # node with 
the highest page rank, and its value pagerank_max_node = max(pagerank, key=lambda x: pagerank[x]) stats["pagerank_max_node"] = pagerank_max_node stats["pagerank_max"] = pagerank[pagerank_max_node] # node with the lowest page rank, and its value pagerank_min_node = min(pagerank, key=lambda x: pagerank[x]) stats["pagerank_min_node"] = pagerank_min_node stats["pagerank_min"] = pagerank[pagerank_min_node] # if True, calculate node and edge connectivity if connectivity: # node connectivity is the minimum number of nodes that must be removed # to disconnect G or render it trivial stats["node_connectivity"] = nx.node_connectivity(G_strong) # edge connectivity is equal to the minimum number of edges that must be # removed to disconnect G or render it trivial stats["edge_connectivity"] = nx.edge_connectivity(G_strong) utils.log("Calculated node and edge connectivity") # if True, calculate average node connectivity if anc: # mean number of internally node-disjoint paths between each pair of # nodes in G, i.e., the expected number of nodes that must be removed to # disconnect a randomly selected pair of non-adjacent nodes stats["node_connectivity_avg"] = nx.average_node_connectivity(G) utils.log("Calculated average node connectivity") # if True, calculate shortest paths, eccentricity, and topological metrics # that use eccentricity if ecc: # precompute shortest paths between all nodes for eccentricity-based # stats sp = { source: dict( nx.single_source_dijkstra_path_length(G_strong, source, weight="length")) for source in G_strong.nodes() } utils.log("Calculated shortest path lengths") # eccentricity of a node v is the maximum distance from v to all other # nodes in G eccentricity = nx.eccentricity(G_strong, sp=sp) stats["eccentricity"] = eccentricity # diameter is the maximum eccentricity diameter = nx.diameter(G_strong, e=eccentricity) stats["diameter"] = diameter # radius is the minimum eccentricity radius = nx.radius(G_strong, e=eccentricity) stats["radius"] = radius # center is the set of nodes with eccentricity equal to radius center = nx.center(G_strong, e=eccentricity) stats["center"] = center # periphery is the set of nodes with eccentricity equal to the diameter periphery = nx.periphery(G_strong, e=eccentricity) stats["periphery"] = periphery # if True, calculate node closeness centrality if cc: # closeness centrality of a node is the reciprocal of the sum of the # shortest path distances from u to all other nodes closeness_centrality = nx.closeness_centrality(G, distance="length") stats["closeness_centrality"] = closeness_centrality stats["closeness_centrality_avg"] = sum( closeness_centrality.values()) / len(closeness_centrality) utils.log("Calculated closeness centrality") # if True, calculate node betweenness centrality if bc: # betweenness centrality of a node is the sum of the fraction of # all-pairs shortest paths that pass through node # networkx 2.4+ implementation cannot run on Multi(Di)Graphs, so use DiGraph betweenness_centrality = nx.betweenness_centrality(G_dir, weight="length") stats["betweenness_centrality"] = betweenness_centrality stats["betweenness_centrality_avg"] = sum( betweenness_centrality.values()) / len(betweenness_centrality) utils.log("Calculated betweenness centrality") utils.log("Calculated extended stats") return stats
def plot_info(G, names): def get_spread(dictionary): min_val = dictionary[1] max_val = dictionary[1] for key in dictionary: if min_val > dictionary[key]: min_val = dictionary[key] if max_val < dictionary[key]: max_val = dictionary[key] if min_val == 0: dictionary['Spread'] = 'infinity' else: dictionary['Spread'] = max_val / min_val return dictionary def get_katz_alpha(matrix): largest = max(linalg.eigvals(matrix)) return 1 / largest - 0.01 nx.draw_networkx(G, show_labels=True, labels=names) degree_centralities = get_spread(nx.degree_centrality(G)) eigenvector_centralities = get_spread(nx.eigenvector_centrality(G)) katz_centralities = get_spread( nx.katz_centrality(G, alpha=get_katz_alpha(nx.to_numpy_matrix(G)))) page_rank_centralities = get_spread(nx.pagerank(G, alpha=0.85)) closeness_centralities = get_spread(nx.closeness_centrality(G)) betweeness_centralities = get_spread(nx.betweenness_centrality(G)) data = [] for key in degree_centralities: data.append([ degree_centralities[key], eigenvector_centralities[key], katz_centralities[key], page_rank_centralities[key], closeness_centralities[key], betweeness_centralities[key] ]) row_lables = [] for x in range(len(names)): row_lables.append(names[x]) row_lables.append('Spread') centralities = [ 'Degree', 'Eigenvector', 'Katz', 'Page Rank', 'Closeness', 'Betweenness' ] for row in range(len(data)): for item in range(len(data[0])): if type(data[row][item]) is not str: data[row][item] = round(data[row][item], 3) the_table = plt.table(cellText=data, rowLabels=row_lables, colLabels=centralities, loc='bottom') plt.tight_layout() plt.subplots_adjust(left=0.29, bottom=0.46, right=0.75, top=None, wspace=None, hspace=None) the_table.scale(2, 2) plt.axis('off') the_table.auto_set_font_size(False) the_table.set_fontsize(10) plt.show()
def degree_centrality():
    degree_centrality_saves = nx.degree_centrality(G)
    order_degree_centrality_rank = sorted(degree_centrality_saves.items(),
                                          key=lambda x: x[1], reverse=True)
    # cut_order_degree_centrality_rank = order_degree_centrality_rank[0:10]
    # print(cut_order_degree_centrality_rank)
    return order_degree_centrality_rank
nx.write_gexf(g, 'graph.gexf') g1 = nx.read_gexf('graph.gexf') import matplotlib.pyplot as plt pos = nx.spring_layout(g) nx.draw_networkx_nodes(g, pos, node_color='yellow', node_size=50) nx.draw_networkx_edges(g, pos, edge_color='blue') nx.draw_networkx_labels(g, pos, font_size=20) plt.axis('off') plt.show() #plt.savefig('graph.png') nx.diameter(g) #самый длинный путь g.number_of_nodes() g.number_of_edges() nx.density(g) #примерно отношение узлов к ребрам nx.average_clustering(g) deg = nx.degree_centrality(g) # goo.gl/AQllCa - датасеты разных сетей import re g_dolph = nx.Graph() f = open('out.dolphins', 'r', encoding='utf-8') dolphins = f.readlines() for line in dolphins: nums = re.findall(r'[0-9]+', line) g_dolph.add_edge(int(nums[0]), int(nums[1])) pos_dolph = nx.spring_layout(g_dolph) nx.draw_networkx_nodes(g_dolph, pos_dolph, node_color='blue', node_size=100) nx.draw_networkx_edges(g_dolph, pos_dolph, edge_color='yellow') nx.draw_networkx_labels(g_dolph, pos_dolph, font_size=20)
'''
Density of the Graph

A good metric to begin with is network density. This is simply the ratio of
actual edges in the network to all possible edges in the network.
'''
density = nx.density(G)  # calculates the density of the graph
print("Network density:", density)

'''
Centrality

In network analysis, measures of the importance of nodes are referred to as
centrality measures. Degree is the simplest and most common way of finding
important nodes. A node's degree is simply its number of edges: if a node has
three edges extending from it to other nodes, its degree is three.
'''
rsd = nx.degree_centrality(G)  # calculates degree centrality for every node
rsdf = pd.DataFrame(pd.Series(rsd))  # preprocess the output
rsdf = rsdf.reset_index()  # reset the index
rsdf.columns = ["addresses", "Degree centrality"]  # rename columns
rsdf = rsdf.sort_values(by="Degree centrality", ascending=False)  # sort by descending centrality
top_5_nodes = list(rsdf.addresses[0:5])  # get top 5 nodes
print(top_5_nodes)  # print top 5 nodes

'''
Finding Diameter

Diameter is the longest of all shortest paths. After calculating all shortest
paths between every possible pair of nodes in the network, the diameter is the
length of the path between the two nodes that are furthest apart. The measure
gives a sense of the network's overall size, the distance from one end of the
network to another.
'''
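Since diameter is undefined on a disconnected graph, a minimal sketch (with an illustrative synthetic graph) computes it on the largest connected component:

import networkx as nx

G = nx.barabasi_albert_graph(100, 2, seed=0)
giant = G.subgraph(max(nx.connected_components(G), key=len))
print("Diameter of the giant component:", nx.diameter(giant))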
def degree_centrality(G):
    return nx.degree_centrality(G)
weightfile=['./intermediate/enc_weightsacdm.hdf5', './intermediate/dec_weightsacdm.hdf5']) sample = args.samples if not os.path.exists('./test_data/academic/pickle'): os.mkdir('./test_data/academic/pickle') graphs, length = dataprep_util.get_graph_academic('./test_data/academic/adjlist') for i in range(length): nx.write_gpickle(graphs[i], './test_data/academic/pickle/' + str(i)) else: length = len(os.listdir('./test_data/academic/pickle')) graphs = [] for i in range(length): graphs.append(nx.read_gpickle('./test_data/academic/pickle/' + str(i))) G_cen = nx.degree_centrality(graphs[29]) # graph 29 in academia has highest number of edges G_cen = sorted(G_cen.items(), key=operator.itemgetter(1), reverse=True) node_l = [] i = 0 while i < sample: node_l.append(G_cen[i][0]) i += 1 for i in range(length): graphs[i] = graph_util.sample_graph_nodes(graphs[i], node_l) outdir = args.resultdir if not os.path.exists(outdir): os.mkdir(outdir) outdir = outdir + '/' + args.testDataType if not os.path.exists(outdir):
def map_properties(self, sort_by=['Pagerank', 'Out_degree_wg']):
    """
    Compute key properties of nodes in the aggregate map.

    Returns a sorted pandas DataFrame with graph properties.

    Parameters
    ----------
    sort_by : list
        List of properties to sort dataframe.

    Returns
    -------
    pandas DataFrame
        Dataframe with a set of graph metrics computed for each node in the graph.
    """
    metrics = {
        # Node degree
        'Node_degree': dict(self.map.degree)
        # Node out-degree
        , 'Out_degree': dict(self.map.out_degree)
        # Node weighted out-degree
        , 'Out_degree_wg': dict(self.map.out_degree(weight='weight'))
        # Node in-degree
        , 'In_degree': dict(self.map.in_degree)
        # Node weighted in-degree
        , 'In_degree_wg': dict(self.map.in_degree(weight='weight'))
        # Node pagerank
        , 'Pagerank': dict(nx.pagerank(self.map))
        # Node eigenvector centrality
        , 'Eigenvector_centrality': dict(nx.eigenvector_centrality(self.map))
        # Node degree centrality
        , 'Degree_centrality': dict(nx.degree_centrality(self.map))
        # Node closeness centrality
        , 'Closeness_centrality': dict(nx.closeness_centrality(self.map))
        # Node betweenness centrality
        , 'Betweenness_centrality': dict(nx.betweenness_centrality(self.map.to_undirected()))
        # Node Katz centrality
        #, 'Katz_centrality': dict(nx.katz_centrality(self.map.to_undirected()))
        # Node communicability centrality
        #, 'Communicability_centrality': dict(nx.communicability_centrality(self.map.to_undirected()))
    }

    df_node_properties = pd.DataFrame.from_dict(metrics)
    df_node_properties.set_index(np.array(self.map.nodes()), inplace=True)
    df_node_properties.sort_values(sort_by, ascending=False, inplace=True)

    return df_node_properties
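'''
A small hedged aside on the Degree_centrality entry above: for a directed graph
like self.map, nx.degree_centrality normalizes total (in + out) degree, while
nx.in_degree_centrality and nx.out_degree_centrality report the two directions
separately. The toy graph below is purely illustrative and not part of the
original class.
'''
import networkx as nx

toy = nx.DiGraph([(1, 2), (1, 3), (3, 2)])
print(nx.degree_centrality(toy))      # based on in-degree + out-degree
print(nx.in_degree_centrality(toy))   # incoming edges only
print(nx.out_degree_centrality(toy))  # outgoing edges only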
betCent = nx.betweenness_centrality(edges, normalized=True, endpoints=True)
node_color = [20000.0 * edges.degree(v) for v in edges]
node_size = [v * 10000 for v in betCent.values()]
plt.figure(figsize=(5, 5))
nx.draw_networkx(edges, pos=pos, with_labels=True, node_color=node_color, node_size=node_size)
plt.show()
sorted(betCent, key=betCent.get, reverse=True)[:5]

#Degree Centrality
pos = nx.spring_layout(edges)
degCent = nx.degree_centrality(edges)
node_color = [20000.0 * edges.degree(v) for v in edges]
node_size = [v * 10000 for v in degCent.values()]
plt.figure(figsize=(10, 10))
nx.draw_networkx(edges, pos=pos, with_labels=True, node_color=node_color, node_size=node_size)
plt.show()
sorted(degCent, key=degCent.get, reverse=True)[:5]

#Closeness Centrality
pos = nx.spring_layout(edges)
cloCent = nx.closeness_centrality(edges)
def find_nodes_with_highest_deg_cent(G):
    # Compute the degree centrality of G: deg_cent
    deg_cent = nx.degree_centrality(G)

    # Compute the maximum degree centrality: max_dc
    max_dc = max(list(deg_cent.values()))

    nodes = set()

    # Iterate over the degree centrality dictionary
    for k, v in deg_cent.items():
        # Check if the current value has the maximum degree centrality
        if v == max_dc:
            # Add the current node to the set of nodes
            nodes.add(k)

    return nodes

# Find the node(s) that has the highest degree centrality in T: top_dc
top_dc = find_nodes_with_highest_deg_cent(T)
print(top_dc)

# Write the assertion statement
for node in top_dc:
    assert nx.degree_centrality(T)[node] == max(nx.degree_centrality(T).values())
def add_degree_centrality(network: nx.Graph):
    dc = nx.degree_centrality(network)
    nx.set_node_attributes(network, dc, 'Degree_Centrality')
    return network
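'''
A brief hedged usage sketch for the helper above; the karate-club graph is only
an illustrative input and not part of the original snippet.
'''
import networkx as nx

g = add_degree_centrality(nx.karate_club_graph())
print(g.nodes[0]['Degree_Centrality'])                 # centrality stored directly on the node
print(nx.get_node_attributes(g, 'Degree_Centrality'))  # full node -> centrality mapping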
#Assortativity
print("The assortativity of the graph: " + str(nx.degree_assortativity_coefficient(g)))

#Average Clustering
print("The average clustering coefficient: " + str(nx.average_clustering(g)))

#Density
print("The Density of our Graph: " + str(nx.density(g)))

#Number of Nodes
print("The Number of Nodes: " + str(len(g.nodes)))

#Number of Edges
print("The Number of Edges: " + str(len(g.edges)))

#MICRO ANALYSIS -------------------------------------

#degree centrality
print("The top 5 nodes by degree centrality:")
deg = pd.DataFrame(dict(nx.degree_centrality(g)).items())
print(deg.sort_values(by=[1], ascending=False).head(5))

#betweenness centrality
print("The top 5 nodes by betweenness centrality:")
bet = pd.DataFrame(dict(nx.betweenness_centrality(g)).items())
print(bet.sort_values(by=[1], ascending=False).head(5))

#boxplot
plt.boxplot(np.array(bet.sort_values(by=[1], ascending=False)[1]))
plt.title('Distribution of Betweenness')

#Bottom of each list
#degree centrality
print("The bottom 5 nodes by degree centrality:")
deg = pd.DataFrame(dict(nx.degree_centrality(g)).items())
print(deg.sort_values(by=[1], ascending=False).tail(5))
def degree_centrality(self, number=None):
    """Degree centrality."""
    unsorted_dict = nx.degree_centrality(self.get_graph())
    return sort(unsorted_dict, number)
def find_max_nodes(G):
    # Rank nodes by degree centrality, highest first, and keep only the top 10%.
    centrality = nx.degree_centrality(G)
    res = sorted(centrality.items(), key=lambda x: x[1], reverse=True)
    return res[0:int(G.number_of_nodes() * 0.1)]
def centrality_compare(graph=None, nodes_string=None, value_counts=None):
    measurements_dict = OrderedDict()
    compare_dict = OrderedDict()

    # The data from wevi
    if nodes_string is None:
        nodes_list = [
            0.07553153757502284, 0.008804137964580436, 0.009528332679916485,
            0.09411131873310066, 0.056807282025497695, 0.09709045935848355,
            0.058825181534953086, 0.2655416154784191, 0.18734994882402486,
            0.14641018582600146
        ]
    else:
        nodes_list = [float(x) for x in nodes_string.split(",")]

    # Put the wevi data in a dictionary
    node_dict = {num: val for num, val in enumerate(nodes_list)}

    # Make the graph, and make it a simple graph instead of a multigraph
    if graph is None:
        graph = graph_maker()
    if value_counts is None:
        value_counts = [14, 30, 30, 11, 15, 32, 39, 37, 45, 47]
    graph = nx.Graph(graph)

    measurements_dict["closeness centrality"] = nx.closeness_centrality(graph).values()
    # measurements_dict["eigenvector centrality"] = nx.eigenvector_centrality(graph).values()
    measurements_dict["degree centrality"] = nx.degree_centrality(graph).values()
    measurements_dict["betweenness centrality"] = nx.betweenness_centrality(graph).values()
    # measurements_dict["katz centrality"] = nx.katz_centrality(graph).values()
    measurements_dict["load centrality"] = nx.load_centrality(graph).values()
    measurements_dict["nodes count"] = value_counts

    # change the lists order to lexicographic
    measurements_dict = {
        key: [float(i) / sum(value) for i in value]
        for key, value in measurements_dict.items()
    }
    measurements_dict["wevi"] = [i for i in node_dict.values()]

    # Loop over all the centrality measurements
    for centrality_name, centrality_value in measurements_dict.items():
        # Calculate correlations
        pearson = pearsonr(centrality_value, nodes_list)
        spearman = spearmanr(centrality_value, nodes_list)
        linregres = linregress(centrality_value, nodes_list)

        # add it to the compare dict
        compare_dict[centrality_name] = [
            pearson[0], spearman[0], linregres[2]**2,
            pearson[1], spearman[1], linregres[4]
        ]

    # Print the results nicely
    print tabulate([[x] + y for x, y in compare_dict.items()],
                   headers=['Name', 'Pearson', 'Spearman', 'linregress',
                            'Pearson p-value', 'Spearman p-value', 'linregress p-value'])

    sorted2 = sorted(range(len(measurements_dict.values()[0])), key=lambda k: str(k))

    best_nodes_dict = {}
    for measure, mes_nodes_list in measurements_dict.items():
        best_nodes_dict[measure] = [
            "Node " + str(x[0])
            for x in sorted(enumerate(mes_nodes_list), key=lambda x: x[1], reverse=True)
        ]

    best_nodes_dict["wevi"] = [
        "Node " + str(sorted2[x[0]])
        for x in sorted(enumerate(measurements_dict["wevi"]), key=lambda x: x[1], reverse=True)
    ]

    df = pd.DataFrame(measurements_dict)
    # df.to_csv("C:\Users\Dvir\Desktop\NNftw\measures.csv")

    print "\n\n"
    print tabulate([[x] + y for x, y in measurements_dict.items()],
                   headers=["Node " + str(x)
                            for x in sorted(range(len(nodes_list)), key=lambda k: str(k))])
    print "\n\n"
    print tabulate([[x] + y[:5] for x, y in best_nodes_dict.items()],
                   headers=[x for x in range(5)])
    print "\n\n"

    return compare_dict
listmincentrality = (0, 10)
listmaxcentrality = (0, 0)
for n in nx.betweenness_centrality(G).items():
    # Two independent checks, so the same node can update both extremes
    # (with elif, the very first node would only ever be tested against the minimum).
    if listmincentrality[1] > n[1]:
        listmincentrality = n
    if listmaxcentrality[1] < n[1]:
        listmaxcentrality = n

print('')
print("The node that has the minimal centrality is: ", listmincentrality)
print("The node that has the maximum centrality is: ", listmaxcentrality)

# normalized
listminnormalized = (0, 10)
listmaxnormalized = (0, 0)
for n in nx.degree_centrality(G).items():
    if listminnormalized[1] > n[1]:
        listminnormalized = n
    if listmaxnormalized[1] < n[1]:
        listmaxnormalized = n

print('')
print("The node that has the minimum (normalized) degree is: ", listminnormalized)
print("The node that has the maximal (normalized) degree is: ", listmaxnormalized)

# In[ ]:

# looking for cliques
def centrality_distribution(G):
    # Degree centrality values, normalized so that they sum to one.
    centrality = list(nx.degree_centrality(G).values())
    centrality = np.asarray(centrality)
    centrality /= centrality.sum()
    return centrality
print("Question 2:") print("Nodes:", len(g.nodes())) print("Edges:", len(g.edges())) print() h = g.copy() h.remove_node('Mining-the-Social-Web-2nd-Edition(repo)') # Comment out write if file is made and uncomment read nx.write_edgelist(h, "followers.edgelist") # h=nx.read_edgelist("followers.edgelist") print("Follower Only List Read or Written") print() dc = sorted(nx.degree_centrality(h).items(), key=itemgetter(1), reverse=True) print("Question 3:") print("Degree Centrality") print(dc[:10]) print() bc = sorted(nx.betweenness_centrality(h).items(), key=itemgetter(1), reverse=True) print("Betweenness Centrality") print(bc[:10]) print() print("Closeness Centrality")
print(nx.minimum_node_cut(GS1))
print(nx.edge_connectivity(GS1))
print(nx.minimum_edge_cut(GS1))

# Centrality:
# Identification of important nodes.

# Degree Centrality
# C_deg(V) = D_V / (|N| - 1)
# D_V : degree of node V
# N   : number of nodes in the graph
GK = nx.karate_club_graph()
nx.draw(GK, with_labels=True)
plt.show()
print(nx.degree_centrality(GK))

# Degree Centrality: Directed Graph

# Closeness Centrality
nx.closeness_centrality(GK)

# Page Rank Algorithm
GP = nx.DiGraph()
GP.add_edge(1, 2)
GP.add_edge(1, 3)
GP.add_edge(4, 3)
GP.add_edge(3, 5)
nx.draw(GP, with_labels=True)
plt.show()
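'''
A small hedged check of the degree-centrality formula quoted above,
C_deg(V) = D_V / (|N| - 1), against nx.degree_centrality on the same
karate-club graph; the comparison itself is an added illustration.
'''
manual = {v: GK.degree(v) / (GK.number_of_nodes() - 1) for v in GK.nodes()}
library = nx.degree_centrality(GK)
assert all(abs(manual[v] - library[v]) < 1e-12 for v in GK.nodes())
print(manual[0], library[0])   # node 0 has degree 16 out of 33 possible neighbours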