def augmentNodes(g):
    r1 = nx.eigenvector_centrality_numpy(g)
    r2 = nx.degree_centrality(g)  # DP MY
    r3 = nx.betweenness_centrality(g)
    # Scientific collaboration networks: II. Shortest paths, weighted networks,
    # and centrality, M. E. J. Newman, Phys. Rev. E 64, 016132 (2001).
    r5 = nx.load_centrality(g, weight='weight')  # DY, WY-writename
    r6 = nx.pagerank(g, alpha=0.85, personalization=None, max_iter=100,
                     tol=1e-08, nstart=None, weight='weight')
    if nx.is_directed(g):
        r8 = nx.in_degree_centrality(g)
        r9 = nx.out_degree_centrality(g)
        # r10 = nx.hits(g, max_iter=100, tol=1e-08, nstart=None)
    else:
        r4 = nx.communicability_centrality(g)
        r7 = nx.clustering(g, weight='weight')
    for x in g.nodes():
        g.node[x]['eigenvector_centrality_numpy'] = r1[x]
        g.node[x]['degree_centrality'] = r2[x]
        g.node[x]['betweenness_centrality'] = r3[x]
        g.node[x]['load_centrality'] = r5[x]
        g.node[x]['pagerank'] = r6[x]
        if nx.is_directed(g):
            g.node[x]['in_degree_centrality'] = r8[x]
            g.node[x]['out_degree_centrality'] = r9[x]
            # g.node[x]['hits'] = r10[x]
        else:
            g.node[x]['communicability_centrality'] = r4[x]
            g.node[x]['clustering'] = r7[x]
    return g
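A minimal usage sketch of the directed branch above, on a hypothetical toy graph. Note the snippet targets NetworkX 1.x, where node attributes live under g.node and communicability_centrality exists; in NetworkX 2.x+ these became g.nodes and subgraph_centrality.

import networkx as nx

g = nx.DiGraph([(1, 2), (2, 3), (3, 1), (1, 3)])
g = augmentNodes(g)
# Each node now carries the computed scores as attributes, e.g. (n - 1 == 2):
print(g.node[1]['in_degree_centrality'])   # 0.5 - one incoming edge
print(g.node[1]['out_degree_centrality'])  # 1.0 - two outgoing edges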
def test_small_graph_centrality(self):
    G = nx.empty_graph(create_using=nx.DiGraph)
    assert {} == nx.degree_centrality(G)
    assert {} == nx.out_degree_centrality(G)
    assert {} == nx.in_degree_centrality(G)

    G = nx.empty_graph(1, create_using=nx.DiGraph)
    assert {0: 1} == nx.degree_centrality(G)
    assert {0: 1} == nx.out_degree_centrality(G)
    assert {0: 1} == nx.in_degree_centrality(G)
def test_small_graph_centrality(self):
    G = nx.empty_graph(create_using=nx.DiGraph)
    assert_equal({}, nx.degree_centrality(G))
    assert_equal({}, nx.out_degree_centrality(G))
    assert_equal({}, nx.in_degree_centrality(G))

    G = nx.empty_graph(1, create_using=nx.DiGraph)
    assert_equal({0: 1}, nx.degree_centrality(G))
    assert_equal({0: 1}, nx.out_degree_centrality(G))
    assert_equal({0: 1}, nx.in_degree_centrality(G))
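Both tests pin down the same convention: degree centrality is the degree divided by n - 1, and graphs with zero or one node are special-cased, a lone node getting centrality 1. A small sketch on a three-node directed path illustrates the normalization:

import networkx as nx

G = nx.DiGraph([(0, 1), (1, 2)])   # directed path 0 -> 1 -> 2, so n - 1 == 2
print(nx.in_degree_centrality(G))   # {0: 0.0, 1: 0.5, 2: 0.5}
print(nx.out_degree_centrality(G))  # {0: 0.5, 1: 0.5, 2: 0.0}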
def main():
    DGTiny = parseEdgeFileToDiGraph(tinyFn)
    DGSmall = parseEdgeFileToDiGraph(smallFn)
    # DGLarge = parseEdgeFileToDiGraph(largeFn)

    print nx.in_degree_centrality(DGTiny)
    print nx.out_degree_centrality(DGTiny)
    print "\n"
    print nx.closeness_centrality(DGTiny)
def ref_metrics(self, G):
    '''
    See https://networkx.github.io/documentation/latest/reference/algorithms.html for algorithms.
    Some algorithms don't support directed graphs; more comments on each
    algorithm with respect to directed graphs below.
    Edges in graph G are directed from a to b in (a, b), where a cites b.
    :param G: graph
    :return: a dataframe of network statistics for each node.
    '''
    s = time.perf_counter()
    df = pd.DataFrame([
        # Equals the number of references in a paper, as (2, 1) is "2" citing "1"
        nx.out_degree_centrality(G),
        # Equals the number of citations
        nx.in_degree_centrality(G),
        # (1, 3) and (2, 3) give PageRank to "3"
        nx.pagerank(G),
        # (1, 2, 3) gives 0 to "3" as it can't reach any other nodes.
        # Since "if the graph is not completely connected, this algorithm computes the closeness
        # centrality for each connected part separately," ensure that all papers are connected.
        nx.closeness_centrality(G),
        # (2, 3, 4) and (4, 3, 2) give higher betweenness to "3" than (2, 3, 4) alone does
        # nx.betweenness_centrality(G),
        # nx.current_flow_betweenness_centrality(G),
        # nx.current_flow_closeness_centrality(G),
        # nx.eigenvector_centrality(G)
    ]).T
    df.columns = ['odc', 'idc', 'pr', 'cc',
                  # 'bc', 'cfbc', 'cfcc', 'ec'
                  ]
    e = time.perf_counter()
    print("Metrics computed in %.1f seconds" % (e - s))
    return df
def calGraph(infile, mode=1):
    # init parameters
    inputpath = 'edge_list/'
    outputpath = 'network_output/'
    n = mode
    Data_G = inputpath + infile + '_' + str(n) + '.edgelist'

    # init graphs: directed and undirected views of the same edge list
    G = nx.read_edgelist(Data_G, create_using=nx.DiGraph())
    GU = nx.read_edgelist(Data_G)

    # basic info
    print nx.info(G), '\n', nx.info(GU)
    average_degree = float(sum(nx.degree(G).values())) / len(G.nodes())
    print 'average degree :', average_degree
    degree_histogram = nx.degree_histogram(G)
    print 'degree histogram max :', degree_histogram[1]
    density = nx.density(G)
    print 'density :', density

    # Centrality
    degree_centrality = nx.degree_centrality(G)
    print 'degree centrality top 2 :', sorted_dict(degree_centrality)[:2]
    out_degree_centrality = nx.out_degree_centrality(G)
    print 'out degree centrality top 2 :', sorted_dict(out_degree_centrality)[:2]
def centrality(G):
    """
    Calculates the in-degree, out-degree, closeness, and betweenness
    centrality for the given graph. If the graph is undirected, returns
    empty dictionaries for in-degree and out-degree centrality.

    args:
        G (nx.DiGraph) : input graph

    returns:
        tuple of in-degree, out-degree, closeness, betweenness centrality
        dictionaries whose keys are nodes.
    """
    in_degree, out_degree = {}, {}
    if G.is_directed():
        print "calculating in_degree centrality..."
        in_degree = nx.in_degree_centrality(G)
        print "calculating out_degree centrality..."
        out_degree = nx.out_degree_centrality(G)
    print "calculating closeness centrality..."
    closeness = nx.closeness_centrality(G)
    print "calculating betweenness centrality..."
    betweenness = nx.betweenness_centrality(G)
    return in_degree, out_degree, closeness, betweenness
def set_capacities_degree_gravity(topology, capacities, capacity_unit='Mbps'):
    """
    Set link capacities proportionally to the product of the degrees of the
    two end-points of the link

    Parameters
    ----------
    topology : Topology
        The topology to which link capacities will be set
    capacities : list
        A list of all possible capacity values
    capacity_unit : str, optional
        The unit in which capacity value is expressed (e.g. Mbps, Gbps etc..)
    """
    if topology.is_directed():
        in_degree = nx.in_degree_centrality(topology)
        out_degree = nx.out_degree_centrality(topology)
        gravity = {(u, v): out_degree[u] * in_degree[v]
                   for (u, v) in topology.edges()}
    else:
        degree = nx.degree_centrality(topology)
        gravity = {(u, v): degree[u] * degree[v]
                   for (u, v) in topology.edges()}
    _set_capacities_proportionally(topology, capacities, gravity,
                                   capacity_unit=capacity_unit)
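The gravity model here weights each directed link by how much traffic its endpoints plausibly source and sink. A self-contained sketch of just that weighting on a toy graph; _set_capacities_proportionally belongs to the surrounding library and is not reproduced:

import networkx as nx

topo = nx.DiGraph([('a', 'b'), ('b', 'c'), ('a', 'c')])
out_deg = nx.out_degree_centrality(topo)
in_deg = nx.in_degree_centrality(topo)
gravity = {(u, v): out_deg[u] * in_deg[v] for u, v in topo.edges()}
# Links out of high-fanout nodes into high-fanin nodes get the largest weight:
print(max(gravity, key=gravity.get))  # ('a', 'c')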
def get_node_feat(adjacency_list, nNodes):
    num_node_feat = 5
    node_feat = Variable(
        torch.zeros(len(adjacency_list), nNodes, num_node_feat))

    for t in range(len(adjacency_list)):
        G = nx.DiGraph(adjacency_list[t].numpy())
        in_degree = np.array((nx.in_degree_centrality(G)).values())
        out_degree = np.array((nx.out_degree_centrality(G)).values())
        closeness = np.array((nx.closeness_centrality(G)).values())
        between = np.array((nx.betweenness_centrality(G)).values())
        pagerank = np.array((nx.pagerank(G)).values())

        to_stack = [in_degree, out_degree, closeness, between, pagerank]
        assert (len(to_stack) == num_node_feat)
        node_feat[t] = Variable(
            torch.from_numpy(np.stack(to_stack, 1)).float())
    return node_feat
def nodes_cascaded():
    florida_nodes = []
    cascade = {}
    for jj, val in nodes_data.iteritems():
        if float(val[1]) <= -80.0 and float(val[1]) >= -87.5 and \
                float(val[2]) >= 24.5 and float(val[2]) <= 31.0:
            florida_nodes.append(jj)
    nodes_to_remove = nx.out_degree_centrality(G)
    florida_highoutd = {}
    for key, value in nodes_to_remove.iteritems():
        if key in set(florida_nodes):
            florida_highoutd[key] = value
    r = 0
    ne = []
    csd = []
    for ky, vl in florida_highoutd.iteritems():
        print ky
        ne.append(florida([ky]))
        if r == 0:
            print "node removed %s cascade %s nodes affected %s" % (ky, csd, ne[r])
            cascade[ky] = ne[r]
        elif r > 0:
            print "node removed %s cascade %s nodes affected %s" % (ky, csd, ne[r] - ne[r - 1])
            cascade[ky] = ne[r] - ne[r - 1]
        r += 1
    m = 0
    for k, v in sorted(cascade.iteritems(), key=lambda (k, v): (v, k),
                       reverse=True):
        m += 1
        if m <= 20:
            print "node", k, "cascade", v
def add_network_statistics(nodes, links):
    if len(nodes) == 0:
        return nodes
    graph = get_network(nodes, links)
    degree = nx.degree(graph)
    if max(dict(degree).values()) > 0:
        hubs, authorities = get_hits(graph)
        statistics = {
            'degree': degree,
            'in_degree': graph.in_degree(),
            'out_degree': graph.out_degree(),
            'degree_centrality': nx.degree_centrality(graph),
            'in_degree_centrality': nx.in_degree_centrality(graph),
            'out_degree_centrality': nx.out_degree_centrality(graph),
            'betweenness_centrality': nx.betweenness_centrality(graph),
            'closeness_centrality': nx.closeness_centrality(graph),
            'pagerank': get_pagerank(graph),
            'hubs': hubs,
            'authorities': authorities
        }
    else:
        statistics = {}

    # for relative in-degree we sort on date
    derive_date = lambda k: k['date'] if k['date'] != '' else '{}-01-01'.format(k['year'])
    nodes.sort(key=derive_date, reverse=True)
    for i, node in enumerate(nodes):
        nodeid = node['id']
        for var in statistics.keys():
            node[var] = statistics[var][nodeid]
        if 'in_degree' in node:
            node['rel_in_degree'] = node['in_degree'] / float(max(i, 1))
    get_community(graph, nodes)
    return nodes
def main():
    g_directed, g_undirected, all_dfs, labels = __read_csv_files()
    deg_centrality = nx.degree_centrality(g_directed)
    deg_in_centrality = nx.in_degree_centrality(g_directed)
    deg_out_centrality = nx.out_degree_centrality(g_directed)
    dict_measures = {
        'degree centrality': deg_centrality,
        'deg_in_centrality': deg_in_centrality,
        'deg_out_centrality': deg_out_centrality
    }
    count = 0
    for hist_title, values in dict_measures.items():
        count += 1
        subplot(2, 3, count)
        values_df = pd.DataFrame(list(values.items()), columns=['id', 'Score'])
        hist_plot = values_df['Score'].hist(bins=50)
        hist_plot.set_title(hist_title)
        hist_plot.set_xlabel('Score')
        hist_plot.set_ylabel("Number of nodes")
        plt.margins(x=0)
        plt.yscale('log', basey=10)
    plt.show()
    measures_for_centrality(g_undirected)
def network_analysis(request):
    profile_name = request.GET.get('profile_name', '')
    if profile_name == '':
        profile_name = None
    users = MyUser.objects.all()
    print(users)
    pair_list = []
    avatar = None
    for user in users:
        followings = user.followings.all()
        for person in followings:
            pair = [user.profile_name, person.profile_name]
            print(pair)
            pair_list.append(pair)
    g = nx.DiGraph()
    g.add_edges_from(pair_list)
    for user in users:
        if user.profile_name not in g.nodes():
            g.add_node(user.profile_name)

    i_d = nx.in_degree_centrality(g)
    o_d = nx.out_degree_centrality(g)
    b = nx.betweenness_centrality(g)
    c = nx.closeness_centrality(g)
    e = nx.eigenvector_centrality(g, max_iter=1000)
    cc = nx.clustering(g)

    draw_graph(g.copy(), nx.spring_layout(g, k=0.55), i_d, 'In Degree Centrality', profile_name)
    draw_graph(g.copy(), nx.spring_layout(g, k=0.55), o_d, 'Out Degree Centrality', profile_name)
    draw_graph(g.copy(), nx.spring_layout(g, k=0.55), b, 'Betweenness Centrality', profile_name)
    draw_graph(g.copy(), nx.spring_layout(g, k=0.55), c, 'Closeness Centrality', profile_name)
    draw_graph(g.copy(), nx.spring_layout(g, k=0.55), e, 'Eigenvector Centrality', profile_name)
    draw_network(g.copy(), nx.spring_layout(g, k=0.55))

    if profile_name is None:
        result = [i_d, o_d, b, c, e, cc]
    else:
        result = [
            i_d.get(profile_name, ''),
            o_d.get(profile_name, ''),
            b.get(profile_name, ''),
            c.get(profile_name, ''),
            e.get(profile_name, ''),
            cc.get(profile_name, '')
        ]
    return JsonResponse(result, safe=False)
def compute_centrality(graph):
    centrality_values = nx.hits(graph)
    for node_id, centrality in centrality_values[0].items():
        graph.nodes[node_id]['hub'] = centrality
    for node_id, centrality in centrality_values[1].items():
        graph.nodes[node_id]['authority'] = centrality

    centrality_values = nx.pagerank(graph)
    for node_id, centrality in centrality_values.items():
        graph.nodes[node_id]['pagerank'] = centrality

    centrality_values = nx.in_degree_centrality(graph)
    for node_id, centrality in centrality_values.items():
        graph.nodes[node_id]['in_degree'] = centrality

    centrality_values = nx.out_degree_centrality(graph)
    for node_id, centrality in centrality_values.items():
        graph.nodes[node_id]['out_degree'] = centrality

    centrality_values = nx.closeness_centrality(graph)
    for node_id, centrality in centrality_values.items():
        graph.nodes[node_id]['closeness'] = centrality

    centrality_values = nx.betweenness_centrality(graph)
    for node_id, centrality in centrality_values.items():
        graph.nodes[node_id]['betweenness'] = centrality
def get_centrality_measures(node_list_df, arc_list_df):
    """
    A function that generates a range of centrality measures for a DiGraph
    built from the Pandas dataframe.

    :param node_list_df: A data-frame containing the nodes of the network.
    :param arc_list_df: A data-frame containing sources, targets, and weights
        of relationships.
    """
    G = dataframe_to_networkx(arc_list_df)
    centrality_measures = {}

    # Degree-based centrality measures
    centrality_measures.update({'degree': nx.degree_centrality(G)})
    centrality_measures.update({'in_degree': nx.in_degree_centrality(G)})
    centrality_measures.update({'out_degree': nx.out_degree_centrality(G)})

    # Flow-based centrality measures
    centrality_measures.update({'closeness': nx.closeness_centrality(G)})
    centrality_measures.update(
        {'betweenness': nx.betweenness_centrality(G)})
    centrality_measures.update(
        {'contagion': contagion_centrality(node_list_df, arc_list_df)})

    return centrality_measures
def extract_g_metrics(G, name):
    if name == 'degree_centrality':
        metric = nx.degree_centrality(G).items()
    elif name == 'in_degree_centrality':
        metric = nx.in_degree_centrality(G).items()
    elif name == 'out_degree_centrality':
        metric = nx.out_degree_centrality(G).items()
    elif name == 'eigenvector_centrality':
        metric = nx.eigenvector_centrality(G).items()
    elif name == 'closeness_centrality':
        metric = nx.closeness_centrality(G).items()
    elif name == 'betweenness_centrality':
        metric = nx.betweenness_centrality(G).items()
    elif name == 'harmonic_centrality':
        metric = nx.harmonic_centrality(G).items()
    elif name == 'trophic_levels':
        metric = nx.trophic_levels(G).items()
    fname = f'{graph_name}_{name}'
    get_g_metrics(metric, name=fname)[['content_id', fname]].to_feather(
        f'../save/{fname}.feather')
    return
def get_centrality(graph, method, topk=None):
    if method == "edge_betweeness_centrality":
        output = nx.edge_betweenness_centrality(graph)
    elif method == "betweenness_centrality":
        output = nx.betweenness_centrality(graph)
    elif method == "closeness_centrality":
        output = nx.closeness_centrality(graph)
    elif method == "eigenvector_centrality":
        output = nx.eigenvector_centrality(graph)
    elif method == "in_degree_centrality":
        output = nx.in_degree_centrality(graph)
    elif method == "out_degree_centrality":
        output = nx.out_degree_centrality(graph)
    elif method == "pagerank":
        output = pagerank(graph)
    else:
        return
    print(len(output))
    output = np.array(create_array(output))
    mean = round(np.mean(output), 4)
    if topk:
        arg_sorted_results = np.argsort(output)[::-1][:topk]
    else:
        arg_sorted_results = np.argsort(output)[::-1]
    return output, arg_sorted_results, mean
def study_k_effect_facebook():
    graph_file = '/home/pankaj/Sampling/data/input/social_graphs/facebook/facebook_combined.txt'
    N = 4039
    G = read_facebook_graph(graph_file, N)

    influ_obj = Influence(G, 0.3, 200)

    degree_dict = nx.out_degree_centrality(G)
    val = degree_dict.values()
    print [N * x for x in np.sort(val)[-10:]]

    for k in range(10, 200, 10):
        a = np.argsort(val)[-k:]
        sample = torch.zeros(N)
        sample[a] = 1
        print k, influ_obj(sample.numpy()).item()
def centrality_histogram(g, c):
    # Creates the histogram for the specified centrality measure
    if c == 'degree':
        degree_sequence = sorted(
            [val for key, val in nx.degree_centrality(g).items()])
    elif c == 'in_degree':
        degree_sequence = sorted(
            [val for key, val in nx.in_degree_centrality(g).items()])
    elif c == 'out_degree':
        degree_sequence = sorted(
            [val for key, val in nx.out_degree_centrality(g).items()])
    elif c == 'closeness':
        degree_sequence = sorted(
            [val for key, val in nx.closeness_centrality(g).items()])
    elif c == 'betweenness':
        degree_sequence = sorted(
            [val for key, val in nx.betweenness_centrality(g).items()])
    elif c == 'eigenvector':
        degree_sequence = sorted(
            [val for key, val in nx.eigenvector_centrality(g).items()])
    elif c == 'katz':
        degree_sequence = sorted(
            [val for key, val in nx.katz_centrality(g).items()])

    degree_count = col.Counter(degree_sequence)
    deg, cnt = zip(*degree_count.items())
    plt.bar(deg, cnt, width=0.01, color='b')
    plt.title('%s centrality histogram' % c)
    plt.ylabel('Count')
    plt.xlabel('Centrality')
    plt.show()
def calculate_network_attributes(Graph):
    all_nodes = Graph.nodes()
    graph_df = pd.Series(all_nodes).to_frame(name="Node")
    degree = nx.degree(Graph)
    degree_cen = nx.degree_centrality(Graph)
    in_degree = nx.in_degree_centrality(Graph)
    out_degree = nx.out_degree_centrality(Graph)
    closeness = nx.closeness_centrality(Graph)
    between = nx.betweenness_centrality(Graph)
    try:
        eigen = nx.eigenvector_centrality_numpy(Graph)
        graph_df['Eigenvector'] = pd.Series(
            [eigen[node] for node in all_nodes])
    except eigenerror.ArpackNoConvergence:
        # if no eigenvector can be calculated, set all to zero
        graph_df['Eigenvector'] = pd.Series([0 for n in all_nodes])
    graph_df['Degree'] = pd.Series([degree[node] for node in all_nodes])
    graph_df['DegreeCentrality'] = pd.Series(
        [degree_cen[node] for node in all_nodes])
    graph_df['InDegree'] = pd.Series([in_degree[node] for node in all_nodes])
    graph_df['OutDegree'] = pd.Series([out_degree[node] for node in all_nodes])
    graph_df['Closeness'] = pd.Series([closeness[node] for node in all_nodes])
    graph_df['Betweenness'] = pd.Series([between[node] for node in all_nodes])
    return graph_df
def calculate_network_measures(G):
    in_degree = nx.in_degree_centrality(G)
    out_degree = nx.out_degree_centrality(G)
    betweenness = nx.betweenness_centrality(G, weight=WEIGHT)
    closeness = nx.closeness_centrality(G, distance=WEIGHT)
    eigenvector = nx.eigenvector_centrality(G.reverse(), weight=WEIGHT)
    clustering = nx.clustering(G.to_undirected(), weight=WEIGHT)
    pagerank = nx.pagerank(G, weight=WEIGHT)
    hubs, authorities = nx.hits_numpy(G)
    max_clique = node_clique_number(G.to_undirected())

    node_cliques = cliques_containing_node(G.to_undirected())
    node_cliques_count = {}
    for node, cliques in node_cliques.items():
        node_cliques_count[node] = len(cliques)

    network_df = pd.DataFrame(list(G.nodes), columns=[ID])
    network_df[IN_DEGREE] = network_df[ID].map(in_degree)
    network_df[OUT_DEGREE] = network_df[ID].map(out_degree)
    network_df[BETWEENNESS] = network_df[ID].map(betweenness)
    network_df[CLOSENESS] = network_df[ID].map(closeness)
    network_df[EIGENVECTOR] = network_df[ID].map(eigenvector)
    network_df[CLUSTERING] = network_df[ID].map(clustering)
    network_df[PAGERANK] = network_df[ID].map(pagerank)
    network_df[HUBS] = network_df[ID].map(hubs)
    network_df[AUTHORITIES] = network_df[ID].map(authorities)
    network_df[MAX_CLIQUE] = network_df[ID].map(max_clique)
    network_df[CLIQUES_COUNT] = network_df[ID].map(node_cliques_count)
    return network_df
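A portability note on the snippet above: nx.hits_numpy and cliques_containing_node were deprecated and later removed (NetworkX 3.0), so on current releases the same hub/authority scores come from plain nx.hits. A minimal sketch of the drop-in substitution on a toy graph:

import networkx as nx

G = nx.DiGraph([(1, 2), (2, 3), (1, 3)])
hubs, authorities = nx.hits(G)  # power iteration; replaces nx.hits_numpy
print(max(hubs, key=hubs.get))  # 1 - it points at both other nodes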
def show_net(g_matrix):
    G = nx.DiGraph()
    count = 0
    for i in range(len(g_matrix)):
        for j in range(len(g_matrix)):
            if g_matrix[i][j] > 0.1:
                number = g_matrix[i][j]
                # G.add_edge([i, j, number])
                count += 1
                G.add_weighted_edges_from([(j, i, number)])
    nx.draw(G, pos=nx.spring_layout(G), node_color='b', edge_color='r',
            alpha=0.5, with_labels=True, font_size=15, node_size=50, width=0.5)

    in_degree = nx.in_degree_centrality(G)
    out_degree = nx.out_degree_centrality(G)
    arr = np.zeros(shape=(36, 3))
    for i in range(36):
        arr[i][0] = i
        arr[i][1] = in_degree.get(i, 0.0)
        arr[i][2] = out_degree.get(i, 0.0)
    data = pd.DataFrame(data=arr, columns=['Motif ID', 'In-degree', 'Out-degree'])
    print(data)

    figname = "D:/For-F-drive/school/comp/research/Desktop/Email/Lasso_file/figure/trans_matrix_alpha-11_reduced_arrow.png"
    plt.savefig(figname, dpi=100, bbox_inches='tight')
    plt.show()
    return count
def centrality_algorithms(graph):
    # Centrality functions return a dictionary of values.
    # Calculate the maximum and print the node name with its value.
    # The value stays the same for closeness centrality, but the node itself changes.
    centrality_dict = nx.degree_centrality(graph)
    print('Degree Centrality: ', max(centrality_dict, key=centrality_dict.get),
          max(centrality_dict.values()))
    centrality_dict = nx.in_degree_centrality(graph)
    print('In Degree Centrality: ', max(centrality_dict, key=centrality_dict.get),
          max(centrality_dict.values()))
    centrality_dict = nx.out_degree_centrality(graph)
    print('Out Degree Centrality: ', max(centrality_dict, key=centrality_dict.get),
          max(centrality_dict.values()))
    centrality_dict = nx.eigenvector_centrality_numpy(graph)
    print('Eigenvector Centrality: ', max(centrality_dict, key=centrality_dict.get),
          max(centrality_dict.values()))
    centrality_dict = nx.katz_centrality(graph)
    print('Katz Centrality: ', max(centrality_dict, key=centrality_dict.get),
          max(centrality_dict.values()))
    centrality_dict = nx.closeness_centrality(graph)
    print('Closeness Centrality: ', max(centrality_dict, key=centrality_dict.get),
          max(centrality_dict.values()))
    centrality_dict = nx.betweenness_centrality(graph)
    print('Betweenness Centrality: ', max(centrality_dict, key=centrality_dict.get),
          max(centrality_dict.values()))
def plot_centralities(G):
    n = G.number_of_nodes()
    plt.figure('Graph %s: %s - %s' % (str(i), tmsp2str(Tmin + i * dt),
                                      tmsp2str(Tmin + (i + 1) * dt)))
    plt.suptitle('Centrality Measurements (Graph size = ' + str(n) + ')')
    # multiplying by (n - 1) undoes the 1/(n - 1) normalisation, recovering raw degrees
    in_degrees = [(n - 1) * d for d in nx.in_degree_centrality(G).values()]
    out_degrees = [(n - 1) * d for d in nx.out_degree_centrality(G).values()]
    degrees = [(n - 1) * d for d in nx.degree_centrality(G).values()]
    hist_plot('Degrees', [in_degrees, out_degrees, degrees], (3, 1, 1),
              ['r', 'g', 'b'])
    plt.legend(['In-Degree', 'Out-Degree', 'Degree'])
    G = nx.Graph(G)  # directed -> undirected
    hist_plot('Closeness', nx.closeness_centrality(G).values(), (3, 2, 3),
              'xkcd:orangered')
    hist_plot('Betweenness', nx.betweenness_centrality(G).values(), (3, 2, 4),
              'xkcd:crimson')
    hist_plot('Eigenvector', nx.eigenvector_centrality_numpy(G).values(),
              (3, 2, 5), 'xkcd:teal')
    hist_plot('Katz', nx.katz_centrality_numpy(G).values(), (3, 2, 6),
              'xkcd:brown')
    plt.tight_layout(rect=(0, 0, 1, 0.95))
    if args.PDF:
        pp.savefig()
        plt.close()
    else:
        plt.show()
def centrality(DG):
    in_degree_centrality = nx.in_degree_centrality(DG)
    out_degree_centrality = nx.out_degree_centrality(DG)
    with open('/home/sun/PycharmProjects/Network/in_degree_centrality.csv', 'w') as f:
        for k, v in in_degree_centrality.items():
            f.write(str(k) + ': ' + str(v) + '\n')
    with open('/home/sun/PycharmProjects/Network/out_degree_centrality.csv', 'w') as f:
        for k, v in out_degree_centrality.items():
            f.write(str(k) + ': ' + str(v) + '\n')

# def main():
#     data = '/home/sun/PycharmProjects/Network/C-elegans-frontal.txt'
#     # data = 'www.adj'
#     DG = create_network(data)
#     # draw_network(DG)
#     # clustering_coefficient(DG)
#     # centrality(DG)
#     degree_distribution(DG)
#
# if __name__ == '__main__':
#     main()
def sna_calculations(g, play_file):
    """
    :param g: a NetworkX graph object
    :type g: object
    :param play_file: the location of a play in .txt format
    :type play_file: string
    :return: returns a dictionary containing various network related figures
    :rtype: dict
    :note: also writes into results/file_name-snaCalculations.csv and results/allCharacters.csv
    """
    file_name = os.path.splitext(os.path.basename(play_file))[0]
    sna_calculations_list = dict()
    sna_calculations_list['playType'] = file_name[0]
    sna_calculations_list['avDegreeCentrality'] = numpy.mean(
        numpy.fromiter(iter(nx.degree_centrality(g).values()), dtype=float))
    sna_calculations_list['avDegreeCentralityStd'] = numpy.std(
        numpy.fromiter(iter(nx.degree_centrality(g).values()), dtype=float))
    sna_calculations_list['avInDegreeCentrality'] = numpy.mean(
        numpy.fromiter(iter(nx.in_degree_centrality(g).values()), dtype=float))
    sna_calculations_list['avOutDegreeCentrality'] = numpy.mean(
        numpy.fromiter(iter(nx.out_degree_centrality(g).values()), dtype=float))

    try:
        sna_calculations_list['avShortestPathLength'] = nx.average_shortest_path_length(g)
    except nx.NetworkXError:
        sna_calculations_list['avShortestPathLength'] = 'not connected'
    sna_calculations_list['density'] = nx.density(g)
    sna_calculations_list['avEigenvectorCentrality'] = numpy.mean(
        numpy.fromiter(iter(nx.eigenvector_centrality(g).values()), dtype=float))
    sna_calculations_list['avBetweennessCentrality'] = numpy.mean(
        numpy.fromiter(iter(nx.betweenness_centrality(g).values()), dtype=float))
    sna_calculations_list['DegreeCentrality'] = nx.degree_centrality(g)
    sna_calculations_list['EigenvectorCentrality'] = nx.eigenvector_centrality(g)
    sna_calculations_list['BetweennessCentrality'] = nx.betweenness_centrality(g)

    # snaCalculations.csv file
    sna_calc_file = csv.writer(open('results/' + file_name + '-snaCalculations.csv', 'wb'),
                               quoting=csv.QUOTE_ALL, delimiter=';')
    for key, value in sna_calculations_list.items():
        sna_calc_file.writerow([key, value])

    # allCharacters.csv file
    if not os.path.isfile('results/allCharacters.csv'):
        with open('results/allCharacters.csv', 'w') as f:
            f.write('Name;PlayType;play_file;DegreeCentrality;EigenvectorCentrality;'
                    'BetweennessCentrality;speech_amount;AverageUtteranceLength\n')

    all_characters = open('results/allCharacters.csv', 'a')
    character_speech_amount = speech_amount(play_file)
    for character in sna_calculations_list['DegreeCentrality']:
        all_characters.write(character + ';' + str(sna_calculations_list['playType']) + ';' +
                             file_name + ';' +
                             str(sna_calculations_list['DegreeCentrality'][character]) + ';' +
                             str(sna_calculations_list['EigenvectorCentrality'][character]) + ';' +
                             str(sna_calculations_list['BetweennessCentrality'][character]) + ';' +
                             str(character_speech_amount[0][character]) + ';' +
                             str(character_speech_amount[1][character]) + '\n')
    all_characters.close()

    return sna_calculations_list
def getdegreecentrality(self):
    # Renamed from getclosenesscentrality: the body computes (in/out) degree
    # centrality, not closeness.
    degree_centrality = {'in': {}, 'out': {}} if self.is_directed else {}
    if self.is_directed:
        degree_centrality['in'] = nx.in_degree_centrality(self.G)
        degree_centrality['out'] = nx.out_degree_centrality(self.G)
    else:
        degree_centrality = nx.degree_centrality(self.G)
    return degree_centrality
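If directed closeness is what the original method name intended, NetworkX exposes it through the graph orientation rather than separate functions: closeness_centrality on a digraph uses incoming distances, and reversing the graph gives the outgoing variant. A hedged sketch on a toy graph:

import networkx as nx

G = nx.DiGraph([(0, 1), (1, 2)])
closeness_in = nx.closeness_centrality(G)             # based on incoming paths
closeness_out = nx.closeness_centrality(G.reverse())  # based on outgoing paths
print(closeness_in[2], closeness_out[0])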
def calculate_out_degree_centrality(self):
    """
    Calculates out-degree centrality for every node of the graph.
    For directed graphs only.
    """
    values = nx.out_degree_centrality(self.graph)
    nx.set_node_attributes(self.graph, 'out_degree', values)
def outdegree_centrality(graph_input, directed=False):
    if type(graph_input) == dict:
        graph = convert_graph_dict_to_nx_graph(graph_input, directed)
    else:
        graph = convert_graph_df_to_nx_graph(graph_input, directed)
    if len(graph) == 1:
        return defaultdict(lambda: 0, {})
    return defaultdict(lambda: 0, nx.out_degree_centrality(graph))
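Wrapping the result in a defaultdict means lookups for nodes absent from the graph silently return 0 instead of raising KeyError, which simplifies downstream feature joins. A quick illustration of that behaviour in isolation:

from collections import defaultdict
import networkx as nx

scores = defaultdict(lambda: 0, nx.out_degree_centrality(nx.DiGraph([(1, 2)])))
print(scores[1])        # 1.0 - node 1 has the only outgoing edge
print(scores['ghost'])  # 0   - unknown key falls back to the default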
def calculateDegreeCentrality(userConnectedGraph, counter):
    """
    Calculates the out-degree centrality for the given graph and writes the
    output to a file.

    parameters:
        userConnectedGraph - graph
        counter - int value for maintaining unique file names
    """
    degreeCentrality = nx.out_degree_centrality(userConnectedGraph)
    writeCentralityOutput(degreeCentrality,
                          path + 'degreeCentrality' + str(counter))
    plotgraph(conn, path, 'degreeCentrality' + str(counter))
def sorting(E):
    in_degree_central = nx.in_degree_centrality(E)
    top_in = sorted(in_degree_central.items(), key=lambda x: x[1], reverse=True)[:10]
    out_degree_central = nx.out_degree_centrality(E)
    top_out = sorted(out_degree_central.items(), key=lambda x: x[1], reverse=True)[:10]
    # print the top-10 lists (the original computed them but discarded the result)
    print(top_in)
    print(top_out)
def get_leaf_ids(self):
    # Leaves are nodes with zero out-degree centrality.
    outdegree = nx.out_degree_centrality(self.to_digraph())
    ids = filter(lambda id: not outdegree.get(id), outdegree)
    # Temporary hack to remove content path segments.
    scrub = partial(re.sub, r'/text\d+', '')
    ids = map(scrub, ids)
    return ids
def clustering_analysis(DF_adj, re_type):
    # Utility for experimenting with graph statistics; re_type selects which
    # statistic is returned.
    labels = list(DF_adj.index)
    # print(DF_adj_1, DF_adj)

    # Network graphs: undirected and directed views of the adjacency matrix
    G = nx.Graph()
    G_i = nx.DiGraph()
    G.add_nodes_from(labels)
    G_i.add_nodes_from(labels)

    # Connect nodes
    for i in range(DF_adj.shape[0]):
        col_label = DF_adj.columns[i]
        for j in range(DF_adj.shape[1]):
            row_label = DF_adj.index[j]
            node = DF_adj.iloc[i, j]
            if node != 0:
                G.add_edge(col_label, row_label, weight=node)
                G_i.add_edge(col_label, row_label, weight=node)

    if re_type == 1:
        return dict_avg(nx.clustering(G))  # average; works for teams or players
    elif re_type == 2:
        L = nx.normalized_laplacian_matrix(G)
        e = np.linalg.eigvals(L.A)
        return max(e)  # largest Laplacian eigenvalue
    elif re_type == 3:
        return nx.algebraic_connectivity(G)
    elif re_type == 4:
        return nx.reciprocity(G_i)
    elif re_type == 5:
        return nx.transitivity(G_i)
    elif re_type == 6:
        return dict_max(nx.in_degree_centrality(G_i))
    elif re_type == 7:
        return dict_max(nx.out_degree_centrality(G_i))
    elif re_type == 8:
        try:
            return dict_avg(nx.pagerank(G, alpha=0.9))
        except:
            return 0.01
    elif re_type == 9:
        try:
            return dict_avg(nx.eigenvector_centrality(G))
        except:
            return 0.25
    elif re_type == 10:
        return dict_avg(nx.average_neighbor_degree(G_i))

    print("-----------------")
    print(nx.closeness_centrality(G))    # gauges star players
    print("-----------------")
    print(nx.pagerank(G, alpha=0.9))     # gauges players
    print("-----------------")
    print(nx.eigenvector_centrality(G))  # gauges players
    print("-----------------")
    print()  # macro-level connectivity
    print("-----------------")
def out_degree_centrality(self):
    """
    Compute the out-degree centrality for nodes.

    See Also
    --------
    https://networkx.github.io/documentation/development/reference/algorithms.centrality.html
    """
    return Counter(nx.out_degree_centrality(self.directed_graph))
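Returning a Counter rather than a plain dict is a small convenience: it gives the caller most_common() for free. A usage sketch, with a toy graph standing in for self.directed_graph:

from collections import Counter
import networkx as nx

scores = Counter(nx.out_degree_centrality(nx.DiGraph([(1, 2), (1, 3), (2, 3)])))
print(scores.most_common(2))  # [(1, 1.0), (2, 0.5)] - top fan-out nodes first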
def pagerank(matrixJson, outcsv):
    deps = json.loads(open(matrixJson, "r").read())
    nodes = deps['nodes']
    links = deps['links']
    packages = deps['packages']
    node_package_map = {}

    # Load the DSM into a graph
    G = nx.DiGraph()

    # Add nodes
    for node in nodes:
        n_name = node["name"]
        # node_package_map[n_name] = packages[node["group"]]
        G.add_node(n_name)

    # Add edges
    for link in links:
        row = link['source']
        column = link['target']
        value = link['value']
        r_n_name = nodes[row]["name"]
        c_n_name = nodes[column]["name"]
        G.add_edge(r_n_name, c_n_name)

    metrics = {}
    # NOTE: DEFAULT ALPHA FROM LITERATURE IS 0.85
    metrics["page_rank"] = nx.pagerank(G, alpha=0.85)

    if ADDITIONAL_METRICS:
        metrics["degree_centrality"] = nx.degree_centrality(G)
        metrics["in_degree_centrality"] = nx.in_degree_centrality(G)
        metrics["out_degree_centrality"] = nx.out_degree_centrality(G)
        metrics["closeness_centrality"] = nx.closeness_centrality(G)
        metrics["betweenness_centrality"] = nx.betweenness_centrality(G)
        metrics["load_centrality"] = nx.load_centrality(G)

    with open(outcsv, 'w', newline='') as outfile:
        logFile = csv.writer(outfile, delimiter=',', quotechar='|')
        for metricName in metrics:
            metric = metrics[metricName]
            for node_name in sorted(metric, key=metric.get, reverse=True):
                logFile.writerow([node_name, str(metric[node_name]), metricName])
    print("Pagerank written to: ", outcsv)
def do_center(self, args):
    "Show the top 5 most central nodes"
    d = nx.out_degree_centrality(G)
    cent_items = [(b, a) for (a, b) in d.iteritems()]
    cent_items.sort()
    cent_items.reverse()
    print "[*] Most Central Nodes"
    for i in range(0, 5):
        if cent_items[i]:
            print cent_items[i]
def process_data(denom=100000, round=0):
    f = csv.reader(open("../applab_new_6.csv", 'rb'), delimiter=',')
    db = nx.DiGraph()
    full_users = set()
    i = 0
    uniquect = 0
    for line in f:
        if i % 100000 == 0:
            print "processed", i, "lines"
        if i == 1000:
            break
        sender, receiver, date, time, duration, cost, location, region = \
            map(lambda x: x.strip(), line)
        if sender not in full_users:
            uniquect += 1
            full_users.add(sender)
            if uniquect <= 2:  # % denom - round == 0:
                db.add_node(sender)
                if db.has_node(receiver) == False:
                    db.add_node(receiver)
        else:
            if db.has_node(receiver) == False:
                db.add_node(receiver)
        if db.has_edge(sender, receiver):
            db[sender][receiver]['weight'] += int(duration)
        else:
            db.add_edge(sender, receiver, weight=int(duration))
        i += 1

    # pickle.dump(db, open("users_networkx.p" % str(round), "wb"))
    # print "degree assortativity coeff:", nx.degree_assortativity_coefficient(db)
    # print "average degree connectivity:", nx.average_degree_connectivity(db)
    # print "k nearest neighbors:", nx.k_nearest_neighbors(db)

    print "calculating deg cent"
    deg_cent = nx.degree_centrality(db)
    print "calculating in deg cent"
    in_deg_cent = nx.in_degree_centrality(db)
    print "calculating out deg cent"
    out_deg_cent = nx.out_degree_centrality(db)
    print "closeness cent"
    closeness_cent = nx.closeness_centrality(db)
    # print "betweenness cent"
    # btwn_cent = nx.betweenness_centrality(db)
    print "done"

    w = open("../output/user_network_stats.csv", 'w')
    # betweenness is commented out above, so the header matches the five values written
    w.write("uid,deg_cent,in_deg_cent,out_deg_cent,closeness_cent\n")
    for user in deg_cent.keys():
        try:
            w.write("%s,%s,%s,%s,%s\n" % (user, deg_cent[user], in_deg_cent[user],
                                          out_deg_cent[user], closeness_cent[user]))
        except:
            pass
    w.close()

    print "drawing..."
    nx.draw(db)
    plt.savefig("path.pdf")
    print "done!"
    print "edge betweenness centrality:", nx.edge_betweenness_centrality(db)
    print "communicability:", nx.communicability(db)
    print "communicability centrality:", nx.communicability_centrality(db)
def graph_degree(g):
    print "Computing degree centrality..."
    ac = nx.degree_centrality(g)
    bc = nx.in_degree_centrality(g)
    ec = nx.out_degree_centrality(g)
    ac_hash, bc_hash, ec_hash = {}, {}, {}
    # Bucket each node's score by the node's language tuple
    for n, b in ac.iteritems():
        ac_hash.setdefault(tuple(g.node[n]['languages']), []).append(b)
    for n, b in bc.iteritems():
        bc_hash.setdefault(tuple(g.node[n]['languages']), []).append(b)
    for n, b in ec.iteritems():
        ec_hash.setdefault(tuple(g.node[n]['languages']), []).append(b)
    return (ac_hash, bc_hash, ec_hash)
def compute_graph_parameters(graph, i=0):
    graph_parameters = {}
    if nx.is_directed(graph):
        graph_parameters['in_degree_centrality'] = nx.in_degree_centrality(graph)
        graph_parameters['out_degree_centrality'] = nx.out_degree_centrality(graph)
    else:
        graph_parameters['degree_centrality'] = nx.degree_centrality(graph)
    graph_parameters['closeness_centrality'] = nx.closeness_centrality(graph)
    graph_parameters['betweenness_centrality'] = nx.betweenness_centrality(graph)
    if i == 0:
        graph_parameters['eigenvector_centrality'] = nx.eigenvector_centrality(graph)
    graph_parameters['pagerank_centrality'] = nx.pagerank(graph, alpha=0.85)
    graph_parameters['clustering'] = nx.clustering(graph.to_undirected())
    return graph_parameters
def output_outdegree_centrality_info(graph, path, nodes_dict):
    """Output out-degree centrality information about the graph.

    graph : (networkx.Graph)
    path: (String) contains the path to the output file
    nodes_dict: (dictionary) maps node id to node name
    """
    outdeg_dict = nx.out_degree_centrality(graph)
    outdeg_dict = dict((nodes_dict[key], outdeg_dict[key])
                       for key in nodes_dict if key in outdeg_dict)
    outdeg_list = dict_to_sorted_list(outdeg_dict)
    with open(path, 'w') as out:
        out.write('***Out-Degree Centrality***\n')
        out.write('Node\tLayer\tOut-degree centrality\n')
        for element in outdeg_list:
            out.write('%d\t%d\t%f\n' % (element[0][0], element[0][1], element[1]))
def describe_graph(G):
    """Graph description"""
    # GRAPH DESCRIPTION
    graph_desc = pd.Series()
    # n. nodes
    graph_desc["number_of_nodes"] = G.number_of_nodes()
    # n. edges
    graph_desc["number_of_edges"] = G.number_of_edges()
    # n. of selfloops
    graph_desc["number_of_selfloops"] = len(G.selfloop_edges())
    # average shortest path length
    graph_desc["average_shortest_path_length"] = nx.average_shortest_path_length(G)
    # connectivity
    # graph_desc.append(pd.Series(nx.degree_assortativity_coefficient(G), name="degree_assortativity_coefficient"))
    graph_desc["degree_pearson_correlation_coefficient"] = nx.degree_pearson_correlation_coefficient(G)

    # NODE DESCRIPTION
    node_desc = list()
    # n. of neighbours
    node_desc.append(pd.Series(G.degree(), name="degree"))
    node_desc.append(pd.Series(nx.average_neighbor_degree(G), name="average_neighbor_degree"))
    # n. of incoming
    incoming = pd.Series(G.in_degree(), name="in_degree")
    node_desc.append(incoming)
    # n. of outgoing
    outgoing = pd.Series(G.out_degree(), name="out_degree")
    node_desc.append(outgoing)
    # fold change out/in
    ratio = np.log2(outgoing + 1) - np.log2(incoming + 1)
    node_desc.append(pd.Series(ratio, name="out_in_degree_fold_change"))

    # centrality
    # degree-based
    node_desc.append(pd.Series(nx.degree_centrality(G), name="degree_centrality"))
    node_desc.append(pd.Series(nx.in_degree_centrality(G), name="in_degree_centrality"))
    node_desc.append(pd.Series(nx.out_degree_centrality(G), name="out_degree_centrality"))
    # shortest-path based
    # node_desc.append(pd.Series(nx.closeness_centrality(G), name="closeness_centrality"))
    # node_desc.append(pd.Series(nx.betweenness_centrality(G), name="betweenness_centrality"))
    # eigenvector-based
    # node_desc.append(pd.Series(nx.eigenvector_centrality(G), name="eigenvector_centrality"))
    # node_desc.append(pd.Series(nx.katz_centrality_numpy(G), name="katz_centrality"))
    # load-based
    # node_desc.append(pd.Series(nx.load_centrality(G), name="load_centrality"))

    return (graph_desc, pd.DataFrame(node_desc).T)
def analyzer_centrality(request, project):
    projects = Project.objects.all()
    obj = get_project(request, project)

    G = nx.DiGraph()
    nodes = obj.nodeset_set.all()
    links = obj.network_set.all()
    for node in nodes:
        G.add_node(node.idnumber, name=node.name)
    for link in links:
        G.add_edge(link.sourceID, link.targetID, weight=link.weight)

    indeg_centrality = nx.in_degree_centrality(G)
    outdeg_centrality = nx.out_degree_centrality(G)
    deg_centrality = nx.degree_centrality(G)
    closeness_centrality = nx.closeness_centrality(G)
    betweenness_centrality = nx.betweenness_centrality(G)

    for key, value in indeg_centrality.items():
        p = obj.nodeset_set.filter(idnumber=key)
        p.update(indegree_centrality=round(value, 3))
    for key, value in outdeg_centrality.items():
        p = obj.nodeset_set.filter(idnumber=key)
        p.update(outdegree_centrality=round(value, 3))
    for key, value in deg_centrality.items():
        p = obj.nodeset_set.filter(idnumber=key)
        p.update(degree_centrality=round(value, 3))
    for key, value in closeness_centrality.items():
        p = obj.nodeset_set.filter(idnumber=key)
        p.update(closeness_centrality=round(value, 3))
    for key, value in betweenness_centrality.items():
        p = obj.nodeset_set.filter(idnumber=key)
        p.update(betweenness_centrality=round(value, 3))

    nodeset = obj.nodeset_set.all()
    context = {'nodeset': nodeset, 'object': obj, 'projects': projects}
    return render(request, 'centrality.html', context)
def getHugeStats(g):
    if nx.is_directed(g):
        P1 = pd.DataFrame({'load_centrality': nx.load_centrality(g, weight='weight'),
                           'betweenness_centrality': nx.betweenness_centrality(g, weight='weight'),
                           'pagerank': pd.Series(nx.pagerank(g, alpha=0.85, personalization=None,
                                                             max_iter=100, tol=1e-08, nstart=None,
                                                             weight='weight')),
                           'eigenvector_centrality': nx.eigenvector_centrality_numpy(g),
                           'degree_centrality': pd.Series(nx.degree_centrality(g)),
                           'in_degree_centrality': pd.Series(nx.in_degree_centrality(g)),
                           'out_degree_centrality': pd.Series(nx.out_degree_centrality(g))})
    else:
        P1 = pd.Panel({'spl': pd.DataFrame(nx.shortest_path_length(g)),
                       'apdp': pd.DataFrame(nx.all_pairs_dijkstra_path(g)),
                       'apdl': pd.DataFrame(nx.all_pairs_dijkstra_path_length(g)),
                       'c_exp': pd.DataFrame(nx.communicability_exp(g))})
    return P1
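The undirected branch relies on pd.Panel, which was removed in pandas 0.25; on current stacks the same all-pairs tables fit naturally in a dict of DataFrames. A sketch, assuming NetworkX 2.x, where the all-pairs functions return generators of (source, dict) pairs:

import networkx as nx
import pandas as pd

g = nx.path_graph(4)
stats = {
    'spl': pd.DataFrame(dict(nx.shortest_path_length(g))),
    'apdl': pd.DataFrame(dict(nx.all_pairs_dijkstra_path_length(g))),
}
print(stats['spl'])  # 4x4 matrix of hop counts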
def centrality(edgeList, ctype):
    """Compute the requested centrality measure for the graph stored in edgeList."""
    print "centrality start"
    file = open(edgeList, "r")
    graph = nx.read_edgelist(file, comments="#", create_using=nx.DiGraph(),
                             nodetype=int)
    file.close()
    N = nx.number_of_nodes(graph)
    if ctype == "out_degree":
        centrality = nx.out_degree_centrality(graph)
    elif ctype == "betweenness":
        # approximate betweenness using a 1% sample of source nodes
        centrality = nx.betweenness_centrality(graph, k=int(N / 100))
    elif ctype == "height":
        return 1
    else:
        centrality = nx.closeness_centrality(graph)
    return centrality
def analyze_graphs(graphs, days):
    undirected_graphs = list(map(lambda G: G.to_undirected(), graphs))
    graph_days = dict(zip(undirected_graphs, days))
    connected_graphs = list(filter(lambda G: nx.is_connected(G), undirected_graphs))
    connected_days = dict(zip(connected_graphs, list(map(
        lambda G: graph_days[G], connected_graphs))))
    # each metric maps to [scoring function, graphs to score, day lookup];
    # dict values are wrapped in list() so np.mean works under Python 3
    metrics = {
        # "average_shortest_path_lengths": [lambda G: nx.average_shortest_path_length(G), connected_graphs, connected_days],
        "clustering": [lambda G: nx.average_clustering(G), undirected_graphs, graph_days],
        "average_neighbor_degree": [lambda G: np.mean(list(nx.average_neighbor_degree(G).values())), graphs, graph_days],
        "min_weighted_vertex_cover": [lambda G: len(min_weighted_vertex_cover(G)), undirected_graphs, graph_days],
        # "eccentricity": [lambda G: np.mean(list(nx.eccentricity(G).values())), connected_graphs, connected_days],
        # "diameter": [lambda G: nx.diameter(G), connected_graphs, connected_days],
        # "periphery": [lambda G: len(nx.periphery(G)), connected_graphs, connected_days],
        "degree_centralities": [lambda G: np.mean(list(nx.degree_centrality(G).values())), graphs, graph_days],
        "in_degree_centralities": [lambda G: np.mean(list(nx.in_degree_centrality(G).values())), graphs, graph_days],
        "out_degree_centralities": [lambda G: np.mean(list(nx.out_degree_centrality(G).values())), graphs, graph_days],
        "closeness_centralities": [lambda G: np.mean(list(nx.closeness_centrality(G).values())), graphs, graph_days],
        "betweenness_centralities": [lambda G: np.mean(list(nx.betweenness_centrality(G).values())), graphs, graph_days]
    }
    for metric in metrics:
        print("Analyzing {}...".format(metric))
        function = metrics[metric][0]
        which_graphs = metrics[metric][1]
        which_days = list(metrics[metric][2].values())
        yArray = list(map(function, which_graphs))
        print(which_days)
        print(yArray)
        plt.plot(which_days, yArray)
        plt.xlabel("Day")
        plt.ylabel(metric)
        plt.title("{} Over Time".format(metric))
        plt.savefig("{}_VS_Time.png".format(metric))
        plt.close()
def get_characters_by_importance(play_lines, speaking_characters, graph,
                                 reciprocal_graph,
                                 metrics_weight=[0.625, 0.125, 0.125, 0.125]):
    reverse_graph = graph.reverse(copy=True)

    # METRICS
    lines_by_character = get_lines_by_character(play_lines, speaking_characters)
    out_degree = nx.out_degree_centrality(graph)
    page_rank = nx.pagerank_numpy(reverse_graph)
    betweenness = nx.betweenness_centrality(reciprocal_graph)

    metrics = [lines_by_character, out_degree, page_rank, betweenness]
    for i, x in enumerate(metrics):
        normalize_linear(x)
        scale(x, metrics_weight[i])

    character_value = {
        character: sum(metric.get(character, 0) for metric in metrics)
        for character in speaking_characters
    }
    sorted_characters = dict_sorted(character_value)
    return sorted_characters
def build_graph(word_word, word_sort, vectors, f, k):
    G = nx.DiGraph()  # create an empty directed graph
    print len(word_sort)
    # print word_sort
    for i in range(0, len(word_sort)):
        G.add_node(i)  # create the nodes
    for i in range(0, word_word.shape[0]):
        for j in range(0, word_word.shape[1]):
            if word_word[i, j] >= 1:
                G.add_edge(i, j)  # add a directed edge
    # pos = nx.circular_layout(G)
    # plot.title('the original graph with pos')
    # nx.draw(G, pos, with_label=True, node_size=300)
    # plot.show()
    # print nx.degree(G)
    # clusters = nx.clustering(G.to_undirected())
    # print clusters
    # sort_cluster = sorted(clusters.iteritems(), key=lambda jj: jj[1], reverse=True)
    # print sort_cluster
    # print sort_cluster[len(clusters)/2+1]
    # print nx.betweenness_centrality(G)
    features = dict()
    # print vectors.shape[1]
    for i in range(0, vectors.shape[1]):
        features[i] = 0
    out_cen = nx.out_degree_centrality(G)
    print out_cen
    for line in out_cen:
        print vectors[k].indices[line]
        features[vectors[k].indices[line]] = out_cen[line]
    print features
    for i in range(0, len(features)):
        f.write(str(features[i]) + "\t")
    f.write("\n")
def calculate_centrality_measures(G, create_using, directed):
    measures = []
    centrality_dict = {}

    # check for directed or undirected
    if directed:
        centrality_dict['in_degree'] = nx.in_degree_centrality(G)
        centrality_dict['out_degree'] = nx.out_degree_centrality(G)
    else:
        centrality_dict['degree'] = nx.degree_centrality(G)
    # print "Completed degree"

    # calculate harmonic centrality instead if the graph is disconnected
    if is_connected(G, directed):
        centrality_dict['closeness'] = nx.closeness_centrality(G)
    else:
        centrality_dict['harmonic'] = nx.harmonic_centrality(G)
    # print "Completed closeness_centrality"

    centrality_dict['betweenness'] = nx.betweenness_centrality(G)
    # print "Completed betweenness"
    centrality_dict['eigen'] = nx.eigenvector_centrality(G)
    centrality_dict['pagerank'] = nx.pagerank(G)

    G_prime = G
    if directed:
        G_prime = nx.read_edgelist(sys.argv[1], nodetype=int)
    centrality_dict['clustering'] = nx.clustering(G_prime)

    print_tsv(centrality_dict)
def add_out_degree_node(graf):
    print "Adding OUT degree to nodes"
    d_dict = nx.out_degree_centrality(graf)
    nx.set_node_attributes(graf, 'deg_out', d_dict)
def ActiveUsersNotes(self, list1):
    edgesInNotes = []
    nodesInNotes = []
    temp = []
    temp2 = []
    Nusers = numpy.zeros(shape=(len(list1), 4), dtype=numpy.int)

    # fill in matrix for JOURNAL USERS
    for row in dataTables.TblN:  # userID; #notes; friends; commenters
        poster = row[0]  # poster handle
        if poster == "creator":
            pass
        else:
            # UserID and times written a note
            u = list1.index(poster)  # user's ID
            Nusers[u][0] = poster
            Nusers[u][2] = row[1]  # the times poster received notes
            Nusers[u][3] = row[2]

            # calculate # of people received from
            temp = row[3].split(";")
            set1 = []
            for p in temp:
                if p != "":
                    set1.append(Prepare().slice(p, "id"))
                    # sender's position in array
                    mX = list1.index(Prepare().slice(p, "id"))
                    # times a user wrote notes
                    Nusers[mX][0] = Prepare().slice(p, "id")
                    Nusers[mX][1] = Nusers[mX][1] + 1
                    # calculate number of ppl written to  # [u][4]

            # build pairs of communication
            set2 = set1
            set2.insert(0, poster)
            ## algorithm for creating pairs
            partialPairs = [(x, y) for y in set1 for x in set2
                            if set2.index(x) > set1.index(y)]
            if len(partialPairs) > 0:
                for x in partialPairs:
                    edgesInNotes.append(x)
                    print(x)

    ## save edges in text file
    numpy.savetxt(foLN + ".pairs", edgesInNotes, fmt="%s")

    # completed row =====> times WRITE a note; times receive a note;
    # # of people receive from; # of ppl write to
    # filter: remove blank rows
    for row in Nusers:
        if row[1] >= 5:  # limit to active users, 5+ notes written
            # if (((row[0] >= 5)) | (row[1] >= 5) | (row[2] >= 5)):
            temp2.append(row)
            nodesInNotes.append(row[0])
    numpy.savetxt(foLN, temp2, fmt="%s")
    print(str(len(nodesInNotes)) + " active notes users")

    ## make matrix with data
    matrixN = numpy.zeros(shape=(len(nodesInNotes), len(nodesInNotes)),
                          dtype=numpy.int)
    nG = networkx.MultiDiGraph()
    for sender, target in edgesInNotes:
        try:
            x = nodesInNotes.index(int(target))
            y = nodesInNotes.index(int(sender))
            weight = matrixN[x][y]
            matrixN[x][y] = weight + 1
            nG.add_edge(sender, target)
        except:
            pass
    numpy.savetxt(foMN, matrixN, fmt="%s")
    print("saved matrix for active notes users of this community.")

    print("\tdegree centrality")
    print(networkx.degree_centrality(nG))
    print("\tin degree centrality")
    print(networkx.in_degree_centrality(nG))
    print("\tout degree centrality")
    print(networkx.out_degree_centrality(nG))
    return edgesInNotes
def main(argv):
    # Standard values
    partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv"
    project = "584"
    to_pajek = False
    try:
        opts, args = getopt.getopt(argv, "p:s:o")
    except getopt.GetoptError:
        print 'individual_bridging_2.py -p <project_name> -s <partitionfile>'
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-p"):
            project = arg
        elif opt in ("-s"):
            partitionfile = arg
        else:
            print 'individual_bridging_2.py -p <project_name> -s <partitionfile>'

    print "##################### INDIVIDUAL BRIDGING 2 (Working on whole network) ########################"
    print "Project %s " % project
    print "Partition %s" % partitionfile

    csv_bridging_writer = csv.writer(open('results/spss/individual bridging/%s_individual_bridging_3.csv' % project, 'wb'))
    csv_bridging_writer.writerow(["Project", "Community", "Person_ID", "Competing_lists",
                                  "FF_bin_degree", "FF_bin_in_degree", "FF_bin_out_degree",
                                  "FF_vol_in", "FF_vol_out",
                                  "FF_groups_in", "FF_groups_out", "FF_rec",
                                  "FF_bin_betweeness",  # "FF_bin_closeness", "FF_bin_pagerank",
                                  # "FF_c_size", "FF_c_density", "FF_c_hierarchy", "FF_c_index",
                                  "AT_bin_degree", "AT_bin_in_degree", "AT_bin_out_degree",
                                  "AT_vol_in", "AT_vol_out",
                                  "AT_groups_in", "AT_groups_out", "AT_rec",
                                  "AT_bin_betweeness",  # "AT_bin_closeness", "AT_bin_pagerank",
                                  # AT_c_size, AT_c_density, AT_c_hierarchy, AT_c_index,
                                  "AT_avg_tie_strength", "AT_strength_centrality_in",
                                  "RT_bin_in_degree", "RT_bin_out_degree",
                                  "RT_vol_in", "RT_vol_out"])

    # Read in the list-listings for individuals
    listings = {}
    indiv_reader = csv.reader(open(partitionfile))
    for row in indiv_reader:
        listings[row[0]] = {"group": row[1], "place": int(row[2]),
                            "competing_lists": int(row[3])}

    # Read in the centralities of nodes in their corresponding community
    centralities = {}
    centrality_reader = csv.reader(open('results/spss/individual bonding/%s_individual_bonding.csv' % project))
    for row in centrality_reader:
        centralities[row[2]] = {"ff_in_degree": row[5]}

    # Read in the partition
    tmp = hp.get_partition(partitionfile)
    partitions = tmp[0]
    groups = tmp[1]

    # Read in the networks
    FF_all = nx.read_edgelist('data/networks/%s_FF.edgelist' % project, nodetype=str,
                              data=(('weight', float),), create_using=nx.DiGraph())
    AT_all = nx.read_edgelist('data/networks/%s_solr_AT.edgelist' % project, nodetype=str,
                              data=(('weight', float),), create_using=nx.DiGraph())
    RT_all = nx.read_edgelist('data/networks/%s_solr_RT.edgelist' % project, nodetype=str,
                              data=(('weight', float),), create_using=nx.DiGraph())
    print "Done reading in Networks"

    # Determine the maximum subset of nodes present in all networks
    maximum_subset = []
    for node in FF_all.nodes():
        if AT_all.has_node(node) and RT_all.has_node(node):
            maximum_subset.append(node)

    i = 0
    for partition in partitions:
        for node in partition:
            FF_all.add_node(node, group=groups[i])  # Add nodes
            AT_all.add_node(node, group=groups[i])
            RT_all.add_node(node, group=groups[i])
        i += 1

    i = 0
    # These measures are computed only once on the whole graph (we are making an error
    # since the internal group structure is considered to load up those values)
    if len(maximum_subset) < 1000:
        scaling_k = len(maximum_subset)
    else:
        scaling_k = len(maximum_subset) / 100
    dFF_bin_betweeness = nx.betweenness_centrality(FF_all, k=scaling_k)
    dAT_bin_betweeness = nx.betweenness_centrality(AT_all, k=scaling_k)
    # dFF_struc = sx.structural_holes(FF_all)

    for partition in partitions:
        project_name = groups[i]
        # Determine the groups that are not in the partition
        all_other_groups = groups[:]
        group = groups[i]
        all_other_groups.remove(group)
        # Get all the partitions without the current partition
        partitions_without_partition = partitions[:]
        partitions_without_partition.remove(partition)

        # Remove the nodes that are in this partition
        remaining_nodes = [item for sublist in partitions for item in sublist]  # flat list of all nodes
        for nodes_to_be_deleted in partition:
            remaining_nodes.remove(nodes_to_be_deleted)

        # Create subgraphs that contain all nodes except the ones in the partition
        S_FF = FF_all.subgraph(remaining_nodes)
        S_AT = AT_all.subgraph(remaining_nodes)
        S_RT = RT_all.subgraph(remaining_nodes)
        i += 1

        for node in partition:
            if node in maximum_subset:
                t0 = time.time()

                # Add FF nodes and edges
                S_FF.add_node(node, group=group)
                S_FF.add_edges_from(FF_all.in_edges(node, data=True))   # in edges
                S_FF.add_edges_from(FF_all.out_edges(node, data=True))  # out edges
                # Delete the nodes we accidentally added again by importing all of the node's edges
                for tmp_node in partition:
                    if tmp_node != node and tmp_node in S_FF:
                        S_FF.remove_node(tmp_node)

                # Add AT nodes and edges
                S_AT.add_node(node, group=group)
                S_AT.add_edges_from(AT_all.in_edges(node, data=True))   # in edges
                S_AT.add_edges_from(AT_all.out_edges(node, data=True))  # out edges
                for tmp_node in partition:
                    if tmp_node != node and tmp_node in S_AT:
                        S_AT.remove_node(tmp_node)

                # Add RT nodes and edges
                S_RT.add_node(node, group=group)
                S_RT.add_edges_from(RT_all.in_edges(node, data=True))   # in edges
                S_RT.add_edges_from(RT_all.out_edges(node, data=True))  # out edges
                for tmp_node in partition:
                    if tmp_node != node and tmp_node in S_RT:
                        S_RT.remove_node(tmp_node)
                print "Done creating Subgraphs"

                ## FF measures
                dFF_bin = nx.degree_centrality(S_FF)
                dFF_bin_in = nx.in_degree_centrality(S_FF)
                dFF_bin_out = nx.out_degree_centrality(S_FF)
                # nx.load_centrality(S_FF, v=node, weight="weight")
                # dFF_bin_closeness = nx.closeness_centrality(S_FF, v=node)
                # dFF_bin_pagerank = nx.pagerank(S_FF, weight="weight")
                dFF_total_in_groups = hp.filtered_group_volume(hp.incoming_group_volume(S_FF, node, all_other_groups), 0)
                dFF_total_out_groups = hp.filtered_group_volume(hp.outgoing_group_volume(S_FF, node, all_other_groups), 0)
                dFF_rec = hp.individual_reciprocity(S_FF, node)  # number of reciprocated ties

                ## AT measures
                dAT_bin = nx.degree_centrality(S_AT)
                dAT_bin_in = nx.in_degree_centrality(S_AT)
                dAT_bin_out = nx.out_degree_centrality(S_AT)
                # dAT_bin_closeness = nx.closeness_centrality(S_AT, v=node)
                # dAT_bin_pagerank = nx.pagerank(S_AT, weight="weight")
                dAT_total_in_groups = hp.filtered_group_volume(hp.incoming_group_volume(S_AT, node, all_other_groups), 0)
                dAT_total_out_groups = hp.filtered_group_volume(hp.outgoing_group_volume(S_AT, node, all_other_groups), 0)
                dAT_rec = hp.individual_reciprocity(S_AT, node)  # number of @reciprocated ties
                dAT_avg_tie = hp.individual_average_tie_strength(S_AT, node)

                # Compute a combined measure which multiplies the strength of incoming ties
                # by the centrality of the person the tie comes from
                dAT_strength_centrality = 0
                for edge in S_AT.in_edges(node, data=True):
                    if edge[0] in maximum_subset:
                        dAT_strength_centrality += edge[2]["weight"] * float(centralities[edge[0]]["ff_in_degree"])

                ############### DEPENDENT VARIABLES ###########
                dRT_in = nx.in_degree_centrality(S_RT)    # at least one retweet that a person has received
                dRT_out = nx.out_degree_centrality(S_RT)  # at least one retweet that a person has made
                print "Done computing Measures"

                try:
                    c_size = dFF_struc[node]['C-Size']
                    c_dens = dFF_struc[node]['C-Density']
                    c_hierarch = dFF_struc[node]['C-Hierarchy']
                    c_index = dFF_struc[node]['C-Index']
                except:
                    c_size = "NaN"
                    c_dens = "NaN"
                    c_hierarch = "NaN"
                    c_index = "NaN"

                csv_bridging_writer.writerow([project, project_name, node, listings[node]["competing_lists"],
                                              dFF_bin[node], dFF_bin_in[node], dFF_bin_out[node],
                                              S_FF.in_degree(node, weight="weight"), S_FF.out_degree(node, weight="weight"),
                                              dFF_total_in_groups, dFF_total_out_groups, dFF_rec[node],
                                              dFF_bin_betweeness[node],  # dFF_bin_closeness[node], dFF_bin_pagerank[node],
                                              # c_size, c_dens, c_hierarch, c_index,
                                              dAT_bin[node], dAT_bin_in[node], dAT_bin_out[node],
                                              S_AT.in_degree(node, weight="weight"), S_AT.out_degree(node, weight="weight"),
                                              dAT_total_in_groups, dAT_total_out_groups, dAT_rec[node],
                                              dAT_bin_betweeness[node],  # dAT_bin_closeness[node], dAT_bin_pagerank[node],
                                              dAT_avg_tie[node], dAT_strength_centrality,
                                              dRT_in[node], dRT_out[node],
                                              S_RT.in_degree(node, weight="weight"), S_RT.out_degree(node, weight="weight")
                                              ])

                t_delta = (time.time() - t0)
                print "Count: %s Node: %s Time: %s" % (i, node, t_delta)

                # Remove the nodes again
                S_FF.remove_node(node)
                S_AT.remove_node(node)
                S_RT.remove_node(node)
def main(argv): #Standardvalues partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv" project = "584" to_pajek = False try: opts, args = getopt.getopt(argv,"p:s:o") except getopt.GetoptError: print 'group_bridging.py -p <project_name> -s <partitionfile> -o [if you want pajek output]' sys.exit(2) for opt, arg in opts: if opt in ("-p"): project = arg elif opt in ("-s"): partitionfile = arg elif opt in ("-o"): to_pajek = True else: print 'group_bridging.py -p <project_name> -s <partitionfile> -o [if you want pajek output]' print "##################### GROUP BRIDGING ########################" print "Project %s " % project print "Partition %s" % partitionfile ff_edges_writer = csv.writer(open("results/%s_ff_bridging_edges.csv" % project, "wb")) at_edges_writer = csv.writer(open("results/%s_at_bridging_edges.csv" % project, "wb")) rt_edges_writer = csv.writer(open("results/%s_rt_bridging_edges.csv" % project, "wb")) csv_bridging_writer = csv.writer(open('results/spss/group bridging/%s_group_bridging.csv' % project , 'wb')) csv_bridging_writer.writerow(["Project", "Name", "Member_count", "Competing_Lists", "FF_bin_degree", "FF_bin_in_degree", "FF_bin_out_degree", "FF_volume_in","FF_volume_out", "FF_bin_betweeness","FF_bin_closeness", "FF_bin_pagerank", #"FF_bin_eigenvector", "FF_bin_c_size","FF_bin_c_density","FF_bin_c_hierarchy","FF_bin_c_index", "AT_bin_degree", "AT_bin_in_degree", "AT_bin_out_degree", "AT_bin_betweeness", "AT_bin_closeness", "AT_bin_pagerank", #"AT_bin_eigenvector", "AT_bin_c_size","AT_bin_c_density","AT_bin_c_hierarchy","AT_bin_c_index", "AT_volume_in", "AT_volume_out", "RT_volume_in", "RT_volume_out", "FF_rec", "AT_rec", "AT_avg", "FF_avg"]) # Get the overall network from disk FF = nx.read_edgelist('data/networks/%s_FF.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph()) AT = nx.read_edgelist('data/networks/%s_solr_AT.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph()) RT = nx.read_edgelist('data/networks/%s_solr_RT.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph()) # Read in the partition tmp = hp.get_partition(partitionfile) partitions = tmp[0] groups = tmp[1] #Read in members count for each project reader = csv.reader(open("results/stats/%s_lists_stats.csv" % project, "rb"), delimiter=",") temp = {} reader.next() # Skip first row for row in reader: temp[row[0]] = {"name":row[0],"member_count":int(row[3])} #Read in the list-listings for individuals listings = {} indiv_reader = csv.reader(open(partitionfile)) for row in indiv_reader: if listings.has_key(row[1]): listings[row[1]]["competing_lists"] += int(row[3]) else: listings[row[1]] = {"competing_lists": int(row[3])} # Add dummy nodes if they are missing in the networks for partition in partitions: for node in partition: FF.add_node(node) AT.add_node(node) RT.add_node(node) #Blockmodel the networks into groups according to the partition P_FF = nx.blockmodel(FF,partitions) P_AT = nx.blockmodel(AT,partitions) P_RT = nx.blockmodel(RT,partitions) #Name the nodes in the network #TODO check: How do I know that the names really match? 
    mapping = {}
    mapping_pajek = {}
    i = 0
    for group in groups:
        mapping_pajek[i] = "\"%s\"" % group  # mapping for pajek
        mapping[i] = "%s" % group
        i += 1
    H_FF = nx.relabel_nodes(P_FF, mapping)
    H_AT = nx.relabel_nodes(P_AT, mapping)
    H_RT = nx.relabel_nodes(P_RT, mapping)

    # Output the networks to pajek if needed
    if to_pajek:
        OUT_FF = nx.relabel_nodes(P_FF, mapping_pajek)
        OUT_AT = nx.relabel_nodes(P_AT, mapping_pajek)
        OUT_RT = nx.relabel_nodes(P_RT, mapping_pajek)
        # Write the blocked networks out to disk
        nx.write_pajek(OUT_FF, "results/networks/%s_grouped_FF.net" % project)
        nx.write_pajek(OUT_AT, "results/networks/%s_grouped_AT.net" % project)
        nx.write_pajek(OUT_RT, "results/networks/%s_grouped_RT.net" % project)

    ########## Output the edges between groups to csv ##############
    # Needed for the computation of individual bridging.
    # Edges in both directions between two groups are added up.
    processed_edges = []
    for (u, v, attrib) in H_FF.edges(data=True):
        if "%s%s" % (u, v) not in processed_edges:
            processed_edges.append("%s%s" % (u, v))
            if H_FF.has_edge(v, u):
                processed_edges.append("%s%s" % (v, u))
                ff_edges_writer.writerow([u, v, attrib["weight"] + H_FF[v][u]["weight"]])
            else:
                ff_edges_writer.writerow([u, v, attrib["weight"]])

    processed_edges = []
    for (u, v, attrib) in H_AT.edges(data=True):
        if "%s%s" % (u, v) not in processed_edges:
            processed_edges.append("%s%s" % (u, v))
            if H_AT.has_edge(v, u):
                processed_edges.append("%s%s" % (v, u))
                at_edges_writer.writerow([u, v, attrib["weight"] + H_AT[v][u]["weight"]])
            else:
                at_edges_writer.writerow([u, v, attrib["weight"]])

    processed_edges = []
    for (u, v, attrib) in H_RT.edges(data=True):
        if "%s%s" % (u, v) not in processed_edges:
            processed_edges.append("%s%s" % (u, v))
            if H_RT.has_edge(v, u):
                processed_edges.append("%s%s" % (v, u))
                rt_edges_writer.writerow([u, v, attrib["weight"] + H_RT[v][u]["weight"]])
            else:
                rt_edges_writer.writerow([u, v, attrib["weight"]])

    ########## TRIM EDGES ################
    # For meaningful results we have to trim edges in the AT and FF networks,
    # otherwise the whole network just looks like a blob.
    # The threshold is chosen so that the network remains one component.
    THRESHOLD = min([hp.min_threshold(H_AT), hp.min_threshold(H_FF)]) - 1
    H_FF = hp.trim_edges(H_FF, THRESHOLD)
    H_AT = hp.trim_edges(H_AT, THRESHOLD)

    ########## MEASURES ##############
    # Get the number of nodes in the aggregated networks
    # FF_nodes = {}
    # for node in H_FF.nodes(data=True):
    #     FF_nodes[node[0]] = node[1]["nnodes"]

    # FF network measures of the nodes; works fine on binarized data
    FF_bin_degree = nx.degree_centrality(H_FF)
    FF_bin_in_degree = nx.in_degree_centrality(H_FF)    # The attention paid towards this group
    FF_bin_out_degree = nx.out_degree_centrality(H_FF)  # The attention this group pays towards other groups
    FF_bin_betweenness = nx.betweenness_centrality(H_FF, weight="weight")  # How often the group sits between other groups
    FF_bin_closeness = nx.closeness_centrality(H_FF)
    # FF_bin_eigenvector = nx.eigenvector_centrality(H_FF)
    FF_bin_pagerank = nx.pagerank(H_FF)
    FF_bin_struc = sx.structural_holes(H_FF)

    # AT network measures of the nodes
    AT_bin_degree = nx.degree_centrality(H_AT)
    AT_bin_in_degree = nx.in_degree_centrality(H_AT)
    AT_bin_out_degree = nx.out_degree_centrality(H_AT)
    AT_bin_betweenness = nx.betweenness_centrality(H_AT, weight="weight")
    AT_bin_closeness = nx.closeness_centrality(H_AT)
    # AT_bin_eigenvector = nx.eigenvector_centrality(H_AT)
    AT_bin_pagerank = nx.pagerank(H_AT)
    AT_bin_struc = sx.structural_holes(H_AT)

    # Tie strengths
    dAT_avg_tie = hp.individual_average_tie_strength(H_AT)
    dFF_avg_tie = hp.individual_average_tie_strength(H_FF)
    dAT_rec = hp.individual_reciprocity(H_AT)
    dFF_rec = hp.individual_reciprocity(H_FF)  # Dependent variable, see csv
    # TODO: a measure that captures how often tweets travel through this group,
    # eventually betweenness in the RT graph

    # Arrange it in a list and output
    for node in FF_bin_degree.keys():
        csv_bridging_writer.writerow([project, node, int(temp[node]["member_count"]), listings[node]["competing_lists"],
                                      FF_bin_degree[node], FF_bin_in_degree[node], FF_bin_out_degree[node],
                                      H_FF.in_degree(node, weight="weight"), H_FF.out_degree(node, weight="weight"),
                                      FF_bin_betweenness[node], FF_bin_closeness[node], FF_bin_pagerank[node],  # FF_bin_eigenvector[node],
                                      FF_bin_struc[node]['C-Size'], FF_bin_struc[node]['C-Density'], FF_bin_struc[node]['C-Hierarchy'], FF_bin_struc[node]['C-Index'],
                                      AT_bin_degree[node], AT_bin_in_degree[node], AT_bin_out_degree[node],
                                      AT_bin_betweenness[node], AT_bin_closeness[node], AT_bin_pagerank[node],  # AT_bin_eigenvector[node],
                                      AT_bin_struc[node]['C-Size'], AT_bin_struc[node]['C-Density'], AT_bin_struc[node]['C-Hierarchy'], AT_bin_struc[node]['C-Index'],
                                      H_AT.in_degree(node, weight="weight"), H_AT.out_degree(node, weight="weight"),
                                      H_RT.in_degree(node, weight="weight"), H_RT.out_degree(node, weight="weight"),
                                      dFF_rec[node], dAT_rec[node], dAT_avg_tie[node], dFF_avg_tie[node]])
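# The trimming step above relies on two helpers from the project's `hp` module
# that are not shown. A minimal sketch, assuming trim_edges keeps only edges at
# or above a weight threshold and min_threshold finds the largest weight at
# which the network still forms a single (weakly) connected component:
import networkx as nx

def trim_edges(g, threshold):
    """Hypothetical helper: copy of g with only edges of weight >= threshold."""
    g2 = nx.DiGraph()
    g2.add_nodes_from(g.nodes(data=True))
    for u, v, data in g.edges(data=True):
        if data.get("weight", 1) >= threshold:
            g2.add_edge(u, v, **data)
    return g2

def min_threshold(g):
    """Hypothetical helper: largest threshold that keeps g weakly connected."""
    weights = sorted(set(data.get("weight", 1) for u, v, data in g.edges(data=True)))
    best = weights[0] if weights else 1
    for w in weights:
        if nx.is_weakly_connected(trim_edges(g, w)):
            best = w
        else:
            break
    return best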
# (Assumed context: import networkx; from collections import Counter;
#  `c` is a database cursor over tables describing Java classes.)
def classesGraph(self, c, files_dict, scope):
    edges = []
    interfaces = 'select path, superClass from classes'
    files_Names = [x.split(".")[-1] for x in files_dict]
    pathNames = {}
    for row in c.execute(interfaces):
        nameClass = row[0].split(".")[-1]
        pathNames[nameClass] = row[0]
        nameSuper = row[1].split(".")[-1]
        if nameClass in files_Names:
            sup = 'root'
            if nameSuper in files_Names:
                sup = nameSuper
            edges.append((sup, nameClass))
    g = networkx.DiGraph()
    g.add_node('root')
    g.add_edges_from(edges)
    paths = networkx.single_source_dijkstra_path(g, 'root')

    methods = {}
    for x in files_Names:
        methods[x] = []
    methods['root'] = []
    sigsEdges = []
    # NB: the query string is built by concatenation, which assumes `scope` is trusted
    interfaces = 'select classPath, signature, return, name from methods where scope=' + scope
    for row in c.execute(interfaces):
        nameClass = row[0].split(".")[-2]
        if nameClass in files_Names:
            methods[nameClass].append(row[3])
            retAdd = []
            if row[2] in files_Names:
                retAdd = [(nameClass, row[2])]
            sigsEdges.extend([(nameClass, x) for x in self.signatureTolst(row[1]) if x in files_Names] + retAdd)

    fields = 'select classPath, name, type from fields where scope=' + scope
    fields_d = {}
    for x in files_Names:
        fields_d[x] = []
    fields_d['root'] = []
    for row in c.execute(fields):
        nameClass = row[0].split(".")[-2]
        if nameClass in files_Names:
            fields_d[nameClass].append(row[1])
            type_f = []
            if row[2] in files_Names:
                type_f = [(nameClass, row[2])]
            sigsEdges.extend(type_f)

    g2 = networkx.DiGraph()
    g2.add_node('root')
    g2.add_edges_from(sigsEdges)

    # g3 carries the same edges as g2, weighted by how often each edge occurs
    counts = Counter(sigsEdges)
    g3 = networkx.DiGraph()
    g3.add_node('root')
    for e, w in counts.items():
        u, v = e
        g3.add_edge(u, v, weight=w)

    self.addFromDict(files_dict, g2.out_degree(), pathNames)
    self.addFromDict(files_dict, networkx.katz_centrality(g2), pathNames)
    self.addFromDict(files_dict, networkx.core_number(g2), pathNames)
    self.addFromDict(files_dict, networkx.closeness_centrality(g2), pathNames)
    self.addFromDict(files_dict, networkx.degree_centrality(g2), pathNames)
    self.addFromDict(files_dict, networkx.out_degree_centrality(g2), pathNames)
    self.addFromDict(files_dict, g3.out_degree(), pathNames)
    self.addFromDict(files_dict, networkx.core_number(g3), pathNames)
    self.addFromDict(files_dict, networkx.closeness_centrality(g3), pathNames)
    self.addFromDict(files_dict, networkx.degree_centrality(g3), pathNames)
    self.addFromDict(files_dict, networkx.out_degree_centrality(g3), pathNames)
    # gi = igraph.Graph(edges=g2.edges(), directed=True)
    # gi.modularity()
    # self.addFromDict(files_dict, gi.community_optimal_modularity(), pathNames)
    networkx.write_graphml(g, r"C:\GitHub\weka\graph.graphml")
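# networkx.katz_centrality (used above on g2) is computed by power iteration
# and may fail to converge on some graphs; in recent networkx versions it then
# raises networkx.PowerIterationFailedConvergence. If that happens, the direct
# linear-algebra variant is a drop-in alternative. A standalone sketch on a
# hypothetical random graph:
import networkx

g = networkx.gnp_random_graph(30, 0.1, seed=1, directed=True)
try:
    katz = networkx.katz_centrality(g)
except networkx.PowerIterationFailedConvergence:
    katz = networkx.katz_centrality_numpy(g)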
# (Assumed context: import csv, getopt, sys; import networkx as nx;
#  `hp` is a project-specific helper module; `sx` appears only in comments.)
def main(argv):
    # Standard values
    partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv"
    project = "584"
    to_pajek = False
    try:
        opts, args = getopt.getopt(argv, "p:s:o")
    except getopt.GetoptError:
        print 'individual_bridging.py -p <project_name> -s <partitionfile> -o [if you want pajek output]'
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-p",):
            project = arg
        elif opt in ("-s",):
            partitionfile = arg
        elif opt in ("-o",):
            to_pajek = True
        else:
            print 'individual_bridging.py -p <project_name> -s <partitionfile> -o [if you want pajek output]'

    print "##################### INDIVIDUAL BRIDGING ########################"
    print "Project %s " % project
    print "Partition %s" % partitionfile

    csv_bridging_writer = csv.writer(open('results/spss/individual bridging/%s_individual_bridging.csv' % project, 'wb'))
    csv_bridging_writer.writerow(["Name", "Group1", "Group2", "Number_between_ties", "Competing_lists",
                                  "FF_bin_degree", "FF_bin_in_degree", "FF_bin_out_degree",
                                  "FF_bin_betweeness",
                                  # "FF_c_size", "FF_c_density", "FF_c_hierarchy", "FF_c_index",
                                  "FF_own_group_in_volume", "FF_other_group_in_volume",
                                  "FF_own_group_out_volume", "FF_other_group_out_volume",
                                  "AT_bin_degree", "AT_bin_in_degree", "AT_bin_out_degree",
                                  "AT_bin_betweeness",
                                  "AT_volume_in", "AT_volume_out",
                                  # "AT_c_size", "AT_c_density", "AT_c_hierarchy", "AT_c_index",
                                  "AT_own_group_in_volume", "AT_other_group_in_volume",
                                  "AT_own_group_out_volume", "AT_other_group_out_volume",
                                  "RT_total_volume_in", "RT_total_volume_out",
                                  "RT_own_group_in_volume", "RT_other_group_in_volume",
                                  "RT_own_group_out_volume", "RT_other_group_out_volume"])

    # Read in the list-listings for individuals
    listings = {}
    indiv_reader = csv.reader(open(partitionfile))
    for row in indiv_reader:
        listings[row[0]] = {"group": row[1], "place": int(row[2]), "competing_lists": int(row[3])}

    # Read in the edges between the groups and sort them
    GROUPS = 80  # 80 x 200 ~ 16000 individuals for analysis
    reader = csv.reader(open("results/%s_bridging_edges.csv" % project, "rb"), delimiter=",")
    edges = []
    for row in reader:
        edges.append({"group1": row[0], "group2": row[1], "count": float(row[2])})
    edges_sorted = sorted(edges, key=lambda k: k["count"])
    distance_between_samples = int(float(len(edges_sorted)) / GROUPS)
    if distance_between_samples == 0:
        distance_between_samples = 1  # Minimal distance
    iterator = 0

    # Read in the partition
    tmp = hp.get_partition(partitionfile)
    partitions = tmp[0]
    groups = tmp[1]

    # Read in the networks
    FF_all = nx.read_edgelist('data/networks/%s_FF.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())
    AT_all = nx.read_edgelist('data/networks/%s_solr_AT.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())
    RT_all = nx.read_edgelist('data/networks/%s_solr_RT.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())

    # Add missing nodes and tag each with its group
    i = 0
    for partition in partitions:
        for node in partition:
            FF_all.add_node(node, group=groups[i])
            AT_all.add_node(node, group=groups[i])
            RT_all.add_node(node, group=groups[i])
        i += 1

    while iterator < len(edges_sorted):
        # Generate a subgraph consisting of two partitions.
        # Problem: with k ~ 200 groups there are k*(k-1)/2 ~ 20000 possible
        # pairs -- too many to analyse all. How to pick the pairs?
        # Solution 1: at random
        # Solution 2: based on the ordered tie strengths between groups from the
        # group bridging step, e.g. from [10, 9, 8, 7, 6, 5, 0] take every xth
        # element to create a sample of the desired size, e.g. [10, 8, 6, 0]
        # TODO: bin edges with the same weight into the same category and then
        # select a group at random
        selected_edge = edges_sorted[iterator]
        group1 = selected_edge["group1"]
        group2 = selected_edge["group2"]
        index1 = groups.index(group1)
        index2 = groups.index(group2)
        print "%s : %s with %s of strength %s" % (iterator, group1, group2, selected_edge["count"])

        # Create subgraphs
        S_FF = FF_all.subgraph(partitions[index1] + partitions[index2])
        S_FF.name = "%s_%s" % (group1, group2)
        S_AT = AT_all.subgraph(partitions[index1] + partitions[index2])
        S_AT.name = "%s_%s" % (group1, group2)
        S_RT = RT_all.subgraph(partitions[index1] + partitions[index2])
        S_RT.name = "%s_%s" % (group1, group2)

        iterator += distance_between_samples  # Make equidistant steps with the iterator

        # Optional output to pajek
        if to_pajek:
            print "Generating pajek output for %s %s" % (groups[index1], groups[index2])

            # Relabel for pajek
            def mapping(x):
                return "\"%s\"" % x

            H_FF = nx.relabel_nodes(S_FF, mapping)
            H_AT = nx.relabel_nodes(S_AT, mapping)
            H_RT = nx.relabel_nodes(S_RT, mapping)

            # Write it to disk
            nx.write_pajek(H_FF, "results/networks/pairs/%s_%s_%s_pair_FF.net" % (project, groups[index1], groups[index2]))
            nx.write_pajek(H_AT, "results/networks/pairs/%s_%s_%s_pair_AT.net" % (project, groups[index1], groups[index2]))
            nx.write_pajek(H_RT, "results/networks/pairs/%s_%s_%s_pair_RT.net" % (project, groups[index1], groups[index2]))

        ################ MEASURES ################

        ## FF measures
        dFF_bin = nx.degree_centrality(S_FF)
        dFF_bin_in = nx.in_degree_centrality(S_FF)
        dFF_bin_out = nx.out_degree_centrality(S_FF)
        dFF_bin_betweeness = nx.betweenness_centrality(S_FF)
        # Structural holes has problems, probably with disconnected networks
        # (eventually compute the biggest component first):
        # dFF_struc = sx.structural_holes(S_FF)

        # Which volume is "own" and which is "other" depends on the node's
        # partition; see the output loop below.
        dFF_group1_vol_in = hp.individual_in_volume(S_FF, group1)
        dFF_group2_vol_in = hp.individual_in_volume(S_FF, group2)
        dFF_group1_vol_out = hp.individual_out_volume(S_FF, group1)
        dFF_group2_vol_out = hp.individual_out_volume(S_FF, group2)

        ## AT measures
        dAT_bin = nx.degree_centrality(S_AT)
        dAT_bin_in = nx.in_degree_centrality(S_AT)
        dAT_bin_out = nx.out_degree_centrality(S_AT)
        dAT_bin_betweeness = nx.betweenness_centrality(S_AT)
        # Why can the structural holes not be computed here either?
        # dAT_struc = sx.structural_holes(S_AT)
        dAT_group1_vol_in = hp.individual_in_volume(S_AT, group1)
        dAT_group2_vol_in = hp.individual_in_volume(S_AT, group2)
        dAT_group1_vol_out = hp.individual_out_volume(S_AT, group1)
        dAT_group2_vol_out = hp.individual_out_volume(S_AT, group2)

        ############### DEPENDENT VARIABLES ###########
        dRT_group1_vol_in = hp.individual_in_volume(S_RT, group1)
        dRT_group2_vol_in = hp.individual_in_volume(S_RT, group2)
        dRT_group1_vol_out = hp.individual_out_volume(S_RT, group1)
        dRT_group2_vol_out = hp.individual_out_volume(S_RT, group2)

        ############ OUTPUT ###########################
        # Arrange it in a list and output. Depending on whether the node is in
        # partition 1 or 2, the definition of "own" and "other" changes.
        for node in dFF_bin.keys():
            if node in partitions[index1]:
                # FF
                FF_own_group_in_volume = dFF_group1_vol_in[node]
                FF_own_group_out_volume = dFF_group1_vol_out[node]
                FF_other_group_in_volume = dFF_group2_vol_in[node]
                FF_other_group_out_volume = dFF_group2_vol_out[node]
                # AT
                AT_own_group_in_volume = dAT_group1_vol_in[node]
                AT_own_group_out_volume = dAT_group1_vol_out[node]
                AT_other_group_in_volume = dAT_group2_vol_in[node]
                AT_other_group_out_volume = dAT_group2_vol_out[node]
                # RT
                RT_own_group_in_volume = dRT_group1_vol_in[node]
                RT_own_group_out_volume = dRT_group1_vol_out[node]
                RT_other_group_in_volume = dRT_group2_vol_in[node]
                RT_other_group_out_volume = dRT_group2_vol_out[node]
            else:
                # FF
                FF_own_group_in_volume = dFF_group2_vol_in[node]
                FF_own_group_out_volume = dFF_group2_vol_out[node]
                FF_other_group_in_volume = dFF_group1_vol_in[node]
                FF_other_group_out_volume = dFF_group1_vol_out[node]
                # AT
                AT_own_group_in_volume = dAT_group2_vol_in[node]
                AT_own_group_out_volume = dAT_group2_vol_out[node]
                AT_other_group_in_volume = dAT_group1_vol_in[node]
                AT_other_group_out_volume = dAT_group1_vol_out[node]
                # RT
                RT_own_group_in_volume = dRT_group2_vol_in[node]
                RT_own_group_out_volume = dRT_group2_vol_out[node]
                RT_other_group_in_volume = dRT_group1_vol_in[node]
                RT_other_group_out_volume = dRT_group1_vol_out[node]

            csv_bridging_writer.writerow([node, group1, group2, selected_edge["count"],
                                          listings[node]["competing_lists"],
                                          dFF_bin[node], dFF_bin_in[node], dFF_bin_out[node],
                                          dFF_bin_betweeness[node],
                                          # dFF_struc[node]['C-Size'], dFF_struc[node]['C-Density'], dFF_struc[node]['C-Hierarchy'], dFF_struc[node]['C-Index'],
                                          FF_own_group_in_volume, FF_other_group_in_volume,
                                          FF_own_group_out_volume, FF_other_group_out_volume,
                                          dAT_bin[node], dAT_bin_in[node], dAT_bin_out[node],
                                          dAT_bin_betweeness[node],
                                          S_AT.in_degree(node, weight="weight"), S_AT.out_degree(node, weight="weight"),
                                          # dAT_struc[node]['C-Size'], dAT_struc[node]['C-Density'], dAT_struc[node]['C-Hierarchy'], dAT_struc[node]['C-Index'],
                                          AT_own_group_in_volume, AT_other_group_in_volume,
                                          AT_own_group_out_volume, AT_other_group_out_volume,
                                          S_RT.in_degree(node, weight="weight"), S_RT.out_degree(node, weight="weight"),
                                          RT_own_group_in_volume, RT_other_group_in_volume,
                                          RT_own_group_out_volume, RT_other_group_out_volume,
                                          ])
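# hp.individual_in_volume and hp.individual_out_volume are project helpers whose
# source is not shown. Given how their results are used above (a per-node
# dictionary of traffic exchanged with one particular group), a plausible
# sketch, assuming each node carries the "group" attribute that was attached
# when the networks were read in:
def individual_in_volume(g, group):
    """Hypothetical helper: per node, summed weight of incoming edges from `group`."""
    vol = {}
    for node in g.nodes():
        vol[node] = sum(data.get("weight", 1)
                        for u, v, data in g.in_edges(node, data=True)
                        if g.node[u].get("group") == group)
    return vol

def individual_out_volume(g, group):
    """Hypothetical helper: per node, summed weight of outgoing edges into `group`."""
    vol = {}
    for node in g.nodes():
        vol[node] = sum(data.get("weight", 1)
                        for u, v, data in g.out_edges(node, data=True)
                        if g.node[v].get("group") == group)
    return vol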
def test_outdegree_centrality(self):
    d = nx.out_degree_centrality(self.G)
    exact = {0: 0.125, 1: 0.125, 2: 0.125, 3: 0.125,
             4: 0.125, 5: 0.375, 6: 0.0, 7: 0.0, 8: 0.0}
    for n, dc in d.items():
        assert_almost_equal(exact[n], dc)
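# The expected values above follow from the definition nx.out_degree_centrality
# uses: a node's out-degree divided by n - 1. The test graph has 9 nodes, so a
# node with out-degree 1 scores 1/8 = 0.125 and node 5, with out-degree 3,
# scores 3/8 = 0.375. The same arithmetic on a tiny ad-hoc graph:
import networkx as nx

G = nx.DiGraph([(0, 1), (1, 2)])  # 3 nodes, so scores are out_degree / 2
assert nx.out_degree_centrality(G) == {0: 0.5, 1: 0.5, 2: 0.0}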
# (Assumed context: import numpy as np, pandas as pd, networkx as nx; Python 2, hence xrange.)
def obj_transform(dataframe=None, G=None):
    dataframe1 = pd.concat([dataframe['enrollment_id'], pd.get_dummies(dataframe['category'])], axis=1)
    if G:
        betweenness = nx.betweenness_centrality(G)
        in_degree = nx.in_degree_centrality(G)
        out_degree = nx.out_degree_centrality(G)
        pagerank = nx.pagerank(G)

        nrow = dataframe.shape[0]
        graph_features = np.zeros((nrow, 7))
        for i in xrange(nrow):
            graph_features[i, 0] = in_degree[dataframe['module_id'][i]] * 5000.0
            graph_features[i, 1] = out_degree[dataframe['module_id'][i]] * 5000.0
            graph_features[i, 2] = betweenness[dataframe['module_id'][i]] * 5000.0
            graph_features[i, 3] = pagerank[dataframe['module_id'][i]] * 5000.0
            # pre = nx.bfs_predecessors(G, dataframe['module_id'][i])
            suc = nx.bfs_successors(G, dataframe['module_id'][i])
            graph_features[i, 4] = depth(dataframe['module_id'][i], suc)
            graph_features[i, 5] = len(list(nx.ancestors(G, dataframe['module_id'][i])))
            graph_features[i, 6] = len(list(nx.descendants(G, dataframe['module_id'][i])))

        colName = ['inDgree', 'outDegree', 'betweenness', 'pagerank', 'depth', 'N_ancestor', 'N_child']
        temp = pd.DataFrame(graph_features, index=dataframe.index)
        temp.columns = colName
        temp['enrollment_id'] = dataframe['enrollment_id']
        temp.to_csv('debugDir/checkpoint.csv')

        # Aggregating per enrollment
        dataframe1 = dataframe1.groupby('enrollment_id').aggregate(np.sum)

        temp1 = temp.groupby('enrollment_id').aggregate(np.mean)
        temp1.columns = [name + '_mean' for name in colName]
        dataframe1 = pd.concat([dataframe1, temp1], axis=1)

        temp1 = temp.groupby('enrollment_id').aggregate(np.std)
        temp1.columns = [name + '_std' for name in colName]
        dataframe1 = pd.concat([dataframe1, temp1], axis=1)

        temp1 = temp.groupby('enrollment_id').aggregate(np.min)
        temp1.columns = [name + '_min' for name in colName]
        dataframe1 = pd.concat([dataframe1, temp1], axis=1)

        temp1 = temp.groupby('enrollment_id').aggregate(np.max)
        temp1.columns = [name + '_max' for name in colName]
        dataframe1 = pd.concat([dataframe1, temp1], axis=1)
    return dataframe1
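# The depth() helper called above is not defined in this snippet. It receives a
# module id and the result of nx.bfs_successors, which in networkx 1.x is a
# dict mapping each node to its list of BFS successors, so it presumably
# measures how deep the module tree goes below that node. A sketch under that
# assumption:
def depth(root, successors):
    """Hypothetical helper: height of the BFS tree hanging below `root`."""
    children = successors.get(root, [])
    if not children:
        return 0
    return 1 + max(depth(child, successors) for child in children)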
## calculate indegree and outdegree centrality
import csv

import networkx as nx

f = open('/Users/dukechan/Downloads/indegree_centrality.txt', 'w')
f2 = open('/Users/dukechan/Downloads/outdegree_centrality.txt', 'w')

csv_file = "/Users/dukechan/Downloads/sms_sna_oct18_directed-1.csv"
reader = csv.reader(open(csv_file))

G = nx.DiGraph()
for line in reader:
    G.add_edge(line[4], line[5])  # columns 4 and 5 are treated as source and target

In = nx.in_degree_centrality(G)
for item in In:
    f.write("Node: %s Centrality %.10f\n" % (item, In[item]))
f.close()

Out = nx.out_degree_centrality(G)
for item in Out:
    f2.write("Node: %s Centrality %.10f\n" % (item, Out[item]))
f2.close()
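# One caveat with the script above: repeated (source, target) rows collapse into
# a single edge in a DiGraph, so the centralities are binary. If message counts
# should be kept as edge weights instead, one variation (same hypothetical CSV
# layout, columns 4 and 5 holding the endpoints) is to accumulate them:
import csv

import networkx as nx

G = nx.DiGraph()
with open("/Users/dukechan/Downloads/sms_sna_oct18_directed-1.csv") as fh:
    for line in csv.reader(fh):
        u, v = line[4], line[5]
        if G.has_edge(u, v):
            G[u][v]["weight"] += 1
        else:
            G.add_edge(u, v, weight=1)
# Weighted in-degree is then available as G.in_degree(weight="weight");
# note that nx.in_degree_centrality itself ignores edge weights.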