def test_hits_numpy(self):
    G = self.G
    h, a = nx.hits_numpy(G)
    for n in G:
        assert almost_equal(h[n], G.h[n], places=4)
    for n in G:
        assert almost_equal(a[n], G.a[n], places=4)
def test_hits_numpy(self):
    G = self.G
    h, a = nx.hits_numpy(G)
    for n in G:
        assert h[n] == pytest.approx(G.h[n], abs=1e-4)
    for n in G:
        assert a[n] == pytest.approx(G.a[n], abs=1e-4)
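Note on API status: `hits_numpy` (along with `hits_scipy`) was deprecated in NetworkX 2.6 and removed in 3.0, where `nx.hits` is the single surviving entry point. A minimal sketch of how the same test reads after the removal, assuming the same `self.G` fixture with precomputed `G.h` / `G.a` attributes:

import networkx as nx
import pytest

def test_hits(self):
    G = self.G
    # nx.hits() replaces hits_numpy()/hits_scipy() in NetworkX >= 3.0
    h, a = nx.hits(G, tol=1e-8)
    for n in G:
        assert h[n] == pytest.approx(G.h[n], abs=1e-4)
    for n in G:
        assert a[n] == pytest.approx(G.a[n], abs=1e-4)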
def test_empty(self):
    numpy = pytest.importorskip('numpy')
    G = networkx.Graph()
    assert networkx.hits(G) == ({}, {})
    assert networkx.hits_numpy(G) == ({}, {})
    assert networkx.authority_matrix(G).shape == (0, 0)
    assert networkx.hub_matrix(G).shape == (0, 0)
def test_empty(self):
    G = networkx.Graph()
    assert_equal(networkx.hits(G), ({}, {}))
    assert_equal(networkx.hits_numpy(G), ({}, {}))
    assert_equal(networkx.hits_scipy(G), ({}, {}))
    assert_equal(networkx.authority_matrix(G).shape, (0, 0))
    assert_equal(networkx.hub_matrix(G).shape, (0, 0))
def calculate_network_measures(G):
    in_degree = nx.in_degree_centrality(G)
    out_degree = nx.out_degree_centrality(G)
    betweenness = nx.betweenness_centrality(G, weight=WEIGHT)
    closeness = nx.closeness_centrality(G, distance=WEIGHT)
    eigenvector = nx.eigenvector_centrality(G.reverse(), weight=WEIGHT)
    clustering = nx.clustering(G.to_undirected(), weight=WEIGHT)
    pagerank = nx.pagerank(G, weight=WEIGHT)
    hubs, authorities = nx.hits_numpy(G)
    max_clique = node_clique_number(G.to_undirected())
    node_cliques = cliques_containing_node(G.to_undirected())
    node_cliques_count = {}
    for node, cliques in node_cliques.items():
        node_cliques_count[node] = len(cliques)

    network_df = pd.DataFrame(list(G.nodes), columns=[ID])
    network_df[IN_DEGREE] = network_df[ID].map(in_degree)
    network_df[OUT_DEGREE] = network_df[ID].map(out_degree)
    network_df[BETWEENNESS] = network_df[ID].map(betweenness)
    network_df[CLOSENESS] = network_df[ID].map(closeness)
    network_df[EIGENVECTOR] = network_df[ID].map(eigenvector)
    network_df[CLUSTERING] = network_df[ID].map(clustering)
    network_df[PAGERANK] = network_df[ID].map(pagerank)
    network_df[HUBS] = network_df[ID].map(hubs)
    network_df[AUTHORITIES] = network_df[ID].map(authorities)
    network_df[MAX_CLIQUE] = network_df[ID].map(max_clique)
    network_df[CLIQUES_COUNT] = network_df[ID].map(node_cliques_count)
    return network_df
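The column-name constants (`WEIGHT`, `ID`, `IN_DEGREE`, ...) and the bare clique helpers are defined elsewhere in that module. A minimal sketch of the assumed setup, with hypothetical constant values, runnable on NetworkX 2.x (where `hits_numpy` and `cliques_containing_node` still exist):

import networkx as nx
import pandas as pd
from networkx import node_clique_number, cliques_containing_node

# Hypothetical values for the module-level column-name constants.
WEIGHT, ID = 'weight', 'id'
IN_DEGREE, OUT_DEGREE, BETWEENNESS = 'in_degree', 'out_degree', 'betweenness'
CLOSENESS, EIGENVECTOR, CLUSTERING = 'closeness', 'eigenvector', 'clustering'
PAGERANK, HUBS, AUTHORITIES = 'pagerank', 'hubs', 'authorities'
MAX_CLIQUE, CLIQUES_COUNT = 'max_clique', 'cliques_count'

# Strongly connected and aperiodic, so eigenvector centrality converges.
G = nx.DiGraph()
G.add_weighted_edges_from([('a', 'b', 1.0), ('b', 'a', 0.5),
                           ('b', 'c', 2.0), ('c', 'a', 1.0)])
print(calculate_network_measures(G).head())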
def test_empty(self):
    G = nx.Graph()
    assert nx.hits(G) == ({}, {})
    assert nx.hits_numpy(G) == ({}, {})
    assert _hits_python(G) == ({}, {})
    assert nx.hits_scipy(G) == ({}, {})
    assert nx.authority_matrix(G).shape == (0, 0)
    assert nx.hub_matrix(G).shape == (0, 0)
def test_hits_numpy(self):
    numpy = pytest.importorskip('numpy')
    G = self.G
    h, a = networkx.hits_numpy(G)
    for n in G:
        assert almost_equal(h[n], G.h[n], places=4)
    for n in G:
        assert almost_equal(a[n], G.a[n], places=4)
def hits(g):
    (hubs, auths) = nx.hits_numpy(g)
    ranks = dict()
    for x in g.nodes():
        if isinstance(x, int):
            ranks[x] = hubs[x]
        else:
            ranks[x] = auths[x]
    return ranks
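This wrapper leans on a node-typing convention: integer nodes get their hub score, everything else its authority score (sensible for a bipartite-style graph, e.g. int user IDs pointing at string item IDs). A toy run under that assumption, on a NetworkX version that still ships `hits_numpy`:

import networkx as nx

g = nx.DiGraph()
g.add_edges_from([(1, 'x'), (1, 'y'), (2, 'y')])  # int sources -> str targets
ranks = hits(g)  # the wrapper above
# integer nodes carry hub scores; string nodes carry authority scores
for node, score in ranks.items():
    print(node, round(score, 3))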
def test_numpy_hits(self):
    G = self.G
    try:
        h, a = networkx.hits_numpy(G, tol=1.e-08)
        for (x, y) in zip(sorted(h), self.G.h):
            assert_almost_equal(x, y, places=5)
        for (x, y) in zip(sorted(a), self.G.a):
            assert_almost_equal(x, y, places=5)
    except ImportError:
        print "Skipping hits_numpy test"
def test_empty(self):
    try:
        import numpy
    except ImportError:
        raise SkipTest('numpy not available.')
    G = networkx.Graph()
    assert_equal(networkx.hits(G), ({}, {}))
    assert_equal(networkx.hits_numpy(G), ({}, {}))
    assert_equal(networkx.authority_matrix(G).shape, (0, 0))
    assert_equal(networkx.hub_matrix(G).shape, (0, 0))
def hits(g):
    k, v = nx.hits_numpy(g, True)
    minimo = min(k.items(), key=lambda x: x[1])
    massimo = max(k.items(), key=lambda x: x[1])
    media = sum(k.values()) / len(k)
    print(minimo, massimo, media)
    minimo_a = min(v.items(), key=lambda x: x[1])
    massimo_a = max(v.items(), key=lambda x: x[1])
    media_a = sum(v.values()) / len(v)
    print(minimo_a, massimo_a, media_a)
def test_numpy_hits(self):
    G = self.G
    try:
        import numpy
    except ImportError:
        raise SkipTest('numpy not available.')
    h, a = networkx.hits_numpy(G, tol=1.e-08)
    for (x, y) in zip(sorted(h), self.G.h):
        assert_almost_equal(x, y, places=5)
    for (x, y) in zip(sorted(a), self.G.a):
        assert_almost_equal(x, y, places=5)
def test_hits_numpy(self):
    try:
        import numpy as np
    except ImportError:
        raise SkipTest('NumPy not available.')
    G = self.G
    h, a = networkx.hits_numpy(G)
    for n in G:
        assert_almost_equal(h[n], G.h[n], places=4)
    for n in G:
        assert_almost_equal(a[n], G.a[n], places=4)
def mineTrees(rf_model):
    result = pd.DataFrame(index=np.arange(0, rf_model.n_estimators),
                          columns=['nodes', 'edges', 'diameter', 'weak_components',
                                   'strong_components', 'node_connectivity',
                                   'mean_hub_score', 'mean_auth_score',
                                   'median_degree', 'mean_degree'])
    for t in range(0, rf_model.n_estimators):
        tree = rf_model.estimators_[t]
        graph = nx.DiGraph()

        # export_graphviz(tree, out_file=str('results/trees/tree') + str(t) + '.dot',
        #                 feature_names=dataTrain.columns, class_names=data2.Class, rounded=True,
        #                 proportion=False, precision=2, filled=True)

        left_children = tree.tree_.children_left
        right_children = tree.tree_.children_right
        features = tree.tree_.feature

        for n in range(0, len(left_children)):
            node = features[n]
            l_child = left_children[n]
            r_child = right_children[n]
            if node >= 0:
                if l_child > 0 and features[l_child] >= 0:
                    graph.add_edge(node, features[l_child])
                if r_child > 0 and features[r_child] >= 0:
                    graph.add_edge(node, features[r_child])

        # Network metrics
        hubs, authorities = nx.hits_numpy(graph)
        mean_hub_score = np.mean(list(hubs.values()))
        mean_auth_score = np.mean(list(authorities.values()))
        nodes = nx.number_of_nodes(graph)
        diameter = nx.diameter(nx.to_undirected(graph))
        edges = nx.number_of_edges(graph)
        strong_comp = nx.number_strongly_connected_components(graph)
        weak_comp = nx.number_weakly_connected_components(graph)
        degrees = nx.average_degree_connectivity(graph, target="in")
        avg_in_degree = np.mean(list(degrees))
        median_in_degree = np.median(list(degrees))
        node_connectivity = nx.average_node_connectivity(graph)

        row = [nodes, edges, diameter, weak_comp, strong_comp, node_connectivity,
               mean_hub_score, mean_auth_score, median_in_degree, avg_in_degree]
        result.loc[t] = row
    return result
def rank(self, return_type='set'):
    # Guard against a few abnormal cases
    graph = self.get_graph()
    if graph is None:
        graph = self.build_graph()
    h, a = nx.hits_numpy(graph)
    imp = a
    result = sorted(imp.items(), key=lambda d: d[1], reverse=True)
    if return_type == 'set':
        result = [x for (x, y) in result]
        return result
    else:
        return result
def info_network(G):
    from networkx.algorithms import bipartite
    from decimal import Decimal

    print G.number_of_nodes()
    print G.number_of_edges()

    print "average_neighbor_degree"
    dict = nx.average_neighbor_degree(G)
    list1 = dict.keys()
    list2 = dict.values()
    print list1
    print list2

    print "degree_assortativity_coefficient"
    print nx.degree_assortativity_coefficient(G)
    print "degree_pearson_correlation_coefficient"
    print nx.degree_pearson_correlation_coefficient(G)
    # print nx.k_nearest_neighbors(G)
    print "STOP HERE"

    print "bipartite.closeness_centrality(G, G.node)"
    dict2 = bipartite.closeness_centrality(G, G.node)
    list3 = dict2.values()
    print list3

    print "nx.degree_centrality(G)"
    dict3 = nx.degree_centrality(G)
    list4 = dict3.values()
    print list4

    print "nx.betweenness_centrality(G)"
    dict4 = nx.betweenness_centrality(G)
    list5 = dict4.values()
    print list5

    print "hits_numpy"
    dict5 = nx.hits_numpy(G)
    print dict5
def influence_charts_2013():
    # open bills, make bill graph
    f = open("senate_bills/senate_bills_113.txt", "r")
    bill_list = json.load(f)
    bills = bill_graph(bill_list)
    f.close()

    # influence model metrics
    in_degree_dict = bills.in_degree()
    sorted_in_degree = sorted(in_degree_dict.iteritems(), key=operator.itemgetter(1))

    # this is the order we'll base all of the graphs on
    sorted_senators = [x[0] for x in sorted_in_degree]
    senator_count = len(sorted_senators)
    in_degree_values = [in_degree_dict[x] for x in sorted_senators]

    pagerank_dict = nx.pagerank_numpy(bills)
    pagerank_values = [pagerank_dict[x] for x in sorted_senators]

    hubs, authorities = nx.hits_numpy(bills)
    hubs_values = [hubs[x] for x in sorted_senators]
    authorities_values = [authorities[x] for x in sorted_senators]

    # get the colors we'll need for coloring
    f = open("party_dict.txt", "r")
    party_dict = json.load(f)
    color = [party_dict[x] for x in sorted_senators]

    # visualize in and out degree
    fig = plt.figure(num=None, figsize=(18, 6), dpi=80, facecolor='w', edgecolor='k')
    ax2 = fig.add_subplot(111)
    plt.bar(range(senator_count), in_degree_values, align='center', color=color)
    plt.xticks(range(senator_count), sorted_senators, rotation=90, fontsize=9)
    plt.xlabel("Senators by In-degree")
    plt.ylabel('In-degree')
    plt.show()

    # visualize centrality
    fig = plt.figure(num=None, figsize=(18, 6), dpi=80, facecolor='w', edgecolor='k')
    ax1 = fig.add_subplot(111)
    plt.bar(range(senator_count), pagerank_values, align='center', color=color)
    plt.xticks(range(senator_count), sorted_senators, rotation=90, fontsize=9)
    plt.xlabel("Senators by Pagerank Score")
    plt.ylabel('Pagerank Score')
    plt.show()

    fig = plt.figure(num=None, figsize=(18, 6), dpi=80, facecolor='w', edgecolor='k')
    ax3 = fig.add_subplot(111)
    plt.bar(range(senator_count), hubs_values, align='center', color=color)
    plt.xticks(range(senator_count), sorted_senators, rotation=90, fontsize=9)
    plt.xlabel("Senators by HITS Hub Score")
    plt.ylabel('Hub Score')
    plt.show()

    fig = plt.figure(num=None, figsize=(18, 6), dpi=80, facecolor='w', edgecolor='k')
    ax4 = fig.add_subplot(111)
    plt.bar(range(senator_count), authorities_values, align='center', color=color)
    plt.xticks(range(senator_count), sorted_senators, rotation=90, fontsize=9)
    plt.xlabel("Senators by HITS Authorities Score")
    plt.ylabel('Authorities Score')
    plt.show()

    voter_model_scores = {
        u'Burr, Richard': 60, u'Johnson, Tim': 50, u'Johanns, Mike': 60,
        u'Begich, Mark': 71, u'Inhofe, James M.': 38, u'McCain, John': 25,
        u'Portman, Rob': 89, u'Rockefeller, John D., IV': 71,
        u'Landrieu, Mary L.': 101, u'Heinrich, Martin': 20, u'Pryor, Mark L.': 61,
        u'Brown, Sherrod': 81, u'Toomey, Pat': 50, u'Cardin, Benjamin L.': 101,
        u'Tester, Jon': 91, u'Wyden, Ron': 81, u'Klobuchar, Amy': 50,
        u'Lee, Mike': 65, u'Fischer, Deb': 0, u'Bennet, Michael F.': 20,
        u'Blunt, Roy': 71, u'Collins, Susan M.': 81, u'Schumer, Charles E.': 61,
        u'Harkin, Tom': 91, u'McCaskill, Claire': 91, u'Lautenberg, Frank R.': 91,
        u'Cruz, Ted': 57, u'Schatz, Brian': 19, u'Feinstein, Dianne': 90,
        u'Coats, Daniel': 48, u'Hagan, Kay': 49, u'King, Angus S. Jr.': 10,
        u'Murray, Patty': 50, u'Enzi, Michael B.': 49, u'Whitehouse, Sheldon': 40,
        u'Reed, Jack': 51, u'Ayotte, Kelly': 50, u'Levin, Carl': 61,
        u'Kaine, Tim': 19, u'Cowan, William M.': 0, u'Grassley, Chuck': 61,
        u'Baldwin, Tammy': 30, u'Chambliss, Saxby': 9,
        u'Gillibrand, Kirsten E.': 79, u'Sanders, Bernard': 60,
        u'Hoeven, John': 48, u'Leahy, Patrick J.': 71,
        u'Coons, Christopher A.': 38, u'Sessions, Jeff': 0, u'Thune, John': 61,
        u'Donnelly, Joe': 19, u'Moran, Jerry': 71, u'Hirono, Mazie K.': 40,
        u'Manchin, Joe, III': 40, u'Shelby, Richard C.': 90,
        u'Menendez, Robert': 71, u'Mikulski, Barbara A.': 81,
        u'Alexander, Lamar': 69, u'Scott, Tim': 0, u'Hatch, Orrin G.': 88,
        u'Cornyn, John': 60, u'Booker, Cory A.': 0, u'Blumenthal, Richard': 81,
        u'Markey, Edward J.': 0, u'Rubio, Marco': 68, u'Risch, James E.': 9,
        u'Cochran, Thad': 20, u'Franken, Al': 69, u'Coburn, Tom': 86,
        u'Kirk, Mark Steven': 69, u'Durbin, Richard': 69, u'Boozman, John': 48,
        u'Corker, Bob': 9, u'Barrasso, John': 59, u'Flake, Jeff': 25,
        u'Murphy, Christopher S.': 20, u'Stabenow, Debbie': 80,
        u'Johnson, Ron': 49, u'Carper, Thomas R.': 61, u'Udall, Tom': 40,
        u'Roberts, Pat': 67, u'Shaheen, Jeanne': 81, u'Vitter, David': 71,
        u'Paul, Rand': 49, u'Reid, Harry': 30, u'Heller, Dean': 40,
        u'Warren, Elizabeth': 10, u'McConnell, Mitch': 0, u'Isakson, Johnny': 30,
        u'Baucus, Max': 81, u'Casey, Robert P., Jr.': 90, u'Graham, Lindsey': 70,
        u'Heitkamp, Heidi': 36, u'Udall, Mark': 48, u'Murkowski, Lisa': 40,
        u'Cantwell, Maria': 61, u'Crapo, Mike': 0, u'Warner, Mark R.': 61,
        u'Boxer, Barbara': 70, u'Merkley, Jeff': 91, u'Nelson, Bill': 100,
        u'Wicker, Roger F.': 20, u'Chiesa, Jeff': 0
    }
    voter_model_values = [voter_model_scores[x] for x in sorted_senators]

    fig = plt.figure(num=None, figsize=(18, 6), dpi=80, facecolor='w', edgecolor='k')
    ax5 = fig.add_subplot(111)
    plt.bar(range(senator_count), voter_model_values, align='center', color=color)
    plt.xticks(range(senator_count), sorted_senators, rotation=90, fontsize=9)
    plt.xlabel("Senators by Voter-Model Score")
    plt.ylabel('Voter-Model Score')
    plt.show()
def hits_authorities(G):
    """The HITS authorities centralities"""
    return nx.hits_numpy(G)[1]
def hits_hubs(G):
    """The HITS hubs centralities"""
    return nx.hits_numpy(G)[0]
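A quick sanity check of the two wrappers on a toy digraph (again NetworkX < 3.0). Since each wrapper recomputes the full HITS eigendecomposition, callers that need both sides are better off unpacking a single `hits_numpy` call:

import networkx as nx

G = nx.DiGraph([(1, 2), (1, 3), (2, 3)])
hubs = hits_hubs(G)
auths = hits_authorities(G)
# one call instead of two when both sides are needed:
hubs2, auths2 = nx.hits_numpy(G)
assert hubs == hubs2 and auths == auths2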
def mineTrees(rf_model, dataset_name, feature_name):
    """
    :param rf_model: trained random forest
    :return: dataframe containing network metrics for each estimator in rf_model
    """
    result = pd.DataFrame(index=np.arange(0, rf_model.n_estimators),
                          columns=['dataset_name', 'feature_name', 'tree_id', 'nodes',
                                   'edges', 'diameter', 'weak_components',
                                   'strong_components', 'node_connectivity',
                                   'mean_hub_score', 'mean_auth_score',
                                   'median_degree', 'mean_degree', 'depth',
                                   'total_length_of_branches', 'branching_ratio'])
    for t in range(0, rf_model.n_estimators):
        # print("Tree " + str(t) + " is processing")
        tree = rf_model.estimators_[t]
        graph = nx.DiGraph()  # multiple edges are not allowed

        # export_graphviz(tree, out_file=str('results/trees/tree') + str(t) + '.dot',
        #                 feature_names=dataTrain.columns, class_names=data2.Class, rounded=True,
        #                 proportion=False, precision=2, filled=True)

        left_children = tree.tree_.children_left
        right_children = tree.tree_.children_right
        features = tree.tree_.feature
        depth = tree.tree_.max_depth
        total_length_of_branches = tree.tree_.node_count - 1
        branching_ratio = depth / total_length_of_branches

        for n in range(0, len(left_children)):
            node = features[n]
            l_child = left_children[n]
            r_child = right_children[n]
            if node >= 0:
                if l_child > 0 and features[l_child] >= 0:
                    # print(str(t) + ">" + str(node) + " l" + str(l_child) + " " + str(features[l_child]))
                    graph.add_edge(node, features[l_child])
                if r_child > 0 and features[r_child] >= 0:
                    # print(str(t) + ">" + str(node) + " r" + str(r_child) + " " + str(features[r_child]))
                    # compare the graph with the original decision tree to make sure the graph is correct
                    graph.add_edge(node, features[r_child])

        # Network metrics
        with warnings.catch_warnings():  # temporarily suppress warnings
            warnings.filterwarnings('ignore', category=np.ComplexWarning)
            hubs, authorities = nx.hits_numpy(graph)
        mean_hub_score = np.mean(list(hubs.values()))          # hub = lots of links from
        mean_auth_score = np.mean(list(authorities.values()))  # authority = lots of links to

        nodes = nx.number_of_nodes(graph)
        if nodes == 0:  # an empty tree would crash
            warnings.warn(f'Empty decision tree: t={t}', UserWarning)
            # data would be NaN and crash the next rf, so delete the row
            result.drop(index=t, inplace=True)
            continue

        diameter = nx.diameter(nx.to_undirected(graph))  # greatest distance b/w any pair of vertices
        edges = nx.number_of_edges(graph)
        # size of subgraph where all components are connected
        strong_comp = nx.number_strongly_connected_components(graph)  # directed
        weak_comp = nx.number_weakly_connected_components(graph)      # ignore direction
        degrees = nx.average_degree_connectivity(graph, target="in")  # num incoming edges for vertices
        avg_in_degree = np.mean(list(degrees))
        median_in_degree = np.median(list(degrees))

        # This line is VERY slow, setting it to a dummy value for now
        # node_connectivity = nx.average_node_connectivity(graph)  # how well the graph is connected
        node_connectivity = -1

        row = [dataset_name, feature_name, t, nodes, edges, diameter, weak_comp,
               strong_comp, node_connectivity, mean_hub_score, mean_auth_score,
               median_in_degree, avg_in_degree, depth, total_length_of_branches,
               branching_ratio]
        result.loc[t] = row
    return result
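A minimal driver sketch for `mineTrees`, assuming scikit-learn and an older NumPy/NetworkX stack that still provides `np.ComplexWarning` and `nx.hits_numpy`; the data and label strings are arbitrary, and `nx.diameter` can still raise on a degenerate tree whose feature graph is disconnected:

import numpy as np
from sklearn.ensemble import RandomForestClassifier

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 8))
y = (X[:, 0] + X[:, 1] > 0).astype(int)  # synthetic binary target
rf = RandomForestClassifier(n_estimators=10, random_state=0).fit(X, y)

metrics = mineTrees(rf, dataset_name='toy', feature_name='raw')
print(metrics[['nodes', 'edges', 'diameter', 'mean_hub_score']].head())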
def influence_charts_2013():
    # open bills, make bill graph
    f = open("senate_bills/senate_bills_113.txt", "r")
    bill_list = json.load(f)
    bills = bill_graph(bill_list)
    f.close()

    # influence model metrics
    in_degree_dict = bills.in_degree()
    sorted_in_degree = sorted(in_degree_dict.iteritems(), key=operator.itemgetter(1))

    # this is the order we'll base all of the graphs on
    sorted_senators = [x[0] for x in sorted_in_degree]
    senator_count = len(sorted_senators)
    in_degree_values = [in_degree_dict[x] for x in sorted_senators]

    pagerank_dict = nx.pagerank_numpy(bills)
    pagerank_values = [pagerank_dict[x] for x in sorted_senators]

    hubs, authorities = nx.hits_numpy(bills)
    hubs_values = [hubs[x] for x in sorted_senators]
    authorities_values = [authorities[x] for x in sorted_senators]

    # get the colors we'll need for coloring
    f = open("party_dict.txt", "r")
    party_dict = json.load(f)
    color = [party_dict[x] for x in sorted_senators]

    # visualize in and out degree
    fig = plt.figure(num=None, figsize=(18, 6), dpi=80, facecolor="w", edgecolor="k")
    ax2 = fig.add_subplot(111)
    plt.bar(range(senator_count), in_degree_values, align="center", color=color)
    plt.xticks(range(senator_count), sorted_senators, rotation=90, fontsize=9)
    plt.xlabel("Senators by In-degree")
    plt.ylabel("In-degree")
    plt.show()

    # visualize centrality
    fig = plt.figure(num=None, figsize=(18, 6), dpi=80, facecolor="w", edgecolor="k")
    ax1 = fig.add_subplot(111)
    plt.bar(range(senator_count), pagerank_values, align="center", color=color)
    plt.xticks(range(senator_count), sorted_senators, rotation=90, fontsize=9)
    plt.xlabel("Senators by Pagerank Score")
    plt.ylabel("Pagerank Score")
    plt.show()

    fig = plt.figure(num=None, figsize=(18, 6), dpi=80, facecolor="w", edgecolor="k")
    ax3 = fig.add_subplot(111)
    plt.bar(range(senator_count), hubs_values, align="center", color=color)
    plt.xticks(range(senator_count), sorted_senators, rotation=90, fontsize=9)
    plt.xlabel("Senators by HITS Hub Score")
    plt.ylabel("Hub Score")
    plt.show()

    fig = plt.figure(num=None, figsize=(18, 6), dpi=80, facecolor="w", edgecolor="k")
    ax4 = fig.add_subplot(111)
    plt.bar(range(senator_count), authorities_values, align="center", color=color)
    plt.xticks(range(senator_count), sorted_senators, rotation=90, fontsize=9)
    plt.xlabel("Senators by HITS Authorities Score")
    plt.ylabel("Authorities Score")
    plt.show()

    voter_model_scores = {
        u"Burr, Richard": 60, u"Johnson, Tim": 50, u"Johanns, Mike": 60,
        u"Begich, Mark": 71, u"Inhofe, James M.": 38, u"McCain, John": 25,
        u"Portman, Rob": 89, u"Rockefeller, John D., IV": 71,
        u"Landrieu, Mary L.": 101, u"Heinrich, Martin": 20, u"Pryor, Mark L.": 61,
        u"Brown, Sherrod": 81, u"Toomey, Pat": 50, u"Cardin, Benjamin L.": 101,
        u"Tester, Jon": 91, u"Wyden, Ron": 81, u"Klobuchar, Amy": 50,
        u"Lee, Mike": 65, u"Fischer, Deb": 0, u"Bennet, Michael F.": 20,
        u"Blunt, Roy": 71, u"Collins, Susan M.": 81, u"Schumer, Charles E.": 61,
        u"Harkin, Tom": 91, u"McCaskill, Claire": 91, u"Lautenberg, Frank R.": 91,
        u"Cruz, Ted": 57, u"Schatz, Brian": 19, u"Feinstein, Dianne": 90,
        u"Coats, Daniel": 48, u"Hagan, Kay": 49, u"King, Angus S. Jr.": 10,
        u"Murray, Patty": 50, u"Enzi, Michael B.": 49, u"Whitehouse, Sheldon": 40,
        u"Reed, Jack": 51, u"Ayotte, Kelly": 50, u"Levin, Carl": 61,
        u"Kaine, Tim": 19, u"Cowan, William M.": 0, u"Grassley, Chuck": 61,
        u"Baldwin, Tammy": 30, u"Chambliss, Saxby": 9,
        u"Gillibrand, Kirsten E.": 79, u"Sanders, Bernard": 60,
        u"Hoeven, John": 48, u"Leahy, Patrick J.": 71,
        u"Coons, Christopher A.": 38, u"Sessions, Jeff": 0, u"Thune, John": 61,
        u"Donnelly, Joe": 19, u"Moran, Jerry": 71, u"Hirono, Mazie K.": 40,
        u"Manchin, Joe, III": 40, u"Shelby, Richard C.": 90,
        u"Menendez, Robert": 71, u"Mikulski, Barbara A.": 81,
        u"Alexander, Lamar": 69, u"Scott, Tim": 0, u"Hatch, Orrin G.": 88,
        u"Cornyn, John": 60, u"Booker, Cory A.": 0, u"Blumenthal, Richard": 81,
        u"Markey, Edward J.": 0, u"Rubio, Marco": 68, u"Risch, James E.": 9,
        u"Cochran, Thad": 20, u"Franken, Al": 69, u"Coburn, Tom": 86,
        u"Kirk, Mark Steven": 69, u"Durbin, Richard": 69, u"Boozman, John": 48,
        u"Corker, Bob": 9, u"Barrasso, John": 59, u"Flake, Jeff": 25,
        u"Murphy, Christopher S.": 20, u"Stabenow, Debbie": 80,
        u"Johnson, Ron": 49, u"Carper, Thomas R.": 61, u"Udall, Tom": 40,
        u"Roberts, Pat": 67, u"Shaheen, Jeanne": 81, u"Vitter, David": 71,
        u"Paul, Rand": 49, u"Reid, Harry": 30, u"Heller, Dean": 40,
        u"Warren, Elizabeth": 10, u"McConnell, Mitch": 0, u"Isakson, Johnny": 30,
        u"Baucus, Max": 81, u"Casey, Robert P., Jr.": 90, u"Graham, Lindsey": 70,
        u"Heitkamp, Heidi": 36, u"Udall, Mark": 48, u"Murkowski, Lisa": 40,
        u"Cantwell, Maria": 61, u"Crapo, Mike": 0, u"Warner, Mark R.": 61,
        u"Boxer, Barbara": 70, u"Merkley, Jeff": 91, u"Nelson, Bill": 100,
        u"Wicker, Roger F.": 20, u"Chiesa, Jeff": 0,
    }
    voter_model_values = [voter_model_scores[x] for x in sorted_senators]

    fig = plt.figure(num=None, figsize=(18, 6), dpi=80, facecolor="w", edgecolor="k")
    ax5 = fig.add_subplot(111)
    plt.bar(range(senator_count), voter_model_values, align="center", color=color)
    plt.xticks(range(senator_count), sorted_senators, rotation=90, fontsize=9)
    plt.xlabel("Senators by Voter-Model Score")
    plt.ylabel("Voter-Model Score")
    plt.show()
def network_info(post_id):
    default = [None] * 33
    edge_info, all_miss_perc = get_edges(post_id)
    graph = nx.DiGraph(edge_info)
    all_num_nodes = graph.number_of_nodes()
    all_density = nx.density(graph)
    num_weak_comp = nx.number_weakly_connected_components(graph)
    num_strong_comp = nx.number_strongly_connected_components(graph)

    main_graph = get_main_component(graph)
    if main_graph is None:
        rv = [post_id, all_miss_perc, all_num_nodes, all_density,
              num_weak_comp, num_strong_comp] + [None] * 27
        return rv

    main_graph_full, main_miss_perc = add_author_info(main_graph)
    nodes = dict(main_graph_full.nodes(data=True))
    # edges = dict(main_graph_full.edges(data=True))

    # network structure
    # drop posts whose main graph has fewer than 10 nodes
    main_num_nodes = main_graph_full.number_of_nodes()
    if main_num_nodes < 10:
        rv = [post_id, all_miss_perc, all_num_nodes, all_density,
              num_weak_comp, num_strong_comp, main_num_nodes] + [None] * 26
        return rv
    else:
        main_density = nx.density(main_graph_full)
        main_trans = nx.transitivity(main_graph_full)
        short_path = nx.average_shortest_path_length(main_graph_full, weight='weight')
        eccen = nx.eccentricity(main_graph_full)
        eccen_val_list = list(eccen.values())
        diameter = np.max(eccen_val_list)
        radius = np.min(eccen_val_list)
        eccen_mean = np.mean(eccen_val_list)
        eccen_std = np.std(eccen_val_list)
        periphery = [key for key, value in eccen.items() if value == diameter]
        peri_lkarma, peri_ckarma = get_eccen_avg(nodes, periphery)
        center = [key for key, value in eccen.items() if value == radius]
        center_lkarma, center_ckarma = get_eccen_avg(nodes, center)

        # node importance
        in_degree_dict = dict(nx.in_degree_centrality(main_graph_full))
        in_degree_lkarma, in_drgree_ckarma = get_top_karma_avg(nodes, in_degree_dict)
        out_degree_dict = dict(nx.out_degree_centrality(main_graph_full))
        out_degree_lkarma, out_degree_ckarma = get_top_karma_avg(nodes, out_degree_dict)
        close_dict = dict(nx.closeness_centrality(main_graph_full))
        close_lkarma, close_ckarma = get_top_karma_avg(nodes, close_dict)
        between_dict = dict(nx.betweenness_centrality(main_graph_full,
                                                      weight='weight', endpoints=True))
        between_lkarma, between_ckarma = get_top_karma_avg(nodes, between_dict)
        hub_dict, auth_dict = nx.hits_numpy(main_graph_full)
        hub_lkarma, hub_ckarma = get_top_karma_avg(nodes, hub_dict)
        auth_lkarma, auth_ckarma = get_top_karma_avg(nodes, auth_dict)

        try:
            author_id = FULL_DATA.loc[post_id]['author']
            author_lkarma, author_ckarma = AUTHOR.loc[author_id]
        except:
            author_lkarma = None
            author_ckarma = None

        rv = [post_id, all_miss_perc, all_num_nodes, all_density, num_weak_comp,
              num_strong_comp, main_miss_perc, main_num_nodes, main_density,
              main_trans, short_path, diameter, radius, eccen_mean, eccen_std,
              peri_lkarma, peri_ckarma, center_lkarma, center_ckarma,
              in_degree_lkarma, in_drgree_ckarma, out_degree_lkarma,
              out_degree_ckarma, close_lkarma, close_ckarma, between_lkarma,
              between_ckarma, hub_lkarma, hub_ckarma, auth_lkarma, auth_ckarma,
              author_lkarma, author_ckarma]
        return rv
data['out_degree'] = g_mult_dir.out_degree()
data['degree_centrality'] = {
    n: d / float(g_mult_undir.size())
    for n, d in g_mult_undir.degree().items()
}
data['closeness_centrality'] = nx.closeness_centrality(g_mult_undir)
data['betweenness_centrality'] = nx.betweenness_centrality(g_mult_undir)
data['eigenvector_centrality'] = nx.eigenvector_centrality_numpy(g_single_undir, weight=None)
data['katz_centrality'] = nx.katz_centrality_numpy(g_single_undir, weight=None)

# B) link analysis
print 'link analysis'
data['page_rank'] = nx.pagerank_numpy(g_mult_undir, weight=None)
hits = nx.hits_numpy(g_mult_dir)
data['hits_hub'] = hits[0]
data['hits_auth'] = hits[1]

# C) Reichherzer & Leake 2006
print 'cmaps'
data['u_weights'] = util.comp_u_weights(g_mult_dir)
data['l_weights'] = util.comp_l_weights(g_mult_dir)

# HARD
p = [0, 2.235, 1.764]
data['hard'] = {
    n: p[0] * data['hits_hub'][n] + p[1] * data['hits_auth'][n] + p[2] * data['u_weights'][n]
    for n in g_mult_dir.nodes()
}

# CRD
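With `p = [0, 2.235, 1.764]` the hub term vanishes, so the HARD score above reduces to a two-term blend of authority score and upper-weight:

# With p[0] == 0 the hub term drops out, so for every node n:
#     hard(n) = 2.235 * hits_auth(n) + 1.764 * u_weight(n)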
def f_hits_authorities(G, year=False):
    return nx.hits_numpy(G)[1]
def f_hits_hubs(G, year=False):
    return nx.hits_numpy(G)[0]
def hits(self):
    return networkx.hits_numpy(self.graph)
def hits(net):
    hubs, authorities = nx.hits_numpy(net)
    return distri(hubs.values(), 'hubs') + distri(authorities.values(), 'authorities')
def get_graph(Mat_D, Threshold, percentageConnections=False, complet=False):
    import scipy.io as sio
    import numpy as np
    import networkx as nx
    import pandas as pd
    import os

    Data = sio.loadmat(Mat_D)
    matX = Data['Correlation']  # [:tamn,:tamn]
    labels = Data['labels']
    print(np.shape(matX))
    print(np.shape(labels))
    print(np.min(matX), np.max(matX))

    if percentageConnections:
        if percentageConnections > 0 and percentageConnections < 1:
            for i in range(-100, 100):
                per = np.sum(matX > i / 100.) / np.size(matX)
                if per <= Threshold:
                    Threshold = i / 100.
                    break
            print(Threshold)
        else:
            print('The coefficient is out of range')

    # Edge list for the graph
    row, col = np.shape(matX)
    e = []
    for i in range(1, row):
        for j in range(i):
            if complet:
                e.append((labels[i], labels[j], matX[i, j]))
            else:
                if matX[i, j] > Threshold:
                    e.append((labels[i], labels[j], matX[i, j]))
    print(np.shape(e)[0], int(((row - 1) * row) / 2))

    # Build the graph
    G = nx.Graph()
    G.add_weighted_edges_from(e)
    labelNew = list(G.nodes)

    # Whole-graph metrics (weighted)
    Dpc = nx.degree_pearson_correlation_coefficient(G, weight='weight')
    cluster = nx.average_clustering(G, weight='weight')

    # Unweighted
    estra = nx.estrada_index(G)
    tnsity = nx.transitivity(G)
    conNo = nx.average_node_connectivity(G)
    ac = nx.degree_assortativity_coefficient(G)

    # Per-node metrics
    tam = 15
    BoolCenV = False
    BoolLoad = False
    alpha = 0.1
    beta = 1.0

    katxCN = nx.katz_centrality_numpy(G, alpha=alpha, beta=beta, weight='weight')
    bcen = nx.betweenness_centrality(G, weight='weight')
    av_nd = nx.average_neighbor_degree(G, weight='weight')
    ctr = nx.clustering(G, weight='weight')
    ranPaN = nx.pagerank_numpy(G, weight='weight')
    Gol_N = nx.hits_numpy(G)
    Dgc = nx.degree_centrality(G)
    cl_ce = nx.closeness_centrality(G)
    cluster_Sq = nx.square_clustering(G)
    centr = nx.core_number(G)
    cami = nx.node_clique_number(G)
    camiN = nx.number_of_cliques(G)
    trian = nx.triangles(G)
    colorG = nx.greedy_color(G)

    try:
        cenVNum = nx.eigenvector_centrality_numpy(G, weight='weight')
        tam = tam + 1
        BoolCenV = True
    except TypeError:
        print("The network is too small to compute this parameter")
    except:
        print('NetworkXPointlessConcept: graph null')

    if Threshold > 0:
        carga_cen = nx.load_centrality(G, weight='weight')  # positive weights
        BoolLoad = True
        tam = tam + 1

    # katxC = nx.katz_centrality(G, alpha=alpha, beta=beta, weight='weight')
    # cenV = nx.eigenvector_centrality(G, weight='weight')
    # Golp = nx.hits(G)
    # Gol_si = nx.hits_scipy(G)
    # ranPa = nx.pagerank(G, weight='weight')
    # ranPaS = nx.pagerank_scipy(G, weight='weight')

    matrix_datos = np.zeros((tam, np.shape(labelNew)[0]))
    tam = 15
    print(np.shape(matrix_datos))
    lim = np.shape(labelNew)[0]
    for i in range(lim):
        roi = labelNew[i]
        # print(roi)
        matrix_datos[0, i] = katxCN[roi]
        matrix_datos[1, i] = bcen[roi]
        matrix_datos[2, i] = av_nd[roi]
        matrix_datos[3, i] = ctr[roi]
        matrix_datos[4, i] = ranPaN[roi]
        matrix_datos[5, i] = Gol_N[0][roi]
        matrix_datos[6, i] = Gol_N[1][roi]
        matrix_datos[7, i] = Dgc[roi]
        matrix_datos[8, i] = cl_ce[roi]
        matrix_datos[9, i] = cluster_Sq[roi]
        matrix_datos[10, i] = centr[roi]
        matrix_datos[11, i] = cami[roi]
        matrix_datos[12, i] = camiN[roi]
        matrix_datos[13, i] = trian[roi]
        matrix_datos[14, i] = colorG[roi]
        if BoolCenV:
            matrix_datos[15, i] = cenVNum[roi]
            tam = tam + 1
        if BoolLoad:
            matrix_datos[16, i] = carga_cen[roi]
            tam = tam + 1
        # matrix_datos[0, i] = katxC[roi]
        # matrix_datos[2, i] = cenV[roi]
        # matrix_datos[7, i] = Golp[0][roi]
        # matrix_datos[9, i] = Gol_si[0][roi]
        # matrix_datos[10, i] = Golp[1][roi]
        # matrix_datos[12, i] = Gol_si[1][roi]
        # matrix_datos[22, i] = ranPa[roi]
        # matrix_datos[24, i] = ranPaS[roi]

    FuncName = [
        'degree_pearson_correlation_coefficient', 'average_clustering',
        'estrada_index', 'transitivity', 'average_node_connectivity',
        'degree_assortativity_coefficient', 'katz_centrality_numpy',
        'betweenness_centrality', 'average_neighbor_degree', 'clustering',
        'pagerank_numpy', 'hits_numpy0', 'hits_numpy1', 'degree_centrality',
        'closeness_centrality', 'square_clustering', 'core_number',
        'node_clique_number', 'number_of_cliques', 'triangles', 'greedy_color',
        'eigenvector_centrality_numpy', 'load_centrality'
    ]
    frame = pd.DataFrame(matrix_datos)
    frame.columns = labelNew
    frame.index = FuncName[6:tam]

    Resul = os.getcwd()
    out_data = Resul + '/graph_metrics.csv'
    out_mat = Resul + '/graph_metrics_global.mat'
    frame.to_csv(out_data)
    sio.savemat(out_mat, {
        FuncName[0]: Dpc, FuncName[1]: cluster, FuncName[2]: estra,
        FuncName[3]: tnsity, FuncName[4]: conNo, FuncName[5]: ac
    })
    return out_data, out_mat
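Invocation sketch: `get_graph` expects a MATLAB file holding a square `Correlation` matrix and a parallel `labels` vector, and needs NetworkX < 3.0 for `katz_centrality_numpy`, `pagerank_numpy`, and `hits_numpy`. The filename below is hypothetical:

# Hypothetical input file; must contain 'Correlation' (square) and 'labels'.
csv_path, mat_path = get_graph('subject01.mat', Threshold=0.3)
print(csv_path)  # per-node metrics, one row per measure
print(mat_path)  # whole-graph metrics saved as .mat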
for u, v, a in G.edges(data=True):
    try:
        x = model.similarity(u, v)
        G[u][v]['weight'] = abs(x)
    except KeyError:
        continue

for u, v, a in G.edges(data=True):
    print(u, v, a)

bw_centrality = nx.betweenness_centrality(G, normalized=True, weight='weight')
d_centrality = nx.degree_centrality(G)
c_centrality = nx.closeness_centrality(G, distance='weight')
pr = nx.pagerank_numpy(G, alpha=0.9, weight='weight')
hub, authority = nx.hits_numpy(G)

avg_bw = 0
avg_d = 0
avg_c = 0
avg_pr = 0
avg_hub = 0
avg_authority = 0
for i in bw_centrality:
    avg_bw += bw_centrality[i]
avg_bw = avg_bw / len(bw_centrality)
for i in d_centrality:
    avg_d += d_centrality[i]
# requires a directed Graph
# print(nx.clustering(G, 0))
# print nx.average_clustering(G)
# print nx.diameter(G, e=None)
# print nx.center(G, e=None)
# print "nx.max_flow(G, 'Fosl2.48h', 'Hoxa9.48h')"
# print nx.max_flow(G, 'Fosl2.48h', 'Hoxa9.48h')
# print nx.network_simplex(G)
# print nx.pagerank(G, alpha=0.9)
# print "nx.pagerank_numpy"
# print nx.pagerank_numpy(G, alpha=0.9)

print "hits_numpy"
dict5 = nx.hits_numpy(G)
print dict5

# print(nx.shortest_path(G, source=0, target=4))
# print(nx.average_shortest_path_length(G))
# paths = nx.all_simple_paths(G, source=0, target=3, cutoff=2)
# print(list(paths))
csvfile.close()
G = session_transition_graph(log)
assert G.number_of_nodes() == 14457
assert G.number_of_edges() == 27315

"""
Plot the graph of user session paths
"""
# pos = nx.spring_layout(G); nx.draw_networkx(G, pos, with_labels=False, node_size=1); plt.show()

print("degree_assortativity_coefficient %2.2f" % nx.degree_assortativity_coefficient(G))
print("degree_pearson_correlation_coefficient %2.2f" % nx.degree_pearson_correlation_coefficient(G))

assert not nx.is_connected(G)
assert nx.number_connected_components(G) == 171
counter = Counter([c.number_of_edges() for c in nx.connected_component_subgraphs(G)])
print(counter)
# Counter({1: 141, 2: 19, 3: 4, 5: 2, 6: 2, 4: 1, 27085: 1, 13: 1})

large_graphs = [c for c in nx.connected_component_subgraphs(G) if c.number_of_edges() > 20]
largest_graph = large_graphs[0]
# nx.write_gexf(largest_graph, './model/user_sessions.gexf')

colors = ['r' if 'label' in d and d['label'] == 1 else 'b'
          for n, d in largest_graph.nodes_iter(data=True)]
sizes = [50 if 'label' in d and d['label'] == 1 else 1
         for n, d in largest_graph.nodes_iter(data=True)]
# pos = nx.spring_layout(largest_graph); nx.draw_networkx(largest_graph, pos, with_labels=False, colors=colors, node_size=sizes); plt.show()

hits = nx.hits_numpy(largest_graph)
print('aa')
def get_top_keys(dictionary, top):
    items = dictionary.items()
    items.sort(reverse=True, key=lambda x: x[1])
    return items[:top]

print "Reading in Full Graph."
stdout.flush()
g = read_edgelist('data/wiki-Talk.txt', create_using=DiGraph(), nodetype=int)

print "HITS."
stdout.flush()
hubs, authorities = hits_numpy(g)

file = open("results/hubs_numpy.txt", "w+")
file.write("Top 100 Hubs by HITS\n")
for node in get_top_keys(hubs, 100):
    file.write("{} {}\n".format(node[0], node[1]))
file.close()

file = open("results/authorities_numpy.txt", "w+")
file.write("Top 100 Authorities by HITS\n")
for node in get_top_keys(authorities, 100):
    file.write("{} {}\n".format(node[0], node[1]))
file.close()

print "We Done Here."
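This script is Python 2 throughout (`print` statements, bare `stdout`, and a list-returning `dict.items()`). Under Python 3, `items()` is a view with no `.sort()` method, so a ported helper would read:

# Python 3 port of get_top_keys: sort a copy via sorted(), since
# dict.items() is a view object without .sort() in Python 3.
def get_top_keys(dictionary, top):
    return sorted(dictionary.items(), key=lambda x: x[1], reverse=True)[:top]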
file.write("\nTop 20 Nodes by Katz\n") katz_centrality = nx.katz_centrality_numpy(g) for node in get_top_keys(katz_centrality, 20): file.write("{}, {}\n".format(node[0], node[1])) file.flush() print "PageRank." sys.stdout.flush() file.write("\nTop 20 Nodes by PageRank\n") pagerank = nx.pagerank_numpy(g) for node in get_top_keys(pagerank, 20): file.write("{}, {}\n".format(node[0], node[1])) file.flush() print "HITS." sys.stdout.flush() file.write("\nTop 20 Nodes by HITS: Hubs\n") hits = nx.hits_numpy(g) for node in get_top_keys(hits[0], 20): file.write("{}, {}\n".format(node[0], node[1])) file.flush() file.write("Top 20 Nodes by HITS: Authorities\n") pagerank = nx.hits_numpy(g) for node in get_top_keys(hits[1], 20): file.write("{}, {}\n".format(node[0], node[1])) file.close() print "We Done Here." sys.stdout.flush()
def hits(graph):
    """HITS hub scores of the graph, restricted to a connected subgraph."""
    h, _ = nx.hits_numpy(ensure_connected(graph))
    return list(h.values())
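`ensure_connected` is defined elsewhere; a plausible reading is that it trims the input to its largest connected component so HITS is computed on one well-posed piece. A hypothetical stand-in under that assumption (not the original helper):

import networkx as nx

def ensure_connected(graph):
    # Assumed behavior: keep only the largest (weakly) connected component.
    comps = (nx.weakly_connected_components(graph) if graph.is_directed()
             else nx.connected_components(graph))
    return graph.subgraph(max(comps, key=len)).copy()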
# print(nx.clustering(G, 0))
# print nx.average_clustering(G)
# print nx.diameter(G, e=None)
# print nx.center(G, e=None)

print "max_flow(G, 'Fosl2.48h', 'Hoxa9.48h')"
print nx.max_flow(G, 'Fosl2.48h', 'Hoxa9.48h')
# print nx.network_simplex(G)
# print nx.pagerank(G, alpha=0.9)

print "pagerank_numpy"
print nx.pagerank_numpy(G, alpha=0.9)

print "hits_numpy"
print nx.hits_numpy(G)

# print(nx.shortest_path(G, source=0, target=4))
# print(nx.average_shortest_path_length(G))
# paths = nx.all_simple_paths(G, source=0, target=3, cutoff=2)
# print(list(paths))
csvfile.close()