import copy
import networkx as nx
import pandas as pd

def calculate_metrices(g, return_dic=True, d=0.95, number_of_loops=1000, weighted=True):
    # Compute PageRank plus HITS hub/authority scores for g.
    if weighted:
        pagerank = nx.pagerank(g, alpha=d, max_iter=number_of_loops)
        hub_rank, authority_rank = nx.hits(g, max_iter=number_of_loops)
    else:
        # Ignore edge weights by resetting them all to 1.0 on a copy.
        g2 = copy.deepcopy(g)
        for n1, n2, wd in g2.edges(data=True):
            g2[n1][n2]['weight'] = 1.0
        pagerank = nx.pagerank(g2, alpha=d, max_iter=number_of_loops)
        hub_rank, authority_rank = nx.hits(g2, max_iter=number_of_loops)
    metrices = [pagerank, hub_rank, authority_rank]
    metrices_names = ['pagerank', 'hub_rank', 'authority_rank']
    cal_res = list(zip(metrices_names, metrices))
    if return_dic:
        return cal_res
    cal_res2 = {name: res for name, res in cal_res}
    df_res = pd.DataFrame.from_dict(cal_res2)
    df_res = df_res[metrices_names]
    return df_res
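# Usage sketch (illustrative, not from the original source): exercises
# calculate_metrices on a tiny weighted digraph, once for each output form.
if __name__ == '__main__':
    demo = nx.DiGraph()
    demo.add_weighted_edges_from([('a', 'b', 2.0), ('b', 'c', 1.0), ('c', 'a', 3.0)])
    print(calculate_metrices(demo))                    # list of (name, score-dict) pairs
    print(calculate_metrices(demo, return_dic=False))  # DataFrame, one column per metric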
def usr_top_chans(usr, netWindow, nchans=5):
    # Channels this user has posted in.
    chanList = list(netWindow.loc[netWindow['user'] == usr]['channel'].unique())
    b = netWindow.groupby(['user', 'channel']).count().reset_index()
    b['weight'] = b['text']
    b = b.drop(['subtype', 'type', 'ts', 'time', 'date', 'text'], axis=1)
    G = nx.DiGraph()
    networkG = nx.from_pandas_edgelist(b, source='user', target='channel', create_using=G)
    networkG.add_weighted_edges_from(list(b.itertuples(index=False, name=None)))
    try:
        h, a = nx.hits(networkG)
    except nx.PowerIterationFailedConvergence:
        # Retry with a much looser tolerance if HITS fails to converge.
        h, a = nx.hits(networkG, tol=1e-01)
    bib = {k: a[k] for k in chanList if k in a}
    chScore = pd.DataFrame.from_dict(bib, orient='index')
    chScore.columns = ['hScore']
    chScore = chScore.sort_values(by='hScore', ascending=False)
    return chScore.iloc[0:nchans]
def get_hubs(c, method):
    try:
        if method == 'eigenvector':
            centralities = nx.eigenvector_centrality(c, weight='weight', max_iter=1000)
        elif method == 'pagerank':
            centralities = nx.pagerank(c, max_iter=1000)
        elif method == 'hits_hub':
            # nx.hits returns (hubs, authorities); the indices were swapped here.
            centralities = nx.hits(c, max_iter=1000)[0]
        elif method == 'hits_authority':
            centralities = nx.hits(c, max_iter=1000)[1]
        elif method == 'betweenness':
            centralities = nx.betweenness_centrality(c, weight='weight')
        elif method == 'katz':
            centralities = nx.katz_centrality(c, weight='weight', max_iter=10000)
        else:
            raise NameError(method)
    except nx.PowerIterationFailedConvergence:
        return None
    max_hub = max(centralities.items(), key=itemgetter(1))[1]
    hubs = [node for node in centralities if abs(centralities[node] - max_hub) <= 0.001]
    if len(hubs) == len(c.nodes()):
        return 'all nodes'
    return ', '.join(hubs)
def set_score(G, context, k, seed):
    if callable(context):
        score = context(G)
    elif context == 'eigenvector':
        score = nx.eigenvector_centrality(G, max_iter=1000000)
    elif context == 'degree':
        score = nx.degree_centrality(G)
    elif context == 'betweenness':
        score = betweenness_by_component(G, k, seed)
    elif context == 'hub':
        H = nx.DiGraph(G)
        score = nx.hits(H, max_iter=1000000)[0]  # the graph argument was missing
    elif context == 'authority':
        H = nx.DiGraph(G)
        score = nx.hits(H, max_iter=1000000)[1]  # the graph argument was missing
    elif context == 'clustering':
        score = nx.clustering(G)
    elif context == 'clustering_small':
        score = nx.clustering(G)
        for node in score.keys():
            score[node] = 1 - score[node]
    elif context is None:
        score = {node: 0. for node in G.nodes}
    nx.set_node_attributes(G, score, name='score')
    return G
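# Usage sketch (illustrative): annotate a small graph with hub scores via set_score.
# The k and seed arguments are only consumed by the 'betweenness' context, so dummy
# values suffice here.
import networkx as nx

demo = set_score(nx.karate_club_graph(), 'hub', k=None, seed=0)
top3 = sorted(nx.get_node_attributes(demo, 'score').items(), key=lambda kv: -kv[1])[:3]
print(top3)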
def rank_nodes_baselines(graph, method="katz", limit=20):
    # If 'graph' is a string then a path was provided, so we load the graph from it
    if isinstance(graph, str):
        graph = nx.read_gexf(graph, node_type=int)
    if method == "katz":
        r = katz_centrality(graph, alpha=0.01, beta=1.0)
    elif method == "hits_hub":
        hubs, auth = nx.hits(graph, max_iter=500)
        r = hubs
    elif method == "hits_auth":
        hubs, auth = nx.hits(graph, max_iter=500)
        r = auth
    else:
        raise ValueError("Invalid method parameter: '%s'" % method)
    rank = sorted(r.items(), key=lambda kv: kv[1], reverse=True)
    results = []
    for node_id, score in rank:
        if graph.nodes[node_id]["type"] == "paper":
            results.append((node_id, graph.nodes[node_id]["paper_id"], score))
        if len(results) == limit:
            break
    return results
import networkx as nx
import matplotlib.pyplot as plt

def run(edges, show=False):
    G = nx.DiGraph()
    G.add_edges_from(edges)
    if show:
        nx.draw(G, pos=nx.spring_layout(G))
        plt.show()
        nx.drawing.nx_pydot.write_dot(G, './graph.dot')  # dot -n -Tpng graph.dot > graph.png
    print(nx.hits(G, max_iter=10**3))
    print(nx.pagerank(G))
def test_hits_not_convergent(self):
    G = nx.path_graph(50)
    with pytest.raises(nx.PowerIterationFailedConvergence):
        nx.hits_scipy(G, max_iter=1)
    with pytest.raises(nx.PowerIterationFailedConvergence):
        _hits_python(G, max_iter=1)
    with pytest.raises(nx.PowerIterationFailedConvergence):
        nx.hits_scipy(G, max_iter=0)
    with pytest.raises(nx.PowerIterationFailedConvergence):
        _hits_python(G, max_iter=0)
    with pytest.raises(ValueError):
        nx.hits(G, max_iter=0)
    with pytest.raises(sp.sparse.linalg.ArpackNoConvergence):
        nx.hits(G, max_iter=1)
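# Companion sketch (illustrative, not part of the test suite): how calling code can
# guard against the non-convergence exercised above. As the test shows, the raised
# class differs by networkx implementation/version, so both are caught here.
import networkx as nx
import scipy.sparse.linalg as spla

G = nx.path_graph(50)
try:
    h, a = nx.hits(G, max_iter=1)
except (nx.PowerIterationFailedConvergence, spla.ArpackNoConvergence):
    h, a = nx.hits(G, max_iter=1000)  # retry with a larger iteration budget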
def answer_seven():
    # Run HITS once; it returns (hub scores, authority scores).
    hub_scores, authority_scores = nx.hits(G2)
    return (hub_scores['realclearpolitics.com'],
            authority_scores['realclearpolitics.com'])
def webpage_rank(features, graph, method, edges):
    if method.lower() == "hits_a":
        h, a = nx.hits(graph)
        return sorted(a.items(), key=lambda x: x[1], reverse=True)
    elif method.lower() == "hits_h":
        h, a = nx.hits(graph)
        return sorted(h.items(), key=lambda x: x[1], reverse=True)
    elif method.lower() == "leaderrank":
        lr = leaderrank(graph)
        return sorted(lr.items(), key=lambda item: item[1], reverse=True)
    else:
        # TrustRank
        tr = trustrank(features, edges)
        return sorted(tr.items(), key=lambda item: item[1], reverse=True)
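# Illustrative call (assumption: only the HITS branches are exercised here, since
# leaderrank and trustrank are helpers defined elsewhere; features and edges are
# unused on those paths, so None placeholders are fine).
import networkx as nx

demo = nx.DiGraph([('a', 'b'), ('b', 'c'), ('c', 'a'), ('a', 'c')])
print(webpage_rank(None, demo, "hits_a", None))  # pages sorted by authority score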
def test_hits(self):
    G = self.G
    h, a = networkx.hits(G, tol=1.e-08)
    for (x, y) in zip(sorted(h.values()), self.G.h):
        assert_almost_equal(x, y, places=5)
    for (x, y) in zip(sorted(a.values()), self.G.a):
        assert_almost_equal(x, y, places=5)
def test_empty(self):
    G = networkx.Graph()
    assert_equal(networkx.hits(G), ({}, {}))
    assert_equal(networkx.hits_numpy(G), ({}, {}))
    assert_equal(networkx.hits_scipy(G), ({}, {}))
    assert_equal(networkx.authority_matrix(G).shape, (0, 0))
    assert_equal(networkx.hub_matrix(G).shape, (0, 0))
def __init__(self, all_user_check_ins, network, current_user):
    G = nx.DiGraph()
    for user in all_user_check_ins:
        for check_in in all_user_check_ins[user]:
            venue = check_in["venue_id"]
            if user not in G.nodes():
                G.add_node(user)
            if venue not in G.nodes():
                G.add_node(venue)  # was G.add_node(user), which never added the venue
            if (user, venue) not in G.edges():
                G.add_edge(user, venue, weight=1)
            else:
                current_weight = G.get_edge_data(user, venue)["weight"]
                G.add_edge(user, venue, weight=current_weight + 1)
    (hub_scores, authority_scores) = nx.hits(G)
    self.authority_scores = authority_scores
    self.user = current_user
    self.user_check_ins = all_user_check_ins[current_user]
    self.network = network
    friend_count = {}
    for user in network:
        friend_count[user] = len(network[user])
    self.friend_count = friend_count
def test_hits(self):
    G = self.G
    h, a = networkx.hits(G, tol=1.e-08)
    for n in G:
        assert_almost_equal(h[n], G.h[n], places=4)
    for n in G:
        assert_almost_equal(a[n], G.a[n], places=4)
def get_network_features_for_nodes(network):
    feature_dictionaries = []
    print("Calculating node degrees...")
    feature_dictionaries.append(dict(nx.degree(network)))
    feature_dictionaries.append(dict(nx.degree(network, weight='weight')))
    print("Calculating degree centrality...")
    feature_dictionaries.append(dict(nx.degree_centrality(network)))
    print("Calculating closeness centrality...")
    feature_dictionaries.append(dict(nx.closeness_centrality(network)))
    feature_dictionaries.append(dict(nx.closeness_centrality(network, distance='distance')))
    print("Calculating betweenness centrality...")
    feature_dictionaries.append(dict(nx.betweenness_centrality(network)))
    feature_dictionaries.append(dict(nx.betweenness_centrality(network, weight='distance')))
    print("Calculating clustering coefficients...")
    feature_dictionaries.append(dict(nx.clustering(network)))
    feature_dictionaries.append(dict(nx.clustering(network, weight='weight')))
    print("Performing HITS and calculating hub scores...")
    feature_dictionaries.append(dict(nx.hits(network)[0]))
    print("Performing PageRank and calculating scores...")
    feature_dictionaries.append(dict(nx.pagerank(network, weight=None)))
    feature_dictionaries.append(dict(nx.pagerank(network, weight='weight')))
    features = {}
    for node in network.nodes():
        features[node] = [feature_dictionary[node] for feature_dictionary in feature_dictionaries]
    return features
import sqlite3
import networkx as nx
import pandas as pd
import pylab

def pagerank_hits():
    conn = sqlite3.connect("zhihu.db")
    following_data = pd.read_sql(
        'select user_url, followee_url from Following where followee_url in '
        '(select user_url from User where agree_num > 10000) and user_url in '
        '(select user_url from User where agree_num > 10000)', conn)
    conn.close()
    G = nx.DiGraph()
    cnt = 0
    for d in following_data.iterrows():
        G.add_edge(d[1][0], d[1][1])
        cnt += 1
    print('links number:', cnt)
    pylab.figure(0)
    nx.draw_networkx(G)
    pylab.show()
    # PageRank
    pr = nx.pagerank(G)
    prsorted = sorted(pr.items(), key=lambda x: x[1], reverse=True)
    print('pagerank top 100:\n')
    for p in prsorted[:100]:
        print(p[0], p[1])
    # HITS
    hub, auth = nx.hits(G)
    print('hub top 100:\n')
    for h in sorted(hub.items(), key=lambda x: x[1], reverse=True)[:100]:
        print(h[0], h[1])
    print('\nauth top 100:\n')
    for a in sorted(auth.items(), key=lambda x: x[1], reverse=True)[:100]:
        print(a[0], a[1])
def calc_nodesize(G, attr="a_score", min_size=1000, max_size=5000):
    # A numeric attr means "use this uniform size for every node".
    if not isinstance(attr, str):
        normal_size = attr
        print("all size uniformed")
        return {node_no: normal_size for node_no in G.nodes}
    # nx.hits returns (hubs, authorities); the original unpacked them in the
    # opposite order (a bug the later revision of this function acknowledges).
    h_scores, a_scores = nx.hits(G)
    if attr == "a_score":
        use_vals = a_scores
    elif attr == "h_score":
        use_vals = h_scores
    else:
        print("invalid attribute")
        return
    max_val = max(use_vals.values())
    size_dict = dict()
    for node_no, node_attr in G.nodes(data=True):
        val = node_attr.get(attr)
        if val is None:
            size = min_size / 2
        else:
            size = (val / max_val) * (max_size - min_size) + min_size
        size_dict[node_no] = size
    return size_dict
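# Usage sketch (illustrative): calc_nodesize reads the 'a_score' node attribute,
# so authority scores are stored on the graph before sizing.
import networkx as nx

demo = nx.DiGraph([(1, 2), (2, 3), (3, 1)])
_, authorities = nx.hits(demo)
nx.set_node_attributes(demo, authorities, 'a_score')
print(calc_nodesize(demo, attr='a_score'))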
def compute_hits(self):
    G = nx.DiGraph()
    with open(self.pagerank_temp_file, 'r') as f:
        for line in f:
            l = line.split(",")
            influenced = l[0].split("\n")[0]
            category = l[1].split("\n")[0]
            G.add_edge(influenced, category)
    hits = nx.hits(G)  # (hub scores, authority scores)
    hubs = hits[0]
    sorted_hubs = sorted(hubs.items(), key=operator.itemgetter(1), reverse=True)  # currently unused
    authorities = hits[1]
    sorted_authorities = sorted(authorities.items(), key=operator.itemgetter(1), reverse=True)
    counter = 0
    to_write = ''
    for i in sorted_authorities:
        to_write = to_write + str(i) + '\n'
        if counter == 20:
            break
        counter += 1
    self.write_txt_file(self.normal_hits_file, to_write)
def answer_nine():
    import operator
    hits = nx.hits(G2)
    return [a for a, b in sorted(hits[1].items(),
                                 key=operator.itemgetter(1), reverse=True)[0:5]]
def answer_seven():
    hits = nx.hits(G2)
    hub_score = hits[0]['realclearpolitics.com']
    authority_score = hits[1]['realclearpolitics.com']
    return (hub_score, authority_score)
def compute_centrality(graph):
    centrality_values = nx.hits(graph)  # (hubs, authorities)
    for node_id, centrality in centrality_values[0].items():
        graph.nodes[node_id]['hub'] = centrality
    for node_id, centrality in centrality_values[1].items():
        graph.nodes[node_id]['authority'] = centrality
    centrality_values = nx.pagerank(graph)
    for node_id, centrality in centrality_values.items():
        graph.nodes[node_id]['pagerank'] = centrality
    centrality_values = nx.in_degree_centrality(graph)
    for node_id, centrality in centrality_values.items():
        graph.nodes[node_id]['in_degree'] = centrality
    centrality_values = nx.out_degree_centrality(graph)
    for node_id, centrality in centrality_values.items():
        graph.nodes[node_id]['out_degree'] = centrality
    centrality_values = nx.closeness_centrality(graph)
    for node_id, centrality in centrality_values.items():
        graph.nodes[node_id]['closeness'] = centrality
    centrality_values = nx.betweenness_centrality(graph)
    for node_id, centrality in centrality_values.items():
        graph.nodes[node_id]['betweenness'] = centrality
def calc_nodesize(G, attr="a_score", weight_key="weight", min_size=1000, max_size=5000, use_bhits=True):
    # A numeric attr means "use this uniform size for every node".
    if not isinstance(attr, str):
        normal_size = attr
        # print("all size uniformed")
        return {node_no: normal_size for node_no in G.nodes}
    if attr == "a_score" or attr == "h_score":
        # a_scores, h_scores = nx.hits(G)  # wrong return order; this version
        # accidentally went into the HCG paper submission...
        if use_bhits is True:
            h_scores, a_scores = nx.bhits(G, weight_key=weight_key)
        else:
            h_scores, a_scores = nx.hits(G, weight_key=weight_key)
        if attr == "a_score":
            use_vals = a_scores
        elif attr == "h_score":
            use_vals = h_scores
    if attr == "in_degree":
        use_vals = dict()
        for g in G:
            use_vals[g] = G.in_degree(g)
    max_val = max(use_vals.values())
    size_dict = dict()
    for node_no, node_attr in G.nodes(data=True):
        # At paper-submission time, authority was divided by the max hub value.
        val = node_attr.get(attr)
        if val is None:
            size = min_size / 2
        else:
            size = (val / max_val) * (max_size - min_size) + min_size
        size_dict[node_no] = size
    return size_dict
def salary_predictions():
    df = pd.DataFrame(index=nx.nodes(G), columns=['Department', 'ManagementSalary'])
    for x, y in G.nodes(data=True):
        df.loc[x] = y
    df['Department'] = df['Department'].astype('float')
    df['ManagementSalary'] = df['ManagementSalary'].astype('float')
    hits = nx.hits(G)
    # Align scores by node label rather than relying on dict ordering.
    df['Hub'] = pd.Series(hits[0])
    df['Auth'] = pd.Series(hits[1])
    df_nan_free = df.dropna()
    X_train = df_nan_free['Hub'].to_numpy().reshape(-1, 1)
    y_train = df_nan_free['ManagementSalary']
    X_predict = df[~df.index.isin(df_nan_free.index)]['Hub']
    log_reg = LogisticRegression().fit(X_train, y_train)
    predict = log_reg.predict_proba(X_predict.to_numpy().reshape(-1, 1))
    return pd.Series(predict[:, 1], index=X_predict.index)
def search():
    nodes = []
    links = []
    MDG = nx.read_pajek('./static/football.net')
    G = nx.DiGraph(MDG)
    gn = G.nodes()
    cluster = nx.clustering(G)
    pagerank = nx.pagerank(G)
    hub, authority = nx.hits(G)
    lgn = list(gn)
    print(G)
    for u in gn:
        node = {}
        node['name'] = u
        node['intro'] = ('Clustering coefficient of this node: ' + str(format(cluster[u], '.3f')) + '<br>' +
                         'PageRank of this node: ' + str(format(pagerank[u], '.3f')) + '<br>' +
                         'Authority score of this node: ' + str(format(authority[u], '.3f')) + '<br>' +
                         'Hub score of this node: ' + str(format(hub[u], '.3f')))
        nodes.append(node)
    for u, v in G.edges():
        link = {}
        link['source'] = lgn.index(u)
        link['target'] = lgn.index(v)
        links.append(link)
    return render_template('Generation_Two/football.html', nodes=nodes, links=links)
def plotHubsVsViews(G):
    hubs, authorities = nx.hits(G, max_iter=10000, tol=1.0e-10)
    print("Finished HITS algorithm")
    sortedHubs = sorted(hubs.items(), key=lambda tup: tup[1], reverse=True)
    # 'views' is a module-level {video: view count} dict.
    sortedViews = sorted(views, key=views.get, reverse=True)
    top100Views = sortedViews[0:100]
    print(top100Views)
    for i in range(25):
        hub = sortedHubs[i][0]
        count = 0
        print(G.out_edges(hub))
        for edge in G.out_edges(hub):
            if edge[1] in top100Views:
                count += 1
        print("hub: " + str(hub) + " top 100: " + str(count))
    x, y = [views[i] for i in views], [hubs[j] for j in views]
    plt.xlabel('Views (log)')
    plt.ylabel('Hub Ranking')
    plt.title('Hub Ranking vs View Count For Comedy Videos')
    config = plt.gca()
    config.scatter(x, y)
    plt.show()
def realhits(G):
    # Scale the float scores up to large integers.
    rhits = {}
    hubs, auths = nx.hits(G)
    for pack in hubs:
        rhits[pack] = (int(hubs[pack] * 1000000000000), int(auths[pack] * 1000000000000))
    return rhits
def prSH(name, graph):
    print('---- result of', name, '----')
    try:
        pagerank = nx.pagerank(graph, alpha=1.0)
    except nx.PowerIterationFailedConvergence:
        pagerank = 0
    surf = nx.pagerank(graph, alpha=0.85)
    print(name, 'pagerank : ', pagerank)  # was the literal string 'name'
    print(name, 'sf=0.85  : ', surf)
    h, a = nx.hits(graph)
    print(name, 'HITS hub : ', h)
    print(name, 'HITS auth: ', a)
    print()
    fig = plt.figure()
    fig.suptitle('---- result of ' + name + ' ----')
    count = 1
    for k, d in {'pagerank': pagerank, 'sf=0.85': surf, 'HITS hub': h, 'HITS auth': a}.items():
        if d == 0:
            count += 1
            continue
        plt.subplot(2, 2, count)
        plt.subplots_adjust(wspace=0.4, hspace=0.6)
        plt.title(k)
        plt.bar(range(len(d)), list(d.values()), tick_label=list(d.keys()))
        count += 1
    plt.savefig(name + '.png')
    plt.show()
def plot_hubs_and_authorities(G):
    """Creates bar plots of hub and authority scores."""
    ha = nx.hits(G)
    print(G, len(ha))
    hubs = {k: v for k, v in ha[0].items() if v != 0.0}
    auth = {k: v for k, v in ha[1].items() if v != 0.0}
    hubs.pop('None')
    # Sorting by value
    hubs = sorted(hubs.items(), key=lambda i: i[1], reverse=True)
    auth = sorted(auth.items(), key=lambda i: i[1], reverse=True)
    # Hubs
    fig = P.figure()
    ax = fig.add_subplot(111)
    labels = [l[0] for l in hubs]
    vals = [l[1] for l in hubs]
    pos = np.arange(len(hubs))
    ax.barh(pos, vals, align='center', height=.8)
    P.xlabel('Hub Statistic')
    P.ylabel('Ministers')
    P.yticks(pos, labels, size='small')
    # Authorities
    fig = P.figure()
    ax = fig.add_subplot(111)
    auth = auth[:25]  # only the most cited laws
    labels = ['$%s$' % l[0] for l in auth]
    vals = [l[1] for l in auth]
    pos = np.arange(len(auth))
    ax.barh(pos, vals, align='center', height=.8)
    P.xlabel('Authority Statistic')
    P.ylabel('Law id')
    P.yticks(pos, labels)
def node_treatments(self):
    G = self.graph
    tools.progress(0, 100, "Degree centrality")
    if len(nx.get_node_attributes(G, "centrality")) == 0:
        nx.set_node_attributes(G, nx.degree_centrality(G), "centrality")
    tools.progress(20, 100, "Betweenness centrality")
    if len(nx.get_node_attributes(G, "betweenness")) == 0:
        nx.set_node_attributes(G, nx.betweenness_centrality(G), "betweenness")
    tools.progress(40, 100, "Closeness centrality")
    if len(nx.get_node_attributes(G, "closeness")) == 0:
        nx.set_node_attributes(G, nx.closeness_centrality(G), "closeness")
    tools.progress(60, 100, "PageRank")
    try:
        if len(nx.get_node_attributes(G, "pagerank")) == 0:
            nx.set_node_attributes(G, nx.pagerank(G), "pagerank")
    except Exception:
        pass
    tools.progress(80, 100, "Hubs and authorities")
    try:
        if len(nx.get_node_attributes(G, "hub")) == 0:
            hub, aut = nx.hits(G)
            nx.set_node_attributes(G, hub, "hub")
            nx.set_node_attributes(G, aut, "autority")
    except Exception:
        pass
    # tools.progress(90, 100, "Eccentricity")
    # nx.set_node_attributes(G, nx.eccentricity(G), "eccentricity")
    self.node_treatment = True
    tools.progress(100, 100, "Processing finished")
def analyze_graph(G):
    # centralities and node metrics
    out_degrees = G.out_degree()
    in_degrees = G.in_degree()
    betweenness = nx.betweenness_centrality(G)
    eigenvector = nx.eigenvector_centrality_numpy(G)
    closeness = nx.closeness_centrality(G)
    pagerank = nx.pagerank(G)
    avg_neighbour_degree = nx.average_neighbor_degree(G)
    redundancy = bipartite.node_redundancy(G)
    load = nx.load_centrality(G)
    # nx.hits returns a (hubs, authorities) pair, not one dict per node;
    # indexing the pair by node name would fail.
    hubs, authorities = nx.hits(G)
    vitality = nx.closeness_vitality(G)
    for name in G.nodes():
        G.nodes[name]['out_degree'] = out_degrees[name]
        G.nodes[name]['in_degree'] = in_degrees[name]
        G.nodes[name]['betweenness'] = betweenness[name]
        G.nodes[name]['eigenvector'] = eigenvector[name]
        G.nodes[name]['closeness'] = closeness[name]
        G.nodes[name]['pagerank'] = pagerank[name]
        G.nodes[name]['avg-neigh-degree'] = avg_neighbour_degree[name]
        G.nodes[name]['redundancy'] = redundancy[name]
        G.nodes[name]['load'] = load[name]
        G.nodes[name]['hub'] = hubs[name]
        G.nodes[name]['authority'] = authorities[name]
        G.nodes[name]['vitality'] = vitality[name]
    # communities
    partitions = community.best_partition(G)
    for member, c in partitions.items():
        G.nodes[member]['community'] = c
    return G
def Hits_algorithem(FinalsocialNetwork, nameList):
    G = nx.DiGraph()
    for x in FinalsocialNetwork:
        index = 0
        for y in x:
            if not index == 0:
                if int(y) > 0:
                    G.add_edges_from([(x[0], nameList[index - 1])])
            index = index + 1
    plt.figure(figsize=(100, 100))
    nx.draw_networkx(G, with_labels=True)
    hubs, authorities = nx.hits(G, max_iter=50000000, normalized=True)
    # The built-in hits function returns two dictionaries keyed by node,
    # containing hub scores and authority scores respectively.
    print("Hub scores: ", hubs)
    for x in nameList:
        print(x, ":", hubs.get(x))
    print("Authority scores: ", authorities)
    for y in nameList:
        print(y, ":", authorities.get(y))
    return hubs, authorities
def calculate_centrality(self, mode='hits'):
    if mode == 'degree_centrality':
        a = nx.degree_centrality(self.G)
    else:
        h, a = nx.hits(self.G)
    # Track min/max scores separately for user ("u...") and item ("i...") nodes.
    max_a_u, min_a_u, max_a_v, min_a_v = 0, 100000, 0, 100000
    for node in self.G.nodes():
        if node[0] == "u":
            if max_a_u < a[node]:
                max_a_u = a[node]
            if min_a_u > a[node]:
                min_a_u = a[node]
        if node[0] == "i":
            if max_a_v < a[node]:
                max_a_v = a[node]
            if min_a_v > a[node]:
                min_a_v = a[node]
    # Min-max normalize within each node class.
    for node in self.G.nodes():
        if node[0] == "u":
            if max_a_u - min_a_u != 0:
                self.authority_u[node] = (float(a[node]) - min_a_u) / (max_a_u - min_a_u)
            else:
                self.authority_u[node] = 0
        if node[0] == 'i':
            if max_a_v - min_a_v != 0:
                self.authority_v[node] = (float(a[node]) - min_a_v) / (max_a_v - min_a_v)
            else:
                self.authority_v[node] = 0
def answer_seven():
    hits = nx.hits(G2)
    return hits[0]['realclearpolitics.com'], hits[1]['realclearpolitics.com']
def test_empty(self):
    numpy = pytest.importorskip('numpy')
    G = networkx.Graph()
    assert networkx.hits(G) == ({}, {})
    assert networkx.hits_numpy(G) == ({}, {})
    assert networkx.authority_matrix(G).shape == (0, 0)
    assert networkx.hub_matrix(G).shape == (0, 0)
def answer_nine():
    auts = nx.hits(G2)[1]  # authority scores
    return sorted(auts.keys(), key=lambda x: auts[x], reverse=True)[:5]
def hits_influence(adj_matrix, k):
    """Calculates edge influence based on HITS.

    Args:
        adj_matrix: data adjacency matrix
        k: budget to pick top-k influential edges

    Returns:
        S: list of top-k influential edges
    """
    B = get_graph(adj_matrix)
    S = []
    while len(S) < k:  # was <=, which selected k+1 edges
        scores = nx.hits(B)
        hub = scores[0]
        auth = scores[1]
        for e1, e2 in B.edges():
            B[e1][e2]['influence'] = hub[e1] + hub[e2] + auth[e1] + auth[e2]
        # Take the single highest-influence edge, remove it, and record it.
        max_e = max(B.edges(data=True), key=lambda x: x[2]['influence'])[:2]
        B.remove_edge(max_e[0], max_e[1])
        S.append(max_e)
    return S
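# Illustrative driver (hypothetical): hits_influence depends on a get_graph helper
# that is not shown above; this stand-in, and the toy matrix, are assumptions.
import numpy as np
import networkx as nx

def get_graph(adj_matrix):  # hypothetical stand-in for the missing helper
    return nx.from_numpy_array(np.asarray(adj_matrix), create_using=nx.DiGraph)

adj = np.array([[0, 1, 1, 0],
                [0, 0, 1, 0],
                [1, 0, 0, 1],
                [0, 1, 0, 0]])
print(hits_influence(adj, k=2))  # the two highest-influence edges under HITS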
def answer_eight():
    hubs = nx.hits(G2)[0]  # hub scores
    return sorted(hubs.keys(), key=lambda x: hubs[x], reverse=True)[:5]
import numpy as np
import networkx as nx

def p_original(fm, q_id, pqw, pwords, dls, pj, res, k1, k2, b, avdl, N, Nd, alpha, beta, gamma):
    """
    Runs retrieval for a single query.

    :param fm: frequency matrix
    :param q_id: query id
    :param pqw: list of query words for this query
    :param pwords: dictionary of words
    :param dls: document lengths
    :param pj: index to write at in res
    :param res: output matrix
    :param k1: BM25 parameter
    :param k2: BM25 parameter
    :param b: BM25 parameter
    :param avdl: average document length
    :param N: number of documents
    :return: nothing; results are written into res
    """
    # Ignore this part and assume it works: at the end you have the BM25 result.
    actual_qw = []
    indexes_of_qws = []
    for qw in pqw:
        if qw in pwords:
            actual_qw.append(qw)
            indexes_of_qws.append(pwords[qw])
    indexes_of_qws = np.array(indexes_of_qws)
    tmp = np.arange(0, fm.shape[1])
    indexes_of_qws = np.in1d(tmp, indexes_of_qws)
    red_fm = fm[:, indexes_of_qws]
    idfs = np.ones(shape=(red_fm.shape[0], red_fm.shape[1]))
    tmp2 = np.copy(red_fm)
    tmp2[tmp2 != 0] = 1
    nis = tmp2.sum(axis=0)
    Ns = np.ones(red_fm.shape[1]) * N
    idfs = np.log((Ns - nis + 0.5) / (nis + 0.5))
    Ks = k1 * ((1 - b) + b * (dls / avdl))
    tf1s = red_fm * (k1 + 1) / (np.tile(Ks, (red_fm.shape[1], 1)).T + red_fm)
    tf2s = np.ones(red_fm.shape)
    ress = np.multiply(idfs, tf1s)
    ress = ress.sum(axis=1)
    idss = np.arange(0, red_fm.shape[0])
    idss_indx = np.argsort(ress)[::-1]
    idss = idss[idss_indx]
    ress = ress[idss_indx]
    idss_N = idss[0:Nd]
    ress_N = ress[0:Nd]
    G = get_graph_N(idss_N)
    try:
        # nx.hits returns (hubs, authorities); the original unpacked them swapped.
        hubs, auths = nx.hits(G)
    except nx.exception.NetworkXError:
        auths = {str(nid): 1.0 for nid in idss_N}
        hubs = {str(nid): 1.0 for nid in idss_N}
def test_empty(self):
    try:
        import numpy
    except ImportError:
        raise SkipTest('numpy not available.')
    G = networkx.Graph()
    assert_equal(networkx.hits(G), ({}, {}))
    assert_equal(networkx.hits_numpy(G), ({}, {}))
    assert_equal(networkx.authority_matrix(G).shape, (0, 0))
    assert_equal(networkx.hub_matrix(G).shape, (0, 0))
def hits_algo(adj_matrix, hub_score):
    # INPUT: initial hub scores and the adjacency matrix.
    # OUTPUT: converged (hub, authority) score dictionaries.
    print("Running HITS algorithm...")
    graph = nx.to_networkx_graph(adj_matrix)
    nstart = {i: hub_score[i] for i in range(len(hub_score))}
    return nx.hits(graph, nstart=nstart)
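# Usage sketch (illustrative): seed HITS with uniform hub scores on a small
# adjacency matrix; nx.to_networkx_graph accepts a numpy array directly.
import numpy as np

adj = np.array([[0, 1, 1],
                [1, 0, 0],
                [1, 1, 0]])
hubs, authorities = hits_algo(adj, hub_score=[1.0, 1.0, 1.0])
print(hubs, authorities)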
from datetime import datetime
from time import mktime
import numpy as np
import networkx

def extract_social_features(df_comments):
    socialVector = np.empty([df_comments.shape[0], 8])
    index = 0
    graph = networkx.DiGraph()
    userdict = dict()
    for _, row in df_comments.iterrows():
        userdict[row['comment_id']] = row['author']
    for user in set(userdict.values()):
        graph.add_node(user)
    # Add a reply edge from the comment author to the thread-root author.
    for _, row in df_comments.iterrows():
        if row['thread_root_id'] not in userdict:
            continue
        source = userdict[row['comment_id']]
        dest = userdict[row['thread_root_id']]
        if source == dest:
            continue
        graph.add_edge(source, dest)
    pageranker = networkx.pagerank(graph, alpha=0.85)
    hubs, auths = networkx.hits(graph)
    author_groupby = df_comments.groupby('author')
    user_age_dict = {}
    user_nr_posts_dict = {}
    for _, group in author_groupby:
        first_date = datetime.fromtimestamp(mktime(group.date.values[0]))
        last_date = datetime.fromtimestamp(mktime(group.date.values[-1]))
        diff = last_date - first_date
        days = diff.days
        user_age_dict[group.author.values[0]] = days + 1
        user_nr_posts_dict[group.author.values[0]] = len(group)
    for ix, row in df_comments.iterrows():
        user = userdict[row['comment_id']]
        socialVector[ix][0] = graph.in_degree(user)  # in-degree
        socialVector[ix][1] = graph.out_degree(user)  # out-degree
        socialVector[ix][2] = user_age_dict[user]  # user age
        socialVector[ix][3] = user_nr_posts_dict[user]  # number of posts
        socialVector[ix][4] = user_nr_posts_dict[user] / float(user_age_dict[user])  # post rate
        socialVector[ix][5] = pageranker[user]  # PageRank
        socialVector[ix][6] = hubs[user]  # hub score (comment previously said "Pagerank")
        socialVector[ix][7] = auths[user]  # authority score
        index += 1
        if index % 1000 == 0:
            print("extracted", index, "values")
    return socialVector
def test_hits(testgraph):
    """
    Test hits algorithm
    """
    h0, a0 = nx.hits(testgraph[0], max_iter=100)
    h1, a1 = sg.links.hits(testgraph[1], max_iter=100)
    for u in testgraph[0].nodes_iter():
        assert abs(h0[u] - h1[u]) < 1e-5
        assert abs(a0[u] - a1[u]) < 1e-5
def getHits(G):
    print("Calculating HITS")
    ret = []
    try:
        hubs, auths = nx.hits(G)
        for pack in hubs:
            ret.append((pack, (hubs[pack], auths[pack])))  # was 'auth[pack]', an undefined name
    except nx.NetworkXError:
        print("HITS failed")
    return ret
def __link_analysis(self):
    # Recalculates hub and authority scores.
    # TODO: insert check for existing hub? reduce computational time
    nstart = {}
    for name in nx.nodes(self.OG):
        nstart[name] = self.OG.nodes[name]['normweightmax']
    h, a = nx.hits(self.OG, max_iter=30, nstart=nstart)  # nstart was built but never passed in
    for node in self.OG.nodes():
        self.OG.nodes[node]['hub'] = h[node]
        self.OG.nodes[node]['authority'] = a[node]
    # Same treatment for WOG.
    nstart2 = {}
    for name in nx.nodes(self.WOG):
        nstart2[name] = self.WOG.nodes[name]['normweightmax']
    h2, a2 = nx.hits(self.WOG, max_iter=30, nstart=nstart2)
    for node in self.WOG.nodes():
        self.WOG.nodes[node]['hub'] = h2[node]
        self.WOG.nodes[node]['authority'] = a2[node]
def __link_analysis(self):
    nstart = {}
    for name in nx.nodes(self.OG):
        nstart[name] = self.OG.nodes[name]['normweightmax']
    pr = nx.pagerank(self.OG, weight="normweightbymax")
    h, a = nx.hits(self.OG, max_iter=30, nstart=nstart)  # nstart was built but never passed in
    for node in self.OG.nodes():
        self.OG.nodes[node]['pagerank'] = pr[node]
        self.OG.nodes[node]['hub'] = h[node]
        self.OG.nodes[node]['authority'] = a[node]
def hits_example():
    n = 7
    g = nx.DiGraph()
    g.add_nodes_from(range(0, n))
    edge_list = [(0, 1), (0, 6), (0, 5), (1, 2), (1, 6), (2, 0), (2, 1), (2, 3),
                 (3, 4), (4, 5), (4, 6), (5, 0), (5, 3), (5, 4)]
    g.add_edges_from(edge_list)
    hubs, auts = nx.hits(g)
    for node in range(0, n):
        print('node', node)
        print('  authority:', auts[node])
        print('  hubness:', hubs[node])
        print('  out:', list(g.successors(node)))
        print('  in:', list(g.predecessors(node)))
import operator
import pickle
import networkx

def get_statistics():
    """Uses graph statistics to find the most important papers in the collection."""
    with open(MAIN_FOLDER + "network.pkl", "rb") as f:  # pickles must be opened in binary mode
        network = pickle.load(f)
    betweenness_dict = networkx.betweenness_centrality(network.to_undirected())
    sorted_betweenness = sorted(betweenness_dict.items(), key=operator.itemgetter(1), reverse=True)
    betweenness = [x[0] for x in sorted_betweenness[:5]]
    pagerank_dict = networkx.pagerank(network.to_undirected())
    sorted_pagerank = sorted(pagerank_dict.items(), key=operator.itemgetter(1), reverse=True)
    pagerank = [x[0] for x in sorted_pagerank[:5]]
    hits_dict = networkx.hits(network.to_undirected())
    sorted_hits = sorted(hits_dict[0].items(), key=operator.itemgetter(1), reverse=True)
    hits = [x[0] for x in sorted_hits[:5]]
    in_degree_dict = dict(network.in_degree())
    sorted_in_degree = sorted(in_degree_dict.items(), key=operator.itemgetter(1), reverse=True)
    in_degree = [x[0] for x in sorted_in_degree[:5]]
    # Try progressively smaller clique sizes until some community is found.
    community_dict = []
    for k in range(20):
        community_dict += list(networkx.k_clique_communities(network.to_undirected(), 21 - k))
        if community_dict:
            break
    modules = []
    for index, community in enumerate(community_dict):
        modules.append([])
        for p in community:
            modules[index].append(p)
    statistics = {'in_degree': in_degree, 'betweenness': betweenness,
                  'hits': hits, 'pagerank': pagerank, 'modules': modules}
    with open(MAIN_FOLDER + "statistics.pkl", "wb") as f:  # was the invalid mode "wa"
        pickle.dump(statistics, f)
def recalculate(self):
    authorities = nx.hits(self.DG)[1]
    # Convert to log scale
    authorities_log = {}
    for user, value in authorities.items():
        v = value * 10**30
        if value > 0:
            v = math.log(v)
        else:
            v = 0
        authorities_log[user] = abs(int(v))
    # Normalise to 100
    authorities_norm = {}
    max_user = max(authorities_log.items(), key=operator.itemgetter(1))[0]
    max_val = authorities_log[max_user]
    r = 100 / float(max_val)
    for user, value in authorities_log.items():
        authorities_norm[user] = int(value * r)
    authorities_norm[max_user] = 100
    # Clear existing values
    sql = "UPDATE tracker_users set karma = 0"
    self.queryDB(sql, ())
    # Save values
    for user, karma in authorities_norm.items():
        sql = "UPDATE tracker_users SET karma = %s WHERE username = %s"
        self.queryDB(sql, (karma, user))
def hits(self, gr):
    h, a = nx.hits(gr, max_iter=300)
    return h
def champion_matrix_rank(champion_matrix_df, criteron, norm=False):
    '''
    champion_matrix_df: pd.DataFrame, kill/death/assist counts between champions;
        (a, b) = i means a kills / is killed by / assists b i times
    criteron: 'count', 'eigen', 'eigen_ratio', 'eigen_diff', 'pagerank', 'hits'
    norm: False or 'row'
    TODO: the eigen_ratio and eigen_diff criteria only make sense for the kill
        matrix, not the assist matrix (should be forbidden in the future)
    '''
    # must be float for linalg.eigs
    champion_matrix = champion_matrix_df.to_numpy().astype(float)
    if norm == 'row':
        # use row sums to normalize
        row_sum = champion_matrix.sum(axis=1)
        champion_matrix = champion_matrix / row_sum[:, np.newaxis]  # numpy broadcast
    # Count
    if criteron == 'count':
        print("Champion Rank by counts:")
        rank_df = pd.DataFrame()
        rank_df['champion'] = pd.Series(champion_matrix_df.index)
        rank_df['count'] = champion_matrix.sum(axis=1)
        print_full(rank_df.sort_values(by='count', ascending=False))
    # Eigenvector centrality
    elif criteron == 'eigen':
        print("Champion Rank by eigenvector centrality")
        rank_df = pd.DataFrame()
        rank_df['champion'] = pd.Series(champion_matrix_df.index)
        # eigenvector for the largest eigenvalue (k=1); the sign may flip,
        # but absolute values are unchanged
        rank_df['eigen_1'] = pd.DataFrame(abs(linalg.eigs(champion_matrix, k=1)[1]))
        print_full(rank_df.sort_values(by='eigen_1', ascending=False))
    # Eigen ratio, eigen(M)/eigen(M.T)
    elif criteron == 'eigen_ratio':
        print("Champion Rank by eigenvector centrality ratio")
        rank_df = pd.DataFrame()
        rank_df['champion'] = pd.Series(champion_matrix_df.index)
        rank_df['eigen'] = pd.DataFrame(abs(linalg.eigs(champion_matrix, k=1)[1]))
        rank_df['eigen_t'] = pd.DataFrame(abs(linalg.eigs(champion_matrix.transpose(), k=1)[1]))
        rank_df['eigen_ratio'] = rank_df['eigen'] / rank_df['eigen_t']
        print_full(rank_df.sort_values(by='eigen_ratio', ascending=False))
    # Eigen diff, eigen(M)-eigen(M.T)
    elif criteron == 'eigen_diff':
        print("Champion Rank by eigenvector centrality difference")
        rank_df = pd.DataFrame()
        rank_df['champion'] = pd.Series(champion_matrix_df.index)
        rank_df['eigen'] = pd.DataFrame(abs(linalg.eigs(champion_matrix, k=1)[1]))
        rank_df['eigen_t'] = pd.DataFrame(abs(linalg.eigs(champion_matrix.transpose(), k=1)[1]))
        rank_df['eigen_diff'] = rank_df['eigen'] - rank_df['eigen_t']
        print_full(rank_df.sort_values(by='eigen_diff', ascending=False))
    # PageRank: similar results to eigenvector centrality
    elif criteron == 'pagerank':
        print("Champion Rank by PageRank")
        G = nx.DiGraph(champion_matrix)
        pr = nx.pagerank(G)
        rank_df = pd.DataFrame()
        rank_df['champion'] = pd.Series(champion_matrix_df.index)
        rank_df['pagerank'] = pd.DataFrame(data=list(pr.values()), index=list(pr.keys()))
        print_full(rank_df.sort_values(by='pagerank', ascending=False))
    # HITS
    elif criteron == 'hits':
        print("Champion Rank by HITS")
        G = nx.DiGraph(champion_matrix)
        hub, auth = nx.hits(G)
        hub_rank_df = pd.DataFrame()
        hub_rank_df['champion'] = pd.Series(champion_matrix_df.index)
        hub_rank_df['hub'] = pd.DataFrame(data=list(hub.values()), index=list(hub.keys()))
        print_full(hub_rank_df.sort_values(by='hub', ascending=False))
        auth_rank_df = pd.DataFrame()
        auth_rank_df['champion'] = pd.Series(champion_matrix_df.index)
        auth_rank_df['auth'] = pd.DataFrame(data=list(auth.values()), index=list(auth.keys()))
        print_full(auth_rank_df.sort_values(by='auth', ascending=False))
    else:
        raise ValueError('Invalid criteron provided.')
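# Usage sketch (illustrative; the champion labels and counts are made up, and
# print_full is assumed to be the module's own pretty-printer):
import pandas as pd

kills = pd.DataFrame([[0, 5, 2], [1, 0, 4], [3, 2, 0]],
                     index=['A', 'B', 'C'], columns=['A', 'B', 'C'])
champion_matrix_rank(kills, 'hits')  # prints hub and authority rankings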
max_edge_distance = -1
meta_data = True
for arg in sys.argv:
    m = re.match(r'--max=(\d+)', arg)
    if m:
        n = int(m.group(1))  # n is expected to be supplied via --max=<N>
    m = re.match(r'--max-edge-distance=(\d+)', arg)
    if m:
        max_edge_distance = int(m.group(1))
    if arg == '--no-meta-data':
        meta_data = False
g = read_graph(sys.argv[1])
if '--graph' in sys.argv:
    sub = g.subgraph([v[0] for v in top(g, nx.pagerank(g), n, add_meta_data=meta_data)])
    write_graph(remove_long_edges(sub, max_edge_distance))
elif '--pagerank' in sys.argv:
    writer = csv.writer(sys.stdout)
    keys = ['title', 'rank', 'top_250_rank', 'year', 'kind', 'imdb_id', 'rating', 'genre', 'director']
    writer.writerow(keys)
    xstr = lambda s: str(s) if s else ""
    for (node, score) in top(g, nx.pagerank(g), add_meta_data=meta_data):
        writer.writerow([xstr(g.nodes[node].get(k, '')) for k in keys])
elif '--hits' in sys.argv:
    # HITS
    (hubs, authorities) = nx.hits(g)
    pp(top(g, hubs, n, meta_data))
    pp(top(g, authorities, n, meta_data))
elif '--degree' in sys.argv:
    # degree
    pp(top(g, nx.degree(g), n, meta_data))
def compute_hubs_authorities(G):
    """For the given graph, compute hub and authority values for each node."""
    hits = nx.hits(G, max_iter=500)
    hubs = hits[0]
    authorities = hits[1]
    return hubs, authorities
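# Usage sketch (illustrative): hub/authority scores for a toy citation-style graph.
import networkx as nx

demo = nx.DiGraph([(1, 2), (1, 3), (2, 3), (3, 1), (4, 3)])
hubs, authorities = compute_hubs_authorities(demo)
print(max(authorities, key=authorities.get))  # the node with the highest authority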
G.add_weighted_edges_from(links)
nx.draw_networkx(G, posNX, width=widths)
plt.pyplot.savefig('flows.png')  # save before show(), or the saved figure is blank
plt.pyplot.show()
out.close()
newPOS = []
for coords in iter(posIG):
    newPOS.append(posIG[coords])
betterGraph = ig.Graph(25, newLinks, directed=True)
betterGraph.es["width"] = widths
betterGraph.vs["label"] = range(25)
layout = newPOS
ig.plot(betterGraph, layout=layout, weighted=True)
hitsScores = nx.hits(G)  # run HITS once instead of twice
hitsHubs, hitsAuths = hitsScores[0].values(), hitsScores[1].values()
def hits(self):
    '''h, a = hits(): hub and authority scores'''
    return nx.hits(self._graph)
def test_hits_nstart(self):
    G = self.G
    nstart = dict([(i, 1. / 2) for i in G])
    h, a = networkx.hits(G, nstart=nstart)
rprr = nx.pagerank_numpy(R, alpha=damping, personalization=p, weight='weighted', dangling=p)
node_list = list(G.nodes())
rprr = dict(zip([node_list[i] for i in rprr.keys()], rprr.values()))
rppr[node_list[nd]] = n(rprr)
nx.set_node_attributes(G, rppr, 'rppr')  # modern signature: (G, values, name)
# remove inorganic nodes for graph visualisation
G = graph.rm_nodes(G, inorganics)
uwdeg = dict(G.degree())
h, a = nx.hits(G, tol=1e-8, normalized=False)
h = n(h)
a = n(a)
nx.set_node_attributes(G, uwdeg, 'degree')
nx.set_node_attributes(G, h, 'hubs')
nx.set_node_attributes(G, a, 'authorities')
def test_hits_not_convergent(self):
    G = self.G
    networkx.hits(G, max_iter=0)
def hub_cent(G):
    centrality = nx.hits(G)  # (hubs, authorities)
    return relevant_cen_results(centrality)