Example #1
def calculate_metrices(g,
                       return_dic=True,
                       d=0.95,
                       number_of_loops=1000,
                       weighted=True):

    if weighted:
        pagerank = nx.pagerank(g, alpha=d, max_iter=number_of_loops)
        hub_rank, authority_rank = nx.hits(g, max_iter=number_of_loops)
    else:
        g2 = copy.deepcopy(g)
        for n1, n2, wd in g2.edges(data=True):
            g2[n1][n2]['weight'] = 1.0
        pagerank = nx.pagerank(g2, alpha=d, max_iter=number_of_loops)
        hub_rank, authority_rank = nx.hits(g2, max_iter=number_of_loops)

    metrices = [pagerank, hub_rank, authority_rank]
    metrices_names = ['pagerank', 'hub_rank', 'authority_rank']
    """
    cal_res = { n:{} for n in g.nodes() }
    for dic, name in zip(metrices, metrices_names):
        for n,v in dic.items():
            cal_res[n].update({name: v})
    """
    #cal_res = { name:res for name,res in zip(metrices_names, metrices) }
    cal_res = list(zip(metrices_names, metrices))
    if return_dic:
        return cal_res
    else:
        cal_res2 = {a: b for a, b in cal_res}
        df_res = pd.DataFrame.from_dict(cal_res2)  #, orient='index')
        df_res = df_res[metrices_names]
        return df_res
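Note: nx.hits returns the pair (hubs, authorities) in exactly that order, and a few of the examples below trip over this. A minimal sketch to pin the order down (the toy graph is invented for illustration):

import networkx as nx

G = nx.DiGraph()
G.add_edges_from([('a', 'b'), ('a', 'c'), ('b', 'c')])
hubs, authorities = nx.hits(G, max_iter=1000)  # hubs first, authorities second
print(max(hubs, key=hubs.get))                 # 'a' links out the most -> best hub
print(max(authorities, key=authorities.get))   # 'c' is linked-to the most -> best authority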
Example #2
def usr_top_chans(usr, netWindow, nchans=5):

    chanList = list(
        netWindow.loc[netWindow['user'] == usr]['channel'].unique())
    b = netWindow.groupby(['user', 'channel']).count().reset_index()
    b['weight'] = b['text']
    b = b.drop(['subtype', 'type', 'ts', 'time', 'date', 'text'], axis=1)
    G = nx.DiGraph()
    networkG = nx.from_pandas_edgelist(b,
                                       source='user',
                                       target='channel',
                                       create_using=G)
    networkG.add_weighted_edges_from(list(b.itertuples(index=False,
                                                       name=None)))
    try:
        h, a = nx.hits(networkG)
    except nx.PowerIterationFailedConvergence:
        # retry with a much looser tolerance instead of silently catching everything
        h, a = nx.hits(networkG, tol=1e-01)
    bib = dict((k, a[k]) for k in chanList if k in a)
    chScore = pd.DataFrame.from_dict(bib, orient='index')
    chScore.columns = ['hScore']
    chScore = chScore.sort_values(by='hScore', ascending=False)

    return (chScore.iloc[0:nchans])
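The try/except pattern above generalizes; a small sketch assuming only networkx is imported (the function name and tolerances are made up):

import networkx as nx

def hits_with_fallback(G, tol=1e-8, fallback_tol=1e-1):
    # try the strict tolerance first, then relax it rather than failing outright
    try:
        return nx.hits(G, tol=tol)
    except nx.PowerIterationFailedConvergence:
        return nx.hits(G, tol=fallback_tol)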
Example #3
def get_hubs(c, method):
    try:
        if method == 'eigenvector':
            centralities = nx.eigenvector_centrality(c, weight='weight', max_iter=1000)
        elif method == 'pagerank':
            centralities = nx.pagerank(c, max_iter=1000)
        elif method == 'hits_hub':
            centralities = nx.hits(c, max_iter=1000)[0]
        elif method == 'hits_authority':
            centralities = nx.hits(c, max_iter=1000)[1]
        elif method == 'betweenness':
            centralities = nx.betweenness_centrality(c, weight='weight')
        elif method == 'katz':
            centralities = nx.katz_centrality(c, weight='weight', max_iter=10000)
        else:
            raise NameError
    except nx.PowerIterationFailedConvergence:
        return None

    max_hub = max(centralities.items(), key=itemgetter(1))[1]
    hubs = [node for node in centralities if abs(centralities[node]-max_hub)<=0.001]
    if len(hubs) == len(c.nodes()):
        return 'all nodes'
    else:
        return (', '.join(hubs))
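A usage sketch for get_hubs (the three-node cycle is invented; it assumes networkx as nx and operator.itemgetter are imported as in the example):

c = nx.DiGraph()
c.add_weighted_edges_from([('a', 'b', 1.0), ('b', 'c', 1.0), ('c', 'a', 1.0)])
print(get_hubs(c, 'pagerank'))  # the cycle is symmetric, every node ties: 'all nodes'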
Example #4
def set_score(G, context, k, seed):
    if callable(context):
        score = context(G)
    elif context == 'eigenvector':
        score = nx.eigenvector_centrality(G, max_iter=1000000)
    elif context == 'degree':
        score = nx.degree_centrality(G)
    elif context == 'betweenness':
        score = betweenness_by_component(G, k, seed)
    elif context == 'hub':
        H = nx.DiGraph(G)
        score = nx.hits(H, max_iter=1000000)[0]
    elif context == 'authority':
        H = nx.DiGraph(G)
        score = nx.hits(H, max_iter=1000000)[1]
    elif context == 'clustering':
        score = nx.clustering(G)
    elif context == 'clustering_small':
        score = nx.clustering(G)
        for node in score.keys():
            score[node] = 1 - score[node]
    elif context is None:
        score = {node: 0. for node in G.nodes}

    nx.set_node_attributes(G, score, name='score')
    return G
Example #5
def rank_nodes_baselines(graph, method="katz", limit=20):

    # If 'graph' is a string then a path was provided, so we load the graph from it
    if isinstance(graph, str):
        graph = nx.read_gexf(graph, node_type=int)

    if method == "katz":
        r = nx.katz_centrality(graph, alpha=0.01, beta=1.0)
    elif method == "hits_hub":
        hubs, auth = nx.hits(graph, max_iter=500)
        r = hubs
    elif method == "hits_auth":
        hubs, auth = nx.hits(graph, max_iter=500)
        r = auth
    else:
        raise ValueError("Invalid method parameter: '%s'" % method)

    rank = sorted(r.items(), key=lambda kv: kv[1], reverse=True)

    results = []
    for node_id, score in rank:
        if graph.nodes[node_id]["type"] == "paper":
            results.append((node_id, graph.nodes[node_id]["paper_id"], score))

        if len(results) == limit :
            break

    return results
Example #6
def run(edges, show=False):
    G=nx.DiGraph()
    #G.add_weighted_edges_from([('A','B',0.5),('A','C',0.5)])
    G.add_edges_from(edges)
    if show:
        nx.draw(G, pos=nx.spring_layout(G))
        plt.show()
        nx.nx_pydot.write_dot(G, './graph.dot')
        # dot -n -Tpng graph.dot >graph.png
    print(nx.hits(G, max_iter=10**3))  # tol=1e-4
    print(nx.pagerank(G))
Example #7
 def test_hits_not_convergent(self):
     G = nx.path_graph(50)
     with pytest.raises(nx.PowerIterationFailedConvergence):
         nx.hits_scipy(G, max_iter=1)
     with pytest.raises(nx.PowerIterationFailedConvergence):
         _hits_python(G, max_iter=1)
     with pytest.raises(nx.PowerIterationFailedConvergence):
         nx.hits_scipy(G, max_iter=0)
     with pytest.raises(nx.PowerIterationFailedConvergence):
         _hits_python(G, max_iter=0)
     with pytest.raises(ValueError):
         nx.hits(G, max_iter=0)
     with pytest.raises(sp.sparse.linalg.ArpackNoConvergence):
         nx.hits(G, max_iter=1)
Example #8
def answer_seven():

    # compute HITS once; it returns (hub_scores, authority_scores)
    hub_scores, authority_scores = nx.hits(G2)

    return (hub_scores['realclearpolitics.com'],
            authority_scores['realclearpolitics.com'])
Example #9
def webpage_rank(features,graph,method,edges):

    if method.lower() == "hits_a":
        h, a = nx.hits(graph)
        return sorted(a.items(), key=lambda x: x[1], reverse=True)
    elif method.lower() == "hits_h":
        h, a = nx.hits(graph)
        return sorted(h.items(), key=lambda x: x[1], reverse=True)
    elif method.lower() == "leaderrank":
        lr = leaderrank(graph)
        #print("leaderrank+++++++++++",lr.items())
        return sorted(lr.items(), key=lambda item: item[1], reverse=True)
    else:   ###trustrank
        tr = trustrank(features,edges)
        return sorted(tr.items(), key=lambda item: item[1], reverse=True)
Example #10
 def test_hits(self):
     G = self.G
     h, a = networkx.hits(G, tol=1.e-08)
     for (x, y) in zip(sorted(h.values()), self.G.h):
         assert_almost_equal(x, y, places=5)
     for (x, y) in zip(sorted(a.values()), self.G.a):
         assert_almost_equal(x, y, places=5)
Example #11
 def test_empty(self):
     G=networkx.Graph()
     assert_equal(networkx.hits(G),({},{}))
     assert_equal(networkx.hits_numpy(G),({},{}))
     assert_equal(networkx.hits_scipy(G),({},{}))
     assert_equal(networkx.authority_matrix(G).shape,(0,0))
     assert_equal(networkx.hub_matrix(G).shape,(0,0))
Example #12
    def __init__(self, all_user_check_ins, network, current_user):
        G = nx.DiGraph()
        for user in all_user_check_ins:
            for check_in in all_user_check_ins[user]:
                venue = check_in["venue_id"]
                if user not in G.nodes():
                    G.add_node(user)
                if venue not in G.nodes():
                    G.add_node(venue)
                if (user, venue) not in G.edges():
                    G.add_edge(user, venue, weight=1)
                else:
                    current_weight = G.get_edge_data(user, venue)["weight"]
                    G.add_edge(user, venue, weight=current_weight + 1)
        (hub_scores, authority_scores) = nx.hits(G)
        self.authority_scores = authority_scores
        self.user = current_user
        self.user_check_ins = all_user_check_ins[current_user]
        self.network = network

        friend_count = {}
        for user in network:
            friend_count[user] = len(network[user])

        self.friend_count = friend_count
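The add-or-increment loop in the constructor can lean on the fact that DiGraph.add_edge creates missing endpoints automatically; a condensed sketch under the same input assumptions (all_user_check_ins maps each user to check-ins carrying a "venue_id"):

import networkx as nx

def build_checkin_graph(all_user_check_ins):
    G = nx.DiGraph()
    for user, check_ins in all_user_check_ins.items():
        for check_in in check_ins:
            venue = check_in["venue_id"]
            if G.has_edge(user, venue):
                G[user][venue]['weight'] += 1      # bump the existing count
            else:
                G.add_edge(user, venue, weight=1)  # nodes are created on demand
    return G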
Example #13
 def test_hits(self):
     G=self.G
     h,a=networkx.hits(G,tol=1.e-08)
     for n in G:
         assert_almost_equal(h[n],G.h[n],places=4)
     for n in G:
         assert_almost_equal(a[n],G.a[n],places=4)
Example #14
def get_network_features_for_nodes(network):
    feature_dictionaries = []
    print("Calculating node degrees...")
    feature_dictionaries.append(dict(nx.degree(network)))
    feature_dictionaries.append(dict(nx.degree(network, weight='weight')))
    print("Calculating degree centrality...")
    feature_dictionaries.append(dict(nx.degree_centrality(network)))
    print("Calculating closeness centrality...")
    feature_dictionaries.append(dict(nx.closeness_centrality(network)))
    feature_dictionaries.append(dict(nx.closeness_centrality(network, distance='distance')))
    print("Calculating betweenness centrality...")
    feature_dictionaries.append(dict(nx.betweenness_centrality(network)))
    feature_dictionaries.append(dict(nx.betweenness_centrality(network, weight='distance')))
    print("Calculating clustering coefficients...")
    feature_dictionaries.append(dict(nx.clustering(network)))
    feature_dictionaries.append(dict(nx.clustering(network, weight='weight')))
    print("Performing HITS and calculating hub scores...")
    feature_dictionaries.append(dict(nx.hits(network)[0]))
    print("Performing PageRank and calculating scores...")
    feature_dictionaries.append(dict(nx.pagerank(network, weight=None)))
    feature_dictionaries.append(dict(nx.pagerank(network, weight='weight')))

    features = {}
    for node in network.nodes():
        features[node] = [feature_dictionary[node] for feature_dictionary in feature_dictionaries]

    return features
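The per-node feature lists line up with the order the dictionaries were appended, so they drop straight into a pandas DataFrame; a sketch on a random graph (the graph and seed are invented for illustration):

import networkx as nx
import pandas as pd

network = nx.gnp_random_graph(10, 0.4, seed=1, directed=True)
features = get_network_features_for_nodes(network)
df = pd.DataFrame.from_dict(features, orient='index')
print(df.shape)  # one row per node, one column per feature dictionary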
Example #15
def pagerank_hits():
    conn = sqlite3.connect("zhihu.db")
    #following_data = pd.read_sql('select user_url, followee_url from Following where followee_url in (select user_url from User where agree_num > 50000) and user_url in (select user_url from User where agree_num > 50000)', conn)
    following_data = pd.read_sql(
        'select user_url, followee_url from Following where followee_url in (select user_url from User where agree_num > 10000) and user_url in (select user_url from User where agree_num > 10000)',
        conn)
    conn.close()

    G = nx.DiGraph()
    cnt = 0
    for d in following_data.iterrows():
        G.add_edge(d[1][0], d[1][1])
        cnt += 1
    print('links number:', cnt)
    pylab.figure(0)
    nx.draw_networkx(G)
    pylab.show()

    # PageRank
    pr = nx.pagerank(G)
    prsorted = sorted(pr.items(), key=lambda x: x[1], reverse=True)
    print('pagerank top 100:\n')
    for p in prsorted[:100]:
        print(p[0], p[1])

    # HITS
    hub, auth = nx.hits(G)
    print('hub top 100:\n')
    for h in sorted(hub.items(), key=lambda x: x[1], reverse=True)[:100]:
        print(h[0], h[1])
    print('\nauth top 100:\n')
    for a in sorted(auth.items(), key=lambda x: x[1], reverse=True)[:100]:
        print(a[0], a[1])
Example #16
def calc_nodesize(G, attr="a_score", min_size=1000, max_size=5000):
    if not isinstance(attr, str):
        normal_size = attr
        print("all size uniformed")
        return dict([(node_no, normal_size) for node_no in G.nodes])

    # nx.hits returns (hub_scores, authority_scores) in that order
    h_scores, a_scores = nx.hits(G)
    if attr == "a_score":
        use_vals = a_scores
    elif attr == "h_score":
        use_vals = h_scores
    else:
        print("invalid attribute")
        return

    max_val = max(use_vals.values())
    size_dict = dict()
    for node_no, node_attr in G.nodes(data=True):
        val = node_attr.get(attr)
        if val is None:
            size = min_size / 2
        else:
            size = (val / max_val) * (max_size - min_size) + min_size
        size_dict[node_no] = size
    return size_dict
Example #17
    def compute_hits(self):
        G = nx.DiGraph()
        file = open(self.pagerank_temp_file, 'r')
        for line in file:
            l = line.split(",")
            influenced = l[0].split("\n")[0]
            category = l[1].split("\n")[0]
            G.add_edge(influenced, category)

        pr = nx.hits(G)
        p1 = pr[0]
        sorted_p1 = sorted(p1.items(), key=operator.itemgetter(1))
        sorted_p1.reverse()

        p2 = pr[1]
        sorted_p2 = sorted(p2.items(), key=operator.itemgetter(1))
        sorted_p2.reverse()

        counter = 0
        to_write = ''
        for i in sorted_p2:
            to_write = to_write + str(i) + '\n'
            counter += 1
            if counter == 20:  # keep only the top 20 authorities
                break

        self.write_txt_file(self.normal_hits_file, to_write)
Example #18
 def test_hits(self):
     G=self.G
     h,a=networkx.hits(G,tol=1.e-08)
     for n in G:
         assert_almost_equal(h[n],G.h[n],places=4)
     for n in G:
         assert_almost_equal(a[n],G.a[n],places=4)
Example #19
def answer_nine():
    import operator
    hits = nx.hits(G2)
    return [
        a for a, b in sorted(
            hits[1].items(), key=operator.itemgetter(1), reverse=True)[0:5]
    ]
Example #20
def answer_seven():

    hits = nx.hits(G2)
    hub_score = hits[0]['realclearpolitics.com']
    authority_score = hits[1]['realclearpolitics.com']

    return (hub_score, authority_score)
Example #22
def compute_centrality(graph):
    centrality_values = nx.hits(graph)
    for node_id, centrality in centrality_values[0].items():
        graph.nodes[node_id]['hub'] = centrality
    for node_id, centrality in centrality_values[1].items():
        graph.nodes[node_id]['authority'] = centrality

    centrality_values = nx.pagerank(graph)
    for node_id, centrality in centrality_values.items():
        graph.nodes[node_id]['pagerank'] = centrality

    centrality_values = nx.in_degree_centrality(graph)
    for node_id, centrality in centrality_values.items():
        graph.nodes[node_id]['in_degree'] = centrality

    centrality_values = nx.out_degree_centrality(graph)
    for node_id, centrality in centrality_values.items():
        graph.nodes[node_id]['out_degree'] = centrality

    centrality_values = nx.closeness_centrality(graph)
    for node_id, centrality in centrality_values.items():
        graph.nodes[node_id]['closeness'] = centrality

    centrality_values = nx.betweenness_centrality(graph)
    for node_id, centrality in centrality_values.items():
        graph.nodes[node_id]['betweenness'] = centrality
Example #23
def calc_nodesize(G,attr="a_score",weight_key="weight",min_size=1000,max_size=5000,use_bhits=True):
	if not isinstance(attr, str):
		normal_size=max_size-min_size
		normal_size=attr
		#print "all size uniformed"
		return dict([(node_no,normal_size) for node_no in G.node])

	if attr=="a_score" or attr=="h_score":
		#a_scores,h_scores=nx.hits(G)  # wrong argument order; the HCG paper was submitted with this version...
		if use_bhits is True:
			h_scores,a_scores=nx.bhits(G,weight_key=weight_key)
		else:
			h_scores,a_scores=nx.hits(G,weight_key=weight_key)
		if attr=="a_score":
			use_vals=a_scores
		elif attr=="h_score":
			use_vals=h_scores

	if attr=="in_degree":
		use_vals=dict()
		for g in G:
			use_vals[g]=G.in_degree(g)

	max_val=max(use_vals.values())
	size_dict=dict()
	for node_no,node_attr in G.nodes(data=True):
		val=node_attr.get(attr)  # at paper-submission time, authority was divided by the max hub value
		if val is None:
			size=min_size/2
		else:
			size=(val/max_val)*(max_size-min_size) + min_size
		size_dict[node_no]=size
	return size_dict
Example #24
def salary_predictions():
    df = pd.DataFrame(index=nx.nodes(G),
                      columns=['Department', 'ManagementSalary'])
    for x, y in G.node.items():
        df.loc[x] = y

    df['Department'] = df['Department'].astype('float')
    df['ManagementSalary'] = df['ManagementSalary'].astype('float')

    hits = nx.hits(G)
    df['Hub'] = hits[0].values()
    df['Auth'] = hits[1].values()

    df_nan_free = df.dropna()

    X_train = df_nan_free['Hub'].to_numpy().reshape(-1, 1)

    y_train = df_nan_free['ManagementSalary']

    X_predict = df[(df.index.isin(df.dropna().index) == False)]['Hub']

    log_reg = LogisticRegression().fit(X_train, y_train)
    predict = log_reg.predict_proba(X_predict.to_numpy().reshape(-1, 1))

    return pd.Series(predict[:, 1], index=X_predict.index)
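Assigning hits[0].values() to a column relies on the dict iterating in the same order as the DataFrame index; going through a pd.Series aligns by node label instead and is the order-safe variant (a sketch against the same G and df):

hits = nx.hits(G)
df['Hub'] = pd.Series(hits[0])   # aligned on the index, not on position
df['Auth'] = pd.Series(hits[1])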
Example #25
 def test_hits(self):
     G=self.G
     h,a=networkx.hits(G,tol=1.e-08)
     for (x,y) in zip(sorted(h.values()),self.G.h):
         assert_almost_equal(x,y,places=5)
     for (x,y) in zip(sorted(a.values()),self.G.a):
         assert_almost_equal(x,y,places=5)
Example #26
def search():
    nodes = []
    links = []
    MDG = nx.read_pajek('./static/football.net')
    G = nx.DiGraph(MDG)
    gn = G.nodes()
    cluster = nx.clustering(G)
    pagerank = nx.pagerank(G)
    hub, authority = nx.hits(G)
    lgn = list(gn)
    print(G)
    for u in gn:
        node = {}
        node['name'] = u
        node['intro'] = '该节点的聚集系数为:'+str(format(cluster[u],'.3f'))+"<br>"+\
                        '该节点的pagerank为:'+str(format(pagerank[u],'.3f'))+"<br>"+\
                        '该节点的权威值为:'+str(format(authority[u],'.3f'))+"<br>"+ \
                        '该节点的中枢值为:' + str(format(hub[u], '.3f'))
        nodes.append(node)
    for u, v in G.edges():
        link = {}
        link['source'] = lgn.index(u)
        link['target'] = lgn.index(v)
        links.append(link)
    return render_template('Generation_Two/football.html',nodes = nodes,links = links)
Example #27
def plotHubsVsViews(G):
    hubs, authorities = nx.hits(G, max_iter=10000, tol=1.0e-10)
    print "Finished Hits algorithm"

    sortedHubs = sorted(hubs.items(), key=lambda tuple: tuple[1], reverse=True)
    sortedViews = sorted(views, key=views.get, reverse=True)

    top100Views = sortedViews[0:100]
    print(top100Views)
    for i in range(25):
        hub = sortedHubs[i][0]
        count = 0
        print(G.out_edges(hub))
        for edge in G.out_edges(hub):
            if edge[1] in top100Views:
                count += 1
        print("hub: " + str(hub) + " top 100: " + str(count))

    x, y = [views[i] for i in views], [hubs[j] for j in views]
    plt.xlabel('Views (log)')
    plt.ylabel('Hub Ranking')
    plt.title('Hub Ranking vs View Count For Comedy Videos')
    config = plt.gca()
    config.scatter(x, y)
    plt.show()
Example #28
def realhits(G):
	rhits = {}
	hubs , auths =nx.hits(G)
	for pack in hubs:
		rhits[pack] = (int(hubs[pack]*1000000000000),  int(auths[pack]*1000000000000))

	return rhits
Example #29
def prSH(name, graph):
    print('---- result of', name, '----')

    try:
        pagerank = nx.pagerank(graph, alpha=1.0)
    except nx.PowerIterationFailedConvergence:
        pagerank = 0
    surf = nx.pagerank(graph, alpha=0.85)
    print('name', 'pagerank : ', pagerank)
    print('name', 'sf=0.85  : ', surf)
    h, a = nx.hits(graph)
    print('name', 'HITS hub : ', h)
    print('name', 'HITS auth: ', a)
    print()

    fig = plt.figure()
    fig.suptitle('---- result of ' + name + ' ----')
    count = 1
    for k, d in {
            'pagerank': pagerank,
            'sf=0.85': surf,
            'HITS hub': h,
            'HITS auth': a
    }.items():
        if d == 0:
            count += 1
            continue

        plt.subplot(2, 2, count)
        plt.subplots_adjust(wspace=0.4, hspace=0.6)
        plt.title(k)
        plt.bar(range(len(d)), list(d.values()), tick_label=list(d.keys()))
        count += 1
    plt.savefig(name + '.png')
    plt.show()
Example #30
def plot_hubs_and_authorities(G):
    """
    creates a bar plot
    """
    ha = nx.hits(G)
    print(G, len(ha))
    hubs = {k:v for k,v in ha[0].items() if v!=0.0}
    auth = {k:v for k,v in ha[1].items() if v!=0.0}
    hubs.pop('None')
    # Sorting by value
    hubs = sorted(hubs.items(),key=lambda i:i[1],reverse=1)
    auth = sorted(auth.items(),key=lambda i:i[1],reverse=1)

    # Hubs
    fig = P.figure()
    ax = fig.add_subplot(111)
    labels = [l[0] for l in hubs]
    vals = [l[1] for l in hubs]
    pos = np.arange(len(hubs))
    ax.barh(pos,vals,align='center',height=.8,)
    P.xlabel('Hub Statistic')
    P.ylabel('Ministers')
    P.yticks(pos,labels,size='small')

    # Authorities
    fig = P.figure()
    ax = fig.add_subplot(111)
    auth = auth[:25] # only the most cited laws
    labels = ['$%s$'%l[0] for l in auth]
    vals = [l[1] for l in auth]
    pos = np.arange(len(auth))
    ax.barh(pos,vals,align='center',height=.8)
    P.xlabel('Authority Statistic')
    P.ylabel('Law id')
    P.yticks(pos,labels)
Example #31
    def node_treatments(self):
        G=self.graph
        tools.progress(0,100,"Degree de centralité")
        if len(nx.get_node_attributes(G,"centrality"))==0:
            nx.set_node_attributes(G,nx.degree_centrality(G),"centrality")

        tools.progress(20, 100, "Degree de betweeness")
        if len(nx.get_node_attributes(G, "betweenness")) == 0:
            nx.set_node_attributes(G, nx.betweenness_centrality(G), "betweenness")

        tools.progress(40, 100, "Degree de closeness")
        if len(nx.get_node_attributes(G, "closeness")) == 0:
            nx.set_node_attributes(G, nx.closeness_centrality(G), "closeness")

        tools.progress(60, 100, "Page rank")
        try:
            if len(nx.get_node_attributes(G, "pagerank")) == 0:
                nx.set_node_attributes(G, nx.pagerank(G), "pagerank")
        except Exception:
            pass

        tools.progress(80, 100, "Hub and autorities")
        try:
            if len(nx.get_node_attributes(G, "hub")) == 0:
                hub, aut = nx.hits(G)
                nx.set_node_attributes(G, hub, "hub")
                nx.set_node_attributes(G, aut, "autority")
        except Exception:
            pass

        #tools.progress(90, 100, "Excentricity")
        #nx.set_node_attributes(G, nx.eccentricity(G), "eccentricity")

        self.node_treatment=True
        tools.progress(100, 100, "Fin des traitements")
Example #32
def analyze_graph(G):    
    #centralities and node metrics
    out_degrees = G.out_degree()
    in_degrees = G.in_degree()
    betweenness = nx.betweenness_centrality(G)
    eigenvector = nx.eigenvector_centrality_numpy(G)
    closeness = nx.closeness_centrality(G)
    pagerank = nx.pagerank(G)
    avg_neighbour_degree = nx.average_neighbor_degree(G)
    redundancy = bipartite.node_redundancy(G)
    load = nx.load_centrality(G)
    hubs, authorities = nx.hits(G)  # nx.hits returns a (hubs, authorities) tuple
    vitality = nx.closeness_vitality(G)
    
    for name in G.nodes():
        G.node[name]['out_degree'] = out_degrees[name]
        G.node[name]['in_degree'] = in_degrees[name]
        G.node[name]['betweenness'] = betweenness[name]
        G.node[name]['eigenvector'] = eigenvector[name]
        G.node[name]['closeness'] = closeness[name]
        G.node[name]['pagerank'] = pagerank[name]
        G.node[name]['avg-neigh-degree'] = avg_neighbour_degree[name]
        G.node[name]['redundancy'] = redundancy[name]
        G.node[name]['load'] = load[name]
        G.node[name]['hub'] = hubs[name]
        G.node[name]['authority'] = authorities[name]
        G.node[name]['vitality'] = vitality[name]
        
    #communities
    partitions = community.best_partition(G)
    for member, c in partitions.items():
        G.node[member]['community'] = c   
    
    return G
Example #33
def Hits_algorithem(FinalsocialNetwork,nameList):
    G = nx.DiGraph()
    for x in FinalsocialNetwork:
        index = 0
        for y in x:
            if index != 0:
                if int(y) > 0:
                    G.add_edges_from([(x[0], nameList[index-1])])
            index = index + 1

    plt.figure(figsize=(100, 100))
    nx.draw_networkx(G, with_labels=True)

    hubs, authorities = nx.hits(G, max_iter= 50000000, normalized=True)
    # The in-built hits function returns two dictionaries keyed by nodes
    # containing hub scores and authority scores respectively.

    print("Hub Scores: ", hubs)
    for x in nameList:
        print(x ,":" , hubs.get(x))
    print("authorities Scores: ", authorities)
    for y in nameList:
        print(y, ":", authorities .get(y))

    return hubs,authorities
Example #34
    def calculate_centrality(self, mode='hits'):
        if mode == 'degree_centrality':
            a = nx.degree_centrality(self.G)
        else:
            h, a = nx.hits(self.G)

        max_a_u, min_a_u,max_a_v,min_a_v = 0, 100000, 0, 100000

        for node in self.G.nodes():
            if node[0] == "u":
                if max_a_u < a[node]:
                    max_a_u = a[node]
                if min_a_u > a[node]:
                    min_a_u = a[node]
            if node[0] == "i":
                if max_a_v < a[node]:
                    max_a_v = a[node]
                if min_a_v > a[node]:
                    min_a_v = a[node]

        for node in self.G.nodes():
            if node[0] == "u":
                if max_a_u-min_a_u != 0:
                    self.authority_u[node] = (float(a[node])-min_a_u) / (max_a_u-min_a_u)
                else:
                    self.authority_u[node] = 0
            if node[0] == 'i':
                if max_a_v-min_a_v != 0:
                    self.authority_v[node] = (float(a[node])-min_a_v) / (max_a_v-min_a_v)
                else:
                    self.authority_v[node] = 0
Example #35
def answer_seven():

    # Your Code Here
    hits = nx.hits(G2)

    return hits[0]['realclearpolitics.com'], hits[1][
        'realclearpolitics.com']  # Your Answer Here
Example #36
 def test_empty(self):
     numpy = pytest.importorskip('numpy')
     G = networkx.Graph()
     assert networkx.hits(G) == ({}, {})
     assert networkx.hits_numpy(G) == ({}, {})
     assert networkx.authority_matrix(G).shape == (0, 0)
     assert networkx.hub_matrix(G).shape == (0, 0)
Example #37
def answer_nine():

    # Your Code Here
    auts = nx.hits(G2)[1]

    return sorted(auts.keys(), key=lambda x: auts[x],
                  reverse=True)[:5]  # Your Answer Here
Example #38
def hits_influence(adj_matrix, k):
    """
    calculates the influence based on HITS
    Args:
        adj_matrix: data adjacency matrix
        k: budget to pick top-k influentail edges
    Returns:
        S: list of top-k influential edges 
    """
    B = get_graph(adj_matrix)
    S = []
    while len(S) < k:
        scores = nx.hits(B)
        hub = scores[0]
        auth = scores[1]

        for e1, e2 in B.edges():
            B[e1][e2]['influence'] = hub[e1] + hub[e2] + auth[e1] + auth[e2]

        # pick the single most influential edge, remove it, and record it
        max_e = max(B.edges(data=True), key=lambda x: x[2]['influence'])[:2]
        B.remove_edge(max_e[0], max_e[1])
        S.append(max_e)
    return S
Example #39
def answer_eight():

    # Your Code Here
    hubs = nx.hits(G2)[0]

    return sorted(hubs.keys(), key=lambda x: hubs[x],
                  reverse=True)[:5]  # Your Answer Here
Example #40
def p_original(fm, q_id, pqw, pwords, dls, pj, res, k1, k2, b, avdl, N, Nd, alpha, beta, gamma):
    """
    Fa il retrieve per la singola query
    :param fm: frequency matrix
    :param q_id: query id
    :param pqw: lista di query words per questa query
    :param pwords: dizionario delle parole
    :param dls: lunghezze dei documenti
    :param pj: indice dove scrivere in res
    :param res: matrice di output
    :param k1: param per bm25
    :param k2: param per bm25
    :param b: param per bm25
    :param avdl: lunghezza media dei documenti
    :param N: numero dei documenti
    :return: niente, salva la roba su res
    """
    # ignorare questa parte, fate finta che funzioni, alla fine avete il risultato di bm25
    actual_qw = []
    indexes_of_qws = []
    for qw in pqw:
        if qw in pwords:
            actual_qw.append(qw)
            indexes_of_qws.append(pwords[qw])

    indexes_of_qws = np.array(indexes_of_qws)
    tmp = np.arange(0, fm.shape[1])
    indexes_of_qws = np.in1d(tmp, indexes_of_qws)
    red_fm = fm[:, indexes_of_qws]
    idfs = np.ones(shape=(red_fm.shape[0], red_fm.shape[1]))
    tmp2 = np.copy(red_fm)
    tmp2[tmp2 != 0] = 1
    nis = tmp2.sum(axis=0)
    Ns = np.ones(red_fm.shape[1])*N
    idfs = np.log((Ns - nis + 0.5)/(nis + 0.5))
    Ks = k1*((1-b) + b*(dls/avdl))
    tf1s = red_fm*(k1 + 1)/(np.tile(Ks, (red_fm.shape[1], 1)).T + red_fm)
    tf2s = np.ones(red_fm.shape)
    ress = np.multiply(idfs, tf1s)
    ress = ress.sum(axis=1)
    idss = np.arange(0, red_fm.shape[0])

    idss_indx = np.argsort(ress)[::-1]

    idss = idss[idss_indx]
    ress = ress[idss_indx]

    idss_N = idss[0:Nd]
    ress_N = ress[0:Nd]

    G = get_graph_N(idss_N)

    try:
        hubs, auths = nx.hits(G)  # hubs first, authorities second
    except nx.exception.NetworkXError:
        auths = {str(nid): 1.0 for nid in idss_N}
        hubs = {str(nid): 1.0 for nid in idss_N}
Example #41
 def test_empty(self):
     try:
         import numpy
     except ImportError:
         raise SkipTest('numpy not available.')
     G=networkx.Graph()
     assert_equal(networkx.hits(G),({},{}))
     assert_equal(networkx.hits_numpy(G),({},{}))
     assert_equal(networkx.authority_matrix(G).shape,(0,0))
     assert_equal(networkx.hub_matrix(G).shape,(0,0))
Example #42
def hits_algo(adj_matrix, hub_score):
    # INPUT: Initial hub_score, authorities score and adjacency matrix.
    # OUTPUT: Converged hub and authority scores
    print("Running HITS algorithm...")
    graph = nx.to_networkx_graph(adj_matrix)
    # seed the power iteration with the supplied hub scores
    nstart = dict([(i, hub_score[i]) for i in range(len(hub_score))])
    return nx.hits(graph, nstart=nstart)
Example #43
def extract_social_features(df_comments):
    socialVector = np.empty([df_comments.shape[0],8])
    index = 0
        
    graph = networkx.DiGraph()   
    
    userdict = dict()
    for _, row in df_comments.iterrows():
        userdict[row['comment_id']] = row['author']
        
    for user in set(userdict.values()):
        graph.add_node(user)
        
         
    for _, row in df_comments.iterrows():
        if row['thread_root_id'] not in userdict:
            continue
        
        source = userdict[row['comment_id']]
        dest = userdict[row['thread_root_id']]
        if source == dest:
            continue
        graph.add_edge(source, dest)
    
    pageranker = networkx.pagerank(graph, alpha=0.85)
    hubs, auths = networkx.hits(graph)
    
    author_groupby = df_comments.groupby('author')
    user_age_dict = {}
    user_nr_posts_dict = {}
    for _,group in author_groupby:
        first_date = datetime.fromtimestamp(mktime(group.date.values[0]))
        last_date = datetime.fromtimestamp(mktime(group.date.values[-1]))
        diff = last_date - first_date
        days = diff.days
        user_age_dict[group.author.values[0]] = days + 1
        user_nr_posts_dict[group.author.values[0]] = len(group)
        
    for ix, row in df_comments.iterrows():            
        user = userdict[row['comment_id']]
        socialVector[ix][0] = graph.in_degree(user) #In Degree
        socialVector[ix][1] = graph.out_degree(user) #Out Degree
        socialVector[ix][2] = user_age_dict[user] #User Age
        socialVector[ix][3] = user_nr_posts_dict[user] #Nr of Posts
        socialVector[ix][4] = user_nr_posts_dict[user]/float(user_age_dict[user]) # Postrate
        socialVector[ix][5] = pageranker[user] # Pagerank
        socialVector[ix][6] = hubs[user] # Hub score
        socialVector[ix][7] = auths[user] # Authority score
    
        index += 1
        if index % 1000 == 0:
            print "extracted", index, "values"
        
                
    return socialVector
Example #44
def test_hits(testgraph):
    """
    Test hits algorithm
    """

    h0, a0 = nx.hits(testgraph[0], max_iter=100)
    h1, a1 = sg.links.hits(testgraph[1], max_iter=100)

    for u in testgraph[0].nodes_iter():
        assert abs(h0[u] - h1[u]) < 1e-5
        assert abs(a0[u] - a1[u]) < 1e-5
Example #45
def getHits(G):
    print("Calculating HITS")
    ret = []
    try:
        hubs, auths = nx.hits(G)
        for pack in hubs:
            ret.append((pack, (hubs[pack], auths[pack])))

    except nx.NetworkXError:
        print("HITS failed")
    return ret
Example #46
	def __link_analysis(self): # recalculates hub and authority rate

		# insert check for existing hub? reduce computational time
		nstart = {}
		for name in nx.nodes(self.OG):
			nstart[name] = self.OG.node[name]['normweightmax']
		
		h, a = nx.hits(self.OG, max_iter = 30)
		for node in self.OG.nodes():
			self.OG.node[node]['hub'] = h[node]
			self.OG.node[node]['authority'] = a[node]

		#for WOG
		nstart2 = {}
		for name in nx.nodes(self.WOG):
			nstart2[name] = self.WOG.node[name]['normweightmax']
		
		h2, a2 = nx.hits(self.WOG, max_iter = 30)
		for node in self.WOG.nodes():
			self.WOG.node[node]['hub'] = h2[node]
			self.WOG.node[node]['authority'] = a2[node]
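In this example (and the next) nstart is computed but never handed to nx.hits, so the 'normweightmax' seeding has no effect; nstart is a real parameter of nx.hits that seeds the power iteration, and passing it through would presumably look like this (a sketch of the apparent intent, not the original code):

h, a = nx.hits(self.OG, max_iter=30, nstart=nstart)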
Example #47
	def __link_analysis(self):
		nstart = {}
		for name in nx.nodes(self.OG):
			nstart[name] = self.OG.node[name]['normweightmax']
		
		pr = nx.pagerank(self.OG, weight = "normweightbymax")
		h, a = nx.hits(self.OG, max_iter = 30)

		for node in self.OG.nodes():
			self.OG.node[node]['pagerank'] = pr[node]
			self.OG.node[node]['hub'] = h[node]
			self.OG.node[node]['authority'] = a[node]
Example #48
def hits_example():
    n = 7
    #~ g = nx.wheel_graph(n)
    #~ pos = nx.spring_layout(g)
    g = nx.DiGraph()
    g.add_nodes_from(range(0,n))
    edge_list = [(0,1),(0,6),(0,5),(1,2),(1,6),(2,0),(2,1),(2,3),(3,4),(4,5),(4,6),(5,0),(5,3),(5,4)]
    g.add_edges_from(edge_list)
    hubs, auts = nx.hits(g)
    for i in range(0, n):
        print('node', i)
        print('  authority:', auts[i])
        print('  hubness:', hubs[i])
        print('  out:', list(g.successors(i)))
        print('  in:', list(g.predecessors(i)))
Example #49
def get_statistics():
    """
    uses data statistics to find the most important papers in the collection.
    """
    
    
    with open(MAIN_FOLDER + "network.pkl", "r") as f:
        network = pickle.load(f)

    betweenness_dict = networkx.betweenness_centrality(network.to_undirected())
    sorted_betweenness = sorted(betweenness_dict.items(), key=operator.itemgetter(1), reverse=True)
    betweenness = [x[0] for x in sorted_betweenness[:5]]

    pagerank_dict = networkx.pagerank(network.to_undirected())
    sorted_pagerank = sorted(pagerank_dict.items(), key=operator.itemgetter(1), reverse=True)
    pagerank = [x[0] for x in sorted_pagerank[:5]]

    hits_dict = networkx.hits(network.to_undirected())
    sorted_hits = sorted(hits_dict[0].items(), key=operator.itemgetter(1), reverse=True)
    hits = [x[0] for x in sorted_hits[:5]]

    in_degree_dict = network.in_degree()
    sorted_in_degree = sorted(in_degree_dict.items(), key=operator.itemgetter(1), reverse=True)
    in_degree = [x[0] for x in sorted_in_degree[:5]]

    community_dict = []
    for k in range(20):
        community_dict += list(networkx.algorithms.community.k_clique_communities(network.to_undirected(), 21 - k))
        if community_dict:
            break

    modules = []
    for index, community in enumerate(community_dict):
        modules.append([])
        for p in community:
            modules[index].append(p)

    statistics = {'in_degree': in_degree, 'betweenness': betweenness, 'hits': hits, 'pagerank': pagerank,
                  'modules': modules}

    with open(MAIN_FOLDER + "statistics.pkl", "wa") as f:
        pickle.dump(statistics, f)
Example #50
    def recalculate(self):
    
    
        authorities = nx.hits(self.DG)[1]
        
        # Convert to log scale
        authorities_log = {}
        
        for user,value in authorities.items():
        
            v = value * 10**30
        
            if value > 0:
                v = math.log(v)
            else:
                v = 0
        
            authorities_log[user] = abs(int(v))
            
            
        # Normalise to 100
        authorities_norm = {}
        max_user = max(authorities_log.items(), key=operator.itemgetter(1))[0]
        max_val = authorities_log[max_user]
        
        r = 100/float(max_val)
        
        for user,value in authorities_log.items():
            
            authorities_norm[user] = int(value*r)
 
        authorities_norm[max_user] = 100

        # Clear existing values
        
        sql = "UPDATE tracker_users set karma = 0"
        self.queryDB(sql, ())
        
        # Save values
        for user,karma in authorities_norm.items():
            sql = "UPDATE tracker_users SET karma = %s WHERE username = %s"
            self.queryDB(sql, (karma, user))
Example #51
 def hits(self,gr):
     h,a = nx.hits(gr,max_iter = 300)
     return h        
Example #52
def champion_matrix_rank(champion_matrix_df, criteron, norm=False):
    '''
    champion_matrix_df: pd.DataFrame, kill/death/assist counts between champions, 
                        (a,b)=i means a kills / killed by / assists b for i times 
    criteron: 'count', 'eigen', 'eigen_ratio', 'eigen_diff', 'pagerank', 'hits' 
    norm: True, False
    TODO: the eigen_ratio and eigen_diff criteria only make sense for the kill matrix, not for assists (will be forbidden in future)
    '''
    # must be float for linalg.eigs
    champion_matrix = champion_matrix_df.to_numpy().astype(float)  # as_matrix() was removed from pandas

    if norm == 'row': # use row sum to normalize
        row_sum = champion_matrix.sum(axis=1)
        champion_matrix = champion_matrix / row_sum[:, np.newaxis] # pandas broadcast
    
    # Count
    if criteron == 'count':
        print("Champion Rank by counts:")
        rank_df = pd.DataFrame()
        rank_df['champion'] = pd.Series(champion_matrix_df.index)
        rank_df['count'] = champion_matrix.sum(axis=1)
        print_full(rank_df.sort_values(by='count', ascending=False))

    # ED
    elif criteron == 'eigen':
        print("Champion Rank by eigenvector centrality")
        rank_df = pd.DataFrame()
        rank_df['champion'] = pd.Series(champion_matrix_df.index)
        # eigenvector with largest eigenvalue (k=1), sometimes all negative, sometimes all positive, absolute values unchanged
        rank_df['eigen_1'] = pd.DataFrame(abs(linalg.eigs(champion_matrix, k=1)[1]))
        print_full(rank_df.sort_values(by='eigen_1', ascending=False))

        #rank_df['eigen_2'] = pd.DataFrame(abs(linalg.eigs(champion_matrix, k=2)[1][:,1]))      
        #print_full(rank_df.sort_values(by='eigen_2', ascending=False))

    # ED Ratio, eigen(M)/eigen(M.T)
    elif criteron == 'eigen_ratio':
        print("Champion Rank by eigenvector centrality ratio")
        rank_df = pd.DataFrame()
        rank_df['champion'] = pd.Series(champion_matrix_df.index)
        rank_df['eigen'] = pd.DataFrame(abs(linalg.eigs(champion_matrix, k=1)[1]))
        rank_df['eigen_t'] = pd.DataFrame(abs(linalg.eigs(champion_matrix.transpose(), k=1)[1]))
        rank_df['eigen_ratio'] = rank_df['eigen'] / rank_df['eigen_t']
        print_full(rank_df.sort_values(by='eigen_ratio', ascending=False))  

    # ED Diff, eigen(M)-eigen(M.T)
    elif criteron == 'eigen_diff':
        print("Champion Rank by eigenvector centrality difference")
        rank_df = pd.DataFrame()
        rank_df['champion'] = pd.Series(champion_matrix_df.index)
        rank_df['eigen'] = pd.DataFrame(abs(linalg.eigs(champion_matrix, k=1)[1]))
        rank_df['eigen_t'] = pd.DataFrame(abs(linalg.eigs(champion_matrix.transpose(), k=1)[1]))
        rank_df['eigen_diff'] = rank_df['eigen'] - rank_df['eigen_t']
        print_full(rank_df.sort_values(by='eigen_diff', ascending=False))   

    # PageRank: similar results with eigenvector centrality
    elif criteron == 'pagerank':
        print("Champion Rank by PageRank")
        G = nx.DiGraph(champion_matrix)
        pr = nx.pagerank(G)
        rank_df = pd.DataFrame()
        rank_df['champion'] = pd.Series(champion_matrix_df.index)
        rank_df['pagerank'] = pd.DataFrame(data=list(pr.values()), index=list(pr.keys()))
        print_full(rank_df.sort_values(by='pagerank', ascending=False)) 

    # HITS:
    elif criteron == 'hits':
        print("Champion Rank by HITS")
        G = nx.DiGraph(champion_matrix)
        hub, auth = nx.hits(G)
        hub_rank_df = pd.DataFrame()
        hub_rank_df['champion'] = pd.Series(champion_matrix_df.index)
        hub_rank_df['hub'] = pd.DataFrame(data=list(hub.values()), index=list(hub.keys()))
        print_full(hub_rank_df.sort_values(by='hub', ascending=False))  
        auth_rank_df = pd.DataFrame()
        auth_rank_df['champion'] = pd.Series(champion_matrix_df.index)
        auth_rank_df['auth'] = pd.DataFrame(data=list(auth.values()), index=list(auth.keys()))
        print_full(auth_rank_df.sort_values(by='auth', ascending=False))    

    else:
        raise ValueError('Invalid criteron provided.')
Example #53
  max_edge_distance = -1
  meta_data = True
  for arg in sys.argv:
    m = re.match(r'--max=(\d+)', arg)
    if m: n = int(m.group(1))
    m = re.match(r'--max-edge-distance=(\d+)', arg)
    if m: max_edge_distance = int(m.group(1))
    if arg == '--no-meta-data': meta_data = False

  g = read_graph(sys.argv[1])
  if '--graph' in sys.argv:
    sub = g.subgraph([v[0] for v in top(g, nx.pagerank(g), n, add_meta_data = meta_data)])
    write_graph(remove_long_edges(sub, max_edge_distance))
  elif '--pagerank' in sys.argv:
    writer = csv.writer(sys.stdout)

    keys = ['title', 'rank', 'top_250_rank', 'year', 'kind', 'imdb_id', 'rating', 'genre', 'director']
    writer.writerow(keys)
    xunicode = lambda s: str(s) if s else ""
    for (node, score) in top(g, nx.pagerank(g), add_meta_data = meta_data):
      writer.writerow([xunicode(g.node[node].get(k, '')) for k in keys])

  elif '--hits' in sys.argv:
    # HITS
    (hubs, authorities) = nx.hits(g)
    pp(top(g, hubs, n, meta_data))
    pp(top(g, authorities, n, meta_data))
  elif '--degree' in sys.argv:
    # degree
    pp(top(g, nx.degree(g), n, meta_data))
Example #54
def compute_hubs_authorities(G):
    """For the given graph, compute hubs and authorities values for each node"""
    hits = nx.hits(G, max_iter=500)
    hubs = hits[0]
    authorities = hits[1]
    return hubs, authorities
Example #55
G.add_weighted_edges_from(links)


nx.draw_networkx(G, posNX, width = widths)
plt.pyplot.show()
plt.pyplot.savefig('flows.png')
out.close()


#nx.draw_networkx(G,pos, width = widths, alpha = .7 )
#plt.pyplot.show()
newPOS = []
for coords in iter(posIG):
    newPOS.append(posIG[coords])


betterGraph = ig.Graph(25, newLinks, directed = True)

betterGraph.es["width"] = widths

betterGraph.vs["label"] = range(25)

layout = newPOS

ig.plot(betterGraph, layout = layout, weighted =True)



hubDict, authDict = nx.hits(G)  # compute HITS once instead of twice
hitsHubs, hitsAuths = hubDict.values(), authDict.values()
Example #56
 def hits(self):
     '''h, a = hits() hub and authority'''
     return nx.hits(self._graph)
Example #57
 def test_hits_nstart(self):
     G = self.G
     nstart = dict([(i, 1./2) for i in G])
     h, a = networkx.hits(G, nstart = nstart)
Example #58
    rprr = nx.pagerank_numpy(R, alpha=damping, personalization=p, weight='weighted', dangling=p)
    rprr = dict(zip([G.nodes()[i] for i in rprr.keys()],rprr.values()))
    rppr[G.nodes()[nd]] = n(rprr)
    
    
nx.set_node_attributes(G, 'rppr', rppr)




#remove inorganic nodes for graph visualisation
G = graph.rm_nodes(G,inorganics)


uwdeg = G.degree()
h,a=nx.hits(G,tol=1e-8,normalized=False)
h=n(h)
a=n(a)
nx.set_node_attributes(G, 'degree', uwdeg)
nx.set_node_attributes(G, 'hubs', h)
nx.set_node_attributes(G, 'authorities', a)
Example #59
 def test_hits_not_convergent(self):
     G = self.G
     networkx.hits(G, max_iter=0)
Example #60
def hub_cent(G):
    centrality = nx.hits(G)
    return relevant_cen_results(centrality)