Code example #1
def compare_pagerank_algorithms(graph_file_name):
    algo_name = ["PageRank-DOK", "PageRank-CSR", "PageRank-NetworkX"]
    algo_fns = [construct_sparse_graph_dictionary_of_keys, construct_sparse_graph_compressed_sparse_row, construct_sparse_graph_networkx]

    for i in range(len(algo_name)):
        print "Testing:", algo_name[i]

        start_time = time.time()
        G = algo_fns[i](graph_file_name)
        end_time = time.time()

        time_for_graph_construction = end_time - start_time

        start_time = time.time()
        if algo_name[i] == "PageRank-NetworkX":
            nx.pagerank(G)
        else:
            compute_PageRank(G)

        end_time = time.time()
        time_for_pagerank_computation = end_time - start_time
        total_time = time_for_graph_construction + time_for_pagerank_computation


        print "Time for graph, page rank and total", time_for_graph_construction, time_for_pagerank_computation, total_time
Code example #2
File: main.py Project: leonardolepus/pubmad
def process_data():
    file = 'data/Homo_Sapiens/EVEX_relations_9606.tab'
    t1 = time.time()
    g = import_graph(file)
    t2 = time.time()
    print 'load relations', t2 - t1
    t= transitiveness_graph(g)
    t3 = time.time()
    print 'find transitive relations', t3 - t2
    #plot(g, 'relations')
    pr = nx.pagerank(t)
    t4 = time.time()
    print 'pagerank', t4 - t3
    for node in pr:
        t.node[node]['confirming_weight'] = pr[node]
    t5 = time.time()
    print 'write pr into graph', t5 - t4
    #plot(t, 'good_confirming_relations', 'confirming_weight')
    t_rev = reverse(t)
    t6 = time.time()
    print 'reverse t', t6 - t5
    pr_rev = nx.pagerank(t_rev)
    t7 = time.time()
    print 'pagerank', t7 - t6
    for node in pr:
        t.node[node]['predicting_weight'] = pr_rev[node]
    t8 = time.time()
    print 'write pr into graph', t8 - t7
    #plot(t, 'good_predicting_relations', 'predicting_weight')
    save_data(g, 'data/g')
    save_data(t, 'data/t')
    save_data(pr, 'data/conf_pr')
    save_data(pr_rev, 'data/pre_pr')
Code example #3
def create_schedule_graph(seasons,teams):
    #let's create a directed graph of teams who played each other so we can compute a PageRank
    #teams =[t[1] for t in teams[1:]]
    t_lookup={int(t[0]):t[1] for t in teams[1:]}
    print teams
    teams =[int(t[0]) for t in teams[1:]]
    pr_hist={}
    pr_w_hist={}
    for year in years:
        G=nx.DiGraph()
        G.add_nodes_from(teams)
        G_w=G.copy()
        games=seasons[np.where((seasons['Season']==year))]

        for game in games:
            #add a directed endorsement from the losing team to the winning team
            G.add_weighted_edges_from([(game['Lteam'],game['Wteam'],1)])
            # weight by the winning/losing score ratio, squared
            G_w.add_weighted_edges_from([(game['Lteam'],game['Wteam'],(game['Wscore']/game['Lscore'])**2)])
        pr=nx.pagerank(G, alpha=0.9)
        pr_w=nx.pagerank(G_w, alpha=0.9)
        ranks=[]
        ranks_w=[]
        for r in pr:
            ranks.append((t_lookup[r],pr[r]))
        for r in pr_w:
            ranks_w.append((t_lookup[r],pr_w[r]))
        pr_hist[year]=pr
        pr_w_hist[year]=pr_w
    sorted_pr = sorted(ranks, key=lambda tup: tup[1],reverse=True)
    sorted_pr_w = sorted(ranks_w, key=lambda tup: tup[1],reverse=True)
    return pr_hist,pr_w_hist
Code example #4
File: windwild_rec.py Project: windwild/CodeBox
def calc():
    filepath = "/Users/windwild/Google Drive/CUHK/sina_data/user_rel.csv"
    G = nx.DiGraph()
    fp = open(filepath,"r") 
    fp.readline()
    array_list = {}
    for i in range(0,10):
        array_list['fui'] = {}
        line = fp.readline()
        line_arr = line.split('"')
        uid = line_arr[0][:-1]
        
        line = line_arr[1]
        print line
        line = line.replace("u'","'")
        print line
        items = demjson.decode(line)
        for key in items:
            array_list[key] = items[key]
        #print items['fui']
        print uid,i
        
        for follow in array_list['fui']:
            
            G.add_edge(uid,follow)
        
    fp.close()

    print nx.pagerank(G)
Code example #5
 def _ppage_rank(self, u, v):
     personal = {nid: 0 for nid in self.IG.node}
     personal[u] = 1.0
     r_uv = nx.pagerank(self.IG, personalization=personal).get(v)
     personal[u] = 0
     personal[v] = 1.0
     r_vu = nx.pagerank(self.IG, personalization=personal).get(u)
     return r_uv + r_vu
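
For reference, a small self-contained sketch (not from the original project) of the personalized-PageRank proximity used above: restart mass is concentrated on a single node and the two directional scores are summed.

import networkx as nx

def ppr(G, source):
    personal = {n: 0 for n in G}
    personal[source] = 1.0          # restart only at `source`
    return nx.pagerank(G, personalization=personal)

G = nx.path_graph(5)                # 0 - 1 - 2 - 3 - 4
u, v = 0, 3
r_uv = ppr(G, u)[v]                 # rank that v receives when restarts favour u
r_vu = ppr(G, v)[u]
print(r_uv + r_vu)                  # symmetric proximity, as in _ppage_rank above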
Code example #6
File: graph_util.py Project: RonansPrograms/gothub
    def stats(self, g, degree, pagerank, bc):
        """Compute the requested stats and return as a dict."""
        options = self.options
        stats = {}
        if options.partial:
            seen = self.seen
            empty = self.empty
            nonempty_seen = [user for user in seen.keys() if user not in empty]

        # create degree CDF
        if degree:
            if options.partial:
                # The way below for computing degree only considers those for which
                # we have all the data.
                degree = [seen[user] for user in seen.keys()]
            else:
                # The method below considers all nodes, including those for which 
                # we may not have all the data.  Use w/caution on partial data sets.
                degree = nx.degree(g).values()
            stats["degree"] = {
                "type": "array",
                "values": degree
            }

        # compute PageRank.  Note: we have to ignore empties.
        if pagerank:
            start = time.time()
            if options.partial:
                pagerank_dict = nx.pagerank(g)
                nonempty_seen = [user for user in seen.keys() if user not in empty]
                pagerank = ([pagerank_dict[user] for user in nonempty_seen])
            else:
                # Assumption: no isolated nodes
                pagerank = nx.pagerank(g).values()
            duration = time.time() - start
            print "time to gen pagerank: %0.3f sec" % duration
            #print pagerank
            stats["pagerank"] = {
                "type": "array",
                "values": pagerank
            }
        
        # compute betweenness centrality  - should empties get added back to CDF?
        if bc:
            start = time.time()
            bc_dict = nx.betweenness_centrality(g)
            if options.partial:
                bc = ([bc_dict[user] for user in nonempty_seen])
            else:
                bc = bc_dict.values()
            duration = time.time() - start
            print "time to gen betweenness centrality: %0.3f sec" % duration
            stats["bc"] = {
                "type": "array",
                "values": bc
            }

        return stats
Code example #7
    def train_weight(self,doc):
        self.type = 1
        self.key_sentences = []
        self.key_weight = []
        
        (self.sentences,self.words_all_filters,weight) = self.seg.segment_sentences_weight(text=doc)
        #print doc['title']
        (title) = self.seg.segment_sentence(sentence=doc['title'])
        #print title
        source = self.words_all_filters
        sim_func = self._get_similarity_standard
        
        sentences_num = len(source)
        
        self.graph = np.zeros((sentences_num, sentences_num))

        #import pdb

        weights = []
        summary = 0
        #print self.sentences[0]
        #pdb.set_trace()
        for x in xrange(sentences_num):
            lanlan = sim_func(source[x], title[0])
            w=weight[x]*lanlan
            weights.append(x)
            weights.append(w)
            summary+=w
            #print w
        if summary!=0 :
            dicts = {weights[i]: weights[i+1]/summary for i in range(0, len(weights), 2)}

        #pdb.set_trace()
        for x in xrange(sentences_num):
            for y in xrange(x, sentences_num):
                similarity = sim_func(source[x], source[y])
                self.graph[x, y] = similarity
                self.graph[y, x] = similarity
        #pdb.set_trace()        
#         for x in xrange(sentences_num):
#             row_sum = np.sum(self.graph[x, :])
#             if row_sum > 0:
#                 self.graph[x, :] = self.graph[x, :] / row_sum
                
        nx_graph = nx.from_numpy_matrix(self.graph)
        if summary!=0:
            scores = nx.pagerank(G=nx_graph,personalization=dicts)
        else:
            scores = nx.pagerank(G=nx_graph)
        sorted_scores = sorted(scores.items(), key = lambda item: item[1], reverse=True)
        
        # print sorted_scores
        
        for index, _ in sorted_scores:
            self.key_sentences.append(self.sentences[index])
            self.key_weight.append(weight[index])
Code example #8
 def pagerank(self, edge_weights={}, context=None, context_weight=10):
     G = self.graphs.unify(edge_weights)
     if not context:
         return nx.pagerank(G)
     else:
         weights = {}
         for k in G.nodes():
             weights[k] = 1
         weights[context] = context_weight
         return nx.pagerank(G, personalization=weights)
Code example #9
File: test_pagerank.py Project: jklaise/networkx
    def test_pagerank(self):
        G = self.G
        p = networkx.pagerank(G, alpha=0.9, tol=1.e-08)
        for n in G:
            assert_almost_equal(p[n], G.pagerank[n], places=4)

        nstart = dict((n, random.random()) for n in G)
        p = networkx.pagerank(G, alpha=0.9, tol=1.e-08, nstart=nstart)
        for n in G:
            assert_almost_equal(p[n], G.pagerank[n], places=4)
Code example #10
File: predict.py Project: DevJac/march_madness_2016
def team_strength(winner_losers):
    games_and_weights = defaultdict(int)
    for winner, loser, weight in winner_losers:
        games_and_weights[winner, loser] += weight
    win_graph = nx.DiGraph()
    loss_graph = nx.DiGraph()
    for (winner, loser), weight in games_and_weights.iteritems():
        win_graph.add_edge(loser, winner, weight=weight)
        loss_graph.add_edge(winner, loser, weight=weight)
    loss_ranks = nx.pagerank(loss_graph)
    return {k: v - loss_ranks[k] for k, v in nx.pagerank(win_graph).iteritems()}
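
The snippet above is Python 2 (`iteritems`). A Python 3 sketch of the same win/loss PageRank differential on toy data (team names and weights are illustrative):

import networkx as nx

results = [("A", "B", 1), ("B", "C", 1), ("A", "C", 1), ("C", "A", 2)]  # (winner, loser, weight)
win_graph, loss_graph = nx.DiGraph(), nx.DiGraph()
for winner, loser, weight in results:
    win_graph.add_edge(loser, winner, weight=weight)   # losers endorse winners
    loss_graph.add_edge(winner, loser, weight=weight)
loss_ranks = nx.pagerank(loss_graph)
strength = {k: v - loss_ranks[k] for k, v in nx.pagerank(win_graph).items()}
print(strength)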
Code example #11
File: socialnet.py Project: freephys/mylab
def run(edges, show=False):
    G = nx.DiGraph()
    #  G.add_weighted_edges_from([('A','B',0.5),('A','C',0.5)])
    G.add_edges_from(edges)
    if show:
        nx.draw(G, pos=nx.spring_layout(G))
        plt.show()
        nx.write_dot(G, './graph.dot')
        # dot -n -Tpng graph.dot >graph.png
    #  print nx.hits(G, max_iter=10**3)  #tol=1e-4)
    print nx.pagerank(G)
Code example #12
    def test_pagerank(self):
        G = self.G
        p = networkx.pagerank(G, alpha=0.9, tol=1.0e-08)
        for n in G:
            assert_almost_equal(p[n], G.pagerank[n], places=4)

        nstart = dict((n, random.random()) for n in G)
        p = networkx.pagerank(G, alpha=0.9, tol=1.0e-08, nstart=nstart)
        for n in G:
            assert_almost_equal(p[n], G.pagerank[n], places=4)

        assert_raises(networkx.NetworkXError, networkx.pagerank, G, max_iter=0)
Code example #13
File: build_graph.py Project: diamrem/twitter_chn
def build_graph():
    pair_list = TwitterUser.get_top_100_pair()
    DG = nx.DiGraph()
    DG.add_edges_from([(foer, twitter_user) for twitter_user, foer in
        pair_list])
    betweenness = nx.betweenness_centrality(DG)
    closeness = nx.closeness_centrality(DG)
    edge_betweenness = nx.edge_betweenness(DG)
    clustering_co = nx.clustering(nx.Graph(DG))
    page_rank = nx.pagerank(DG)
    for twitter_id in DG.nodes():
        t = TwitterUser.get_by_id(twitter_id)
        node = DG.node[twitter_id]
        node['user_id'] = t.user_id
        node['label'] = t.scrn_name
        node['follower_count'] = t.foer_cnt
        node['friend_count'] = t.friend_cnt
        node['status_count'] = t.status_cnt
        node['location'] = t.location
        node['verified'] = t.verified
        node['twitter_age'] = (date.today() - t.created_at).days
        node['daily_tweet'] = t.status_cnt*1.0/node['twitter_age']
        node['indegree'] = len([(id, foer) for id, foer 
            in pair_list if id == twitter_id])
        node['outdegree'] = len([(id, foer) for id, foer 
            in pair_list if foer == twitter_id])
        node['cluster'] = clustering_co[twitter_id]
        node['betweenness'] = betweenness[twitter_id]
        node['closeness'] = closeness[twitter_id]
        node['page_rank'] = page_rank[twitter_id]
    for out_n, in_n in DG.edges():
        DG[out_n][in_n]['edge_betweenness'] = edge_betweenness[(out_n,in_n)]

    return DG
Code example #14
File: RoleMining.py Project: kwerenda/role-mining
 def find_rolesX(community):
     pr = nx.pagerank(community)
     pr_vals = pr.values()
     m, sd = mean(pr_vals), std(pr_vals)
     leaders = [(n, p) for n, p in pr.items() if p > m + 1 * sd]
     outermosts = [(n, p) for n, p in pr.items() if p < m - 1 * sd]
     return leaders, outermosts
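
A hypothetical usage sketch for the function above; `mean` and `std` are taken from NumPy (the original presumably imports them the same way), and the built-in karate-club graph stands in for a detected community.

import networkx as nx
from numpy import mean, std

community = nx.karate_club_graph()
pr = nx.pagerank(community)
pr_vals = list(pr.values())
m, sd = mean(pr_vals), std(pr_vals)
leaders = [(n, p) for n, p in pr.items() if p > m + 1 * sd]      # unusually central nodes
outermosts = [(n, p) for n, p in pr.items() if p < m - 1 * sd]   # peripheral nodes
print(leaders)
print(outermosts)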
Code example #15
File: test_pagerank.py Project: jklaise/networkx
 def test_personalization(self):
     G = networkx.complete_graph(4)
     personalize = {0: 1, 1: 1, 2: 4, 3: 4}
     answer = {0: 0.23246732615667579, 1: 0.23246732615667579, 2: 0.267532673843324, 3: 0.2675326738433241}
     p = networkx.pagerank(G, alpha=0.85, personalization=personalize)
     for n in G:
         assert_almost_equal(p[n], answer[n], places=4)
Code example #16
File: test_pagerank.py Project: jklaise/networkx
 def test_incomplete_personalization(self):
     G = networkx.complete_graph(4)
     personalize = {3: 1}
     answer = {0: 0.22077931820379187, 1: 0.22077931820379187, 2: 0.22077931820379187, 3: 0.3376620453886241}
     p = networkx.pagerank(G, alpha=0.85, personalization=personalize)
     for n in G:
         assert_almost_equal(p[n], answer[n], places=4)
Code example #17
File: network_test.py Project: doufunao/MongoJs
def test1():
	f = open('Results/relation_top5.csv', 'rb')
	G = nx.read_adjlist(f, delimiter = ',')
	x = nx.pagerank(G, alpha = 0.9)
	sort_x = sorted(x.items(), key=lambda item: item[1], reverse=True)
	for a1, a2 in sort_x:
		print(str(a1) + ' : ' + str(a2))
Code example #18
File: summarize.py Project: esclee/visio
def return_summary(text):
    sent_list = nltk.tokenize.sent_tokenize(text)

    # deletes sentences that consist only of punctuation
    sent_list = [sent for sent in sent_list if checkValidSent(sent)]

    # makes a list of paragraphs - used to count the number of paragraphs
    pg = text.splitlines(0)
    pg = [par for par in pg if par != '']

    baseline = len(text)

    # if there are too many sentences, this will pick 150 random sentences
    if len(sent_list) > 150:
        sent_list = random.sample(sent_list, 150)
        baseline = sum([len(sent) for sent in sent_list])

    # makes graph to use for pagerank
    text_graph = buildGraph(sent_list)

    sent_scores = nx.pagerank(text_graph, weight = 'weight')

    sent_sorted = sorted(sent_scores, key = sent_scores.get, reverse = True)
    summary = ""
    scount = 0
    # selects a number of the most salient sentences
    while sent_sorted:
        sent = sent_sorted.pop(0)
        scount += 1
        if 4 * (len(sent) + len(summary)) >= baseline:
            break
        if scount > len(pg): break
        summary += sent + ' '

    return summary
Code example #19
File: networker_old.py Project: csrhau/sandpit
def pagerank(graph, records):
    """ Reports on the highest (Page)Ranked individuals in the graph """
    pr = nx.pagerank(graph)
    nodes = sorted(pr.items(), key=operator.itemgetter(1), reverse=True)[:records]
    print("Page Rank - top {} individuals".format(records))
    for n in nodes:
        print("  {:30}:\t{}".format(n[0], n[1]))
Code example #20
File: graph.py Project: himanshusapra9/TextNet
def pagerank_example():
    n = 7
    g = nx.wheel_graph(n)
    pos = nx.spring_layout(g)

    g = nx.DiGraph()
    g.add_nodes_from(range(0,n))

    g.add_edge(0,1)
    g.add_edge(0,6)
    g.add_edge(0,5)
    g.add_edge(1,2)
    g.add_edge(1,6)
    g.add_edge(2,0)
    g.add_edge(2,1)
    g.add_edge(2,3)
    g.add_edge(3,4)
    g.add_edge(4,5)
    g.add_edge(4,6)
    g.add_edge(5,0)
    g.add_edge(5,3)
    g.add_edge(5,4)

    ranks = nx.pagerank(g)
    for n in range(0,n):
        print 'node',n
        print '  rank:',ranks[n]
        print '  out edges:',g.neighbors(n)
        if g.neighbors(n):
            print '  per edge:',ranks[n]/len(g.neighbors(n))
        else:
            print '  per edge: null'

    draw_with_centrality(g, layout=pos)
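
Note that on NetworkX 2.x and later, `DiGraph.neighbors` returns an iterator, so the `len(g.neighbors(n))` calls above no longer work. A sketch of the same per-edge rank split using `out_degree` instead (toy graph, not the original):

import networkx as nx

g = nx.DiGraph([(0, 1), (0, 2), (1, 2), (2, 0)])
ranks = nx.pagerank(g)
for n in g.nodes():
    out_deg = g.out_degree(n)
    per_edge = ranks[n] / out_deg if out_deg else None   # rank pushed along each out-edge
    print(n, ranks[n], per_edge)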
Code example #21
File: mmrelevance.py Project: giahy2507/convae
def mmr_pagerank(document_list,len_sen_mat, threshold_t, max_word, mode):
    n = len(document_list)
    sim_matrix = build_sim_matrix(document_list, mode)

    g = nt.Graph()

    for i in range(n):
        for j in range(i+1,n,1):
            g.add_edge(i,j, distance_edge = sim_matrix[i,j])

    page_rank = nt.pagerank(g, weight = "distance_edge")

    score = []
    for i in range(n):
        score.append(page_rank[i])

    summary = []

    threshold_t = np.average(sim_matrix[0,:])

    while (stopCondition(len_sen_mat,summary, max_word) == 0):
        s = np.argmax(score)
        score[s] = 0  # remove s from further consideration by zeroing its score
        if check_threshold_mmr_pagerank(sim_matrix,summary,s,threshold_t) == 1:
            summary.append(s)


    return summary
Code example #22
File: network.py Project: bchugit/Survivor-Project
def centrality_scores(vote_matrix, season_graph):
    deg = nx.degree(season_graph)
    deg = {k: round(v,1) for k,v in deg.iteritems()}

    close = nx.closeness_centrality(season_graph)
    close = {k: round(v,3) for k,v in close.iteritems()}

    btw = nx.betweenness_centrality(season_graph)
    btw = {k: round(v,3) for k,v in btw.iteritems()}

    eig = nx.eigenvector_centrality_numpy(season_graph)
    eig = {k: round(v,3) for k,v in eig.iteritems()}
    
    page = nx.pagerank(season_graph)
    page = {k: round(v,3) for k,v in page.iteritems()}

    # Add contestant placement (rank)
    order = list(vote_matrix.index)
    place_num = list(range(len(order)))
    place = {order[i]:i+1 for i in place_num}
    
    names = season_graph.nodes()

    # Build a table with centralities 
    table=[[name, deg[name], close[name], btw[name], eig[name], page[name], place[name]] for name in names]

    # Convert table to pandas df
    headers = ['name', 'deg', 'close', 'btw', 'eig', 'page', 'place']
    df = pd.DataFrame(table, columns=headers)
    df = df.sort_values(['page', 'eig', 'deg'], ascending=False)
    
    return df
Code example #23
File: graphs.py Project: Purdom/humanist
    def build(self, matrix, skim_depth=10):

        """
        Build graph, with PageRanks on nodes.

        :param matrix: A term matrix.
        :param skim_depth: The number of sibling edges.
        """

        # Register nodes and edges.
        for anchor in progress.bar(matrix.terms):

            n1 = matrix.text.unstem(anchor)

            # Heaviest pair scores:
            pairs = matrix.anchored_pairs(anchor).items()
            for term, weight in list(pairs)[:skim_depth]:

                n2 = matrix.text.unstem(term)
                self.graph.add_edge(n1, n2, weight=weight)

        # Compute PageRanks.
        ranks = nx.pagerank(self.graph)
        first = max(ranks.values())

        # Convert to 0->1 ratios.
        ranks = {k: v/first for k, v in ranks.items()}

        # Annotate the nodes.
        nx.set_node_attributes(self.graph, 'pagerank', ranks)
Code example #24
def build_diredge_and_pagerank():
	dirG = nx.DiGraph()
	#print "direction"
	#print post_author_dict
	#print post_subauthor_dict
	totalkey = post_author_dict.keys() + post_subauthor_dict.keys()
	for key in totalkey:
		if (key not in post_author_dict) or (key not in post_subauthor_dict):
			continue
		actor1_key = post_author_dict[key]
		actor1 = rev_author_dict[actor1_key]
		actor2_key_list = post_subauthor_dict[key]
		if len(actor2_key_list) == 1:
			#print "damn"
			continue
		for actor_key in actor2_key_list:
			#print "cool"
			if actor1_key == actor_key:
				continue
			actor2 = rev_author_dict[actor_key]
			#print actor1, actor2
			dirG.add_edge(actor1, actor2)
	pr = nx.pagerank(dirG)
	sort_pr = sorted(pr.keys(), key = lambda x :pr[x], reverse=True)
	print sort_pr[:10]
	return pr
Code example #25
File: util.py Project: maybefeicun/TextRank4ZH
def sort_sentences(sentences, words, sim_func = get_similarity, pagerank_config = {'alpha': 0.85,}):
    """将句子按照关键程度从大到小排序

    Keyword arguments:
    sentences         --  列表,元素是句子
    words             --  二维列表,子列表和sentences中的句子对应,子列表由单词组成
    sim_func          --  计算两个句子的相似性,参数是两个由单词组成的列表
    pagerank_config   --  pagerank的设置
    """
    sorted_sentences = []
    _source = words
    sentences_num = len(_source)        
    graph = np.zeros((sentences_num, sentences_num))
    
    for x in xrange(sentences_num):
        for y in xrange(x, sentences_num):
            similarity = sim_func( _source[x], _source[y] )
            graph[x, y] = similarity
            graph[y, x] = similarity
            
    nx_graph = nx.from_numpy_matrix(graph)
    scores = nx.pagerank(nx_graph, **pagerank_config)              # this is a dict
    sorted_scores = sorted(scores.items(), key = lambda item: item[1], reverse=True)

    for index, score in sorted_scores:
        item = AttrDict(index=index, sentence=sentences[index], weight=score)
        sorted_sentences.append(item)

    return sorted_sentences
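
`nx.from_numpy_matrix`, used above, is no longer available in NetworkX 3.x; on recent releases the same graph can be built with `from_numpy_array`. A toy sketch (the similarity values are made up):

import numpy as np
import networkx as nx

graph = np.array([[0.0, 0.5, 0.2],
                  [0.5, 0.0, 0.7],
                  [0.2, 0.7, 0.0]])        # symmetric sentence-similarity matrix
nx_graph = nx.from_numpy_array(graph)      # edge weights come from the matrix entries
scores = nx.pagerank(nx_graph, alpha=0.85)
print(sorted(scores.items(), key=lambda item: item[1], reverse=True))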
Code example #26
def pagerank_doc(abstr_path, file_name, file_names, omega, phi, ldamodel,
                 corpus, d=0.85, nfselect='027', num_topics=20, window=2):
    from utils import CiteTextRank
    from utils.tools import dict2list
    file_text = read_file(abstr_path, file_name)
    tagged_tokens = get_tagged_tokens(file_text)
    filtered_text = get_filtered_text(tagged_tokens)
    # edge_and_freq = get_edge_freq(filtered_text)
    # edge_features = add_lev_distance(edge_and_freq)#edge_freq_lev
    # edge_weight = calc_edge_weight(edge_features, omega)
    if 'KDD' in abstr_path:
        dataset = 'kdd'
    else:
        dataset = 'www'
    cite_edge_weight = CiteTextRank.sum_weight(file_name, doc_lmdt=omega[0], citing_lmdt=omega[1],
                                               cited_lmdt=omega[2], dataset=dataset, window=window)
    # print(cite_edge_weight)
    edge_weight = dict2list(cite_edge_weight)
    # print(edge_weight)
    graph = build_graph(edge_weight)
    node_list = list(graph.node)

    if 'KDD' in abstr_path:
        raw_node_features = read_file('./data/', 'KDD_node_features')
    else:
        raw_node_features = read_file('./data/', 'WWW_node_features')
    node_features = read_node_features(node_list, raw_node_features, file_name, nfselect=nfselect)
    node_weight = calc_node_weight(node_features, phi)
    # word_prob = get_word_prob(file_name, file_names, node_list, ldamodel, corpus, num_topics=num_topics)
    node_weight_topic = {}
    for node in node_list:
        node_weight_topic[node] = node_weight[node]# * word_prob[node]
    pr = nx.pagerank(graph, alpha=d, personalization=node_weight_topic)

    return pr, graph
Code example #27
File: generate_document.py Project: ankit141189/bing
def textrank(sentences):
    bow_matrix = CountVectorizer().fit_transform(sentences)
    normalized = TfidfTransformer().fit_transform(bow_matrix)
    similarity_graph = normalized * normalized.T
    nx_graph = nx.from_scipy_sparse_matrix(similarity_graph)
    scores = nx.pagerank(nx_graph)
    return sorted(((scores[i], i, s) for i, s in enumerate(sentences)), reverse=True)
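
A sketch of the same TF-IDF TextRank pipeline for current library versions, where `from_scipy_sparse_matrix` has been replaced by `from_scipy_sparse_array`; the sample sentences are illustrative.

import networkx as nx
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

def textrank_v3(sentences):
    bow_matrix = CountVectorizer().fit_transform(sentences)
    normalized = TfidfTransformer().fit_transform(bow_matrix)
    similarity_graph = normalized @ normalized.T          # sparse sentence-similarity matrix
    nx_graph = nx.from_scipy_sparse_array(similarity_graph)
    scores = nx.pagerank(nx_graph)
    return sorted(((scores[i], i, s) for i, s in enumerate(sentences)), reverse=True)

print(textrank_v3(["Graphs model pairwise relations.",
                   "PageRank scores nodes of a graph.",
                   "Pairwise relations form a graph."]))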
Code example #28
def main():
	preprocess.main()
	nodes = []
	sentences = []
	with open('sentences.txt') as f:
		while(True):
			line = f.readline()
			if(line=='\n' or line==''):
				break
			nodes.append(sentence_node(0,line.strip('\n')))
	print len(nodes)
	for x in range(len(nodes)):
		sentences.append(nodes[x].sentence)
	tfidf_vectorizer = TfidfVectorizer()
	tfidf_matrix = tfidf_vectorizer.fit_transform(sentences)
	similarity_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)
	G = nx.Graph()
	for x in range(len(nodes)):
		G.add_node(x)
	# G.add_nodes_from(nodes)
	for i in range(len(nodes)):
		for j in range(len(nodes)):
			if(i<j and similarity_matrix[i][j]!=0):
				G.add_edge(i,j,weight=similarity_matrix[i][j])
	pdb.set_trace()			
	for i in range(len(nodes)):
		if len(G[i]) == 0:
			print "No out edges"					
	pr = nx.pagerank(G,alpha=0.85)
	# print pr
	sorted_pr = sorted(pr.items(), key=operator.itemgetter(1), reverse=True)
	print sorted_pr[:10]
	for item in sorted_pr[:10]:
		print nodes[item[0]].sentence			
Code example #29
def calculate_trust_features(users, test_users, trusts):
  """ Calculates features related to statistics of user in the trust network.

      Args:
        users: dictionary of users.
        test_users: ids of users which are in test.
        trusts: nx.DiGraph object with the trust network.

      Returns:
        None. Changes are made in users dictionary.
  """
  for user in trusts: 
    if user not in users and user in test_users:
      # cold-start in test but in trust network
      users[user] = create_user(user)
  prank = pagerank(trusts)
  for user in users:
    if user not in trusts:
      users[user]['num_trustors'] = 0 
      users[user]['num_trustees'] = 0 
      users[user]['pagerank'] = 0
    else:
      users[user]['num_trustors'] = trusts.in_degree(user)
      users[user]['num_trustees'] = trusts.out_degree(user)
      users[user]['pagerank'] = prank[user]
Code example #30
def pagerank_hits():
    conn = sqlite3.connect("zhihu.db")     
    #following_data = pd.read_sql('select user_url, followee_url from Following where followee_url in (select user_url from User where agree_num > 50000) and user_url in (select user_url from User where agree_num > 50000)', conn)        
    following_data = pd.read_sql('select user_url, followee_url from Following where followee_url in (select user_url from User where agree_num > 10000) and user_url in (select user_url from User where agree_num > 10000)', conn)        
    conn.close()
    
    G = nx.DiGraph()
    cnt = 0
    for d in following_data.iterrows():
        G.add_edge(d[1][0],d[1][1])
        cnt += 1
    print 'links number:', cnt
    pylab.figure(0)
    nx.draw_networkx(G)
    pylab.show()

    # PageRank
    pr = nx.pagerank(G)
    prsorted = sorted(pr.items(), key=lambda x: x[1], reverse=True)
    print 'pagerank top 100:\n'
    for p in prsorted[:100]:
        print p[0], p[1]
    
    # HITS
    hub, auth = nx.hits(G)
    print 'hub top 100:\n'
    for h in sorted(hub.items(), key=lambda x: x[1], reverse=True)[:100]:
        print h[0], h[1]
    print '\nauth top 100:\n'    
    for a in sorted(auth.items(), key=lambda x: x[1], reverse=True)[:100]:     
        print a[0], a[1]
Code example #31
for i in clean_sentences:
    if len(i) != 0:
        v = sum([word_embeddings.get(w, np.zeros((100, )))
                 for w in i.split()]) / (len(i.split()) + 0.001)
    else:
        v = np.zeros((100, ))
    sentence_vectors.append(v)

# Similarity Matrix

# creating similarity matrix
sim_mat = np.zeros([len(sentences), len(sentences)])

# initialize the matrix with cosine similarity scores
for i in range(len(sentences)):
    for j in range(len(sentences)):
        if i != j:
            sim_mat[i][j] = cosine_similarity(
                sentence_vectors[i].reshape(1, 100),
                sentence_vectors[j].reshape(1, 100))[0, 0]

nx_graph = nx.from_numpy_array(sim_mat)
scores = nx.pagerank(nx_graph)

# Summary Extraction
ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)),
                          reverse=True)

# Extract top 10 sentences as the summary
for i in range(10):
    print(ranked_sentences[i][1])
Code example #32
def hbase_test():

    # Default return value
    default_return = {'nodes': [], 'edges': []}

    # Input sanity checks
    search = request.args.get('search', '')
    if search == None or search == "":
        print "SD> WARN: Search query is empty"
        return default_return
    elif isinstance(search, str):
        if search.isdigit():
            search = int(search)
        else:
            print "SD> WARN: Search should be a digit"
            return default_return

    search_str = str(search)
    # Establish contact with database
    cluster = Cluster(contact_points=['54.219.144.56'], )
    session = cluster.connect('harary')

    # Look for node in database
    community_id = session.execute(
        "SELECT community FROM node_community_table WHERE source = " +
        search_str)
    if len(community_id) == 0:
        print "SD> WARN: Could not find node " + search_str + " in database"
        return default_return

    community_str = str(community_id[0].community)
    print "SD> INFO: Node " + search_str + " was found in database with community " + community_str

    # Search for community members
    print "SD> INFO: Executing query: " + "SELECT * FROM node_community_table WHERE community = " + community_str + " ALLOW FILTERING"
    result = session.execute(
        "SELECT * FROM node_community_table WHERE community = " +
        community_str + " ALLOW FILTERING;")
    print "SD> INFO: Query result: " + str(
        len(result)) + " members were found for community " + community_str

    # Empty result scenario
    if len(result) == 0:
        return default_return

    # Extreme cases are truncated for practicality
    max_number_of_nodes = 2000
    if len(result) > max_number_of_nodes:
        print "SD> WARN: Excessive number of node (%i). Something is probably wrong.." % len(
            result)
        result = result[0:max_number_of_nodes]

    node_index = 0
    edge_index = 0

    # Allocate the number of nodes
    expected_number_of_nodes = len(result)
    nodes = [{
        'id': '0',
        'index': '0',
        'label': '',
        'community': 0,
        'x': 0,
        'y': 0,
        'size': 10
    } for k in range(expected_number_of_nodes)]

    # Sigma.js
    # edges = [{'id': '0', 'source':'0', 'target':'0'} for k in range(expected_number_of_nodes * expected_number_of_nodes)]
    # D3
    edges = [{
        'source': 100,
        'target': 1000,
        'id': 0
    } for k in range(expected_number_of_nodes * expected_number_of_nodes)]

    # Filter for visualization
    def filter(x):
        return len(x.target) < 50

    # Map ID to linear range for D3
    keys = [r.source for r in result if filter(r)]
    values = range(len(keys))
    dictionary = dict(zip(keys, values))

    # Add all nodes
    for node in result:
        if filter(node):
            nodes[node_index]['id'] = str(dictionary[node.source])
            nodes[node_index]['index'] = str(node.source)
            nodes[node_index]['community'] = node.community
            nodes[node_index]['label'] = "Node: " + str(node.source)
            nodes[node_index]['x'] = random.random()
            nodes[node_index]['y'] = random.random()
            node_index = node_index + 1
            if node.target != None:
                # Add all edges
                for target in node.target:
                    if target in keys:
                        edges[edge_index]['source'] = dictionary[node.source]
                        edges[edge_index]['target'] = dictionary[target]
                        edges[edge_index]['id'] = str(edge_index)
                        edge_index = edge_index + 1

    # Truncate excess
    nodes = nodes[0:node_index]
    edges = edges[0:edge_index]

    # Build graph from json
    G = json_graph.node_link_graph({
        'nodes': nodes,
        'links': edges
    }, False, True)
    DiG = nx.DiGraph(G)
    G = nx.Graph(G)

    # On the fly computation of properties on manageable sizes
    bet_cen = nx.betweenness_centrality(G)
    clo_cen = nx.closeness_centrality(G)
    eig_cen = nx.eigenvector_centrality(G)
    pr = nx.pagerank(DiG, alpha=0.9)
    deg = G.degree()
    com = community.best_partition(G)

    for node in nodes:
        node['betweenness'] = bet_cen[node['id']]
        node['closeness'] = clo_cen[node['id']]
        node['eigenvector'] = eig_cen[node['id']]
        node['pagerank'] = pr[node['id']]
        node['degree'] = deg[node['id']]
        node['community'] = com[node['id']]

    # Return json string
    return json.dumps({'nodes': nodes, 'edges': edges})
Code example #33
def answer_six():
    # Your Code Here
    ranks = nx.pagerank(G2)
    ranks = sorted(ranks.items(), key=lambda x: x[1], reverse=True)
    return [item[0] for item in ranks[:5]]  # Your Answer Here
Code example #34
File: applications-demo.py Project: lovre/netpy
tic = time()

for file in ['karate', 'java']:

    # Constructs a graph of real network

    G = read(file)

    # Prints out statistics of real network

    info(G)

    # Computes node centralities of real network

    g = nx.Graph(G)
    prs = nx.pagerank(g)
    cs = nx.clustering(g)

    dcs = nx.degree_centrality(G)
    ccs = nx.closeness_centrality(G)
    bcs = nx.betweenness_centrality(G)

    # Finds community structure of real network

    c = 0
    comms = {}
    for comm in algorithms.louvain(G).communities:
        for i in comm:
            comms[i] = c
        c += 1
Code example #35
        # initialize a StorageClient. Set its connection timeout to 3000ms, retry times to 3
        storage_client = StorageClient(meta_client, 3000, 3)
        # initialize a ScanEdgeProcessor to process scanned edge data
        scan_edge_processor = ScanEdgeProcessor(meta_client)
        # initialize a ScanVertexProcessor to process scanned vertex data
        scan_vertex_processor = ScanVertexProcessor(meta_client)

        space_to_read = sys.argv[3]
        if space_to_read not in meta_client.get_parts_alloc_from_cache().keys(
        ):
            raise Exception('spaceToRead %s is not found in nebula graph' %
                            space_to_read)

        # get argument return_cols, which is used in function scan_edge, scan_vertex, scan_part_edge, scan_part_vertex
        vertex_return_cols, edge_return_cols = get_return_cols(space_to_read)
        all_cols = True

        # initialize a Graph in NetworkX
        G = nx.Graph()
        # scan vertex data
        scan_vertex(space_to_read, vertex_return_cols, all_cols)
        # scan edge data
        scan_edge(space_to_read, edge_return_cols, all_cols)

        # print the pagerank value of each node in Graph G of NetworkX
        print('\npagerank value of each node in Graph G of NetworkX:')
        print(nx.pagerank(G))

    except Exception as x:
        logging.exception(x)
Code example #36
def function(input):

    if input == 1:
        clustering_coefficient = nx.clustering(G, weight='weight')
        clustering_coefficient = normalise(clustering_coefficient)
        train_keys, dev_keys, test_keys = create_train_test_dev_split(
            clustering_coefficient.keys())
        train_data, dev_data, test_data = write_train_test_dev(
            clustering_coefficient, train_keys, dev_keys, test_keys)
        write_to_file(train_data, baseDir + "train_clustering_coefficient.txt")
        write_to_file(dev_data, baseDir + "dev_clustering_coefficient.txt")
        write_to_file(test_data, baseDir + "test_clustering_coefficient.txt")

    if input == 2:
        betweenness_centrality = nx.betweenness_centrality(G,
                                                           normalized=True,
                                                           weight='weight')
        betweenness_centrality = normalise(betweenness_centrality)
        train_keys, dev_keys, test_keys = create_train_test_dev_split(
            betweenness_centrality.keys())
        train_data, dev_data, test_data = write_train_test_dev(
            betweenness_centrality, train_keys, dev_keys, test_keys)
        write_to_file(train_data, baseDir + "train_betweenness_centrality.txt")
        write_to_file(dev_data, baseDir + "dev_betweenness_centrality.txt")
        write_to_file(test_data, baseDir + "test_betweenness_centrality.txt")

    if input == 3:
        closeness_centrality = {}
        set_of_nodes = G.nodes()
        random.shuffle(set_of_nodes)
        subset_of_nodes = set_of_nodes[:100000]
        for node in subset_of_nodes:
            closeness_centrality[node] = nx.closeness_centrality(
                G, node, normalized=True, distance='weight')

        #closeness_centrality = nx.closeness_centrality(G,normalized=True,distance='weight')
        closeness_centrality = normalise(closeness_centrality)
        train_keys, dev_keys, test_keys = create_train_test_dev_split(
            closeness_centrality.keys())
        train_data, dev_data, test_data = write_train_test_dev(
            closeness_centrality, train_keys, dev_keys, test_keys)
        write_to_file(train_data, baseDir + "train_closeness_centrality.txt")
        write_to_file(dev_data, baseDir + "dev_closeness_centrality.txt")
        write_to_file(test_data, baseDir + "test_closeness_centrality.txt")

    if input == 4:
        average_neighbor_degree = nx.average_neighbor_degree(G,
                                                             weight='weight')
        average_neighbor_degree = normalise(average_neighbor_degree)
        train_keys, dev_keys, test_keys = create_train_test_dev_split(
            average_neighbor_degree.keys())
        train_data, dev_data, test_data = write_train_test_dev(
            average_neighbor_degree, train_keys, dev_keys, test_keys)
        write_to_file(train_data,
                      baseDir + "train_average_neighbor_degree.txt")
        write_to_file(dev_data, baseDir + "dev_average_neighbor_degree.txt")
        write_to_file(test_data, baseDir + "test_average_neighbor_degree.txt")

    if input == 5:
        degree_centrality = nx.degree_centrality(G)
        degree_centrality = normalise(degree_centrality)
        train_keys, dev_keys, test_keys = create_train_test_dev_split(
            degree_centrality.keys())
        train_data, dev_data, test_data = write_train_test_dev(
            degree_centrality, train_keys, dev_keys, test_keys)
        write_to_file(train_data, baseDir + "train_degree_centrality.txt")
        write_to_file(dev_data, baseDir + "dev_degree_centrality.txt")
        write_to_file(test_data, baseDir + "test_degree_centrality.txt")

    if input == 6:
        load_centrality = nx.load_centrality(G,
                                             normalized=True,
                                             weight='weight')
        load_centrality = normalise(load_centrality)
        train_keys, dev_keys, test_keys = create_train_test_dev_split(
            load_centrality.keys())
        train_data, dev_data, test_data = write_train_test_dev(
            load_centrality, train_keys, dev_keys, test_keys)
        write_to_file(train_data, baseDir + "train_load_centrality.txt")
        write_to_file(dev_data, baseDir + "dev_load_centrality.txt")
        write_to_file(test_data, baseDir + "test_load_centrality.txt")

    if input == 7:
        shortest_path_length = {}
        nodes_in_graph = G.nodes()
        random.shuffle(nodes_in_graph)
        subset_of_nodes = nodes_in_graph[:200000]
        for i in range(len(subset_of_nodes) - 1):
            key_1 = subset_of_nodes[i]
            key_2 = subset_of_nodes[i + 1]

            #print key_1, key_2
            try:
                #print nx.shortest_path_length(G,source=key_1,target=key_2)
                shortest_path_length[str(key_1) + "\t" +
                                     str(key_2)] = nx.shortest_path_length(
                                         G, source=key_1, target=key_2)
            except:
                #print 0
                shortest_path_length[str(key_1) + "\t" + str(key_2)] = 0
        '''
		shortest_path_length_dict = nx.shortest_path_length(G,weight='weight')
		shortest_path_length = {}
		for key_1 in shortest_path_length_dict:
			for key_2 in shortest_path_length_dict[key_1]:
				shortest_path_length[str(key_1)+"\t"+str(key_2)] = shortest_path_length_dict[key_1][key_2]
		shortest_patth_length = normalise(shortest_path_length)
		'''

        train_keys, dev_keys, test_keys = create_train_test_dev_split(
            shortest_path_length.keys())
        train_data, dev_data, test_data = write_train_test_dev(
            shortest_path_length, train_keys, dev_keys, test_keys)
        write_to_file(train_data, baseDir + "train_shortest_path_length.txt")
        write_to_file(dev_data, baseDir + "dev_shortest_path_length.txt")
        write_to_file(test_data, baseDir + "test_shortest_path_length.txt")

        #jaccard coefficient same for weighted and non-weighted
    if input == 9:
        katz_centrality = nx.katz_centrality(G,
                                             weight='weight',
                                             alpha=0.9,
                                             max_iter=100000)
        katz_centrality = normalise(katz_centrality)
        train_keys, dev_keys, test_keys = create_train_test_dev_split(
            katz_centrality.keys())
        train_data, dev_data, test_data = write_train_test_dev(
            katz_centrality, train_keys, dev_keys, test_keys)
        write_to_file(train_data, baseDir + "train_katz_centrality.txt")
        write_to_file(dev_data, baseDir + "dev_katz_centrality.txt")
        write_to_file(test_data, baseDir + "test_katz_centrality.txt")

    if input == 10:
        pagerank = nx.pagerank(G, weight='weight')
        pagerank = normalise(pagerank)
        train_keys, dev_keys, test_keys = create_train_test_dev_split(
            pagerank.keys())
        train_data, dev_data, test_data = write_train_test_dev(
            pagerank, train_keys, dev_keys, test_keys)
        write_to_file(train_data, baseDir + "train_pagerank.txt")
        write_to_file(dev_data, baseDir + "dev_pagerank.txt")
        write_to_file(test_data, baseDir + "test_pagerank.txt")

        #communicability same for weighted and non-weighted

    if input == 12:
        degree = G.degree(weight='weight')
        degree = normalise(degree)
        train_keys, dev_keys, test_keys = create_train_test_dev_split(
            degree.keys())
        train_data, dev_data, test_data = write_train_test_dev(
            degree, train_keys, dev_keys, test_keys)
        write_to_file(train_data, baseDir + "train_degree.txt")
        write_to_file(dev_data, baseDir + "dev_degree.txt")
        write_to_file(test_data, baseDir + "test_degree.txt")

    if input == 13:
        edges_in_graph = G.edges()
        random.shuffle(edges_in_graph)
        subset_of_edges = edges_in_graph[:100000]

        jaccard_coefficient = nx.jaccard_coefficient(G, ebunch=subset_of_edges)
        jaccard_coefficient_dict = {}
        for u, v, p in jaccard_coefficient:
            jaccard_coefficient_dict[str(u) + "\t" + str(v)] = p
        jaccard_coefficient_dict = normalise(jaccard_coefficient_dict)
        train_keys, dev_keys, test_keys = create_train_test_dev_split(
            jaccard_coefficient_dict.keys())
        train_data, dev_data, test_data = write_train_test_dev(
            jaccard_coefficient_dict, train_keys, dev_keys, test_keys)
        write_to_file(train_data,
                      baseDir + "train_jaccard_coefficient_dict.txt")
        write_to_file(dev_data, baseDir + "dev_jaccard_coefficient_dict.txt")
        write_to_file(test_data, baseDir + "test_jaccard_coefficient_dict.txt")
Code example #37
keepbooking = {v: k for k, v in bookkeeping.items()}

graph = nx.DiGraph()
graph.add_nodes_from(bookkeeping.keys())


# Parse through HTML responses and gather up possible links
def gather_links(baseURLKey, html):
    links = []
    for link in BeautifulSoup(html, "lxml", parse_only=SoupStrainer('a')):
        if hasattr(link, "href"):
            url = urljoin(bookkeeping[baseURLKey], link.get('href'))

            # Only add into the graph if the link is bookmarked
            if url in keepbooking:
                links.append((baseURLKey, keepbooking[url]))
    return links


# Loop through all links and construct the directed graph
for k in bookkeeping:
    with open('/webpages_raw/' + k, encoding='utf-8') as rawData:
        links = gather_links(k, rawData.read())
        graph.add_edges_from(links)

# Calculate PageRank
calculated_page_rank = nx.pagerank(graph)

with open('pageranks', 'w') as outfile:
    json.dump(calculated_page_rank, outfile)
Code example #38
def answer_six():
    import operator
    pr = sorted(nx.pagerank(G2, alpha = 0.85).items(), key = operator.itemgetter(1), reverse = True)
    
    return [i[0] for i in pr[0:5]]
Code example #39
File: textRank.py Project: palashc/CS671-NLP
def extractKeyphrases(text):
    #tokenize the text using nltk
    wordTokens = nltk.word_tokenize(text)

    #assign POS tags to the words in the text
    tagged = nltk.pos_tag(wordTokens)
    textlist = [x[0] for x in tagged]

    tagged = filter_for_tags(tagged)
    tagged = normalize(tagged)

    unique_word_set = unique_everseen([x[0] for x in tagged])
    word_set_list = list(unique_word_set)

    #this will be used to determine adjacent words in order to construct keyphrases with two words

    graph = buildGraph(word_set_list)

    #pageRank - initial value of 1.0, error tolerance of 0.0001
    calculated_page_rank = nx.pagerank(graph, weight='weight')

    #most important words in ascending order of importance
    keyphrases = sorted(calculated_page_rank,
                        key=calculated_page_rank.get,
                        reverse=True)

    #the number of keyphrases returned will be relative to the size of the text (a third of the number of vertices)
    aThird = len(word_set_list) / 3
    keyphrases = keyphrases[0:aThird + 1]

    #take keyphrases with multiple words into consideration as done in the paper - if two words are adjacent in the text and are selected as keywords, join them
    #together
    modifiedKeyphrases = set([])
    dealtWith = set(
        []
    )  #keeps track of individual keywords that have been joined to form a keyphrase
    i = 0
    j = 1
    while j < len(textlist):
        firstWord = textlist[i]
        secondWord = textlist[j]
        if firstWord in keyphrases and secondWord in keyphrases:
            keyphrase = firstWord + ' ' + secondWord
            modifiedKeyphrases.add(keyphrase)
            dealtWith.add(firstWord)
            dealtWith.add(secondWord)
        else:
            if firstWord in keyphrases and firstWord not in dealtWith:
                modifiedKeyphrases.add(firstWord)

            #if this is the last word in the text, and it is a keyword,
            #it definitely has no chance of being a keyphrase at this point
            if j == len(
                    textlist
            ) - 1 and secondWord in keyphrases and secondWord not in dealtWith:
                modifiedKeyphrases.add(secondWord)

        i = i + 1
        j = j + 1

    return modifiedKeyphrases
Code example #40
def answer_six():

    # Your Code Here
    pr = nx.pagerank(G2, alpha=0.85)
    return sorted(pr.keys(), key=lambda key: pr[key], reverse=True)[:5]
Code example #41
def answer_five():

    # Your Code Here
    pr = nx.pagerank(G2, alpha=0.85)
    return pr['realclearpolitics.com']
Code example #42
import networkx as nx

PATH = "/Users/Nandhini/Documents/Courses/CSCI 572 - IR Fall 20/Assignment 4/solr-7.7.0/../LATIMES/latimes/"

g = nx.read_edgelist("edge_dist_new.txt", create_using=nx.DiGraph())
pagerank = nx.pagerank(g,
                       alpha=0.85,
                       personalization=None,
                       max_iter=30,
                       tol=1e-06,
                       nstart=None,
                       weight='weight',
                       dangling=None)
prs = set()
for file, pr in pagerank.items():
    prs.add(pr)

print("Max", max(prs))
print("Min", min(prs))

with open("pagerank_new.txt", "w") as pg_file:
    for file, pr in pagerank.items():
        pg_file.write(PATH + file + "=" + str(pr) + "\n")

    pg_file.close()
Code example #43
deg_cc = pd.concat([deg, cc], axis=1)
deg_cc.columns = ("Degree", "CC")
deg_cc.groupby("Degree").mean().reset_index()\
    .plot(kind="scatter", x="Degree", y="CC", s=100)
plt.xscale("log")
plt.ylim(ymin=0)
plt.grid()
dzcnapy.plot("deg_cc")

# A study of centralities
dgr = nx.degree_centrality(G)
clo = nx.closeness_centrality(G)
har = nx.harmonic_centrality(G)
eig = nx.eigenvector_centrality(G)
bet = nx.betweenness_centrality(G)
pgr = nx.pagerank(G)
hits = nx.hits(G)

centralities = pd.concat(
    [pd.Series(c) for c in (hits[1], eig, pgr, har, clo, hits[0], dgr, bet)],
    axis=1)

centralities.columns = ("Authorities", "Eigenvector", "PageRank",
                        "Harmonic Closeness", "Closeness", "Hubs", "Degree",
                        "Betweenness")
centralities["Harmonic Closeness"] /= centralities.shape[0]

# Calculate the correlations for each pair of centralities
c_df = centralities.corr()
ll_triangle = np.tri(c_df.shape[0], k=-1)
c_df *= ll_triangle
Code example #44
File: pageRank.py Project: sgtrouge/Adsfluence
import networkx
import operator
f = open('user_retweet_list.txt', 'r')
G = networkx.Graph()
for line in f:
    splits = line.split()
    G.add_edge(splits[0], splits[1])
pr = networkx.pagerank(G)
print pr
print sorted(pr.items(), key=operator.itemgetter(1), reverse=True)[:10]
Code example #45
                                    (math.sqrt(len(set(train_graph.successors(a)))*len((set(train_graph.successors(b))))))
        return sim
    except:
        return 0
def cosine_for_followers(a,b):
    try:
        
        if len(set(train_graph.predecessors(a))) == 0  | len(set(train_graph.predecessors(b))) == 0:
            return 0
        sim = (len(set(train_graph.predecessors(a)).intersection(set(train_graph.predecessors(b)))))/\
                                     (math.sqrt(len(set(train_graph.predecessors(a))))*(len(set(train_graph.predecessors(b)))))
        return sim
    except:
        return 0

pr = nx.pagerank(train_graph, alpha=0.85)
##pickle.dump(pr,open('data/page_rank.p','wb'))
mean_pr=float(sum(pr.values())) / len(pr)


def compute_shortest_path_length(a,b):
    p=-1
    try:
        if train_graph.has_edge(a,b):
            train_graph.remove_edge(a,b)
            p= nx.shortest_path_length(train_graph,source=a,target=b)
            train_graph.add_edge(a,b)
        else:
            p= nx.shortest_path_length(train_graph,source=a,target=b)
        return p
    except:
Code example #46
File: NewCI_sir_cegans.py Project: SONGjiaxiu/NewCI
def Page_Rank(G):
    PageRank_Centrality = nx.pagerank(G, alpha=0.85)
    #print "PageRank_Centrality:", sorted(PageRank_Centrality.iteritems(), key=lambda d:d[1], reverse = True)
    return PageRank_Centrality
Code example #47
def calculatePageRank():
	pr = nx.pagerank(G, weight = 'weight')
	return pr
Code example #48
def create(request):
    if request.method == 'POST':
        data = request.POST['parag']
        paragraph = data
        text = data.replace('\n', '')
        data = text
        for k in text.split("\n"):
            text2 = re.sub(r"[^a-zA-Z0-9&]+", ' ', k)
        text = text2
        tokens = [t for t in text.split()]
        sr = stopwords.words('english')
        clean_tokens = tokens[:]
        for token in tokens:
            if token in stopwords.words('english'):

                clean_tokens.remove(token)
        freq = nltk.FreqDist(clean_tokens)

        s = [(k, freq[k]) for k in sorted(freq, key=freq.get, reverse=True)]
        title = s[0][0]
        search_queries = [
            sorted(freq.items(), key=lambda kv:
                   (kv[1], kv[0]), reverse=True)[0][0] + "  " +
            sorted(freq.items(), key=lambda kv:
                   (kv[1], kv[0]), reverse=True)[1][0]
        ]
        for query in search_queries:
            downloadimages(query, title)

        stop_words = stopwords.words('english')
        summarize_text = []
        # Step 1 - Read text and split it
        article = data.split(". ")
        sentences = []
        sentences_list = ''
        count_sentence = 0
        for sentence in article:
            count_sentence = count_sentence + 1
            sentences.append(sentence.replace("[^a-zA-Z]", " ").split(" "))
        sentences.pop()
        top_n = int(count_sentence / 3)
        # Step 2 - Generate similarity matrix across sentences
        sentence_similarity_martix = build_similarity_matrix(
            sentences, stop_words)
        # Step 3 - Rank sentences in the similarity matrix
        sentence_similarity_graph = nx.from_numpy_array(
            sentence_similarity_martix)
        scores = nx.pagerank(sentence_similarity_graph)
        # Step 4 - Sort the rank and pick top sentences
        ranked_sentence = sorted(
            ((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
        for i in range(top_n):
            summarize_text.append(" ".join(ranked_sentence[i][1]))
        # Step 5 - Of course, output the summarized text
        m = 1
        # Driver Code
        with open("visualizer/input/op.tsv", "w") as text_file:
            text_file.write("content" + "\t" + "val" + '\n')
            for i in summarize_text:
                sentences_list = sentences_list + i
                search_queries.append(i)
                text_file.write(i + "\t" + str(m) + '\n')
                m = m + 1
        emotion = predict()
        for query in search_queries:
            review = re.sub('[^a-zA-Z]', ' ', query)
            review = review.lower()
            review = review.split()
            ps = PorterStemmer()
            review = [
                ps.stem(word) for word in review
                if not word in set(stopwords.words('english'))
            ]
            review = ' '.join(review)
            downloadimages(review, title)

        fps = 0.2

        file_list = glob.glob(
            'visualizer/images/' + title +
            '/*.jpg')  # Get all the pngs in the current directory
        file_list_sorted = natsorted(file_list,
                                     reverse=False)  # Sort the images

        clips = [ImageClip(m).set_duration(5) for m in file_list_sorted]

        concat_clip = concatenate(clips, method="compose")
        concat_clip.write_videofile("visualizer/output/project.mp4", fps=fps)

        folder = 'visualizer/images/' + title + '/'
        for the_file in os.listdir(folder):
            file_path = os.path.join(folder, the_file)
            try:
                if os.path.isfile(file_path):
                    os.unlink(file_path)
                #elif os.path.isdir(file_path): shutil.rmtree(file_path)
            except Exception as e:
                print(e)
        textClip = gTTS(text=sentences_list, lang=language, slow=False)
        textClip.save("visualizer/output/voice.mp3")
        audioclip = AudioFileClip("visualizer/output/voice.mp3")
        my_clip = VideoFileClip('visualizer/output/project.mp4')
        audio_background = AudioFileClip('visualizer/emotions/' + emotion +
                                         '.mp3')
        new_audioclip = CompositeAudioClip(
            [audio_background.volumex(0.08),
             audioclip.volumex(1)])

        final_audio = CompositeAudioClip([new_audioclip])
        audio = afx.audio_loop(final_audio, duration=audioclip.duration)
        final_clip = my_clip.set_audio(audio)
        final_clip.write_videofile("visualizer/output/" + title + '.mp4')
        data = title
        file_path = 'visualizer/output/' + data + '.mp4'
        video = Video()
        video.data = paragraph
        video.name = data
        video.videofile = file_path
        video.save()
        return redirect(video.videofile.url)

    if request.method == 'GET':
        return render(request, 'index.html')
Code example #49
def get_graph_metrics(connectivity_vector) :
    
    # reshape into matrix
    connectivity_matrix = np.reshape(connectivity_vector, (90, 90))
    
    # convert to networkx graph
    connectivity_graph = nwx.from_numpy_matrix(connectivity_matrix)
    
    # convert to distance graph as some metrics need this instead
    distance_matrix = connectivity_matrix
    distance_matrix[distance_matrix == 0] = np.finfo(np.float32).eps
    distance_matrix = 1.0 / distance_matrix
    distance_graph = nwx.from_numpy_matrix(distance_matrix)
    
    # initialise vector of metrics
    metrics = np.zeros((21,))
    # fill the vector of metrics
    # 1 and 2: degree distribution
    degrees = np.sum(connectivity_matrix, axis = 1)
    metrics[0] = np.mean(degrees)
    metrics[1] = np.std(degrees)
    
    # 3 and 4: weight distribution
    weights = np.tril(connectivity_matrix, k = -1)
    metrics[2] = np.mean(weights)
    metrics[3] = np.std(weights)

    # 5: average shortest path length
    # transform weights to distances so this makes sense    
    metrics[4] = nwx.average_shortest_path_length(distance_graph, weight='weight')

    # 6: assortativity
    metrics[5] = nwx.degree_assortativity_coefficient(connectivity_graph, weight=None)
    
    # 7: clustering coefficient
    metrics[6] = nwx.average_clustering(connectivity_graph, weight='weight')
    
    # 8: transitivity
    metrics[7] = nwx.transitivity(connectivity_graph)
    
    # 9 & 10: local and global efficiency
    metrics[8] = np.mean(bct.efficiency_wei(connectivity_matrix, local=True))
    metrics[9] = bct.efficiency_wei(connectivity_matrix, local=False)
    
    # 11: mean per-node clustering coefficient
    metrics[10] = np.mean(list(nwx.clustering(connectivity_graph, weight='weight').values()))

    # 12 & 13: Betweenness centrality (shortest-path and current-flow)
    metrics[11] = np.mean(list(nwx.betweenness_centrality(distance_graph, weight='weight').values()))
    metrics[12] = np.mean(list(nwx.current_flow_betweenness_centrality(distance_graph, weight='weight').values()))

    # 14: Eigenvector centrality
    metrics[13] = np.mean(list(nwx.eigenvector_centrality(distance_graph, weight='weight').values()))

    # 15: Closeness centrality
    metrics[14] = np.mean(list(nwx.closeness_centrality(distance_graph, distance='weight').values()))

    # 16: PageRank
    metrics[15] = np.mean(list(nwx.pagerank(connectivity_graph, weight='weight').values()))

    # 17: Rich club coefficient
    metrics[16] = np.mean(list(nwx.rich_club_coefficient(connectivity_graph).values()))
    
    # 18: Density    
    metrics[17] = bct.density_und(connectivity_matrix)[0]
    
    # 19, 20, 21: Eccentricity, radius, diameter
    spl_all = dict(nwx.shortest_path_length(distance_graph, weight='weight'))
    eccs = np.zeros(90,)
    for i in range(90):
        eccs[i] = np.max(list(spl_all[i].values()))
        
    metrics[18] = np.mean(eccs)
    metrics[19] = np.min(eccs)
    metrics[20] = np.max(eccs)  
    
    return metrics
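
The function above inverts edge weights so that stronger connections become shorter distances before the path-based measures are computed. A minimal standalone illustration of that step (toy 4-node matrix with hypothetical values, assuming a NetworkX version that provides from_numpy_array):

import numpy as np
import networkx as nx

# toy symmetric connectivity matrix (hypothetical values)
W = np.array([[0.0, 0.8, 0.1, 0.0],
              [0.8, 0.0, 0.5, 0.2],
              [0.1, 0.5, 0.0, 0.9],
              [0.0, 0.2, 0.9, 0.0]])

D = W.copy()
D[D == 0] = np.finfo(np.float32).eps   # avoid division by zero
D = 1.0 / D                            # strong weight -> short distance
np.fill_diagonal(D, 0.0)               # no self-loops

G_dist = nx.from_numpy_array(D)
print(nx.average_shortest_path_length(G_dist, weight='weight'))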
コード例 #50
0
ファイル: util.py プロジェクト: ishine/NLP-1
    sentences         --  list of sentences
    words             --  list of lists; each sub-list corresponds to a sentence in `sentences` and holds its words
    sim_func          --  function that computes the similarity of two sentences, given their two word lists
    pagerank_config   --  configuration passed to pagerank
    """
    sorted_sentences = []
    _source = words
    sentences_num = len(_source)        
    graph = np.zeros((sentences_num, sentences_num))
    
    for x in xrange(sentences_num):
        for y in xrange(x, sentences_num):
            # _source[x] is the word-segmented sentence x
            similarity = sim_func( _source[x], _source[y] )
            # undirected (bidirectional) weighted graph; the weight is the similarity of the two sentences
            graph[x, y] = similarity
            graph[y, x] = similarity
            
    nx_graph = nx.from_numpy_matrix(graph)
    # run the PageRank algorithm
    scores = nx.pagerank(nx_graph, **pagerank_config)              # this is a dict
    sorted_scores = sorted(scores.items(), key = lambda item: item[1], reverse=True)

    for index, score in sorted_scores:
        item = AttrDict(index=index, sentence=sentences[index], weight=score)
        sorted_sentences.append(item)

    return sorted_sentences

if __name__ == '__main__':
    pass
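
A self-contained toy version of the same sentence-ranking idea, with a hypothetical word-overlap similarity standing in for sim_func (assumes a NetworkX version that provides from_numpy_array):

import numpy as np
import networkx as nx

sentences = ["the cat sat on the mat",
             "the dog sat on the log",
             "cats and dogs are pets"]
words = [s.split() for s in sentences]

def overlap_sim(w1, w2):
    # hypothetical similarity: shared words normalised by the average sentence length
    return len(set(w1) & set(w2)) / (0.5 * (len(w1) + len(w2)))

n = len(sentences)
graph = np.zeros((n, n))
for x in range(n):
    for y in range(x, n):
        graph[x, y] = graph[y, x] = overlap_sim(words[x], words[y])

scores = nx.pagerank(nx.from_numpy_array(graph), alpha=0.85)
for index, score in sorted(scores.items(), key=lambda kv: kv[1], reverse=True):
    print(round(score, 3), sentences[index])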
コード例 #51
0
def page_rank(n, page_graph):
    ranks = pagerank(page_graph, alpha=0.6)
    summary = sorted(ranks.keys(), key=lambda k: ranks[k], reverse=True)[:n]
    return summary, ranks
コード例 #52
0
 def add_pagerank(self, graph):
     pr = nx.pagerank(graph)
     nx.set_node_attributes(graph, 'zpagerank', pr)
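
Note that the (graph, name, values) argument order above is the NetworkX 1.x form of set_node_attributes; under NetworkX 2.x+ the same idea would presumably be written with the values before the attribute name:

import networkx as nx

G = nx.karate_club_graph()
pr = nx.pagerank(G)
nx.set_node_attributes(G, pr, 'zpagerank')   # NetworkX 2.x+ signature: (G, values, name)
print(G.nodes[0]['zpagerank'])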
コード例 #53
0
        vtotnot += len(vnot[com])
        vtot0 += len(vpos0[com]) - len(vneg0[com])
        vtot1 += sum(vpos1[com]) - sum(vneg1[com])
        vtot2 += sum(vpos2[com]) - sum(vneg2[com])
        tv[com] = sum(vpos2[com]) - sum(vneg2[com])
        Gp[com].add_edges_from(vpos0[com])
        Gprp.add_edges_from(vpos0[com])
        Gn[com].add_edges_from(vneg0[com])
        Gprn.add_edges_from(vneg0[com])
        GpT = nx.DiGraph.reverse(Gp[com], copy=True)
        GnT = nx.DiGraph.reverse(Gn[com], copy=True)
        nx.set_node_attributes(GpT, False, 'visited')
        nx.set_edge_attributes(GpT, False, 'visited')
        nx.set_node_attributes(GnT, False, 'visited')
        nx.set_edge_attributes(GnT, False, 'visited')
        prp = nx.pagerank(GpT)
        prn = nx.pagerank(GnT)
        pr[i] += prp[i] - prn[i]

        wpp = PageRank(GpT, 0.85)
        wpn = PageRank(GnT, 0.85)
        wp[i] += wpp[i] - wpn[i]

    sh[i] = vtotnot
    un[i] = vtot0
    tw[i] = vtot1
    di[i] = vtot2

    print(
        "NODE {}, color {}, vnot {}, v0node {}, v1node {}, v2node {} v3node {}"
        .format(i, G.node[i], vtotnot, vtot0, vtot1, vtot2, pr[i]))
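
The block above scores nodes by running PageRank on reversed endorsement graphs. A minimal sketch of that reversal on a toy DiGraph (the positive/negative split and the custom PageRank are omitted):

import networkx as nx

# toy endorsement graph: an edge u -> v means u endorses v
G = nx.DiGraph([(1, 2), (3, 2), (4, 2), (2, 5)])

pr_fwd = nx.pagerank(G)            # credit flows to frequently endorsed nodes
pr_rev = nx.pagerank(G.reverse())  # credit flows to nodes that endorse well-ranked nodes

for node in G:
    print(node, round(pr_fwd[node], 3), round(pr_rev[node], 3))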
コード例 #54
0

def cleanGraph(Gr):
    # takes a graph read from a gexf file
    # output: the graph relabelled with integer indices, plus a dict mapping the
    # original node labels to their attribute dicts
    inds = {}
    for i in list(Gr.nodes):
        inds[i] = Gr.nodes[i]
    Gr = nx.convert_node_labels_to_integers(Gr)
    return Gr, inds


day1 = nx.read_gexf("data/sp_data_school_day_1_g.gexf_")
G, inds = cleanGraph(day1)
#G = nx.gnp_random_graph(600,0.1)
pagerank = nx.pagerank(G)
bet = nx.betweenness_centrality(G)
close = nx.closeness_centrality(G)


def getMaxMinMid(centralities):
    # returns node label for max, min, mid
    import operator
    length = len(centralities)
    nodelist = list(sorted(centralities.items(), key=operator.itemgetter(1)))

    mmax = nodelist[length - 1][0]
    mmin = nodelist[0][0]
    mmid = nodelist[int(length / 2)][0]

    return mmax, mmin, mmid
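
A short usage sketch for getMaxMinMid, run on a synthetic graph so it works without the gexf file (assumes the helper above is in scope):

import networkx as nx

H = nx.gnp_random_graph(100, 0.05, seed=42)
pr_h = nx.pagerank(H)
mmax_node, mmin_node, mmid_node = getMaxMinMid(pr_h)
print("highest / lowest / median PageRank node:", mmax_node, mmin_node, mmid_node)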
コード例 #55
0
import pandas as pd
import numpy as np
import networkx as nx
import community

train = pd.read_table('./Data/training_set.txt',sep=' ',names=['source','target','link'])


G = nx.Graph()
train_link = train[train.link == 1][['source','target']]
G.add_edges_from(train_link.values)

id_degree_dict = dict(G.degree())  # wrap in dict() so it pickles as a plain mapping
id_bc_dict = nx.betweenness_centrality(G)
id_cluster_dict = nx.clustering(G)
id_pagerank_dict = nx.pagerank(G)

def Save_Obj(Obj,File_Name):    
    import pickle
    File = File_Name + '.pkl'
    output = open(File, 'wb')
    pickle.dump(Obj, output)
    output.close()

Save_Obj(id_degree_dict,'./Data/id_degree_dict')
Save_Obj(id_bc_dict,'./Data/id_bc_dict')
Save_Obj(id_cluster_dict,'./Data/id_cluster_dict')
Save_Obj(id_pagerank_dict,'./Data/id_pagerank_dict')

# community
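
The unused community import and the trailing comment suggest a Louvain step was intended here; a minimal sketch with python-louvain (assuming that is the installed community package), continuing from G and Save_Obj above:

id_community_dict = community.best_partition(G)   # node -> community id
print('number of communities:', len(set(id_community_dict.values())))
Save_Obj(id_community_dict, './Data/id_community_dict')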
コード例 #56
0
ファイル: graph.py プロジェクト: manthanb/medium-analysis
def page_rank(graph):

	return nx.pagerank(graph)
コード例 #57
0
def answer_five():
    # Your Code Here
    ranks = nx.pagerank(G2)
    return ranks['realclearpolitics.com']  # Your Answer Here
コード例 #58
0
        for w1 in sent1:
            for w2 in sent2:
                if w1 == w2:
                    # print(w1)
                    if w1 in word_scores:
                        print(w1, "-->", i, j)
                        weight = weight + word_scores[w1]

        print(weight)
        w = weight
        G.add_edge(temp1, temp2, weight=w)

nx.draw(G)
plt.show()

sent_scores = nx.pagerank(G, 0.85)

print(sent_scores)

print("\n\n\n final scores \n\n\n")

s = [(k, sent_scores[k])
     for k in sorted(sent_scores, key=sent_scores.get, reverse=True)]
for k, v in s:
    print(k, v)

print("\n\n\n final summary \n\n\n")

size = min(10, len(s))
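
A hedged guess at how the elided summary step might continue, assuming the graph nodes (temp1/temp2) are the sentence strings themselves:

# hypothetical continuation: join the top-ranked sentence nodes into a summary
summary_sentences = [k for k, v in s[:size]]
print(' '.join(summary_sentences))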
コード例 #59
0
G.add_edge(18,100)
G.add_edge(19,100)
G.add_edge(20,100)
G.add_edge(22,100)
G.add_edge(25,100)
G.add_edge(27,100)
G.add_edge(34,100)

G.add_edge(13,1)
G.add_edge(13,15)
G.add_edge(13,17)
G.add_edge(13,18)
G.add_edge(13,19)
G.add_edge(13,20)
G.add_edge(18,1)
G.add_edge(18,13)
G.add_edge(18,17)
G.add_edge(18,19)
G.add_edge(18,20)
G.add_edge(19,18)
G.add_edge(22,15)

print 'Degree: '
print G.degree(G.nodes())
print 'PG:'
print nx.pagerank(G)
pos = nx.circular_layout(G, scale=100)
plt.clf()
nx.draw(G, pos, with_labels=True)  # draw with the circular layout computed above
plt.savefig('C:/Users/Branko/Desktop/VPJXQGSRWJZDOB-UHFFFAOYSA-O_oriented.png')
コード例 #60
0
 def rank(self):
     return networkx.pagerank(self.graph, weight='weight')