Example #1
    def pagerank(self):
        # from networkx.algorithms.link_analysis import pagerank_scipy
        # from networkx.algorithms.link_analysis import pagerank_numpy
        from networkx.algorithms.link_analysis import pagerank
        from time import time
        try:
            start = time()
            # pagerank(graph, max_iter=1000)  # 1.7s for #nodes = 2500
            # Keep the returned dict: pagerank() does not modify nstart in place.
            self.page_ranked = pagerank(self.graph, max_iter=1000,
                                        nstart=self.page_ranked)  # 0.2-1.5s for #nodes = 2500
            # pagerank_scipy(graph)  # 1.0s for #nodes = 2500
            # pagerank_numpy(graph)  # > 30s if #nodes > 1000
            print("Pagerank took: %f seconds" % (time() - start))
        except ZeroDivisionError:
            print("ZeroDivisionError in pagerank")

        page_ranked_sorted = sorted(self.page_ranked.items(),
                                    key=lambda x: x[1], reverse=True)
        print(page_ranked_sorted[:4])

        # from networkx.algorithms.centrality import *

        # start = time()
        # degree_centrality = degree_centrality(graph)  # 0.003s for 1500 nodes
        # print("Degree centrality took: %f seconds" % (time() - start))

        # start = time()
        # closeness_centrality = closeness_centrality(graph)  # 4s for 1500 nodes
        # print("Closeness centrality took: %f seconds" % (time() - start))

        # start = time()
        # betweenness_centrality = betweenness_centrality(graph)  # 18s for 1500 nodes
        # print("Betweenness centrality took: %f seconds" % (time() - start))

        return self.page_ranked
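The commented-out timings motivate the choice of solver: warm-started power iteration (0.2-1.5s) beats a cold start (1.7s) and pagerank_scipy (1.0s), while pagerank_numpy works on a dense matrix and becomes impractical beyond roughly 1000 nodes.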
Example #2

def graph_2():
    import networkx as nx
    from networkx.algorithms import centrality, link_analysis

    G = nx.DiGraph()
    # G.add_nodes_from([2, 3, 5, 6, 7])
    G.add_edges_from([[2, 3], [5, 3], [6, 7], [7, 2], [5, 7]])
    # G.add_path([2,3,6,7])
    # G.add_path([2,4,5])
    # print(list(G.nodes()))
    print(list(G.edges()))
    print(list(G.out_degree()))
    print(list(G.in_degree()))
    print(centrality.in_degree_centrality(G))
    print(link_analysis.pagerank(G, personalization={2: -4}))
    print(link_analysis.pagerank(G, dangling={5: 0, 7: 1}))
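`personalization` reweights the restart distribution (networkx normalizes the values by their sum, so non-negative weights are the intended use and the `-4` above is dubious), while `dangling` controls how the rank of out-degree-zero nodes (here node 3) is redistributed. A minimal sketch with a valid personalization vector, assuming a plain networkx install:

    import networkx as nx

    G = nx.DiGraph([(2, 3), (5, 3), (6, 7), (7, 2), (5, 7)])
    # Bias 80% of the restart probability toward node 2 and 20% toward node 7.
    print(nx.pagerank(G, alpha=0.85, personalization={2: 0.8, 7: 0.2}))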
Example #3
    def rank_it(self):
        # la is presumably networkx.algorithms.link_analysis, imported elsewhere.
        rank = la.pagerank(self.pg.graph, max_iter=self.simulations, alpha=self.p_restart)

        print("\t".join(["id", "pagerank", "freq-abs", "freq-rel", "centrality"]))
        for k, v in sorted(rank.items(), key=lambda q: q[1], reverse=True):
            print("%s:\t%s\t%s\t%s\t%s" % (k, v,
                                           self.pg.freq[k],
                                           self.pg.freq[k] / float(self.pg.total),
                                           self.pg.central[k]))
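Note that networkx's `alpha` is the damping factor, i.e. the probability of following an edge rather than restarting; if `p_restart` holds a restart probability, `alpha=1 - self.p_restart` is likely what was intended. Similarly, `max_iter` bounds power-iteration steps, not random-walk simulations.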
Example #4
    def compute_pagerank(self):
        self.kg = KG(self.train_facts,
                     entity_num=len(self.entity_dict),
                     relation_num=len(self.relation_dict))
        graph = networkx.DiGraph(self.kg.to_networkx())
        print("Begin to compute pagerank")
        self.pagerank = pagerank(graph)
        # Entity ids are consecutive integers, so flatten the score dict
        # into a list ordered by id.
        self.pagerank = [
            self.pagerank[entity] for entity in range(len(self.pagerank))
        ]
        print("Begin to save pagerank")
        with open(os.path.join(self.data_directory, "pagerank.txt"),
                  "w") as output:
            for value in self.pagerank:
                output.write("{}\n".format(value))
        print("Finished saving pagerank")
Example #5
    def rank_it(self, out):
        rank = la.pagerank(self.graph, personalization=self.personalize,
                           max_iter=self.simulations, alpha=self.p_restart)

        size_of_rank = len(rank)

        with open(out, "w") as f_out:
            f_out.write("\t".join(["id", "pagerank", "odds", "freq-abs", "freq-rel"]) + "\n")

            for k, v in sorted(rank.items(), key=lambda q: q[1], reverse=True):
                f_out.write("%s:\t%s\t%s\t%s\t%s\n" % (k, v,
                                                       float(v) * size_of_rank,
                                                       self.freq[k],
                                                       self.freq[k] / float(self.total)))
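The "odds" column is the PageRank score multiplied by the number of nodes, i.e. the node's rank relative to the uniform baseline 1/N: values above 1 mark nodes that collect more than their share of rank.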
Example #6
def pagerank_worker(graph, page_ranked):
    print("Pagerank on graph with %d nodes and %d edges."
          % (len(graph.nodes()), len(graph.edges())))
    # Make sure nstart has an entry for every node in the graph.
    for node in graph.nodes():
        page_ranked.setdefault(node, 1)

    from networkx.algorithms.link_analysis import pagerank
    from time import time

    try:
        start = time()
        # Write results back into the caller's dict rather than rebinding
        # the local name, so they remain visible outside the worker.
        page_ranked.update(pagerank(graph, max_iter=1000,
                                    nstart=page_ranked))  # 0.2-1.5s for #nodes = 2500
        print("Pagerank took: %f seconds" % (time() - start))
    except ZeroDivisionError:
        print("ZeroDivisionError in pagerank")

    page_ranked_sorted = sorted(page_ranked.items(), key=lambda x: x[1], reverse=True)
    print(page_ranked_sorted[:4])
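The seeding loop guarantees that `nstart` covers every node, which older networkx releases require; the constant 1 is arbitrary since the vector is normalized, and `setdefault` preserves any scores already in the dict, enabling a warm start across repeated calls.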
Example #7
        with open(graph_dump_filename, "r") as dump:
            graph = cPickle.load(dump)

    return graph


if __name__ == "__main__":

    G = get_graph()

    result = {"pagerank_ids": [], "harmonic_ids": []}

    logger.info('graph nodes %d' % G.number_of_nodes())

    logger.info('harmonic')
    harmonic = hc(G)
    sorted_h = sorted(harmonic.items(),
                      key=operator.itemgetter(1),
                      reverse=True)
    result['harmonic_ids'] = map(lambda x: str(x[0]), sorted_h[0:200])

    logger.info('pr')
    pr = pagerank(G, alpha=0.8507246376811566)
    sorted_pr = sorted(pr.items(), key=operator.itemgetter(1), reverse=True)
    result['pagerank_ids'] = map(lambda x: str(x[0]), sorted_pr[0:200])

    with open('lab6centralities.json', 'w') as f:
        f.write(json.dumps(result))
        f.close()

    logger.info('finish')
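`hc` is presumably networkx's `harmonic_centrality`; the unusually precise `alpha` reads like a tuned value rather than the conventional 0.85 default. Both rankings keep only their top 200 node ids, stringified so the result dict is JSON-serializable.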