Ejemplo n.º 1
0
def nodes_centrality_page_rank():
    """Print the five nodes of the global graph G with the highest PageRank."""
    scores = snap.TIntFltH()
    snap.GetPageRank(G, scores)
    # Rank node ids by descending PageRank score.
    ranking = sorted(scores, key=lambda nid: scores[nid], reverse=True)
    for nid in ranking[:5]:  # top 5
        print(nid, scores[nid])
def get_top_packages(graph_path, n):
    """Load a saved TNEANet graph and return the names of its top-n packages
    by PageRank (caching the computed scores on disk beside the graph)."""
    graph_abs_path = os.path.abspath(graph_path)
    graph_name = os.path.basename(graph_abs_path).replace(".graph", "")
    graph = snap.TNEANet.Load(snap.TFIn(graph_abs_path))
    # Rebuild the node-id => package-name mapping from the node attributes.
    id_pkg_dict = {
        node.GetId(): graph.GetStrAttrDatN(node.GetId(), "pkg")
        for node in graph.Nodes()
    }

    # snap.py doesn't support absolute paths for some operations, so work
    # from the graph's own directory.
    os.chdir(os.path.dirname(graph_abs_path))

    data_file = graph_name + "_pageranks"
    prank_hashtable = snap.TIntFltH()
    if os.path.isfile(data_file):
        # Reuse previously computed PageRank scores.
        prank_hashtable.Load(snap.TFIn(data_file))
    else:
        # Damping factor: 0.85, convergence difference: 1e-4, MaxIter: 100
        snap.GetPageRank(graph, prank_hashtable, 0.85)
        fout = snap.TFOut(data_file)
        prank_hashtable.Save(fout)

    top_n = get_top_nodes_from_hashtable(prank_hashtable, n)
    top_n.sort(key=itemgetter(1))
    return [id_pkg_dict[node_id] for node_id, _ in top_n]
Ejemplo n.º 3
0
def compute_page_rank(graph):
    """Compute PageRank for every node of *graph* and return the score table.

    Prints each (node id, score) pair as a side effect.

    :param graph: snap graph to rank
    :return: snap.TIntFltH mapping node id -> PageRank score
    """
    logging.info("compute pagerank")
    PRankH = snap.TIntFltH()
    snap.GetPageRank(graph, PRankH)
    for item in PRankH:
        # Fixed: Python 2 print statement is a syntax error under Python 3.
        print(item, PRankH[item])
    return PRankH
Ejemplo n.º 4
0
def pageRank(rankCommands, Graph, conn, cur):
    """Compute PageRank on Graph, report the elapsed time, and persist the
    descending ranking via createTable.

    :param rankCommands: table-creation commands forwarded to createTable
    :param Graph: snap graph to rank
    :param conn: database connection forwarded to createTable
    :param cur: database cursor forwarded to createTable
    """
    PRankH = snap.TIntFltH()
    before_time = time.time()
    snap.GetPageRank(Graph, PRankH)
    # Fixed: Python 2 print statement -> print() function (Python 3 compatible).
    print("Total handling time is: ", (time.time() - before_time))
    # Node ids sorted by descending PageRank score.
    slist = sorted(PRankH, key=lambda key: PRankH[key], reverse=True)
    createTable(rankCommands, slist, PRankH, conn, cur)
def PageRank(d, e):
    """Build a directed graph from the edge-list file *d*, run PageRank, and
    return a dict mapping each directed edge (u, v) to a weight that splits
    *e* proportionally to the endpoints' PageRank scores.

    File format: first line "<num_nodes> <num_edges>", then one edge per line.

    :param d: path to the edge-list file
    :param e: total weight to distribute across each edge pair
    :return: dict {(u, v): weight} containing both directions of every edge
    """
    # Fixed: the original never closed the file handle; read it inside a
    # context manager instead.
    with open(d) as f:
        s1 = re.split('\n', f.read())

    G1 = snap.PNGraph.New()
    PRankH = snap.TIntFltH()

    a = re.split(' ', s1[0])

    for i in range(0, int(a[0])):
        G1.AddNode(i)

    for i in range(1, int(a[1]) + 1):
        b = re.split(' ', s1[i])
        # Strip any non-digit characters from the endpoint tokens
        # (raw strings avoid the invalid-escape warning for \D).
        b0 = re.sub(r"\D", "", b[0])
        b1 = re.sub(r"\D", "", b[1])
        G1.AddEdge(int(b0), int(b1))

    snap.GetPageRank(G1, PRankH)

    EdgePara = dict()

    for i in range(1, int(a[1]) + 1):
        c = re.split(' ', s1[i])
        if PRankH[int(c[0])] == 0 and PRankH[int(c[1])] == 0:
            # Fixed: the original used '==' (a comparison that raises KeyError
            # on a missing key) instead of '=', so zero-score edges were never
            # actually stored.
            EdgePara[(int(c[0]), int(c[1]))] = 0
            EdgePara[(int(c[1]), int(c[0]))] = 0
        else:
            EdgePara[(int(c[0]), int(c[1]))] = e * PRankH[int(
                c[0])] / (PRankH[int(c[0])] + PRankH[int(c[1])])
            EdgePara[(int(c[1]), int(c[0]))] = e * PRankH[int(
                c[1])] / (PRankH[int(c[0])] + PRankH[int(c[1])])

    return EdgePara
Ejemplo n.º 6
0
 def rank_pagerank(self , C=0.85, Eps=1e-4, MaxIter=100):
     """Compute PageRank centrality for every node of the wrapped graph and
     return it as a {node id: score} dictionary via snap_hash_to_dict."""
     scores = snap.TIntFltH()
     snap.GetPageRank(self._graph, scores, C, Eps, MaxIter)
     # Sanity check: every node of the graph must have received a score.
     assert len(scores) == self._num_nodes, 'Number of nodes in centrality result must match number of nodes in graph'
     return snap_hash_to_dict(scores)
def get_page_rank_sum(G, n1, n2, reset=False):
    """Return the sum of the PageRank scores of nodes n1 and n2 in G.

    The score table is cached in the module-level PRankH and recomputed only
    when *reset* is True or no table exists yet.

    :param G: snap graph to rank
    :param n1: first node id
    :param n2: second node id
    :param reset: force recomputation of the cached table
    """
    global PRankH
    if reset or PRankH is None:
        # Fixed: Python 2 print statement -> print() function.
        print('Initializing Page Rank')
        PRankH = snap.TIntFltH()
        # Fixed: the original passed (1e-2, 50) positionally as (C, Eps),
        # i.e. damping 0.01 and tolerance 50 — the iteration would "converge"
        # immediately to garbage. Intended: default damping 0.85 with
        # Eps=1e-2 and MaxIter=50.
        snap.GetPageRank(G, PRankH, 0.85, 1e-2, 50)
    return PRankH[n1] + PRankH[n2]
Ejemplo n.º 8
0
def CalculatePageRank(graph, alpha, number_iteration):
    """Run PageRank (damping=alpha, tolerance 1e-4, at most number_iteration
    iterations) and return the scores as a plain {node id: score} dict."""
    scores = snap.TIntFltH()
    snap.GetPageRank(graph, scores, alpha, 1e-4, number_iteration)
    return {node_id: scores[node_id] for node_id in scores}
Ejemplo n.º 9
0
def print_top_pr(G, K, iid_to_ingredient, Reverse=True):
    """Print the K ingredients of graph G with the highest (or, when Reverse
    is False, lowest) PageRank scores, one "name: score" line each.

    :param G: snap graph of ingredients
    :param K: number of entries to print
    :param iid_to_ingredient: mapping from node id to ingredient name
    :param Reverse: True for descending score order
    """
    # Fixed: Python 2 print statements -> print() function calls.
    print('PageRank:')
    PRankH = snap.TIntFltH()
    snap.GetPageRank(G, PRankH)
    PageRank = sorted([(PRankH[item], item) for item in PRankH],
                      reverse=Reverse)
    for Rank, IId in PageRank[:K]:
        print('{}: {:.5f}'.format(iid_to_ingredient[IId], Rank))
Ejemplo n.º 10
0
def calc_PageRank(Graph, node_to_g):
    """Map each protein (looked up through node_to_g) to its PageRank score
    in Graph and return the resulting dict."""
    scores = snap.TIntFltH()
    snap.GetPageRank(Graph, scores)
    return {node_to_g[node_id]: scores[node_id] for node_id in scores}
Ejemplo n.º 11
0
def get_page_ranks(df, G, names_df):
    """Return (name, pagerank) pairs for every node of G, highest score first.

    Names are looked up by node id in names_df; df is unused but kept for
    signature compatibility.
    """
    scores = snap.TIntFltH()
    snap.GetPageRank(G, scores)
    ranks = [
        (names_df[names_df["id"] == node]["name"].values[0], scores[node])
        for node in scores
    ]
    ranks.sort(key=lambda pair: pair[1], reverse=True)
    return ranks
Ejemplo n.º 12
0
def quick_properties(graph, name, dic_path):
    """Get quick properties of the graph "name". dic_path is the path of the dict {players: id} """
    n_edges = graph.GetEdges()
    n_nodes = graph.GetNodes()
    print("##########")
    print("Quick overview of {} Network".format(name))
    print("##########")
    # Fixed: the original applied .format() to print()'s return value (None),
    # which raises AttributeError under Python 3.
    print("{} Nodes, {} Edges".format(n_nodes, n_edges))
    print("{} Self-edges ".format(snap.CntSelfEdges(graph)))
    print("{} Directed edges, {} Undirected edges".format(
        snap.CntUniqDirEdges(graph), snap.CntUniqUndirEdges(graph)))
    print("{} Reciprocated edges".format(snap.CntUniqBiDirEdges(graph)))
    print("{} 0-out-degree nodes, {} 0-in-degree nodes".format(
        snap.CntOutDegNodes(graph, 0), snap.CntInDegNodes(graph, 0)))
    node_in = graph.GetNI(snap.GetMxInDegNId(graph))
    node_out = graph.GetNI(snap.GetMxOutDegNId(graph))
    print("Maximum node in-degree: {}, maximum node out-degree: {}".format(
        node_in.GetDeg(), node_out.GetDeg()))
    print("###")
    components = snap.TCnComV()
    snap.GetWccs(graph, components)
    max_wcc = snap.GetMxWcc(graph)
    # Fixed: Python 2 print statements -> print() function calls.
    print("{} Weakly connected components".format(components.Len()))
    print("Largest Wcc: {} Nodes, {} Edges".format(max_wcc.GetNodes(),
                                                   max_wcc.GetEdges()))
    prankH = snap.TIntFltH()
    snap.GetPageRank(graph, prankH)
    # Node ids sorted by descending PageRank / hub / authority scores.
    sorted_prankH = sorted(prankH, key=lambda key: prankH[key], reverse=True)
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(graph, NIdHubH, NIdAuthH)
    sorted_NIdHubH = sorted(NIdHubH,
                            key=lambda key: NIdHubH[key],
                            reverse=True)
    sorted_NIdAuthH = sorted(NIdAuthH,
                             key=lambda key: NIdAuthH[key],
                             reverse=True)
    with open(dic_path, 'rb') as dic_id:
        mydict = pickle.load(dic_id)
        # Reverse-lookup player names from the {player: id} dict by position.
        print("3 most central players by PageRank scores: {}, {}, {}".format(
            list(mydict.keys())[list(mydict.values()).index(sorted_prankH[0])],
            list(mydict.keys())[list(mydict.values()).index(sorted_prankH[1])],
            list(mydict.keys())[list(mydict.values()).index(
                sorted_prankH[2])]))
        print("Top 3 hubs: {}, {}, {}".format(
            list(mydict.keys())[list(mydict.values()).index(
                sorted_NIdHubH[0])],
            list(mydict.keys())[list(mydict.values()).index(
                sorted_NIdHubH[1])],
            list(mydict.keys())[list(mydict.values()).index(
                sorted_NIdHubH[2])]))
        print("Top 3 authorities: {}, {}, {}".format(
            list(mydict.keys())[list(mydict.values()).index(
                sorted_NIdAuthH[0])],
            list(mydict.keys())[list(mydict.values()).index(
                sorted_NIdAuthH[1])],
            list(mydict.keys())[list(mydict.values()).index(
                sorted_NIdAuthH[2])]))
def _get_pagerank(Graph, H, output_path):
    """Compute PageRank for Graph and write a username,PR CSV to output_path.

    H maps node ids to usernames through H.GetKey(id).
    """
    scores = snap.TIntFltH()
    snap.GetPageRank(Graph, scores)
    rows = [{'username': H.GetKey(node_id), 'PR': scores[node_id]}
            for node_id in scores]
    dataset = pd.DataFrame(rows)[['username', 'PR']]
    dataset.to_csv(output_path, index=False, encoding='utf-8')
def PageRank(G):
    """Return a {node id: PageRank score} dict for graph G."""
    scores = snap.TIntFltH()
    snap.GetPageRank(G, scores)
    return {node_id: scores[node_id] for node_id in scores}
Ejemplo n.º 15
0
    def compute_page_rank(self,
                          graph,
                          c: float = 0.85,
                          eps: float = 10e-4,
                          max_iter: int = 100):
        """Run snap's PageRank on *graph* (damping c, tolerance eps, at most
        max_iter iterations) and return the raw TIntFltH score table."""
        scores = snap.TIntFltH()
        snap.GetPageRank(graph, scores, c, eps, max_iter)
        return scores
Ejemplo n.º 16
0
    def getNodeAttributes(self,UGraph):
        """Build a per-node attribute matrix for UGraph.

        Row i collects, for the i-th node visited by each measure's iteration,
        the values: PageRank, HITS hub, HITS authority, betweenness,
        closeness, farness, eccentricity (7 columns).

        NOTE(review): rows are aligned by iteration position (counter), not by
        node id — this assumes the snap hashtables and UGraph.Nodes() all
        visit nodes in the same order; confirm for the graph types in use.

        :param UGraph: snap graph to characterize
        :return: numpy array of shape (num_nodes, 7)
        """
        attriList=[]
        for index in range(UGraph.GetNodes()):
            nodelist=[]
            attriList.append(nodelist)
            
            # PageRank score of each node
        PRankH = snap.TIntFltH()
        snap.GetPageRank(UGraph, PRankH)
        counter=0
        for item in PRankH:
            attriList[counter].append(PRankH[item])
            counter+=1
            # HITS hub and authority scores (appended as two columns)
        counter=0
        NIdHubH = snap.TIntFltH()
        NIdAuthH = snap.TIntFltH()
        snap.GetHits(UGraph, NIdHubH, NIdAuthH)
        for item in NIdHubH:
            attriList[counter].append(NIdHubH[item])
            attriList[counter].append(NIdAuthH[item])
            counter+=1

            # Betweenness centrality (NodeFrac=1.0, i.e. exact)
        counter=0
        Nodes = snap.TIntFltH()
        Edges = snap.TIntPrFltH()
        snap.GetBetweennessCentr(UGraph, Nodes, Edges, 1.0)
        for node in Nodes:
            attriList[counter].append(Nodes[node])
            counter+=1

            # Closeness centrality, one node at a time
        counter=0
        for NI in UGraph.Nodes():
            CloseCentr = snap.GetClosenessCentr(UGraph, NI.GetId())
            attriList[counter].append(CloseCentr)
            counter+=1

            # Farness centrality, one node at a time
        counter=0
        for NI in UGraph.Nodes():
            FarCentr = snap.GetFarnessCentr(UGraph, NI.GetId())
            attriList[counter].append(FarCentr)
            counter+=1

            # Node eccentricity (directed=True flag)
        counter=0
        for NI in UGraph.Nodes():
            attriList[counter].append(snap.GetNodeEcc(UGraph, NI.GetId(), True))
            counter+=1

        atrriMarix=np.array(attriList)

        return atrriMarix
Ejemplo n.º 17
0
def getPageRank(Graph):
    """Return the PageRank scores of Graph as a list ordered by ascending
    node id."""
    scores = snap.TIntFltH()
    snap.GetPageRank(Graph, scores)
    by_id = {node: scores[node] for node in scores}
    return [by_id[node_id] for node_id in sorted(by_id)]
def compute_pagerank(Graph):
    """Compute PageRank on *Graph*.

    :param Graph: the graph to compute pagerank on
    :return: list of (pagerank_score, node_id) tuples, descending by score
    """
    scores = snap.TIntFltH()
    snap.GetPageRank(Graph, scores)
    pairs = [(scores[node], node) for node in scores]
    # Tuples are unique (node ids are unique), so a descending sort is
    # equivalent to the ascending-sort-then-reverse of the original.
    pairs.sort(reverse=True)
    return pairs
Ejemplo n.º 19
0
def single_year_page_rank(df, id2names):
    """For each distinct year in df, build that year's directed graph and
    compute PageRank, returning {year: {name: score}}."""
    page_ranks = {}
    for year in sorted(df['year'].unique()):
        graph = snap.TNEANet.New()
        add_df_to_G(df[df['year'] == year], graph, directed=True)
        scores = snap.TIntFltH()
        snap.GetPageRank(graph, scores)
        # Translate node ids to names for this year's ranking.
        page_ranks[year] = {id2names[node]: scores[node] for node in scores}
    return page_ranks
Ejemplo n.º 20
0
def getUndirAttribute(filename, node_num, weighted=None, param=1.0):
    """Load an undirected edge list and build a per-node attribute DataFrame.

    Columns include the graph name, node Id, Degree, betweenness centrality,
    PageRank, and egonet attributes filled in by getEgoAttr (plus weighted
    variants when *weighted* is truthy).

    NOTE(review): assumes node ids are exactly 0..node_num-1, since ids are
    used directly to index the preallocated numpy arrays — confirm against
    the input files.

    :param filename: path to the edge list (source/dest in columns 0 and 1)
    :param node_num: number of nodes, i.e. rows of the resulting frame
    :param weighted: when truthy, also compute weighted-degree attributes
    :param param: NodeFrac parameter forwarded to GetBetweennessCentr
    :return: pandas DataFrame with one row per node
    """
    UGraph = snap.LoadEdgeList(snap.PUNGraph, filename, 0, 1)

    attributeNames = [
        'Graph', 'Id', 'Degree', 'NodeBetweennessCentrality', 'PageRank',
        'EgonetDegree', 'AvgNeighborDeg', 'EgonetConnectivity'
    ]
    if weighted:
        attributeNames += [
            'WeightedDegree', 'EgoWeightedDegree', 'AvgWeightedNeighborDeg',
            'EgonetWeightedConnectivity'
        ]

    attributes = pd.DataFrame(np.zeros((node_num, len(attributeNames))),
                              columns=attributeNames)

    # Same graph label repeated for every row.
    attributes['Graph'] = [filename.split('/')[-1].split('.')[0]
                           ] * node_num  #node_num
    # Degree (out-degree equals degree for an undirected graph)
    attributes['Id'] = range(0, node_num)
    degree = np.zeros((node_num, ))
    OutDegV = snap.TIntPrV()
    snap.GetNodeOutDegV(UGraph, OutDegV)
    for item in OutDegV:
        degree[item.GetVal1()] = item.GetVal2()
    attributes['Degree'] = degree

    # Egonet attributes are filled in-place by the external helper.
    getEgoAttr(UGraph, node_num, attributes, directed=False)

    if weighted:
        df = getWeightedDegree(filename, node_num, attributes, directed=False)
        getWeightedEgoAttr(UGraph, node_num, attributes, df, directed=False)

    # Betweenness centrality, indexed by node id
    betCentr = np.zeros((node_num, ))
    Nodes = snap.TIntFltH()
    Edges = snap.TIntPrFltH()
    snap.GetBetweennessCentr(UGraph, Nodes, Edges, param)
    for node in Nodes:
        betCentr[node] = Nodes[node]
    attributes['NodeBetweennessCentrality'] = betCentr

    # PageRank, indexed by node id
    pgRank = np.zeros((node_num, ))
    PRankH = snap.TIntFltH()
    snap.GetPageRank(UGraph, PRankH)
    for item in PRankH:
        pgRank[item] = PRankH[item]
    attributes['PageRank'] = pgRank

    return attributes
def GetOverlap(filePathName, Graph, t):
    """Count how many of the top-100 nodes by a chosen centrality measure
    overlap with the top-100 nodes listed in a reference file.

    :param filePathName: text file whose first 100 lines are "<node> <score>"
    :param Graph: snap graph to rank (node ids assumed 0..GetNodes()-1)
    :param t: 1 = closeness, 2 = betweenness, 3 = PageRank
    :return: size of the intersection of the two top-100 node sets
    """
    # l is the final ranking of the nodes: start with every node id and
    # sort it afterwards using the chosen centrality values.
    l = [i for i in range(Graph.GetNodes())]

    # The reference vector whose information is used to sort l.
    ref_vect = [0 for i in range(Graph.GetNodes())]

    # Type 1: closeness centrality.
    if (t == 1):
        for NI in Graph.Nodes():
            ref_vect[NI.GetId()] = snap.GetClosenessCentr(Graph, NI.GetId())

    # Type 2: betweenness centrality (NodeFrac 0.8 as instructed).
    if (t == 2):
        Nodes = snap.TIntFltH()
        Edges = snap.TIntPrFltH()
        snap.GetBetweennessCentr(Graph, Nodes, Edges, 0.8)
        for node in Nodes:
            ref_vect[node] = Nodes[node]

    # Type 3: PageRank scores (limit 1e-6 as used in gen_centrality.py).
    if (t == 3):
        PRankH = snap.TIntFltH()
        snap.GetPageRank(Graph, PRankH, 0.8, 1e-6, 100)
        for item in PRankH:
            ref_vect[item] = PRankH[item]

    # Sort node ids by descending centrality. A key function replaces the
    # original float-returning comparator; list.sort is stable in both forms,
    # so tie ordering is unchanged.
    l.sort(key=lambda node: ref_vect[node], reverse=True)

    # Top 100 nodes according to our computation.
    S1 = set(l[:100])

    # Top 100 nodes from the reference file. Fixed: the original never closed
    # the file handle — use a context manager.
    S2 = set()
    with open(filePathName, 'r') as f:
        for _ in range(100):
            s = f.readline()
            a, b = s.split()
            S2.add(int(a))

    # Number of overlapping nodes between S1 and S2.
    return len(S1.intersection(S2))
Ejemplo n.º 22
0
def pageRank(graph, userId):
    """Compute PageRank for *graph* and dump a (Node, PageRank) CSV into
    write_DIR, with the file name keyed by userId."""
    scores = snap.TIntFltH()
    snap.GetPageRank(graph, scores)

    df = pd.DataFrame(columns=('Node', 'PageRank'))
    for node_id in scores:
        # Row label equals the node id, mirroring the original layout.
        df.loc[node_id] = [node_id, scores[node_id]]

    out_path = write_DIR + 'pagerank_{}.csv'.format(userId)
    df.to_csv(out_path, sep=',', index=False)
Ejemplo n.º 23
0
def _pageRankOverlap(elistPath, alpha=0.85):
    """
    Compare our PageRank top-100 against SNAP's internal implementation.

    Parameters
    ----------
    elistPath: str or pathlib.Path
        Edge list of the graph to compute centralities on

    alpha: float, default = 0.85
        Damping factor for PageRank computations
    ----------

    Returns
    ----------
    calculatedNodes: set
        Top 100 nodes by PageRank according to our implementation
        (read back from "pagerank.txt")

    SNAPNodes: set
        Top 100 nodes by PageRank according to snap.GetPageRank

    len(overlap): int
        Number of nodes common to both sets
    ----------
    """

    adjGraph = AdjGraph(elistPath, separator=" ")
    calculatedNodes = readNodes("pagerank.txt")

    # Pull SNAP's scores into a plain dict, then rank node ids descending.
    scoreTable = snap.TIntFltH()
    snap.GetPageRank(adjGraph.SNAPGraph, scoreTable, alpha)
    snapScores = {node: scoreTable[node] for node in scoreTable}

    rankedIds = [
        node for node, _ in sorted(
            snapScores.items(), key=lambda kv: kv[1], reverse=True)
    ]
    SNAPNodes = set([int(node) for node in rankedIds[:100]])

    overlap = SNAPNodes.intersection(calculatedNodes)
    return (calculatedNodes, SNAPNodes, len(overlap))
def page_rank(input):
    """Load a TNGraph from *input*, compute PageRank, and print a LaTeX
    table row for each of the 100 highest-scoring users."""
    id_to_login = get_user_id_to_login()

    print("Loading graph...")
    graph = snap.TNGraph.Load(snap.TFIn(input))

    print("Calculating page rank...")
    scores = snap.TIntFltH()
    snap.GetPageRank(graph, scores)

    ranked = sorted(((scores[node], node) for node in scores), reverse=True)
    for i, (score, node_id) in enumerate(ranked[:100]):
        print(i + 1, "&", id_to_login[node_id], "&", score, "\\\\")
Ejemplo n.º 25
0
def run(snap_graph):
    """Compute PageRank over snap_graph and print the screen names behind the
    ten largest distinct scores.

    NOTE(review): scores are used as map keys, so nodes sharing an identical
    PageRank value collapse to a single screen name — presumably acceptable
    for this report; confirm if exact per-node output is required.
    """
    page_rank = snap.TIntFltH()
    snap.GetPageRank(snap_graph, page_rank)

    node_page_ranks = []
    node_page_rank_name_map = {}
    for node in page_rank:
        value = page_rank[node]
        if value not in node_page_ranks:
            node_page_ranks.append(value)
        node_page_rank_name_map[value] = step7.get_screen_name_from_hash(node)

    top_ten_page_ranks = heapq.nlargest(10, node_page_ranks)

    # Fixed: Python 2 print statements -> print() function calls.
    print("\nTop 10 page ranks from the merged graph =>")
    for top_node in top_ten_page_ranks:
        print(node_page_rank_name_map[top_node], top_node)
Ejemplo n.º 26
0
    def computePageRank(cls, graph, args_paths):
        """Compute PageRank over the whole graph and persist the score table.

        snap's PageRank fills an empty (int, float) hashtable with the
        computed values; once computed, the table is saved to disk.

        :param cls
        :param graph: graph on which PageRank is computed
        :param args_paths: holds the destination path for the PageRank table
        """
        damping, tolerance, max_iterations = 0.85, 1e-4, 100

        table_rank = snap.TIntFltH()
        snap.GetPageRank(graph, table_rank, damping, tolerance, max_iterations)

        snapSave(table_rank, args_paths.pagerank)
    def page_rank_score(self, C=0.85, Eps=1e-4, MaxIter=100):
        '''
        Computes the PageRank score of every node in Graph.

        :param C: Damping factor.
        :param Eps: Convergence difference.
        :param MaxIter: Maximum number of iterations.
        :return: list of (node id, score) tuples
        '''
        snap = self.snap

        table = snap.TIntFlt64H()
        snap.GetPageRank(self.graph, table, C, Eps, MaxIter)
        return [(node, table[node]) for node in table]
Ejemplo n.º 28
0
Archivo: bf.py Proyecto: eds000n/friend
def BCF(x):
    """For each neighbor j+1 of node x in the global graph G (restricted to
    nodes with ind[j+1] > 1), temporarily delete the edge (j+1, x), recompute
    PageRank, and record x's new score. Return the neighbor whose removal
    minimizes x's PageRank."""
    n = G.GetNodes()
    # 100 acts as a sentinel well above any attainable PageRank score.
    v = [0] * n
    val = 100
    for j in range(0, n):
        if G.IsEdge(j + 1, x) and (ind[j + 1] > 1):
            G.DelEdge(j + 1, x)
            scores = snap.TIntFltH()
            snap.GetPageRank(G, scores)
            v[j] = scores[x]
            val = min(val, v[j])
            # Restore the edge so G is unchanged for the next candidate.
            G.AddEdge(j + 1, x)
        else:
            v[j] = 100
    for j in range(0, n):
        if abs(val - v[j]) < 1e-8:
            return j + 1
Ejemplo n.º 29
0
def extract_top_nodes(edges_big_file, edges_extracted_file, top_n):
    """Keep only the top_n nodes of the edge-list graph by PageRank, deleting
    the lowest-ranked nodes, and save the reduced graph as an edge list."""
    graph = snap.LoadEdgeList(snap.PUNGraph, edges_big_file, 0, 1, '\t')
    num_remove = graph.GetNodes() - top_n

    # PageRank determines which nodes survive.
    scores = snap.TIntFltH()
    snap.GetPageRank(graph, scores)
    ranked = sorted(
        ([node, scores[node]] for node in scores), key=lambda pair: pair[1])

    # Drop the num_remove lowest-scoring nodes.
    for node_id, _ in ranked[:num_remove]:
        graph.DelNode(node_id)
    snap.SaveEdgeList(graph, edges_extracted_file)
Ejemplo n.º 30
0
def main():
    """Load the Stack Overflow Java network and report WCC statistics plus the
    top-3 nodes by PageRank and the top-3 HITS hubs/authorities."""
    network = snap.LoadEdgeList(
        snap.PNEANet, "/Users/qingyuan/CS224W/stackoverflow-Java.txt", 0, 1)
    Components = snap.TCnComV()
    snap.GetWccs(network, Components)
    print("The number of weakly connected components is %d" % Components.Len())
    MxWcc = snap.GetMxWcc(network)
    # Fixed: the original message announced "edges ... nodes" but supplied the
    # values as (GetNodes(), GetEdges()) — the labels were swapped.
    print(
        "The number of nodes is %d and the number of edges is %d in the largest weakly connected component."
        % (MxWcc.GetNodes(), MxWcc.GetEdges()))
    PRankH = snap.TIntFltH()
    snap.GetPageRank(network, PRankH)
    PRankH.SortByDat(False)  # descending by score
    num = 0
    # Fixed typo in the message: "PagePank" -> "PageRank".
    print(
        "IDs of the top 3 most central nodes in the network by PageRank scores. "
    )
    for item in PRankH:
        print(item, PRankH[item])
        num += 1
        if num == 3:
            num = 0
            break
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(network, NIdHubH, NIdAuthH)
    NIdHubH.SortByDat(False)
    print("IDs of the top 3 hubs in the network by HITS scores. ")
    for item in NIdHubH:
        print(item, NIdHubH[item])
        num += 1
        if num == 3:
            num = 0
            break
    NIdAuthH.SortByDat(False)
    print("IDs of top 3 authorities in the network by HITS scores. ")
    for item in NIdAuthH:
        print(item, NIdAuthH[item])
        num += 1
        if num == 3:
            num = 0
            break