Ejemplo n.º 1
0
def computeClusteringCoeff(G, NodeAttributes):
    """Attach raw and min-max normalised clustering coefficients to each node.

    Args:
        G: SNAP graph handed to ``snap.GetNodeClustCf``.
        NodeAttributes: mapping ``node id -> attribute dict``. It is updated
            in place with the keys ``'ClusterCoeff'`` and
            ``'NormClusterCoeff'``.

    Returns:
        The same ``NodeAttributes`` object that was passed in.
    """
    NIdCCfH = snap.TIntFltH()
    snap.GetNodeClustCf(G, NIdCCfH)

    ClusterCoeffList = []
    for nodeId in NIdCCfH:
        clusterCoeff = NIdCCfH[nodeId]
        NodeAttributes[nodeId]['ClusterCoeff'] = clusterCoeff
        ClusterCoeffList.append((nodeId, clusterCoeff))

    # Nothing to normalise (and min()/max() would raise) on an empty result.
    if not ClusterCoeffList:
        return NodeAttributes

    ClusterCoeffList.sort(key=lambda x: x[1], reverse=True)
    maxClusterCoeff = ClusterCoeffList[0][1]
    minClusterCoeff = ClusterCoeffList[-1][1]

    #
    # Sanity Check: head / tail of the sorted list next to max / min.
    # Written as a single formatted string so it prints the same on
    # Python 2 and Python 3.
    #
    print("%s %s %s %s" % (ClusterCoeffList[0], maxClusterCoeff,
                           ClusterCoeffList[-1], minClusterCoeff))

    # When every node has the same coefficient the range is zero; report 0.0
    # instead of dividing by zero.
    coeffRange = maxClusterCoeff - minClusterCoeff
    for nodeId, clusterCoeff in ClusterCoeffList:
        if coeffRange:
            normClusterCoeff = (clusterCoeff - minClusterCoeff) / coeffRange
        else:
            normClusterCoeff = 0.0
        NodeAttributes[nodeId]['NormClusterCoeff'] = normClusterCoeff

    return NodeAttributes
Ejemplo n.º 2
0
def get_hits_venues():
    """Run HITS on the paper/venue reference graph and dump the scores.

    Reads the directed edge list ``paperid_venueid_ref.txt`` (string node
    labels in columns 0 and 1), runs ``snap.GetHits`` with 1000 as its fourth
    argument, and writes one ``<label>,<score>`` line per node to
    ``paper_venues_hits_hub.txt`` and ``paper_venues_hits_auth.txt``.

    The output files are opened only after the scores have been computed, so
    a failure while loading or scoring does not truncate existing results,
    and they are closed even if a write fails.
    """
    mapping = snap.TStrIntSH()
    t0 = time()
    G0 = snap.LoadEdgeListStr(snap.PNGraph, "paperid_venueid_ref.txt", 0, 1,
                              mapping)
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(G0, NIdHubH, NIdAuthH, 1000)
    print("HITS time:", round(time() - t0, 3), "s")

    with open("paper_venues_hits_hub.txt", 'w') as hub_file:
        for item in NIdHubH:
            # mapping.GetKey converts the integer node id back to its label.
            hub_file.write(
                str(mapping.GetKey(item)) + "," + str(NIdHubH[item]) + '\n')

    with open("paper_venues_hits_auth.txt", 'w') as auth_file:
        for item in NIdAuthH:
            auth_file.write(
                str(mapping.GetKey(item)) + "," + str(NIdAuthH[item]) + '\n')

    print("finish hits!")
Ejemplo n.º 3
0
def quick_properties(graph, name, dic_path):
    """Print a quick structural overview of the graph called ``name``.

    Args:
        graph: SNAP graph to summarise.
        name: label used in the printed header.
        dic_path: path of a pickled dict ``{player: node id}`` used to turn
            the top-ranked node ids back into player names.

    Raises:
        KeyError: if a top-ranked node id has no entry in the pickled dict.
        IndexError: if a ranking has fewer than three entries.
    """
    n_edges = graph.GetEdges()
    n_nodes = graph.GetNodes()
    print("##########")
    print("Quick overview of {} Network".format(name))
    print("##########")
    # .format() must be applied to the string, not to the result of print().
    print("{} Nodes, {} Edges".format(n_nodes, n_edges))
    print("{} Self-edges ".format(snap.CntSelfEdges(graph)))
    print("{} Directed edges, {} Undirected edges".format(
        snap.CntUniqDirEdges(graph), snap.CntUniqUndirEdges(graph)))
    print("{} Reciprocated edges".format(snap.CntUniqBiDirEdges(graph)))
    print("{} 0-out-degree nodes, {} 0-in-degree nodes".format(
        snap.CntOutDegNodes(graph, 0), snap.CntInDegNodes(graph, 0)))
    node_in = graph.GetNI(snap.GetMxInDegNId(graph))
    node_out = graph.GetNI(snap.GetMxOutDegNId(graph))
    # Report the in-/out-degree itself; GetDeg() is the total degree.
    print("Maximum node in-degree: {}, maximum node out-degree: {}".format(
        node_in.GetInDeg(), node_out.GetOutDeg()))
    print("###")
    components = snap.TCnComV()
    snap.GetWccs(graph, components)
    max_wcc = snap.GetMxWcc(graph)
    print("{} Weakly connected components".format(components.Len()))
    print("Largest Wcc: {} Nodes, {} Edges".format(max_wcc.GetNodes(),
                                                   max_wcc.GetEdges()))
    prankH = snap.TIntFltH()
    snap.GetPageRank(graph, prankH)
    sorted_prankH = sorted(prankH, key=lambda key: prankH[key], reverse=True)
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(graph, NIdHubH, NIdAuthH)
    sorted_NIdHubH = sorted(NIdHubH,
                            key=lambda key: NIdHubH[key],
                            reverse=True)
    sorted_NIdAuthH = sorted(NIdAuthH,
                             key=lambda key: NIdAuthH[key],
                             reverse=True)

    # NOTE(security): pickle.load executes arbitrary code from the file; only
    # use this with dictionaries produced by a trusted source.
    with open(dic_path, 'rb') as dic_id:
        mydict = pickle.load(dic_id)

    # Invert {player: id} once instead of scanning keys/values per lookup.
    # setdefault keeps the first player seen for an id.
    id_to_player = {}
    for player, node_id in mydict.items():
        id_to_player.setdefault(node_id, player)

    def top3(sorted_ids):
        # Names of the three best-ranked node ids, best first.
        return [id_to_player[node_id] for node_id in sorted_ids[:3]]

    print("3 most central players by PageRank scores: {}, {}, {}".format(
        *top3(sorted_prankH)))
    print("Top 3 hubs: {}, {}, {}".format(*top3(sorted_NIdHubH)))
    print("Top 3 authorities: {}, {}, {}".format(*top3(sorted_NIdAuthH)))
Ejemplo n.º 4
0
    def getNodeAttributes(self,UGraph):
        """Build a per-node matrix of centrality features.

        Each row corresponds to one node, in ``UGraph.Nodes()`` order, and
        holds, in this column order: PageRank, HITS hub score, HITS authority
        score, betweenness centrality, closeness centrality, farness
        centrality and node eccentricity.

        Every score is looked up by node id, so the columns of a row always
        describe the same node regardless of the order in which the SNAP hash
        tables iterate.

        Args:
            UGraph: SNAP graph passed to the centrality routines.

        Returns:
            ``numpy`` array built from the list of per-node feature rows.
        """
        # page rank
        PRankH = snap.TIntFltH()
        snap.GetPageRank(UGraph, PRankH)

        # HITS hub / authority scores
        NIdHubH = snap.TIntFltH()
        NIdAuthH = snap.TIntFltH()
        snap.GetHits(UGraph, NIdHubH, NIdAuthH)

        # Betweenness Centrality (the edge table is a required output
        # argument but is not used here)
        Nodes = snap.TIntFltH()
        Edges = snap.TIntPrFltH()
        snap.GetBetweennessCentr(UGraph, Nodes, Edges, 1.0)

        attriList = []
        for NI in UGraph.Nodes():
            nodeId = NI.GetId()
            attriList.append([
                PRankH[nodeId],
                NIdHubH[nodeId],
                NIdAuthH[nodeId],
                Nodes[nodeId],
                snap.GetClosenessCentr(UGraph, nodeId),
                snap.GetFarnessCentr(UGraph, nodeId),
                snap.GetNodeEcc(UGraph, nodeId, True),
            ])

        return np.array(attriList)
Ejemplo n.º 5
0
def hits(graph_filename):
    """Load a string-labelled directed edge list and score it with HITS.

    Args:
        graph_filename: path of the edge list; columns 0 and 1 hold the
            source and destination labels.

    Returns:
        Tuple ``(label -> id table, id -> hub score, id -> authority score)``
        of SNAP hash tables.
    """
    label_to_id = snap.TStrIntSH()
    hub_by_id = snap.TIntFltH()
    authority_by_id = snap.TIntFltH()

    directed_graph = snap.LoadEdgeListStr(
        snap.PNGraph, graph_filename, 0, 1, label_to_id)

    # 1000 is forwarded as the fourth GetHits argument (iteration count).
    snap.GetHits(directed_graph, hub_by_id, authority_by_id, 1000)

    return label_to_id, hub_by_id, authority_by_id
Ejemplo n.º 6
0
def getUndirAttribute(filename, node_num, weighted=None, param=1.0):
    """Collect per-node attributes of an undirected edge-list graph.

    Args:
        filename: path of the edge list (columns 0 and 1); the file stem is
            also stored in the ``'Graph'`` column.
        node_num: number of rows in the result; node ids are used directly as
            row indices, so they are expected to lie in ``[0, node_num)``.
        weighted: when truthy, the weighted-degree columns are added and the
            weighted helper functions are called.
        param: fourth argument forwarded to ``snap.GetBetweennessCentr``.

    Returns:
        ``pandas.DataFrame`` with one row per node id.
    """
    graph = snap.LoadEdgeList(snap.PUNGraph, filename, 0, 1)

    columns = [
        'Graph', 'Id', 'Degree', 'NodeBetweennessCentrality', 'PageRank',
        'EgonetDegree', 'AvgNeighborDeg', 'EgonetConnectivity'
    ]
    if weighted:
        columns.extend([
            'WeightedDegree', 'EgoWeightedDegree', 'AvgWeightedNeighborDeg',
            'EgonetWeightedConnectivity'
        ])

    table = pd.DataFrame(np.zeros((node_num, len(columns))), columns=columns)

    graph_label = filename.split('/')[-1].split('.')[0]
    table['Graph'] = [graph_label] * node_num
    table['Id'] = range(0, node_num)

    # Degree: (node id, degree) pairs scattered into a dense vector.
    degree_by_id = np.zeros((node_num, ))
    degree_pairs = snap.TIntPrV()
    snap.GetNodeOutDegV(graph, degree_pairs)
    for pair in degree_pairs:
        degree_by_id[pair.GetVal1()] = pair.GetVal2()
    table['Degree'] = degree_by_id

    # The ego-network helpers fill their columns of ``table`` in place.
    getEgoAttr(graph, node_num, table, directed=False)
    if weighted:
        weight_frame = getWeightedDegree(filename, node_num, table,
                                         directed=False)
        getWeightedEgoAttr(graph, node_num, table, weight_frame,
                           directed=False)

    def dense(score_table):
        # Copy an id -> score hash table into a zero-initialised vector.
        vector = np.zeros((node_num, ))
        for node_id in score_table:
            vector[node_id] = score_table[node_id]
        return vector

    # Betweenness Centrality
    node_betweenness = snap.TIntFltH()
    edge_betweenness = snap.TIntPrFltH()
    snap.GetBetweennessCentr(graph, node_betweenness, edge_betweenness, param)
    table['NodeBetweennessCentrality'] = dense(node_betweenness)

    # PageRank
    page_rank = snap.TIntFltH()
    snap.GetPageRank(graph, page_rank)
    table['PageRank'] = dense(page_rank)

    return table
Ejemplo n.º 7
0
    def compute_hub_authority_score(self, graph):
        """Run HITS on ``graph`` and return its hub and authority tables.

        Both results are ``snap.TIntFltH`` hash tables filled by
        ``snap.GetHits``: integer node id -> float score.

        Returns:
            Tuple ``(hub_scores, authority_scores)``.
        """
        hubs = snap.TIntFltH()         # output: node id -> hub score
        authorities = snap.TIntFltH()  # output: node id -> authority score

        snap.GetHits(graph, hubs, authorities)

        return hubs, authorities
def GetOverlap(filePathName, Graph, t):
    """Count how many top-100 nodes agree between SNAP and a ranking file.

    Node ids ``0 .. Graph.GetNodes() - 1`` are ranked by the measure selected
    with ``t`` and the best 100 are compared with the node ids found in the
    first column of the first 100 lines of ``filePathName``.

    Args:
        filePathName: text file whose lines start with a node id.
            Blank lines are skipped and a file shorter than 100 lines is
            read to its end.
        Graph: SNAP graph; node ids are used as list indices, so they are
            expected to lie in ``[0, Graph.GetNodes())``.
        t: 1 = closeness centrality, 2 = betweenness centrality (NodeFrac
            0.8), 3 = PageRank. Any other value leaves all reference scores
            at zero.

    Returns:
        Size of the intersection of the two top-100 node sets.
    """
    num_nodes = Graph.GetNodes()

    # l is the final ranking of the nodes; it starts as all node ids and is
    # then sorted by the reference vector.
    l = list(range(num_nodes))
    ref_vect = [0] * num_nodes

    if t == 1:
        for NI in Graph.Nodes():
            ref_vect[NI.GetId()] = snap.GetClosenessCentr(Graph, NI.GetId())
    elif t == 2:
        Nodes = snap.TIntFltH()
        Edges = snap.TIntPrFltH()
        # Setting NodeFrac parameter as 0.8 as instructed
        snap.GetBetweennessCentr(Graph, Nodes, Edges, 0.8)
        for node in Nodes:
            ref_vect[node] = Nodes[node]
    elif t == 3:
        PRankH = snap.TIntFltH()
        # Taking the limit as 1e-6 as used in gen_centrality.py
        snap.GetPageRank(Graph, PRankH, 0.8, 1e-6, 100)
        for item in PRankH:
            ref_vect[item] = PRankH[item]

    # Descending by score. The sort is stable, so ties keep ascending id
    # order exactly as the previous comparator-based sort did.
    l.sort(key=lambda node_id: ref_vect[node_id], reverse=True)
    S1 = set(l[:100])

    # Top node ids from the text file (first column of the first 100 lines).
    S2 = set()
    with open(filePathName, 'r') as f:
        for line_no, line in enumerate(f):
            if line_no >= 100:
                break
            fields = line.split()
            if fields:
                S2.add(int(fields[0]))

    return len(S1.intersection(S2))
Ejemplo n.º 9
0
def calc_HubAndAuthorityScores(Graph, node_to_g):
    """Compute HITS scores and re-key them through ``node_to_g``.

    Args:
        Graph: SNAP graph passed to ``snap.GetHits``.
        node_to_g: mapping from node id to the key used in the results
            (called ``prot`` in this code).

    Returns:
        Tuple ``(prot_to_hub, prot_to_authority)`` of plain dicts.
    """
    hub_table = snap.TIntFltH()
    authority_table = snap.TIntFltH()
    snap.GetHits(Graph, hub_table, authority_table)

    prot_to_hub = {
        node_to_g[node_id]: hub_table[node_id] for node_id in hub_table
    }
    prot_to_authority = {
        node_to_g[node_id]: authority_table[node_id]
        for node_id in authority_table
    }
    return (prot_to_hub, prot_to_authority)
Ejemplo n.º 10
0
def HITS(G):
    """Print the running maxima of the HITS hub and authority scores.

    For the hub table and then the authority table, every node whose score
    exceeds all scores seen before it is printed as ``<node id> <score>``.
    The last line printed for each table is therefore the table's maximum.

    Args:
        G: SNAP graph passed to ``snap.GetHits``.
    """
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(G, NIdHubH, NIdAuthH)

    for scores in (NIdHubH, NIdAuthH):
        # Local name chosen so the builtin max() is not shadowed.
        best = 0.0
        for item in scores:
            if scores[item] > best:
                best = scores[item]
                # Single formatted string: same output on Python 2 and 3.
                print("%s %s" % (item, scores[item]))
Ejemplo n.º 11
0
def pageRank(rankCommands, Graph, conn, cur):
    """Compute PageRank, report the elapsed time and hand off the ranking.

    Args:
        rankCommands: forwarded unchanged to ``createTable``.
        Graph: SNAP graph passed to ``snap.GetPageRank``.
        conn: forwarded unchanged to ``createTable``.
        cur: forwarded unchanged to ``createTable``.
    """
    PRankH = snap.TIntFltH()
    before_time = time.time()
    snap.GetPageRank(Graph, PRankH)
    # Formatted as one string so it prints identically on Python 2 and 3
    # (the two spaces reproduce the previous print-statement output).
    print("Total handling time is:  %s" % (time.time() - before_time))
    # Node ids ordered from highest to lowest PageRank score.
    slist = sorted(PRankH, key=lambda key: PRankH[key], reverse=True)
    createTable(rankCommands, slist, PRankH, conn, cur)
def get_ev_centr_sum(G, n1, n2, reset=False):
    """Return the summed eigenvector centrality of nodes ``n1`` and ``n2``.

    The centrality table is cached in the module-level ``NIdEigenH`` and is
    recomputed only when ``reset`` is true or the cache is ``None``.

    Args:
        G: SNAP graph passed to ``snap.GetEigenVectorCentr``.
        n1: first node id.
        n2: second node id.
        reset: force recomputation of the cached table.
    """
    global NIdEigenH
    if reset or NIdEigenH is None:
        print('Initializing EV Centrality...')
        NIdEigenH = snap.TIntFltH()
        snap.GetEigenVectorCentr(G, NIdEigenH, 1e-2, 50)
    return NIdEigenH[n1] + NIdEigenH[n2]
Ejemplo n.º 13
0
 def generate_scores(self):
     """Score every edge by common neighbours and edge betweenness.

     For each edge ``(src, dst)`` of ``self.g`` the score is::

         self.l * cn / max_cn + (1 - self.l) * eb / max_eb

     where ``cn`` is the number of nodes at hop 1 from both endpoints and
     ``eb`` is the edge betweenness from ``snap.GetBetweennessCentr`` (called
     with ``self.node_frac``). A term whose maximum is zero contributes 0
     instead of dividing by zero.

     Returns:
         Dict ``(src, dst) -> score``; empty when the graph has no edges.
     """
     edge_list = [(e.GetSrcNId(), e.GetDstNId()) for e in self.g.Edges()]
     if not edge_list:
         return {}

     neighbor_cache = {}

     def neighbors(node_id):
         # Hop-1 node set, computed once per node rather than once per edge.
         if node_id not in neighbor_cache:
             hop_nodes = snap.TIntV()
             snap.GetNodesAtHop(self.g, node_id, 1, hop_nodes, True)
             neighbor_cache[node_id] = set(hop_nodes)
         return neighbor_cache[node_id]

     common_neighbor_scores = {}
     for src, dst in edge_list:
         common_neighbor_scores[(src, dst)] = len(
             neighbors(src) & neighbors(dst))

     Nodes = snap.TIntFltH()
     Edges = snap.TIntPrFltH()
     snap.GetBetweennessCentr(self.g, Nodes, Edges, self.node_frac, True)
     edge_betweenness_scores = {}
     for e in Edges:
         edge_betweenness_scores[(e.GetVal1(), e.GetVal2())] = Edges[e]

     max_cn = max(common_neighbor_scores.values())
     max_eb = max(edge_betweenness_scores.values()) if edge_betweenness_scores else 0
     print(common_neighbor_scores)
     print(edge_betweenness_scores)

     scores = {}
     for src, dst in edge_list:
         # float() keeps the ratio fractional on Python 2 as well.
         if max_cn:
             cn_term = self.l * common_neighbor_scores[(src, dst)] / float(max_cn)
         else:
             cn_term = 0.0
         if max_eb:
             eb_term = (1 - self.l) * edge_betweenness_scores[(src, dst)] / max_eb
         else:
             eb_term = 0.0
         scores[(src, dst)] = cn_term + eb_term
     return scores
Ejemplo n.º 14
0
def computeBetweenessCentrality(G, NodeAttributes):
    """Attach raw and min-max normalised node betweenness to each node.

    Args:
        G: SNAP graph passed to ``snap.GetBetweennessCentr`` (NodeFrac 1.0).
        NodeAttributes: mapping ``node id -> attribute dict``. It is updated
            in place with the keys ``'Betweeness'`` and ``'normBetweeness'``.

    Returns:
        The same ``NodeAttributes`` object that was passed in.
    """
    Nodes = snap.TIntFltH()
    # Edge betweenness is a required output argument but is not used here.
    Edges = snap.TIntPrFltH()
    snap.GetBetweennessCentr(G, Nodes, Edges, 1.0)

    BetweenessNodeList = []
    for node in Nodes:
        NodeAttributes[node]['Betweeness'] = Nodes[node]
        BetweenessNodeList.append((node, Nodes[node]))

    # Nothing to normalise on an empty result.
    if not BetweenessNodeList:
        return NodeAttributes

    values = [betweeness for _, betweeness in BetweenessNodeList]
    minBetweeness = min(values)
    maxBetweeness = max(values)

    # If every node has the same betweenness the range is zero; report 0.0
    # instead of dividing by zero.
    betweenessRange = maxBetweeness - minBetweeness
    for (node, betweeness) in BetweenessNodeList:
        if betweenessRange:
            normBetweeness = (betweeness - minBetweeness) / betweenessRange
        else:
            normBetweeness = 0.0
        NodeAttributes[node]['normBetweeness'] = normBetweeness

    return NodeAttributes
def get_page_rank_sum(G, n1, n2, reset=False):
    """Return the summed PageRank score of nodes ``n1`` and ``n2``.

    The PageRank table is cached in the module-level ``PRankH`` and is
    recomputed only when ``reset`` is true or the cache is ``None``.

    Args:
        G: SNAP graph passed to ``snap.GetPageRank``.
        n1: first node id.
        n2: second node id.
        reset: force recomputation of the cached table.
    """
    global PRankH
    if reset or PRankH is None:
        print('Initializing Page Rank')
        PRankH = snap.TIntFltH()
        # GetPageRank takes (Graph, PRankH, C, Eps, MaxIter). The damping
        # factor is passed explicitly so that 1e-2 and 50 land on Eps and
        # MaxIter rather than on C and Eps.
        snap.GetPageRank(G, PRankH, 0.85, 1e-2, 50)
    return PRankH[n1] + PRankH[n2]
Ejemplo n.º 16
0
def nodes_centrality_page_rank():
    """Print the five nodes of the global graph ``G`` with highest PageRank.

    Each line shows the node id followed by its PageRank score.
    """
    page_rank = snap.TIntFltH()
    snap.GetPageRank(G, page_rank)

    ranked_ids = [node_id for node_id in page_rank]
    ranked_ids.sort(key=lambda node_id: page_rank[node_id], reverse=True)

    # top 5
    for node_id in ranked_ids[:5]:
        print(node_id, page_rank[node_id])
Ejemplo n.º 17
0
def CalculateClusteringCoefficient(graph):
    """Print the clustering coefficient of every node in ``graph``.

    Args:
        graph: SNAP graph passed to ``snap.GetNodeClustCf``.
    """
    NIdCCfH = snap.TIntFltH()
    snap.GetNodeClustCf(graph, NIdCCfH)
    # print(...) with one pre-formatted string behaves the same on
    # Python 2 and Python 3.
    print("CLUSTERRING COEFFICIENT")
    for item in NIdCCfH:
        print("Node %d th have coefficient %f" % (item, NIdCCfH[item]))
Ejemplo n.º 18
0
def RW_iteration(graph, PRankH, C=0.85):
    """Perform one random-walk update of the PageRank table in place.

    Args:
        graph: graph passed to ``get_in_node_PR_weight`` and queried for its
            node count.
        PRankH: hash table of current ranks; every entry is overwritten with
            its new value.
        C: damping factor applied to the value returned by
            ``get_in_node_PR_weight``.

    Returns:
        Sum over all entries of ``|old rank - new rank|``.
    """
 
    # Performs one Random Walk
    
    PRankH_temp = snap.TIntFltH()

    # Step 1: calculate new page ranks from in-nodes
    # This new page rank is 'dampened' by a factor C, usually about 0.85

    # NOTE(review): this loop expects iteration to yield entries exposing
    # GetKey()/GetDat(); confirm against the snap.py version in use.
    for i in PRankH:
        node_id = i.GetKey()
        # presumably the rank flowing into node_id from its in-neighbours;
        # the helper is defined elsewhere - verify.
        PR = get_in_node_PR_weight(graph, node_id, PRankH)
        PRankH_temp.AddDat(node_id, PR * C)
        
    # Step 2: The total rank lost to leakage is calculated (sum)
    # The leaked value is then apportioned to all nodes by adding leakage/|N| to each node

    # Note: the local name `sum` shadows the builtin inside this function.
    sum = diff = NewVal = 0.00
    
    for i in PRankH_temp:
        sum += i.GetDat()
    
    leaked = (1 - sum)/float(graph.GetNodes())

    for i in PRankH:
        # The hash tables are called like functions here to read a value.
        NewVal = PRankH_temp(i.GetKey()) + leaked
        diff += abs(PRankH(i.GetKey()) - NewVal)
        # Existing keys are overwritten while PRankH is being iterated.
        PRankH.AddDat(i.GetKey(), NewVal)

    print diff

    # Return value is the 'difference' in value between the new PRs and the old ones.
    # After this value goes below some threshold, we will want to stop iterating random walks

    return diff
def PageRank(d, e):
    """Split ``e`` across both directions of each edge by PageRank share.

    The file ``d`` starts with a header line ``"<num nodes> <num edges>"``
    followed by one space-separated ``"<src> <dst>"`` line per edge.
    Non-digit characters in the two endpoint tokens are ignored. Nodes
    ``0 .. num nodes - 1`` are created in a directed graph.

    Args:
        d: path of the graph file.
        e: total amount distributed over the two directions of an edge.

    Returns:
        Dict mapping ``(u, v)`` and ``(v, u)`` for every listed edge to
        ``e * PR(u) / (PR(u) + PR(v))`` and ``e * PR(v) / (PR(u) + PR(v))``,
        or to 0 for both when both PageRank scores are zero.
    """
    with open(d) as f:
        s1 = re.split('\n', f.read())

    a = re.split(' ', s1[0])
    num_nodes = int(a[0])
    num_edges = int(a[1])

    G1 = snap.PNGraph.New()
    for i in range(0, num_nodes):
        G1.AddNode(i)

    # Parse each edge once so graph construction and the result keys are
    # guaranteed to use the same node ids.
    edges = []
    for i in range(1, num_edges + 1):
        b = re.split(' ', s1[i])
        src = int(re.sub(r"\D", "", b[0]))
        dst = int(re.sub(r"\D", "", b[1]))
        G1.AddEdge(src, dst)
        edges.append((src, dst))

    PRankH = snap.TIntFltH()
    snap.GetPageRank(G1, PRankH)

    EdgePara = dict()
    for src, dst in edges:
        pr_src = PRankH[src]
        pr_dst = PRankH[dst]
        if pr_src == 0 and pr_dst == 0:
            # Assign (not compare) so the zero-rank case gets an entry
            # instead of raising KeyError.
            EdgePara[(src, dst)] = 0
            EdgePara[(dst, src)] = 0
        else:
            EdgePara[(src, dst)] = e * pr_src / (pr_src + pr_dst)
            EdgePara[(dst, src)] = e * pr_dst / (pr_src + pr_dst)

    return EdgePara
Ejemplo n.º 20
0
def get_betweenness_centr(net, label, outpath):
    """Compute node betweenness centrality and write it to two files.

    All nodes, sorted by descending betweenness, are written as
    ``<id>\\t<value>`` lines to ``outpath + label + '-node_btweennesss_centr'``
    (the file name is kept exactly as it always was), and the first 100 of
    them (or fewer if the graph is smaller) to
    ``outpath + label + '-node_betweenness_centr-top100'``.

    :param net: SNAP graph passed to ``snap.GetBetweennessCentr``.
    :param label: string inserted into both output file names.
    :param outpath: prefix (directory) of both output file names.
    :return: tuple ``(ids, values)`` of two tuples in descending order of
        betweenness; two empty tuples when there are no nodes.
    """
    Nodes = snap.TIntFltH()
    Edges = snap.TIntPrFltH()
    snap.GetBetweennessCentr(net, Nodes, Edges, 1.0)

    ranked = sorted(((item, Nodes[item]) for item in Nodes),
                    key=operator.itemgetter(1), reverse=True)

    with open(outpath + label + '-node_btweennesss_centr', 'w') as all_file:
        for node_id, value in ranked:
            all_file.write(str(node_id) + '\t' + str(value) + '\n')

    # Slicing copes with graphs that have fewer than 100 nodes.
    with open(outpath + label + '-node_betweenness_centr-top100',
              'w') as top_file:
        for node_id, value in ranked[:100]:
            top_file.write(str(node_id) + '\t' + str(value) + '\n')

    if not ranked:
        return (), ()
    ids, values = zip(*ranked)
    return ids, values
Ejemplo n.º 21
0
def CalculatePageRank(graph, alpha, number_iteration):
    """Return PageRank scores of ``graph`` as a plain dict.

    Args:
        graph: SNAP graph passed to ``snap.GetPageRank``.
        alpha: third ``GetPageRank`` argument (damping factor).
        number_iteration: fifth ``GetPageRank`` argument (iteration cap);
            the tolerance in between is fixed at 1e-4.

    Returns:
        Dict ``node id -> PageRank score``.
    """
    rank_table = snap.TIntFltH()
    snap.GetPageRank(graph, rank_table, alpha, 1e-4, number_iteration)
    return {node_id: rank_table[node_id] for node_id in rank_table}
Ejemplo n.º 22
0
    def compute_betwenness_centrality(self, graph):
        """Compute node and edge betweenness centrality of ``graph``.

        ``snap.GetBetweennessCentr`` is called with 1.0 as its fourth
        argument.

        Returns:
            Tuple ``(node table, edge table)``: a ``snap.TIntFltH`` and a
            ``snap.TIntPrFltH`` filled by SNAP.
        """
        node_scores = snap.TIntFltH()
        edge_scores = snap.TIntPrFltH()

        snap.GetBetweennessCentr(graph, node_scores, edge_scores, 1.0)

        return node_scores, edge_scores
Ejemplo n.º 23
0
    def getCentr(self, centrMethod):
        """Evaluate ``centrMethod`` for every node of ``self.G``.

        Args:
            centrMethod: callable invoked as ``centrMethod(graph, node_id)``.

        Returns:
            ``snap.TIntFltH`` mapping each node id to the returned value.
        """
        graph = self.G
        centrality_by_id = snap.TIntFltH()
        for node in graph.Nodes():
            node_id = node.GetId()
            centrality_by_id[node_id] = centrMethod(graph, node_id)

        return centrality_by_id
Ejemplo n.º 24
0
 def rank_eigvec(self):
     """Compute eigenvector centrality for ``self._graph``.

     The SNAP result table must hold exactly ``self._num_nodes`` entries
     (checked with an assertion). It is then converted with
     ``snap_hash_to_dict``; any ordering of the returned mapping is decided
     by that helper.
     """
     centrality = snap.TIntFltH()
     snap.GetEigenVectorCentr(self._graph, centrality)

     size_mismatch = 'Number of nodes in centrality result must match number of nodes in graph'
     assert len(centrality) == self._num_nodes, size_mismatch

     return snap_hash_to_dict(centrality)
Ejemplo n.º 25
0
def compute_page_rank(graph):
    """Compute PageRank for ``graph``, print every score and return the table.

    Args:
        graph: SNAP graph passed to ``snap.GetPageRank``.

    Returns:
        ``snap.TIntFltH`` mapping node id to PageRank score.
    """
    logging.info("compute pagerank")
    PRankH = snap.TIntFltH()
    snap.GetPageRank(graph, PRankH)
    for item in PRankH:
        # Single formatted string: same output on Python 2 and Python 3.
        print("%s %s" % (item, PRankH[item]))
    return PRankH
Ejemplo n.º 26
0
 def rank_pagerank(self , C=0.85, Eps=1e-4, MaxIter=100):
     """Compute PageRank for ``self._graph``.

     ``C``, ``Eps`` and ``MaxIter`` are forwarded to ``snap.GetPageRank`` in
     that order. The SNAP result table must hold exactly ``self._num_nodes``
     entries (checked with an assertion). It is then converted with
     ``snap_hash_to_dict``; any ordering of the returned mapping is decided
     by that helper.
     """
     page_rank = snap.TIntFltH()
     snap.GetPageRank(self._graph, page_rank, C, Eps, MaxIter)

     size_mismatch = 'Number of nodes in centrality result must match number of nodes in graph'
     assert len(page_rank) == self._num_nodes, size_mismatch

     return snap_hash_to_dict(page_rank)
Ejemplo n.º 27
0
    def getBetweennessCentr(self):
        """Return the node betweenness centrality table of ``self.G``.

        ``snap.GetBetweennessCentr`` is called with 1.0 as its fourth
        argument; the edge table it also fills is discarded.
        """
        node_scores = snap.TIntFltH()
        discarded_edge_scores = snap.TIntPrFltH()
        snap.GetBetweennessCentr(self.G, node_scores, discarded_edge_scores,
                                 1.0)

        return node_scores
Ejemplo n.º 28
0
def betweenness_test(name):
    """Compute, pickle and plot node betweenness for the map called ``name``.

    Does nothing except print a notice when
    ``DATA_PATH + name + ".between"`` already exists. Otherwise the graph is
    loaded with ``osmParser.simpleLoadFromFile``, betweenness is computed
    with 0.25 as the fourth ``GetBetweennessCentr`` argument, the
    ``{node id: betweenness}`` dict is pickled (protocol 1) to that file and
    passed to ``plotTopK``.

    Args:
        name: data set name used to build the input and output paths.
    """
    out_path = DATA_PATH + name + ".between"
    if os.path.isfile(out_path):
        print("Skipping %s" % (name,))
        return

    start = time.time()

    G, coords = osmParser.simpleLoadFromFile(name)

    print("Calculating betweenness %s" % (name,))

    nodeToBetweenness = snap.TIntFltH()
    edgeToBetweenness = snap.TIntPrFltH()
    snap.GetBetweennessCentr(G, nodeToBetweenness, edgeToBetweenness, 0.25)

    betweenness = {}
    for node in nodeToBetweenness:
        betweenness[node] = nodeToBetweenness[node]

    # Pickle protocol 1 is binary, so the file is opened in binary mode, and
    # the context manager makes sure the data is flushed and the file closed.
    with open(out_path, 'wb') as betweenOut:
        pickle.dump(betweenness, betweenOut, 1)

    plotTopK(name, betweenness, coords, "GnBu")

    end = time.time()
    print("took %s seconds" % (end - start))
Ejemplo n.º 29
0
def pageRank_components(g):
    """Write the top PageRank score of each weakly connected component.

    ``component_pr.txt`` receives a ``Total components:<count>`` line
    followed by one ``<node count> <score>`` line per component, where the
    score is the first entry of that component's PageRank table after
    sorting by value in descending order. Components with exactly two nodes
    are skipped.

    Args:
        g: SNAP directed graph (``PNGraph``) to decompose.
    """
    print('executing pagerank components ---- getting components for page rank')
    Components = snap.TCnComV()
    snap.GetWccs(g, Components)

    cgraphs = []
    for com in Components:
        v = snap.TIntV()
        for ni in com:
            v.Add(ni)
        cgraphs.append(snap.GetSubGraph_PNGraph(g, v))

    print('components retrived for pagerank')

    # The context manager closes the file even if a PageRank call fails.
    with open('component_pr.txt', 'w') as f:
        f.write('Total components:' + str(len(cgraphs)) + '\n')
        for graph in cgraphs:
            # NOTE(review): only components of exactly two nodes are skipped;
            # confirm this is the intended filter.
            if graph.GetNodes() == 2:
                continue
            sprank = snap.TIntFltH()
            snap.GetPageRank_PNGraph(graph, sprank)
            # Descending by value, so the first entry is the top score.
            sprank.SortByDat(False)
            f.write(
                str(graph.GetNodes()) + ' ' +
                str(sprank[sprank.BegI().GetKey()]) + '\n')

    print('finished writing pagerank components values')
def get_top_packages(graph_path, n):
    """Return the package names of the top ``n`` PageRank nodes.

    The graph is loaded from ``graph_path`` as a ``snap.TNEANet``. PageRank
    scores are cached next to the graph in ``<graph name>_pageranks``: the
    file is written on the first call and loaded on later ones.

    Note that the process working directory is changed to the directory of
    the graph file and is not restored.

    Args:
        graph_path: path to the saved graph; ``.graph`` is removed from the
            base name to build the cache file name.
        n: forwarded to ``get_top_nodes_from_hashtable``.

    Returns:
        List of ``"pkg"`` attribute values, ordered by ascending second
        element of the pairs returned by ``get_top_nodes_from_hashtable``.
    """
    abs_graph_path = os.path.abspath(graph_path)
    base_name = os.path.basename(abs_graph_path).replace(".graph", "")
    graph_stream = snap.TFIn(abs_graph_path)
    network = snap.TNEANet.Load(graph_stream)

    # rebuild the id => pkg dictionary
    pkg_by_id = {
        node.GetId(): network.GetStrAttrDatN(node.GetId(), "pkg")
        for node in network.Nodes()
    }

    # snap.py doesn't support absolute paths for some operations, so work
    # from inside the graph's directory.
    os.chdir(os.path.dirname(abs_graph_path))

    cache_file = base_name + "_pageranks"
    scores = snap.TIntFltH()
    if os.path.isfile(cache_file):
        cache_stream = snap.TFIn(cache_file)
        scores.Load(cache_stream)
    else:
        # Damping factor 0.85; remaining GetPageRank arguments are defaults.
        snap.GetPageRank(network, scores, 0.85)
        cache_out = snap.TFOut(cache_file)
        scores.Save(cache_out)

    ranked_pairs = get_top_nodes_from_hashtable(scores, n)
    ranked_pairs.sort(key=itemgetter(1))
    return [pkg_by_id[pair[0]] for pair in ranked_pairs]