Esempio n. 1
0
def get_hits_venues():
    """Compute HITS scores for the paper/venue reference graph and save them.

    Loads the directed edge list ``paperid_venueid_ref.txt`` (column 0 is the
    source name, column 1 the destination name), runs ``snap.GetHits`` with
    1000 iterations and writes one ``name,score`` line per node to
    ``paper_venues_hits_hub.txt`` and ``paper_venues_hits_auth.txt``.
    """
    # Filled by LoadEdgeListStr; maps the string names in the file to node ids.
    mapping = snap.TStrIntSH()
    t0 = time()
    # Context managers guarantee both outputs are closed even when loading
    # the graph or running HITS raises.
    with open("paper_venues_hits_hub.txt", 'w') as file_output_1, \
            open("paper_venues_hits_auth.txt", 'w') as file_output_2:
        G0 = snap.LoadEdgeListStr(snap.PNGraph, "paperid_venueid_ref.txt",
                                  0, 1, mapping)
        NIdHubH = snap.TIntFltH()
        NIdAuthH = snap.TIntFltH()
        snap.GetHits(G0, NIdHubH, NIdAuthH, 1000)
        print("HITS time:", round(time() - t0, 3), "s")
        # mapping.GetKey(node_id) converts a node id back to its input string.
        for item in NIdHubH:
            file_output_1.write(
                str(mapping.GetKey(item)) + "," + str(NIdHubH[item]) + '\n')
        for item in NIdAuthH:
            file_output_2.write(
                str(mapping.GetKey(item)) + "," + str(NIdAuthH[item]) + '\n')
        print("finish hits!")
Esempio n. 2
0
def quick_properties(graph, name, dic_path):
    """Print quick properties of the graph ``name``.

    ``graph`` is a SNAP graph, ``name`` is only used in the printed header and
    ``dic_path`` is the path of a pickled dict ``{player: node_id}`` used to
    translate the top-ranked node ids back to player names.

    Every ``print`` takes a single pre-formatted string so the function
    behaves the same under Python 2 and Python 3.
    """
    n_edges = graph.GetEdges()
    n_nodes = graph.GetNodes()
    print("##########")
    print("Quick overview of {} Network".format(name))
    print("##########")
    print("{} Nodes, {} Edges".format(n_nodes, n_edges))
    print("{} Self-edges ".format(snap.CntSelfEdges(graph)))
    print("{} Directed edges, {} Undirected edges".format(
        snap.CntUniqDirEdges(graph), snap.CntUniqUndirEdges(graph)))
    print("{} Reciprocated edges".format(snap.CntUniqBiDirEdges(graph)))
    print("{} 0-out-degree nodes, {} 0-in-degree nodes".format(
        snap.CntOutDegNodes(graph, 0), snap.CntInDegNodes(graph, 0)))
    node_in = graph.GetNI(snap.GetMxInDegNId(graph))
    node_out = graph.GetNI(snap.GetMxOutDegNId(graph))
    # GetDeg() is in-degree + out-degree on a directed graph, so report the
    # direction-specific degree of each node instead.
    print("Maximum node in-degree: {}, maximum node out-degree: {}".format(
        node_in.GetInDeg(), node_out.GetOutDeg()))
    print("###")
    components = snap.TCnComV()
    snap.GetWccs(graph, components)
    max_wcc = snap.GetMxWcc(graph)
    print("{} Weakly connected components".format(components.Len()))
    print("Largest Wcc: {} Nodes, {} Edges".format(max_wcc.GetNodes(),
                                                   max_wcc.GetEdges()))

    # Node ids ordered by descending score for each centrality measure.
    prankH = snap.TIntFltH()
    snap.GetPageRank(graph, prankH)
    sorted_prankH = sorted(prankH, key=lambda key: prankH[key], reverse=True)
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(graph, NIdHubH, NIdAuthH)
    sorted_NIdHubH = sorted(NIdHubH,
                            key=lambda key: NIdHubH[key],
                            reverse=True)
    sorted_NIdAuthH = sorted(NIdAuthH,
                             key=lambda key: NIdAuthH[key],
                             reverse=True)

    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use this with dictionaries produced by a trusted source.
    with open(dic_path, 'rb') as dic_id:
        mydict = pickle.load(dic_id)

    # Build the parallel key/value lists once instead of once per lookup.
    players = list(mydict.keys())
    node_ids = list(mydict.values())

    def player_of(node_id):
        """Return the first player whose id is ``node_id``."""
        return players[node_ids.index(node_id)]

    print("3 most central players by PageRank scores: {}, {}, {}".format(
        *[player_of(node_id) for node_id in sorted_prankH[:3]]))
    print("Top 3 hubs: {}, {}, {}".format(
        *[player_of(node_id) for node_id in sorted_NIdHubH[:3]]))
    print("Top 3 authorities: {}, {}, {}".format(
        *[player_of(node_id) for node_id in sorted_NIdAuthH[:3]]))
Esempio n. 3
0
    def getNodeAttributes(self,UGraph):
        """Return a NumPy array with one row of centrality features per node.

        Columns, in order: PageRank, HITS hub score, HITS authority score,
        betweenness centrality, closeness centrality, farness centrality and
        node eccentricity.

        Rows are filled by position in the iteration order of each SNAP
        container, so row ``i`` only describes a single node if all containers
        iterate in the same order -- TODO confirm.
        """
        rows = [[] for _ in range(UGraph.GetNodes())]

        # PageRank
        pagerank = snap.TIntFltH()
        snap.GetPageRank(UGraph, pagerank)
        for pos, node_id in enumerate(pagerank):
            rows[pos].append(pagerank[node_id])

        # HITS: hub score followed by authority score
        hub_scores = snap.TIntFltH()
        auth_scores = snap.TIntFltH()
        snap.GetHits(UGraph, hub_scores, auth_scores)
        for pos, node_id in enumerate(hub_scores):
            rows[pos].extend((hub_scores[node_id], auth_scores[node_id]))

        # Betweenness centrality (the edge table is required by the API but
        # not used here)
        node_betweenness = snap.TIntFltH()
        edge_betweenness = snap.TIntPrFltH()
        snap.GetBetweennessCentr(UGraph, node_betweenness, edge_betweenness,
                                 1.0)
        for pos, node_id in enumerate(node_betweenness):
            rows[pos].append(node_betweenness[node_id])

        # Closeness centrality, farness centrality and node eccentricity are
        # all queried per node id, so a single pass over the nodes suffices.
        for pos, node in enumerate(UGraph.Nodes()):
            node_id = node.GetId()
            rows[pos].append(snap.GetClosenessCentr(UGraph, node_id))
            rows[pos].append(snap.GetFarnessCentr(UGraph, node_id))
            rows[pos].append(snap.GetNodeEcc(UGraph, node_id, True))

        return np.array(rows)
Esempio n. 4
0
def hits(graph_filename):
    """Run HITS on the directed graph stored in ``graph_filename``.

    The file is read as a string edge list (column 0 is the source, column 1
    the destination). Returns ``(name_id_map, id_hub_map, id_auth_map)``: the
    name/id mapping filled while loading, and the hub and authority score
    tables filled by ``snap.GetHits`` after 1000 iterations.
    """
    names = snap.TStrIntSH()
    hub_scores, auth_scores = snap.TIntFltH(), snap.TIntFltH()

    digraph = snap.LoadEdgeListStr(snap.PNGraph, graph_filename, 0, 1, names)
    snap.GetHits(digraph, hub_scores, auth_scores, 1000)

    return names, hub_scores, auth_scores
Esempio n. 5
0
def calc_HubAndAuthorityScores(Graph, node_to_g):
    """Return ``(prot_to_hub, prot_to_authority)`` for the nodes of ``Graph``.

    ``node_to_g`` translates a node id into the key used in the two returned
    dicts, whose values are the HITS hub and authority scores.
    """
    hub_table = snap.TIntFltH()
    auth_table = snap.TIntFltH()
    snap.GetHits(Graph, hub_table, auth_table)

    hub_by_prot = {node_to_g[nid]: hub_table[nid] for nid in hub_table}
    auth_by_prot = {node_to_g[nid]: auth_table[nid] for nid in auth_table}
    return (hub_by_prot, auth_by_prot)
Esempio n. 6
0
    def compute_hub_authority_score(self, graph):
        """Return ``(hub_scores, authority_scores)`` computed by HITS.

        Both values are ``snap.TIntFltH`` hash tables (int keys, float values)
        that ``snap.GetHits`` fills in place: keys are node ids, values are the
        hub scores and the authority scores respectively.
        """
        score_tables = (snap.TIntFltH(), snap.TIntFltH())
        snap.GetHits(graph, *score_tables)
        return score_tables
Esempio n. 7
0
def HITS(G):
    """Print every new running maximum of the HITS hub and authority scores.

    The hub table is scanned first, then the authority table. While scanning,
    each node whose score exceeds the largest score seen so far is printed as
    ``<node id> <score>``, so the last line printed for each table is the
    node with the highest score.
    """
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(G, NIdHubH, NIdAuthH)
    for scores in (NIdHubH, NIdAuthH):
        # Named ``best`` rather than ``max`` so the builtin is not shadowed.
        best = 0.0
        for item in scores:
            score = scores[item]
            if score > best:
                best = score
                # One pre-formatted argument prints the same text on
                # Python 2 and Python 3.
                print("{} {}".format(item, score))
Esempio n. 8
0
def main():
    """Print summary statistics of the StackOverflow Java network.

    Reports the number of weakly connected components, the size of the
    largest one, and the top 3 nodes by PageRank, HITS hub score and HITS
    authority score.
    """
    network = snap.LoadEdgeList(
        snap.PNEANet, "/Users/qingyuan/CS224W/stackoverflow-Java.txt", 0, 1)
    Components = snap.TCnComV()
    snap.GetWccs(network, Components)
    print("The number of weakly connected components is %d" % Components.Len())
    MxWcc = snap.GetMxWcc(network)
    # Argument order must follow the message: edges first, then nodes.
    print(
        "The number of edges is %d and the number of nodes is %d in the largest weakly connected component."
        % (MxWcc.GetEdges(), MxWcc.GetNodes()))

    def print_top(scores, limit=3):
        """Print the first ``limit`` (node id, score) entries of ``scores``."""
        for position, item in enumerate(scores):
            if position >= limit:
                break
            print(item, scores[item])

    PRankH = snap.TIntFltH()
    snap.GetPageRank(network, PRankH)
    # SortByDat(False) orders the table by descending score.
    PRankH.SortByDat(False)
    print(
        "IDs of the top 3 most central nodes in the network by PageRank scores. "
    )
    print_top(PRankH)

    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(network, NIdHubH, NIdAuthH)
    NIdHubH.SortByDat(False)
    print("IDs of the top 3 hubs in the network by HITS scores. ")
    print_top(NIdHubH)
    NIdAuthH.SortByDat(False)
    print("IDs of top 3 authorities in the network by HITS scores. ")
    print_top(NIdAuthH)
def compute_HITS(Graph):
    '''
    Run HITS on ``Graph`` and rank its nodes.

    :param Graph: the graph to compute HITS on
    :return:
        1. list of tuple (hub_score, node_id) in descending order
        2. list of tuple (authority_score, node_id) in descending order
    '''
    hub_table = snap.TIntFltH()  # filled in place by GetHits
    auth_table = snap.TIntFltH()  # filled in place by GetHits
    snap.GetHits(Graph, hub_table, auth_table)

    # Sort ascending on (score, node_id), then flip to descending order.
    hubs_desc = sorted((hub_table[nid], nid) for nid in hub_table)
    auths_desc = sorted((auth_table[nid], nid) for nid in auth_table)
    hubs_desc.reverse()
    auths_desc.reverse()

    return hubs_desc, auths_desc
    def hubs_and_authorities_score(self, MaxIter=20):
        '''
        Compute the HITS hub and authority score of every node in the graph.

        :param MaxIter: Maximum number of iterations.
        :return: tuple ``(hubs, authorities)``; each element is a list of
            ``(node_id, score)`` pairs in the iteration order of the score
            table filled by ``GetHits``.
        '''
        snap = self.snap

        hub_table = snap.TIntFlt64H()
        auth_table = snap.TIntFlt64H()
        snap.GetHits(self.graph, hub_table, auth_table, MaxIter)

        hub_pairs = [(node_id, hub_table[node_id]) for node_id in hub_table]
        auth_pairs = [(node_id, auth_table[node_id]) for node_id in auth_table]
        return hub_pairs, auth_pairs
def get_hits(net, label, outpath):
    """
    Compute HITS centrality (meant for directed graphs) and write it to disk.

    Four files are written, each named ``outpath + label + suffix``:
    ``-hub`` and ``-authority`` hold every node, ``-hub-top100`` and
    ``-authority-top100`` hold at most the 100 best nodes. Each line is
    ``<node id>\\t<score>`` and lines are ordered by descending score.

    :param net: SNAP graph to analyse
    :param label: label inserted in the output file names
    :param outpath: prefix (directory) of the output file names
    :return: ``(hub, authority)``, two lists of ``(node_id, score)`` tuples
        sorted by descending score
    """
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(net, NIdHubH, NIdAuthH)

    hub = _rank_hits_scores(NIdHubH)
    authority = _rank_hits_scores(NIdAuthH)
    _write_hits_ranking(hub, outpath + label + '-hub',
                        outpath + label + '-hub-top100')
    _write_hits_ranking(authority, outpath + label + '-authority',
                        outpath + label + '-authority-top100')
    return hub, authority


def _rank_hits_scores(scores):
    """Return ``(node_id, score)`` pairs of ``scores`` by descending score."""
    return sorted(((node_id, scores[node_id]) for node_id in scores),
                  key=operator.itemgetter(1), reverse=True)


def _write_hits_ranking(ranking, full_path, top_path, top_n=100):
    """Write ``ranking`` to ``full_path`` and its first ``top_n`` rows to ``top_path``."""
    lines = [str(node_id) + '\t' + str(score) + '\n'
             for node_id, score in ranking]
    # ``with`` closes the files on error; slicing copes with rankings shorter
    # than ``top_n`` (including empty ones) instead of raising IndexError.
    with open(full_path, 'w') as full_file:
        full_file.writelines(lines)
    with open(top_path, 'w') as top_file:
        top_file.writelines(lines[:top_n])
Esempio n. 12
0
def q3():
    """Print connectivity and centrality statistics of the StackOverflow graph.

    Reports the number of weakly connected components, the edge and node
    counts of the largest one, and the top 3 ``(score, node_id)`` pairs by
    PageRank, HITS hub score and HITS authority score.
    """
    G = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1)

    components = snap.TCnComV()
    snap.GetWccs(G, components)
    print("Number of WCC: ", components.Len())

    MxComp = snap.GetMxWcc(G)
    # The values follow the order announced by the label (edges, then nodes);
    # the graph already knows its sizes, so there is no need to iterate.
    print("Number of edges and nodes in MxWCC: ", MxComp.GetEdges(), ' ',
          MxComp.GetNodes())

    def top3(table):
        """Return the three largest ``(score, node_id)`` pairs of ``table``."""
        pairs = ((table[node_id], node_id) for node_id in table)
        return sorted(pairs, reverse=True)[:3]

    PRankH = snap.TIntFltH()
    snap.GetPageRank(G, PRankH)
    print("IDs of top 3 PageRank scores: ", top3(PRankH))

    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(G, NIdHubH, NIdAuthH)
    print("IDs of top 3 hubs by HITS scores: ", top3(NIdHubH))
    print("IDs of top 3 authorities by HITS scores: ", top3(NIdAuthH))
Esempio n. 13
0
def computeAuthHubScore(G, NodeAttributes):
    """Store raw and min-max normalised HITS scores in ``NodeAttributes``.

    For every node id reported by ``snap.GetHits`` the keys ``'HubScore'``,
    ``'AuthScore'``, ``'normHubScore'`` and ``'normAuthScore'`` are set on
    ``NodeAttributes[node_id]``. Normalised scores are
    ``(score - min) / (max - min)``; when all scores are equal the normalised
    score is ``0.0`` instead of dividing by zero, and an empty score table is
    left untouched.

    Returns the same ``NodeAttributes`` object, updated in place.
    """
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(G, NIdHubH, NIdAuthH)

    # Raw scores first, normalised ones second, so the keys are added to each
    # node's attributes in the same order as before.
    hub_scores = _record_raw_scores(NIdHubH, NodeAttributes, 'HubScore')
    auth_scores = _record_raw_scores(NIdAuthH, NodeAttributes, 'AuthScore')
    _record_normalised_scores(hub_scores, NodeAttributes, 'normHubScore')
    _record_normalised_scores(auth_scores, NodeAttributes, 'normAuthScore')
    return NodeAttributes


def _record_raw_scores(table, NodeAttributes, key):
    """Copy ``table`` into ``NodeAttributes[node][key]``; return (node, score) pairs."""
    pairs = [(node_id, table[node_id]) for node_id in table]
    for node_id, score in pairs:
        NodeAttributes[node_id][key] = score
    return pairs


def _record_normalised_scores(pairs, NodeAttributes, key):
    """Store the min-max normalised score of every pair under ``key``."""
    if not pairs:
        return
    scores = [score for _, score in pairs]
    lowest = min(scores)
    spread = max(scores) - lowest
    for node_id, score in pairs:
        NodeAttributes[node_id][key] = (
            (score - lowest) / spread if spread else 0.0)
Esempio n. 14
0
def partThree():
    """Print the answers to part three for the StackOverflow Java network.

    1. number of weakly connected components;
    2. edge and node counts of the largest weakly connected component;
    3. the three nodes with the highest PageRank, one ``id score`` per line;
    4. the three best ``(score, node_id)`` pairs by HITS hub score, then by
       HITS authority score.
    """
    data_dir_StackOverFlow = './data/stackoverflow-Java.txt'
    sofG = snap.LoadEdgeList(snap.PNGraph, data_dir_StackOverFlow, 0, 1, '\t')

    Components = snap.TCnComV()
    snap.GetWccs(sofG, Components)
    print('1. The number of weakly connected components in the network.: '+str(Components.Len()))

    MxWcc = snap.GetMxWcc(sofG)
    num_node = MxWcc.GetNodes()
    num_edge = MxWcc.GetEdges()
    print('2. The number of edges is {} and the number of nodes is {}'.format(num_edge, num_node))

    PRankH = snap.TIntFltH()
    snap.GetPageRank(sofG, PRankH)
    print('3. ')
    # Rank the node ids by score first: iterating the table directly yields
    # its first three entries, not the three highest scores.
    top_pagerank = sorted(PRankH, key=lambda node_id: PRankH[node_id],
                          reverse=True)[:3]
    for item in top_pagerank:
        print(item, PRankH[item])

    print('4. ')
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(sofG, NIdHubH, NIdAuthH)
    for scores in (NIdHubH, NIdAuthH):
        pairs = ((scores[item], item) for item in scores)
        print(sorted(pairs, reverse=True)[:3])
Esempio n. 15
0
def stackoverflow():
    """Print connectivity and centrality statistics of the StackOverflow graph.

    Reports the number of weakly connected components, the size of the
    largest one and the three best nodes by PageRank, HITS hub score and HITS
    authority score.

    Every ``print`` receives one pre-formatted string, which produces the
    same text as the former Python 2 ``print a, b`` statements (including
    their double spaces) and also runs on Python 3.
    """

    def report_top(label, scores, limit=3):
        """Sort ``scores`` descending and print its first ``limit`` entries."""
        scores.SortByDat(False)
        for position, node in enumerate(scores):
            if position >= limit:
                break
            print("{} {}  (score =  {}".format(label, node, scores[node]))

    g = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1)
    components = snap.TCnComV()
    snap.GetWccs(g, components)
    print("Num connected comp =  {}".format(components.Len()))
    mxwcc = snap.GetMxWcc(g)
    print("Num edges in largest =  {}".format(mxwcc.GetEdges()))
    print("Num nodes in largest =  {}".format(mxwcc.GetNodes()))

    rank = snap.TIntFltH()
    snap.GetPageRank(g, rank)
    report_top("largest page rank score nodes = ", rank)

    hubs = snap.TIntFltH()
    auths = snap.TIntFltH()
    snap.GetHits(g, hubs, auths)
    report_top("largest hub score nodes = ", hubs)
    report_top("largest auth score nodes = ", auths)
Esempio n. 16
0
def model_Hits(G):
    """Compute HITS scores for ``G``, save them as CSV and print the last five rows.

    The table has the columns ``node``/``score_NIdHubH`` (hub table) and
    ``node2``/``score_NIdAuthH`` (authority table), is sorted by ascending hub
    score and is written to ``./data/picture/model_Hits.csv``.
    """
    print('*********Computes the Hubs and Authorities score of every node in Graph********')
    hub_table = snap.TIntFltH()
    auth_table = snap.TIntFltH()
    snap.GetHits(G, hub_table, auth_table)

    hub_ids = [node_id for node_id in hub_table]
    auth_ids = [node_id for node_id in auth_table]
    columns = {
        'node': hub_ids,
        'score_NIdHubH': [hub_table[node_id] for node_id in hub_ids],
        'node2': auth_ids,
        'score_NIdAuthH': [auth_table[node_id] for node_id in auth_ids],
    }
    data = pd.DataFrame(columns).sort_values(by="score_NIdHubH",
                                             ascending=True)

    # Write the table to disk
    data.to_csv('./data/picture/model_Hits.csv')
    # Ascending sort: the last five rows hold the highest hub scores.
    print('model_Hits = ', data[-5:])
Esempio n. 17
0
def quick_properties(graph, name, dic_path):
    """Print and return quick properties of the graph ``name``.

    ``dic_path`` is the path of a pickled dict ``{player: id}``; it is passed
    to ``name_from_index`` to turn the top-ranked node ids into names.

    Returns a dict whose keys ("a. Nodes" ... "o. Three top authorities")
    label the collected statistics. Every ``print`` takes one pre-formatted
    string so the function behaves the same on Python 2 and Python 3.
    """
    n_edges = graph.GetEdges()
    n_nodes = graph.GetNodes()
    n_self_edges = snap.CntSelfEdges(graph)
    n_directed_edges = snap.CntUniqDirEdges(graph)
    n_undirected_edges = snap.CntUniqUndirEdges(graph)
    n_reciprocated_edges = snap.CntUniqBiDirEdges(graph)
    n_zero_out_nodes = snap.CntOutDegNodes(graph, 0)
    n_zero_in_nodes = snap.CntInDegNodes(graph, 0)
    # GetDeg() is in-degree + out-degree on a directed graph, so use the
    # direction-specific accessors for the two maxima.
    max_node_in = graph.GetNI(snap.GetMxInDegNId(graph)).GetInDeg()
    max_node_out = graph.GetNI(snap.GetMxOutDegNId(graph)).GetOutDeg()
    components = snap.TCnComV()
    snap.GetWccs(graph, components)
    max_wcc = snap.GetMxWcc(graph)
    n_components = components.Len()
    max_wcc_size = (max_wcc.GetNodes(), max_wcc.GetEdges())

    results = {}
    results["a. Nodes"] = n_nodes
    results["b. Edges"] = n_edges
    results["c. Self-edges"] = n_self_edges
    results["d. Directed edges"] = n_directed_edges
    results["e. Undirected edges"] = n_undirected_edges
    results["f. Reciprocated edges"] = n_reciprocated_edges
    results["g. 0 out-degree nodes"] = n_zero_out_nodes
    results["h. 0 in-degree nodes"] = n_zero_in_nodes
    results["i. Maximum node out-degree"] = max_node_out
    results["j. Maximum node in-degree"] = max_node_in
    results["k. Weakly connected components"] = n_components
    results["l. Nodes, edges of largest WCC"] = max_wcc_size

    print("##########")
    print("Quick overview of {} Network".format(name))
    print("##########")
    print("{} Nodes, {} Edges".format(n_nodes, n_edges))
    print("{} Self-edges ".format(n_self_edges))
    print("{} Directed edges, {} Undirected edges".format(
        n_directed_edges, n_undirected_edges))
    print("{} Reciprocated edges".format(n_reciprocated_edges))
    print("{} 0-out-degree nodes, {} 0-in-degree nodes".format(
        n_zero_out_nodes, n_zero_in_nodes))
    print("Maximum node in-degree: {}, maximum node out-degree: {}".format(
        max_node_in, max_node_out))
    print("###")
    print("{} Weakly connected components".format(n_components))
    print("Largest Wcc: {} Nodes, {} Edges".format(*max_wcc_size))

    # Node ids ordered by descending score for each centrality measure.
    prankH = snap.TIntFltH()
    snap.GetPageRank(graph, prankH)
    sorted_prankH = sorted(prankH, key=lambda key: prankH[key], reverse=True)
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(graph, NIdHubH, NIdAuthH)
    sorted_NIdHubH = sorted(NIdHubH,
                            key=lambda key: NIdHubH[key],
                            reverse=True)
    sorted_NIdAuthH = sorted(NIdAuthH,
                             key=lambda key: NIdAuthH[key],
                             reverse=True)

    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use this with dictionaries produced by a trusted source.
    with open(dic_path, 'rb') as dic_id:
        mydict = pickle.load(dic_id)

    # Resolve each top-3 name once and reuse it for printing and for results.
    top_pagerank = tuple(
        name_from_index(sorted_prankH, i, mydict) for i in range(3))
    top_hubs = tuple(
        name_from_index(sorted_NIdHubH, i, mydict) for i in range(3))
    top_authorities = tuple(
        name_from_index(sorted_NIdAuthH, i, mydict) for i in range(3))

    print("3 most central players by PageRank scores: {}, {}, {}".format(
        *top_pagerank))
    print("Top 3 hubs: {}, {}, {}".format(*top_hubs))
    print("Top 3 authorities: {}, {}, {}".format(*top_authorities))
    results["m. Three top PageRank"] = top_pagerank
    results["n. Three top hubs"] = top_hubs
    results["o. Three top authorities"] = top_authorities
    return results
Esempio n. 18
0
def manage_graphs(out_degree, nodes, max_minutes):
    """Generate a small-world graph and time two community detectors on it.

    A graph with ``nodes`` nodes is generated with ``snap.GenSmallWorld``
    (rewiring probability 0.7), saved to ``temporary_graphs/``, summarised
    (highest degree, hub and authority nodes) and then passed to the CNM and
    Girvan-Newman community detectors, each running in its own thread.

    :param out_degree: node out-degree passed to ``snap.GenSmallWorld``
    :param nodes: number of nodes of the generated graph
    :param max_minutes: overall time budget shared by both detectors.
        NOTE(review): the value is handed to ``Thread.join`` and compared with
        ``time.time()`` differences, both of which are in seconds -- confirm
        the intended unit with the caller.
    :return: ``(graph, cnm_community_exec_time, gn_community_exec_time,
        exceed)`` where ``graph`` is reloaded from the saved file and
        ``exceed`` tells whether the budget was used up
    """
    rnd = snap.TRnd(1, 0)
    graph = snap.GenSmallWorld(nodes, out_degree, 0.7, rnd)
    print(40 * "#")
    print(f"Starting Graph for #{nodes} Nodes.")

    # Save the graph in order to reload it after manipulation
    output_filename = f"temporary_graphs/{nodes}_ws_graph.graph"
    f_out = snap.TFOut(output_filename)
    graph.Save(f_out)
    f_out.Flush()

    # Highest rank Node
    max_degree_node = graph.GetNI(snap.GetMxDegNId(graph))
    print(f"Highest Degree Node ID#{max_degree_node.GetId()}"
          f" with Degree={max_degree_node.GetDeg()}")

    # Gets Hubs and Authorities of all the nodes
    hubs_per_node = snap.TIntFltH()
    auths_per_node = snap.TIntFltH()
    snap.GetHits(graph, hubs_per_node, auths_per_node)

    max_hub_node = graph.GetNI(
        max(hubs_per_node, key=lambda h: hubs_per_node[h]))
    print(f"Highest Hub Score Node ID#{max_hub_node.GetId()}"
          f" with Score={hubs_per_node[max_hub_node.GetId()]}")

    max_authority_node = graph.GetNI(
        max(auths_per_node, key=lambda a: auths_per_node[a]))
    # The authority score must be read from the authority table, not from
    # the hub table.
    print(f"Highest Authority Score Node ID#{max_authority_node.GetId()}"
          f" with Score={auths_per_node[max_authority_node.GetId()]}")

    exceed = False
    # CNM Community Detector
    cnm_community = snap.TCnComV()
    cnm_thread = threading.Thread(target=snap.CommunityCNM,
                                  args=(graph, cnm_community))
    cnm_start_time = time.time()

    try:
        cnm_thread.start()
        # join() returns after the timeout even if the thread is still
        # running; the elapsed time below is what flags the overrun.
        cnm_thread.join(max_minutes)

    except MemoryError:
        exceed = True

    finally:
        cnm_stop_time = time.time()
        cnm_community_exec_time = cnm_stop_time - cnm_start_time
        exceed |= max_minutes <= cnm_community_exec_time

    # GN Community Detector: only runs with whatever budget CNM left over
    if max_minutes > cnm_community_exec_time and not exceed:
        gn_community = snap.TCnComV()
        gn_thread = threading.Thread(target=snap.CommunityGirvanNewman,
                                     args=(graph, gn_community))
        gn_start_time = time.time()

        try:
            gn_thread.start()
            gn_thread.join(max_minutes - cnm_community_exec_time)

        except MemoryError:
            exceed = True

        finally:
            gn_stop_time = time.time()
            gn_community_exec_time = gn_stop_time - gn_start_time
            exceed |= gn_community_exec_time >= max_minutes - cnm_community_exec_time
    else:
        gn_community_exec_time = 0.00
    if not exceed:
        print(
            f"Execution Time for CNM Communities Detector is {round(cnm_community_exec_time, 4):.4f}"
        )
        print(
            f"Execution Time for GN Communities Detector is {round(gn_community_exec_time, 4):.4f}"
        )
    else:
        # ``nodes`` is the parameter holding the node count (the previously
        # referenced ``nodes_num`` is not defined in this function).
        print(
            f"Graph with Nodes#{nodes} exceeded the valid calculation limits."
        )
    print(40 * "#")

    # load graph in it's initial State
    f_in = snap.TFIn(output_filename)
    graph = snap.TUNGraph.Load(f_in)

    return graph, cnm_community_exec_time, gn_community_exec_time, exceed
Esempio n. 19
0
# Report the size of the largest weakly connected component. The first
# literal ends with a space so that the two adjacent literals do not run
# together as "networkhas".
print("The largest weakly connected component in the SO network "
      "has %s nodes and %s edges." % (maxWeaklyConnectedComponent.GetNodes(),
                                      maxWeaklyConnectedComponent.GetEdges()))

# 3.3 -- top nodes by PageRank (damping 0.85, tolerance 1e-4, up to 1000
# iterations). The slices below assume sortTIntFltH returns (node id, score)
# pairs with the best first -- TODO confirm against its definition.
TOPN = 3
SOPageRanks = snap.TIntFltH()
snap.GetPageRank(SOGraph, SOPageRanks, 0.85, 1e-4, 1000)
sortedSOPageRanks = sortTIntFltH(SOPageRanks)
print("The node IDs of the top %s most central nodes in the network "
      "by PageRank scores are %s with scores %s respectively." %
      (TOPN, tuple(t[0] for t in sortedSOPageRanks[:TOPN]),
       tuple(t[1] for t in sortedSOPageRanks[:TOPN])))

# 3.4 -- top hubs and authorities by HITS (100 iterations)
TOPN = 3
hubsScores = snap.TIntFltH()
authScores = snap.TIntFltH()
snap.GetHits(SOGraph, hubsScores, authScores, 100)
sortedHubScores = sortTIntFltH(hubsScores)
sortedAuthScores = sortTIntFltH(authScores)
print("The node IDs of the top %s hubs in the network by HITS scores "
      "are %s with scores %s respectively." %
      (TOPN, tuple(t[0] for t in sortedHubScores[:TOPN]),
       tuple(t[1] for t in sortedHubScores[:TOPN])))
# A bare ``print`` is a no-op expression on Python 3; print an explicit
# empty string to get the blank separator line on either version.
print("")
print("The node IDs of the top %s authorities in the network by HITS "
      "scores are %s with score %s respectively." %
      (TOPN, tuple(t[0] for t in sortedAuthScores[:TOPN]),
       tuple(t[1] for t in sortedAuthScores[:TOPN])))
Esempio n. 20
0
# Join the questions table with the posts table: rows match when a question's
# "t1.AcceptedAnswerId" equals a post's "PostId".
edges = questions.Join("t1.AcceptedAnswerId", posts, "PostId")
# NOTE(review): t.show is defined outside this chunk; presumably it reports
# the step name together with the resulting object -- confirm.
t.show("join", edges)

# Create haskell-specific Q&A graph
# >>> graph = posts.graph('Asker', 'Expert', directed = True)
# The joined table is turned into a graph whose edges go from the
# "t1_t2.Asker" column to the "t1.Expert" column.
edges.SetSrcCol("t1_t2.Asker")
edges.SetDstCol("t1.Expert")
graph = snap.ToGraph(edges, snap.aaFirst)
t.show("graph", graph)

# Compute Authority score
# >>> hits = graph.hits('Authority', 'Hub')
# note: the code below creates a table (Node name, Authority score) - the hub score is not used
# GetHits fills both hash tables in place; only HTAuth is converted to a table.
HTHub = snap.TIntFltH()
HTAuth = snap.TIntFltH()
snap.GetHits(graph, HTHub, HTAuth)
authority = snap.TTable.New("authority", HTAuth, "Expert", AUTHORITY_ATTRIBUTE,
                            context, snap.TBool(False))
t.show("authority score", authority)

# b) Compute comment scores

# Load comments
# >>> comments = ringo.load('comments.tsv')
# Schema of the comments file: two integer columns, "UserId" and "PostId".
S = snap.Schema()
S.Add(snap.TStrTAttrPr("UserId", snap.atInt))
S.Add(snap.TStrTAttrPr("PostId", snap.atInt))
# The file is read as tab-separated values.
comments = snap.TTable.LoadSS("comments", S, commentsFile, context, '\t',
                              snap.TBool(False))
t.show("load", comments)
Esempio n. 21
0
        # Find Top-30 nodes of PageRank
        page_rank_scores = snap.TIntFltH()
        snap.GetPageRank(largest_graph, page_rank_scores)
        # Node ids ordered by descending PageRank, truncated to the best 30.
        top_thirty_nodes_ids = sorted(page_rank_scores,
                                      key=lambda n: page_rank_scores[n],
                                      reverse=True)[:30]
        # Re-sort the selected ids in ascending id order: every list built
        # below is therefore ordered by node id, not by rank.
        top_thirty_nodes_ids.sort()
        top_thirty_nodes_page_rank = [
            page_rank_scores[node_id] for node_id in top_thirty_nodes_ids
        ]

        # Gets Hubs and Authorities of all the nodes
        hubs_per_node = snap.TIntFltH()
        auths_per_node = snap.TIntFltH()
        snap.GetHits(largest_graph, hubs_per_node, auths_per_node)
        # Hub and authority scores of the same 30 nodes, in the same order.
        top_thirty_hubs = [
            hubs_per_node[node_id] for node_id in top_thirty_nodes_ids
        ]
        top_thirty_authorities = [
            auths_per_node[node_id] for node_id in top_thirty_nodes_ids
        ]
        #
        # Find betweenness
        # GetBetweennessCentr fills the node and edge tables in place; only
        # the node table is read in the lines visible here.
        nodes_betweenness = snap.TIntFltH()
        edge_betweenness = snap.TIntPrFltH()
        betweenness_centrality = snap.GetBetweennessCentr(
            largest_graph, nodes_betweenness, edge_betweenness, 1.0)
        top_thirty_betweenness = [
            nodes_betweenness[node_id] for node_id in top_thirty_nodes_ids
        ]
def get_HITS_scores(G, n):
    """Return the ``(hub, authority)`` HITS scores of node ``n`` in ``G``.

    HITS is recomputed for the whole graph on every call.
    """
    hub_table, auth_table = snap.TIntFltH(), snap.TIntFltH()
    snap.GetHits(G, hub_table, auth_table)
    hub_score = hub_table[n]
    auth_score = auth_table[n]
    return hub_score, auth_score
Esempio n. 23
0
# PageRank score of every node of sof_g, keyed by node id.
rankscoremap = snap.TIntFltH()
snap.GetPageRank(sof_g, rankscoremap)
maplen = len(rankscoremap)
# NOTE(review): SortByDat() is called without arguments; the loop below
# treats the LAST three entries as the top three, so it presumably sorts in
# ascending order -- confirm.
rankscoremap.SortByDat()
cnt = 0
print("If you use PageRank score, then")
for item in rankscoremap:
    # Only the final three positions are printed; maplen - cnt counts down
    # 3, 2, 1 across them.
    if cnt >= maplen - 3:
        print("\tTop " + str(maplen - cnt) + " node is " + str(item) +
              " with score " + str(rankscoremap[item]))
    cnt = cnt + 1

# HITS hub and authority scores of every node, keyed by node id.
hubscore = snap.TIntFltH()
authscore = snap.TIntFltH()
snap.GetHits(sof_g, hubscore, authscore)
cnt = 0
hubscore.SortByDat()
authscore.SortByDat()
print("If you use HITS score, then")
# maplen (the size of the PageRank table) is reused as the size of the
# authority table.
for item in authscore:
    if cnt >= maplen - 3:
        print("\tTop " + str(maplen - cnt) + " node is " + str(item) +
              " with authority " + str(authscore[item]))
    cnt = cnt + 1

# Separator line between the authority and the hub listings.
print('\t' + '-' * 45)

cnt = 0
for item in hubscore:
    if cnt >= maplen - 3:
Esempio n. 24
0
def main():
    """Compute heuristic features for the labelled nodes of a network.

    The labelled node indices come from ``load_graph`` applied to the
    ``--network`` .mat file; the graph itself is read from ``--edgelist``.
    For every labelled node the degree, PageRank, clustering coefficient,
    HITS hub and authority scores, average neighbour degree and eccentricity
    are collected and written to
    ``<network without ".mat">_node_heuristic_features.csv``. The file is
    also rewritten after every 1000 processed nodes as a checkpoint.
    """
    parser = ArgumentParser("node_heu",formatter_class=ArgumentDefaultsHelpFormatter,conflict_handler='resolve')

    # Required arguments
    parser.add_argument("--network", type=str, required=True, help='The path and name of the .mat file containing the adjacency matrix and node labels of the input network')
    parser.add_argument("--edgelist", type=str, required=True, help='The path and name of the edgelist file with no weights containing the edgelist of the input network')
    parser.add_argument("--dataset", type=str, required=True, help='The name of your dataset (used for output)')

    # Optional arguments
    parser.add_argument("--adj_matrix_name", default='network', help='The name of the adjacency matrix inside the .mat file')
    parser.add_argument("--label_matrix_name", default='group', help='The name of the labels matrix inside the .mat file')
    args = parser.parse_args()

    print(args)

    mat, A, graph, labels_matrix, labels_count, indices = load_graph(args.network, args.adj_matrix_name, args.label_matrix_name)

    s_time = time.time()

    # Output location and column layout are shared by the periodic
    # checkpoints and the final save.
    output_path = args.network.replace(".mat", "") + "_node_heuristic_features.csv"
    columns = ('NodeId', 'Degree', 'PageRankScore', 'NodeClustCf', 'HubScore', 'AuthScore', 'AverageNeighborDegree', 'NodeEcc')

    def save_features(rows):
        """Write the collected feature rows to ``output_path``."""
        pd.DataFrame(rows, columns=columns).to_csv(output_path, index=False)
        print("File saved at {}".format(output_path))

    # Load edgelist as undirected graph in SNAP
    G = snap.LoadEdgeList(snap.PUNGraph, args.edgelist)
    print("Loading graph in SNAP ... {}".format(str(args.edgelist)))

    # Load edgelist for networkx
    G_NETX = nx.read_edgelist(args.edgelist)
    print("Loading graph in NetworkX .... {}".format(str(args.edgelist)))

    # Get Average Neighbor Degree from NetworkX (only time NetworkX is used)
    AvgNeighDe = nx.average_neighbor_degree(G_NETX)

    # Calculate Page Rank
    p_time = time.time()
    PRankH = snap.TIntFltH()
    snap.GetPageRank(G, PRankH)
    print("Finished in Page rank in {}".format(str(time.time()-p_time)))

    # Calculate Hub and Authority Scores
    h_time = time.time()
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(G, NIdHubH, NIdAuthH)
    print("Finished in Hub and Auth Scores in {}".format(str(time.time()-h_time)))

    count = 0
    node_data = []
    fl_100 = time.time()
    print("Num of nodes: {}".format(len(PRankH)))
    print("Num of nodes with labels: {}".format(len(indices)))
    print("Collecting other features for each node ...")
    for n in G.Nodes():
        nid = n.GetId()
        if nid in indices:
            # NetworkX read the edge list with string node names, hence the
            # str(nid) key for AvgNeighDe.
            node_data.append((nid, n.GetInDeg(), PRankH[nid], snap.GetNodeClustCf(G, nid), NIdHubH[nid], NIdAuthH[nid], AvgNeighDe[str(nid)], snap.GetNodeEcc(G, nid)))
            count = count + 1
            if count % 1000 == 0:
                print("Processed {} nodes".format(str(count)))
                print(time.time() - fl_100)
                fl_100 = time.time()
                # Checkpoint: rewrite the CSV with everything gathered so far.
                save_features(node_data)

    save_features(node_data)

    print("Finished in {}".format(str(time.time()-s_time)))
#    print(item, PRankH[item])
# Copy the PageRank table into two parallel lists: node ids and their scores.
node_list = []
prank_list = []
for item in PRankH:
    node_list.append(item)
    prank_list.append(PRankH[item])
# Pair each score with its node id, sort descending (score first, node id as
# tie-breaker) and keep the three best pairs.
results = sorted(zip(prank_list, node_list), reverse=True)[:3]
print("Top 3 Central nodes and their Page Ranks are")
for result in results:
    # Each result is (score, node id); print the node id first.
    a, b = result
    print(b, a)

#Question 4 - The top 3 hubs and top 3 authorities in the network by HITS scores.
NIdHubH = snap.TIntFltH()
NIdAuthH = snap.TIntFltH()
snap.GetHits(stackoverflow_graph, NIdHubH, NIdAuthH)
# tmp_lst1 holds hub node ids, tmp_lst2 the matching hub scores.
tmp_lst1 = []
tmp_lst2 = []
for item in NIdHubH:
    tmp_lst1.append(item)
    tmp_lst2.append(NIdHubH[item])
# Three best (hub score, node id) pairs.
hub_results = sorted(zip(tmp_lst2, tmp_lst1), reverse=True)[:3]

# tmp_lst3 holds authority node ids, tmp_lst4 the matching authority scores.
tmp_lst3 = []
tmp_lst4 = []
for item in NIdAuthH:
    tmp_lst3.append(item)
    tmp_lst4.append(NIdAuthH[item])
# Three best (authority score, node id) pairs.
auth_results = sorted(zip(tmp_lst4, tmp_lst3), reverse=True)[:3]

print("Top 3 Hubs and their hit scores are")
Esempio n. 26
0
def getAttribute(filename):
    """Build a per-node table of centrality measures for the graph in `filename`.

    The file is read with ``snap.LoadEdgeList`` as an undirected graph
    (``snap.PUNGraph``), taking columns 0 and 1 of each line as the edge
    endpoints. The loaded graph is also dumped to standard output.

    Every score is looked up by node id, so each row describes exactly the
    node named in its ``Id`` column and does not depend on the iteration
    order of the SNAP hash tables.

    Args:
        filename: Path of the edge-list file. It is also stored verbatim in
            the ``Graph`` column of every row.

    Returns:
        A ``pandas.DataFrame`` with one row per node and the columns
        ``Graph``, ``Id``, ``Degree``, ``DegreeCentrality``,
        ``NodeBetweennessCentrality``, ``ClosenessCentrality``,
        ``FarnessCentrality``, ``PageRank``, ``HubsScore``,
        ``AuthoritiesScore``, ``NodeEccentricity`` and
        ``EigenvectorCentrality``.
    """
    UGraph = snap.LoadEdgeList(snap.PUNGraph, filename, 0, 1)
    UGraph.Dump()

    columns = ['Graph', 'Id', 'Degree', 'DegreeCentrality', 'NodeBetweennessCentrality',
               'ClosenessCentrality', 'FarnessCentrality', 'PageRank', 'HubsScore',
               'AuthoritiesScore', 'NodeEccentricity', 'EigenvectorCentrality']
    node_count = UGraph.GetNodes()
    attributes = pd.DataFrame(np.zeros(shape=(node_count, len(columns))), columns=columns)

    attributes['Graph'] = [filename] * node_count

    # Degree: the (node id, degree) vector also fixes the row order used below.
    node_ids = []
    degrees = []
    OutDegV = snap.TIntPrV()
    snap.GetNodeOutDegV(UGraph, OutDegV)
    for pair in OutDegV:
        node_ids.append(pair.GetVal1())
        degrees.append(pair.GetVal2())
    attributes['Id'] = node_ids
    attributes['Degree'] = degrees

    # Degree, Closeness, Farness Centrality, Node Eccentricity (one call per node)
    attributes['DegreeCentrality'] = [snap.GetDegreeCentr(UGraph, nid) for nid in node_ids]
    attributes['ClosenessCentrality'] = [snap.GetClosenessCentr(UGraph, nid) for nid in node_ids]
    attributes['FarnessCentrality'] = [snap.GetFarnessCentr(UGraph, nid) for nid in node_ids]
    attributes['NodeEccentricity'] = [snap.GetNodeEcc(UGraph, nid, False) for nid in node_ids]

    # Betweenness Centrality (edge scores are computed by the call but not used)
    Nodes = snap.TIntFltH()
    Edges = snap.TIntPrFltH()
    snap.GetBetweennessCentr(UGraph, Nodes, Edges, 1.0)
    attributes['NodeBetweennessCentrality'] = [Nodes[nid] for nid in node_ids]

    # PageRank
    PRankH = snap.TIntFltH()
    snap.GetPageRank(UGraph, PRankH)
    attributes['PageRank'] = [PRankH[nid] for nid in node_ids]

    # Hubs, Authorities score
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(UGraph, NIdHubH, NIdAuthH)
    attributes['HubsScore'] = [NIdHubH[nid] for nid in node_ids]
    attributes['AuthoritiesScore'] = [NIdAuthH[nid] for nid in node_ids]

    # Eigenvector Centrality
    NIdEigenH = snap.TIntFltH()
    snap.GetEigenVectorCentr(UGraph, NIdEigenH)
    attributes['EigenvectorCentrality'] = [NIdEigenH[nid] for nid in node_ids]

    return attributes
def _show_leading_nodes(score_table, heading, limit=3):
    """Print `heading`, then the key of each of the first `limit` entries.

    The table is walked from ``BegI()`` in its current order, so the caller
    sorts it beforehand.
    """
    cursor = score_table.BegI()
    print(heading)
    for _ in range(limit):
        print("Node:", cursor.GetKey())
        cursor.Next()


print("Number of MxWcc Nodes:", MxWcc.GetNodes())

# IDs of the top 3 most central nodes in the network by PageRank scores
PRankH = snap.TIntFlt64H()
snap.GetPageRank(data, PRankH)
PRankH.SortByDat(False)
_show_leading_nodes(
    PRankH, "The top 3 most central nodes in the network by PagePank scores:")
print("")

# IDs of the top 3 hubs and top 3 authorities in the network by HITS scores.
NIdHubH = snap.TIntFlt64H()
NIdAuthH = snap.TIntFlt64H()
snap.GetHits(data, NIdHubH, NIdAuthH)
NIdHubH.SortByDat(False)
_show_leading_nodes(NIdHubH, "The top 3 hubs in the network by HITS score:")

Esempio n. 28
0
# Python 2 script fragment (print statements). `max_index` and `PRankH` are
# created earlier in the original script, outside this excerpt.
print max_index[1]

# Third pass over the PageRank table: pick the best-scoring node that is not
# one of the two already stored in max_index[0] and max_index[1].
# NOTE: `max` shadows the builtin of the same name for the rest of the script.
max = 0
for ele in PRankH:
    if PRankH[ele] > max and ele != max_index[0] and ele != max_index[1]:
        max = PRankH[ele]
        max_index[2] = ele

# Only the third-ranked node is printed here, despite the "Top 3" wording.
print "Top 3 most central nodes by PageRank Score", max_index[2]

# 4) HITS: load the edge list as a directed graph (snap.PNGraph) and compute
# hub and authority scores into the two hash tables.
g4 = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1)
hub = snap.TIntFltH()
aut = snap.TIntFltH()
snap.GetHits(g4, hub, aut)

# Hubs: same repeated-scan approach as above, one pass per rank.
max = 0
max_index_hub = [0, 0, 0]
max_index_aut = [0, 0, 0]

# Pass 1: node with the highest hub score.
for ele in hub:
    if hub[ele] > max:
        max = hub[ele]
        max_index_hub[0] = ele

# Pass 2: highest hub score excluding the node found in pass 1.
# NOTE(review): in this excerpt the winner is never written to
# max_index_hub[1]; the snippet appears to be cut off here.
max = 0
for ele in hub:
    if hub[ele] > max and ele != max_index_hub[0]:
        max = hub[ele]
Esempio n. 29
0
# Python 2 script fragment (print statements). `G1` and `PRankH` are created
# earlier in the original script, outside this excerpt.
snap.GetPageRank(G1, PRankH)
count = 0

# Sort the rank hash table by value in descending order, then walk the first
# three entries with a hash iterator.
# NOTE: `iter` shadows the builtin of the same name.
PRankH.SortByDat(False)
iter = PRankH.BegI()
print "3. The top 3 most central nodes in the network by PagePank scores"
while (count < 3):
    # NOTE(review): with the Python 2 print statement used elsewhere in this
    # fragment, this prints a (key, value) tuple rather than two fields.
    print(iter.GetKey(), iter.GetDat())
    iter = iter.Next()
    count += 1

# 4. The top 3 hubs and top 3 authorities in the network by HITS scores
NIdHubH = snap.TIntFltH()
NIdAuthH = snap.TIntFltH()
snap.GetHits(G1, NIdHubH, NIdAuthH)

print "4. The top 3 hubs and top 3 authorities in the network by HITS scores"
count = 0

# Sort hubs in descending order and print the first three (key, score) pairs.
NIdHubH.SortByDat(False)
iter = NIdHubH.BegI()
while (count < 3):
    print 'hub :', iter.GetKey(), iter.GetDat()
    iter = iter.Next()
    count += 1

count = 0
# Sort authorities in descending order; the loop that would print them is not
# part of this excerpt.
NIdAuthH.SortByDat(False)
def _snap_plot_missing(prefix, name):
    """Return True when any of ``<prefix>.<name>.plt/.tab/.png`` is absent."""
    return not all(
        os.path.isfile("{0}.{1}.{2}".format(prefix, name, extension))
        for extension in ("plt", "tab", "png"))


def _label_top_nodes(top_n, id_pkg_dict):
    """Turn ascending (node id, score) pairs into descending (pkg, score) pairs."""
    return [(id_pkg_dict[pair[0]], pair[1]) for pair in reversed(top_n)]


def _save_hashtable(hashtable, path):
    """Write a SNAP hash table to `path` and flush the output stream."""
    fout = snap.TFOut(path)
    hashtable.Save(fout)
    # Flush explicitly so the cache file is complete on disk right away.
    fout.Flush()


def _load_hashtable(hashtable, path):
    """Fill a SNAP hash table from the file previously saved at `path`."""
    fin = snap.TFIn(path)
    hashtable.Load(fin)


def compute_graph_statistics(graph_path, overwrite, compute_betweenness=False):
    """Compute statistics and plots for a saved ``snap.TNEANet`` graph.

    Results are cached next to the graph file: scalar and top-N statistics go
    to ``<graph_name>_statistics.json``, score tables and plots go to files
    prefixed with ``<graph_name>``. A step is skipped when its outputs already
    exist, unless `overwrite` is true.

    Side effects: changes the process working directory to the directory of
    the graph file, writes the files mentioned above and prints progress
    lines to standard output.

    Args:
        graph_path: Path of the binary graph file; a ``.graph`` suffix is
            stripped to obtain the base name of every output. Each node is
            expected to carry a string attribute named ``"pkg"``.
        overwrite: When true, recompute every step even if cached results or
            output files are present.
        compute_betweenness: When true, also compute node and edge
            betweenness (skipped by default).

    Returns:
        None.
    """
    graph_abs_path = os.path.abspath(graph_path)
    graph_name = os.path.basename(graph_abs_path).replace(".graph", "")
    fin = snap.TFIn(graph_abs_path)
    graph = snap.TNEANet.Load(fin)

    # rebuild the id => pkg dictionary
    id_pkg_dict = {}
    for node in graph.Nodes():
        id_pkg_dict[node.GetId()] = graph.GetStrAttrDatN(node.GetId(), "pkg")
    directory = os.path.dirname(graph_abs_path)
    json_path = os.path.join(directory, graph_name + "_statistics.json")
    if os.path.isfile(json_path):
        with open(json_path, "r") as f:
            statistics = json.load(f, object_pairs_hook=OrderedDict)
    else:
        statistics = OrderedDict()

    # snap.py doesn't support absolute paths for some operations. Let's cd to the directory
    os.chdir(directory)

    # general statistics
    output = os.path.join(directory, graph_name + "_main_statistics.txt")
    if not os.path.isfile(output) or overwrite:
        print("{0} Computing general statistics".format(datetime.datetime.now()))
        snap.PrintInfo(graph, "Play Store Graph -- main statistics", output, False)

    # info about the nodes with the max in degree
    if "max_in_degree" not in statistics or overwrite:
        print("{0} Computing max indegree".format(datetime.datetime.now()))
        max_in_deg_id = snap.GetMxInDegNId(graph)
        statistics["max_in_degree"] = graph.GetNI(max_in_deg_id).GetInDeg()
        statistics["max_in_degree_id"] = max_in_deg_id
        statistics["max_in_degree_pkg"] = graph.GetStrAttrDatN(max_in_deg_id, "pkg")

    # info about the nodes with the max out degree
    if "max_out_degree" not in statistics or overwrite:
        print("{0} Computing max outdegree".format(datetime.datetime.now()))
        max_out_deg_id = snap.GetMxOutDegNId(graph)
        statistics["max_out_degree"] = graph.GetNI(max_out_deg_id).GetOutDeg()
        statistics["max_out_degree_id"] = max_out_deg_id
        statistics["max_out_degree_pkg"] = graph.GetStrAttrDatN(max_out_deg_id, "pkg")

    # pagerank statistics
    output = graph_name + "_topNpagerank.eps"
    if not os.path.isfile(output) or "top_n_pagerank" not in statistics or overwrite:
        print("{0} Computing top 20 nodes with highest pagerank".format(datetime.datetime.now()))
        data_file = graph_name + "_pageranks"
        prank_hashtable = snap.TIntFltH()
        if not os.path.isfile(data_file) or overwrite:
            # Damping Factor: 0.85, Convergence difference: 1e-4, MaxIter: 100
            snap.GetPageRank(graph, prank_hashtable, 0.85)
            _save_hashtable(prank_hashtable, data_file)
        else:
            _load_hashtable(prank_hashtable, data_file)

        top_n = get_top_nodes_from_hashtable(prank_hashtable)
        top_n.sort(key=itemgetter(1))
        if "top_n_pagerank" not in statistics or overwrite:
            statistics["top_n_pagerank"] = _label_top_nodes(top_n, id_pkg_dict)

        if not os.path.isfile(output) or overwrite:
            # let's build a subgraph induced on the top 20 pagerank nodes
            subgraph = get_subgraph(graph, [x[0] for x in top_n])
            labels_dict = get_labels_subset(id_pkg_dict, subgraph)
            values = snap_hashtable_to_dict(prank_hashtable, [x[0] for x in top_n])
            plot_subgraph_colored(subgraph, labels_dict, values, "PageRank",
                                  "Play Store Graph - top 20 PageRank nodes", output, "autumn_r")

    # betweenness statistics
    output = graph_name + "_topNbetweenness.eps"
    # The cached key is "top_n_betweenness"; testing any other key would make
    # this step rerun on every call that enables betweenness.
    if compute_betweenness and (
            not os.path.isfile(output) or "top_n_betweenness" not in statistics or overwrite):
        print("{0} Computing top 20 nodes with highest betweenness".format(datetime.datetime.now()))
        data_file1 = graph_name + "_node_betweenness"
        data_file2 = graph_name + "_edge_betweenness"
        node_betwenness_hashtable = snap.TIntFltH()
        edge_betwenness_hashtable = snap.TIntPrFltH()
        if not os.path.isfile(data_file1) or not os.path.isfile(data_file2) or overwrite:
            snap.GetBetweennessCentr(graph, node_betwenness_hashtable, edge_betwenness_hashtable, 0.85, True)
            _save_hashtable(node_betwenness_hashtable, data_file1)
            _save_hashtable(edge_betwenness_hashtable, data_file2)
        else:
            _load_hashtable(node_betwenness_hashtable, data_file1)
            _load_hashtable(edge_betwenness_hashtable, data_file2)  # unused, as now

        top_n = get_top_nodes_from_hashtable(node_betwenness_hashtable)
        top_n.sort(key=itemgetter(1))
        if "top_n_betweenness" not in statistics or overwrite:
            statistics["top_n_betweenness"] = _label_top_nodes(top_n, id_pkg_dict)

        if not os.path.isfile(output) or overwrite:
            # let's build a subgraph induced on the top 20 betweenness nodes
            subgraph = get_subgraph(graph, [x[0] for x in top_n])
            labels_dict = get_labels_subset(id_pkg_dict, subgraph)
            values = snap_hashtable_to_dict(node_betwenness_hashtable, [x[0] for x in top_n])
            plot_subgraph_colored(subgraph, labels_dict, values, "Betweenness",
                                  "Play Store Graph - top 20 Betweenness nodes", output)

    # HITS statistics
    output_hub = graph_name + "_topNhitshubs.eps"
    output_auth = graph_name + "_topNhitsauth.eps"
    hits_plots_missing = not os.path.isfile(output_hub) or not os.path.isfile(output_auth)
    if hits_plots_missing or "top_n_hits_hubs" not in statistics \
            or "top_n_hits_authorities" not in statistics or overwrite:
        print("{0} Computing top 20 HITS hubs and auths".format(datetime.datetime.now()))
        data_file1 = graph_name + "_hits_hubs"
        data_file2 = graph_name + "_hits_auth"
        hubs_hashtable = snap.TIntFltH()
        auth_hashtable = snap.TIntFltH()
        if not os.path.isfile(data_file1) or not os.path.isfile(data_file2) or overwrite:
            # MaxIter = 20
            snap.GetHits(graph, hubs_hashtable, auth_hashtable, 20)
            _save_hashtable(hubs_hashtable, data_file1)
            _save_hashtable(auth_hashtable, data_file2)
        else:
            _load_hashtable(hubs_hashtable, data_file1)
            _load_hashtable(auth_hashtable, data_file2)

        top_n_hubs = get_top_nodes_from_hashtable(hubs_hashtable)
        top_n_hubs.sort(key=itemgetter(1))
        if "top_n_hits_hubs" not in statistics or overwrite:
            statistics["top_n_hits_hubs"] = _label_top_nodes(top_n_hubs, id_pkg_dict)

        top_n_auth = get_top_nodes_from_hashtable(auth_hashtable)
        top_n_auth.sort(key=itemgetter(1))
        if "top_n_hits_authorities" not in statistics or overwrite:
            statistics["top_n_hits_authorities"] = _label_top_nodes(top_n_auth, id_pkg_dict)

        if hits_plots_missing or overwrite:
            nodes_subset = set()
            for pair in top_n_hubs:
                nodes_subset.add(pair[0])
            for pair in top_n_auth:
                nodes_subset.add(pair[0])

            # let's build a subgraph induced on the top N HITS auths and hubs nodes
            subgraph = get_subgraph(graph, nodes_subset)
            labels_dict = get_labels_subset(id_pkg_dict, subgraph)
            values = snap_hashtable_to_dict(hubs_hashtable, nodes_subset)
            values2 = snap_hashtable_to_dict(auth_hashtable, nodes_subset)
            plot_subgraph_colored(subgraph, labels_dict, values, "HITS - Hub Index",
                                  "Play Store Graph - top 20 HITS hubs + top 20 HITS authorities", output_hub, "bwr")
            plot_subgraph_colored(subgraph, labels_dict, values2, "HITS - Authority Index",
                                  "Play Store Graph - top 20 HITS hubs + top 20 HITS authorities", output_auth,
                                  "bwr_r")

    # indegree histogram
    output = graph_name + "_indegree"
    if _snap_plot_missing("inDeg", output) or overwrite:
        print("{0} Computing indegree distribution".format(datetime.datetime.now()))
        snap.PlotInDegDistr(graph, output, "Play Store Graph - in-degree Distribution")

    # outdegree histogram
    output = graph_name + "_outdegree"
    if _snap_plot_missing("outDeg", output) or overwrite:
        print("{0} Computing outdegree distribution".format(datetime.datetime.now()))
        snap.PlotOutDegDistr(graph, output, "Play Store Graph - out-degree Distribution")

    # strongly connected components print
    output = graph_name + "_scc"
    if _snap_plot_missing("scc", output) or overwrite:
        print("{0} Computing scc distribution".format(datetime.datetime.now()))
        snap.PlotSccDistr(graph, output, "Play Store Graph - strongly connected components distribution")

    # weakly connected components print
    output = graph_name + "_wcc"
    if _snap_plot_missing("wcc", output) or overwrite:
        print("{0} Computing wcc distribution".format(datetime.datetime.now()))
        snap.PlotWccDistr(graph, output, "Play Store Graph - weakly connected components distribution")

    # clustering coefficient distribution
    output = graph_name + "_cf"
    if _snap_plot_missing("ccf", output) or overwrite:
        print("{0} Computing cf distribution".format(datetime.datetime.now()))
        snap.PlotClustCf(graph, output, "Play Store Graph - clustering coefficient distribution")

    # shortest path distribution
    output = graph_name + "_hops"
    if _snap_plot_missing("hop", output) or overwrite:
        print("{0} Computing shortest path distribution".format(datetime.datetime.now()))
        snap.PlotHops(graph, output, "Play Store Graph - Cumulative Shortest Paths (hops) distribution", True)

    # k-core edges distribution
    output = graph_name + "_kcore_edges"
    if _snap_plot_missing("coreEdges", output) or overwrite:
        print("{0} Computing k-core edges distribution".format(datetime.datetime.now()))
        snap.PlotKCoreEdges(graph, output, "Play Store Graph - K-Core edges distribution")

    # k-core nodes distribution
    output = graph_name + "_kcore_nodes"
    if _snap_plot_missing("coreNodes", output) or overwrite:
        print("{0} Computing k-core nodes distribution".format(datetime.datetime.now()))
        snap.PlotKCoreNodes(graph, output, "Play Store Graph - K-Core nodes distribution")

    with open(json_path, 'w') as outfile:
        json.dump(statistics, outfile, indent=2)