def pageRank_components(g):
    """Write the best PageRank score of each weakly connected component of g.

    The result is written to ``component_pr.txt`` in the current working
    directory: a ``Total components:<count>`` header followed by one line
    per processed component holding ``<node count> <highest PageRank score>``.
    Components with exactly two nodes are skipped.

    Args:
        g: snap graph; its components are extracted as sub-graphs with
            ``snap.GetSubGraph_PNGraph``.
    """
    print('executing pagerank components ---- getting components for page rank')
    Components = snap.TCnComV()
    snap.GetWccs(g, Components)

    # Materialise every component as its own sub-graph.
    cgraphs = []
    for com in Components:
        v = snap.TIntV()
        for ni in com:
            v.Add(ni)
        cgraphs.append(snap.GetSubGraph_PNGraph(g, v))

    print('components retrived for pagerank')

    # The context manager guarantees the file is closed even when one of the
    # PageRank computations raises.
    with open('component_pr.txt', 'w') as f:
        f.write('Total components:' + str(len(cgraphs)) + '\n')
        for graph in cgraphs:
            if graph.GetNodes() == 2:
                continue
            sprank = snap.TIntFltH()
            snap.GetPageRank_PNGraph(graph, sprank)
            # Descending sort puts the best-ranked node first.
            sprank.SortByDat(False)
            f.write(
                str(graph.GetNodes()) + ' ' +
                str(sprank[sprank.BegI().GetKey()]) + '\n')
    print('finished writing pagerank components values')
Beispiel #2
0
def gen_G(D, Pi_minus, Pi_exo, V_exo, theta2, N):
    """
    Returns pairwise-stable network on N nodes.

    D, Pi_minus, Pi_exo = outputs of gen_D().
    V_exo = 'exogenous' part of joint surplus (output of gen_V_exo).
    theta2 = transitivity parameter (theta[2]).

    Each component of D with more than one node is handed to
    gen_G_subgraph(); the edges it returns, plus every edge of Pi_exo,
    make up the result.
    """
    network = snap.GenRndGnm(snap.PUNGraph, N, 0)  # N nodes, no edges yet

    d_components = snap.TCnComV()
    snap.GetWccs(D, d_components)

    member_ids = snap.TIntV()  # scratch vector, cleared for every component
    for component in d_components:
        # Single-node components contribute no edges.
        if component.Len() <= 1:
            continue
        member_ids.Clr()
        for node_id in component:
            member_ids.Add(node_id)
        partial = gen_G_subgraph(member_ids, D, Pi_minus, Pi_exo, V_exo,
                                 theta2)
        for link in partial.Edges():
            network.AddEdge(link.GetSrcNId(), link.GetDstNId())

    # add robust links
    for link in Pi_exo.Edges():
        network.AddEdge(link.GetSrcNId(), link.GetDstNId())

    return network
Beispiel #3
0
def quick_properties(graph, name, dic_path):
    """Print a quick overview of the graph "name".

    Reports node and edge counts, self / directed / undirected /
    reciprocated edge counts, zero-degree node counts, the maximum in- and
    out-degree, the weakly connected components, and the best-ranked
    players by PageRank, hub score and authority score.

    Args:
        graph: snap graph to describe.
        name: label printed in the header.
        dic_path: path of a pickled dict {players: id}, used to translate
            node ids back into player names.

    Raises:
        ValueError: if a top-ranked node id is not a value of the pickled
            dict.
    """
    n_edges = graph.GetEdges()
    n_nodes = graph.GetNodes()
    print("##########")
    print("Quick overview of {} Network".format(name))
    print("##########")
    # Format first, then print: calling .format on the result of print()
    # fails on Python 3.
    print("{} Nodes, {} Edges".format(n_nodes, n_edges))
    print("{} Self-edges ".format(snap.CntSelfEdges(graph)))
    print("{} Directed edges, {} Undirected edges".format(
        snap.CntUniqDirEdges(graph), snap.CntUniqUndirEdges(graph)))
    print("{} Reciprocated edges".format(snap.CntUniqBiDirEdges(graph)))
    print("{} 0-out-degree nodes, {} 0-in-degree nodes".format(
        snap.CntOutDegNodes(graph, 0), snap.CntInDegNodes(graph, 0)))
    node_in = graph.GetNI(snap.GetMxInDegNId(graph))
    node_out = graph.GetNI(snap.GetMxOutDegNId(graph))
    # GetDeg() is the total degree (in + out); report the directional
    # degrees the message actually names.
    print("Maximum node in-degree: {}, maximum node out-degree: {}".format(
        node_in.GetInDeg(), node_out.GetOutDeg()))
    print("###")
    components = snap.TCnComV()
    snap.GetWccs(graph, components)
    max_wcc = snap.GetMxWcc(graph)
    print("{} Weakly connected components".format(components.Len()))
    print("Largest Wcc: {} Nodes, {} Edges".format(max_wcc.GetNodes(),
                                                   max_wcc.GetEdges()))

    def top_three(scores):
        # Node ids ordered by decreasing score, cut to the best three (or
        # fewer when the graph is smaller than that).
        ranked = sorted(scores, key=lambda key: scores[key], reverse=True)
        return ranked[:3]

    prankH = snap.TIntFltH()
    snap.GetPageRank(graph, prankH)
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(graph, NIdHubH, NIdAuthH)

    # NOTE(review): pickle.load can execute arbitrary code stored in the
    # file; dic_path must only ever point at trusted data.
    with open(dic_path, 'rb') as dic_id:
        mydict = pickle.load(dic_id)

    # Build the reverse-lookup lists once instead of once per printed name.
    players = list(mydict.keys())
    ids = list(mydict.values())

    def describe(node_ids):
        # First player whose id matches, exactly like the dict scan it
        # replaces; joined the way "{}, {}, {}" would render three names.
        return ", ".join("{}".format(players[ids.index(node_id)])
                         for node_id in node_ids)

    print("3 most central players by PageRank scores: {}".format(
        describe(top_three(prankH))))
    print("Top 3 hubs: {}".format(describe(top_three(NIdHubH))))
    print("Top 3 authorities: {}".format(describe(top_three(NIdAuthH))))
Beispiel #4
0
def computeWeaklyConnectedComponents(graph, outFile):
    """Export the weakly connected components of ``graph`` to ``outFile``.

    Every node id is written on its own line and each component is
    terminated by an empty line.

    Args:
        graph: snap graph whose components are computed.
        outFile: path of the text file to (over)write.
    """
    logger.info("Computing Weakly Connected Components")
    Components = snap.TCnComV()
    snap.GetWccs(graph, Components)
    # The context manager flushes and closes the file; the original handle
    # was never closed.
    with open(outFile, 'w') as fw_cc:
        for CnCom in Components:
            for item in CnCom:
                fw_cc.write(str(item) + "\n")
            fw_cc.write("\n")
    logger.info("Weakly Connected Components Computed!")
    logger.info("Weakly Connected Components Exported to " + outFile)
def _write_largest_component(f, heading, subgraph, id_to_groups):
    """Write the heading, edge/node counts, node ids and group names of subgraph."""
    f.write(heading)
    f.write('Edges: %f ' % subgraph.GetEdges())
    f.write('Nodes: %f \n' % subgraph.GetNodes())
    f.write('Node List: ')
    for member in subgraph.Nodes():
        f.write('%d, ' % member.GetId())
    f.write('\n')
    for member in subgraph.Nodes():
        f.write('%s, ' % id_to_groups[member.GetId()])


def _write_component_listing(f, heading, entry_format, collect, graph):
    """Write the heading, then the node ids of every component with >= 10 nodes.

    ``collect`` is the snap function that fills a TCnComV for ``graph``;
    ``entry_format`` is the per-component prefix taking the component index.
    """
    f.write(heading)
    found = snap.TCnComV()
    collect(graph, found)
    for index, members in enumerate(found):
        if members.Len() < 10:
            continue
        f.write(entry_format % index)
        for member_id in members:
            f.write('%d, ' % member_id)


def processNetwork(Graph, id_to_groups):
    """Write a feature report of Graph to the fastinf features text file.

    The report holds the overall node/edge counts, the largest weakly and
    strongly connected components (ids and the matching ``id_to_groups``
    entries), every weak/strong component with at least 10 nodes, and the
    clustering coefficient, triad count and modularity.
    """
    with open("../../data/fastinf_graph_noweights_features.txt", "w+") as f:
        f.write("RELATED GROUPS GRAPH:\n")
        f.write('Edges: %d\n' % Graph.GetEdges())
        f.write('Nodes: %d\n\n' % Graph.GetNodes())

        # Weakly connected view.
        _write_largest_component(f, "MAX WCC:\n", snap.GetMxWcc(Graph),
                                 id_to_groups)
        _write_component_listing(f, "\n\nALL WCCs:", '\nWcc%d: ',
                                 snap.GetWccs, Graph)

        # Strongly connected view.
        _write_largest_component(f, "\n\nMAX SCC:\n", snap.GetMxScc(Graph),
                                 id_to_groups)
        _write_component_listing(f, "\n\nALL SCCs:", '\nScc%d: ',
                                 snap.GetSccs, Graph)

        f.write('\n\nCLUSTERING AND COMMUNITIES:\n')
        f.write('Clustering coefficient: %f\n' % snap.GetClustCf(Graph, -1))
        f.write('Num Triads: %d\n' % snap.GetTriads(Graph, -1))
        # Modularity is evaluated for the community made of every node.
        all_ids = snap.TIntV()
        for member in Graph.Nodes():
            all_ids.Add(member.GetId())
        f.write('Modularity: %f' % snap.GetModularity(Graph, all_ids))
Beispiel #6
0
    def wccs(self, returnNodes=True):
        """
        Weakly connected components of the graph, largest first.

        Each component is converted with SnapUtil.rawComponentToNodeSet: a set of
        nodes by default, or just the IDs when returnNodes is false (building the
        nodes themselves adds overhead).
        """
        rawComponents = snap.TCnComV()
        snap.GetWccs(self.rawGraph, rawComponents)

        nodeSets = [
            SnapUtil.rawComponentToNodeSet(raw, self, returnNodes)
            for raw in rawComponents
        ]
        return sorted(nodeSets, key=len, reverse=True)
Beispiel #7
0
def is_uniquely_connected(graph):
    """Tell whether ``graph`` has exactly one non-trivial connected component.

    The strongly connected components are inspected first; when they do not
    contain exactly one component with more than one node, the same check is
    repeated on the weakly connected components.

    Args:
        graph: snap graph to inspect.

    Returns:
        bool: True when exactly one strongly (or, failing that, weakly)
        connected component has more than one node.
    """
    def is_unique(components):
        # Exactly one component with more than a single node.
        return sum(1 for comp in components if comp.Len() > 1) == 1

    # First identify if there are strongly connected components in the graph
    s_components = snap.TCnComV()
    snap.GetSccs(graph, s_components)
    unique = is_unique(s_components)

    # A unique strongly connected component settles the question; otherwise
    # fall back to the weakly connected components. The test must look at
    # the *result* ``unique``: the helper ``is_unique`` itself is always
    # truthy, which used to make this branch unreachable.
    if not unique:
        w_components = snap.TCnComV()
        snap.GetWccs(graph, w_components)
        unique = is_unique(w_components)

    return unique
Beispiel #8
0
def main():
    """Print component, PageRank and HITS statistics of the StackOverflow-Java graph.

    Loads the edge list, then prints the number of weakly connected
    components, the size of the largest one, and the three best nodes by
    PageRank, hub score and authority score.
    """
    def print_top_three(scores):
        # ``scores`` is already sorted; show its first three (id, score)
        # entries, or fewer if the table is shorter.
        for shown, item in enumerate(scores):
            if shown == 3:
                break
            print(item, scores[item])

    network = snap.LoadEdgeList(
        snap.PNEANet, "/Users/qingyuan/CS224W/stackoverflow-Java.txt", 0, 1)
    Components = snap.TCnComV()
    snap.GetWccs(network, Components)
    print("The number of weakly connected components is %d" % Components.Len())
    MxWcc = snap.GetMxWcc(network)
    # The message names edges first and nodes second, so the values must be
    # supplied in that order (they used to be swapped).
    print(
        "The number of edges is %d and the number of nodes is %d in the largest weakly connected component."
        % (MxWcc.GetEdges(), MxWcc.GetNodes()))

    PRankH = snap.TIntFltH()
    snap.GetPageRank(network, PRankH)
    PRankH.SortByDat(False)  # descending by score
    print(
        "IDs of the top 3 most central nodes in the network by PagePank scores. "
    )
    print_top_three(PRankH)

    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(network, NIdHubH, NIdAuthH)
    NIdHubH.SortByDat(False)
    print("IDs of the top 3 hubs in the network by HITS scores. ")
    print_top_three(NIdHubH)

    NIdAuthH.SortByDat(False)
    print("IDs of top 3 authorities in the network by HITS scores. ")
    print_top_three(NIdAuthH)
Beispiel #9
0
def q3():
    """Print component, PageRank and HITS statistics of the StackOverflow-Java graph.

    Reads ``stackoverflow-Java.txt`` from the working directory and prints
    the number of weakly connected components, the edge and node counts of
    the largest one, and the three best ``(score, node id)`` pairs by
    PageRank, hub score and authority score.
    """
    def top_three(scores):
        # Best three (score, node id) pairs, highest score first.
        return sorted(((scores[node_id], node_id) for node_id in scores),
                      reverse=True)[:3]

    G = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1)

    components = snap.TCnComV()
    snap.GetWccs(G, components)
    print("Number of WCC: ", components.Len())

    MxComp = snap.GetMxWcc(G)
    # Ask the graph for its sizes instead of walking every node and edge,
    # and print them in the order the label announces (edges, then nodes).
    print("Number of edges and nodes in MxWCC: ", MxComp.GetEdges(), ' ',
          MxComp.GetNodes())

    PRankH = snap.TIntFltH()
    snap.GetPageRank(G, PRankH)
    print("IDs of top 3 PageRank scores: ", top_three(PRankH))

    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(G, NIdHubH, NIdAuthH)
    print("IDs of top 3 hubs by HITS scores: ", top_three(NIdHubH))
    print("IDs of top 3 authorities by HITS scores: ", top_three(NIdAuthH))
Beispiel #10
0
def stackoverflow():
    g = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1)
    components = snap.TCnComV()
    snap.GetWccs(g, components)
    print "Num connected comp = ", components.Len()
    mxwcc = snap.GetMxWcc(g)
    print "Num edges in largest = ", mxwcc.GetEdges()
    print "Num nodes in largest = ", mxwcc.GetNodes()
    rank = snap.TIntFltH()
    snap.GetPageRank(g, rank)
    rank.SortByDat(False)
    count = 0
    for node in rank:
        if count >= 3:
            break
        count += 1
        print "largest page rank score nodes = ", node, " (score = ", rank[node]

    hubs = snap.TIntFltH()
    auths = snap.TIntFltH()
    snap.GetHits(g, hubs, auths)
    
    hubs.SortByDat(False)
    count = 0
    for node in hubs:
        if count >= 3:
            break
        count += 1
        print "largest hub score nodes = ", node, " (score = ", hubs[node]

    auths.SortByDat(False)
    count = 0
    for node in auths:
        if count >= 3:
            break
        count += 1
        print "largest auth score nodes = ", node, " (score = ", auths[node]
Beispiel #11
0
def partThree():
    """Print component, PageRank and HITS statistics of the StackOverflow-Java graph.

    Output, in order: (1) the number of weakly connected components,
    (2) the edge and node counts of the largest one, (3) the three nodes
    with the highest PageRank as ``id score`` lines, (4) the three best
    ``(score, id)`` pairs by hub score and then by authority score.
    """
    data_dir_StackOverFlow = './data/stackoverflow-Java.txt'
    sofG = snap.LoadEdgeList(snap.PNGraph, data_dir_StackOverFlow, 0, 1, '\t')

    Components = snap.TCnComV()
    snap.GetWccs(sofG, Components)
    print('1. The number of weakly connected components in the network.: '+str(Components.Len()))

    MxWcc = snap.GetMxWcc(sofG)
    num_nodes = MxWcc.GetNodes()
    num_edges = MxWcc.GetEdges()
    print('2. The number of edges is {} and the number of nodes is {}'.format(num_edges, num_nodes))

    PRankH = snap.TIntFltH()
    snap.GetPageRank(sofG, PRankH)
    print('3. ')
    # Rank by score first: the hash table is not ordered by PageRank, so
    # taking its first three entries would report arbitrary nodes.
    best_ranked = sorted(PRankH, key=lambda node_id: PRankH[node_id],
                         reverse=True)[:3]
    for node_id in best_ranked:
        print(node_id, PRankH[node_id])

    print('4. ')
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(sofG, NIdHubH, NIdAuthH)
    # (score, id) pairs so that the descending sort orders by score.
    hub_pairs = ((NIdHubH[node_id], node_id) for node_id in NIdHubH)
    print(sorted(hub_pairs, reverse=True)[:3])
    auth_pairs = ((NIdAuthH[node_id], node_id) for node_id in NIdAuthH)
    print(sorted(auth_pairs, reverse=True)[:3])
Beispiel #12
0
# Weakly connected component sizes: one (size, count) pair per entry.
WccSzCnt = snap.TIntPr64V()
snap.GetWccSzCnt(G, WccSzCnt)
for i in range(WccSzCnt.Len()):
    size_and_count = WccSzCnt[i]
    print("WccSzCnt[%d] = (%d, %d)" %
          (i, size_and_count.Val1.Val, size_and_count.Val2.Val))

# Nodes sharing a weakly connected component with node 1.
CnCom = snap.TInt64V()
snap.GetNodeWcc(G, 1, CnCom)
print("CnCom.Len() = %d" % (CnCom.Len()))

# Every weakly connected component, dumped node by node.
WCnComV = snap.TCnComV()
snap.GetWccs(G, WCnComV)
for i in range(WCnComV.Len()):
    component = WCnComV[i]
    print("WCnComV[%d].Len() = %d" % (i, component.Len()))
    for j in range(component.Len()):
        print("WCnComV[%d][%d] = %d" % (i, j, component[j]))

# Size of the largest weakly connected component, as returned by GetMxWccSz.
MxWccSz = snap.GetMxWccSz(G)
print("MxWccSz = %.5f" % (MxWccSz))

# Largest weakly connected component as a graph of its own.
GMx = snap.GetMxWcc(G)
print("GMx: GetNodes() = %d, GetEdges() = %d" %
      (GMx.GetNodes(), GMx.GetEdges()))

# get strongly connected components
Beispiel #13
0
def net_structure(dataset_dir, output_dir, net, IsDir, weight):
    """Summarise the weakly connected components of every ego network in a folder.

    Each file listed in ``dataset_dir`` is loaded as an edge list. Per
    network, the number of weakly connected components, their mean size and
    their mean size normalised by the node count are collected (a network
    without edges contributes 0 to all three). The statistics of the three
    series, as computed by ``calc.calcular_full``, are written to
    ``<output_dir><net>_connected_comp.json`` and ``.txt`` and echoed to
    stdout. Nothing is computed when the JSON file already exists.

    Args:
        dataset_dir: directory containing the edge-list files.
        output_dir: prefix of the output paths; it is concatenated as-is,
            so it should end with a path separator.
        net: network name used in the output file names.
        IsDir: ``True`` loads directed graphs, anything else undirected.
        weight: unused; kept so existing callers keep working.
    """
    banner = "\n######################################################################\n"
    json_path = str(output_dir) + str(net) + "_connected_comp.json"
    txt_path = str(output_dir) + str(net) + "_connected_comp.txt"

    print(banner)
    if os.path.isfile(json_path):
        print("Arquivo já existe: " + json_path)
        return

    print("Componentes conectados - " + str(dataset_dir))

    cc = []  # mean component size, one entry per ego network
    cc_normal = []  # same, normalised by the number of nodes of the graph
    n_cc = []  # number of components, one entry per ego network

    graph_type = snap.PNGraph if IsDir is True else snap.PUNGraph

    for i, filename in enumerate(os.listdir(dataset_dir), 1):
        print(
            str(output_dir) + str(net) + "/" + str(filename) +
            " - Calculando propriedades para o ego " + str(i) + ": " +
            str(filename))
        # os.path.join also works when dataset_dir has no trailing
        # separator. Loading may need an explicit separator, e.g.
        # snap.LoadEdgeList(snap.PNGraph, path, 0, 1, '\t').
        G = snap.LoadEdgeList(graph_type,
                              os.path.join(dataset_dir, filename), 0, 1)

        n_nodes = G.GetNodes()
        n_edges = G.GetEdges()

        if n_edges == 0:
            cc.append(0)
            cc_normal.append(0)
            n_cc.append(0)
            print("Nenhuma aresta encontrada para a rede-ego " + str(i) +
                  " - (" + str(filename) + ")")
            continue

        Components = snap.TCnComV()
        snap.GetWccs(G, Components)
        sizes = [CnCom.Len() for CnCom in Components]
        # n_nodes cannot be 0 here: the graph has at least one edge.
        sizes_normal = [float(size) / float(n_nodes) for size in sizes]
        _n_cc = len(sizes)

        result = calc.calcular(sizes)
        cc.append(result['media'])

        result_normal = calc.calcular(sizes_normal)
        cc_normal.append(result_normal['media'])

        n_cc.append(_n_cc)
        print("Número de componentes conectados para o ego " + str(i) +
              " (" + str(filename) + "): " + str(_n_cc))
        print("Média do tamanho dos componentes conectados para o ego " +
              str(i) + " (" + str(filename) + "): " + str(result['media']))
        print(
            "Média (normalizada) do tamanho dos componentes conectados para o ego "
            + str(i) + " (" + str(filename) + "): " +
            str(result_normal['media']))
        # Blank separator line; a bare ``print`` does nothing on Python 3.
        print("")

    N_CC = calc.calcular_full(n_cc)
    CC = calc.calcular_full(cc)
    CC_NORMAL = calc.calcular_full(cc_normal)

    overview = {}
    overview['Len_ConnectedComponents'] = CC
    overview['Len_ConnectedComponents_Normal'] = CC_NORMAL
    overview['N_ConnectedComponents'] = N_CC

    with open(json_path, 'w') as f:
        f.write(json.dumps(overview))

    # The same three lines go to the text report and to stdout.
    summary_lines = [
        "Number_Connected_Comp: Média: %5.3f -- Var:%5.3f -- Des. Padrão: %5.3f \n"
        % (N_CC['media'], N_CC['variancia'], N_CC['desvio_padrao']),
        "Length_Connected_Comp: Média: %5.3f -- Var:%5.3f -- Des. Padrão: %5.3f \n"
        % (CC['media'], CC['variancia'], CC['desvio_padrao']),
        "Length_Connected_Comp_Normalized: Média: %5.3f -- Var:%5.3f -- Des. Padrão: %5.3f \n"
        % (CC_NORMAL['media'], CC_NORMAL['variancia'],
           CC_NORMAL['desvio_padrao']),
    ]

    with open(txt_path, 'w') as f:
        f.write(banner)
        for line in summary_lines:
            f.write(line)
        f.write(banner)

    print(banner)
    for line in summary_lines:
        print(line)
    print(banner)
Beispiel #14
0
def connectedComponent(clusterCommands, Graph, conn, cur):
    """Compute the weakly connected components of Graph and pass them to createTable."""
    weakComponents = snap.TCnComV()
    snap.GetWccs(Graph, weakComponents)
    createTable(clusterCommands, weakComponents, conn, cur)
Beispiel #15
0
        print(line)
#Below addresses 1.f,g
g_outdeg = snap.TFltPr64V()
g_indeg = snap.TFltPr64V()
snap.GetOutDegCnt(g, g_outdeg)
snap.GetInDegCnt(g, g_indeg)
# Each entry is a pair (Val1, Val2) = (degree, number of nodes with that
# degree). The degree threshold therefore applies to Val1, and the number of
# matching nodes is the sum of the Val2 counts -- not the number of pairs.
outdeg_gt_10 = [pair for pair in g_outdeg if pair.GetVal1() > 10]
indeg_gt_10 = [pair for pair in g_indeg if pair.GetVal1() > 10]
print(f'Nodes with outdegree > 10: {int(sum(pair.GetVal2() for pair in outdeg_gt_10))}')
print(f'Nodes with indegree > 10: {int(sum(pair.GetVal2() for pair in indeg_gt_10))}')
# Problem 2: statistics of the StackOverflow-Java graph.
so = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt")
# 2.1: number of weakly connected components.
so_wcc = snap.TCnComV()
snap.GetWccs(so, so_wcc)
print(f'# of connected components: {len(so_wcc)}')
# 2.2: summary of the largest weakly connected component.
so_mx_wcc = snap.GetMxWcc(so)
snap.PrintInfo(so_mx_wcc, "Largest connected component of StackOverflow-Java")
# 2.3: PageRank scores, sorted so the highest score comes first.
so_pr = snap.TIntFlt64H()
snap.GetPageRank(so, so_pr)
so_pr.SortByDat(False)  # Ascending=False
# The iterator walk set up below might be a naive way to read the ranking.
# Maybe try GetKeyV() for hashtable types, then just grab the top 3 elements.
so_pr_ordered = []
so_pr_iter = so_pr.BegI()
while not so_pr_iter.IsEnd():
    so_pr_ordered.append((
        so_pr_iter.GetKey(),
Beispiel #16
0
def quick_properties(graph, name, dic_path):
    """Print and return quick properties of the graph "name".

    Args:
        graph: snap graph to describe.
        name: label printed in the header.
        dic_path: path of a pickled dict {players: id}; it is handed to
            ``name_from_index`` to name the top-ranked nodes.

    Returns:
        dict: the reported figures keyed by the labels "a. Nodes" through
        "o. Three top authorities"; the last three entries are 3-tuples of
        whatever ``name_from_index`` returns.
    """
    results = {}
    n_edges = graph.GetEdges()
    n_nodes = graph.GetNodes()
    n_self_edges = snap.CntSelfEdges(graph)
    n_directed_edges, n_undirected_edges = snap.CntUniqDirEdges(
        graph), snap.CntUniqUndirEdges(graph)
    n_reciprocated_edges = snap.CntUniqBiDirEdges(graph)
    n_zero_out_nodes, n_zero_in_nodes = snap.CntOutDegNodes(
        graph, 0), snap.CntInDegNodes(graph, 0)
    # GetDeg() is the total degree (in + out); use the directional degree
    # that each figure is labelled with.
    max_node_in = graph.GetNI(snap.GetMxInDegNId(graph)).GetInDeg()
    max_node_out = graph.GetNI(snap.GetMxOutDegNId(graph)).GetOutDeg()
    components = snap.TCnComV()
    snap.GetWccs(graph, components)
    max_wcc = snap.GetMxWcc(graph)
    results["a. Nodes"] = n_nodes
    results["b. Edges"] = n_edges
    results["c. Self-edges"] = n_self_edges
    results["d. Directed edges"] = n_directed_edges
    results["e. Undirected edges"] = n_undirected_edges
    results["f. Reciprocated edges"] = n_reciprocated_edges
    results["g. 0 out-degree nodes"] = n_zero_out_nodes
    results["h. 0 in-degree nodes"] = n_zero_in_nodes
    results["i. Maximum node out-degree"] = max_node_out
    results["j. Maximum node in-degree"] = max_node_in
    results["k. Weakly connected components"] = components.Len()
    results["l. Nodes, edges of largest WCC"] = (max_wcc.GetNodes(),
                                                 max_wcc.GetEdges())
    print("##########")
    print("Quick overview of {} Network".format(name))
    print("##########")
    print("{} Nodes, {} Edges".format(n_nodes, n_edges))
    print("{} Self-edges ".format(n_self_edges))
    print("{} Directed edges, {} Undirected edges".format(
        n_directed_edges, n_undirected_edges))
    print("{} Reciprocated edges".format(n_reciprocated_edges))
    print("{} 0-out-degree nodes, {} 0-in-degree nodes".format(
        n_zero_out_nodes, n_zero_in_nodes))
    print("Maximum node in-degree: {}, maximum node out-degree: {}".format(
        max_node_in, max_node_out))
    print("###")
    # Parenthesised so the lines run on Python 3 as well; with a single
    # string argument the output is the same on Python 2.
    print("{} Weakly connected components".format(components.Len()))
    print("Largest Wcc: {} Nodes, {} Edges".format(max_wcc.GetNodes(),
                                                   max_wcc.GetEdges()))

    prankH = snap.TIntFltH()
    snap.GetPageRank(graph, prankH)
    sorted_prankH = sorted(prankH, key=lambda key: prankH[key], reverse=True)
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(graph, NIdHubH, NIdAuthH)
    sorted_NIdHubH = sorted(NIdHubH,
                            key=lambda key: NIdHubH[key],
                            reverse=True)
    sorted_NIdAuthH = sorted(NIdAuthH,
                             key=lambda key: NIdAuthH[key],
                             reverse=True)

    # NOTE(review): pickle.load can execute arbitrary code stored in the
    # file; dic_path must only ever point at trusted data.
    with open(dic_path, 'rb') as dic_id:
        mydict = pickle.load(dic_id)

    def top_three(ranking):
        # Resolve each of the three best entries once; the same tuple is
        # both printed and stored in the results.
        return tuple(name_from_index(ranking, position, mydict)
                     for position in range(3))

    top_pagerank = top_three(sorted_prankH)
    top_hubs = top_three(sorted_NIdHubH)
    top_authorities = top_three(sorted_NIdAuthH)

    print("3 most central players by PageRank scores: {}, {}, {}".format(
        *top_pagerank))
    print("Top 3 hubs: {}, {}, {}".format(*top_hubs))
    print("Top 3 authorities: {}, {}, {}".format(*top_authorities))
    results["m. Three top PageRank"] = top_pagerank
    results["n. Three top hubs"] = top_hubs
    results["o. Three top authorities"] = top_authorities
    return results
import snap

data = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1, '\t')

# The number of weakly connected components in the network.
Components = snap.TCnComV()
snap.GetWccs(data, Components)
print("Number of Weakly Connected Components:", Components.Len())

# The number of edges and the number of nodes in the largest weakly connected component
MxWcc = snap.GetMxWcc(data)
print("Number of MxWcc Edges:", MxWcc.GetEdges())
print("Number of MxWcc Nodes:", MxWcc.GetNodes())

# IDs of the top 3 most central nodes in the network by PageRank scores
PRankH = snap.TIntFlt64H()
snap.GetPageRank(data, PRankH)
PRankH.SortByDat(False)  # descending, so the iterator starts at the best node

i = 0
itr = PRankH.BegI()
print("The top 3 most central nodes in the network by PagePank scores:")
# Also stop at the end of the table, so a graph with fewer than three nodes
# never reads through an exhausted iterator.
while i < 3 and not itr.IsEnd():
    print("Node:", itr.GetKey())
    itr.Next()
    i += 1
print("")

# IDs of the top 3 hubs and top 3 authorities in the network by HITS scores.
NIdHubH = snap.TIntFlt64H()
NIdAuthH = snap.TIntFlt64H()
Beispiel #18
0
def get_weakly_connected_components_number(graph: snap.PNGraph) -> int:
    """Return the number of weakly connected components of ``graph``."""
    components = snap.TCnComV()
    snap.GetWccs(graph, components)
    # The components were computed but never handed back to the caller.
    return components.Len()
Beispiel #19
0
"""
Created on Fri Jan  3 14:32:01 2020

@author: qiuwenjie
"""
'''
cs224w homework 0
Q3
'''

import snap as sp
import numpy as np
# Load the StackOverflow-Java edge list as a directed graph.
G = sp.LoadEdgeList(sp.PNGraph, "stackoverflow-Java.txt", 0, 1)
# Q3.1: count the weakly connected components.
Components = sp.TCnComV()
sp.GetWccs(G, Components)
WccsNum = len(Components)
print("Q3.1 The number of weakly connected components in the network: ",
      WccsNum)

# Q3.2: size of the largest weakly connected component.
MxWcc = sp.GetMxWcc(G)
MxWccNodeNum = MxWcc.GetNodes()
MxWccEdgesNum = MxWcc.GetEdges()
print("Q3.2 %d edges and %d nodes in the largest weakly connected component."\
      %(MxWccEdgesNum,MxWccNodeNum))

# PageRank scores; the two lists below start empty and are presumably
# filled by the loop that follows -- confirm against the full script.
PRankH = sp.TIntFltH()
sp.GetPageRank(G, PRankH)
PRankHKey = []
PRankHVal = []
for item in PRankH:
Beispiel #20
0
import snap

#Load the stack overflow grap
G1 = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1)

#1. Get the list of all weakly connected components
Components = snap.TCnComV()
snap.GetWccs(G1, Components)
wccCount = 0
for Cc in Components:
    wccCount = wccCount + 1

print "1. Number of Weakly Connected Components: ", wccCount

#2. Get The number of edges and the number
#   of nodes in the largest weakly connected component
maxWcc = snap.GetMxWcc(G1)
EdgeCount = 0
NodeCount = 0
for E in maxWcc.Edges():
    EdgeCount = EdgeCount + 1

for N in maxWcc.Nodes():
    NodeCount = NodeCount + 1

print "2. Number of edges and nodes in largest wcc"
print "EdgeCount : ", EdgeCount
print "NodeCount : ", NodeCount

#3 Get The top 3 most central nodes in the network by PagePank scores
PRankH = snap.TIntFltH()
# Rakshith Singh Assignment 1 - Stackoverflow Analysis
# https://snap.stanford.edu/snappy/doc/reference/index-ref.html
# https://stackoverflow.com/questions/10152131/how-do-i-index-the-3-highest-values-in-a-list
import snap
stackoverflow_graph = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt")

# Question 1 - number of weakly connected components in the network.
Components = snap.TCnComV()
snap.GetWccs(stackoverflow_graph, Components)
print("Number of weakly connected components is ", len(Components))

# Question 2 - node and edge counts of the largest weakly connected component.
MxWcc = snap.GetMxWcc(stackoverflow_graph)
print("The number of nodes in the largest Wcc is ", MxWcc.GetNodes())
print("The number of edges in the largest Wcc is ", MxWcc.GetEdges())

# Question 3 - the top 3 most central nodes in the network by PageRank scores.
PRankH = snap.TIntFltH()
snap.GetPageRank(stackoverflow_graph, PRankH)
first = 3496478
second = 3600470
third = 3399766
# Parallel lists of node ids and their scores, in table order.
node_list = [node_id for node_id in PRankH]
prank_list = [PRankH[node_id] for node_id in node_list]
# (score, node id) pairs sorted by decreasing score, best three kept.
results = sorted(zip(prank_list, node_list), reverse=True)[:3]
print("Top 3 Central nodes and their Page Ranks are")
Beispiel #22
0
# Finish the plot: label the y axis, draw the data points and the fitted
# line, then show the figure. x, y and y_reg are defined earlier in the
# script (not visible here) -- presumably the log-scaled data and its
# regression values; confirm against the full script.
plt.ylabel(r'$\log{Count}$')
# Axis limits are left to matplotlib; the manual limits are disabled:
# plt.xlim(right=numpy.amax(x), left=0.0)
# plt.ylim(top=max(numpy.amax(y), numpy.amax(y_reg)), bottom=0.0)
handle_datpnt = plt.scatter(x, y, label='datapoint')
# plt.plot returns a list of lines; the trailing comma unpacks the only one.
handle_reg, = plt.plot(x, y_reg, color='red', label='least-square regression')
plt.legend([handle_datpnt, handle_reg],
           ['datapoint', 'least-square regression'])
plt.show()

# Section 3 #
print('*' * 10 + ' Section III ' + '*' * 10)
# sof for stackoverflow
sof_g = snap.LoadEdgeListStr(snap.PNGraph, "stackoverflow-Java.txt", 0, 1)
# connected components vector
wcc_vec = snap.TCnComV()
snap.GetWccs(sof_g, wcc_vec)
print("The stackoverflow-Java graph has " + str(len(wcc_vec)) +
      " weakly connected components.")
# Largest weakly connected component. Both figures below must describe this
# sub-graph: the edge count used to be taken from the whole graph sof_g.
largest_wcc = snap.GetMxWcc(sof_g)
print('The largest weekly connected component of stackoverflow graph has ' +
      str(largest_wcc.GetNodes()) + ' nodes and ' +
      str(snap.CntUniqDirEdges(largest_wcc)) + ' edges.')

# PageRank scores, sorted with SortByDat's default order.
rankscoremap = snap.TIntFltH()
snap.GetPageRank(sof_g, rankscoremap)
maplen = len(rankscoremap)
rankscoremap.SortByDat()
cnt = 0
print("If you use PageRank score, then")
for item in rankscoremap:
Beispiel #23
0
import snap
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression

DATA_PATH = './stackoverflow-Java.txt'

if __name__ == '__main__':

    # Build Java Stackoverflow Graph
    G1 = snap.LoadEdgeList(snap.PNGraph, DATA_PATH, 0, 1)

    # Get the number of Wcc. The vector reports its own length, so there is
    # no need to copy every component into a Python list just to count them.
    Wccs = snap.TCnComV()
    snap.GetWccs(G1, Wccs)

    print("Weakly connected components: {:d}".format(Wccs.Len()))

    # Get the number of nodes and edges on the largest Wcc; the graph keeps
    # both counts, which avoids materialising every node and edge.
    MxWcc = snap.GetMxWcc(G1)
    num_nodes = MxWcc.GetNodes()
    num_edges = MxWcc.GetEdges()

    print("The largest WCC has {:d} nodes; {:d} edges".format(
        num_nodes, num_edges))

    # Get top 3 PageRank IDs
    PRankH = snap.TIntFltH()
    snap.GetPageRank(G1, PRankH)
Beispiel #24
0
# Load the StackOverflow-Java network as a directed graph.
SOURCE_FILE = './data/stackoverflow-Java.txt'
SOGraph = snap.LoadEdgeList(snap.PNGraph, SOURCE_FILE, 0, 1)
# Sanity check that the expected data set was loaded. NOTE(review): assert
# statements are skipped when Python runs with -O.
assert 146874 == SOGraph.GetNodes()
assert 333606 == SOGraph.GetEdges()


def sortTIntFltH(mapping, desc=True):
    """Return the entries of *mapping* as (key, value) pairs sorted by value.

    *mapping* is anything that yields its keys on iteration and supports
    lookup by key. Pairs are ordered from largest to smallest value when
    *desc* is true, and from smallest to largest otherwise.
    """
    pairs = []
    for node_id in mapping:
        pairs.append((node_id, mapping[node_id]))
    pairs.sort(key=lambda pair: pair[1], reverse=desc)
    return pairs


# 3.1: number of weakly connected components.
components = snap.TCnComV()
snap.GetWccs(SOGraph, components)

# The two string literals are concatenated as written, so the first one must
# end with a space to keep "network" and "is" apart in the output.
print("The number of weakly connected components in the SO network "
      "is %s." % (len(components)))

# 3.2: size of the largest weakly connected component.
maxWeaklyConnectedComponent = snap.GetMxWcc(SOGraph)
print("The largest weakly connected component in the SO network "
      "has %s nodes and %s edges." % (maxWeaklyConnectedComponent.GetNodes(),
                                      maxWeaklyConnectedComponent.GetEdges()))

# 3.3: PageRank scores of all nodes, sorted from highest to lowest score.
TOPN = 3
SOPageRanks = snap.TIntFltH()
snap.GetPageRank(SOGraph, SOPageRanks, 0.85, 1e-4, 1000)
sortedSOPageRanks = sortTIntFltH(SOPageRanks)
Beispiel #25
0
'''
This script is for hw0-q2
'''

import snap

g = snap.LoadEdgeList(snap.PNGraph, 'wiki-Vote.txt', 0, 1)

wccgv = snap.TCnComV()
snap.GetWccs(g, wccgv)
print wccgv.Len()

wccg = snap.GetMxWcc(g)
print wccg.GetNodes()
print wccg.GetEdges()

PRankH = snap.TIntFltH()
snap.GetPageRank(g, PRankH)
PRankH.SortByDat(False)
a = 0
for item in PRankH:
    if (a < 3):
        print item, PRankH[item]
        a = a + 1
    else:
        break
print "\n"

HubH = snap.TIntFltH()
AutH = snap.TIntFltH()
snap.GetHits(g, HubH, AutH)
import snap
# import os

# Graph = snap.GenRndGnm(snap.PNGraph, 100, 1000)
# print os.system("pwd")
# Load the transaction edge list as a directed graph (columns 0 and 1 are the
# edge endpoints) and report its size.
Graph = snap.LoadEdgeList(snap.PNGraph, "../bitcoin_computed/txedgeunique.txt",
                          0, 1)
G_Nodes = Graph.GetNodes()
G_Edges = Graph.GetEdges()
print "Graph: Nodes %d, Edges %d" % (G_Nodes, G_Edges)

# Vectors that receive the strongly / weakly connected components.
SCComponents = snap.TCnComV()
WCComponents = snap.TCnComV()

snap.GetSccs(Graph, SCComponents)
snap.GetWccs(Graph, WCComponents)

# NOTE(review): this treats the component at index 0 as the largest one -
# confirm that GetSccs/GetWccs return components ordered by size.
MaxWCCNodes = WCComponents[0]
MaxSCCNodes = SCComponents[0]
# print type(MaxSccNodes)
print MaxSCCNodes.Len()
print MaxWCCNodes.Len()

# Iterate over each edge and check for In, Out

# Hash table keyed by the node ids of the chosen strongly connected component.
SCCHashmap = snap.TIntH()
for node in MaxSCCNodes:
    SCCHashmap.AddKey(node)

# Second hash table, empty at this point; it is filled by the code that
# follows.
InOutHashmap = snap.TIntH()
for node in MaxWCCNodes:
Beispiel #27
0
def Get_Connected_Components(G):
    """Print the size of every weakly connected component of graph *G*.

    Nothing is returned; one "Size of component" line is printed per
    component found by snap.GetWccs.
    """
    wccs = snap.TCnComV()
    snap.GetWccs(G, wccs)

    for component in wccs:
        size = component.Len()
        print("Size of component: %d" % size)
Beispiel #28
0
# P3 of hw1
import snap

# 1) The number of weakly connected components
g1 = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1)
Components = snap.TCnComV()
snap.GetWccs(g1, Components)
cnt = 0

for ele in Components:
    #print "Size of Component: %d" % (ele.Len())
    cnt += 1
print "1) number of weakly connected components: %d" % (cnt)

# 2) The number of edges and the number of nodes
g2 = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1)
MxWcc = snap.GetMxWcc(g2)
cnt_edge = 0
cnt_node = 0

for ele in MxWcc.Edges():
    cnt_edge += 1
for ele in MxWcc.Nodes():
    cnt_node += 1
print "2) Edges = %d, Node = %d" % (cnt_edge, cnt_node)

# 3)
g3 = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1)
PRankH = snap.TIntFltH()
snap.GetPageRank(g3, PRankH)
max = 0
Beispiel #29
0
def CombineWords(allwords, orderby=('T', 'C')):
    """Merge the word candidates of several OCR engines into per-page words.

    For every page an undirected graph is built with one node per candidate
    and one edge per pair of candidates whose boxes overlap. Each connected
    component of that graph becomes one "word": the list of candidates that
    belong to the component.

    Args:
        allwords: mapping ``ocrid -> pageid -> sequence of (box, word)``
            tuples. Boxes must provide ``GetLeft``, ``GetUp``, ``GetPrinted``
            and ``IsOverlapSamePage``; words must provide ``GetContent``.
        orderby: OCR ids whose original output positions are used to order
            the words of a page; one order slot is kept per id, in this
            sequence. It is only read, never modified.

    Returns:
        A ``(page_words, wccsizes)`` tuple. ``page_words`` maps each pageid
        to its list of words, sorted with ``order_compare``; every word is a
        list of ``(ocrid, box, word)`` candidates. ``wccsizes`` maps a
        component size to the number of components of that size, counted
        over all pages.
    """
    page_cands = {}  # pageid : [(ocrid, box, word), ...]
    page_words = {}  # pageid : [[cand1, cand2], [cand1, cand2, cand3]..]
    # ocrid : pageid : cand_sub : ori_ocr_order (inverted index from a
    # candidate's position in page_cands[pageid] back to its position in the
    # output of the OCR that produced it)
    page_cands_order = {}

    for ocrid in allwords:
        index = allwords[ocrid]
        page_cands_order[ocrid] = {}
        for pageid in index:
            cands = page_cands.setdefault(pageid, [])
            cand_order = page_cands_order[ocrid].setdefault(pageid, {})

            for ocr_sub, pair in enumerate(index[pageid]):
                box = pair[0]
                word = pair[1]
                # Handle bad outputs of OCR 'C': a box at the page origin is
                # reported on stderr and the candidate is ignored.
                if box.GetLeft() == 0 and box.GetUp() == 0 and ocrid == 'C':
                    print >> sys.stderr, box.GetPrinted(), word.GetContent()
                    continue

                cands.append((ocrid, ) + pair)  # (ocrid, box, word)
                cand_order[len(cands) - 1] = ocr_sub

    wccsizes = {}
    for pageid in page_cands:
        nodes = page_cands[pageid]  # [(ocrid, box, word), ...]
        graph = snap.PUNGraph.New()  # Undirected graph
        for i in range(len(nodes)):
            graph.AddNode(i)

        # Add an edge between every pair of candidates with overlapping boxes.
        for i in range(len(nodes)):
            b1 = nodes[i][1]
            for j in range(i + 1, len(nodes)):
                if b1.IsOverlapSamePage(nodes[j][1]):
                    graph.AddEdge(i, j)

        # TODO After aggregate by WCCs, we need a proper order.
        # Now picking order by first word in Tesseract Order!
        # If not appearing in Tesseract, pick order as Cuneiform order. (how?) or neglect???

        # One entry per component: (order, [cand1, cand2, ...])
        words = []
        wccs = snap.TCnComV()
        snap.GetWccs(graph, wccs)

        for comp in wccs:
            wccsz = comp.Len()
            wccsizes[wccsz] = wccsizes.get(wccsz, 0) + 1

            # For each "orderby" OCR, keep the smallest original position
            # among this component's candidates; slots of OCRs that did not
            # contribute a candidate stay at NO_ORDER_MARK.
            this_order = [NO_ORDER_MARK] * len(orderby)
            for nid in comp:
                ocrid = nodes[nid][0]
                if ocrid not in orderby:
                    continue
                ocrorder = page_cands_order[ocrid][pageid][nid]
                slot = orderby.index(ocrid)
                if (this_order[slot] == NO_ORDER_MARK
                        or this_order[slot] > ocrorder):
                    this_order[slot] = ocrorder

            words.append((this_order, [nodes[nid] for nid in comp]))

        words.sort(cmp=order_compare, key=lambda entry: entry[0])

        # Test case:
        # python alignment/Align.py /Users/Robin/Documents/repos/deepdive_ocr/deepdive_danny/app/ocr/data/html-labels-accurate/data-labeled/ocroutput/JOURNAL_28971.pdf.task/
        # [7, 6], [8, 7], [9, 9], [12, 13], [13, 10], [14, None], [15, None], [16, None], [17, None], [18, None]
        # [19, None], ... , [94, None]
        # [95, None], [96, None], [97, None], [98, None], [None, 14], [None, 15], [99, 16], [102, 17], [104, 18], [105, 19]

        # Only the candidates are returned; the order was needed for sorting.
        page_words[pageid] = [entry[1] for entry in words]

    return page_words, wccsizes