Ejemplo n.º 1
0
def computeGraphMetrics(GRep, GModel):
  """Compare a reference graph ``GRep`` against a model graph ``GModel``.

  Returns a 7-tuple. The first four entries are ``euclidDist`` values between
  the two graphs' ``ndDist``, ``knnDist``, ``dkDist`` and ``ccDist`` outputs
  (in that order). The last three are ``(GRep value, GModel value)`` pairs
  for ``ASCoeff``, the node count of the largest weakly connected component,
  and ``snap.GetBfsEffDiam(graph, 1000, False)``.
  """
  graphPair = (GRep, GModel)

  # One scalar distance per distribution-style descriptor.
  distances = []
  for describe in (ndDist, knnDist, dkDist, ccDist):
    distances.append(euclidDist(describe(GRep), describe(GModel)))

  # Scalar descriptors are reported side by side rather than differenced.
  assortativity = tuple(ASCoeff(g) for g in graphPair)
  largestWcc = tuple(snap.GetMxWcc(g).GetNodes() for g in graphPair)
  effDiameter = tuple(snap.GetBfsEffDiam(g, 1000, False) for g in graphPair)

  return tuple(distances) + (assortativity, largestWcc, effDiameter)
Ejemplo n.º 2
0
def q1_3_grpah(Graph):
    """Print estimated bow-tie region sizes for a directed graph.

    Reports the total node count, the number of nodes outside the largest
    weakly connected component (DISCONNECTED), the size of the largest
    strongly connected component (SCC), and estimates of OUT, IN and
    TENDRILS+TUBES.

    OUT and IN are estimated by running a forward and a backward BFS from up
    to 100 randomly sampled SCC nodes and keeping the largest tree seen; a
    forward tree holds roughly SCC + OUT and a backward tree roughly SCC + IN.
    """
    n_nodes = Graph.GetNodes()
    MxWcc = snap.GetMxWcc(Graph)
    MxScc = snap.GetMxScc(Graph)
    n_MxWcc = MxWcc.GetNodes()
    n_MxScc = MxScc.GetNodes()
    print(" TOTAL          : ", n_nodes)
    print(" DISCONNECTED   : ", n_nodes - n_MxWcc)
    print(" SCC            : ", n_MxScc)

    SCC_nodes = [NI.GetId() for NI in MxScc.Nodes()]

    # random.sample raises ValueError when asked for more items than exist,
    # so never request more samples than the SCC has nodes.
    num_test = min(100, len(SCC_nodes))
    random_sampled_scc = random.sample(SCC_nodes, num_test)

    num_out = []
    num_in = []
    for NodeId in random_sampled_scc:
        BfsTreeOut = snap.GetBfsTree(Graph, NodeId, True, False)
        BfsTreeIn = snap.GetBfsTree(Graph, NodeId, False, True)
        num_out.append(BfsTreeOut.GetNodes())  # roughly SCC + OUT
        num_in.append(BfsTreeIn.GetNodes())  # roughly SCC + IN

    # With no samples at all, fall back to the SCC size so that OUT and IN
    # are reported as 0 instead of failing on an empty list.
    max_out = max(num_out) if num_out else n_MxScc
    max_in = max(num_in) if num_in else n_MxScc

    n_out = max_out - n_MxScc
    n_in = max_in - n_MxScc
    print(" OUT            : ", n_out)
    print(" IN             : ", n_in)

    # Whatever is left of the largest WCC after removing SCC, OUT and IN.
    num_tendrils = n_MxWcc - n_MxScc - n_out - n_in
    print(" TENDRILS+TUBES : ", num_tendrils)
def bowtie_components(graph, name):
    """Print and return the bow-tie region sizes of ``graph``.

    A random node of the largest SCC seeds one backward and one forward BFS;
    the node counts of those trees minus the SCC size give IN and OUT.
    ``name`` is only used in the printed header.

    Returns a dict keyed "a. SCC", "b. IN", "c. OUT", "d. TENDRILS + TUBES"
    and "e. DISCONNECTED".
    """
    total = graph.GetNodes()

    core = snap.GetMxScc(graph)
    seed = core.GetRndNId()

    disconnected = total - snap.GetMxWcc(graph).GetNodes()
    core_size = core.GetNodes()
    upstream = snap.GetBfsTree(graph, seed, False, True).GetNodes() - core_size
    downstream = snap.GetBfsTree(graph, seed, True, False).GetNodes() - core_size
    # Everything not accounted for by the other four regions.
    remainder = total - disconnected - core_size - upstream - downstream

    print('Total nodes in {} network: {}'.format(name, total))
    print('DISCONNECTED: {}'.format(disconnected))
    print('SCC: {}'.format(core_size))
    print('IN: {}'.format(upstream))
    print('OUT: {}'.format(downstream))
    print('TENDRILS + TUBES: {}'.format(remainder))

    return {
        "a. SCC": core_size,
        "b. IN": upstream,
        "c. OUT": downstream,
        "d. TENDRILS + TUBES": remainder,
        "e. DISCONNECTED": disconnected,
    }
Ejemplo n.º 4
0
def SizeOfBowtieRegions(Graph, sccNodeID):
    '''
    Measure the regions of a bow-tie shaped directed graph (see
    http://snap.stanford.edu/class/cs224w-readings/broder00bowtie.pdf).

    sccNodeID must be a node of both the largest weakly connected component
    and the largest strongly connected component; this is checked with
    assertions.

    returns: tuple of sizes (SCC, IN, OUT, TENDRILS, DISCONNECTED)
    '''
    graphSize = Graph.GetNodes()

    largestWcc = snap.GetMxWcc(Graph)
    assert largestWcc.IsNode(sccNodeID)
    wccSize = largestWcc.GetNodes()
    disconnectedSize = graphSize - wccSize

    largestScc = snap.GetMxScc(Graph)
    # The seed has to sit inside the core for the BFS sizes to mean anything.
    assert largestScc.IsNode(sccNodeID)
    sccSize = largestScc.GetNodes()

    # Forward BFS covers SCC + OUT, backward BFS covers SCC + IN.
    forwardReach = snap.GetBfsTree(Graph, sccNodeID, True, False).GetNodes()
    backwardReach = snap.GetBfsTree(Graph, sccNodeID, False, True).GetNodes()

    inSize = backwardReach - sccSize
    outSize = forwardReach - sccSize
    tendrilSize = wccSize - (inSize + outSize + sccSize)

    sizes = (sccSize, inSize, outSize, tendrilSize, disconnectedSize)
    # The five regions must partition the node set.
    assert sum(sizes) == Graph.GetNodes()
    return sizes
Ejemplo n.º 5
0
    def initNetwork(self,Ajen,keyList):
        """Build a snap.TNEANet from a weight matrix and a list of node keys.

        Node ``i`` gets the string attribute 'key' set to ``keyList[i]``.
        For every pair ``i < j`` with ``Ajen[i, j] > 0`` an edge ``i -> j`` is
        added carrying the float attribute 'weigth' (attribute name spelled
        as stored). The same triples are collected in ``self.outputList`` and
        the largest weakly connected component is kept in ``self.MxWcc``.
        """
        self.Ajen = Ajen
        self.keyList = keyList
        self.myGraph = snap.TNEANet.New()
        self.nid2id = {}
        self.id2nid = {}

        for position, key in enumerate(keyList):
            nid = self.myGraph.AddNode(position)
            self.myGraph.AddStrAttrDatN(nid, key, 'key')
            self.nid2id[nid] = key
            self.id2nid[key] = nid

        nodeCount = len(keyList)
        self.outputList = []
        # Only the strict upper triangle is scanned, so each pair is added once.
        for i in range(nodeCount):
            for j in range(i + 1, nodeCount):
                weight = Ajen[i, j]
                if not (weight > 0):
                    continue
                eid = self.myGraph.AddEdge(i, j)
                self.myGraph.AddFltAttrDatE(eid, weight, 'weigth')
                self.outputList.append([keyList[i], keyList[j], weight])

        print('-original: '+str(self.myGraph.GetEdges())+' '+str(self.myGraph.GetNodes()))
        self.MxWcc = snap.GetMxWcc(self.myGraph)
        print('-mxWcc: '+str(self.MxWcc.GetEdges())+' '+str(self.MxWcc.GetNodes()))
def generate_steam_edge_list():
    """Export the largest connected component of the Steam graph as CSV.

    Loads the undirected graph stored in ``graph/steam.graph``, keeps its
    largest weakly connected component, and writes one row per edge to
    ``graph/steam_edge_list.csv``. Each row is ``<id>,g<id>``: the endpoint
    listed in ``graph/user_node.txt`` comes first and the other endpoint is
    prefixed with ``g``. If the source endpoint is not a listed user, the
    destination endpoint is written first.
    """
    FIn = snap.TFIn("graph/steam.graph")
    G = snap.GetMxWcc(snap.TUNGraph.Load(FIn))

    # A set gives O(1) membership tests; the previous list was scanned once
    # per edge.
    with open('graph/user_node.txt', 'r') as f:
        user_nodes = {int(line) for line in f}

    with open('graph/steam_edge_list.csv', 'w') as f:
        writer = csv.writer(f, delimiter=',')
        for edge in G.Edges():
            id1 = edge.GetSrcNId()
            id2 = edge.GetDstNId()
            if id1 in user_nodes:
                row = [str(id1), 'g' + str(id2)]
            else:
                row = [str(id2), 'g' + str(id1)]
            writer.writerow(row)
Ejemplo n.º 7
0
def preproc_graph(filename):
    """Load a tab-separated edge list and reduce it to its largest component.

    The original author's note says this step is believed to run after the
    nodes have been remapped to consecutive ids in map_nodes_new.py.

    Returns ``(Graph, conn_node_ids, V, E, V0)``:
      Graph          largest weakly connected component (undirected graph)
      conn_node_ids  sorted array of the node ids in that component
      V, E           node and edge counts of ``Graph``
      V0             node count of the loaded graph before zero-degree nodes
                     were deleted
    """
    print("Working on %s \n" % filename)
    print("Generating graph from edge list...")
    Graph0 = snap.LoadEdgeList(snap.PUNGraph, filename, 0, 1, '\t')
    # Node count of the raw graph, taken before pruning.
    V0 = Graph0.GetNodes()
    snap.DelZeroDegNodes(Graph0)
    print("Done generating graph!")

    print("Generating connected graph...")
    Graph = snap.GetMxWcc(Graph0)
    V = Graph.GetNodes()
    E = Graph.GetEdges()
    print("Done generating graph with V = %i, E = %i!, V0 = %i" % (V, E, V0))

    # The component is connected, so the endpoint of any one edge is enough
    # to enumerate all of its nodes.
    conn_node = None
    for EI in Graph.Edges():
        conn_node = EI.GetSrcNId()
        break

    CnCom = snap.TIntV()
    # A graph without edges used to hit a NameError here; it now simply
    # yields an empty id array.
    if conn_node is not None:
        snap.GetNodeWcc(Graph, conn_node, CnCom)
    # NOTE(review): `sort` and `array` are presumably numpy's - confirm the
    # file-level import.
    conn_node_ids = sort(array([node for node in CnCom]))

    return Graph, conn_node_ids, V, E, V0
Ejemplo n.º 8
0
def main(version):
    """Build the Flixster social graph and save its largest component.

    Reads ``finalSocial<version>.txt`` (one tab-separated node pair per line)
    from the ``data\\flixster\\commondata`` directory located under the first
    three components of ``sys.path[0]``, renumbers the raw node ids
    0, 1, 2, ... in order of first appearance, and writes the largest weakly
    connected component to ``finalSocial<version>.graph``.

    Blank lines in the input are skipped.
    """
    starttime = datetime.datetime.now()

    codePath = sys.path[0]
    s = codePath.split('\\')
    workPath = s[0] + '\\' + s[1] + '\\' + s[
        2] + '\\data\\flixster\\commondata\\'  #f:\project\somproject
    filePath1 = workPath + 'finalSocial' + version + '.txt'

    # Raw node id -> compact index. A dict replaces the list whose
    # `in` / `.index()` lookups made the load quadratic.
    nodeToIndex = {}
    G1 = snap.TUNGraph.New()

    def mapNode(rawId):
        # Return the compact index for rawId, creating the node on first sight.
        idx = nodeToIndex.setdefault(rawId, len(nodeToIndex))
        if not G1.IsNode(idx):
            G1.AddNode(idx)
        return idx

    with open(filePath1) as socialFile:
        for line in socialFile:
            # Strip only the line terminator. The old `line[:-1]` removed the
            # last digit when the final line had no trailing newline.
            line = line.rstrip('\r\n')
            if not line:
                continue
            linkPair = line.split('\t')
            node1MapNum = mapNode(int(linkPair[0]))
            node2MapNum = mapNode(int(linkPair[1]))
            G1.AddEdge(node1MapNum, node2MapNum)

    print('get the max connected component...')
    MxWcc = snap.GetMxWcc(G1)
    print('the max connected component node num is  %d ' % MxWcc.GetNodes())

    print(MxWcc.GetEdges())

    FOut = snap.TFOut(workPath + 'finalSocial' + version + '.graph')
    MxWcc.Save(FOut)
    FOut.Flush()

    print('finished')
    endtime = datetime.datetime.now()
    print('passed time is %d s' % (endtime - starttime).seconds)
Ejemplo n.º 9
0
def main():
    """Keep only the Baidu core users that lie in the largest component.

    Builds an undirected graph from ``commondata\\rawSocial.txt`` (one
    tab-separated node pair per line), renumbering raw ids 0, 1, 2, ... in
    order of first appearance. Every id from ``commondata\\rawCoreUserID.txt``
    that belongs to the largest weakly connected component is written to
    ``commondata\\coreUserID.txt``, one per line.

    Blank lines are skipped. A core user id that never appears in the social
    file cannot be in the component, so it is skipped rather than raising.
    """
    starttime = datetime.datetime.now()

    codePath = sys.path[0]
    s = codePath.split('\\')
    workPath = s[0] + '\\' + s[1] + '\\' + s[
        2] + '\\data\\baidu\\'  #f:\project\somproject

    print('use social data to build the graph... ')
    filePath1 = workPath + 'commondata\\rawSocial.txt'

    # Raw node id -> compact index. A dict replaces the list whose
    # `in` / `.index()` lookups made the load quadratic.
    nodeToIndex = {}
    G1 = snap.TUNGraph.New()

    def mapNode(rawId):
        # Return the compact index for rawId, creating the node on first sight.
        idx = nodeToIndex.setdefault(rawId, len(nodeToIndex))
        if not G1.IsNode(idx):
            G1.AddNode(idx)
        return idx

    with open(filePath1) as socialFile:
        for line in socialFile:
            # Strip only the line terminator. The old `line[:-1]` removed the
            # last digit when the final line had no trailing newline.
            line = line.rstrip('\r\n')
            if not line:
                continue
            linkPair = line.split('\t')
            node1MapNum = mapNode(int(linkPair[0]))
            node2MapNum = mapNode(int(linkPair[1]))
            G1.AddEdge(node1MapNum, node2MapNum)

    print('get the max connected component...')
    MxWcc = snap.GetMxWcc(G1)
    print('the max  connected component node num is  %d ' % MxWcc.GetNodes())

    print('get  user id   in  the  max connected component... ')

    filePath2 = workPath + 'commondata\\rawCoreUserID.txt'
    coreUserList = []
    # `with` guarantees both files are closed even if a line fails to parse.
    with open(workPath + 'commondata\\coreUserID.txt', 'w') as writer2:
        with open(filePath2) as coreFile:
            for line in coreFile:
                nodeStr = line.rstrip('\r\n')
                if not nodeStr:
                    continue
                node = int(nodeStr)
                nodeMapNum = nodeToIndex.get(node)
                if nodeMapNum is not None and MxWcc.IsNode(nodeMapNum):
                    coreUserList.append(node)
                    writer2.write(str(node) + '\n')

    print('the core user num is %d' % len(coreUserList))

    print('finished')
    endtime = datetime.datetime.now()
    print('passed time is %d s' % (endtime - starttime).seconds)
Ejemplo n.º 10
0
def createGraph(nodes, edges):
	"""Build an undirected graph from ways and keep its largest component.

	``edges`` maps a way id to its ordered list of node references; each
	consecutive pair of references becomes an edge when both endpoints are in
	``nodes``. Node references are renumbered 0, 1, 2, ... in order of first
	use.

	Returns ``(G, idToOsmid)``: the largest weakly connected component and
	the mapping from the new ids back to the original references (for every
	node that was added, including those outside the largest component).
	"""
	G = snap.TUNGraph.New()
	osmidToId = {}
	idToOsmid = {}

	def graphId(osmNode):
		# Look up the compact id, registering the node on first sight.
		if osmNode not in osmidToId:
			freshId = len(osmidToId)
			osmidToId[osmNode] = freshId
			idToOsmid[freshId] = osmNode
			G.AddNode(freshId)
		return osmidToId[osmNode]

	for osmid in edges:
		refs = edges[osmid]
		for start, end in zip(refs[:-1], refs[1:]):
			# Ways crossing the map boundary reference nodes we do not have.
			if start in nodes and end in nodes:
				G.AddEdge(graphId(start), graphId(end))

	return snap.GetMxWcc(G), idToOsmid
Ejemplo n.º 11
0
def Q2_4():
    """Print path probabilities for the Epinions and Email networks.

    For each of ``TRIALS`` rounds and each network, prints the result of
    ``ProbabilityOfPath`` on the whole network and then on its largest weakly
    connected component.
    """
    epinions, email = loadNetworks()
    labelled = (("Epinions", epinions), ("Email", email))
    for _ in xrange(TRIALS):
        for label, net in labelled:
            whole = ProbabilityOfPath(net)
            print("Probability of path for entire %s is %s." % (label, whole))
            core = ProbabilityOfPath(snap.GetMxWcc(net))
            print("Probability of path in largest WCC of %s is %s." %
                  (label, core))
Ejemplo n.º 12
0
 def __init__(self, nodes, edges, edge_list=None):
     """Create the underlying graph.

     With ``edge_list`` given, the graph is loaded from that edge-list file
     as-is. Otherwise a random undirected graph with ``nodes`` nodes and
     ``edges`` edges is generated and reduced to its largest weakly
     connected component.
     """
     if edge_list is not None:
         self.graph = snap.LoadEdgeList(snap.PUNGraph, edge_list, 0, 1)
     else:
         random_graph = snap.GenRndGnm(snap.PUNGraph, nodes, edges)
         self.graph = snap.GetMxWcc(random_graph)
     self.assignment = {}
     self.max_type = None
     self.min_type = None
Ejemplo n.º 13
0
def quick_properties(graph, name, dic_path):
    """Print a quick structural overview of the graph called ``name``.

    Covers node/edge counts, self/directed/undirected/reciprocated edge
    counts, zero-degree node counts, maximum in- and out-degree, weakly
    connected components, and the top three nodes by PageRank, hub score and
    authority score.

    ``dic_path`` is the path of a pickled dict ``{player: node id}`` used to
    turn node ids back into player names.

    Raises KeyError if a top-ranked node id has no entry in that dict.
    """
    def top_three(scores):
        # Node ids with the three highest scores, best first.
        return sorted(scores, key=lambda nid: scores[nid], reverse=True)[:3]

    n_edges = graph.GetEdges()
    n_nodes = graph.GetNodes()
    print("##########")
    print("Quick overview of {} Network".format(name))
    print("##########")
    # .format() must be applied to the string, not to print()'s return value.
    print("{} Nodes, {} Edges".format(n_nodes, n_edges))
    print("{} Self-edges ".format(snap.CntSelfEdges(graph)))
    print("{} Directed edges, {} Undirected edges".format(
        snap.CntUniqDirEdges(graph), snap.CntUniqUndirEdges(graph)))
    print("{} Reciprocated edges".format(snap.CntUniqBiDirEdges(graph)))
    print("{} 0-out-degree nodes, {} 0-in-degree nodes".format(
        snap.CntOutDegNodes(graph, 0), snap.CntInDegNodes(graph, 0)))
    node_in = graph.GetNI(snap.GetMxInDegNId(graph))
    node_out = graph.GetNI(snap.GetMxOutDegNId(graph))
    # GetDeg() is in + out degree on a directed graph, so use the
    # direction-specific accessors to match the labels.
    print("Maximum node in-degree: {}, maximum node out-degree: {}".format(
        node_in.GetInDeg(), node_out.GetOutDeg()))
    print("###")
    components = snap.TCnComV()
    snap.GetWccs(graph, components)
    max_wcc = snap.GetMxWcc(graph)
    print("{} Weakly connected components".format(components.Len()))
    print("Largest Wcc: {} Nodes, {} Edges".format(max_wcc.GetNodes(),
                                                   max_wcc.GetEdges()))

    prankH = snap.TIntFltH()
    snap.GetPageRank(graph, prankH)
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(graph, NIdHubH, NIdAuthH)

    # SECURITY: pickle.load executes arbitrary code from the file; only pass
    # a dic_path that comes from a trusted source.
    with open(dic_path, 'rb') as dic_id:
        mydict = pickle.load(dic_id)

    # Invert the mapping once. setdefault keeps the first player seen for an
    # id, matching the previous list(...).index(...) lookup.
    id_to_player = {}
    for player, nid in mydict.items():
        id_to_player.setdefault(nid, player)

    def names(node_ids):
        return ', '.join('{}'.format(id_to_player[nid]) for nid in node_ids)

    print("3 most central players by PageRank scores: {}".format(
        names(top_three(prankH))))
    print("Top 3 hubs: {}".format(names(top_three(NIdHubH))))
    print("Top 3 authorities: {}".format(names(top_three(NIdAuthH))))
Ejemplo n.º 14
0
def max_wcc_info(edges_file, key_file, valuefn):
  """Load an edge list and the legends that describe its nodes.

  Returns ``(MaxWCC, legend, full_lines)``: the largest weakly connected
  component of the undirected graph read from ``edges_file``, plus the two
  values returned by ``build_legends(key_file, valuefn, '\\t')``.
  """
  print('\nLoading edge list...')
  largest = snap.GetMxWcc(snap.LoadEdgeList(snap.PUNGraph, edges_file, 0, 1))

  print('\nBuilding legend...')
  legend, full_lines = build_legends(key_file, valuefn, '\t')

  return largest, legend, full_lines
Ejemplo n.º 15
0
def main(args):
    """Extract the largest weakly connected component of a review graph.

    Reads the undirected edge list at ``args.review`` and saves the edge list
    of its largest component to ``args.review_maxwcc``.
    """
    source_path = args.review
    target_path = args.review_maxwcc

    review_graph = snap.LoadEdgeList(snap.PUNGraph, source_path, 0, 1)
    largest_component = snap.GetMxWcc(review_graph)
    snap.SaveEdgeList(largest_component, target_path)
Ejemplo n.º 16
0
def get_connected_component(graph):
    """Return the largest connected component of ``graph``, renumbered.

    A directed graph (``snap.PNGraph``) yields its largest strongly connected
    component; an undirected one (``snap.PUNGraph``) its largest weakly
    connected component. The result is passed through ``snap.ConvertGraph``
    with renumbering enabled so that node ids run from 0 to size - 1.

    Raises NotAGraphError for any other argument type.
    """
    if isinstance(graph, snap.PNGraph):
        graph_type, largest = snap.PNGraph, snap.GetMxScc(graph)
    elif isinstance(graph, snap.PUNGraph):
        graph_type, largest = snap.PUNGraph, snap.GetMxWcc(graph)
    else:
        raise NotAGraphError(graph)
    return snap.ConvertGraph(graph_type, largest, True)
def processNetwork(Graph, id_to_groups):
    """Write a plain-text structural report for ``Graph``.

    The report goes to ../../data/fastinf_graph_noweights_features.txt and
    contains, in order: overall edge/node counts; the largest WCC (counts,
    node ids, then ``id_to_groups`` labels); every WCC with at least 10
    members; the same two sections for SCCs; and finally the clustering
    coefficient, triad count and modularity of the whole node set.
    """
    with open("../../data/fastinf_graph_noweights_features.txt", "w+") as f:
        emit = f.write

        def describe_largest(heading, component):
            # Counts, then member ids, then member group labels.
            emit(heading)
            emit('Edges: %f ' % component.GetEdges())
            emit('Nodes: %f \n' % component.GetNodes())
            emit('Node List: ')
            for member in component.Nodes():
                emit('%d, ' % member.GetId())
            emit('\n')
            for member in component.Nodes():
                emit('%s, ' % id_to_groups[member.GetId()])

        def list_components(heading, finder, line_format):
            # One line per component that has 10 or more members.
            emit(heading)
            found = snap.TCnComV()
            finder(Graph, found)
            for position, members in enumerate(found):
                if members.Len() >= 10:
                    emit(line_format % position)
                    for nodeid in members:
                        emit('%d, ' % nodeid)

        emit("RELATED GROUPS GRAPH:\n")
        emit('Edges: %d\n' % Graph.GetEdges())
        emit('Nodes: %d\n\n' % Graph.GetNodes())

        describe_largest("MAX WCC:\n", snap.GetMxWcc(Graph))
        list_components("\n\nALL WCCs:", snap.GetWccs, '\nWcc%d: ')

        describe_largest("\n\nMAX SCC:\n", snap.GetMxScc(Graph))
        list_components("\n\nALL SCCs:", snap.GetSccs, '\nScc%d: ')

        emit('\n\nCLUSTERING AND COMMUNITIES:\n')
        emit('Clustering coefficient: %f\n' % snap.GetClustCf(Graph, -1))
        emit('Num Triads: %d\n' % snap.GetTriads(Graph, -1))
        # Modularity is evaluated for the set made of every node in the graph.
        every_node = snap.TIntV()
        for node in Graph.Nodes():
            every_node.Add(node.GetId())
        emit('Modularity: %f' % snap.GetModularity(Graph, every_node))
Ejemplo n.º 18
0
 def enumerate_graphs(self, k):
     """Yield every weakly connected directed graph on ``k`` labelled nodes.

     Each of the ``k*(k-1)`` possible directed edges (no self-loops) is
     switched on or off by one character of a "0"/"1" sequence. Position
     ``i`` encodes the edge whose source is ``i // (k-1)`` and whose
     destination is the ``i % (k-1)``-th node other than the source. Only
     graphs whose largest weakly connected component spans all ``k`` nodes
     are yielded.
     """
     for seq in itertools.product("01", repeat=k*(k-1)):
         g = snap.TNGraph.New()
         for i in range(k):
             g.AddNode(i)
         for i, e in enumerate(seq):
             if e != '1':
                 continue
             # divmod uses floor division, so node ids stay integers on
             # Python 3 as well (plain `/` would produce a float there).
             start_node, end_node = divmod(i, k - 1)
             # Skip over the source itself to rule out self-loops.
             if end_node >= start_node:
                 end_node += 1
             g.AddEdge(start_node, end_node)
         if snap.GetMxWcc(g).GetNodes() == k:
             yield g
Ejemplo n.º 19
0
def q2_3_util(dataset_name):
    """Print bow-tie region sizes for the graph named ``dataset_name``.

    The graph comes from ``load_graph``. A random node of the largest SCC
    seeds a forward and a backward BFS; the tree sizes give SCC + OUT and
    SCC + IN. TENDRILS + TUBES is what remains of the largest WCC after
    removing SCC, IN and OUT.
    """
    def report(label, value):
        # Same layout as the former `print label, value` statement.
        print('%s %s' % (label, value))

    G = load_graph(dataset_name)
    MxWcc = snap.GetMxWcc(G)
    total_size = G.GetNodes()
    wcc_size = MxWcc.GetNodes()
    disconnected_size = total_size - wcc_size
    report('Total size: ', total_size)
    report('WCC size: ', wcc_size)
    report('DISCONNECTED: ', disconnected_size)

    Rnd = snap.TRnd(42)
    Rnd.Randomize()
    MxScc = snap.GetMxScc(G)
    scc_size = MxScc.GetNodes()

    number_of_trials = 1
    scc_plus_out = 0
    scc_plus_in = 0
    for _ in range(number_of_trials):
        NId = MxScc.GetRndNId(Rnd)
        # Count the tree's nodes directly. Collecting edge destinations
        # missed the BFS root, which has no incoming tree edge.
        out_tree = snap.GetBfsTree(G, NId, True, False)
        scc_plus_out = max(scc_plus_out, out_tree.GetNodes())
        in_tree = snap.GetBfsTree(G, NId, False, True)
        scc_plus_in = max(scc_plus_in, in_tree.GetNodes())

    out_size = max(0, scc_plus_out - scc_size)
    in_size = max(0, scc_plus_in - scc_size)
    # The SCC itself must be removed too, otherwise it is counted as tendrils.
    tendrils_plus_tubes = max(0, wcc_size - scc_size - in_size - out_size)

    report('IN: ', in_size)
    report('scc_size', scc_size)
    report('scc + out: ', scc_plus_out)
    report('OUT: ', out_size)
    report('scc + in: ', scc_plus_in)
    report('TENDRILS + TUBES', tendrils_plus_tubes)
    print('------------------')
Ejemplo n.º 20
0
def calculateWccSimilarity(G, partition):
    """Return the largest-WCC size and a Jaccard score for ``partition[0]``.

    The Jaccard score compares the node set of ``G`` with ``partition[0]``:
    nodes of ``G`` found in ``partition[0]``, divided by the nodes of ``G``
    plus the entries of ``partition[0]`` that are not nodes of ``G``.

    Returns ``(numNodes, Jaccard)`` where ``numNodes`` is the node count of
    the largest weakly connected component of ``G``.
    """
    numNodes = snap.GetMxWcc(G).GetNodes()

    members = partition[0]
    graphIds = [NI.GetId() for NI in G.Nodes()]

    shared = sum(1 for nodeId in graphIds if nodeId in members)
    outsiders = sum(1 for NId in members if not G.IsNode(NId))
    union = len(graphIds) + outsiders

    return numNodes, float(shared) / union
Ejemplo n.º 21
0
def main():
    """Print component and centrality statistics for the Stack Overflow graph.

    Reports the number of weakly connected components, the edge and node
    counts of the largest one, and the three highest-scoring nodes by
    PageRank, HITS hub score and HITS authority score.
    """
    def show_top(table, heading, limit=3):
        # Sort the hash by value, descending, then print its first entries.
        table.SortByDat(False)
        print(heading)
        for position, item in enumerate(table):
            if position == limit:
                break
            print(item, table[item])

    network = snap.LoadEdgeList(
        snap.PNEANet, "/Users/qingyuan/CS224W/stackoverflow-Java.txt", 0, 1)
    Components = snap.TCnComV()
    snap.GetWccs(network, Components)
    print("The number of weakly connected components is %d" % Components.Len())

    MxWcc = snap.GetMxWcc(network)
    # The message names edges first, so edges must be the first argument.
    print(
        "The number of edges is %d and the number of nodes is %d in the largest weakly connected component."
        % (MxWcc.GetEdges(), MxWcc.GetNodes()))

    PRankH = snap.TIntFltH()
    snap.GetPageRank(network, PRankH)
    show_top(
        PRankH,
        "IDs of the top 3 most central nodes in the network by PagePank scores. "
    )

    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(network, NIdHubH, NIdAuthH)
    show_top(NIdHubH, "IDs of the top 3 hubs in the network by HITS scores. ")
    show_top(NIdAuthH,
             "IDs of top 3 authorities in the network by HITS scores. ")
Ejemplo n.º 22
0
def analyze(graph):
    """Compute summary statistics for ``graph``.

    Returns the tuple ``(avgClustCoeff, maxSCCsize, maxWCCsize, avgDegree,
    alphaMLLE, avgClustCoeffER, avgShortPath)``:

      avgClustCoeff    clustering coefficient of the largest WCC
      maxSCCsize       value of snap.GetMxSccSz
      maxWCCsize       value of snap.GetMxWccSz
      avgDegree        2 * edges / nodes
      alphaMLLE        power-law exponent estimate from the degree counts
      avgClustCoeffER  clustering coefficient of a random G(n, m) graph with
                       the same node and edge counts
      avgShortPath     mean shortest-path length from up to 617 random
                       source nodes inside the largest WCC
    """
    n = graph.GetNodes()
    m = graph.GetEdges()

    maxSCCsize = snap.GetMxSccSz(graph)
    maxWCCsize = snap.GetMxWccSz(graph)
    avgDegree = (m * float(2)) / n

    # Estimate the power-law exponent from (degree, count) pairs.
    DegToCntV = snap.TIntPrV()
    snap.GetDegCnt(graph, DegToCntV)
    degCountPairs = [(item.GetVal1(), item.GetVal2()) for item in DegToCntV]
    xMin = min(deg for deg, _ in degCountPairs) - 0.5
    logSum = sum([np.log(deg / xMin) * count for deg, count in degCountPairs])
    # The estimator is normalised by the number of samples, i.e. nodes.
    alphaMLLE = 1 + (n / logSum)

    # Erdos-Renyi reference with the same number of nodes AND edges. `m` used
    # to be overwritten with the node count before this call.
    graphER = snap.GenRndGnm(snap.PUNGraph, n, m)
    avgClustCoeffER = snap.GetClustCf(graphER, -1)

    # Average shortest path, sampled inside the largest WCC.
    graphWCC = snap.GetMxWcc(graph)
    avgClustCoeff = snap.GetClustCf(graphWCC, -1)
    numSamples = min(graphWCC.GetNodes(), 617)  # all nodes or sample size
    Rnd = snap.TRnd(42)
    Rnd.Randomize()
    shortPathList = []
    for _ in range(numSamples):
        s = graphWCC.GetRndNId(Rnd)
        NIdToDistH = snap.TIntH()
        snap.GetShortPath(graphWCC, s, NIdToDistH)
        for item in NIdToDistH:
            shortPathList.append(NIdToDistH[item])
    avgShortPath = np.mean(shortPathList)

    return avgClustCoeff, maxSCCsize, maxWCCsize, avgDegree, alphaMLLE, avgClustCoeffER, avgShortPath
Ejemplo n.º 23
0
def q2_3_aux(name):
    G = load_graph(name)

    SCC = snap.GetMxScc(G).GetNodes()
    wcc = snap.GetMxWcc(G).GetNodes()

    inexplosionVect = emIn if name == "email" else epIn
    outexplosionVect = emOut if name == "email" else epOut
    ineexpl = inexplosionVect[-1]
    outeexpl = outexplosionVect[-1]

    IN = ineexpl - SCC
    OUT = outeexpl - SCC

    DISCONNECTED = G.GetNodes()-wcc


    TENDRILS_AND_TUBES = wcc - IN - OUT - SCC
    print name,"DISCONNECTED:",DISCONNECTED,"IN:",IN,"OUT:",OUT,"SCC:",SCC,"TENDRILS + TUBES:",TENDRILS_AND_TUBES

    return
Ejemplo n.º 24
0
def q3():
    """Print component and centrality statistics for stackoverflow-Java.txt.

    Reports the number of weakly connected components, the edge and node
    counts of the largest one, and the top three ``(score, node id)`` pairs
    by PageRank, HITS hub score and HITS authority score.
    """
    def top_three(table):
        # (score, node id) pairs, highest score first.
        return sorted(((table[nid], nid) for nid in table), reverse=True)[:3]

    G = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1)

    components = snap.TCnComV()
    snap.GetWccs(G, components)
    print("Number of WCC: ", components.Len())

    MxComp = snap.GetMxWcc(G)
    # The label names edges first, so print the edge count first.
    print("Number of edges and nodes in MxWCC: ", MxComp.GetEdges(), ' ',
          MxComp.GetNodes())

    PRankH = snap.TIntFltH()
    snap.GetPageRank(G, PRankH)
    print("IDs of top 3 PageRank scores: ", top_three(PRankH))

    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(G, NIdHubH, NIdAuthH)
    print("IDs of top 3 hubs by HITS scores: ", top_three(NIdHubH))
    print("IDs of top 3 authorities by HITS scores: ", top_three(NIdAuthH))
def graph_cleaning(file_path):
    """Rewrite ``file_path`` so it only mentions nodes of the largest WCC.

    ``load_graph`` supplies the graph and a table ``H`` whose ``GetKeyId``
    turns a token from the file into a node id. Each non-empty line is kept
    only if its first token maps into the largest weakly connected component;
    within a kept line, tokens outside the component are dropped. The file
    is overwritten in place.
    """
    Graph, H = load_graph(file_path)
    Graph = snap.GetMxWcc(Graph)
    snap.DelSelfEdges(Graph)
    kept_ids = set(NI.GetId() for NI in Graph.Nodes())

    with open(file_path, 'r') as f:
        rows = [text.split() for text in f.read().split('\n')]

    # Truncate first, then append the surviving rows.
    with open(file_path, 'w') as f:
        print('-----clear')
    with open(file_path, 'a') as f:
        for row in rows:
            if not row or H.GetKeyId(row[0]) not in kept_ids:
                continue
            survivors = [tok for tok in row if H.GetKeyId(tok) in kept_ids]
            f.write(' '.join(survivors) + '\n')
Ejemplo n.º 26
0
def stackoverflow():
    g = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1)
    components = snap.TCnComV()
    snap.GetWccs(g, components)
    print "Num connected comp = ", components.Len()
    mxwcc = snap.GetMxWcc(g)
    print "Num edges in largest = ", mxwcc.GetEdges()
    print "Num nodes in largest = ", mxwcc.GetNodes()
    rank = snap.TIntFltH()
    snap.GetPageRank(g, rank)
    rank.SortByDat(False)
    count = 0
    for node in rank:
        if count >= 3:
            break
        count += 1
        print "largest page rank score nodes = ", node, " (score = ", rank[node]

    hubs = snap.TIntFltH()
    auths = snap.TIntFltH()
    snap.GetHits(g, hubs, auths)
    
    hubs.SortByDat(False)
    count = 0
    for node in hubs:
        if count >= 3:
            break
        count += 1
        print "largest hub score nodes = ", node, " (score = ", hubs[node]

    auths.SortByDat(False)
    count = 0
    for node in auths:
        if count >= 3:
            break
        count += 1
        print "largest auth score nodes = ", node, " (score = ", auths[node]
Ejemplo n.º 27
0
def partThree():
    """Print component and centrality statistics for the Stack Overflow graph.

    1. number of weakly connected components
    2. edge and node counts of the largest one
    3. the three nodes with the highest PageRank, as ``id score`` lines
    4. the top three ``(score, id)`` pairs by HITS hub score, then by HITS
       authority score
    """
    data_dir_StackOverFlow = './data/stackoverflow-Java.txt'
    sofG = snap.LoadEdgeList(snap.PNGraph, data_dir_StackOverFlow, 0, 1, '\t')

    Components = snap.TCnComV()
    snap.GetWccs(sofG, Components)
    print('1. The number of weakly connected components in the network.: '+str(Components.Len()))

    MxWcc = snap.GetMxWcc(sofG)
    num_node = MxWcc.GetNodes()
    num_edge = MxWcc.GetEdges()
    print('2. The number of edges is {} and the number of nodes is {}'.format(num_edge, num_node))

    PRankH = snap.TIntFltH()
    snap.GetPageRank(sofG, PRankH)
    print('3. ')
    # Rank by score first: iterating the hash directly yields entries in
    # storage order, not the highest-scoring nodes.
    best = sorted(PRankH, key=lambda nid: PRankH[nid], reverse=True)[:3]
    for item in best:
        print(item, PRankH[item])

    print('4. ')
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(sofG, NIdHubH, NIdAuthH)
    hubPairs = [(NIdHubH[item], item) for item in NIdHubH]
    print(sorted(hubPairs, reverse=True)[:3])
    authPairs = [(NIdAuthH[item], item) for item in NIdAuthH]
    print(sorted(authPairs, reverse=True)[:3])
Ejemplo n.º 28
0
def analyze_graph(G):
    """Print the bow-tie region sizes of the directed graph ``G``.

    A random node of the largest SCC seeds a forward and a backward BFS; the
    tree sizes minus the SCC size give OUT and IN. The tendril/tube size is
    whatever remains after removing SCC, DISCONNECTED, IN and OUT from the
    whole graph. A blank line is printed at the end.
    """
    WCC = snap.GetMxWcc(G)
    SCC = snap.GetMxScc(G)

    seed = SCC.GetRndNId()  # renamed from `id`, which shadowed the builtin
    out_tree = snap.GetBfsTree(G, seed, True, False)
    in_tree = snap.GetBfsTree(G, seed, False, True)

    G_size = G.GetNodes()
    SCC_size = SCC.GetNodes()
    WCC_size = WCC.GetNodes()
    DISCONNECTED_size = G_size - WCC_size
    in_size = in_tree.GetNodes() - SCC_size
    out_size = out_tree.GetNodes() - SCC_size
    Tendril_size = G_size - SCC_size - DISCONNECTED_size - in_size - out_size

    print('Total Graph Size: %d' % G_size)
    print('SCC Size: %d' % SCC_size)
    print('WCC Size: %d' % WCC_size)
    print('IN Size: %d' % in_size)
    print('OUT Size: %d' % out_size)
    print('DISCONNECTED Size: %d' % DISCONNECTED_size)
    print('Tendril tube size (remaining): %d' % Tendril_size)
    # print('') gives a blank line under both the print statement and the
    # print function; a bare print() printed "()" as a statement.
    print('')
Ejemplo n.º 29
0
    def per_graph(graph, name):
        """Print averaged bow-tie region sizes for ``graph``.

        Runs 200 trials; each picks a random node of the largest SCC and
        measures the forward and backward BFS tree sizes. OUT and IN are the
        mean tree sizes minus the SCC size, so they may be fractional.
        ``name`` is only used in the printed header.
        """
        mxWcc = snap.GetMxWcc(graph)
        mxScc = snap.GetMxScc(graph)
        wcc_nodes = mxWcc.GetNodes()
        scc_nodes = mxScc.GetNodes()

        print('')
        print('Size analysis on {}'.format(name))
        print('Disconnected size = {}'.format(graph.GetNodes() - wcc_nodes))
        print('SCC size = {}'.format(scc_nodes))

        trials = 200
        forward_total = 0
        backward_total = 0
        for _ in range(trials):
            seed = mxScc.GetRndNId()
            forward_total += snap.GetBfsTree(graph, seed, True, False).GetNodes()
            backward_total += snap.GetBfsTree(graph, seed, False, True).GetNodes()

        # Mean reach (SCC + OUT / SCC + IN) with the SCC taken back out.
        out_sz = float(forward_total) / trials - scc_nodes
        in_sz = float(backward_total) / trials - scc_nodes
        print('OUT size = {}'.format(out_sz))
        print('IN size = {}'.format(in_sz))
        print('Tendrils/Tubes size = {}'.format(wcc_nodes - scc_nodes - out_sz - in_sz))
Ejemplo n.º 30
0
# Load the Stack Overflow graph as a directed graph from an edge list
# (source id in column 0, destination id in column 1).
G1 = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt", 0, 1)

# 1. Collect all weakly connected components and count them by iterating.
Components = snap.TCnComV()
snap.GetWccs(G1, Components)
wccCount = 0
for Cc in Components:
    wccCount = wccCount + 1

print "1. Number of Weakly Connected Components: ", wccCount

# 2. Count the edges and the nodes of the largest weakly connected
#    component by iterating over them.
maxWcc = snap.GetMxWcc(G1)
EdgeCount = 0
NodeCount = 0
for E in maxWcc.Edges():
    EdgeCount = EdgeCount + 1

for N in maxWcc.Nodes():
    NodeCount = NodeCount + 1

print "2. Number of edges and nodes in largest wcc"
print "EdgeCount : ", EdgeCount
print "NodeCount : ", NodeCount

#3 Get The top 3 most central nodes in the network by PagePank scores
PRankH = snap.TIntFltH()
snap.GetPageRank(G1, PRankH)