Beispiel #1
0
def solve_degree_based_questions(G, GName):
    """Report out-degree statistics for ``G`` and plot its degree distribution.

    Prints the number of nodes whose out-degree is 7, prints the ids of the
    node(s) with the largest out-degree, and asks SNAP to plot the out-degree
    distribution.

    Args:
        G: SNAP graph to analyse.
        GName: graph file name; its last 10 characters are stripped to obtain
            the label used in the messages and as the plot file prefix.
    """
    label = GName[:-10]

    # (out-degree, number of nodes with that out-degree) pairs.
    CntV = snap.TIntPrV()
    snap.GetOutDegCnt(G, CntV)
    degree_counts = [(p.GetVal1(), p.GetVal2()) for p in CntV]

    # Number of nodes with degree seven (stays 0 when no node has it).
    flag = 0
    for degree, count in degree_counts:
        if degree == 7:
            flag = count
            break

    print("Number of nodes with degree=7 in %s: %d" % (label, flag))

    # Find the nodes with maximum degree and their ids. The maximum is taken
    # explicitly instead of reading the last vector element, so an empty graph
    # yields an empty id list rather than an indexing error.
    Nodes_with_max_deg = []
    if degree_counts:
        MaxDegree = max(degree for degree, _ in degree_counts)
        Nodes_with_max_deg = [
            str(NI.GetId()) for NI in G.Nodes()
            if NI.GetOutDeg() == MaxDegree
        ]

    string_of_nodes_with_max_deg = ",".join(Nodes_with_max_deg)
    print("Node id (s) with highest degree in {0}: {1}".format(
        label, string_of_nodes_with_max_deg))

    # Plot the degree distribution. The reported file name assumes SNAP
    # writes the plot as "outDeg.<prefix>.png".
    filename = "outDeg." + label + ".png"
    snap.PlotOutDegDistr(G, label, label + " - out-degree Distribution")
    print("Degree distribution of {0} is in: {1}".format(label, filename))
Beispiel #2
0
def get_out_dists(G):
    """Return the out-degree histogram of ``G`` with its PDF, CDF and CCDF.

    Args:
        G: SNAP graph.

    Returns:
        Tuple ``(deg_counts, degs, cdf, ccdf, pdf)`` of NumPy arrays, where
        ``degs[i]`` is an out-degree and ``deg_counts[i]`` the number of nodes
        having it. ``pdf`` is ``deg_counts`` normalised to sum to 1. ``cdf``
        is the cumulative sum of ``pdf`` with a leading 0 inserted, so ``cdf``
        and ``ccdf = 1 - cdf`` have one more entry than ``pdf``.
        For a graph without nodes all histogram arrays are empty.
    """
    deg_vect = snap.TIntPrV()
    snap.GetOutDegCnt(G, deg_vect)
    pairs = [(item.GetVal1(), item.GetVal2()) for item in deg_vect]

    degs = np.asarray([deg for deg, _ in pairs])
    deg_counts = np.asarray([cnt for _, cnt in pairs])

    total = deg_counts.sum()
    if total:
        pdf = deg_counts.astype(float) / total
    else:
        # No nodes at all: avoid dividing by zero, keep the shapes consistent.
        pdf = np.zeros(len(deg_counts))

    cdf = np.insert(np.cumsum(pdf), 0, 0)
    ccdf = 1 - cdf
    return deg_counts, degs, cdf, ccdf, pdf
Beispiel #3
0
def OutDeg(graph):
	"""Plot the out-degree distribution of ``graph`` and save it as a PNG.

	The figure (degree on x, log-scaled node count on y) is written to
	``temp/outdegdistr.png``. When the largest degree exceeds
	``MAX_XTICKS_NUM`` the x ticks are thinned to roughly that many.

	Args:
		graph: SNAP graph.
	"""
	outdir = 'temp/'

	# Rows of (out-degree, number of nodes with that out-degree).
	out_arr = snap.TIntPrV()
	snap.GetOutDegCnt(graph, out_arr)
	tmp_arr = np.array([(item.GetVal1(), item.GetVal2()) for item in out_arr])

	out_fname = os.path.join(outdir, 'outdegdistr.png')
	max_deg = tmp_arr[:, 0].max()

	plt.clf()
	plt.figure(1)
	plt.subplots_adjust(left=0.075, bottom=0.075, right=1., top=1., wspace=0., hspace=0.)
	plt.plot(tmp_arr[:, 0], tmp_arr[:, 1], '-x')
	plt.yscale('log')

	if max_deg > MAX_XTICKS_NUM:
		# Floor division keeps the tick step an integer on Python 2 and 3.
		skip = int(max_deg) // MAX_XTICKS_NUM
		plt.xticks(np.arange(0, max_deg + 1 + skip, skip))
	else:
		plt.xticks(np.arange(max_deg + 1))

	plt.xlim(0, max_deg)
	# NOTE(review): a lower limit of 0 cannot be shown on a log-scaled axis;
	# matplotlib is expected to adjust or ignore it - confirm the intent.
	plt.ylim(0, tmp_arr[:, 1].max())
	plt.xlabel('Out-degrees', fontsize=16)
	plt.ylabel('Number of nodes', fontsize=16)
	plt.grid(True)
	plt.savefig(out_fname, dpi=300, format='png')
def avgDegreeDist(family, direction, numSamples, apiGraph):
    """Average the degree histogram over a random sample of a graph family.

    Args:
        family: sub-directory of ``data/graphs/`` holding the graph files.
        direction: ``'in'`` selects in-degree; any other value selects
            out-degree.
        numSamples: maximum number of graph files to sample.
        apiGraph: when truthy, load binary ``.apigraph`` files; otherwise load
            ``.edges`` edge lists.

    Returns:
        NumPy array ``a`` where ``a[d]`` is the mean number of nodes of degree
        ``d`` over the graphs that were actually loaded.

    Raises:
        ValueError: if no matching graph file could be sampled.
    """
    path = 'data/graphs/' + family + '/'
    suffix = '.apigraph' if apiGraph else '.edges'
    # Build a real list: random.shuffle needs a mutable sequence, which a lazy
    # filter object (Python 3) is not.
    graph_files = [f for f in os.listdir(path) if suffix in f]
    random.shuffle(graph_files)
    sample = graph_files[:numSamples]

    if apiGraph:
        Gs = [snap.TNEANet.Load(snap.TFIn(path + f)) for f in sample]
    else:
        Gs = [snap.LoadEdgeList(snap.PNEANet, path + f, 0, 1) for f in sample]
    if not Gs:
        raise ValueError("no '%s' graph files to sample in %s" % (suffix, path))

    if direction == 'in':
        maxdeg = max(G.GetNI(snap.GetMxInDegNId(G)).GetInDeg() for G in Gs)
    else:
        maxdeg = max(G.GetNI(snap.GetMxOutDegNId(G)).GetOutDeg() for G in Gs)

    avg_deg_dist = np.zeros(maxdeg + 1)
    for G in Gs:
        DegToCntV = snap.TIntPrV()
        if direction == 'in':
            snap.GetInDegCnt(G, DegToCntV)
        else:
            snap.GetOutDegCnt(G, DegToCntV)

        for item in DegToCntV:
            avg_deg_dist[item.GetVal1()] += item.GetVal2()

    # Divide by the number of graphs actually loaded; the directory may hold
    # fewer files than numSamples.
    return avg_deg_dist / len(Gs)
Beispiel #5
0
def plotDegreeDistribution(G):
    """Show a log-log scatter plot of the out-degree distribution of ``G``.

    Args:
        G: SNAP graph.
    """
    #
    # Get Degree Distribution
    #
    OutDegToCntV = snap.TIntPrV()
    snap.GetOutDegCnt(G, OutDegToCntV)
    degreeList = [item.GetVal1() for item in OutDegToCntV]
    nodeList = [item.GetVal2() for item in OutDegToCntV]
    #
    # Plot Degree Distribution
    #
    plt.figure(figsize=(15, 15))
    # Use the pyplot namespace explicitly; it is the name this function
    # already relies on for the other plotting calls.
    plt.loglog(degreeList, nodeList, 'bo')
    plt.title("LogLog plot of out-degree distribution")
    plt.show()
    return
 def f():
     """Return the out-degree histogram of ``self.graph`` as a list of pairs.

     Each pair is ``(GetVal1(), GetVal2())`` of one entry that
     ``GetOutDegCnt`` stored in the vector.
     """
     snap = self.snap
     pairs = snap.TFltPr64V()
     snap.GetOutDegCnt(self.graph, pairs)
     return [(entry.GetVal1(), entry.GetVal2()) for entry in pairs]
def get_out_degree_distribution(Graph):
    """Return the normalised out-degree distribution of ``Graph``.

    Entries whose degree or count is 0 are skipped.

    Args:
        Graph: SNAP graph.

    Returns:
        ``(XO, YO)``: ``XO`` lists the out-degrees and ``YO[i]`` is the
        fraction of nodes having out-degree ``XO[i]``.
    """
    # Create the result vector locally instead of relying on a name that is
    # not defined in this function.
    DegToCntV = snap.TIntPrV()
    snap.GetOutDegCnt(Graph, DegToCntV)
    num_node = Graph.GetNodes()
    XO, YO = [], []
    for item in DegToCntV:
        degree, count = item.GetVal1(), item.GetVal2()
        if degree == 0 or count == 0:
            continue
        XO.append(degree)
        YO.append(count * 1.0 / num_node)
    return XO, YO
Beispiel #8
0
def getDataPointsToPlot(Graph, degType):
    """
    Compute the points of a degree-distribution plot.

    :param Graph: SNAP graph
    :param degType: "In", "Out" or "Total" - which degree to count

    return values:
    X: tuple of degrees, in ascending order
    Y: tuple of frequencies: Y[i] = fraction of nodes with degree X[i]

    Both tuples are empty when the graph has no nodes.

    raises ValueError if degType is not one of the three accepted values
    """
    DegToCntV = snap.TIntPrV()

    if degType == "In":
        snap.GetInDegCnt(Graph, DegToCntV)
    elif degType == "Out":
        snap.GetOutDegCnt(Graph, DegToCntV)
    elif degType == "Total":
        snap.GetDegCnt(Graph, DegToCntV)
    else:
        raise ValueError("Invalid degree type: please use 'In', 'Out' or 'Total'.")

    NumNodes = Graph.GetNodes()
    DegToFrqV = sorted(
        (item.GetVal1(), float(item.GetVal2()) / NumNodes) for item in DegToCntV)
    if not DegToFrqV:
        # zip(*[]) yields nothing to unpack, so handle the empty graph here.
        return (), ()
    X, Y = zip(*DegToFrqV)
    return X, Y

    def plot_graph(name):
        """Plot the in-, out- and total-degree distributions of one network.

        Loads the graph called ``name`` through ``load_graph``, prints its
        node and edge counts, and shows the three distributions on a single
        log-log figure.
        """
        G = load_graph(name)
        print("{} graph nodes: {}".format(name, G.GetNodes()))
        print("{} graph edges: {}".format(name, G.GetEdges()))

        x_in, y_in = getDataPointsToPlot(G, 'In')
        plt.loglog(x_in, y_in, marker=',', color = 'y', label = 'In Degree')

        # Bind the same name that is plotted on the next line.
        x_out, y_out = getDataPointsToPlot(G, 'Out')
        plt.loglog(x_out, y_out, marker=',', color = 'r', label = 'Out Degree')

        x_total, y_total = getDataPointsToPlot(G, 'Total')
        plt.loglog(x_total, y_total, marker=',', color = 'b', label = 'Total Degree')

        plt.xlabel('Node Degree (log)')
        plt.ylabel('Proportion of Nodes with a Given Degree (log)')
        plt.title('Degree Distribution of In, Out, and Total degree for {} network'.format(name))
        plt.legend()
        plt.show()


    if __name__ == "__main__":
        # Plot the distribution graphs of the retweet (RT), mention (MT),
        # reply (RE) and social networks, in that order.
        for network_name in ("retweet", "mention", "reply", "social"):
            plot_graph(network_name)
Beispiel #9
0
def Get_Out_Degree_Distribution(G):
    """Collect the non-zero out-degrees of ``G`` and their node counts.

    Builds two NumPy arrays: ``degree`` (out-degrees greater than 0) and
    ``count`` (number of nodes with the matching out-degree).
    """
    # (out-degree, node count) pairs filled in by SNAP.
    Deg_dist = snap.TIntPrV()
    snap.GetOutDegCnt(G, Deg_dist)

    # Start from arrays without elements; np.append (called without an axis)
    # returns a flattened copy, so both become 1-D after the first append.
    degree = np.empty((1, 0))
    count = np.empty((1, 0))

    for node_degree_pr in Deg_dist:

        # Entries for out-degree 0 are left out.
        if node_degree_pr.GetVal1() > 0:

            degree = np.append(degree, node_degree_pr.GetVal1())

            count = np.append(count, node_degree_pr.GetVal2())
    '''
Beispiel #10
0
def plot_degree_distribution(G, name):
    """Scatter-plot the out-degree histogram of ``G``, save it and show it.

    The figure is saved under ``../analysis/<name>_DegDistr`` and titled
    ``<name>: Degree Distribution``.
    """
    histogram = snap.TIntPrV()
    snap.GetOutDegCnt(G, histogram)
    degrees = [entry.GetVal1() for entry in histogram]
    node_counts = [entry.GetVal2() for entry in histogram]

    plt.xlabel('Node Degree')
    plt.ylabel('Number of Nodes with a Given Degree')
    plt.title(name + ': Degree Distribution')
    plt.plot(degrees, node_counts, 'ro')
    plt.savefig('../analysis/' + name + '_DegDistr')
    plt.show()
Beispiel #11
0
def getInDegDistr(G, outdeg):
    """Return the normalised in- or out-degree distribution of ``G``.

    Args:
        G: SNAP graph.
        outdeg: truthy selects the out-degree histogram, falsy the in-degree
            histogram.

    Returns:
        ``(degrees, counts)``: degrees in ascending order and, for each, the
        proportion of the graph's nodes that have that degree.
    """
    histogram = snap.TIntPrV()
    fill_histogram = snap.GetOutDegCnt if outdeg else snap.GetInDegCnt
    fill_histogram(G, histogram)

    ordered = sorted(((entry.GetVal1(), entry.GetVal2()) for entry in histogram),
                     key=lambda deg_cnt: deg_cnt[0])
    degrees = [deg for deg, _ in ordered]
    # Normalise each count by the total number of nodes.
    counts = [1.0 * cnt / G.GetNodes() for _, cnt in ordered]
    return (degrees, counts)
Beispiel #12
0
def get_deg_dist(g):
    """Return the out- and in-degree histograms of ``g`` as one DataFrame.

    The frame has columns ``deg``, ``cnt`` and ``type``; the out-degree rows
    (``type == 'out_deg'``) come first, followed by the in-degree rows
    (``type == 'in_deg'``).
    """
    frames = []
    for fill_histogram, label in ((snap.GetOutDegCnt, 'out_deg'),
                                  (snap.GetInDegCnt, 'in_deg')):
        pairs = snap.TIntPrV()
        fill_histogram(g, pairs)
        frame = pd.DataFrame([(p.GetVal1(), p.GetVal2()) for p in pairs],
                             columns=["deg", "cnt"])
        frame['type'] = label
        frames.append(frame)

    return pd.concat(frames)
Beispiel #13
0
def getDataPointsToPlot(Graph):
    """
    Compute the points of a degree-distribution plot.

    :param - Graph: snap.PUNGraph object representing an undirected graph

    return values:
    X: list of degrees
    Y: list of frequencies: Y[i] = fraction of nodes with degree X[i]
    """
    CntV = snap.TIntPrV()
    snap.GetOutDegCnt(Graph, CntV)
    X = [p.GetVal1() for p in CntV]
    counts = [p.GetVal2() for p in CntV]
    # Normalise the raw counts so Y holds fractions, as documented above.
    # The counts cover every node, so their sum is the node total.
    total = float(sum(counts))
    Y = [count / total for count in counts]
    return X, Y
Beispiel #14
0
    def getDataPointsToPlot(self, Graph):
        """
        Compute the points of a degree-distribution plot.

        :param - Graph: snap.PUNGraph object representing an undirected graph

        return values:
        X: list of degrees
        Y: list of frequencies: Y[i] = fraction of nodes with degree X[i]
        """
        degree_vec = snap.TIntPrV()  # (degree, node count) pairs
        snap.GetOutDegCnt(Graph, degree_vec)

        X, counts = [], []
        for pair in degree_vec:
            X.append(pair.GetVal1())
            counts.append(pair.GetVal2())

        # Divide every count by the sum of all counts.
        degree_tot = sum(counts)
        Y = [count / (1.0 * degree_tot) for count in counts]

        return X, Y
Beispiel #15
0
# 1.5 - difference between unique directed and unique undirected edge counts
reciprocatedEdges = (snap.CntUniqDirEdges(wikiGraph)
                     - snap.CntUniqUndirEdges(wikiGraph))
print("The number of reciprocated edges is %s." % reciprocatedEdges)

# 1.6
zeroOutDegreeNodes = snap.CntOutDegNodes(wikiGraph, 0)
print("The number of nodes of zero out-degree is %s." % zeroOutDegreeNodes)

# 1.7
zeroInDegreeNodes = snap.CntInDegNodes(wikiGraph, 0)
print("The number of nodes of zero in-degree is %s." % zeroInDegreeNodes)

# 1.8 - add up the node counts of every out-degree above the boundary
outDegreeToCount = snap.TIntPrV()
snap.GetOutDegCnt(wikiGraph, outDegreeToCount)
numNodesLargeOutDegree = sum(
    item.GetVal2() for item in outDegreeToCount
    if item.GetVal1() > DEGREE_BOUNDARY)
print("The number of nodes with more than %s outgoing edges is %s." % (
    DEGREE_BOUNDARY, numNodesLargeOutDegree))

# 1.9 - add up the node counts of every in-degree below the boundary
inDegreeCount = snap.TIntPrV()
snap.GetInDegCnt(wikiGraph, inDegreeCount)
numNodesSmallInDegree = sum(
    item.GetVal2() for item in inDegreeCount
    if item.GetVal1() < DEGREE_BOUNDARY)
print("The number of nodes with less than %s incoming edges is %s." % (
    DEGREE_BOUNDARY, numNodesSmallInDegree))
def basic_analysis():


	FIn = snap.TFIn("../graphs/ph_simple.graph")
	G = snap.TUNGraph.Load(FIn)

	numNodes = G.GetNodes()
	print "num nodes: ", numNodes
	numEdges = G.GetEdges()
	print "num edges: ", numEdges

	# clustering coefficient
	print "\nclustering coefficient"

	print "Clustering G: ", snap.GetClustCf(G)

	ER = snap.GenRndGnm(snap.PUNGraph, numNodes, numEdges)

	print "Clustering ER: ", snap.GetClustCf(ER)

	# degree distribution histogram

	print "\ndegree distribution histogram"

	x_erdosRenyi, y_erdosRenyi = getDataPointsToPlot(ER)
	plt.loglog(x_erdosRenyi, y_erdosRenyi, color = 'g', label = 'Erdos Renyi Network')

	x_smallWorld, y_smallWorld = getDataPointsToPlot(G)
	plt.loglog(x_smallWorld, y_smallWorld, linestyle = 'dashed', color = 'b', label = 'PH Agency Network')

	plt.xlabel('Node Degree (log)')
	plt.ylabel('Proportion of Nodes with a Given Degree (log)')
	plt.title('Degree Distribution of Erdos Renyi and PH Agency Network')
	plt.legend()
	plt.show()

	# degree
	print "\ndegree distribution"

	deg_sum = 0.0

	CntV = snap.TIntPrV()
	snap.GetOutDegCnt(G, CntV)
	for p in CntV:
		deg_sum += p.GetVal1() * p.GetVal2()

	max_node = G.GetNI(snap.GetMxDegNId(G))

	deg_sum /= float(numNodes)

	print "average degree: ", deg_sum # same for G and ER

	print "max degree: ", max_node.GetOutDeg(), ", id: ", max_node.GetId()

	deg_sum = 0.0
	
	max_node = ER.GetNI(snap.GetMxDegNId(ER))

	print "max degree: ", max_node.GetOutDeg(), ", id: ", max_node.GetId()

	# diameter
	print "\ndiameter"

	diam = snap.GetBfsFullDiam(G, 10)

	print "Diameter: ", diam

	print "ER Diameter: ", snap.GetBfsFullDiam(ER, 10)

	# triads
	print "\ntriads"

	print "Triads: ", snap.GetTriads(G)

	print "ER Triads: ", snap.GetTriads(ER)

	# centrality
	print "\ncentrality"

	max_dc = 0.0
	maxId = -1

	all_centr = []

	for NI in G.Nodes():
		DegCentr = snap.GetDegreeCentr(G, NI.GetId())
		all_centr.append(DegCentr)
		if DegCentr > max_dc:
			max_dc = DegCentr
			maxId = NI.GetId() 
	
	print "max"
	print "node: %d centrality: %f" % (maxId, max_dc)
	print "average centrality: ", np.mean(all_centr)

	print "ER"
	max_dc = 0.0
	maxId = -1

	all_centr = []

	for NI in ER.Nodes():
		DegCentr = snap.GetDegreeCentr(ER, NI.GetId())
		all_centr.append(DegCentr)
		if DegCentr > max_dc:
			max_dc = DegCentr
			maxId = NI.GetId() 
			
	print "max"
	print "node: %d centrality: %f" % (maxId, max_dc)
	print "average centrality: ", np.mean(all_centr)
Beispiel #17
0
def degreeDistribution(graph):
    """Print degree statistics of ``graph`` and plot power-law fits.

    Prints the min/max in-, out- and total degree, fits a line to the
    log10-log10 total-degree distribution (degrees below ``10**2.5`` only),
    prints two maximum-likelihood exponent estimates, and shows a log-log
    plot of the distribution with the least-squares and likelihood curves.

    Args:
        graph: SNAP graph.
    """
    numNodes = float(graph.GetNodes())

    # in degree dist (only used for the max/min report below)
    DegToCntV = snap.TIntPrV()
    snap.GetInDegCnt(graph, DegToCntV)
    xIn = []
    yIn = []
    for item in DegToCntV:
        xIn.append(item.GetVal1())
        yIn.append(item.GetVal2() / numNodes)
    print 'max in degree:', max(xIn)
    print 'min in degree:', min(xIn)

    # out degree dist (only used for the max/min report below)
    DegToCntV = snap.TIntPrV()
    snap.GetOutDegCnt(graph, DegToCntV)
    xOut = []
    yOut = []
    for item in DegToCntV:
        xOut.append(item.GetVal1())
        yOut.append(item.GetVal2() / numNodes)
    print 'max out degree:', max(xOut)
    print 'min out degree:', min(xOut)

    # degree dist: x = degrees, y = fraction of nodes with that degree
    DegToCntV = snap.TIntPrV()
    snap.GetDegCnt(graph, DegToCntV)
    x = []
    y = []
    x1 = []  # after pruning outliers
    y1 = []  # after pruning outliers
    outLimit = 10**2.5  # 2.5 for prelim
    for item in DegToCntV:
        x.append(item.GetVal1())
        y.append(item.GetVal2() / numNodes)
        if item.GetVal1() < outLimit:
            x1.append(item.GetVal1())
            y1.append(item.GetVal2() / numNodes)

    # xMin is half a unit below the smallest observed degree; note that the
    # 'min total degree' line prints this shifted value.
    xMin = min(x) - 0.5
    print 'max total degree:', max(x)
    print 'min total degree:', xMin

    # test
    # DegToCntV2 = snap.TIntPrV()
    # snap.GetDegCnt(graph2, DegToCntV2)
    # xG = []
    # yG = []
    # for item in DegToCntV2:
    #     xG.append(item.GetVal1())
    #     yG.append(item.GetVal2() / float(graph2.GetNodes()))
    # print xG
    # print yG
    # exit(1)

    # Least-squares estimate: fit a line to the log10-log10 points.
    # NOTE(review): math.log10 raises ValueError for 0, so this presumably
    # assumes no node has total degree 0 - confirm.
    x1 = [math.log10(float(i)) for i in x1]
    y1 = [math.log10(float(i)) for i in y1]
    fit = np.polyfit(x1, y1, deg=1)
    print 'a: ' + str(fit[0]) + ', b: ' + str(fit[1])
    # x1/y1 are rebound here to the fitted curve x**a * 10**b for plotting.
    x1 = np.linspace(1, 10**4, len(x))
    y1 = [i**fit[0] * 10**fit[1] for i in x1]

    #
    # print len(x)
    # print np.dot(x, y)
    # print graph.GetNodes()
    # exit(1)
    m = graph.GetNodes()

    # todo try dict of x, y

    # Max log-likelihood estimate:
    # for each x, sum over it y times where y is the num of occurrences (not proportion)
    # (y holds proportions, so y * m turns them back into occurrence counts).
    alphaMLLE = 1 + (graph.GetNodes() /
                     (sum([np.log(i / xMin) * y[x.index(i)] * m for i in x])))
    print alphaMLLE

    # Curve ((alpha - 1) / xMin) * (x / xMin)**(-alpha) for the estimate above.
    x2 = np.linspace(1, 10**4, len(x))
    y2 = [((alphaMLLE - 1) / xMin) * ((i / xMin)**(-1 * alphaMLLE))
          for i in x2]

    # Second estimate, same sum but with log(x) instead of log(x / xMin);
    # it is only printed.
    dSum = 0
    numSamples = m
    for key in x:
        dSum += np.log(key) * y[x.index(key)] * m
    mlle = 1 + numSamples / float(dSum)
    print mlle

    # theoretical power pdf (only referenced by the commented-out plot below)
    yPdf = [1 / float(i**2) for i in x2]

    # plot
    # plt.loglog(xIn, yIn, color='black', ls='None', marker='.', label='in degree')
    # plt.loglog(xOut, yOut, color='red', ls='None', marker='.', label='out degree')
    plt.loglog(x,
               y,
               color='blue',
               ls='None',
               marker='.',
               label='Degree Distribution')
    plt.loglog(x1,
               y1,
               color='red',
               ls='solid',
               marker='None',
               label='Least Squares Estimate')
    plt.loglog(x2,
               y2,
               color='green',
               ls='solid',
               marker='None',
               label='Max Log-Likelihood Estimate')
    # plt.loglog(xG, yG, color='black', ls='None', marker='.', label='generated power dist')
    # plt.loglog(x2, yPdf, color='black', ls='solid', marker='None', label='theoretical power law pdf')
    plt.xlabel('Node Degree')
    plt.ylabel('Proportion of Nodes')
    plt.title('Degree Distribution of BTCtalk and BTC subreddit')
    plt.legend()
    plt.show()
    return
Beispiel #18
0
        pass
    try:
        G.AddNode(node2)
    except:
        pass
    G.AddEdge(node1, node2)

fd_in.close()

# Output sentences
print("Number of nodes: {}".format(G.GetNodes()))
print("Number of edges: {}".format(G.GetEdges()))

# [2] Degree of nodes in the network
# out-degree -> number of nodes with that out-degree
DegToCnt = snap.TIntPrV()
snap.GetOutDegCnt(G, DegToCnt)
degree_count = {item.GetVal1(): item.GetVal2() for item in DegToCnt}

# node id -> out-degree
OutDeg = snap.TIntPrV()
snap.GetNodeOutDegV(G, OutDeg)
node_deg = {item.GetVal1(): item.GetVal2() for item in OutDeg}

# Compute the maximum once instead of once per node; default=None keeps an
# empty graph from raising and simply yields an empty id list.
highest_out_deg = max(node_deg.values(), default=None)
max_deg_nodes = [k for k, v in node_deg.items() if v == highest_out_deg]

# Output sentences
print("Number of nodes with degree=7: {}".format(snap.CntOutDegNodes(G, 7)))
print("Node id(s) with highest degree: ", end=" ")
def get_graph_overview(G, Gd=None):
    '''
	G here is an undirected graph
	'''

    # degree distribution
    CntV = snap.TIntPrV()
    snap.GetOutDegCnt(G, CntV)
    deg_x, deg_y = [], []
    max_deg = 0
    for item in CntV:
        max_deg = max(max_deg, item.GetVal1())
        deg_x.append(item.GetVal1())
        deg_y.append(item.GetVal2())
        # print item.GetVal1(), item.GetVal2()
    print 'max_deg = ', max_deg
    deg_cnt = np.zeros(max_deg + 1)
    for item in CntV:
        deg_cnt[item.GetVal1()] = item.GetVal2()
    print deg_cnt
    # plt.loglog(deg_x, deg_y)
    # plt.xlabel('Degree of nodes')
    # plt.ylabel('Number of nodes')
    # plt.savefig('Giu_deg_dist.png')
    # plt.clf()

    # clustering coefficient distribution
    cf = snap.GetClustCf(G)
    print 'average cf =', cf
    NIdCCfH = snap.TIntFltH()
    snap.GetNodeClustCf(G, NIdCCfH)
    ccf_sum = np.zeros(max_deg + 1)
    for item in NIdCCfH:
        ccf_sum[G.GetNI(item).GetDeg()] += NIdCCfH[item]
        # print item, NIdCCfH[item]
    ccf_x, ccf_y = [], []
    for i in range(max_deg + 1):
        if deg_cnt[i] != 0:
            ccf_sum[i] /= deg_cnt[i]
            ccf_x.append(i)
            ccf_y.append(ccf_sum[i])
    print ccf_y
    # plt.loglog(ccf_x, ccf_y)
    # plt.xlabel('Degree of nodes')
    # plt.ylabel('Average clustering coefficient of nodes with the degree')
    # plt.savefig('Giu_ccf_dist.png')
    # plt.clf()
    # snap.PlotClustCf(G, 'investor_network', 'Distribution of clustering coefficients')

    # diameter and shortest path distribution
    diam = snap.GetBfsFullDiam(G, 100)
    print diam
    # snap.PlotShortPathDistr(G, 'investor_network', 'Distribution of shortest path length')
    # rewired_diams = []
    # for i in range(100):
    # 	print 'rewire: ', i
    # 	G_config = rewire_undirected_graph(G)
    # 	rewired_diams.append(snap.GetBfsFullDiam(G_config, 400))
    # print rewired_diams
    # print 'null model diam mean: ', np.mean(rewired_diams)
    # print 'null model diam std: ', np.std(rewired_diams)

    # wcc and scc size distribution
    WccSzCnt = snap.TIntPrV()
    snap.GetWccSzCnt(G, WccSzCnt)
    print 'Distribution of wcc:'
    for item in WccSzCnt:
        print item.GetVal1(), item.GetVal2()

    if Gd != None:
        print 'Distribution of scc:'
        ComponentDist = snap.TIntPrV()
        snap.GetSccSzCnt(Gd, ComponentDist)
        for item in ComponentDist:
            print item.GetVal1(), item.GetVal2()
snap.PlotInDegDistr(G1, "Indeg", "Directed graph - in-degree")
snap.PlotOutDegDistr(G1, "Outdeg", "Directed graph - out-degree")

# vector of pairs of integers (size, count)
ComponentDist = snap.TIntPrV()
# get distribution of connected components (component size, count)
snap.GetWccSzCnt(G1, ComponentDist)
for comp in ComponentDist:
    print("Size: %d - Number of Components: %d" % (comp.GetVal1(),
                                                   comp.GetVal2()))
Count = snap.CntUniqDirEdges(G1)
print("Directed Graph: Count of unique directed edges is %d" % Count)

# get degree distribution pairs (degree, count); the same vector is passed
# again, so from here on it is read as out-degree data
snap.GetOutDegCnt(G1, ComponentDist)
print("Degree Distribution Pairs-")
xval = []
yval = []
for item in ComponentDist:
    print("%d nodes with out-degree %d" % (item.GetVal2(), item.GetVal1()))
    xval.append(item.GetVal1())
    yval.append(item.GetVal2())
# The data is already aggregated (one count per degree), so draw one bar per
# degree; plt.hist would re-bin the counts themselves using the degrees as
# bin edges, which does not match the axis labels.
plt.bar(xval, yval, alpha=0.5, label='Nodes with Out degree')
plt.title('Distribution of Out degree by Nodes')
plt.xlabel('Out degree')
plt.ylabel('Number of Nodes')
plt.xticks(xval, rotation=90)
plt.show()
Beispiel #21
0
# remove the nodes of G8 selected by out-degree 3 and in-degree 2
snap.DelDegKNodes(G8, 3, 2)

# directed random graph: 10k nodes, 1k edges
G9 = snap.GenRndGnm(snap.PNGraph, 10000, 1000)
print("G9: Nodes %d, Edges %d" % (G9.GetNodes(), G9.GetEdges()))

# vector of integer pairs, first filled with the distribution of connected
# components as (component size, count)
CntV = snap.TIntPrV()
snap.GetWccSzCnt(G9, CntV)
for p in CntV:
    print("size %d: count %d" % (p.GetVal1(), p.GetVal2()))

# the same vector is then filled with (out-degree, count) pairs
snap.GetOutDegCnt(G9, CntV)
for p in CntV:
    print("degree %d: count %d" % (p.GetVal1(), p.GetVal2()))

# Preferential Attachment graph: 100 nodes, out-degree 3
G10 = snap.GenPrefAttach(100, 3)
print("G10: Nodes %d, Edges %d" % (G10.GetNodes(), G10.GetEdges()))

# vector of floats receiving the first eigenvector of the adjacency matrix;
# entries are printed with a 1-based position
EigV = snap.TFltV()
snap.GetEigVec(G10, EigV)
nr = 0
for nr, f in enumerate(EigV, 1):
    print("%d: %.6f" % (nr, f))
Beispiel #22
0
import snap
from math import floor
from itertools import islice, cycle
#Problem 1
g = snap.LoadEdgeList(snap.PNGraph, "p2p-Gnutella08.txt", 0, 1)
#1.a-e
info_filename = "gnutella_info.txt"
snap.PrintInfo(g, 'Gnutella P2P network 2008', info_filename, False)
with open(info_filename, 'r') as inf:
    for line in inf:
        print(line)
#Below addresses 1.f,g
g_outdeg = snap.TFltPr64V()
g_indeg = snap.TFltPr64V()
snap.GetOutDegCnt(g, g_outdeg)
snap.GetInDegCnt(g, g_indeg)
#Each vector holds pairs of floats: Val1 is the degree, Val2 the number of
#nodes with that degree. Select on the degree (Val1) and add up the node
#counts (Val2); counting the selected pairs would give the number of distinct
#degrees, not the number of nodes.
outdeg_gt_10 = [pair for pair in g_outdeg if pair.GetVal1() > 10]
indeg_gt_10 = [pair for pair in g_indeg if pair.GetVal1() > 10]
num_outdeg_gt_10 = sum(int(pair.GetVal2()) for pair in outdeg_gt_10)
num_indeg_gt_10 = sum(int(pair.GetVal2()) for pair in indeg_gt_10)
print(f'Nodes with outdegree > 10: {num_outdeg_gt_10}')
print(f'Nodes with indegree > 10: {num_indeg_gt_10}')
#Problem 2
so = snap.LoadEdgeList(snap.PNGraph, "stackoverflow-Java.txt")
#2.1
so_wcc = snap.TCnComV()
snap.GetWccs(so, so_wcc)
print(f'# of connected components: {len(so_wcc)}')
#2.2
so_mx_wcc = snap.GetMxWcc(so)
snap.PrintInfo(so_mx_wcc, "Largest connected component of StackOverflow-Java")
#2.3
Beispiel #23
0
import snap
import numpy as np
import matplotlib.pyplot as plt

# P2 of HW1: log-log scatter of the out-degree distribution of wiki-Vote

G1 = snap.LoadEdgeList(snap.PNGraph, "wiki-Vote.txt", 0, 1)
CntV = snap.TIntPrV()
snap.GetOutDegCnt(G1, CntV)
# out-degree -> number of nodes, then ordered by degree for plotting
degs = {p.GetVal1(): p.GetVal2() for p in CntV}
ps = sorted(degs.items())
degree_axis = [k for (k, _) in ps]
count_axis = [v for (_, v) in ps]

fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(degree_axis, count_axis)
ax.set_xscale('log')
ax.set_yscale('log')
plt.xlim((1e-1, 1e3))
plt.ylim((1e-1, 1e4))
plt.title("Distribution of out-degree of nodes in the network")
fig.savefig("p2.png")
snap.DrawGViz(u_rndm_graph, snap.gvlNeato, "graph_rdm_undirected.png", "Undirected Random Graph", True)

# Plot the out-degree distribution
snap.PlotOutDegDistr(u_rndm_graph, "graph_rdm_undirected", "Undirected graph - out-degree Distribution")


# Compute and print the list of all edges
for vertex_in in u_rndm_graph.Nodes():
    for vertex_out_id in vertex_in.GetOutEdges():
        print("edge (%d %d)" % (vertex_in.GetId(), vertex_out_id))
# Save it to an external file
snap.SaveEdgeList(u_rndm_graph, "Rndm_graph.txt", "Save as tab-separated list of edges")

# Compute the degree distribution and save it to an external text file
degree_vertex_count = snap.TIntPrV()
# Call through the `snap` module, the name every other call here uses.
snap.GetOutDegCnt(u_rndm_graph, degree_vertex_count)
# The with-block closes the file even if a write fails.
with open("graph_rdm_undirected_degree_distrib.txt", "w") as degree_file:
    degree_file.write("#----------------------------------\n")
    degree_file.write("#       Degree Distribution        \n")
    degree_file.write("#----------------------------------\n")
    degree_file.write("\n")
    for pairs in degree_vertex_count:
        degree_file.write("vertex degree %d: nmbr vertices with such degree %d \n" % (pairs.GetVal1(), pairs.GetVal2()))


# Compute the sizes of the connected components and save them to an external file.
# GetSccs is called with the component vector `Components` as its output argument.
Components = snap.TCnComV()
snap.GetSccs(u_rndm_graph, Components)
# Output file for the component sizes; only the first header line is written here.
file_2 = open("graph_rdm_undirected_connected_compo_sizes.txt", "w")
file_2.write("#----------------------------------\n")
Beispiel #25
0
        print "size %d, number of components %d" % (comp.GetVal1(),
                                                    comp.GetVal2())

    # Largest weakly connected component of G and its size.
    MxWcc = snap.GetMxWcc(G)
    print "\nmax wcc nodes %d, edges %d" % (MxWcc.GetNodes(), MxWcc.GetEdges())

    # In-degree histogram: (in-degree, number of nodes) pairs.
    InDegCntV = snap.TIntPrV()
    snap.GetInDegCnt(G, InDegCntV)

    print "\n# of different in-degrees", InDegCntV.Len()
    for item in InDegCntV:
        print "in-degree %d, number of nodes %d" % (item.GetVal1(),
                                                    item.GetVal2())

    # Out-degree histogram: (out-degree, number of nodes) pairs.
    OutDegCntV = snap.TIntPrV()
    snap.GetOutDegCnt(G, OutDegCntV)

    print "\n# of different out-degrees", OutDegCntV.Len()
    for item in OutDegCntV:
        print "out-degree %d, number of nodes %d" % (item.GetVal1(),
                                                     item.GetVal2())

    # PageRank scores, stored in a hash keyed by node id.
    PRankH = snap.TIntFltH()
    snap.GetPageRank(G, PRankH)
    #for item in PRankH:
    #    print item, PRankH[item]

    # Keys ordered by descending PageRank score; the ten best are printed.
    slist = sorted(PRankH, key=lambda key: PRankH[key], reverse=True)
    print "\ntop 10 experts by PageRank"
    for item in slist[:10]:
        print "id %7s, pagerank %.6f" % (item, PRankH[item])
Beispiel #26
0
def outdegSNAP( graph ):
    """Return the out-degree histogram of ``graph`` as ``[degrees, counts]``.

    ``degrees[i]`` is an out-degree and ``counts[i]`` the number of nodes
    that have it.
    """
    histogram = snap.TIntPrV()
    snap.GetOutDegCnt(graph, histogram)
    degrees, counts = [], []
    for pair in histogram:
        degrees.append(pair.GetVal1())
        counts.append(pair.GetVal2())
    return [degrees, counts]
Beispiel #27
0
def intro():
    """Walk through basic SNAP operations, printing graph sizes on the way.

    Covers manual graph construction, random and model-based generators,
    node/edge traversal, binary and text save/load, conversion, subgraphs,
    and a few structural statistics.
    """

    # build a small directed graph by hand
    G1 = snap.TNGraph.New()
    for node_id in (1, 5, 32):
        G1.AddNode(node_id)
    for src, dst in ((1, 5), (5, 1), (5, 32)):
        G1.AddEdge(src, dst)
    print("G1: Nodes %d, Edges %d" % (G1.GetNodes(), G1.GetEdges()))

    # directed random graph with 100 nodes and 1k edges
    G2 = snap.GenRndGnm(snap.PNGraph, 100, 1000)
    print("G2: Nodes %d, Edges %d" % (G2.GetNodes(), G2.GetEdges()))

    # walk over the nodes
    for node in G2.Nodes():
        print("node id %d with out-degree %d and in-degree %d" %
              (node.GetId(), node.GetOutDeg(), node.GetInDeg()))
    # walk over the edges
    for edge in G2.Edges():
        print("edge (%d, %d)" % (edge.GetSrcNId(), edge.GetDstNId()))

    # walk over the edges node by node
    for node in G2.Nodes():
        for dst_id in node.GetOutEdges():
            print("edge (%d %d)" % (node.GetId(), dst_id))

    # Forest Fire model network
    G3 = snap.GenForestFire(1000, 0.35, 0.35)
    print("G3: Nodes %d, Edges %d" % (G3.GetNodes(), G3.GetEdges()))

    # binary round trip through test.graph
    binary_out = snap.TFOut("test.graph")
    G3.Save(binary_out)
    binary_out.Flush()
    binary_in = snap.TFIn("test.graph")
    G4 = snap.TNGraph.Load(binary_in)
    print("G4: Nodes %d, Edges %d" % (G4.GetNodes(), G4.GetEdges()))

    # text round trip through test.txt
    snap.SaveEdgeList(G4, "test.txt", "Save as tab-separated list of edges")
    G5 = snap.LoadEdgeList(snap.PNGraph, "test.txt", 0, 1)
    print("G5: Nodes %d, Edges %d" % (G5.GetNodes(), G5.GetEdges()))

    # another Forest Fire model network
    G6 = snap.GenForestFire(1000, 0.35, 0.35)
    print("G6: Nodes %d, Edges %d" % (G6.GetNodes(), G6.GetEdges()))
    # undirected version of G6
    G7 = snap.ConvertGraph(snap.PUNGraph, G6)
    print("G7: Nodes %d, Edges %d" % (G7.GetNodes(), G7.GetEdges()))
    # The next three results are computed but not used further.
    # largest weakly connected component of G6
    largest_wcc = snap.GetMxWcc(G6)
    # subgraph induced on the node ids passed in the vector (0 through 4)
    induced = snap.GetSubGraph(G6, snap.TIntV.GetV(0, 1, 2, 3, 4))
    # 3-core of G6
    three_core = snap.GetKCore(G6, 3)
    # remove nodes selected by out-degree 10 and in-degree 5
    snap.DelDegKNodes(G6, 10, 5)
    print("G6a: Nodes %d, Edges %d" % (G6.GetNodes(), G6.GetEdges()))

    # Preferential Attachment graph: 1000 nodes, node out-degree 3
    G8 = snap.GenPrefAttach(1000, 3)
    print("G8: Nodes %d, Edges %d" % (G8.GetNodes(), G8.GetEdges()))
    # one vector of integer pairs receives first the connected-component
    # distribution (size, count) and then the degree distribution (degree, count)
    pair_vector = snap.TIntPrV()
    snap.GetWccSzCnt(G8, pair_vector)
    snap.GetOutDegCnt(G8, pair_vector)
    # first eigenvector of the adjacency matrix, as a vector of floats
    eigen_vector = snap.TFltV()
    snap.GetEigVec(G8, eigen_vector)
    # diameter of G8
    snap.GetBfsFullDiam(G8, 100)
    # number of triads and clustering coefficient of G8
    snap.GetTriads(G8)
    snap.GetClustCf(G8)
Beispiel #28
0

DATA_PATH = './Wiki-Vote.txt'


if __name__ == '__main__':

    # Load the Wiki vote network as a directed graph.
    G1 = snap.LoadEdgeList(snap.PNGraph, DATA_PATH, 0, 1)

    # Snap.py's built-in plot tool is called, but its plot is not displayed.
    snap.PlotOutDegDistr(G1, 'Wiki', 'Wiki')

    # So the distribution is collected manually for custom plotting.
    DegToCntV = snap.TIntPrV()
    snap.GetOutDegCnt(G1, DegToCntV)

    pairs = [(item.GetVal1(), item.GetVal2()) for item in DegToCntV]
    out_deg = [deg for deg, _ in pairs]
    deg_cnt = [cnt for _, cnt in pairs]

    out_deg_dis = pd.DataFrame({'Out_Degree_Value': out_deg, "Out_Degree_Cnt": deg_cnt})
    # Drop the row labelled 0, i.e. the first histogram entry.
    out_deg_dis.drop(index=0, inplace=True)

    # print(out_deg_dis.head(10))
    # print(out_deg_dis.shape)

    # As polyfit and poly1d does not work, I try to use liear reression to get the coefficient and intercept
Beispiel #29
0
# convert G6 to an undirected graph
G7 = snap.ConvertGraph(snap.PUNGraph, G6)
# get the largest weakly connected component of G6
WccG = snap.GetMxWcc(G6)
# get a subgraph induced on the node ids passed in the vector: {0,1,2,3,4}
SubG = snap.GetSubGraph(G6, snap.TIntV.GetV(0, 1, 2, 3, 4))
# get the 3-core of G6
Core3 = snap.GetKCore(G6, 3)
# delete nodes of out degree 10 and in degree 5
snap.DelDegKNodes(G6, 10, 5)

# %%
# stats
# generate a Preferential Attachment graph on 1000 nodes and node out degree of 3
G8 = snap.GenPrefAttach(1000, 3)
# vector of pairs of integers (size, count)
CntV = snap.TIntPrV()
# get distribution of connected components (component size, count)
snap.GetWccSzCnt(G8, CntV)
# get degree distribution pairs (degree, count)
# NOTE(review): the same vector is passed again, so presumably only the
# degree distribution remains in CntV afterwards - confirm.
snap.GetOutDegCnt(G8, CntV)
# vector of floats
EigV = snap.TFltV()
# get first eigenvector of graph adjacency matrix
snap.GetEigVec(G8, EigV)
# get diameter of G8 (return value is not kept)
snap.GetBfsFullDiam(G8, 100)
# count the number of triads in G8, get the clustering coefficient of G8
# (return values are not kept)
snap.GetTriads(G8)
snap.GetClustCf(G8)
# %%
# %%
Beispiel #30
0
# In-degree histogram: append counts to Y and degrees to X.
# NOTE(review): X, Y and DegToCntV are not initialised in this section;
# presumably they are created just before it - confirm.
snap.GetInDegCnt(graph, DegToCntV)
for item in DegToCntV:
    Y.append(item.GetVal2())
    X.append(item.GetVal1())

# Need proportion: divide every count by the sum of all counts
total = float(sum(Y))
Y = [y / total for y in Y]

# Now plot it
plt.loglog(X, Y, color = 'r', label = 'GitHub User-PR Network - In Degree')

# Out: same steps for the out-degree histogram, with fresh containers
X, Y = [], []
DegToCntV = snap.TIntPrV()
snap.GetOutDegCnt(graph, DegToCntV)
for item in DegToCntV:
    Y.append(item.GetVal2())
    X.append(item.GetVal1())

# Need proportion: divide every count by the sum of all counts
total = float(sum(Y))
Y = [y / total for y in Y]

# Now plot it
plt.loglog(X, Y, color = 'y', label = 'GitHub User-PR Network - Out Degree')

# All plotting: axis labels and title shared by both curves
plt.xlabel('Node Degree (log)')
plt.ylabel('Proportion of Nodes with a Given Degree (log)')
plt.title('Degree Distribution of GitHub User-PR Network')