Python cdf Examples, powerlaw.cdf Python Examples

Example #1

0

Show file

File: drawa2.py Project: SeMorgana/thesisProject

def draw_cdf_JI_helper(ases_in_use, mode, color, marker): #mode is for title
    """Scatter-plot the CDF of ``JI`` values over all LG pairs inside each AS.

    For every AS in ``ases_in_use`` each unordered pair of the objects
    returned by ``as_.get_attr(LGS)`` is visited. Both members of the pair
    refresh their prefix data, ``JI`` is evaluated on the two prefix
    collections, and both members are released again. Pairs whose ``JI``
    value is 0 are printed for inspection.

    Args:
        ases_in_use: iterable of AS objects supporting ``len()`` and
            ``get_attr(LGS)``.
        mode: not used by this function; kept so existing callers keep
            working (the original author notes it was meant for the title).
        color: forwarded to ``plt.scatter`` as ``c``.
        marker: forwarded to ``plt.scatter`` as ``marker``.

    Returns:
        The object returned by ``plt.scatter`` for the plotted CDF.
    """
    # NOTE(review): JI is presumably a Jaccard index of the two prefix
    # sets -- confirm against its definition.
    ji_values = []
    for count, as_ in enumerate(ases_in_use):
        print("%s %s" % (count, len(as_)))
        for lg1, lg2 in itertools.combinations(as_.get_attr(LGS), 2):
            lg1.update_prefix_as_path()
            p1 = lg1.get_prefixes()
            lg2.update_prefix_as_path()
            p2 = lg2.get_prefixes()

            # Evaluate JI once and reuse it for both the sample and the check.
            ji = JI(p1, p2)
            ji_values.append(ji)
            if ji == 0:
                print("%s %s" % (lg1, lg2))

            lg1.release()
            lg2.release()

    # One value is recorded per visited pair, so the list length is the
    # number of dots in the plot (the original author noted 658).
    print("total dots %s" % len(ji_values))
    dist, prob = pl.cdf(ji_values)
    return plt.scatter(dist, prob, c=color, marker=marker)

    '''

Example #2

0

Show file

def plot_aggregated_counts_distributions_ccdf():
    """Plot the CCDF of per-edge transition counts and save it as a PDF.

    Loads ``output/category_counts_distribution.obj`` (relative to ``HOME``)
    with ``read_pickle``, converts the first element of every entry under
    ``'counts'`` to ``int``, pads the sample with zeros up to
    ``number_of_edges`` and plots ``powerlaw.cdf(..., survival=True)`` on a
    symlog x-axis. The figure is written to
    ``output/agg_counts_distributions.pdf``.

    Takes no arguments and returns ``None``.
    """
    fig = plt.figure()
    # The subplot becomes the current axes that the plt.* calls below draw on.
    fig.add_subplot(111)

    category_distributions = read_pickle(
        HOME + 'output/category_counts_distribution.obj')
    data = [int(x[0]) for x in category_distributions['counts']]

    # To account for edges that have zero transitions, subtract the number
    # of observed transitions from the number of edges in Wikipedia and pad
    # the sample with that many zeros.
    number_of_edges = 339463340
    # Only the count is needed here; materialising a Python list of several
    # hundred million zeros just to print its length wastes gigabytes.
    number_of_zeros = number_of_edges - len(data)
    print(len(data))
    print(number_of_zeros)
    zeros = np.zeros(number_of_zeros)
    data = np.append(zeros, data)
    print(data)

    bins, CDF = powerlaw.cdf(data, survival=True)
    # The label gives plt.legend() below an entry to display; without any
    # labelled artist the legend is empty and matplotlib warns.
    plt.plot(bins, CDF, label='transitions')
    plt.xscale('symlog')

    plt.legend(fancybox=True, loc='lower left', ncol=1, prop={'size': 5})

    plt.tight_layout()
    plt.savefig('output/agg_counts_distributions.pdf', bbox_inches='tight')

Example #3

0

Show file

File: click_distributions.py Project: linksuccess/linksuccess

def plot_aggregated_counts_distributions_ccdf():
    """Plot the CCDF of per-edge transition counts and save it as a PDF.

    Loads ``output/category_counts_distribution.obj`` (relative to ``HOME``)
    with ``read_pickle``, converts the first element of every entry under
    ``'counts'`` to ``int``, pads the sample with zeros up to
    ``number_of_edges`` and plots ``powerlaw.cdf(..., survival=True)`` on a
    symlog x-axis. The figure is written to
    ``output/agg_counts_distributions.pdf``.

    Takes no arguments and returns ``None``.
    """
    fig = plt.figure()
    # The subplot becomes the current axes that the plt.* calls below draw on.
    fig.add_subplot(111)

    category_distributions = read_pickle(HOME+'output/category_counts_distribution.obj')
    data = [int(x[0]) for x in category_distributions['counts']]

    # To account for edges that have zero transitions, subtract the number
    # of observed transitions from the number of edges in Wikipedia and pad
    # the sample with that many zeros.
    number_of_edges = 339463340
    # Only the count is needed here; materialising a Python list of several
    # hundred million zeros just to print its length wastes gigabytes.
    number_of_zeros = number_of_edges - len(data)
    print(len(data))
    print(number_of_zeros)
    zeros = np.zeros(number_of_zeros)
    data = np.append(zeros, data)
    print(data)

    bins, CDF = powerlaw.cdf(data, survival=True)
    # The label gives plt.legend() below an entry to display; without any
    # labelled artist the legend is empty and matplotlib warns.
    plt.plot(bins, CDF, label='transitions')
    plt.xscale('symlog')

    plt.legend(fancybox=True, loc='lower left', ncol=1, prop={'size':5})

    plt.tight_layout()
    plt.savefig('output/agg_counts_distributions.pdf', bbox_inches='tight')

Example #4

0

Show file

File: drawa2.py Project: SeMorgana/thesisProject

def draw_avg_geo_as_dist_helper(lg_pairs, d, color, mode):
    """Scatter-plot the CDF of an average distance over shared prefixes.

    For every pair in ``lg_pairs`` both members refresh their prefix data
    and the prefixes they have in common (``p1 & p2``) are visited:

    * ``mode == GEODIST``: the sample is the mean of the two great-circle
      distances (km) from each member's ``LAT_LON`` attribute to ``d[p]``.
      A prefix is skipped when any of the three coordinates equals
      ``(0.0, 0.0)``, which the original code treats as "location unknown".
    * ``mode == ASDIST``: the sample is the mean of
      ``len(lg.get_as_paths_set(p))`` over the two members.

    Both members are released after every pair, including pairs that are
    skipped for having no overlap and the early exit on an invalid mode.

    Args:
        lg_pairs: iterable of ``(lg1, lg2)`` pairs.
        d: mapping from prefix to coordinate, read only in ``GEODIST`` mode.
        color: forwarded to ``plt.scatter`` as ``c``.
        mode: ``GEODIST`` or ``ASDIST``.

    Returns:
        The object returned by ``plt.scatter``, or ``None`` (after printing
        ``wrong!``) when an unsupported ``mode`` is encountered.
    """
    unknown = (0.0, 0.0)
    values = []
    for lg1, lg2 in lg_pairs:
        try:
            lg1.update_prefix_as_path()
            p1 = lg1.get_prefixes()
            lg2.update_prefix_as_path()
            p2 = lg2.get_prefixes()
            overlap = p1 & p2
            if len(overlap) == 0:  # no overlapping prefixes
                continue

            if mode == GEODIST:
                # The members' coordinates do not depend on the prefix, so
                # look them up once per pair instead of once per prefix.
                coord_lg1 = getattr(lg1, LAT_LON)
                coord_lg2 = getattr(lg2, LAT_LON)
                lg_unknown = coord_lg1 == unknown or coord_lg2 == unknown
                for p in overlap:
                    coord_p = d[p]
                    if lg_unknown or coord_p == unknown:
                        # one of the locations is unknown
                        continue
                    dist1 = geopy.distance.great_circle(coord_lg1, coord_p).km
                    dist2 = geopy.distance.great_circle(coord_lg2, coord_p).km
                    values.append((dist1 + dist2) / 2.0)

            elif mode == ASDIST:
                for p in overlap:
                    len1 = len(lg1.get_as_paths_set(p))
                    len2 = len(lg2.get_as_paths_set(p))
                    values.append((len1 + len2) / 2.0)

            else:
                print("wrong!")
                return
        finally:
            # Always release, even on `continue`/`return` above; the
            # original skipped the release on those paths.
            lg1.release()
            lg2.release()

    dist, prob = pl.cdf(values)
    return plt.scatter(dist, prob, c=color, marker='x')

Example #5

0

Show file

File: drawm.py Project: SeMorgana/thesisProject

def draw_cdf(ases, title, lower=-1, upper=100):
    """Show the CDF of neighbor-set sizes for the LGs of the selected ASes.

    One scatter series is drawn per entry of ("BGP", "iBGP", "eBGP"); series
    ``i`` uses the size of element ``i`` of each LG's ``NEI_SET`` attribute.

    args:
        ases:
            list of AS objects
        title:
            title of the figure (currently not applied to the plot)
        lower:
            lower bound (inclusive) of num of LGs in the ASes to be selected
        upper:
            upper bound (inclusive) of num of LGs in the ASes to be selected
    """
    selected = [as_ for as_ in ases if lower <= len(as_) <= upper]

    plt.xlabel("number of neighbors")
    plt.ylabel("cdf")

    # Flatten the LGs of every selected AS into one list.
    all_lgs = []
    for as_ in selected:
        all_lgs.extend(as_.get_attr(LGS))

    names = ("BGP", "iBGP", "eBGP")
    handles = []
    for idx, series_color in enumerate(['r', 'b', 'g']):
        sizes = [float(len(lg.get_attr(NEI_SET)[idx])) for lg in all_lgs]
        dist, prob = pl.cdf(sizes)
        handles.append(plt.scatter(dist, prob, c=series_color, marker='x'))

    plt.legend(tuple(handles), names, scatterpoints=1, loc="lower right")
    plt.autoscale()
    plt.margins(0.03)
    plt.show()

Example #6

0

Show file

def analyze_pk(g, figfile):
    """Fit a discrete power law to the degrees of ``g`` and save a PDF plot.

    The fitted power-law PDF is drawn first, labelled with the fitted alpha
    and xmin. The empirical PDF from ``powerlaw.pdf`` is then drawn at the
    bin centres, divided by ``1 - CDF(xmin)`` where the CDF comes from
    ``powerlaw.cdf`` on the same degrees. The figure is saved to ``figfile``.

    Args:
        g: graph whose ``degree()`` yields items with the degree at index 1.
        figfile: destination passed to ``plt.savefig``.

    Returns:
        ``(alpha, xmin)`` of the fitted power law.
    """
    degrees = [pair[1] for pair in g.degree()]
    fit = powerlaw.Fit(degrees, discrete=True)

    fit_label = r"fit($\alpha$: %.2f, $x_{min}$: %d)" % (fit.alpha, fit.xmin)
    pdf_axes = fit.power_law.plot_pdf(label=fit_label)
    pdf_axes.set_ylabel("P(k)")
    pdf_axes.set_xlabel("k")

    # Map each observed value to its cumulative probability.
    cdf_points, cdf_probs = powerlaw.cdf(degrees)
    cdf_by_value = dict(zip(cdf_points, cdf_probs))
    cdf_xmin = cdf_by_value[fit.xmin]

    bin_edges, prob = powerlaw.pdf(degrees)
    # Midpoint of each pair of consecutive bin edges.
    centers = [(left + right) / 2.0
               for left, right in zip(bin_edges[:-1], bin_edges[1:])]
    plt.plot(centers, prob / (1.0 - cdf_xmin), label="data")
    plt.legend()
    plt.savefig(figfile)

    return (fit.power_law.alpha, fit.power_law.xmin)

Example #7

0

Show file

#Used powerlaw package: https://github.com/jeffalstott/powerlaw
import networkx as nx
import matplotlib.pyplot as plt
import powerlaw as pl

# The graph is read as a weighted edge list. read_weighted_edgelist builds
# and returns the graph itself, so no empty nx.Graph() is created first.
G = nx.read_weighted_edgelist('as-22july06.mtx')

# G.degree() yields (node, degree) pairs; keep only the degrees, sorted
# in ascending order.
sorted_degree = sorted(deg for node, deg in G.degree())

# plot_cdf (from the powerlaw package) computes the cumulative distribution
# function of the data and plots it, so a separate pl.cdf() call whose
# result is discarded is not needed.
pl.plot_cdf(sorted_degree)

plt.show()