Example #1
def test_scale_free():
    import random
    import networkx as nx
    import data
    import graph_representation
    import plfit
    import numpy
    corpus_path = '../data/air/problem_descriptions_text'
    (documents, labels) = data.read_files(corpus_path)
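    # build a word co-occurrence network for the first document, linking words that share a sentence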
    g = graph_representation.construct_cooccurrence_network(documents[0], context='sentence')
    degree_sequence = sorted(nx.degree(g).values(), reverse=True)  # degree sequence
    dmax = max(degree_sequence)

    degree_sequence = numpy.array(degree_sequence)
    print(degree_sequence)
    pl = plfit.plfit(degree_sequence)
    p,ksv = pl.test_pl()
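    # test_pl runs a Monte Carlo goodness-of-fit test, returning the p-value and KS statistics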
    print()
    print()
    print()
    print()

    seq = [random.randrange(0, 100) for i in range(len(degree_sequence))]
    degree_sequence = numpy.array(seq)
    print(degree_sequence)
    pl = plfit.plfit(degree_sequence)
    p, ksv = pl.test_pl()
    print()
    print()
    print()
    print()
Example #2
def test_scale_free():
    import random
    import networkx as nx
    import data
    import graph_representation
    import plfit
    import numpy
    corpus_path = '../data/air/problem_descriptions_text'
    (documents, labels) = data.read_files(corpus_path)
    g = graph_representation.construct_cooccurrence_network(documents[0],
                                                            context='sentence')
    degree_sequence = sorted(nx.degree(g).values(),
                             reverse=True)  # degree sequence
    dmax = max(degree_sequence)

    degree_sequence = numpy.array(degree_sequence)
    print(degree_sequence)
    pl = plfit.plfit(degree_sequence)
    p, ksv = pl.test_pl()
    print()
    print()
    print()
    print()

    seq = [random.randrange(0, 100) for i in range(len(degree_sequence))]
    degree_sequence = numpy.array(seq)
    print(degree_sequence)
    pl = plfit.plfit(degree_sequence)
    p, ksv = pl.test_pl()
    print()
    print()
    print()
    print()
Example #3
def gamma(network_input, output_name=None):
    #######################
    ##### Variant 1   #####
    #######################
    distribution = network_distribution(network_input)
    ##    if USE_LOG_BINNING:
    ##        # log-binning
    ##        log_distribution = []
    ##        log_rng = []
    ##        power2 = 1
    ##        # <= or < ???
    ##        while power2 <= len(distribution):
    ##            log_rng.append((power2 + (power2 * 100)) / 2)
    ##            log_distribution.append(0)
    ##            power2 *= 2
    ##        for i in range(len(distribution)):
    ##            # ceil and minus-1
    ##            if i == 0 or distribution[i] == 0:
    ##                continue
    ##            group = math.ceil(math.log(i, 2) - 0.99)
    ##            log_distribution[group] += distribution[i]
    ##        while log_distribution[-1] == 0:
    ##            log_distribution = log_distribution[:-1]
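    # fit a discrete power law; the fitted exponent is exposed as ._alpha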
    result = plfit(distribution, discrete=True)
    answer = result._alpha
    write_file(answer, output_name=output_name)
    return answer
Example #4
def power_law(x, variable, subject, radius=0.5, number_of_sets=100):
    print "- Fitting power law to empirical data: %s" % variable
    if sum(x-numpy.floor(x)):
        print "  CONTINUOUS"
        alpha_range = None
    else:
        alpha, xmin = plfit0.plfit0(x)
        print "  DISCRETE"
        print "  Approximate estimator for the scaling parameter of the discrete power law:"
        print "  * Scaling parameter: alpha %g" % alpha
        print "  * Lower bound: xmin %g" % xmin
        alpha_range = numpy.arange(round(alpha)-radius, round(alpha)+radius, 0.001)
        alpha_range = alpha_range[alpha_range > 1] # distributions with alpha <=1 are not normalizable
    alpha, xmin, L = plfit.plfit(x, vec = alpha_range, nosmall = False, finite = True)
    print "  Numerical maximization of the logarithm of the likelihood function L:"
    print "  * Scaling parameter: alpha %g" % alpha
    try:
        if alpha == min(alpha_range) or alpha == max(alpha_range):
            print("    WARNING: alpha is at the edge of alpha_range")
    except TypeError:
        pass
    print("  * Lower bound: xmin %g" % xmin)
    print("  * Logarithm of the likelihood function: L %g" % L)
    p, gof = plpva.plpva(x, xmin, vec=alpha_range, reps=number_of_sets, quiet=True)
    print("  Generation of %d power-law distributed synthetic data sets:" % number_of_sets)
    print("  * Fraction of data sets with worse KS statistic than the empirical data: p-value %g" % p)
    print("  * KS statistic of the empirical data: D %g" % gof)
    png = "plplot_" + subject
    plplot.plplot(x, xmin, alpha, variable, p, png)
Example #5
    def setChannel(self, channel):
        if self.channel is not None:
            self.mean = self.total / len(self.allData)
            self.stdDev = sqrt((self.sumOfSquares / len(self.allData) -
                                self.mean * self.mean))
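            # mean and standard deviation from the running totals (Var[x] = E[x^2] - E[x]^2)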
            spikes = []
            startedSpike = False
            tStart = 0
            tEnd = 0
            #Find all the spikes, and record their start and end times
            for sample in self.allData:
                if (sample[2] > self.mean +
                        decimal.Decimal(self.stdDev * self.threshScale)
                        or sample[2] < self.mean -
                        decimal.Decimal(self.stdDev * self.threshScale)
                    ) and not startedSpike:
                    #Crossed threshold, and not already in a spike
                    tStart = sample[1]
                    startedSpike = True
                elif startedSpike:
                    #A spike started, but now is over, record endtime
                    tEnd = sample[1]
                    spikes.append((tStart, tEnd))
                    startedSpike = False
            #Get a list of inter-spike timings
            gaps = []
            for index in range(1, len(spikes)):
                gap = spikes[index][0] - spikes[index - 1][0]
                gaps.append(gap)
            axes = list(zip(*self.allData))

            #Fit the list of inter-spike timing to a power law distribution
            #Only do it if there ARE enough inter-spike timings
            pLaw = [0, 0, 0]

            if len(gaps) > 0:
                try:
                    pLaw = plfit.plfit(gaps, 'finite')
                except ValueError:
                    print "VALUE ERROR fitting power law to {0} points".format(
                        len(gaps))
                    plaw = [0, 0, 0]
            else:
                pLaw = [0, 0, 0]
            #Save the summary
            self.channelSummaries[channel] = [
                self.mean, self.stdDev, spikes, gaps, axes[1], axes[2], pLaw
            ]

        #Reset everything
        self.channel = channel
        self.allData = []
        self.total = 0
        self.sumOfSquares = 0
        self.mean = 0
        self.stdDev = 0
Example #6
def enhance_h(key):
    cluster = Cluster()
    session = cluster.connect('demo')
    key = uuid.UUID(key)

    res = session.execute(
        """
        select * from harvest where uuid = %s
        """,
        (key, )
    )

    for r in res:
        harvest = r

    tweets = session.execute(
        """
        select * from tweet where history = %s ALLOW FILTERING
        """,
        (key, )
    )

    tweet_texts = []
    tweet_users = {}

    for tweet in tweets:
        if tweet.user in tweet_users:
            tweet_users[tweet.user] += 1
        else:
            tweet_users[tweet.user] = 1
        tweet_texts.append(tweet.content)

    sorted_users = sorted(tweet_users.items(), key=operator.itemgetter(1))
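    # ascending by tweet count, so sorted_users[-1] is the most active user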

    tweet_count = len(tweet_texts)
    user_count = len(tweet_users)

    user_tweet_numbers = [item[1] for item in sorted_users]

    p = plfit.plfit(user_tweet_numbers)
    return {
        'harvest': harvest,
        'tweet_count': tweet_count,
        'vocabulary': get_vocabulary(tweet_texts),
        'clusters': get_clusters(tweet_texts),
        'users': {
            'count': user_count,
            'max_posts_per_user': sorted_users[-1][1],
            'avg_posts_per_user': tweet_count / user_count
        },
        "power_fit": (p._xmin, p._alpha)
    }
Example #7
 def setChannel(self, channel):
     if self.channel is not None:
         self.mean = self.total/len(self.allData)
         self.stdDev = sqrt((self.sumOfSquares/len(self.allData) - self.mean * self.mean))
         spikes = []
         startedSpike = False
         tStart = 0
         tEnd = 0
         #Find all the spikes, and record their start and end times
         for sample in self.allData:
             if (sample[2] > self.mean + decimal.Decimal(self.stdDev * self.threshScale) or sample[2] < self.mean - decimal.Decimal(self.stdDev * self.threshScale)) and not startedSpike:
                 #Crossed threshold, and not already in a spike
                 tStart = sample[1]
                 startedSpike = True
             elif startedSpike:
                 #A spike started, but now is over, record endtime
                 tEnd = sample[1]
                 spikes.append((tStart, tEnd))
                 startedSpike = False
         #Get a list of inter-spike timings
         gaps = []
         for index in range(1, len(spikes)):
             gap = spikes[index][0] - spikes[index-1][0]
             gaps.append(gap)
         axes = list(zip(*self.allData))
         
         #Fit the list of inter-spike timing to a power law distribution
         #Only do it if there ARE enough inter-spike timings
         pLaw = [0,0,0]
         
         if len(gaps) > 0: 
             try:
                 pLaw = plfit.plfit(gaps, 'finite')
             except ValueError:
                 print "VALUE ERROR fitting power law to {0} points".format(len(gaps))
                 plaw = [0,0,0]
         else:
             pLaw = [0,0,0]
         #Save the summary
         self.channelSummaries[channel] = [self.mean, self.stdDev, spikes, gaps, axes[1], axes[2], pLaw]
         
     #Reset everything
     self.channel = channel
     self.allData = []
     self.total = 0
     self.sumOfSquares = 0
     self.mean = 0
     self.stdDev = 0
Example #8
def random_network(n, m):

    G = nx.erdos_renyi_graph(n, 2 * m / (n * (n - 1)), seed=None)

    d = nx.degree(G)
    # compute the giant component, returned as a subgraph
    giant = max(nx.connected_component_subgraphs(G), key=len)
    degree = list(G.degree().values())
    results = plfit.plfit(degree)
    S = float(giant.number_of_nodes()) / G.number_of_nodes()
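    # S: fraction of all nodes that belong to the giant component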
    print("{0},{1},{2},{3},{4} ".format(G.number_of_nodes(),
                                        G.number_of_edges(), S,
                                        results.plfit()[1],
                                        results.plfit()[0]))
    distribution = []
    for d in set(degree):
        distribution.append([d, degree.count(d)])

    # original degree distribution
    distribution = np.asarray(distribution)
    # log scale
    distribution_log = np.log(distribution)
    # cumulative distribution
    cumulative = list(
        map(lambda i: [distribution[i, 0],
                       np.sum(distribution[i:, 1])],
            np.arange(distribution.shape[0])))
    cumulative = np.log(cumulative)
    data = []
    data.append(distribution)
    data.append(distribution_log)
    data.append(cumulative)
    plotdistributuin(data, "img/erd{0}_{1}.png".format(n, m))
    # clear the collected data
    data = []
    d = G.degree()
    # draw and show graph
    fig = plt.figure(figsize=(16, 12))
    plt.clf()
    nx.draw_spring(
        G,
        nodelist=d.keys(),
        node_color="#FF8800",
        node_size=[10 * v / np.mean(list(d.values())) for v in d.values()])
    plt.savefig('erd_net{0}_{1}.png'.format(n, m))
Example #9
def plfitDegreeDistr(G, outputFile):
    """ Power-law fit degree distribution
    """
    # fit with power-law
    D = getDegreeToCount(G)
    degreeV = D['degreeV']
    countV = D['countV']
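    # expand (degree, count) pairs into the raw degree sequence plfit expects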
    L = []
    for i in range(len(degreeV)):
        t = [ degreeV[i] ] * countV[i]
        L.extend(t)
    fit = plfit.plfit(L)
    
    # plfit fitted parameters
    with open(outputFile, 'a') as fout:
        fout.write("\nplfit fitted parameters:\n")
        fout.write("\txmin = " + str(fit._xmin) + "\n")
        fout.write("\talpha = " + str(fit._alpha) + "\n")
Example #10
def powerlaw_network(alpha, size):
    while True:
        s = []
        while len(s) < size:
            nextval = int(nx.utils.powerlaw_sequence(
                1, alpha)[0])  # draw a single degree from a power law with exponent alpha
            if nextval != 0:
                s.append(nextval)
        if sum(s) % 2 == 0:
            break
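    # the configuration model requires an even degree sum, hence the resampling above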
    G = nx.configuration_model(s)
    G = nx.Graph(G)  # remove parallel edges
    G.remove_edges_from(G.selfloop_edges())

    d = nx.degree(G)
    # compute the giant component, returned as a subgraph
    giant = max(nx.connected_component_subgraphs(G), key=len)
    degree = list(G.degree().values())
    results = plfit.plfit(degree)
    S = float(giant.number_of_nodes()) / G.number_of_nodes()
    print("{0},{1},{2},{3} ".format(alpha, G.number_of_nodes(),
                                    G.number_of_edges(), S, nx.radius(G)))
Example #11
def make_figure3_data():
    """
    Figure 3 - Power law fits on degree of (r,p) graphs
    """
    with open("../results/fig_3_data.csv", 'w') as f:
        writer = csv.writer(f)
        tmp = writer.writerow(['type', 'r', 'p', 'type2', 'id', 'alpha'])
        for graph in glob(util.data_path + '/synth_graphs/g-*-u-*.csv'):
            # read data
            degs = []
            with open(graph, 'r') as f_in:
                reader = csv.reader(f_in)
                tmp = next(reader, None)  # skip the header
                for row in reader:
                    degs += [row[1], row[2]]
            degs = list(Counter(degs).values())
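            # node degree = number of times each node id appears as an edge endpoint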
            # run plfit
            alpha = plfit.plfit(degs, discrete=True).plfit()[1]
            # some fits are unstable, skip
            if alpha > 6:
                continue
            # write results
            graph_id = graph[:-4].split('/')[-1].split('-')
            tmp = writer.writerow(graph_id + [alpha])
Example #12
import os
from plfit import plfit
from numpy import dtype, loadtxt
import numpy

print(os.getcwd())

#PDF_FILE = 'no_sync_clock_no_sync_agent_1352849705/pdf.csv'
#CCDF_FILE = 'no_sync_clock_no_sync_agent_1352849705/ccdf.csv'
#
#data_record = dtype([('nodes', float), ('ab', float), ('nx', float)])
#
#PDF_DATA = loadtxt(PDF_FILE, dtype=data_record,
#                   skiprows=11, delimiter=',')
#
#CCDF_DATA = loadtxt(CCDF_FILE, dtype=data_record,
#                   skiprows=11, delimiter=',')

FILE_NAME = 'no_sync_clock_1352885864/raw_degrees.csv'
RAW_DEGREES = loadtxt(FILE_NAME, dtype=float)

print(plfit(RAW_DEGREES.astype(int).tolist()))
get_ipython().magic(u"paste")
get_ipython().magic(u"paste")
get_ipython().magic(u"paste")
help(hist)
get_ipython().magic(u"paste")
get_ipython().magic(u"paste")
get_ipython().magic(u"paste")
import plfit
from agpy import plfit
pf = plfit.plfit.plfit((bgpsv2['flux']))
pf = plfit.plfit.plfit(bgpsv2['flux'])
bgpsv2['flux']
bgpsv2['flux'].astype('float')
array(bgpsv2['flux'])
pf = plfit.plfit.plfit(array(bgpsv2['flux']))
pf = plfit.plfit(array(bgpsv2['flux']))
bgpsv2['flux']
pf = plfit.plfit.plfit(array(bgpsv2['flux'][bgpsv2['flux']==bgpsv2['flux']))
pf = plfit.plfit.plfit(array(bgpsv2['flux'][bgpsv2['flux']==bgpsv2['flux']]))
pf = plfit.plfit(array(bgpsv2['flux'][bgpsv2['flux']==bgpsv2['flux']]))
np.seterr(all='ignore')
pf = plfit.plfit(array(bgpsv2['flux'][bgpsv2['flux']==bgpsv2['flux']]))
pf.plfit()
get_ipython().magic(u"run ~/work/bgps_pipeline/bolocat/bolocat_v1_powerlaws.py")
l30 = (bolocat['glon_max'] < 31)*(bolocat['glon_max'] > 30)
get_ipython().magic(u"paste")
get_ipython().magic(u"paste")
show()
get_ipython().system(u"ls -F /Users/adam/work/bolocam/bolocat/")
p._alpha
p._alphaerr
Example #13
			dr.writerow(d)

source_dir = 'doc/interactions_over_platforms/tables/deg_seq'
figure_dir = 'doc/interactions_over_platforms/figures/deg_seq_fitness'
if not os.path.exists(figure_dir):
	os.makedirs(figure_dir)
degSeq_files = os.listdir(source_dir)
degFits = list()

for degSeq_file in degSeq_files:
	if not os.path.isdir(degSeq_file):
		readFile = os.path.join(source_dir, degSeq_file)
		dataset, trace_type, direction = re.sub(r'\.csv$', '', degSeq_file).split('_')
		d = scan(readFile)
		dd = np.array(filter(lambda x: x > 0, d))
		p = plfit.plfit(np.array(dd, dtype = 'float64'), usefortran = False, discrete = True)
		# tested values collected
		pdata = OrderedDict()
		pdata['dataset'] = dataset
		pdata['type'] = trace_type
		pdata['dir'] = direction
		pdata['xmin'] = p._xmin
		pdata['alpha'] = p._alpha
		pdata['D'] = p._ks
		#pdata['ksP'] = p._ks_prob
		degFits.append(pdata)
		# diagnosis plots
		clf()
		p.xminvsks()
		savefig(os.path.join(figure_dir, "%s_%s_%s_kstest.pdf" % (dataset, trace_type, direction)))
		clf()
Example #15
    if 1 == 0:
        o = open('output.txt','w')
        for i in range(1,30):
            hits = get_roi_hits('SourceData\\panoramio.csv', 'Cluster25\\Cluster_25.%d.alpha.shp' % i)
            print(i, hits)
            o.write('%d,%s\n' % (i, str(hits)))
        o.close()
    
    import plfit
    hits = [42, 35, 24, 25, 41, 20, 30, 22, 37, 45, 53, 52, 14, 52, 51, 32, 14, 30, 31, 88, 8, 54, 67, 48, 14, 80, 32, 35, 31, 57, 34, 22, 15, 74, 8, 44, 32, 22, 339, 59, 117, 63, 127, 47, 200, 39]
    hits = [42, 35, 24, 25, 41, 20, 30, 22, 37, 45, 53, 52, 14, 52, 51, 32, 14, 30, 31, 88, 8, 54, 67, 48, 14, 80, 32, 35, 31, 
57, 34, 22, 15, 74, 8, 44, 32, 22, 59, 63, 47, 39, 64, 47, 80, 54, 12, 8, 27, 45, 47, 45, 38, 48, 60, 61, 75, 62, 40, 
68, 61, 19, 10, 16, 111, 21, 13, 32, 31, 92, 88]
    hits.sort(reverse=True)
    pl = plfit.plfit(hits, quiet=False, silent=True)
    pl.plotcdf()
    
    ######################################
    min_sup = 0.01
    ranks = []
    for i in range(11,12):
        travel_routes = regenerate_travel_route('SourceData\\panoramio.csv', 
                                                'Cluster25\\Cluster_25.%d.alpha.shp'% i,
                                                day_constrain=7,
                                                save = True)
        F_K = mining_travel(travel_routes, min_sup=min_sup,load=False, load_filename='travel_routes.pkl')
        ranks.append(F_K.values())
    myplfit = plfit.plfit(ranks[0],quiet=True,silent=True)
    myplfit.plotcdf()
    plfit.pylab.show()
Example #16
def fit(degrees, **kwargs):
    gamma, xmin, L = plfit(degrees)
    log('(gamma=%(gamma)s, xmin=%(xmin)s, L=%(L)s)',
        args=locals(), **kwargs)
    return gamma, xmin, L
Example #17
'''
Part d)
'''
# Store the minimum k and gamma of each network to quantitatively check the estimates from part c).

kminimo = []
gammas = []

for i in np.arange(len(grafos)):
    grados = grafos[i].degree
    x_degree = [j for k, j in grados]
    fit = plfit.plfit(x_degree)
    # store k_min and gamma for each network, as stated above
    kminimo.append(fit._xmin)
    gammas.append(fit._alpha)

    plt.figure()
    fit.plotpdf()
    plt.title('Ajuste ' + filename[i])
    plt.xlabel('k (log scale)')
    plt.ylabel('P(k) (log scale)')
    plt.savefig(path2 + filename[i] + '_ajuste.png')
    plt.show()
Example #18
def fit(degrees, **kwargs):
    gamma, xmin, L = plfit(degrees)
    log('(gamma=%(gamma)s, xmin=%(xmin)s, L=%(L)s)', args=locals(), **kwargs)
    return gamma, xmin, L
Example #19
import numpy as np
import plfit
import pylab as plt

xmins = np.logspace(-1, 1)
nel = 2000

fig1 = plt.figure(1)
fig1.clf()
ax1 = fig1.add_subplot(2, 1, 1)
ax2 = fig1.add_subplot(2, 1, 2)

for alpha in (1.5, 2.5, 3.5):
    results = []
    for xmin in xmins:
        data = plfit.plexp_inv(np.random.rand(nel), xmin, alpha)
        result = plfit.plfit(data, quiet=True, silent=True)
        results.append(result)

    fitted_xmins = [r._xmin for r in results]
    fitted_alphas = [r._alpha for r in results]
    fitted_alpha_errors = [r._alphaerr for r in results]

    ax1.loglog(xmins,
               fitted_xmins,
               's',
               label='$\\alpha={0}$'.format(alpha),
               alpha=0.5)
    ax1.loglog(xmins, xmins, 'k--', alpha=0.5, zorder=-1)

    ax2.errorbar(xmins,
                 fitted_alphas,
                 yerr=fitted_alpha_errors,
                 marker='s',
                 label='$\\alpha={0}$'.format(alpha),
                 alpha=0.5)
    ax2.set_xscale('log')

ax1.legend(loc='best')
Example #20
from math import exp
import sys  # needed for sys.argv/sys.exit below
import plfit

if len(sys.argv) < 2:
    print("Usage: ./check_power_law.py series [-float]")
    sys.exit(0)

data = None
if len(sys.argv) >= 3 and sys.argv[2] == '-float':
    data = load_proportions(sys.argv[1])
else:
    data = load_counts(sys.argv[1])

dt = [i + 1 for i, amt in enumerate(data) for j in range(amt)]
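# expand the count series into raw samples: value i+1 repeated data[i] times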

mf = plfit.plfit(dt, xmin=1)
"""
rank = [i+1 for i in range(len(data))]

logRank = make_log(rank)
logData = make_log(data)

xmin = 1 #min(data)

lgs = [cnt*log((i+1)/xmin) for i, cnt in enumerate(data)]

n = sum(data)

alpha = 1 + n/sum(lgs)

stdErr = (alpha - 1) / sqrt(n)
Example #21
# Save data about the power-law fits via the Shalizi-Newman-Clauset method

# With niter = 100, one test_pl run takes 14 seconds
# With niter = 1000, it should take 30 min in total
niter = 1000

plfits = []
kmins = np.zeros((2, 4))
alphas = np.zeros((2, 4))
pvals = np.zeros((2, 4))
ks_statistics = np.zeros((2, 4, niter))
for i, (gs, tipo_grafo) in enumerate(zip([gs_hip, gs_lsa], ['Hipervínculos', 'LSA'])):
    for j, (g, date) in enumerate(zip(gs, dates)):
        grados = list(dict(g.degree).values())
        grados = [k for k in grados if k > 0]
        myplfit = plfit.plfit(grados, discrete=True)
        plfits.append(myplfit)
        kmins[i, j], alphas[i, j] = myplfit.plfit()
        pvals[i, j], ks_statistics[i, j, :] = myplfit.test_pl(niter=niter)

# p(1000) = 0.000
# p(1000) = 0.000
# p(1000) = 0.000
# p(1000) = 0.000
# /home/gabo/anaconda3/lib/python3.6/site-packages/plfit/plfit.py:940: RuntimeWarning: divide by zero encountered in log
#   L_of_alpha = -1*nn*log(zeta) - alpha * sum_log_data
# p(1000) = 0.000
# p(1000) = 0.001
# p(1000) = 0.000
# p(1000) = 0.000
#

Example #22
ax1.set_xscale('log')
ax1.set_xlim(3e-4, 10**0.22)
#ax1.set_xlim(l[:-1][hOther>0].min()/1.1, l[1:][(hM>0)|(hN>0)].max()*1.1)
#ax1.set_ylim(0.6, 15)
pl.setp(ax1.get_xticklabels(), rotation='horizontal', fontsize=20)
pl.setp(ax1.get_yticklabels(), rotation='vertical', fontsize=20)
ax1.set_xlabel("$S_{3 mm}$ (Jy)", fontsize=22)
ax1.set_ylabel("$N(cores)$", fontsize=22)
pl.legend(loc='best', fontsize=20)

pl.savefig(paths.fpath("core_peak_fluxdensity_coloredbycluster.pdf"),
           bbox_inches='tight')

#fig1.clf()
ax1 = fig1.gca()
plf = plfit.plfit(peak_fluxdens[~hii])
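# power-law fit to the peak flux densities of the non-HII sources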
plf.plfit(discrete=False, verbose=True)
plf.plotpdf(dohist=False, histcolor='none', plcolor='navy')

pl.setp(ax1.get_xticklabels(), rotation='horizontal')  #, fontsize=10)
pl.setp(ax1.get_yticklabels(), rotation='vertical')  #, fontsize=10)
ax1.set_xlabel("$S_{3 mm}$ (Jy)")  #, fontsize=12)
ax1.set_ylabel("$N(cores)$")  #, fontsize=12)
ax1.set_ylim(0.5, 30)
ax1.set_yscale('linear')
ax1.set_xlim(0.0003, 2)

fig1.savefig(paths.fpath('core_peak_fluxdensity_powerlawfit.pdf'),
             bbox_inches='tight')
p, ksv = plf.test_pl()
print("All Data Consistent with power-law? p={0}".format(p))
Example #23
from agpy import readcol
import plfit
from pylab import *

blackouts = readcol('blackouts.txt')
cities = readcol('cities.txt')
earthquakes = readcol('earthquakes.txt')
melville = readcol('melville.txt')
solarflares = readcol('solarflares.txt')
terrorism = readcol('terrorism.txt')

#print "quakes 0.00 -7.14 0.00 11.6 0.00 -7.09 0.00 -24.4 0.00 with cut-off"
#earthquakeP = plfit.plfit(earthquakes)


pl = plfit.plfit(cities.ravel() / 1e3, usefortran=True, verbose=True)
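# fit city sizes (scaled by 1e3) and compare with the published Clauset et al. values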
print "Cities (me)     : n:%10i mean,std,max: %8.2f,%8.2f,%8.2f xmin: %8.2f alpha: %8.2f (%8.2f) ntail: %10i p: %5.2f" % (pl.data.shape[0], pl.data.mean(), pl.data.std(), pl.data.max(), pl._xmin, pl._alpha, pl._alphaerr, pl._ngtx, pl._ks_prob)
print "Cities (Clauset): n:%10i mean,std,max: %8.2f,%8.2f,%8.2f xmin: %8.2f alpha: %8.2f (%8.2f) ntail: %10i p: %5.2f" % (19447,9.00,77.83,8009,52.46,2.37,0.08,580,0.76)
figure(1)
clf()
title("Cities")
subplot(131)
pl.plotpdf()
subplot(132)
title("Cities")
pl.xminvsks()
subplot(133)
pl.alphavsks()
savefig("figures/cities_kstests.png")

figure(2)
Example #24
# imports assumed by this snippet:
import sys
import time
import plfit
import powerlaw
from numpy.random import rand, seed
from agpy import readcol

try:
    ne = int(sys.argv[1])
    seed(1)
    X = plfit.plexp_inv(rand(ne), 1, 2.5)
    X[:100] = X[100:200]
except ValueError:
    X = readcol(sys.argv[1])

if len(sys.argv) > 2:
    discrete = bool(sys.argv[2])
else:
    discrete = None

print("Cython")
t1 = time.time()
p3 = plfit.plfit(X, discrete=discrete, usefortran=False, usecy=True)
print(time.time() - t1)
print("Fortran")
t1 = time.time()
p1 = plfit.plfit(X, discrete=discrete, usefortran=True)
print(time.time() - t1)
print("Numpy")
t1 = time.time()
p3 = plfit.plfit(X, discrete=discrete, usefortran=False)
print(time.time() - t1)

print("Jeff Alcott's Powerlaw")
t5 = time.time()
p5 = powerlaw.Fit(X, discrete=discrete)
print(time.time() - t5)
Example #25
import os

if not os.path.exists('tst.csv'):
    import requests
    result = requests.get(
        'https://gist.githubusercontent.com/vfilimonov/1072e402e922712ad980/raw/27cc61d65590b382ec39120a1d25d4bd3abcfb4d/tst.csv'
    )
    with open('tst.csv', 'wb') as f:
        f.write(result.content)

import numpy as np
import plfit

y = np.genfromtxt('tst.csv', delimiter=',')
tst_fit_py = plfit.plfit(y, usecy=False, usefortran=False, discrete=False)
tst_fit_fo = plfit.plfit(y, usecy=False, usefortran=True, discrete=False)
tst_fit_cy = plfit.plfit(y, usecy=True, usefortran=False, discrete=False)
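# the NumPy, Fortran, and Cython backends should produce matching fits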

print("py: ", tst_fit_py._xmins.shape, tst_fit_py._xmin_kstest.shape)
print("cy: ", tst_fit_cy._xmins.shape, tst_fit_cy._xmin_kstest.shape)
print("fo: ", tst_fit_fo._xmins.shape, tst_fit_fo._xmin_kstest.shape)


def func(xmin):
    ff = plfit.plfit(y, xmin=xmin, quiet=True, silent=True)
    return ff._ks


tst_KS_plfit = [func(xmin) for xmin in tst_fit_py._xmins]

import pylab as pl
Example #26
    peaks_dt = differences(peaksX)
    #print(time_data)

    #    print k
    #survival_times_crude = cluster_survival(time_clean, 1) #cluster survivals simple cut data
    survival_times_crude = cluster_survival(
        peaks_dt, 1)  #cluster survivals simple cut data
    #print min(survival_times_crude)

    ########################################################################
    ####################### Power Law  fits ################################
    ########################################################################
    # choose val in plfit.plfit(val) to fit power law to preferred dataset
    # details of the fit are printed to the console (xmin, alpha, etc.)
    #
    crudelaw = plfit.plfit(survival_times_crude)  #fit power law

    figure(1)

    title('CDF (Simple Cut-off)')
    xlabel('Jamming Duration (s)')
    ylabel('P (Jamming Duration)')
    crudelaw.plotcdf(pointcolor='g', pointmarker='o')  #cumulative
    savefig('cdf68XZ.png')

    #figure(2)

    #title('PDF (Simple Cut-off)')
    #xlabel('Jamming Duration (s)')
    #ylabel('P (Jamming Duration)')
    #crudelaw.plotpdf() #cumulative
Example #27
def func(xmin):
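    # refit with xmin held fixed and return the KS statistic of that fit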
    ff = plfit.plfit(y, xmin=xmin, quiet=True, silent=True)
    return ff._ks
Example #29
import scipy.io
import plfit
import time

m = scipy.io.loadmat('AUD_Ret_1000.mat')
A = m['A'].squeeze()
Pnpy = plfit.plfit(A)
Pfor = plfit.plfit(A)
Pfor_nosmall = plfit.plfit(A)
Pcy = plfit.plfit(A)
Py = plfit.plfit_py(A)
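# separate fit objects so each backend below is fit and timed independently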

# for comparison
r_for_nosmall = Pfor_nosmall.plfit(usefortran=True,
                                   verbose=True,
                                   quiet=False,
                                   discrete=False,
                                   nosmall=True)

t0 = time.time()
r_for = Pfor.plfit(usefortran=True,
                   verbose=True,
                   quiet=False,
                   discrete=False,
                   nosmall=False)
t1 = time.time()
r_cy = Pcy.plfit(usecy=True,
                 verbose=True,
                 quiet=False,
                 discrete=False,
                 nosmall=False)
Example #30
    line = f.readline()
    while len(line) > 0:
        data = line.strip()
        data = data[1:-1].split(',')
        data = [float(j) for j in data]
        data = np.sort(data)
        data = data/np.linalg.norm(data)
        dataset.append(data)
        line = f.readline()
    f.close()
    return dataset
    
if __name__ == "__main__":
    alpha_set = []
    xmin_set = []
    error_set = []
    
    dataset = get_roi_pop_dataset()
    #dataset = get_distance_dataset()
    
    for i,data in enumerate(dataset):
        print(i)
        pl = plfit.plfit(data,quiet=False)
        alpha_set.append(pl._alpha)
        xmin_set.append(pl._xmin)
        error_set.append(pl._alphaerr)
        
    plot_pl_roi(dataset, alpha_set, xmin_set,error_set)
    #plot_alpha(alpha_set,xmin_set,error_set)
    #plot_pl_distance(dataset,alpha_set,xmin_set,error_set)
Example #31

# Earthquakes are a BAD FIT in the original manuscript
#plf = plfit.plfit(earthquakes.ravel(), nosmall=True, usefortran=True, verbose=True, quiet=False)
#plc = plfit.plfit(earthquakes.ravel(), nosmall=True, usecy=True, verbose=True, quiet=False)
#pl = plfit.plfit(earthquakes.ravel(),  nosmall=True, usefortran=False, verbose=True, quiet=False)
#print "Earthquakes (Clauset): n:%10i mean,std,max: %8.2f,%8.2f,%8.2f xmin: %8.2f alpha: %8.2f (%8.2f) ntail: %10i p: %5.2f" % (19447,9.00,77.83,8009,52.46,2.37,0.08,580,0.76)
#for ppp in (pl,plf,plc):
#    print "Earthquakes (me)     : n:%10i mean,std,max: %8.2f,%8.2f,%8.2f xmin: %8.2f alpha: %8.2f (%8.2f) ntail: %10i p: %5.2f" % (ppp.data.shape[0], ppp.data.mean(), ppp.data.std(), ppp.data.max(), ppp._xmin, ppp._alpha, ppp._alphaerr, ppp._ngtx, ppp._ks_prob)
#    np.testing.assert_almost_equal(ppp._xmin, 0.794, 2)
#    np.testing.assert_almost_equal(ppp._alpha, 1.64, 2)
#    np.testing.assert_almost_equal(ppp._alphaerr, 0.04, 2)
#    assert ppp._ngtx == 11697


plf = plfit.plfit(cities.ravel() / 1e3, usefortran=True, verbose=True, quiet=False)
plc = plfit.plfit(cities.ravel() / 1e3, usecy=True, verbose=True, quiet=False)
pl = plfit.plfit(cities.ravel() / 1e3, usefortran=False, verbose=True, quiet=False)
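# all three backends should reproduce the published Clauset et al. city fit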
print "Cities (Clauset): n:%10i mean,std,max: %8.2f,%8.2f,%8.2f xmin: %8.2f alpha: %8.2f (%8.2f) ntail: %10i p: %5.2f" % (19447,9.00,77.83,8009,52.46,2.37,0.08,580,0.76)
for ppp in (pl,plf,plc):
    print "Cities (me)     : n:%10i mean,std,max: %8.2f,%8.2f,%8.2f xmin: %8.2f alpha: %8.2f (%8.2f) ntail: %10i p: %5.2f" % (ppp.data.shape[0], ppp.data.mean(), ppp.data.std(), ppp.data.max(), ppp._xmin, ppp._alpha, ppp._alphaerr, ppp._ngtx, ppp._ks_prob)
    np.testing.assert_almost_equal(ppp._xmin, 52.46, 2)
    np.testing.assert_almost_equal(ppp._alpha, 2.37, 2)
    np.testing.assert_almost_equal(ppp._alphaerr, 0.08, 2)
    assert ppp._ngtx == 580
figure(1)
clf()
title("Cities")
subplot(131)
pl.plotpdf()
subplot(132)
Example #32
t = timing.Timer()
t.tic()
for n, m in itertools.product(DIMENSIONS, INITIAL_EDGES):
    sim = ba.BA()
    sim.callback = lambda *_: None
    sim.run(starting_network_size=m, starting_edges=m, steps=n - m)

    print('Run simulation BA({},{})'.format(n, m))
    sim_graph = sim.graph.handle
    del sim
    sim_C = nx.average_clustering(sim_graph)
    print('\tComputed clustering')
    sim_CPL = approximate_cpl(sim_graph)
    print('\tComputed CPL')
    sim_alpha, sim_xmin, sim_L = plfit.plfit(sim_graph.degree().values())
    print('\tComputed Alpha')
    del sim_graph
    gc.collect()
    print('\tCollected')

    nx_graph = nx.barabasi_albert_graph(n, m)
    print('Created BA({},{})'.format(n, m))
    nx_C = nx.average_clustering(nx_graph)
    print('\tComputed clustering')
    nx_CPL = approximate_cpl(nx_graph)
    print('\tComputed CPL')
    nx_alpha, nx_xmin, nx_L = plfit.plfit(nx_graph.degree().values())
    print('\tComputed Alpha')
    del nx_graph
    gc.collect()
Example #33
import os
from plfit import plfit
from numpy import dtype, loadtxt
import numpy

print(os.getcwd())

#PDF_FILE = 'no_sync_clock_no_sync_agent_1352849705/pdf.csv'
#CCDF_FILE = 'no_sync_clock_no_sync_agent_1352849705/ccdf.csv'
#
#data_record = dtype([('nodes', float), ('ab', float), ('nx', float)])
#
#PDF_DATA = loadtxt(PDF_FILE, dtype=data_record,
#                   skiprows=11, delimiter=',')
#
#CCDF_DATA = loadtxt(CCDF_FILE, dtype=data_record,
#                   skiprows=11, delimiter=',')


FILE_NAME = 'no_sync_clock_1352885864/raw_degrees.csv'
RAW_DEGREES = loadtxt(FILE_NAME, dtype=float)

print(plfit(RAW_DEGREES.astype(int).tolist()))
Example #34
        nel = int(sys.argv[2])
        if len(sys.argv) > 3:
            xmin = float(sys.argv[3])
        else:
            xmin = 0.5
    else: 
        nel = 1000
else:
    nel = 1000
    xmin = 0.5
    ntests = 1000

a = np.zeros(ntests)
for i in range(ntests):
    X = plfit.plexp_inv(np.random.rand(nel), xmin, 2.5)
    p = plfit.plfit(X, xmin=xmin, quiet=True, silent=True)
    a[i] = p._alpha
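# histogram the fitted alphas; the Gaussian fit below estimates their spread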

h, b = plt.hist(a, bins=30)[:2]
bx = (b[1:] + b[:-1]) / 2.0

from agpy import gaussfitter
p, m, pe, chi2 = gaussfitter.onedgaussfit(bx, h,
                                          params=[0, ntests / 10.0, 2.5, 0.05],
                                          fixed=[1, 0, 0, 0])

fig1 = plt.figure(1)
fig1.clf()
plt.plot(bx, m)

print("XMIN fixed: Alpha = 2.5 (real), %0.3f +/- %0.3f (measured)" % (p[2],p[3]))

Example #35
        nel = int(sys.argv[2])
        if len(sys.argv) > 3:
            xmin = float(sys.argv[3])
        else:
            xmin = 0.5
    else:
        nel = 1000
else:
    nel = 1000
    xmin = 0.5
    ntests = 1000

a = np.zeros(ntests)
for i in range(ntests):
    X = plfit.plexp_inv(np.random.rand(nel), xmin, 2.5)
    p = plfit.plfit(X, xmin=xmin, quiet=True, silent=True)
    a[i] = p._alpha

h, b = plt.hist(a, bins=30)[:2]
bx = (b[1:] + b[:-1]) / 2.0

from agpy import gaussfitter

p, m, pe, chi2 = gaussfitter.onedgaussfit(bx,
                                          h,
                                          params=[0, ntests / 10.0, 2.5, 0.05],
                                          fixed=[1, 0, 0, 0])

fig1 = plt.figure(1)
fig1.clf()
plt.plot(bx, m)
Example #36
             bins=zebins,\
#             cumulative=-1,\
             normed=True,\
        )
plt.xscale('log')
if pression:    plt.xlabel('Pressure (Pa)')
else:           plt.xlabel('Diameter (m)')
plt.ylabel('Population (normalized)')
if pression: prefix="p"
else:        prefix=""
myp.makeplotres(prefix+"histogram",res=200,disp=False)
plt.close(1)

### COMPARED HISTOGRAMS
### --- FIT WITH POWER LAW
if pression: [alpha, xmin, L] = plfit.plfit(plothist,'xmin',0.3)
else:        [alpha, xmin, L] = plfit.plfit(plothist,'limit',20.)
print(alpha, xmin)

#a = plpva.plpva(plothist,0.75,'xmin',0.75)
#print a

#### SECOND ROUTINE
#### MUST USE THE DISCRETE VERSION FOR THE SIZE !!!
#if pression:   myplfit = plfit.plfit(plothist,verbose=True,xmin=0.75)
#else:          myplfit = plfit.plfit(plothist,verbose=True,xmin=20.)
#myplfit.plotppf()
#plt.show()
#exit()

Example #37
t = timing.Timer()
t.tic()
for n, m in itertools.product(DIMENSIONS, INITIAL_EDGES):
    sim = ba.BA()
    sim.callback = lambda *_: None
    sim.run(starting_network_size=m, starting_edges=m, steps=n-m)
    
    print('Run simulation BA({},{})'.format(n, m))
    sim_graph = sim.graph.handle
    del sim
    sim_C = nx.average_clustering(sim_graph)
    print('\tComputed clustering')
    sim_CPL = approximate_cpl(sim_graph)
    print('\tComputed CPL')
    sim_alpha, sim_xmin, sim_L = plfit.plfit(sim_graph.degree().values())
    print('\tComputed Alpha')
    del sim_graph
    gc.collect()
    print('\tCollected')

    nx_graph = nx.barabasi_albert_graph(n, m)
    print('Created BA({},{})'.format(n, m))
    nx_C = nx.average_clustering(nx_graph)
    print('\tComputed clustering')
    nx_CPL = approximate_cpl(nx_graph)
    print('\tComputed CPL')
    nx_alpha, nx_xmin, nx_L = plfit.plfit(nx_graph.degree().values())
    print('\tComputed Alpha')
    del nx_graph
    gc.collect()
Example #38
from myutil import *
import plfit

prob = 0.2
graph = set_up_graph()

for node_i in range(3, TOTAL_N_NODES):
    random_node_i = random.randint(0, node_i - 1)
    graph.AddNode(node_i)

    random_prob = random.uniform(0, 1)
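    # with probability prob attach to the chosen node itself; otherwise attach to
    # one of its neighbors, which biases attachment toward high-degree nodes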
    if random_prob < prob:
        graph.AddEdge(node_i, random_node_i)
    else:
        graph.AddEdge(node_i, rand_nbr_node(graph, random_node_i))

degrees = [graph.GetNI(i).GetDeg() for i in range(0, TOTAL_N_NODES)]
fit = plfit.plfit(degrees)

print('xmin  = %s' % fit._xmin)
print('alpha = %s' % fit._alpha)
Example #39
from agpy import readcol
# imports assumed by this snippet:
import sys
import time
import plfit
import powerlaw
from numpy.random import rand, seed

try:
    ne = int(sys.argv[1])
    seed(1)
    X=plfit.plexp_inv(rand(ne),1,2.5)
    X[:100] = X[100:200]
except ValueError:
    X = readcol(sys.argv[1])

if len(sys.argv)>2:
    discrete = bool(sys.argv[2])
else:
    discrete=None

print "Cython"
t1=time.time(); p3=plfit.plfit(X,discrete=discrete,usefortran=False,usecy=True); print time.time()-t1
print "Fortran"
t1=time.time(); p1=plfit.plfit(X,discrete=discrete,usefortran=True); print time.time()-t1
print "Numpy"
t1=time.time(); p3=plfit.plfit(X,discrete=discrete,usefortran=False); print time.time()-t1

print "Jeff Alcott's Powerlaw"
t5=time.time(); p5=powerlaw.Fit(X,discrete=discrete); print time.time()-t5



print "Pure Python"
t4=time.time(); p4=plfit.plfit_py(X.tolist()); print time.time()-t4

Example #40
import sys  # needed for sys.argv/sys.exit below
import plfit

if len(sys.argv) < 2:
    print("Usage: ./check_power_law.py series [-float]")
    sys.exit(0)

data = None
if len(sys.argv) >= 3 and sys.argv[2] == '-float':
    data = load_proportions(sys.argv[1])
else:
    data = load_counts(sys.argv[1])


dt = [i+1 for i,amt in enumerate(data) for j in range(amt)]

mf = plfit.plfit(dt,xmin = 1)

"""
rank = [i+1 for i in range(len(data))]

logRank = make_log(rank)
logData = make_log(data)

xmin = 1 #min(data)

lgs = [cnt*log((i+1)/xmin) for i, cnt in enumerate(data)]

n = sum(data)

alpha = 1 + n/sum(lgs)
Example #41
bins = np.logspace(np.log10(2.5e-3), 1)
pl.hist(full_table['MUSTANG_dend_flux'][mgps_ok], bins=bins, log=True,
        label="All sources")
pl.hist(full_table['MUSTANG_dend_flux'][cm_mm_nondetection & mgps_ok],
        bins=bins, log=True, label="cm/mm nondetections")
pl.hist(full_table['MUSTANG_dend_flux'][compact & mgps_ok],
        bins=bins, log=True, label="Compact sources", alpha=0.75, edgecolor='k', facecolor='none')
pl.hist(full_table['MUSTANG_dend_flux'][compact & cm_mm_nondetection & mgps_ok],
        bins=bins, log=True, label="Compact sources w/o cm/mm detections", alpha=0.75, edgecolor='w')
pl.semilogx()
pl.legend(loc='best')
pl.xlabel("MUSTANG source flux $S_{3 \mathrm{mm}}$ [Jy]")
pl.ylabel("Number of Sources")
pl.savefig(f'{catalog_figure_path}/full_catalog_histogram_cleaned.pdf')


PL_all = powerlaw.Fit(full_table['MUSTANG_dend_flux'])
PL_cmmmn = powerlaw.Fit(full_table['MUSTANG_dend_flux'][cm_mm_nondetection])

print(f"Power-law distribution has alpha={PL_all.alpha:0.3f} +/- {PL_all.sigma:0.3f} and xmin={PL_all.xmin:0.3f}")
print(f"cm/mm nondetection Power-law distribution has alpha={PL_cmmmn.alpha:0.3f} +/- {PL_cmmmn.sigma:0.3f} and xmin={PL_cmmmn.xmin:0.3f}")

plfit.plfit(full_table['MUSTANG_dend_flux'])
plfit.plfit(full_table['MUSTANG_dend_flux'][cm_mm_nondetection])
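# cross-check the same distributions with the plfit package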

with open(f'{pilotpaperpath}/distribution_alphas.tex', 'w') as fh:
    fh.write(f"\\newcommand{{\\plalphaall}}{{{PL_all.alpha:0.3f}}}\n")
    fh.write(f"\\newcommand{{\\plsigmaall}}{{{PL_all.sigma:0.3f}}}\n")
    fh.write(f"\\newcommand{{\\plalphacmmmn}}{{{PL_cmmmn.alpha:0.3f}}}\n")
    fh.write(f"\\newcommand{{\\plsigmacmmmn}}{{{PL_cmmmn.sigma:0.3f}}}\n")
Example #42
##
## Figure 3 - Power law fits on degree of (r,p) graphs
##

with open("../results/fig3_data.csv", 'w') as f:
    writer = csv.writer(f)
    tmp = writer.writerow(['type', 'r', 'p', 'type2', 'id', 'alpha'])
    for graph in glob(util.data_path + '/synth_graphs/g-*-u-*.csv'):
        # read data
        degs = []
        with open(graph, 'r') as f_in:
            reader = csv.reader(f_in)
            tmp = next(reader, None)  # skip the header
            for row in reader:
                degs += [row[1], row[2]]
        degs = list(Counter(degs).values())
        # run plfit
        alpha = plfit.plfit(degs, discrete=True).plfit()[1]
        # some fits are unstable, skip
        if alpha > 6:
            continue
        # write results
        graph_id = graph[:-4].split('/')[-1].split('-')
        tmp = writer.writerow(graph_id + [alpha])

##
## Figure 4 - Log-likelihood of misspecified models
##

graph = 'g-1.00-0.50-u-fig3'
(G, el) = synth_generate.make_rp_graph(id,
                                       G_in=nx.complete_graph(10),
                                       n_max=10000,
Example #43
from __future__ import print_function
import numpy as np
import plfit
import pylab as plt
import itertools

for ii in range(10):
    nel = 2000
    alpha = 2.5
    xmin = 1.0
    data = plfit.plexp_inv(np.random.rand(nel), xmin, alpha)

    result_py = plfit.plfit(data, quiet=False, silent=False, usecy=False, usefortran=False)
    result_cy = plfit.plfit(data, quiet=False, silent=False, usecy=True,  usefortran=False)
    result_fo = plfit.plfit(data, quiet=False, silent=False, usecy=False, usefortran=True )
    result_py.name = 'python'
    result_cy.name = 'cython'
    result_fo.name = 'fortran'

    for aa,bb in itertools.combinations((result_py, result_cy, result_fo), 2):
        np.testing.assert_almost_equal(aa._alpha, bb._alpha, 5)

        assert aa._ngtx == bb._ngtx
        # should be the same value and exact
        assert aa._xmin == bb._xmin

        maxdiff_xmin_kstest = np.max(np.abs((aa._xmin_kstest[:-1] - bb._xmin_kstest[:-1])))
        maxdiff_alpha_values = np.max(np.abs((aa._alpha_values[:-1] - bb._alpha_values[:-1])))
        print("comparing {0} to {1}".format(aa.name, bb.name))
        print("maxdiff xmin: ", maxdiff_xmin_kstest)
        print("maxdiff alpha: ", maxdiff_alpha_values)
            #nlines = os.system("grep -c message '%s'" % (root+"/"+filename))
            nmsgdict[username] += nlines

if __name__ == "__main__":
    from pylab import *
    figure(1)
    N = array(nmsgdict.values())
    hist(log10(N[N>0]))
    xlabel("log Number of messages")
    ylabel("Number of users")

    figure(2)
    C = array(countdict.values())
    hist(log10(C[C>0]))
    xlabel("log Number of conversations")
    ylabel("Number of users")

    try:
        import plfit
        pn = plfit.plfit(N[N>0])
        figure(3)
        pn.plotcdf()

        pc = plfit.plfit(C[C>0])
        figure(4)
        pc.plotcdf()

    except ImportError:
        # if you haven't installed the plfit code from agpy
        pass
Example #45
from __future__ import print_function
import numpy as np
import plfit
import pylab as plt
import itertools

for ii in range(10):
    nel = 2000
    alpha = 2.5
    xmin = 1.0
    data = plfit.plexp_inv(np.random.rand(nel), xmin, alpha)

    result_py = plfit.plfit(data,
                            quiet=False,
                            silent=False,
                            usecy=False,
                            usefortran=False)
    result_cy = plfit.plfit(data,
                            quiet=False,
                            silent=False,
                            usecy=True,
                            usefortran=False)
    result_fo = plfit.plfit(data,
                            quiet=False,
                            silent=False,
                            usecy=False,
                            usefortran=True)
    result_py.name = 'python'
    result_cy.name = 'cython'
    result_fo.name = 'fortran'
Example #46
import os

if not os.path.exists('tst.csv'):
    import requests
    result = requests.get('https://gist.githubusercontent.com/vfilimonov/1072e402e922712ad980/raw/27cc61d65590b382ec39120a1d25d4bd3abcfb4d/tst.csv')
    with open('tst.csv', 'wb') as f:
        f.write(result.content)

import numpy as np
import plfit

y = np.genfromtxt('tst.csv', delimiter=',')
tst_fit_py = plfit.plfit(y, usecy=False, usefortran=False, discrete=False)
tst_fit_fo = plfit.plfit(y, usecy=False, usefortran=True, discrete=False)
tst_fit_cy = plfit.plfit(y, usecy=True, usefortran=False, discrete=False)

print("py: ",tst_fit_py._xmins.shape, tst_fit_py._xmin_kstest.shape)
print("cy: ",tst_fit_cy._xmins.shape, tst_fit_cy._xmin_kstest.shape)
print("fo: ",tst_fit_fo._xmins.shape, tst_fit_fo._xmin_kstest.shape)

def func(xmin):
    ff = plfit.plfit(y, xmin=xmin, quiet=True, silent=True)
    return ff._ks

tst_KS_plfit = [func(xmin) for xmin in tst_fit_py._xmins]

import pylab as pl
pl.plot(tst_fit_py._xmins, tst_KS_plfit, 'g-')
pl.plot(tst_fit_py._xmins, tst_fit_py._xmin_kstest, 'r-', alpha=0.5)
pl.plot(tst_fit_cy._xmins, tst_fit_cy._xmin_kstest, 'b--', linewidth=2, alpha=0.5)
pl.plot(tst_fit_fo._xmins, tst_fit_fo._xmin_kstest, 'k:', linewidth=2, alpha=0.5)
Example #47
from myutil import *
import plfit

prob  = 0.2
graph = set_up_graph()

for node_i in range(3, TOTAL_N_NODES):
    random_node_i = random.randint(0, node_i - 1)
    graph.AddNode(node_i)

    random_prob   = random.uniform(0, 1)
    if random_prob < prob:
        graph.AddEdge(node_i, random_node_i)
    else:
        graph.AddEdge(node_i, rand_nbr_node(graph, random_node_i))

degrees = [graph.GetNI(i).GetDeg() for i in range(0, TOTAL_N_NODES)]
fit     = plfit.plfit(degrees)

print('xmin  = %s' % fit._xmin)
print('alpha = %s' % fit._alpha)
Example #48
########################################################
# Started Logging At: 2012-05-19 09:46:24
########################################################

import idlsave
bgps = idlsave.read('bolocat_v2.0_culled.sav')
bgps.bgps_culled
hist(bgps.bgps_culled.flux_40,bins=50)
hist(bgps.bgps_culled.flux_40,bins=np.logspace(-1,2,50))
clf()
hist(bgps.bgps_culled.flux_40,bins=np.logspace(-1,2,50))
loglog()
hist(bgps.bgps_culled.flux_40,bins=np.logspace(-1,2,50))
import plfit
plfit.plfit(bgps.bgps_culled.flux_40)
P = In[11]
P = Out[11]
O
P
P.kstest_ 
P.alpha_ 
P.plot_lognormal_pdf()
figure()
P.plot_lognormal_pdf()
figure()
P.plotpdf()
P._av
P._ks
P._alpha
P._alphaerr
print P
Example #49
#    assert ppp._ngtx == 1711

# Earthquakes are a BAD FIT in the original manuscript
#plf = plfit.plfit(earthquakes.ravel(), nosmall=True, usefortran=True, verbose=True, quiet=False)
#plc = plfit.plfit(earthquakes.ravel(), nosmall=True, usecy=True, verbose=True, quiet=False)
#pl = plfit.plfit(earthquakes.ravel(),  nosmall=True, usefortran=False, verbose=True, quiet=False)
#print "Earthquakes (Clauset): n:%10i mean,std,max: %8.2f,%8.2f,%8.2f xmin: %8.2f alpha: %8.2f (%8.2f) ntail: %10i p: %5.2f" % (19447,9.00,77.83,8009,52.46,2.37,0.08,580,0.76)
#for ppp in (pl,plf,plc):
#    print "Earthquakes (me)     : n:%10i mean,std,max: %8.2f,%8.2f,%8.2f xmin: %8.2f alpha: %8.2f (%8.2f) ntail: %10i p: %5.2f" % (ppp.data.shape[0], ppp.data.mean(), ppp.data.std(), ppp.data.max(), ppp._xmin, ppp._alpha, ppp._alphaerr, ppp._ngtx, ppp._ks_prob)
#    np.testing.assert_almost_equal(ppp._xmin, 0.794, 2)
#    np.testing.assert_almost_equal(ppp._alpha, 1.64, 2)
#    np.testing.assert_almost_equal(ppp._alphaerr, 0.04, 2)
#    assert ppp._ngtx == 11697

plf = plfit.plfit(cities.ravel() / 1e3,
                  usefortran=True,
                  verbose=True,
                  quiet=False)
plc = plfit.plfit(cities.ravel() / 1e3, usecy=True, verbose=True, quiet=False)
pl = plfit.plfit(cities.ravel() / 1e3,
                 usefortran=False,
                 verbose=True,
                 quiet=False)
print(
    "Cities (Clauset): n:%10i mean,std,max: %8.2f,%8.2f,%8.2f xmin: %8.2f alpha: %8.2f (%8.2f) ntail: %10i p: %5.2f"
    % (19447, 9.00, 77.83, 8009, 52.46, 2.37, 0.08, 580, 0.76))
for ppp in (pl, plf, plc):
    print(
        "Cities (me)     : n:%10i mean,std,max: %8.2f,%8.2f,%8.2f xmin: %8.2f alpha: %8.2f (%8.2f) ntail: %10i p: %5.2f"
        % (ppp.data.shape[0], ppp.data.mean(), ppp.data.std(), ppp.data.max(),
           ppp._xmin, ppp._alpha, ppp._alphaerr, ppp._ngtx, ppp._ks_prob))
    np.testing.assert_almost_equal(ppp._xmin, 52.46, 2)
Example #50
import numpy as np
import plfit
import pylab as plt

xmins = np.logspace(-1, 1)
nel = 2000

fig1 = plt.figure(1)
fig1.clf()
ax1 = fig1.add_subplot(2, 1, 1)
ax2 = fig1.add_subplot(2, 1, 2)

for alpha in (1.5, 2.5, 3.5):
    results = []
    for xmin in xmins:
        data = plfit.plexp_inv(np.random.rand(nel), xmin, alpha)
        result = plfit.plfit(data, quiet=True, silent=True)
        results.append(result)

    fitted_xmins = [r._xmin for r in results]
    fitted_alphas = [r._alpha for r in results]
    fitted_alpha_errors = [r._alphaerr for r in results]

    ax1.loglog(xmins, fitted_xmins, "s", label="$\\alpha={0}$".format(alpha), alpha=0.5)
    ax1.loglog(xmins, xmins, "k--", alpha=0.5, zorder=-1)

    ax2.errorbar(
        xmins, fitted_alphas, yerr=fitted_alpha_errors, marker="s", label="$\\alpha={0}$".format(alpha), alpha=0.5
    )
    ax2.set_xscale("log")

ax1.legend(loc="best")
Example #51
import scipy.io
import plfit
import time

m = scipy.io.loadmat('AUD_Ret_1000.mat')
A = m['A'].squeeze()
Pnpy = plfit.plfit(A)
Pfor = plfit.plfit(A)
Pfor_nosmall = plfit.plfit(A)
Pcy = plfit.plfit(A)
Py = plfit.plfit_py(A)


# for comparison
r_for_nosmall = Pfor_nosmall.plfit(usefortran=True, verbose=True, quiet=False, discrete=False, nosmall=True)

t0 = time.time()
r_for = Pfor.plfit(usefortran=True, verbose=True, quiet=False, discrete=False, nosmall=False)
t1 = time.time()
r_cy  = Pcy.plfit(usecy=True, verbose=True, quiet=False, discrete=False, nosmall=False)
t2 = time.time()
r_ppy = Py.plfit(nosmall=False)
t3 = time.time()
r_npy = Pnpy.plfit(usefortran=False, usecy=False, verbose=True, quiet=False, discrete=False, nosmall=False)
t4 = time.time()

print("xmin,alpha for 4 different implementations: ")
print("npy: ",r_npy)
print("ppy: ",r_ppy)
print("for: ",r_cy)
print("cy:  ",r_for)
Example #52
    hist(log10(C[C > 0]))
    xlabel("log Number of conversations")
    ylabel("Number of users")

    CN = array(countdict.keys())
    print "Top 10 most conversations \n" + "\n".join(
        "%s: %i" % (a, b) for a, b in zip(CN[argsort(C)[-10:]],
                                          sort(C)[-10:]))

    figure(5)  # idea courtesy Jordan Mirocha
    loglog(C, N, 'ko')
    xlabel("Number of conversations")
    ylabel("Number of messages exchanged")
    for ii in unique(concatenate([argsort(C)[-11:], argsort(N)[-11:]])):
        text(C[ii], N[ii], CN[ii].split('@')[0])
    title("Adium conversation history")

    try:
        import plfit
        pn = plfit.plfit(N[N > 0])
        figure(3)
        pn.plotcdf()

        pc = plfit.plfit(C[C > 0])
        figure(4)
        pc.plotcdf()

    except ImportError:
        # if you haven't installed the plfit code from agpy
        pass
Example #53
        path +
        'exp12_ds2_astrosky_arrang45_atmotest_amp5.0E+02_sky00_seed00_peak050.00_nosmooth_bolocat.sav'
    )
    bolocat_in = idlsave.read(
        path +
        'exp12_ds2_astrosky_arrang45_atmotest_amp5.0E+02_sky00_seed00_peak050.00_nosmooth_bolocat_input.sav'
    )
    bolocat_filt = idlsave.read(
        path +
        'exp12_ds2_astrosky_arrang45_atmotest_amp5.0E+02_sky00_seed00_peak050.00_nosmooth_filtered_bolocat.sav'
    )
    bolocat_bgps = atpy.Table(
        '/Users/adam/work/catalogs/bolocam_gps_v1_0_1.tbl')

    print "Bolocat"
    PL40 = plfit.plfit(bolocat.bolocat_struct.flux_40)
    PLS = plfit.plfit(bolocat.bolocat_struct.flux)
    print "Input"
    PL40in = plfit.plfit(bolocat_in.bolocat.flux_40)
    PLSin = plfit.plfit(bolocat_in.bolocat.flux)
    print "Filtered"
    PL40filt = plfit.plfit(bolocat_filt.bolocat.flux_40 -
                           bolocat_filt.bolocat.flux_40.min())
    PLSfilt = plfit.plfit(bolocat_filt.bolocat.flux)
    print "L30"
    L30 = (bolocat_bgps.glon_peak < 31) * (bolocat_bgps.glon_peak > 30)
    PLS_L30 = plfit.plfit(bolocat_bgps.flux[L30])
    PL40_L30 = plfit.plfit(bolocat_bgps.flux_40[L30])

    figure(1)
    clf()