def MakeFigures():
    """Plot the distribution of city/town populations on several scales.

    Reads the populations with ``populations.Process()``, prints how many
    values there are, then draws the CDF on linear and log-x axes and the
    complementary CDF on log-log axes.  Finally the sorted natural logs of
    the populations are handed to ``rankit.MakeNormalPlot``.
    """
    pops = populations.Process()
    # A parenthesized single argument prints the same text whether
    # ``print`` is a statement (Python 2) or a function (Python 3).
    print(len(pops))

    cdf = Cdf.MakeCdfFromList(pops, 'populations')

    # Options shared by all three CDF figures.
    common = dict(title='City/Town Populations',
                  xlabel='population',
                  legend=False)

    myplot.Cdf(cdf,
               root='populations',
               ylabel='CDF',
               **common)

    myplot.Cdf(cdf,
               root='populations_logx',
               ylabel='CDF',
               xscale='log',
               **common)

    myplot.Cdf(cdf,
               root='populations_loglog',
               complement=True,
               ylabel='Complementary CDF',
               yscale='log',
               xscale='log',
               **common)

    log_pops = sorted(math.log(x) for x in pops)
    rankit.MakeNormalPlot(log_pops, 'populations_rankit')
Exemple #2
0
def main():
    """Compare a 6:53 pace against several groups of race results.

    For the overall field the CDF probability of the pace's speed is
    printed.  The same speed is then ranked within the group returned by
    ``GetSpeeds_M4049``, and that rank is mapped back to a speed in the
    groups returned by ``GetSpeeds_M5059`` and ``GetSpeeds_F2039``.  Each
    group's CDF is plotted and shown in turn.

    NOTE: ``results`` is not defined inside this function; it has to be
    available as a module-level name.
    """
    # Every message is built as one string so the output is identical
    # whether ``print`` is a statement (Python 2) or a function (Python 3).
    speeds = relay.GetSpeeds(results)
    speed = relay.ConvertPaceToSpeed('6:53')
    cdf = Cdf.MakeCdfFromList(speeds)
    print('%s speed' % (cdf.Prob(speed),))
    print('%s time' % (convert_speeds_to_time(speed),))
    myplot.Cdf(cdf)
    myplot.Show()

    speeds_old = GetSpeeds_M4049(results)
    cdf_old = Cdf.MakeCdfFromList(speeds_old)
    rank = cdf_old.Prob(speed)
    print('%s rank %s speed' % (rank, speed))
    print('%s time' % (convert_speeds_to_time(speed),))
    myplot.Cdf(cdf_old)
    myplot.Show()

    # Speed at the same rank within the second group.
    speeds_5059 = GetSpeeds_M5059(results)
    cdf_5059 = Cdf.MakeCdfFromList(speeds_5059)
    future_speed = cdf_5059.Value(rank)
    print('%s speed' % (future_speed,))
    print('%s time' % (convert_speeds_to_time(future_speed),))
    myplot.Cdf(cdf_5059)
    myplot.Show()

    # Speed at the same rank within the third group.
    fspeeds = GetSpeeds_F2039(results)
    cdf_female = Cdf.MakeCdfFromList(fspeeds)
    fspeed = cdf_female.Value(rank)
    print('%s speed' % (fspeed,))
    print('%s time' % (convert_speeds_to_time(fspeed),))
    myplot.Cdf(cdf_female)
    myplot.Show()
Exemple #3
0
def MakeFigure():
    """Plot interarrival times from ``babyboom.dat`` against a random model.

    Reads the last whitespace-separated field of every line after the
    'START DATA' marker as an integer time, computes the differences
    between consecutive times (the first time itself is the first
    difference), prints their mean, and saves three figures: the CDF, the
    complementary CDF together with a same-size ``random.expovariate``
    sample on a log-y scale, and the complementary CDF alone on a log-y
    scale.
    """
    times = []
    # The context manager closes the file even if parsing fails.
    with open('babyboom.dat') as fp:
        # skip to the beginning of the data
        for line in fp:
            if 'START DATA' in line:
                break

        # read a list of times; the same iterator resumes after the marker
        for line in fp:
            times.append(int(line.split()[-1]))

    # compute interarrival times
    diffs = [times[0]]
    diffs.extend(later - earlier
                 for earlier, later in zip(times, times[1:]))

    n = len(diffs)
    mu = thinkstats.Mean(diffs)

    # Single formatted string: same output under Python 2 and Python 3.
    print('mean interarrival time %s' % (mu,))

    cdf = Cdf.MakeCdfFromList(diffs, 'actual')

    # 1.0 / mu avoids integer division under Python 2 should mu be an int.
    sample = [random.expovariate(1.0 / mu) for _ in range(n)]
    model = Cdf.MakeCdfFromList(sample, 'model')

    formats = ['eps', 'png', 'pdf']

    myplot.Cdf(cdf)
    myplot.Save(root='interarrivals',
                title='Time between births',
                xlabel='minutes',
                ylabel='CDF',
                legend=False,
                formats=formats)

    myplot.Cdfs([cdf, model], complement=True)
    myplot.Save(root='interarrivals_model',
                title='Time between births',
                xlabel='minutes',
                ylabel='Complementary CDF',
                yscale='log',
                formats=formats)

    pyplot.subplots_adjust(bottom=0.11)
    myplot.Cdf(cdf, complement=True)
    myplot.Save(root='interarrivals_logy',
                title='Time between births',
                xlabel='minutes',
                ylabel='Complementary CDF',
                yscale='log',
                legend=False,
                formats=formats)
Exemple #4
0
def main():
    """Plot the weight CDF, then the CDF of a 1000-value sample from it.

    Pregnancy records are read from ``../chap1/``; ``weight_cdf`` and
    ``Sample`` are helpers defined elsewhere.
    """
    pregnancies = survey.Pregnancies()
    pregnancies.ReadRecords('../chap1/')

    cdf = weight_cdf(pregnancies)
    myplot.Cdf(cdf)
    myplot.show()

    # Build a second CDF from values produced by Sample(cdf, 1000).
    resampled_cdf = Cdf.MakeCdfFromList(Sample(cdf, 1000))
    myplot.Cdf(resampled_cdf)
    myplot.Show()
Exemple #5
0
def main(script, *args):
    """Plot the income CDF on log-x axes and its complement on log-log axes.

    script, args: accepted but not used by this function.
    """
    income_data = ReadIncomeFile()
    _hist, _pmf, cdf = MakeIncomeDist(income_data)

    # CDF on a log-x scale
    myplot.Cdf(cdf, root='income_logx', xscale='log')

    # complementary CDF on a log-log scale
    loglog_options = dict(root='income_loglog',
                          complement=True,
                          xscale='log',
                          yscale='log',
                          show=True)
    myplot.Cdf(cdf, **loglog_options)
Exemple #6
0
def observe_data(l, name=None, show=False):
    """Print quartiles, median and mean of a sample or distribution.

    l: a list of values, a ``Pmf.Pmf`` or a ``Cdf.Cdf``
    name: label used in the printed summary; for a list it is also the
          prefix of the generated cdf/pmf names.  When omitted, a list is
          labelled 'data' and a Pmf/Cdf contributes its own ``name``.
    show: if true, plot the PMF (when one is available) and then the CDF

    Raises TypeError when ``l`` is none of the supported types.
    """
    pmf = None
    if isinstance(l, list):
        if name is None:
            # Previously ``None + ' cdf'`` raised TypeError for the default.
            name = 'data'
        cdf = Cdf.MakeCdfFromList(l, name + ' cdf')
        pmf = Pmf.MakePmfFromList(l, name + ' pmf')
    elif isinstance(l, Pmf.Pmf):
        pmf = l
        cdf = Cdf.MakeCdfFromPmf(l)
        if name is None:
            name = pmf.name
    elif isinstance(l, Cdf.Cdf):
        cdf = l
        if name is None:
            name = cdf.name
    else:
        # TypeError is a subclass of Exception, so existing handlers
        # written as ``except Exception`` keep working.
        raise TypeError('input parameter type is wrong')

    v_25 = cdf.Percentile(25)
    median = cdf.Percentile(50)
    v_75 = cdf.Percentile(75)
    mean = cdf.Mean()
    print('%s: 1/4:%4.2f(%4.2f), 1/2:%4.2f(mean-median:%4.2f), mean:%4.2f, 3/4:%4.2f(%4.2f)' %
          (name, v_25, median - v_25, median, mean - median, mean,
           v_75, v_75 - median))

    if show:
        # A Cdf input has no PMF, so only the CDF is drawn in that case.
        if pmf is not None:
            myplot.Pmf(pmf)
            myplot.Show()
        myplot.Cdf(cdf)
        myplot.Show()
def main():
    """Plot the PMF and CDF of running speeds after biasing.

    The PMF of the actual speeds is passed through ``BiasPmf`` with the
    value 7.5 (presumably the observer's speed in mph -- confirm against
    ``BiasPmf``); the biased PMF is saved as a histogram and its CDF is
    then displayed.
    """
    speeds = relay.GetSpeeds(relay.ReadResults())

    # distribution of actual speeds
    actual_pmf = Pmf.MakePmfFromList(speeds, 'actual speeds')

    # biased distribution seen by the observer
    observed_pmf = BiasPmf(actual_pmf, 7.5, name='observed speeds')

    myplot.Clf()
    myplot.Hist(observed_pmf)
    hist_options = dict(root='observed_speeds',
                        title='PMF of running speed',
                        xlabel='speed (mph)',
                        ylabel='probability')
    myplot.Save(**hist_options)

    observed_cdf = Cdf.MakeCdfFromPmf(observed_pmf)

    myplot.Clf()
    myplot.Cdf(observed_cdf)
    cdf_options = dict(root='observed_speeds_cdf',
                       title='CDF of running speed',
                       xlabel='speed (mph)',
                       ylabel='cumulative probability')
    myplot.show(**cdf_options)
def PlotAges(resp):
    """Show the CDF of the ``age`` attribute over ``resp.records``.

    resp: object whose ``records`` is an iterable of items with ``age``
    """
    age_cdf = Cdf.MakeCdfFromList([record.age for record in resp.records])
    myplot.Clf()
    myplot.Cdf(age_cdf)
    myplot.Show()
Exemple #9
0
def main():
    """Exercise 3.6: print percentile ranks of one birth weight.

    Builds CDFs of birth weight (``lbs * 16 + oz``) for all live births
    and for live births that are not first births, prints the percentile
    rank of ``my_birth_weight`` within each, and shows the second CDF.
    """
    my_birth_weight = 163  # same units as the CDFs: lbs * 16 + oz

    table = survey.Pregnancies()
    table.ReadRecords()

    def weights_in_oz(records):
        """Return lbs * 16 + oz for records with integer fields, <= 200."""
        ounces = []
        for rec in records:
            lbs, oz = rec.birthwgt_lb, rec.birthwgt_oz
            # Non-integer fields are skipped (presumably missing-value
            # markers); totals above 200 are dropped as well.
            if not (isinstance(lbs, int) and isinstance(oz, int)):
                continue
            total = lbs * 16 + oz
            if total <= 200:
                ounces.append(total)
        return ounces

    live_births = [p for p in table.records if p.outcome == 1]
    live_cdf = Cdf.MakeCdfFromList(weights_in_oz(live_births),
                                   name="live birth weights")
    print("My birth weight percentile rank (vs all births): %d" %
          (100 * live_cdf.Prob(my_birth_weight)))

    not_first = [p for p in live_births if p.birthord != 1]
    not_first_cdf = Cdf.MakeCdfFromList(weights_in_oz(not_first),
                                        name="not first live birth weights")
    # The comparison group excludes first births, so the label says so
    # (it used to read "vs first births").
    print("My birth weight percentile rank (vs non-first births): %d" %
          (100 * not_first_cdf.Prob(my_birth_weight)))

    myplot.Cdf(not_first_cdf)
    myplot.Show(title="not first live birth weight CDF",
                xlabel="birth weight oz",
                ylabel="probability")
Exemple #10
0
def process(data):
    """Show the histogram, PMF and CDF of ``data`` with a normal overlay.

    The final figure holds the empirical CDF plus a scatter of
    ``erf.NormalCdf`` evaluated at points from ``normal_sample``, using
    the mean and standard deviation of ``data``.
    """
    # histogram
    hist = Pmf.MakeHistFromList(data, name='hist')
    myplot.Hist(hist, color='blue')
    myplot.Show()

    # PMF derived from the histogram
    myplot.Pmf(Pmf.MakePmfFromHist(hist, name='pmf'), color='yellow')
    myplot.Show()

    myplot.Clf()

    # CDF of the actual data
    myplot.Cdf(Cdf.MakeCdfFromList(data, name='loafs'))

    mu, var = thinkstats.MeanVar(data)
    sigma = math.sqrt(var)
    print("mu = %.3f, sigma = %.3f" % (mu, sigma))

    # normal distribution with the same mean and standard deviation
    xs = normal_sample(len(data), mu, sigma)
    ys = []
    for x in xs:
        ys.append(erf.NormalCdf(x, mu=mu, sigma=sigma))
    myplot.Scatter(xs, ys, color='red', label='sample')
    myplot.Show()
Exemple #11
0
def main():
    """Show the CDF of baby-boom intervals, then its complement on log-y.

    Intervals are differences between consecutive ``minutes`` values of
    the records, with the first one measured from 0.
    """
    babies = BabyBoom.Babies()
    babies.ReadRecords(data_dir='res', n=None)

    interval = []
    previous = 0
    for record in babies.records:
        current = record.minutes
        interval.append(current - previous)
        previous = current

    cdf = Cdf.MakeCdfFromList(interval, name='baby interval')
    myplot.Cdf(cdf, complement=False, transform=None)
    myplot.Show()

    # log scale on the y axis of the CCDF, where CCDF(x) = 1 - CDF(x)
    myplot.Clf()
    myplot.Cdf(cdf, complement=True, yscale='log')
    myplot.Show()
Exemple #12
0
def main():
    """Show the complementary CDF of ``paretovariate`` output on log-log axes."""
    sz = 1000
    alph = 1.7
    exem = 100
    sample_cdf = Cdf.MakeCdfFromList(paretovariate(sz, alph, exem))

    myplot.Clf()
    myplot.Cdf(sample_cdf, complement=True, xscale='log', yscale='log')
    myplot.Show(title='CCDF of {0} random paretovariates'.format(sz))
Exemple #13
0
def MakeCdfs(lens):
    """Save the CDF (log-x) and complementary CDF (log-log) of ``lens``.

    lens: list of values; the axis label describes them as numbers of
          friends/foes
    """
    cdf = Cdf.MakeCdfFromList(lens, 'slashdot')

    myplot.Clf()
    myplot.Cdf(cdf)
    myplot.Save(root='slashdot.logx',
                xlabel='Number of friends/foes',
                ylabel='CDF',
                xscale='log')

    myplot.Clf()
    myplot.Cdf(cdf, complement=True)
    # This figure shows the complement, so the y axis is labelled as such
    # (it was previously labelled plain 'CDF').
    myplot.Save(root='slashdot.loglog',
                xlabel='Number of friends/foes',
                ylabel='Complementary CDF',
                xscale='log',
                yscale='log')
Exemple #14
0
def MakeFigures(pool, firsts, others):
    """Save the age/weight CDF figures and an age-vs-weight hexbin plot.

    pool, firsts, others: objects providing ``age_cdf`` and ``weight_cdf``;
    ``pool`` is also passed to ``GetAgeWeight``.
    """
    # Labels shared by the pooled and the first-vs-others figures.
    age_labels = dict(title="Distribution of mother's age",
                      xlabel='age (years)',
                      ylabel='CDF')
    weight_labels = dict(title="Distribution of birth weight",
                         xlabel='birth weight (oz)',
                         ylabel='CDF')

    # CDF of all ages
    myplot.Clf()
    myplot.Cdf(pool.age_cdf)
    myplot.Save(root='agemodel_age_cdf', legend=False, **age_labels)

    # CDF of all weights
    myplot.Clf()
    myplot.Cdf(pool.weight_cdf)
    myplot.Save(root='agemodel_weight_cdf', legend=False, **weight_labels)

    # CDFs of ages for first babies and others
    myplot.Clf()
    myplot.Cdfs([firsts.age_cdf, others.age_cdf])
    myplot.Save(root='agemodel_age_cdfs', **age_labels)

    # CDFs of weights for first babies and others
    myplot.Clf()
    myplot.Cdfs([firsts.weight_cdf, others.weight_cdf])
    myplot.Save(root='agemodel_weight_cdfs', **weight_labels)

    # ages against weights, drawn as a hexbin plot
    ages, weights = GetAgeWeight(pool)
    pyplot.clf()
    pyplot.hexbin(ages, weights, cmap=matplotlib.cm.gray_r)
    myplot.Save(root='agemodel_scatter',
                xlabel='Age (years)',
                ylabel='Birth weight (oz)',
                legend=False)
Exemple #15
0
def PlotPosteriorMeans(d, name):
    """Plots the CDF of the means of the posteriors.

    d: map from code to posterior Suite
    name: label for the cdf
    """
    # ``values()`` is available on dicts in both Python 2 and Python 3,
    # whereas ``itervalues()`` exists only in Python 2.
    means = [suite.Mean() for suite in d.values()]
    cdf = thinkbayes.MakeCdfFromList(means, name=name)
    myplot.Cdf(cdf)
def CheckCdf2():
    """Plot the CDF of chi-squared CDF values of simulated statistics.

    Each of 1000 ``SimulateChi2()`` values is passed through
    ``scipy.stats.chi2.cdf`` with 3 degrees of freedom, and the CDF of the
    transformed values is shown.
    """
    df = 3
    simulated = [SimulateChi2() for _ in range(1000)]
    transformed = []
    for chi2_value in simulated:
        transformed.append(scipy.stats.chi2.cdf(chi2_value, df))

    myplot.Cdf(Cdf.MakeCdfFromList(transformed))
    myplot.Show()
Exemple #17
0
def main():
    """Show the PMF and then the CDF of 1000 random values in [0, 100)."""
    # Named ``values`` so the builtin ``list`` is not shadowed.
    values = [100 * random.random() for _ in range(1000)]
    # The PMF label used to be misspelled 'pfm'.
    pmf = Pmf.MakePmfFromList(values, name='pmf')
    cdf = Cdf.MakeCdfFromList(values, name='cdf')
    myplot.Pmf(pmf)
    myplot.Show()
    myplot.Clf()
    myplot.Cdf(cdf)
    myplot.Show()
Exemple #18
0
def main():
    """Exercise 3.5: show the CDF of the relay race speeds."""
    race_speeds = relay.GetSpeeds(relay.ReadResults())
    speeds_cdf = Cdf.MakeCdfFromList(race_speeds, "race speeds")
    mplt.Cdf(speeds_cdf)

    labels = dict(title="Race Speed CDF",
                  xlabel="speed in mph",
                  ylabel="probability")
    mplt.show(**labels)
Exemple #19
0
    def PlotPosterior(self, root=None, clf=False):
        """Plot the CDF returned by ``self.Cdf()``.

        root: passed through to ``myplot.Cdf``; a truthy value also
              forces ``clf`` to True
        clf: passed through to ``myplot.Cdf``
        """
        clf = True if root else clf

        options = dict(root=root,
                       clf=clf,
                       xlabel='# of taxa',
                       ylabel='prob',
                       legend=False)
        myplot.Cdf(self.Cdf(), **options)
Exemple #20
0
def MakeExample():
    """Plot the CDF of the small sample [2, 1, 3, 2, 5]."""
    sample_cdf = Cdf.MakeCdfFromList([2, 1, 3, 2, 5])
    options = dict(root='example_cdf',
                   title='CDF',
                   xlabel='x',
                   ylabel='CDF(x)',
                   axis=[0, 6, 0, 1],
                   legend=False)
    myplot.Cdf(sample_cdf, **options)
Exemple #21
0
def Q2(results):
    """Show the CDF of ``results`` and print its values at p=0.95 and 0.99.

    results: list of values; note that it is sorted in place.
    """
    results.sort()
    cdf = Cdf.MakeCdfFromList(results, name='cdf')
    myplot.Cdf(cdf)
    myplot.Show()

    for prob in (0.95, 0.99):
        print("p:%4.2f significant_value = %d" % (prob, cdf.Value(prob)))
Exemple #22
0
def CheckCdf():
    """Overlay the ``Chi2Cdf`` curve and the CDF of simulated values.

    Plots the points from ``Chi2Cdf(df=3, high=15)`` and then the CDF of
    1000 ``SimulateChi2()`` values, and shows the figure.
    """
    curve_xs, curve_ys = Chi2Cdf(df=3, high=15)
    pyplot.plot(curve_xs, curve_ys)

    simulated_cdf = Cdf.MakeCdfFromList([SimulateChi2() for _ in range(1000)])

    myplot.Cdf(simulated_cdf)
    myplot.Show()
Exemple #23
0
def main():
    """Show the CDF of the running speeds from the relay results."""
    speeds = relay.GetSpeeds(relay.ReadResults())

    # distribution of actual speeds
    speeds_cdf = Cdf.MakeCdfFromList(speeds, 'speeds')
    options = dict(title='CDF of running speed',
                   xlabel='speed (mph)',
                   ylabel='probability',
                   show=True)
    myplot.Cdf(speeds_cdf, **options)
def PlotMarginals(suite):
    """Save side-by-side CDFs of the two marginals of ``suite``.

    suite: passed to ``ComputeMarginals``; its ``name`` is used in the
           root of the saved figure
    """
    pmf_m, pmf_s = ComputeMarginals(suite)

    pyplot.clf()
    pyplot.figure(1, figsize=(7, 4))

    # One panel per marginal: (pmf, cdf name, x-axis label).
    panels = [
        (pmf_m, 'mu', 'Mean height (cm)'),
        (pmf_s, 'sigma', 'Std Dev height (cm)'),
    ]
    for position, (marginal, label, xlabel) in enumerate(panels, 1):
        pyplot.subplot(1, 2, position)
        myplot.Cdf(Cdf.MakeCdfFromPmf(marginal, label))
        pyplot.xlabel(xlabel)
        pyplot.ylabel('CDF')

    myplot.Save(root='bayes_height_marginals_%s' % suite.name)
Exemple #25
0
def main():
    """Show the CDF of ``paretovariate(1, 0.5)``, then its CCDF on log-log axes."""
    cdf = Cdf.MakeCdfFromList(paretovariate(1, 0.5))
    myplot.Cdf(cdf)
    myplot.show()

    # Complementary values from ccdf_list, plotted against the CDF's
    # values with both axes on a log scale.
    complementary = ccdf_list(cdf)
    plt = myplot.pyplot
    plt.plot(cdf.Values(), complementary)
    for set_scale in (plt.xscale, plt.yscale):
        set_scale('log')
    plt.show()
Exemple #26
0
def main(script, *args):
    """Save the income CDF (log-x) and complementary CDF (log-log).

    script, args: accepted but not used by this function.
    """
    _hist, _pmf, cdf = MakeIncomeDist(ReadIncomeFile())

    # CDF on a log-x scale
    myplot.Clf()
    myplot.Cdf(cdf)
    logx_options = dict(root='income_logx',
                        xscale='log',
                        xlabel='income',
                        ylabel='CDF')
    myplot.Save(**logx_options)

    # complementary CDF on a log-log scale
    myplot.Clf()
    myplot.Cdf(cdf, complement=True)
    loglog_options = dict(root='income_loglog',
                          complement=True,
                          xscale='log',
                          yscale='log',
                          xlabel='income',
                          ylabel='complementary CDF')
    myplot.Save(**loglog_options)
Exemple #27
0
def MakeFigures(pool, firsts, others):
    """Save PMF and CDF figures of birth weight for firsts and others.

    pool: accepted but not used by this function
    firsts, others: objects providing ``weight_pmf`` and ``weight_cdf``
    """
    # (group, color) pairs in drawing order.
    groups = ((firsts, 'blue'), (others, 'orange'))

    # PMFs of birth weights for first babies and others
    myplot.Clf()
    for group, color in groups:
        myplot.Hist(group.weight_pmf, linewidth=0, color=color)
    myplot.Save(root='nsfg_birthwgt_pmf',
                title='Birth weight PMF',
                xlabel='weight (ounces)',
                ylabel='probability')

    # CDFs of birth weights for first babies and others
    myplot.Clf()
    for group, color in groups:
        myplot.Cdf(group.weight_cdf, linewidth=2, color=color)
    myplot.Save(root='nsfg_birthwgt_cdf',
                title='Birth weight CDF',
                xlabel='weight (ounces)',
                ylabel='probability',
                axis=[0, 200, 0, 1])
Exemple #28
0
def main():
    """Show the CDF of ``weibullvariate(1, 1)``, then its CCDF on a log-y axis.

    Per the original note, with k=1 the Weibull plot is expected to be
    linear.
    """
    cdf = Cdf.MakeCdfFromList(weibullvariate(1, 1))
    myplot.Cdf(cdf)
    myplot.show()

    # Only the y axis is switched to a log scale here.
    complementary = ccdf_list(cdf)
    plt = myplot.pyplot
    plt.plot(cdf.Values(), complementary)
    plt.yscale('log')
    plt.show()
Exemple #29
0
def main():
    """Exercises 3.9 and 3.10: resampled birth weights and random numbers.

    First shows the CDF of 1000 values obtained from ``sample`` applied to
    the live-birth weight CDF, then the PMF and CDF of 1000
    ``random.random()`` values.
    """
    # Exercise 3.9
    table = survey.Pregnancies()
    table.ReadRecords()

    live_births = [(p.birthwgt_lb, p.birthwgt_oz)
                   for p in table.records if p.outcome == 1]
    # Keep records whose fields are plain ints and total at most 200.
    weights = []
    for lbs, oz in live_births:
        if type(lbs) is int and type(oz) is int and lbs * 16 + oz <= 200:
            weights.append(lbs * 16 + oz)

    weights_cdf = Cdf.MakeCdfFromList(weights, name="live birth weights")
    resampled = sample(weights_cdf, 1000)
    myplot.Cdf(Cdf.MakeCdfFromList(resampled))
    myplot.show(title="CDF of live births resampled")

    # Exercise 3.10
    random_values = [random.random() for _ in range(1000)]
    myplot.Pmf(Pmf.MakePmfFromList(random_values))
    myplot.show(title="random pmf")
    myplot.Cdf(Cdf.MakeCdfFromList(random_values))
    myplot.Show(title="random cdf")
Exemple #30
0
def main():
    """Plot the analytic and empirical CDF of 43 exponential variates."""
    # F(x) computed from the formula 1 - e^(-lambda * x)
    lamb = 1 / 32.6
    draws = [random.expovariate(lamb) for _ in range(43)]
    xs = sorted(draws)
    ys = []
    for x in xs:
        ys.append(1 - pow(math.e, -lamb * x))
    myplot.Plot(xs, ys, label='formular')

    # F(x) obtained by cumulative counting, i.e. the empirical CDF
    cdf = Cdf.MakeCdfFromList(xs, name='expovariate')
    print("mu = ", cdf.Mean())
    print("median = ", cdf.Percentile(50))
    myplot.Cdf(cdf, complement=False, transform=None)
    myplot.Show()