def MakeFigures():
    """Plot the city/town population distribution in several forms.

    Loads the values with populations.Process(), prints how many there
    are, and asks myplot.Cdf for three figures:

      populations         -- CDF on default axes
      populations_logx    -- CDF with a log-scaled x axis
      populations_loglog  -- complementary CDF on log-log axes

    Finally the sorted natural logs of the values are handed to
    rankit.MakeNormalPlot under the root 'populations_rankit'.
    """
    pops = populations.Process()
    # Single-argument parenthesised form prints identically on Python 2 and 3.
    print(len(pops))

    cdf = Cdf.MakeCdfFromList(pops, 'populations')

    # Options shared by all three CDF figures.
    common = dict(title='City/Town Populations',
                  xlabel='population',
                  legend=False)

    myplot.Cdf(cdf, root='populations', ylabel='CDF', **common)

    myplot.Cdf(cdf, root='populations_logx', ylabel='CDF',
               xscale='log', **common)

    myplot.Cdf(cdf, root='populations_loglog', complement=True,
               ylabel='Complementary CDF', yscale='log', xscale='log',
               **common)

    log_pops = sorted(math.log(x) for x in pops)
    rankit.MakeNormalPlot(log_pops, 'populations_rankit')
def main():
    """Compare one running speed against several groups of race results.

    The pace '6:53' is converted to a speed, then:

      1. its probability (percentile rank) in the CDF of all speeds is
         printed and the CDF is shown;
      2. its rank in the CDF built from GetSpeeds_M4049 is printed and
         that CDF is shown;
      3. the speed at that same rank in the CDFs built from
         GetSpeeds_M5059 and GetSpeeds_F2039 is printed, each followed by
         its plot.

    NOTE(review): `results` is not assigned in this function; it is
    presumably a module-level global -- confirm against the rest of the
    file.  The group helpers are presumably age/gender divisions, judging
    by their names only.
    """
    speeds = relay.GetSpeeds(results)
    speed = relay.ConvertPaceToSpeed('6:53')

    # Format strings (rather than comma-separated print arguments) give the
    # same output on Python 2 and Python 3.
    cdf = Cdf.MakeCdfFromList(speeds)
    print('%s speed' % (cdf.Prob(speed),))
    print('%s time' % (convert_speeds_to_time(speed),))
    myplot.Cdf(cdf)
    myplot.Show()

    speeds_old = GetSpeeds_M4049(results)
    cdf_old = Cdf.MakeCdfFromList(speeds_old)
    rank = cdf_old.Prob(speed)
    print('%s rank %s speed' % (rank, speed))
    print('%s time' % (convert_speeds_to_time(speed),))
    myplot.Cdf(cdf_old)
    myplot.Show()

    # Speed that holds the same rank within the next group.
    speeds_5059 = GetSpeeds_M5059(results)
    cdf_5059 = Cdf.MakeCdfFromList(speeds_5059)
    future_speed = cdf_5059.Value(rank)
    print('%s speed' % (future_speed,))
    print('%s time' % (convert_speeds_to_time(future_speed),))
    myplot.Cdf(cdf_5059)
    myplot.Show()

    fspeeds = GetSpeeds_F2039(results)
    cdf_female = Cdf.MakeCdfFromList(fspeeds)
    fspeed = cdf_female.Value(rank)
    print('%s speed' % (fspeed,))
    print('%s time' % (convert_speeds_to_time(fspeed),))
    myplot.Cdf(cdf_female)
    myplot.Show()
def MakeFigure():
    """Plot interarrival times read from 'babyboom.dat' against a model.

    The file is scanned up to the first line containing 'START DATA'; the
    last whitespace-separated field of every following line is read as an
    integer time.  The first time plus the successive differences form
    the interarrival sample.  Three figures are saved:

      interarrivals        -- CDF of the sample
      interarrivals_model  -- complementary CDFs of the sample and of an
                              exponential sample with the same mean and
                              size, log y axis
      interarrivals_logy   -- complementary CDF of the sample, log y axis
    """
    # The context manager guarantees the file is closed even if parsing fails.
    with open('babyboom.dat') as fp:
        # skip to the beginning of the data
        for line in fp:
            if 'START DATA' in line:
                break

        # the same iterator continues after the marker line
        times = [int(line.split()[-1]) for line in fp]

    # interarrival times; the first entry is the first time itself
    diffs = [times[0]] + [later - earlier
                          for earlier, later in zip(times, times[1:])]

    n = len(diffs)
    mu = thinkstats.Mean(diffs)
    print('mean interarrival time %s' % (mu,))

    cdf = Cdf.MakeCdfFromList(diffs, 'actual')

    # 1.0 keeps the rate a float even if mu were an int on Python 2.
    rate = 1.0 / mu
    sample = [random.expovariate(rate) for _ in range(n)]
    model = Cdf.MakeCdfFromList(sample, 'model')

    formats = ['eps', 'png', 'pdf']

    myplot.Cdf(cdf)
    myplot.Save(root='interarrivals',
                title='Time between births',
                xlabel='minutes',
                ylabel='CDF',
                legend=False,
                formats=formats)

    myplot.Cdfs([cdf, model], complement=True)
    myplot.Save(root='interarrivals_model',
                title='Time between births',
                xlabel='minutes',
                ylabel='Complementary CDF',
                yscale='log',
                formats=formats)

    pyplot.subplots_adjust(bottom=0.11)
    myplot.Cdf(cdf, complement=True)
    myplot.Save(root='interarrivals_logy',
                title='Time between births',
                xlabel='minutes',
                ylabel='Complementary CDF',
                yscale='log',
                legend=False,
                formats=formats)
def main():
    """Show a weight CDF and the CDF of a 1000-value sample drawn from it.

    Pregnancy records are read from '../chap1/', weight_cdf() builds the
    CDF that is plotted first, then Sample() draws 1000 values from it and
    the CDF of that sample is plotted.
    """
    pregnancies = survey.Pregnancies()
    pregnancies.ReadRecords('../chap1/')

    original_cdf = weight_cdf(pregnancies)
    myplot.Cdf(original_cdf)
    myplot.show()

    resampled = Sample(original_cdf, 1000)
    myplot.Cdf(Cdf.MakeCdfFromList(resampled))
    myplot.Show()
def main(script, *args):
    """Plot the income CDF on log-x axes and its complement on log-log axes.

    script: unused here
    args: unused here
    """
    income_data = ReadIncomeFile()
    _hist, _pmf, income_cdf = MakeIncomeDist(income_data)

    # CDF, x axis on a log scale
    logx_options = dict(root='income_logx', xscale='log')
    myplot.Cdf(income_cdf, **logx_options)

    # complementary CDF, both axes on a log scale; this one is also shown
    loglog_options = dict(root='income_loglog',
                          complement=True,
                          xscale='log',
                          yscale='log',
                          show=True)
    myplot.Cdf(income_cdf, **loglog_options)
def observe_data(l, name=None, show=False):
    """Print quartile and mean statistics of a distribution; optionally plot.

    l: a list of values, a Pmf.Pmf, or a Cdf.Cdf
    name: label for the printed summary.  When omitted it falls back to
          the Pmf/Cdf object's own name, or to 'data' for a plain list.
    show: if true, plot the PMF (when one is available) and then the CDF

    Raises TypeError if l is none of the supported types.
    """
    pmf = None
    if isinstance(l, list):
        # A list carries no name of its own; without this fallback the
        # concatenations below raise TypeError for the default name=None.
        if name is None:
            name = 'data'
        cdf = Cdf.MakeCdfFromList(l, name + ' cdf')
        pmf = Pmf.MakePmfFromList(l, name + ' pmf')
    elif isinstance(l, Pmf.Pmf):
        pmf = l
        cdf = Cdf.MakeCdfFromPmf(l)
        if name is None:
            name = pmf.name
    elif isinstance(l, Cdf.Cdf):
        # no PMF is available in this case, so only the CDF can be plotted
        cdf = l
        if name is None:
            name = cdf.name
    else:
        raise TypeError('input parameter type is wrong')

    v_25 = cdf.Percentile(25)
    median = cdf.Percentile(50)
    v_75 = cdf.Percentile(75)
    mean = cdf.Mean()

    # Each quartile is followed in parentheses by its distance to the median.
    print('%s: 1/4:%4.2f(%4.2f), 1/2:%4.2f(mean-median:%4.2f), mean:%4.2f, 3/4:%4.2f(%4.2f)' %
          (name, v_25, median - v_25, median, mean - median, mean,
           v_75, v_75 - median))

    if show:
        if pmf is not None:
            myplot.Pmf(pmf)
            myplot.Show()
        myplot.Cdf(cdf)
        myplot.Show()
def main():
    """Plot the speed distribution after biasing it with BiasPmf.

    Builds the PMF of race speeds, passes it to BiasPmf with the value 7.5,
    saves a histogram of the biased PMF under the root 'observed_speeds',
    and then displays its CDF.
    """
    race_results = relay.ReadResults()
    actual_pmf = Pmf.MakePmfFromList(relay.GetSpeeds(race_results),
                                     'actual speeds')

    # the biased distribution seen by the observer
    observed_pmf = BiasPmf(actual_pmf, 7.5, name='observed speeds')

    speed_label = 'speed (mph)'

    myplot.Clf()
    myplot.Hist(observed_pmf)
    myplot.Save(root='observed_speeds',
                title='PMF of running speed',
                xlabel=speed_label,
                ylabel='probability')

    observed_cdf = Cdf.MakeCdfFromPmf(observed_pmf)

    myplot.Clf()
    myplot.Cdf(observed_cdf)
    myplot.show(root='observed_speeds_cdf',
                title='CDF of running speed',
                xlabel=speed_label,
                ylabel='cumulative probability')
def PlotAges(resp):
    """Show the CDF of the `age` attribute over resp.records.

    resp: object whose `records` attribute is iterable and whose items
          each have an `age` attribute
    """
    age_cdf = Cdf.MakeCdfFromList([record.age for record in resp.records])

    myplot.Clf()
    myplot.Cdf(age_cdf)
    myplot.Show()
def main():
    """Exercise 3.6: percentile rank of one birth weight in two populations.

    Reads the pregnancy table, keeps live births (outcome == 1) whose
    pound and ounce fields are both integers and whose total weight is at
    most 200 oz, and prints the percentile rank of a 163 oz birth weight
    among all such births and among those with birthord != 1.  The CDF of
    the second group is then shown.
    """
    my_birth_weight = 163  # in ounces, like the lbs * 16 + oz totals below

    def weights_in_ounces(records):
        """Return lbs*16+oz for records with integer fields, capped at 200."""
        weights = []
        for rec in records:
            lbs, oz = rec.birthwgt_lb, rec.birthwgt_oz
            # non-integer fields mark missing data; skip those records
            if not (isinstance(lbs, int) and isinstance(oz, int)):
                continue
            total = lbs * 16 + oz
            if total <= 200:
                weights.append(total)
        return weights

    table = survey.Pregnancies()
    table.ReadRecords()

    live_births = [p for p in table.records if p.outcome == 1]
    live_cdf = Cdf.MakeCdfFromList(weights_in_ounces(live_births),
                                   name="live birth weights")
    print("My birth weight percentile rank (vs all births): %d" %
          (100 * live_cdf.Prob(my_birth_weight)))

    not_first_births = [p for p in live_births if p.birthord != 1]
    not_first_cdf = Cdf.MakeCdfFromList(weights_in_ounces(not_first_births),
                                        name="not first live birth weights")
    # The population here is the non-first births; the label used to say
    # "first births", which contradicted the data being ranked against.
    print("My birth weight percentile rank (vs not-first births): %d" %
          (100 * not_first_cdf.Prob(my_birth_weight)))

    myplot.Cdf(not_first_cdf)
    myplot.Show(title="not first live birth weight CDF",
                xlabel="birth weight oz",
                ylabel="probability")
def process(data):
    """Show histogram, PMF and CDF of data, with normal-model points overlaid.

    data: sequence of numbers

    After the histogram and PMF plots, the empirical CDF is drawn together
    with a scatter of erf.NormalCdf evaluated (using the mean and standard
    deviation of data) at the values returned by normal_sample().
    """
    # histogram
    histogram = Pmf.MakeHistFromList(data, name='hist')
    myplot.Hist(histogram, color='blue')
    myplot.Show()

    # PMF derived from the histogram
    myplot.Pmf(Pmf.MakePmfFromHist(histogram, name='pmf'), color='yellow')
    myplot.Show()

    # CDF of the actual data
    myplot.Clf()
    empirical_cdf = Cdf.MakeCdfFromList(data, name='loafs')
    myplot.Cdf(empirical_cdf)

    mu, var = thinkstats.MeanVar(data)
    sigma = math.sqrt(var)
    print("mu = %.3f, sigma = %.3f" % (mu, sigma))

    # normal distribution with the same mean and standard deviation
    xs = normal_sample(len(data), mu, sigma)
    ys = []
    for x in xs:
        ys.append(erf.NormalCdf(x, mu=mu, sigma=sigma))

    myplot.Scatter(xs, ys, color='red', label='sample')
    myplot.Show()
def main():
    """Plot the CDF and the log-y complementary CDF of birth intervals.

    Records are read from the 'res' directory; each interval is the
    difference between a record's `minutes` and the previous record's
    (the first one is measured from 0).
    """
    babies = BabyBoom.Babies()
    babies.ReadRecords(data_dir='res', n=None)

    minutes = [record.minutes for record in babies.records]
    # pair every time with its predecessor; the first predecessor is 0
    intervals = [current - previous
                 for previous, current in zip([0] + minutes, minutes)]

    interval_cdf = Cdf.MakeCdfFromList(intervals, name='baby interval')
    myplot.Cdf(interval_cdf, complement=False, transform=None)
    myplot.Show()

    # log-scaled y axis for the CCDF, where CCDF(x) = 1 - CDF(x)
    myplot.Clf()
    myplot.Cdf(interval_cdf, complement=True, yscale='log')
    myplot.Show()
def main():
    """Show the log-log complementary CDF of values from paretovariate().

    paretovariate() is called with (1000, 1.7, 100).
    NOTE(review): these are presumably sample size, shape and scale --
    confirm against paretovariate's definition.
    """
    sz = 1000
    alph = 1.7
    exem = 100

    variates = paretovariate(sz, alph, exem)
    variates_cdf = Cdf.MakeCdfFromList(variates)

    myplot.Clf()
    myplot.Cdf(variates_cdf, complement=True, xscale='log', yscale='log')

    plot_title = 'CCDF of {0} random paretovariates'.format(sz)
    myplot.Show(title=plot_title)
def MakeCdfs(lens):
    """Save log-x CDF and log-log complementary CDF figures for lens.

    lens: sequence of values used to build the 'slashdot' CDF

    Figures are saved under the roots 'slashdot.logx' and
    'slashdot.loglog'.
    """
    slashdot_cdf = Cdf.MakeCdfFromList(lens, 'slashdot')

    # axis labels shared by both figures
    labels = dict(xlabel='Number of friends/foes', ylabel='CDF')

    myplot.Clf()
    myplot.Cdf(slashdot_cdf)
    myplot.Save(root='slashdot.logx', xscale='log', **labels)

    myplot.Clf()
    myplot.Cdf(slashdot_cdf, complement=True)
    myplot.Save(root='slashdot.loglog', xscale='log', yscale='log', **labels)
def MakeFigures(pool, firsts, others):
    """Save the age/weight figures: four CDF plots and one hexbin plot.

    pool: object providing age_cdf and weight_cdf; also passed to
          GetAgeWeight()
    firsts: object providing age_cdf and weight_cdf
    others: object providing age_cdf and weight_cdf
    """
    # labels shared by the single-CDF and the two-CDF variant of each figure
    age_labels = dict(title="Distribution of mother's age",
                      xlabel='age (years)',
                      ylabel='CDF')
    weight_labels = dict(title="Distribution of birth weight",
                         xlabel='birth weight (oz)',
                         ylabel='CDF')

    # pooled ages
    myplot.Clf()
    myplot.Cdf(pool.age_cdf)
    myplot.Save(root='agemodel_age_cdf', legend=False, **age_labels)

    # pooled weights
    myplot.Clf()
    myplot.Cdf(pool.weight_cdf)
    myplot.Save(root='agemodel_weight_cdf', legend=False, **weight_labels)

    # ages, first babies vs others
    myplot.Clf()
    myplot.Cdfs([firsts.age_cdf, others.age_cdf])
    myplot.Save(root='agemodel_age_cdfs', **age_labels)

    # weights, first babies vs others
    myplot.Clf()
    myplot.Cdfs([firsts.weight_cdf, others.weight_cdf])
    myplot.Save(root='agemodel_weight_cdfs', **weight_labels)

    # hexbin density of age against weight
    ages, weights = GetAgeWeight(pool)
    pyplot.clf()
    pyplot.hexbin(ages, weights, cmap=matplotlib.cm.gray_r)
    myplot.Save(root='agemodel_scatter',
                xlabel='Age (years)',
                ylabel='Birth weight (oz)',
                legend=False)
def PlotPosteriorMeans(d, name):
    """Plots the CDF of the means of the posteriors.

    d: map from code to posterior Suite
    name: label for the cdf
    """
    # values() exists on both Python 2 and 3 dicts; itervalues() is
    # Python 2 only and raises AttributeError on Python 3.
    means = [posterior.Mean() for posterior in d.values()]
    cdf = thinkbayes.MakeCdfFromList(means, name=name)
    myplot.Cdf(cdf)
def CheckCdf2():
    """Compare simulated chi2 values with a chi-squared distribution.

    Draws 1000 values from SimulateChi2(), maps each through the
    chi-squared CDF with 3 degrees of freedom, and shows the CDF of the
    mapped values.
    """
    degrees_of_freedom = 3

    simulated = []
    for _ in range(1000):
        simulated.append(SimulateChi2())

    mapped = [scipy.stats.chi2.cdf(value, degrees_of_freedom)
              for value in simulated]

    myplot.Cdf(Cdf.MakeCdfFromList(mapped))
    myplot.Show()
def main():
    """Show the PMF and then the CDF of 1000 random values in [0, 100)."""
    # named `values` so the builtin `list` is not shadowed
    values = []
    for _ in range(1000):
        values.append(100 * random.random())

    value_pmf = Pmf.MakePmfFromList(values, name='pfm')
    value_cdf = Cdf.MakeCdfFromList(values, name='cdf')

    myplot.Pmf(value_pmf)
    myplot.Show()

    myplot.Clf()
    myplot.Cdf(value_cdf)
    myplot.Show()
def main():
    """Exercise 3.5: show the CDF of the race speeds from relay."""
    race_results = relay.ReadResults()
    race_speeds = relay.GetSpeeds(race_results)

    mplt.Cdf(Cdf.MakeCdfFromList(race_speeds, "race speeds"))

    display_options = dict(title="Race Speed CDF",
                           xlabel="speed in mph",
                           ylabel="probability")
    mplt.show(**display_options)
def PlotPosterior(self, root=None, clf=False):
    """Plot the CDF returned by self.Cdf().

    root: forwarded to myplot.Cdf; a truthy value also forces clf to True
    clf: forwarded to myplot.Cdf unless overridden by a truthy root
    """
    # a truthy root always overrides the caller's clf
    effective_clf = True if root else clf

    myplot.Cdf(self.Cdf(),
               root=root,
               clf=effective_clf,
               xlabel='# of taxa',
               ylabel='prob',
               legend=False)
def MakeExample():
    """Plot the CDF of the small sample [2, 1, 3, 2, 5] as 'example_cdf'."""
    example_cdf = Cdf.MakeCdfFromList([2, 1, 3, 2, 5])

    plot_options = dict(root='example_cdf',
                        title='CDF',
                        xlabel='x',
                        ylabel='CDF(x)',
                        axis=[0, 6, 0, 1],
                        legend=False)
    myplot.Cdf(example_cdf, **plot_options)
def Q2(results):
    """Show the CDF of results and print its values at p = 0.95 and 0.99.

    results: list of numbers; it is sorted in place
    """
    results.sort()

    results_cdf = Cdf.MakeCdfFromList(results, name='cdf')
    myplot.Cdf(results_cdf)
    myplot.Show()

    for probability in (0.95, 0.99):
        print("p:%4.2f significant_value = %d" %
              (probability, results_cdf.Value(probability)))
def CheckCdf():
    """Overlay the Chi2Cdf curve with the CDF of 1000 simulated values.

    The curve comes from Chi2Cdf(df=3, high=15); the sample comes from
    1000 calls to SimulateChi2().
    """
    curve_xs, curve_ys = Chi2Cdf(df=3, high=15)
    pyplot.plot(curve_xs, curve_ys)

    simulated = []
    for _ in range(1000):
        simulated.append(SimulateChi2())

    myplot.Cdf(Cdf.MakeCdfFromList(simulated))
    myplot.Show()
def main():
    """Show the CDF of the running speeds read through relay."""
    speed_cdf = Cdf.MakeCdfFromList(relay.GetSpeeds(relay.ReadResults()),
                                    'speeds')

    # distribution of actual speeds, displayed immediately
    plot_options = dict(title='CDF of running speed',
                        xlabel='speed (mph)',
                        ylabel='probability',
                        show=True)
    myplot.Cdf(speed_cdf, **plot_options)
def PlotMarginals(suite):
    """Plot the marginal distributions for a 2-D joint distribution.

    suite: passed to ComputeMarginals(); its `name` is used in the root of
           the saved figure

    The two marginal CDFs are drawn side by side in a 1x2 grid.
    """
    pmf_m, pmf_s = ComputeMarginals(suite)

    pyplot.clf()
    pyplot.figure(1, figsize=(7, 4))

    # (marginal PMF, CDF name, x-axis label), left panel first
    panels = [
        (pmf_m, 'mu', 'Mean height (cm)'),
        (pmf_s, 'sigma', 'Std Dev height (cm)'),
    ]
    for position, (marginal, cdf_name, x_label) in enumerate(panels, 1):
        pyplot.subplot(1, 2, position)
        myplot.Cdf(Cdf.MakeCdfFromPmf(marginal, cdf_name))
        pyplot.xlabel(x_label)
        pyplot.ylabel('CDF')

    myplot.Save(root='bayes_height_marginals_%s' % suite.name)
def main():
    """Show the CDF of paretovariate(1, 0.5) and its CCDF on log-log axes.

    The complementary values come from ccdf_list() and are plotted
    against the CDF's values with pyplot.
    """
    pareto_cdf = Cdf.MakeCdfFromList(paretovariate(1, 0.5))
    myplot.Cdf(pareto_cdf)
    myplot.show()

    complement = ccdf_list(pareto_cdf)

    pyplot_api = myplot.pyplot
    pyplot_api.plot(pareto_cdf.Values(), complement)
    for set_scale in (pyplot_api.xscale, pyplot_api.yscale):
        set_scale('log')
    pyplot_api.show()
def main(script, *args):
    """Save the income CDF (log-x) and complementary CDF (log-log) figures.

    script: unused here
    args: unused here
    """
    _hist, _pmf, income_cdf = MakeIncomeDist(ReadIncomeFile())

    # CDF with the x axis on a log scale
    myplot.Clf()
    myplot.Cdf(income_cdf)
    logx_options = dict(root='income_logx',
                        xscale='log',
                        xlabel='income',
                        ylabel='CDF')
    myplot.Save(**logx_options)

    # complementary CDF with both axes on a log scale
    myplot.Clf()
    myplot.Cdf(income_cdf, complement=True)
    loglog_options = dict(root='income_loglog',
                          complement=True,
                          xscale='log',
                          yscale='log',
                          xlabel='income',
                          ylabel='complementary CDF')
    myplot.Save(**loglog_options)
def MakeFigures(pool, firsts, others):
    """Save PMF and CDF figures of birth weight for firsts and others.

    pool: unused here
    firsts: object providing weight_pmf and weight_cdf (drawn in blue)
    others: object providing weight_pmf and weight_cdf (drawn in orange)
    """
    groups = ((firsts, 'blue'), (others, 'orange'))

    # birth-weight PMFs as overlaid histograms
    myplot.Clf()
    for group, colour in groups:
        myplot.Hist(group.weight_pmf, linewidth=0, color=colour)
    myplot.Save(root='nsfg_birthwgt_pmf',
                title='Birth weight PMF',
                xlabel='weight (ounces)',
                ylabel='probability')

    # birth-weight CDFs on a fixed axis range
    myplot.Clf()
    for group, colour in groups:
        myplot.Cdf(group.weight_cdf, linewidth=2, color=colour)
    myplot.Save(root='nsfg_birthwgt_cdf',
                title='Birth weight CDF',
                xlabel='weight (ounces)',
                ylabel='probability',
                axis=[0, 200, 0, 1])
def main():
    """Show the CDF of weibullvariate(1, 1) and its CCDF on a log y axis.

    Original author's note: when k=1 the Weibull plot would be linear.
    NOTE(review): which argument of weibullvariate() is k is not visible
    here -- confirm against its definition.
    """
    weibull_cdf = Cdf.MakeCdfFromList(weibullvariate(1, 1))
    myplot.Cdf(weibull_cdf)
    myplot.show()

    complement = ccdf_list(weibull_cdf)

    pyplot_api = myplot.pyplot
    pyplot_api.plot(weibull_cdf.Values(), complement)
    # only the y axis is log-scaled; x stays linear
    pyplot_api.yscale('log')
    pyplot_api.show()
def main():
    """Exercises 3.9 and 3.10: resample a weight CDF; plot random numbers.

    3.9: builds the CDF of live-birth weights (integer pound/ounce fields,
    total at most 200), draws 1000 values from it with sample() and shows
    the CDF of the draw.
    3.10: shows the PMF and the CDF of 1000 values from random.random().
    """
    # Exercise 3.9
    table = survey.Pregnancies()
    table.ReadRecords()

    live_pairs = []
    for p in table.records:
        if p.outcome == 1:
            live_pairs.append((p.birthwgt_lb, p.birthwgt_oz))

    live_weights = []
    for lbs, oz in live_pairs:
        # exact type test, as in the original filter
        if type(lbs) is int and type(oz) is int and lbs * 16 + oz <= 200:
            live_weights.append(lbs * 16 + oz)

    live_cdf = Cdf.MakeCdfFromList(live_weights, name="live birth weights")

    resampled_weights = sample(live_cdf, 1000)
    myplot.Cdf(Cdf.MakeCdfFromList(resampled_weights))
    myplot.show(title="CDF of live births resampled")

    # Exercise 3.10
    random_values = [random.random() for _ in range(1000)]

    myplot.Pmf(Pmf.MakePmfFromList(random_values))
    myplot.show(title="random pmf")

    myplot.Cdf(Cdf.MakeCdfFromList(random_values))
    myplot.Show(title="random cdf")
def main():
    """Plot an exponential sample's analytic CDF beside its empirical CDF.

    Draws 43 values from random.expovariate with rate 1 / 32.6, plots
    1 - e**(-rate * x) at the sorted values, then prints the mean and
    median of the empirical CDF and draws it on the same figure.
    """
    # F(x) computed from the formula
    rate = 1 / 32.6
    draws = [random.expovariate(rate) for _ in range(43)]
    draws.sort()

    analytic = []
    for x in draws:
        analytic.append(1 - math.e ** (-rate * x))
    myplot.Plot(draws, analytic, label='formular')

    # F(x) obtained by cumulative counting, i.e. the empirical CDF
    empirical = Cdf.MakeCdfFromList(draws, name='expovariate')
    print("mu = ", empirical.Mean())
    print("median = ", empirical.Percentile(50))

    myplot.Cdf(empirical, complement=False, transform=None)
    myplot.Show()