def Main():
    """Run Ratio() and stop.

    NOTE(review): the unconditional ``return`` directly after ``Ratio()``
    makes all of the prior/posterior plotting code below unreachable.  The
    early exit is preserved here; remove it deliberately if the coin plots
    are wanted again.
    """
    Ratio()
    return

    # ---- unreachable while the early return above is in place ----

    # Prior giving equal weight to every hypothesis 0..100.
    uniform = Pmf.Pmf()
    for hypo in range(101):
        uniform.Set(hypo, 1)
    uniform.Normalize()

    triangle = TrianglePrior()

    axis_text = dict(title='Biased coin', xlabel='x', ylabel='Probability')

    # plot the priors
    myplot.Clf()
    myplot.Pmfs([uniform, triangle])
    myplot.Save(root='simple_coin_both_prior', **axis_text)

    for prior in (uniform, triangle):
        RunUpdate(prior)

    # plot the posterior distributions
    myplot.Clf()
    myplot.Pmfs([uniform, triangle])
    myplot.Save(root='simple_coin_both_post', **axis_text)
def Main():
    """Plot CDFs of posterior photo means, split by ground-truth label.

    Reads the truth pairs and the labels, builds the photo/labeler objects,
    runs the updates, then collects ``photo.Mean()`` for every photo that has
    a truth entry: into ``yes`` when the truth label is ``'1'`` and into
    ``no`` otherwise.  The two lists are shown as CDFs.

    NOTE(review): the ``return`` after the first ``myplot.Show()`` makes the
    PlotPosteriorMeans calls at the bottom unreachable; that early exit is
    kept as in the original.
    """
    # ReadTruth() yields (pcode, label) pairs.
    truth_map = dict(ReadTruth())

    labels = ReadLabels()
    photo_map, labeler_map = MakeObjects(labels)
    RunUpdates(photo_map, labeler_map, labels)

    yes = []
    no = []
    # items() exists on both Python 2 and 3 dicts; iteritems() is 2-only.
    for pcode, photo in photo_map.items():
        if pcode not in truth_map:
            continue
        mean = photo.Mean()
        if truth_map[pcode] == '1':
            yes.append(mean)
        else:
            no.append(mean)

    myplot.Clf()
    cdf_yes = thinkbayes.MakeCdfFromList(yes, name='yes')
    cdf_no = thinkbayes.MakeCdfFromList(no, name='no')
    myplot.Cdfs([cdf_yes, cdf_no])
    myplot.Show()
    return

    # ---- unreachable while the early return above is in place ----
    myplot.Clf()
    PlotPosteriorMeans(photo_map, 'photos')
    PlotPosteriorMeans(labeler_map, 'labelers')
    myplot.Show()
def main():
    """Plot the biased running-speed distribution as a histogram and a CDF.

    Reads the relay results, builds the PMF of actual speeds, biases it with
    ``BiasPmf(pmf, 7.5, ...)``, saves the histogram of the biased PMF under
    the root 'observed_speeds', then displays its CDF.
    """
    results = relay.ReadResults()
    speeds = relay.GetSpeeds(results)
    pmf = Pmf.MakePmfFromList(speeds, 'actual speeds')

    # plot the biased distribution seen by the observer
    biased = BiasPmf(pmf, 7.5, name='observed speeds')
    myplot.Clf()
    myplot.Hist(biased)
    myplot.Save(root='observed_speeds',
                title='PMF of running speed',
                xlabel='speed (mph)',
                ylabel='probability')

    cdf = Cdf.MakeCdfFromPmf(biased)
    myplot.Clf()
    myplot.Cdf(cdf)
    # Every other myplot call here uses the capitalised API (Clf, Hist, Cdf,
    # Save), so the lower-case ``show`` is spelled ``Show`` to match.
    # NOTE(review): ``root`` is passed through unchanged; if the intent was to
    # write 'observed_speeds_cdf' to disk, myplot.Save is the call to use.
    myplot.Show(root='observed_speeds_cdf',
                title='CDF of running speed',
                xlabel='speed (mph)',
                ylabel='cumulative probability')
def main():
    """Summarize and plot the height distributions of men and women.

    Reads the BRFSS records from ``res``, takes entries 1 (men) and 2 (women)
    of ``SummarizeHeight()``, prints the trimmed mean, variance, standard
    deviation and coefficient of variation for each, then shows in turn the
    histogram and PMF of each group and finally both CDFs on one figure.
    """
    resp = brfss.Respondents()
    resp.ReadRecords(data_dir='res')
    heights_by_sex = resp.SummarizeHeight()
    male_heights = heights_by_sex[1]
    female_heights = heights_by_sex[2]

    # mu, var, sigma and coefficient of variation (CV), men first, then women
    summaries = (
        (male_heights,
         "man: mu = %.3f, var = %.3f, sigma = %.3f, cv = %.3f"),
        (female_heights,
         "lady: mu = %.3f, var = %.3f, sigma = %.3f, cv = %.3f"),
    )
    for heights, template in summaries:
        mu, var = thinkstats.TrimmedMeanVar(heights)
        sigma = math.sqrt(var)
        cv = sigma / mu
        print(template % (mu, var, sigma, cv))

    # histogram of each group, one figure per group
    hists = []
    for heights, label in ((male_heights, 'man hist'),
                           (female_heights, 'lady hist')):
        hist = Pmf.MakeHistFromList(heights, name=label)
        myplot.Hist(hist)
        myplot.Show()
        myplot.Clf()
        hists.append(hist)

    # PMF of each group, derived from its histogram
    pmfs = []
    for hist, label in zip(hists, ('man pmf', 'lady pmf')):
        pmf = Pmf.MakePmfFromHist(hist, name=label)
        myplot.Pmf(pmf)
        myplot.Show()
        myplot.Clf()
        pmfs.append(pmf)

    # cumulative distributions of both groups on a single figure
    man_cdf = Cdf.MakeCdfFromPmf(pmfs[0], name='man cdf')
    lady_cdf = Cdf.MakeCdfFromPmf(pmfs[1], name='lady cdf')
    myplot.Cdfs((man_cdf, lady_cdf), complement=False, transform=None)
    myplot.Show()
def process(data):
    """Show the histogram, PMF and CDF of *data* with a normal-model overlay.

    data: sequence of numbers (its length is taken with ``len``).

    The histogram and the PMF are each shown on their own figure.  The last
    figure holds the CDF of the data plus a scatter of
    ``erf.NormalCdf`` evaluated at the points returned by ``normal_sample``,
    using the mean and standard deviation of the data, which are also
    printed.
    """
    # histogram
    histogram = Pmf.MakeHistFromList(data, name='hist')
    myplot.Hist(histogram, color='blue')
    myplot.Show()

    # PMF
    myplot.Pmf(Pmf.MakePmfFromHist(histogram, name='pmf'), color='yellow')
    myplot.Show()
    myplot.Clf()

    # CDF of the actual data
    myplot.Cdf(Cdf.MakeCdfFromList(data, name='loafs'))

    mu, var = thinkstats.MeanVar(data)
    sigma = math.sqrt(var)
    print("mu = %.3f, sigma = %.3f" % (mu, sigma))

    # normal distribution with the same mu and sigma
    xs = normal_sample(len(data), mu, sigma)
    ys = []
    for x in xs:
        ys.append(erf.NormalCdf(x, mu=mu, sigma=sigma))
    myplot.Scatter(xs, ys, color='red', label='sample')
    myplot.Show()
def PlotAges(resp):
    """Show the CDF of the ages found in ``resp.records``.

    resp: object with an iterable ``records`` attribute whose items each
          expose an ``age`` attribute.
    """
    age_cdf = Cdf.MakeCdfFromList([record.age for record in resp.records])

    myplot.Clf()
    myplot.Cdf(age_cdf)
    myplot.Show()
def main():
    """Show the complementary CDF of a Pareto sample on log-log axes.

    The sample comes from ``paretovariate(1000, 1.7, 100)``; the first
    argument is also reported in the figure title as the number of variates.
    """
    count = 1000
    sample = paretovariate(count, 1.7, 100)
    sample_cdf = Cdf.MakeCdfFromList(sample)

    myplot.Clf()
    myplot.Cdf(sample_cdf, complement=True, xscale='log', yscale='log')

    title = 'CCDF of {0} random paretovariates'.format(count)
    myplot.Show(title=title)
def MakeCdfs(lens):
    """Save the CDF and the complementary CDF of *lens*.

    lens: list of values passed to ``Cdf.MakeCdfFromList``.

    Two figures are written:
      * 'slashdot.logx'   - the CDF with a log-scaled x axis;
      * 'slashdot.loglog' - the complementary CDF on log-log axes.
    """
    cdf = Cdf.MakeCdfFromList(lens, 'slashdot')
    xlabel = 'Number of friends/foes'

    myplot.Clf()
    myplot.Cdf(cdf)
    myplot.Save(root='slashdot.logx',
                xlabel=xlabel,
                ylabel='CDF',
                xscale='log')

    myplot.Clf()
    myplot.Cdf(cdf, complement=True)
    # This figure plots the complement, so the y axis is labelled as such
    # (it was previously labelled plain 'CDF').
    myplot.Save(root='slashdot.loglog',
                xlabel=xlabel,
                ylabel='Complementary CDF',
                xscale='log',
                yscale='log')
def MakeFigures(pool, firsts, others):
    """Creates several figures for the book.

    pool, firsts, others: objects exposing ``age_cdf`` and ``weight_cdf``;
        ``pool`` is additionally handed to ``GetAgeWeight``.

    Saves four CDF figures (pooled ages, pooled weights, first-vs-other ages,
    first-vs-other weights) and one hexbin plot of age against weight.
    """
    age_title = "Distribution of mother's age"
    weight_title = "Distribution of birth weight"

    def save_cdf(root, title, xlabel, **extra):
        # All four CDF figures share the y label.
        myplot.Save(root=root, title=title, xlabel=xlabel, ylabel='CDF',
                    **extra)

    # CDF of all ages
    myplot.Clf()
    myplot.Cdf(pool.age_cdf)
    save_cdf('agemodel_age_cdf', age_title, 'age (years)', legend=False)

    # CDF of all weights
    myplot.Clf()
    myplot.Cdf(pool.weight_cdf)
    save_cdf('agemodel_weight_cdf', weight_title, 'birth weight (oz)',
             legend=False)

    # CDFs of mother's age for first babies and others
    myplot.Clf()
    myplot.Cdfs([firsts.age_cdf, others.age_cdf])
    save_cdf('agemodel_age_cdfs', age_title, 'age (years)')

    # CDFs of birth weight for first babies and others
    myplot.Clf()
    myplot.Cdfs([firsts.weight_cdf, others.weight_cdf])
    save_cdf('agemodel_weight_cdfs', weight_title, 'birth weight (oz)')

    # hexbin plot of ages against weights, drawn with pyplot directly
    ages, weights = GetAgeWeight(pool)
    pyplot.clf()
    pyplot.hexbin(ages, weights, cmap=matplotlib.cm.gray_r)
    myplot.Save(root='agemodel_scatter',
                xlabel='Age (years)',
                ylabel='Birth weight (oz)',
                legend=False)
def Resample(cdf, n=10000):
    """Draw a sample from *cdf* and save a figure comparing both CDFs.

    cdf: object with a ``Sample(n)`` method
    n: number of values to draw

    The figure is written under the root 'resample_cdf'.
    """
    resampled = Cdf.MakeCdfFromList(cdf.Sample(n), 'resampled')

    myplot.Clf()
    myplot.Cdfs([cdf, resampled])

    options = dict(root='resample_cdf',
                   title='CDF',
                   xlabel='weight in oz',
                   ylabel='CDF(x)')
    myplot.Save(**options)
def main():
    """Show the PMF and then the CDF of 1000 random values in [0, 100).

    Each value is ``100 * random.random()``.  The PMF and the CDF are built
    from the same list and displayed on separate figures.
    """
    # Named ``values`` rather than ``list`` so the builtin is not shadowed.
    values = [100 * random.random() for _ in range(1000)]

    # NOTE(review): 'pfm' looks like a typo for 'pmf'; left as-is because it
    # is the name passed at runtime.
    pmf = Pmf.MakePmfFromList(values, name='pfm')
    cdf = Cdf.MakeCdfFromList(values, name='cdf')

    myplot.Pmf(pmf)
    myplot.Show()

    myplot.Clf()
    myplot.Cdf(cdf)
    myplot.Show()
def main():
    """Show the histogram of the biased running-speed PMF.

    Builds the PMF of the speeds read from the relay results, biases it with
    ``BiasPmf(pmf, 7.5, 'observed speeds')`` and displays the result.
    """
    speeds = relay.GetSpeeds(relay.ReadResults())
    actual = Pmf.MakePmfFromList(speeds, 'actual speeds')
    observed = BiasPmf(actual, 7.5, 'observed speeds')

    myplot.Clf()
    myplot.Hist(observed)

    options = dict(title='observed speeds',
                   xlabel='speed (mph)',
                   ylabel='probability')
    myplot.Show(**options)
def main(script, *args):
    """Save the income CDF (log-x) and complementary CDF (log-log).

    script, args: accepted but not used in the body.

    Only the CDF returned by ``MakeIncomeDist`` is plotted; the histogram and
    PMF it also returns are ignored.
    """
    _hist, _pmf, income_cdf = MakeIncomeDist(ReadIncomeFile())

    # plot the CDF on a log-x scale
    myplot.Clf()
    myplot.Cdf(income_cdf)
    logx_options = dict(root='income_logx',
                        xscale='log',
                        xlabel='income',
                        ylabel='CDF')
    myplot.Save(**logx_options)

    # plot the complementary CDF on a log-log scale
    myplot.Clf()
    myplot.Cdf(income_cdf, complement=True)
    loglog_options = dict(root='income_loglog',
                          complement=True,
                          xscale='log',
                          yscale='log',
                          xlabel='income',
                          ylabel='complementary CDF')
    myplot.Save(**loglog_options)
def MakeExample():
    """Make a simple example CDF.

    Builds the CDF of the values 2, 1, 3, 2, 5 and saves it under the root
    'example_cdf' with fixed axis limits and no legend.
    """
    example_cdf = Cdf.MakeCdfFromList([2, 1, 3, 2, 5])

    myplot.Clf()
    myplot.Cdf(example_cdf)

    options = dict(root='example_cdf',
                   title='CDF',
                   xlabel='x',
                   ylabel='CDF(x)',
                   axis=[0, 6, 0, 1],
                   legend=False)
    myplot.Save(**options)
def MakeFigures(pool, firsts, others):
    """Creates several figures for the book.

    pool: accepted but not used in the body
    firsts, others: objects exposing ``weight_pmf`` and ``weight_cdf``

    Saves 'nsfg_birthwgt_pmf' (overlaid histograms) and 'nsfg_birthwgt_cdf'
    (overlaid CDFs); firsts are drawn in blue, others in orange.
    """
    groups = ((firsts, 'blue'), (others, 'orange'))

    # PMFs of birth weights for first babies and others
    myplot.Clf()
    for group, color in groups:
        myplot.Hist(group.weight_pmf, linewidth=0, color=color)
    myplot.Save(root='nsfg_birthwgt_pmf',
                title='Birth weight PMF',
                xlabel='weight (ounces)',
                ylabel='probability')

    # CDFs of birth weights for first babies and others
    myplot.Clf()
    for group, color in groups:
        myplot.Cdf(group.weight_cdf, linewidth=2, color=color)
    myplot.Save(root='nsfg_birthwgt_cdf',
                title='Birth weight CDF',
                xlabel='weight (ounces)',
                ylabel='probability',
                axis=[0, 200, 0, 1])
def MakeFigures(exam, alice, bob):
    """Save the prior and posterior PMF figures as PNG files.

    exam: object whose ``prior`` attribute is plotted into 'sat_prior'
    alice, bob: distributions plotted together into 'sat_posterior'
    """
    shared = dict(formats=['png'], xlabel='p', ylabel='PMF')

    # The prior is drawn on whatever figure is current; no Clf() precedes it.
    myplot.Pmf(exam.prior, label='prior')
    myplot.Save(root='sat_prior', **shared)

    myplot.Clf()
    myplot.Pmfs([alice, bob])
    myplot.Save(root='sat_posterior', **shared)
def MakeFigures():
    """Save CDF figures of city/town populations and a normal plot.

    Prints the number of populations read, then writes three figures from the
    same CDF: linear axes ('populations'), log x axis ('populations_logx')
    and the complementary CDF on log-log axes ('populations_loglog').
    Finally the sorted natural logs of the populations are handed to
    ``rankit.MakeNormalPlot`` with the root 'populations_rankit'.
    """
    pops = populations.ReadData()
    # Function-call form prints the same value on Python 2 and Python 3.
    print(len(pops))

    cdf = Cdf.MakeCdfFromList(pops, 'populations')
    title = 'City/Town Populations'

    myplot.Clf()
    myplot.Cdf(cdf)
    myplot.Save(root='populations',
                title=title,
                xlabel='population',
                ylabel='CDF',
                legend=False)

    myplot.Clf()
    myplot.Cdf(cdf)
    myplot.Save(root='populations_logx',
                title=title,
                xlabel='population',
                ylabel='CDF',
                xscale='log',
                legend=False)

    myplot.Clf()
    myplot.Cdf(cdf, complement=True)
    myplot.Save(root='populations_loglog',
                title=title,
                xlabel='population',
                ylabel='Complementary CDF',
                yscale='log',
                xscale='log',
                legend=False)

    log_pops = sorted(math.log(x) for x in pops)
    rankit.MakeNormalPlot(log_pops, 'populations_rankit')
def PlotCdfs(samples):
    """Make CDFs showing the distribution of outliers.

    samples: map from label to a sequence of values

    For each label, only the values below 150 are kept; one CDF per label is
    drawn and the figure is saved under the root 'bayes_height_cdfs'.
    """
    # items() exists on both Python 2 and 3 dicts; iteritems() is 2-only.
    cdfs = [
        Cdf.MakeCdfFromList([x for x in sample if x < 150], label)
        for label, sample in samples.items()
    ]

    myplot.Clf()
    myplot.Cdfs(cdfs)
    myplot.Save(root='bayes_height_cdfs',
                title='CDF of height',
                xlabel='Reported height (cm)',
                ylabel='CDF')
def MakeFigures(pmf, biased_pmf):
    """Makes figures showing the CDF of the biased and unbiased PMFs.

    pmf: unbiased PMF
    biased_pmf: biased PMF

    Prints the 50th percentile of each CDF, plus ``Prob(100)`` and
    ``Prob(1000)`` of the unbiased one, then saves both CDFs on a log-x
    figure under the root 'slashdot.logx'.
    """
    unbiased = Cdf.MakeCdfFromPmf(pmf, 'unbiased')
    print('unbiased median', unbiased.Percentile(50))
    print('percent < 100', unbiased.Prob(100))
    print('percent < 1000', unbiased.Prob(1000))

    biased = Cdf.MakeCdfFromPmf(biased_pmf, 'biased')
    print('biased median', biased.Percentile(50))

    myplot.Clf()
    myplot.Cdfs([unbiased, biased])

    options = dict(root='slashdot.logx',
                   xlabel='Number of friends/foes',
                   ylabel='CDF',
                   xscale='log')
    myplot.Save(**options)
def main():
    """Show the CDF and the log-y CCDF of the gaps between baby records.

    Reads the records from ``res`` and takes the difference between each
    record's ``minutes`` and the previous record's (the first is measured
    from 0).  The CDF of those gaps is shown, then its complement with a
    log-scaled y axis.
    """
    babies = BabyBoom.Babies()
    babies.ReadRecords(data_dir='res', n=None)

    minutes = [record.minutes for record in babies.records]
    # Pair each time with the one before it; the first is paired with 0.
    gaps = [later - earlier
            for earlier, later in zip([0] + minutes[:-1], minutes)]

    gap_cdf = Cdf.MakeCdfFromList(gaps, name='baby interval')
    myplot.Cdf(gap_cdf, complement=False, transform=None)
    myplot.Show()

    # log scale on the y axis for the CCDF, where CCDF(x) = 1 - CDF(x)
    myplot.Clf()
    myplot.Cdf(gap_cdf, complement=True, yscale='log')
    myplot.Show()
def CmpNormalModelWithDataSample():
    """Overlay the empirical CDF of baby weights with a normal-model CDF.

    Takes the third value returned by ``Babies.PartitionBabies()``, builds
    the PMF of its weight list, prints the PMF's mean, variance and standard
    deviation, then plots the CDF of the data together with
    ``erf.NormalCdf`` evaluated at the PMF's values.
    """
    # Only the third return value is used here.
    _firsts, _others, babies = Babies.PartitionBabies()
    weights = Babies.GetWightList(babies)
    pmf = Pmf.MakePmfFromList(weights)

    mu = pmf.Mean()
    var = pmf.Var(mu)
    sigma = math.sqrt(var)
    print("mu = {}, var = {}, sigma = {}".format(mu, var, sigma))

    # empirical distribution of the data
    cdf = Cdf.MakeCdfFromPmf(pmf, name='data')
    # Spelled ``Cdf`` to match the capitalised myplot API used by the other
    # calls in this function (Plot, Show, Clf); it was ``myplot.cdf``.
    myplot.Cdf(cdf)

    # model: normal CDF with the same mu and sigma, via the error function
    xs, _ = pmf.Render()
    ys = [erf.NormalCdf(x, mu, sigma) for x in xs]
    myplot.Plot(xs, ys, label='Model')

    myplot.Show()
    myplot.Clf()
def MakeFigure(xmin=100, alpha=1.7, mu=150, sigma=25):
    """Makes a figure showing the CDF of height in ParetoWorld.

    Compared to a normal distribution.

    xmin: parameter of the Pareto distribution
    alpha: parameter of the Pareto distribution
    mu: parameter of the Normal distribution
    sigma: parameter of the Normal distribution

    Each sample has 10000 values; the figure is saved under the root
    'pareto_world2'.
    """
    sample_size = 10000

    pareto_sample = []
    for _ in range(sample_size):
        pareto_sample.append(xmin * random.paretovariate(alpha))
    pareto_cdf = Cdf.MakeCdfFromList(pareto_sample, name='pareto')

    normal_sample = []
    for _ in range(sample_size):
        normal_sample.append(random.normalvariate(mu, sigma))
    normal_cdf = Cdf.MakeCdfFromList(normal_sample, name='normal')

    myplot.Clf()
    myplot.Cdfs([pareto_cdf, normal_cdf])

    options = dict(root='pareto_world2',
                   title='Pareto World',
                   xlabel='height (cm)',
                   ylabel='CDF')
    myplot.Save(**options)
# Example 3-10
#
# Draws `size` values from random.random() and shows their PMF and CDF on
# separate figures.
import random

import Pmf
import Cdf
import myplot

size = 10000
lst = [random.random() for i in range(size)]

lst_pmf = Pmf.MakePmfFromList(lst)
lst_cdf = Cdf.MakeCdfFromList(lst)

# PMF figure
pmf_title = 'PMF of {0} randoms'.format(size)
myplot.Clf()
myplot.Pmf(lst_pmf)
myplot.Show(title=pmf_title)

# CDF figure
cdf_title = 'CDF of {0} randoms'.format(size)
myplot.Clf()
myplot.Cdf(lst_cdf)
myplot.Show(title=cdf_title)

# yes, the distribution is uniform