def main():
    """Plot actual versus observer-biased distributions of relay speeds.

    Reads the results through the ``relay`` module, bins the speeds with
    ``relay.BinData(speeds, 3, 12, 100)`` and saves two figures:
    'observed_speeds' (PMF of the biased distribution only) and
    'observed_speeds_cdf' (CDFs of the actual and biased distributions).
    """
    raw_speeds = relay.GetSpeeds(relay.ReadResults())
    binned_speeds = relay.BinData(raw_speeds, 3, 12, 100)

    # Distribution of the speeds as recorded.
    actual_pmf = thinkstats2.Pmf(binned_speeds, 'actual speeds')

    # Distribution as seen by the observer; 7.5 is presumably the
    # observer's own speed in mph -- confirm against ObservedPmf.
    observed_pmf = ObservedPmf(actual_pmf, 7.5, label='observed speeds')

    # First figure: only the biased PMF is drawn.
    thinkplot.Pmf(observed_pmf)
    pmf_figure = dict(root='observed_speeds',
                      title='PMF of running speed',
                      xlabel='speed (mph)',
                      ylabel='PMF')
    thinkplot.Save(**pmf_figure)

    # Second figure: both CDFs on the same axes.
    actual_cdf = thinkstats2.Cdf(actual_pmf)
    observed_cdf = thinkstats2.Cdf(observed_pmf)

    thinkplot.PrePlot(2)
    thinkplot.Cdfs([actual_cdf, observed_cdf])
    cdf_figure = dict(root='observed_speeds_cdf',
                      title='CDF of running speed',
                      xlabel='speed (mph)',
                      ylabel='CDF')
    thinkplot.Save(**cdf_figure)
def ComparePriors(constructors, labels, dataset=None, largest_size=1000):
    """Compare the posteriors produced by several priors for the train problem.

    For each (constructor, label) pair a posterior is built with
    ``MakePosterior(largest_size, dataset, constructor)``, its mean and
    5th-95th percentile interval are printed, and its PMF is plotted.
    The combined figure is saved as 'one_many_firm_comparison'.

    constructors: list of suite constructors (anything inheriting from the
        Dice class, per the original notes)
    labels: labels for the corresponding prior distributions
    dataset: the train numbers that were spotted; defaults to [60]
    largest_size: the assumed size of the largest train company
    """
    # Use a None sentinel so the default list is never shared between calls.
    if dataset is None:
        dataset = [60]

    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    for constructor, label in zip(constructors, labels):
        suite = MakePosterior(largest_size, dataset, constructor)
        suite.name = label

        # Single-argument print(...) calls produce the same output under
        # Python 2 and Python 3.
        print("Expected Value for {}".format(suite.name))
        print("\t {}: {}".format(largest_size, suite.Mean()))
        print("\t 90 percent Credibility Interval")
        interval = Percentile(suite, 5), Percentile(suite, 95)
        print("\t {}".format(interval))

        thinkplot.Pmf(suite)

    thinkplot.Save(root='one_many_firm_comparison',
                   xlabel='Number of trains',
                   ylabel='Probability')
def MakePlot(self, root='redline4'):
    """Plot every component CDF of the meta-Pmf together with the mixture.

    root: filename root passed to thinkplot.Save

    All x values are scaled by 1/60 before plotting (presumably seconds
    to minutes, given the 'Wait time (min)' axis label -- confirm).
    """
    thinkplot.Clf()

    # One faint blue line per component; the line width is derived from
    # the probability attached to that component.
    for component, weight in sorted(self.metapmf.Items()):
        scaled = component.MakeCdf().Scale(1.0 / 60)
        line_width = 2 / math.log(-math.log(weight))
        thinkplot.Plot(scaled.xs,
                       scaled.ps,
                       alpha=0.2,
                       linewidth=line_width,
                       color='blue',
                       label='')

    # The mixture goes on top; the point-estimate curve is disabled.
    thinkplot.PrePlot(2)
    #thinkplot.Cdf(self.point.MakeCdf(name='point').Scale(1.0/60))
    mixture_cdf = self.mixture.MakeCdf(name='mix').Scale(1.0 / 60)
    thinkplot.Cdf(mixture_cdf)

    thinkplot.Save(root=root,
                   xlabel='Wait time (min)',
                   ylabel='CDF',
                   formats=FORMATS,
                   axis=[0, 10, 0, 1])
def PlotSurvivalFunctions(sf_map, predict_flag=False, colormap=None):
    """Plot estimated survival functions with a shaded 10%-90% band.

    sf_map: map from group name to sequence of survival functions
    predict_flag: whether the lines are predicted or actual; the median
        line is drawn only when this is false
    colormap: optional map from group name to line color

    Group names are formatted with '%ds', so they must be integers.
    """
    thinkplot.PrePlot(num=len(sf_map))

    for group_name, functions in sorted(sf_map.items(), reverse=True):
        # Skip groups with no survival functions or an empty first one.
        if len(functions) == 0 or len(functions[0]) == 0:
            continue

        ts, bands = MakeSurvivalCI(functions, [10, 50, 90])
        low, median, high = bands[0], bands[1], bands[2]
        thinkplot.FillBetween(ts, low, high, color='gray', alpha=0.2)

        if predict_flag:
            continue

        line_options = {'label': '%ds' % group_name}
        if colormap:
            line_options['color'] = colormap[group_name]
        thinkplot.Plot(ts, median, **line_options)
def main():
    """Compare a simulated and an exact distribution for the sum of 3 d6.

    Prints the single-die distribution, the sampled sum (5000 samples via
    SampleSum) and the enumerated sum (d6 + d6 + d6), then saves both
    curves to 'DD1' as a PDF file.
    """
    separator = "##################################"

    print("Single Die:")
    die = Die(6)
    print(die)

    # Simulation: sample the sum of three copies of the same die.
    simulated = SampleSum([die, die, die], 5000)
    print(separator)
    print("Three Die:")
    print(simulated)

    # Enumeration: add the distributions directly.
    enumerated = die + die + die
    print(separator)
    print("Exact Three Die:")
    print(enumerated)

    # Draw both results with Allen Downey's thinkplot module.
    thinkplot.PrePlot(1)
    for curve in (simulated, enumerated):
        thinkplot.Plot(curve)
    thinkplot.Save(root='DD1',
                   xlabel='Sum of 3 d6',
                   ylabel='Probability',
                   formats=['pdf'])

    print("Program Complete")
def MakeNormalModel(arrivalDelays):
    """Plot the CDF of arrival delays next to a fitted normal model.

    Adapted from analytic.py (per the original note).

    arrivalDelays: sequence of arrival delays

    The model parameters come from a trimmed mean and variance (p=0.01);
    the figure is saved as 'NormalModel_arrivaldelay_model'.
    """
    # Trimming outliers before estimating gives a better fit.
    mean, variance = thinkstats2.TrimmedMeanVar(arrivalDelays, p=0.01)
    print('Mean, Var', mean, variance)

    std = math.sqrt(variance)
    print('Sigma', std)

    # Model curve, rendered over the range 0 to 12.5.
    model_xs, model_ps = thinkstats2.RenderNormalCdf(mean, std, low=0, high=12.5)
    thinkplot.Plot(model_xs, model_ps, label='model', color='0.8')

    # Empirical CDF of the data.
    data_cdf = thinkstats2.Cdf(arrivalDelays, label='data')
    thinkplot.PrePlot(1)
    thinkplot.Cdf(data_cdf)

    thinkplot.Save(root='NormalModel_arrivaldelay_model',
                   title='Arrival Delays',
                   xlabel='arrival delays (min)',
                   ylabel='CDF')
def MakeArrivalDepartureDelayScatterPlots(flights):
    """Save a jittered scatter plot of arrival versus departure delay.

    flights: table of flights; 10000 rows are sampled from it with
        thinkstats2.SampleRows

    The figure is saved as 'ArrivalDepartureDelayScatterplot'.
    """
    rows = thinkstats2.SampleRows(flights, 10000)

    # A two-column layout is requested, but only one panel is drawn; the
    # un-jittered variant that used the other panel is disabled.
    thinkplot.PrePlot(cols=2)

    # Jitter is applied by the helper (hjitter=1.3, wjitter=0.5).
    departureDelays, arrivalDelays = GetArrivalDepartureDelay(
        rows, hjitter=1.3, wjitter=0.5)

    thinkplot.Scatter(arrivalDelays, departureDelays, alpha=1)

    axis_options = dict(xlabel='arrival delay (min)',
                        ylabel='departure delay (min)',
                        # axis=[-20, 20, 20, 200],
                        legend=False)
    thinkplot.Config(**axis_options)

    thinkplot.Save(root='ArrivalDepartureDelayScatterplot')
def MakePosteriorPlot(suite):
    """Plot the CDFs of the posterior marginals for alpha and beta.

    suite: posterior joint distribution of location

    Prints the 50% credible interval of each marginal and saves the
    figure as 'paintball2'.
    """
    alpha = suite.Marginal(0)
    beta = suite.Marginal(1)
    alpha.name = 'alpha'
    beta.name = 'beta'

    print('alpha CI', alpha.CredibleInterval(50))
    print('beta CI', beta.CredibleInterval(50))

    thinkplot.PrePlot(num=2)

    # CDFs are plotted rather than the PMFs themselves.
    for marginal in (alpha, beta):
        thinkplot.Cdf(thinkbayes2.MakeCdfFromPmf(marginal))

    thinkplot.Save('paintball2',
                   xlabel='Distance',
                   ylabel='Prob',
                   loc=4,
                   formats=FORMATS)
def PlotCdf(cdf):
    """Plot the actual and fitted distributions in two figures.

    cdf: CDF object exposing ``xs`` and ``ps``

    'kidney1' shows the complementary CDF on a log-y scale; 'kidney2'
    shows the data CDF together with the curve returned by ModelCdf().
    """
    values, probs = cdf.xs, cdf.ps
    complement = [1 - prob for prob in probs]

    # Figure 1: CCDF on a log-y scale, which shows exponential behavior.
    thinkplot.Clf()
    thinkplot.Plot(values, complement, 'bo-')
    thinkplot.Save(root='kidney1',
                   formats=FORMATS,
                   xlabel='RDT',
                   ylabel='CCDF (log scale)',
                   yscale='log')

    # Figure 2: CDF of the data against the model.
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)
    model_xs, model_ys = ModelCdf()
    thinkplot.Plot(model_xs, model_ys, label='model', linestyle='dashed')
    thinkplot.Plot(values, probs, 'gs', label='data')
    thinkplot.Save(root='kidney2',
                   formats=FORMATS,
                   xlabel='RDT (volume doublings per year)',
                   ylabel='CDF',
                   title='Distribution of RDT',
                   axis=[-2, 7, 0, 1],
                   loc=4)
def PlotCoefVariation(suites):
    """Plot the posterior distributions of the coefficient of variation.

    suites: map from label to suite; CoefVariation is applied to each
        one to obtain the Pmf of CVs

    Saves the figure as 'variability_cv' and prints the probability that
    each of 'female' / 'male' has the larger CV, so both labels must be
    present in ``suites``.
    """
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    cv_by_label = {}
    for label, suite in suites.items():
        cv_pmf = CoefVariation(suite)
        print('CV posterior mean', cv_pmf.Mean())

        thinkplot.Cdf(thinkbayes.MakeCdfFromPmf(cv_pmf, label))
        cv_by_label[label] = cv_pmf

    thinkplot.Save(root='variability_cv',
                   xlabel='Coefficient of variation',
                   ylabel='Probability')

    female, male = cv_by_label['female'], cv_by_label['male']
    print('female bigger', thinkbayes.PmfProbGreater(female, male))
    print('male bigger', thinkbayes.PmfProbGreater(male, female))
def PlotSurvivalFunctions(sf_map, predict_flag=False):
    """Plot estimated survival functions with a shaded 10%-90% band.

    sf_map: map from group name to sequence of survival functions
    predict_flag: whether the lines are predicted or actual; the median
        line is drawn only when this is false

    Group names are formatted with '19%d', so they must be integers.
    """
    thinkplot.PrePlot(len(sf_map))

    for group_name, functions in sorted(sf_map.items(), reverse=True):
        # Skip groups with no survival functions or an empty first one.
        if len(functions) == 0 or len(functions[0]) == 0:
            continue

        ts, bands = MakeSurvivalCI(functions, [10, 50, 90])
        low, median, high = bands[0], bands[1], bands[2]
        thinkplot.FillBetween(ts, low, high, color='gray')

        if not predict_flag:
            thinkplot.Plot(ts, median, label='19%d' % group_name)

    thinkplot.Config(xlabel='age (years)',
                     ylabel='prob unmarried',
                     xlim=[14, 45],
                     ylim=[0, 1],
                     legend=True,
                     loc='upper right')
def PlotRemainingLifetime(sf1, sf2):
    """Plot remaining lifetimes for pregnancy and age at first marriage.

    sf1: SurvivalFunction for pregnancy length
    sf2: SurvivalFunction for age at first marriage

    Draws two side-by-side panels and saves them as 'survival6'.
    """
    def median_of(pmf):
        # Summary statistic for the second panel: the 50th percentile.
        return pmf.Percentile(50)

    thinkplot.PrePlot(cols=2)

    # Left panel: RemainingLifetime with its default arguments.
    pregnancy_remaining = sf1.RemainingLifetime()
    thinkplot.Plot(pregnancy_remaining)
    thinkplot.Config(title='remaining pregnancy length',
                     xlabel='weeks',
                     ylabel='mean remaining weeks')

    # Right panel: median remaining time, with np.inf as the filler value.
    thinkplot.SubPlot(2)
    marriage_remaining = sf2.RemainingLifetime(filler=np.inf, func=median_of)
    thinkplot.Plot(marriage_remaining)
    thinkplot.Config(title='years until first marriage',
                     ylim=[0, 15],
                     xlim=[11, 31],
                     xlabel='age (years)',
                     ylabel='median remaining years')

    thinkplot.Save(root='survival6', formats=FORMATS)
def MakePlots(player1, player2):
    """Generate the 'price1' and 'price2' figures for two players.

    price1 shows the prior distribution of price for both players.
    price2 shows the CDF of diff for both players.

    Also prints each player's median diff and overbid probability.
    """
    # Figure 1: priors (drawn by MakePrice1, saved here).
    MakePrice1(player1, player2)
    thinkplot.Save(root='price1',
                   xlabel='price ($)',
                   ylabel='PDF',
                   formats=FORMATS)

    # Figure 2: historical distribution of underness for both players.
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    diff_cdf1 = player1.CdfDiff()
    diff_cdf1.name = 'player 1'
    diff_cdf2 = player2.CdfDiff()
    diff_cdf2.name = 'player 2'

    for diff_cdf in (diff_cdf1, diff_cdf2):
        print('Player median', diff_cdf.Percentile(50))

    print('Player 1 overbids', player1.ProbOverbid())
    print('Player 2 overbids', player2.ProbOverbid())

    thinkplot.Cdfs([diff_cdf1, diff_cdf2])
    thinkplot.Save(root='price2',
                   xlabel='diff ($)',
                   ylabel='CDF',
                   formats=FORMATS)
def main():
    """Update two Euro suites with 140 heads and 110 tails and plot them.

    One suite uses the default prior (labelled "Uniform prior"), the
    other is built with ``triangle_prior=True``. Both posteriors are
    summarized with summarize_posterior and saved to 'euro2' as a PDF.
    """
    hypotheses = range(101)

    euro = Euro(hypotheses)
    euro.label = "Uniform prior"
    euro_triangleprior = Euro(hypotheses, triangle_prior=True)
    euro_triangleprior.label = "Triangle prior"

    # Feed the same observations, in the same order, to both suites.
    observations = 'H' * 140 + 'T' * 110
    for outcome in observations:
        euro.Update(outcome)
        euro_triangleprior.Update(outcome)

    print("Summary for uniform prior: ")
    summarize_posterior(euro)
    print("Summary for triangle prior: ")
    summarize_posterior(euro_triangleprior)

    # Draw both posteriors with Allen Downey's thinkplot module.
    thinkplot.PrePlot(1)
    for posterior in (euro, euro_triangleprior):
        thinkplot.Plot(posterior)
    thinkplot.Save(root='euro2',
                   xlabel='Bias of heads vs. tails',
                   ylabel='Probability',
                   formats=['pdf'])
def main():
    """Build and plot the mixture distribution of a weighted bag of dice.

    Each die is wrapped in its own Pmf holding a single (Die, weight)
    entry: d6 with weight 2, d8 with weight 3, d12 and d20 with weight 1.
    Every outcome of every die is accumulated into ``mix`` with
    weight * probability, the result is normalized, and the figure is
    saved to 'dice_Mix_self2' as a PDF.
    """
    d6 = Pmf()
    # Debug output; print(...) with one argument behaves the same under
    # Python 2 and Python 3, whereas the print statement is a SyntaxError
    # on Python 3.
    print(type(d6))
    d6.Set(Die(6), 2)
    print(type(d6))

    d8 = Pmf()
    d8.Set(Die(8), 3)

    d12 = Pmf()
    d12.Set(Die(12), 1)

    d20 = Pmf()
    d20.Set(Die(20), 1)

    mix = Pmf()
    for dice in [d6, d8, d12, d20]:
        for die, weight in dice.Items():
            for outcome, prob in die.Items():
                mix.Incr(outcome, weight * prob)
    mix.Normalize()

    thinkplot.PrePlot(1)
    thinkplot.Pmf(mix)
    thinkplot.Save(root='dice_Mix_self2',
                   xlabel='',
                   ylabel='Probability',
                   formats=['pdf'])
def CH3_2():
    """Locomotive (Train) problem.

    One day a locomotive numbered 60 is seen passing by; estimate how
    many locomotives there are in total.

    Assume an upper bound N = 1000, 500 or 2000; the estimate is
    sensitive to the chosen upper bound.

    Table from the original notes (actual number of locomotives, 1/N,
    and the value given that locomotive 60 was seen):

        1       1/N     0
        2       1/N     0
        ...     ...     ...
        59      1/N     0
        60      1/N     1/60
        61      1/N     1/61
        ...     ...     ...
        1000    1/N     1/1000

    Plots the posterior and prints its mean.
    """
    # Assume the locomotives are numbered 1 - 1000.
    N = 1000
    # range() excludes its stop value, so N + 1 is needed for the
    # hypothesis "1000 locomotives" to be included.
    hypoes = range(1, N + 1)
    suite = Train(hypoes)
    suite.Update(60)

    thinkplot.PrePlot(num=1)
    thinkplot.Pmf(suite)
    thinkplot.Show(title='Train',
                   xlabel='Number of trains',
                   ylabel='Probability')

    print(suite.Mean())
def PlotOptimalBid():
    """Plot the optimal bid against the estimated price.

    For 21 guesses evenly spaced from 15000 to 60000, player 1's beliefs
    are rebuilt and the bid with the highest expected gain is found. The
    posterior mean, best bid and best gain are plotted against the guess
    and saved as 'price6'.
    """
    player1, player2 = MakePlayers()
    low, high = 15000, 60000

    rows = []
    for guess in numpy.linspace(low, high, 21):
        player1.MakeBeliefs(guess)

        posterior = player1.posterior
        mean = posterior.Mean()
        mle = posterior.MaximumLikelihood()

        calculator = GainCalculator(player1, player2)
        bids, gains = calculator.ExpectedGains()

        # Highest expected gain; ties are broken by the larger bid.
        best_gain, best_bid = max(zip(gains, bids))

        rows.append((guess, mean, mle, best_gain, best_bid))

    # Transpose the rows into one sequence per quantity.
    guesses, means, _mles, gains, bids = zip(*rows)

    thinkplot.PrePlot(num=3)
    # Gray y = x reference line.
    pyplot.plot([15000, 60000], [15000, 60000], color='gray')
    thinkplot.Plot(guesses, means, label='mean')
    #thinkplot.Plot(guesses, mles, label='MLE')
    thinkplot.Plot(guesses, bids, label='bid')
    thinkplot.Plot(guesses, gains, label='gain')
    thinkplot.Save(root='price6',
                   xlabel='guessed price ($)',
                   formats=FORMATS)
def main(script, filename='mystery0.dat'):
    """Show six diagnostic views of the data in ``filename``.

    script: unused here (presumably the program name from sys.argv --
        confirm against the caller)
    filename: data file read with ReadFile

    The 2x3 grid holds the CDF on linear and log-x scales, the
    exponential, pareto and weibull transforms, and a normal probability
    plot.
    """
    data = ReadFile(filename)
    cdf = thinkstats2.Cdf(data)

    def draw_scaled_cdf(position, title, ylabel, **cdf_options):
        # Draw the CDF in one panel and apply the scale options that
        # thinkplot.Cdf returns for the requested scale or transform.
        thinkplot.SubPlot(position)
        scale = thinkplot.Cdf(cdf, color='C0', **cdf_options)
        thinkplot.Config(title=title, ylabel=ylabel, **scale)

    thinkplot.PrePlot(num=6, rows=2, cols=3)

    thinkplot.SubPlot(1)
    thinkplot.Cdf(cdf, color='C0', label=filename)
    thinkplot.Config(title='CDF on linear scale', ylabel='CDF')

    draw_scaled_cdf(2, 'CDF on log-x scale', 'CDF', xscale='log')
    draw_scaled_cdf(3, 'CCDF on log-y scale', 'log CCDF',
                    transform='exponential')

    thinkplot.SubPlot(4)
    normal_xs, normal_ys = thinkstats2.NormalProbability(data)
    thinkplot.Plot(normal_xs, normal_ys, color='C0')
    thinkplot.Config(title='Normal probability plot',
                     xlabel='random normal',
                     ylabel='data')

    draw_scaled_cdf(5, 'CCDF on log-log scale', 'log CCDF',
                    transform='pareto')
    draw_scaled_cdf(6, 'CCDF on loglog-y log-x scale', 'log log CCDF',
                    transform='weibull')

    thinkplot.Show(legend=False)
def main(script, filename='mystery0.dat'):
    """Show six diagnostic views of the data in ``filename``.

    script: unused here (presumably the program name from sys.argv --
        confirm against the caller)
    filename: data file read with ReadFile

    Panels of the 2x3 grid: linear CDF, log-x CDF, exponential
    transform, normal probability plot, pareto and weibull transforms.
    """
    data = ReadFile(filename)
    cdf = thinkstats2.Cdf(data)

    def draw_scaled_cdf(position, title, **cdf_options):
        # Draw the CDF in one panel and apply the scale options that
        # thinkplot.Cdf returns for the requested scale or transform.
        thinkplot.SubPlot(position)
        scale = thinkplot.Cdf(cdf, **cdf_options)
        thinkplot.Config(title=title, **scale)

    thinkplot.PrePlot(rows=2, cols=3)

    thinkplot.SubPlot(1)
    thinkplot.Cdf(cdf)
    thinkplot.Config(title='linear')

    draw_scaled_cdf(2, 'logx', xscale='log')
    draw_scaled_cdf(3, 'expo', transform='exponential')

    thinkplot.SubPlot(4)
    normal_xs, normal_ys = thinkstats2.NormalProbability(data)
    thinkplot.Plot(normal_xs, normal_ys)
    thinkplot.Config(title='normal')

    draw_scaled_cdf(5, 'pareto', transform='pareto')
    draw_scaled_cdf(6, 'weibull', transform='weibull')

    thinkplot.Show(legend=False)
def Specific_Character(House, Gender, Class, ksweep, lamsweep, Title=''):
    """Chain the helper functions to produce a prediction for one group.

    House: any key in the data returned by PrepData
    Gender: 'M', 'F' or 'All'
    Class: 'Noble', 'Small' or 'All'
    ksweep, lamsweep: lists of the form
        [lower limit, upper limit, number of points] describing the
        uniform priors for k and lambda
    Title: optional title for the final graph

    Shows the posterior distributions of k and lambda, then passes their
    means and 5th/95th percentiles to CredIntPlt.
    """
    house_data = PrepData()
    # Split the characters into alive/dead for the requested attributes.
    alive, dead = char_lists(house_data, House, Gender, Class)
    # Ages at introduction and lifespans.
    introductions, lifetimes = ages(alive, dead)
    # Kaplan-Meier estimate; the hazard function is not used further.
    sf, _haz = SurvivalHaz(introductions, lifetimes)

    # Uniform priors over the requested sweeps.
    lam_prior = thinkbayes2.MakeUniformPmf(lamsweep[0], lamsweep[1], lamsweep[2])
    k_prior = thinkbayes2.MakeUniformPmf(ksweep[0], ksweep[1], ksweep[2])

    # Posterior distributions of k and lambda.
    k, lam = MakeDistr(introductions, lifetimes, k_prior, lam_prior)

    thinkplot.PrePlot(2)
    thinkplot.Pdfs([k, lam])
    plt.xlabel('Value')
    plt.ylabel('Probability')
    plt.title('Posterior Distributions')
    print('If these distributions look chopped off, adjust kweep and lsweep')
    thinkplot.Show()

    k_mean = k.Mean()
    lam_mean = lam.Mean()
    k_low, k_high = k.Percentile(5), k.Percentile(95)
    lam_low, lam_high = lam.Percentile(5), lam.Percentile(95)

    CredIntPlt(sf, k_low, k_high, lam_low, lam_high,
               House, k_mean, lam_mean, Title)
    plt.show()
def class_sizes():
    """Generate PMFs of observed and actual class size.

    Prints the mean and variance of the actual, biased and unbiased
    PMFs, then saves the actual and biased PMFs as 'class_size1'.
    """
    def report(dist):
        # Print the two summary statistics for one distribution.
        print('mean', dist.Mean())
        print('var', dist.Var())

    # Actual distribution of class sizes, taken from the book.
    size_counts = {
        7: 8,
        12: 8,
        17: 14,
        22: 4,
        27: 6,
        32: 12,
        37: 8,
        42: 3,
        47: 2,
    }

    actual = thinkstats2.Pmf(size_counts, label='actual')
    report(actual)

    # Distribution after applying bias_pmf.
    observed = bias_pmf(actual, label='observed')
    report(observed)

    # Invert the bias again with unbias_pmf.
    recovered = unbias_pmf(observed, label='unbiased')
    report(recovered)

    # Only the actual and biased PMFs are plotted.
    thinkplot.PrePlot(2)
    thinkplot.Pmfs([actual, observed])
    thinkplot.Save(root='class_size1',
                   xlabel='class size',
                   ylabel='PMF',
                   axis=[0, 52, 0, 0.27])
def PlotResiduals(live):
    """Plot the 75th, 50th and 25th percentiles of the residuals by age.

    live: DataFrame with ``agepreg`` and ``totalwgt_lb`` columns; a
        'residual' column is added to it in place

    The figure is saved as 'linear2'.
    """
    inter, slope = thinkstats2.LeastSquares(live.agepreg, live.totalwgt_lb)
    live['residual'] = thinkstats2.Residuals(
        live.agepreg, live.totalwgt_lb, inter, slope)

    # Bin the respondents by age in 3-year steps from 10 up to 46.
    edges = np.arange(10, 48, 3)
    bin_index = np.digitize(live.agepreg, edges)

    # The first and last groups are dropped.
    kept = [group for _, group in live.groupby(bin_index)][1:-1]
    mean_ages = [group.agepreg.mean() for group in kept]
    residual_cdfs = [thinkstats2.Cdf(group.residual) for group in kept]

    thinkplot.PrePlot(3)
    for percent in (75, 50, 25):
        line = [residual_cdf.Percentile(percent)
                for residual_cdf in residual_cdfs]
        thinkplot.Plot(mean_ages, line, label='%dth' % percent)

    thinkplot.Save(root='linear2',
                   xlabel='age (years)',
                   ylabel='residual (lbs)',
                   xlim=[10, 45])
def PlotPercentileLines(x_means, cdfs, **options):
    """Plot the 75th, 50th and 25th percentile lines and show the figure.

    x_means: x coordinate for each Cdf
    cdfs: sequence of Cdf objects, one per x value
    options: keyword arguments forwarded to thinkplot.Show
    """
    thinkplot.PrePlot(3)

    for rank in (75, 50, 25):
        line = [cdf.Percentile(rank) for cdf in cdfs]
        thinkplot.Plot(x_means, line, label='%dth' % rank)

    thinkplot.Show(**options)
def CH3_4():
    """Compute posteriors under different priors.

    Train:  the prior is a uniform distribution
    Train2: the prior is a power law

    Table from the original notes, for alpha = 1 (middle column is the
    un-normalized prior):

        1       1/1       0
        2       1/2       0
        ...     ...       ...
        59      1/59      0
        60      1/60      1/60
        61      1/61      1/61
        ...     ...
        1000    1/1000    1/1000

    First plots both posteriors for the dataset [60], then prints the
    posterior mean and 5%-95% interval for the dataset [60, 30, 90] with
    upper bounds of 500, 1000 and 2000.
    """
    dataset = [60]
    N = 1000

    def _makePosterior(uppernum, constructor, dataset):
        """Build the posterior suite from a constructor and a dataset.

        The hypotheses run from 1 to uppernum inclusive; range() excludes
        its stop value, hence the + 1.
        """
        suite = constructor(range(1, uppernum + 1))
        for data in dataset:
            suite.Update(data)
        return suite

    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    constructors = [Train, Train2]
    labels = ['uniform', 'power law']

    for constructor, label in zip(constructors, labels):
        suite = _makePosterior(N, constructor, dataset)
        suite.name = label
        thinkplot.Pmf(suite)

    thinkplot.Show(title='compare priors',
                   xlabel='Number of trains',
                   ylabel='prob')

    dataset = [60, 30, 90]
    for constructor, label in zip(constructors, labels):
        for n in [500, 1000, 2000]:
            suite = _makePosterior(n, constructor, dataset)

            # Point estimate.
            print("%s: n = %d, mu = %.3f" % (label, n, suite.Mean()))

            # 5% - 95% interval of the posterior (see CH3_5).
            interval = (thinkbayes.Percentile(suite, 5),
                        thinkbayes.Percentile(suite, 95))
            print(interval)

            # CH3_6: percentiles computed from the cumulative
            # distribution function instead.
            if n == 2000:
                cdf = thinkbayes.MakeCdfFromPmf(suite)
                interval = cdf.Percentile(5), cdf.Percentile(95)
                print("MakeCdfFromPmf:", interval)
def EstimateMarriageSurvivalByDecade(groups, **options):
    """Plot one marriage survival curve per group of respondents.

    groups: GroupBy object (respondents grouped by decade)
    options: keyword arguments forwarded to thinkplot.Plot
    """
    thinkplot.PrePlot(len(groups))

    for _name, respondents in groups:
        # Only the survival function of the returned pair is plotted.
        survival = EstimateMarriageSurvival(respondents)[1]
        thinkplot.Plot(survival, **options)
def AddLabelsByDecade(groups, **options):
    """Draw fake points in order to add decade labels to the legend.

    groups: GroupBy object whose group names are integers (they are
        formatted with '%d0s')
    options: keyword arguments forwarded to thinkplot.Plot
    """
    thinkplot.PrePlot(len(groups))

    for decade, _members in groups:
        # A single point at (15, 1) that carries the legend label.
        thinkplot.Plot([15], [1], label='%d0s' % decade, **options)
def MakeCdfs(male, female):
    """Show the CDFs of baby weight for the male and female groups.

    male, female: objects exposing a ``totalwgt_lb`` attribute
    """
    weight_cdfs = [
        thinkstats2.Cdf(male.totalwgt_lb, label='Male'),
        thinkstats2.Cdf(female.totalwgt_lb, label='Female'),
    ]

    thinkplot.PrePlot(2)
    thinkplot.Cdfs(weight_cdfs)
    thinkplot.Config(xlabel='Baby Weight (Lbs)',
                     ylabel='CDF',
                     title='Baby Weights')
    thinkplot.Show()
def MakePrice1(player1, player2):
    """Plot the prior distribution of price for both players.

    player1, player2: objects providing PmfPrice()

    The figure is cleared first; it is not saved or shown here.
    """
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    price_pmfs = []
    for player, name in ((player1, 'showcase 1'), (player2, 'showcase 2')):
        price_pmf = player.PmfPrice()
        price_pmf.name = name
        price_pmfs.append(price_pmf)

    thinkplot.Pmfs(price_pmfs)
def MakeCdfs(male, female):
    """Show the CDFs of weekend alcohol consumption for both groups.

    male, female: objects exposing an ``alcwknd`` attribute
    """
    consumption_cdfs = [
        thinkstats2.Cdf(male.alcwknd, label='Male'),
        thinkstats2.Cdf(female.alcwknd, label='Female'),
    ]

    thinkplot.PrePlot(2)
    thinkplot.Cdfs(consumption_cdfs)
    thinkplot.Config(xlabel='Alcohol Consumed (grams)',
                     ylabel='CDF',
                     title='Weekend Alcohol Consumption')
    thinkplot.Show()
def main():
    """Update a Train suite over 1..1000 with the observation 60.

    Prints the posterior mean and plots the posterior PMF (the figure
    is not saved or shown here).
    """
    upper_bound = 1000
    posterior = Train(range(1, upper_bound + 1))
    posterior.Update(60)

    print(posterior.Mean())

    thinkplot.PrePlot(1)
    thinkplot.Pmf(posterior)