def PlotJointDist(self):
    """Draw the age-size joint distribution as a pcolor plot.

    The joint distribution comes from ``self.cache.GetDistAgeSize()``; the
    figure is handed to thinkplot.Save with root 'kidney8'.
    """
    thinkplot.Clf()

    age_size = self.cache.GetDistAgeSize()
    thinkplot.Contour(age_size, contour=False, pcolor=True)

    # Values handed to MakeLogTicks to build the y-axis ticks.
    tick_values = [0.2, 0.5, 1, 2, 5, 10, 20]
    save_options = dict(
        root='kidney8',
        formats=FORMATS,
        axis=[0, 41, -0.7, 1.31],
        yticks=MakeLogTicks(tick_values),
        xlabel='ages',
        ylabel='diameter (cm, log scale)',
    )
    thinkplot.Save(**save_options)
def MakePlots(player1, player2):
    """Generate the two showcase figures for a pair of players.

    'price1' plots the result of each player's PmfPrice();
    'price2' plots the result of each player's CdfDiff().
    The 50th percentile of each diff CDF and each player's ProbOverbid()
    are printed before the second figure is saved.

    player1, player2: objects providing PmfPrice, CdfDiff and ProbOverbid
    """
    players = (player1, player2)

    # figure 1: price distributions for both players
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)
    price_pmfs = []
    for player, label in zip(players, ('showcase 1', 'showcase 2')):
        price_pmf = player.PmfPrice()
        price_pmf.label = label
        price_pmfs.append(price_pmf)
    thinkplot.Pdfs(price_pmfs)
    thinkplot.Save(root='price1',
                   xlabel='price ($)',
                   ylabel='PDF',
                   formats=FORMATS)

    # figure 2: distributions of diff for both players
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)
    diff_cdfs = []
    for player, label in zip(players, ('player 1', 'player 2')):
        diff_cdf = player.CdfDiff()
        diff_cdf.label = label
        diff_cdfs.append(diff_cdf)

    for diff_cdf in diff_cdfs:
        print('Player median', diff_cdf.Percentile(50))
    print('Player 1 overbids', player1.ProbOverbid())
    print('Player 2 overbids', player2.ProbOverbid())

    thinkplot.Cdfs(diff_cdfs)
    thinkplot.Save(root='price2',
                   xlabel='diff ($)',
                   ylabel='CDF',
                   formats=FORMATS)
def MakeParetoCdf():
    """Plot the Pareto CDF for three values of alpha and save the figure.

    Each curve is rendered by thinkstats2.RenderParetoCdf with xmin=0.5 over
    the range 0 to 10.0 using n=100.
    """
    xmin = 0.5
    alphas = [2.0, 1.0, 0.5]

    thinkplot.PrePlot(len(alphas))
    for alpha in alphas:
        curve = thinkstats2.RenderParetoCdf(xmin, alpha, 0, 10.0, n=100)
        xs, ps = curve
        curve_label = 'alpha=%g' % alpha
        thinkplot.Plot(xs, ps, label=curve_label)

    thinkplot.Save(root='analytic_pareto_cdf',
                   title='Pareto CDF',
                   xlabel='x',
                   ylabel='CDF')
def main():
    """Update a Train suite with one observation and plot the posterior.

    The hypotheses are the integers 1..1000. After updating with the value
    60, the posterior mean is printed and the PMF is saved with root
    'train1'.
    """
    # range() works on both Python 2 and 3; xrange() is a NameError on
    # Python 3.
    hypos = range(1, 1001)
    suite = Train(hypos)

    suite.Update(60)
    print(suite.Mean())

    thinkplot.PrePlot(1)
    thinkplot.Pmf(suite)
    thinkplot.Save(root='train1',
                   xlabel='Number of trains',
                   ylabel='Probability',
                   formats=['pdf', 'eps'])
def PlotSuites(suites, root):
    """Plot the PMFs of several suites on one figure and save it.

    suites: sequence of Suite objects; its length sets the PrePlot count
    root: string filename root passed to thinkplot.Save
    """
    thinkplot.Clf()

    num_suites = len(suites)
    thinkplot.PrePlot(num_suites)
    thinkplot.Pmfs(suites)

    save_options = dict(xlabel='x',
                        ylabel='Probability',
                        formats=['pdf', 'eps'])
    thinkplot.Save(root=root, **save_options)
def MakeExpoCdf():
    """Plot the exponential CDF for three values of lambda and save it.

    Each curve is rendered by thinkstats2.RenderExpoCdf(lam, 0, 3.0, 50).
    """
    lams = [2.0, 1, 0.5]

    thinkplot.PrePlot(len(lams))
    for lam in lams:
        curve = thinkstats2.RenderExpoCdf(lam, 0, 3.0, 50)
        xs, ps = curve
        thinkplot.Plot(xs, ps, label=r'$\lambda=%g$' % lam)

    thinkplot.Save(root='analytic_expo_cdf',
                   title='Exponential CDF',
                   xlabel='x',
                   ylabel='CDF')
def main(name):
    """Run the time-series analysis and write the prediction figures.

    name: not read by this function; the quality labels 'high' and 'medium'
          are chosen inside the body
    """
    thinkstats2.RandomSeed(18)

    dailies = GroupByQualityAndDay(ReadData())
    PlotDailies(dailies)
    RunModels(dailies)
    PrintSerialCorrelations(dailies)
    MakeAcfPlot(dailies)

    # models and smoothing plots for the 'high' series
    quality = 'high'
    daily = dailies[quality]
    PlotLinearModel(daily, quality)
    PlotRollingMean(daily, quality)
    PlotFilled(daily, quality)

    years = np.linspace(0, 5, 101)

    def save_predictions(root):
        # pad the x-range slightly beyond the first and last year
        xlim = years[0] - 0.1, years[-1] + 0.1
        thinkplot.Save(root=root,
                       title='predictions',
                       xlabel='years',
                       xlim=xlim,
                       ylabel='price per gram ($)')

    # predictions for the 'high' series
    thinkplot.Scatter(daily.years, daily.ppg, alpha=0.1, label=quality)
    PlotPredictions(daily, years)
    save_predictions('timeseries4')

    # intervals and predictions for the 'medium' series
    quality = 'medium'
    daily = dailies[quality]
    thinkplot.Scatter(daily.years, daily.ppg, alpha=0.1, label=quality)
    PlotIntervals(daily, years)
    PlotPredictions(daily, years)
    save_predictions('timeseries5')
def ComputeSkewnesses():
    """Plot estimated PDFs of birth weight and adult weight.

    For each data set the mean and median returned by Summarize are marked
    with a vertical grey line and a text label, then the estimated PDF is
    drawn and the figure saved.
    """
    def mark_at(x, height):
        # vertical line from 0 up to `height` at position x
        thinkplot.Plot([x, x], [0, height], color='0.6', linewidth=1)

    # birth weight
    live, _firsts, _others = first.MakeFrames()
    birth_weights = live.totalwgt_lb.dropna()
    print('Birth weight')
    mean, median = Summarize(birth_weights)

    height = 0.35
    mark_at(mean, height)
    thinkplot.Text(mean - 0.15, 0.1 * height, 'mean',
                   horizontalalignment='right')
    mark_at(median, height)
    thinkplot.Text(median + 0.1, 0.1 * height, 'median',
                   horizontalalignment='left')

    birth_pdf = thinkstats2.EstimatedPdf(birth_weights)
    thinkplot.Pdf(birth_pdf, label='birth weight')
    thinkplot.Save(root='density_totalwgt_kde',
                   xlabel='lbs',
                   ylabel='PDF')

    # adult weight
    brfss_df = brfss.ReadBrfss(nrows=None)
    adult_weights = brfss_df.wtkg2.dropna()
    print('Adult weight')
    mean, median = Summarize(adult_weights)

    height = 0.02499
    mark_at(mean, height)
    thinkplot.Text(mean + 1, 0.1 * height, 'mean',
                   horizontalalignment='left')
    mark_at(median, height)
    thinkplot.Text(median - 1.5, 0.1 * height, 'median',
                   horizontalalignment='right')

    adult_pdf = thinkstats2.EstimatedPdf(adult_weights)
    thinkplot.Pdf(adult_pdf, label='adult weight')
    thinkplot.Save(root='density_wtkg2_kde',
                   xlabel='kg',
                   ylabel='PDF',
                   xlim=[0, 200])
def ScatterPlot(root, heights, weights, alpha=1.0):
    """Draw a height/weight scatter plot and save it.

    root: string filename root
    heights: sequence of float
    weights: sequence of float
    alpha: float passed through to thinkplot.Scatter
    """
    thinkplot.Scatter(heights, weights, alpha=alpha)

    save_options = dict(
        xlabel='Height (cm)',
        ylabel='Weight (kg)',
        axis=[140, 210, 20, 200],
        legend=False,
    )
    thinkplot.Save(root=root, **save_options)
def PlotPriorDist(pmf):
    """Plot the CDF of the prior distribution of p_correct and save it.

    pmf: prior; converted with thinkbayes.MakeCdfFromPmf before plotting
    """
    thinkplot.Clf()
    thinkplot.PrePlot(num=1)

    prior_cdf = thinkbayes.MakeCdfFromPmf(pmf, 'prior')
    thinkplot.Cdf(prior_cdf)

    save_options = dict(root='sat_prior',
                        xlabel='p_correct',
                        ylabel='CDF',
                        formats=['pdf', 'eps'])
    thinkplot.Save(**save_options)
def HexBin(root, heights, weights, bins=None):
    """Draw a height/weight hexbin plot and save it.

    root: string filename root
    heights: sequence of float
    weights: sequence of float
    bins: 'log' or None for linear
    """
    thinkplot.HexBin(heights, weights, bins=bins)

    save_options = dict(
        xlabel='Height (cm)',
        ylabel='Weight (kg)',
        axis=[140, 210, 20, 200],
        legend=False,
    )
    thinkplot.Save(root=root, **save_options)
def PlotPmfs(self, root='redline0'):
    """Plots the computed Pmfs.

    self.pmf_z and self.pmf_zb are rescaled by 1/60 with ScaleDists (the
    x-axis is labelled in minutes) and plotted on one figure.

    root: string filename root passed to thinkplot.Save
    """
    pmfs = ScaleDists([self.pmf_z, self.pmf_zb], 1.0 / 60)

    thinkplot.Clf()
    thinkplot.PrePlot(2)
    thinkplot.Pmfs(pmfs)

    # These are PMFs, so the y-axis is a probability; the label previously
    # read 'CDF'.
    thinkplot.Save(root=root,
                   xlabel='Time (min)',
                   ylabel='Probability',
                   formats=FORMATS)
def main():
    """Update a Train suite with a series of observations and plot it.

    The hypotheses are the integers 0..100. The suite is updated once per
    observed value, printed, and its PMF saved with root 'train_self1'.
    """
    suite = Train(range(101))

    observations = [6, 10, 20, 12, 15, 19, 4, 10]
    for observed in observations:
        suite.Update(observed)
    suite.Print()

    thinkplot.PrePlot(1)
    thinkplot.Pmf(suite)
    thinkplot.Save(root='train_self1',
                   xlabel='Number of trains',
                   ylabel='Probability',
                   formats=['pdf'])
def main():
    """Run the marriage-data plots.

    Only PlotMarriageData() is executed. The pregnancy-length statements
    that followed an unconditional ``return`` could never run and have been
    removed.
    """
    PlotMarriageData()
def main(script):
    """Print correlations between mother's age and birth weight, and plot.

    Rows missing 'agepreg' or 'totalwgt_lb' are dropped first. The scatter
    figure is saved with root 'chap07scatter1'.

    script: not read by this function
    """
    thinkstats2.RandomSeed(17)

    frames = first.MakeFrames()
    live, _firsts, _others = frames
    live = live.dropna(subset=['agepreg', 'totalwgt_lb'])

    BinnedPercentiles(live)

    ages, weights = live.agepreg, live.totalwgt_lb
    print('thinkstats2 Corr', thinkstats2.Corr(ages, weights))
    print('thinkstats2 SpearmanCorr',
          thinkstats2.SpearmanCorr(ages, weights))

    ScatterPlot(ages, weights, alpha=0.1)
    thinkplot.Save(root='chap07scatter1',
                   legend=False,
                   formats=['jpg'])
def MakeParetoCdf2():
    """Plot the CDF of height in Pareto World and save the figure.

    The curve is rendered by thinkstats2.RenderParetoCdf with xmin=100 and
    alpha=1.7 over the range 0 to 1000.0 using n=100.
    """
    xmin, alpha = 100, 1.7

    curve = thinkstats2.RenderParetoCdf(xmin, alpha, 0, 1000.0, n=100)
    xs, ps = curve
    thinkplot.Plot(xs, ps)

    # NOTE(review): the result is never used below; the call is kept in case
    # ParetoMedian has side effects -- confirm and drop if it is pure.
    _median = ParetoMedian(xmin, alpha)

    thinkplot.Save(root='analytic_pareto_height',
                   title='Pareto CDF',
                   xlabel='height (cm)',
                   ylabel='CDF',
                   legend=False)
def MakePlot(self, root='redline3'):
    """Plot CDFs and PMFs of prior_x, post_x and pmf_y.

    Three figures are saved: `root` (CDFs of all three distributions),
    `root + 'a'` (PMFs of prior_x and post_x) and `root + 'b'` (PMFs of all
    three). Every distribution is rescaled by 1/60 with ScaleDists; the
    x-axes are labelled in minutes.

    root: string filename root
    """
    scale = 1.0 / 60

    # CDFs of all three distributions
    sources = (self.prior_x, self.post_x, self.pmf_y)
    cdfs = ScaleDists([dist.MakeCdf() for dist in sources], scale)

    thinkplot.Clf()
    thinkplot.PrePlot(3)
    thinkplot.Cdfs(cdfs)
    thinkplot.Save(root=root,
                   xlabel='Time (min)',
                   ylabel='CDF',
                   formats=FORMATS)

    # PMFs of prior and posterior x
    prior_and_post = ScaleDists((self.prior_x, self.post_x), scale)

    thinkplot.PrePlot(3)
    thinkplot.Pmfs(prior_and_post)
    thinkplot.Save(root=root + 'a',
                   xlabel='Time (min)',
                   ylabel='Probability',
                   formats=FORMATS)

    # PMFs of prior x, posterior x and y
    all_pmfs = ScaleDists((self.prior_x, self.post_x, self.pmf_y), scale)

    thinkplot.PrePlot(3)
    thinkplot.Pmfs(all_pmfs)
    thinkplot.Save(root=root + 'b',
                   xlabel='Time (min)',
                   ylabel='Probability',
                   formats=FORMATS)
def main():
    """Run the survival analyses and write the figures.

    Plots the pregnancy data, the Cycle 6 marriage data and the remaining
    lifetime curves, then the resampled survival curves by decade for
    Cycles 5, 6 and 7, with and without predictions.
    """
    def save_unmarried(root):
        # both by-decade figures share the same axes and labels
        thinkplot.Save(root=root,
                       xlabel='age (years)',
                       ylabel='prob unmarried',
                       xlim=[13, 45],
                       ylim=[0, 1],
                       formats=FORMATS)

    thinkstats2.RandomSeed(17)

    pregnancy_sf = PlotPregnancyData(nsfg.ReadFemPreg())

    # plots based on Cycle 6
    cycle6 = ReadFemResp2002()
    marriage_sf = PlotMarriageData(cycle6)
    ResampleSurvival(cycle6)
    PlotRemainingLifetime(pregnancy_sf, marriage_sf)

    # Cycles 5 and 7
    cycle5 = ReadFemResp1995()
    cycle7 = ReadFemResp2010()
    all_cycles = [cycle5, cycle6, cycle7]

    # resampled survival functions by decade
    PlotResampledByDecade(all_cycles)
    save_unmarried('survival4')

    # the same, with predictions
    PlotResampledByDecade(all_cycles, predict_flag=True, omit=[5])
    save_unmarried('survival5')
def PlotOutliers(samples):
    """Make CDFs showing the distribution of outliers.

    For each sample, the values below 150 are turned into a CDF labelled
    with the sample's key, and all CDFs are drawn on one figure.

    samples: map from label to sequence of values (x-axis is labelled
             'Reported height (cm)')
    """
    cdfs = []
    # items() works on Python 2 and 3; iteritems() is an AttributeError on
    # Python 3 dicts.
    for label, sample in samples.items():
        outliers = [x for x in sample if x < 150]
        cdfs.append(thinkbayes.MakeCdfFromList(outliers, label))

    thinkplot.Clf()
    thinkplot.Cdfs(cdfs)
    thinkplot.Save(root='variability_cdfs',
                   title='CDF of height',
                   xlabel='Reported height (cm)',
                   ylabel='CDF')
def main():
    """Plot the Hockey prior, update it with observed data, plot the result.

    The prior PMF is saved with root 'hockey_self2_prior'. The hypothesis
    values and the likelihood of the value 2 under each one are printed,
    the suite is updated with [0, 2, 4, 3, 8], and the updated PMF is saved
    with root 'hockey_self2_posterior'.
    """
    # The function-call form prints the same text on Python 2 and 3; the
    # statement form `print "1"` is a SyntaxError on Python 3.
    print("1")

    hockey1 = Hockey()

    thinkplot.PrePlot(1)
    thinkplot.Pmf(hockey1)
    thinkplot.Save(root='hockey_self2_prior',
                   xlabel='',
                   ylabel='Probability',
                   formats=['pdf'])

    print(hockey1.Values())
    for hypo in hockey1.Values():
        print(hockey1.Likelihood(2, hypo))

    hockey1.UpdateSet([0, 2, 4, 3, 8])

    thinkplot.Pmf(hockey1)
    thinkplot.Save(root='hockey_self2_posterior',
                   xlabel='',
                   ylabel='Probability',
                   formats=['pdf'])

    print("No error, everything worked fine")
def main():
    """Plot the biased distribution of running speeds as a PMF and a CDF.

    The PMF of actual speeds is passed to BiasPmf with the value 7.5
    (presumably the observer's speed in mph -- confirm against BiasPmf).
    The histogram and the CDF of the result are saved.
    """
    speeds = relay.GetSpeeds(relay.ReadResults())

    # distribution of actual speeds
    actual_pmf = thinkstats2.MakePmfFromList(speeds, 'actual speeds')

    # biased distribution seen by the observer
    observed_pmf = BiasPmf(actual_pmf, 7.5, name='observed speeds')

    thinkplot.Hist(observed_pmf)
    thinkplot.Save(root='observed_speeds',
                   title='PMF of running speed',
                   xlabel='speed (mph)',
                   ylabel='probability')

    observed_cdf = thinkstats2.MakeCdfFromPmf(observed_pmf)

    thinkplot.Clf()
    thinkplot.Cdf(observed_cdf)
    thinkplot.Save(root='observed_speeds_cdf',
                   title='CDF of running speed',
                   xlabel='speed (mph)',
                   ylabel='cumulative probability')
def MakeGenderPlot(filename='heri14.csv'):
    """Plot the men's and women's series, then the gap between them.

    The first figure (root 'heri14.3') shows columns 6 (men) and 11 (women)
    of the data. The 1969 entry is then deleted from both columns and the
    second figure (root 'heri14.4') shows their difference together with
    whatever MakePlot draws for the model 'ys ~ ts'.

    filename: string name of the data file passed to ReadData
    """
    line_style = dict(linewidth=3, alpha=0.7)

    pyplot.clf()
    data = ReadData(filename)

    men = GetColumn(data, 6)
    men_ts, men_ys = RenderColumn(men)
    pyplot.plot(men_ts, men_ys, 'b-', label='men', **line_style)

    women = GetColumn(data, 11)
    women_ts, women_ys = RenderColumn(women)
    pyplot.plot(women_ts, women_ys, 'g-', label='women', **line_style)

    thinkplot.Save(root='heri14.3',
                   formats=FORMATS,
                   title='',
                   xlabel='',
                   ylabel='Preferred religion None (%)',
                   axis=[1967, UPPER, 0, 28])

    # drop the 1969 entry from both columns before taking the difference
    del men[1969]
    del women[1969]

    ts, gaps = DiffColumns(men, women)
    MakePlot(ts, gaps, model='ys ~ ts')
    pyplot.plot(ts, gaps, color='purple', label='Gender gap', **line_style)

    thinkplot.Save(root='heri14.4',
                   formats=FORMATS,
                   title='',
                   xlabel='',
                   ylabel='Percentage points',
                   axis=[1967, UPPER, 0, 6])
def PlotQuadraticModel(daily, name):
    """Fit the quadratic model to `daily` and write three figures.

    Saves the fitted values, the residual percentiles, and the predictions
    over 200 evenly spaced values from 0 to 10.

    daily: object with `years` and `ppg` attributes, accepted by
           RunQuadraticModel
    name: string label used for the plotted series
    """
    price_label = 'price per gram ($)'

    model, results = RunQuadraticModel(daily)
    regression.SummarizeResults(results)

    # fitted values
    timeseries.PlotFittedValues(model, results, label=name)
    thinkplot.Save(root='Output_Timeseries1',
                   title='Fitted Val',
                   xlabel='yr',
                   xlim=[-0.2, 4],
                   ylabel=price_label)

    # residuals
    timeseries.PlotResidualPercentiles(model, results)
    thinkplot.Save(root='Output_Timeseries2',
                   title='Residual',
                   xlabel='yr',
                   ylabel=price_label)

    # predictions
    years = np.linspace(0, 10, 200)
    thinkplot.Scatter(daily.years, daily.ppg, alpha=0.1, label=name)
    timeseries.PlotPredictions(daily, years, func=RunQuadraticModel)

    # pad the x-range slightly beyond the first and last year
    x_range = [years[0] - 0.1, years[-1] + 0.1]
    thinkplot.Save(root='Output_Timeseries3',
                   title='Predict',
                   xlabel='yr',
                   xlim=x_range,
                   ylabel=price_label)
def ResampleDivorceCurveByDecade(resps):
    """Plot resampled divorce curves grouped by the 'decade' column.

    Runs 41 iterations. Each one resamples every respondent frame with
    thinkstats2.ResampleRowsWeighted, concatenates the results, groups them
    by 'decade' and plots the estimated survival curves. Labels are added
    on the first iteration only.

    resps: list of respondent DataFrames
    """
    num_iterations = 41
    labels_added = False

    for _ in range(num_iterations):
        resampled = []
        for resp in resps:
            resampled.append(thinkstats2.ResampleRowsWeighted(resp))

        combined = pandas.concat(resampled, ignore_index=True)
        by_decade = combined.groupby('decade')

        if not labels_added:
            survival.AddLabelsByDecade(by_decade, alpha=0.7)
            labels_added = True

        EstimateSurvivalByDecade(by_decade, alpha=0.1)

    thinkplot.Save(root='survival7',
                   xlabel='years',
                   axis=[0, 28, 0, 1])
def PlotSuites(suites, root):
    """Plot the PMFs of several suites on one figure, with a legend.

    suites: sequence of Suite objects; its length sets the PrePlot count
    root: string filename root passed to thinkplot.Save
    """
    output_formats = ['pdf', 'png']

    thinkplot.Clf()
    thinkplot.PrePlot(len(suites))
    thinkplot.Pmfs(suites)

    save_options = dict(
        xlabel='Percentage of Active Female Users',
        ylabel='Probability',
        formats=output_formats,
        legend=True,
    )
    thinkplot.Save(root=root, **save_options)
def MakePmfPlot(alpha=10):
    """Plot the location PMF for several values of beta and save it.

    alpha: value passed to MakeLocationPmf for every curve
    """
    betas = [10, 20, 40]
    locations = range(31)

    thinkplot.PrePlot(num=len(betas))
    for beta in betas:
        location_pmf = MakeLocationPmf(alpha, beta, locations)
        location_pmf.name = 'beta = %d' % beta
        thinkplot.Pdf(location_pmf)

    thinkplot.Save('paintball1',
                   xlabel='Distance',
                   ylabel='Prob',
                   formats=FORMATS)
def PlotPregnancyData(preg):
    """Plot the survival curve of pregnancy lengths and return it.

    Complete pregnancies are rows whose `outcome` is 1, 3 or 4; ongoing
    pregnancies are rows whose `outcome` is 6. The counts of both are
    printed and the figure is saved with root 'survival1'.

    preg: DataFrame with `outcome` and `prglngth` columns

    returns: the result of MakeSurvival() on the hazard function estimated
             from the complete and ongoing lengths
    """
    complete_lengths = preg.query('outcome in [1, 3, 4]').prglngth
    print('Number of complete pregnancies', len(complete_lengths))

    is_ongoing = preg.outcome == 6
    ongoing_lengths = preg[is_ongoing].prglngth
    print('Number of ongoing pregnancies', len(ongoing_lengths))

    PlotSurvival(complete_lengths)
    thinkplot.Save(root='survival1',
                   xlabel='t (weeks)',
                   formats=FORMATS)

    hazard = EstimateHazardFunction(complete_lengths, ongoing_lengths)
    return hazard.MakeSurvival()
def main():
    """Build a weighted mixture of dice and plot its PMF.

    Dice with 6, 8, 12 and 20 sides get weights 2, 3, 1 and 1. Each outcome
    of each die adds weight * probability to the mixture, which is then
    normalized and saved with root 'dice_Mix_self3'.
    """
    # (sides, weight) pairs
    weighted_sides = [(6, 2), (8, 3), (12, 1), (20, 1)]

    dice = Pmf()
    for sides, weight in weighted_sides:
        dice.Set(Die(sides), weight)

    mixture = Pmf()
    for die, weight in dice.Items():
        for outcome, prob in die.Items():
            mixture.Incr(outcome, weight * prob)
    mixture.Normalize()

    thinkplot.PrePlot(1)
    thinkplot.Pmf(mixture)
    thinkplot.Save(root='dice_Mix_self3',
                   xlabel='',
                   ylabel='Probability',
                   formats=['pdf'])
def main():
    """Compare Bayesian and frequentist odds for the billiards problem.

    A Billiards suite over 101 evenly spaced values in [0, 1] is updated
    with the tuple (5, 3) (presumably wins and losses -- confirm against
    Billiards.Likelihood) and its PDF is saved with root 'billiards1'.
    The odds from ProbWinMatch and from the estimate 5/8 are then printed.
    """
    ps = numpy.linspace(0, 1, 101)
    bill = Billiards(ps)
    bill.Update((5, 3))

    thinkplot.Pdf(bill)
    thinkplot.Save(root='billiards1',
                   xlabel='probability of win',
                   ylabel='PDF',
                   formats=['png'])

    bayes_result = ProbWinMatch(bill)
    print(thinkbayes.Odds(1 - bayes_result))

    # Float literal forces true division. On Python 2 without
    # `from __future__ import division`, 5 / 8 is 0, which makes
    # freq_result equal to 1.
    mle = 5.0 / 8
    freq_result = (1 - mle) ** 3
    print(thinkbayes.Odds(1 - freq_result))
def TestSample(live):
    """Plot the CDF of birth weights against the CDF of a random sample.

    A sample of 1000 values is drawn from the weight CDF and both CDFs are
    drawn on one figure, saved with root 'cumulative_sample'.

    live: DataFrame for live births (reads the `totalwgt_lb` column)
    """
    weight_cdf = thinkstats2.Cdf(live.totalwgt_lb, label='totalwgt_lb')
    resampled_cdf = thinkstats2.Cdf(weight_cdf.Sample(1000), label='sample')

    thinkplot.PrePlot(2)
    thinkplot.Cdfs([weight_cdf, resampled_cdf])
    thinkplot.Save(root='cumulative_sample',
                   xlabel='weight (pounds)',
                   ylabel='CDF')