Ejemplo n.º 1
0
    def PlotJointDist(self):
        """Draws the age-size joint distribution as a pcolor figure.

        The joint distribution is read from self.cache and the figure is
        saved under the root name 'kidney8'.
        """
        thinkplot.Clf()

        age_size = self.cache.GetDistAgeSize()
        thinkplot.Contour(age_size, contour=False, pcolor=True)

        # values handed to MakeLogTicks to build the y-axis ticks
        tick_values = [0.2, 0.5, 1, 2, 5, 10, 20]

        save_options = dict(
            root='kidney8',
            formats=FORMATS,
            axis=[0, 41, -0.7, 1.31],
            yticks=MakeLogTicks(tick_values),
            xlabel='ages',
            ylabel='diameter (cm, log scale)',
        )
        thinkplot.Save(**save_options)
Ejemplo n.º 2
0
def MakePlots(player1, player2):
    """Generates two plots and prints summary statistics.

    price1 shows the priors for the two players
    price2 shows the distribution of diff for the two players

    Also prints the median of each player's diff CDF and each player's
    probability of overbidding.

    player1, player2: objects providing PmfPrice, CdfDiff and ProbOverbid
    """

    # plot the prior distribution of price for both players
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)
    pmf1 = player1.PmfPrice()
    pmf1.label = 'showcase 1'
    pmf2 = player2.PmfPrice()
    pmf2.label = 'showcase 2'
    thinkplot.Pdfs([pmf1, pmf2])
    thinkplot.Save(root='price1',
                   xlabel='price ($)',
                   ylabel='PDF',
                   formats=FORMATS)

    # plot the distribution of diff for both players
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)
    cdf1 = player1.CdfDiff()
    cdf1.label = 'player 1'
    cdf2 = player2.CdfDiff()
    cdf2.label = 'player 2'

    # label each median with its player; the two lines used to carry the
    # same text and could not be told apart in the output
    print('Player 1 median', cdf1.Percentile(50))
    print('Player 2 median', cdf2.Percentile(50))

    print('Player 1 overbids', player1.ProbOverbid())
    print('Player 2 overbids', player2.ProbOverbid())

    thinkplot.Cdfs([cdf1, cdf2])
    thinkplot.Save(root='price2',
                   xlabel='diff ($)',
                   ylabel='CDF',
                   formats=FORMATS)
Ejemplo n.º 3
0
def MakeParetoCdf():
    """Plots the Pareto CDF for three values of alpha and saves the figure."""
    minimum = 0.5
    shapes = [2.0, 1.0, 0.5]

    thinkplot.PrePlot(3)
    for shape in shapes:
        curve = thinkstats2.RenderParetoCdf(minimum, shape, 0, 10.0, n=100)
        xs, ps = curve
        curve_label = 'alpha=%g' % shape
        thinkplot.Plot(xs, ps, label=curve_label)

    save_options = dict(root='analytic_pareto_cdf',
                        title='Pareto CDF',
                        xlabel='x',
                        ylabel='CDF')
    thinkplot.Save(**save_options)
Ejemplo n.º 4
0
def main():
    """Updates a Train suite with one observation and plots the posterior.

    The hypotheses are the integers 1 through 1000; the suite is updated
    with the observation 60, its mean is printed, and the Pmf is saved
    under the root name 'train1'.
    """
    # range exists on both Python 2 and Python 3; xrange is undefined on 3
    hypos = range(1, 1001)
    suite = Train(hypos)

    suite.Update(60)
    print(suite.Mean())

    thinkplot.PrePlot(1)
    thinkplot.Pmf(suite)
    thinkplot.Save(root='train1',
                   xlabel='Number of trains',
                   ylabel='Probability',
                   formats=['pdf', 'eps'])
Ejemplo n.º 5
0
def PlotSuites(suites, root):
    """Plots the Pmfs of all given suites on one figure and saves it.

    suites: sequence of Suite objects
    root: string filename root to write
    """
    thinkplot.Clf()

    # one plot slot per suite
    thinkplot.PrePlot(len(suites))
    thinkplot.Pmfs(suites)

    output_formats = ['pdf', 'eps']
    thinkplot.Save(root=root,
                   xlabel='x',
                   ylabel='Probability',
                   formats=output_formats)
Ejemplo n.º 6
0
def MakeExpoCdf():
    """Plots the exponential CDF for three values of lambda and saves it."""
    rates = [2.0, 1, 0.5]

    thinkplot.PrePlot(3)
    for rate in rates:
        xs, ps = thinkstats2.RenderExpoCdf(rate, 0, 3.0, 50)
        thinkplot.Plot(xs, ps, label=r'$\lambda=%g$' % rate)

    save_options = dict(root='analytic_expo_cdf',
                        title='Exponential CDF',
                        xlabel='x',
                        ylabel='CDF')
    thinkplot.Save(**save_options)
Ejemplo n.º 7
0
def main(name):
    """Runs the time series analysis and saves the prediction figures.

    name: supplied by the caller; this function never reads it
    """
    thinkstats2.RandomSeed(18)
    transactions = ReadData()

    dailies = GroupByQualityAndDay(transactions)
    PlotDailies(dailies)
    RunModels(dailies)
    PrintSerialCorrelations(dailies)
    MakeAcfPlot(dailies)

    def save_predictions(root, years):
        # pad the x range by 0.1 on either side of the prediction years
        margins = years[0] - 0.1, years[-1] + 0.1
        thinkplot.Save(root=root,
                       title='predictions',
                       xlabel='years',
                       xlim=margins,
                       ylabel='price per gram ($)')

    # linear model, rolling mean, filled series and predictions for 'high'
    quality = 'high'
    series = dailies[quality]

    PlotLinearModel(series, quality)
    PlotRollingMean(series, quality)
    PlotFilled(series, quality)

    years = np.linspace(0, 5, 101)
    thinkplot.Scatter(series.years, series.ppg, alpha=0.1, label=quality)
    PlotPredictions(series, years)
    save_predictions('timeseries4', years)

    # intervals and predictions for 'medium'
    quality = 'medium'
    series = dailies[quality]

    thinkplot.Scatter(series.years, series.ppg, alpha=0.1, label=quality)
    PlotIntervals(series, years)
    PlotPredictions(series, years)
    save_predictions('timeseries5', years)
Ejemplo n.º 8
0
def ComputeSkewnesses():
    """Plots estimated PDFs of birth weight and adult weight.

    For each data set, prints a heading, summarizes the data, marks the
    mean and median with vertical lines and text, and saves the figure.
    """
    def draw_marker(x, height):
        # grey vertical line from the x axis up to height
        thinkplot.Plot([x, x], [0, height], color='0.6', linewidth=1)

    # birth weight
    live, _firsts, _others = first.MakeFrames()
    birth_weights = live.totalwgt_lb.dropna()
    print('Birth weight')
    mean, median = Summarize(birth_weights)

    height = 0.35
    text_y = 0.1 * height
    draw_marker(mean, height)
    thinkplot.Text(mean - 0.15, text_y, 'mean', horizontalalignment='right')
    draw_marker(median, height)
    thinkplot.Text(median + 0.1, text_y, 'median', horizontalalignment='left')

    birth_pdf = thinkstats2.EstimatedPdf(birth_weights)
    thinkplot.Pdf(birth_pdf, label='birth weight')
    thinkplot.Save(root='density_totalwgt_kde', xlabel='lbs', ylabel='PDF')

    # adult weight
    respondents = brfss.ReadBrfss(nrows=None)
    adult_weights = respondents.wtkg2.dropna()
    print('Adult weight')
    mean, median = Summarize(adult_weights)

    height = 0.02499
    text_y = 0.1 * height
    draw_marker(mean, height)
    thinkplot.Text(mean + 1, text_y, 'mean', horizontalalignment='left')
    draw_marker(median, height)
    thinkplot.Text(median - 1.5, text_y, 'median',
                   horizontalalignment='right')

    adult_pdf = thinkstats2.EstimatedPdf(adult_weights)
    thinkplot.Pdf(adult_pdf, label='adult weight')
    thinkplot.Save(root='density_wtkg2_kde',
                   xlabel='kg',
                   ylabel='PDF',
                   xlim=[0, 200])
Ejemplo n.º 9
0
def ScatterPlot(root, heights, weights, alpha=1.0):
    """Draws weight against height as a scatter plot and saves the figure.

    root: string filename root
    heights: sequence of float
    weights: sequence of float
    alpha: float, forwarded to thinkplot.Scatter
    """
    thinkplot.Scatter(heights, weights, alpha=alpha)

    axis_limits = [140, 210, 20, 200]
    save_options = dict(root=root,
                        xlabel='Height (cm)',
                        ylabel='Weight (kg)',
                        axis=axis_limits,
                        legend=False)
    thinkplot.Save(**save_options)
Ejemplo n.º 10
0
def PlotPriorDist(pmf):
    """Plots the CDF of the prior distribution of p_correct and saves it.

    pmf: prior
    """
    thinkplot.Clf()
    thinkplot.PrePlot(num=1)

    prior_cdf = thinkbayes.MakeCdfFromPmf(pmf, 'prior')
    thinkplot.Cdf(prior_cdf)

    output_formats = ['pdf', 'eps']
    thinkplot.Save(root='sat_prior',
                   xlabel='p_correct',
                   ylabel='CDF',
                   formats=output_formats)
Ejemplo n.º 11
0
def HexBin(root, heights, weights, bins=None):
    """Draws weight against height as a hexbin plot and saves the figure.

    root: string filename root
    heights: sequence of float
    weights: sequence of float
    bins: 'log' or None for linear
    """
    thinkplot.HexBin(heights, weights, bins=bins)

    axis_limits = [140, 210, 20, 200]
    save_options = dict(root=root,
                        xlabel='Height (cm)',
                        ylabel='Weight (kg)',
                        axis=axis_limits,
                        legend=False)
    thinkplot.Save(**save_options)
Ejemplo n.º 12
0
    def PlotPmfs(self, root='redline0'):
        """Plots the computed Pmfs.

        Scales self.pmf_z and self.pmf_zb by 1/60 with ScaleDists, plots
        both Pmfs on one figure and saves it.

        root: string filename root
        """
        pmfs = ScaleDists([self.pmf_z, self.pmf_zb], 1.0 / 60)

        thinkplot.Clf()
        thinkplot.PrePlot(2)
        thinkplot.Pmfs(pmfs)
        # these are PMFs, so the y axis shows probability, not a CDF
        thinkplot.Save(root=root,
                       xlabel='Time (min)',
                       ylabel='Probability',
                       formats=FORMATS)
Ejemplo n.º 13
0
def main():
    """Updates a Train suite with several observations and plots the result.

    The hypotheses are the integers 0 through 100. The suite is updated
    once per observation, printed, and its Pmf is saved under the root
    name 'train_self1'.
    """
    suite = Train(range(0, 101))

    observations = [6, 10, 20, 12, 15, 19, 4, 10]
    for observed in observations:
        suite.Update(observed)
    suite.Print()

    thinkplot.PrePlot(1)
    thinkplot.Pmf(suite)

    save_options = dict(root='train_self1',
                        xlabel='Number of trains',
                        ylabel='Probability',
                        formats=['pdf'])
    thinkplot.Save(**save_options)
Ejemplo n.º 14
0
def main():
    """Plots the marriage data and returns.

    NOTE(review): the unconditional return below makes everything after
    it unreachable. Confirm whether the pregnancy analysis is meant to be
    disabled or whether the return is a leftover from debugging.
    """
    PlotMarriageData()
    return

    # unreachable: pregnancy-length analysis (see note in the docstring)
    preg = nsfg.ReadFemPreg()
    print('Number of pregnancies', len(preg))

    # pregnancies whose outcome code is 1, 3 or 4
    complete = preg.query('outcome in [1, 3, 4]').prglngth
    print('Number of complete pregnancies', len(complete))
    # pregnancies whose outcome code is 6
    ongoing = preg[preg.outcome == 6].prglngth
    print('Number of ongoing pregnancies', len(ongoing))

    PlotSurvival(complete)
    thinkplot.Save(root='survival1', xlabel='t (weeks)')
def main(script):
    """Reports correlations between mother's age and birth weight.

    Prints the thinkstats2 Corr and SpearmanCorr values for the two
    columns and saves a scatter plot under the root 'chap07scatter1'.

    script: supplied by the caller; this function never reads it
    """
    thinkstats2.RandomSeed(17)

    frames = first.MakeFrames()
    live, _firsts, _others = frames
    # keep only rows where both columns are present
    live = live.dropna(subset=['agepreg', 'totalwgt_lb'])
    BinnedPercentiles(live)

    ages, weights = live.agepreg, live.totalwgt_lb

    pearson = thinkstats2.Corr(ages, weights)
    print('thinkstats2 Corr', pearson)
    spearman = thinkstats2.SpearmanCorr(ages, weights)
    print('thinkstats2 SpearmanCorr', spearman)

    ScatterPlot(ages, weights, alpha=0.1)
    thinkplot.Save(root='chap07scatter1', legend=False, formats=['jpg'])
Ejemplo n.º 16
0
def MakeParetoCdf2():
    """Plots the CDF of height in Pareto World and saves the figure."""
    minimum, shape = 100, 1.7

    curve = thinkstats2.RenderParetoCdf(minimum, shape, 0, 1000.0, n=100)
    xs, ps = curve
    thinkplot.Plot(xs, ps)

    # the median is computed here but is not used by the plot below
    _median = ParetoMedian(minimum, shape)

    save_options = dict(root='analytic_pareto_height',
                        title='Pareto CDF',
                        xlabel='height (cm)',
                        ylabel='CDF',
                        legend=False)
    thinkplot.Save(**save_options)
Ejemplo n.º 17
0
    def MakePlot(self, root='redline3'):
        """Saves three figures: the CDFs, then two sets of PMFs.

        Every distribution is passed through ScaleDists with a factor of
        1/60 before it is plotted.

        root: string filename root; the PMF figures append 'a' and 'b'
        """
        factor = 1.0 / 60

        def save_figure(name, ylabel):
            thinkplot.Save(root=name,
                           xlabel='Time (min)',
                           ylabel=ylabel,
                           formats=FORMATS)

        # CDFs of prior_x, post_x and pmf_y
        unscaled_cdfs = [
            self.prior_x.MakeCdf(),
            self.post_x.MakeCdf(),
            self.pmf_y.MakeCdf(),
        ]
        scaled_cdfs = ScaleDists(unscaled_cdfs, factor)

        thinkplot.Clf()
        thinkplot.PrePlot(3)
        thinkplot.Cdfs(scaled_cdfs)
        save_figure(root, 'CDF')

        # PMFs of prior_x and post_x
        scaled_pair = ScaleDists((self.prior_x, self.post_x), factor)
        thinkplot.PrePlot(3)
        thinkplot.Pmfs(scaled_pair)
        save_figure(root + 'a', 'Probability')

        # PMFs of prior_x, post_x and pmf_y
        scaled_triple = ScaleDists(
            (self.prior_x, self.post_x, self.pmf_y), factor)
        thinkplot.PrePlot(3)
        thinkplot.Pmfs(scaled_triple)
        save_figure(root + 'b', 'Probability')
Ejemplo n.º 18
0
def main():
    """Runs the survival analysis and saves the resampled-by-decade figures.

    Plots the pregnancy and marriage data, resamples survival for the
    Cycle 6 respondents, plots remaining lifetime, then saves 'survival4'
    (resampled by decade) and 'survival5' (same, with predictions).
    """
    thinkstats2.RandomSeed(17)

    def save_survival(root):
        # both decade figures share the same labels and axis limits
        thinkplot.Save(root=root,
                       xlabel='age (years)',
                       ylabel='prob unmarried',
                       xlim=[13, 45],
                       ylim=[0, 1],
                       formats=FORMATS)

    pregnancies = nsfg.ReadFemPreg()
    pregnancy_sf = PlotPregnancyData(pregnancies)

    # make the plots based on Cycle 6
    cycle6 = ReadFemResp2002()
    marriage_sf = PlotMarriageData(cycle6)
    ResampleSurvival(cycle6)
    PlotRemainingLifetime(pregnancy_sf, marriage_sf)

    # read Cycles 5 and 7
    cycle5 = ReadFemResp1995()
    cycle7 = ReadFemResp2010()
    all_cycles = [cycle5, cycle6, cycle7]

    # resampled survival functions by decade
    PlotResampledByDecade(all_cycles)
    save_survival('survival4')

    # resampled survival functions by decade, with predictions
    PlotResampledByDecade(all_cycles, predict_flag=True, omit=[5])
    save_survival('survival5')
Ejemplo n.º 19
0
def PlotOutliers(samples):
    """Make CDFs showing the distribution of outliers.

    For each sample, the values below 150 are kept and turned into a CDF;
    all CDFs are drawn on one figure saved as 'variability_cdfs'.

    samples: map from label to sequence of values
    """
    cdfs = []
    # items() exists on both Python 2 and 3; iteritems() is Python 2 only
    for label, sample in samples.items():
        outliers = [x for x in sample if x < 150]

        cdf = thinkbayes.MakeCdfFromList(outliers, label)
        cdfs.append(cdf)

    thinkplot.Clf()
    thinkplot.Cdfs(cdfs)
    thinkplot.Save(root='variability_cdfs',
                   title='CDF of height',
                   xlabel='Reported height (cm)',
                   ylabel='CDF')
Ejemplo n.º 20
0
def main():
    """Plots a Hockey suite before and after updating it with data.

    Saves the initial Pmf as 'hockey_self2_prior', prints the hypotheses
    and the likelihood of the value 2 under each, updates the suite with
    a set of observations, and saves the result as
    'hockey_self2_posterior'.
    """
    # call form of print parses on Python 3 and prints the same on Python 2
    print("1")
    hockey1 = Hockey()

    thinkplot.PrePlot(1)
    thinkplot.Pmf(hockey1)
    thinkplot.Save(root='hockey_self2_prior',
                   xlabel='',
                   ylabel='Probability',
                   formats=['pdf'])

    print(hockey1.Values())
    for hypo in hockey1.Values():
        print(hockey1.Likelihood(2, hypo))

    hockey1.UpdateSet([0, 2, 4, 3, 8])
    thinkplot.Pmf(hockey1)
    thinkplot.Save(root='hockey_self2_posterior',
                   xlabel='',
                   ylabel='Probability',
                   formats=['pdf'])

    print("No error, everything worked fine")
Ejemplo n.º 21
0
def main():
    """Plots the biased distribution of running speeds as a PMF and a CDF.

    Reads the relay results, builds a Pmf of the speeds, biases it with
    BiasPmf, and saves a histogram ('observed_speeds') and a CDF
    ('observed_speeds_cdf') of the biased distribution.
    """
    speeds = relay.GetSpeeds(relay.ReadResults())

    # distribution of actual speeds
    actual = thinkstats2.MakePmfFromList(speeds, 'actual speeds')

    # biased distribution seen by the observer
    observed = BiasPmf(actual, 7.5, name='observed speeds')

    thinkplot.Hist(observed)
    pmf_options = dict(root='observed_speeds',
                       title='PMF of running speed',
                       xlabel='speed (mph)',
                       ylabel='probability')
    thinkplot.Save(**pmf_options)

    observed_cdf = thinkstats2.MakeCdfFromPmf(observed)

    thinkplot.Clf()
    thinkplot.Cdf(observed_cdf)
    cdf_options = dict(root='observed_speeds_cdf',
                       title='CDF of running speed',
                       xlabel='speed (mph)',
                       ylabel='cumulative probability')
    thinkplot.Save(**cdf_options)
Ejemplo n.º 22
0
def MakeGenderPlot(filename='heri14.csv'):
    """Plots columns 6 and 11 of the data as men and women, then their gap.

    Saves the two series as 'heri14.3', then removes the 1969 entry from
    both columns, plots their difference with a fitted model, and saves
    that as 'heri14.4'.

    filename: string name of the data file to read
    """
    pyplot.clf()

    data = ReadData(filename)

    # (label, column index, line format) for each series
    series_specs = (('men', 6, 'b-'), ('women', 11, 'g-'))
    columns = {}
    for label, index, line_format in series_specs:
        column = GetColumn(data, index)
        ts, ys = RenderColumn(column)
        pyplot.plot(ts, ys, line_format, linewidth=3, alpha=0.7, label=label)
        columns[label] = column

    thinkplot.Save(root='heri14.3',
                   formats=FORMATS,
                   title='',
                   xlabel='',
                   ylabel='Preferred religion None (%)',
                   axis=[1967, UPPER, 0, 28])

    men, women = columns['men'], columns['women']
    del men[1969]
    del women[1969]
    ts, gaps = DiffColumns(men, women)

    MakePlot(ts, gaps, model='ys ~ ts')

    pyplot.plot(ts, gaps, color='purple', linewidth=3, alpha=0.7,
                label='Gender gap')

    thinkplot.Save(root='heri14.4',
                   formats=FORMATS,
                   title='',
                   xlabel='',
                   ylabel='Percentage points',
                   axis=[1967, UPPER, 0, 6])
def PlotQuadraticModel(daily, name):
    """Fits the quadratic model and saves three figures.

    Saves the fitted values, the residual percentiles, and the
    predictions, as 'Output_Timeseries1' through 'Output_Timeseries3'.

    daily: object with years and ppg attributes, passed to the model
    name: string label for the plotted series
    """
    price_label = 'price per gram ($)'

    model, results = RunQuadraticModel(daily)
    regression.SummarizeResults(results)

    # fitted values
    timeseries.PlotFittedValues(model, results, label=name)
    thinkplot.Save(root='Output_Timeseries1',
                   title='Fitted Val',
                   xlabel='yr',
                   xlim=[-0.2, 4],
                   ylabel=price_label)

    # residual percentiles
    timeseries.PlotResidualPercentiles(model, results)
    thinkplot.Save(root='Output_Timeseries2',
                   title='Residual',
                   xlabel='yr',
                   ylabel=price_label)

    # predictions over 200 points from 0 to 10
    years = np.linspace(0, 10, 200)
    thinkplot.Scatter(daily.years, daily.ppg, alpha=0.1, label=name)
    timeseries.PlotPredictions(daily, years, func=RunQuadraticModel)
    padded_range = [years[0]-0.1, years[-1]+0.1]
    thinkplot.Save(root='Output_Timeseries3',
                   title='Predict',
                   xlabel='yr',
                   xlim=padded_range,
                   ylabel=price_label)
Ejemplo n.º 24
0
def ResampleDivorceCurveByDecade(resps):
    """Plots resampled divorce curves grouped by decade.

    Runs 41 iterations; each resamples every respondent DataFrame,
    concatenates the results, groups them by the 'decade' column and
    plots the estimated survival curves. Labels are added on the first
    iteration only. The figure is saved as 'survival7'.

    resps: list of respondent DataFrames
    """
    iterations = 41
    for iteration in range(iterations):
        resampled = []
        for resp in resps:
            resampled.append(thinkstats2.ResampleRowsWeighted(resp))

        combined = pandas.concat(resampled, ignore_index=True)
        by_decade = combined.groupby('decade')

        # add the labels only once
        if iteration == 0:
            survival.AddLabelsByDecade(by_decade, alpha=0.7)

        EstimateSurvivalByDecade(by_decade, alpha=0.1)

    thinkplot.Save(root='survival7', xlabel='years', axis=[0, 28, 0, 1])
Ejemplo n.º 25
0
def PlotSuites(suites, root):
    """Plots the Pmfs of all given suites on one figure and saves it.

    suites: sequence of Suite objects
    root: string filename root to write
    """
    output_formats = ['pdf', 'png']

    thinkplot.Clf()
    # one plot slot per suite
    thinkplot.PrePlot(len(suites))
    thinkplot.Pmfs(suites)

    save_options = dict(root=root,
                        xlabel='Percentage of Active Female Users',
                        ylabel='Probability',
                        formats=output_formats,
                        legend=True)
    thinkplot.Save(**save_options)
Ejemplo n.º 26
0
def MakePmfPlot(alpha=10):
    """Plots the Pmf of location for three values of beta and saves it.

    alpha: passed to MakeLocationPmf together with each beta
    """
    locations = range(0, 31)
    betas = [10, 20, 40]

    thinkplot.PrePlot(num=len(betas))

    for beta in betas:
        location_pmf = MakeLocationPmf(alpha, beta, locations)
        location_pmf.name = 'beta = %d' % beta
        thinkplot.Pdf(location_pmf)

    save_options = dict(xlabel='Distance',
                        ylabel='Prob',
                        formats=FORMATS)
    thinkplot.Save('paintball1', **save_options)
Ejemplo n.º 27
0
def PlotPregnancyData(preg):
    """Plots the survival curve of pregnancy lengths and estimates hazard.

    Prints the number of complete and ongoing pregnancies, saves the
    survival plot of the complete ones as 'survival1', and builds a
    survival function from the estimated hazard function.

    preg: DataFrame with outcome and prglngth columns

    returns: the result of MakeSurvival on the estimated hazard function
    """
    # outcome codes 1, 3 and 4 are treated as complete, 6 as ongoing
    complete_rows = preg.query('outcome in [1, 3, 4]')
    complete = complete_rows.prglngth
    print('Number of complete pregnancies', len(complete))

    ongoing_rows = preg[preg.outcome == 6]
    ongoing = ongoing_rows.prglngth
    print('Number of ongoing pregnancies', len(ongoing))

    PlotSurvival(complete)
    thinkplot.Save(root='survival1', xlabel='t (weeks)', formats=FORMATS)

    hazard = EstimateHazardFunction(complete, ongoing)
    return hazard.MakeSurvival()
Ejemplo n.º 28
0
def main():
    """Builds a weighted mixture of dice and plots its Pmf.

    Four dice (6, 8, 12 and 20 sides) get weights 2, 3, 1 and 1. The
    mixture accumulates weight * prob for every outcome of every die, is
    normalized, and is saved as 'dice_Mix_self3'.
    """
    # (number of sides, weight) for each die
    weighted_sides = [(6, 2), (8, 3), (12, 1), (20, 1)]

    dice = Pmf()
    for sides, weight in weighted_sides:
        dice.Set(Die(sides), weight)

    mixture = Pmf()
    for die, weight in dice.Items():
        for outcome, prob in die.Items():
            mixture.Incr(outcome, weight * prob)
    mixture.Normalize()

    thinkplot.PrePlot(1)
    thinkplot.Pmf(mixture)
    thinkplot.Save(root='dice_Mix_self3',
                   xlabel='',
                   ylabel='Probability',
                   formats=['pdf'])
Ejemplo n.º 29
0
def main():
    """Plots the Billiards posterior and prints two sets of odds.

    The suite covers 101 evenly spaced probabilities in [0, 1] and is
    updated with the data (5, 3); its Pdf is saved as 'billiards1'. The
    odds are printed first from ProbWinMatch on the posterior, then from
    the point estimate 5/8.
    """
    ps = numpy.linspace(0, 1, 101)
    bill = Billiards(ps)
    bill.Update((5, 3))
    thinkplot.Pdf(bill)
    thinkplot.Save(root='billiards1',
                   xlabel='probability of win',
                   ylabel='PDF',
                   formats=['png'])

    bayes_result = ProbWinMatch(bill)
    print(thinkbayes.Odds(1-bayes_result))

    # float literal keeps this 0.625 under Python 2, where 5 / 8 is 0
    mle = 5.0 / 8
    freq_result = (1-mle)**3
    print(thinkbayes.Odds(1-freq_result))
Ejemplo n.º 30
0
def TestSample(live):
    """Plots the CDF of birth weights against the CDF of a random sample.

    Draws 1000 values from the CDF of totalwgt_lb, builds a CDF of that
    sample, and saves both CDFs on one figure as 'cumulative_sample'.

    live: DataFrame for live births
    """
    population_cdf = thinkstats2.Cdf(live.totalwgt_lb, label='totalwgt_lb')

    drawn = population_cdf.Sample(1000)
    drawn_cdf = thinkstats2.Cdf(drawn, label='sample')

    thinkplot.PrePlot(2)
    thinkplot.Cdfs([population_cdf, drawn_cdf])

    save_options = dict(root='cumulative_sample',
                        xlabel='weight (pounds)',
                        ylabel='CDF')
    thinkplot.Save(**save_options)