Ejemplo n.º 1
0
def ComputeSkewnesses():
    """Plot estimated densities of birth weight and adult weight.

    For each of the two datasets, the mean and median returned by
    Summarize are marked with labeled gray vertical lines, the estimated
    PDF (KDE) of the data is drawn, and the figure is saved:
    'density_totalwgt_kde' for birth weight (x-axis in lbs) and
    'density_wtkg2_kde' for adult weight (x-axis in kg).
    """
    def draw_marker(x, top):
        # Thin gray vertical segment from the x-axis up to `top`.
        thinkplot.Plot([x, x], [0, top], color='0.6', linewidth=1)

    # ---- birth weight ----
    # Only the frame of live births is used; the other two are ignored.
    live, _firsts, _others = first.MakeFrames()
    birth_weights = live.totalwgt_lb.dropna()
    print('Birth weight')
    mean, median = Summarize(birth_weights)

    # Marker height and label offsets are hand-tuned for this figure.
    top = 0.35
    label_y = 0.1 * top
    draw_marker(mean, top)
    thinkplot.Text(mean - 0.15, label_y, 'mean', horizontalalignment='right')
    draw_marker(median, top)
    thinkplot.Text(median + 0.1, label_y, 'median', horizontalalignment='left')

    thinkplot.Pdf(thinkstats2.EstimatedPdf(birth_weights),
                  label='birth weight')
    thinkplot.Save(root='density_totalwgt_kde', xlabel='lbs', ylabel='PDF')

    # ---- adult weight ----
    respondents = brfss.ReadBrfss(nrows=None)
    adult_weights = respondents.wtkg2.dropna()
    print('Adult weight')
    mean, median = Summarize(adult_weights)

    # Here the labels sit on the opposite sides compared with the
    # birth-weight figure: 'mean' to the right, 'median' to the left.
    top = 0.02499
    label_y = 0.1 * top
    draw_marker(mean, top)
    thinkplot.Text(mean + 1, label_y, 'mean', horizontalalignment='left')
    draw_marker(median, top)
    thinkplot.Text(median - 1.5, label_y, 'median',
                   horizontalalignment='right')

    thinkplot.Pdf(thinkstats2.EstimatedPdf(adult_weights),
                  label='adult weight')
    thinkplot.Save(root='density_wtkg2_kde', xlabel='kg', ylabel='PDF',
                   xlim=[0, 200])
Ejemplo n.º 2
0
def MakeCrediblePlot(suite):
    """Render nested two-dimensional credible intervals as a pcolor plot.

    Every value of the suite starts with a count of zero; the count is
    incremented once for each of the 75%, 50% and 25% intervals (as
    returned by suite.MaxLikeInterval) that contains it.  The resulting
    counts are drawn with thinkplot.Contour and the figure is saved under
    the root name 'paintball5'.

    suite: Suite
    """
    counts = {pair: 0 for pair in suite.Values()}

    for percentage in (75, 50, 25):
        for pair in suite.MaxLikeInterval(percentage):
            counts[pair] += 1

    thinkplot.Contour(counts, contour=False, pcolor=True)

    # Hand-placed labels, all at x=17; the first two are drawn in white,
    # the last one uses the default text color.
    annotations = (
        (4, '25', {'color': 'white'}),
        (15, '50', {'color': 'white'}),
        (30, '75', {}),
    )
    for y, text, options in annotations:
        thinkplot.Text(17, y, text, **options)

    thinkplot.Save('paintball5', xlabel='alpha', ylabel='beta',
                   formats=FORMATS, legend=False)
Ejemplo n.º 3
0
def PlotSamplingDistributions(live):
    """Plot confidence bands for the fitted line and the slope's CDFs.

    Fits live.totalwgt_lb against live.agepreg by least squares, prints
    goodness-of-fit statistics, and saves three figures:

    linear3: 90% and 50% confidence bands built from the sampled
             intercepts and slopes
    linear5: scatter of the data with two further bands, one of which is
             given the residuals
    linear4: CDF of the sampled slopes next to the CDF plotted by the
             slope hypothesis test

    live: DataFrame
    """
    ages, weights = live.agepreg, live.totalwgt_lb

    # Least-squares fit and summary statistics.
    fit_inter, fit_slope = thinkstats2.LeastSquares(ages, weights)
    residuals = thinkstats2.Residuals(ages, weights, fit_inter, fit_slope)
    r_squared = thinkstats2.CoefDetermination(weights, residuals)

    print('rho', thinkstats2.Corr(ages, weights))
    print('R2', r_squared)
    print('R', math.sqrt(r_squared))
    print('Std(ys)', thinkstats2.Std(weights))
    print('Std(res)', thinkstats2.Std(residuals))

    # Figure linear3: one band per row, each followed by its text label.
    # Columns: percent, alpha, legend label, label y-position, label text.
    inters, slopes = SamplingDistributions(live, iters=1001)
    bands = (
        (90, 0.3, '90% CI', 7.53, '90%'),
        (50, 0.5, '50% CI', 7.59, '50%'),
    )
    for percent, alpha, label, text_y, text in bands:
        PlotConfidenceIntervals(ages, inters, slopes,
                                percent=percent, alpha=alpha, label=label)
        thinkplot.Text(42, text_y, text)

    thinkplot.Save(root='linear3', xlabel='age (years)',
                   ylabel='birth weight (lbs)', legend=False)

    # Figure linear5: raw data in the background, then one band that is
    # passed the residuals and one that uses the defaults.
    thinkplot.PrePlot(2)
    thinkplot.Scatter(ages, weights, color='gray', alpha=0.1)
    PlotConfidenceIntervals(ages, inters, slopes, res=residuals, alpha=0.2)
    PlotConfidenceIntervals(ages, inters, slopes)
    thinkplot.Save(root='linear5', xlabel='age (years)',
                   ylabel='birth weight (lbs)', title='90% CI',
                   axis=[10, 45, 0, 15], legend=False)

    # Compare the sampling distribution of the slope with the
    # distribution produced by the hypothesis test.
    sampling_cdf = thinkstats2.Cdf(slopes)
    print('p-value, sampling distribution', sampling_cdf[0])

    slope_test = SlopeTest((ages, weights))
    print('p-value, slope test', slope_test.PValue())

    print('inter', fit_inter, thinkstats2.Mean(inters))
    Summarize(inters, fit_inter)
    print('slope', fit_slope, thinkstats2.Mean(slopes))
    Summarize(slopes, fit_slope)

    # Figure linear4: light gray vertical reference line at slope zero,
    # followed by the two CDFs.
    thinkplot.PrePlot(2)
    thinkplot.Plot([0, 0], [0, 1], color='0.8')
    slope_test.PlotCdf(label='null hypothesis')
    thinkplot.Cdf(sampling_cdf, label='sampling distribution')
    thinkplot.Save(root='linear4', xlabel='slope (lbs / year)',
                   ylabel='CDF', xlim=[-0.03, 0.03], loc='upper left')