Example #1
0
def MakeFigures(df):
    """Generates CDFs and normal prob plots for weights and log weights."""
    weights = df.wtkg2.dropna()
    log_weights = np.log10(weights)

    # plot weights on linear and log scales
    thinkplot.PrePlot(cols=2)
    MakeNormalModel(weights)
    thinkplot.Config(xlabel='adult weight (kg)', ylabel='CDF')

    thinkplot.SubPlot(2)
    MakeNormalModel(log_weights)
    thinkplot.Config(xlabel='adult weight (log10 kg)')

    thinkplot.Save(root='brfss_weight')

    # make normal probability plots on linear and log scales
    thinkplot.PrePlot(cols=2)
    MakeNormalPlot(weights)
    thinkplot.Config(xlabel='z', ylabel='weights (kg)')

    thinkplot.SubPlot(2)
    MakeNormalPlot(log_weights)
    thinkplot.Config(xlabel='z', ylabel='weights (log10 kg)')

    thinkplot.Save(root='brfss_weight_normal')
Example #2
0
def MakeHists(live):
    """Plot Hists for live births

    live: DataFrame
    others: DataFrame
    """
    hist = thinkstats2.Hist(live.birthwgt_lb, label='birthwgt_lb')
    thinkplot.Hist(hist)
    thinkplot.Save(root='first_wgt_lb_hist', 
                   xlabel='pounds',
                   ylabel='frequency',
                   axis=[-1, 14, 0, 3200])

    hist = thinkstats2.Hist(live.birthwgt_oz, label='birthwgt_oz')
    thinkplot.Hist(hist)
    thinkplot.Save(root='first_wgt_oz_hist', 
                   xlabel='ounces',
                   ylabel='frequency',
                   axis=[-1, 16, 0, 1200])

    hist = thinkstats2.Hist(np.floor(live.agepreg), label='agepreg')
    thinkplot.Hist(hist)
    thinkplot.Save(root='first_agepreg_hist', 
                   xlabel='years',
                   ylabel='frequency')

    hist = thinkstats2.Hist(live.prglngth, label='prglngth')
    thinkplot.Hist(hist)
    thinkplot.Save(root='first_prglngth_hist', 
                   xlabel='weeks',
                   ylabel='frequency',
                   axis=[-1, 53, 0, 5000])
Example #3
0
def PlotPregnancyData(preg):
    """Plots survival and hazard curves based on pregnancy lengths.
    
    preg:


    Outcome codes from http://www.icpsr.umich.edu/nsfg6/Controller?
    displayPage=labelDetails&fileCode=PREG&section=&subSec=8016&srtLabel=611932

    1	LIVE BIRTH	 	9148
    2	INDUCED ABORTION	1862
    3	STILLBIRTH	 	120
    4	MISCARRIAGE	 	1921
    5	ECTOPIC PREGNANCY	190
    6	CURRENT PREGNANCY	352

    """
    complete = preg.query('outcome in [1, 3, 4]').prglngth
    print('Number of complete pregnancies', len(complete))
    ongoing = preg[preg.outcome == 6].prglngth
    print('Number of ongoing pregnancies', len(ongoing))

    PlotSurvival(complete)
    thinkplot.Save(root='survival1', xlabel='t (weeks)', formats=FORMATS)

    hf = EstimateHazardFunction(complete, ongoing)
    sf = hf.MakeSurvival()
    return sf
Example #4
0
def RunTests(data, iters=1000):
    """Runs several tests on the given data.

    data: pair of sequences
    iters: number of iterations to run
    """

    # test the difference in means
    ht = DiffMeansPermute(data)
    p_value = ht.PValue(iters=iters)
    print('\nmeans permute two-sided')
    PrintTest(p_value, ht)

    ht.PlotCdf()
    thinkplot.Save(root='hypothesis1',
                   title='Permutation test',
                   xlabel='difference in means (weeks)',
                   ylabel='CDF',
                   legend=False)

    # test the difference in means one-sided
    ht = DiffMeansOneSided(data)
    p_value = ht.PValue(iters=iters)
    print('\nmeans permute one-sided')
    PrintTest(p_value, ht)

    # test the difference in std
    ht = DiffStdPermute(data)
    p_value = ht.PValue(iters=iters)
    print('\nstd permute one-sided')
    PrintTest(p_value, ht)
Example #5
0
def ResampleSurvival(resp, iters=101):
    """Resamples respondents and estimates the survival function.

    resp: DataFrame of respondents
    iters: number of resamples
    """
    _, sf = EstimateMarriageSurvival(resp)
    thinkplot.Plot(sf)

    low, high = resp.agemarry.min(), resp.agemarry.max()
    ts = np.arange(low, high, 1 / 12.0)

    ss_seq = []
    for _ in range(iters):
        sample = thinkstats2.ResampleRowsWeighted(resp)
        _, sf = EstimateMarriageSurvival(sample)
        ss_seq.append(sf.Probs(ts))

    low, high = thinkstats2.PercentileRows(ss_seq, [5, 95])
    thinkplot.FillBetween(ts, low, high, color='gray', label='90% CI')
    thinkplot.Save(root='survival3',
                   xlabel='age (years)',
                   ylabel='prob unmarried',
                   xlim=[12, 46],
                   ylim=[0, 1],
                   formats=FORMATS)
Example #6
0
def MakeParetoCdf2():
    """Generates a plot of the CDF of height in Pareto World."""
    xmin = 100
    alpha = 1.7
    xs, ps = thinkstats2.RenderParetoCdf(xmin, alpha, 0, 1000.0, n=100)
    thinkplot.Plot(xs, ps)

    thinkplot.Save(root='analytic_pareto_height',
                   title='Pareto CDF',
                   xlabel='height (cm)',
                   ylabel='CDF',
                   legend=False)
Example #7
0
def MakeExpoCdf():
    """Generates a plot of the exponential CDF."""

    thinkplot.PrePlot(3)
    for lam in [2.0, 1, 0.5]:
        xs, ps = thinkstats2.RenderExpoCdf(lam, 0, 3.0, 50)
        label = r'$\lambda=%g$' % lam
        thinkplot.Plot(xs, ps, label=label)

    thinkplot.Save(root='analytic_expo_cdf',
                   title='Exponential CDF',
                   xlabel='x',
                   ylabel='CDF')
Example #8
0
def MakeParetoCdf():
    """Generates a plot of the Pareto CDF."""
    xmin = 0.5

    thinkplot.PrePlot(3)
    for alpha in [2.0, 1.0, 0.5]:
        xs, ps = thinkstats2.RenderParetoCdf(xmin, alpha, 0, 10.0, n=100)
        thinkplot.Plot(xs, ps, label=r'$\alpha=%g$' % alpha)

    thinkplot.Save(root='analytic_pareto_cdf',
                   title='Pareto CDF',
                   xlabel='x',
                   ylabel='CDF')
Example #9
0
def main():
    thinkstats2.RandomSeed(17)

    preg = nsfg.ReadFemPreg()
    sf1 = PlotPregnancyData(preg)

    # make the plots based on Cycle 6
    resp6 = ReadFemResp2002()

    sf2 = PlotMarriageData(resp6)

    ResampleSurvival(resp6)

    PlotRemainingLifetime(sf1, sf2)

    # read Cycles 5 and 7
    resp5 = ReadFemResp1995()
    resp7 = ReadFemResp2010()

    # plot resampled survival functions by decade
    resps = [resp5, resp6, resp7]
    PlotResampledByDecade(resps)
    thinkplot.Save(root='survival4',
                   xlabel='age (years)',
                   ylabel='prob unmarried',
                   xlim=[13, 45],
                   ylim=[0, 1],
                   formats=FORMATS)

    # plot resampled survival functions by decade, with predictions
    PlotResampledByDecade(resps, predict_flag=True, omit=[5])
    thinkplot.Save(root='survival5',
                   xlabel='age (years)',
                   ylabel='prob unmarried',
                   xlim=[13, 45],
                   ylim=[0, 1],
                   formats=FORMATS)
Example #10
0
def MakeExampleNormalPlot():
    """Generates a sample normal probability plot.
    """
    n = 1000
    thinkplot.PrePlot(3)

    mus = [0, 1, 5]
    sigmas = [1, 1, 2]
    for mu, sigma in zip(mus, sigmas):
        sample = np.random.normal(mu, sigma, n)
        xs, ys = thinkstats2.NormalProbability(sample)
        label = '$\mu=%d$, $\sigma=%d$' % (mu, sigma)
        thinkplot.Plot(xs, ys, label=label)

    thinkplot.Save(root='analytic_normal_prob_example',
                   title='Normal probability plot',
                   xlabel='standard normal sample',
                   ylabel='sample values')
Example #11
0
def MakeComparison(firsts, others):
    """Plots histograms of pregnancy length for first babies and others.

    firsts: DataFrame
    others: DataFrame
    """
    first_hist = thinkstats2.Hist(firsts.prglngth, label='first')
    other_hist = thinkstats2.Hist(others.prglngth, label='other')

    width = 0.45
    thinkplot.PrePlot(2)
    thinkplot.Hist(first_hist, align='right', width=width)
    thinkplot.Hist(other_hist, align='left', width=width)

    thinkplot.Save(root='first_nsfg_hist', 
                   title='Histogram',
                   xlabel='weeks',
                   ylabel='frequency',
                   axis=[27, 46, 0, 2700])
Example #12
0
def MakeNormalCdf():
    """Generates a plot of the normal CDF."""

    thinkplot.PrePlot(3)

    mus = [1.0, 2.0, 3.0]
    sigmas = [0.5, 0.4, 0.3]
    for mu, sigma in zip(mus, sigmas):
        xs, ps = thinkstats2.RenderNormalCdf(mu=mu,
                                             sigma=sigma,
                                             low=-1.0,
                                             high=4.0)
        label = r'$\mu=%g$, $\sigma=%g$' % (mu, sigma)
        thinkplot.Plot(xs, ps, label=label)

    thinkplot.Save(root='analytic_normal_cdf',
                   title='Normal CDF',
                   xlabel='x',
                   ylabel='CDF',
                   loc=2)
Example #13
0
def MakeBabyBoom():
    """Plot CDF of interarrival time on log and linear scales.
    """
    # compute the interarrival times
    df = ReadBabyBoom()
    diffs = df.minutes.diff()
    cdf = thinkstats2.Cdf(diffs, label='actual')

    thinkplot.PrePlot(cols=2)
    thinkplot.Cdf(cdf)
    thinkplot.Config(xlabel='minutes', ylabel='CDF', legend=False)

    thinkplot.SubPlot(2)
    thinkplot.Cdf(cdf, complement=True)
    thinkplot.Config(xlabel='minutes',
                     ylabel='CCDF',
                     yscale='log',
                     legend=False)

    thinkplot.Save(root='analytic_interarrivals', legend=False)
Example #14
0
def MakeNormalPlot(weights, term_weights):
    """Generates a normal probability plot of birth weights."""

    mean, var = thinkstats2.TrimmedMeanVar(weights, p=0.01)
    std = math.sqrt(var)

    xs = [-4, 4]
    fxs, fys = thinkstats2.FitLine(xs, mean, std)
    thinkplot.Plot(fxs, fys, linewidth=4, color='0.8')

    thinkplot.PrePlot(2)
    xs, ys = thinkstats2.NormalProbability(weights)
    thinkplot.Plot(xs, ys, label='all live')

    xs, ys = thinkstats2.NormalProbability(term_weights)
    thinkplot.Plot(xs, ys, label='full term')
    thinkplot.Save(root='analytic_birthwgt_normal',
                   title='Normal probability plot',
                   xlabel='Standard deviations from mean',
                   ylabel='Birth weight (lbs)')
Example #15
0
def PlotMarriageData(resp):
    """Plots hazard and survival functions.

    resp: DataFrame of respondents
    """
    hf, sf = EstimateMarriageSurvival(resp)

    thinkplot.PrePlot(rows=2)
    thinkplot.Plot(hf)
    thinkplot.Config(ylabel='hazard', legend=False)

    thinkplot.SubPlot(2)
    thinkplot.Plot(sf)
    thinkplot.Save(root='survival2',
                   xlabel='age (years)',
                   ylabel='prob unmarried',
                   ylim=[0, 1],
                   legend=False,
                   formats=FORMATS)
    return sf
Example #16
0
def PrintExtremes(live):
    """Plots the histogram of pregnancy lengths and prints the extremes.

    live: DataFrame of live births
    """
    hist = thinkstats2.Hist(live.prglngth)
    thinkplot.Hist(hist, label='live births')

    thinkplot.Save(root='first_nsfg_hist_live', 
                   title='Histogram',
                   xlabel='weeks',
                   ylabel='frequency')

    print('Shortest lengths:')
    for weeks, freq in hist.Smallest(10):
        print(weeks, freq)

    print('Longest lengths:')
    for weeks, freq in hist.Largest(10):
        print(weeks, freq)
Example #17
0
def MakeNormalModel(weights):
    """Plot the CDF of birthweights with a normal model."""

    # estimate parameters: trimming outliers yields a better fit
    mu, var = thinkstats2.TrimmedMeanVar(weights, p=0.01)
    print('Mean, Var', mu, var)

    # plot the model
    sigma = math.sqrt(var)
    print('Sigma', sigma)
    xs, ps = thinkstats2.RenderNormalCdf(mu, sigma, low=0, high=12.5)

    thinkplot.Plot(xs, ps, label='model', color='0.8')

    # plot the data
    cdf = thinkstats2.Cdf(weights, label='data')

    thinkplot.PrePlot(1)
    thinkplot.Cdf(cdf)
    thinkplot.Save(root='analytic_birthwgt_model',
                   title='Birth weights',
                   xlabel='birth weight (lbs)',
                   ylabel='CDF')
Example #18
0
def PlotRemainingLifetime(sf1, sf2):
    """Plots remaining lifetimes for pregnancy and age at first marriage.

    sf1: SurvivalFunction for pregnancy length
    sf2: SurvivalFunction for age at first marriage
    """
    thinkplot.PrePlot(cols=2)
    rem_life1 = sf1.RemainingLifetime()
    thinkplot.Plot(rem_life1)
    thinkplot.Config(title='remaining pregnancy length',
                     xlabel='weeks',
                     ylabel='mean remaining weeks')

    thinkplot.SubPlot(2)
    func = lambda pmf: pmf.Percentile(50)
    rem_life2 = sf2.RemainingLifetime(filler=np.inf, func=func)
    thinkplot.Plot(rem_life2)
    thinkplot.Config(title='years until first marriage',
                     ylim=[0, 15],
                     xlim=[11, 31],
                     xlabel='age (years)',
                     ylabel='median remaining years')

    thinkplot.Save(root='survival6', formats=FORMATS)