Ejemplo n.º 1
0
def main(script):
    """Run this module's self-checks and report success.

    script: string script name
    """
    live, firsts, others = first.MakeFrames()
    length_hist = thinkstats2.Hist(live.prglngth)

    # run both comparison helpers on the same three frames
    weight_comparison(live, firsts, others)
    preg_length_comparison(live, firsts, others)

    # Mode must report 39 for the pregnancy-length histogram
    top_value = Mode(length_hist)
    print('Mode of preg length', top_value)
    assert top_value == 39, top_value

    # AllModes yields (value, freq) pairs; the leading freq must be 4693
    ranked = AllModes(length_hist)
    assert ranked[0][1] == 4693, ranked[0][1]

    leading = ranked[:5]
    for val, count in leading:
        print(val, count)

    print('%s: All tests passed.' % script)
Ejemplo n.º 2
0
def main(script):
    """Check Mode/AllModes, then report the first-vs-other weight gap.

    script: string script name
    """
    live, firsts, others = first.MakeFrames()
    length_hist = thinkstats2.Hist(live.prglngth)

    # Mode must report 39 for the pregnancy-length histogram
    top_value = Mode(length_hist)
    print('Mode of preg length', top_value)
    assert top_value == 39, top_value

    # AllModes yields (value, freq) pairs; the leading freq must be 4693
    ranked = AllModes(length_hist)
    assert ranked[0][1] == 4693, ranked[0][1]

    for val, count in ranked[:5]:
        print(val, count)

    # mean total weight of each group and the gap between them
    mean_first = firsts.totalwgt_lb.mean()
    mean_other = others.totalwgt_lb.mean()
    gap = mean_first - mean_other
    summary = "firsts = {} pounds, others = {} pounds, dif = {} pounds ".format(
        mean_first, mean_other, gap)
    print(summary)

    # effect size of the same comparison
    effect = thinkstats2.CohenEffectSize(firsts.totalwgt_lb, others.totalwgt_lb)
    print(effect)

    print('%s: All tests passed.' % script)
Ejemplo n.º 3
0
def main(script):
    """Check Mode/AllModes and print two Cohen's d effect sizes.

    script: string script name
    """
    live, firsts, others = first.MakeFrames()
    length_hist = thinkstats2.Hist(live.prglngth)

    # Mode must report 39 for the pregnancy-length histogram
    top_value = Mode(length_hist)
    print('Mode of preg length', top_value)
    assert top_value == 39

    # AllModes yields (value, freq) pairs; the leading freq must be 4693
    ranked = AllModes(length_hist)
    assert ranked[0][1] == 4693

    for val, count in ranked[:5]:
        print(val, count)

    # effect size for birth weight
    weight_effect = WeightDifferences(firsts, others, live)
    print("Cohens'd Effect of weight differences:", weight_effect)

    # effect size for pregnancy length
    length_effect = PregnancyLengthDifferences(firsts, others)
    print("Cohens'd Effect of pregnancy length differences:", length_effect)

    print('%s: All tests passed.' % script)
Ejemplo n.º 4
0
def main(script):
    """Check PmfMean and PmfVar against the Pmf object's own methods.

    script: string script name
    """
    live, firsts, others = first.MakeFrames()
    length_pmf = thinkstats2.Pmf(live.prglngth)

    # PmfMean must agree exactly with Pmf.Mean
    computed_mean = PmfMean(length_pmf)
    print('Mean of preg length', computed_mean)
    assert computed_mean == length_pmf.Mean(), computed_mean

    # PmfVar must agree exactly with Pmf.Var
    computed_var = PmfVar(length_pmf)
    print('Variance of preg length', computed_var)
    assert computed_var == length_pmf.Var(), computed_var

    print('%s: All tests passed.' % script)
def main():
    """Run RunTests on seven samples of the live frame, halving the size each time."""
    thinkstats2.RandomSeed(18)
    live, _firsts, _others = first.MakeFrames()

    # start at the full row count; integer-halve after every round
    size = len(live)
    rounds_left = 7
    while rounds_left > 0:
        RunTests(thinkstats2.SampleRows(live, size))
        size //= 2
        rounds_left -= 1
Ejemplo n.º 6
0
def main():
    """Print the mean of the pairwise differences and show their PMF."""
    live, _firsts, _others = first.MakeFrames()
    gaps = PairwiseDiff(live)

    print('Mean: ', thinkstats2.Mean(gaps))

    # histogram of the PMF built from the same differences
    thinkplot.Hist(thinkstats2.Pmf(gaps))
    thinkplot.Show(xlabel='Diff in wks', ylabel='PMF')
Ejemplo n.º 7
0
def main(name, data_dir=''):
    """Seed the RNG, then run the example, sampling, CDF and figure steps in order.

    name: accepted but not read by this function
    data_dir: accepted but not read by this function
    """
    thinkstats2.RandomSeed(17)

    # MakeExample runs before any frame is loaded
    MakeExample()

    live, firsts, others = first.MakeFrames()

    # steps that only need the live frame
    RandomFigure(live)
    TestSample(live)
    MakeCdf(live)

    # final step uses all three frames
    MakeFigures(live, firsts, others)
Ejemplo n.º 8
0
def main():
    """Seed the RNG, draw the two plots, then run TestIntervention."""
    thinkstats2.RandomSeed(17)

    frames = first.MakeFrames()
    live, firsts, others = frames

    PlotAdultWeights(live)
    PlotPregLengths(live, firsts, others)

    # takes no data arguments
    TestIntervention()
Ejemplo n.º 9
0
def main():
    """Estimate birth weight, then plot sampling distributions, fit and residuals."""
    thinkstats2.RandomSeed(17)

    all_live, _firsts, _others = first.MakeFrames()

    # the estimate runs on the unfiltered frame
    EstimateBirthWeight(all_live)

    # the plots run on rows that have both age and weight
    complete = all_live.dropna(subset=['agepreg', 'totalwgt_lb'])
    PlotSamplingDistributions(complete)

    PlotFit(complete)
    PlotResiduals(complete)
Ejemplo n.º 10
0
def main():
    """Run the resample test, then RunTests on seven successively halved samples."""
    # fixed seed so the random samples repeat from run to run
    thinkstats2.RandomSeed(23)
    live, firsts, others = first.MakeFrames()
    RunResampleTest(firsts, others)

    # start at the full row count; integer-halve after every round
    size = len(live)
    rounds_left = 7
    while rounds_left > 0:
        RunTests(thinkstats2.SampleRows(live, size))
        size //= 2
        rounds_left -= 1
def main(name, data_dir='.'):
    """Run the logistic example, then the regression models on the live frame.

    name: accepted but not read by this function
    data_dir: accepted but not read by this function
    """
    thinkstats2.RandomSeed(17)
    LogisticRegressionExample()

    live, _firsts, _others = first.MakeFrames()

    # boolean column: True where birthord equals 1
    is_first_birth = (live.birthord == 1)
    live['isfirst'] = is_first_birth

    RunLogisticModels(live)

    RunSimpleRegression(live)
    RunModels(live)

    PredictBirthWeight(live)
def main(script):
    """Print age/weight correlations and save a scatter plot as a JPEG.

    script: accepted but not read by this function
    """
    thinkstats2.RandomSeed(17)

    all_live, _firsts, _others = first.MakeFrames()

    # keep only rows that have both age and weight
    complete = all_live.dropna(subset=['agepreg', 'totalwgt_lb'])
    BinnedPercentiles(complete)

    ages, weights = complete.agepreg, complete.totalwgt_lb

    pearson = thinkstats2.Corr(ages, weights)
    print('thinkstats2 Corr', pearson)
    spearman = thinkstats2.SpearmanCorr(ages, weights)
    print('thinkstats2 SpearmanCorr', spearman)

    ScatterPlot(ages, weights, alpha=0.1)
    thinkplot.Save(root='chap07scatter1', legend=False, formats=['jpg'])
Ejemplo n.º 13
0
def main(script):
    """Run the module's hypothesis tests in sequence and print each result.

    Both the stdlib and NumPy generators are seeded with 100 first; the
    steps below are kept in a fixed order.

    script: accepted but not read by this function
    """
    random.seed(100)
    np.random.seed(100)

    # coin test on the (140, 110) counts
    coin = CoinTest((140, 110))
    coin_p = coin.PValue()
    print("coin test p-value", coin_p)

    # pregnancy lengths: first babies vs. others
    print("\nprglngth")
    live, firsts, others = first.MakeFrames()
    length_pair = firsts.prglngth.values, others.prglngth.values
    RunTests(length_pair)

    # birth weights: permutation test on the difference in means
    print("\nbirth weight")
    weight_pair = (firsts.totalwgt_lb.dropna().values,
                   others.totalwgt_lb.dropna().values)
    means_test = DiffMeansPermute(weight_pair)
    means_p = means_test.PValue(iters=1000)
    print("means permute two-sided")
    PrintTest(means_p, means_test)

    # correlation between mother's age and birth weight
    complete = live.dropna(subset=["agepreg", "totalwgt_lb"])
    age_weight = complete.agepreg.values, complete.totalwgt_lb.values
    corr_test = CorrelationPermute(age_weight)
    corr_p = corr_test.PValue()
    print("\nage weight correlation")
    print("n=", len(complete))
    PrintTest(corr_p, corr_test)

    # dice test
    RunDiceTest()

    # pregnancy lengths again, this time with the chi-squared test
    length_pair = firsts.prglngth.values, others.prglngth.values
    chi_test = PregLengthTest(length_pair)
    chi_p = chi_test.PValue()
    print("\npregnancy length chi-squared")
    PrintTest(chi_p, chi_test)

    # false negative rate for the difference in pregnancy length
    length_pair = firsts.prglngth.values, others.prglngth.values
    miss_rate = FalseNegRate(length_pair)
    print("false neg rate", miss_rate)

    # repeat the tests with new nsfg data
    ReplicateTests()
Ejemplo n.º 14
0
def main():
    """Run the module's hypothesis tests in sequence and print each result.

    The RNG is seeded with 17 first; the steps below are kept in a fixed
    order.
    """
    thinkstats2.RandomSeed(17)

    # coin test on the (140, 110) counts
    coin = CoinTest((140, 110))
    coin_p = coin.PValue()
    print('coin test p-value', coin_p)

    # pregnancy lengths: first babies vs. others
    print('\nprglngth')
    live, firsts, others = first.MakeFrames()
    length_pair = firsts.prglngth.values, others.prglngth.values
    RunTests(length_pair)

    # birth weights: permutation test on the difference in means
    print('\nbirth weight')
    weight_pair = (firsts.totalwgt_lb.dropna().values,
                   others.totalwgt_lb.dropna().values)
    means_test = DiffMeansPermute(weight_pair)
    means_p = means_test.PValue(iters=1000)
    print('means permute two-sided')
    PrintTest(means_p, means_test)

    # correlation between mother's age and birth weight
    complete = live.dropna(subset=['agepreg', 'totalwgt_lb'])
    age_weight = complete.agepreg.values, complete.totalwgt_lb.values
    corr_test = CorrelationPermute(age_weight)
    corr_p = corr_test.PValue()
    print('\nage weight correlation')
    print('n=', len(complete))
    PrintTest(corr_p, corr_test)

    # dice test
    RunDiceTest()

    # pregnancy lengths again, this time with the chi-squared test
    length_pair = firsts.prglngth.values, others.prglngth.values
    chi_test = PregLengthTest(length_pair)
    chi_p = chi_test.PValue()
    print('\npregnancy length chi-squared')
    PrintTest(chi_p, chi_test)

    # false negative rate for the difference in pregnancy length
    length_pair = firsts.prglngth.values, others.prglngth.values
    miss_rate = FalseNegRate(length_pair)
    print('false neg rate', miss_rate)

    # repeat the tests with new nsfg data
    ReplicateTests()
Ejemplo n.º 15
0
def main():
    """Make the CLT plots, print the gorilla example, then run the remaining tests."""
    thinkstats2.RandomSeed(17)

    MakeCltPlots()

    print('Gorilla example')
    population = Normal(90, 7.5**2)
    print(population)

    # distribution of the mean of nine draws: sum of 9, divided by 9
    sample_mean = population.Sum(9) / 9
    print(sample_mean.sigma)
    low = sample_mean.Percentile(5)
    high = sample_mean.Percentile(95)
    print(low, high)

    live, firsts, others = first.MakeFrames()
    TestCorrelation(live)
    PlotPregLengths(live, firsts, others)

    TestChiSquared()
Ejemplo n.º 16
0
def main(script):
    """Check PmfMean and PmfVar against the Pmf object's own methods.

    script: string script name
    """
    live, _firsts, _others = first.MakeFrames()

    # build the PMF of pregnancy length and evaluate both helpers on it
    length_pmf = thinkstats2.Pmf(live.prglngth)
    computed_mean = PmfMean(length_pmf)
    # PmfVar is given the mean that was just computed
    computed_var = PmfVar(length_pmf, computed_mean)

    # both results must match the built-in methods exactly
    assert computed_mean == length_pmf.Mean()
    assert computed_var == length_pmf.Var()

    print('mean/var preg length', computed_mean, computed_var)
    print('%s: All tests passed.' % script)
Ejemplo n.º 17
0
def main():
    """Run three tests on pregnancy length and save a CDF plot for each."""
    thinkstats2.RandomSeed(17)

    def report(test, root, title, xlabel):
        # p-value from 1000 iterations, then the test's CDF saved under `root`
        p_value = test.PValue(iters=1000)
        print('p-value =', p_value)

        test.PlotCdf()
        thinkplot.Save(root=root,
                       title=title,
                       xlabel=xlabel,
                       ylabel='CDF',
                       legend=False)

    # load the frames and print the overall (mean, var) of pregnancy length
    live, firsts, others = first.MakeFrames()
    print('(Mean, Var) of prglength for live births',
          thinkstats2.MeanVar(live.prglngth))
    groups = firsts.prglngth.values, others.prglngth.values

    # difference in means
    report(DiffMeansPermute(groups),
           'hypothesis1',
           'Permutation test',
           'difference in means (weeks)')

    # difference in std
    report(DiffStdPermute(groups),
           'hypothesis2',
           'Permutation test',
           'difference in std (weeks)')

    # NOTE(review): the labels below describe a resampling test on the
    # difference in means, but the test object is DiffStdPermute again,
    # the same class as the step above. Confirm which test was intended.
    report(DiffStdPermute(groups),
           'hypothesis3',
           'Resampling test',
           'difference in means (weeks)')
Ejemplo n.º 18
0
def ComputeSkewnesses():
    """Plot estimated densities of birth weight and adult weight.

    Each figure gets a labelled vertical line at the mean and at the median
    returned by Summarize, and is then written out with thinkplot.Save.
    """
    def mark(x, height):
        # vertical segment at x running from 0 up to height
        thinkplot.Plot([x, x], [0, height], color='0.6', linewidth=1)

    # --- birth weight ---
    live, firsts, others = first.MakeFrames()
    birth_weights = live.totalwgt_lb.dropna()
    print('Birth weight')
    mean, median = Summarize(birth_weights)

    top = 0.35
    markers = (
        (mean, mean - 0.15, 'mean', 'right'),
        (median, median + 0.1, 'median', 'left'),
    )
    for x, text_x, label, align in markers:
        mark(x, top)
        thinkplot.Text(text_x, 0.1 * top, label, horizontalalignment=align)

    thinkplot.Pdf(thinkstats2.EstimatedPdf(birth_weights),
                  label='birth weight')
    thinkplot.Save(root='density_totalwgt_kde', xlabel='lbs', ylabel='PDF')

    # --- adult weight ---
    adults = brfss.ReadBrfss(nrows=None)
    adult_weights = adults.wtkg2.dropna()
    print('Adult weight')
    mean, median = Summarize(adult_weights)

    top = 0.02499
    markers = (
        (mean, mean + 1, 'mean', 'left'),
        (median, median - 1.5, 'median', 'right'),
    )
    for x, text_x, label, align in markers:
        mark(x, top)
        thinkplot.Text(text_x, 0.1 * top, label, horizontalalignment=align)

    thinkplot.Pdf(thinkstats2.EstimatedPdf(adult_weights),
                  label='adult weight')
    thinkplot.Save(root='density_wtkg2_kde',
                   xlabel='kg',
                   ylabel='PDF',
                   xlim=[0, 200])
Ejemplo n.º 19
0
def main(script):
    """Print weight statistics for both groups, then check Mode and AllModes.

    script: string script name
    """
    live, firsts, others = first.MakeFrames()
    length_hist = thinkstats2.Hist(live.prglngth)

    first_weights = firsts.totalwgt_lb
    other_weights = others.totalwgt_lb

    # means first, then variances
    first_mean = first_weights.mean()
    other_mean = other_weights.mean()

    first_var = first_weights.var()
    other_var = other_weights.var()

    print('Mean Weight')
    print('First babies', first_mean)
    print('Others babies', other_mean)

    print('Variance in Weight')
    print('First babies', first_var)
    print('Others babies', other_var)

    print('Difference in lbs', first_mean - other_mean)

    effect = thinkstats2.CohenEffectSize(firsts.totalwgt_lb,
                                         others.totalwgt_lb)
    print('Cohen d', effect)

    # Mode must report 39 for the pregnancy-length histogram
    top_value = Mode(length_hist)
    print('Mode of preg length', top_value)
    assert top_value == 39, top_value

    # AllModes yields (value, freq) pairs; the leading freq must be 4693
    ranked = AllModes(length_hist)
    assert ranked[0][1] == 4693, ranked[0][1]

    for val, count in ranked[:5]:
        print(val, count)

    print('%s: All tests passed.' % script)
def main(script):
    """Check PmfMean and PmfVar against the Pmf object's own methods.

    script: string script name
    """
    live, _firsts, _others = first.MakeFrames()

    # build the PMF of pregnancy length and evaluate both helpers on it
    length_pmf = thinkstats2.Pmf(live.prglngth)
    computed_mean = PmfMean(length_pmf)
    computed_var = PmfVar(length_pmf)

    # both results must match the built-in methods exactly
    assert computed_mean == length_pmf.Mean()
    assert computed_var == length_pmf.Var()

    # each value is printed under a '#'-prefixed caption line
    print('# capture the mean value')
    print('Mean Value length', computed_mean)
    print('# capture the var value')
    print('Var Value length', computed_var)
    print('# capture the mean/ preg length value')
    print('mean/var preg length', computed_mean, computed_var)
    print('%s: All tests passed.' % script)
Ejemplo n.º 21
0
def main(script):
    """Tests the functions in this module.

    script: string script name (not read by the body of this function)
    """
    live, firsts, others = first.MakeFrames()
    hist = thinkstats2.Hist(live.prglngth)

    # explore the weight difference between first babies and others
    WeightDifference(live, firsts, others)

    # test Mode: the pregnancy-length histogram must report 39
    mode = Mode(hist)
    print('Mode of preg length', mode)
    assert mode == 39, mode

    # test AllModes: the leading (value, freq) pair must have freq 4693
    modes = AllModes(hist)
    assert modes[0][1] == 4693, modes[0][1]

    # show the five leading (value, freq) pairs
    for value, freq in modes[:5]:
        print(value, freq)
Ejemplo n.º 22
0
def main(script):
    """Check Mode and AllModes, then print Cohen's d for birth weight.

    script: string script name
    """
    live, firsts, others = first.MakeFrames()
    length_hist = thinkstats2.Hist(live.prglngth)

    # Mode must report 39 for the pregnancy-length histogram
    top_value = Mode(length_hist)
    print('Mode of preg length', top_value)
    assert top_value == 39

    # AllModes yields (value, freq) pairs; print them all, then check the first
    ranked = AllModes(length_hist)
    print(ranked)
    assert ranked[0][1] == 4693

    for val, count in ranked[:5]:
        print(val, count)

    print('%s: All tests passed.' % script)

    # the effect size is printed after the pass message
    effect = cohen_d(firsts.totalwgt_lb, others.totalwgt_lb)
    print("Cohen's d:", effect)
Ejemplo n.º 23
0
def ComputeSkewnesses():
    """Plot estimated densities of birth weight and adult weight.

    Each figure gets a labelled vertical line at the mean and at the median
    returned by Summarize, and is then written out with myplots.Save.
    """
    def mark(x, height):
        # vertical segment at x running from 0 up to height
        myplots.Plot([x, x], [0, height], color="0.6", linewidth=1)

    # --- birth weight ---
    live, firsts, others = first.MakeFrames()
    birth_weights = live.totalwgt_lb.dropna()
    print("Birth weight")
    mean, median = Summarize(birth_weights)

    top = 0.35
    label_y = 0.1 * top
    mark(mean, top)
    myplots.Text(mean - 0.15, label_y, "mean", horizontalalignment="right")
    mark(median, top)
    myplots.Text(median + 0.1, label_y, "median", horizontalalignment="left")

    birth_pdf = mystats.EstimatedPdf(birth_weights)
    myplots.Pdf(birth_pdf, label="birth weight")
    myplots.Save(root="density_totalwgt_kde", xlabel="lbs", ylabel="PDF")

    # --- adult weight ---
    adults = brfss.ReadBrfss(nrows=None)
    adult_weights = adults.wtkg2.dropna()
    print("Adult weight")
    mean, median = Summarize(adult_weights)

    top = 0.02499
    label_y = 0.1 * top
    mark(mean, top)
    myplots.Text(mean + 1, label_y, "mean", horizontalalignment="left")
    mark(median, top)
    myplots.Text(median - 1.5, label_y, "median", horizontalalignment="right")

    adult_pdf = mystats.EstimatedPdf(adult_weights)
    myplots.Pdf(adult_pdf, label="adult weight")
    myplots.Save(root="density_wtkg2_kde",
                 xlabel="kg",
                 ylabel="PDF",
                 xlim=[0, 200])
Ejemplo n.º 24
0
[Think Stats Chapter 7 Exercise 1](http://greenteapress.com/thinkstats2/html/thinkstats2008.html#toc70) (weight vs. age)

# imports
from __future__ import print_function, division
%matplotlib inline
import numpy as np
import thinkstats2
import thinkplot

# get data
import first
# Load the frames, then keep only the rows of `live` that have both
# mother's age (agepreg) and birth weight (totalwgt_lb).
live, firsts, others = first.MakeFrames()
live = live.dropna(subset=['agepreg', 'totalwgt_lb'])
ages = live.agepreg
weights = live.totalwgt_lb

# Scatter plot of birth weight versus mother's age.
thinkplot.Scatter(ages, weights, alpha=1, s=10)
thinkplot.Config(xlabel='Age (years)',
                 ylabel='Weight (lbs)',
                 xlim=[10, 45],
                 ylim=[0, 15],
                 legend=False)
# RESULTS: messy plot

# First step toward percentiles of birth weight versus mother's age:
# bin the ages with edges 10, 15, ..., 40 and group the rows by bin index.
bins = np.arange(10, 45, 5)
indices = np.digitize(live.agepreg, bins)
groups = live.groupby(indices)
# `groups` holds the rows of `live` keyed by binned mother's age
Ejemplo n.º 25
0
def COHEN(Frame1, Frame2):
    """Return the difference in means divided by a combined standard deviation.

    Frame1, Frame2: objects providing mean() and std() methods

    The combined deviation is the square root of the unweighted average
    of the two squared std() values.
    """
    mean_gap = Frame1.mean() - Frame2.mean()
    averaged_variance = (Frame1.std()**2 + Frame2.std()**2) / 2
    combined_sd = averaged_variance**.5
    return mean_gap / combined_sd


import thinkstats2, first

# Load the three frames from the `first` module.
live, firsts, others = first.MakeFrames()

# Birth-weight column of each frame.
TotalW = live.totalwgt_lb
FirstW = firsts.totalwgt_lb
OtherW = others.totalwgt_lb

# Effect size computed two ways: the local COHEN helper and thinkstats2's.
MyCohen = COHEN(FirstW, OtherW)
NotMyCohen = thinkstats2.CohenEffectSize(FirstW, OtherW)

# Mean pregnancy length of each group.
FirstPMean = firsts.prglngth.mean()
OtherPMean = others.prglngth.mean()

# Build every report line first, then print them in order.
report_lines = [
    "the mean weight of first babies is " + str(FirstW.mean()) + " lbs",
    "the mean weight of other babies is " + str(OtherW.mean()) + " lbs",
    "the Cohen's d between the two sets is " + str(MyCohen),
    "the mean pregnancy length of first babies was " + str(FirstPMean) +
    " weeks",
    "the mean pregnancy length of other babies was " + str(OtherPMean) +
    " weeks",
    "the difference between the pregnancy lengths was " +
    str(FirstPMean - OtherPMean),
]
for report_line in report_lines:
    print(report_line)
Ejemplo n.º 26
0
def main(script):
    """Make the figures and histograms, then run class_sizes.

    script: accepted but not read by this function
    """
    frames = first.MakeFrames()
    live, firsts, others = frames

    make_figures(firsts, others)
    make_hists(live)

    # takes no data arguments
    class_sizes()
Ejemplo n.º 27
0
import thinkplot


def Diffs(t):
    """Return the first element of t minus each later element, as a list.

    t: indexable, sliceable sequence with at least one element
    """
    anchor = t[0]
    gaps = []
    for later in t[1:]:
        gaps.append(anchor - later)
    return gaps


def PairWiseDifference(live):
    """Collect pregnancy-length differences within each entry of the preg map.

    live: frame with a prglngth column; only rows with prglngth >= 37 are used

    For every entry whose rows hold two or more lengths, the first length
    minus each later one is appended. Returns the combined list.
    """
    long_enough = live[live.prglngth >= 37]
    by_case = nsfg.MakePregMap(long_enough)

    collected = []
    for row_labels in by_case.values():
        case_lengths = long_enough.loc[row_labels].prglngth.values
        if len(case_lengths) < 2:
            # a single length has nothing to compare against
            continue
        collected.extend(Diffs(case_lengths))
    return collected


if __name__ == '__main__':
    # Unpack into names that leave the imported `first` module bound;
    # assigning to `first` here would replace the module with a frame.
    live, firsts, others = first.MakeFrames()

    # first-minus-later pregnancy-length differences
    diffs = PairWiseDifference(live)
    mean = thinkstats2.Mean(diffs)
    print('Mean difference between pairs', mean)

    # PMF of the differences, drawn as a centred histogram
    pmf = thinkstats2.Pmf(diffs)
    thinkplot.Hist(pmf, align="center")
    thinkplot.Show(xlabel='Difference in weeks', ylabel='PMF')
Ejemplo n.º 28
0
def main(script):
    """Make the figures and histograms, then run ClassSizes.

    script: accepted but not read by this function
    """
    frames = first.MakeFrames()
    live, firsts, others = frames

    MakeFigures(firsts, others)
    MakeHists(live)

    # takes no data arguments
    ClassSizes()
Ejemplo n.º 29
0
def main(script):
    """Summarize the three frames, then make the first-vs-other figures.

    script: accepted but not read by this function
    """
    frames = first.MakeFrames()
    live, firsts, others = frames

    Summarize(live, firsts, others)

    MakeFigures(firsts, others)