def main(script):
    """Runs the comparison helpers, then checks Mode and AllModes.

    script: string script name (echoed in the final status line)
    """
    live, firsts, others = first.MakeFrames()
    length_hist = thinkstats2.Hist(live.prglngth)

    # exercise the two comparison helpers
    weight_comparison(live, firsts, others)
    preg_length_comparison(live, firsts, others)

    # Mode must report 39 for pregnancy length
    top_value = Mode(length_hist)
    print('Mode of preg length', top_value)
    assert top_value == 39, top_value

    # the first AllModes entry must carry a frequency of 4693
    ranked = AllModes(length_hist)
    top_freq = ranked[0][1]
    assert top_freq == 4693, top_freq

    for val, count in ranked[:5]:
        print(val, count)

    print('%s: All tests passed.' % script)
def main(script):
    """Checks Mode/AllModes, then reports the firsts-vs-others weight gap.

    script: string script name (echoed in the final status line)
    """
    live, firsts, others = first.MakeFrames()
    length_hist = thinkstats2.Hist(live.prglngth)

    # Mode must report 39 for pregnancy length
    top_value = Mode(length_hist)
    print('Mode of preg length', top_value)
    assert top_value == 39, top_value

    # the first AllModes entry must carry a frequency of 4693
    ranked = AllModes(length_hist)
    top_freq = ranked[0][1]
    assert top_freq == 4693, top_freq

    for val, count in ranked[:5]:
        print(val, count)

    # mean totalwgt_lb of each group and their difference
    mean_firsts = firsts.totalwgt_lb.mean()
    mean_others = others.totalwgt_lb.mean()
    gap = mean_firsts - mean_others
    report = "firsts = {} pounds, others = {} pounds, dif = {} pounds "
    print(report.format(mean_firsts, mean_others, gap))

    effect = thinkstats2.CohenEffectSize(firsts.totalwgt_lb,
                                         others.totalwgt_lb)
    print(effect)

    print('%s: All tests passed.' % script)
def main(script):
    """Checks Mode/AllModes and prints two effect-size summaries.

    script: string script name (echoed in the final status line)
    """
    live, firsts, others = first.MakeFrames()
    length_hist = thinkstats2.Hist(live.prglngth)

    # Mode must report 39 for pregnancy length
    top_value = Mode(length_hist)
    print('Mode of preg length', top_value)
    assert top_value == 39

    # the first AllModes entry must carry a frequency of 4693
    ranked = AllModes(length_hist)
    assert ranked[0][1] == 4693

    for val, count in ranked[:5]:
        print(val, count)

    weight_effect = WeightDifferences(firsts, others, live)
    print("Cohens'd Effect of weight differences:", weight_effect)

    length_effect = PregnancyLengthDifferences(firsts, others)
    print("Cohens'd Effect of pregnancy length differences:", length_effect)

    print('%s: All tests passed.' % script)
def main(script):
    """Checks PmfMean and PmfVar against the Pmf's own Mean() and Var().

    script: string script name (echoed in the final status line)
    """
    live, firsts, others = first.MakeFrames()
    length_pmf = thinkstats2.Pmf(live.prglngth)

    # PmfMean must agree exactly with Pmf.Mean
    mu = PmfMean(length_pmf)
    print('Mean of preg length', mu)
    assert mu == length_pmf.Mean(), mu

    # PmfVar must agree exactly with Pmf.Var
    spread = PmfVar(length_pmf)
    print('Variance of preg length', spread)
    assert spread == length_pmf.Var(), spread

    print('%s: All tests passed.' % script)
def main():
    """Runs RunTests on seven samples of live, halving the size each time."""
    thinkstats2.RandomSeed(18)

    live, firsts, others = first.MakeFrames()
    full_size = len(live)

    # sizes are len(live), len(live)//2, len(live)//4, ... (floor division)
    for halvings in range(7):
        size = full_size // 2 ** halvings
        RunTests(thinkstats2.SampleRows(live, size))
def main():
    """Prints the mean of PairwiseDiff(live) and shows its PMF as a histogram."""
    live, firsts, others = first.MakeFrames()

    pair_diffs = PairwiseDiff(live)
    print('Mean: ', thinkstats2.Mean(pair_diffs))

    diff_pmf = thinkstats2.Pmf(pair_diffs)
    thinkplot.Hist(diff_pmf)
    thinkplot.Show(xlabel='Diff in wks',
                   ylabel='PMF')
def main(name, data_dir=''):
    """Seeds the RNG and runs the example, sampling, CDF and figure helpers.

    name: not used in the body
    data_dir: not used in the body
    """
    thinkstats2.RandomSeed(17)

    # MakeExample runs before the frames are loaded
    MakeExample()

    live_df, firsts_df, others_df = first.MakeFrames()

    RandomFigure(live_df)
    TestSample(live_df)
    MakeCdf(live_df)
    MakeFigures(live_df, firsts_df, others_df)
def main():
    """Seeds the RNG, draws the two plots, then runs TestIntervention."""
    thinkstats2.RandomSeed(17)

    live_df, firsts_df, others_df = first.MakeFrames()

    PlotAdultWeights(live_df)
    PlotPregLengths(live_df, firsts_df, others_df)
    TestIntervention()
def main():
    """Estimates birth weight, then plots on rows with age and weight present."""
    thinkstats2.RandomSeed(17)

    # only the first frame returned by MakeFrames is needed here
    frames = first.MakeFrames()
    live, _, _ = frames

    # EstimateBirthWeight sees the frame before missing rows are dropped
    EstimateBirthWeight(live)

    complete = live.dropna(subset=['agepreg', 'totalwgt_lb'])
    PlotSamplingDistributions(complete)
    PlotFit(complete)
    PlotResiduals(complete)
def main():
    """Runs the resample test, then RunTests on seven shrinking samples."""
    # a fixed seed makes the random samples repeatable
    thinkstats2.RandomSeed(23)

    live, firsts, others = first.MakeFrames()
    RunResampleTest(firsts, others)

    # start at the full row count and halve (floor) after every round
    size = len(live)
    rounds_left = 7
    while rounds_left > 0:
        RunTests(thinkstats2.SampleRows(live, size))
        size //= 2
        rounds_left -= 1
def main(name, data_dir='.'):
    """Seeds the RNG and runs the regression examples on the live frame.

    name: not used in the body
    data_dir: not used in the body
    """
    thinkstats2.RandomSeed(17)

    # the standalone example runs before any data is loaded
    LogisticRegressionExample()

    live, firsts, others = first.MakeFrames()

    # boolean column: True where birthord equals 1
    is_first = live.birthord == 1
    live['isfirst'] = is_first

    RunLogisticModels(live)
    RunSimpleRegression(live)
    RunModels(live)
    PredictBirthWeight(live)
def main(script):
    """Prints age/weight correlations and saves a scatter plot.

    script: not used in the body
    """
    thinkstats2.RandomSeed(17)

    live, firsts, others = first.MakeFrames()
    # keep only rows where both agepreg and totalwgt_lb are present
    live = live.dropna(subset=['agepreg', 'totalwgt_lb'])
    BinnedPercentiles(live)

    mother_ages = live.agepreg
    birth_weights = live.totalwgt_lb

    pearson = thinkstats2.Corr(mother_ages, birth_weights)
    print('thinkstats2 Corr', pearson)
    spearman = thinkstats2.SpearmanCorr(mother_ages, birth_weights)
    print('thinkstats2 SpearmanCorr', spearman)

    ScatterPlot(mother_ages, birth_weights, alpha=0.1)
    thinkplot.Save(root='chap07scatter1',
                   legend=False,
                   formats=['jpg'])
def main(script):
    """Runs the hypothesis tests in a fixed order with fixed RNG seeds.

    script: not used in the body
    """
    # seed both generators; the call order below is kept fixed on purpose
    random.seed(100)
    np.random.seed(100)

    # coin test
    coin = CoinTest((140, 110))
    coin_p = coin.PValue()
    print("coin test p-value", coin_p)

    # pregnancy lengths: firsts vs. others
    print("\nprglngth")
    live, firsts, others = first.MakeFrames()
    length_pair = firsts.prglngth.values, others.prglngth.values
    RunTests(length_pair)

    # birth weights: permutation test on the difference in means
    print("\nbirth weight")
    weight_pair = (firsts.totalwgt_lb.dropna().values,
                   others.totalwgt_lb.dropna().values)
    means_test = DiffMeansPermute(weight_pair)
    means_p = means_test.PValue(iters=1000)
    print("means permute two-sided")
    PrintTest(means_p, means_test)

    # correlation between agepreg and totalwgt_lb on complete rows
    complete = live.dropna(subset=["agepreg", "totalwgt_lb"])
    age_weight = complete.agepreg.values, complete.totalwgt_lb.values
    corr_test = CorrelationPermute(age_weight)
    corr_p = corr_test.PValue()
    print("\nage weight correlation")
    print("n=", len(complete))
    PrintTest(corr_p, corr_test)

    # dice test
    RunDiceTest()

    # pregnancy lengths again, this time with PregLengthTest
    length_pair = firsts.prglngth.values, others.prglngth.values
    chi_test = PregLengthTest(length_pair)
    chi_p = chi_test.PValue()
    print("\npregnancy length chi-squared")
    PrintTest(chi_p, chi_test)

    # false negative rate for the difference in pregnancy length
    length_pair = firsts.prglngth.values, others.prglngth.values
    miss_rate = FalseNegRate(length_pair)
    print("false neg rate", miss_rate)

    # repeat the tests via ReplicateTests
    ReplicateTests()
def main():
    """Runs the hypothesis tests in a fixed order after seeding the RNG."""
    # the call order below is kept fixed so seeded results stay repeatable
    thinkstats2.RandomSeed(17)

    # coin test
    coin = CoinTest((140, 110))
    coin_p = coin.PValue()
    print('coin test p-value', coin_p)

    # pregnancy lengths: firsts vs. others
    print('\nprglngth')
    live, firsts, others = first.MakeFrames()
    length_pair = firsts.prglngth.values, others.prglngth.values
    RunTests(length_pair)

    # birth weights: permutation test on the difference in means
    print('\nbirth weight')
    weight_pair = (firsts.totalwgt_lb.dropna().values,
                   others.totalwgt_lb.dropna().values)
    means_test = DiffMeansPermute(weight_pair)
    means_p = means_test.PValue(iters=1000)
    print('means permute two-sided')
    PrintTest(means_p, means_test)

    # correlation between agepreg and totalwgt_lb on complete rows
    complete = live.dropna(subset=['agepreg', 'totalwgt_lb'])
    age_weight = complete.agepreg.values, complete.totalwgt_lb.values
    corr_test = CorrelationPermute(age_weight)
    corr_p = corr_test.PValue()
    print('\nage weight correlation')
    print('n=', len(complete))
    PrintTest(corr_p, corr_test)

    # dice test
    RunDiceTest()

    # pregnancy lengths again, this time with PregLengthTest
    length_pair = firsts.prglngth.values, others.prglngth.values
    chi_test = PregLengthTest(length_pair)
    chi_p = chi_test.PValue()
    print('\npregnancy length chi-squared')
    PrintTest(chi_p, chi_test)

    # false negative rate for the difference in pregnancy length
    length_pair = firsts.prglngth.values, others.prglngth.values
    miss_rate = FalseNegRate(length_pair)
    print('false neg rate', miss_rate)

    # repeat the tests via ReplicateTests
    ReplicateTests()
def main():
    """Makes the CLT plots, prints the gorilla example, then runs the tests."""
    thinkstats2.RandomSeed(17)

    MakeCltPlots()

    print('Gorilla example')
    population = Normal(90, 7.5**2)
    print(population)

    # sum of 9 draws divided by 9
    sample_mean = population.Sum(9) / 9
    print(sample_mean.sigma)
    low = sample_mean.Percentile(5)
    high = sample_mean.Percentile(95)
    print(low, high)

    live, firsts, others = first.MakeFrames()
    TestCorrelation(live)
    PlotPregLengths(live, firsts, others)

    TestChiSquared()
def main(script):
    """Checks PmfMean and PmfVar against the Pmf's own Mean() and Var().

    script: string script name (echoed in the final status line)
    """
    live, firsts, others = first.MakeFrames()

    # build the Pmf of pregnancy length
    length_pmf = thinkstats2.Pmf(live.prglngth)

    # PmfVar is handed the mean computed by PmfMean
    mu = PmfMean(length_pmf)
    spread = PmfVar(length_pmf, mu)

    assert mu == length_pmf.Mean()
    assert spread == length_pmf.Var()

    print('mean/var preg length', mu, spread)
    print('%s: All tests passed.' % script)
def main():
    """Runs three tests on pregnancy length and saves one CDF plot per test."""
    thinkstats2.RandomSeed(17)

    # get the data
    live, firsts, others = first.MakeFrames()
    print('(Mean, Var) of prglength for live births',
          thinkstats2.MeanVar(live.prglngth))
    data = firsts.prglngth.values, others.prglngth.values

    def RunAndSave(test_class, root, title, xlabel):
        """Builds the test, prints its p-value, plots and saves the CDF."""
        ht = test_class(data)
        p_value = ht.PValue(iters=1000)
        print('p-value =', p_value)
        ht.PlotCdf()
        thinkplot.Save(root=root,
                       title=title,
                       xlabel=xlabel,
                       ylabel='CDF',
                       legend=False)

    # difference in means
    RunAndSave(DiffMeansPermute, 'hypothesis1', 'Permutation test',
               'difference in means (weeks)')

    # difference in std
    RunAndSave(DiffStdPermute, 'hypothesis2', 'Permutation test',
               'difference in std (weeks)')

    # difference in means by resampling
    # NOTE(review): this reuses DiffStdPermute although the title and axis
    # label describe a resampling test of the means -- looks like a
    # copy/paste slip; confirm which test class was intended.
    RunAndSave(DiffStdPermute, 'hypothesis3', 'Resampling test',
               'difference in means (weeks)')
def ComputeSkewnesses():
    """Plots the estimated PDF of birth weight and of adult weight.

    Each figure marks the mean and median returned by Summarize with a
    labelled vertical line and is written out with thinkplot.Save.
    """
    def VertLine(x, y):
        """Draws a thin gray vertical segment from (x, 0) to (x, y)."""
        thinkplot.Plot([x, x], [0, y], color='0.6', linewidth=1)

    # ---- birth weight ----
    live, firsts, others = first.MakeFrames()
    birth_weights = live.totalwgt_lb.dropna()
    print('Birth weight')
    mean, median = Summarize(birth_weights)

    height = 0.35
    label_y = 0.1 * height
    VertLine(mean, height)
    thinkplot.Text(mean - 0.15, label_y, 'mean',
                   horizontalalignment='right')
    VertLine(median, height)
    thinkplot.Text(median + 0.1, label_y, 'median',
                   horizontalalignment='left')

    birth_pdf = thinkstats2.EstimatedPdf(birth_weights)
    thinkplot.Pdf(birth_pdf, label='birth weight')
    thinkplot.Save(root='density_totalwgt_kde',
                   xlabel='lbs',
                   ylabel='PDF')

    # ---- adult weight ----
    respondents = brfss.ReadBrfss(nrows=None)
    adult_weights = respondents.wtkg2.dropna()
    print('Adult weight')
    mean, median = Summarize(adult_weights)

    height = 0.02499
    label_y = 0.1 * height
    VertLine(mean, height)
    thinkplot.Text(mean + 1, label_y, 'mean',
                   horizontalalignment='left')
    VertLine(median, height)
    thinkplot.Text(median - 1.5, label_y, 'median',
                   horizontalalignment='right')

    adult_pdf = thinkstats2.EstimatedPdf(adult_weights)
    thinkplot.Pdf(adult_pdf, label='adult weight')
    thinkplot.Save(root='density_wtkg2_kde',
                   xlabel='kg',
                   ylabel='PDF',
                   xlim=[0, 200])
def main(script):
    """Prints weight statistics for firsts/others, then checks Mode/AllModes.

    script: string script name (echoed in the final status line)
    """
    live, firsts, others = first.MakeFrames()
    length_hist = thinkstats2.Hist(live.prglngth)

    first_weights = firsts.totalwgt_lb
    other_weights = others.totalwgt_lb

    mean_firsts = first_weights.mean()
    mean_others = other_weights.mean()
    var_firsts = first_weights.var()
    var_others = other_weights.var()

    print('Mean Weight')
    print('First babies', mean_firsts)
    print('Others babies', mean_others)

    print('Variance in Weight')
    print('First babies', var_firsts)
    print('Others babies', var_others)

    print('Difference in lbs', mean_firsts - mean_others)

    effect = thinkstats2.CohenEffectSize(firsts.totalwgt_lb,
                                         others.totalwgt_lb)
    print('Cohen d', effect)

    # Mode must report 39 for pregnancy length
    top_value = Mode(length_hist)
    print('Mode of preg length', top_value)
    assert top_value == 39, top_value

    # the first AllModes entry must carry a frequency of 4693
    ranked = AllModes(length_hist)
    top_freq = ranked[0][1]
    assert top_freq == 4693, top_freq

    for val, count in ranked[:5]:
        print(val, count)

    print('%s: All tests passed.' % script)
def main(script):
    """Checks PmfMean/PmfVar against Pmf.Mean/Pmf.Var and prints the values.

    script: string script name (echoed in the final status line)
    """
    live, firsts, others = first.MakeFrames()

    # build the Pmf of pregnancy length
    length_pmf = thinkstats2.Pmf(live.prglngth)
    mu = PmfMean(length_pmf)
    spread = PmfVar(length_pmf)

    assert mu == length_pmf.Mean()
    assert spread == length_pmf.Var()

    # the '# capture ...' lines are printed output, not comments
    print('# capture the mean value')
    print('Mean Value length', mu)

    print('# capture the var value')
    print('Var Value length', spread)

    print('# capture the mean/ preg length value')
    print('mean/var preg length', mu, spread)

    print('%s: All tests passed.' % script)
def main(script):
    """Tests the functions in this module.

    Runs WeightDifference on the three frames, then checks Mode and
    AllModes against the pregnancy-length histogram.

    script: string script name (not used in the body)
    """
    live, firsts, others = first.MakeFrames()
    hist = thinkstats2.Hist(live.prglngth)

    # explore the weight difference between first babies and others
    WeightDifference(live, firsts, others)

    # test Mode
    mode = Mode(hist)
    print('Mode of preg length', mode)
    assert mode == 39

    # test AllModes
    modes = AllModes(hist)
    assert modes[0][1] == 4693

    # a stray "ies" token after the colon made this loop a syntax error
    for value, freq in modes[:5]:
        print(value, freq)
def main(script):
    """Checks Mode/AllModes, then prints Cohen's d for birth weight.

    script: string script name (echoed in the status line)
    """
    live, firsts, others = first.MakeFrames()
    length_hist = thinkstats2.Hist(live.prglngth)

    # Mode must report 39 for pregnancy length
    top_value = Mode(length_hist)
    print('Mode of preg length', top_value)
    assert top_value == 39

    # the first AllModes entry must carry a frequency of 4693
    ranked = AllModes(length_hist)
    print(ranked)
    assert ranked[0][1] == 4693

    for val, count in ranked[:5]:
        print(val, count)

    # the status line is printed before the effect size
    print('%s: All tests passed.' % script)

    effect = cohen_d(firsts.totalwgt_lb, others.totalwgt_lb)
    print("Cohen's d:", effect)
def ComputeSkewnesses():
    """Plots the estimated PDF of birth weight and of adult weight.

    Each figure marks the mean and median returned by Summarize with a
    labelled vertical line and is written out with myplots.Save.
    """
    def VertLine(x, y):
        """Draws a thin gray vertical segment from (x, 0) to (x, y)."""
        myplots.Plot([x, x], [0, y], color="0.6", linewidth=1)

    # ---- birth weight ----
    live, firsts, others = first.MakeFrames()
    birth_weights = live.totalwgt_lb.dropna()
    print("Birth weight")
    mean, median = Summarize(birth_weights)

    height = 0.35
    label_y = 0.1 * height
    VertLine(mean, height)
    myplots.Text(mean - 0.15, label_y, "mean",
                 horizontalalignment="right")
    VertLine(median, height)
    myplots.Text(median + 0.1, label_y, "median",
                 horizontalalignment="left")

    birth_pdf = mystats.EstimatedPdf(birth_weights)
    myplots.Pdf(birth_pdf, label="birth weight")
    myplots.Save(root="density_totalwgt_kde",
                 xlabel="lbs",
                 ylabel="PDF")

    # ---- adult weight ----
    respondents = brfss.ReadBrfss(nrows=None)
    adult_weights = respondents.wtkg2.dropna()
    print("Adult weight")
    mean, median = Summarize(adult_weights)

    height = 0.02499
    label_y = 0.1 * height
    VertLine(mean, height)
    myplots.Text(mean + 1, label_y, "mean",
                 horizontalalignment="left")
    VertLine(median, height)
    myplots.Text(median - 1.5, label_y, "median",
                 horizontalalignment="right")

    adult_pdf = mystats.EstimatedPdf(adult_weights)
    myplots.Pdf(adult_pdf, label="adult weight")
    myplots.Save(root="density_wtkg2_kde",
                 xlabel="kg",
                 ylabel="PDF",
                 xlim=[0, 200])
# NOTE(review): this looks like a flattened notebook -- the next line reads
# as a markdown title cell and `%matplotlib inline` is an IPython magic, so
# the fragment is presumably meant to run in Jupyter, not as a plain module.
[Think Stats Chapter 7 Exercise 1](http://greenteapress.com/thinkstats2/html/thinkstats2008.html#toc70) (weight vs. age)

# imports
from __future__ import print_function, division

%matplotlib inline

import numpy as np
import thinkstats2
import thinkplot

# get data
import first

live, firsts, others = first.MakeFrames()
# keep only rows where both agepreg and totalwgt_lb are present
live = live.dropna(subset=['agepreg', 'totalwgt_lb'])
ages = live.agepreg
weights = live.totalwgt_lb

# make a scatter plot of birth weight versus mother’s age
thinkplot.Scatter(ages, weights, alpha=1, s=10)
thinkplot.Config(xlabel='Age (years)',
                 ylabel='Weight (lbs)',
                 xlim=[10, 45],
                 ylim=[0, 15],
                 legend=False)
# RESULTS: messy plot

# Plot percentiles of birth weight versus mother’s age
# bin edges are 10, 15, ..., 40 (np.arange excludes the stop value 45)
bins = np.arange(10, 45, 5)
# index of the age bin each row falls into
indices = np.digitize(live.agepreg, bins)
# binned mother's age
# NOTE(review): `groups` is not used within this fragment; the percentile
# plot presumably continues in a later cell.
groups = live.groupby(indices)
def COHEN(Frame1, Frame2):
    """Returns the difference in means divided by a combined std.

    The divisor is the square root of the plain (unweighted) average of
    the two squared std() values.

    Frame1, Frame2: objects providing mean() and std()
    """
    mean_gap = Frame1.mean() - Frame2.mean()
    avg_var = (Frame1.std()**2 + Frame2.std()**2) / 2
    return mean_gap / avg_var**.5


import thinkstats2
import first

# load the three frames (approach borrowed from the book's solutions)
live, firsts, others = first.MakeFrames()

TotalW = live.totalwgt_lb
FirstW = firsts.totalwgt_lb
OtherW = others.totalwgt_lb

# effect size from COHEN above and from thinkstats2
MyCohen = COHEN(FirstW, OtherW)
NotMyCohen = thinkstats2.CohenEffectSize(FirstW, OtherW)

FirstPMean = firsts.prglngth.mean()
OtherPMean = others.prglngth.mean()

# birth weight summary
print("the mean weight of first babies is " + str(FirstW.mean()) + " lbs")
print("the mean weight of other babies is " + str(OtherW.mean()) + " lbs")
print("the Cohen's d between the two sets is " + str(MyCohen))

# pregnancy length summary
print("the mean pregnancy length of first babies was " + str(FirstPMean) +
      " weeks")
print("the mean pregnancy length of other babies was " + str(OtherPMean) +
      " weeks")
print("the difference between the pregnancy lengths was " +
      str(FirstPMean - OtherPMean))
def main(script):
    """Loads the frames and runs the figure, histogram and class-size helpers.

    script: not used in the body
    """
    live_df, firsts_df, others_df = first.MakeFrames()

    make_figures(firsts_df, others_df)
    make_hists(live_df)
    class_sizes()
import thinkplot


def Diffs(t):
    """Returns the first element of t minus each of the remaining elements.

    t: sequence supporting indexing and slicing; must not be empty

    returns: list with len(t) - 1 differences
    """
    head = t[0]
    return [head - x for x in t[1:]]


def PairWiseDifference(live):
    """Collects pregnancy-length differences within each preg-map entry.

    Only rows with prglngth >= 37 are considered. For every entry of
    nsfg.MakePregMap that lists at least two rows, the first length is
    compared with each later one (see Diffs).

    live: DataFrame with a prglngth column

    returns: list of differences
    """
    live = live[live.prglngth >= 37]
    preg_map = nsfg.MakePregMap(live)

    diffs = []
    # only the index lists are needed, so iterate the values directly
    for indices in preg_map.values():
        lengths = live.loc[indices].prglngth.values
        if len(lengths) >= 2:
            diffs.extend(Diffs(lengths))
    return diffs


if __name__ == '__main__':
    # use firsts/others so the imported module `first` is not rebound
    live, firsts, others = first.MakeFrames()
    diffs = PairWiseDifference(live)

    mean = thinkstats2.Mean(diffs)
    print('Mean difference between pairs', mean)

    pmf = thinkstats2.Pmf(diffs)
    thinkplot.Hist(pmf, align="center")
    thinkplot.Show(xlabel='Difference in weeks', ylabel='PMF')
def main(script):
    """Loads the frames and runs the figure, histogram and class-size helpers.

    script: not used in the body
    """
    live_df, firsts_df, others_df = first.MakeFrames()

    MakeFigures(firsts_df, others_df)
    MakeHists(live_df)
    ClassSizes()
def main(script):
    """Loads the frames, summarizes them, then makes the figures.

    script: not used in the body
    """
    live_df, firsts_df, others_df = first.MakeFrames()

    Summarize(live_df, firsts_df, others_df)
    MakeFigures(firsts_df, others_df)