def EstimateGoals(lam, m):
    """Simulate m games with SimulateGame(lam) and summarize the goal counts.

    Prints estimation.RMSE and estimation.MeanError of the simulated goals
    measured against lam, then plots their CDF together with vertical
    markers at the 5th and 95th percentiles and hands the figure to
    thinkplot.SaveFormat (root 'Q9_sampling_dist', fmt 'png').

    lam: value passed to SimulateGame and used as the reference for the
         error metrics
    m: number of games to simulate
    """

    def MarkX(pos, height=1):
        # Vertical segment from (pos, 0) to (pos, height).
        thinkplot.Plot([pos, pos], [0, height], color='0.8', linewidth=3)

    goal_counts = [SimulateGame(lam) for _ in range(m)]

    print('RMSE of Goals: ', estimation.RMSE(goal_counts, lam))
    print('Mean Error of Goals: ', estimation.MeanError(goal_counts, lam))

    goal_cdf = thinkstats2.Cdf(goal_counts)
    lower, upper = goal_cdf.Percentile(5), goal_cdf.Percentile(95)

    # Draw the interval markers first, then the CDF itself.
    for bound in (lower, upper):
        MarkX(bound)
    thinkplot.Cdf(goal_cdf)

    # thinkplot.Show with the same labels was the interactive alternative.
    thinkplot.SaveFormat(
        root='Q9_sampling_dist',
        fmt='png',
        xlabel='Goals',
        ylabel='CumProb',
        title='Sampling Distribution, lam = ' + str(lam),
    )
import nsfg
import thinkstats2
import thinkplot
import probability

# Build the PMF of the `numkdhh` column from the respondent data and the PMF
# that probability.BiasPmf derives from it, print both with their means, and
# plot them side by side.
p = nsfg.ReadFemResp()

act_pmf = thinkstats2.Pmf(p.numkdhh, label='actual')
print(act_pmf)

bias_pmf = probability.BiasPmf(act_pmf, label='observed')
print(bias_pmf)

print('Mean number of children, actual: ', act_pmf.Mean())
print('Mean number of children, biased: ', bias_pmf.Mean())

pmfs = [act_pmf, bias_pmf]
fig = thinkplot.Pmfs(pmfs)

# thinkplot.show with the same axis labels was the interactive alternative.
thinkplot.SaveFormat(
    root='act_vs_biased',
    fmt='png',
    xlabel='No. of Children',
    ylabel='pmf',
)
# Fixed seed so repeated runs produce the same simulated samples.
thinkstats2.RandomSeed(1)

# Sample sizes 10, 20, ..., 1000.  np.arange excludes its stop value, so the
# stop must be 1001: with a stop of 1000 the largest size was 990, yet the
# summary below reported the last entry as "n = 1000".
ns = np.arange(10, 1001, 10)

# One entry per sample size, in the same order as `ns`.
stderrs = []
cis = []
cdfs = []
for n in ns:
    # The first argument (2) is passed through unchanged; presumably a
    # distribution parameter of Simulate_Sample -- confirm against its
    # definition.
    cdf, stderr, ci = Simulate_Sample(2, n, m=1000)
    cdfs.append(cdf)
    stderrs.append(stderr)
    cis.append(ci)

# Position of n == 100 in `ns`, looked up once and reused for both prints.
idx_100 = list(ns).index(100)

print('Standard error, n = 10: ', stderrs[0])
print('Confidence interval, n = 10: ', cis[0])
print('Standard error, n = 100: ', stderrs[idx_100])
print('Confidence interval, n = 100: ', cis[idx_100])
print('Standard error, n = 1000: ', stderrs[-1])
print('Confidence interval, n = 1000: ', cis[-1])

# CDF returned for the smallest sample size (n = 10).
thinkplot.Cdf(cdfs[0])
thinkplot.SaveFormat(root='Q8_cdf', fmt='png', xlabel='x', ylabel='CumProb')

# NOTE(review): if SaveFormat does not clear the current figure, this curve
# is drawn on top of the CDF above -- confirm.
thinkplot.Plot(ns, stderrs)
thinkplot.SaveFormat(root='Q8_stderr', fmt='png', xlabel='n',
                     ylabel='Standard Error')
import DataSet
import pdb

# Plot a CDF for every variable of the h155 data set and record the variables
# whose plot could not be produced.
#
# NOTE(review): `ts2` and `tplt` are used below but are not imported in the
# visible part of this script (presumably aliases for thinkstats2 and
# thinkplot).  If they are missing, the resulting NameError is swallowed by
# the broad `except` and every variable is logged as a failure -- confirm the
# imports exist.

H155 = DataSet.DataSet('h155.pkl')
df = H155.df

errorlist = []  # (variable name, exception) for each plot that failed
totalvars = len(H155.varnames)

for index, var in enumerate(H155.varnames):
    # Progress counter, 1-based.
    print('\nPlotting %s, variable %d out of %d.' % (var, index + 1, totalvars))

    # Keep only strictly positive responses; per the original notes this is
    # meant to exclude error codes.
    thisplot = df[df[var] > 0][var]

    try:
        thishist = ts2.Cdf(thisplot)
        tplt.Cdf(thishist)
        tplt.Config(title=var, ylabel='Probability', xlabel='Response')
    except Exception as e:
        # Deliberately broad: one bad variable must not abort the whole batch.
        print('ERROR CREATING %s' % var)
        errorlist.append((var, e))
    else:
        tplt.SaveFormat('graphs/cdftests2/%s' % var, 'png')

    # Clear on every iteration so a partially drawn, failed plot cannot leak
    # into the next variable's figure.
    tplt.Clf()

print(errorlist)

# Text mode: the log holds formatted strings.  Each entry is a 2-tuple, so it
# is wrapped in a 1-tuple for formatting -- "%s\n" % err would unpack the pair
# into a single %s and raise TypeError on the first logged error.
with open('graphs/cdftests2/errorlog.txt', 'w') as wrfile:
    for err in errorlist:
        wrfile.write("%s\n" % (err,))
import first import numpy as np live, firsts, others = first.MakeFrames() live = live.dropna(subset=['agepreg', 'totalwgt_lb']) rho = thinkstats2.Corr(live.agepreg, live.totalwgt_lb) rho_s = thinkstats2.SpearmanCorr(live.agepreg, live.totalwgt_lb) print('Pearson\'s Correlation, Mother\'s age and Birth weight: ', rho) print('Spearman\'s Rank Correlation, Mother\'s age and Birth weight: ', rho_s) thinkplot.LEGEND = False thinkplot.Scatter(live.agepreg, live.totalwgt_lb) #thinkplot.Show(xlabel = 'Mother\'s age', ylabel = 'Birth weight') thinkplot.SaveFormat(root='age_weight_scatter', fmt='png', xlabel='Mothers\'s age', ylabel='Birth weight') thinkplot.LEGEND = True bins = np.arange(10, 45, 2.5) indices = np.digitize(live.agepreg, bins) groups = live.groupby(indices) ages = [group.agepreg.mean() for i, group in groups] cdfs = [thinkstats2.Cdf(group.totalwgt_lb) for i, group in groups] for percent in [75, 50, 25]: weights = [cdf.Percentile(percent) for cdf in cdfs] label = '%dth' % percent thinkplot.Plot(ages, weights, label=label) #thinkplot.Show(xlabel = 'Mother\'s age', ylabel = 'Birth weight') thinkplot.SaveFormat(root='age_weight_percentiles', fmt='png',
import nsfg
import thinkstats2
import thinkplot
import random
import pandas as pd
import numpy as np


def rand_list(n):
    """Return a list of n values, each produced by random.random()."""
    return [random.random() for _ in range(n)]


# 1000 random values, shown first as a PMF and then as a CDF.
rands = rand_list(1000)

pmf = thinkstats2.Pmf(rands)
thinkplot.Pmf(pmf)
thinkplot.SaveFormat(root='Q4_2pmf', fmt='png', xlabel='x', ylabel='Prob')

cdf = thinkstats2.Cdf(rands)
thinkplot.Cdf(cdf)
thinkplot.SaveFormat(root='Q4_2cdf', fmt='png', xlabel='x', ylabel='CumProb')