def main():
    """Summarize and plot adult heights for men and women.

    Reads the respondent records from the ``res`` directory, prints the
    trimmed mean, variance, standard deviation and coefficient of variation
    for each sex, then shows a histogram and a PMF per sex, and finally both
    CDFs on a single figure.
    """
    resp = brfss.Respondents()
    resp.ReadRecords(data_dir='res')

    # SummarizeHeight is expected to hand back the heights grouped by sex
    # code; keys 1 and 2 are treated here as men and women respectively.
    heights_by_sex = resp.SummarizeHeight()
    man_d = heights_by_sex[1]
    lady_d = heights_by_sex[2]

    # mu, var, sigma and coefficient of variation (CV): men first, then women
    summaries = (
        (man_d, "man: mu = %.3f, var = %.3f, sigma = %.3f, cv = %.3f"),
        (lady_d, "lady: mu = %.3f, var = %.3f, sigma = %.3f, cv = %.3f"),
    )
    for heights, template in summaries:
        mu, var = thinkstats.TrimmedMeanVar(heights)
        sigma = math.sqrt(var)
        cv = sigma / mu
        print(template % (mu, var, sigma, cv))

    # histogram for each sex, one figure at a time
    hists = []
    for heights, title in ((man_d, 'man hist'), (lady_d, 'lady hist')):
        hist = Pmf.MakeHistFromList(heights, name=title)
        myplot.Hist(hist)
        myplot.Show()
        myplot.Clf()
        hists.append(hist)

    # PMF for each sex, derived from the histograms above
    pmfs = []
    for hist, title in zip(hists, ('man pmf', 'lady pmf')):
        pmf = Pmf.MakePmfFromHist(hist, name=title)
        myplot.Pmf(pmf)
        myplot.Show()
        myplot.Clf()
        pmfs.append(pmf)

    # cumulative distributions of both sexes drawn together
    man_pmf, lady_pmf = pmfs
    man_cdf = Cdf.MakeCdfFromPmf(man_pmf, name='man cdf')
    lady_cdf = Cdf.MakeCdfFromPmf(lady_pmf, name='lady cdf')
    myplot.Cdfs((man_cdf, lady_cdf), complement=False, transform=None)
    myplot.Show()
def MakeNormalPlot(values):
    """Draw a normal probability plot of the given values.

    The sorted values are plotted against an equally sized, sorted sample
    obtained from GenerateNormalVariates(0, 1, n), on top of whatever
    PlotSimulatedData draws for the trimmed mean and standard deviation of
    the data. The figure is then handed to myplot.Save with show=True.

    Args:
        values: sequence of values
    """
    # TODO: when n is small, generate a larger sample and desample
    pyplot.clf()

    # estimate the parameters; p=0.01 is forwarded to TrimmedMeanVar
    # (presumably the fraction of data trimmed -- confirm in thinkstats)
    mean, variance = thinkstats.TrimmedMeanVar(values, p=0.01)
    std = math.sqrt(variance)
    count = len(values)

    # simulated data for the fitted parameters goes on the figure first
    PlotSimulatedData(mean, std, count)

    # the real data: sorted reference sample on x, sorted values on y
    reference = sorted(GenerateNormalVariates(0, 1, count))
    pyplot.plot(reference, sorted(values), 'r.', markersize=3)

    myplot.Save(show=True,
                xlabel='Standard normal values',
                legend=False)
def MakeNormalModel(self, weights, root, xmax=175,
                    xlabel='adult weight (kg)', axis=None):
    """Plot the CDF of the weights together with a normal model.

    The model parameters are the trimmed mean and variance of the weights;
    they are printed along with the sample size and sigma before plotting.

    Args:
        weights: list of weights
        root: forwarded to myplot.Save as its first argument
        xmax: third argument to continuous.RenderNormalCdf and the upper
            x limit of the default axis
        xlabel: label for the x axis
        axis: axis limits forwarded to myplot.Save; when falsy,
            [0, xmax, 0, 1] is used
    """
    data_cdf = Cdf.MakeCdfFromList(weights)

    pyplot.clf()

    # work on a sorted copy so the caller's list is left untouched
    ordered = sorted(weights)

    mu, var = thinkstats.TrimmedMeanVar(ordered)
    print('n, Mean, Var', len(weights), mu, var)

    sigma = math.sqrt(var)
    print('Sigma', sigma)

    # thick grey curve: the normal model
    model_xs, model_ps = continuous.RenderNormalCdf(mu, sigma, xmax)
    pyplot.plot(model_xs, model_ps, label='model', linewidth=4, color='0.7')

    # thinner blue curve: the empirical CDF
    data_xs, data_ps = data_cdf.Render()
    pyplot.plot(data_xs, data_ps, label='data', linewidth=2, color='blue')

    if not axis:
        axis = [0, xmax, 0, 1]

    myplot.Save(root,
                title='Adult weight',
                xlabel=xlabel,
                ylabel='CDF',
                axis=axis)
def SummarizeHeight(self):
    """Print summary statistics for male and female height.

    For each group (gender codes 1 and 2, plus 'all') one line is printed
    with the key, the number of heights, and the trimmed mean, variance,
    standard deviation and coefficient of variation of those heights.

    Returns:
        dict mapping each group key (1, 2, 'all') to its list of heights;
        records whose htm3 is 'NA' are left out.
    """
    # make a dictionary that maps from gender code to list of heights
    d = {1: [], 2: [], 'all': []}

    # single pass: each usable height goes to its gender list and to 'all'
    for r in self.records:
        if r.htm3 != 'NA':
            d[r.sex].append(r.htm3)
            d['all'].append(r.htm3)

    for key, t in d.items():
        mu, var = thinkstats.TrimmedMeanVar(t)
        sigma = math.sqrt(var)
        cv = sigma / mu
        print(key, len(t), mu, var, sigma, cv)

    # returned so callers can reuse the grouped heights (e.g. d[1], d[2])
    return d
def SummarizeWeight(self):
    """Print summary statistics for male and female weight.

    Prints a two-line header, then for each group (gender codes 1 and 2,
    plus 'all') one line with the key, the number of weights, and the
    trimmed mean, variance, standard deviation and coefficient of variation
    of those weights. Records whose weight2 is 'NA' are ignored.
    """
    # make a dictionary that maps from gender code to list of weights
    d = {1: [], 2: [], 'all': []}

    # single pass: each usable weight goes to its gender list and to 'all'
    for r in self.records:
        if r.weight2 != 'NA':
            d[r.sex].append(r.weight2)
            d['all'].append(r.weight2)

    print('Weight (kg):')
    print('key n mean var sigma cv')
    for key, t in d.items():
        mu, var = thinkstats.TrimmedMeanVar(t)
        sigma = math.sqrt(var)
        cv = sigma / mu
        print(key, len(t), mu, var, sigma, cv)
def MakeNormalModel(values):
    """Plot the CDF of birthweights with a normal model.

    The model uses the trimmed mean and variance of the values; mean,
    variance and sigma are printed before plotting. The figure is handed to
    myplot.Save with show=True.

    Args:
        values: sequence of values
    """
    # estimate parameters: trimming outliers yields a better fit
    mu, var = thinkstats.TrimmedMeanVar(values, p=0.01)
    print('Mean, Var', mu, var)

    # plot the model
    sigma = math.sqrt(var)
    print('Sigma', sigma)

    # 200 is the third argument to RenderNormalCdf (presumably the upper
    # x limit of the rendered curve -- confirm against its definition)
    xs, ps = RenderNormalCdf(mu, sigma, 200)

    pyplot.clf()
    pyplot.plot(xs, ps, label='model', linewidth=4, color='0.8')

    # plot the data
    cdf = Cdf.MakeCdfFromList(values)
    xs, ps = cdf.Render()
    pyplot.plot(xs, ps, label='data', linewidth=2, color='red')

    myplot.Save(show=True, ylabel='CDF')
def MakeNormalModel(weights):
    """Compare the CDF of birth weights with a fitted normal CDF.

    Mean, variance and sigma of the fit are printed, then the model and the
    data are drawn on one figure and saved through myplot.Save under the
    root name 'nsfg_birthwgt_model'.

    Args:
        weights: sequence of birth weights (the x axis is labelled in oz)
    """
    # fit the model on trimmed data: dropping outliers yields a better fit
    mean, variance = thinkstats.TrimmedMeanVar(weights, p=0.01)
    print('Mean, Var', mean, variance)

    std = math.sqrt(variance)
    print('Sigma', std)

    # thick light-grey curve: the normal model
    model_xs, model_ps = RenderNormalCdf(mean, std, 200)
    pyplot.clf()
    pyplot.plot(model_xs, model_ps, label='model', linewidth=4, color='0.8')

    # thinner blue curve: the empirical CDF of the data
    data_xs, data_ps = Cdf.MakeCdfFromList(weights).Render()
    pyplot.plot(data_xs, data_ps, label='data', linewidth=2, color='blue')

    myplot.Save('nsfg_birthwgt_model',
                title='Birth weights',
                xlabel='birth weight (oz)',
                ylabel='CDF')