def testRespondents(self):
    """Check record count and per-field histograms for 10000 BRFSS records.

    Reads 10000 respondent records, then for each of several fields builds
    a histogram with MakeHist and verifies the smallest numeric value and
    the frequency of the 'NA' marker. Finally checks the frequency of each
    value of the 'sex' field.
    """
    resp = brfss.Respondents()
    resp.ReadRecords(n=10000)
    self.assertEqual(len(resp.records), 10000)

    # (field name, expected smallest numeric value, expected 'NA' frequency)
    expectations = [
        ('wtkg2', 22.73, 343),
        ('weight2', 22.727272727, 343),
        ('wtyrago', 27.27272727, 616),
        ('htm3', 104, 101),
    ]
    for field, expected_low, expected_na in expectations:
        hist = MakeHist(resp, field)
        # The histogram also holds the string 'NA' (its frequency is checked
        # below). Exclude it so min() compares numbers only; mixing str and
        # numbers in min() raises TypeError on Python 3.
        low = min(v for v in hist.Values() if v != 'NA')
        self.assertAlmostEqual(
            low, expected_low,
            msg='%s: lowest value %r, expected %r' % (field, low, expected_low))
        self.assertEqual(hist.Freq('NA'), expected_na)

    hist = MakeHist(resp, 'sex')
    self.assertEqual(hist.Freq(1), 3669)
    self.assertEqual(hist.Freq(2), 6331)
def DumpHeights(data_dir='.', n=10000):
    """Read the BRFSS dataset, extract the heights and pickle them.

    Heights are grouped by the record's ``sex`` value (keys 1 and 2) and the
    resulting dict is written to 'variability_data.pkl' in the current
    working directory. Records whose ``htm3`` is 'NA' are skipped.

    Args:
        data_dir: directory passed to Respondents.ReadRecords.
        n: record count passed to Respondents.ReadRecords.

    Raises:
        KeyError: if a record with a usable height has a ``sex`` value
            other than 1 or 2.
    """
    resp = brfss.Respondents()
    resp.ReadRecords(data_dir, n)

    heights_by_sex = {1: [], 2: []}
    for record in resp.records:
        if record.htm3 != 'NA':
            heights_by_sex[record.sex].append(record.htm3)

    # The context manager closes the file even if pickling fails.
    with open('variability_data.pkl', 'wb') as fp:
        pickle.dump(heights_by_sex, fp)
def main():
    """Compare the local Corr with correlation.Corr on BRFSS data.

    Loads respondent records from the 'res' directory, computes the
    height/weight correlation with both implementations and prints them
    side by side, then prints the correlation of heights with themselves.
    """
    respondents = brfss.Respondents()
    respondents.ReadRecords(data_dir='res')
    heights, weights = respondents.GetHeightAndWeight()

    local_r = Corr(heights, weights)
    # Method 2: the implementation from the correlation module.
    reference_r = correlation.Corr(heights, weights)
    print(local_r, "vs", reference_r)

    # A series correlated with itself; the label indicates 1 is expected.
    self_r = Corr(heights, heights)
    print("E = 1 vs", self_r)
def main():
    """Exercise ToRanks and compare two SpearmanCorr implementations.

    Prints the ranks of a small fixed list, then loads BRFSS respondent
    records from 'res' and prints the height/weight Spearman correlation
    from the local SpearmanCorr and from correlation.SpearmanCorr.
    """
    # Quick check of ToRanks on a small hand-written sample.
    sample = [7, 1, 2, 5]
    sample_ranks = ToRanks(sample)
    print(sample_ranks)

    # BRFSS data: heights and weights of the respondents.
    respondents = brfss.Respondents()
    respondents.ReadRecords(data_dir='res')
    heights, weights = respondents.GetHeightAndWeight()

    local_rho = SpearmanCorr(heights, weights)
    reference_rho = correlation.SpearmanCorr(heights, weights)
    print("r1, r2 = ", local_rho, reference_rho)
def main():
    """Summarize and plot BRFSS heights for the two sex groups.

    Loads respondent records from 'res', prints trimmed mean, variance,
    standard deviation and coefficient of variation for the groups keyed 1
    and 2, then shows a histogram and a PMF for each group and finally
    both CDFs on one figure.
    """
    def show_and_clear():
        # Display the current figure, then reset it for the next plot.
        myplot.Show()
        myplot.Clf()

    respondents = brfss.Respondents()
    respondents.ReadRecords(data_dir='res')
    heights_by_sex = respondents.SummarizeHeight()
    male_heights = heights_by_sex[1]
    female_heights = heights_by_sex[2]

    # Male mu, var, sigma and coefficient of variation (CV).
    male_mu, male_var = thinkstats.TrimmedMeanVar(male_heights)
    male_sigma = math.sqrt(male_var)
    male_cv = male_sigma / male_mu
    male_stats = (male_mu, male_var, male_sigma, male_cv)
    print("man: mu = %.3f, var = %.3f, sigma = %.3f, cv = %.3f" % male_stats)

    # Female mu, var, sigma and coefficient of variation (CV).
    female_mu, female_var = thinkstats.TrimmedMeanVar(female_heights)
    female_sigma = math.sqrt(female_var)
    female_cv = female_sigma / female_mu
    female_stats = (female_mu, female_var, female_sigma, female_cv)
    print("lady: mu = %.3f, var = %.3f, sigma = %.3f, cv = %.3f" % female_stats)

    # Histograms, one figure per group.
    male_hist = Pmf.MakeHistFromList(male_heights, name='man hist')
    myplot.Hist(male_hist)
    show_and_clear()

    female_hist = Pmf.MakeHistFromList(female_heights, name='lady hist')
    myplot.Hist(female_hist)
    show_and_clear()

    # PMFs derived from the histograms, one figure per group.
    male_pmf = Pmf.MakePmfFromHist(male_hist, name='man pmf')
    myplot.Pmf(male_pmf)
    show_and_clear()

    female_pmf = Pmf.MakePmfFromHist(female_hist, name='lady pmf')
    myplot.Pmf(female_pmf)
    show_and_clear()

    # Cumulative distributions of both groups on a single figure.
    male_cdf = Cdf.MakeCdfFromPmf(male_pmf, name='man cdf')
    female_cdf = Cdf.MakeCdfFromPmf(female_pmf, name='lady cdf')
    both_cdfs = (male_cdf, female_cdf)
    myplot.Cdfs(both_cdfs, complement=False, transform=None)
    myplot.Show()
def main():
    """Report statistics of the height CDF for the group keyed 1.

    Loads BRFSS respondent records from 'res', builds a CDF of the heights
    stored under key 1 of SummarizeHeight(), prints its mean, variance and
    standard deviation, plots the CDF, and prints the percentage of the
    distribution lying between 178 and 185.
    """
    respondents = brfss.Respondents()
    respondents.ReadRecords(data_dir='res')
    heights_by_sex = respondents.SummarizeHeight()
    male_heights = heights_by_sex[1]

    height_cdf = Cdf.MakeCdfFromList(male_heights, name='man height')
    mean = height_cdf.Mean()
    variance = height_cdf.Var(mu=mean)
    std_dev = math.sqrt(variance)
    summary = (mean, variance, std_dev)
    print("man height: mean = %.3f var = %.3f sigma = %.3f" % summary)

    myplot.Cdf(height_cdf, complement=False, transform=None)
    myplot.Show()

    # Difference of the CDF at the two bounds, expressed as a percentage.
    upper_prob = height_cdf.Prob(185)
    lower_prob = height_cdf.Prob(178)
    percent_between = (upper_prob - lower_prob) * 100
    print("178 - 185: %.3f%%" % percent_between)
def main():
    """Compare the local Cov with correlation.Cov on BRFSS data.

    Loads respondent records from 'res' and prints, for each
    implementation, the height/weight covariance, the covariance of heights
    with themselves, and the variance from thinkstats.MeanVar.
    """
    respondents = brfss.Respondents()
    respondents.ReadRecords(data_dir='res')
    heights, weights = respondents.GetHeightAndWeight()

    # Local implementation.
    local_hw = Cov(heights, weights)
    local_hh = Cov(heights, heights)
    _mean, height_var = thinkstats.MeanVar(heights)
    print(local_hw, local_hh, height_var)

    print("-------------- ")

    # Official method 2: the implementation from the correlation module.
    reference_hw = correlation.Cov(heights, weights)
    reference_hh = correlation.Cov(heights, heights)
    print(reference_hw, reference_hh, height_var)
import math

import numpy

import brfss
import Cdf
import myplot

# Draw normal probability plots for respondent weight and log10(weight):
# sorted random normal samples on the x axis against the sorted data on
# the y axis.
resp = brfss.Respondents()
resp.ReadRecords('.')

# Keep only usable weights. A list comprehension replaces the Python 2-only
# ``lambda (x): ...`` tuple-parameter syntax and gives a real list, so len()
# works on Python 3 as well (where filter() returns an iterator).
weights = [record.wtkg2 for record in resp.records if record.wtkg2 != 'NA']

# One random standard-normal sample per weight, sorted to pair with the
# sorted data.
xs = sorted(numpy.random.normal(0, 1, len(weights)))
ys = sorted(weights)
logys = sorted(math.log10(weight) for weight in weights)

myplot.Plot(xs, ys)
myplot.Show(title="Normal plot for weight")

myplot.Plot(xs, logys)
myplot.Show(title="Normal plot for log(weight)")