Esempio n. 1
0
    def testRespondents(self):
        """Check the first 10000 BRFSS records against expected summary values.

        For each numeric attribute, the smallest histogram value and the
        frequency of 'NA' are compared with expected constants; the 'sex'
        attribute is checked through the frequency of codes 1 and 2.
        """
        resp = brfss.Respondents()
        resp.ReadRecords(n=10000)
        self.assertEqual(len(resp.records), 10000)

        # (attribute, expected minimum, expected frequency of 'NA')
        expected = [
            ('wtkg2', 22.73, 343),
            ('weight2', 22.727272727, 343),
            ('wtyrago', 27.27272727, 616),
            ('htm3', 104, 101),
        ]
        for attr, low, missing in expected:
            hist = MakeHist(resp, attr)
            # NOTE(review): the Freq('NA') checks suggest Values() can contain
            # the string 'NA' next to numbers. Comparing str with numbers
            # raises TypeError on Python 3, so drop 'NA' before min(); on
            # Python 2 numbers sort before strings, so the minimum is the same.
            values = [v for v in hist.Values() if v != 'NA']
            self.assertAlmostEqual(min(values), low, msg=attr)
            self.assertEqual(hist.Freq('NA'), missing, msg=attr)

        hist = MakeHist(resp, 'sex')
        self.assertEqual(hist.Freq(1), 3669)
        self.assertEqual(hist.Freq(2), 6331)
Esempio n. 2
0
def DumpHeights(data_dir='.', n=10000):
    """Read the BRFSS dataset, extract the heights and pickle them.

    Args:
        data_dir: passed through to Respondents.ReadRecords as its first
            argument.
        n: passed through to Respondents.ReadRecords as its second argument.

    Side effects:
        Writes 'variability_data.pkl' in the current working directory. The
        pickled object is a dict mapping the sex codes 1 and 2 to the list
        of htm3 values of the matching records; records whose htm3 is 'NA'
        are skipped.

    Raises:
        KeyError: if a record with a non-'NA' htm3 has a sex value other
            than 1 or 2.
    """
    resp = brfss.Respondents()
    resp.ReadRecords(data_dir, n)

    heights_by_sex = {1: [], 2: []}
    # A plain loop: the appends are side effects, not a list being built.
    for record in resp.records:
        if record.htm3 != 'NA':
            heights_by_sex[record.sex].append(record.htm3)

    # The context manager closes the file even if pickling fails.
    with open('variability_data.pkl', 'wb') as fp:
        pickle.dump(heights_by_sex, fp)
Esempio n. 3
0
def main():
    """Print the local Corr next to correlation.Corr for height vs. weight.

    Reads the respondents from the 'res' directory, then prints both
    correlation results side by side, followed by the local Corr of the
    heights with themselves.
    """
    respondents = brfss.Respondents()
    respondents.ReadRecords(data_dir='res')
    hs, ws = respondents.GetHeightAndWeight()

    # Method 1: the Corr that is in scope in this file.
    local_r = Corr(hs, ws)
    # Method 2: the implementation from the correlation module.
    module_r = correlation.Corr(hs, ws)
    print(local_r, "vs", module_r)

    # Correlate the heights with themselves; the label names 1 as the
    # expected result.
    self_r = Corr(hs, hs)
    print("E = 1 vs", self_r)
Esempio n. 4
0
def main():
    """Print ToRanks on a small list, then compare two SpearmanCorr results.

    The comparison uses the heights and weights read from the 'res'
    directory and prints the local SpearmanCorr next to
    correlation.SpearmanCorr.
    """
    # Small hand-written input for ToRanks.
    sample = [7, 1, 2, 5]
    ranks = ToRanks(sample)
    print(ranks)

    # Height/weight data from the respondents file.
    respondents = brfss.Respondents()
    respondents.ReadRecords(data_dir='res')
    hs, ws = respondents.GetHeightAndWeight()

    local_rho = SpearmanCorr(hs, ws)
    module_rho = correlation.SpearmanCorr(hs, ws)
    print("r1, r2 = ", local_rho, module_rho)
Esempio n. 5
0
def _ReportSpread(label, values):
    """Print trimmed mean, variance, sigma and coefficient of variation."""
    mu, var = thinkstats.TrimmedMeanVar(values)
    sigma = math.sqrt(var)
    cv = sigma / mu  # coefficient of variation
    print("%s: mu = %.3f, var = %.3f, sigma = %.3f, cv = %.3f"
          % (label, mu, var, sigma, cv))


def _ShowThenClear(draw, dist):
    """Draw one distribution, show the figure, then clear it for the next."""
    draw(dist)
    myplot.Show()
    myplot.Clf()


def main():
    """Summarize and plot the height data for the two sex codes.

    Reads the respondents from the 'res' directory, takes the entries of
    SummarizeHeight() under keys 1 ('man') and 2 ('lady'), and then:

    1. prints mu, var, sigma and coefficient of variation for each group;
    2. shows a Hist plot for each group;
    3. shows a Pmf plot for each group;
    4. shows both Cdfs together in one plot.
    """
    resp = brfss.Respondents()
    resp.ReadRecords(data_dir='res')
    d = resp.SummarizeHeight()

    # Label and data for each group; the labels also prefix the plot names.
    groups = [('man', d[1]), ('lady', d[2])]

    # mu, var, sigma and coefficient of variation per group.
    for label, heights in groups:
        _ReportSpread(label, heights)

    # Hist of each group, one figure at a time.
    hists = []
    for label, heights in groups:
        hist = Pmf.MakeHistFromList(heights, name=label + ' hist')
        _ShowThenClear(myplot.Hist, hist)
        hists.append((label, hist))

    # Pmf of each group, built from the Hist above.
    pmfs = []
    for label, hist in hists:
        pmf = Pmf.MakePmfFromHist(hist, name=label + ' pmf')
        _ShowThenClear(myplot.Pmf, pmf)
        pmfs.append((label, pmf))

    # Cdfs of both groups, drawn together in a single figure.
    cdfs = tuple(
        Cdf.MakeCdfFromPmf(pmf, name=label + ' cdf') for label, pmf in pmfs
    )
    myplot.Cdfs(cdfs, complement=False, transform=None)
    myplot.Show()
Esempio n. 6
0
def main():
    """Print and plot summary figures for the height data under key 1.

    Builds a Cdf named 'man height' from SummarizeHeight()[1], prints its
    mean, variance and sigma, shows the Cdf plot, and finally prints the
    difference between Prob(185) and Prob(178) as a percentage.
    """
    respondents = brfss.Respondents()
    respondents.ReadRecords(data_dir='res')
    summary = respondents.SummarizeHeight()
    men = summary[1]

    height_cdf = Cdf.MakeCdfFromList(men, name='man height')
    mean = height_cdf.Mean()
    variance = height_cdf.Var(mu=mean)
    stddev = math.sqrt(variance)
    print("man height: mean = %.3f var = %.3f sigma = %.3f" % (mean, variance, stddev))

    myplot.Cdf(height_cdf, complement=False, transform=None)
    myplot.Show()

    # Difference of the two Prob values, scaled to a percentage.
    upper = height_cdf.Prob(185)
    lower = height_cdf.Prob(178)
    share = (upper - lower) * 100
    print("178 - 185: %.3f%%" % share)
Esempio n. 7
0
def main():
    """Print the local Cov next to correlation.Cov for the height/weight data.

    Each output line holds the height-weight covariance, the height-height
    covariance, and the variance of the heights from thinkstats.MeanVar.
    """
    respondents = brfss.Respondents()
    respondents.ReadRecords(data_dir='res')
    hs, ws = respondents.GetHeightAndWeight()

    # Method 1: the Cov that is in scope in this file.
    cov_hw = Cov(hs, ws)
    cov_hh = Cov(hs, hs)
    _mean, variance = thinkstats.MeanVar(hs)
    print(cov_hw, cov_hh, variance)

    print("-------------- ")

    # Method 2: the official implementation from the correlation module.
    module_cov_hw = correlation.Cov(hs, ws)
    module_cov_hh = correlation.Cov(hs, hs)
    print(module_cov_hw, module_cov_hh, variance)
Esempio n. 8
0
import Cdf
import math
import myplot
import numpy
import brfss

# Read the respondents from the current directory and keep only the records
# whose wtkg2 is not 'NA'. A list (not a lazy filter object) is needed because
# it is measured with len() and iterated more than once below.
resp = brfss.Respondents()
resp.ReadRecords('.')
filtered_records = [r for r in resp.records if r.wtkg2 != 'NA']

# Sorted standard-normal sample of the same size as the data, paired by rank
# with the sorted weights and the sorted log10 weights.
xs = sorted(numpy.random.normal(0, 1, len(filtered_records)))
ys = sorted(r.wtkg2 for r in filtered_records)
logys = sorted(math.log10(r.wtkg2) for r in filtered_records)

myplot.Plot(xs, ys)
myplot.Show(title="Normal plot for weight")

myplot.Plot(xs, logys)
myplot.Show(title="Normal plot for log(weight)")