def Summarize(data_dir):
    """Print summary statistics comparing first babies with all others.

    Builds the tables via MakeTables/ProcessTables, then prints the group
    sizes, the mean gestation lengths, their difference in days and each
    group's standard deviation, and finally plots both length histograms.

    Args:
        data_dir: passed straight through to MakeTables.

    Returns:
        None. All results are reported through print and the plot.
    """
    # MakeTables hands back three tables; the first one is not used here.
    _, firsts, others = MakeTables(data_dir)
    ProcessTables(firsts, others)

    print("Number of first babies", firsts.n)
    print("Number of others", others.n)

    first_mean = firsts.mu
    other_mean = others.mu
    print("Mean gestation in weeks:")
    print("First babies", first_mean)
    print("Others", other_mean)
    # The means are reported in weeks, so scale the gap by 7 to get days.
    print("Difference in days", (first_mean - other_mean) * 7.0)

    # Standard deviation is the square root of the variance of the lengths.
    for label, group in (("Firsts sd = ", firsts), ("Others sd = ", others)):
        print(label, numpy.sqrt(thinkstats.Var(group.lengths)))

    histograms = [
        Pmf.MakeHistFromList(group.lengths) for group in (firsts, others)
    ]
    myplot.Hists(histograms)
    myplot.Show()
def testVar(self):
    """Check Mean and Var against hand-computed values for the pumpkin data."""
    t = [1, 1, 1, 3, 3, 591]
    mu = thinkstats.Mean(t)
    var1 = thinkstats.Var(t)
    # Supplying the mean explicitly must give the same variance.
    var2 = thinkstats.Var(t, mu)

    # Each print is a single-argument call so that it is valid syntax and
    # produces the same text on both Python 2 and Python 3.
    print("")
    print("Pumpkins")
    print("mean %s" % (mu,))
    print("var %s" % (var1,))
    print("var %s" % (var2,))

    # assertAlmostEqual is the supported spelling; the assertAlmostEquals
    # alias is deprecated and no longer exists in Python 3.12+.
    self.assertAlmostEqual(mu, 100.0)
    self.assertAlmostEqual(var1, 48217.0)
    self.assertAlmostEqual(var2, 48217.0)
def testCov(self):
    """Check Cov on a small example and against Var for identical inputs."""
    xs = [1, 2, 3]
    ys = [3, 4, 5]

    # assertAlmostEqual is the supported spelling; the assertAlmostEquals
    # alias is deprecated and no longer exists in Python 3.12+.
    cov = correlation.Cov(xs, ys)
    self.assertAlmostEqual(cov, 0.666666666)

    # The covariance of a sequence with itself must equal its variance.
    var = thinkstats.Var(xs)
    cov = correlation.Cov(xs, xs)
    self.assertAlmostEqual(var, cov)
def pumpkin(weights):
    """Return the mean, variance and standard deviation of pumpkin weights.

    Args:
        weights: the pumpkin weights; the same object is handed to
            thinkstats.Mean, thinkstats.Var and std_dev in turn.

    Returns:
        A (mean, variance, standard deviation) tuple.
    """
    mu = thinkstats.Mean(weights)
    # Reuse the mean so the variance helper does not have to recompute it.
    var = thinkstats.Var(weights, mu)
    return mu, var, std_dev(weights, mu, var)
def Process(table, name):
    """Run the analysis steps on a table and store the results on it.

    After delegating to practice001.Process, this sets ``name``, ``var``,
    ``trim``, ``hist`` and ``pmf`` on ``table``.

    Args:
        table: object with ``lengths`` and ``mu`` attributes (presumably
            populated by practice001.Process -- confirm against that module).
        name: label stored on the table and given to the Hist and Pmf.
    """
    practice001.Process(table)
    table.name = name

    lengths = table.lengths

    # Variance, computed from the sequence and its mean.
    table.var = thinkstats.Var(lengths, table.mu)
    # Trimmed mean, computed from the sequence alone.
    table.trim = thinkstats.TrimmedMean(lengths)

    table.hist = Pmf.MakeHistFromList(lengths, name=name)
    table.pmf = Pmf.MakePmfFromList(lengths, name=name)
def testMeanAndVar(self):
    """Check that a Pmf built from a list reproduces its mean and variance."""
    t = [1, 2, 2, 3, 5]
    mu = thinkstats.Mean(t)
    var = thinkstats.Var(t, mu)

    pmf = Pmf.MakePmfFromList(t)
    mu2 = pmf.Mean()
    var2 = pmf.Var()
    # Passing the mean explicitly must not change the variance.
    var3 = pmf.Var(mu2)

    # assertAlmostEqual is the supported spelling; the assertAlmostEquals
    # alias is deprecated and no longer exists in Python 3.12+.
    self.assertAlmostEqual(mu, mu2)
    self.assertAlmostEqual(var, var2)
    self.assertAlmostEqual(var, var3)
import thinkstats
import math

# Weights used for the pumpkin exercise.
pumpkin_weights = [1, 1, 1, 3, 3, 591]

mean = thinkstats.Mean(pumpkin_weights)
print('Mean of the pumpkin is:', mean)

variance = thinkstats.Var(pumpkin_weights)
print('Variance of the pumpkin is:', variance)

# The standard deviation is the square root of the variance.
standard_deviation = math.sqrt(thinkstats.Var(pumpkin_weights))
print('Standard Deviation of the pumpkin is:', standard_deviation)
def variance(data):
    """Return the variance of the ``prglength`` attribute across ``data``.

    Args:
        data: iterable of objects exposing a ``prglength`` attribute.

    Returns:
        Whatever thinkstats.Var returns for the collected lengths.
    """
    # Build a real list instead of calling map(): on Python 3 map() returns a
    # one-shot iterator with no len(), whereas a list can be measured and
    # traversed repeatedly on both Python 2 and Python 3.
    prglengths = [record.prglength for record in data]
    return thinkstats.Var(prglengths)
def Pumpkin():
    """Print the mean, variance and standard deviation of pumpkin weights."""
    # An earlier assignment of [1, 1, 1, 3, 3, 591] was overwritten before it
    # was ever read, so only the list that was actually used is kept.
    pumpkins = [1, 1, 1, 3, 3, 2]

    mean = thinkstats.Mean(pumpkins)
    # Compute the variance once and reuse it for the standard deviation.
    variance = thinkstats.Var(pumpkins)

    # Each print is a single-argument call so that it is valid syntax and
    # produces the same text on both Python 2 and Python 3.
    print("Mean: %s lbs" % (mean,))
    print("Variance: %s" % (variance,))
    print("SD: %s lbs" % (math.sqrt(variance),))
def size_variance(self):
    """Return the variance of the values produced by ``self.sizes()``."""
    sizes = self.sizes()
    return thinkstats.Var(sizes)
def Pumpkin():
    """Return (mean, variance, standard deviation) of the pumpkin weights.

    Reads the ``weight`` attribute of every item in the module-level
    ``pumpkins`` collection, which is defined outside this block.
    """
    weights = [p.weight for p in pumpkins]
    mu, var = thinkstats.MeanVar(weights)
    return (mu, var, math.sqrt(var))


# Every print below is a single-argument call so that it is valid syntax and
# produces the same text on both Python 2 and Python 3.
print('mean, variance, standard deviation %s' % (Pumpkin(),))

# ex 2.2
# NOTE(review): ``all`` shadows the builtin of the same name; the name is kept
# because code outside this block may read it.
all, firsts, others = first.MakeTables()
first.ProcessTables(firsts, others)

# Standard deviation of each group: square root of the variance of its lengths.
firsts.s = math.sqrt(thinkstats.Var(firsts.lengths, firsts.mu))
others.s = math.sqrt(thinkstats.Var(others.lengths, others.mu))

print('first babies mean: %s standard deviation: %s' % (firsts.mu, firsts.s))
print('others mean: %s standard deviation: %s' % (others.mu, others.s))

# The 7 * 24 factor turns a difference into hours (assumes lengths are
# measured in weeks -- confirm against the data source).
print('mean difference (hours) %s' % ((firsts.mu - others.mu) * 7 * 24,))
print('spread difference (hours) %s' % ((firsts.s - others.s) * 7 * 24,))

# Distributions
# Map each pregnancy length seen among first babies to how often it occurs.
firstsPrgLengthFreqDict = {}
for length in firsts.lengths:
    firstsPrgLengthFreqDict[length] = firstsPrgLengthFreqDict.get(length, 0) + 1
print(firstsPrgLengthFreqDict)
def std_dev(seq, mean=None, var=None):
    """Return the standard deviation of ``seq``.

    Args:
        seq: the values; only consulted when ``var`` is not supplied.
        mean: optional precomputed mean, forwarded to thinkstats.Var.
        var: optional precomputed variance. When given (including 0), it is
            used directly and ``seq`` is not touched.

    Returns:
        The square root of the variance, as a float.
    """
    # Compare against None rather than relying on truthiness: a variance of
    # exactly 0 is a valid precomputed value, not a missing one.
    if var is None:
        var = thinkstats.Var(seq, mean)
    return math.sqrt(var)
import thinkstats
import math
import survey


def pumpkin():
    """Return (mean, variance, standard deviation) of the pumpkin weights."""
    weights = [1, 1, 1, 3, 3, 591]
    mu, var = thinkstats.MeanVar(weights)
    return mu, var, math.sqrt(var)


# Print the mean, the variance and the standard deviation, one per line.
for statistic in pumpkin():
    print(statistic)

table = survey.Pregnancies()
table.ReadRecords()

# Keep only records with outcome == 1, then split them on birth order.
outcome_one = [rec for rec in table.records if rec.outcome == 1]

firstPregLength = [rec.prglength for rec in outcome_one if rec.birthord == 1]
firstPregDeviation = math.sqrt(thinkstats.Var(firstPregLength))

followingPregLength = [
    rec.prglength for rec in outcome_one if rec.birthord != 1
]
followingPregDeviation = math.sqrt(thinkstats.Var(followingPregLength))

print(firstPregDeviation, followingPregDeviation)
__author__ = 'Maxiee'
# -*- coding: UTF-8 -*-
import thinkstats
import math
import Pmf
import matplotlib.pyplot as pyplot

pumpkin_weight = [1, 1, 1, 3, 3, 8]

mean = thinkstats.Mean(pumpkin_weight)
# Compute the variance once and reuse it for the standard deviation.
variance = thinkstats.Var(pumpkin_weight)

# Each print is a single-argument call so that it is valid syntax and
# produces the same text on both Python 2 and Python 3.
print("均值: %s" % (mean,))
print("方差: %s" % (variance,))
print("标准差: %s" % (math.sqrt(variance),))

hist = Pmf.MakeHistFromList(pumpkin_weight)

# Find the mode: the value with the highest frequency. Scanning the values in
# sorted order makes ties resolve to the smallest value. (Taking the first
# sorted value, as before, yields the minimum rather than the mode.)
mode = max(sorted(hist.Values()), key=hist.Freq)
print("众数为:%d 频数为:%d" % (mode, hist.Freq(mode)))

# Draw the histogram as a bar chart.
vals, freqs = hist.Render()
rectangles = pyplot.bar(vals, freqs)
pyplot.show()