def Process(table, name):
    """Run the base processing on a table, then attach weight/length distributions.

    Calls descriptive.Process(table, name) first, then builds a PMF and a
    CDF of the birth weights and of `table.lengths`.

    Args:
        table: object with a `records` sequence whose items expose
            `totalwgt_oz`. `table.name` and `table.lengths` are read here
            but not set here -- presumably descriptive.Process populates
            them; verify against that module.
        name: label forwarded unchanged to descriptive.Process.

    Side effects:
        Sets table.weights, table.weight_pmf, table.weight_cdf,
        table.lengths_pmf, table.lengths_cdf (and the legacy misspelled
        alias table.legnths_cdf).
    """
    descriptive.Process(table, name)

    # Records whose weight is the placeholder string 'NA' are skipped.
    table.weights = [
        p.totalwgt_oz for p in table.records if p.totalwgt_oz != 'NA'
    ]
    table.weight_pmf = Pmf.MakePmfFromList(table.weights, table.name)
    table.weight_cdf = Cdf.MakeCdfFromList(table.weights, table.name)

    table.lengths_pmf = Pmf.MakePmfFromList(table.lengths, table.name)
    table.lengths_cdf = Cdf.MakeCdfFromList(table.lengths, table.name)
    # The attribute used to be stored only under the misspelled name
    # `legnths_cdf`; keep that name pointing at the same object so any
    # existing reader of the old spelling keeps working.
    table.legnths_cdf = table.lengths_cdf
def ex6_14(n, do_log=False, ns=1000): exp = randvar.Exponential(1) def prod_log(t): p = 1 for x in t: p = p * x if do_log == True: return math.log(p) else: return p s = [prod_log([exp.generate() for x in range(n)]) for i in range(ns)] mu, var = thinkstats.MeanVar(s) print n, mu, var cdf = Cdf.MakeCdfFromList(s) myplot.Clf() myplot.Cdf(cdf) myplot.Save('clt14' + str(do_log) + str(n)) rankit.MakeNormalPlot(s, 'clt14npp' + str(do_log) + str(n)) myplot.Close()
def main(): results = ReadResults() print '# of samples:', len(results) results = CondFilter(results) print '# of samples(age=30s):', len(results) h_speed, h_rank, h_rank_age = FindProfHeo(results) speeds = GetSpeeds(results) pmf = Pmf.MakePmfFromList(speeds, 'speeds') cdf = Cdf.MakeCdfFromList(speeds, 'speeds') if h_speed: print '허준영s speed is ', h_speed print 'His percentile rank is ', cdf.Prob(h_speed) * 100 myplot.Clf() myplot.Pmf(pmf) myplot.Save(root='mbc_marathon_pmf', title='PMF of running speed', xlabel='speed (kmh)', ylabel='probability') myplot.Clf() myplot.Cdf(cdf) myplot.Save(root='mbc_marathon_cdf', title='CDF of running speed', xlabel='speed (kmh)', ylabel='probability') myplot.Close()
def PValue(model1, model2, n, m, delta, iters=1000):
    """Estimate a two-sided p-value for an observed difference by resampling.

    Calls Resample(model1, model2, n, m) `iters` times, builds a CDF of the
    returned values, and adds the probability in the two tails beyond
    -|delta| and +|delta|.

    Args:
        model1, model2, n, m: forwarded unchanged to Resample.
        delta: observed difference; only its magnitude is used.
        iters: number of resampling iterations.

    Returns:
        (cdf, pvalue): the CDF of the resampled values and the sum of the
        two tail probabilities.
    """
    deltas = [Resample(model1, model2, n, m) for _ in range(iters)]
    cdf = Cdf.MakeCdfFromList(deltas)

    # Use the magnitude so the tails are always measured outward from zero.
    # With a negative delta the two cut points would swap and the "tails"
    # would overlap, giving a value that can exceed 1.
    delta = abs(delta)

    # compute the two tail probabilities
    left = cdf.Prob(-delta)
    right = 1.0 - cdf.Prob(delta)

    pvalue = left + right
    return cdf, pvalue
def poincare_bread(n=4, nsamples=365):
    """Simulate taking the largest of n normal variates, nsamples times.

    Each sample is the maximum of `n` draws from
    random.normalvariate(950, 50).

    Args:
        n: number of draws per sample.
        nsamples: number of samples to generate.

    Returns:
        (cdf, mu, sd): a CDF of the samples named 'poincare', plus the mean
        and the square root of the variance reported by ts.MeanVar.
    """
    heaviest = []
    for _day in range(nsamples):
        candidates = [random.normalvariate(950, 50) for _loaf in range(n)]
        heaviest.append(max(candidates))

    mean, variance = ts.MeanVar(heaviest)
    spread = math.sqrt(variance)

    result = Cdf.MakeCdfFromList(heaviest)
    result.name = 'poincare'
    return result, mean, spread
def ex6_12(n, ns=1000): exp = randvar.Exponential(1) s = [sum([exp.generate() for x in range(n)]) / float(n) for i in range(ns)] mu, var = thinkstats.MeanVar(s) print n, mu, var cdf = Cdf.MakeCdfFromList(s) myplot.Clf() myplot.Cdf(cdf) myplot.Save('clt' + str(n)) myplot.Close()
def ex6_13(n, ns=1000): exp = randvar.Exponential(1) s = [sum([exp.generate() for x in range(n)]) for i in range(ns)] mu, var = thinkstats.MeanVar(s) print n, mu, var cdf = Cdf.MakeCdfFromList(s) myplot.Clf() myplot.Cdf(cdf) myplot.Save('clt13' + str(n)) rankit.MakeNormalPlot(s, 'clt13npp' + str(n)) myplot.Close()
26 , 23 , 14 , 101 , 125 , 0 , 53 , 10 , 56 , 165 , 46 , 5 , 0 , 38 , 29 , 4 , 30 , 89 , 14 , ] cdf = Cdf.MakeCdfFromList(birthday_arrival) print 'Mean:', cdf.Mean() myplot.Clf() myplot.Cdf(cdf) myplot.Save('birthday_cdf') myplot.Clf() myplot.Cdf(cdf, complement=True) myplot.Save('birthday_ccdf', yscale='log') myplot.Close()
from thinkstats import Cdf
from thinkstats import myplot
import random

# Draw 80 values from random.expovariate with rate parameter 2 and show the
# complementary CDF with a log-scaled y axis.
samples = []
for _ in range(80):
    samples.append(random.expovariate(2))

cdf = Cdf.MakeCdfFromList(samples)

myplot.Clf()
myplot.Cdf(cdf, complement=True)
myplot.Show(yscale='log')
from thinkstats import Cdf from thinkstats import myplot t = [2, 1, 3, 2, 5] cdf = Cdf.MakeCdfFromList(t) cdf.name = 'Sample' print 'cdf.Prob(2):', cdf.Prob(2) print 'cdf.Prob(3):', cdf.Prob(3) print 'cdf.Prob(6):', cdf.Prob(6) print 'cdf.Value(0.5):', cdf.Value(0.5) print 'cdf.Value(0.7):', cdf.Value(0.7) myplot.Clf() myplot.Cdf(cdf) myplot.Show(title='CDF', xlabel='x', ylabel='CDF(x)', axis=[0, 6, 0, 1])
from thinkstats import Cdf from thinkstats import myplot word_count = {} fp = open('thinkstats.txt', 'rt') text = fp.read() words = text.split() print len(words) for word in words: word_count[word] = word_count.get(word, 0) + 1 cdf = Cdf.MakeCdfFromList(word_count.values()) print 'Median:', cdf.Value(0.5) print 'Mean:', cdf.Mean() print 'Mode:', cdf.Value(1.0) myplot.Clf() myplot.Cdf(cdf, complement=True) myplot.Show(xscale='log', yscale='log')
from thinkstats import myplot def poincare_bread(n=4, nsamples=365): poincare = [ max([random.normalvariate(950, 50) for x in range(n)]) for y in range(nsamples) ] mu, var = ts.MeanVar(poincare) sd = math.sqrt(var) cdf = Cdf.MakeCdfFromList(poincare) cdf.name = 'poincare' return cdf, mu, sd if __name__ == '__main__': nsamples = 365 cdf_p, mu, sd = poincare_bread(4, nsamples) print mu, sd normal = [random.normalvariate(mu, sd) for y in range(nsamples)] cdf_n = Cdf.MakeCdfFromList(normal) cdf_n.name = 'normal' myplot.Clf() myplot.Cdfs([cdf_p, cdf_n]) myplot.Show() myplot.Close()