def main():
    """Compare one running speed against several groups of race results.

    The speed for a '6:53' pace is looked up in the CDF of all speeds and in
    the CDF of the M4049 group; the M4049 CDF value ("rank") is then mapped
    back to a speed through the M5059 and F2039 CDFs.  Each CDF is plotted
    and shown in turn.

    NOTE(review): `results` is not defined inside this function; it is
    presumably a module-level global -- confirm against the rest of the file.
    """
    speeds = relay.GetSpeeds(results)
    speed = relay.ConvertPaceToSpeed('6:53')

    # Whole field.  Single-argument print(...) emits the same text on
    # Python 2 and Python 3, unlike the former print statements.
    cdf = Cdf.MakeCdfFromList(speeds)
    print('%s speed' % (cdf.Prob(speed),))
    print('%s time' % (convert_speeds_to_time(speed),))
    myplot.Cdf(cdf)
    myplot.Show()

    # M4049 group: CDF value of the same speed within this group.
    speeds_old = GetSpeeds_M4049(results)
    cdf_old = Cdf.MakeCdfFromList(speeds_old)
    rank = cdf_old.Prob(speed)
    print('%s rank %s speed' % (rank, speed))
    print('%s time' % (convert_speeds_to_time(speed),))
    myplot.Cdf(cdf_old)
    myplot.Show()

    # M5059 group: speed that sits at the same rank.
    speeds_5059 = GetSpeeds_M5059(results)
    cdf_5059 = Cdf.MakeCdfFromList(speeds_5059)
    future_speed = cdf_5059.Value(rank)
    print('%s speed' % (future_speed,))
    print('%s time' % (convert_speeds_to_time(future_speed),))
    myplot.Cdf(cdf_5059)
    myplot.Show()

    # F2039 group: speed that sits at the same rank.
    fspeeds = GetSpeeds_F2039(results)
    cdf_female = Cdf.MakeCdfFromList(fspeeds)
    fspeed = cdf_female.Value(rank)
    print('%s speed' % (fspeed,))
    print('%s time' % (convert_speeds_to_time(fspeed),))
    myplot.Cdf(cdf_female)
    myplot.Show()
def observe_data(l, name=None, show=False):
    """Print quartile/mean statistics for a data set and optionally plot it.

    Args:
        l: a list of values, a Pmf.Pmf, or a Cdf.Cdf.
        name: label used in the printed summary.  For a list it also prefixes
            the names of the Cdf/Pmf that are built; for a Pmf or Cdf it
            defaults to the object's own `name`.
        show: when true, plot the PMF (if one is available) and then the CDF.

    Raises:
        TypeError: if `l` is none of the supported types.
    """
    cdf = pmf = None
    if isinstance(l, list):
        # The previous code evaluated name + ' cdf' unconditionally, which
        # raised TypeError for the default name=None.
        prefix = '' if name is None else name + ' '
        cdf = Cdf.MakeCdfFromList(l, prefix + 'cdf')
        pmf = Pmf.MakePmfFromList(l, prefix + 'pmf')
        if name is None:
            name = ''
    elif isinstance(l, Pmf.Pmf):
        pmf = l
        cdf = Cdf.MakeCdfFromPmf(l)
        if name is None:
            name = pmf.name
    elif isinstance(l, Cdf.Cdf):
        # No Pmf is derived from a Cdf, so only the CDF can be plotted.
        cdf = l
        if name is None:
            name = cdf.name
    else:
        # TypeError is still an Exception, so existing handlers keep working.
        raise TypeError('input parameter type is wrong')

    v_25, median, v_75 = cdf.Percentile(25), cdf.Percentile(50), cdf.Percentile(75)
    mean = cdf.Mean()
    # Values in parentheses are the distances between neighbouring statistics.
    print('%s: 1/4:%4.2f(%4.2f), 1/2:%4.2f(mean-median:%4.2f), mean:%4.2f, 3/4:%4.2f(%4.2f)' %
          (name, v_25, median - v_25, median, mean - median, mean, v_75, v_75 - median))

    if show:
        if pmf is not None:
            myplot.Pmf(pmf)
            myplot.Show()
        myplot.Cdf(cdf)
        myplot.Show()
def Main():
    """Plot CDFs of photo posterior means, split by ground-truth label.

    Reads the truth table and the labels, builds and updates the photo and
    labeler objects, then collects `photo.Mean()` for every photo that has a
    truth entry: photos whose truth label is '1' go to the 'yes' CDF, all
    others to the 'no' CDF.

    The original body ended with a bare `return` followed by unreachable
    PlotPosteriorMeans calls for photos and labelers; that dead code has been
    removed, so runtime behavior is unchanged.
    """
    # Each truth row is a (pcode, label) pair.
    truth_map = dict(ReadTruth())

    labels = ReadLabels()
    photo_map, labeler_map = MakeObjects(labels)
    RunUpdates(photo_map, labeler_map, labels)

    yes = []
    no = []
    # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    for pcode, photo in photo_map.items():
        if pcode not in truth_map:
            continue
        mean = photo.Mean()
        if truth_map[pcode] == '1':
            yes.append(mean)
        else:
            no.append(mean)

    myplot.Clf()
    cdf_yes = thinkbayes.MakeCdfFromList(yes, name='yes')
    cdf_no = thinkbayes.MakeCdfFromList(no, name='no')
    myplot.Cdfs([cdf_yes, cdf_no])
    myplot.Show()
def process(data):
    """Plot the histogram, PMF and CDF of `data` with a normal-model overlay.

    The histogram and the PMF are each shown in their own figure.  The final
    figure holds the empirical CDF of `data` plus a scatter of normal CDF
    values (using the data's mean and standard deviation) evaluated at the
    points returned by `normal_sample`.  The mean and sigma are printed.
    """
    # Histogram of the raw values.
    histogram = Pmf.MakeHistFromList(data, name='hist')
    myplot.Hist(histogram, color='blue')
    myplot.Show()

    # PMF derived from that histogram.
    myplot.Pmf(Pmf.MakePmfFromHist(histogram, name='pmf'), color='yellow')
    myplot.Show()
    myplot.Clf()

    # Empirical CDF of the actual data.
    myplot.Cdf(Cdf.MakeCdfFromList(data, name='loafs'))

    mean, variance = thinkstats.MeanVar(data)
    std = math.sqrt(variance)
    print("mu = %.3f, sigma = %.3f" % (mean, std))

    # Normal model: evaluate the normal CDF at each sampled point.
    sample = normal_sample(len(data), mean, std)
    model_probs = []
    for value in sample:
        model_probs.append(erf.NormalCdf(value, mu=mean, sigma=std))
    myplot.Scatter(sample, model_probs, color='red', label='sample')
    myplot.Show()
def main():
    """Show a small example PMF and the result of PmfRemainingLifeTime at age 4.

    Both distributions are drawn with `myplot.Hist`, each in its own figure.
    """
    import myplot

    values = [1, 2, 3, 3, 4, 4, 5, 5, 5, 6, 6, 7]
    original_pmf = Pmf.MakePmfFromList(values)
    remaining_pmf = PmfRemainingLifeTime(original_pmf, age=4)

    # Plot the original first, then the remaining-lifetime distribution.
    for dist in (original_pmf, remaining_pmf):
        myplot.Hist(dist)
        myplot.Show()
def main():
    """Plot the PMF and then the CDF of 1000 random values in [0, 100)."""
    # Named `values` rather than `list` so the builtin is not shadowed.
    values = [100 * random.random() for _ in range(1000)]

    # NOTE(review): 'pfm' looks like a typo for 'pmf'; left unchanged because
    # it is a runtime label.
    pmf = Pmf.MakePmfFromList(values, name='pfm')
    cdf = Cdf.MakeCdfFromList(values, name='cdf')

    myplot.Pmf(pmf)
    myplot.Show()

    myplot.Clf()
    myplot.Cdf(cdf)
    myplot.Show()
def main():
    """Summarize and plot BRFSS heights for two groups.

    Entries 1 and 2 of `SummarizeHeight()` are treated as the "man" and
    "lady" groups respectively.  For each group the trimmed mean, variance,
    standard deviation and coefficient of variation are printed, then the
    histogram and PMF are shown; finally both CDFs are drawn together.
    """
    resp = brfss.Respondents()
    resp.ReadRecords(data_dir='res')
    heights_by_group = resp.SummarizeHeight()
    man_heights = heights_by_group[1]
    lady_heights = heights_by_group[2]

    # mu, var, sigma and coefficient of variation (sigma / mu) per group.
    summaries = (
        ("man: mu = %.3f, var = %.3f, sigma = %.3f, cv = %.3f", man_heights),
        ("lady: mu = %.3f, var = %.3f, sigma = %.3f, cv = %.3f", lady_heights),
    )
    for template, heights in summaries:
        mu, var = thinkstats.TrimmedMeanVar(heights)
        sigma = math.sqrt(var)
        cv = sigma / mu
        print(template % (mu, var, sigma, cv))

    # Histogram of each group, one figure at a time.
    hists = []
    for label, heights in (('man hist', man_heights), ('lady hist', lady_heights)):
        hist = Pmf.MakeHistFromList(heights, name=label)
        myplot.Hist(hist)
        myplot.Show()
        myplot.Clf()
        hists.append(hist)

    # PMF of each group, built from its histogram.
    pmfs = []
    for label, hist in zip(('man pmf', 'lady pmf'), hists):
        pmf = Pmf.MakePmfFromHist(hist, name=label)
        myplot.Pmf(pmf)
        myplot.Show()
        myplot.Clf()
        pmfs.append(pmf)

    # Cumulative distributions of both groups on one figure.
    man_cdf = Cdf.MakeCdfFromPmf(pmfs[0], name='man cdf')
    lady_cdf = Cdf.MakeCdfFromPmf(pmfs[1], name='lady cdf')
    myplot.Cdfs((man_cdf, lady_cdf), complement=False, transform=None)
    myplot.Show()
def Main():
    """Run one simultaneous 'up' update on a Redditor and an Item, then plot.

    The redditor's prior is copied from a Beta(2, 1) PMF with values scaled
    by 100; the item is created over 0..100.  Each object is updated with an
    'up' vote weighted by the *other* object's prior mean, and both
    posteriors are plotted on one figure.
    """
    # make a redditor with some trustworthiness (mean_t = 0.67)
    founder = Redditor(name='redditor')
    beta = thinkbayes.Beta(2, 1)
    for val, prob in beta.MakePmf().Items():
        founder.Set(val * 100, prob)

    # make a new item with unknown quality (mean_q = 0.5)
    item = Item(range(0, 101), name='item')

    # compute the means before either update, so the updates are simultaneous
    mean_t = founder.Mean() / 100.0
    mean_q = item.Mean() / 100.0

    # print(x) with one argument behaves the same on Python 2 and 3.
    print(mean_t)
    print(mean_q)

    # perform simultaneous updates
    founder.Update(('up', mean_q))
    item.Update(('up', mean_t))

    Summarize(item)

    # display the posterior distributions
    myplot.Pmf(founder)
    myplot.Pmf(item)
    myplot.Show()
def PlotAges(resp):
    """Show the CDF of the `age` attribute over `resp.records`.

    Args:
        resp: object whose `records` is an iterable of items with an `age`
            attribute.
    """
    age_cdf = Cdf.MakeCdfFromList([record.age for record in resp.records])

    # Start from a clean figure so earlier plots do not leak into this one.
    myplot.Clf()
    myplot.Cdf(age_cdf)
    myplot.Show()
def ExpoErlangDemo():
    """Update an Expo and an Erlang suite with the same data and plot both.

    A series is made from two segments of `num` items with rates lam1 and
    lam2, then split at `num`.  Both suites are defined over 101 evenly
    spaced rates in [0.01, 5.01] and are updated with the first segment's
    (n, s1) only.
    """
    num = 10
    lam1 = 1
    lam2 = 2
    t = MakeSeries(num, lam1, num, lam2)
    series = Series(t)
    n, s1, m, s2 = series.Split(num)

    # Same output as the former `print n, s1, m, s2`, valid on Python 2 and 3.
    print('%s %s %s %s' % (n, s1, m, s2))

    low, high = 0.01, 5.01
    lams = numpy.linspace(low, high, 101)

    expo = Expo(lams)
    expo.name = 'expo'
    expo.Update((n, s1))

    erlang = Erlang(lams)
    erlang.name = 'erlang'
    erlang.Update((n, s1))

    myplot.Pmf(expo)
    myplot.Pmf(erlang)
    myplot.Show()
def ClassSizes():
    """Print mean/variance of actual, biased and unbiased class-size PMFs.

    The three PMFs are then plotted together with axis labels.
    """
    # start with the actual distribution of class sizes from the book
    d = {
        7: 8,
        12: 8,
        17: 14,
        22: 4,
        27: 6,
        32: 12,
        37: 8,
        42: 3,
        47: 2,
    }

    # form the pmf
    pmf = Pmf.MakePmfFromDict(d, 'actual')
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print('mean %s' % (pmf.Mean(),))
    print('var %s' % (pmf.Var(),))

    # compute the biased pmf
    biased_pmf = BiasPmf(pmf, 'observed')
    print('mean %s' % (biased_pmf.Mean(),))
    print('var %s' % (biased_pmf.Var(),))

    # unbias the biased pmf
    unbiased_pmf = UnbiasPmf(biased_pmf, 'unbiased')
    print('mean %s' % (unbiased_pmf.Mean(),))
    print('var %s' % (unbiased_pmf.Var(),))

    # plot the Pmfs
    myplot.Pmfs([pmf, biased_pmf, unbiased_pmf])
    myplot.Show(xlabel='Class size', ylabel='PMF')
def main():
    """Plot and save the biased ("observed") distribution of running speeds.

    The PMF of actual speeds is passed through BiasPmf with 7.5 as its second
    argument.  The biased PMF is shown, then saved as a histogram under root
    'observed_speeds', and its CDF is saved under 'observed_speeds_cdf'.
    """
    speeds = relay.GetSpeeds(relay.ReadResults())
    actual_pmf = Pmf.MakePmfFromList(speeds, 'actual speeds')

    # Axis labels shared by the two PMF figures.
    pmf_labels = {'xlabel': 'speed (mph)', 'ylabel': 'probability'}

    # Biased distribution as seen by the observer.
    observed_pmf = BiasPmf(actual_pmf, 7.5, name='observed speeds')
    myplot.Pmf(observed_pmf)
    myplot.Show(title='soln. PMF of running speed', **pmf_labels)

    myplot.Clf()
    myplot.Hist(observed_pmf)
    myplot.Save(root='observed_speeds',
                title='PMF of running speed',
                **pmf_labels)

    observed_cdf = Cdf.MakeCdfFromPmf(observed_pmf)
    myplot.Clf()
    myplot.Cdf(observed_cdf)
    myplot.Save(root='observed_speeds_cdf',
                title='CDF of running speed',
                xlabel='speed (mph)',
                ylabel='cumulative probability')
def _birth_weights_oz(records, keep, max_oz=200):
    """Return birth weights in ounces for the records accepted by `keep`.

    Records whose pound or ounce field is not an int, or whose total weight
    exceeds `max_oz`, are skipped.
    """
    weights = []
    for rec in records:
        if not keep(rec):
            continue
        lbs, oz = rec.birthwgt_lb, rec.birthwgt_oz
        if isinstance(lbs, int) and isinstance(oz, int):
            total = lbs * 16 + oz
            if total <= max_oz:
                weights.append(total)
    return weights


def main():
    """Exercise 3.6: percentile rank of one birth weight (163 oz).

    The rank is printed against all live births (outcome == 1) and against
    live births that are not first births (birthord != 1); the CDF of the
    latter group is then plotted.
    """
    myBirthWeight = 163

    table = survey.Pregnancies()
    table.ReadRecords()

    liveBirthWeights = _birth_weights_oz(
        table.records, lambda p: p.outcome == 1)
    liveBirthWeightsCdf = Cdf.MakeCdfFromList(
        liveBirthWeights, name="live birth weights")
    print("My birth weight percentile rank (vs all births): %d"
          % (100 * liveBirthWeightsCdf.Prob(myBirthWeight)))

    notFirstLiveBirthWeights = _birth_weights_oz(
        table.records, lambda p: p.outcome == 1 and p.birthord != 1)
    notFirstLiveBirthWeightsCdf = Cdf.MakeCdfFromList(
        notFirstLiveBirthWeights, name="not first live birth weights")
    # The population here is births that are NOT first births; the message
    # previously said "vs first births".
    print("My birth weight percentile rank (vs non-first births): %d"
          % (100 * notFirstLiveBirthWeightsCdf.Prob(myBirthWeight)))

    myplot.Cdf(notFirstLiveBirthWeightsCdf)
    myplot.Show(title="not first live birth weight CDF",
                xlabel="birth weight oz",
                ylabel="probability")
def Summarize(data_dir):
    """Prints summary statistics for first babies and others, then plots them.

    Args:
        data_dir: passed through to MakeTables.

    Returns:
        tuple (table, firsts, others) as produced by MakeTables.  The
        docstring always promised a tuple of tables, but the function used to
        fall off the end and return None.
    """
    table, firsts, others = MakeTables(data_dir)
    ProcessTables(firsts, others)

    print("Number of first babies", firsts.n)
    print("Number of others", others.n)

    mu1, mu2 = firsts.mu, others.mu

    print("Mean gestation in weeks:")
    print("First babies", mu1)
    print("Others", mu2)

    # The means are in weeks; multiply by 7 to report the gap in days.
    print("Difference in days", (mu1 - mu2) * 7.0)

    var = thinkstats.Var(firsts.lengths)
    sd = numpy.sqrt(var)
    print("Firsts sd = ", sd)

    var = thinkstats.Var(others.lengths)
    sd = numpy.sqrt(var)
    print("Others sd = ", sd)

    histfirst = Pmf.MakeHistFromList(firsts.lengths)
    histother = Pmf.MakeHistFromList(others.lengths)
    myplot.Hists([histfirst, histother])
    myplot.Show()

    return table, firsts, others
def main():
    """Plot the PMF of running speeds, then their CDF twice.

    The CDF is drawn once through `myplot.Cdf` and once through
    `myplot.Cdfs`.
    """
    # Local import kept from the original; moved to the top of the body.
    import Cdf

    results = ReadResults()
    speeds = GetSpeeds(results)

    pmf = Pmf.MakePmfFromList(speeds, 'speeds')
    myplot.Pmf(pmf)
    myplot.Show(title='PMF of running speed',
                xlabel='speed (mph)',
                ylabel='probability')

    cdf = Cdf.MakeCdfFromList(speeds, 'speeds')
    myplot.Cdf(cdf)
    myplot.Show()

    # Other Cdfs call sites pass a list or tuple of Cdf objects, so wrap the
    # single Cdf instead of passing it bare.
    myplot.Cdfs([cdf])
    myplot.Show()
def ScatterPlot(self, root, heights, weights, alpha=1.0):
    """Show a scatter plot of weights against heights.

    Args:
        root: unused here; the call to myplot.Save that consumed it is
            commented out in the original, and the figure is shown instead.
        heights: x values passed to pyplot.scatter.
        weights: y values passed to pyplot.scatter.
        alpha: transparency passed to pyplot.scatter.
    """
    pyplot.scatter(heights, weights, alpha=alpha, edgecolors='none')

    show_options = {
        'xlabel': 'Height (cm)',
        'ylabel': 'Weight (kg)',
        'axis': [140, 210, 20, 200],
        'legend': False,
    }
    myplot.Show(**show_options)
def main():
    """Plot the complementary CDF of 1000 Pareto variates on log-log axes.

    NOTE(review): `paretovariate` takes three arguments here, so it is a
    project helper rather than random.paretovariate; the meaning of the
    third argument (100) is not visible from this function.
    """
    sample_size = 1000
    alpha = 1.7
    third_arg = 100

    sample = paretovariate(sample_size, alpha, third_arg)
    sample_cdf = Cdf.MakeCdfFromList(sample)

    myplot.Clf()
    myplot.Cdf(sample_cdf, complement=True, xscale='log', yscale='log')

    title = 'CCDF of {0} random paretovariates'.format(sample_size)
    myplot.Show(title=title)
def main():
    """Print the number of stats records and plot the CdfPerDay result."""
    all_recs = cyb_records.Stats()
    all_recs.ReadRecords()

    # Same output as the former print statement, valid on Python 2 and 3.
    print('Number of total stats %s' % (len(all_recs.records),))

    # The result is handed to myplot.Cdfs, so it is presumably a sequence of
    # Cdf objects -- confirm against CdfPerDay.
    cdf = CdfPerDay(all_recs.records)
    myplot.Cdfs(cdf)
    myplot.Show(title="CDF: daily usage of machines at the YMCA",
                xlabel='Distance (in m / day)',
                ylabel='Percentile')
def main():
    """Plot the CDF and the log-scale CCDF of intervals between records.

    Each interval is a record's `minutes` minus the previous record's
    `minutes`; the first record is measured from 0.
    """
    babies = BabyBoom.Babies()
    babies.ReadRecords(data_dir='res', n=None)

    minutes = [baby.minutes for baby in babies.records]
    # Pair each time with the one before it, starting from 0.
    gaps = [later - earlier for earlier, later in zip([0] + minutes, minutes)]

    gap_cdf = Cdf.MakeCdfFromList(gaps, name='baby interval')
    myplot.Cdf(gap_cdf, complement=False, transform=None)
    myplot.Show()

    # Log-scale y axis for the CCDF, where CCDF(x) = 1 - CDF(x).
    myplot.Clf()
    myplot.Cdf(gap_cdf, complement=True, yscale='log')
    myplot.Show()
def HexBin(self, root, heights, weights, cmap=matplotlib.cm.Blues):
    """Show a hexbin plot of weights against heights.

    Args:
        root: unused here; the call to myplot.Save that consumed it is
            commented out in the original, and the figure is shown instead.
        heights: x values passed to pyplot.hexbin.
        weights: y values passed to pyplot.hexbin.
        cmap: colormap passed to pyplot.hexbin.
    """
    pyplot.hexbin(heights, weights, cmap=cmap)

    show_options = {
        'xlabel': 'Height (cm)',
        'ylabel': 'Weight (kg)',
        'axis': [140, 210, 20, 200],
        'legend': False,
    }
    myplot.Show(**show_options)
def main():
    """Update a Train suite over hypotheses 100..1000 with datum 321.

    The posterior mean is printed and the posterior is plotted.
    """
    # range() exists on both Python 2 and 3 (xrange is Python 2 only);
    # 1001 is exclusive, so 1000 is the largest hypothesis.
    hypos = range(100, 1001)
    suite = Train(hypos)

    suite.Update(321)
    print(suite.Mean())

    myplot.Pmf(suite)
    myplot.Show()
def CheckCdf2():
    """Compare chi2 values from the simulation with a chi-squared dist.

    Each of 1000 simulated statistics is mapped through the chi-squared CDF
    with 3 degrees of freedom, and the CDF of the mapped values is plotted.
    """
    df = 3
    simulated = [SimulateChi2() for _ in range(1000)]

    mapped = []
    for stat in simulated:
        mapped.append(scipy.stats.chi2.cdf(stat, df))

    myplot.Cdf(Cdf.MakeCdfFromList(mapped))
    myplot.Show()
def create_error_pmf():
    """Read all event records and plot the PMFs returned by Pmf_errors."""
    events = cyb_records.Events()
    events.ReadRecords()

    error_pmfs = Pmf_errors(events.records)
    myplot.Pmfs(error_pmfs)

    show_options = dict(
        title="PDF of different types of errors Per Machine",
        xlabel='Error Codes',
        ylabel='Probability',
    )
    myplot.Show(**show_options)
def main():
    """Print the number of stats records and plot the CdfPerMachine result."""
    all_recs = cyb_records.Stats()
    all_recs.ReadRecords()

    # Same output as the former print statement, valid on Python 2 and 3.
    print('Number of total stats %s' % (len(all_recs.records),))

    # The result is handed to myplot.Cdfs, so it is presumably a sequence of
    # Cdf objects -- confirm against CdfPerMachine.
    cdf = CdfPerMachine(all_recs.records)
    myplot.Cdfs(cdf)
    myplot.Show(title="CDF of cardio machine average distances",
                xlabel='Average Distances',
                ylabel='Probability')
def CheckCdf():
    """Overlay the Chi2Cdf curve (df=3) with the CDF of simulated values.

    The curve from `Chi2Cdf(df=3, high=15)` is drawn first, then the
    empirical CDF of 1000 `SimulateChi2()` draws goes on the same figure.
    """
    curve_x, curve_y = Chi2Cdf(df=3, high=15)
    pyplot.plot(curve_x, curve_y)

    simulated = []
    for _ in range(1000):
        simulated.append(SimulateChi2())

    myplot.Cdf(Cdf.MakeCdfFromList(simulated))
    myplot.Show()
def Q2(results):
    """Plot the CDF of `results` and print its 0.95 and 0.99 values.

    Args:
        results: list of numbers.  It is sorted IN PLACE, so the caller's
            list is reordered as a side effect.

    The printed values are formatted with %d, i.e. truncated to integers.
    """
    results.sort()

    result_cdf = Cdf.MakeCdfFromList(results, name='cdf')
    myplot.Cdf(result_cdf)
    myplot.Show()

    for prob in (0.95, 0.99):
        threshold = result_cdf.Value(prob)
        print("p:%4.2f significant_value = %d" % (prob, threshold))
def main():
    """Show the CDF of the running speeds read from the relay results."""
    speeds = relay.GetSpeeds(relay.ReadResults())

    # Distribution of the actual speeds.
    speed_cdf = Cdf.MakeCdfFromList(speeds, 'speeds')
    myplot.Cdf(speed_cdf)

    show_options = {
        'title': 'CDF of running speed',
        'xlabel': 'speed (mph)',
        'ylabel': 'cumulative probability',
    }
    myplot.Show(**show_options)
def main():
    """Show the histogram of speeds after BiasPmf is applied.

    The PMF of actual speeds is passed to BiasPmf with 7.5 as its second
    argument and 'observed speeds' as the name of the result.
    """
    speeds = relay.GetSpeeds(relay.ReadResults())
    actual_pmf = Pmf.MakePmfFromList(speeds, 'actual speeds')
    observed_pmf = BiasPmf(actual_pmf, 7.5, 'observed speeds')

    myplot.Clf()
    myplot.Hist(observed_pmf)

    show_options = {
        'title': 'observed speeds',
        'xlabel': 'speed (mph)',
        'ylabel': 'probability',
    }
    myplot.Show(**show_options)
def PlotPMF(records, machine_filter=None):
    """Plot per-key weekly error histograms for keys with enough errors.

    Args:
        records: passed through to GetErrorsPerWeek.
        machine_filter: passed through to GetErrorsPerWeek.  Defaults to a
            fresh empty list on each call; the previous `[]` default was a
            single list shared across calls.

    Only keys whose `SumErrors(...)` exceeds 10 are plotted.
    """
    if machine_filter is None:
        machine_filter = []

    errors = GetErrorsPerWeek(records, machine_filter)

    # Iterate key/value pairs once instead of calling errors.get(key) twice.
    pmfs = [
        Pmf.MakeHistFromDict(weekly_errors, key)
        for key, weekly_errors in errors.items()
        if SumErrors(weekly_errors) > 10
    ]

    myplot.Pmfs(pmfs)
    myplot.Show(title="Histogram: Error Rate per Week",
                xlabel='Date',
                ylabel='Errors per week')
def main():
    """Plot the weight CDF, then the CDF of a 1000-item sample drawn from it.

    Pregnancy records are read from '../chap1/'.
    """
    data_dir = '../chap1/'
    preg = survey.Pregnancies()
    preg.ReadRecords(data_dir)

    cdf = weight_cdf(preg)
    myplot.Cdf(cdf)
    # Was `myplot.show()`; every other call to this module uses `Show`.
    myplot.Show()

    sample = Sample(cdf, 1000)
    cdf_sample = Cdf.MakeCdfFromList(sample)
    myplot.Cdf(cdf_sample)
    myplot.Show()