Ejemplo n.º 1
0
def main():
    """Compare a 6:53 pace against several groups of relay results.

    Prints the CDF probability of that speed among all speeds and among the
    group returned by GetSpeeds_M4049, then prints the speed that holds the
    same rank in the groups returned by GetSpeeds_M5059 and GetSpeeds_F2039.
    Each group's CDF is shown after its numbers are printed.
    """
    # NOTE(review): `results` is not assigned in this function; presumably it
    # is a module-level global -- confirm before relying on this entry point.
    speeds = relay.GetSpeeds(results)
    speed = relay.ConvertPaceToSpeed('6:53')
    cdf = Cdf.MakeCdfFromList(speeds)
    # Single-argument print() calls emit the same text as the former print
    # statements on Python 2 and also run on Python 3.
    print('%s speed' % (cdf.Prob(speed),))
    print('%s time' % (convert_speeds_to_time(speed),))
    myplot.Cdf(cdf)
    myplot.Show()

    # Rank of the same speed inside the M4049 group.
    speeds_old = GetSpeeds_M4049(results)
    cdf_old = Cdf.MakeCdfFromList(speeds_old)
    rank = cdf_old.Prob(speed)
    print('%s rank %s speed' % (rank, speed))
    print('%s time' % (convert_speeds_to_time(speed),))
    myplot.Cdf(cdf_old)
    myplot.Show()

    # Speed that has that rank inside the M5059 group.
    speeds_5059 = GetSpeeds_M5059(results)
    cdf_5059 = Cdf.MakeCdfFromList(speeds_5059)
    future_speed = cdf_5059.Value(rank)
    print('%s speed' % (future_speed,))
    print('%s time' % (convert_speeds_to_time(future_speed),))
    myplot.Cdf(cdf_5059)
    myplot.Show()

    # Speed that has that rank inside the F2039 group.
    fspeeds = GetSpeeds_F2039(results)
    cdf_female = Cdf.MakeCdfFromList(fspeeds)
    fspeed = cdf_female.Value(rank)
    print('%s speed' % (fspeed,))
    print('%s time' % (convert_speeds_to_time(fspeed),))
    myplot.Cdf(cdf_female)
    myplot.Show()
Ejemplo n.º 2
0
def observe_data(l, name=None, show=False):
    """Print a quartile/mean summary of a distribution and optionally plot it.

    Args:
        l: a list of values, a Pmf.Pmf, or a Cdf.Cdf.
        name: label used in the printed summary.  For a list it also
            prefixes the names of the Cdf and Pmf built from it.  When
            omitted, a list is labelled 'data' and a Pmf/Cdf falls back to
            its own ``name`` attribute.
        show: when true, plot the Pmf (if one is available) and then the Cdf.

    Raises:
        TypeError: if `l` is none of the supported types.
    """
    cdf = pmf = None
    if isinstance(l, list):
        # With name=None the concatenations below used to raise TypeError.
        if name is None:
            name = 'data'
        cdf = Cdf.MakeCdfFromList(l, name + ' cdf')
        pmf = Pmf.MakePmfFromList(l, name + ' pmf')
    elif isinstance(l, Pmf.Pmf):
        pmf = l
        cdf = Cdf.MakeCdfFromPmf(l)
        if name is None:
            name = pmf.name
    elif isinstance(l, Cdf.Cdf):
        cdf = l
        if name is None:
            name = cdf.name
    else:
        # TypeError is still an Exception, so existing handlers keep working.
        raise TypeError('input parameter type is wrong')

    v_25, median, v_75 = cdf.Percentile(25), cdf.Percentile(50), cdf.Percentile(75)
    mean = cdf.Mean()
    print('%s: 1/4:%4.2f(%4.2f), 1/2:%4.2f(mean-median:%4.2f), mean:%4.2f, 3/4:%4.2f(%4.2f)' %
          (name, v_25, median - v_25, median, mean - median, mean, v_75, v_75 - median))

    if show:
        # No Pmf exists when the caller handed in a Cdf directly.
        if pmf is not None:
            myplot.Pmf(pmf)
            myplot.Show()
        myplot.Cdf(cdf)
        myplot.Show()
Ejemplo n.º 3
0
def Main():
    """Show CDFs of photo posterior means, split by ground-truth label.

    Reads the truth pairs and the labels, builds the photo and labeler
    objects, runs the updates, and then plots two CDFs of ``photo.Mean()``:
    one for photos whose truth label is '1' ('yes') and one for every other
    photo that has a truth label ('no').  Photos without a truth entry are
    ignored.
    """
    # ReadTruth yields (pcode, label) pairs; later pairs win on duplicates.
    truth_map = dict(ReadTruth())

    labels = ReadLabels()
    photo_map, labeler_map = MakeObjects(labels)

    RunUpdates(photo_map, labeler_map, labels)

    yes = []
    no = []
    for pcode, photo in photo_map.iteritems():
        if pcode not in truth_map:
            continue
        bucket = yes if truth_map[pcode] == '1' else no
        bucket.append(photo.Mean())

    myplot.Clf()
    cdf_yes = thinkbayes.MakeCdfFromList(yes, name='yes')
    cdf_no = thinkbayes.MakeCdfFromList(no, name='no')
    myplot.Cdfs([cdf_yes, cdf_no])
    myplot.Show()
Ejemplo n.º 4
0
def process(data):
    """Plot `data` as a Hist, a Pmf and a Cdf, then overlay a normal model.

    Also prints the mean and standard deviation computed by
    thinkstats.MeanVar.  The final figure holds the Cdf of the data plus a
    scatter of erf.NormalCdf evaluated on values from normal_sample.
    """
    # Histogram of the data.
    histogram = Pmf.MakeHistFromList(data, name='hist')
    myplot.Hist(histogram, color='blue')
    myplot.Show()

    # PMF derived from that histogram.
    mass = Pmf.MakePmfFromHist(histogram, name='pmf')
    myplot.Pmf(mass, color='yellow')
    myplot.Show()

    myplot.Clf()

    # CDF of the actual data.
    empirical = Cdf.MakeCdfFromList(data, name='loafs')
    myplot.Cdf(empirical)

    mu, var = thinkstats.MeanVar(data)
    sigma = math.sqrt(var)
    print("mu = %.3f, sigma = %.3f" % (mu, sigma))

    # Normal distribution with the same mu and sigma, evaluated on a sample
    # of len(data) values.
    sample = normal_sample(len(data), mu, sigma)
    model_probs = [erf.NormalCdf(x, mu=mu, sigma=sigma) for x in sample]
    myplot.Scatter(sample, model_probs, color='red', label='sample')
    myplot.Show()
Ejemplo n.º 5
0
def main():
    """Show a small Pmf and the result of PmfRemainingLifeTime at age 4."""
    import myplot

    original = Pmf.MakePmfFromList([1, 2, 3, 3, 4, 4, 5, 5, 5, 6, 6, 7])
    remaining = PmfRemainingLifeTime(original, age=4)

    # One figure per distribution, in the same order as they were built.
    for dist in (original, remaining):
        myplot.Hist(dist)
        myplot.Show()
Ejemplo n.º 6
0
def main():
    """Plot the PMF and then the CDF of 1000 random values in [0, 100)."""
    # Named `values` rather than `list` so the builtin is not shadowed.
    values = [100 * random.random() for _ in range(1000)]
    # The label was previously misspelled 'pfm'.
    pmf = Pmf.MakePmfFromList(values, name='pmf')
    cdf = Cdf.MakeCdfFromList(values, name='cdf')
    myplot.Pmf(pmf)
    myplot.Show()
    myplot.Clf()
    myplot.Cdf(cdf)
    myplot.Show()
Ejemplo n.º 7
0
def main():
    """Summarise and plot the height data from brfss for men and women.

    Prints the trimmed mean, variance, standard deviation and coefficient of
    variation for each group, then shows a Hist and a Pmf per group and
    finally both Cdfs in one figure.
    """
    respondents = brfss.Respondents()
    respondents.ReadRecords(data_dir='res')
    heights = respondents.SummarizeHeight()

    men = heights[1]
    women = heights[2]

    # Men: mu, var, sigma and coefficient of variation (CV).
    man_mu, man_var = thinkstats.TrimmedMeanVar(men)
    man_sigma = math.sqrt(man_var)
    man_cv = man_sigma / man_mu
    print("man: mu = %.3f, var = %.3f, sigma = %.3f, cv = %.3f" % (man_mu, man_var, man_sigma, man_cv))

    # Women: mu, var, sigma and coefficient of variation (CV).
    lady_mu, lady_var = thinkstats.TrimmedMeanVar(women)
    lady_sigma = math.sqrt(lady_var)
    lady_cv = lady_sigma / lady_mu
    print("lady: mu = %.3f, var = %.3f, sigma = %.3f, cv = %.3f" % (lady_mu, lady_var, lady_sigma, lady_cv))

    # Hist of each group, one figure apiece.
    hist_men = Pmf.MakeHistFromList(men, name='man hist')
    myplot.Hist(hist_men)
    myplot.Show()

    myplot.Clf()

    hist_women = Pmf.MakeHistFromList(women, name='lady hist')
    myplot.Hist(hist_women)
    myplot.Show()

    myplot.Clf()

    # Pmf of each group, one figure apiece.
    pmf_men = Pmf.MakePmfFromHist(hist_men, name='man pmf')
    myplot.Pmf(pmf_men)
    myplot.Show()

    myplot.Clf()

    pmf_women = Pmf.MakePmfFromHist(hist_women, name='lady pmf')
    myplot.Pmf(pmf_women)
    myplot.Show()

    myplot.Clf()

    # Cumulative distributions of both groups in a single figure.
    cdf_pair = (
        Cdf.MakeCdfFromPmf(pmf_men, name='man cdf'),
        Cdf.MakeCdfFromPmf(pmf_women, name='lady cdf'),
    )
    myplot.Cdfs(cdf_pair, complement=False, transform=None)
    myplot.Show()
Ejemplo n.º 8
0
def Main():
    """Run one simultaneous 'up' update of a Redditor and an Item.

    Builds a Redditor whose distribution comes from Beta(2, 1) scaled to
    0..100 and an Item over 0..100, prints both prior means (divided by
    100), updates each with the other's prior mean, summarizes the item and
    plots both posteriors.
    """
    # make a redditor with some trustworthiness (mean_t = 0.67)
    founder = Redditor(name='redditor')
    beta = thinkbayes.Beta(2, 1)
    for val, prob in beta.MakePmf().Items():
        founder.Set(val * 100, prob)

    # make a new item with unknown quality (mean_q = 0.5)
    item = Item(range(0, 101), name='item')

    # compute the means before either object is updated
    mean_t = founder.Mean() / 100.0
    mean_q = item.Mean() / 100.0

    # print() with one argument behaves the same on Python 2 and Python 3.
    print(mean_t)
    print(mean_q)

    # perform simultaneous updates: each side sees the other's prior mean
    founder.Update(('up', mean_q))
    item.Update(('up', mean_t))

    Summarize(item)

    # display the posterior distributions
    myplot.Pmf(founder)
    myplot.Pmf(item)
    myplot.Show()
def PlotAges(resp):
    """Show the CDF of the `age` attribute across `resp.records`."""
    age_cdf = Cdf.MakeCdfFromList([record.age for record in resp.records])
    # Start from a clean figure before drawing.
    myplot.Clf()
    myplot.Cdf(age_cdf)
    myplot.Show()
Ejemplo n.º 10
0
def ExpoErlangDemo():
    """Update an Expo and an Erlang suite with the same data and plot both.

    A series is made with MakeSeries, split at `num`, and the first part
    ``(n, s1)`` is used to update both suites over 101 rates in
    [0.01, 5.01].
    """
    num = 10

    lam1 = 1
    lam2 = 2
    t = MakeSeries(num, lam1, num, lam2)
    series = Series(t)
    n, s1, m, s2 = series.Split(num)

    # Same text as the former `print n, s1, m, s2`, valid on Python 2 and 3.
    print('%s %s %s %s' % (n, s1, m, s2))

    low, high = 0.01, 5.01
    lams = numpy.linspace(low, high, 101)

    expo = Expo(lams)
    expo.name = 'expo'
    expo.Update((n, s1))

    erlang = Erlang(lams)
    erlang.name = 'erlang'
    erlang.Update((n, s1))

    myplot.Pmf(expo)
    myplot.Pmf(erlang)
    myplot.Show()
Ejemplo n.º 11
0
def ClassSizes():
    """Print mean/variance of the actual, biased and unbiased class-size Pmfs.

    The actual distribution is hard-coded, BiasPmf produces the 'observed'
    Pmf from it, and UnbiasPmf is applied to that result.  All three Pmfs
    are plotted together at the end.
    """
    # start with the actual distribution of class sizes from the book
    d = {
        7: 8,
        12: 8,
        17: 14,
        22: 4,
        27: 6,
        32: 12,
        37: 8,
        42: 3,
        47: 2,
    }

    # Single-argument print() calls below emit the same text as the former
    # print statements on Python 2 and also run on Python 3.

    # form the pmf
    pmf = Pmf.MakePmfFromDict(d, 'actual')
    print('mean %s' % (pmf.Mean(),))
    print('var %s' % (pmf.Var(),))

    # compute the biased pmf
    biased_pmf = BiasPmf(pmf, 'observed')
    print('mean %s' % (biased_pmf.Mean(),))
    print('var %s' % (biased_pmf.Var(),))

    # unbias the biased pmf
    unbiased_pmf = UnbiasPmf(biased_pmf, 'unbiased')
    print('mean %s' % (unbiased_pmf.Mean(),))
    print('var %s' % (unbiased_pmf.Var(),))

    # plot the Pmfs
    myplot.Pmfs([pmf, biased_pmf, unbiased_pmf])
    myplot.Show(xlabel='Class size', ylabel='PMF')
Ejemplo n.º 12
0
def main():
    """Show and save the running-speed distribution after BiasPmf(…, 7.5).

    The biased Pmf is shown interactively, then saved as a Hist under the
    root 'observed_speeds', and its Cdf is saved under
    'observed_speeds_cdf'.
    """
    speeds = relay.GetSpeeds(relay.ReadResults())

    # Distribution of the actual speeds.
    actual = Pmf.MakePmfFromList(speeds, 'actual speeds')

    # Biased distribution as seen by the observer.
    observed = BiasPmf(actual, 7.5, name='observed speeds')
    myplot.Pmf(observed)
    myplot.Show(
        title='soln. PMF of running speed',
        xlabel='speed (mph)',
        ylabel='probability',
    )

    myplot.Clf()
    myplot.Hist(observed)
    myplot.Save(
        root='observed_speeds',
        title='PMF of running speed',
        xlabel='speed (mph)',
        ylabel='probability',
    )

    observed_cdf = Cdf.MakeCdfFromPmf(observed)

    myplot.Clf()
    myplot.Cdf(observed_cdf)
    myplot.Save(
        root='observed_speeds_cdf',
        title='CDF of running speed',
        xlabel='speed (mph)',
        ylabel='cumulative probability',
    )
Ejemplo n.º 13
0
def _live_birth_weights_oz(records, max_oz=200):
    """Return birth weights in ounces for the live births in `records`.

    A record is skipped when its outcome is not 1, when either weight field
    is not an int, or when the total exceeds `max_oz`.
    """
    weights = []
    for p in records:
        if p.outcome != 1:
            continue
        lbs, oz = p.birthwgt_lb, p.birthwgt_oz
        if isinstance(lbs, int) and isinstance(oz, int) and lbs * 16 + oz <= max_oz:
            weights.append(lbs * 16 + oz)
    return weights


def main():
    """Exercise 3.6: print percentile ranks of one birth weight.

    The weight is ranked against all live births and against live births
    that were not first births; the CDF of the latter group is then shown.
    """
    myBirthWeight = 163  # in ounces, the unit of the computed weights
    table = survey.Pregnancies()
    table.ReadRecords()

    liveBirthWeightsCdf = Cdf.MakeCdfFromList(
        _live_birth_weights_oz(table.records), name="live birth weights")
    print("My birth weight percentile rank (vs all births): %d" %
          (100 * liveBirthWeightsCdf.Prob(myBirthWeight)))

    # Check the outcome first so birthord is only read for live births.
    notFirstRecords = [p for p in table.records
                       if p.outcome == 1 and p.birthord != 1]
    notFirstLiveBirthWeightsCdf = Cdf.MakeCdfFromList(
        _live_birth_weights_oz(notFirstRecords),
        name="not first live birth weights")
    # The message used to say "vs first births" although this group
    # excludes first births.
    print("My birth weight percentile rank (vs non-first births): %d" %
          (100 * notFirstLiveBirthWeightsCdf.Prob(myBirthWeight)))

    myplot.Cdf(notFirstLiveBirthWeightsCdf)
    myplot.Show(title="not first live birth weight CDF",
                xlabel="birth weight oz",
                ylabel="probability")
Ejemplo n.º 14
0
def Summarize(data_dir):
    """Prints summary statistics for first babies and others.

    Prints the group sizes, the mean gestation length of each group, their
    difference in days and each group's standard deviation, then shows both
    length histograms in one figure.

    Args:
        data_dir: directory passed through to MakeTables.

    Returns:
        tuple of Tables: (table, firsts, others) as produced by MakeTables
        and processed by ProcessTables.
    """
    table, firsts, others = MakeTables(data_dir)
    ProcessTables(firsts, others)

    print("Number of first babies", firsts.n)
    print("Number of others", others.n)

    mu1, mu2 = firsts.mu, others.mu

    print("Mean gestation in weeks:")
    print("First babies", mu1)
    print("Others", mu2)

    # The means are in weeks, hence the factor of 7.
    print("Difference in days", (mu1 - mu2) * 7.0)

    var = thinkstats.Var(firsts.lengths)
    sd = numpy.sqrt(var)
    print("Firsts sd = ", sd)

    var = thinkstats.Var(others.lengths)
    sd = numpy.sqrt(var)
    print("Others sd = ", sd)

    histfirst = Pmf.MakeHistFromList(firsts.lengths)
    histother = Pmf.MakeHistFromList(others.lengths)

    myplot.Hists([histfirst, histother])
    myplot.Show()

    # The docstring always promised the tables; callers that ignored the
    # previous None result are unaffected.
    return table, firsts, others
Ejemplo n.º 15
0
def main():
    """Show the PMF of running speeds, then their CDF via Cdf and Cdfs."""
    import Cdf

    results = ReadResults()
    speeds = GetSpeeds(results)
    pmf = Pmf.MakePmfFromList(speeds, 'speeds')
    myplot.Pmf(pmf)
    myplot.Show(title='PMF of running speed',
                xlabel='speed (mph)',
                ylabel='probability')

    cdf = Cdf.MakeCdfFromList(speeds, 'speeds')
    myplot.Cdf(cdf)
    myplot.Show()

    # Cdfs takes a sequence of Cdf objects, so a single Cdf must be wrapped
    # in a list rather than passed bare.
    myplot.Cdfs([cdf])
    myplot.Show()
Ejemplo n.º 16
0
 def ScatterPlot(self, root, heights, weights, alpha=1.0):
     """Show a scatter plot of weights against heights.

     `root` is accepted but not used: the figure is shown, not saved.
     """
     pyplot.scatter(heights, weights, alpha=alpha, edgecolors='none')
     display_options = dict(
         xlabel='Height (cm)',
         ylabel='Weight (kg)',
         axis=[140, 210, 20, 200],
         legend=False,
     )
     myplot.Show(**display_options)
Ejemplo n.º 17
0
def main():
    sz, alph, exem = 1000, 1.7, 100
    lst = paretovariate(sz, alph, exem)
    lst_cdf = Cdf.MakeCdfFromList(lst)

    myplot.Clf()
    myplot.Cdf(lst_cdf, complement=True, xscale='log', yscale='log')
    myplot.Show(title='CCDF of {0} random paretovariates'.format(sz))
Ejemplo n.º 18
0
def main():
    """Read all stats records and show the result of CdfPerDay."""
    all_recs = cyb_records.Stats()
    all_recs.ReadRecords()
    # Same text as the former print statement, valid on Python 2 and 3.
    print('Number of total stats %s' % (len(all_recs.records),))

    cdf = CdfPerDay(all_recs.records)
    # NOTE(review): Cdfs is given whatever CdfPerDay returns; presumably a
    # sequence of Cdf objects -- confirm.
    myplot.Cdfs(cdf)
    myplot.Show(title="CDF: daily usage of machines at the YMCA",
                xlabel='Distance (in m / day)',
                ylabel='Percentile')
Ejemplo n.º 19
0
def main():
    """Show the CDF and the log-scale CCDF of gaps between baby records.

    Each gap is a record's `minutes` minus the previous record's `minutes`;
    the first record is measured from 0.
    """
    babies = BabyBoom.Babies()
    babies.ReadRecords(data_dir='res', n=None)

    minutes = [record.minutes for record in babies.records]
    # Pair every timestamp with the one before it (0 before the first).
    gaps = [now - before for before, now in zip([0] + minutes, minutes)]

    gap_cdf = Cdf.MakeCdfFromList(gaps, name='baby interval')
    myplot.Cdf(gap_cdf, complement=False, transform=None)
    myplot.Show()

    # Log-scale y axis on the complementary CDF: CCDF(X) = 1 - CDF(X).
    myplot.Clf()
    myplot.Cdf(gap_cdf, complement=True, yscale='log')
    myplot.Show()
Ejemplo n.º 20
0
 def HexBin(self, root, heights, weights, cmap=matplotlib.cm.Blues):
     """Show a hexbin plot of weights against heights.

     `root` is accepted but not used: the figure is shown, not saved.
     """
     pyplot.hexbin(heights, weights, cmap=cmap)
     display_options = dict(
         xlabel='Height (cm)',
         ylabel='Weight (kg)',
         axis=[140, 210, 20, 200],
         legend=False,
     )
     myplot.Show(**display_options)
Ejemplo n.º 21
0
def main():
    """Update a Train suite over 100..1000 with 321 and plot the result.

    Prints the mean of the suite after the update.
    """
    # range() exists on both Python 2 and 3, unlike xrange().
    hypos = range(100, 1001)
    suite = Train(hypos)

    suite.Update(321)
    # print() with one argument behaves the same on Python 2 and Python 3.
    print(suite.Mean())

    myplot.Pmf(suite)
    myplot.Show()
Ejemplo n.º 22
0
def CheckCdf2():
    """Show the CDF of simulated chi2 values mapped through chi2.cdf (df=3).

    Draws 1000 values from SimulateChi2, evaluates scipy's chi-squared CDF
    with 3 degrees of freedom at each one, and plots the CDF of the results.
    """
    degrees = 3
    simulated = [SimulateChi2() for _ in range(1000)]
    mapped = [scipy.stats.chi2.cdf(value, degrees) for value in simulated]

    myplot.Cdf(Cdf.MakeCdfFromList(mapped))
    myplot.Show()
Ejemplo n.º 23
0
def create_error_pmf():
    """Read all event records and show the Pmfs built by Pmf_errors."""
    events = cyb_records.Events()
    events.ReadRecords()

    error_pmfs = Pmf_errors(events.records)
    myplot.Pmfs(error_pmfs)
    myplot.Show(
        title="PDF of different types of errors Per Machine",
        xlabel='Error Codes',
        ylabel='Probability',
    )
Ejemplo n.º 24
0
def main():
    """Read all stats records and show the result of CdfPerMachine."""
    all_recs = cyb_records.Stats()
    all_recs.ReadRecords()
    # Same text as the former print statement, valid on Python 2 and 3.
    print('Number of total stats %s' % (len(all_recs.records),))

    cdf = CdfPerMachine(all_recs.records)
    # NOTE(review): Cdfs is given whatever CdfPerMachine returns; presumably
    # a sequence of Cdf objects -- confirm.
    myplot.Cdfs(cdf)
    myplot.Show(title="CDF of cardio machine average distances",
                xlabel='Average Distances',
                ylabel='Probability')
Ejemplo n.º 25
0
def CheckCdf():
    """Overlay the Chi2Cdf curve (df=3) with the CDF of simulated values.

    The curve comes from Chi2Cdf(df=3, high=15); the empirical CDF is built
    from 1000 calls to SimulateChi2.
    """
    curve_x, curve_y = Chi2Cdf(df=3, high=15)
    pyplot.plot(curve_x, curve_y)

    simulated = [SimulateChi2() for _ in range(1000)]
    empirical = Cdf.MakeCdfFromList(simulated)

    myplot.Cdf(empirical)
    myplot.Show()
Ejemplo n.º 26
0
def Q2(results):
    """Show the CDF of `results` and print its values at p=0.95 and p=0.99.

    Note that `results` is sorted in place.
    """
    results.sort()
    dist = Cdf.MakeCdfFromList(results, name='cdf')
    myplot.Cdf(dist)
    myplot.Show()

    for level in (0.95, 0.99):
        threshold = dist.Value(level)
        print("p:%4.2f significant_value = %d" % (level, threshold))
Ejemplo n.º 27
0
def main():
    """Show the CDF of the running speeds read through the relay module."""
    speeds = relay.GetSpeeds(relay.ReadResults())

    # Distribution of the actual speeds.
    speed_cdf = Cdf.MakeCdfFromList(speeds, 'speeds')
    myplot.Cdf(speed_cdf)

    labels = dict(
        title='CDF of running speed',
        xlabel='speed (mph)',
        ylabel='cumulative probability',
    )
    myplot.Show(**labels)
Ejemplo n.º 28
0
def main():
    """Show a Hist of the running speeds after BiasPmf(…, 7.5)."""
    speeds = relay.GetSpeeds(relay.ReadResults())
    actual = Pmf.MakePmfFromList(speeds, 'actual speeds')

    biased = BiasPmf(actual, 7.5, 'observed speeds')

    myplot.Clf()
    myplot.Hist(biased)
    labels = dict(
        title='observed speeds',
        xlabel='speed (mph)',
        ylabel='probability',
    )
    myplot.Show(**labels)
Ejemplo n.º 29
0
def PlotPMF(records, machine_filter=None):
    """Plot per-week error histograms for keys with more than 10 errors.

    Args:
        records: records passed through to GetErrorsPerWeek.
        machine_filter: filter passed through to GetErrorsPerWeek; when
            omitted a fresh empty list is used.
    """
    # A literal `[]` default would be one list object shared by every call.
    if machine_filter is None:
        machine_filter = []

    pmfs = []
    errors = GetErrorsPerWeek(records, machine_filter)
    for key in errors.keys():
        weekly = errors.get(key)
        # Only keep entries whose total, per SumErrors, exceeds 10.
        if SumErrors(weekly) > 10:
            pmfs.append(Pmf.MakeHistFromDict(weekly, key))
    myplot.Pmfs(pmfs)
    myplot.Show(title="Histogram: Error Rate per Week",
                xlabel='Date',
                ylabel='Errors per week')
Ejemplo n.º 30
0
def main():
    """Show the weight CDF and the CDF of a 1000-value sample drawn from it."""
    data_dir = '../chap1/'
    preg = survey.Pregnancies()
    preg.ReadRecords(data_dir)
    cdf = weight_cdf(preg)
    myplot.Cdf(cdf)
    # Was `myplot.show()`; the function is spelled `Show`, as in the second
    # call below.
    myplot.Show()

    sample = Sample(cdf, 1000)
    cdf_sample = Cdf.MakeCdfFromList(sample)
    myplot.Cdf(cdf_sample)
    myplot.Show()