Esempio n. 1
0
def main(script, filename='mystery0.dat'):
    data = ReadFile(filename)
    cdf = thinkstats2.Cdf(data)

    thinkplot.PrePlot(rows=2, cols=3)
    thinkplot.SubPlot(1)
    thinkplot.Cdf(cdf)
    thinkplot.Config(title='linear')

    thinkplot.SubPlot(2)
    scale = thinkplot.Cdf(cdf, xscale='log')
    thinkplot.Config(title='logx', **scale)

    thinkplot.SubPlot(3)
    scale = thinkplot.Cdf(cdf, transform='exponential')
    thinkplot.Config(title='expo', **scale)

    thinkplot.SubPlot(4)
    xs, ys = thinkstats2.NormalProbability(data)
    thinkplot.Plot(xs, ys)
    thinkplot.Config(title='normal')

    thinkplot.SubPlot(5)
    scale = thinkplot.Cdf(cdf, transform='pareto')
    thinkplot.Config(title='pareto', **scale)

    thinkplot.SubPlot(6)
    scale = thinkplot.Cdf(cdf, transform='weibull')
    thinkplot.Config(title='weibull', **scale)

    thinkplot.Show(legend=False)
Esempio n. 2
0
 def MakePlot(self):
     """Plot the CDFs."""
     thinkplot.Cdf(self.pmf_y.MakeCdf())
     thinkplot.Cdf(self.prior_zb.MakeCdf())
     thinkplot.Cdf(self.post_zb.MakeCdf())
     thinkplot.Cdf(self.pmf_mean_zb.MakeCdf())
     thinkplot.Show()
Esempio n. 3
0
def MakePosteriorPlot(suite):
    """Plots the posterior marginal distributions for alpha and beta.

    suite: posterior joint distribution of location
    """
    marginal_alpha = suite.Marginal(0)
    marginal_alpha.name = 'alpha'
    marginal_beta = suite.Marginal(1)
    marginal_beta.name = 'beta'

    print('alpha CI', marginal_alpha.CredibleInterval(50))
    print('beta CI', marginal_beta.CredibleInterval(50))

    thinkplot.PrePlot(num=2)

    #thinkplot.Pmf(marginal_alpha)
    #thinkplot.Pmf(marginal_beta)
    
    thinkplot.Cdf(thinkbayes2.MakeCdfFromPmf(marginal_alpha))
    thinkplot.Cdf(thinkbayes2.MakeCdfFromPmf(marginal_beta))
    
    thinkplot.Save('paintball2',
                xlabel='Distance',
                ylabel='Prob',
                loc=4,
                formats=FORMATS)
Esempio n. 4
0
def MakeBabyBoom():
    """Plot CDF of interarrival time on log and linear scales.
    """
    # compute the interarrival times
    df = ReadBabyBoom()
    diffs = df.minutes.diff()
    cdf = thinkstats2.Cdf(diffs, label='actual')

    thinkplot.PrePlot(cols=2)
    thinkplot.Cdf(cdf)
    thinkplot.Config(xlabel='minutes', ylabel='CDF', legend=False)

    thinkplot.SubPlot(2)
    thinkplot.Cdf(cdf, complement=True)
    thinkplot.Config(xlabel='minutes',
                     ylabel='CCDF',
                     yscale='log',
                     legend=False)

    thinkplot.Save(root='analytic_interarrivals')

    n = len(diffs)
    lam = 44 / 24 * 60.0
    sample = [random.expovariate(lam) for _ in range(n)]
    model = thinkstats2.Cdf(sample, label='model')

    thinkplot.PrePlot(2)
    thinkplot.Cdfs([cdf, model], complement=True)
    thinkplot.Save(root='analytic_interarrivals_model',
                   title='Time between births',
                   xlabel='minutes',
                   ylabel='CCDF',
                   yscale='log')
Esempio n. 5
0
def main():
    filename = 'mystery0.dat'
    data = read_file(filename)
    cdf = thinkstats2.MakeCdfFromList(data)

    thinkplot.SubPlot(2, 3, 1)
    thinkplot.Cdf(cdf)
    thinkplot.Config(title='linear')

    thinkplot.SubPlot(2, 3, 2)
    scale = thinkplot.Cdf(cdf, xscale='log')
    thinkplot.Config(title='logx', **scale)

    thinkplot.SubPlot(2, 3, 3)
    scale = thinkplot.Cdf(cdf, transform='exponential')
    thinkplot.Config(title='expo', **scale)

    thinkplot.SubPlot(2, 3, 4)
    xs, ys = thinkstats2.NormalProbability(data)
    thinkplot.Plot(xs, ys)
    thinkplot.Config(title='normal')

    thinkplot.SubPlot(2, 3, 5)
    scale = thinkplot.Cdf(cdf, transform='pareto')
    thinkplot.Config(title='pareto', **scale)

    thinkplot.SubPlot(2, 3, 6)
    scale = thinkplot.Cdf(cdf, transform='weibull')
    thinkplot.Config(title='weibull', **scale)

    thinkplot.Show()
Esempio n. 6
0
def main(script, filename='mystery0.dat'):
    data = ReadFile(filename)
    cdf = thinkstats2.Cdf(data)

    thinkplot.PrePlot(num=6, rows=2, cols=3)
    thinkplot.SubPlot(1)
    thinkplot.Cdf(cdf, color='C0', label=filename)
    thinkplot.Config(title='CDF on linear scale', ylabel='CDF')

    thinkplot.SubPlot(2)
    scale = thinkplot.Cdf(cdf, xscale='log', color='C0')
    thinkplot.Config(title='CDF on log-x scale', ylabel='CDF', **scale)

    thinkplot.SubPlot(3)
    scale = thinkplot.Cdf(cdf, transform='exponential', color='C0')
    thinkplot.Config(title='CCDF on log-y scale', ylabel='log CCDF', **scale)

    thinkplot.SubPlot(4)
    xs, ys = thinkstats2.NormalProbability(data)
    thinkplot.Plot(xs, ys, color='C0')
    thinkplot.Config(title='Normal probability plot',
                     xlabel='random normal',
                     ylabel='data')

    thinkplot.SubPlot(5)
    scale = thinkplot.Cdf(cdf, transform='pareto', color='C0')
    thinkplot.Config(title='CCDF on log-log scale', ylabel='log CCDF', **scale)

    thinkplot.SubPlot(6)
    scale = thinkplot.Cdf(cdf, transform='weibull', color='C0')
    thinkplot.Config(title='CCDF on loglog-y log-x scale',
                     ylabel='log log CCDF',
                     **scale)

    thinkplot.Show(legend=False)
Esempio n. 7
0
def generate_cdf(fb, hk):
    cdf_fb = Cdf(degrees(fb))
    cdf_hk = Cdf(degrees(hk))

    thinkplot.Cdf(cdf_fb, color='gray', label="Facebook CDF")
    thinkplot.Cdf(cdf_hk, label='RPA CDF')
    thinkplot.config(xlabel='degree', xscale='log', ylabel='CDF')

    plt.savefig('CDFGraphs_Modified.png')
Esempio n. 8
0
def generate_ccdf(fb, hk):
    cdf_fb = Cdf(degrees(fb))
    cdf_hk = Cdf(degrees(hk))

    thinkplot.Cdf(cdf_fb, label='Facebook CCDF', color='gray', complement=True)
    thinkplot.Cdf(cdf_hk, label="RPA CCDF", complement=True)
    thinkplot.config(xlabel='degree',
                     xscale='log',
                     ylabel='CCDF',
                     yscale='log')

    plt.savefig("CCDFGraphs_Modified.png")
def MakeFigures():
    """Plots the CDF of populations in several forms.

    On a log-log scale the tail of the CCDF looks like a straight line,
    which suggests a Pareto distribution, but that turns out to be misleading.

    On a log-x scale the distribution has the characteristic sigmoid of
    a lognormal distribution.

    The normal probability plot of log(sizes) confirms that the data fit the
    lognormal model very well.

    Many phenomena that have been described with Pareto models can be described
    as well, or better, with lognormal models.
    """
    pops = ReadData()
    print('Number of cities/towns', len(pops))
    
    log_pops = np.log10(pops)
    cdf = thinkstats2.Cdf(pops, label='data')
    cdf_log = thinkstats2.Cdf(log_pops, label='data')

    # pareto plot
    xs, ys = thinkstats2.RenderParetoCdf(xmin=5000, alpha=1.4, low=0, high=1e7)
    thinkplot.Plot(np.log10(xs), 1-ys, label='model', color='0.8')

    thinkplot.Cdf(cdf_log, complement=True) 
    thinkplot.Config(xlabel='log10 population',
                     ylabel='CCDF',
                     yscale='log')
    thinkplot.Save(root='populations_pareto')

    # lognormal plot
    thinkplot.PrePlot(cols=2)

    mu, sigma = log_pops.mean(), log_pops.std()
    xs, ps = thinkstats2.RenderNormalCdf(mu, sigma, low=0, high=8)
    thinkplot.Plot(xs, ps, label='model', color='0.8')

    thinkplot.Cdf(cdf_log) 
    thinkplot.Config(xlabel='log10 population',
                     ylabel='CDF')

    thinkplot.SubPlot(2)
    thinkstats2.NormalProbabilityPlot(log_pops, label='data')
    thinkplot.Config(xlabel='z',
                     ylabel='log10 population',
                     xlim=[-5, 5])

    thinkplot.Save(root='populations_normal')
Esempio n. 10
0
def main():
    df = ReadData()
    cdf = thinkstats2.Cdf(df['ps'])

    thinkplot.PrePlot(rows=1, cols=2)
    thinkplot.SubPlot(1)
    scale = thinkplot.Cdf(cdf, xscale='log')
    thinkplot.Config(title='logx', **scale)

    thinkplot.SubPlot(2)
    scale = thinkplot.Cdf(cdf, transform='pareto')
    thinkplot.Config(title='pareto', **scale)

    thinkplot.show(legend=False)
    
    print(df)
Esempio n. 11
0
def main():
    df = hinc.ReadData()
    log_sample = InterpolateSample(df, log_upper=6.0)

    log_cdf = thinkstats2.Cdf(log_sample)
    thinkplot.Cdf(log_cdf)
    thinkplot.Show(xlabel='household income', ylabel='CDF')
 def testPmfMax(self):
     d6 = thinkstats2.Pmf(range(1, 7))
     two = d6 + d6
     three = two + d6
     cdf = three.Max(6)
     thinkplot.Cdf(cdf)
     self.assertAlmostEqual(cdf[14], 0.558230962626)
Esempio n. 13
0
    def SimulateSample(self, n=9, m=1000):
        """Plots the sampling distribution of the sample mean.

        mu: hypothetical population mean
        sigma: hypothetical population standard deviation
        n: sample size
        m: number of iterations
        """
        def VertLine(x, y=1):
            thinkplot.Plot([x, x], [0, y], color='0.8', linewidth=3)

        means = []
        for _ in range(m):
            xs = genextreme.rvs(c=self.shape, loc=self.loc, scale=self.scale, size=n)
            xbar = np.mean(xs)
            means.append(xbar)

        stderr = self.RMSE(means, self.loc)
        print('Erro Padrão', stderr)

        cdf = thinkstats2.Cdf(means)
        ci = cdf.Percentile(5), cdf.Percentile(95)
        print('Intervalo de Confiança: ', ci)
        VertLine(ci[0])
        VertLine(ci[1])

        # plot the CDF
        thinkplot.Cdf(cdf)
        #thinkplot.Save(root='estimation1',
         #              xlabel='sample mean',
          #             ylabel='CDF',
           #            title='Sampling distribution')
Esempio n. 14
0
def SimulateSample(mu=90, sigma=7.5, n=9, m=1000):
    def VertLine(x, y=1):
        thinkplot.Plot([x, x], [0, y], color='0.8', linewidth=3)

    means = []
    for j in range(m):
        xs = np.random.normal(mu, sigma, n)
        xbar = np.mean(xs)
        means.append(xbar)

    stderr = RMSE(means, mu)
    print('standard error', stderr)

    cdf = thinkstats2.MakeCdfFromList(means)
    ci = cdf.Percentile(5), cdf.Percentile(95)
    print('confidence interval', ci)
    VertLine(ci[0])
    VertLine(ci[1])

    # plot the CDF
    thinkplot.Cdf(cdf)
    thinkplot.Save(root='estimation1',
                   xlabel='sample mean',
                   ylabel='CDF',
                   title='Sampling distribution')
Esempio n. 15
0
def ProcessScoresPairwise(pairs):
    """Average number of goals for each team against each opponent.

    pairs: map from (team1, team2) to (score1, score2)
    """
    # map from (team1, team2) to list of goals scored
    goals_scored = {}
    for key, entries in pairs.iteritems():
        t1, t2 = key
        for entry in entries:
            g1, g2 = entry
            goals_scored.setdefault((t1, t2), []).append(g1)
            goals_scored.setdefault((t2, t1), []).append(g2)

    # make a list of average goals scored
    lams = []
    for key, goals in goals_scored.iteritems():
        if len(goals) < 3:
            continue
        lam = thinkstats.Mean(goals)
        lams.append(lam)

    # make the distribution of average goals scored
    cdf = thinkbayes.MakeCdfFromList(lams)
    thinkplot.Cdf(cdf)
    thinkplot.Show()

    mu, var = thinkstats.MeanVar(lams)
    print('mu, sig', mu, math.sqrt(var))

    print('BOS v VAN', pairs['BOS', 'VAN'])
Esempio n. 16
0
def ProcessScoresTeamwise(pairs):
    """Average number of goals for each team.

    pairs: map from (team1, team2) to (score1, score2)
    """
    # map from team to list of goals scored
    goals_scored = {}
    for key, entries in pairs.iteritems():
        t1, t2 = key
        for entry in entries:
            g1, g2 = entry
            goals_scored.setdefault(t1, []).append(g1)
            goals_scored.setdefault(t2, []).append(g2)

    # make a list of average goals scored
    lams = []
    for key, goals in goals_scored.iteritems():
        lam = thinkbayes2.Mean(goals)
        lams.append(lam)

    # make the distribution of average goals scored
    cdf = thinkbayes2.MakeCdfFromList(lams)
    thinkplot.Cdf(cdf)
    thinkplot.Show()

    mu, var = thinkbayes2.MeanVar(lams)
    print('mu, sig', mu, math.sqrt(var))
Esempio n. 17
0
def SimulateSample(lam=2, n=10, m=1000):
    """Sampling distribution of L as an estimator of exponential parameter.

    lam: parameter of an exponential distribution
    n: sample size
    m: number of iterations
    """
    def VertLine(x, y=1):
        thinkplot.Plot([x, x], [0, y], color='0.8', linewidth=3)

    estimates = []
    for j in range(m):
        xs = np.random.exponential(1 / lam, n)
        lamhat = 1 / np.mean(xs)
        estimates.append(lamhat)

    stderr = RMSE(estimates, lam)
    print('standard error', stderr)

    cdf = thinkstats2.Cdf(estimates)
    ci = cdf.Percentile(5), cdf.Percentile(95)
    print('confidence interval', ci)
    VertLine(ci[0])
    VertLine(ci[1])

    # plot the CDF
    thinkplot.Cdf(cdf)
    thinkplot.Save(root='estimation2',
                   xlabel='estimate',
                   ylabel='CDF',
                   title='Sampling distribution')

    return stderr
Esempio n. 18
0
def SimulateSample(mu=90, sigma=7.5, n=9, m=1000):
    """Plots the sampling distribution of the sample mean.

    mu: hypothetical population mean
    sigma: hypothetical population standard deviation
    n: sample size
    m: number of iterations
    """
    def VertLine(x, y=1):
        thinkplot.Plot([x, x], [0, y], color='0.8', linewidth=3)

    means = []
    for _ in range(m):
        xs = np.random.normal(mu, sigma, n)
        xbar = np.mean(xs)
        means.append(xbar)

    stderr = RMSE(means, mu)
    print('standard error', stderr)

    cdf = thinkstats2.Cdf(means)
    ci = cdf.Percentile(5), cdf.Percentile(95)
    print('confidence interval', ci)
    VertLine(ci[0])
    VertLine(ci[1])

    # plot the CDF
    thinkplot.Cdf(cdf)
    thinkplot.Save(root='estimation1',
                   xlabel='sample mean',
                   ylabel='CDF',
                   title='Sampling distribution')
Esempio n. 19
0
def main():
    filename = 'mystery0.dat'
    data = read_file(filename)

    pmf = thinkstats2.MakePmfFromList(data)
    cdf = thinkstats2.MakeCdfFromList(data)

    pdf = thinkstats2.EstimatedPdf(data)
    low, high = min(data), max(data)
    xs = numpy.linspace(low, high, 101)
    kde_pmf = pdf.MakePmf(xs)

    bin_data = BinData(data, low, high, 51)
    bin_pmf = thinkstats2.MakePmfFromList(bin_data)

    thinkplot.SubPlot(2, 2, 1)
    thinkplot.Hist(pmf, width=0.1)
    thinkplot.Config(title='Naive Pmf')

    thinkplot.SubPlot(2, 2, 2)
    thinkplot.Hist(bin_pmf)
    thinkplot.Config(title='Binned Hist')

    thinkplot.SubPlot(2, 2, 3)
    thinkplot.Pmf(kde_pmf)
    thinkplot.Config(title='KDE PDF')

    thinkplot.SubPlot(2, 2, 4)
    thinkplot.Cdf(cdf)
    thinkplot.Config(title='CDF')

    thinkplot.Show()
Esempio n. 20
0
def main():
    df = hinc.ReadData()
    log_sample = InterpolateSample(df, log_upper=6.0)

    log_cdf = thinkstats2.Cdf(log_sample)

    print("median", thinkstats2.Median(log_sample))
    print("pearson's median skewness",
          thinkstats2.PearsonMedianSkewness(log_sample))
    print("skewness", thinkstats2.Skewness(log_sample))
    print("mean", log_cdf.Mean())

    print(
        "the higher our log_upper, the more right-skewed (according to g_1) or at least less left-skewed (according to g_p) things get"
    )
    print("the mean moves to the right a bit, too.")

    print("proportion of the population with income < mean",
          log_cdf.Prob(log_cdf.Mean()))
    print(
        "the higher the upper bound, the greater the proprtion below the mean."
    )

    thinkplot.Cdf(log_cdf)
    thinkplot.Show(xlabel='household income', ylabel='CDF')
Esempio n. 21
0
def SimulateSample(mu=90, sigma=7.5, n=9, m=1000):
    
    means = []
    for j in range(m):
        xs = [random.gauss(mu, sigma) for i in range(n)]
        xbar = numpy.mean(xs)
        means.append(xbar)

    print 'rmse', RMSE(means, mu)

    cdf = thinkstats2.MakeCdfFromList(means)
    print 'confidence interval', cdf.Percentile(5), cdf.Percentile(95) 

    # estimate the PDF by KDE
    pdf = thinkstats2.EstimatedPdf(means)
    stderr = sigma / math.sqrt(n)
    vals = numpy.linspace(mu-3*stderr, mu+3*stderr, 101)
    pmf = pdf.MakePmf(vals)
    #thinkplot.Pmf(pmf)

    # plot the CDF
    thinkplot.Cdf(cdf)
    thinkplot.Save(root='estimate1',
                   xlabel='sample mean',
                   ylabel='CDF',
                   title='Sampling distribution'
                   )
Esempio n. 22
0
def CH5_5():
    """
    最大值操作:
    转动3个6面的骰子, 计算它们的最大值 采用下面三种方式, 对比分布图.
    
    模拟:
    枚举:
    指数计算:

    """

    d6 = Die(6)
    k = 3

    # 模拟
    N = 1000
    dists = [d6] * k
    pmf = SampleMax(dists, N)
    pmf.name = 'sim'
    thinkplot.Pmf(pmf)

    # 枚举 km^2
    pmf = PmfMax(d6, d6)
    print("pmf1.Total() = %.3f" % pmf.Total())
    pmf = PmfMax(pmf, d6)
    print("pmf2.Total() = %.3f" % pmf.Total())
    pmf.name = 'enum'
    thinkplot.Pmf(pmf)

    # CDF (指数max) TODO 不是很明白???
    cdf = d6.Max(k)
    cdf.name = "expo"
    thinkplot.Cdf(cdf)

    thinkplot.Show(xlabel='max([d6]*3)', ylabel='probablity')
Esempio n. 23
0
def ex3():
    def VertLine(x, y=1):
        thinkplot.Plot([x, x], [0, y], color='0.8', linewidth=3)

    lam = 4
    goal_totals = [SimulateGame(lam=lam) for _ in range(1000)]
    print('RMSE', RMSE(goal_totals, lam))
    hist = thinkstats2.Hist(goal_totals)
    cdf = thinkstats2.Cdf(goal_totals)
    thinkplot.PrePlot(rows=2, cols=2)
    thinkplot.SubPlot(1)
    thinkplot.Hist(hist)
    thinkplot.SubPlot(2)
    thinkplot.Cdf(cdf)
    VertLine(cdf.Percentile(5))
    VertLine(cdf.Percentile(95))
    thinkplot.SubPlot(3)

    # lambda vs. rmse
    # rmse goes up as lambda goes up
    lams = range(1, 15)
    rmses = [RMSE([SimulateGame(lam=l) for _ in range(1000)], l) for l in lams]
    thinkplot.Plot(lams, rmses)
    thinkplot.SubPlot(4)

    # m vs. rmse
    # maybe rmse very slowly goes down as m goes up?
    # not at all clear that's really the case...
    ms = np.arange(10, 1000, 10)
    rmses = [RMSE([SimulateGame() for _ in range(m)], 4) for m in ms]
    thinkplot.Plot(ms, rmses)

    thinkplot.show()
Esempio n. 24
0
def TestGte():
    """Tests the GapTimeEstimator."""
    random.seed(17)

    xs = [60, 120, 240]

    gap_times = [60, 60, 60, 60, 60, 120, 120, 120, 240, 240]

    # distribution of gap time (z)
    pdf_z = thinkbayes.EstimatedPdf(gap_times)
    pmf_z = pdf_z.MakePmf(xs, name="z")

    wtc = WaitTimeCalculator(pmf_z, inverse=False)

    lam = 0.0333
    n = 100
    passenger_data = wtc.GenerateSamplePassengers(lam, n)

    pcounts = [0, 0, 0]

    ite = GapTimeEstimator(xs, pcounts, passenger_data)

    thinkplot.Clf()

    # thinkplot.Cdf(wtc.pmf_z.MakeCdf(name="actual z"))
    thinkplot.Cdf(wtc.pmf_zb.MakeCdf(name="actual zb"))
    ite.MakePlot()
Esempio n. 25
0
def SimulateSample(lam=2, n=10, m=1000):
    def VertLine(x, y=1):
        thinkplot.Plot([x, x], [0, y], color='0.8', linewidth=3)

    estimates = []
    for j in range(m):
        xs = np.random.exponential(1.0/lam, n)
        lamhat = 1.0 / np.mean(xs)
        estimates.append(lamhat)

    stderr = RMSE(estimates, lam)
    print('standard error', stderr)

    cdf = thinkstats2.MakeCdfFromList(estimates)
    ci = cdf.Percentile(5), cdf.Percentile(95)
    print('confidence interval', ci)
    VertLine(ci[0])
    VertLine(ci[1])

    # plot the CDF
    thinkplot.Cdf(cdf)
    thinkplot.Save(root='estimation2',
                   xlabel='estimate',
                   ylabel='CDF',
                   title='Sampling distribution')

    return stderr
Esempio n. 26
0
    def MakePlot(self, root='redline4'):
        """Makes a plot showing the mixture."""
        thinkplot.Clf()

        # plot the MetaPmf
        for pmf, prob in sorted(self.metapmf.Items()):
            cdf = pmf.MakeCdf().Scale(1.0 / 60)
            width = 2 / math.log(-math.log(prob))
            thinkplot.Plot(cdf.xs,
                           cdf.ps,
                           alpha=0.2,
                           linewidth=width,
                           color='blue',
                           label='')

        # plot the mixture and the distribution based on a point estimate
        thinkplot.PrePlot(2)
        #thinkplot.Cdf(self.point.MakeCdf(name='point').Scale(1.0/60))
        thinkplot.Cdf(self.mixture.MakeCdf(name='mix').Scale(1.0 / 60))

        thinkplot.Save(root=root,
                       xlabel='Wait time (min)',
                       ylabel='CDF',
                       formats=FORMATS,
                       axis=[0, 10, 0, 1])
Esempio n. 27
0
def MakeNormalModel(arrivalDelays):
    """Plot the CDF of arrival delays with a normal model.
       This is a modified copy from analytic.py
    """

    # estimate parameters: trimming outliers yields a better fit
    mu, var = thinkstats2.TrimmedMeanVar(arrivalDelays, p=0.01)
    print('Mean, Var', mu, var)

    # plot the model
    sigma = math.sqrt(var)
    print('Sigma', sigma)
    xs, ps = thinkstats2.RenderNormalCdf(mu, sigma, low=0, high=12.5)

    thinkplot.Plot(xs, ps, label='model', color='0.8')

    # plot the data
    cdf = thinkstats2.Cdf(arrivalDelays, label='data')

    thinkplot.PrePlot(1)
    thinkplot.Cdf(cdf)
    thinkplot.Save(root='NormalModel_arrivaldelay_model',
                   title='Arrival Delays',
                   xlabel='arrival delays (min)',
                   ylabel='CDF')
Esempio n. 28
0
def PlotCoefVariation(suites):
    """Plot the posterior distributions for CV.

    suites: map from label to Pmf of CVs.
    """
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    pmfs = {}
    for label, suite in suites.iteritems():
        pmf = CoefVariation(suite)
        print('CV posterior mean', pmf.Mean())
        cdf = thinkbayes.MakeCdfFromPmf(pmf, label)
        thinkplot.Cdf(cdf)

        pmfs[label] = pmf

    thinkplot.Save(root='variability_cv',
                   xlabel='Coefficient of variation',
                   ylabel='Probability')

    print('female bigger',
          thinkbayes.PmfProbGreater(pmfs['female'], pmfs['male']))
    print('male bigger', thinkbayes.PmfProbGreater(pmfs['male'],
                                                   pmfs['female']))
Esempio n. 29
0
def Sample(lam=2, iters=1000):
    """ Sampling dist. of L as an estimator of exp parameter
    """
    def VertLine(x, y=1):
        thinkplot.Plot([x, x], [0, y], color='0.8', linewidth=3)

    # repreat for multiple values of n
    for n in [5, 10, 15]:
        estimates = []

        for i in range(iters):
            xs = np.random.exponential(1 / lam, n)
            lamhat = 1 / np.mean(xs)
            estimates.append(lamhat)

        stderr = RMSE(estimates, lam)
        print("The standard error for n = {} is: {:.5f}".format(n, stderr))

        cdf = thinkstats2.Cdf(estimates)
        ci = cdf.Percentile(5), cdf.Percentile(95)
        print("The 90'%' confidence interval is: {}".format(ci))

        if n == 10:
            # set the lower/upper ends of confidence interval
            VertLine(ci[0])
            VertLine(ci[1])

            # plot the CDF
            thinkplot.Cdf(cdf)
            thinkplot.Config(xlabel='estimate',
                             ylabel='CDF',
                             title='Sampling Distribution')
Esempio n. 30
0
def RandomLiveWeightAnalysis(live):
    weight = live.totalwgt_lb
    live_cdf = thinkstats2.Cdf(weight, label="live")
    random_sample = np.random.choice(weight, 100, replace=True)
    ranks = [live_cdf.PercentileRank(x) for x in random_sample]
    rank_cdf = thinkstats2.Cdf(ranks)
    thinkplot.Cdf(rank_cdf)
    thinkplot.Show(xlabel="Percentile Rank", ylabel="CDF")