Esempi in Python per Cdf, esempi in Python per thinkplot.Cdf

Esempio n. 1

0

Mostra file

def main(script, filename='mystery0.dat'):
    """Draw six diagnostic views of a data file on a 2x3 grid and show them.

    script: first positional argument; not used by the body
    filename: name of the data file handed to ReadFile
    """
    data = ReadFile(filename)
    cdf = thinkstats2.Cdf(data)

    def draw_cdf_panel(index, title, **cdf_options):
        # thinkplot.Cdf returns a dict of options that is forwarded to Config
        thinkplot.SubPlot(index)
        axis_options = thinkplot.Cdf(cdf, **cdf_options)
        thinkplot.Config(title=title, **axis_options)

    thinkplot.PrePlot(rows=2, cols=3)

    # panel 1: CDF without any transform
    thinkplot.SubPlot(1)
    thinkplot.Cdf(cdf)
    thinkplot.Config(title='linear')

    draw_cdf_panel(2, 'logx', xscale='log')
    draw_cdf_panel(3, 'expo', transform='exponential')

    # panel 4: normal probability plot built from the raw data
    thinkplot.SubPlot(4)
    norm_xs, norm_ys = thinkstats2.NormalProbability(data)
    thinkplot.Plot(norm_xs, norm_ys)
    thinkplot.Config(title='normal')

    draw_cdf_panel(5, 'pareto', transform='pareto')
    draw_cdf_panel(6, 'weibull', transform='weibull')

    thinkplot.Show(legend=False)

Esempio n. 2

0

Mostra file

 def MakePlot(self):
     """Draw the CDF of each of the four stored distributions and show the figure."""
     # attributes are read one at a time, in the plotting order
     for attr in ('pmf_y', 'prior_zb', 'post_zb', 'pmf_mean_zb'):
         thinkplot.Cdf(getattr(self, attr).MakeCdf())
     thinkplot.Show()

Esempio n. 3

0

Mostra file

File: paintball.py Progetto: junghh21/ThinkBayes2-1

def MakePosteriorPlot(suite):
    """Plot the CDFs of the posterior marginals of alpha and beta.

    Prints CredibleInterval(50) for each marginal, then saves the figure
    under the root name 'paintball2'.

    suite: posterior joint distribution of location
    """
    # marginal 0 is named alpha, marginal 1 is named beta
    named_marginals = []
    for index, name in ((0, 'alpha'), (1, 'beta')):
        marginal = suite.Marginal(index)
        marginal.name = name
        named_marginals.append((name, marginal))

    for name, marginal in named_marginals:
        print(name + ' CI', marginal.CredibleInterval(50))

    thinkplot.PrePlot(num=2)

    for _, marginal in named_marginals:
        thinkplot.Cdf(thinkbayes2.MakeCdfFromPmf(marginal))

    thinkplot.Save('paintball2',
                   xlabel='Distance', ylabel='Prob', loc=4, formats=FORMATS)

Esempio n. 4

0

Mostra file

File: analytic.py Progetto: wu12345/ThinkStats2

def MakeBabyBoom():
    """Plot the CDF of interarrival times and compare it with an exponential model.

    Saves two figures:
      'analytic_interarrivals': the CDF next to the CCDF on a log-y scale.
      'analytic_interarrivals_model': the CCDF of the data and of a simulated
          exponential sample of the same length, on a log-y scale.
    """
    # compute the interarrival times
    df = ReadBabyBoom()
    diffs = df.minutes.diff()
    cdf = thinkstats2.Cdf(diffs, label='actual')

    thinkplot.PrePlot(cols=2)
    thinkplot.Cdf(cdf)
    thinkplot.Config(xlabel='minutes', ylabel='CDF', legend=False)

    thinkplot.SubPlot(2)
    thinkplot.Cdf(cdf, complement=True)
    thinkplot.Config(xlabel='minutes',
                     ylabel='CCDF',
                     yscale='log',
                     legend=False)

    thinkplot.Save(root='analytic_interarrivals')

    n = len(diffs)
    # Rate in events per minute, to match the 'minutes' axis: 44 events over
    # 24 hours of 60 minutes (presumably 44 births in one day -- confirm
    # against the dataset).  The rate must be divided by 60, not multiplied,
    # because random.expovariate takes 1 / (mean interarrival time).
    lam = 44 / (24 * 60.0)
    sample = [random.expovariate(lam) for _ in range(n)]
    model = thinkstats2.Cdf(sample, label='model')

    thinkplot.PrePlot(2)
    thinkplot.Cdfs([cdf, model], complement=True)
    thinkplot.Save(root='analytic_interarrivals_model',
                   title='Time between births',
                   xlabel='minutes',
                   ylabel='CCDF',
                   yscale='log')

Esempio n. 5

0

Mostra file

def main():
    """Draw six diagnostic views of 'mystery0.dat' on a 2x3 grid and show them."""
    filename = 'mystery0.dat'
    data = read_file(filename)
    cdf = thinkstats2.MakeCdfFromList(data)

    def draw_cdf_panel(index, title, **cdf_options):
        # thinkplot.Cdf returns a dict of options that is forwarded to Config
        thinkplot.SubPlot(2, 3, index)
        axis_options = thinkplot.Cdf(cdf, **cdf_options)
        thinkplot.Config(title=title, **axis_options)

    # panel 1: CDF without any transform
    thinkplot.SubPlot(2, 3, 1)
    thinkplot.Cdf(cdf)
    thinkplot.Config(title='linear')

    draw_cdf_panel(2, 'logx', xscale='log')
    draw_cdf_panel(3, 'expo', transform='exponential')

    # panel 4: normal probability plot built from the raw data
    thinkplot.SubPlot(2, 3, 4)
    norm_xs, norm_ys = thinkstats2.NormalProbability(data)
    thinkplot.Plot(norm_xs, norm_ys)
    thinkplot.Config(title='normal')

    draw_cdf_panel(5, 'pareto', transform='pareto')
    draw_cdf_panel(6, 'weibull', transform='weibull')

    thinkplot.Show()

Esempio n. 6

0

Mostra file

File: test_models.py Progetto: RachelONelson/DSC530

def main(script, filename='mystery0.dat'):
    """Draw six labelled diagnostic views of a data file on a 2x3 grid.

    script: first positional argument; not used by the body
    filename: name of the data file handed to ReadFile; also used as the
        label of the first panel's curve
    """
    data = ReadFile(filename)
    cdf = thinkstats2.Cdf(data)

    def draw_cdf_panel(index, title, ylabel, **cdf_options):
        # thinkplot.Cdf returns a dict of options that is forwarded to Config
        thinkplot.SubPlot(index)
        axis_options = thinkplot.Cdf(cdf, color='C0', **cdf_options)
        thinkplot.Config(title=title, ylabel=ylabel, **axis_options)

    thinkplot.PrePlot(num=6, rows=2, cols=3)

    # panel 1: CDF without any transform
    thinkplot.SubPlot(1)
    thinkplot.Cdf(cdf, color='C0', label=filename)
    thinkplot.Config(title='CDF on linear scale', ylabel='CDF')

    draw_cdf_panel(2, 'CDF on log-x scale', 'CDF', xscale='log')
    draw_cdf_panel(3, 'CCDF on log-y scale', 'log CCDF',
                   transform='exponential')

    # panel 4: normal probability plot built from the raw data
    thinkplot.SubPlot(4)
    norm_xs, norm_ys = thinkstats2.NormalProbability(data)
    thinkplot.Plot(norm_xs, norm_ys, color='C0')
    thinkplot.Config(title='Normal probability plot',
                     xlabel='random normal',
                     ylabel='data')

    draw_cdf_panel(5, 'CCDF on log-log scale', 'log CCDF',
                   transform='pareto')
    draw_cdf_panel(6, 'CCDF on loglog-y log-x scale', 'log log CCDF',
                   transform='weibull')

    thinkplot.Show(legend=False)

Esempio n. 7

0

Mostra file

File: modification.py Progetto: ericasaywhat/Epidemix

def generate_cdf(fb, hk):
    """Plot two degree CDFs on a log-x axis and save them as a PNG.

    fb: object passed to degrees(); its curve is labelled "Facebook CDF"
    hk: object passed to degrees(); its curve is labelled "RPA CDF"
    """
    # build both CDFs before drawing anything
    fb_cdf, hk_cdf = (Cdf(degrees(source)) for source in (fb, hk))

    thinkplot.Cdf(fb_cdf, color='gray', label="Facebook CDF")
    thinkplot.Cdf(hk_cdf, label='RPA CDF')

    axis_options = dict(xlabel='degree', xscale='log', ylabel='CDF')
    thinkplot.config(**axis_options)

    plt.savefig('CDFGraphs_Modified.png')

Esempio n. 8

0

Mostra file

File: modification.py Progetto: ericasaywhat/Epidemix

def generate_ccdf(fb, hk):
    """Plot two degree CCDFs on log-log axes and save them as a PNG.

    fb: object passed to degrees(); its curve is labelled "Facebook CCDF"
    hk: object passed to degrees(); its curve is labelled "RPA CCDF"
    """
    # build both CDFs before drawing anything
    fb_cdf, hk_cdf = (Cdf(degrees(source)) for source in (fb, hk))

    # complement=True asks thinkplot to draw the complementary CDF
    thinkplot.Cdf(fb_cdf, label='Facebook CCDF', color='gray', complement=True)
    thinkplot.Cdf(hk_cdf, label="RPA CCDF", complement=True)

    axis_options = dict(xlabel='degree', xscale='log',
                        ylabel='CCDF', yscale='log')
    thinkplot.config(**axis_options)

    plt.savefig("CCDFGraphs_Modified.png")

Esempio n. 9

0

Mostra file

File: populations.py Progetto: UnderPaidMathematician/ThinkStats2

def MakeFigures():
    """Plot the distribution of populations against two candidate models.

    Saves 'populations_pareto' (CCDF of log10 population on a log-y scale,
    drawn over a Pareto model curve) and 'populations_normal' (CDF of log10
    population over a normal model curve, next to a normal probability plot).

    Reading the figures:
    - On a log-log scale the tail of the CCDF looks like a straight line,
      which hints at a Pareto distribution; that impression is misleading.
    - On a log-x scale the CDF shows the sigmoid shape typical of a
      lognormal distribution.
    - The normal probability plot of log(sizes) confirms a very good fit
      to the lognormal model.

    Many phenomena described with Pareto models can be described as well,
    or better, with lognormal models.
    """
    pops = ReadData()
    print('Number of cities/towns', len(pops))

    log_pops = np.log10(pops)
    cdf = thinkstats2.Cdf(pops, label='data')
    cdf_log = thinkstats2.Cdf(log_pops, label='data')

    # --- Pareto figure: model curve first, data CCDF on top ---
    pareto_xs, pareto_ps = thinkstats2.RenderParetoCdf(
        xmin=5000, alpha=1.4, low=0, high=1e7)
    thinkplot.Plot(np.log10(pareto_xs), 1 - pareto_ps,
                   label='model', color='0.8')

    thinkplot.Cdf(cdf_log, complement=True)
    thinkplot.Config(xlabel='log10 population', ylabel='CCDF', yscale='log')
    thinkplot.Save(root='populations_pareto')

    # --- lognormal figure: two panels side by side ---
    thinkplot.PrePlot(cols=2)

    # fit the normal model to the log-transformed populations
    mu = log_pops.mean()
    sigma = log_pops.std()
    normal_xs, normal_ps = thinkstats2.RenderNormalCdf(
        mu, sigma, low=0, high=8)
    thinkplot.Plot(normal_xs, normal_ps, label='model', color='0.8')

    thinkplot.Cdf(cdf_log)
    thinkplot.Config(xlabel='log10 population', ylabel='CDF')

    thinkplot.SubPlot(2)
    thinkstats2.NormalProbabilityPlot(log_pops, label='data')
    thinkplot.Config(xlabel='z', ylabel='log10 population', xlim=[-5, 5])

    thinkplot.Save(root='populations_normal')

Esempio n. 10

0

Mostra file

File: hinc.py Progetto: seppomerimaa/ThinkStats2

def main():
    """Plot the CDF of the 'ps' column on log-x and Pareto axes, then print the data."""
    df = ReadData()
    cdf = thinkstats2.Cdf(df['ps'])

    thinkplot.PrePlot(rows=1, cols=2)

    # (subplot index, title, options for thinkplot.Cdf)
    panels = [
        (1, 'logx', {'xscale': 'log'}),
        (2, 'pareto', {'transform': 'pareto'}),
    ]
    for index, title, cdf_options in panels:
        thinkplot.SubPlot(index)
        # thinkplot.Cdf returns a dict of options that is forwarded to Config
        axis_options = thinkplot.Cdf(cdf, **cdf_options)
        thinkplot.Config(title=title, **axis_options)

    thinkplot.show(legend=False)

    print(df)

Esempio n. 11

0

Mostra file

File: hinc2.py Progetto: wu12345/ThinkStats2

def main():
    """Plot and show the CDF of the interpolated log-income sample."""
    log_sample = InterpolateSample(hinc.ReadData(), log_upper=6.0)

    thinkplot.Cdf(thinkstats2.Cdf(log_sample))

    axis_labels = dict(xlabel='household income', ylabel='CDF')
    thinkplot.Show(**axis_labels)

Esempio n. 12

0

Mostra file

File: thinkstats2_test.py Progetto: Patsonstats/ThinkStats2-1

 def testPmfMax(self):
     """Check Max(6) of the sum of three six-sided dice at the value 14."""
     die = thinkstats2.Pmf(range(1, 7))
     # left-to-right addition: (die + die) + die
     three_dice = die + die + die
     max_cdf = three_dice.Max(6)
     thinkplot.Cdf(max_cdf)
     self.assertAlmostEqual(max_cdf[14], 0.558230962626)

Esempio n. 13

0

Mostra file

    def SimulateSample(self, n=9, m=1000):
        """Plot the sampling distribution of the sample mean.

        Samples are drawn with genextreme.rvs using self.shape, self.loc and
        self.scale; the RMSE of the sample means is computed against self.loc.

        n: sample size
        m: number of iterations
        """
        # one sample mean per iteration
        means = [
            np.mean(genextreme.rvs(c=self.shape, loc=self.loc,
                                   scale=self.scale, size=n))
            for _ in range(m)
        ]

        stderr = self.RMSE(means, self.loc)
        print('Erro Padrão', stderr)

        cdf = thinkstats2.Cdf(means)
        ci = cdf.Percentile(5), cdf.Percentile(95)
        print('Intervalo de Confiança: ', ci)

        # grey vertical line from 0 to 1 at each end of the interval
        for bound in ci:
            thinkplot.Plot([bound, bound], [0, 1], color='0.8', linewidth=3)

        # plot the CDF
        thinkplot.Cdf(cdf)

Esempio n. 14

0

Mostra file

def SimulateSample(mu=90, sigma=7.5, n=9, m=1000):
    """Plot the sampling distribution of the mean of normal samples.

    mu: mean passed to np.random.normal; also the reference value for RMSE
    sigma: standard deviation passed to np.random.normal
    n: sample size
    m: number of iterations

    Saves the figure under the root name 'estimation1'.
    """
    # one sample mean per iteration
    means = [np.mean(np.random.normal(mu, sigma, n)) for _ in range(m)]

    stderr = RMSE(means, mu)
    print('standard error', stderr)

    cdf = thinkstats2.MakeCdfFromList(means)
    ci = cdf.Percentile(5), cdf.Percentile(95)
    print('confidence interval', ci)

    # grey vertical line from 0 to 1 at each end of the interval
    for bound in ci:
        thinkplot.Plot([bound, bound], [0, 1], color='0.8', linewidth=3)

    # plot the CDF
    thinkplot.Cdf(cdf)
    thinkplot.Save(root='estimation1',
                   xlabel='sample mean', ylabel='CDF',
                   title='Sampling distribution')

Esempio n. 15

0

Mostra file

File: hockey.py Progetto: anishpurohit/dropbox

def ProcessScoresPairwise(pairs):
    """Plot and summarize the average goals of each team against each opponent.

    Shows the CDF of the per-matchup averages, prints their mean and standard
    deviation, and prints the raw entries for the ('BOS', 'VAN') matchup.

    pairs: map from (team1, team2) to a list of (score1, score2) entries;
        must contain the key ('BOS', 'VAN'), otherwise the final lookup
        raises KeyError
    """
    # map from (team1, team2) to list of goals scored
    goals_scored = {}
    # .items() exists on both Python 2 and Python 3 dicts; .iteritems() is
    # Python 2 only
    for (t1, t2), entries in pairs.items():
        for g1, g2 in entries:
            goals_scored.setdefault((t1, t2), []).append(g1)
            goals_scored.setdefault((t2, t1), []).append(g2)

    # make a list of average goals scored, skipping matchups with fewer
    # than three recorded scores
    lams = [thinkstats.Mean(goals)
            for goals in goals_scored.values()
            if len(goals) >= 3]

    # make the distribution of average goals scored
    cdf = thinkbayes.MakeCdfFromList(lams)
    thinkplot.Cdf(cdf)
    thinkplot.Show()

    mu, var = thinkstats.MeanVar(lams)
    print('mu, sig', mu, math.sqrt(var))

    print('BOS v VAN', pairs['BOS', 'VAN'])

Esempio n. 16

0

Mostra file

def ProcessScoresTeamwise(pairs):
    """Plot and summarize the average number of goals scored by each team.

    Shows the CDF of the per-team averages and prints their mean and
    standard deviation.

    pairs: map from (team1, team2) to a list of (score1, score2) entries
    """
    # map from team to list of goals scored
    goals_scored = {}
    # .items() exists on both Python 2 and Python 3 dicts; .iteritems() is
    # Python 2 only
    for (t1, t2), entries in pairs.items():
        for g1, g2 in entries:
            goals_scored.setdefault(t1, []).append(g1)
            goals_scored.setdefault(t2, []).append(g2)

    # make a list of average goals scored (the team key is not needed)
    lams = [thinkbayes2.Mean(goals) for goals in goals_scored.values()]

    # make the distribution of average goals scored
    cdf = thinkbayes2.MakeCdfFromList(lams)
    thinkplot.Cdf(cdf)
    thinkplot.Show()

    mu, var = thinkbayes2.MeanVar(lams)
    print('mu, sig', mu, math.sqrt(var))

Esempio n. 17

0

Mostra file

def SimulateSample(lam=2, n=10, m=1000):
    """Plot the sampling distribution of 1/mean as an estimate of lam.

    Each iteration draws n values with np.random.exponential(1 / lam, n)
    and records the reciprocal of their mean.

    lam: parameter of an exponential distribution
    n: sample size
    m: number of iterations

    Returns the RMSE of the estimates relative to lam and saves the figure
    under the root name 'estimation2'.
    """
    # one estimate per iteration
    estimates = [1 / np.mean(np.random.exponential(1 / lam, n))
                 for _ in range(m)]

    stderr = RMSE(estimates, lam)
    print('standard error', stderr)

    cdf = thinkstats2.Cdf(estimates)
    ci = cdf.Percentile(5), cdf.Percentile(95)
    print('confidence interval', ci)

    # grey vertical line from 0 to 1 at each end of the interval
    for bound in ci:
        thinkplot.Plot([bound, bound], [0, 1], color='0.8', linewidth=3)

    # plot the CDF
    thinkplot.Cdf(cdf)
    thinkplot.Save(root='estimation2',
                   xlabel='estimate', ylabel='CDF',
                   title='Sampling distribution')

    return stderr

Esempio n. 18

0

Mostra file

File: estimation.py Progetto: kangxi5200/Think-Stats2

def SimulateSample(mu=90, sigma=7.5, n=9, m=1000):
    """Plot the sampling distribution of the sample mean.

    mu: hypothetical population mean
    sigma: hypothetical population standard deviation
    n: sample size
    m: number of iterations

    Prints the RMSE of the sample means relative to mu and their 5th and
    95th percentiles, then saves the figure under the root name
    'estimation1'.
    """
    # one sample mean per iteration
    means = [np.mean(np.random.normal(mu, sigma, n)) for _ in range(m)]

    stderr = RMSE(means, mu)
    print('standard error', stderr)

    cdf = thinkstats2.Cdf(means)
    ci = cdf.Percentile(5), cdf.Percentile(95)
    print('confidence interval', ci)

    # grey vertical line from 0 to 1 at each end of the interval
    for bound in ci:
        thinkplot.Plot([bound, bound], [0, 1], color='0.8', linewidth=3)

    # plot the CDF
    thinkplot.Cdf(cdf)
    thinkplot.Save(root='estimation1',
                   xlabel='sample mean', ylabel='CDF',
                   title='Sampling distribution')

Esempio n. 19

0

Mostra file

def main():
    """Show four views of 'mystery0.dat' on a 2x2 grid.

    The panels are the raw PMF as a histogram, a histogram of binned data,
    a KDE-based PMF, and the CDF.
    """
    filename = 'mystery0.dat'
    data = read_file(filename)

    pmf = thinkstats2.MakePmfFromList(data)
    cdf = thinkstats2.MakeCdfFromList(data)

    # evaluate the estimated density at 101 points across the data range
    pdf = thinkstats2.EstimatedPdf(data)
    low, high = min(data), max(data)
    grid = numpy.linspace(low, high, 101)
    kde_pmf = pdf.MakePmf(grid)

    bin_data = BinData(data, low, high, 51)
    bin_pmf = thinkstats2.MakePmfFromList(bin_data)

    # (drawing function, distribution, extra options, panel title)
    panels = [
        (thinkplot.Hist, pmf, {'width': 0.1}, 'Naive Pmf'),
        (thinkplot.Hist, bin_pmf, {}, 'Binned Hist'),
        (thinkplot.Pmf, kde_pmf, {}, 'KDE PDF'),
        (thinkplot.Cdf, cdf, {}, 'CDF'),
    ]
    for index, (draw, dist, options, title) in enumerate(panels, start=1):
        thinkplot.SubPlot(2, 2, index)
        draw(dist, **options)
        thinkplot.Config(title=title)

    thinkplot.Show()

Esempio n. 20

0

Mostra file

File: hinc2.py Progetto: seppomerimaa/ThinkStats2

def main():
    """Print summary statistics of the interpolated log-income sample and plot its CDF."""
    df = hinc.ReadData()
    log_sample = InterpolateSample(df, log_upper=6.0)

    log_cdf = thinkstats2.Cdf(log_sample)

    # each statistic is computed immediately before it is printed
    median = thinkstats2.Median(log_sample)
    print("median", median)

    median_skew = thinkstats2.PearsonMedianSkewness(log_sample)
    print("pearson's median skewness", median_skew)

    skew = thinkstats2.Skewness(log_sample)
    print("skewness", skew)

    mean = log_cdf.Mean()
    print("mean", mean)

    print(
        "the higher our log_upper, the more right-skewed (according to g_1) or at least less left-skewed (according to g_p) things get"
    )
    print("the mean moves to the right a bit, too.")

    # fraction of the sample at or below the mean, according to the CDF
    below_mean = log_cdf.Prob(log_cdf.Mean())
    print("proportion of the population with income < mean", below_mean)
    print(
        "the higher the upper bound, the greater the proprtion below the mean."
    )

    thinkplot.Cdf(log_cdf)
    thinkplot.Show(xlabel='household income', ylabel='CDF')

Esempio n. 21

0

Mostra file

File: estimate1.py Progetto: wu12345/ThinkStats2

def SimulateSample(mu=90, sigma=7.5, n=9, m=1000):
    """Simulate the sampling distribution of the sample mean and plot its CDF.

    mu: mean passed to random.gauss; also the reference value for RMSE
    sigma: standard deviation passed to random.gauss
    n: size of each sample
    m: number of simulated samples

    Prints the RMSE of the sample means relative to mu and the 5th and 95th
    percentiles of their distribution, then saves the CDF plot under the
    root name 'estimate1'.
    """
    means = []
    for _ in range(m):
        xs = [random.gauss(mu, sigma) for _ in range(n)]
        means.append(numpy.mean(xs))

    # print() with a single pre-formatted argument emits the same text on
    # Python 2 and Python 3; the former print statements only parse on
    # Python 2
    print('rmse {}'.format(RMSE(means, mu)))

    cdf = thinkstats2.MakeCdfFromList(means)
    print('confidence interval {} {}'.format(cdf.Percentile(5),
                                             cdf.Percentile(95)))

    # estimate the PDF by KDE over mu +/- 3 analytic standard errors
    # (the resulting pmf is not plotted)
    pdf = thinkstats2.EstimatedPdf(means)
    stderr = sigma / math.sqrt(n)
    vals = numpy.linspace(mu - 3 * stderr, mu + 3 * stderr, 101)
    pmf = pdf.MakePmf(vals)

    # plot the CDF
    thinkplot.Cdf(cdf)
    thinkplot.Save(root='estimate1',
                   xlabel='sample mean',
                   ylabel='CDF',
                   title='Sampling distribution')

Esempio n. 22

0

Mostra file

File: ch05.py Progetto: qrsforever/workspace

def CH5_5():
    """Compare three ways of computing the maximum of three six-sided dice.

    Rolls three six-sided dice, computes the distribution of their maximum
    with the three approaches below, and plots the results together:

    simulation:
    enumeration:
    exponentiation:
    """
    d6 = Die(6)
    num_dice = 3

    # simulation
    num_samples = 1000
    dists = [d6] * num_dice
    sim_pmf = SampleMax(dists, num_samples)
    sim_pmf.name = 'sim'
    thinkplot.Pmf(sim_pmf)

    # enumeration (k m^2): fold in one die at a time
    enum_pmf = PmfMax(d6, d6)
    print("pmf1.Total() = %.3f" % enum_pmf.Total())
    enum_pmf = PmfMax(enum_pmf, d6)
    print("pmf2.Total() = %.3f" % enum_pmf.Total())
    enum_pmf.name = 'enum'
    thinkplot.Pmf(enum_pmf)

    # CDF (max via exponentiation) TODO: not fully understood yet
    max_cdf = d6.Max(num_dice)
    max_cdf.name = "expo"
    thinkplot.Cdf(max_cdf)

    thinkplot.Show(xlabel='max([d6]*3)', ylabel='probablity')

Esempio n. 23

0

Mostra file

File: chap8ex.py Progetto: seppomerimaa/ThinkStats2

def ex3():
    """Simulate games and show four panels about the resulting estimates.

    Panels: histogram of 1000 simulated results for lam=4, their CDF with
    the 5th and 95th percentiles marked, RMSE as a function of lam, and
    RMSE as a function of the number of simulated games.
    """
    lam = 4
    goal_totals = [SimulateGame(lam=lam) for _ in range(1000)]
    print('RMSE', RMSE(goal_totals, lam))
    hist = thinkstats2.Hist(goal_totals)
    cdf = thinkstats2.Cdf(goal_totals)

    thinkplot.PrePlot(rows=2, cols=2)
    thinkplot.SubPlot(1)
    thinkplot.Hist(hist)

    thinkplot.SubPlot(2)
    thinkplot.Cdf(cdf)
    # grey vertical line from 0 to 1 at the 5th and 95th percentiles
    for percent in (5, 95):
        x = cdf.Percentile(percent)
        thinkplot.Plot([x, x], [0, 1], color='0.8', linewidth=3)

    thinkplot.SubPlot(3)

    # lambda vs. rmse
    # rmse goes up as lambda goes up
    lams = range(1, 15)
    rmses = []
    for rate in lams:
        games = [SimulateGame(lam=rate) for _ in range(1000)]
        rmses.append(RMSE(games, rate))
    thinkplot.Plot(lams, rmses)

    thinkplot.SubPlot(4)

    # m vs. rmse
    # maybe rmse very slowly goes down as m goes up?
    # not at all clear that's really the case...
    ms = np.arange(10, 1000, 10)
    rmses = []
    for m in ms:
        games = [SimulateGame() for _ in range(m)]
        rmses.append(RMSE(games, 4))
    thinkplot.Plot(ms, rmses)

    thinkplot.show()

Esempio n. 24

0

Mostra file

def TestGte():
    """Exercise GapTimeEstimator on generated passenger data and plot the result."""
    random.seed(17)

    xs = [60, 120, 240]

    # five gaps of 60, three of 120, two of 240
    gap_times = [60] * 5 + [120] * 3 + [240] * 2

    # distribution of gap time (z)
    pmf_z = thinkbayes.EstimatedPdf(gap_times).MakePmf(xs, name="z")

    wtc = WaitTimeCalculator(pmf_z, inverse=False)

    arrival_rate = 0.0333
    num_samples = 100
    passenger_data = wtc.GenerateSamplePassengers(arrival_rate, num_samples)

    pcounts = [0, 0, 0]

    estimator = GapTimeEstimator(xs, pcounts, passenger_data)

    thinkplot.Clf()

    thinkplot.Cdf(wtc.pmf_zb.MakeCdf(name="actual zb"))
    estimator.MakePlot()

Esempio n. 25

0

Mostra file

def SimulateSample(lam=2, n=10, m=1000):
    """Plot the sampling distribution of 1/mean as an estimate of lam.

    lam: rate whose reciprocal is the scale given to np.random.exponential
    n: sample size
    m: number of iterations

    Returns the RMSE of the estimates relative to lam and saves the figure
    under the root name 'estimation2'.
    """
    # one estimate per iteration
    estimates = [1.0 / np.mean(np.random.exponential(1.0/lam, n))
                 for _ in range(m)]

    stderr = RMSE(estimates, lam)
    print('standard error', stderr)

    cdf = thinkstats2.MakeCdfFromList(estimates)
    ci = cdf.Percentile(5), cdf.Percentile(95)
    print('confidence interval', ci)

    # grey vertical line from 0 to 1 at each end of the interval
    for bound in ci:
        thinkplot.Plot([bound, bound], [0, 1], color='0.8', linewidth=3)

    # plot the CDF
    thinkplot.Cdf(cdf)
    thinkplot.Save(root='estimation2',
                   xlabel='estimate', ylabel='CDF',
                   title='Sampling distribution')

    return stderr

Esempio n. 26

0

Mostra file

    def MakePlot(self, root='redline4'):
        """Plot every component of the meta-Pmf together with the mixture.

        root: root name handed to thinkplot.Save
        """
        thinkplot.Clf()

        # faint blue line for each component of the MetaPmf; the line width
        # is derived from the component's probability
        component_style = dict(alpha=0.2, color='blue', label='')
        for component, prob in sorted(self.metapmf.Items()):
            # scaling by 1/60 matches the 'Wait time (min)' axis label
            # (presumably seconds to minutes -- confirm)
            component_cdf = component.MakeCdf().Scale(1.0 / 60)
            width = 2 / math.log(-math.log(prob))
            thinkplot.Plot(component_cdf.xs, component_cdf.ps,
                           linewidth=width, **component_style)

        # plot the mixture
        thinkplot.PrePlot(2)
        mixture_cdf = self.mixture.MakeCdf(name='mix').Scale(1.0 / 60)
        thinkplot.Cdf(mixture_cdf)

        thinkplot.Save(root=root,
                       xlabel='Wait time (min)',
                       ylabel='CDF',
                       formats=FORMATS,
                       axis=[0, 10, 0, 1])

Esempio n. 27

0

Mostra file

def MakeNormalModel(arrivalDelays):
    """Plot the CDF of arrival delays on top of a fitted normal model.

    This is a modified copy from analytic.py.

    arrivalDelays: delay values used both to fit the model and to build
        the data CDF

    Saves the figure under the root name 'NormalModel_arrivaldelay_model'.
    """
    # estimate parameters: trimming outliers yields a better fit
    mu, var = thinkstats2.TrimmedMeanVar(arrivalDelays, p=0.01)
    print('Mean, Var', mu, var)

    sigma = math.sqrt(var)
    print('Sigma', sigma)

    # draw the model curve first, in light grey
    model_xs, model_ps = thinkstats2.RenderNormalCdf(
        mu, sigma, low=0, high=12.5)
    thinkplot.Plot(model_xs, model_ps, label='model', color='0.8')

    # draw the data on top
    data_cdf = thinkstats2.Cdf(arrivalDelays, label='data')

    thinkplot.PrePlot(1)
    thinkplot.Cdf(data_cdf)

    save_options = dict(root='NormalModel_arrivaldelay_model',
                        title='Arrival Delays',
                        xlabel='arrival delays (min)',
                        ylabel='CDF')
    thinkplot.Save(**save_options)

Esempio n. 28

0

Mostra file

def PlotCoefVariation(suites):
    """Plot the posterior distributions for CV.

    Draws one CDF per entry of `suites`, saves the figure under the root
    name 'variability_cv', and prints the probability that each of the
    'female' and 'male' CVs exceeds the other.

    suites: map from label to suite; must contain the labels 'female' and
        'male', otherwise the final comparison raises KeyError
    """
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)

    pmfs = {}
    # .items() exists on both Python 2 and Python 3 dicts; .iteritems() is
    # Python 2 only
    for label, suite in suites.items():
        pmf = CoefVariation(suite)
        print('CV posterior mean', pmf.Mean())
        cdf = thinkbayes.MakeCdfFromPmf(pmf, label)
        thinkplot.Cdf(cdf)

        # keep the Pmf for the pairwise comparison below
        pmfs[label] = pmf

    thinkplot.Save(root='variability_cv',
                   xlabel='Coefficient of variation',
                   ylabel='Probability')

    print('female bigger',
          thinkbayes.PmfProbGreater(pmfs['female'], pmfs['male']))
    print('male bigger', thinkbayes.PmfProbGreater(pmfs['male'],
                                                   pmfs['female']))

Esempio n. 29

0

Mostra file

def Sample(lam=2, iters=1000):
    """Report the sampling distribution of 1/mean as an estimate of lam.

    For n = 5, 10 and 15, prints the RMSE of the estimates and their 5th
    and 95th percentiles; the CDF is plotted only for n = 10.

    lam: rate whose reciprocal is the scale given to np.random.exponential
    iters: number of simulated samples per value of n
    """
    # repeat for multiple values of n
    for n in [5, 10, 15]:
        # one estimate per iteration
        estimates = [1 / np.mean(np.random.exponential(1 / lam, n))
                     for _ in range(iters)]

        stderr = RMSE(estimates, lam)
        print("The standard error for n = {} is: {:.5f}".format(n, stderr))

        cdf = thinkstats2.Cdf(estimates)
        ci = cdf.Percentile(5), cdf.Percentile(95)
        print("The 90'%' confidence interval is: {}".format(ci))

        # only the n == 10 case is drawn
        if n != 10:
            continue

        # grey vertical line from 0 to 1 at each end of the interval
        for bound in ci:
            thinkplot.Plot([bound, bound], [0, 1], color='0.8', linewidth=3)

        # plot the CDF
        thinkplot.Cdf(cdf)
        thinkplot.Config(xlabel='estimate',
                         ylabel='CDF',
                         title='Sampling Distribution')

Esempio n. 30

0

Mostra file

File: chap04Ex.py Progetto: pansh94/Maths_Stats

def RandomLiveWeightAnalysis(live):
    """Plot the CDF of percentile ranks of 100 resampled weights.

    live: object whose totalwgt_lb attribute holds the weights
    """
    weights = live.totalwgt_lb
    weight_cdf = thinkstats2.Cdf(weights, label="live")

    # draw 100 weights with replacement and look up each one's rank
    resampled = np.random.choice(weights, 100, replace=True)
    ranks = list(map(weight_cdf.PercentileRank, resampled))

    thinkplot.Cdf(thinkstats2.Cdf(ranks))
    thinkplot.Show(xlabel="Percentile Rank", ylabel="CDF")