예제 #1
0
def SimulateSample(lam=2, n=10, m=1000):
    """Simulate the sampling distribution of an exponential-rate estimator.

    Runs m experiments. Each one draws n values from an exponential
    distribution with rate lam (scale 1 / lam) and estimates the rate as
    the reciprocal of the sample mean. Prints the standard error (RMSE of
    the estimates against lam) and the interval between the 5th and 95th
    percentiles of the estimates, then plots the CDF of the estimates with
    both interval bounds marked and hands the figure to thinkplot.Save
    under the root name 'estimation2'.

    lam: rate parameter of the exponential distribution
    n: number of values drawn per experiment
    m: number of simulated experiments

    returns: the standard error of the estimates
    """
    # one rate estimate per simulated experiment
    rate_estimates = [
        1 / np.mean(np.random.exponential(1 / lam, n))
        for _ in range(m)
    ]

    std_error = RMSE(rate_estimates, lam)
    print('standard error', std_error)

    sampling_cdf = thinkstats2.Cdf(rate_estimates)
    low = sampling_cdf.Percentile(5)
    high = sampling_cdf.Percentile(95)
    print('confidence interval', (low, high))

    # mark each end of the interval with a light gray vertical line
    # spanning the full CDF range
    for bound in (low, high):
        thinkplot.Plot([bound, bound], [0, 1], color='0.8', linewidth=3)

    # draw the CDF on top of the markers and write the figure out
    thinkplot.Cdf(sampling_cdf)
    thinkplot.Save(root='estimation2',
                   xlabel='estimate',
                   ylabel='CDF',
                   title='Sampling distribution')

    return std_error
예제 #2
0
def Estimate2(n=7, m=100000):
    """Compare the RMSE of biased and unbiased sample-variance estimators.

    Repeats m times: draw n values from a Gaussian with mean 0 and standard
    deviation 1, then compute the sample variance both with the default
    divisor (biased) and with ddof=1 (unbiased). Prints the RMSE of each
    set of estimates against the true variance.

    n: sample size
    m: number of iterations
    """
    mu, sigma = 0, 1
    true_variance = sigma**2

    biased_vars, unbiased_vars = [], []
    for _ in range(m):
        sample = [random.gauss(mu, sigma) for _ in range(n)]
        # same sample, two divisors: n (default) versus n - 1 (ddof=1)
        biased_vars.append(np.var(sample))
        unbiased_vars.append(np.var(sample, ddof=1))

    print('Experiment 2')
    print('RMSE biased', RMSE(biased_vars, true_variance))
    print('RMSE unbiased', RMSE(unbiased_vars, true_variance))
예제 #3
0
def SimulateManyGames(lam, iters=1000000):
    """Simulate many games and summarize the values SimulateGame returns.

    Calls SimulateGame(lam) once per iteration, prints the mean error and
    RMSE of the collected values relative to lam, and plots their CDF with
    vertical lines at the 5th and 95th percentiles.

    lam: value passed to SimulateGame and used as the reference for the
         error metrics
    iters: number of simulated games
    """
    lam_est = [SimulateGame(lam) for _ in np.arange(iters)]

    print('Mean Error =', MeanError(lam_est, lam))
    print('RMSE =', RMSE(lam_est, lam))

    lam_cdf = thinkstats2.Cdf(lam_est)
    ci = lam_cdf.Percentile(5), lam_cdf.Percentile(95)

    thinkplot.Cdf(lam_cdf)
    # light gray vertical markers at both ends of the percentile interval
    for bound in ci:
        thinkplot.Plot([bound, bound], [0, 1], linewidth=2, color='0.8')
    thinkplot.Config(xlabel='Goals per game', ylabel='CDF', legend=False)
예제 #4
0
def SampleDistrPLot(estimates, n, lam):
    """Plot the CDF of a set of estimates and report their spread.

    Builds a CDF of estimates labelled 'n=<n>', prints the RMSE of the
    estimates against lam together with the interval between their 5th and
    95th percentiles, then draws the CDF with a vertical line at each end
    of that interval.

    estimates: sequence of estimated values
    n: sample size, used for the CDF label and the printed summary
    lam: reference value the estimates are compared against
    """
    sampling_cdf = thinkstats2.Cdf(estimates, label='n=%d' % n)
    low = sampling_cdf.Percentile(5)
    high = sampling_cdf.Percentile(95)
    std_error = RMSE(estimates, lam)
    print('n=', n, 'Std Error=', std_error, 'Conf Int=', (low, high))

    thinkplot.Cdf(sampling_cdf)
    # light gray vertical markers at both ends of the percentile interval
    for bound in (low, high):
        thinkplot.Plot([bound, bound], [0, 1], color='0.8', linewidth=2)
예제 #5
0
def Estimate4(lam=2, m=1000000):
    """Simulate m games and report how the results compare with lam.

    Collects the value returned by SimulateGame(lam) for each of m runs,
    prints the RMSE and mean error of those values relative to lam, and
    shows a histogram of their PMF.

    lam: value passed to SimulateGame and used as the reference for the
         error metrics
    m: number of simulated games
    """
    results = [SimulateGame(lam) for _ in range(m)]

    print('Experiment 4')
    print('rmse L', RMSE(results, lam))
    print('mean error L', MeanError(results, lam))

    # histogram of the distribution of simulated results
    thinkplot.Hist(thinkstats2.Pmf(results))
    thinkplot.Show()