def SimulateSample(lam=2, n=10, m=1000):
    """Simulate the sampling distribution of L = 1 / mean(xs).

    Repeats the experiment m times: draw n values from an exponential
    distribution with rate lam and estimate the rate as the reciprocal of
    the sample mean.  Prints the standard error (RMSE relative to lam) and
    the 5th-95th percentile interval of the estimates, draws the CDF of
    the estimates with the interval marked, and hands the figure to
    thinkplot.Save under the root name 'estimation2'.

    lam: rate parameter of the exponential distribution
    n: number of values in each sample
    m: number of simulated samples

    returns: RMSE of the estimates relative to lam
    """
    # np.random.exponential is parameterized by scale (the mean), which is
    # the reciprocal of the rate, hence 1 / lam.
    estimates = [
        1 / np.mean(np.random.exponential(1 / lam, n))
        for _ in range(m)
    ]

    stderr = RMSE(estimates, lam)
    print('standard error', stderr)

    cdf = thinkstats2.Cdf(estimates)
    ci = cdf.Percentile(5), cdf.Percentile(95)
    print('confidence interval', ci)

    # Light gray full-height markers at both ends of the interval.
    for bound in ci:
        thinkplot.Plot([bound, bound], [0, 1], color='0.8', linewidth=3)

    # The CDF itself goes on top of the markers.
    thinkplot.Cdf(cdf)
    thinkplot.Save(root='estimation2',
                   xlabel='estimate',
                   ylabel='CDF',
                   title='Sampling distribution')

    return stderr
def Estimate2(n=7, m=100000):
    """Compare the RMSE of the biased and unbiased variance estimators.

    Draws m samples of size n from a Gaussian with mean 0 and standard
    deviation 1, computes the sample variance of each with np.var using
    the default ddof (biased) and ddof=1 (unbiased), and prints the RMSE
    of each set of estimates relative to the true variance.

    n: sample size
    m: number of iterations
    """
    mu, sigma = 0, 1

    biased_estimates = []
    unbiased_estimates = []
    for _ in range(m):
        sample = [random.gauss(mu, sigma) for _ in range(n)]
        biased_estimates.append(np.var(sample))
        unbiased_estimates.append(np.var(sample, ddof=1))

    # Both estimators are scored against the same target.
    true_variance = sigma**2

    print('Experiment 2')
    print('RMSE biased', RMSE(biased_estimates, true_variance))
    print('RMSE unbiased', RMSE(unbiased_estimates, true_variance))
def SimulateManyGames(lam, iters=1000000):
    """Run SimulateGame many times and summarize the resulting estimates.

    Collects the value returned by SimulateGame(lam) for each iteration,
    prints the mean error and RMSE of those values relative to lam, and
    plots their CDF with light gray vertical lines at the 5th and 95th
    percentiles.

    lam: rate passed to SimulateGame and used as the reference value for
         the error metrics
    iters: number of games to simulate
    """
    lam_est = [SimulateGame(lam) for _ in np.arange(iters)]

    print('Mean Error =', MeanError(lam_est, lam))
    print('RMSE =', RMSE(lam_est, lam))

    lam_cdf = thinkstats2.Cdf(lam_est)
    ci = lam_cdf.Percentile(5), lam_cdf.Percentile(95)

    thinkplot.Cdf(lam_cdf)
    # Mark both ends of the 5th-95th percentile interval.
    for bound in ci:
        thinkplot.Plot([bound, bound], [0, 1], linewidth=2, color='0.8')
    thinkplot.Config(xlabel='Goals per game', ylabel='CDF', legend=False)
def SampleDistrPLot(estimates, n, lam):
    """Report and plot the sampling distribution of a set of estimates.

    Builds a CDF of the estimates labelled 'n=<n>', prints the standard
    error (RMSE relative to lam) together with the 5th-95th percentile
    interval, then plots the CDF with light gray vertical lines at both
    ends of that interval.

    estimates: sequence of estimated values
    n: sample size, used for the CDF label and the printed summary
    lam: reference value the estimates are scored against
    """
    cdf = thinkstats2.Cdf(estimates, label='n=%d' % n)
    conf_int = cdf.Percentile(5), cdf.Percentile(95)

    stderr = RMSE(estimates, lam)
    print('n=', n, 'Std Error=', stderr, 'Conf Int=', conf_int)

    thinkplot.Cdf(cdf)
    for bound in conf_int:
        thinkplot.Plot([bound, bound], [0, 1], color='0.8', linewidth=2)
def Estimate4(lam=2, m=1000000):
    """Simulate m games and report how well the results estimate lam.

    Collects the value returned by SimulateGame(lam) for each of m runs,
    prints the RMSE and mean error of those values relative to lam, and
    shows a histogram of their PMF.

    lam: rate passed to SimulateGame and used as the reference value
    m: number of games to simulate
    """
    estimates = [SimulateGame(lam) for _ in range(m)]

    print('Experiment 4')
    print('rmse L', RMSE(estimates, lam))
    print('mean error L', MeanError(estimates, lam))

    thinkplot.Hist(thinkstats2.Pmf(estimates))
    thinkplot.Show()