def test_conf_int_norm(self):
    """Normal-approximation CIs match a manual computation, and the
    'var'/'cov' method aliases agree with 'norm' on the finite bound."""
    reps = 200
    bs = IIDBootstrap(self.x)

    def estimator(y):
        return y.mean(axis=0)

    ci = bs.conf_int(estimator, reps=reps, size=0.90, method='norm')
    bs.reset()
    ci_u = bs.conf_int(estimator, tail='upper', reps=reps, size=0.95,
                       method='var')
    bs.reset()
    ci_l = bs.conf_int(estimator, tail='lower', reps=reps, size=0.95,
                       method='cov')
    bs.reset()
    cov = bs.cov(estimator, reps=reps)
    mu = estimator(self.x)
    std_err = np.sqrt(np.diag(cov))
    # Manual two-sided 90% normal interval around the point estimate.
    upper = mu + stats.norm.ppf(0.95) * std_err
    lower = mu + stats.norm.ppf(0.05) * std_err
    assert_allclose(lower, ci[0, :])
    assert_allclose(upper, ci[1, :])
    # One-sided intervals share their finite bound with the two-sided CI.
    assert_allclose(ci[1, :], ci_u[1, :])
    assert_allclose(ci[0, :], ci_l[0, :])
    unbounded = np.full_like(ci_l[0, :], np.inf)
    assert_equal(unbounded, ci_l[1, :])
    assert_equal(-unbounded, ci_u[0, :])
def test_conf_int_bias_corrected(self):
    """Check that 'bc' and its alias 'debiased' agree, and that both match
    a manual bias-corrected percentile computation."""
    num_bootstrap = 20
    bs = IIDBootstrap(self.x)
    bs.seed(23456)

    def func(y):
        return y.mean(axis=0)

    ci = bs.conf_int(func, reps=num_bootstrap, method='bc')
    bs.reset()
    ci_db = bs.conf_int(func, reps=num_bootstrap, method='debiased')
    # 'debiased' is an alias for 'bc'; results must be identical.
    assert_equal(ci, ci_db)
    base, results = bs._base, bs._results
    # Empirical probability that a bootstrap draw falls below the estimate.
    p = np.zeros(2)
    p[0] = np.mean(results[:, 0] < base[0])
    p[1] = np.mean(results[:, 1] < base[1])
    b = stats.norm.ppf(p)
    q = stats.norm.ppf(np.array([0.025, 0.975]))
    q = q[:, None]
    # Bias-corrected percentiles: Phi(2 * z0 + z_alpha), in percent.
    percentiles = 100 * stats.norm.cdf(2 * b + q)
    ci = np.zeros((2, 2))
    for i in range(2):
        ci[i] = np.percentile(results[:, i], list(percentiles[:, i]))
    ci = ci.T
    assert_allclose(ci_db, ci)
def test_conf_int_percentile(self):
    """Percentile-method CIs equal direct percentiles of the bootstrap
    replications; one-sided tails are unbounded on the open side."""
    reps = 200
    bs = IIDBootstrap(self.x)

    def estimator(y):
        return y.mean(axis=0)

    ci = bs.conf_int(estimator, reps=reps, size=0.90, method='percentile')
    bs.reset()
    ci_u = bs.conf_int(estimator, tail='upper', reps=reps, size=0.95,
                       method='percentile')
    bs.reset()
    ci_l = bs.conf_int(estimator, tail='lower', reps=reps, size=0.95,
                       method='percentile')
    bs.reset()
    results = np.zeros((reps, 2))
    for count, (pos, _) in enumerate(bs.bootstrap(reps)):
        results[count] = estimator(*pos)
    upper = np.percentile(results, 95, axis=0)
    lower = np.percentile(results, 5, axis=0)
    assert_allclose(lower, ci[0, :])
    assert_allclose(upper, ci[1, :])
    assert_allclose(ci[1, :], ci_u[1, :])
    assert_allclose(ci[0, :], ci_l[0, :])
    unbounded = np.full_like(ci_l[0, :], np.inf)
    assert_equal(unbounded, ci_l[1, :])
    assert_equal(-unbounded, ci_u[0, :])
def test_conf_int_basic(self):
    """Basic (reverse-percentile) CIs match a manual computation."""
    reps = 200
    bs = IIDBootstrap(self.x)
    ci = bs.conf_int(self.func, reps=reps, size=0.90, method='basic')
    bs.reset()
    ci_u = bs.conf_int(self.func, tail='upper', reps=reps, size=0.95,
                       method='basic')
    bs.reset()
    ci_l = bs.conf_int(self.func, tail='lower', reps=reps, size=0.95,
                       method='basic')
    bs.reset()
    results = np.zeros((reps, 2))
    for count, (pos, _) in enumerate(bs.bootstrap(reps)):
        results[count] = self.func(*pos)
    mu = self.func(self.x)
    # Basic method reflects the bootstrap percentiles about the estimate.
    upper = mu + (mu - np.percentile(results, 5, axis=0))
    lower = mu + (mu - np.percentile(results, 95, axis=0))
    assert_allclose(lower, ci[0, :])
    assert_allclose(upper, ci[1, :])
    assert_allclose(ci[1, :], ci_u[1, :])
    assert_allclose(ci[0, :], ci_l[0, :])
    unbounded = np.full_like(ci_l[0, :], np.inf)
    assert_equal(unbounded, ci_l[1, :])
    assert_equal(-unbounded, ci_u[0, :])
def test_conf_int_percentile(bs_setup):
    """Percentile CIs equal direct percentiles of the bootstrap draws."""
    reps = 200
    bs = IIDBootstrap(bs_setup.x)
    ci = bs.conf_int(bs_setup.func, reps=reps, size=0.90, method="percentile")
    bs.reset()
    ci_u = bs.conf_int(
        bs_setup.func, tail="upper", reps=reps, size=0.95, method="percentile"
    )
    bs.reset()
    ci_l = bs.conf_int(
        bs_setup.func, tail="lower", reps=reps, size=0.95, method="percentile"
    )
    bs.reset()
    results = np.zeros((reps, 2))
    for count, (pos, _) in enumerate(bs.bootstrap(reps)):
        results[count] = bs_setup.func(*pos)
    lower = np.percentile(results, 5, axis=0)
    upper = np.percentile(results, 95, axis=0)
    assert_allclose(lower, ci[0, :])
    assert_allclose(upper, ci[1, :])
    # One-sided bounds agree with the two-sided interval's finite bound.
    assert_allclose(ci[1, :], ci_u[1, :])
    assert_allclose(ci[0, :], ci_l[0, :])
    unbounded = np.full_like(ci_l[0, :], np.inf)
    assert_equal(unbounded, ci_l[1, :])
    assert_equal(-unbounded, ci_u[0, :])
def test_conf_int_norm(self):
    """Normal-approximation CIs match a manual computation; the 'var' and
    'cov' aliases agree with 'norm'."""
    reps = 200
    bs = IIDBootstrap(self.x)
    ci = bs.conf_int(self.func, reps=reps, size=0.90, method='norm')
    bs.reset()
    ci_u = bs.conf_int(self.func, tail='upper', reps=reps, size=0.95,
                       method='var')
    bs.reset()
    ci_l = bs.conf_int(self.func, tail='lower', reps=reps, size=0.95,
                       method='cov')
    bs.reset()
    cov = bs.cov(self.func, reps=reps)
    mu = self.func(self.x)
    std_err = np.sqrt(np.diag(cov))
    # Manual 90% two-sided normal interval.
    upper = mu + stats.norm.ppf(0.95) * std_err
    lower = mu + stats.norm.ppf(0.05) * std_err
    assert_allclose(lower, ci[0, :])
    assert_allclose(upper, ci[1, :])
    assert_allclose(ci[1, :], ci_u[1, :])
    assert_allclose(ci[0, :], ci_l[0, :])
    unbounded = np.full_like(ci_l[0, :], np.inf)
    assert_equal(unbounded, ci_l[1, :])
    assert_equal(-unbounded, ci_u[0, :])
def test_conf_int_norm(bs_setup):
    """Normal-approximation CIs match a manual computation; the 'var' and
    'cov' aliases agree with 'norm'."""
    reps = 200
    bs = IIDBootstrap(bs_setup.x)
    ci = bs.conf_int(bs_setup.func, reps=reps, size=0.90, method="norm")
    bs.reset()
    ci_u = bs.conf_int(
        bs_setup.func, tail="upper", reps=reps, size=0.95, method="var"
    )
    bs.reset()
    ci_l = bs.conf_int(
        bs_setup.func, tail="lower", reps=reps, size=0.95, method="cov"
    )
    bs.reset()
    cov = bs.cov(bs_setup.func, reps=reps)
    mu = bs_setup.func(bs_setup.x)
    std_err = np.sqrt(np.diag(cov))
    # Manual 90% two-sided normal interval.
    upper = mu + stats.norm.ppf(0.95) * std_err
    lower = mu + stats.norm.ppf(0.05) * std_err
    assert_allclose(lower, ci[0, :])
    assert_allclose(upper, ci[1, :])
    assert_allclose(ci[1, :], ci_u[1, :])
    assert_allclose(ci[0, :], ci_l[0, :])
    unbounded = np.full_like(ci_l[0, :], np.inf)
    assert_equal(unbounded, ci_l[1, :])
    assert_equal(-unbounded, ci_u[0, :])
def test_studentization_error():
    """Studentized CIs must fail loudly when a statistic has zero
    bootstrap variance (the second element here is a constant)."""
    def f(x):
        return np.array([x.mean(), 3])

    data = np.random.standard_normal(100)
    bs = IIDBootstrap(data)
    with pytest.raises(StudentizationError):
        bs.conf_int(f, 100, method="studentized")
def test_reuse(self):
    """conf_int(reuse=True) reuses stored results unchanged and warns
    when the requested rep count differs from the stored one."""
    reps = 100
    bs = IIDBootstrap(self.x)
    ci = bs.conf_int(self.func, reps=reps)
    saved_results = bs._results.copy()
    ci_reuse = bs.conf_int(self.func, reps=reps, reuse=True)
    # Reuse must not rerun the bootstrap or change the interval.
    assert_equal(bs._results, saved_results)
    assert_equal(ci, ci_reuse)
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always", RuntimeWarning)
        warnings.simplefilter("always")
        bs.conf_int(self.func, tail='lower', reps=reps // 2, reuse=True)
        assert_equal(len(w), 1)
def test_reuse(bs_setup):
    """conf_int(reuse=True) reuses stored results unchanged and warns
    when the requested rep count differs from the stored one."""
    reps = 100
    bs = IIDBootstrap(bs_setup.x)
    ci = bs.conf_int(bs_setup.func, reps=reps)
    saved_results = bs._results.copy()
    ci_reuse = bs.conf_int(bs_setup.func, reps=reps, reuse=True)
    # Reuse must not rerun the bootstrap or change the interval.
    assert_equal(bs._results, saved_results)
    assert_equal(ci, ci_reuse)
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always", RuntimeWarning)
        warnings.simplefilter("always")
        bs.conf_int(bs_setup.func, tail="lower", reps=reps // 2, reuse=True)
        assert_equal(len(w), 1)
def test_bca(self):
    """Compare conf_int(method='bca') against a manual BCa computation
    using leave-one-out jackknife acceleration."""
    num_bootstrap = 20
    bs = IIDBootstrap(self.x)
    bs.seed(23456)
    ci_direct = bs.conf_int(self.func, reps=num_bootstrap, method='bca')
    bs.reset()
    base, results = bs._base, bs._results
    # Bias-correction term z0 from empirical coverage probabilities.
    p = np.zeros(2)
    p[0] = np.mean(results[:, 0] < base[0])
    p[1] = np.mean(results[:, 1] < base[1])
    b = stats.norm.ppf(p)
    b = b[:, None]
    q = stats.norm.ppf(np.array([0.025, 0.975]))
    base = self.func(self.x)
    nobs = self.x.shape[0]
    # Acceleration from leave-one-out jackknife influence values.
    jk = _loo_jackknife(self.func, nobs, [self.x], {})
    u = jk.mean() - jk
    u2 = np.sum(u * u, 0)
    u3 = np.sum(u * u * u, 0)
    a = u3 / (6.0 * (u2**1.5))
    a = a[:, None]
    # BCa-adjusted percentiles (in percent).
    percentiles = 100 * stats.norm.cdf(b + (b + q) / (1 - a * (b + q)))
    ci = np.zeros((2, 2))
    for i in range(2):
        ci[i] = np.percentile(results[:, i], list(percentiles[i]))
    ci = ci.T
    assert_allclose(ci_direct, ci)
def mean_ci(data, alpha):
    """Compute a bootstrapped confidence interval (at confidence level
    ``alpha``) for the mean of ``data``.

    Args:
        data: pandas Series of observations.
        alpha: confidence level passed to ``conf_int`` (e.g. 0.95).

    Returns:
        dict with keys "Mean", "Lower" and "Upper".
    """
    # BUG FIX: the original body referenced the undefined name ``series``
    # everywhere (the parameter is called ``data``), so every call raised
    # NameError. All references now use ``data``.
    # Compute the mean of the Series
    mean = data.mean()
    # Obtain the values of the Series as an array
    array = data.values
    # Bootstrap the array (sample with replacement)
    bs = IIDBootstrap(array)
    # Compute confidence intervals of the bootstrapped distribution
    ci = bs.conf_int(np.mean, 1000, method='percentile', size=alpha)
    # Lower and upper bounds
    lower = ci[0, 0]
    upper = ci[1, 0]
    return {"Mean": mean, "Lower": lower, "Upper": upper}
def get_confidence_interval(scores, ci_method='bca', ci_size=0.95,
                            replications=100000, seed_value=None):
    """Compute a two-sided bootstrap confidence interval for the mean of
    ``scores``, skipping NaN entries.

    Returns a dict with keys 'size', 'lower' and 'upper'.
    """
    def score(x):
        return np.array([x.mean()])

    # Keep only non-NaN observations (renamed loop variable so it no
    # longer shadows the ``score`` statistic function above).
    data = np.array([float(s) for s in scores if not math.isnan(s)])
    if len(data) == 0:
        # Nothing usable: return NaN bounds at the requested size.
        return {
            'size': ci_size,
            'lower': float('nan'),
            'upper': float('nan')
        }
    if max(data) - min(data) < 0.000001:
        # Degenerate sample (all values numerically identical).
        return {'size': ci_size, 'lower': min(data), 'upper': max(data)}
    bs = IIDBootstrap(data)
    if seed_value is not None:
        bs.seed(seed_value)
    ci = bs.conf_int(score, replications, method=ci_method, size=ci_size,
                     tail='two')
    return {'size': ci_size, 'lower': ci[0][0], 'upper': ci[1][0]}
def mean_ci(data, alpha=0.95):
    '''
    Bootstrap a confidence interval (to alpha%) for the mean of data.

    Args
    ----
    data: pd.Series
        Data provided as a Pandas Series
    alpha: float
        Confidence percentage.

    Returns
    -------
    dict: Dictionary of mean, lower and upper bound of data
    '''
    point_estimate = data.mean()
    # Resample the underlying array with replacement.
    bs = IIDBootstrap(data.values)
    # Percentile CI of the bootstrapped distribution of the mean.
    ci = bs.conf_int(np.mean, 1000, method='percentile', size=alpha)
    return {"Mean": point_estimate,
            "Lower": ci[0, 0],
            "Upper": ci[1, 0]}
def test_bca(self):
    """Compare conf_int(method='bca') against a manual BCa computation
    using leave-one-out jackknife influence values."""
    num_bootstrap = 20
    bs = IIDBootstrap(self.x)
    bs.seed(23456)

    def func(y):
        return y.mean(axis=0)

    ci_direct = bs.conf_int(func, reps=num_bootstrap, method='bca')
    bs.reset()
    base, results = bs._base, bs._results
    # Bias-correction term z0 from empirical coverage probabilities.
    p = np.zeros(2)
    p[0] = np.mean(results[:, 0] < base[0])
    p[1] = np.mean(results[:, 1] < base[1])
    b = stats.norm.ppf(p)
    b = b[:, None]
    q = stats.norm.ppf(np.array([0.025, 0.975]))
    base = func(self.x)
    nobs = self.x.shape[0]
    jk = _loo_jackknife(func, nobs, [self.x], {})
    # Jackknife influence values scaled by (n - 1).
    u = (nobs - 1) * (jk - base)
    u2 = np.sum(u * u, 0)
    u3 = np.sum(u * u * u, 0)
    a = u3 / (6.0 * (u2 ** 1.5))
    a = a[:, None]
    # BCa-adjusted percentiles (in percent).
    percentiles = 100 * stats.norm.cdf(b + (b + q) / (1 - a * (b + q)))
    ci = np.zeros((2, 2))
    for i in range(2):
        ci[i] = np.percentile(results[:, i], list(percentiles[i]))
    ci = ci.T
    assert_allclose(ci_direct, ci)
def test_bca_against_bcajack(self):
    """Compare arch's BCa acceleration, bias, and 90% CI against saved
    values from the R package ``bcaboot`` (bcajack) on identical data.

    The R code that produced the reference numbers is retained in the
    comments below so the comparison can be regenerated."""
    # import rpy2.rinterface as ri
    # import rpy2.robjects as robjects
    # import rpy2.robjects.numpy2ri
    # from rpy2.robjects.packages import importr
    # rpy2.robjects.numpy2ri.activate()
    # utils = importr('utils')
    # try:
    #     bcaboot = importr('bcaboot')
    # except Exception:
    #     utils.install_packages('bcaboot',
    #                            repos='http://cran.us.r-project.org')
    #     bcaboot = importr('bcaboot')
    rng_seed_obs = 42
    rs = np.random.RandomState(rng_seed_obs)
    observations = rs.multivariate_normal(mean=[8, 4],
                                          cov=np.identity(2),
                                          size=20)
    B = 2000
    rng_seed = 123
    rs = np.random.RandomState(rng_seed)
    arch_bs = IIDBootstrap(observations, random_state=rs)
    confidence_interval_size = 0.90

    def func(x):
        # Ratio of means: a statistic with non-trivial bias/acceleration.
        sample = x.mean(axis=0)
        return sample[1] / sample[0]

    arch_ci = arch_bs.conf_int(
        func=func,
        reps=B,
        size=confidence_interval_size,
        method='bca',
    )
    # # callable from R
    # @ri.rternalize
    # def func_r(x):
    #     x = np.asarray(x)
    #     _mean = x.mean(axis=0)
    #     return float(_mean[1] / _mean[0])
    # output = bcaboot.bcajack(x=observations, B=float(B), func=func_r)
    a = arch_bs._bca_acceleration(func)
    b = arch_bs._bca_bias()
    # bca_lims = np.array(output[1])[:, 0]
    # # bca confidence intervals for: 0.025, 0.05, 0.1, 0.16, 0.5,
    # 0.84, 0.9, 0.95, 0.975
    # bcajack_ci_90 = [bca_lims[1], bca_lims[-2]]
    # bcajack should estimate similar "a" using jackknife on
    # the same observations
    assert_allclose(a, -0.0004068984)
    # bcajack returns b (or z0) = -0.03635412, but based on
    # different bootstrap samples
    assert_allclose(b, 0.04764396)
    # bcajack_ci_90 = [0.42696, 0.53188]
    arch_ci = list(arch_ci[:, -1])
    saved_arch_ci_90 = [0.42719805360154717, 0.5336561953393736]
    assert_allclose(arch_ci, saved_arch_ci_90)
def test_conf_int_parametric(self):
    """Parametric and semiparametric sampling pass ``params``/``state``
    through to the user function and store the expected results."""
    def param_func(x, params=None, state=None):
        # Parametric path: simulate from N(mu, 1) with the bootstrap RNG.
        if state is not None:
            mu = params
            e = state.standard_normal(x.shape)
            return (mu + e).mean(0)
        else:
            return x.mean(0)

    def semi_func(x, params=None):
        # Semiparametric path: recenter resampled data around mu.
        if params is not None:
            mu = params
            e = x - mu
            return (mu + e).mean(0)
        else:
            return x.mean(0)

    reps = 100
    bs = IIDBootstrap(self.x)
    bs.seed(23456)
    ci = bs.conf_int(func=param_func, reps=reps, sampling='parametric')
    assert len(ci) == 2
    assert np.all(ci[0] < ci[1])
    bs.reset()
    # Replicate the parametric draws manually with the same RNG state.
    results = np.zeros((reps, 2))
    count = 0
    mu = self.x.mean(0)
    for pos, _ in bs.bootstrap(100):
        results[count] = param_func(*pos, params=mu,
                                    state=bs.random_state)
        count += 1
    assert_equal(bs._results, results)
    bs.reset()
    ci = bs.conf_int(func=semi_func, reps=100, sampling='semi')
    assert len(ci) == 2
    assert np.all(ci[0] < ci[1])
    bs.reset()
    results = np.zeros((reps, 2))
    count = 0
    for pos, _ in bs.bootstrap(100):
        results[count] = semi_func(*pos, params=mu)
        count += 1
    assert_allclose(bs._results, results)
def test_conf_int_bca_scaler(self):
    """A scalar-valued statistic must still yield a (2, 1) CI with 'bca'."""
    bs = IIDBootstrap(self.y)
    bs.seed(23456)
    ci = bs.conf_int(np.mean, reps=100, method='bca')
    msg = ('conf_int(method=\'bca\') scalar input regression. Ensure '
           'output is at least 1D with numpy.atleast_1d().')
    assert ci.shape == (2, 1), msg
def test_conf_int_bca_scaler(bs_setup):
    """A scalar-valued statistic must still yield a (2, 1) CI with 'bca'."""
    bs = IIDBootstrap(bs_setup.y)
    bs.seed(23456)
    ci = bs.conf_int(np.mean, reps=100, method="bca")
    msg = ("conf_int(method='bca') scalar input regression. Ensure "
           "output is at least 1D with numpy.atleast_1d().")
    assert ci.shape == (2, 1), msg
def test_reuse(self):
    """conf_int(reuse=True) reuses stored results unchanged and warns
    when the requested rep count differs from the stored one."""
    reps = 100
    bs = IIDBootstrap(self.x)

    def estimator(y):
        return y.mean(axis=0)

    ci = bs.conf_int(estimator, reps=reps)
    saved_results = bs._results.copy()
    ci_reuse = bs.conf_int(estimator, reps=reps, reuse=True)
    # Reuse must not rerun the bootstrap or change the interval.
    assert_equal(bs._results, saved_results)
    assert_equal(ci, ci_reuse)
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always", RuntimeWarning)
        warnings.simplefilter("always")
        bs.conf_int(estimator, tail='lower', reps=reps // 2, reuse=True)
        assert_equal(len(w), 1)
def test_iid_semiparametric(bs_setup):
    """Semiparametric sampling yields a two-row CI for a scalar statistic."""
    bs = IIDBootstrap(bs_setup.y)

    def func(y, axis=0, params=None):
        # With params supplied, center the data before averaging.
        if params is None:
            return y.mean(axis=axis)
        return (y - params).mean(axis=axis)

    ci = bs.conf_int(func, reps=10, sampling="semiparametric")
    assert ci.shape == (2, 1)
def test_bca_extra_kwarg():
    """GH 366: extra_kwargs must be forwarded when method='bca'."""
    def f(a, b):
        # ``b`` is deliberately unused; only its forwarding is tested.
        return a.mean(0)

    data = np.random.standard_normal(1000)
    bs = IIDBootstrap(data)
    ci = bs.conf_int(f, extra_kwargs={"b": "anything"}, reps=100,
                     method="bca")
    assert isinstance(ci, np.ndarray)
    assert ci.shape == (2, 1)
def test_conf_int_bca_scaler(self):
    """Regression: a scalar statistic with method='bca' must not raise
    IndexError and must produce a (2, 1) interval."""
    bs = IIDBootstrap(self.y)
    bs.seed(23456)
    try:
        ci = bs.conf_int(np.mean, reps=100, method='bca')
        assert ci.shape == (2, 1)
    except IndexError:
        pytest.fail('conf_int(method=\'bca\') scalar input regression. '
                    'Ensure output is at least 1D with '
                    'numpy.atleast_1d().')
def test_conf_int_bca_scaler(self):
    """Regression: a scalar statistic with method='bca' must not raise
    IndexError and must produce a (2, 1) interval."""
    num_bootstrap = 100
    bs = IIDBootstrap(self.y)
    bs.seed(23456)
    try:
        ci = bs.conf_int(np.mean, reps=num_bootstrap, method='bca')
        assert ci.shape == (2, 1)
    except IndexError:
        # Fixed typo in the failure message: 'scaler' -> 'scalar'
        # (now consistent with the sibling test's wording).
        pytest.fail('conf_int(method=\'bca\') scalar input regression. '
                    'Ensure output is at least 1D with '
                    'numpy.atleast_1d().')
def test_bc_extremum_error():
    """GH 496: method='bc' must raise when an empirical probability used
    in the bias correction hits an extremum (0 or 1)."""
    def profile_function(scores):
        tau = np.linspace(-0.1, 1.0, 10)
        # Broadcast every score against every threshold tau.
        flat = np.expand_dims(scores.flatten(), axis=0)
        comparisons = flat >= tau[:, np.newaxis]
        return np.mean(comparisons, axis=-1)

    val = np.array([
        0.14333333,
        0.6576,
        0.35882353,
        0.48982389,
        0.35660377,
        0.7,
        -0.00457143,
        0.87817109,
        -0.01538462,
        0.54444444,
    ])
    bs = IIDBootstrap(val, random_state=np.random.RandomState(0))
    with pytest.raises(RuntimeError, match="Empirical probability used"):
        bs.conf_int(profile_function, 100, method="bc")
def test_errors(self):
    """Invalid construction and conf_int arguments raise ValueError."""
    short = np.arange(10)
    data = np.arange(100)
    # Positional arrays of mismatched length are rejected.
    with pytest.raises(ValueError):
        IIDBootstrap(short, data)
    # An index alone is not valid input.
    with pytest.raises(ValueError):
        IIDBootstrap(index=short)
    bs = IIDBootstrap(data)
    with pytest.raises(ValueError):
        bs.conf_int(self.func, method='unknown')
    with pytest.raises(ValueError):
        bs.conf_int(self.func, tail='dragon')
    # size must be a fraction in (0, 1), not a percentage.
    with pytest.raises(ValueError):
        bs.conf_int(self.func, size=95)
def test_errors(bs_setup):
    """Invalid construction and conf_int arguments raise ValueError."""
    short = np.arange(10)
    data = np.arange(100)
    # Positional arrays of mismatched length are rejected.
    with pytest.raises(ValueError):
        IIDBootstrap(short, data)
    # An index alone is not valid input.
    with pytest.raises(ValueError):
        IIDBootstrap(index=short)
    bs = IIDBootstrap(data)
    with pytest.raises(ValueError):
        bs.conf_int(bs_setup.func, method="unknown")
    with pytest.raises(ValueError):
        bs.conf_int(bs_setup.func, tail="dragon")
    # size must be a fraction in (0, 1), not a percentage.
    with pytest.raises(ValueError):
        bs.conf_int(bs_setup.func, size=95)
def test_studentized(bs_setup):
    """Studentized CIs match a manual percentile-t computation using both
    an explicit std_err_func and an inner (nested) bootstrap, and reusing
    stored results triggers a warning."""
    num_bootstrap = 20
    bs = IIDBootstrap(bs_setup.x)
    bs.seed(23456)

    def std_err_func(mu, y):
        # Asymptotic standard error of the mean.
        errors = y - mu
        var = (errors ** 2.0).mean(axis=0)
        return np.sqrt(var / y.shape[0])

    ci = bs.conf_int(
        bs_setup.func,
        reps=num_bootstrap,
        method="studentized",
        std_err_func=std_err_func,
    )
    bs.reset()
    base = bs_setup.func(bs_setup.x)
    results = np.zeros((num_bootstrap, 2))
    stud_results = np.zeros((num_bootstrap, 2))
    count = 0
    for pos, _ in bs.bootstrap(reps=num_bootstrap):
        results[count] = bs_setup.func(*pos)
        std_err = std_err_func(results[count], *pos)
        stud_results[count] = (results[count] - base) / std_err
        count += 1
    assert_allclose(results, bs._results)
    assert_allclose(stud_results, bs._studentized_results)
    errors = results - results.mean(0)
    std_err = np.sqrt(np.mean(errors ** 2.0, axis=0))
    ci_direct = np.zeros((2, 2))
    for i in range(2):
        # Percentile-t interval: invert the studentized distribution.
        ci_direct[0, i] = base[i] - std_err[i] * np.percentile(
            stud_results[:, i], 97.5)
        ci_direct[1, i] = base[i] - std_err[i] * np.percentile(
            stud_results[:, i], 2.5)
    assert_allclose(ci, ci_direct)
    bs.reset()
    # Second pass: studentize with an inner bootstrap instead of a
    # user-supplied standard-error function.
    ci = bs.conf_int(
        bs_setup.func, reps=num_bootstrap, method="studentized",
        studentize_reps=50
    )
    bs.reset()
    base = bs_setup.func(bs_setup.x)
    results = np.zeros((num_bootstrap, 2))
    stud_results = np.zeros((num_bootstrap, 2))
    count = 0
    for pos, _ in bs.bootstrap(reps=num_bootstrap):
        results[count] = bs_setup.func(*pos)
        # Seed the inner bootstrap from the outer RNG to make the
        # manual replication deterministic.
        inner_bs = IIDBootstrap(*pos)
        seed = bs.random_state.randint(2 ** 31 - 1)
        inner_bs.seed(seed)
        cov = inner_bs.cov(bs_setup.func, reps=50)
        std_err = np.sqrt(np.diag(cov))
        stud_results[count] = (results[count] - base) / std_err
        count += 1
    assert_allclose(results, bs._results)
    assert_allclose(stud_results, bs._studentized_results)
    errors = results - results.mean(0)
    std_err = np.sqrt(np.mean(errors ** 2.0, axis=0))
    ci_direct = np.zeros((2, 2))
    for i in range(2):
        ci_direct[0, i] = base[i] - std_err[i] * np.percentile(
            stud_results[:, i], 97.5)
        ci_direct[1, i] = base[i] - std_err[i] * np.percentile(
            stud_results[:, i], 2.5)
    assert_allclose(ci, ci_direct)
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        # Reusing prior results for a studentized interval warns.
        bs.conf_int(
            bs_setup.func,
            reps=num_bootstrap,
            method="studentized",
            std_err_func=std_err_func,
            reuse=True,
        )
        assert_equal(len(w), 1)
def test_studentized(self):
    """Studentized CIs match a manual percentile-t computation using both
    an explicit std_err_func and an inner (nested) bootstrap, and reusing
    stored results triggers a warning."""
    num_bootstrap = 20
    bs = IIDBootstrap(self.x)
    bs.seed(23456)

    def func(y):
        return y.mean(axis=0)

    def std_err_func(mu, y):
        # Asymptotic standard error of the mean.
        errors = y - mu
        var = (errors ** 2.0).mean(axis=0)
        return np.sqrt(var / y.shape[0])

    ci = bs.conf_int(func, reps=num_bootstrap, method='studentized',
                     std_err_func=std_err_func)
    bs.reset()
    base = func(self.x)
    results = np.zeros((num_bootstrap, 2))
    stud_results = np.zeros((num_bootstrap, 2))
    count = 0
    for pos, kwdata in bs.bootstrap(reps=num_bootstrap):
        results[count] = func(*pos)
        std_err = std_err_func(results[count], *pos)
        stud_results[count] = (results[count] - base) / std_err
        count += 1
    assert_allclose(results, bs._results)
    assert_allclose(stud_results, bs._studentized_results)
    errors = results - results.mean(0)
    std_err = np.sqrt(np.mean(errors ** 2.0, axis=0))
    ci_direct = np.zeros((2, 2))
    for i in range(2):
        # Percentile-t interval: invert the studentized distribution.
        ci_direct[0, i] = base[i] - std_err[i] * np.percentile(
            stud_results[:, i], 97.5)
        ci_direct[1, i] = base[i] - std_err[i] * np.percentile(
            stud_results[:, i], 2.5)
    assert_allclose(ci, ci_direct)
    bs.reset()
    # Second pass: studentize with an inner bootstrap instead of a
    # user-supplied standard-error function.
    ci = bs.conf_int(func, reps=num_bootstrap, method='studentized',
                     studentize_reps=50)
    bs.reset()
    base = func(self.x)
    results = np.zeros((num_bootstrap, 2))
    stud_results = np.zeros((num_bootstrap, 2))
    count = 0
    for pos, kwdata in bs.bootstrap(reps=num_bootstrap):
        results[count] = func(*pos)
        # Seed the inner bootstrap from the outer RNG to make the
        # manual replication deterministic.
        inner_bs = IIDBootstrap(*pos)
        seed = bs.random_state.randint(2 ** 31 - 1)
        inner_bs.seed(seed)
        cov = inner_bs.cov(func, reps=50)
        std_err = np.sqrt(np.diag(cov))
        stud_results[count] = (results[count] - base) / std_err
        count += 1
    assert_allclose(results, bs._results)
    assert_allclose(stud_results, bs._studentized_results)
    errors = results - results.mean(0)
    std_err = np.sqrt(np.mean(errors ** 2.0, axis=0))
    ci_direct = np.zeros((2, 2))
    for i in range(2):
        ci_direct[0, i] = base[i] - std_err[i] * np.percentile(
            stud_results[:, i], 97.5)
        ci_direct[1, i] = base[i] - std_err[i] * np.percentile(
            stud_results[:, i], 2.5)
    assert_allclose(ci, ci_direct)
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        # Reusing prior results for a studentized interval warns.
        bs.conf_int(func, reps=num_bootstrap, method='studentized',
                    std_err_func=std_err_func, reuse=True)
        assert_equal(len(w), 1)
###############################################################################
# BOOTSTRAPPED OLS


# call statsmodels OLS
def ols_stats(input_data):
    """Fit OLS of RE on liked and return the coefficient p-values."""
    return smf.ols('RE ~ liked', data=input_data).fit().pvalues
    # Alternatives kept for reference:
    # return smf.ols('RE ~ liked', data=input_data).fit().tvalues
    # return smf.ols('RE ~ liked', data=input_data).fit().params


# run bootstrapping by arch
input_data = raw_data_frame
bs = IIDBootstrap(input_data)
ci = pd.DataFrame(data=(bs.conf_int(ols_stats, 10000, method='basic',
                                    tail='two')),
                  columns=['intercept_ci', 'first_variable_ci'],
                  index=['lower', 'upper'])
mean_ci = ci.mean(axis=0)
# FIX: DataFrame.append was removed in pandas 2.0; use pd.concat with the
# Series transposed into a one-row frame (same result as the old
# ci.append(mean_ci, ignore_index=True)).
ci = pd.concat([ci, mean_ci.to_frame().T], ignore_index=True)
ci = ci.T.copy()
print(ci)
print(mean_ci, "summary_statistics: mean_ci")

# computing p-values manually from t-values
# pval = stats.t.sf(np.abs(4.4), n-1)*2  # two-sided pvalue = Prob(abs(t)>tt)
# print 't-statistic = %6.3f pvalue = %6.4f' % (tt, pval)

# shows execution time
print(time.time() - start_time, "seconds")