def test_pass_random_state():
    """A ``RandomState`` is accepted as ``random_state``; an int is rejected."""
    data = np.arange(1000)
    generator = RandomState(0)
    # Constructing with a RandomState instance must complete without raising.
    IIDBootstrap(data, random_state=generator)
    # A bare integer is expected to be refused with TypeError.
    with pytest.raises(TypeError):
        IIDBootstrap(data, random_state=0)
def test_iid_args_kwargs(bs_setup):
    """Equal seeds give equal draws for positional and keyword data."""
    positional_bs = IIDBootstrap(bs_setup.y)
    positional_bs.seed(0)
    keyword_bs = IIDBootstrap(y=bs_setup.y)
    keyword_bs.seed(0)

    draws = zip(positional_bs.bootstrap(1), keyword_bs.bootstrap(1))
    for from_args, from_kwargs in draws:
        # Every draw is a (positional data, keyword data) pair.
        positional_sample = from_args[0][0]
        keyword_sample = from_kwargs[1]["y"]
        assert np.all(positional_sample == keyword_sample)
def test_list_input():
    """Plain Python lists are rejected as bootstrap data (GH 315)."""
    # Build the input once, outside the ``raises`` blocks, so that only the
    # constructor call can satisfy the expected TypeError.
    vals = np.random.standard_normal(25).tolist()

    # List passed positionally.
    with pytest.raises(TypeError, match="Positional input 0 "):
        IIDBootstrap(vals)

    # List passed under the keyword ``data``.
    with pytest.raises(TypeError, match="Input `data` "):
        IIDBootstrap(data=vals)
def test_extra_kwargs(self):
    """``cov`` takes ``extra_kwargs``; a same-named data keyword raises."""
    reps = 100
    func_kwargs = {"axis": 0}

    positional = IIDBootstrap(self.x)
    positional.seed(23456)
    positional.cov(self.func, reps=reps, extra_kwargs=func_kwargs)

    # Here the data itself is stored under "axis", the key used in
    # extra_kwargs, and ValueError is expected.
    clashing = IIDBootstrap(axis=self.x)
    clashing.seed(23456)
    with pytest.raises(ValueError):
        clashing.cov(self.func, reps=reps, extra_kwargs=func_kwargs)
def test_extra_kwargs(bs_setup):
    """``cov`` accepts ``extra_kwargs`` unless a data keyword shares its name."""
    statistic = bs_setup.func
    passthrough = {"axis": 0}
    n_reps = 100

    # Positional data: the call is expected to complete.
    bs_ok = IIDBootstrap(bs_setup.x)
    bs_ok.seed(23456)
    bs_ok.cov(statistic, reps=n_reps, extra_kwargs=passthrough)

    # Data registered under the keyword "axis": ValueError is expected.
    bs_clash = IIDBootstrap(axis=bs_setup.x)
    bs_clash.seed(23456)
    with pytest.raises(ValueError):
        bs_clash.cov(statistic, reps=n_reps, extra_kwargs=passthrough)
def test_errors(self):
    """Bad constructor inputs and bad ``conf_int`` options raise ValueError."""
    short = np.arange(10)
    long = np.arange(100)

    # Two positional inputs with 10 and 100 observations.
    with pytest.raises(ValueError):
        IIDBootstrap(short, long)
    # Data passed under the keyword "index".
    with pytest.raises(ValueError):
        IIDBootstrap(index=short)

    bs = IIDBootstrap(long)
    invalid_options = ({"method": "unknown"}, {"tail": "dragon"}, {"size": 95})
    for options in invalid_options:
        with pytest.raises(ValueError):
            bs.conf_int(self.func, **options)
def test_errors(bs_setup):
    """ValueError is expected for each invalid input exercised below."""
    ten = np.arange(10)
    hundred = np.arange(100)
    statistic = bs_setup.func

    with pytest.raises(ValueError):
        # Positional inputs of length 10 and 100.
        IIDBootstrap(ten, hundred)
    with pytest.raises(ValueError):
        # "index" used as a data keyword.
        IIDBootstrap(index=ten)

    bs = IIDBootstrap(hundred)
    # One invalid conf_int option per call: method, tail, then size.
    for name, bad_value in (("method", "unknown"), ("tail", "dragon"), ("size", 95)):
        with pytest.raises(ValueError):
            bs.conf_int(statistic, **{name: bad_value})
def test_extra_kwargs(self):
    """``extra_kwargs`` work with a local statistic; a name clash raises."""
    reps = 100
    forwarded = {"axis": 0}

    def axis_mean(y, axis=0):
        # Mean along the requested axis.
        return y.mean(axis=axis)

    plain = IIDBootstrap(self.x)
    plain.seed(23456)
    plain.cov(axis_mean, reps=reps, extra_kwargs=forwarded)

    # The data keyword "axis" matches the key in extra_kwargs; ValueError
    # is expected.
    clashing = IIDBootstrap(axis=self.x)
    clashing.seed(23456)
    with pytest.raises(ValueError):
        clashing.cov(axis_mean, reps=reps, extra_kwargs=forwarded)
def test_conf_int_percentile(self):
    """Percentile intervals match percentiles of manually drawn statistics."""
    reps = 200
    bs = IIDBootstrap(self.x)

    def column_mean(y):
        return y.mean(axis=0)

    two_sided = bs.conf_int(
        column_mean, reps=reps, size=0.90, method="percentile"
    )
    bs.reset()
    upper_only = bs.conf_int(
        column_mean, tail="upper", reps=reps, size=0.95, method="percentile"
    )
    bs.reset()
    lower_only = bs.conf_int(
        column_mean, tail="lower", reps=reps, size=0.95, method="percentile"
    )
    bs.reset()

    # Recompute the statistic over the same number of bootstrap draws.
    draws = np.zeros((reps, 2))
    for row, (pos, _) in enumerate(bs.bootstrap(reps)):
        draws[row] = column_mean(*pos)

    expected_upper = np.percentile(draws, 95, axis=0)
    expected_lower = np.percentile(draws, 5, axis=0)
    assert_allclose(expected_lower, two_sided[0, :])
    assert_allclose(expected_upper, two_sided[1, :])
    assert_allclose(two_sided[1, :], upper_only[1, :])
    assert_allclose(two_sided[0, :], lower_only[0, :])

    # The open end of each one-sided interval is infinite.
    infinite = np.full_like(lower_only[0, :], np.inf)
    assert_equal(infinite, lower_only[1, :])
    assert_equal(-1 * infinite, upper_only[0, :])
def test_conf_int_norm(self):
    """'norm', 'var' and 'cov' intervals match a normal approximation."""
    reps = 200
    statistic = self.func
    bs = IIDBootstrap(self.x)

    two_sided = bs.conf_int(statistic, reps=reps, size=0.90, method="norm")
    bs.reset()
    upper_only = bs.conf_int(
        statistic, tail="upper", reps=reps, size=0.95, method="var"
    )
    bs.reset()
    lower_only = bs.conf_int(
        statistic, tail="lower", reps=reps, size=0.95, method="cov"
    )
    bs.reset()

    # Expected bounds: point estimate plus a normal quantile times the
    # bootstrap standard error.
    covariance = bs.cov(statistic, reps=reps)
    estimate = statistic(self.x)
    std_err = np.sqrt(np.diag(covariance))
    expected_upper = estimate + stats.norm.ppf(0.95) * std_err
    expected_lower = estimate + stats.norm.ppf(0.05) * std_err

    assert_allclose(expected_lower, two_sided[0, :])
    assert_allclose(expected_upper, two_sided[1, :])
    assert_allclose(two_sided[1, :], upper_only[1, :])
    assert_allclose(two_sided[0, :], lower_only[0, :])

    # The open end of each one-sided interval is infinite.
    infinite = np.full_like(lower_only[0, :], np.inf)
    assert_equal(infinite, lower_only[1, :])
    assert_equal(-1 * infinite, upper_only[0, :])
def mean_ci(data, alpha=0.95):
    """
    Bootstrap a percentile confidence interval for the mean of ``data``.

    Args
    ----
    data: pd.Series
        Observations; ``data.mean()`` and ``data.values`` are used.
    alpha: float
        Interval size, forwarded to ``conf_int`` as ``size``.

    Returns
    -------
    dict:
        The sample mean under "Mean" and the interval bounds under
        "Lower" and "Upper".
    """
    sample_mean = data.mean()

    # Resample the raw values with replacement (1000 replications) and take
    # the percentile interval of the bootstrapped means.
    bootstrap = IIDBootstrap(data.values)
    interval = bootstrap.conf_int(np.mean, 1000, method="percentile", size=alpha)

    # Row 0 holds the lower bound, row 1 the upper bound.
    return {
        "Mean": sample_mean,
        "Lower": interval[0, 0],
        "Upper": interval[1, 0],
    }
def test_str(self):
    """Check ``str``, ``repr`` and the HTML repr of three bootstrap classes."""

    def with_id(text, obj):
        # The repr is the str with ", ID: <hex id>" placed before the final ")".
        return f"{text[:-1]}, ID: {hex(id(obj))})"

    # IID bootstrap with a single positional input.
    bs = IIDBootstrap(self.y_series)
    summary = "IID Bootstrap(no. pos. inputs: 1, no. keyword inputs: 0)"
    assert_equal(str(bs), summary)
    assert_equal(repr(bs), with_id(summary, bs))
    html = (
        "<strong>IID Bootstrap</strong>("
        "<strong>no. pos. inputs</strong>: 1, "
        "<strong>no. keyword inputs</strong>: 0, "
        f"<strong>ID</strong>: {hex(id(bs))})"
    )
    assert_equal(bs._repr_html(), html)

    # Stationary bootstrap with two positional inputs.
    bs = StationaryBootstrap(10, self.y_series, self.x_df)
    summary = (
        "Stationary Bootstrap(block size: 10, "
        "no. pos. inputs: 2, no. keyword inputs: 0)"
    )
    assert_equal(str(bs), summary)
    assert_equal(repr(bs), with_id(summary, bs))

    # Circular block bootstrap with keyword-only inputs.
    bs = CircularBlockBootstrap(block_size=20, y=self.y_series, x=self.x_df)
    summary = (
        "Circular Block Bootstrap(block size: 20, "
        "no. pos. inputs: 0, no. keyword inputs: 2)"
    )
    assert_equal(str(bs), summary)
    assert_equal(repr(bs), with_id(summary, bs))
    html = (
        "<strong>Circular Block Bootstrap</strong>"
        "(<strong>block size</strong>: 20, "
        "<strong>no. pos. inputs</strong>: 0, "
        "<strong>no. keyword inputs</strong>: 2,"
        f" <strong>ID</strong>: {hex(id(bs))})"
    )
    assert_equal(bs._repr_html(), html)
def mean_confidence_interval_normed(data, metric, normalizer=None, bootstrap_reps=1000, norm='none', test_type='own', confidence=0.95):
    '''
    Return the (optionally normalised) mean of ``data`` and a confidence
    bound, both rounded to 4 decimals.

    Parameters
    ----------
    data : array-like
        Values converted with ``np.array``.
    metric : str
        Normalisation is only set up for 'css_ri', 'synergy_zip',
        'synergy_bliss', 'synergy_hsa' and 'synergy_loewe'.
    normalizer : mapping, optional
        Indexed as ``normalizer[norm][metric]`` when ``norm`` is 'sd',
        'iqr' or 'range'. Per the original note, the normalizer should
        have all the data.
    bootstrap_reps : int
        Number of bootstrap replications for the 'bs' and 'own' tests.
    norm : str
        'sd', 'iqr', 'range' or 'none'. Any other value prints a message
        and the function returns None.
    test_type : str
        'bs' for BCa, 't' for t-stud, 'own' for symmetric bootstrap,
        'z' for z-transform.
    confidence : float
        Forwarded to the helper for 't', 'own' and 'z'. It is NOT
        forwarded for 'bs' (see the comment in that branch).

    Returns
    -------
    tuple or None
        ``(round(mean_val, 4), round(float(ci), 4))``, or None when
        ``norm`` is not recognised.

    https://arch.readthedocs.io/en/latest/bootstrap/generated/generated/arch.bootstrap.IIDBootstrap.conf_int.html#arch.bootstrap.IIDBootstrap.conf_int
    '''
    if metric in ['css_ri', 'synergy_zip', 'synergy_bliss', 'synergy_hsa', 'synergy_loewe']:
        if norm in ['sd','iqr','range']:
            # Scale the data by the normaliser stored for this norm and metric.
            a = np.array(data)/normalizer[norm][metric]
            mean_val = np.mean(a)
        elif norm=='none':
            a = np.array(data)
            mean_val = np.mean(a)
        else:
            print('no norming info!')
            return
    # NOTE(review): `a` is only assigned inside the metric check above; for
    # any other metric the branches below would fail on an unbound name.
    # Confirm whether other metrics are expected here.
    if test_type == 't':
        # standard t-dist
        ci = _mean_confidence_interval(data=a, confidence=confidence)
    elif test_type == 'bs':
        # The mean is taken as is; only its 95% CI is bootstrapped.
        n = len(a)
        # Bias-corrected and accelerated (BCa) bootstrap. `size` is not
        # passed to conf_int, so `confidence` is ignored here. The original
        # note says it defaults to 0.95 and suggests fixing this with partial.
        ci = IIDBootstrap(a).conf_int(_mean_confidence_interval, reps=bootstrap_reps, method='bca')
        # Keep row 1 of the interval (the upper bound).
        ci = ci[1]
        mean_val = np.mean(a)
    elif test_type == 'own':
        '''(standard symmetrical bootstrap)'''
        mean_val, ci = _bootstrap(data=a, confidence=confidence, bootstrap_reps=bootstrap_reps)
    elif test_type == 'z':
        mean_val, ci = _pearsonr_ci(data=a, confidence=confidence)
    # NOTE(review): an unrecognised test_type leaves `ci` unassigned before
    # this return — confirm whether that should be reported explicitly.
    return round(mean_val, 4), round(float(ci), 4)  # return upper bound
def test_conf_int_basic(self):
    """'basic' intervals equal the estimate reflected around the percentiles."""
    reps = 200
    statistic = self.func
    bs = IIDBootstrap(self.x)

    two_sided = bs.conf_int(statistic, reps=reps, size=0.90, method="basic")
    bs.reset()
    upper_only = bs.conf_int(
        statistic, tail="upper", reps=reps, size=0.95, method="basic"
    )
    bs.reset()
    lower_only = bs.conf_int(
        statistic, tail="lower", reps=reps, size=0.95, method="basic"
    )
    bs.reset()

    # Recompute the statistic over the same number of bootstrap draws.
    draws = np.zeros((reps, 2))
    for row, (pos, _) in enumerate(bs.bootstrap(reps)):
        draws[row] = statistic(*pos)

    estimate = statistic(self.x)
    expected_upper = estimate + (estimate - np.percentile(draws, 5, axis=0))
    expected_lower = estimate + (estimate - np.percentile(draws, 95, axis=0))

    assert_allclose(expected_lower, two_sided[0, :])
    assert_allclose(expected_upper, two_sided[1, :])
    assert_allclose(two_sided[1, :], upper_only[1, :])
    assert_allclose(two_sided[0, :], lower_only[0, :])

    # The open end of each one-sided interval is infinite.
    infinite = np.full_like(lower_only[0, :], np.inf)
    assert_equal(infinite, lower_only[1, :])
    assert_equal(-1 * infinite, upper_only[0, :])
def mean_ci(data, alpha):
    '''
    Compute the bootstrapped confidence interval of the mean of data.

    Input:
        data: pandas Series of data
        alpha: numeric interval size, passed to ``conf_int`` as ``size``

    Output:
        Dictionary with the mean under "Mean" and the interval bounds
        under "Lower" and "Upper".
    '''
    # Compute the mean of the Series. The parameter is named ``data``; the
    # earlier body read an undefined name ``series`` and raised NameError.
    mean = data.mean()

    # Obtain the values of the Series as an array
    array = data.values

    # Bootstrap the array (sample with replacement)
    bs = IIDBootstrap(array)

    # Percentile confidence interval from 1000 bootstrap replications
    ci = bs.conf_int(np.mean, 1000, method='percentile', size=alpha)

    # Row 0 is the lower bound, row 1 the upper bound
    lower = ci[0, 0]
    upper = ci[1, 0]

    # Output dictionary
    dict_out = {"Mean": mean, "Lower": lower, "Upper": upper}
    return dict_out
def test_bca(self):
    """Rebuild the BCa interval by hand and compare with ``conf_int``."""
    reps = 20
    bs = IIDBootstrap(self.x)
    bs.seed(23456)
    ci_direct = bs.conf_int(self.func, reps=reps, method="bca")
    bs.reset()

    # Bias term: normal quantile of the share of bootstrap statistics that
    # fall below the base estimate, one entry per output column.
    base, results = bs._base, bs._results
    share_below = np.array(
        [np.mean(results[:, col] < base[col]) for col in range(2)]
    )
    b = stats.norm.ppf(share_below)[:, None]
    q = stats.norm.ppf(np.array([0.025, 0.975]))

    # Acceleration term from the leave-one-out jackknife values.
    base = self.func(self.x)
    nobs = self.x.shape[0]
    jackknife = _loo_jackknife(self.func, nobs, [self.x], {})
    deviations = jackknife.mean() - jackknife
    sum_sq = np.sum(deviations * deviations, 0)
    sum_cube = np.sum(deviations * deviations * deviations, 0)
    a = (sum_cube / (6.0 * (sum_sq**1.5)))[:, None]

    # Adjusted percentiles, then the matching quantiles of each column.
    percentiles = 100 * stats.norm.cdf(b + (b + q) / (1 - a * (b + q)))
    per_column = [
        np.percentile(results[:, col], list(percentiles[col]))
        for col in range(2)
    ]
    ci = np.array(per_column).T
    assert_allclose(ci_direct, ci)
def test_conf_int_norm(bs_setup):
    """Intervals from 'norm', 'var' and 'cov' match a normal approximation."""
    n_reps = 200
    data, statistic = bs_setup.x, bs_setup.func
    bs = IIDBootstrap(data)

    ci_both = bs.conf_int(statistic, reps=n_reps, size=0.90, method="norm")
    bs.reset()
    ci_upper = bs.conf_int(
        statistic, tail="upper", reps=n_reps, size=0.95, method="var"
    )
    bs.reset()
    ci_lower = bs.conf_int(
        statistic, tail="lower", reps=n_reps, size=0.95, method="cov"
    )
    bs.reset()

    # Reference bounds from the bootstrap covariance and normal quantiles.
    boot_cov = bs.cov(statistic, reps=n_reps)
    point = statistic(data)
    se = np.sqrt(np.diag(boot_cov))
    want_upper = point + stats.norm.ppf(0.95) * se
    want_lower = point + stats.norm.ppf(0.05) * se

    assert_allclose(want_lower, ci_both[0, :])
    assert_allclose(want_upper, ci_both[1, :])
    assert_allclose(ci_both[1, :], ci_upper[1, :])
    assert_allclose(ci_both[0, :], ci_lower[0, :])

    # One-sided intervals are unbounded on their open side.
    unbounded = np.full_like(ci_lower[0, :], np.inf)
    assert_equal(unbounded, ci_lower[1, :])
    assert_equal(-1 * unbounded, ci_upper[0, :])
def test_conf_int_bias_corrected(self):
    """'bc' and 'debiased' agree and match a hand-built interval."""
    reps = 20
    bs = IIDBootstrap(self.x)
    bs.seed(23456)

    def column_mean(y):
        return y.mean(axis=0)

    ci_bc = bs.conf_int(column_mean, reps=reps, method="bc")
    bs.reset()
    ci_debiased = bs.conf_int(column_mean, reps=reps, method="debiased")
    assert_equal(ci_bc, ci_debiased)

    # Bias term: normal quantile of the share of bootstrap statistics that
    # fall below the base estimate, one entry per output column.
    base, results = bs._base, bs._results
    share_below = np.array(
        [np.mean(results[:, col] < base[col]) for col in range(2)]
    )
    b = stats.norm.ppf(share_below)
    q = stats.norm.ppf(np.array([0.025, 0.975]))[:, None]

    # Rows of ``percentiles`` follow the two tails, columns the outputs.
    percentiles = 100 * stats.norm.cdf(2 * b + q)
    per_column = [
        np.percentile(results[:, col], list(percentiles[:, col]))
        for col in range(2)
    ]
    expected = np.array(per_column).T
    assert_allclose(ci_debiased, expected)
def test_conf_int_percentile(bs_setup):
    """Percentile intervals equal the percentiles of replayed bootstrap draws."""
    n_reps = 200
    statistic = bs_setup.func
    bs = IIDBootstrap(bs_setup.x)

    ci_both = bs.conf_int(statistic, reps=n_reps, size=0.90, method="percentile")
    bs.reset()
    ci_upper = bs.conf_int(
        statistic, tail="upper", reps=n_reps, size=0.95, method="percentile"
    )
    bs.reset()
    ci_lower = bs.conf_int(
        statistic, tail="lower", reps=n_reps, size=0.95, method="percentile"
    )
    bs.reset()

    # Evaluate the statistic on each bootstrap draw after the reset.
    replayed = np.zeros((n_reps, 2))
    for row, (pos, _) in enumerate(bs.bootstrap(n_reps)):
        replayed[row] = statistic(*pos)

    want_upper = np.percentile(replayed, 95, axis=0)
    want_lower = np.percentile(replayed, 5, axis=0)
    assert_allclose(want_lower, ci_both[0, :])
    assert_allclose(want_upper, ci_both[1, :])
    assert_allclose(ci_both[1, :], ci_upper[1, :])
    assert_allclose(ci_both[0, :], ci_lower[0, :])

    # One-sided intervals are unbounded on their open side.
    unbounded = np.full_like(ci_lower[0, :], np.inf)
    assert_equal(unbounded, ci_lower[1, :])
    assert_equal(-1 * unbounded, ci_upper[0, :])
def get_confidence_interval(scores, ci_method='bca', ci_size=0.95,
                            replications=100000, seed_value=None):
    """
    Compute a two sided bootstrap confidence interval for the mean score.

    NaN entries in ``scores`` are dropped first. With no values left, both
    bounds are NaN. When the remaining values span less than 0.000001, the
    minimum and maximum are returned without bootstrapping.

    Returns a dict with the keys 'size', 'lower' and 'upper'.
    """
    def mean_as_array(sample):
        # The statistic is returned as a one-element array.
        return np.array([sample.mean()])

    values = np.array(
        [float(item) for item in scores if not math.isnan(item)])

    if values.size == 0:
        nan = float('nan')
        return {'size': ci_size, 'lower': nan, 'upper': nan}

    smallest, largest = min(values), max(values)
    if largest - smallest < 0.000001:
        return {'size': ci_size, 'lower': smallest, 'upper': largest}

    bootstrap = IIDBootstrap(values)
    if seed_value is not None:
        bootstrap.seed(seed_value)

    interval = bootstrap.conf_int(mean_as_array, replications,
                                  method=ci_method, size=ci_size, tail='two')
    lower, upper = interval[0][0], interval[1][0]
    return {'size': ci_size, 'lower': lower, 'upper': upper}
def test_bca_against_bcajack(self):
    """Pin BCa acceleration, bias and a 90% interval for a ratio of means.

    The commented-out rpy2 code shows how R's ``bcaboot::bcajack`` can be
    run for comparison; the values it gave are noted next to the assertions.
    """
    # import rpy2.rinterface as ri
    # import rpy2.robjects as robjects
    # import rpy2.robjects.numpy2ri
    # from rpy2.robjects.packages import importr
    # rpy2.robjects.numpy2ri.activate()
    # utils = importr('utils')
    # try:
    #     bcaboot = importr('bcaboot')
    # except Exception:
    #     utils.install_packages('bcaboot',
    #                            repos='http://cran.us.r-project.org')
    #     bcaboot = importr('bcaboot')

    # Fixed seeds: one for the observations, one for the bootstrap.
    obs_seed = 42
    observations = np.random.RandomState(obs_seed).multivariate_normal(
        mean=[8, 4], cov=np.identity(2), size=20
    )
    B = 2000
    boot_seed = 123
    arch_bs = IIDBootstrap(
        observations, random_state=np.random.RandomState(boot_seed)
    )
    ci_size = 0.90

    def ratio_of_means(x):
        # Second column mean divided by first column mean.
        column_means = x.mean(axis=0)
        return column_means[1] / column_means[0]

    arch_ci = arch_bs.conf_int(
        func=ratio_of_means, reps=B, size=ci_size, method="bca"
    )

    # # callable from R
    # @ri.rternalize
    # def func_r(x):
    #     x = np.asarray(x)
    #     _mean = x.mean(axis=0)
    #     return float(_mean[1] / _mean[0])
    # output = bcaboot.bcajack(x=observations, B=float(B), func=func_r)

    acceleration = arch_bs._bca_acceleration(ratio_of_means)
    bias = arch_bs._bca_bias()

    # bca_lims = np.array(output[1])[:, 0]
    # # bca confidence intervals for: 0.025, 0.05, 0.1, 0.16, 0.5,
    # 0.84, 0.9, 0.95, 0.975
    # bcajack_ci_90 = [bca_lims[1], bca_lims[-2]]

    # bcajack should estimate similar "a" using jackknife on
    # the same observations
    assert_allclose(acceleration, -0.0004068984)
    # bcajack returns b (or z0) = -0.03635412, but based on
    # different bootstrap samples
    assert_allclose(bias, 0.04764396)

    # bcajack_ci_90 = [0.42696, 0.53188]
    observed_ci = list(arch_ci[:, -1])
    saved_arch_ci_90 = [0.42719805360154717, 0.5336561953393736]
    assert_allclose(observed_ci, saved_arch_ci_90)
def test_pandas_integer_index(bs_setup):
    """An offset integer index on the DataFrame does not change the rows drawn."""
    array_data = bs_setup.x
    frame_data = bs_setup.x_df.copy()
    # Relabel the rows 10, 11, ... so labels differ from positions.
    frame_data.index = 10 + np.arange(array_data.shape[0])

    bs = IIDBootstrap(array_data, frame_data)
    bs.seed(23456)
    for positional, _ in bs.bootstrap(10):
        from_array, from_frame = positional[0], positional[1]
        assert_equal(from_array, np.asarray(from_frame))
def test_studentization_error():
    """A statistic with a constant component raises ``StudentizationError``."""

    def mean_and_constant(sample):
        # The second element is the literal 3 for every sample.
        return np.array([sample.mean(), 3])

    draws = np.random.standard_normal(100)
    bs = IIDBootstrap(draws)
    with pytest.raises(StudentizationError):
        bs.conf_int(mean_and_constant, 100, method="studentized")
def test_pandas_integer_index(self):
    """Array and relabelled DataFrame inputs yield the same bootstrap rows."""
    values = self.x
    relabelled = self.x_df.copy()
    # Give the frame an integer index that starts at 10 instead of 0.
    relabelled.index = 10 + np.arange(values.shape[0])

    bs = IIDBootstrap(values, relabelled)
    bs.seed(23456)
    for positional, _ in bs.bootstrap(10):
        array_sample = positional[0]
        frame_sample = positional[1]
        assert_equal(array_sample, frame_sample.values)
def test_conf_int_bca_scaler(bs_setup):
    """A BCa interval for the scalar-valued ``np.mean`` has shape (2, 1)."""
    n_reps = 100
    failure_message = (
        "conf_int(method='bca') scalar input regression. Ensure "
        "output is at least 1D with numpy.atleast_1d()."
    )
    bs = IIDBootstrap(bs_setup.y)
    bs.seed(23456)
    interval = bs.conf_int(np.mean, reps=n_reps, method="bca")
    assert interval.shape == (2, 1), failure_message
def test_conf_int_bca_scaler(self):
    """``conf_int`` with 'bca' and ``np.mean`` must return a (2, 1) array."""
    reps = 100
    bs = IIDBootstrap(self.y)
    bs.seed(23456)
    ci = bs.conf_int(np.mean, reps=reps, method="bca")

    # Message shown if the expected shape is not produced.
    regression_note = (
        "conf_int(method='bca') scalar input regression. Ensure "
        "output is at least 1D with numpy.atleast_1d()."
    )
    assert ci.shape == (2, 1), regression_note
def test_bca_extra_kwarg():
    """BCa intervals work when ``func`` needs an extra keyword (GH 366)."""

    def mean_with_extra(a, b):
        # ``b`` is required by the signature but not used.
        return a.mean(0)

    sample = np.random.standard_normal(1000)
    bs = IIDBootstrap(sample)
    interval = bs.conf_int(
        mean_with_extra, extra_kwargs={"b": "anything"}, reps=100, method="bca"
    )
    assert isinstance(interval, np.ndarray)
    assert interval.shape == (2, 1)
def test_iid_semiparametric(bs_setup):
    """``conf_int`` with semiparametric sampling returns a (2, 1) array."""
    bs = IIDBootstrap(bs_setup.y)

    def centred_mean(y, axis=0, params=None):
        # Without ``params`` this is the plain mean; with it, the mean of
        # the data after subtracting ``params``.
        if params is None:
            return y.mean(axis=axis)
        return (y - params).mean(axis=axis)

    interval = bs.conf_int(centred_mean, reps=10, sampling="semiparametric")
    assert interval.shape == (2, 1)
def test_numpy(bs_setup):
    """Bootstrapped NumPy inputs equal the originals indexed by ``bs.index``."""
    x, y, z = bs_setup.x, bs_setup.y, bs_setup.z

    # One positional array.
    bs = IIDBootstrap(y)
    bs.seed(23456)
    for pos_data, kw_data in bs.bootstrap(10):
        rows = bs.index
        assert_equal(len(kw_data.keys()), 0)
        assert_equal(y[rows], pos_data[0])
    # Ensure no changes to original data
    assert_equal(bs._args[0], y)

    # One keyword array, also reachable as an attribute.
    bs = IIDBootstrap(y=y)
    bs.seed(23456)
    for pos_data, kw_data in bs.bootstrap(10):
        rows = bs.index
        assert_equal(len(pos_data), 0)
        assert_equal(y[rows], kw_data["y"])
        assert_equal(y[rows], bs.y)
    # Ensure no changes to original data
    assert_equal(bs._kwargs["y"], y)

    # Three positional arrays.
    bs = IIDBootstrap(x, y, z)
    bs.seed(23456)
    for pos_data, kw_data in bs.bootstrap(10):
        rows = bs.index
        assert_equal(len(pos_data), 3)
        assert_equal(len(kw_data.keys()), 0)
        for original, sampled in zip((x, y, z), pos_data):
            assert_equal(original[rows], sampled)

    # One positional array and two keyword arrays.
    bs = IIDBootstrap(x, y=y, z=z)
    bs.seed(23456)
    for pos_data, kw_data in bs.bootstrap(10):
        rows = bs.index
        assert_equal(len(pos_data), 1)
        assert_equal(len(kw_data.keys()), 2)
        assert_equal(x[rows], pos_data[0])
        assert_equal(y[rows], kw_data["y"])
        assert_equal(z[rows], kw_data["z"])
        assert_equal(y[rows], bs.y)
        assert_equal(z[rows], bs.z)
def test_pandas(bs_setup):
    """Bootstrapped pandas inputs equal ``.iloc[bs.index]`` of the originals."""
    x, y, z = bs_setup.x_df, bs_setup.y_series, bs_setup.z_df

    # One positional Series.
    bs = IIDBootstrap(y)
    bs.seed(23456)
    for pos_data, kw_data in bs.bootstrap(10):
        rows = bs.index
        assert_equal(len(kw_data.keys()), 0)
        assert_series_equal(y.iloc[rows], pos_data[0])
    # Ensure no changes to original data
    assert_series_equal(bs._args[0], y)

    # One keyword Series, also reachable as an attribute.
    bs = IIDBootstrap(y=y)
    bs.seed(23456)
    for pos_data, kw_data in bs.bootstrap(10):
        rows = bs.index
        assert_equal(len(pos_data), 0)
        assert_series_equal(y.iloc[rows], kw_data["y"])
        assert_series_equal(y.iloc[rows], bs.y)
    # Ensure no changes to original data
    assert_series_equal(bs._kwargs["y"], y)

    # DataFrame, Series and DataFrame passed positionally.
    bs = IIDBootstrap(x, y, z)
    bs.seed(23456)
    for pos_data, kw_data in bs.bootstrap(10):
        rows = bs.index
        assert_equal(len(pos_data), 3)
        assert_equal(len(kw_data.keys()), 0)
        assert_frame_equal(x.iloc[rows], pos_data[0])
        assert_series_equal(y.iloc[rows], pos_data[1])
        assert_frame_equal(z.iloc[rows], pos_data[2])

    # One positional DataFrame plus keyword Series and DataFrame.
    bs = IIDBootstrap(x, y=y, z=z)
    bs.seed(23456)
    for pos_data, kw_data in bs.bootstrap(10):
        rows = bs.index
        assert_equal(len(pos_data), 1)
        assert_equal(len(kw_data.keys()), 2)
        assert_frame_equal(x.iloc[rows], pos_data[0])
        assert_series_equal(y.iloc[rows], kw_data["y"])
        assert_frame_equal(z.iloc[rows], kw_data["z"])
        assert_series_equal(y.iloc[rows], bs.y)
        assert_frame_equal(z.iloc[rows], bs.z)