def test_iid_args_kwargs(bs_setup):
    """Check that positional and keyword inputs give equal draws under one seed."""
    positional = IIDBootstrap(bs_setup.y)
    positional.seed(0)
    keyword = IIDBootstrap(y=bs_setup.y)
    keyword.seed(0)
    paired_draws = zip(positional.bootstrap(1), keyword.bootstrap(1))
    for from_args, from_kwargs in paired_draws:
        # Each draw is a (positional data, keyword data) pair.
        pos_sample = from_args[0][0]
        kw_sample = from_kwargs[1]["y"]
        assert np.all(pos_sample == kw_sample)
def test_reset(self):
    """Verify ``reset`` restores the initial state and the draws that follow it."""
    bs = IIDBootstrap(np.arange(100))
    initial_state = bs.get_state()
    for pos_data, _ in bs.bootstrap(10):
        last_draw = pos_data[0]

    bs.reset()
    restored_state = bs.get_state()
    for pos_data, _ in bs.bootstrap(10):
        last_draw_after_reset = pos_data[0]

    assert_equal(last_draw, last_draw_after_reset)
    assert_equal(initial_state, restored_state)
def test_reset(self):
    """Check the last of ten draws and the state are unchanged after ``reset``."""
    bs = IIDBootstrap(np.arange(100))
    state = bs.get_state()
    # Only the final draw of each pass is compared.
    first_pass = [pos[0] for pos, _ in bs.bootstrap(10)]
    bs.reset()
    state_reset = bs.get_state()
    second_pass = [pos[0] for pos, _ in bs.bootstrap(10)]
    assert_equal(first_pass[-1], second_pass[-1])
    assert_equal(state, state_reset)
def test_state(self):
    """Check re-seeding and ``set_state`` both reproduce the same final draw."""
    bs = IIDBootstrap(np.arange(100))
    bs.seed(23456)
    saved_state = bs.get_state()
    for pos_data, _ in bs.bootstrap(10):
        baseline = pos_data[0]

    # Replay after seeding again with the same value.
    bs.seed(23456)
    for pos_data, _ in bs.bootstrap(10):
        after_reseed = pos_data[0]

    # Replay after restoring the state captured right after the first seed.
    bs.set_state(saved_state)
    for pos_data, _ in bs.bootstrap(10):
        after_set_state = pos_data[0]

    assert_equal(baseline, after_reseed)
    assert_equal(baseline, after_set_state)
def test_state(self):
    """Compare the final draw after seeding, re-seeding and restoring state."""
    bs = IIDBootstrap(np.arange(100))
    bs.seed(23456)
    state = bs.get_state()

    def last_draw():
        # Run ten replications and keep only the final positional sample.
        return [pos[0] for pos, _ in bs.bootstrap(10)][-1]

    final = last_draw()
    bs.seed(23456)
    final_seed = last_draw()
    bs.set_state(state)
    final_state = last_draw()
    assert_equal(final, final_seed)
    assert_equal(final, final_state)
def test_conf_int_basic(self):
    """Compare ``method='basic'`` intervals with a direct percentile calculation."""
    reps = 200
    bs = IIDBootstrap(self.x)
    two_sided = bs.conf_int(self.func, reps=reps, size=0.90, method='basic')
    bs.reset()
    upper_tail = bs.conf_int(self.func, tail='upper', reps=reps, size=0.95,
                             method='basic')
    bs.reset()
    lower_tail = bs.conf_int(self.func, tail='lower', reps=reps, size=0.95,
                             method='basic')
    bs.reset()

    # Recompute the bootstrap estimates by hand after the reset.
    estimates = np.zeros((reps, 2))
    for row, (pos, _) in enumerate(bs.bootstrap(reps)):
        estimates[row] = self.func(*pos)

    mu = self.func(self.x)
    expected_upper = mu + (mu - np.percentile(estimates, 5, axis=0))
    expected_lower = mu + (mu - np.percentile(estimates, 95, axis=0))
    assert_allclose(expected_lower, two_sided[0, :])
    assert_allclose(expected_upper, two_sided[1, :])
    assert_allclose(two_sided[1, :], upper_tail[1, :])
    assert_allclose(two_sided[0, :], lower_tail[0, :])

    # The open side of each one-tailed interval is expected to be infinite.
    unbounded = np.full_like(lower_tail[0, :], np.inf)
    assert_equal(unbounded, lower_tail[1, :])
    assert_equal(-1 * unbounded, upper_tail[0, :])
def test_conf_int_percentile(self):
    """Compare ``method='percentile'`` intervals with direct percentiles."""
    reps = 200
    bs = IIDBootstrap(self.x)

    def func(y):
        return y.mean(axis=0)

    two_sided = bs.conf_int(func, reps=reps, size=0.90, method='percentile')
    bs.reset()
    upper_tail = bs.conf_int(func, tail='upper', reps=reps, size=0.95,
                             method='percentile')
    bs.reset()
    lower_tail = bs.conf_int(func, tail='lower', reps=reps, size=0.95,
                             method='percentile')
    bs.reset()

    # Recompute the bootstrap estimates by hand after the reset.
    estimates = np.zeros((reps, 2))
    for row, (pos, _) in enumerate(bs.bootstrap(reps)):
        estimates[row] = func(*pos)

    expected_upper = np.percentile(estimates, 95, axis=0)
    expected_lower = np.percentile(estimates, 5, axis=0)
    assert_allclose(expected_lower, two_sided[0, :])
    assert_allclose(expected_upper, two_sided[1, :])
    assert_allclose(two_sided[1, :], upper_tail[1, :])
    assert_allclose(two_sided[0, :], lower_tail[0, :])

    # The open side of each one-tailed interval is expected to be infinite.
    unbounded = np.full_like(lower_tail[0, :], np.inf)
    assert_equal(unbounded, lower_tail[1, :])
    assert_equal(-1 * unbounded, upper_tail[0, :])
def test_conf_int_percentile(bs_setup):
    """Compare ``method="percentile"`` intervals with direct percentiles."""
    reps = 200
    func = bs_setup.func
    bs = IIDBootstrap(bs_setup.x)

    two_sided = bs.conf_int(func, reps=reps, size=0.90, method="percentile")
    bs.reset()
    upper_tail = bs.conf_int(
        func, tail="upper", reps=reps, size=0.95, method="percentile"
    )
    bs.reset()
    lower_tail = bs.conf_int(
        func, tail="lower", reps=reps, size=0.95, method="percentile"
    )
    bs.reset()

    # Recompute the bootstrap estimates by hand after the reset.
    estimates = np.zeros((reps, 2))
    for row, (pos, _) in enumerate(bs.bootstrap(reps)):
        estimates[row] = func(*pos)

    expected_upper = np.percentile(estimates, 95, axis=0)
    expected_lower = np.percentile(estimates, 5, axis=0)
    assert_allclose(expected_lower, two_sided[0, :])
    assert_allclose(expected_upper, two_sided[1, :])
    assert_allclose(two_sided[1, :], upper_tail[1, :])
    assert_allclose(two_sided[0, :], lower_tail[0, :])

    # The open side of each one-tailed interval is expected to be infinite.
    unbounded = np.full_like(lower_tail[0, :], np.inf)
    assert_equal(unbounded, lower_tail[1, :])
    assert_equal(-1 * unbounded, upper_tail[0, :])
def test_pandas_integer_index(self):
    """Bootstrap an array alongside a frame copy whose integer index starts at 10."""
    x = self.x
    shifted = self.x_df.copy()
    shifted.index = 10 + np.arange(x.shape[0])
    bs = IIDBootstrap(x, shifted)
    bs.seed(23456)
    for pos_data, _ in bs.bootstrap(10):
        array_draw, frame_draw = pos_data
        # Both inputs must have been resampled with the same rows.
        assert_equal(array_draw, frame_draw.values)
def test_pandas_integer_index(self):
    """Check array and frame draws agree when the frame index is offset by 10."""
    nobs = self.x.shape[0]
    x_int = self.x_df.copy()
    # Offset the labels so they no longer coincide with row positions.
    x_int.index = 10 + np.arange(nobs)
    bs = IIDBootstrap(self.x, x_int)
    bs.seed(23456)
    for drawn, _ in bs.bootstrap(10):
        assert_equal(drawn[0], drawn[1].values)
def test_pandas_integer_index(bs_setup):
    """Check array and frame draws agree when the frame index is offset by 10."""
    values = bs_setup.x
    frame = bs_setup.x_df.copy()
    # Offset the labels so they no longer coincide with row positions.
    frame.index = 10 + np.arange(values.shape[0])
    bs = IIDBootstrap(values, frame)
    bs.seed(23456)
    for pos_data, _ in bs.bootstrap(10):
        array_draw, frame_draw = pos_data
        assert_equal(array_draw, np.asarray(frame_draw))
def test_conf_int_parametric(self):
    """Exercise ``conf_int`` with ``sampling='parametric'`` and ``sampling='semi'``.

    For each mode the interval must have two rows with the lower row strictly
    below the upper row, and the estimates stored in ``bs._results`` must match
    a manual replay of the bootstrap after ``reset``.
    """

    def param_func(x, params=None, state=None):
        # Without a random state this is the plain column mean.
        if state is None:
            return x.mean(0)
        # Otherwise simulate standard-normal noise around the supplied params.
        noise = state.standard_normal(x.shape)
        return (params + noise).mean(0)

    def semi_func(x, params=None):
        # Without params this is the plain column mean.
        if params is None:
            return x.mean(0)
        resid = x - params
        return (params + resid).mean(0)

    # Single source of truth for the replication count: it sizes the result
    # arrays and drives every bootstrap call below.
    reps = 100
    bs = IIDBootstrap(self.x)
    bs.seed(23456)
    ci = bs.conf_int(func=param_func, reps=reps, sampling='parametric')
    assert len(ci) == 2
    assert np.all(ci[0] < ci[1])

    bs.reset()
    mu = self.x.mean(0)
    results = np.zeros((reps, 2))
    for row, (pos, _) in enumerate(bs.bootstrap(reps)):
        results[row] = param_func(*pos, params=mu, state=bs.random_state)
    assert_equal(bs._results, results)

    bs.reset()
    ci = bs.conf_int(func=semi_func, reps=reps, sampling='semi')
    assert len(ci) == 2
    assert np.all(ci[0] < ci[1])

    bs.reset()
    results = np.zeros((reps, 2))
    for row, (pos, _) in enumerate(bs.bootstrap(reps)):
        results[row] = semi_func(*pos, params=mu)
    assert_allclose(bs._results, results)
def test_numpy(self):
    """Check resampled NumPy inputs match indexing the originals by ``bs.index``.

    Covers a single positional array, a single keyword array, three positional
    arrays, and a positional/keyword mix.
    """
    x, y, z = self.x, self.y, self.z

    # One positional array.
    bs = IIDBootstrap(y)
    bs.seed(23456)
    for pos_data, kw_data in bs.bootstrap(10):
        rows = bs.index
        assert_equal(len(kw_data), 0)
        assert_equal(y[rows], pos_data[0])
    # Ensure no changes to original data
    assert_equal(bs._args[0], y)

    # One keyword array, also reachable as an attribute.
    bs = IIDBootstrap(y=y)
    bs.seed(23456)
    for pos_data, kw_data in bs.bootstrap(10):
        rows = bs.index
        assert_equal(len(pos_data), 0)
        assert_equal(y[rows], kw_data['y'])
        assert_equal(y[rows], bs.y)
    # Ensure no changes to original data
    assert_equal(bs._kwargs['y'], y)

    # Three positional arrays.
    bs = IIDBootstrap(x, y, z)
    bs.seed(23456)
    for pos_data, kw_data in bs.bootstrap(10):
        rows = bs.index
        assert_equal(len(pos_data), 3)
        assert_equal(len(kw_data), 0)
        assert_equal(x[rows], pos_data[0])
        assert_equal(y[rows], pos_data[1])
        assert_equal(z[rows], pos_data[2])

    # One positional array plus two keyword arrays.
    bs = IIDBootstrap(x, y=y, z=z)
    bs.seed(23456)
    for pos_data, kw_data in bs.bootstrap(10):
        rows = bs.index
        assert_equal(len(pos_data), 1)
        assert_equal(len(kw_data), 2)
        assert_equal(x[rows], pos_data[0])
        assert_equal(y[rows], kw_data['y'])
        assert_equal(z[rows], kw_data['z'])
        assert_equal(y[rows], bs.y)
        assert_equal(z[rows], bs.z)
def test_numpy(bs_setup):
    """Check resampled NumPy inputs match indexing the originals by ``bs.index``.

    Covers a single positional array, a single keyword array, three positional
    arrays, and a positional/keyword mix.
    """
    x, y, z = bs_setup.x, bs_setup.y, bs_setup.z

    # Case 1: y passed positionally.
    bs = IIDBootstrap(y)
    bs.seed(23456)
    for args, kwargs in bs.bootstrap(10):
        idx = bs.index
        assert_equal(len(kwargs), 0)
        assert_equal(y[idx], args[0])
    # Ensure no changes to original data
    assert_equal(bs._args[0], y)

    # Case 2: y passed by keyword.
    bs = IIDBootstrap(y=y)
    bs.seed(23456)
    for args, kwargs in bs.bootstrap(10):
        idx = bs.index
        assert_equal(len(args), 0)
        assert_equal(y[idx], kwargs["y"])
        assert_equal(y[idx], bs.y)
    # Ensure no changes to original data
    assert_equal(bs._kwargs["y"], y)

    # Case 3: everything positional.
    bs = IIDBootstrap(x, y, z)
    bs.seed(23456)
    for args, kwargs in bs.bootstrap(10):
        idx = bs.index
        assert_equal(len(args), 3)
        assert_equal(len(kwargs), 0)
        assert_equal(x[idx], args[0])
        assert_equal(y[idx], args[1])
        assert_equal(z[idx], args[2])

    # Case 4: x positional, y and z by keyword.
    bs = IIDBootstrap(x, y=y, z=z)
    bs.seed(23456)
    for args, kwargs in bs.bootstrap(10):
        idx = bs.index
        assert_equal(len(args), 1)
        assert_equal(len(kwargs), 2)
        assert_equal(x[idx], args[0])
        assert_equal(y[idx], kwargs["y"])
        assert_equal(z[idx], kwargs["z"])
        assert_equal(y[idx], bs.y)
        assert_equal(z[idx], bs.z)
def test_pandas(self):
    """Check resampled pandas inputs match ``.iloc[bs.index]`` on the originals.

    Covers a single positional Series, a single keyword Series, three
    positional inputs, and a positional/keyword mix.
    """
    x, y, z = self.x_df, self.y_series, self.z_df

    # One positional Series.
    bs = IIDBootstrap(y)
    bs.seed(23456)
    for pos_data, kw_data in bs.bootstrap(10):
        rows = bs.index
        assert_equal(len(kw_data), 0)
        assert_series_equal(y.iloc[rows], pos_data[0])
    # Ensure no changes to original data
    assert_series_equal(bs._args[0], y)

    # One keyword Series, also reachable as an attribute.
    bs = IIDBootstrap(y=y)
    bs.seed(23456)
    for pos_data, kw_data in bs.bootstrap(10):
        rows = bs.index
        assert_equal(len(pos_data), 0)
        assert_series_equal(y.iloc[rows], kw_data['y'])
        assert_series_equal(y.iloc[rows], bs.y)
    # Ensure no changes to original data
    assert_series_equal(bs._kwargs['y'], y)

    # Three positional inputs.
    bs = IIDBootstrap(x, y, z)
    bs.seed(23456)
    for pos_data, kw_data in bs.bootstrap(10):
        rows = bs.index
        assert_equal(len(pos_data), 3)
        assert_equal(len(kw_data), 0)
        assert_frame_equal(x.iloc[rows], pos_data[0])
        assert_series_equal(y.iloc[rows], pos_data[1])
        assert_frame_equal(z.iloc[rows], pos_data[2])

    # One positional frame plus two keyword inputs.
    bs = IIDBootstrap(x, y=y, z=z)
    bs.seed(23456)
    for pos_data, kw_data in bs.bootstrap(10):
        rows = bs.index
        assert_equal(len(pos_data), 1)
        assert_equal(len(kw_data), 2)
        assert_frame_equal(x.iloc[rows], pos_data[0])
        assert_series_equal(y.iloc[rows], kw_data['y'])
        assert_frame_equal(z.iloc[rows], kw_data['z'])
        assert_series_equal(y.iloc[rows], bs.y)
        assert_frame_equal(z.iloc[rows], bs.z)
def test_pandas(bs_setup):
    """Check resampled pandas inputs match ``.iloc[bs.index]`` on the originals.

    Covers a single positional Series, a single keyword Series, three
    positional inputs, and a positional/keyword mix.
    """
    x, y, z = bs_setup.x_df, bs_setup.y_series, bs_setup.z_df

    # Case 1: y passed positionally.
    bs = IIDBootstrap(y)
    bs.seed(23456)
    for args, kwargs in bs.bootstrap(10):
        idx = bs.index
        assert_equal(len(kwargs), 0)
        assert_series_equal(y.iloc[idx], args[0])
    # Ensure no changes to original data
    assert_series_equal(bs._args[0], y)

    # Case 2: y passed by keyword.
    bs = IIDBootstrap(y=y)
    bs.seed(23456)
    for args, kwargs in bs.bootstrap(10):
        idx = bs.index
        assert_equal(len(args), 0)
        assert_series_equal(y.iloc[idx], kwargs["y"])
        assert_series_equal(y.iloc[idx], bs.y)
    # Ensure no changes to original data
    assert_series_equal(bs._kwargs["y"], y)

    # Case 3: everything positional.
    bs = IIDBootstrap(x, y, z)
    bs.seed(23456)
    for args, kwargs in bs.bootstrap(10):
        idx = bs.index
        assert_equal(len(args), 3)
        assert_equal(len(kwargs), 0)
        assert_frame_equal(x.iloc[idx], args[0])
        assert_series_equal(y.iloc[idx], args[1])
        assert_frame_equal(z.iloc[idx], args[2])

    # Case 4: x positional, y and z by keyword.
    bs = IIDBootstrap(x, y=y, z=z)
    bs.seed(23456)
    for args, kwargs in bs.bootstrap(10):
        idx = bs.index
        assert_equal(len(args), 1)
        assert_equal(len(kwargs), 2)
        assert_frame_equal(x.iloc[idx], args[0])
        assert_series_equal(y.iloc[idx], kwargs["y"])
        assert_frame_equal(z.iloc[idx], kwargs["z"])
        assert_series_equal(y.iloc[idx], bs.y)
        assert_frame_equal(z.iloc[idx], bs.z)
def test_apply(bs_setup):
    """Check ``apply`` equals calling the function on each bootstrap draw."""
    reps = 1000
    func = bs_setup.func
    bs = IIDBootstrap(bs_setup.x)
    bs.seed(23456)
    applied = bs.apply(func, reps)

    # Replay the same draws by hand after the reset.
    bs.reset(True)
    manual = np.array([func(*pos) for pos, _ in bs.bootstrap(reps)])
    assert_equal(applied, manual)
def test_apply(self):
    """Check ``apply`` equals calling ``self.func`` on each bootstrap draw."""
    reps = 1000
    bs = IIDBootstrap(self.x)
    bs.seed(23456)
    results = bs.apply(self.func, reps)

    # Pass an explicit boolean, matching the ``bs.reset(True)`` call used by the
    # fixture-based variant of this test; the original passed the seed value.
    # NOTE(review): assumes ``reset`` only tests its argument for truthiness -
    # confirm against IIDBootstrap.reset.
    bs.reset(True)
    direct_results = np.array(
        [self.func(*pos) for pos, _ in bs.bootstrap(reps)]
    )
    assert_equal(results, direct_results)
def test_cov(bs_setup):
    """Compare ``cov`` and ``var`` with covariances built directly from the draws.

    Runs once on the array input and once on the DataFrame input, each with
    ``recenter=False`` and ``recenter=True``.
    """
    reps = 10
    func = bs_setup.func

    def bootstrap_means(bootstrap):
        # Column means of the first positional input for every replication.
        means = np.zeros((reps, 2))
        for row, (pos, _) in enumerate(bootstrap.bootstrap(reps)):
            means[row] = pos[0].mean(axis=0)
        return means

    def outer_average(deviations):
        return deviations.T.dot(deviations) / reps

    # Array input.
    bs = IIDBootstrap(bs_setup.x)
    cov = bs.cov(func=func, reps=reps, recenter=False)
    bs.reset()
    means = bootstrap_means(bs)
    # Not recentered: deviations are taken from the full-sample mean.
    assert_allclose(cov, outer_average(means - bs_setup.x.mean(axis=0)))

    bs.reset()
    cov = bs.cov(func=func, recenter=True, reps=reps)
    # Recentered: deviations are taken from the mean of the bootstrap means.
    assert_allclose(cov, outer_average(means - means.mean(axis=0)))

    # DataFrame input.
    bs = IIDBootstrap(bs_setup.x_df)
    cov = bs.cov(func=func, reps=reps, recenter=False)
    bs.reset()
    var = bs.var(func=func, reps=reps, recenter=False)
    bs.reset()
    means = bootstrap_means(bs)
    direct_cov = outer_average(means - bs_setup.x.mean(axis=0))
    assert_allclose(cov, direct_cov)
    assert_allclose(var, np.diag(direct_cov))

    bs.reset()
    cov = bs.cov(func=func, recenter=True, reps=reps)
    assert_allclose(cov, outer_average(means - means.mean(axis=0)))
def test_cov(self):
    """Compare ``cov`` with covariances built directly from the bootstrap draws.

    Runs once on the array input and once on the DataFrame input, each with
    ``recenter=False`` and ``recenter=True``.
    """

    def func(y):
        return y.mean(axis=0)

    reps = 10
    full_sample_mean = self.x.mean(axis=0)

    for source in (self.x, self.x_df):
        bs = IIDBootstrap(source)
        cov = bs.cov(func=func, reps=reps, recenter=False)
        bs.reset()

        # Replay the draws and record the column means of each one.
        means = np.zeros((reps, 2))
        for row, (pos, _) in enumerate(bs.bootstrap(reps)):
            means[row] = pos[0].mean(axis=0)

        # Not recentered: deviations are taken from the full-sample mean.
        deviations = means - full_sample_mean
        assert_allclose(cov, deviations.T.dot(deviations) / reps)

        bs.reset()
        cov = bs.cov(func=func, recenter=True, reps=reps)
        # Recentered: deviations are taken from the mean of the bootstrap means.
        deviations = means - means.mean(axis=0)
        assert_allclose(cov, deviations.T.dot(deviations) / reps)
def test_apply_series(self):
    """Check ``apply`` on a Series input against a manual loop over the draws."""
    reps = 1000
    bs = IIDBootstrap(self.y_series)
    bs.seed(23456)
    results = bs.apply(self.func, reps)

    # Pass an explicit boolean, matching the ``bs.reset(True)`` call used by the
    # fixture-based ``test_apply``; the original passed the seed value.
    # NOTE(review): assumes ``reset`` only tests its argument for truthiness -
    # confirm against IIDBootstrap.reset.
    bs.reset(True)
    direct_results = np.array(
        [self.func(*pos) for pos, _ in bs.bootstrap(reps)]
    )
    # Add a trailing axis before comparing with the output of ``apply``.
    direct_results = direct_results[:, None]
    assert_equal(results, direct_results)
def test_mixed_types(self):
    """Bootstrap a Series, a DataFrame and an array together in one call."""
    frame, series, array = self.x_df, self.y_series, self.z
    bs = IIDBootstrap(series, x=frame, z=array)
    bs.seed(23456)
    for pos_data, kw_data in bs.bootstrap(10):
        rows = bs.index
        assert_equal(len(pos_data), 1)
        assert_equal(len(kw_data), 2)
        # pandas inputs are compared through positional row selection.
        assert_frame_equal(frame.iloc[rows], kw_data['x'])
        assert_frame_equal(frame.iloc[rows], bs.x)
        assert_series_equal(series.iloc[rows], pos_data[0])
        # The plain array is indexed directly.
        assert_equal(array[rows], kw_data['z'])
        assert_equal(array[rows], bs.z)
def test_mixed_types(bs_setup):
    """Bootstrap a Series, a DataFrame and an array together in one call."""
    frame = bs_setup.x_df
    series = bs_setup.y_series
    array = bs_setup.z
    bs = IIDBootstrap(series, x=frame, z=array)
    bs.seed(23456)
    for args, kwargs in bs.bootstrap(10):
        idx = bs.index
        assert_equal(len(args), 1)
        assert_equal(len(kwargs), 2)
        # pandas inputs are compared through positional row selection.
        assert_frame_equal(frame.iloc[idx], kwargs["x"])
        assert_frame_equal(frame.iloc[idx], bs.x)
        assert_series_equal(series.iloc[idx], args[0])
        # The plain array is indexed directly.
        assert_equal(array[idx], kwargs["z"])
        assert_equal(array[idx], bs.z)
def test_apply(self):
    """Check ``apply`` with a local mean function against a manual loop."""
    reps = 1000
    bs = IIDBootstrap(self.x)
    bs.seed(23456)

    def func(y):
        return y.mean(0)

    results = bs.apply(func, reps)

    # Pass an explicit boolean rather than the seed value the original used.
    # NOTE(review): assumes ``reset`` only tests its argument for truthiness -
    # confirm against IIDBootstrap.reset.
    bs.reset(True)
    direct_results = np.array([func(*pos) for pos, _ in bs.bootstrap(reps)])
    assert_equal(results, direct_results)
def test_apply_series(self):
    """Check ``apply`` on a Series with a local mean function against a manual loop."""
    reps = 1000
    bs = IIDBootstrap(self.y_series)
    bs.seed(23456)

    def func(y):
        return y.mean(0)

    results = bs.apply(func, reps)

    # Pass an explicit boolean rather than the seed value the original used.
    # NOTE(review): assumes ``reset`` only tests its argument for truthiness -
    # confirm against IIDBootstrap.reset.
    bs.reset(True)
    direct_results = np.array([func(*pos) for pos, _ in bs.bootstrap(reps)])
    # Add a trailing axis before comparing with the output of ``apply``.
    direct_results = direct_results[:, None]
    assert_equal(results, direct_results)
def test_studentized(bs_setup):
    """Check ``method="studentized"`` intervals against a direct computation.

    Runs once with an explicit ``std_err_func`` and once with
    ``studentize_reps=50``, then expects exactly one warning when ``reuse=True``
    is requested.
    """
    reps = 20
    func = bs_setup.func
    bs = IIDBootstrap(bs_setup.x)
    bs.seed(23456)

    def std_err_func(mu, y):
        errors = y - mu
        var = (errors ** 2.0).mean(axis=0)
        return np.sqrt(var / y.shape[0])

    def direct_interval(base, results, stud_results):
        # Scale the studentized percentiles by the spread of the estimates.
        errors = results - results.mean(0)
        std_err = np.sqrt(np.mean(errors ** 2.0, axis=0))
        ci_direct = np.zeros((2, 2))
        for i in range(2):
            ci_direct[0, i] = base[i] - std_err[i] * np.percentile(
                stud_results[:, i], 97.5
            )
            ci_direct[1, i] = base[i] - std_err[i] * np.percentile(
                stud_results[:, i], 2.5
            )
        return ci_direct

    # Part 1: standard errors come from the supplied function.
    ci = bs.conf_int(
        func,
        reps=reps,
        method="studentized",
        std_err_func=std_err_func,
    )
    bs.reset()
    base = func(bs_setup.x)
    results = np.zeros((reps, 2))
    stud_results = np.zeros((reps, 2))
    for row, (pos, _) in enumerate(bs.bootstrap(reps=reps)):
        results[row] = func(*pos)
        std_err = std_err_func(results[row], *pos)
        stud_results[row] = (results[row] - base) / std_err
    assert_allclose(results, bs._results)
    assert_allclose(stud_results, bs._studentized_results)
    assert_allclose(ci, direct_interval(base, results, stud_results))

    # Part 2: standard errors come from an inner bootstrap of 50 replications.
    bs.reset()
    ci = bs.conf_int(func, reps=reps, method="studentized", studentize_reps=50)
    bs.reset()
    base = func(bs_setup.x)
    results = np.zeros((reps, 2))
    stud_results = np.zeros((reps, 2))
    for row, (pos, _) in enumerate(bs.bootstrap(reps=reps)):
        results[row] = func(*pos)
        inner_bs = IIDBootstrap(*pos)
        # The inner seed is drawn from the outer bootstrap's random state.
        seed = bs.random_state.randint(2 ** 31 - 1)
        inner_bs.seed(seed)
        cov = inner_bs.cov(func, reps=50)
        std_err = np.sqrt(np.diag(cov))
        stud_results[row] = (results[row] - base) / std_err
    assert_allclose(results, bs._results)
    assert_allclose(stud_results, bs._studentized_results)
    assert_allclose(ci, direct_interval(base, results, stud_results))

    # Part 3: requesting reuse here must emit exactly one warning.
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        bs.conf_int(
            func,
            reps=reps,
            method="studentized",
            std_err_func=std_err_func,
            reuse=True,
        )
        assert_equal(len(w), 1)
def test_studentized(self):
    """Check ``method='studentized'`` intervals against a direct computation.

    Runs once with an explicit ``std_err_func`` and once with
    ``studentize_reps=50``, then expects exactly one warning when ``reuse=True``
    is requested.
    """
    reps = 20
    bs = IIDBootstrap(self.x)
    bs.seed(23456)

    def func(y):
        return y.mean(axis=0)

    def std_err_func(mu, y):
        errors = y - mu
        var = (errors ** 2.0).mean(axis=0)
        return np.sqrt(var / y.shape[0])

    def inner_std_err(pos):
        # Standard error from an inner bootstrap seeded by the outer random state.
        inner_bs = IIDBootstrap(*pos)
        seed = bs.random_state.randint(2 ** 31 - 1)
        inner_bs.seed(seed)
        cov = inner_bs.cov(func, reps=50)
        return np.sqrt(np.diag(cov))

    def replay(std_err_for):
        # Re-run the bootstrap, collecting estimates and studentized estimates.
        base = func(self.x)
        results = np.zeros((reps, 2))
        stud_results = np.zeros((reps, 2))
        for row, (pos, _) in enumerate(bs.bootstrap(reps=reps)):
            results[row] = func(*pos)
            std_err = std_err_for(results[row], pos)
            stud_results[row] = (results[row] - base) / std_err
        return base, results, stud_results

    def check_against_direct(ci, base, results, stud_results):
        assert_allclose(results, bs._results)
        assert_allclose(stud_results, bs._studentized_results)
        errors = results - results.mean(0)
        std_err = np.sqrt(np.mean(errors ** 2.0, axis=0))
        ci_direct = np.zeros((2, 2))
        for i in range(2):
            ci_direct[0, i] = base[i] - std_err[i] * np.percentile(
                stud_results[:, i], 97.5)
            ci_direct[1, i] = base[i] - std_err[i] * np.percentile(
                stud_results[:, i], 2.5)
        assert_allclose(ci, ci_direct)

    # Part 1: standard errors come from the supplied function.
    ci = bs.conf_int(func, reps=reps, method='studentized',
                     std_err_func=std_err_func)
    bs.reset()
    check_against_direct(
        ci, *replay(lambda estimate, pos: std_err_func(estimate, *pos)))

    # Part 2: standard errors come from an inner bootstrap of 50 replications.
    bs.reset()
    ci = bs.conf_int(func, reps=reps, method='studentized',
                     studentize_reps=50)
    bs.reset()
    check_against_direct(ci, *replay(lambda estimate, pos: inner_std_err(pos)))

    # Part 3: requesting reuse here must emit exactly one warning.
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        bs.conf_int(func, reps=reps, method='studentized',
                    std_err_func=std_err_func, reuse=True)
        assert_equal(len(w), 1)