def mean_ci(data, alpha):
    '''
    Compute a bootstrapped percentile confidence interval for the mean of data.

    Input:
        data: pandas Series of data
        alpha: interval coverage, forwarded unchanged as ``size`` to
            ``IIDBootstrap.conf_int``
    Output:
        Dictionary with keys "Mean", "Lower" and "Upper".
    '''

    # Compute the mean of the Series
    # (the body previously read an undefined name ``series`` and raised
    # NameError on every call; it now uses the ``data`` parameter)
    mean = data.mean()
    # Obtain the values of the Series as an array
    array = data.values
    # Bootstrap the array (sample with replacement)
    bs = IIDBootstrap(array)
    # Compute confidence intervals of bootstrapped distribution (1000 reps)
    ci = bs.conf_int(np.mean, 1000, method='percentile', size=alpha)
    # Row 0 holds the lower bound, row 1 the upper bound
    lower = ci[0, 0]
    upper = ci[1, 0]

    # Output dictionary
    dict_out = {"Mean": mean, "Lower": lower, "Upper": upper}
    return dict_out
Example #2
0
def test_iid_args_kwargs(bs_setup):
    """Positional and keyword data must give the same draw under the same seed."""
    positional = IIDBootstrap(bs_setup.y)
    positional.seed(0)
    keyword = IIDBootstrap(y=bs_setup.y)
    keyword.seed(0)
    paired_draws = zip(positional.bootstrap(1), keyword.bootstrap(1))
    for (pos_args, _), (_, kw_args) in paired_draws:
        assert np.all(pos_args[0] == kw_args["y"])
Example #3
0
def mean_ci(data, alpha=0.95):
    '''
    Bootstrap a percentile confidence interval for the mean of data.

    Args
    ----
    data: pd.Series
        Observations, provided as a Pandas Series
    alpha: float
        Interval coverage, forwarded as ``size`` to ``conf_int``

    Returns
    -------
    dict:
        Dictionary with keys "Mean", "Lower" and "Upper"
    '''
    # Point estimate first, then resample the raw values with replacement
    sample_mean = data.mean()
    resampler = IIDBootstrap(data.values)
    # 1000 replications of np.mean; rows are (lower, upper)
    bounds = resampler.conf_int(np.mean, 1000, method='percentile', size=alpha)
    return {"Mean": sample_mean, "Lower": bounds[0, 0], "Upper": bounds[1, 0]}
Example #4
0
def test_pass_random_state():
    """A RandomState instance is accepted; an integer random_state raises TypeError."""
    values = np.arange(1000)
    generator = RandomState(0)
    IIDBootstrap(values, random_state=generator)

    with pytest.raises(TypeError):
        IIDBootstrap(values, random_state=0)
    def get_confidence_interval(scores,
                                ci_method='bca',
                                ci_size=0.95,
                                replications=100000,
                                seed_value=None):
        """
        Compute a two sided bootstrap confidence interval for the mean score.

        NaN entries in ``scores`` are dropped first. If nothing remains the
        bounds are NaN; if the remaining values span less than 1e-6 the
        observed minimum and maximum are returned without bootstrapping.
        Returns a dict with keys 'size', 'lower' and 'upper'.
        """
        def mean_statistic(sample):
            # Wrapped in a 1-element array so the statistic is 1-D
            return np.array([sample.mean()])

        values = np.array(
            [float(item) for item in scores if not math.isnan(item)])
        if len(values) == 0:
            nan = float('nan')
            return {'size': ci_size, 'lower': nan, 'upper': nan}

        highest = max(values)
        lowest = min(values)
        if highest - lowest < 0.000001:
            return {'size': ci_size, 'lower': lowest, 'upper': highest}

        bootstrap = IIDBootstrap(values)
        if seed_value is not None:
            bootstrap.seed(seed_value)
        bounds = bootstrap.conf_int(mean_statistic,
                                    replications,
                                    method=ci_method,
                                    size=ci_size,
                                    tail='two')
        lower, upper = bounds[0][0], bounds[1][0]
        return {'size': ci_size, 'lower': lower, 'upper': upper}
Example #6
0
    def test_bca_against_bcajack(self):
        """
        Regression-test the BCa interval against saved reference numbers.

        The statistic is the ratio of the second to the first column mean of
        20 seeded bivariate-normal observations. The acceleration, bias and
        90% interval computed by ``IIDBootstrap`` are compared with
        hard-coded values. The commented-out rpy2 code is kept for reference
        and shows a call to R's ``bcaboot.bcajack``; it is not executed.
        """
        # import rpy2.rinterface as ri
        # import rpy2.robjects as robjects
        # import rpy2.robjects.numpy2ri
        # from rpy2.robjects.packages import importr
        # rpy2.robjects.numpy2ri.activate()
        # utils = importr('utils')
        # try:
        #     bcaboot = importr('bcaboot')
        # except Exception:
        #     utils.install_packages('bcaboot',
        #                            repos='http://cran.us.r-project.org')
        #     bcaboot = importr('bcaboot')

        # Seeded observations: 20 draws with mean (8, 4) and identity covariance
        rng_seed_obs = 42
        rs = np.random.RandomState(rng_seed_obs)
        observations = rs.multivariate_normal(mean=[8, 4],
                                              cov=np.identity(2),
                                              size=20)
        # Separate seeded generator for the bootstrap itself
        B = 2000
        rng_seed = 123
        rs = np.random.RandomState(rng_seed)
        arch_bs = IIDBootstrap(observations, random_state=rs)
        confidence_interval_size = 0.90

        def func(x):
            # Ratio of the column means: mean(col 1) / mean(col 0)
            sample = x.mean(axis=0)
            return sample[1] / sample[0]

        arch_ci = arch_bs.conf_int(
            func=func,
            reps=B,
            size=confidence_interval_size,
            method='bca',
        )

        # # callable from R
        # @ri.rternalize
        # def func_r(x):
        #     x = np.asarray(x)
        #     _mean = x.mean(axis=0)
        #     return float(_mean[1] / _mean[0])
        # output = bcaboot.bcajack(x=observations, B=float(B), func=func_r)
        # Private helpers exposing the acceleration and bias terms
        a = arch_bs._bca_acceleration(func)
        b = arch_bs._bca_bias()
        # bca_lims = np.array(output[1])[:, 0]
        # # bca confidence intervals for: 0.025, 0.05, 0.1, 0.16, 0.5,
        #                                 0.84, 0.9, 0.95, 0.975
        # bcajack_ci_90 = [bca_lims[1], bca_lims[-2]]
        # bcajack should estimate similar "a" using jackknife on
        # the same observations
        assert_allclose(a, -0.0004068984)
        # bcajack returns b (or z0) = -0.03635412, but based on
        # different bootstrap samples
        assert_allclose(b, 0.04764396)
        # bcajack_ci_90 = [0.42696, 0.53188]
        # Last column of the (2, k) interval array as [lower, upper]
        arch_ci = list(arch_ci[:, -1])
        saved_arch_ci_90 = [0.42719805360154717, 0.5336561953393736]
        assert_allclose(arch_ci, saved_arch_ci_90)
Example #7
0
 def test_pandas_integer_index(self):
     """Both positional inputs must resample to equal values after x_df is re-indexed."""
     array = self.x
     frame = self.x_df.copy()
     # Integer index offset by 10 so labels differ from positions
     frame.index = 10 + np.arange(array.shape[0])
     resampler = IIDBootstrap(array, frame)
     resampler.seed(23456)
     for (array_draw, frame_draw), _ in resampler.bootstrap(10):
         assert_equal(array_draw, frame_draw.values)
Example #8
0
 def test_pandas_integer_index(self):
     """Resampled array rows must equal the resampled rows of the re-indexed x_df copy."""
     base = self.x
     shifted = self.x_df.copy()
     # Labels start at 10, so label-based and positional lookup disagree
     shifted.index = 10 + np.arange(base.shape[0])
     bs = IIDBootstrap(base, shifted)
     bs.seed(23456)
     for positional, _unused_kw in bs.bootstrap(10):
         raw, reindexed = positional
         assert_equal(raw, reindexed.values)
Example #9
0
def test_pandas_integer_index(bs_setup):
    """Array and re-indexed x_df copy must yield equal values in every draw."""
    array = bs_setup.x
    frame = bs_setup.x_df.copy()
    # Shift the integer labels away from the positions 0..n-1
    frame.index = 10 + np.arange(array.shape[0])
    resampler = IIDBootstrap(array, frame)
    resampler.seed(23456)
    for (array_draw, frame_draw), _ in resampler.bootstrap(10):
        assert_equal(array_draw, np.asarray(frame_draw))
Example #10
0
def test_studentization_error():
    """Studentized intervals must raise StudentizationError for this statistic."""
    def mean_and_constant(sample):
        # Second component is the constant 3 for every resample
        return np.array([sample.mean(), 3])

    draws = np.random.standard_normal(100)
    resampler = IIDBootstrap(draws)
    with pytest.raises(StudentizationError):
        resampler.conf_int(mean_and_constant, 100, method="studentized")
Example #11
0
def test_list_input():
    """Plain lists are rejected with TypeError, positionally and by keyword."""
    # GH 315
    as_positional = np.random.standard_normal(25).tolist()
    with pytest.raises(TypeError, match="Positional input 0 "):
        IIDBootstrap(as_positional)
    as_keyword = np.random.standard_normal(25).tolist()
    with pytest.raises(TypeError, match="Input `data` "):
        IIDBootstrap(data=as_keyword)
Example #12
0
def test_conf_int_bca_scaler(bs_setup):
    """The BCa interval for np.mean on bs_setup.y must have shape (2, 1)."""
    reps = 100
    resampler = IIDBootstrap(bs_setup.y)
    resampler.seed(23456)

    interval = resampler.conf_int(np.mean, reps=reps, method="bca")
    failure_note = (
        "conf_int(method='bca') scalar input regression. "
        "Ensure output is at least 1D with numpy.atleast_1d()."
    )
    assert interval.shape == (2, 1), failure_note
Example #13
0
    def test_conf_int_bca_scaler(self):
        """The BCa interval for np.mean on self.y must have shape (2, 1)."""
        resampler = IIDBootstrap(self.y)
        resampler.seed(23456)

        interval = resampler.conf_int(np.mean, reps=100, method='bca')
        assert interval.shape == (2, 1), (
            "conf_int(method='bca') scalar input regression. Ensure "
            "output is at least 1D with numpy.atleast_1d()."
        )
Example #14
0
    def test_conf_int_bca_scaler(self):
        """Regression check: BCa with a scalar statistic returns a (2, 1) array."""
        replications = 100
        bootstrap = IIDBootstrap(self.y)
        bootstrap.seed(23456)

        bounds = bootstrap.conf_int(np.mean, reps=replications, method='bca')
        note = ("conf_int(method='bca') scalar input regression. "
                "Ensure output is at least 1D with numpy.atleast_1d().")
        assert bounds.shape == (2, 1), note
Example #15
0
def test_iid_semiparametric(bs_setup):
    """Semiparametric sampling on bs_setup.y must return a (2, 1) interval."""
    resampler = IIDBootstrap(bs_setup.y)

    def func(y, axis=0, params=None):
        # Subtract params before averaging when they are supplied
        centred = y if params is None else y - params
        return centred.mean(axis=axis)

    interval = resampler.conf_int(func, reps=10, sampling="semiparametric")
    assert interval.shape == (2, 1)
Example #16
0
def test_bca_extra_kwarg():
    """BCa must work when the statistic needs an argument from extra_kwargs."""
    # GH 366
    def f(a, b):
        # ``b`` is required by the signature but otherwise ignored
        return a.mean(0)

    draws = np.random.standard_normal(1000)
    interval = IIDBootstrap(draws).conf_int(
        f, extra_kwargs={"b": "anything"}, reps=100, method="bca"
    )
    assert isinstance(interval, np.ndarray)
    assert interval.shape == (2, 1)
Example #17
0
    def test_str(self):
        """Check str, repr and the HTML repr of three bootstrap classes."""
        # IID bootstrap with one positional input
        bs = IIDBootstrap(self.y_series)
        ident = hex(id(bs))
        summary = 'IID Bootstrap(no. pos. inputs: 1, no. keyword inputs: 0)'
        assert_equal(str(bs), summary)
        # repr is the summary with the object id added before the final ')'
        assert_equal(bs.__repr__(), summary[:-1] + ', ID: ' + ident + ')')
        html = ('<strong>IID Bootstrap</strong>('
                '<strong>no. pos. inputs</strong>: 1, '
                '<strong>no. keyword inputs</strong>: 0, '
                '<strong>ID</strong>: ' + ident + ')')
        assert_equal(bs._repr_html(), html)

        # Stationary bootstrap with two positional inputs
        bs = StationaryBootstrap(10, self.y_series, self.x_df)
        ident = hex(id(bs))
        summary = 'Stationary Bootstrap(block size: 10, no. pos. inputs: 2, no. keyword inputs: 0)'
        assert_equal(str(bs), summary)
        assert_equal(bs.__repr__(), summary[:-1] + ', ID: ' + ident + ')')

        # Circular block bootstrap with two keyword inputs
        bs = CircularBlockBootstrap(block_size=20,
                                    y=self.y_series,
                                    x=self.x_df)
        ident = hex(id(bs))
        summary = 'Circular Block Bootstrap(block size: 20, no. pos. inputs: 0, no. keyword inputs: 2)'
        assert_equal(str(bs), summary)
        assert_equal(bs.__repr__(), summary[:-1] + ', ID: ' + ident + ')')
        html = ('<strong>Circular Block Bootstrap</strong>'
                '(<strong>block size</strong>: 20, '
                '<strong>no. pos. inputs</strong>: 0, '
                '<strong>no. keyword inputs</strong>: 2,'
                ' <strong>ID</strong>: ' + ident + ')')
        assert_equal(bs._repr_html(), html)
Example #18
0
    def test_extra_kwargs(self):
        """cov accepts extra_kwargs, but raises ValueError when data is also passed as ``axis``."""
        forwarded = {'axis': 0}
        reps = 100

        resampler = IIDBootstrap(self.x)
        resampler.seed(23456)
        resampler.cov(self.func, reps=reps, extra_kwargs=forwarded)

        # Same keyword name used for the data and in extra_kwargs
        clashing = IIDBootstrap(axis=self.x)
        clashing.seed(23456)
        with pytest.raises(ValueError):
            clashing.cov(self.func, reps=reps, extra_kwargs=forwarded)
Example #19
0
def test_iid_unequal_equiv():
    """With identical seeds both bootstrap classes must give the same variance of the mean."""
    data = RandomState(0).randn(500)

    iid = IIDBootstrap(data, random_state=RandomState(0))
    independent = IndependentSamplesBootstrap(data, random_state=RandomState(0))

    iid_variance = iid.var(np.mean)
    independent_variance = independent.var(np.mean)
    assert_allclose(iid_variance, independent_variance)
Example #20
0
    def test_conf_int_bca_scaler(self):
        """BCa with np.mean must not raise IndexError and must return a (2, 1) array."""
        reps = 100
        resampler = IIDBootstrap(self.y)
        resampler.seed(23456)

        try:
            interval = resampler.conf_int(np.mean, reps=reps, method='bca')
        except IndexError:
            # Report the IndexError as a test failure with an explanation
            pytest.fail("conf_int(method='bca') scaler input regression. "
                        "Ensure output is at least 1D with "
                        "numpy.atleast_1d().")
        else:
            assert interval.shape == (2, 1)
Example #21
0
def test_iid_unequal_equiv():
    """With identical seeds both bootstrap classes must give the same variance of the mean."""
    data = RandomState(0).standard_normal(500)

    iid = IIDBootstrap(data, random_state=RandomState(0))
    independent = IndependentSamplesBootstrap(data, random_state=RandomState(0))

    iid_variance = iid.var(np.mean)
    independent_variance = independent.var(np.mean)
    assert_allclose(iid_variance, independent_variance)
Example #22
0
    def test_conf_int_bca_scaler(self):
        """BCa with np.mean must not raise IndexError and must return a (2, 1) array."""
        replications = 100
        bootstrap = IIDBootstrap(self.y)
        bootstrap.seed(23456)

        try:
            bounds = bootstrap.conf_int(np.mean, reps=replications, method='bca')
        except IndexError:
            # Report the IndexError as a test failure with an explanation
            pytest.fail("conf_int(method='bca') scalar input regression. "
                        "Ensure output is at least 1D with "
                        "numpy.atleast_1d().")
        else:
            assert bounds.shape == (2, 1)
Example #23
0
 def test_mixed_types(self):
     """One positional and two keyword inputs must all be resampled with bs.index."""
     frame, series, array = self.x_df, self.y_series, self.z
     bs = IIDBootstrap(series, x=frame, z=array)
     bs.seed(23456)
     for positional, keyword in bs.bootstrap(10):
         rows = bs.index
         assert_equal(len(positional), 1)
         assert_equal(len(keyword.keys()), 2)
         # Keyword data is checked both in the yielded dict and as attributes
         assert_frame_equal(frame.iloc[rows], keyword['x'])
         assert_frame_equal(frame.iloc[rows], bs.x)
         assert_series_equal(series.iloc[rows], positional[0])
         assert_equal(array[rows], keyword['z'])
         assert_equal(array[rows], bs.z)
Example #24
0
def test_mixed_types(bs_setup):
    """One positional and two keyword inputs must all be resampled with bs.index."""
    frame = bs_setup.x_df
    series = bs_setup.y_series
    array = bs_setup.z
    bs = IIDBootstrap(series, x=frame, z=array)
    bs.seed(23456)
    for positional, keyword in bs.bootstrap(10):
        rows = bs.index
        assert_equal(len(positional), 1)
        assert_equal(len(keyword.keys()), 2)
        # Keyword data is checked both in the yielded dict and as attributes
        assert_frame_equal(frame.iloc[rows], keyword["x"])
        assert_frame_equal(frame.iloc[rows], bs.x)
        assert_series_equal(series.iloc[rows], positional[0])
        assert_equal(array[rows], keyword["z"])
        assert_equal(array[rows], bs.z)
Example #25
0
    def test_conf_int_bias_corrected(self):
        """
        Check that methods 'bc' and 'debiased' agree and match a manual computation.

        The manual interval is rebuilt from the private ``_base`` and
        ``_results`` attributes left behind by the last ``conf_int`` call.
        """
        num_bootstrap = 20
        bs = IIDBootstrap(self.x)
        bs.seed(23456)

        def func(y):
            # Column means
            return y.mean(axis=0)

        ci = bs.conf_int(func, reps=num_bootstrap, method='bc')
        # Reset before the second call; the two intervals must then be equal
        bs.reset()
        ci_db = bs.conf_int(func, reps=num_bootstrap, method='debiased')
        assert_equal(ci, ci_db)
        base, results = bs._base, bs._results
        # Share of bootstrap estimates below the base estimate, per column
        p = np.zeros(2)
        p[0] = np.mean(results[:, 0] < base[0])
        p[1] = np.mean(results[:, 1] < base[1])
        # Normal quantile of those shares
        b = stats.norm.ppf(p)
        # Normal quantiles for 2.5% and 97.5%, reshaped to a column
        q = stats.norm.ppf(np.array([0.025, 0.975]))
        q = q[:, None]
        # Shifted percentile levels on a 0-100 scale
        percentiles = 100 * stats.norm.cdf(2 * b + q)

        ci = np.zeros((2, 2))
        for i in range(2):
            ci[i] = np.percentile(results[:, i], list(percentiles[:, i]))
        # Transpose so rows are (lower, upper)
        ci = ci.T
        assert_allclose(ci_db, ci)
Example #26
0
    def test_str(self):
        """Check str, repr and the HTML repr of three bootstrap classes."""
        def with_id(text, obj):
            # repr is the str output with the object id added before the final ')'
            return text[:-1] + ', ID: ' + hex(id(obj)) + ')'

        bs = IIDBootstrap(self.y_series)
        text = 'IID Bootstrap(no. pos. inputs: 1, no. keyword inputs: 0)'
        assert_equal(str(bs), text)
        assert_equal(bs.__repr__(), with_id(text, bs))
        markup = ('<strong>IID Bootstrap</strong>('
                  '<strong>no. pos. inputs</strong>: 1, '
                  '<strong>no. keyword inputs</strong>: 0, '
                  '<strong>ID</strong>: ' + hex(id(bs)) + ')')
        assert_equal(bs._repr_html(), markup)

        bs = StationaryBootstrap(10, self.y_series, self.x_df)
        text = 'Stationary Bootstrap(block size: 10, no. pos. inputs: 2, no. keyword inputs: 0)'
        assert_equal(str(bs), text)
        assert_equal(bs.__repr__(), with_id(text, bs))

        bs = CircularBlockBootstrap(block_size=20, y=self.y_series, x=self.x_df)
        text = 'Circular Block Bootstrap(block size: 20, no. pos. inputs: 0, no. keyword inputs: 2)'
        assert_equal(str(bs), text)
        assert_equal(bs.__repr__(), with_id(text, bs))
        markup = ('<strong>Circular Block Bootstrap</strong>'
                  '(<strong>block size</strong>: 20, '
                  '<strong>no. pos. inputs</strong>: 0, '
                  '<strong>no. keyword inputs</strong>: 2,'
                  ' <strong>ID</strong>: ' + hex(id(bs)) + ')')
        assert_equal(bs._repr_html(), markup)
Example #27
0
    def test_errors(self):
        """Invalid constructor inputs and invalid conf_int options raise ValueError."""
        short = np.arange(10)
        long = np.arange(100)
        # Inputs of different lengths
        with pytest.raises(ValueError):
            IIDBootstrap(short, long)
        # Data passed under the keyword ``index``
        with pytest.raises(ValueError):
            IIDBootstrap(index=short)
        bs = IIDBootstrap(long)

        bad_options = ({'method': 'unknown'}, {'tail': 'dragon'}, {'size': 95})
        for options in bad_options:
            with pytest.raises(ValueError):
                bs.conf_int(self.func, **options)
Example #28
0
    def test_extra_kwargs(self):
        """cov accepts extra_kwargs, but raises ValueError when data is also passed as ``axis``."""
        def func(y, axis=0):
            return y.mean(axis=axis)

        forwarded = {'axis': 0}
        reps = 100

        resampler = IIDBootstrap(self.x)
        resampler.seed(23456)
        resampler.cov(func, reps=reps, extra_kwargs=forwarded)

        # Same keyword name used for the data and in extra_kwargs
        clashing = IIDBootstrap(axis=self.x)
        clashing.seed(23456)
        with pytest.raises(ValueError):
            clashing.cov(func, reps=reps, extra_kwargs=forwarded)
Example #29
0
    def test_reuse(self):
        """reuse=True keeps the stored results; reuse with other settings emits one warning."""
        reps = 100
        bs = IIDBootstrap(self.x)

        first = bs.conf_int(self.func, reps=reps)
        stored = bs._results.copy()
        second = bs.conf_int(self.func, reps=reps, reuse=True)
        assert_equal(bs._results, stored)
        assert_equal(first, second)
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always", RuntimeWarning)
            warnings.simplefilter("always")
            # Half the replications while asking to reuse the stored results
            bs.conf_int(self.func, tail='lower', reps=reps // 2, reuse=True)
            assert_equal(len(caught), 1)
Example #30
0
def test_errors(bs_setup):
    """Invalid constructor inputs and invalid conf_int options raise ValueError."""
    short = np.arange(10)
    long = np.arange(100)
    # Inputs of different lengths
    with pytest.raises(ValueError):
        IIDBootstrap(short, long)
    # Data passed under the keyword ``index``
    with pytest.raises(ValueError):
        IIDBootstrap(index=short)
    bs = IIDBootstrap(long)

    bad_options = ({"method": "unknown"}, {"tail": "dragon"}, {"size": 95})
    for options in bad_options:
        with pytest.raises(ValueError):
            bs.conf_int(bs_setup.func, **options)
Example #31
0
def mean_confidence_interval_normed(data, metric, normalizer=None, bootstrap_reps=1000, norm='none', test_type='own', confidence=0.95):
    '''
    Compute the (optionally normalized) mean of data and one confidence-interval value.

    data: sequence of numeric values for one metric.
    metric: one of css_ri, synergy_zip, synergy_bliss, synergy_hsa, synergy_loewe.
    normalizer: looked up as normalizer[norm][metric]; data is divided by that
        value when norm is sd, iqr or range. Normalizer should have all the data.
    bootstrap_reps: number of bootstrap replications (bs and own test types).
    norm can be sd or iqr or range or none.
    test_type: bs for BCa, t for t-stud, own for symmetric bootstrap, z for z-transform
    confidence: confidence level forwarded to the interval helpers.
    https://arch.readthedocs.io/en/latest/bootstrap/generated/generated/arch.bootstrap.IIDBootstrap.conf_int.html#arch.bootstrap.IIDBootstrap.conf_int

    Returns (mean, ci), both rounded to 4 decimals. For an unrecognized norm a
    message is printed and None is returned.
    Raises ValueError for an unsupported metric or test_type (these previously
    failed later with an unbound-local error).
    '''
    supported_metrics = ['css_ri', 'synergy_zip', 'synergy_bliss', 'synergy_hsa', 'synergy_loewe']
    if metric not in supported_metrics:
        raise ValueError('unsupported metric: {!r}'.format(metric))

    if norm in ['sd', 'iqr', 'range']:
        a = np.array(data) / normalizer[norm][metric]
    elif norm == 'none':
        a = np.array(data)
    else:
        print('no norming info!')
        return
    mean_val = np.mean(a)

    if test_type == 't':  # standard t-dist
        ci = _mean_confidence_interval(data=a, confidence=confidence)
    elif test_type == 'bs':
        # the idea is that we take mean to be as is, but we take its CI bootstrapped
        def _statistic(sample):
            # Forward the requested confidence instead of the helper's default
            return _mean_confidence_interval(data=sample, confidence=confidence)

        # bias-corrected and accelerated bootstrap of the statistic
        ci = IIDBootstrap(a).conf_int(_statistic,
                                      reps=bootstrap_reps,
                                      method='bca')
        # Keep the upper-bound row only
        ci = ci[1]
    elif test_type == 'own':
        # standard symmetrical bootstrap
        mean_val, ci = _bootstrap(data=a, confidence=confidence, bootstrap_reps=bootstrap_reps)
    elif test_type == 'z':
        mean_val, ci = _pearsonr_ci(data=a, confidence=confidence)
    else:
        raise ValueError('unsupported test_type: {!r}'.format(test_type))

    return round(mean_val, 4), round(float(ci), 4)  # return upper bound
Example #32
0
def test_iid_unequal_equiv():
    """Both bootstrap classes must agree on the variance; also check the layout of bs2.index."""
    data = RandomState(0).standard_normal(500)

    iid = IIDBootstrap(data, random_state=RandomState(0))
    independent = IndependentSamplesBootstrap(data, random_state=RandomState(0))

    iid_variance = iid.var(np.mean)
    independent_variance = independent.var(np.mean)
    assert_allclose(iid_variance, independent_variance)

    # index is a tuple whose first item is a list of arrays shaped like the data
    index = independent.index
    assert isinstance(index, tuple)
    assert isinstance(index[0], list)
    assert isinstance(index[0][0], np.ndarray)
    assert index[0][0].shape == data.shape
Example #33
0
    def test_conf_int_bias_corrected(self):
        """
        Check that methods 'bc' and 'debiased' agree and match a manual computation.

        The manual interval is rebuilt from the private ``_base`` and
        ``_results`` attributes left behind by the last ``conf_int`` call.
        """
        num_bootstrap = 20
        bs = IIDBootstrap(self.x)
        bs.seed(23456)

        def func(y):
            # Column means
            return y.mean(axis=0)

        ci = bs.conf_int(func, reps=num_bootstrap, method='bc')
        # Reset before the second call; the two intervals must then be equal
        bs.reset()
        ci_db = bs.conf_int(func, reps=num_bootstrap, method='debiased')
        assert_equal(ci, ci_db)
        base, results = bs._base, bs._results
        # Share of bootstrap estimates below the base estimate, per column
        p = np.zeros(2)
        p[0] = np.mean(results[:, 0] < base[0])
        p[1] = np.mean(results[:, 1] < base[1])
        # Normal quantile of those shares
        b = stats.norm.ppf(p)
        # Normal quantiles for 2.5% and 97.5%, reshaped to a column
        q = stats.norm.ppf(np.array([0.025, 0.975]))
        q = q[:, None]
        # Shifted percentile levels on a 0-100 scale
        percentiles = 100 * stats.norm.cdf(2 * b + q)

        ci = np.zeros((2, 2))
        for i in range(2):
            ci[i] = np.percentile(results[:, i], list(percentiles[:, i]))
        # Transpose so rows are (lower, upper)
        ci = ci.T
        assert_allclose(ci_db, ci)
Example #34
0
    def test_bca(self):
        """
        Compare the 'bca' interval with one rebuilt by hand.

        The bias term comes from the stored bootstrap results and the
        acceleration term from leave-one-out jackknife estimates.
        """
        num_bootstrap = 20
        bs = IIDBootstrap(self.x)
        bs.seed(23456)

        ci_direct = bs.conf_int(self.func, reps=num_bootstrap, method='bca')
        bs.reset()
        base, results = bs._base, bs._results
        # Share of bootstrap estimates below the base estimate, per column
        p = np.zeros(2)
        p[0] = np.mean(results[:, 0] < base[0])
        p[1] = np.mean(results[:, 1] < base[1])
        # Bias term as a column vector
        b = stats.norm.ppf(p)
        b = b[:, None]
        # Normal quantiles for 2.5% and 97.5%
        q = stats.norm.ppf(np.array([0.025, 0.975]))

        base = self.func(self.x)
        nobs = self.x.shape[0]
        jk = _loo_jackknife(self.func, nobs, [self.x], {})
        # Deviations of the jackknife estimates from jk.mean()
        u = jk.mean() - jk
        u2 = np.sum(u * u, 0)
        u3 = np.sum(u * u * u, 0)
        # Acceleration term as a column vector
        a = u3 / (6.0 * (u2**1.5))
        a = a[:, None]
        # Adjusted percentile levels on a 0-100 scale
        percentiles = 100 * stats.norm.cdf(b + (b + q) / (1 - a * (b + q)))

        ci = np.zeros((2, 2))
        for i in range(2):
            ci[i] = np.percentile(results[:, i], list(percentiles[i]))
        # Transpose so rows are (lower, upper)
        ci = ci.T
        assert_allclose(ci_direct, ci)
Example #35
0
    def test_extra_kwargs(self):
        """cov accepts extra_kwargs, but raises ValueError when data is also passed as ``axis``."""
        def func(y, axis=0):
            return y.mean(axis=axis)

        forwarded = {'axis': 0}
        reps = 100

        resampler = IIDBootstrap(self.x)
        resampler.seed(23456)
        resampler.cov(func, reps=reps, extra_kwargs=forwarded)

        # Same keyword name used for the data and in extra_kwargs
        clashing = IIDBootstrap(axis=self.x)
        clashing.seed(23456)
        assert_raises(ValueError, clashing.cov, func,
                      reps=reps, extra_kwargs=forwarded)
Example #36
0
 def test_reset(self):
     """reset() must restore the initial state so the draws repeat."""
     bs = IIDBootstrap(np.arange(100))

     def last_draw():
         # Run 10 replications and keep the final positional sample
         draw = None
         for positional, _ in bs.bootstrap(10):
             draw = positional[0]
         return draw

     state_before = bs.get_state()
     first_pass = last_draw()
     bs.reset()
     state_after = bs.get_state()
     second_pass = last_draw()
     assert_equal(first_pass, second_pass)
     assert_equal(state_before, state_after)
Example #37
0
def test_apply(bs_setup):
    """apply() must match calling the statistic on each draw after a reset."""
    resampler = IIDBootstrap(bs_setup.x)
    resampler.seed(23456)

    via_apply = resampler.apply(bs_setup.func, 1000)
    resampler.reset(True)
    manual = np.array(
        [bs_setup.func(*pos) for pos, _ in resampler.bootstrap(1000)]
    )
    assert_equal(via_apply, manual)
Example #38
0
    def test_apply_series(self):
        """apply() on y_series must match a manual loop, as a column."""
        resampler = IIDBootstrap(self.y_series)
        resampler.seed(23456)

        via_apply = resampler.apply(self.func, 1000)
        resampler.reset(23456)
        manual = np.array(
            [self.func(*pos) for pos, _ in resampler.bootstrap(1000)])
        # Add a trailing axis before comparing with the apply() output
        assert_equal(via_apply, manual[:, None])
Example #39
0
    def test_apply(self):
        """apply() must match calling the statistic on each draw after a reset."""
        def func(y):
            return y.mean(0)

        resampler = IIDBootstrap(self.x)
        resampler.seed(23456)

        via_apply = resampler.apply(func, 1000)
        resampler.reset(23456)
        manual = np.array(
            [func(*pos) for pos, _kw in resampler.bootstrap(1000)])
        assert_equal(via_apply, manual)
Example #40
0
    def test_bca(self):
        """
        Compare the 'bca' interval with one rebuilt by hand.

        The bias term comes from the stored bootstrap results and the
        acceleration term from leave-one-out jackknife estimates.
        """
        num_bootstrap = 20
        bs = IIDBootstrap(self.x)
        bs.seed(23456)

        def func(y):
            # Column means
            return y.mean(axis=0)

        ci_direct = bs.conf_int(func, reps=num_bootstrap, method='bca')
        bs.reset()
        base, results = bs._base, bs._results
        # Share of bootstrap estimates below the base estimate, per column
        p = np.zeros(2)
        p[0] = np.mean(results[:, 0] < base[0])
        p[1] = np.mean(results[:, 1] < base[1])
        # Bias term as a column vector
        b = stats.norm.ppf(p)
        b = b[:, None]
        # Normal quantiles for 2.5% and 97.5%
        q = stats.norm.ppf(np.array([0.025, 0.975]))

        base = func(self.x)
        nobs = self.x.shape[0]
        jk = _loo_jackknife(func, nobs, [self.x], {})
        # Jackknife deviations from the full-sample estimate, scaled by nobs - 1
        u = (nobs - 1) * (jk - base)
        u2 = np.sum(u * u, 0)
        u3 = np.sum(u * u * u, 0)
        # Acceleration term as a column vector
        a = u3 / (6.0 * (u2 ** 1.5))
        a = a[:, None]
        # Adjusted percentile levels on a 0-100 scale
        percentiles = 100 * stats.norm.cdf(b + (b + q) / (1 - a * (b + q)))

        ci = np.zeros((2, 2))
        for i in range(2):
            ci[i] = np.percentile(results[:, i], list(percentiles[i]))
        # Transpose so rows are (lower, upper)
        ci = ci.T
        assert_allclose(ci_direct, ci)
Example #41
0
 def test_reset(self):
     """reset() must restore the initial state so the draws repeat."""
     resampler = IIDBootstrap(np.arange(100))

     state_before = resampler.get_state()
     for positional, _kw in resampler.bootstrap(10):
         last_before = positional[0]

     resampler.reset()

     state_after = resampler.get_state()
     for positional, _kw in resampler.bootstrap(10):
         last_after = positional[0]

     assert_equal(last_before, last_after)
     assert_equal(state_before, state_after)
Example #42
0
    def test_apply(self):
        """apply() must match calling self.func on each draw after a reset."""
        resampler = IIDBootstrap(self.x)
        resampler.seed(23456)

        via_apply = resampler.apply(self.func, 1000)
        resampler.reset(23456)
        manual = np.array(
            [self.func(*pos) for pos, _ in resampler.bootstrap(1000)])
        assert_equal(via_apply, manual)
Example #43
0
    def test_errors(self):
        """Invalid constructor inputs and invalid conf_int options raise ValueError."""
        short = np.arange(10)
        long = np.arange(100)
        # Inputs of different lengths, then data passed under ``index``
        assert_raises(ValueError, IIDBootstrap, short, long)
        assert_raises(ValueError, IIDBootstrap, index=short)
        bs = IIDBootstrap(long)

        def func(y):
            return y.mean(axis=0)

        bad_options = ({'method': 'unknown'}, {'tail': 'dragon'}, {'size': 95})
        for options in bad_options:
            assert_raises(ValueError, bs.conf_int, func, **options)
Example #44
0
    def test_errors(self):
        """Invalid constructor inputs and invalid conf_int options raise ValueError."""
        ten = np.arange(10)
        hundred = np.arange(100)
        # Inputs of different lengths
        with pytest.raises(ValueError):
            IIDBootstrap(ten, hundred)
        # Data passed under the keyword ``index``
        with pytest.raises(ValueError):
            IIDBootstrap(index=ten)
        resampler = IIDBootstrap(hundred)

        for bad in ({'method': 'unknown'}, {'tail': 'dragon'}, {'size': 95}):
            with pytest.raises(ValueError):
                resampler.conf_int(self.func, **bad)
Example #45
0
    def test_apply_series(self):
        """apply() on y_series must match a manual loop, as a column."""
        def func(y):
            return y.mean(0)

        resampler = IIDBootstrap(self.y_series)
        resampler.seed(23456)

        via_apply = resampler.apply(func, 1000)
        resampler.reset(23456)
        manual = np.array(
            [func(*pos) for pos, _kw in resampler.bootstrap(1000)])
        # Add a trailing axis before comparing with the apply() output
        assert_equal(via_apply, manual[:, None])
Example #46
0
    def test_conf_int_parametric(self):
        """
        Check the stored results for 'parametric' and 'semi' sampling.

        After each ``conf_int`` call the bootstrap is reset and the statistic
        is re-evaluated draw by draw; the outcome is compared with the
        private ``_results`` attribute.
        """
        def param_func(x, params=None, state=None):
            # With a random state: simulate mu plus standard-normal noise.
            # Without one: plain column mean of the data.
            if state is not None:
                mu = params
                e = state.standard_normal(x.shape)
                return (mu + e).mean(0)
            else:
                return x.mean(0)

        def semi_func(x, params=None):
            # With params: rebuild the data as mu + (x - mu) before averaging.
            if params is not None:
                mu = params
                e = x - mu
                return (mu + e).mean(0)
            else:
                return x.mean(0)

        reps = 100
        bs = IIDBootstrap(self.x)
        bs.seed(23456)

        ci = bs.conf_int(func=param_func, reps=reps, sampling='parametric')
        bs.reset()
        results = np.zeros((reps, 2))
        count = 0
        mu = self.x.mean(0)
        # Replay the replications, drawing noise from the bootstrap's own state
        for pos, kw in bs.bootstrap(100):
            results[count] = param_func(*pos, params=mu,
                                        state=bs.random_state)
            count += 1
        assert_equal(bs._results, results)

        bs.reset()
        ci = bs.conf_int(func=semi_func, reps=100, sampling='semi')
        bs.reset()
        results = np.zeros((reps, 2))
        count = 0
        # Replay the semiparametric replications with the full-sample mean
        for pos, kw in bs.bootstrap(100):
            results[count] = semi_func(*pos, params=mu)
            count += 1
        assert_allclose(bs._results, results)
Example #47
0
    def test_conf_int_basic(self):
        """
        Check the 'basic' interval for two-sided, upper and lower tails.

        A 90% two-sided interval is compared with percentiles computed by
        hand, and with the finite bounds of the 95% one-sided intervals. The
        open side of each one-sided interval must be infinite.
        """
        num_bootstrap = 200
        bs = IIDBootstrap(self.x)

        ci = bs.conf_int(self.func, reps=num_bootstrap, size=0.90, method='basic')
        bs.reset()
        ci_u = bs.conf_int(self.func, tail='upper', reps=num_bootstrap, size=0.95,
                           method='basic')
        bs.reset()
        ci_l = bs.conf_int(self.func, tail='lower', reps=num_bootstrap, size=0.95,
                           method='basic')
        bs.reset()
        # Recompute the statistic on every draw after the reset
        results = np.zeros((num_bootstrap, 2))
        count = 0
        for pos, _ in bs.bootstrap(num_bootstrap):
            results[count] = self.func(*pos)
            count += 1
        mu = self.func(self.x)
        # Bounds reflect the 5th and 95th percentiles around the estimate
        upper = mu + (mu - np.percentile(results, 5, axis=0))
        lower = mu + (mu - np.percentile(results, 95, axis=0))

        assert_allclose(lower, ci[0, :])
        assert_allclose(upper, ci[1, :])

        assert_allclose(ci[1, :], ci_u[1, :])
        assert_allclose(ci[0, :], ci_l[0, :])
        # tail='lower' has +inf as upper bound; tail='upper' has -inf as lower bound
        inf = np.empty_like(ci_l[0, :])
        inf.fill(np.inf)
        assert_equal(inf, ci_l[1, :])
        assert_equal(-1 * inf, ci_u[0, :])
Example #48
0
    def test_studentized(self):
        """
        Check the 'studentized' interval against manual computations.

        Two variants are covered: standard errors from a user-supplied
        ``std_err_func``, and standard errors from a nested bootstrap
        (``studentize_reps=50``). Finally, reusing results with
        ``reuse=True`` must emit exactly one warning.
        """
        num_bootstrap = 20
        bs = IIDBootstrap(self.x)
        bs.seed(23456)

        def func(y):
            # Column means
            return y.mean(axis=0)

        def std_err_func(mu, y):
            # sqrt(mean squared deviation from mu / number of rows)
            errors = y - mu
            var = (errors ** 2.0).mean(axis=0)
            return np.sqrt(var / y.shape[0])

        ci = bs.conf_int(func, reps=num_bootstrap, method='studentized',
                         std_err_func=std_err_func)
        bs.reset()
        base = func(self.x)
        results = np.zeros((num_bootstrap, 2))
        stud_results = np.zeros((num_bootstrap, 2))
        count = 0
        # Replay the draws and studentize each estimate by hand
        for pos, kwdata in bs.bootstrap(reps=num_bootstrap):
            results[count] = func(*pos)
            std_err = std_err_func(results[count], *pos)
            stud_results[count] = (results[count] - base) / std_err
            count += 1

        assert_allclose(results, bs._results)
        assert_allclose(stud_results, bs._studentized_results)
        # Standard error of the bootstrap estimates themselves
        errors = results - results.mean(0)
        std_err = np.sqrt(np.mean(errors ** 2.0, axis=0))
        ci_direct = np.zeros((2, 2))
        for i in range(2):
            # Lower bound uses the 97.5th percentile, upper bound the 2.5th
            ci_direct[0, i] = base[i] - std_err[i] * np.percentile(
                stud_results[:, i], 97.5)
            ci_direct[1, i] = base[i] - std_err[i] * np.percentile(
                stud_results[:, i], 2.5)
        assert_allclose(ci, ci_direct)

        bs.reset()
        ci = bs.conf_int(func, reps=num_bootstrap, method='studentized',
                         studentize_reps=50)

        bs.reset()
        base = func(self.x)
        results = np.zeros((num_bootstrap, 2))
        stud_results = np.zeros((num_bootstrap, 2))
        count = 0
        for pos, kwdata in bs.bootstrap(reps=num_bootstrap):
            results[count] = func(*pos)
            # Nested bootstrap seeded from the outer random state
            inner_bs = IIDBootstrap(*pos)
            seed = bs.random_state.randint(2 ** 31 - 1)
            inner_bs.seed(seed)
            cov = inner_bs.cov(func, reps=50)
            std_err = np.sqrt(np.diag(cov))
            stud_results[count] = (results[count] - base) / std_err
            count += 1

        assert_allclose(results, bs._results)
        assert_allclose(stud_results, bs._studentized_results)
        errors = results - results.mean(0)
        std_err = np.sqrt(np.mean(errors ** 2.0, axis=0))

        ci_direct = np.zeros((2, 2))
        for i in range(2):
            ci_direct[0, i] = base[i] - std_err[i] * np.percentile(
                stud_results[:, i], 97.5)
            ci_direct[1, i] = base[i] - std_err[i] * np.percentile(
                stud_results[:, i], 2.5)
        assert_allclose(ci, ci_direct)

        # Reusing stored results with a different configuration must warn once
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            bs.conf_int(func, reps=num_bootstrap, method='studentized',
                        std_err_func=std_err_func, reuse=True)
            assert_equal(len(w), 1)
Example #49
0
    def test_conf_int_norm(self):
        """Compare normal-approximation intervals with bounds built by hand.

        A two-sided 90% interval (``method='norm'``) is checked against
        ``mu + z * std_err``, with the standard error taken from ``bs.cov``.
        One-sided 95% intervals requested with ``method='var'`` and
        ``method='cov'`` are expected to share their finite bound with the
        two-sided interval and to be infinite on the other side.
        """
        num_bootstrap = 200
        bs = IIDBootstrap(self.x)

        def func(y):
            return y.mean(axis=0)

        two_sided = bs.conf_int(func, reps=num_bootstrap, size=0.90,
                                method='norm')
        bs.reset()
        upper_tail = bs.conf_int(func, tail='upper', reps=num_bootstrap,
                                 size=0.95, method='var')
        bs.reset()
        lower_tail = bs.conf_int(func, tail='lower', reps=num_bootstrap,
                                 size=0.95, method='cov')
        bs.reset()

        # Rebuild the interval directly from the bootstrap covariance.
        boot_cov = bs.cov(func, reps=num_bootstrap)
        center = func(self.x)
        se = np.sqrt(np.diag(boot_cov))
        expected_upper = center + stats.norm.ppf(0.95) * se
        expected_lower = center + stats.norm.ppf(0.05) * se
        assert_allclose(expected_lower, two_sided[0, :])
        assert_allclose(expected_upper, two_sided[1, :])

        # One-sided intervals: finite side matches, other side is +/- inf.
        assert_allclose(two_sided[1, :], upper_tail[1, :])
        assert_allclose(two_sided[0, :], lower_tail[0, :])
        unbounded = np.full_like(lower_tail[0, :], np.inf)
        assert_equal(unbounded, lower_tail[1, :])
        assert_equal(-1 * unbounded, upper_tail[0, :])
Example #50
0
    def test_conf_int_percentile(self):
        """Compare percentile intervals with percentiles of manual replicates.

        The two-sided 90% interval is checked against the 5th and 95th
        percentiles of the statistic recomputed over the same bootstrap
        draws.  One-sided 95% intervals are expected to share their finite
        bound with the two-sided interval and to be infinite on the other
        side.
        """
        num_bootstrap = 200
        bs = IIDBootstrap(self.x)

        def func(y):
            return y.mean(axis=0)

        two_sided = bs.conf_int(func, reps=num_bootstrap, size=0.90,
                                method='percentile')
        bs.reset()
        upper_tail = bs.conf_int(func, tail='upper', reps=num_bootstrap,
                                 size=0.95, method='percentile')
        bs.reset()
        lower_tail = bs.conf_int(func, tail='lower', reps=num_bootstrap,
                                 size=0.95, method='percentile')
        bs.reset()

        # Recompute the statistic for every draw after rewinding the RNG.
        replicates = np.zeros((num_bootstrap, 2))
        for rep, (pos, _) in enumerate(bs.bootstrap(num_bootstrap)):
            replicates[rep] = func(*pos)

        expected_upper = np.percentile(replicates, 95, axis=0)
        expected_lower = np.percentile(replicates, 5, axis=0)

        assert_allclose(expected_lower, two_sided[0, :])
        assert_allclose(expected_upper, two_sided[1, :])

        # One-sided intervals: finite side matches, other side is +/- inf.
        assert_allclose(two_sided[1, :], upper_tail[1, :])
        assert_allclose(two_sided[0, :], lower_tail[0, :])
        unbounded = np.full_like(lower_tail[0, :], np.inf)
        assert_equal(unbounded, lower_tail[1, :])
        assert_equal(-1 * unbounded, upper_tail[0, :])
Example #51
0
 def test_state(self):
     """Check that re-seeding and restoring a saved state replay the draws.

     The last of ten bootstrap samples is recorded three times: after the
     initial seed, after seeding again with the same value, and after
     restoring the state captured right after the first seed.  All three
     are expected to be equal.
     """
     def last_sample(bootstrap):
         # Run ten replications and keep only the final positional sample.
         sample = None
         for pos_data, _ in bootstrap.bootstrap(10):
             sample = pos_data[0]
         return sample

     bs = IIDBootstrap(np.arange(100))
     bs.seed(23456)
     saved_state = bs.get_state()
     final = last_sample(bs)

     bs.seed(23456)
     final_seed = last_sample(bs)

     bs.set_state(saved_state)
     final_state = last_sample(bs)

     assert_equal(final, final_seed)
     assert_equal(final, final_state)
Example #52
0
    def test_cov(self):
        """Compare ``bs.cov`` with a covariance assembled from raw replicates.

        The same checks run for the array input (``self.x``) and the
        DataFrame input (``self.x_df``): with ``recenter=False`` the
        replicate means are centered on the mean of ``self.x``; with
        ``recenter=True`` they are centered on their own average.  Both
        versions divide by the number of replications.
        """
        def func(y):
            return y.mean(axis=0)

        num_bootstrap = 10
        for source in (self.x, self.x_df):
            bs = IIDBootstrap(source)
            cov = bs.cov(func=func, reps=num_bootstrap, recenter=False)
            # Rewind so the manual loop reproduces the draws used by cov().
            bs.reset()

            replicate_means = np.zeros((num_bootstrap, 2))
            for rep, (pos, _) in enumerate(bs.bootstrap(num_bootstrap)):
                replicate_means[rep] = pos[0].mean(axis=0)

            # recenter=False: deviations from the full-sample mean.
            deviations = replicate_means - self.x.mean(axis=0)
            expected = deviations.T.dot(deviations) / num_bootstrap
            assert_allclose(cov, expected)

            # recenter=True: deviations from the mean of the replicates.
            bs.reset()
            cov = bs.cov(func=func, recenter=True, reps=num_bootstrap)
            deviations = replicate_means - replicate_means.mean(axis=0)
            expected = deviations.T.dot(deviations) / num_bootstrap
            assert_allclose(cov, expected)
Example #53
0
    def test_pandas(self):
        """Check bootstrapping of pandas inputs, positional and keyword.

        For every replication the yielded objects must equal the original
        data selected with ``.iloc[bs.index]``.  Keyword inputs are also
        checked through the attributes of the same name on the bootstrap
        (``bs.y``, ``bs.z``), and the stored originals must be unchanged.
        """
        frame_x, series_y, frame_z = self.x_df, self.y_series, self.z_df

        # Single positional Series.
        bs = IIDBootstrap(series_y)
        bs.seed(23456)
        for pos, kw in bs.bootstrap(10):
            rows = bs.index
            assert_equal(len(kw), 0)
            assert_series_equal(series_y.iloc[rows], pos[0])
        # Ensure no changes to original data
        assert_series_equal(bs._args[0], series_y)

        # Single keyword Series.
        bs = IIDBootstrap(y=series_y)
        bs.seed(23456)
        for pos, kw in bs.bootstrap(10):
            rows = bs.index
            assert_equal(len(pos), 0)
            assert_series_equal(series_y.iloc[rows], kw['y'])
            assert_series_equal(series_y.iloc[rows], bs.y)
        # Ensure no changes to original data
        assert_series_equal(bs._kwargs['y'], series_y)

        # Three positional inputs resampled with a common index.
        bs = IIDBootstrap(frame_x, series_y, frame_z)
        bs.seed(23456)
        for pos, kw in bs.bootstrap(10):
            rows = bs.index
            assert_equal(len(pos), 3)
            assert_equal(len(kw), 0)
            assert_frame_equal(frame_x.iloc[rows], pos[0])
            assert_series_equal(series_y.iloc[rows], pos[1])
            assert_frame_equal(frame_z.iloc[rows], pos[2])

        # One positional input mixed with two keyword inputs.
        bs = IIDBootstrap(frame_x, y=series_y, z=frame_z)
        bs.seed(23456)
        for pos, kw in bs.bootstrap(10):
            rows = bs.index
            assert_equal(len(pos), 1)
            assert_equal(len(kw), 2)
            assert_frame_equal(frame_x.iloc[rows], pos[0])
            assert_series_equal(series_y.iloc[rows], kw['y'])
            assert_frame_equal(frame_z.iloc[rows], kw['z'])
            assert_series_equal(series_y.iloc[rows], bs.y)
            assert_frame_equal(frame_z.iloc[rows], bs.z)
Example #54
0
    def test_numpy(self):
        """Check bootstrapping of NumPy inputs, positional and keyword.

        For every replication the yielded arrays must equal the original
        data indexed with ``bs.index``.  Keyword inputs are also checked
        through the attributes of the same name on the bootstrap (``bs.y``,
        ``bs.z``), and the stored originals must be unchanged.
        """
        arr_x, arr_y, arr_z = self.x, self.y, self.z

        # Single positional array.
        bs = IIDBootstrap(arr_y)
        bs.seed(23456)
        for pos, kw in bs.bootstrap(10):
            rows = bs.index
            assert_equal(len(kw), 0)
            assert_equal(arr_y[rows], pos[0])
        # Ensure no changes to original data
        assert_equal(bs._args[0], arr_y)

        # Single keyword array.
        bs = IIDBootstrap(y=arr_y)
        bs.seed(23456)
        for pos, kw in bs.bootstrap(10):
            rows = bs.index
            assert_equal(len(pos), 0)
            assert_equal(arr_y[rows], kw['y'])
            assert_equal(arr_y[rows], bs.y)
        # Ensure no changes to original data
        assert_equal(bs._kwargs['y'], arr_y)

        # Three positional inputs resampled with a common index.
        bs = IIDBootstrap(arr_x, arr_y, arr_z)
        bs.seed(23456)
        for pos, kw in bs.bootstrap(10):
            rows = bs.index
            assert_equal(len(pos), 3)
            assert_equal(len(kw), 0)
            assert_equal(arr_x[rows], pos[0])
            assert_equal(arr_y[rows], pos[1])
            assert_equal(arr_z[rows], pos[2])

        # One positional input mixed with two keyword inputs.
        bs = IIDBootstrap(arr_x, y=arr_y, z=arr_z)
        bs.seed(23456)
        for pos, kw in bs.bootstrap(10):
            rows = bs.index
            assert_equal(len(pos), 1)
            assert_equal(len(kw), 2)
            assert_equal(arr_x[rows], pos[0])
            assert_equal(arr_y[rows], kw['y'])
            assert_equal(arr_z[rows], kw['z'])
            assert_equal(arr_y[rows], bs.y)
            assert_equal(arr_z[rows], bs.z)