Example #1
0
    def _bootstrap(self, block_size):
        """
        Description
        ----
        Stationary bootstrap for a given block size.

        Applies ``self._custom_func`` to ``self._nbootstrap`` stationary
        bootstrap replications of ``self._data``. ``self._custom_func`` may
        be a callable or a string holding an expression that evaluates to a
        callable. ``np.mean`` is used when it is neither (e.g. ``None``) or
        when applying it raises ``TypeError``.

        Inputs
        ----
        :block_size: Number of points per block

        Outputs
        ----
        List containing the mean of the bootstrap results followed by their
        standard error (sample standard deviation, ddof=1)
        """

        bs = StationaryBootstrap(block_size, self._data)

        custom_func = self._custom_func
        # The evaluated expression below refers to the replication count by
        # this name; it has to exist locally or the eval raises NameError.
        nbootstrap = self._nbootstrap

        try:
            if callable(custom_func):
                bs_results = bs.apply(custom_func, nbootstrap)
            else:
                # NOTE(review): eval executes arbitrary code; the string must
                # never come from untrusted input.
                bs_results = eval('bs.apply(' + custom_func +
                                  ', nbootstrap)')
        except TypeError:
            # Non-string, non-callable custom function (typically None), or
            # the statistic itself raised TypeError: fall back to the mean.
            bs_results = bs.apply(np.mean, nbootstrap)

        mn = np.mean(bs_results, axis=0)
        se = np.std(bs_results, ddof=1, axis=0)

        return list(np.hstack((mn, se)))
def bootstrap(block_size, data, nbootstrap, sig_level=0.05, custom_func=None):
    """
    Description
    ----
    Stationary bootstrap for a given block size.

    Inputs
    ----
    :block_size: Number of points per block
    :data: Data to find confidence intervals for
    :nbootstrap: Number of bootstrap samples
    :sig_level: Not used by this function; kept for interface compatibility
    :custom_func: Statistic to bootstrap. Either a callable, or a string
        holding an expression that evaluates to a callable. ``np.mean`` is
        used when it is neither (e.g. ``None``) or when applying it raises
        ``TypeError``.

    Outputs
    ----
    List containing the mean of the bootstrap results followed by their
    standard error (sample standard deviation, ddof=1)
    """

    bs = StationaryBootstrap(block_size, data)

    try:
        if callable(custom_func):
            # A callable used to hit the TypeError fallback on string
            # concatenation and was silently replaced by np.mean.
            bs_results = bs.apply(custom_func, nbootstrap)
        else:
            # NOTE(review): eval executes arbitrary code; the string must
            # never come from untrusted input.
            bs_results = eval('bs.apply(' + custom_func + ', nbootstrap)')
    except TypeError:
        # Non-string, non-callable custom_func (typically None), or the
        # statistic itself raised TypeError: fall back to the mean.
        bs_results = bs.apply(np.mean, nbootstrap)

    mn = np.mean(bs_results, axis=0)
    se = np.std(bs_results, ddof=1, axis=0)

    return list(np.hstack((mn, se)))
def block_bootstrap(series, n_samples, bs_type='Stationary', block_size=10):
    '''
    Computes bootstrapped samples of series.
    
    Inputs:
        series: pandas Series indexed by time
        n_samples: # bootstrapped samples to output
        bs_type ('Stationary'): type of bootstrapping to perform.
            Options include ['Stationary', 'Circular']
        block_size: # size of resampling blocks. Should be big enough to
            capture important frequencies in the series
            
    Output:
        DataFrame indexed by sample number (starting at 1) and time

    Raises:
        ValueError: if bs_type is not one of the supported options, or
            (from pd.concat) if no samples were generated.
    '''

    # Select the resampler first so that an unsupported bs_type fails with a
    # clear message instead of an unrelated "nothing to concatenate" error.
    if bs_type == 'Stationary':
        bootstrap_cls = StationaryBootstrap
    elif bs_type == 'Circular':
        bootstrap_cls = CircularBlockBootstrap
    else:
        raise ValueError(
            "Unknown bs_type {!r}; expected 'Stationary' or 'Circular'".format(
                bs_type))

    bs = bootstrap_cls(block_size, series)

    # One DataFrame per bootstrapped series, tagged with its sample number
    list_samples = []
    for count, data in enumerate(bs.bootstrap(n_samples), start=1):
        df_temp = pd.DataFrame({
            'sample': count,
            'time': series.index.values,
            # data is (positional inputs, keyword inputs); the resampled
            # series is the first positional input
            'x': data[0][0]
        })
        list_samples.append(df_temp)

    # Concatenate list of samples
    df_samples = pd.concat(list_samples)
    df_samples.set_index(['sample', 'time'], inplace=True)

    # Output DataFrame of samples
    return df_samples
Example #4
0
def stationary_boostrap_method(X, Y, block_size=50, n_samples=50):
    """Draw stationary-bootstrap resamples of ``X`` and ``Y`` together.

    ``X`` is passed to the bootstrap as a positional input and ``Y`` as the
    keyword input ``y``.

    Returns a list with ``n_samples`` tuples ``(resampled X, resampled Y)``.
    """
    resampler = StationaryBootstrap(block_size, X, y=Y)

    # Each draw is (positional inputs, keyword inputs).
    return [(draw[0][0], draw[1]['y'])
            for draw in resampler.bootstrap(n_samples)]
Example #5
0
    def test_str(self):
        """Check ``str``, ``repr`` and the HTML repr of the bootstraps."""
        # IID bootstrap, one positional input
        bs = IIDBootstrap(self.y_series)
        obj_id = hex(id(bs))
        text = 'IID Bootstrap(no. pos. inputs: 1, no. keyword inputs: 0)'
        assert_equal(str(bs), text)
        # repr is the str form with the object id appended before the ")"
        assert_equal(bs.__repr__(), text[:-1] + ', ID: ' + obj_id + ')')
        html = ('<strong>IID Bootstrap</strong>('
                '<strong>no. pos. inputs</strong>: 1, '
                '<strong>no. keyword inputs</strong>: 0, '
                '<strong>ID</strong>: ' + obj_id + ')')
        assert_equal(bs._repr_html(), html)

        # Stationary bootstrap, two positional inputs
        bs = StationaryBootstrap(10, self.y_series, self.x_df)
        obj_id = hex(id(bs))
        text = ('Stationary Bootstrap(block size: 10, no. pos. inputs: 2, '
                'no. keyword inputs: 0)')
        assert_equal(str(bs), text)
        assert_equal(bs.__repr__(), text[:-1] + ', ID: ' + obj_id + ')')

        # Circular block bootstrap, keyword inputs only
        bs = CircularBlockBootstrap(block_size=20,
                                    y=self.y_series,
                                    x=self.x_df)
        obj_id = hex(id(bs))
        text = ('Circular Block Bootstrap(block size: 20, no. pos. inputs: 0, '
                'no. keyword inputs: 2)')
        assert_equal(str(bs), text)
        assert_equal(bs.__repr__(), text[:-1] + ', ID: ' + obj_id + ')')
        html = ('<strong>Circular Block Bootstrap</strong>'
                '(<strong>block size</strong>: 20, '
                '<strong>no. pos. inputs</strong>: 0, '
                '<strong>no. keyword inputs</strong>: 2,'
                ' <strong>ID</strong>: ' + obj_id + ')')
        assert_equal(bs._repr_html(), html)
Example #6
0
    def test_smoke(self):
        """Smoke test: ``cov`` runs for every block-bootstrap variant."""
        reps = 20

        def column_mean(y):
            return y.mean(axis=0)

        # (bootstrap class, block size), in the order they are exercised
        cases = (
            (StationaryBootstrap, 13),
            (MovingBlockBootstrap, 13),
            (CircularBlockBootstrap, 13),
            (MovingBlockBootstrap, 10),
            (CircularBlockBootstrap, 10),
        )
        for bootstrap_cls, block_size in cases:
            # Only checking that the call completes; the result is discarded.
            bootstrap_cls(block_size, self.y).cov(column_mean, reps=reps)
Example #7
0
def get_TheilSen(_x, what, _nboot, _y):
    """Return one Theil-Sen trend statistic of ``_y`` against ``_x``.

    The argument order is unusual: according to the original author's note,
    ``apply`` appears to pass the dataframe column as the last argument.

    Parameters
    ----------
    _x : array-like
        Independent variable.
    what : str
        Statistic to return:

        * ``"slope"`` - Theil-Sen slope of ``_y.values`` (invalid values
          masked), multiplied by ``86400*365*1000000000`` (presumably a
          per-nanosecond to per-year conversion - TODO confirm).
        * ``"pval_tau"`` - half of the Kendall tau p-value.
        * ``"pval_autocorr"`` - position of the full-sample slope in the
          stationary-bootstrap (block size 3) slope distribution, folded to
          at most 0.5.
        * ``"pval"`` - position of zero in the IID-bootstrap slope
          distribution, folded to at most 0.5.
    _nboot : int
        Number of bootstrap replications (bootstrap modes only).
    _y : array-like
        Dependent variable; ``"slope"`` reads ``_y.values``.

    Returns
    -------
    float or None
        The requested statistic, ``np.nan`` if the computation raised an
        exception, or ``None`` when ``what`` is not a recognised mode.
    """
    import numpy as np
    from arch.bootstrap import StationaryBootstrap, IIDBootstrap
    from scipy.stats import mstats, kendalltau
    from statsmodels.distributions.empirical_distribution import ECDF

    def _bootstrap_pvalue(bs, reference):
        # Slopes of _y resampled by bootstrapped indices, regressed on the
        # original _x; the p-value is the ECDF at `reference`, folded so
        # that it never exceeds 0.5.
        bs_slopes = []
        for data in bs.bootstrap(_nboot):
            ind = data[0][0]
            res = mstats.theilslopes(_y[ind], _x, alpha=0.95)
            bs_slopes.append(res[0])
        pvalue = ECDF(bs_slopes)(reference)
        if pvalue > 0.5:
            pvalue = 1 - pvalue
        return pvalue

    try:
        if what == "slope":
            return mstats.theilslopes(np.ma.masked_invalid(_y.values), _x)[0]*86400*365*1000000000
        elif what == "pval_tau":
            return kendalltau(_x, _y)[1]/2
        elif what == "pval_autocorr":
            res0 = mstats.theilslopes(_y, _x, alpha=0.95)[0]
            bs = StationaryBootstrap(3, np.array(range(len(_y))))
            return _bootstrap_pvalue(bs, res0)
        elif what == "pval":
            bs = IIDBootstrap(np.array(range(len(_y))))
            return _bootstrap_pvalue(bs, 0)
    except Exception:
        # Best effort: any computational failure yields NaN. Narrowed from a
        # bare except so KeyboardInterrupt/SystemExit still propagate.
        return np.nan
Example #8
0
    def fit(self, df_portfolios, df_factors):
        """ Fit the estimator

        Runs the first-stage estimation and the penalized second stage on
        the full sample, then repeats both on stationary-bootstrap resamples
        to obtain the share of resamples in which each coefficient is
        exactly zero (the shrinkage rate).

        Parameters
        -----------
        df_portfolios : DataFrame
            Time series of portfolios (test assets)

        df_factors : DataFrame or Series
            Time series of the factors

        Returns
        -------
        self
        """
        # Full-sample estimates
        first_stage, _, full_loadings = _fmb(df_portfolios, df_factors,
                                             self.intercept)
        self._tsres = first_stage
        self.loadings = full_loadings

        # Derive the penalty parameter only when the user did not set one
        if self.alpha is None:
            self.alpha = _get_alpha(self._tsres)

        self._xsres = _penfmb(full_loadings, self.alpha, self.d, self.tol,
                              self.maxiter)
        self._xsres.name = 'coef'

        # Bootstrap replications; alpha is re-derived for every resample
        resampler = StationaryBootstrap(self.block_length, df_portfolios,
                                        df_factors)
        boot_coefs = []
        for draw in resampler.bootstrap(self.nboot):
            boot_portfolios = draw[0][0].reset_index(drop=True)
            boot_factors = draw[0][1].reset_index(drop=True)
            boot_first_stage, _, boot_loadings = _fmb(boot_portfolios,
                                                      boot_factors,
                                                      self.intercept)
            boot_alpha = _get_alpha(boot_first_stage)
            boot_coefs.append(
                _penfmb(boot_loadings, boot_alpha, self.d, self.tol,
                        self.maxiter))

        boot_frame = pd.DataFrame(boot_coefs)
        n_zero = (boot_frame == 0).sum(axis=0)
        self._srate = 1.0 * n_zero / boot_frame.shape[0]
        self._srate.name = 'shrinkage rate'
        # self._se = bsxsres.std(axis=0)
        # self._se.name = 'standard error'

        return self
Example #9
0
def test_str(bs_setup):
    """Check ``str``, ``repr`` and the HTML repr of the bootstrap classes."""

    def with_id(text, obj):
        # repr is the str form with the object id appended before the ")"
        return text[:-1] + ", ID: " + hex(id(obj)) + ")"

    # IID bootstrap, one positional input
    bs = IIDBootstrap(bs_setup.y_series)
    text = "IID Bootstrap(no. pos. inputs: 1, no. keyword inputs: 0)"
    assert_equal(str(bs), text)
    assert_equal(bs.__repr__(), with_id(text, bs))
    html = ("<strong>IID Bootstrap</strong>("
            "<strong>no. pos. inputs</strong>: 1, "
            "<strong>no. keyword inputs</strong>: 0, "
            "<strong>ID</strong>: " + hex(id(bs)) + ")")
    assert_equal(bs._repr_html(), html)

    # Stationary bootstrap, two positional inputs
    bs = StationaryBootstrap(10, bs_setup.y_series, bs_setup.x_df)
    text = ("Stationary Bootstrap(block size: 10, no. pos. inputs: 2, "
            "no. keyword inputs: 0)")
    assert_equal(str(bs), text)
    assert_equal(bs.__repr__(), with_id(text, bs))

    # Circular block bootstrap, keyword inputs only
    bs = CircularBlockBootstrap(block_size=20,
                                y=bs_setup.y_series,
                                x=bs_setup.x_df)
    text = ("Circular Block Bootstrap(block size: 20, no. pos. inputs: 0, "
            "no. keyword inputs: 2)")
    assert_equal(str(bs), text)
    assert_equal(bs.__repr__(), with_id(text, bs))
    html = ("<strong>Circular Block Bootstrap</strong>"
            "(<strong>block size</strong>: 20, "
            "<strong>no. pos. inputs</strong>: 0, "
            "<strong>no. keyword inputs</strong>: 2,"
            " <strong>ID</strong>: " + hex(id(bs)) + ")")
    assert_equal(bs._repr_html(), html)

    # Moving block bootstrap, keyword inputs only
    bs = MovingBlockBootstrap(block_size=20,
                              y=bs_setup.y_series,
                              x=bs_setup.x_df)
    text = ("Moving Block Bootstrap(block size: 20, no. pos. inputs: 0, "
            "no. keyword inputs: 2)")
    assert_equal(str(bs), text)
    assert_equal(bs.__repr__(), with_id(text, bs))
    html = ("<strong>Moving Block Bootstrap</strong>"
            "(<strong>block size</strong>: 20, "
            "<strong>no. pos. inputs</strong>: 0, "
            "<strong>no. keyword inputs</strong>: 2,"
            " <strong>ID</strong>: " + hex(id(bs)) + ")")
    assert_equal(bs._repr_html(), html)
def calibrate(volume_duration, strikes, reps):
    """Estimate arrival rates per class and fit their decay in half-spread.

    Steps, as implemented below:

    1. Compute the arrival rate per 'Class' with ``compute_arrival_rate``.
    2. Build one ``StationaryBootstrap`` (block size 25) per class over the
       'Volume' and 'Duration' data and use it for confidence intervals and
       a covariance matrix of the arrival rate (``reps`` replications each).
    3. For every (Class, Strike) pair, regress the arrival rate on a constant
       and the half-spread by GLS, using the bootstrap covariance as
       ``sigma``.

    Parameters
    ----------
    volume_duration :
        Object with index levels 'Half-spread' and 'Strike' (they are
        unstacked) and a 'Class' level or column usable by ``groupby``;
        provides 'Volume' and 'Duration' per class.
        NOTE(review): exact layout is not visible here - confirm with caller.
    strikes :
        Mapping from class name to the strikes passed to
        ``compute_arrival_rate``.
    reps : int
        Number of bootstrap replications.

    Returns
    -------
    (arrival_rate, params)
        ``arrival_rate``: exponential of the arrival rate and of its
        '2.5%' / '97.5%' bootstrap bounds (presumably ``compute_arrival_rate``
        works in logs - TODO confirm).
        ``params``: per (Class, Strike), 'A' (exp of the GLS intercept),
        kappa (negated GLS slope) and their 5% / 95% bounds.
    """
    volume_duration = volume_duration.unstack(['Half-spread', 'Strike'])

    # Arrival rate per class; c.name is the class key of the current group.
    arrival_rate = volume_duration.groupby('Class').apply(
        lambda c: compute_arrival_rate(c.loc[c.name, 'Volume'], c.loc[
            c.name, 'Duration'], strikes[c.name]))
    arrival_rate.name = 'Arrival rate'
    arrival_rate.index = arrival_rate.index.reorder_levels(
        ['Class', 'Strike', 'Half-spread'])

    # One stationary bootstrap object per class, with keyword inputs
    # `volume` and `duration` (matched by the lambda parameter names below).
    sbs = volume_duration.groupby('Class').apply(lambda c: StationaryBootstrap(
        25, volume=c.loc[c.name, 'Volume'], duration=c.loc[c.name, 'Duration'])
                                                 )

    # Bootstrap confidence bounds of the arrival rate, labelled 2.5% / 97.5%.
    conf_int = sbs.groupby('Class').apply(lambda c: pd.DataFrame(
        c[c.name].conf_int(lambda volume, duration: compute_arrival_rate(
            volume, duration, strikes[c.name]),
                           reps=reps), ['2.5%', '97.5%'], arrival_rate.loc[
                               c.name].index))
    conf_int = conf_int.T.stack('Class')
    conf_int.index = conf_int.index.reorder_levels(
        ['Class', 'Strike', 'Half-spread'])

    # Bootstrap covariance of the arrival rate within each class.
    sigma = sbs.groupby('Class').apply(lambda c: pd.DataFrame(
        c[c.name].cov(lambda volume, duration: compute_arrival_rate(
            volume, duration, strikes[c.name]),
                      reps=reps), arrival_rate.loc[c.name].index, arrival_rate.
        loc[c.name].index))
    # For the rows of each strike keep only the columns of that same strike,
    # then drop rows that are entirely NaN.
    sigma = sigma.groupby('Strike').apply(
        lambda k: k.xs(k.name, level='Strike', axis=1))
    sigma.dropna(how='all', inplace=True)
    # GLS of arrival rate on [constant, half-spread] per (Class, Strike).
    gls = arrival_rate.loc[sigma.index].groupby(
        ['Class', 'Strike']).apply(lambda k: sm.GLS(
            k.values,
            sm.add_constant(k.index.get_level_values('Half-spread')),
            sigma=sigma.xs(k.name, level=['Class', 'Strike']).dropna(axis=1)).
                                   fit())
    # A = exp(intercept); kappa = -slope.
    params = gls.apply(lambda g: pd.Series([np.exp(g.params[0]), -g.params[1]],
                                           ['A', '$\\kappa$']))
    # alpha=.1 gives 5% / 95% bounds.
    base_conf_int = gls.apply(lambda g: pd.Series(
        np.exp(g.conf_int(alpha=.1)[0]), ['A 5%', 'A 95%']))
    # The slope bounds are reversed ([::-1]) because the sign is flipped.
    decay_conf_int = gls.apply(lambda g: pd.Series(
        -g.conf_int(alpha=.1)[1, ::-1], ['$\\kappa$ 5%', '$\\kappa$ 95%']))
    params = pd.concat([params, base_conf_int, decay_conf_int], axis=1)

    # Exponentiate the point estimates and their bounds before returning.
    arrival_rate = np.exp(pd.concat([arrival_rate, conf_int], axis=1))
    return arrival_rate, params
Example #11
0
    def sb_bootstrap(self):
        """
        Simulate paths with the stationary bootstrap and record the results.

        The simulated paths are passed to ``investment_horizons`` and its
        result is stored in ``self.store_output``. When ``self.store_sim`` is
        set, at most the first 100 simulated columns are also kept in
        ``self.simulated_paths['SB']``.

        params:
        -------

            - self: see above

        return:
        -------

            - none
        """

        print("\nSTATIONARY BOOTSTRAP \n")

        resampler = StationaryBootstrap(self.blocksize, self.data)
        simulated = boot(N_paths=self.n_paths,
                         method=resampler,
                         obs_path=self.data,
                         add_noise=self.add_noise)

        if self.store_sim:
            # Cap the stored simulations at 100 columns
            n_keep = min(simulated.shape[1], 100)
            self.simulated_paths['SB'] = simulated.iloc[:, :n_keep]

        self.store_output = investment_horizons(
            observed_path=self.data,
            sims=simulated,
            investment_horizons=self.ih,
            freq=self.frequency,
            sum_stats=self.stats,
            perf_functions=self.perf_functions,
            store_output_dic=self.store_output,
            simulation_tech='SB',
            plotting=self.plotting)

        return None
Example #12
0
    def test_smoke(self):
        """Smoke test: ``cov`` runs for every block-bootstrap variant."""
        reps = 20

        def column_mean(y):
            return y.mean(axis=0)

        # (bootstrap class, block size), in the order they are exercised
        cases = (
            (StationaryBootstrap, 13),
            (MovingBlockBootstrap, 13),
            (CircularBlockBootstrap, 13),
            (MovingBlockBootstrap, 10),
            (CircularBlockBootstrap, 10),
        )
        for bootstrap_cls, block_size in cases:
            # Only checking that the call completes; the result is discarded.
            bootstrap_cls(block_size, self.y).cov(column_mean, reps=reps)
Example #13
0
def bootstrap(x, f, block_size=50, reps=100):
    """Return the mean of ``f`` over stationary-bootstrap resamples of ``x``.

    Parameters
    ----------
    x : sequence
        Data to resample; must support ``len``.
    f : callable
        Statistic applied to each resample.
    block_size : int, optional
        Block size passed to the stationary bootstrap (default 50).
    reps : int, optional
        Number of bootstrap replications (default 100).

    Returns
    -------
    ``np.nan`` when ``x`` is empty, otherwise the mean of the bootstrapped
    statistics.
    """
    if len(x) == 0:
        return np.nan
    # Imported lazily so the empty-input path does not require arch.
    from arch.bootstrap import StationaryBootstrap
    bs = StationaryBootstrap(block_size, x)
    return bs.apply(f, reps).mean()
Example #14
0
def bootstrap(x, f, block_size=50, reps=100):
    """Return the mean of ``f`` over stationary-bootstrap resamples of ``x``.

    Parameters
    ----------
    x : sequence
        Data to resample; must support ``len``.
    f : callable
        Statistic applied to each resample.
    block_size : int, optional
        Block size passed to the stationary bootstrap (default 50).
    reps : int, optional
        Number of bootstrap replications (default 100).

    Returns
    -------
    ``np.nan`` when ``x`` is empty, otherwise the mean of the bootstrapped
    statistics.
    """
    if len(x) == 0:
        return np.nan
    # Imported lazily so the empty-input path does not require arch.
    from arch.bootstrap import StationaryBootstrap
    bs = StationaryBootstrap(block_size, x)
    return bs.apply(f, reps).mean()
from read_data import import_data


def sharpe_ratio(x):
    """Summarise returns ``x`` as a Series with entries CR, sigma and SR.

    * ``CR``    - sum of ``x``
    * ``sigma`` - square root of ``x.var()``
    * ``SR``    - mean of ``x`` divided by ``sigma``
    """
    mean_return = x.mean()
    volatility = np.sqrt(x.var())
    stats = np.array([x.sum(), volatility, mean_return / volatility])
    # squeeze drops any length-one axes left by the reductions above
    return pd.Series(stats.squeeze(), index=['CR', 'sigma', 'SR'])


# Load the portfolio returns and analyse the third column.
R = import_data('results/Yearly_portfolio/yearly_portfolio_returns_CDAX_mp')

R1 = R.iloc[:, 2]
# Full-sample statistics, a Series labelled 'CR', 'sigma', 'SR'.
params = sharpe_ratio(R1)

# Stationary bootstrap (block size 12) of the same statistics; `results` has
# one row per replication and one column per statistic.
bs = StationaryBootstrap(12, R1)
results = bs.apply(sharpe_ratio, 100000)
# Deviations of each bootstrap replication from the full-sample value.
# `.iloc` is used because `params` has string labels: plain integer keys on
# such a Series rely on a deprecated positional fallback in pandas.
delta_CR = results[:, 0] - params.iloc[0]
delta_sigma = results[:, 1] - params.iloc[1]
delta_SR = results[:, 2] - params.iloc[2]


def CI(delta, q=0.95):
    """Return the ``q``-quantile of the absolute deviations ``|delta|``.

    Parameters
    ----------
    delta : array-like
        Deviations of the bootstrap statistics from the point estimate.
        It is not modified.
    q : float, optional
        Quantile level in [0, 1] (default 0.95).

    Returns
    -------
    The value ``b`` such that roughly a fraction ``q`` of the deviations
    satisfy ``|delta| <= b``.

    Raises
    ------
    IndexError
        If ``delta`` is empty.
    """
    # Take magnitudes first and sort those: sorting the signed values and
    # then taking abs leaves the array unordered by magnitude.
    abs_sorted = np.abs(delta)
    abs_sorted.sort()
    n = len(abs_sorted)
    # Index by the actual sample size (not a hard-coded replication count),
    # clamped so that q == 1.0 selects the largest deviation.
    position = min(int(q * n), n - 1)
    bound = abs_sorted[position]
    return bound


# Bounds on the bootstrap deviations of CR and sigma, using CI's default
# q=0.95. No bound is computed for delta_SR in the lines visible here.
CR_bound = CI(delta_CR)
sigma_bound = CI(delta_sigma)
Example #16
0
def block_bootstrap(series,
              n_samples,
              bs_type = 'Stationary',
              block_size = 10
              ):

    '''
    Computes block-bootstrap samples of series.
    
    Args
    ----
    series: pd.Series
        Time-series data in the form of a Pandas Series indexed by time
    n_samples: int
        Number of bootstrapped samples to output.
    bs_type: {'Stationary', 'Circular'}
        Type of block-bootstrapping to perform.
    block_size: int
        Size of resampling blocks. Should be big enough to
        capture important frequencies in the series.
        
    Returns
    -------
    pd.DataFrame:
        DataFrame containing the block-bootstrapped samples of series. 
        Indexed by sample number (starting at 1), then time.

    Raises
    ------
    ValueError:
        If bs_type is not one of the supported options, or (from pd.concat)
        if no samples were generated.
    
    '''

    # Select the resampler first so that an unsupported bs_type fails with a
    # clear message instead of an unrelated "nothing to concatenate" error.
    if bs_type == 'Stationary':
        bootstrap_cls = StationaryBootstrap
    elif bs_type == 'Circular':
        bootstrap_cls = CircularBlockBootstrap
    else:
        raise ValueError(
            "Unknown bs_type {!r}; expected 'Stationary' or 'Circular'".format(
                bs_type))

    bs = bootstrap_cls(block_size, series)

    # One DataFrame per bootstrapped series, tagged with its sample number
    list_samples = []
    for count, data in enumerate(bs.bootstrap(n_samples), start=1):
        # data is (positional inputs, keyword inputs); the resampled series
        # is the first positional input
        df_temp = pd.DataFrame({'sample': count, 
                                'time': series.index.values,
                                'x': data[0][0]})
        list_samples.append(df_temp)

    # Concatenate list of samples
    df_samples = pd.concat(list_samples)
    df_samples.set_index(['sample','time'], inplace=True)

    # Output DataFrame of samples
    return df_samples
Example #17
0
def trend_CI(x_var, y_var, n_boot=1000, ci=95, trendtype="linreg", q=0.5, frac=0.6, it=3, autocorr=None, CItype="bootstrap"):
    """calculates bootstrap confidence interval and significance level for trend, ignoring autocorrelation or accounting for it
    Parameters
    ----------
    x_var : list
      independent variable
    y_var : list
      dependent variable, same length as x_var
    q : int, optional, only if trendtype==quantreg
      quantile for which regression is to be calculated
    n : int, optional
      number of bootstrap samples
    ci : int, optional
      confidence level. Default is for 95% confidence interval
    frac : int, optional, only if trendtype==lowess
      lowess parameter (fraction of time period length used in local regression)
    it : int, optional, only if trendtype==lowess
      lowess parameter (numbre of iterations)
    autocorr : str, optional
      way of accounting for autocorrelation, possible values: None, "bootstrap"
    trendtype : str, optional
      method of trend derivation, possible values: lowess, linreg, quantreg, TheilSen
    CItype : str, optional
      method of CI derivation, possible values: "analytical" and "bootstrap". 
      if trendtype is "lowess", CItype will be set to None
      if CItype is "analytical": autocorrelation will be set to None
      

    Results
    -------
    returns library with following elements:
    slope - slope of the trend
    CI_high - CI on the slope value
    CI_low - as above
    pvalue - trend's significance level
    trend - trend line, or rather its y values for all x_var
    trendCI_high - confidence interval for each value of y
    trendCI_low - as above

    Remarks
    -------
    the fit function ocassionally crashes on resampled data. The workaround is to use try statement
    """
    import numpy as np
    import pandas as pd
    #for linreg
    import statsmodels.api as sm
    from statsmodels.regression.linear_model import OLS
    #for arima
    import statsmodels.tsa as tsa
    #for quantreg
    import statsmodels.formula.api as smf
    from statsmodels.regression.quantile_regression import QuantReg
    #for lowess
    import statsmodels.nonparametric.api as npsm
    #other
    from statsmodels.distributions.empirical_distribution import ECDF
    from scipy.stats import mstats, mannwhitneyu, t, kendalltau
    from arch.bootstrap import StationaryBootstrap, IIDBootstrap

    #preparing data
    if CItype=="analytical" and trendtype=="TheilSen":
        CItype="bootstrap"
    x_var=np.array(x_var)
    y_var=np.ma.masked_invalid(y_var)
    n_data=len(y_var)
    ci_low=(100-ci)/2
    ci_high=100-ci_low
    
    #setting bootstrapping function
    if autocorr=="bootstrap":
        bs=StationaryBootstrap(3, np.array(range(len(y_var))))
    else:
        bs=IIDBootstrap(np.array(range(len(y_var))))
    
    if trendtype=="quantreg":
        print "Quantile regression, CI type: "+CItype+", autocorrelation adjustment: "+str(autocorr)+"\n"
        xydata=pd.DataFrame(np.column_stack([x_var, y_var]), columns=['X', 'Y'])
        model=smf.quantreg('Y ~ X', xydata)
        res=model.fit(q=q)
        intcpt=res.params.Intercept
        slope=res.params.X
        pvalue=res.pvalues[1]
        CI_low=res.conf_int()[0]['X']
        CI_high=res.conf_int()[1]['X']
        y_pred=res.predict(xydata)
        #calculating residuals
        resids=y_var-y_pred
        #calculate autocorrelation indices
        autocorr_test(x_var, resids)
            
        if CItype=="bootstrap":
            #bootstrapping
            bs_trends=np.copy(y_pred).reshape(-1,1)
            bs_slopes=[]
            bs_intcpts=[]
            for data in bs.bootstrap(n_boot):
                ind=data[0][0]
                model = smf.quantreg('Y ~ X', xydata.ix[ind,:])
                try:
                    res = model.fit(q=q)
                    bs_slopes=bs_slopes+[res.params.X]
                    bs_intcpts=bs_intcpts+[res.params.Intercept]
                    bs_trends=np.append(bs_trends,res.predict(xydata).reshape(-1,1), 1)
                except:
                    goingdownquietly=1
    if trendtype=="linreg":
        print "Linear regression, CI type: "+CItype+", autocorrelation adjustment: "+str(autocorr)+"\n"
        x_varOLS = sm.add_constant(x_var)
        model = sm.OLS(y_var, x_varOLS, hasconst=True, missing='drop')
        res = model.fit()
        intcpt,slope=res.params
        pvalue=res.pvalues[1]
        CI_low,CI_high=res.conf_int()[1]
        y_pred=res.predict(x_varOLS)
        #calculating residuals
        resids=y_var-y_pred
        #calculate autocorrelation indices
        autocorr_test(x_var, resids)
        
        if CItype=="bootstrap":        
            #bootstrapping for confidence intervals
            bs_slopes=[]
            bs_intcpts=[]
            bs_trends=np.copy(y_pred).reshape(-1,1)
            for data in bs.bootstrap(n_boot):
                ind=data[0][0]
                model = sm.OLS(y_var[ind], x_varOLS[ind,:], hasconst=True, missing='drop')
                try:
                    res = model.fit()
                    bs_slopes=bs_slopes+[res.params[1]]
                    bs_intcpts=bs_intcpts+[res.params[0]]
                    bs_trends=np.append(bs_trends,res.predict(x_varOLS).reshape(-1,1), 1)
                except:
                    goingdownquietly=1
                    
    if trendtype=="TheilSen":
#        print "Theil-Sen slope, CI type: "+CItype+", autocorrelation adjustment: "+str(autocorr)+"\n"
        #significance of MK tau
        tau,pvalue=kendalltau(x_var, y_var)
#        print "raw MK tau:", tau, "raw MK pvalue:", pvalue
        #TS slope and confidence intervals
        slope,intercept,CI_low,CI_high=mstats.theilslopes(y_var, x_var, alpha=0.95)        
        #getting slope line's y values
        y_pred=intercept+slope*x_var
        #calculating residuals
        resids=y_var-y_pred
        #calculate autocorrelation indices
        autocorr_test(x_var, resids)
                    
        if CItype=="bootstrap":
            #bootstrapping for confidence intervals
            bs_slopes=[]
            bs_intcpts=[]
            bs_trends=np.copy(y_pred).reshape(-1,1)
            for data in bs.bootstrap(n_boot):
                ind=data[0][0]
                res=mstats.theilslopes(y_var[ind], x_var[ind], alpha=0.95)
                bs_slopes=bs_slopes+[res[0]]
                bs_intcpts=bs_intcpts+[res[1]]
                bs_trends=np.append(bs_trends, (res[1]+res[0]*x_var).reshape(-1,1), 1)

    if trendtype=="lowess":
        print "Lowess\n"
        temp=dict(npsm.lowess(y_var, x_var, frac=frac, it=it, missing="drop"))
        y_pred=np.array(map(temp.get, x_var)).astype("float").reshape(-1,1)
        bs_trends=np.copy(y_pred)
        
        for data in bs.bootstrap(n_boot):
            ind=data[0][0]
            try:
                temp = dict(npsm.lowess(y_var[ind], x_var[ind], frac=frac, it=it, missing="drop"))
                temp=np.array(map(temp.get, x_var)).astype("float").reshape(-1,1)
                pred=pd.DataFrame(temp, index=x_var)
                temp_interp=pred.interpolate().values
                bs_trends=np.append(bs_trends, temp_interp, 1)
            except:
                goingdownquietly=1


    #calculating final values of CI and p-value

    #skipping when lowess
    if trendtype=="lowess":
        CI_low=np.nan
        CI_high=np.nan
        slope=np.nan
        intcpt=np.nan
        pvalue=np.nan
        confint=np.nanpercentile(bs_trends, [ci_low,ci_high], 1)
        trendCI_low=confint[:,0]
        trendCI_high=confint[:,1]
    else:
        if CItype=="bootstrap":
            #values for slope, intercept and trend can be obtained as medians of bootstrap distributions, but normally analytical parameters are used instead
            # it the bootstrap bias (difference between analytical values and bootstap median) is strong, it might be better to use bootstrap values. 
            # These three lines would need to be uncommented then
#            slope=np.median(bs_slopes)
#            intcpt=np.median(bs_intcpts)
#            trend=intcpt+slope*x_var
            #these are from bootstrap too, but needs to be used for this accounts for autocorrelation, which is the point of this script
            CI_low,CI_high=np.percentile(bs_slopes, [5, 95])                
            ecdf=ECDF(bs_slopes)
            pvalue=ecdf(0)
            #this makes sure we are calculating p-value on the correct side of the distribution. That will be one-sided pvalue
            if pvalue>0.5:
                pvalue=1-pvalue
            confint=np.nanpercentile(bs_trends, [ci_low,ci_high], 1)
            print "bs_trends:", bs_trends.shape, confint.shape
            trendCI_low=confint[:,0]
            trendCI_high=confint[:,1]
        else:
            #this is for analytical calculation of trend confidence interval
            #it happens in the same way for each of the trend types, thus it is done here, not under the trendtype subroutines
            #making sure x are floats
            xtemp=np.array(x_var)*1.0
            #squared anomaly
            squanom=(xtemp-np.mean(xtemp))**2
            temp=((1./len(x_var))+(squanom/sum(squanom)))**0.5
            #standard error of estmation
            see=(np.nansum((np.array(y_var)-np.nanmean(y_pred))**2)/len(x_var))**0.5
            #adjusting ci
            ci_adj=1-((1-ci/100.)/2)
            #accounting for uncertainty in mean through student's t
            tcomp=t.ppf(ci_adj, len(x_var)-2)
            #confidence interval
            cint=tcomp*see*temp
            #for trend only
            trendCI_high=y_pred+cint
            trendCI_low=y_pred-cint

        print trendtype, "slope:",slope, "pvalue (one sided):", pvalue, "conf interval:", CI_low, CI_high, "autocorrelation adjustment:", autocorr, "\n"
    output={"slope":slope, "CI_high":CI_high, "CI_low":CI_high, "pvalue":pvalue, "trend": y_pred, "trendCI_low":trendCI_low, "trendCI_high":trendCI_high}
    return output
Example #18
0

# Read the raw float64 values of the coupled run and arrange them in rows of
# 36 columns (one column per variable looped over below).
with open('lyapunov_exponents_both_297.dat', 'rb') as f:
    lyap_coupled = numpy.fromfile(f, dtype='float64').reshape([-1, 36])

# Same layout for the ocean-only run, keeping only columns 20..35.
with open('lyapunov_exponents_ocn_297.dat', 'rb') as f:
    lyap_ocn = numpy.fromfile(f, dtype='float64').reshape([-1, 36])[:, 20:36]

# Rescale both data sets by the same constant factor.
# NOTE(review): presumably a unit conversion (86400 s per day times a model
# time scale) - confirm.
lyap_coupled *= 86400 * 1.032e-4
lyap_ocn *= 86400 * 1.032e-4

# Block length per variable: first column of the block_length() result.
block_length_coupled = numpy.array(block_length(lyap_coupled))[:, 0]

# Confidence interval of `mean` for each of the 36 coupled variables, from a
# stationary bootstrap with 1000 replications.
all_ci = []
for var in range(36):
    bootstrap = StationaryBootstrap(block_length_coupled[var],
                                    lyap_coupled[:, var])

    ci = bootstrap.conf_int(mean, 1000)
    all_ci.append(ci)

# Stack the per-variable intervals side by side.
ci_coupled = numpy.hstack(all_ci)

block_length_ocn = numpy.array(block_length(lyap_ocn))[:, 0]

# Same procedure for the 16 ocean-only variables; all_ci is reused.
all_ci = []
for var in range(16):
    bootstrap = StationaryBootstrap(block_length_ocn[var], lyap_ocn[:, var])

    ci = bootstrap.conf_int(mean, 1000)
    all_ci.append(ci)