Example #1
0
 def get_coef(data, pretype, econamelist, quatile):
     """Fit a linear quantile regression of ``pretype`` on the given regressors.

     Parameters
     ----------
     data :
         Data source handed to ``smf.quantreg`` (must expose the named columns).
     pretype : str
         Name of the response column.
     econamelist : list of str
         Names of the explanatory columns. Any positive number of names is
         accepted; the formula is built by joining them with ``+``.
     quatile : float
         Quantile to fit, forwarded as ``q``.

     Returns
     -------
     tuple
         ``(quatile, intercept, coefficients, bounds)`` where ``coefficients``
         is ``res.params`` restricted to ``econamelist`` and ``bounds`` is the
         matching rows of ``res.conf_int()``.

     Raises
     ------
     ValueError
         If ``econamelist`` is empty (no formula can be built).
     """
     if len(econamelist) == 0:
         raise ValueError('econamelist must contain at least one variable name')
     # One joined formula replaces the former 1..5 special cases, which left
     # the model undefined for any other number of regressors.
     formula = '%s ~ %s' % (pretype, '+'.join(econamelist))
     res = smf.quantreg(formula, data).fit(q=quatile)
     # Quantile, intercept, per-variable coefficients and their lower/upper bounds.
     return quatile, res.params['Intercept'], res.params[
         econamelist], res.conf_int().loc[econamelist]
Example #2
0
    def __construct_from_points(self, points):
        """Fit median quartic regressions of ``y`` and ``z`` on ``x``.

        The points are first passed through ``self.__get_normed_pnts`` and
        loaded into a three-column frame (``x``, ``y``, ``z``).  The fitted
        results are stored on ``self.res_y`` and ``self.res_z``.
        """
        import statsmodels.formula.api as smf

        frame = pd.DataFrame(self.__get_normed_pnts(points),
                             columns=('x', 'y', 'z'))
        # Same quartic right-hand side for both responses.
        quartic_terms = 'x + I(x**2.0) + I(x**3.0) + I(x**4.0)'
        model_for_y = smf.quantreg('y ~ ' + quartic_terms, frame)
        model_for_z = smf.quantreg('z ~ ' + quartic_terms, frame)
        self.res_y = model_for_y.fit(q=0.5)
        self.res_z = model_for_z.fit(q=0.5)
Example #3
0
 def fit(self, y_name, features_name):
     """Fit a median regression on ``self.temp`` and export the result.

     Parameters
     ----------
     y_name : str
         Response name used on the left of the formula.
     features_name : str
         Right-hand side of the formula (already joined).

     Returns
     -------
     The fitted result when the ``mileage``, ``weight_mileage`` and
     ``Intercept`` coefficients are all positive; otherwise ``None``.
     ``None`` is also returned when there is no data, when the two mileage
     columns have rank 1, or when fitting fails.

     Side effects: sets ``self.res`` and calls ``self.show_result`` on an
     ``output/QR/<start_cityid>/yes/`` or ``.../no/`` directory, creating it
     if needed.
     """
     # The None check must come first: the rank test below indexes self.temp.
     if self.temp is None:
         self.res = None
         return None
     if np.linalg.matrix_rank(self.temp[['mileage',
                                         'weight_mileage']]) == 1:
         return None
     try:
         model = smf.quantreg(y_name + '~' + features_name, self.temp)
         self.res = model.fit(q=.5, max_iter=10000)
     except Exception:
         # Best effort: a failed fit is reported as "no result", but
         # KeyboardInterrupt/SystemExit are no longer swallowed.
         self.res = None
         return None
     params = self.res.params
     rejected = (params['mileage'] <= 0 or
                 params['weight_mileage'] <= 0 or
                 params['Intercept'] <= 0)
     dirs = 'output/QR/' + str(self.start_cityid) + ('/no/' if rejected else '/yes/')
     if not os.path.exists(dirs):
         os.makedirs(dirs)
     self.show_result(dirs)
     return None if rejected else self.res
Example #4
0
def calcuTD(x, O, Y, SeqDepth, Grid, Tau):
    """Return the slope for grid entry ``x``, or ``-50`` when it cannot be computed.

    ``Grid[x]`` supplies the quantile used for the polynomial fit and the
    polynomial degree.  ``Y`` is regressed on the polynomial basis of ``O``,
    predictions are made at ``SeqDepth``, and, when every prediction is
    positive, the predictions are regressed on ``SeqDepth`` at quantile
    ``Tau``; the coefficient at position 1 of that fit is returned.
    Any failure along the way yields the sentinel ``-50``.
    """
    failure = -50
    TauGroup, degree = Grid[x]
    degree = int(degree)

    try:
        polyX, centre, scale, alpha, beta = poly.poly(O, degree)
    except Exception:
        return failure
    if polyX is None:
        return failure

    names = ['var_' + str(j) for j in range(degree)]
    response = pd.DataFrame({'Y':Y})
    basis = pd.DataFrame(polyX, columns=names)
    polydata = pd.concat([response, basis], axis=1)
    try:
        rqfit = smf.quantreg('Y~' + '+'.join(names), polydata).fit(q=TauGroup)
        revX = pd.DataFrame(
            poly.predict_poly(polyX, centre, scale, alpha, beta, SeqDepth),
            columns=names)
        predicted = rqfit.predict(revX)

        if min(predicted) > 0:
            design = tools.add_constant(SeqDepth)
            return QuantReg(predicted.values, design).fit(q=Tau).params[1]
        return failure
    except Exception:
        return failure
Example #5
0
def quantile_regression(categorical_mrna, categorical_protein):
    """Return the pseudo R-squared of a median regression ``mrna ~ protein``."""
    frame = pd.DataFrame(columns=['mrna', 'protein'])
    # Columns are filled one at a time, exactly as before, so any index
    # alignment between the two inputs is unchanged.
    frame['mrna'] = categorical_mrna
    frame['protein'] = categorical_protein
    return smf.quantreg('mrna ~ protein', frame).fit(q=.5).prsquared
Example #6
0
    def __qfit_l(self):
        """Fit a quantile regression at every quantile and horizon.

        Returns a list of ``Qfit`` named tuples (``depvar``, ``horizon``,
        ``tau``, ``qfit``), one per (horizon, quantile) pair, ordered by
        horizon and then by quantile.  Prints a one-line summary.
        """
        # One record per individual fit (convenient later)
        QFit = namedtuple('Qfit', ['depvar', 'horizon', 'tau', 'qfit'])

        # QuantReg.fit names its iteration cap ``max_iter``; the former
        # ``maxiter`` key fell into **kwargs and was silently ignored.
        fit_options = {'max_iter': 1000, 'p_tol': 1e-05}

        qfit_l = []
        for h, depvar in zip(self.horizon_l, self.depvar_l):
            reg_f = self.regform_d[depvar]  # Formula for this dependent variable

            for tau in self.quantile_l:
                qfit = smf.quantreg(formula=reg_f,
                                    data=self.data).fit(q=tau, **fit_options)
                qfit_l.append(QFit(depvar=depvar, horizon=h, tau=tau,
                                   qfit=qfit))

        print(f'{len(qfit_l)} quantile regressions estimated '
              f'for {len(self.horizon_l)} horizons '
              f'and {len(self.quantile_l)} quantiles')

        return qfit_l
	def fitMinSpline(self, Yvar, Xvar, smoothingWindow, plot=False, plotVar = None):
	    '''
	    Fit a lower-envelope (q=0.01) spline to a smoothed copy of ``Yvar``.

	    Yvar : series to fit (its index is reused for the design matrix)
	    Xvar : series the spline basis is built from
	    smoothingWindow : accepted for interface compatibility; the window
	        length actually used is ``self._smoothingWindow``
	    plot : when true, print a ggplot of the data and the fitted curve
	    plotVar : series used for the plot's x axis

	    Returns the predictions as a Series indexed like ``Xvar``.
	    '''
	    window_len = self._smoothingWindow
	    # Floor division keeps the slice bounds integral on Python 3 and
	    # matches the former Python 2 results (including for odd windows).
	    head = window_len // 2
	    tail = -window_len // 2

	    # Natural cubic spline basis with 7 columns, no intercept.
	    X = np.asarray(patsy.dmatrix("cr(x, df=7)-1", {"x": Xvar}))
	    modDat = pd.DataFrame(X, index=Yvar.index)
	    modDat.columns = ['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7']

	    # Drop the edges, where the moving average is computed on padding.
	    modDatTrunc = modDat.iloc[head:tail].copy()
	    window = np.ones(window_len)/float(window_len)
	    modDatTrunc['Y'] = np.convolve(Yvar, window, 'same')[head:tail]

	    # Only the first five basis columns enter the regression.
	    mod = smf.quantreg('Y~X1+X2+X3+X4+X5', modDatTrunc)
	    res = mod.fit(q=0.01)
	    preds = pd.Series(res.predict(modDat), index = Xvar.index)
	    if plot:
	        plotDF = pd.concat([plotVar, Yvar, preds], axis=1)
	        print(plotDF.columns)
	        plotDF.columns = [plotVar.name, Yvar.name, 'fitted']
	        p = ggplot(aes(x=plotVar.name, y=Yvar.name), data=plotDF) + geom_line() +\
	            geom_line(aes(y='fitted'), color='red')+\
	            ylim(0,5) +\
	            xlab('') + ylab('Sensor (V)')
	        print(p)
	    return(preds)
Example #8
0
def fitMinSpline(Yvar, Xvar, smoothingWindow, plot=False, plotVar = None):
    '''
    Return a lower-envelope (q=0.01) spline fitted to a smoothed ``Yvar``.

    Inputs:
    Yvar : dependent variable to fit (its index is reused for the design matrix)
    Xvar : independent variable the spline basis is built from
    smoothingWindow : length (in samples) of the moving average
    plot : boolean, plot the data and fitted curve; default is not to plot
    plotVar : series used for the plot's x axis, default None

    Returns the predictions as a Series indexed like ``Xvar``.
    '''
    # Floor division keeps the slice bounds integral on Python 3 and matches
    # the former Python 2 results (including for odd windows).
    head = smoothingWindow // 2
    tail = -smoothingWindow // 2

    # Natural cubic spline basis with 7 columns, no intercept.
    X = np.asarray(patsy.dmatrix("cr(x, df=7)-1", {"x": Xvar}))
    modDat = pd.DataFrame(X, index=Yvar.index)
    modDat.columns = ['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7']

    # Drop the edges, where the moving average is computed on padding.
    modDatTrunc = modDat.iloc[head:tail].copy()
    window = np.ones(smoothingWindow)/float(smoothingWindow)
    modDatTrunc['Y'] = np.convolve(Yvar, window, 'same')[head:tail]

    # Only the first five basis columns enter the regression.
    mod = smf.quantreg('Y~X1+X2+X3+X4+X5', modDatTrunc)
    res = mod.fit(q=0.01)
    preds = pd.Series(res.predict(modDat), index = Xvar.index)
    if plot:
        plotDF = pd.concat([plotVar, Yvar, preds], axis=1)
        print(plotDF.columns)
        plotDF.columns = [plotVar.name, Yvar.name, 'fitted']
        p = ggplot(aes(x=plotVar.name, y=Yvar.name), data=plotDF) + geom_line() +\
            geom_line(aes(y='fitted'), color='red')+\
            ylim(0,5) +\
            xlab('') + ylab('Sensor (V)')
        print(p)
    return(preds)
Example #9
0
def QR_beta(f,g,df=merge1):
    """Median-regress column ``f`` on each column in ``g``.

    Returns a list of strings ``"<slope> (<p-value> )"``, one per name in
    ``g``, with the slope rounded to 8 and the p-value to 4 decimals.
    """
    reg_p=[]
    # Y and X look unused but are referenced by name from the 'Y~X' formula.
    Y=df[f].astype(float)
    for i in g:
        X=df[i].astype(float)
        # ``max_iter`` is the keyword QuantReg.fit understands; ``maxiter``
        # was swallowed by **kwargs, leaving the default cap in force.
        tryme2=smf.quantreg('Y~X',data=df).fit(max_iter=100000,q=0.5)
        # Position 1 is the slope (position 0 is the intercept).
        slope=tryme2.params.iloc[1]
        pvalue=tryme2.pvalues.iloc[1]
        reg_p.append(str(round(slope,8))+" ("+str(round(pvalue,4))+" )")
    return reg_p
Example #10
0
def QR(f,g,df=merge1):
    """Median-regress column ``f`` on each column in ``g``.

    Returns the list of slope p-values, one per name in ``g``.
    """
    reg_p=[]
    # Y and X look unused but are referenced by name from the 'Y~X' formula.
    Y=df[f].astype(float)
    for i in g:
        X=df[i].astype(float)
        # ``max_iter`` is the keyword QuantReg.fit understands; ``maxiter``
        # was swallowed by **kwargs, leaving the default cap in force.
        tryme2=smf.quantreg('Y~X',data=df).fit(max_iter=100000,q=0.5)
        # Position 1 is the slope (position 0 is the intercept).
        reg_p.append(tryme2.pvalues.iloc[1])
    return reg_p
Example #11
0
 def quant_summary(self, q=0.5):
     '''
     Func: summary of a quantile regression\n
     q --> quantile to fit
     '''
     fitted = smf.quantreg(formula=self.formula, data=self.df).fit(q=q)
     return fitted.summary()
def fit_quanmod(df: pd.DataFrame, q: Number) -> List[float]:
    """
        - fits the quantile regression ``STR ~ NDVI`` at quantile ``q``
        - returns the two coefficients as [Intercept, NDVI]
    """
    coefficients = smf.quantreg('STR ~ NDVI', df).fit(q=q).params
    return [coefficients[term] for term in ('Intercept', 'NDVI')]
Example #13
0
def quant_pred(q, data, **params):
    """Fit quantile ``q`` and predict at the two ends of the ``x`` range.

    ``params['formula']`` is the model formula and ``params['method_args']``
    holds extra keyword arguments for ``fit``.  The result has one row for
    the minimum and one for the maximum of ``data['x']``, with columns
    ``x``, ``quantile``, ``group`` and the predicted ``y``.
    """
    fitted = smf.quantreg(params['formula'], data).fit(
        q=q, **params['method_args'])
    x_column = data['x']
    label = '{}-{}'.format(data['group'].iloc[0], q)
    out = pd.DataFrame({
        'x': [x_column.min(), x_column.max()],
        'quantile': q,
        'group': label})
    out['y'] = fitted.predict(out)
    return out
Example #14
0
def quant_pred(q, data, **params):
    """Return end-point predictions of a quantile regression at quantile ``q``.

    The model is built from ``params['formula']`` and fitted with the extra
    keyword arguments in ``params['method_args']``.  Two rows are returned
    (smallest and largest ``x`` in ``data``), labelled with the quantile and
    a ``"<first group value>-<q>"`` group tag, plus the predicted ``y``.
    """
    model = smf.quantreg(params['formula'], data)
    result = model.fit(q=q, **params['method_args'])
    endpoints = [data['x'].min(), data['x'].max()]
    group_tag = '{}-{}'.format(data['group'].iloc[0], q)
    out = pd.DataFrame({'x': endpoints, 'quantile': q, 'group': group_tag})
    out['y'] = result.predict(out)
    return out
Example #15
0
def quantile_q(d, p, c, s):
    """Return the ``Intercept`` of a constant-only quantile fit of ``d``.

    The quantile is the ratio ``(p - c) / ((p - c) + (c - s))`` computed
    from the three scalars ``p``, ``c`` and ``s``.
    """
    underage_cost = p - c
    overage_cost = c - s
    ratio = underage_cost / (underage_cost + overage_cost)

    frame = d.copy()
    frame['y'] = d
    frame['ones'] = np.ones(len(d))
    # NOTE(review): 'y ~ ones' also adds the formula's implicit intercept, so
    # the design has two constant columns -- confirm that reading only
    # 'Intercept' returns the intended value.
    return smf.quantreg('y ~ ones', frame).fit(q=ratio).params['Intercept']
Example #16
0
def quantile_reg(df, quantile):
    """Regress the second column of ``df`` on the first at ``quantile``.

    Prints the fit summary and returns a one-row DataFrame (index
    ``'quantile_reg'``) with the quantile, intercept, slope and the slope's
    lower/upper confidence bounds.
    """
    formula = df.columns[1] + '~' + df.columns[0]
    res = smf.quantreg(formula, df).fit(q=quantile)
    print(res.summary())
    # ``params`` is label-indexed; positional access must go through .iloc
    # (plain integer keys on such a Series are deprecated in pandas).
    intercept = res.params.iloc[0]
    slope = res.params.iloc[1]
    row = [quantile, intercept, slope] + res.conf_int().iloc[1].tolist()
    qre_df = pd.DataFrame(data=[row],
                          index=['quantile_reg'],
                          columns=['qt', 'intercept', 'x_coef',
                                   'cf_lower_bound', 'cf_upper_bound'])
    return qre_df
Example #17
0
def quantile_regression_q(d, p, c, s):
    """Fit ``y ~ ones`` on ``d`` at the ratio quantile and return the result.

    The quantile is ``(p - c) / ((p - c) + (c - s))``.  ``d`` must provide a
    ``y`` column; it is copied, not modified.  The fit summary is printed.
    """
    underage_cost = p - c
    overage_cost = c - s
    ratio = underage_cost / (underage_cost + overage_cost)

    tmp = d.copy()
    # One value per row: ``np.ones(tmp.shape)`` built a 2-D block, which
    # cannot be stored in a single column once ``d`` has several columns.
    tmp['ones'] = np.ones(len(tmp))
    mod = smf.quantreg('y ~ ones', tmp)
    res = mod.fit(q=ratio)
    print(res.summary())
    return res
Example #18
0
 def __qfit_dict(self):
     """Estimate the quantile fit for every quantile.

     Returns a dict mapping each ``tau`` in ``self.quantile_list`` to the
     result of fitting ``self.reg_formula`` on ``self.data`` at that quantile.
     """
     qfit_dict = dict()
     for tau in self.quantile_list:
         # ``max_iter`` is the keyword QuantReg.fit understands; the former
         # ``maxiter=2000`` was swallowed by **kwargs, so the default cap
         # stayed in force.
         qfit_dict[tau] = smf.quantreg(formula=self.reg_formula,
                                       data=self.data).fit(q=tau,
                                                           max_iter=2000,
                                                           p_tol=1e-05)
     return qfit_dict
Example #19
0
def ols_annotations(x,
                    y,
                    data=None,
                    ax=None,
                    color='black',
                    font_size=8,
                    textxy=(0.05, 0.95),
                    textva='top',
                    method='quantreg',
                    stats=('N', 'slope', 'slope_p')):
    """Draw a fitted line on ``ax`` and annotate it with fit statistics.

    Parameters
    ----------
    x, y :
        Column names in ``data``; or, when ``data`` is None, the raw values
        (they are then stored as columns ``'X'`` and ``'Y'``).
    data : DataFrame, optional
    ax :
        Axes to draw on.  Required in practice: the function calls
        ``ax.text`` and ``ax.plot`` unconditionally.
    color, font_size, textva :
        Passed to ``ax.text`` (``color`` also to ``ax.plot``).
    textxy : pair of floats
        Text position in axes coordinates.
    method : ``'ols'`` or ``'quantreg'``
        ``'quantreg'`` fits the median (q=0.5).
    stats : iterable of str
        Any of ``'N'``, ``'slope'``, ``'slope_p'``, ``'rsquared'``,
        ``'rsquared_p'``; one text line is written per entry, in order.

    Raises
    ------
    ValueError
        If ``method`` is not one of the two supported names.
    """
    import statsmodels.api as sm
    import statsmodels.formula.api as smf
    if data is None:
        data = pandas.DataFrame({'X': x, 'Y': y})
        x = 'X'
        y = 'Y'
    # Sorting by x makes the first/last rows the end points of the line.
    data = data.sort_values(x)
    if method == 'ols':
        X = sm.add_constant(data.loc[:, x])
        Y = data.loc[:, y]
        res = sm.OLS(Y, X).fit()
    elif method == 'quantreg':
        res = smf.quantreg(y + ' ~ ' + x, data).fit(q=0.5)
    else:
        # Previously fell through and failed later on an unbound ``res``.
        raise ValueError("method must be 'ols' or 'quantreg', got %r" % (method,))
    N = data.shape[0]
    # Each statistic is read only when requested, so a statistic the chosen
    # model does not provide cannot break an annotation that does not use it.
    text = ''
    for stat in stats:
        if stat == 'N':
            text += 'N = {:,}\n'.format(N)
        elif stat == 'slope':
            text += 'slope = {}\n'.format('%.2f' % Decimal(res.params[x]))
        elif stat == 'slope_p':
            text += 'P = {}\n'.format('%.2E' % Decimal(res.pvalues[x]))
        elif stat == 'rsquared':
            text += 'R2 = {}\n'.format('%.2f' % Decimal(res.rsquared_adj))
        elif stat == 'rsquared_p':
            text += 'P = {}\n'.format('%.2E' % Decimal(res.f_pvalue))
    ax.text(textxy[0],
            textxy[1],
            text,
            transform=ax.transAxes,
            va=textva,
            color=color,
            fontsize=font_size)
    # Straight line through the fitted values at the smallest and largest x.
    ax.plot(data[x].values[[0, N - 1]], res.predict()[[0, N - 1]], color=color)
Example #20
0
    def fit(self, X, y=None):
        """
        Fit the quantile regression for ``num_det_target`` and keep the result.

        :param X: covariate dataframe
        :param y: currently unused
        :return: the fitted result, also stored on ``self.fit_result``
        """
        # Right-hand side: the log term first, then any extra covariates.
        terms = ['np.log(num_det+1)']
        if self.covariates:
            terms.extend(self.covariates)
        formula = 'num_det_target ~ ' + ' + '.join(terms)

        model = smf.quantreg(formula, data=X)
        self.fit_result = model.fit(q=self.quantile)
        return self.fit_result
Example #21
0
def time_varying_delta_covar_regression(macroData, instData, quantile,
                                        instName, writeToFile):
    """Run the three quantile regressions behind a time-varying delta-CoVaR.

    ``macroData`` and ``instData`` are merged on ``'5_Day_Dates'``.  The
    institution column is regressed on the macro state variables at
    ``quantile`` and at 0.5, and ``Fin_Sec_Loss`` is regressed on the same
    state variables plus the institution at ``quantile``.

    When ``writeToFile`` is true, the three summaries are written to
    ``'<instName> Time Varying Delta CoVaR Parameters.txt'``.

    Returns a dict with the coefficient vectors under ``'VaRqParams'``,
    ``'VaR50Params'`` and ``'CoVaRqParams'``.
    """
    state_vars = ('Change_3_M_TR + Change_TR_Slope + TED + Baa_3_M_TR + '
                  'SP_500 + RE_excess_FS + SP_500_Vol')
    data = macroData.merge(instData, on='5_Day_Dates')

    mod = smf.quantreg(str(instName) + ' ~ ' + state_vars, data)
    varqres = mod.fit(q=quantile)
    var50res = mod.fit(q=0.5)
    mod = smf.quantreg(
        'Fin_Sec_Loss ~ ' + state_vars + ' + ' + str(instName), data)
    covarqres = mod.fit(q=quantile)

    if writeToFile:
        sections = [
            str(quantile) + ' Quantile VaR for institution',
            '\n',
            str(varqres.summary()),
            '\n',
            '\n',
            '0.5 Quantile VaR for institution',
            '\n',
            str(var50res.summary()),
            '\n',
            '\n',
            str(quantile) + ' Quantile CoVaR for system given institution',
            '\n',
            str(covarqres.summary()),
        ]
        # The context manager closes the file even if a write fails.
        with open(str(instName) + ' Time Varying Delta CoVaR Parameters.txt',
                  'w') as f:
            f.write(''.join(sections))

    return {
        'VaRqParams': varqres.params,
        'VaR50Params': var50res.params,
        'CoVaRqParams': covarqres.params
    }
Example #22
0
def getcoef(data, q=0.5):
    '''
    Return the slope coefficient of the regression ``depend ~ income``.

    The columns of ``data`` are renamed in place to ``income`` and ``depend``.
    '''
    data.columns = ['income', 'depend']
    # For some reason the frame itself did not regress correctly: the
    # independent variable was being treated as several axes rather than one.
    # Plain lists side-step that.
    table = {name: data[name].values.tolist()
             for name in ('income', 'depend')}

    fitted = smf.quantreg('depend ~ income', table).fit(q=q)
    return fitted.params['income']
Example #23
0
 def quantile(self, q=0.5):
     '''
     Func: do not use!!!

     Fits ``self.formula`` on ``self.df`` at quantile ``q`` and returns a
     DataFrame with the confidence bounds (``lb``, ``ub``), ``params``,
     ``pvalue``, ``q`` (first four characters of its string form) and
     ``Pseudo R-squared``.
     '''
     res = smf.quantreg(formula=self.formula, data=self.df).fit(q=q)
     bounds = res.conf_int()
     bounds.columns = ['lb', 'ub']
     result_dict = bounds.to_dict()
     result_dict.update({
         'params': dict(res.params),
         'pvalue': dict(res.pvalues),
         'q': str(res.q)[:4],
         'Pseudo R-squared': res.prsquared,
     })
     return pd.DataFrame(result_dict)
    def fit_model(self):
        """
        Fit the linear quantile regression of ``self.y`` on every column of
        ``self.x`` at quantile ``self.qt``.

        Returns
        -------
        output: statsmodels.regression.linear_model.RegressionResultsWrapper object
            the fitted model, also stored on ``self.linear_quantile``
        """
        predictors = '+'.join(list(self.x.columns.values))
        equation = self.y.name + '~' + predictors
        frame = pd.concat([self.y, self.x], axis=1)
        # The attribute holds the unfitted model first, then the fit result.
        self.linear_quantile = smf.quantreg(equation, data=frame)
        self.linear_quantile = self.linear_quantile.fit(q=self.qt)
        return self.linear_quantile
Example #25
0
    def fit(self, X, y):
        """Fit quantile regression(s) on a tensor-product spline basis of ``X``.

        Stores the basis' ``design_info`` on ``self.design_info`` and the
        fitted result on ``self.model``: a single result when
        ``self.quantiles`` is a scalar, otherwise a list with one result per
        quantile.
        """
        # One named column per feature: x1, x2, ...
        columns = {}
        for idx, column in enumerate(X.T):
            columns['x{}'.format(idx + 1)] = column

        # Tensor basis expansion of natural spline bases, one per feature.
        spline_terms = ",".join(
            'cr(x{}, df={})'.format(i + 1, self.df)
            for i in range(X.shape[1]))
        design_matrix = dmatrix(
            "te(" + spline_terms + ", constraints='center')", columns)

        # Kept so future predictions can rebuild the same basis.
        self.design_info = design_matrix.design_info

        mod = smf.quantreg('y ~ x - 1', {'y': y, 'x': design_matrix})
        if not np.isscalar(self.quantiles):
            self.model = [mod.fit(q=q) for q in self.quantiles]
        else:
            self.model = mod.fit(q=self.quantiles)
Example #26
0
def table_rq_res(formula, taus, data, alpha, R, n, sigma, jacobian):
    """Tabulate a linear combination of quantile-regression coefficients.

    For each quantile in ``taus`` the model ``formula`` is fitted on
    ``data``; the combination ``R' * params`` and its standard error are
    recorded.  The covariance is the sandwich ``J^-1 * Sigma * J^-1 / n``
    built from the caller-supplied ``sigma`` and ``jacobian`` callables.

    Returns
    -------
    (tab, setab) : two DataFrames with one row per quantile (row labels are
    the quantile positions as strings): the estimates and their standard
    errors.
    """
    tab = pd.DataFrame([], index=[0])
    setab = pd.DataFrame([], index=[0])
    for i, tau in enumerate(taus):
        fit = smf.quantreg(formula, data).fit(q=tau)
        tab[str(i)] = np.dot(R.T, np.array(fit.params))
        sigmatau = sigma(data, n, tau, fit.resid)
        jacobtau = jacobian(data, n, tau, fit.resid, alpha)
        solved_jacobtau = np.linalg.inv(jacobtau)
        V = np.dot(np.dot(solved_jacobtau, sigmatau), solved_jacobtau) / n
        # ``np.float`` no longer exists in NumPy; squeeze the 1x1 product
        # to a scalar and use the builtin instead.
        variance = float(np.squeeze(np.dot(np.dot(R.T, V), R)))
        setab[str(i)] = variance**.5
    tab = tab.transpose()
    setab = setab.transpose()
    return (tab, setab)
Example #27
0
def time_constant_delta_covar_regression(macroData, instData, quantile,
                                         instName, writeToFile):
    """Estimate a time-constant delta-CoVaR and optionally write a report.

    ``macroData`` and ``instData`` are merged on ``'5_Day_Dates'`` and
    ``Fin_Sec_Loss`` is regressed on the institution column at ``quantile``.
    The delta-CoVaR is the slope times the difference between the
    institution's ``quantile`` and median sample quantiles.

    When ``writeToFile`` is true the fit summary and the delta-CoVaR are
    written to ``'<instName> Constant Time Delta CoVaR Summary.txt'``.
    Nothing is returned.
    """
    data = macroData.merge(instData, on='5_Day_Dates')

    mod = smf.quantreg('Fin_Sec_Loss ~ ' + str(instName), data)
    res = mod.fit(q=quantile)
    var50 = data[str(instName)].quantile(0.5)
    varq = data[str(instName)].quantile(quantile)

    if writeToFile:
        # Position 1 is the slope on the institution (0 is the intercept);
        # ``params`` is label-indexed, so positional access goes via .iloc.
        delta_covar = res.params.iloc[1] * (varq - var50)
        # The context manager closes the file even if a write fails.
        with open(str(instName) + ' Constant Time Delta CoVaR Summary.txt',
                  'w') as f:
            f.write(str(quantile) + ' Quantile')
            f.write('\n')
            f.write(str(res.summary()))
            f.write('\n')
            f.write('Delta Covar ' + str(quantile) + " : " + str(delta_covar))
Example #28
0
def quantCV(q, alpha, L1_wt, data, folds):
    """Cross-validate a regularized quantile regression of ``Delay_100_Mile``.

    For each fold the model is fitted on the training rows with
    ``fit_regularized`` (using ``q``, ``alpha`` and ``L1_wt``), predictions
    are made for the per-(TC, ATP, IP) group means of the test rows, and they
    are compared with the groups' empirical ``q`` quantile of
    ``Delay_100_Mile``.  Returns the mean over folds of the root mean squared
    error.

    Side effect: installs a process-wide "ignore" warnings filter.
    """
    import statsmodels.formula.api as smf
    #    from statsmodels.regression.quantile_regression import QuantReg
    # NOTE(review): ``sklearn.cross_validation`` and the ``KFold(n, n_folds=...)``
    # signature belong to old scikit-learn releases -- confirm the pinned version.
    from sklearn.cross_validation import KFold
    from sklearn.metrics import mean_squared_error as MSE
    import warnings
    warnings.filterwarnings("ignore")

    ## KFold over the number of distinct index values (not over rows)
    kf = KFold(len(np.unique(data.index)), n_folds=folds, random_state=0)
    score = np.zeros(folds)
    ct = 0
    ## Train Model
    for train_index, test_index in kf:
        # Select the rows whose index value appears in the fold's positions.
        # NOTE(review): this matches fold positions against index *values*,
        # so it presumably assumes index values 0..n-1 -- verify against caller.
        data_train = data[np.array(
            pd.DataFrame(data.index).isin(train_index).values.tolist())]
        data_test = data[np.array(
            pd.DataFrame(data.index).isin(test_index).values.tolist())]
        mod = smf.quantreg(
            'Delay_100_Mile ~ TC + ATP + IP + TC_TC + TC_ATP + TC_IP + ATP_ATP + ATP_IP + IP_IP',
            data_train)
        res = mod.fit_regularized(q=q,
                                  alpha=alpha,
                                  L1_wt=L1_wt,
                                  maxiter=3000,
                                  random_state=0,
                                  cnvrg_tol=1e-08)

        ## Predict Values: mean of every non-response column per (TC, ATP, IP) group
        features_predict = data_test.groupby(by=['TC', 'ATP', 'IP'])[data.drop(
            ['Delay_100_Mile'], axis=1).columns].mean()
        params = res.params
        # First parameter is used as the intercept, the rest as slopes.
        # NOTE(review): relies on the column order of ``data`` matching the
        # formula's term order -- confirm.
        delay_predicted = params[0] + np.dot(features_predict, params[1:])

        ## Corresponding Value of Same Percentile
        target_per = data_test.groupby(
            by=['TC', 'ATP', 'IP'])['Delay_100_Mile'].quantile(q)
        #        score[ct]= MSE(np.expm1(target_per),np.expm1(delay_predicted))**.5
        # Root mean squared error for this fold.
        score[ct] = MSE(target_per, delay_predicted)**.5
        ct += 1

    return np.mean(score)
Example #29
0
def quantile_fit(xi, yi, q=0.5):
    """Perform the quantile regression ``yi ~ xi``.

    See for instance:
    https://www.statsmodels.org/dev/examples/notebooks/generated/quantile_regression.html

    Parameters:
    xi, yi      np.array, x and y values
    q           quantile to fit (default 0.5, the median)

    Returns:
    slope       regression slope estimate
    intercept   regression intercept estimate
    cov         parameter covariance matrix as a plain array
    """
    frame = pd.DataFrame.from_dict(data={'xi': xi, 'yi': yi})
    res = smf.quantreg('yi ~ xi', frame).fit(q=q)
    slope = res.params['xi']
    intercept = res.params['Intercept']
    return slope, intercept, res.cov_params().values
def fitMinSpline(Yvar, Xvar, smoothingWindow, plot=False, plotVar = None):
    '''
    Return a lower-envelope (q=0.01) spline fitted to a smoothed ``Yvar``.

    Yvar : dependent variable to fit (its index is reused for the design matrix)
    Xvar : independent variable the spline basis is built from
    smoothingWindow : length (in samples) of the moving average
    plot : when true, print a ggplot of the data and the fitted curve
    plotVar : series used for the plot's x axis

    Returns the predictions as a Series indexed like ``Xvar``.
    '''
    # Floor division keeps the slice bounds integral on Python 3 and matches
    # the former Python 2 results (including for odd windows).
    head = smoothingWindow // 2
    tail = -smoothingWindow // 2

    # Natural cubic spline basis with 7 columns, no intercept.
    X = np.asarray(patsy.dmatrix("cr(x, df=7)-1", {"x": Xvar}))
    modDat = pd.DataFrame(X, index=Yvar.index)
    modDat.columns = ['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7']

    # Drop the edges, where the moving average is computed on padding.
    modDatTrunc = modDat.iloc[head:tail].copy()
    window = np.ones(smoothingWindow)/float(smoothingWindow)
    modDatTrunc['Y'] = np.convolve(Yvar, window, 'same')[head:tail]

    # Only the first five basis columns enter the regression.
    mod = smf.quantreg('Y~X1+X2+X3+X4+X5', modDatTrunc)
    res = mod.fit(q=0.01)
    preds = pd.Series(res.predict(modDat), index = Xvar.index)
    if plot:
        plotDF = pd.concat([plotVar, Yvar, preds], axis=1)
        print(plotDF.columns)
        plotDF.columns = [plotVar.name, Yvar.name, 'fitted']
        p = ggplot(aes(x=plotVar.name, y=Yvar.name), data=plotDF) + geom_line() +\
            geom_line(aes(y='fitted'), color='red')+\
            ylim(0,5) +\
            xlab('') + ylab('Sensor (V)')
        print(p)
    return(preds)
Example #31
0
    def fit(self, train, quantiles=[0.025, 0.975], startx=None, endx=None):
        """
        Fit ``obscol ~ log(predcol)`` at each requested quantile with the
        statsmodels quantile regression and store the coefficients.

        ``train`` has its 'leadtime' index level reset to a column first.
        ``startx`` / ``endx`` optionally restrict the fit to rows whose
        predictor lies inside that (inclusive) range.  The result is kept on
        ``self.fits``: one row per quantile, columns ``self.model_coefs``.
        """
        train = train.reset_index('leadtime')
        if startx is not None:
            train = train.loc[train[self.predcol] >= startx, :]
        if endx is not None:
            train = train.loc[train[self.predcol] <= endx, :]

        formula = self.obscol + ' ~ np.log(' + self.predcol + ')'
        mod = smf.quantreg(formula, train)

        blank = np.zeros((len(quantiles), 2))
        self.fits = pd.DataFrame(blank, index=quantiles,
                                 columns=self.model_coefs)
        for q in quantiles:
            self.fits.loc[q, :] = mod.fit(q=q).params.values
Example #32
0
def subsamplek(formula, V, tau, coeffs, data, n, b, B, R):
    """Draw ``B`` weighted subsamples and return the scaled deviation statistic.

    Each subsample of size ``b`` is drawn with replacement from the first
    ``n`` rows of ``data`` with probabilities proportional to ``perwt``;
    draws whose regressor cross-product has zero determinant are redrawn.
    ``formula`` is refitted at quantile ``tau`` on each subsample and the
    absolute value of ``(R'VR/b)^(-1/2) * R'(coeffs - params)`` is stored.

    Returns an array of length ``B``.
    """
    k = np.zeros(B)
    # ``np.float`` no longer exists in NumPy, and ``-1 / 2`` is -1 under
    # Python 2 integer division; spell the inverse square root explicitly.
    RVR = float(np.squeeze(np.dot(np.dot(R.T, V), R) / b))**(-0.5)
    # Float weights so the normalisation is a true division for int input.
    weights = np.asarray(data['perwt'], dtype=float)
    probs = weights / np.sum(weights)
    for s in range(B):
        sing = 0
        while sing == 0:
            sample = np.random.choice(np.arange(0, n),
                                      size=int(b),
                                      replace=True,
                                      p=probs)
            sdata = data.iloc[sample, :]

            # ``DataFrame.as_matrix`` was removed from pandas; ``.values``
            # gives the same array.
            x = sdata[["educ", "exper", "exper2", "black", "perwt"]].values
            sing = np.linalg.det(np.dot(x.T, x))
        # Didn't use weights here
        sqr = smf.quantreg(formula, sdata).fit(q=tau)
        k[s] = np.abs(np.dot(np.dot(RVR, R.T), coeffs - np.array(sqr.params)))
    return (k)
Example #33
0
def CoVar():
    """Compute and print a pairwise delta-CoVaR matrix for the index data.

    Reads ``Data/Index_data.csv`` (``;``-separated, with a ``Date`` column),
    converts the series to log returns and, for every ordered pair of
    distinct columns ``(j, k)``, fits the quantile regression ``j ~ k`` at
    q=0.05.  For each pair it prints the CoVaR and the fit summary, shows a
    plot of the slope across quantiles 0.01..0.99, and stores the rounded
    delta-CoVaR in the matrix (diagonal entries are NaN).  The matrix is
    printed at the end; nothing is returned.
    """
    df = pd.read_csv("Data/Index_data.csv", sep=";")
    df["Date"] = pd.to_datetime(df["Date"])
    df = df.dropna().ffill().set_index("Date")
    data = np.log(df).diff().dropna()
    # '&' is not valid inside a formula term name.
    data = data.rename(columns={"S&P": "SP"})
    data = data.replace(np.inf, 0).replace(-np.inf, 0)
    mCovar = np.zeros((len(data.columns), len(data.columns)))

    q = 0.05
    grid = np.arange(0.01, 1, 0.01)
    for nr1, j in enumerate(data.columns):
        for nr2, k in enumerate(data.columns):
            if nr1 == nr2:
                mCovar[nr1, nr2] = np.nan
                continue
            mod = smf.quantreg(str(j) + "~" + str(k), data)
            res = mod.fit(q=q)
            # Empirical q-quantile of k.  ``np.percentile`` expects a percent
            # (0-100), so passing 0.05 used to return the 0.05th percentile.
            var5 = data[k].quantile(q)
            # NOTE(review): this is the intercept of the median regression,
            # not the median of k -- confirm it is the intended "VaR 50".
            var50 = mod.fit(q=0.5).params.iloc[0]
            covar = res.params.iloc[0] + res.params.iloc[1] * var5
            print(covar)
            print(res.summary())
            dcovar = res.params.iloc[1] * (var5 - var50)
            mCovar[nr1, nr2] = round(dcovar, 3)
            # Slope of j on k across the whole quantile grid.
            slopes = [mod.fit(q=i).params.iloc[1] for i in grid]
            plt.plot(grid, slopes)
            print('{} {}'.format(j, k))
            plt.show()

    print(pd.DataFrame(mCovar, columns=data.columns, index=data.columns))
Example #34
0
    def _set_quantiles(self, data):
        """Fit quantile regressions of observations on predictions.

        For every location in ``data.metadata.id_nodes`` and every lead time
        in ``data.metadata.fore_leadT`` the concurrent climatology frame is
        read from ``self.clim.concurr`` and ``observations ~ predictions``
        is fitted at 21 probabilities (0.001, 0.05..0.95, 0.999).  The
        intercepts and slopes are stored as a DataFrame under
        ``self.betas.<location>.<leadT>``.  When all observations are zero
        (solar case) the coefficients are left at zero.
        """
        prob = np.concatenate([[0.001],np.arange(0.05,0.951,0.05),[0.999]])
        self.betas = expando()
        # smf.quantreg generates warnings - see its documentation.  They are
        # silenced for this section only; the previous filters are restored
        # on exit instead of being replaced by a global "always".
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            for location in data.metadata.id_nodes:
                print(location)
                setattr(self.betas, location, expando())
                for leadT in data.metadata.fore_leadT:

                    clim_concurr_loc_leadT = getattr(getattr(self.clim.concurr, location), leadT)

                    # Float zeros, so the float coefficients below are not
                    # written into integer columns.
                    betas_aux = pd.DataFrame(0.0, columns = ['probabilities','intercept', 'coefficient'],
                                             index = range(len(prob)))
                    betas_aux.loc[:,('probabilities')] = prob
                    # For solar cases, all quantiles are kept at zero
                    if not np.all(clim_concurr_loc_leadT.observations == 0.):
                        mod = smf.quantreg('observations ~ predictions', clim_concurr_loc_leadT)
                        for iq,q in enumerate(prob):
                            res = mod.fit(q=q)
                            betas_aux.loc[iq,('intercept')] =  res.params['Intercept']
                            betas_aux.loc[iq,('coefficient')] = res.params['predictions']
                            del res
                        del mod

                    setattr(getattr(self.betas,location), leadT, betas_aux)
                    del betas_aux
                    # Explicit collection between fits keeps peak memory down.
                    gc.collect()
Example #35
0
# -*- coding: utf-8 -*-'''

from __future__ import print_function
import patsy
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
from statsmodels.regression.quantile_regression import QuantReg
from matplotlib import rc

# Engel food-expenditure data set shipped with statsmodels.
data = sm.datasets.engel.load_pandas().data
print(data.head())

# Median regression of food expenditure on income.
mod = smf.quantreg('foodexp ~ income', data)
res = mod.fit(q=.5)
print(res.summary())

quantiles = np.arange(.05, .96, .1)
def fit_model(q):
    """Fit ``mod`` at quantile ``q``.

    Returns ``[q, intercept, income slope, slope lower bound, slope upper bound]``.
    """
    res = mod.fit(q=q)
    # ``.ix`` was removed from pandas; the row is selected by label.
    return [q, res.params['Intercept'], res.params['income']] + \
            res.conf_int().loc['income'].tolist()

models = [fit_model(x) for x in quantiles]
models = pd.DataFrame(models, columns=['q', 'a', 'b','lb','ub'])

# Ordinary least squares on the same formula, for comparison.
ols = smf.ols('foodexp ~ income', data).fit()
ols_ci = ols.conf_int().loc['income'].tolist()
ols = dict(a = ols.params['Intercept'],
Example #36
0
  def do_GLM(self, disp=1):

    """
    Generalised Linear Models
    This fits a GLM (or a median quantile regression) to the training data
    set and then predicts on the training or testing data set.
    Different families and links can be included if need be simply using the
    statsmodels simple API.

    disp: when true, the summary of a quantile fit is logged (the GLM
    summary is always logged).

    Results are stored on the instance: the predictions (``measured``,
    ``predicted`` and, without cross validation, ``fitted``), the fit time
    ``dt`` and the error statistics computed from the absolute deviations.
    """
    import statsmodels.api as sm
    import statsmodels.formula.api as smf
    import statsmodels.genmod as smg

    # Decide the family
    if self.family_name == "Gamma":
      if self.link == "log":
        self.family = sm.families.Gamma(link=smg.families.links.log)
      else:
        self.family = sm.families.Gamma()
    elif self.family_name == "Quantile":
      self.family = self.family_name
      self.link = "None"
    else:
      # NOTE(review): self.family is not set on this path -- confirm it is
      # initialised elsewhere, otherwise the fit below cannot proceed.
      self.logger.info("You can only pick the family: Gamma and Quantile")

    # Decide the formula.
    # ``poly`` must stay a local with this name: the default formula below
    # refers to it and the formula is evaluated in this frame.
    def poly(x, power):
      return x**power

    if not self.formula:
      components = ["PC{0}".format(i+1) for i in range(self.num_components)]
      self.formula = "redshift ~ poly(PC1, 2) +" + "*".join(components)

    self.logger.info("Family: {0} with \tformula: {1}\tlink: {2}".format(self.family_name, self.formula, self.link))
    self.logger.info("Fitting...")

    t1 = time.time()
    if self.family == "Quantile":
      # Quantile regression
      model = smf.quantreg(formula=self.formula, data=self.data_frame_train)
      results = model.fit(q=.5)
      # ``disp`` is the verbosity switch of this method (``verbose`` was
      # never defined here).
      if disp:
        self.logger.info(results.summary())
    else:
      model = smf.glm(formula=self.formula, data=self.data_frame_train, family=self.family)
      results = model.fit()
      self.logger.info(results.summary())
    t2 = time.time()

    self.dt = (t2-t1)
    self.logger.info("Time taken: {0} seconds".format(self.dt))

    ## Prediction
    if self.cross_validate:
      self.logger.info("Cross validating")
      self.measured = np.array(self.data_frame_test["redshift"].values)
      self.predicted = results.predict(self.data_frame_test)
    else:
      self.measured = np.array(self.data_frame_train["redshift"].values)
      self.predicted = results.predict(self.data_frame_train)
      self.fitted = results.predict(self.data_frame_test)

    ## Outliers
    ## (z_phot - z_spec)/(1+z_spec)

    self.deltas = abs(self.predicted - self.measured)
    self.median = np.median(self.deltas)
    self.std = np.std(self.deltas)

    # Flag the outliers (relative deviation above 0.15) and keep the rest
    mega_out_indx = (self.deltas/(1+self.measured)) > 0.15
    inlier_deltas = self.deltas[~mega_out_indx]
    # Fraction (not count) of points flagged as outliers
    self.num_mega_outliers = mega_out_indx.sum() / (1.0*len(self.deltas))
    self.average = np.mean(inlier_deltas)

    self.rms = np.sqrt(np.mean(self.deltas**2))

    self.rms_outliers = np.sqrt(np.mean(inlier_deltas**2))
    self.std_outliers = np.std(inlier_deltas)
    self.bias_outliers = np.mean(inlier_deltas)


    self.logger.info("Median (dz):.............................................{0}".format(self.median))
    self.logger.info("Standard deviation (dz):.................................{0}".format(self.std))
    self.logger.info("RMS (dz).................................................{0}".format(self.rms))
    self.logger.info("............................................................")
    self.logger.info("Number of outliers removed...............................{0}".format(self.num_mega_outliers))
    self.logger.info("Average (removed outliers for > 0.15) (dz):..............{0}".format(self.average))
    self.logger.info("Standard deviation (removed outliers for > 0.15) (dz):...{0}".format(self.std_outliers))
    self.logger.info("RMS (removed outliers for z > 0.15)......................{0}".format(self.rms_outliers))
    self.logger.info("Bias (removed outliers for z > 0.15).....................{0}".format(self.bias_outliers))

    self.outliers = (self.predicted - self.measured) / (1.0 + self.measured)

    # R code
    # Out<-100*length(PHAT0.Pred$fit[(abs(PHAT0.test.PCA$redshift-PHAT0.Pred$fit))>0.15*(1+PHAT0.test.PCA$redshift)])/length(PHAT0.Pred$fit)
    # Percentage of points whose absolute error exceeds 0.15*(1+measured)
    self.catastrophic_error = 100.0*(abs(self.measured-self.predicted) > (0.15*(1+self.measured))).sum()/(1.0*self.measured.shape[0])
    self.logger.info("Catastrophic Error:......................................{0}%".format(self.catastrophic_error))
plt.yticks(())
plt.xlabel("x")
plt.ylabel("y and predicted y")
plt.title("Linear regression on data with non-constant variance")



## Quantile regression for the median, 0.5th quantile
import pandas as pd
data = pd.DataFrame(data = np.hstack([x_, y_]), columns = ["x", "y"])
# print() calls work on Python 2 and 3 alike (one argument each).
print(data.head())

import statsmodels.formula.api as smf


mod = smf.quantreg('y ~ x', data)
res = mod.fit(q=.5)
print(res.summary())


## Build the model for other quantiles
quantiles = np.arange(0.1,1,0.1)
print(quantiles)
models = []
params = []

for qt in quantiles:
    print(qt)
    res = mod.fit(q = qt )
    models.append(res)
    # Row layout: quantile, intercept, slope, slope lower bound, slope upper
    # bound.  ``.ix`` was removed from pandas; the row is selected by label.
    params.append([qt, res.params['Intercept'], res.params['x']] + res.conf_int().loc['x'].tolist())
 print()
 print(u'-'*30)
 print(u'Variable for close distance:', d_dist)
 
 # NO CONTROL
 ols_res = smf.ols('pct_rr ~ {:s}'.format(d_dist), data = df_compa).fit()
 #print()
 #print(ols_res.summary())
 
 ls_res = []
 ls_quantiles = [0.25, 0.5, 0.75] # use 0.7501 if issue
 for quantile in ls_quantiles:
   #print()
   #print(quantile)
   #print(smf.quantreg('pct_rr~d_dist_5', data = df_repro_compa).fit(quantile).summary())
   ls_res.append(smf.quantreg('pct_rr ~ {:s}'.format(d_dist),
                              data = df_compa[~df_compa[d_dist].isnull()]).fit(quantile))
 
 print(summary_col([ols_res] + ls_res,
                   stars=True,
                   float_format='%0.2f',
                   model_names=['OLS'] + [u'Q{:2.0f}'.format(quantile*100) for quantile in ls_quantiles],
                   info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                              'R2':lambda x: "{:.2f}".format(x.rsquared)}))
 
 # WITH CONTROLS
 ols_res_ctrl = smf.ols('pct_rr ~ {:s} + {:s}'.format(d_dist, str_ev),
                          data = df_compa).fit()
 #print()
 #print(ols_res_ctrl.summary())
 
 ls_res_ctrl = ([smf.quantreg('pct_rr ~ {:s} + {:s}'.format(d_dist, str_ev),
					slope, intercept, r_value, p_value, std_err = stats.linregress(y,dur)
					correlation[q,sea_ID,state,2]=slope
					correlation[q,sea_ID,state,3]=p_value
					correlation[q,sea_ID,state,4]=intercept





					#print slope, intercept, r_value, p_value, std_err
					#plt.plot(y,dur,'o')
					#plt.plot(y,intercept+slope*y,'r')


					df=pd.DataFrame(data={'dur':dur,'y':y})
					mod = smf.quantreg('dur ~ y', df)
					for qu,qui in zip(quantiles,range(5)):
						try:
							res = mod.fit(q=qu)
							slope,p_value,interc=res.params['y'],res.pvalues['y'],res.params['Intercept']
							correlation_qu[q,sea_ID,state,qui,:]=[slope,p_value,interc]
							#plt.plot(y,interc+slope*y,'b--')
						except:
							pass


					#plt.show()
					#asdasd


out_file='data/_TMean/91_7/gridded/91_7_TMean_duration_'+variable+'_cor.nc'
def quant_mincer(q, data):
  """Fit the wage equation at quantile ``q`` and report the ``educ`` term.

  The model regresses ``logwk`` on ``educ``, ``black``, ``exper``,
  ``exper2`` and ``wt`` with the intercept suppressed (``- 1``).

  q    - quantile passed to the quantile-regression fit
  data - data set holding the columns named in the formula

  Returns a two-element list: [coefficient on educ, its standard error].
  """
  formula = 'logwk ~ educ + black + exper + exper2 + wt - 1'
  term = 'educ'
  fitted = smf.quantreg(formula, data).fit(q = q)
  return [fitted.params[term], fitted.bse[term]]
        def elast_calc(self, key, Y, X, P, stub='', parts=100):
            """
            Add elasticities using log-log quantile regressions

            The data set is split into weighted quantile groups of Y, a
            regression of log(Y) on log(X) is fitted at every group's
            quantile, and the slope (the elasticity) is attached to each
            row of the group.

            number of elasticities will be that of hypothetical delimiters
            in parts i.e parts-1

            key - household key
            Y - dependent variable - resource consumption
            X - independent variable - income
            P - household population weights
            stub - sufix to name variables containing quantiles
                   and elasticities
            parts - number of parts

            Returns nothing. Side effects: self.dataset is modified in
            place (sorted, temporary columns added) and then replaced by a
            new frame sorted by key, re-indexed, carrying the columns
            'quant' + stub and 'elast' + stub; every column whose name
            contains '__' is dropped from it. Both new column names are
            appended to self.seedvars.
            """
            dt = self.dataset
            quantstub = 'quant' + stub
            elaststub = 'elast' + stub
            print('\nElasticity calculator started - please be patient')

            # take the logs of Y and X
            dt['__lnY'] = np.log(dt[Y])
            dt['__lnX'] = np.log(dt[X])

            # log of 0 is -infinite, replace with missing (NaN).
            # Use .loc: chained indexing (dt[col][mask] = ...) may write to
            # a temporary copy and leave dt untouched.
            dt.loc[dt[Y] == 0, '__lnY'] = np.nan
            dt.loc[dt[X] == 0, '__lnX'] = np.nan

            # rescale and round weights to inform replication
            dt['__' + P] = dt[P] / dt[P].min()
            dt['__rdwgt'] = dt['__' + P].round()

            # define quantiles based on parts and mark
            dt.sort_values(Y, inplace=True)
            dt[quantstub] = (dt['__' + P].cumsum() /
                             dt['__' + P].sum() *
                             parts).astype(int) / float(parts)
            dt.sort_values(key, inplace=True)

            # the quantile of the regression can't be 0 or 1: keep only the
            # labels strictly inside (0, 1) and fold the 0 / 1 groups into
            # the nearest valid one. Filtering by value (instead of blindly
            # dropping the first and last label) keeps a valid quantile
            # when no row happens to be labelled exactly 0 or exactly 1.
            quantiles = np.sort(dt[quantstub].unique())
            quantiles = quantiles[(quantiles > 0) & (quantiles < 1)]
            dt.loc[dt[quantstub] == 0, quantstub] = quantiles[0]
            dt.loc[dt[quantstub] == 1, quantstub] = quantiles[-1]

            # dataframe with replications: each observation is repeated as
            # many times as its rounded weight. np.repeat works by position,
            # so it does not depend on the index labels being 0..n-1, and
            # it avoids growing a Series one observation at a time.
            print('Replicating observations, {} to {}...'.format(
                dt['__rdwgt'].count(), int(dt['__rdwgt'].sum())))
            reps = dt['__rdwgt'].astype(int).values
            estdt = pd.DataFrame()
            estdt['lnY'] = np.repeat(dt['__lnY'].values, reps)
            estdt['lnX'] = np.repeat(dt['__lnX'].values, reps)

            # calculate elasticities
            print('Fitting models...')
            model = smf.quantreg('lnY ~ lnX', estdt)
            elastseries = []
            print('Quantile\telasticity\tse_elast\tintercept\tse_intercept')
            for quantile in quantiles:
                elast = model.fit(q=quantile)
                # look the terms up by name rather than by position
                slope = elast.params['lnX']
                elastseries.append(slope)
                print('{}\t{:8.6f}\t{:8.6f}\t{:8.6f}\t{:8.6f}'.format(
                    quantile, slope, elast.bse['lnX'],
                    elast.params['Intercept'], elast.bse['Intercept']))
            elastdt = pd.DataFrame()
            elastdt[quantstub] = quantiles
            elastdt[elaststub] = elastseries

            # add elasticities and clean dataset
            todrop = [var for var in dt.keys() if '__' in var]
            self.dataset = pd.merge(dt, elastdt, on=quantstub)
            self.dataset.sort_values(key, inplace=True)
            self.dataset.reset_index(drop=True, inplace=True)
            self.dataset.drop(todrop, axis=1, inplace=True)
            self.seedvars += [quantstub, elaststub]
Example #42
0
def trend_CI(x_var, y_var, n_boot=1000, ci=95, trendtype="linreg", q=0.5, frac=0.6, it=3, autocorr=None, CItype="bootstrap"):
    """Estimate a trend with its confidence interval and significance level.

    The confidence interval comes either from a bootstrap (optionally a
    stationary bootstrap, to account for autocorrelation) or from the
    analytical formulas.

    Parameters
    ----------
    x_var : list
      independent variable
    y_var : list
      dependent variable, same length as x_var; invalid values are masked
    n_boot : int, optional
      number of bootstrap samples
    ci : int or float, optional
      confidence level in percent. Default is for 95% confidence interval
    trendtype : str, optional
      method of trend derivation, possible values: lowess, linreg,
      quantreg, TheilSen
    q : float, optional, only if trendtype==quantreg
      quantile for which regression is to be calculated
    frac : float, optional, only if trendtype==lowess
      lowess parameter (fraction of time period length used in local
      regression)
    it : int, optional, only if trendtype==lowess
      lowess parameter (number of iterations)
    autocorr : str, optional
      way of accounting for autocorrelation, possible values: None,
      "bootstrap" (stationary bootstrap instead of an IID one)
    CItype : str, optional
      method of CI derivation, possible values: "analytical" and
      "bootstrap".
      if trendtype is "lowess", the trend band is always bootstrapped
      if trendtype is "TheilSen", "analytical" is switched to "bootstrap"

    Returns
    -------
    dict with the following elements:
    slope - slope of the trend (NaN for lowess)
    CI_high - upper CI bound on the slope value (NaN for lowess)
    CI_low - lower CI bound on the slope value (NaN for lowess)
    pvalue - trend's significance level (NaN for lowess)
    trend - trend line, i.e. its y values for all x_var
    trendCI_high - upper confidence bound for each value of the trend
    trendCI_low - lower confidence bound for each value of the trend

    Raises
    ------
    ValueError
      if trendtype is not one of the supported methods

    Remarks
    -------
    the fit function occasionally crashes on resampled data. The workaround
    is to skip the bootstrap samples on which the fit fails.
    """
    import numpy as np
    import pandas as pd
    #for linreg
    import statsmodels.api as sm
    #for quantreg
    import statsmodels.formula.api as smf
    #for lowess
    import statsmodels.nonparametric.api as npsm
    #other
    from statsmodels.distributions.empirical_distribution import ECDF
    from scipy.stats import mstats, t, kendalltau
    from arch.bootstrap import StationaryBootstrap, IIDBootstrap

    # Fail early with a clear message; an unsupported value used to surface
    # much later as a NameError on `slope`.
    if trendtype not in ("linreg", "quantreg", "TheilSen", "lowess"):
        raise ValueError("unknown trendtype: {0!r}".format(trendtype))

    #preparing data
    if CItype == "analytical" and trendtype == "TheilSen":
        CItype = "bootstrap"
    x_var = np.array(x_var)
    y_var = np.ma.masked_invalid(y_var)
    n_data = len(y_var)
    # float division: with an integer ci, "/2" truncates on Python 2
    ci_low = (100 - ci) / 2.0
    ci_high = 100 - ci_low
    # significance level matching ci, for the analytical slope intervals
    alpha = 1 - ci / 100.0

    #setting bootstrapping function (resamples positions 0..n_data-1)
    if autocorr == "bootstrap":
        bs = StationaryBootstrap(3, np.arange(n_data))
    else:
        bs = IIDBootstrap(np.arange(n_data))

    def _trend_band(columns):
        # Stack the (n_data, 1) trend columns and take the percentiles
        # across samples. The result has shape (2, n_data): row 0 is the
        # lower bound and row 1 the upper bound for every x.
        stacked = np.hstack(columns)
        band = np.nanpercentile(stacked, [ci_low, ci_high], 1)
        return stacked, band

    if trendtype == "quantreg":
        print("Quantile regression, CI type: {0}, autocorrelation adjustment: {1}\n".format(CItype, autocorr))
        xydata = pd.DataFrame(np.column_stack([x_var, y_var]), columns=['X', 'Y'])
        res = smf.quantreg('Y ~ X', xydata).fit(q=q)
        intcpt = res.params['Intercept']
        slope = res.params['X']
        pvalue = res.pvalues['X']
        slope_ci = res.conf_int(alpha=alpha)
        CI_low = slope_ci[0]['X']
        CI_high = slope_ci[1]['X']
        # predict() may return a pandas Series; work with a plain array
        y_pred = np.asarray(res.predict(xydata))
        #calculating residuals
        resids = y_var - y_pred
        #calculate autocorrelation indices
        autocorr_test(x_var, resids)

        if CItype == "bootstrap":
            #bootstrapping
            bs_trends = [y_pred.reshape(-1, 1)]
            bs_slopes = []
            bs_intcpts = []
            for data in bs.bootstrap(n_boot):
                ind = data[0][0]
                # positional selection of the resampled rows
                model = smf.quantreg('Y ~ X', xydata.iloc[ind, :])
                try:
                    boot_res = model.fit(q=q)
                    boot_trend = np.asarray(boot_res.predict(xydata)).reshape(-1, 1)
                except Exception:
                    # the fit occasionally fails on a resample; skip it
                    continue
                bs_slopes.append(boot_res.params['X'])
                bs_intcpts.append(boot_res.params['Intercept'])
                bs_trends.append(boot_trend)

    if trendtype == "linreg":
        print("Linear regression, CI type: {0}, autocorrelation adjustment: {1}\n".format(CItype, autocorr))
        x_varOLS = sm.add_constant(x_var)
        res = sm.OLS(y_var, x_varOLS, hasconst=True, missing='drop').fit()
        intcpt, slope = res.params
        pvalue = res.pvalues[1]
        CI_low, CI_high = res.conf_int(alpha=alpha)[1]
        y_pred = res.predict(x_varOLS)
        #calculating residuals
        resids = y_var - y_pred
        #calculate autocorrelation indices
        autocorr_test(x_var, resids)

        if CItype == "bootstrap":
            #bootstrapping for confidence intervals
            bs_slopes = []
            bs_intcpts = []
            bs_trends = [np.copy(y_pred).reshape(-1, 1)]
            for data in bs.bootstrap(n_boot):
                ind = data[0][0]
                model = sm.OLS(y_var[ind], x_varOLS[ind, :], hasconst=True, missing='drop')
                try:
                    boot_res = model.fit()
                    boot_trend = boot_res.predict(x_varOLS).reshape(-1, 1)
                except Exception:
                    # the fit occasionally fails on a resample; skip it
                    continue
                bs_slopes.append(boot_res.params[1])
                bs_intcpts.append(boot_res.params[0])
                bs_trends.append(boot_trend)

    if trendtype == "TheilSen":
        #significance of MK tau
        tau, pvalue = kendalltau(x_var, y_var)
        #TS slope and confidence intervals
        slope, intcpt, CI_low, CI_high = mstats.theilslopes(y_var, x_var, alpha=ci / 100.0)
        #getting slope line's y values
        y_pred = intcpt + slope * x_var
        #calculating residuals
        resids = y_var - y_pred
        #calculate autocorrelation indices
        autocorr_test(x_var, resids)

        if CItype == "bootstrap":
            #bootstrapping for confidence intervals
            bs_slopes = []
            bs_intcpts = []
            bs_trends = [np.copy(y_pred).reshape(-1, 1)]
            for data in bs.bootstrap(n_boot):
                ind = data[0][0]
                # only slope and intercept are used, alpha is irrelevant here
                boot_res = mstats.theilslopes(y_var[ind], x_var[ind], alpha=ci / 100.0)
                bs_slopes.append(boot_res[0])
                bs_intcpts.append(boot_res[1])
                bs_trends.append((boot_res[1] + boot_res[0] * x_var).reshape(-1, 1))

    if trendtype == "lowess":
        print("Lowess\n")
        # lowess returns (x, fitted y) pairs; map them back onto x_var.
        # A list comprehension is used because map() is lazy on Python 3.
        fitted = dict(npsm.lowess(y_var, x_var, frac=frac, it=it, missing="drop"))
        y_pred = np.array([fitted.get(x) for x in x_var]).astype("float").reshape(-1, 1)
        bs_trends = [np.copy(y_pred)]

        for data in bs.bootstrap(n_boot):
            ind = data[0][0]
            try:
                fitted = dict(npsm.lowess(y_var[ind], x_var[ind], frac=frac, it=it, missing="drop"))
                sampled = np.array([fitted.get(x) for x in x_var]).astype("float").reshape(-1, 1)
                # x values absent from the resample are filled by interpolation
                boot_trend = pd.DataFrame(sampled, index=x_var).interpolate().values
            except Exception:
                # the fit occasionally fails on a resample; skip it
                continue
            bs_trends.append(boot_trend)

    #calculating final values of CI and p-value

    #skipping when lowess
    if trendtype == "lowess":
        CI_low = np.nan
        CI_high = np.nan
        slope = np.nan
        intcpt = np.nan
        pvalue = np.nan
        stacked, confint = _trend_band(bs_trends)
        trendCI_low = confint[0]
        trendCI_high = confint[1]
    else:
        if CItype == "bootstrap":
            #values for slope, intercept and trend could be taken as medians of the
            # bootstrap distributions (np.median of bs_slopes / bs_intcpts), but normally
            # the analytical parameters are used instead. If the bootstrap bias (difference
            # between analytical values and bootstrap median) is strong, it might be better
            # to use the bootstrap values.
            #these are from bootstrap too, but need to be used as this accounts for
            # autocorrelation, which is the point of this script
            CI_low, CI_high = np.percentile(bs_slopes, [ci_low, ci_high])
            ecdf = ECDF(bs_slopes)
            pvalue = ecdf(0)
            #this makes sure we are calculating p-value on the correct side of the distribution. That will be one-sided pvalue
            if pvalue > 0.5:
                pvalue = 1 - pvalue
            stacked, confint = _trend_band(bs_trends)
            print("bs_trends: {0} {1}".format(stacked.shape, confint.shape))
            trendCI_low = confint[0]
            trendCI_high = confint[1]
        else:
            #this is for analytical calculation of trend confidence interval
            #it happens in the same way for each of the trend types, thus it is done here, not under the trendtype subroutines
            #making sure x are floats
            xtemp = np.array(x_var) * 1.0
            #squared anomaly
            squanom = (xtemp - np.mean(xtemp)) ** 2
            temp = ((1. / len(x_var)) + (squanom / sum(squanom))) ** 0.5
            #standard error of estimation
            # NOTE(review): this uses deviations of y from the mean of the
            # fitted values and divides by n - confirm this is the intended
            # estimator (residuals about the fit and n-2 are more usual).
            see = (np.nansum((np.array(y_var) - np.nanmean(y_pred)) ** 2) / len(x_var)) ** 0.5
            #adjusting ci
            ci_adj = 1 - ((1 - ci / 100.) / 2)
            #accounting for uncertainty in mean through student's t
            tcomp = t.ppf(ci_adj, len(x_var) - 2)
            #confidence interval
            cint = tcomp * see * temp
            #for trend only
            trendCI_high = y_pred + cint
            trendCI_low = y_pred - cint

        print("{0} slope: {1} pvalue (one sided): {2} conf interval: {3} {4} autocorrelation adjustment: {5} \n".format(
            trendtype, slope, pvalue, CI_low, CI_high, autocorr))
    output = {"slope": slope, "CI_high": CI_high, "CI_low": CI_low, "pvalue": pvalue, "trend": y_pred, "trendCI_low": trendCI_low, "trendCI_high": trendCI_high}
    return output