Example #1
    def __init__(self,
                 frame,
                 share_column='vote_share',
                 group_by='state',
                 covariate_columns=None,
                 weight_column=None,
                 year_column='year',
                 redistrict_column=None,
                 district_id='district_id',
                 missing='drop',
                 uncontested=None,
                 break_on_GIGO=True):
        super().__init__(frame,
                         share_column=share_column,
                         covariates=covariate_columns,
                         weight_column=weight_column,
                         year_column=year_column,
                         redistrict_column=redistrict_column,
                         district_id=district_id,
                         missing=missing,
                         uncontested=uncontested,
                         break_on_GIGO=break_on_GIGO)
        self._years = np.sort(self.long.year.unique())
        self._covariate_cols += ['grouped_vs']
        self._decade_starts = np.sort(
            list(set([_year_to_decade(yr) for yr in self.years])))
        self.decades = {dec: [] for dec in self._decade_starts}
        for i, (yr, wide) in enumerate(zip(self.years, self.wide)):
            if group_by is not None:
                grouped_vs = wide.groupby(
                    group_by).vote_share.mean().to_frame()
                grouped_vs.columns = ['grouped_vs']
                grouped_vs = grouped_vs.fillna(.5)
                self.wide[i] = wide.merge(grouped_vs,
                                          left_on=group_by,
                                          right_index=True)
                self.wide[i]['grouped_vs'] = self.wide[i]['grouped_vs'].fillna(
                    .5)
            else:
                grouped_vs = wide.vote_share.mean()
                self.wide[i]['grouped_vs'] = grouped_vs
            self.decades[_year_to_decade(yr)].append(self.wide[i])
        self.models = []

        for yr in self._decade_starts:
            self.decades[yr] = pd.concat(self.decades[yr], axis=0, sort=True)

            # WLS yields incredibly precise simulation values? Not sure why.
            X = sm.add_constant(self.decades[yr][self._covariate_cols]).values
            Y = self.decades[yr].vote_share.values
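            # where the vote share is missing, fall back to the group-level mean (grouped_vs)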
            Y[np.isnan(Y)] = self.decades[yr]['grouped_vs'].values[np.isnan(Y)]
            if weight_column is None:
                self.models.append(sm.GLS(Y, X).fit())
            else:
                # a 1-D sigma is treated by sm.GLS as the diagonal of the error covariance
                weights = self.decades[yr].weight
                self.models.append(sm.GLS(Y, X, sigma=weights).fit())
Example #2
def compare_models(y, smaller_model_list, bigger_model_list):
	model_null = sm.GLS(y, smaller_model_list)
	model_alternative = sm.GLS(y, bigger_model_list)
	fit_null = model_null.fit()
	fit_alternative = model_alternative.fit()
	null = model_null.loglike(fit_null.params)
	alternative = model_alternative.loglike(fit_alternative.params)
	info_small_model = fit_null.summary()
	info_big_model = fit_alternative.summary()
	rejected = llr_test(null, alternative)

	return rejected, info_small_model, info_big_model
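The `llr_test` helper used above is not shown in the snippet. A minimal sketch under the usual likelihood-ratio asymptotics (the chi-squared degrees of freedom and the 0.05 threshold are assumptions, not part of the original):

from scipy import stats

def llr_test(null, alternative, df=1, alpha=0.05):
	# likelihood-ratio statistic: twice the log-likelihood gap
	lr = 2 * (alternative - null)
	p_value = stats.chi2.sf(lr, df)
	return p_value < alpha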
Example #3
def best_model_in_the_class(y, dict_data, combinations):
	helper_dict = {}
	helper_list = []
	combinations_dict = {}
	r_squared = {i: [] for i in range(len(combinations))}
	for i in range(len(combinations)):
		combinations_dict[i] = combinations[i]
	# build the regressor rows for each combination of variables
	for key in combinations_dict.keys():
		for i in range(len(dict_data['Intel'])):
			helper_dict[i] = [v[i] for k, v in dict_data.items() if k in combinations_dict[key]]
		helper_list = [value for k, value in sorted(helper_dict.items())]
		model = sm.GLS(y, helper_list)
		regression = model.fit()
		r = regression.rsquared
		r_squared[key].append(r)
	# map each R^2 onto the combination that produced it
	determination_combination_dict = {}
	for k1 in combinations_dict.keys():
		for k2 in r_squared.keys():
			if k1 == k2:
				determination_combination_dict[float(r_squared[k2][0])] = list(combinations_dict[k1])

	# choose the best model in the class
	best_r = max(determination_combination_dict.keys())
	best_model = determination_combination_dict[best_r]

	return best_model, determination_combination_dict, r_squared, helper_dict, helper_list
Example #4
def vcfassoc(formula, covariate_df, groups=None):

    y, X = patsy.dmatrices(str(formula), covariate_df, return_type='dataframe')
    # get the column containing genotype
    ix = get_genotype_ix(X)
    Binomial = sm.families.Binomial
    logit = sm.families.links.Logit()

    if groups is not None:
        #covariate_df['grps'] = map(str, range(len(covariate_df) / 8)) * 8
        if not isinstance(groups, (pd.DataFrame, np.ndarray)):
            cov = Exchangeable()
            model = sm.GEE(y,
                           X,
                           groups=covariate_df[groups],
                           cov_struct=cov,
                           family=Binomial())
        else:
            model = sm.GLS(logit(y), X, sigma=groups.loc[X.index, X.index])
    else:
        model = sm.GLM(y, X, missing='drop', family=Binomial())

    result = model.fit(maxiter=1000)
    res = {
        'OR': np.exp(result.params[ix]),
        'pvalue': result.pvalues[ix],
        'z': result.tvalues[ix],
        'OR_CI': tuple(np.exp(result.conf_int().iloc[ix, :])),
    }
    try:
        res['df_resid'] = result.df_resid
    except AttributeError:
        pass
    return res
Example #5
def gls(data, xseq, **params):
    """
    Fit GLS
    """
    if params['formula']:
        return gls_formula(data, xseq, **params)

    X = sm.add_constant(data['x'])
    Xseq = sm.add_constant(xseq)

    init_kwargs, fit_kwargs = separate_method_kwargs(params['method_args'],
                                                     sm.GLS, sm.GLS.fit)
    model = sm.GLS(data['y'], X, **init_kwargs)
    results = model.fit(**fit_kwargs)

    data = pd.DataFrame({'x': xseq})
    data['y'] = results.predict(Xseq)

    if params['se']:
        alpha = 1 - params['level']
        prstd, iv_l, iv_u = wls_prediction_std(results, Xseq, alpha=alpha)
        data['se'] = prstd
        data['ymin'] = iv_l
        data['ymax'] = iv_u

    return data
Example #6
def simple_regression_example():
    # Load data.
    spector_data = sm.datasets.spector.load()
    spector_data.exog = sm.add_constant(spector_data.exog, prepend=False)

    # Fit and summarize model.

    # OLS: ordinary least squares for i.i.d. errors Sigma = I.
    mod = sm.OLS(spector_data.endog, spector_data.exog)
    res = mod.fit()
    print(res.summary())

    # GLS: generalized least squares for arbitrary covariance Sigma.
    mod = sm.GLS(spector_data.endog, spector_data.exog)
    res = mod.fit()
    print(res.summary())

    # WLS: weighted least squares for heteroskedastic errors diag(Sigma).
    mod = sm.WLS(spector_data.endog, spector_data.exog)
    res = mod.fit()
    print(res.summary())

    # GLSAR: feasible generalized least squares with autocorrelated AR(p) errors Sigma = Sigma(rho).
    mod = sm.GLSAR(spector_data.endog, spector_data.exog)
    res = mod.fit()
    print(res.summary())
Example #7
def test_sm_GLSAR():
    print("Testing SM, GLSAR...")
    X, y = iris_data
    est = sm.GLSAR(y, X, rho=2)
    mod = est.fit()
    docs = {'name': "GLSAR test"}
    fv = X[0, :]
    upload(mod, fv, docs)
Example #8
def simple_model(X, Y, model_type="LR"):
    if model_type == "LR":
        simple_model = sm.GLS(Y, X)
    elif model_type == "NN":
        simple_model = NN_FF(10, X.shape[1])
    else:
        raise NotImplementedError("unknown model type: %s" % model_type)
    return simple_model
Example #9
    def gls_fit(self, X, y, add_const=True):
        X_ = sm.add_constant(X) if add_const else X
        mod = sm.GLS(y.values, np.asarray(X_))
        res = mod.fit()
        res = res.get_robustcov_results()
        self.reg = res
        print(res.summary())  # robust results
Example #10
def pgls(responseList, predictor, phyCovMatrix, intercept=True):
    #predictor = np.array(predictor).T # transpose it to make sure sm doesn't choke
    # TODO: should I set an intercept, or no?
    # Add a column of 1s at the beginning of exog so the model incorporates an intercept
    if intercept:
        predictor = sm.add_constant(predictor)  # now it is n x 2
    # return the model
    return sm.GLS(endog=responseList, exog=predictor, sigma=phyCovMatrix)
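A hypothetical usage sketch for the function above (the trait values and the 3x3 phylogenetic covariance matrix are made up for illustration): `sigma` lets closely related taxa share correlated errors.

import numpy as np

response = np.array([1.2, 0.9, 2.1])
predictor = np.array([0.5, 0.4, 1.0])
phy_cov = np.array([[1.0, 0.5, 0.1],
                    [0.5, 1.0, 0.1],
                    [0.1, 0.1, 1.0]])
results = pgls(response, predictor, phy_cov, intercept=True).fit()
print(results.params)  # intercept and slope under the assumed covariance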
Example #11
def test_sm_GLS():
    print("Testing SM, GLS...")
    data = sm.datasets.longley.load(as_pandas=False)
    X = sm.add_constant(data.exog)
    est = sm.GLS(data.endog, X, sigma=1)
    mod = est.fit()
    docs = {'name': "GLS test"}
    fv = X[0, :]
    upload(mod, fv, docs)
Example #12
    def compare_models(self, smaller_model_list, bigger_model_list):
        y = self.dependent_variable[list(self.dependent_variable.keys())[0]]

        model_null = sm.GLS(y, smaller_model_list)
        model_alternative = sm.GLS(y, bigger_model_list)

        fit_null = model_null.fit()
        fit_alternative = model_alternative.fit()

        null = model_null.loglike(fit_null.params)
        alternative = model_alternative.loglike(fit_alternative.params)

        self.info_small_model = fit_null.summary()
        self.info_big_model = fit_alternative.summary()

        self.rejected = self.llr_test(null, alternative)

        return self.rejected, self.info_small_model, self.info_big_model
Example #13
    def linear_regression(self):
        """
        Notes
        -----
        Only the following combinations make sense for family and link ::

                       + ident log logit probit cloglog pow opow nbinom loglog logc
          Gaussian     |   x    x                        x
          inv Gaussian |   x    x                        x
          binomial     |   x    x    x     x       x     x    x           x      x
          Poisson      |   x    x                        x
          neg binomial |   x    x                        x          x
          gamma        |   x    x                        x

        Examples
        --------
        >>> from cost_functions import *
        >>> imp = Import('./', 'nyc-condominium-dataset.csv')
        >>> df = imp.import_housing_data()
        >>> x_var = ['comp_full_market_value', 'comp2_full_market_value', 'gross_sqft']
        >>> reg = Regression(df, 'full_market_value', x_var, 2.5, 2.5)
        >>> df_final = reg.linear_regression()
        >>> cf = CostFunction(df_final, 'full_market_value', 'predicted', 'district', 10, 2.5, 2.5)
        >>> cf.all_glm()
        """
        df = self.df
        y = df[self.y]
        #Remove Outliers
        ol = y.describe()['mean'] - self.std_ol * y.describe()['std']
        oh = y.describe()['mean'] + self.std_oh * y.describe()['std']

        df = df[(df[self.y] > ol) & (df[self.y] < oh)]

        #Remove missing values
        df = df[(df['comp2_full_market_value'] > 0)
                & (df['comp_full_market_value'] > 0) &
                (df['full_market_value'] > 0)
                & (df[self.y] > ol) & (df[self.y] < oh)]
        df = df.reset_index(drop=True)
        y = df[self.y]
        #y.describe()

        x = sm.add_constant(df[self.x])
        #x.describe()
        #model = sm.GLM(Y, X, family = sm.families.Binomial())
        #model = sm.GLM(y, x, family=sm.families.Gaussian())
        #Note: GLS returns same results as GLM where you set const=1 and family=sm.families.Gaussian()
        m2 = sm.GLS(y, x)
        r2 = m2.fit()

        df_predicted = pd.DataFrame(r2.predict(), columns=['predicted'])
        df_final = df.combine_first(df_predicted)

        print(r2.summary())

        return df_final
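A quick check of the note in the code above, on made-up data: sm.GLS with the default identity covariance and a Gaussian-family sm.GLM produce the same coefficients.

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
x = sm.add_constant(rng.normal(size=(50, 2)))
y = x @ np.array([1.0, 2.0, -0.5]) + rng.normal(size=50)

gls_fit = sm.GLS(y, x).fit()
glm_fit = sm.GLM(y, x, family=sm.families.Gaussian()).fit()
np.testing.assert_allclose(gls_fit.params, glm_fit.params)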
Example #14
    def refresh(self, X, Y):
        X, Y = self._prepare_data_for_fit(X, Y)
        ols_resid = sm.OLS(Y, X).fit().resid  # OLS residuals
        res_fit = sm.OLS(ols_resid[1:], ols_resid[:-1]).fit()  # regress residuals on their lags
        rho = res_fit.params  # autoregressive parameter

        order = toeplitz(np.arange(len(ols_resid)))  # lag |i - j| in cell (i, j)
        sigma = rho ** order

        self._model = sm.GLS(Y, X, sigma=sigma).fit()
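A minimal sketch of why `rho ** order` above is the AR(1) error structure: `toeplitz(np.arange(n))` puts the lag |i - j| in cell (i, j), so raising `rho` to it elementwise gives the AR(1) correlation matrix.

import numpy as np
from scipy.linalg import toeplitz

rho = 0.5
order = toeplitz(np.arange(3))  # [[0, 1, 2], [1, 0, 1], [2, 1, 0]]
sigma = rho ** order            # [[1.0, 0.5, 0.25], [0.5, 1.0, 0.5], [0.25, 0.5, 1.0]]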
Example #15
def regr_gls_sm(y: Union[np.ndarray, pd.DataFrame],
                x: Union[np.ndarray, pd.DataFrame], **param):
    '''Fit GLS and GLSAR models with an AR(1) error structure
    estimated from the OLS residuals.
    '''
    if isinstance(x, (np.ndarray, pd.DataFrame)):
        X = sm.add_constant(x)
    else:
        X = np.array(x).T
        X = sm.add_constant(X)

    if not isinstance(y, (np.ndarray, pd.DataFrame)):
        y = np.array(y)

    model = sm.OLS(y, X)
    fit_ols = model.fit()
    ols_resid = np.asarray(fit_ols.resid)

    # regress the residuals on their lagged values to estimate the AR(1) coefficient
    resid_fit = sm.OLS(ols_resid[1:], sm.add_constant(ols_resid[:-1])).fit()
    rho = resid_fit.params[1]

    # rho**|i - j| defines an AR(1) autocorrelation structure for the error covariance
    order = toeplitz(range(len(ols_resid)))
    sigma = rho**order

    model_gls = sm.GLS(y, X, sigma=sigma)
    fit_gls = model_gls.fit()

    model_glsar = sm.GLSAR(y, X, 1)
    fit_glsar = model_glsar.iterative_fit(1)

    return fit_gls, fit_glsar
Example #16
    def __init__(self, data, normalize=False, t_value_threshold=2.3, **kwargs):
        # call parent function.
        RegressionModel.__init__(self, data, normalize=normalize, **kwargs)

        # placeholders specific to this class.
        self.model = None
        self.trained_model = None
        self.t_value_threshold = t_value_threshold

        # initialise a statsmodels GLS instance.
        self.model = sm.GLS(self.train_y, self.train_x)
Example #17
def GLS(Y, X, e, r):

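    # estimate the AR(1) coefficient by regressing the residuals e on their lagged values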
    res_fit = sm1.OLS(e[1:], e[:-1]).fit()
    rho = res_fit.params
    order = toeplitz(np.arange(len(X)))
    sigma = rho**order
    gls_model = sm1.GLS(Y, X, sigma=sigma)
    gls_results = gls_model.fit()
    print(gls_results.summary())
    E = gls_results.resid

    return E
Example #18
def do_gls(y, X, covariance_matrix):
    '''
    generalized least squares using the estimated covariance matrix
    input: data, estimated covariance matrix
    output: GLS results, residuals
    '''

    gls_mod = sm.GLS(endog=y, exog=X, sigma=covariance_matrix)
    gls_res = gls_mod.fit()
    residual = gls_res.resid

    return gls_res, residual
Example #19
def LinearRegressionPrediction(mat, minidx, linlag):
    # load dataset
    n = len(mat)
    m = len(mat[0])

    # calculate the "latest range"
    startidx = max(minidx - linlag, 0)

    slopes = np.zeros(m)
    slopes2 = np.zeros(m)
    intercepts = np.zeros(m)

    for i in range(m):  # per column
        ts = mat[:, i]

        use_SM = False

        if use_SM:
            # regress the series on time; add a constant so the fit has an intercept
            exog = sm.add_constant(np.arange(startidx, minidx))
            model_fit = sm.GLS(np.array(ts[startidx:minidx]), exog).fit()
            intercept, slope = model_fit.params
        else:
            model = scistat.linregress(np.arange(startidx, minidx),
                                       ts[startidx:minidx])
            slope = model.slope
            intercept = model.intercept
        slopes[i] = slope
        intercepts[i] = intercept

        slope = slope * 1.1

        adjust_intercept = ts[minidx - 1] - ((minidx - 1) * slope + intercept)
        intercept = intercept + adjust_intercept

        # prediction
        for j in range(minidx, n):
            if (j != minidx) and (((j - minidx) % 8) == 0):
                slope *= 0.85
                # the intercept depends on the slope (for continuity of the
                # prediction), so it has to be re-adjusted when the slope changes
                adjust_intercept = ts[j - 1] - ((j - 1) * slope + intercept)
                intercept = intercept + adjust_intercept

            ts[j] = (j * slope) + intercept
        slopes2[i] = slope
        mat[:, i] = ts
    return mat
Example #20
def gls_model():
    # Generalized Least Squares (GLS)
    data = get_dataset("longley")
    data.exog = sm.add_constant(data.exog)
    ols_resid = sm.OLS(data.endog, data.exog).fit().resid
    res_fit = sm.OLS(ols_resid[1:], ols_resid[:-1]).fit()
    rho = res_fit.params
    order = toeplitz(np.arange(len(ols_resid)))  # longley has 16 observations
    sigma = rho**order
    gls = sm.GLS(data.endog, data.exog, sigma=sigma)
    model = gls.fit()

    return ModelWithResults(model=model, alg=gls, inference_dataframe=data.exog)
Example #21
def solve_GLS(X=None, y=None, sigma=None):
    """
    Solve a multiple linear problem using statsmodels GLS
    """
    goForGLS = X.copy()
    while True:
        regr = sm.GLS(y, goForGLS, sigma=sigma).fit()
        if (regr.params < 0).any():
            # Some coefficients are negative; drop the most negative variable.
            goForGLS.drop(goForGLS.columns[regr.params == min(regr.params)],
                          axis=1, inplace=True)
        else:
            # OK, the run converged.
            break
        if goForGLS.shape[1] == 0:
            # All variables were dropped; the run did not converge.
            print("Warning: The run did not converge...")
            break
    return regr
Example #22
    def getReturn(self):
        self._y = np.append(self._pi, self._q)
        n = self._gammaSigma.shape[0]
        m = self._Omega.shape[0]
        self._V = np.zeros((n + m, n + m))
        self._V[:n, :n] += self._gammaSigma
        self._V[n:, n:] += self._Omega
        self._X = np.vstack([np.identity(self._N), self._P])

        # Below is the statsmodels GLS implementation
        import statsmodels.api as sm
        self._fit = sm.GLS(self._y, self._X, sigma=self._V).fit()
        self._ret = self._fit.params
Example #23
def test_fast_scanner_statsmodel_gls():
    import statsmodels.api as sm
    from numpy.linalg import lstsq

    def _lstsq(A, B):
        return lstsq(A, B, rcond=None)[0]

    data = sm.datasets.longley.load()
    data.exog = sm.add_constant(data.exog)
    ols_resid = sm.OLS(data.endog, data.exog).fit().resid
    resid_fit = sm.OLS(ols_resid[1:], sm.add_constant(ols_resid[:-1])).fit()
    rho = resid_fit.params[1]
    order = toeplitz(range(len(ols_resid)))
    sigma = rho ** order

    QS = economic_qs(sigma)
    lmm = LMM(data.endog, data.exog, QS)
    lmm.fit(verbose=False)

    sigma = lmm.covariance()
    scanner = lmm.get_fast_scanner()
    best_beta_se = _lstsq(data.exog.T @ _lstsq(lmm.covariance(), data.exog), eye(7))
    best_beta_se = sqrt(best_beta_se.diagonal())
    assert_allclose(scanner.null_beta_se, best_beta_se, atol=1e-5)

    endog = data.endog.copy()
    endog -= endog.mean(0)
    endog /= endog.std(0)

    exog = data.exog.copy()
    exog -= exog.mean(0)
    with errstate(invalid="ignore", divide="ignore"):
        exog /= exog.std(0)
    exog[:, 0] = 1

    lmm = LMM(endog, exog, QS)
    lmm.fit(verbose=False)

    sigma = lmm.covariance()
    scanner = lmm.get_fast_scanner()

    gls_model = sm.GLS(endog, exog, sigma=sigma)
    gls_results = gls_model.fit()
    beta_se = gls_results.bse
    our_beta_se = sqrt(scanner.null_beta_covariance.diagonal())
    # statsmodels scales the covariance matrix we pass, that is why
    # we need to account for it here.
    assert_allclose(our_beta_se, beta_se / sqrt(gls_results.scale))
    assert_allclose(scanner.null_beta_se, beta_se / sqrt(gls_results.scale))
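A small check of the scaling comment above, on made-up data: multiplying `sigma` by a constant leaves the GLS standard errors unchanged, because statsmodels re-estimates an overall `scale` from the whitened residuals.

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(1)
X = sm.add_constant(rng.normal(size=(30, 2)))
y = X @ np.array([0.5, 1.0, -1.0]) + rng.normal(size=30)

fit1 = sm.GLS(y, X, sigma=np.eye(30)).fit()
fit2 = sm.GLS(y, X, sigma=4 * np.eye(30)).fit()
np.testing.assert_allclose(fit1.bse, fit2.bse)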
Example #24
    def best_model_in_the_class(self, combinations):
        helper_dict = {}
        helper_list = []
        combinations_dict = {}
        y = self.dependent_variable[list(self.dependent_variable.keys())[0]]
        aic_coef = {i: [] for i in range(0, len(combinations))}
        for i in range(0, len(combinations)):
            combinations_dict[i] = combinations[i]
        # build the regressor rows for each combination of variables
        for key in combinations_dict.keys():
            for i in range(0, len(self.dict_data['Intel'])):
                for item in combinations_dict[key]:
                    helper_dict[i] = [
                        v[i] for k, v in self.dict_data.items()
                        if k in combinations_dict[key]
                    ]
                    helper_list = [
                        value for key, value in sorted(helper_dict.items())
                    ]
            model = sm.GLS(y, helper_list)
            regression = model.fit()
            aic = regression.aic
            aic_coef[key].append(aic)
        # map each AIC onto the combination that produced it

        aic_combinations_dict = {}
        for k1 in combinations_dict.keys():
            for k2 in aic_coef.keys():
                if k1 == k2:
                    aic_combinations_dict[float(aic_coef[k2][0])] = list(
                        combinations_dict[k1])
        best_aic = min(aic_combinations_dict.keys())
        self.best_model_in_class = aic_combinations_dict[best_aic]

        return self.best_model_in_class
Example #25
def reg_m(y, x, estimator, weights=None):
    ones = np.ones(len(x[0]))
    X = sm.add_constant(np.column_stack((x[0], ones)))
    for ele in x[1:]:
        X = sm.add_constant(np.column_stack((ele, X)))
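    # note: built this way, the constant ends up as the last column and the
    # regressors appear in reverse order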

    if estimator == 'ols':
        return sm.OLS(y, X).fit()

    elif estimator == 'wls':
        return sm.WLS(y, X, weights=weights if weights is not None else 1.0).fit()

    elif estimator == 'gls':
        return sm.GLS(y, X).fit()

    return None
Example #26
def SPGLS(context):
    # Get the relevant data from the Context
    args = context.args
    # args.inputData holds the data sent by the previous node
    df = args.inputData

    featureColumns = args.featureColumns
    labelColumn = args.labelColumn

    features = df[featureColumns].values
    label = df[labelColumn].values

    gls_mod = sm.GLS(label, features, missing=args.missing)
    gls_res = gls_mod.fit(method=args.method)

    return gls_res
Example #27
    def getReward(self):
        """Computes the metric for our reward"""
        X = self.X.T[self.features[0]].T
        if X.shape[1] == 0:
            return 0
        results = sm.GLS(self.Y, X).fit()

        return results.rsquared_adj
Example #28
def calibrate(volume_duration, strikes, reps):
    volume_duration = volume_duration.unstack(['Half-spread', 'Strike'])

    arrival_rate = volume_duration.groupby('Class').apply(
        lambda c: compute_arrival_rate(c.loc[c.name, 'Volume'],
                                       c.loc[c.name, 'Duration'],
                                       strikes[c.name]))
    arrival_rate.name = 'Arrival rate'
    arrival_rate.index = arrival_rate.index.reorder_levels(
        ['Class', 'Strike', 'Half-spread'])

    sbs = volume_duration.groupby('Class').apply(
        lambda c: StationaryBootstrap(25,
                                      volume=c.loc[c.name, 'Volume'],
                                      duration=c.loc[c.name, 'Duration']))

    conf_int = sbs.groupby('Class').apply(
        lambda c: pd.DataFrame(
            c[c.name].conf_int(
                lambda volume, duration: compute_arrival_rate(
                    volume, duration, strikes[c.name]),
                reps=reps),
            ['2.5%', '97.5%'], arrival_rate.loc[c.name].index))
    conf_int = conf_int.T.stack('Class')
    conf_int.index = conf_int.index.reorder_levels(
        ['Class', 'Strike', 'Half-spread'])

    sigma = sbs.groupby('Class').apply(
        lambda c: pd.DataFrame(
            c[c.name].cov(
                lambda volume, duration: compute_arrival_rate(
                    volume, duration, strikes[c.name]),
                reps=reps),
            arrival_rate.loc[c.name].index, arrival_rate.loc[c.name].index))
    sigma = sigma.groupby('Strike').apply(
        lambda k: k.xs(k.name, level='Strike', axis=1))
    sigma.dropna(how='all', inplace=True)

    gls = arrival_rate.loc[sigma.index].groupby(['Class', 'Strike']).apply(
        lambda k: sm.GLS(
            k.values,
            sm.add_constant(k.index.get_level_values('Half-spread')),
            sigma=sigma.xs(k.name, level=['Class', 'Strike']).dropna(axis=1)).fit())

    params = gls.apply(
        lambda g: pd.Series([np.exp(g.params[0]), -g.params[1]],
                            ['A', '$\\kappa$']))
    base_conf_int = gls.apply(
        lambda g: pd.Series(np.exp(g.conf_int(alpha=.1)[0]), ['A 5%', 'A 95%']))
    decay_conf_int = gls.apply(
        lambda g: pd.Series(-g.conf_int(alpha=.1)[1, ::-1],
                            ['$\\kappa$ 5%', '$\\kappa$ 95%']))
    params = pd.concat([params, base_conf_int, decay_conf_int], axis=1)

    arrival_rate = np.exp(pd.concat([arrival_rate, conf_int], axis=1))
    return arrival_rate, params
Example #29
def gls(data, xseq, **params):
    """
    Fit GLS
    """
    X = sm.add_constant(data['x'])
    Xseq = sm.add_constant(xseq)
    results = sm.GLS(data['y'], X).fit(**params['method_args'])
    data = pd.DataFrame({'x': xseq})
    data['y'] = results.predict(Xseq)

    if params['se']:
        alpha = 1 - params['level']
        prstd, iv_l, iv_u = wls_prediction_std(results, Xseq, alpha=alpha)
        data['se'] = prstd
        data['ymin'] = iv_l
        data['ymax'] = iv_u

    return data
Example #30
def LL(X, Y, Xs, Ys, error):
    n = len(X)
    h = 0.1
    mean_of_error = np.zeros((len(Xs), len(Ys)))
    for i in range(len(Xs)):
        for j in range(len(Ys)):
            u1 = ((X - Xs[i]) / h)**2
            u2 = ((Y - Ys[j]) / h)**2
            # product of two biweight (quartic) kernels, 0.9375 * (1 - u**2)**2
            k = (0.9375 * (1 - u1)**2) * (0.9375 * (1 - u2)**2)
            K = np.diag(k)
            indep = np.matrix(np.array([np.ones(n), X - Xs[i], Y - Ys[j]]).T)
            dep = np.matrix(np.array([error]).T)
            gls_model = sm.GLS(dep, indep, sigma=K)
            gls_results = gls_model.fit()
            mean_of_error[i, j] = gls_results.params[0]
    return mean_of_error