def reg2(T):
    global i
    print(i)
    i += 1
    # Guard against the whole series being NaN
    if T.isnull().sum() != T.shape[0]:
        window = 50
        tscv = TimeSeriesSplit(n_splits=T.shape[0] - window + 1)
        new_dd = pd.Series(np.nan, index=T.index)
        for train_index, test_index in tscv.split(T):
            # print("TRAIN:", train_index[-window:], "TEST:", test_index)
            X, Y = T.iloc[train_index[-window:]], bench.iloc[train_index[-window:]]
            # Guard against the window being all NaN
            if X.isnull().sum() != X.shape[0]:
                X = sm.add_constant(X)
                model = OLS(Y, X, missing='drop')
                results = model.fit()
                res = results.resid.iloc[-1]
                new_dd.iloc[train_index[-1]] = res
        # Compute the final window
        X, Y = T.iloc[-window:], bench.iloc[-window:]
        # Guard against the window being all NaN
        if X.isnull().sum() != X.shape[0]:
            X = sm.add_constant(X)
            model = OLS(Y, X, missing='drop')
            results = model.fit()
            res = results.resid.iloc[-1]
            new_dd.iloc[-1] = res
            return new_dd
        else:
            return T
    else:
        return T
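# Alternative sketch (not from the original source): statsmodels ships a
# vectorized rolling regression, statsmodels.regression.rolling.RollingOLS,
# that fits the same fixed-width windows as reg2 above without the
# TimeSeriesSplit loop. This assumes statsmodels >= 0.10 and that `bench`
# is the dependent series, as in reg2; the residual at each window's last
# observation is recovered from the rolling parameter estimates.
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.regression.rolling import RollingOLS

def rolling_last_resid(T, bench, window=50):
    X = sm.add_constant(T)
    params = RollingOLS(bench, X, window=window).fit().params
    fitted = (params * X).sum(axis=1)              # fitted value at each window end
    resid = bench - fitted
    resid[params.isnull().any(axis=1)] = np.nan    # windows without a full fit
    return resid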
def research_pair_trading_opportunity(currency1, currency2):
    name1, name2 = currency1.name, currency2.name
    print(f"Researching Pair {name1} and {name2}")
    model = OLS(currency1, sm.add_constant(currency2))
    ols_results = model.fit()
    print("Prices OLS results:")
    print(
        f"const: {ols_results.params['const']} || {name2}: {ols_results.params[name2]}"
    )
    coint_series = currency1 - currency2 * ols_results.params[name2]
    coint_series.plot()
    plt.show()

    dependent_var = coint_series.diff()[1:]
    independent_var = coint_series.shift(1)[1:]
    independent_var.name = "val_prev"
    model = OLS(dependent_var, sm.add_constant(independent_var))
    ols_results = model.fit()
    print("Diff of Cointegrating Series OLS Results:")
    print(
        f"const: {ols_results.params['const']} || val_prev: {ols_results.params['val_prev']}"
    )
    print("Mean-Reversion Half-life:", -np.log(2) / ols_results.params["val_prev"])
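# Why -log(2) / lambda is a half-life (illustrative check, not from the
# original source): the regression above fits
#     diff(spread)_t = const + lambda * spread_{t-1} + eps_t,
# a discretized mean-reverting process whose deviations shrink by a factor
# (1 + lambda) per step, so they halve after roughly -log(2)/lambda steps.
# A minimal self-contained verification on a simulated AR(1) spread:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.regression.linear_model import OLS

rng = np.random.default_rng(0)
phi = 0.9                                  # AR(1) coefficient; true lambda = phi - 1
spread = np.zeros(5000)
for t in range(1, 5000):
    spread[t] = phi * spread[t - 1] + rng.standard_normal()
spread = pd.Series(spread)

dep = spread.diff()[1:]
lag = spread.shift(1)[1:]
lam = OLS(dep, sm.add_constant(lag)).fit().params.iloc[1]
print("half-life ~", -np.log(2) / lam)     # ~ -log(2) / (phi - 1) = 6.9 steps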
def test_permuted_ols_statsmodels_withcovar_multivariate(random_state=0):
    """Test permuted_ols with multiple tested variates and covariates.

    It is equivalent to fitting several models with only one tested variate.

    This test has a statsmodels dependence. There seems to be no simple,
    alternative way to perform an F-test on a linear model including
    covariates.
    """
    try:
        from statsmodels.regression.linear_model import OLS
    except ImportError:
        warnings.warn("Statsmodels is required to run this test")
        raise nose.SkipTest
    rng = check_random_state(random_state)

    # design parameters
    n_samples = 50
    n_targets = 10
    n_covars = 2

    # create design
    target_vars = rng.randn(n_samples, n_targets)
    tested_var = rng.randn(n_samples, 1)
    confounding_vars = rng.randn(n_samples, n_covars)

    # statsmodels OLS
    fvals = np.empty((n_targets, 1))
    test_matrix = np.array([[1.] + [0.] * n_covars])
    for i in range(n_targets):
        ols = OLS(target_vars[:, i],
                  np.hstack((tested_var, confounding_vars)))
        fvals[i] = ols.fit().f_test(test_matrix).fvalue[0][0]

    # permuted OLS
    _, orig_scores, _ = permuted_ols(tested_var, target_vars,
                                     confounding_vars,
                                     model_intercept=False,
                                     n_perm=0, random_state=random_state)
    assert_almost_equal(fvals, orig_scores, decimal=6)

    ### Adds intercept
    # permuted OLS
    _, orig_scores_addintercept, _ = permuted_ols(
        tested_var, target_vars, confounding_vars, model_intercept=True,
        n_perm=0, random_state=random_state)

    # statsmodels OLS
    confounding_vars = np.hstack((confounding_vars, np.ones((n_samples, 1))))
    fvals_addintercept = np.empty((n_targets, 1))
    test_matrix = np.array([[1.] + [0.] * (n_covars + 1)])
    for i in range(n_targets):
        ols = OLS(target_vars[:, i],
                  np.hstack((tested_var, confounding_vars)))
        fvals_addintercept[i] = ols.fit().f_test(test_matrix).fvalue[0][0]
    assert_array_almost_equal(fvals_addintercept,
                              orig_scores_addintercept, decimal=6)
def test_qr_equiv(cov_info):
    cov_type, cov_kwds = cov_info
    rs = np.random.RandomState(123498)
    x = rs.standard_normal((500, 3))
    b = np.ones(3)
    y = x @ b + rs.standard_normal(500)
    mod = OLS(y, x)
    pinv_fit = mod.fit(cov_type=cov_type, cov_kwds=cov_kwds)
    qr_fit = mod.fit(cov_type=cov_type, cov_kwds=cov_kwds, method="qr")
    assert_allclose(pinv_fit.bse, qr_fit.bse)
def setup_class(cls):
    cls.cov_type = 'HAC'

    kwds = {'kernel': sw.weights_uniform, 'maxlags': 2}
    mod1 = GLM(endog, exog, family=families.Gaussian())
    cls.res1 = mod1.fit(cov_type='HAC', cov_kwds=kwds)

    mod2 = OLS(endog, exog)
    cls.res2 = mod2.fit(cov_type='HAC', cov_kwds=kwds)

    # for debugging
    cls.res3 = mod2.fit(cov_type='HAC', cov_kwds={'maxlags': 2})
def test_estimates():
    mod = RecursiveLS(endog, exog)
    res = mod.fit()

    # Test for start_params
    assert_equal(mod.start_params, 0)

    # Test the RLS coefficient estimates against those from R (quantreg)
    # Due to initialization issues, we get more agreement as we get
    # farther from the initial values.
    assert_allclose(res.recursive_coefficients.filtered[:, 2:10].T,
                    results_R.iloc[:8][['beta1', 'beta2']],
                    atol=1e-2, rtol=1e-3)
    assert_allclose(res.recursive_coefficients.filtered[:, 9:20].T,
                    results_R.iloc[7:18][['beta1', 'beta2']],
                    atol=1e-3, rtol=1e-4)
    assert_allclose(res.recursive_coefficients.filtered[:, 19:].T,
                    results_R.iloc[17:][['beta1', 'beta2']],
                    atol=1e-4, rtol=1e-4)

    # Test the RLS estimates against OLS estimates
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    assert_allclose(res.params, res_ols.params)
def fit_dlogM_mw(tab, sfrsd_tab, mltype='ring', mlb='i'):
    merge_tab = t.join(tab, sfrsd_tab, 'plateifu')
    is_agn = m.mask_from_maskbits(merge_tab['mngtarg3'], [1, 2, 3, 4])

    mlb_ix = totalmass.StellarMass.bands_ixs[mlb]
    absmag_sun_mlb = totalmass.StellarMass.absmag_sun[mlb_ix]

    logmass_in_ifu = merge_tab['mass_in_ifu'].to(u.dex(u.Msun))
    logmass_in_ifu_lw = merge_tab['ml_fluxwt'] + \
        merge_tab['ifu_absmag'][:, mlb_ix].to(
            u.dex(m.bandpass_sol_l_unit),
            totalmass.bandpass_flux_to_solarunits(absmag_sun_mlb))
    merge_tab['dlogmass_lw'] = logmass_in_ifu - logmass_in_ifu_lw

    ha_corr = np.exp(merge_tab['mean_atten_mwtd'] * (6563 / 5500)**-1.3)
    sfrsd = merge_tab['sigma_sfr'] * ha_corr * u.Msun / u.yr / u.pc**2
    mass_pca = merge_tab['mass_in_ifu'] + merge_tab['outer_mass_{}'.format(mltype)]
    ssfrsd = sfrsd / mass_pca
    merge_tab['log_ssfrsd'] = ssfrsd.to(u.dex(ssfrsd.unit))
    merge_tab['log_ssfrsd'][~np.isfinite(merge_tab['log_ssfrsd'])] = \
        np.nan * merge_tab['log_ssfrsd'].unit

    ols = OLS(
        endog=np.array(merge_tab['dlogmass_lw'][~is_agn]),
        exog=sm_add_constant(
            t.Table(merge_tab['mean_atten_mwtd', 'std_atten_mwtd',
                              'log_ssfrsd'])[~is_agn].to_pandas(),
            prepend=False),
        hasconst=True, missing='drop')

    olsfit = ols.fit()

    return olsfit
def setup(self):
    model = OLS(self.res1.model.endog, self.res1.model.exog)
    # res_ols = self.res1.model.fit(cov_type='cluster',
    res_ols = model.fit(
        cov_type="cluster",
        cov_kwds=dict(
            groups=self.groups,
            use_correction=False,
            use_t=False,
            df_correction=True,
        ),
    )
    self.res3 = self.res1
    self.res1 = res_ols
    self.bse_robust = res_ols.bse
    self.cov_robust = res_ols.cov_params()
    cov1 = sw.cov_cluster(self.res1, self.groups, use_correction=False)
    se1 = sw.se_cov(cov1)
    self.bse_robust2 = se1
    self.cov_robust2 = cov1
    self.small = False
    self.res2 = res2.results_cluster_large

    self.skip_f = True
    self.rtol = 1e-6
    self.rtolh = 1e-10
def fit_ols(regressors, x):
    X = c_[list(regressors.values())].T
    X1 = DataFrame(X, columns=regressors.keys())
    X1 = add_constant(X1)
    model = OLS(x, X1, missing='drop')
    return model.fit()
def fit_dlogM_mw(tab, sfrsd_tab, mltype='ring', mlb='i'):
    merge_tab = t.join(tab, sfrsd_tab, 'plateifu')
    is_agn = m.mask_from_maskbits(merge_tab['mngtarg3'], [1, 2, 3, 4])

    mlb_ix = totalmass.StellarMass.bands_ixs[mlb]
    absmag_sun_mlb = totalmass.StellarMass.absmag_sun[mlb_ix]

    logmass_in_ifu = merge_tab['mass_in_ifu'].to(u.dex(u.Msun))
    logmass_in_ifu_lw = merge_tab['ml_fluxwt'] + merge_tab[f'logsollum_in_ifu_{mlb}']
    merge_tab['dlogmass_lw'] = logmass_in_ifu - logmass_in_ifu_lw

    std_atten_mwtd = merge_tab['std_atten_mwtd']
    mean_atten_mwtd = merge_tab['mean_atten_mwtd']

    ha_corr = np.exp(merge_tab['mean_atten_mwtd'] * (6563 / 5500)**-1.3)
    sfrsd = merge_tab['sigma_sfr'] * ha_corr * u.Msun / u.yr / u.pc**2
    outer_mass = (merge_tab[f'outerml_{mltype}'] +
                  merge_tab[f'logsollum_outer_{mlb}']).to(u.Msun)
    mass_pca = merge_tab['mass_in_ifu'].to(u.Msun) + outer_mass
    ssfrsd = sfrsd / mass_pca
    merge_tab['log_ssfrsd'] = ssfrsd.to(u.dex(ssfrsd.unit))
    merge_tab['log_ssfrsd'][~np.isfinite(merge_tab['log_ssfrsd'])] = \
        np.nan * merge_tab['log_ssfrsd'].unit

    ols = OLS(
        endog=np.array(merge_tab['dlogmass_lw'][~is_agn]),
        exog=sm_add_constant(
            t.Table(merge_tab['mean_atten_mwtd', 'std_atten_mwtd',
                              'log_ssfrsd'])[~is_agn].to_pandas(),
            prepend=False),
        hasconst=True, missing='drop')

    olsfit = ols.fit()

    return olsfit
def port_ret_summary():
    output = {}
    for prefix in ['part1_dollar_port#', 'part1_carry_timed_dollar_port#']:
        for suffix in ['', ' #no peg']:
            ret = get_port_ret_df(prefix, suffix)
            ret.loc[:, 'carry'] = data['carry' + suffix]
            ret = ret.dropna()
            label = prefix + suffix
            t = ret.mean() / ret.std() * len(ret) ** 0.5  # t on mean return

            # regress on carry
            x = sm.add_constant(ret['carry'])
            ols_series = pd.Series()
            for i in range(7):
                if i == 6:
                    olslabel = 'HML'
                    y = ret[5] - ret[0]
                else:
                    olslabel = str(i)
                    y = ret[i]
                model = OLS(y, x)
                results = model.fit()
                ols_series = ols_series.combine_first(
                    pd.Series({'alpha to carry': results.params['const'] * 12,
                               'beta to carry': results.params['carry'],
                               't(alpha to carry)': results.tvalues['const'],
                               't(beta to carry)': results.tvalues['carry']}
                              ).add_suffix('#' + olslabel))

            output[label] = (ret.mean().multiply(12)).add_prefix('mean return#').combine_first(
                t.add_prefix('t(mean return)#')).combine_first(
                pd.Series({'nobs': len(ret)})).combine_first(ols_series)

    output = pd.DataFrame(output)
def test_resid_recursive():
    mod = RecursiveLS(endog, exog)
    res = mod.fit()

    # Test the recursive residuals against those from R (strucchange)
    # Due to initialization issues, we get more agreement as we get
    # farther from the initial values.
    assert_allclose(res.resid_recursive[2:10].T,
                    results_R.iloc[:8]['rec_resid'],
                    atol=1e-2, rtol=1e-3)
    assert_allclose(res.resid_recursive[9:20].T,
                    results_R.iloc[7:18]['rec_resid'],
                    atol=1e-3, rtol=1e-4)
    assert_allclose(res.resid_recursive[19:].T,
                    results_R.iloc[17:]['rec_resid'],
                    atol=1e-4, rtol=1e-4)

    # Test the RLS estimates against those from Stata (cusum6)
    assert_allclose(res.resid_recursive[3:],
                    results_stata.iloc[3:]['rr'], atol=1e-3)

    # Test the RLS estimates against statsmodels estimates
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    desired_resid_recursive = recursive_olsresiduals(res_ols)[4][2:]
    assert_allclose(res.resid_recursive[2:], desired_resid_recursive,
                    atol=1e-4, rtol=1e-4)
def regress_out_pupils(raw, ocular_channels=['Fpz', 'Fp1', 'Fp2', 'AF7', 'AF8'],
                       method='PCA'):
    """
    raw: Continuous raw data in MNE format
    ocular_channels: can be labels of EOG channels, or EEG channels close to
        the eyes if no EOG was recorded
    method: how to combine the ocular channels. Can be 'PCA', 'mean', or
        'median'.
    """
    raw_data = raw.get_data(picks='eeg')
    ocular_data = raw.get_data(picks=ocular_channels)
    if method == 'PCA':
        pca = PCA()
        comps = pca.fit_transform(ocular_data.T)
        ocular_chan = comps[:, 0]
    elif method == 'mean':
        ocular_chan = np.mean(ocular_data, axis=0)
    elif method == 'median':
        ocular_chan = np.median(ocular_data, axis=0)
    for ch in range(raw_data.shape[0]):
        m = OLS(raw_data[ch, :], ocular_chan)
        raw_data[ch, :] -= m.fit().predict()
    raw._data[:raw_data.shape[0], :] = raw_data
    return raw
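# Hedged usage sketch for regress_out_pupils above (synthetic data; channel
# names, sampling rate, and scaling are invented for illustration, and mne
# must be installed):
import numpy as np
import mne

ch_names = ['Fpz', 'Fp1', 'Fp2', 'AF7', 'AF8', 'Cz', 'Pz']
info = mne.create_info(ch_names, sfreq=250., ch_types='eeg')
raw = mne.io.RawArray(np.random.randn(len(ch_names), 2500) * 1e-5, info)
raw_clean = regress_out_pupils(raw, method='mean')   # or method='PCA'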
def rew_prev_behaviour(data):
    dm = data['DM'][0]
    results_array = []
    std_err = []

    for s, sess in enumerate(dm):
        DM = dm[s]
        choices = DM[:, 1]
        reward = DM[:, 2]

        reward_2_ago = reward[1:-2]
        reward_3_ago = reward[:-3]
        reward_prev = reward[2:-1]
        reward_current = reward[3:]

        choices_2_ago = 0.5 - choices[1:-2]
        choices_3_ago = 0.5 - choices[:-3]
        choices_prev = 0.5 - choices[2:-1]
        choices_current = 0.5 - choices[3:]

        choices_2_ago_rew = ((choices_2_ago) * (reward_2_ago - 0.5)) * 2
        choices_3_ago_rew = ((choices_3_ago) * (reward_3_ago - 0.5)) * 2
        choices_prev_rew = ((choices_prev) * (reward_prev - 0.5)) * 2

        ones = np.ones(len(choices_current))
        trials = len(choices_current)

        predictors_all = OrderedDict([
            ('1 ago Outcome', reward_prev),
            ('2 ago Outcome', reward_2_ago),
            ('3 ago Outcome', reward_3_ago),
            # ('4 ago Outcome', reward_4_ago),
            ('1 ago Choice', choices_prev),
            ('2 ago Choice', choices_2_ago),
            ('3 ago Choice', choices_3_ago),
            # ('4 ago Choice', choices_4_ago),
            ('1 ago Choice Rew', choices_prev_rew),
            ('2 ago Choice Rew', choices_2_ago_rew),
            ('3 ago Choice Rew', choices_3_ago_rew),
            # ('4 ago Choice Rew', choices_4_ago_rew),
            ('ones', ones)
        ])

        X = np.vstack(predictors_all.values()).T[:trials, :].astype(float)
        # choices_current = choices_current.reshape(trials, 1)
        rank = np.linalg.matrix_rank(X)
        n_predictors = X.shape[1]

        # model = sm.Logit(choices_current, X)
        model = OLS(choices_current, X)
        results = model.fit()
        results_array.append(results.params)
        cov = results.cov_params()
        std_err.append(np.sqrt(np.diag(cov)))

    average = np.sum((results_array), 0) / np.sqrt(np.sum(std_err, 0))
def setup_class(cls):
    cls.cov_type = 'HC0'

    mod1 = GLM(endog, exog, family=families.Gaussian())
    cls.res1 = mod1.fit(cov_type='HC0')

    mod2 = OLS(endog, exog)
    cls.res2 = mod2.fit(cov_type='HC0')
def ols_autoreg_result(request):
    ar, seasonal, trend, exog, cov_type = request.param
    y, x, endog, exog = gen_ols_regressors(ar, seasonal, trend, exog)
    ar_mod = AutoReg(y, ar, seasonal=seasonal, trend=trend, exog=x)
    ar_res = ar_mod.fit(cov_type=cov_type)
    ols = OLS(endog, exog)
    ols_res = ols.fit(cov_type=cov_type, use_t=False)
    return ar_res, ols_res
def setup_class(cls):
    cls.cov_type = 'cluster'

    mod1 = GLM(endog, exog, family=families.Gaussian())
    cls.res1 = mod1.fit(cov_type='cluster', cov_kwds=dict(groups=group))

    mod2 = OLS(endog, exog)
    cls.res2 = mod2.fit(cov_type='cluster', cov_kwds=dict(groups=group))
def stats(self, parent):
    from statsmodels.regression.linear_model import OLS
    model = OLS(parent.endog, parent.exog)
    result = model.fit()
    q = len(result.params) // 2
    stats = np.abs(result.params[0:q]) - np.abs(result.params[q:])
    return stats
def bivariate_expression_plot(
    ax: plt.Axes,
    data: [np.ndarray, np.ndarray],
    feature: str,
    feature_name: str = "Feature",
    cmap: colormap = plt.cm.magma,
    alpha: float = 0.05,
    distance_scale_factor: float = 1,
    **kwargs,
) -> np.ndarray:

    xs = data[0]
    ys = data[1]

    model_full = OLS(ys, xs, hasconst=True)
    model_x1 = OLS(ys, xs[:, [0, 1]])
    model_x2 = OLS(ys, xs[:, [0, 2]])
    model_0 = OLS(ys, xs[:, 0])

    results_full = model_full.fit()
    results_x1 = model_x1.fit()
    results_x2 = model_x2.fit()
    results_0 = model_0.fit()

    likelihood = np.array(
        [results_full.llf, results_x1.llf, results_x2.llf, results_0.llf])

    insig = np.any(results_full.pvalues > alpha)

    XY, Z, reshape_shape = expression_fields(xs, ys, results_full)
    XY = XY * distance_scale_factor

    plot_field(ax,
               Z.reshape(reshape_shape),
               XY,
               fontsize=kwargs.get("label_fontsize", 15),
               cmap=cmap)

    ax.set_title("{} : {}".format(feature_name, feature) +
                 ("(*)" if insig else ""),
                 fontsize=kwargs.get("title_fontsize", 25))

    return likelihood
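# The four log-likelihoods returned above support nested model comparisons;
# a hedged sketch (not part of the original function) of the matching
# likelihood-ratio test, assuming the reduced model drops `df` columns:
from scipy import stats

def lr_pvalue(llf_full, llf_reduced, df=1):
    lr = 2.0 * (llf_full - llf_reduced)   # LR statistic for nested OLS fits
    return stats.chi2.sf(lr, df)

# e.g. lr_pvalue(likelihood[0], likelihood[3], df=2) tests both covariates jointly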
def estimate_ols(x, y, constant=True):
    from statsmodels.regression.linear_model import OLS
    if constant:
        reg = OLS(y, add_constant(x))
    else:
        reg = OLS(y, x)  # OLS without an intercept
    result = reg.fit()
    betahat = result.params
    return np.array(betahat)
def polynomial_regression(df, target_col, cutoff_date, y_max, min_license_year,
                          max_license_year, degree=5, type=""):
    print(f"{'=' * 10}Polynomial Regression {degree} Method Results{'=' * 10}")
    df.reset_index(inplace=True)
    X = df[["date", "treatment"]].astype(int).values
    y = df[[target_col]].values
    polyfeatures = PolynomialFeatures(degree, include_bias=False).fit_transform(
        X[:, 0].reshape(-1, 1))
    X_c = np.concatenate([polyfeatures, X[:, 1].reshape(-1, 1)], axis=1)

    X_sm = sm.add_constant(X_c.copy())
    lr_stats = OLS(y, X_sm)
    results = lr_stats.fit(method='qr')

    polyreg = LinearRegression()
    polyreg.fit(X_c, y)
    effect = polyreg.coef_[0][-1]
    print(f"Treatment effect on {target_col} is {effect}")
    print(f"CI: {np.round(results.conf_int()[-1], 3)}, pvalue={round(results.pvalues[-1], 3)}")

    plt.scatter(X[:, 0], y, c="black")
    plt.xlim([min_license_year - 0.2, max_license_year + 0.2])  # 2005.8, 2018.2
    plt.ylim([0, y_max])

    X0 = X_c[X_c[:, -1] == 0]
    cutoff_date_polyfeatures = PolynomialFeatures(degree, include_bias=False).fit_transform(
        np.array([cutoff_date]).reshape(-1, 1))
    X0 = np.concatenate([X0, [list(cutoff_date_polyfeatures[0]) + [0]]])
    X1 = X_c[X_c[:, -1] == 1]
    X1 = np.concatenate([[list(cutoff_date_polyfeatures[0]) + [1]], X1])

    # 300 represents the number of points to make between T.min and T.max
    xnew = np.linspace(X0[:, 0].min(), X0[:, 0].max(), 300)
    spl = make_interp_spline(X0[:, 0], polyreg.predict(X0), k=3)  # type: BSpline
    power_smooth = spl(xnew)
    plt.plot(xnew, power_smooth, label="old accompaniment program")

    xnew = np.linspace(X1[:, 0].min(), X1[:, 0].max(), 300)
    spl = make_interp_spline(X1[:, 0], polyreg.predict(X1), k=3)  # type: BSpline
    power_smooth = spl(xnew)
    plt.plot(xnew, power_smooth, label="new accompaniment program")

    # plt.plot(X0[:, 0], polyreg.predict(X0), c="blue", label="old accompaniment program")
    # plt.plot(X1[:, 0], polyreg.predict(X1), c="orange", label="new accompaniment program")

    plt.axvline(x=cutoff_date, linestyle='--', c="black", label="cut-off date")
    plt.xlabel("Year of issued license")
    if target_col == "normalized_number_of_drivers_in_accidents":
        ylabel = "number of drivers in accidents in 2019 per 10K drivers"
    else:
        ylabel = "number of drivers in accidents in 2019"
    plt.ylabel(ylabel)
    plt.title(f"RD by Polynomial Regression w/ degree {degree}")
    plt.legend()
    plt.savefig(f"results/PolynomialRegression_deg_{degree}_{target_col}_{type}.png")
    plt.show()
    return effect
def test_filter():
    # Basic test for filtering
    mod = RecursiveLS(endog, exog)
    res = mod.filter()

    # Test the RLS estimates against OLS estimates
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    assert_allclose(res.params, res_ols.params)
def test_endog():
    # Tests for numpy input
    mod = RecursiveLS(endog.values, exog.values)
    res = mod.fit()

    # Test the RLS estimates against OLS estimates
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    assert_allclose(res.params, res_ols.params)

    # Tests for 1-dim exog
    mod = RecursiveLS(endog, dta['m1'].values)
    res = mod.fit()

    # Test the RLS estimates against OLS estimates
    mod_ols = OLS(endog, dta['m1'])
    res_ols = mod_ols.fit()
    assert_allclose(res.params, res_ols.params)
def fit_linear_model(formula, data_dict):
    """
    Creates a statsmodels OLS model for the R-style (patsy) formula given the
    variables in data_dict (created with make_dict_for_regression).

    Returns the statsmodels results object.
    """
    y, x = dmatrices(formula, data=data_dict, return_type='dataframe')
    model = OLS(y, x)
    return model.fit()
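# Hedged usage sketch for fit_linear_model above (the data and variable
# names are invented for illustration): patsy's dmatrices accepts a plain
# dict of equal-length arrays as the formula environment.
import numpy as np

rng = np.random.default_rng(2)
demo = {'y': rng.normal(size=50),
        'x1': rng.normal(size=50),
        'x2': rng.normal(size=50)}
results = fit_linear_model('y ~ x1 + x2', demo)
print(results.params)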
def table_2_f_test():
    output = {}
    # f-test on whether all mean returns / alphas are the same
    for prefix in ['part1_dollar_port#', 'part1_carry_timed_dollar_port#']:
        for suffix in ['', ' #no peg']:
            ret = get_port_ret_df(prefix, suffix)
            ret.loc[:, 'carry'] = data['carry' + suffix]
            ret = ret.dropna()
            y = ret.drop('carry', axis=1).unstack()

            if prefix == 'part1_carry_timed_dollar_port#':
                x = {}
                for i in range(6):
                    x['meandiff#' + str(i)] = y * 0
                    x['meandiff#' + str(i)].loc[i:] = 1
                x = pd.DataFrame(x)
                # test on alpha
                for i in range(6):
                    df = pd.DataFrame(0, index=ret.index, columns=range(6))
                    df[i] = ret['carry']
                    x.loc[:, 'carry#' + str(i)] = df.unstack()
                model = OLS(y, x)
                results = model.fit(cov_type='cluster',
                                    cov_kwds={'groups': y.index.get_level_values(1)})
                rmat = np.identity(len(results.params))[1:6, :]
                f_test = results.f_test(rmat)
                label = prefix + suffix + ' f-test on alpha to carry'
                output[label] = pd.Series({'f': f_test.fvalue[0][0],
                                           'pvalue': f_test.pvalue,
                                           'nobs': results.nobs})

            # test on returns
            x = {}
            for i in range(6):
                x['meandiff#' + str(i)] = y * 0
                x['meandiff#' + str(i)].loc[i:] = 1
            x = sm.add_constant(pd.DataFrame(x))
            model = OLS(y, x)
            results = model.fit(cov_type='cluster',
                                cov_kwds={'groups': y.index.get_level_values(1)})
            rmat = np.identity(len(results.params))[1:, :]
            f_test = results.f_test(rmat)
            label = prefix + suffix + ' f-test'
            output[label] = pd.Series({'f': f_test.fvalue[0][0],
                                       'pvalue': f_test.pvalue,
                                       'nobs': results.nobs})

    output = pd.DataFrame(output).T
    return output
def setup_class(cls):
    cls.cov_type = 'HAC'

    kwds = {'maxlags': 2}
    mod1 = GLM(endog, exog, family=families.Gaussian())
    cls.res1 = mod1.fit(cov_type='HAC', cov_kwds=kwds)

    mod2 = OLS(endog, exog)
    cls.res2 = mod2.fit(cov_type='HAC', cov_kwds=kwds)
def model_m_L(data):
    # .as_matrix() was removed from pandas; .to_numpy() is the replacement
    log_m, log_L = np.log10(data).dropna().to_numpy().T
    X = add_constant(log_m)
    model = OLS(log_L, X)
    results = model.fit()
    print(results.summary())
    return results.params
def setup_class(cls):
    cls.cov_type = 'HAC'

    kwds = {'kernel': sw.weights_uniform, 'maxlags': 2}
    mod1 = GLM(endog, exog, family=families.Gaussian())
    cls.res1 = mod1.fit(cov_type='HAC', cov_kwds=kwds)

    # check kernel as string
    mod2 = OLS(endog, exog)
    kwds2 = {'kernel': 'uniform', 'maxlags': 2}
    cls.res2 = mod2.fit(cov_type='HAC', cov_kwds=kwds2)
def setup_class(cls):
    cls.cov_type = 'HAC'

    # check kernel specified as string
    kwds = {'kernel': 'bartlett', 'maxlags': 2}
    mod1 = GLM(endog, exog, family=families.Gaussian())
    cls.res1 = mod1.fit(cov_type='HAC', cov_kwds=kwds)

    mod2 = OLS(endog, exog)
    kwds2 = {'maxlags': 2}
    cls.res2 = mod2.fit(cov_type='HAC', cov_kwds=kwds2)
def q1_lab4(X, Y):
    model = OLS(Y, X)     # linear regression with n observations and k variables
    result = model.fit()  # fit the model
    # Question 1.2:
    # q_j = vector of length 8 - the probability of voting per party in
    # Israel (if ***everybody would vote***)
    potential_per_party = (X * result.params).sum(axis=0)
    total_potential = potential_per_party.sum()  # sum of sums
    q_j_hat = potential_per_party / total_potential  # ratio, as we saw in class
    return q_j_hat
def test_cusum():
    mod = RecursiveLS(endog, exog)
    res = mod.fit()

    # Test the cusum statistics against those from R (strucchange)
    # These values are not even close to ours, to Stata's, or to the
    # alternate statsmodels values
    # assert_allclose(res.cusum, results_R['cusum'])

    # Test the cusum statistics against Stata (cusum6)
    # Note: cusum6 excludes the first 3 elements due to OLS initialization
    # whereas we exclude only the first 2. Also there are initialization
    # differences (as seen above in the recursive residuals).
    # Here we explicitly reverse engineer our cusum to match theirs to show
    # the equivalence
    d = res.nobs_diffuse
    cusum = res.cusum * np.std(res.resid_recursive[d:], ddof=1)
    cusum -= res.resid_recursive[d]
    cusum /= np.std(res.resid_recursive[d + 1:], ddof=1)
    cusum = cusum[1:]
    assert_allclose(cusum, results_stata.iloc[3:]['cusum'],
                    atol=1e-6, rtol=1e-5)

    # Test the cusum statistics against statsmodels estimates
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    desired_cusum = recursive_olsresiduals(res_ols)[-2][1:]
    assert_allclose(res.cusum, desired_cusum, rtol=1e-6)

    # Test the cusum bounds against Stata (cusum6)
    # Again note that cusum6 excludes the first 3 elements, so we need to
    # change the ddof and points.
    actual_bounds = res._cusum_significance_bounds(
        alpha=0.05, ddof=1, points=np.arange(d + 1, res.nobs))
    desired_bounds = results_stata.iloc[3:][['lw', 'uw']].T
    assert_allclose(actual_bounds, desired_bounds, rtol=1e-6)

    # Test the cusum bounds against statsmodels
    actual_bounds = res._cusum_significance_bounds(
        alpha=0.05, ddof=0, points=np.arange(d, res.nobs))
    desired_bounds = recursive_olsresiduals(res_ols)[-1]
    assert_allclose(actual_bounds, desired_bounds)

    # Test for invalid calls
    assert_raises(ValueError, res._cusum_squares_significance_bounds,
                  alpha=0.123)
def _get_start(self):
    # Use OLS to get starting values for mean structure parameters
    model = OLS(self.endog, self.exog)
    result = model.fit()

    m = self.exog_scale.shape[1] + self.exog_smooth.shape[1]
    if self._has_noise:
        m += self.exog_noise.shape[1]

    return np.concatenate((result.params, np.zeros(m)))
def setup_class(cls):
    cls.cov_type = 'hac-panel'
    # time index is just made up to have a test case
    groups = np.repeat(np.arange(5), 7)[:-1]
    mod1 = GLM(endog.copy(), exog.copy(), family=families.Gaussian())
    kwds = dict(groups=pd.Series(groups),   # check for #3606
                maxlags=2,
                kernel=sw.weights_uniform,
                use_correction='hac',
                df_correction=False)
    cls.res1 = mod1.fit(cov_type='hac-panel', cov_kwds=kwds)

    mod2 = OLS(endog, exog)
    cls.res2 = mod2.fit(cov_type='hac-panel', cov_kwds=kwds)
def setup_class(cls):
    cls.cov_type = 'hac-groupsum'
    # time index is just made up to have a test case
    time = np.tile(np.arange(7), 5)[:-1]
    mod1 = GLM(endog, exog, family=families.Gaussian())
    kwds = dict(time=pd.Series(time),   # check for #3606
                maxlags=2,
                use_correction='hac',
                df_correction=False)
    cls.res1 = mod1.fit(cov_type='hac-groupsum', cov_kwds=kwds)
    cls.res1b = mod1.fit(cov_type='nw-groupsum', cov_kwds=kwds)

    mod2 = OLS(endog, exog)
    cls.res2 = mod2.fit(cov_type='hac-groupsum', cov_kwds=kwds)
def test_regularized_refit():
    n = 100
    p = 5
    np.random.seed(3132)
    xmat = np.random.normal(size=(n, p))

    # covariates 0 and 2 matter
    yvec = xmat[:, 0] + xmat[:, 2] + np.random.normal(size=n)

    model1 = OLS(yvec, xmat)
    result1 = model1.fit_regularized(alpha=2., L1_wt=0.5, refit=True)

    model2 = OLS(yvec, xmat[:, [0, 2]])
    result2 = model2.fit()

    ii = [0, 2]
    assert_allclose(result1.params[ii], result2.params)
    assert_allclose(result1.bse[ii], result2.bse)
def setup_class(cls):
    cls.cov_type = 'hac-panel'
    # time index is just made up to have a test case
    time = np.tile(np.arange(7), 5)[:-1]
    mod1 = GLM(endog.copy(), exog.copy(), family=families.Gaussian())
    kwds = dict(time=time,
                maxlags=2,
                kernel=sw.weights_uniform,
                use_correction='hac',
                df_correction=False)
    cls.res1 = mod1.fit(cov_type='hac-panel', cov_kwds=kwds)
    cls.res1b = mod1.fit(cov_type='nw-panel', cov_kwds=kwds)

    mod2 = OLS(endog, exog)
    cls.res2 = mod2.fit(cov_type='hac-panel', cov_kwds=kwds)
def fit(self):
    """
    Fits the model and provides regression results.

    Returns
    -------
    Results : class
        Empirical likelihood regression class.
    """
    exog_with = add_constant(self.exog, prepend=True)
    restricted_model = OLS(self.endog, exog_with)
    restricted_fit = restricted_model.fit()
    restricted_el = restricted_fit.el_test(
        np.array([0]), np.array([0]), ret_params=1)
    params = np.squeeze(restricted_el[3])
    beta_hat_llr = restricted_el[0]
    llf = np.sum(np.log(restricted_el[2]))
    return OriginResults(restricted_model, params, beta_hat_llr, llf)
def structure(self):
    '''Reruns the regression by removing one of the predictor columns and
    then plots the residuals versus the target'''
    # TODO: Make the chart label which predictor was removed

    # The length of the transpose of the predictors array
    # gives the number of predictors in the model
    model_list = []
    for i in range(1, len(self.predictors_array.transpose())):
        temp_target = self.predictors_array[:, i].reshape(
            [len(self.predictors_array), 1])
        temp_model = OLS(temp_target,
                         np.delete(self.predictors_array, i, 1))
        temp_results = temp_model.fit()
        model_list.append(temp_results)
        del temp_target
        del temp_model
    for model in model_list:
        plt.scatter(model.fittedvalues, model.resid)
        plt.show()
def fit(self):
    """
    Fits the model and provides regression results.

    Returns
    -------
    Results : class
        Empirical likelihood regression class.
    """
    exog_with = add_constant(self.exog, prepend=True)
    unrestricted_fit = OLS(self.endog, self.exog).fit()
    restricted_model = OLS(self.endog, exog_with)
    restricted_fit = restricted_model.fit()
    restricted_el = restricted_fit.el_test(
        np.array([0]), np.array([0]), ret_params=1)
    params = np.squeeze(restricted_el[3])
    beta_hat_llr = restricted_el[0]
    ls_params = np.hstack((0, unrestricted_fit.params))
    ls_llr = restricted_fit.el_test(ls_params,
                                    np.arange(self.nvar + 1, dtype=int))[0]
    return OriginResults(restricted_model, params, beta_hat_llr, ls_llr)
def setupClass(cls):
    from .results.results_regression import Longley
    data = longley.load()
    data.exog = add_constant(data.exog, prepend=False)
    res1 = OLS(data.endog, data.exog).fit()
    res2 = Longley()
    res2.wresid = res1.wresid  # workaround hack
    cls.res1 = res1
    cls.res2 = res2

    res_qr = OLS(data.endog, data.exog).fit(method="qr")

    model_qr = OLS(data.endog, data.exog)
    Q, R = np.linalg.qr(data.exog)
    model_qr.exog_Q, model_qr.exog_R = Q, R
    model_qr.normalized_cov_params = np.linalg.inv(np.dot(R.T, R))
    model_qr.rank = np_matrix_rank(R)
    res_qr2 = model_qr.fit(method="qr")

    cls.res_qr = res_qr
    cls.res_qr_manual = res_qr2
def test_resid_recursive():
    mod = RecursiveLS(endog, exog)
    res = mod.fit()

    # Test the recursive residuals against those from R (strucchange)
    assert_allclose(res.resid_recursive[2:10].T,
                    results_R.iloc[:8]['rec_resid'])
    assert_allclose(res.resid_recursive[9:20].T,
                    results_R.iloc[7:18]['rec_resid'])
    assert_allclose(res.resid_recursive[19:].T,
                    results_R.iloc[17:]['rec_resid'])

    # Test the RLS estimates against those from Stata (cusum6)
    assert_allclose(res.resid_recursive[3:],
                    results_stata.iloc[3:]['rr'],
                    atol=1e-5, rtol=1e-5)

    # Test the RLS estimates against statsmodels estimates
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    desired_resid_recursive = recursive_olsresiduals(res_ols)[4][2:]
    assert_allclose(res.resid_recursive[2:], desired_resid_recursive)
def test_estimates():
    mod = RecursiveLS(endog, exog)
    res = mod.fit()

    # Test for start_params
    assert_equal(mod.start_params, 0)

    # Test the RLS coefficient estimates against those from R (quantreg)
    # Due to initialization issues, we get more agreement as we get
    # farther from the initial values.
    assert_allclose(res.recursive_coefficients.filtered[:, 2:10].T,
                    results_R.iloc[:8][['beta1', 'beta2']], rtol=1e-5)
    assert_allclose(res.recursive_coefficients.filtered[:, 9:20].T,
                    results_R.iloc[7:18][['beta1', 'beta2']])
    assert_allclose(res.recursive_coefficients.filtered[:, 19:].T,
                    results_R.iloc[17:][['beta1', 'beta2']])

    # Test the RLS estimates against OLS estimates
    mod_ols = OLS(endog, exog)
    res_ols = mod_ols.fit()
    assert_allclose(res.params, res_ols.params)
def test_single_partition():
    # tests that the results make sense if we have a single partition

    np.random.seed(435265)
    N = 200
    p = 10
    m = 1

    beta = np.random.normal(size=p)
    beta = beta * np.random.randint(0, 2, p)
    X = np.random.normal(size=(N, p))
    y = X.dot(beta) + np.random.normal(size=N)

    # test regularized OLS v. naive
    db_mod = DistributedModel(m)
    fitOLSdb = db_mod.fit(_data_gen(y, X, m), fit_kwds={"alpha": 0})

    nv_mod = DistributedModel(m, estimation_method=_est_regularized_naive,
                              join_method=_join_naive)
    fitOLSnv = nv_mod.fit(_data_gen(y, X, m), fit_kwds={"alpha": 0})

    ols_mod = OLS(y, X)
    fitOLS = ols_mod.fit_regularized(alpha=0)

    assert_allclose(fitOLSdb.params, fitOLS.params)
    assert_allclose(fitOLSnv.params, fitOLS.params)

    # test regularized
    nv_mod = DistributedModel(m, estimation_method=_est_regularized_naive,
                              join_method=_join_naive)
    fitOLSnv = nv_mod.fit(_data_gen(y, X, m), fit_kwds={"alpha": 0.1})

    ols_mod = OLS(y, X)
    fitOLS = ols_mod.fit_regularized(alpha=0.1)

    assert_allclose(fitOLSnv.params, fitOLS.params)
# using GMM and IV2SLS classes
# ----------------------------

mod = IVGMM(endog, exog, instrument, nmoms=instrument.shape[1])
res = mod.fit()
modgmmols = IVGMM(endog, exog, exog, nmoms=exog.shape[1])
resgmmols = modgmmols.fit()
# the next is the same as IV2SLS, (Z'Z)^{-1} as weighting matrix
modgmmiv = IVGMM(endog, exog, instrument, nmoms=instrument.shape[1])  # same as mod
resgmmiv = modgmmiv.fitgmm(np.ones(exog.shape[1], float),
                           weights=np.linalg.inv(
                               np.dot(instrument.T, instrument)))
modls = IV2SLS(endog, exog, instrument)
resls = modls.fit()
modols = OLS(endog, exog)
resols = modols.fit()

print('\nIV case')
print('params')
print('IV2SLS', resls.params)
print('GMMIV ', resgmmiv)  # .params
print('GMM   ', res.params)
print('diff  ', res.params - resls.params)
print('OLS   ', resols.params)
print('GMMOLS', resgmmols.params)

print('\nbse')
print('IV2SLS', resls.bse)
print('GMM   ', mod.bse)  # bse currently only attached to model, not results
print('diff  ', mod.bse - resls.bse)
print('%-diff', resls.bse / mod.bse * 100 - 100)
nobs, k_vars = 200, 1
x = np.random.uniform(-2, 2, size=(nobs, k_vars))
x.sort()

order = 3
exog = x ** np.arange(order + 1)
beta = np.array([1, 1, 0.1, 0.0])[:order + 1]  # 1. / np.arange(1, order + 2)
y_true = np.dot(exog, beta)
y = y_true + sig_e * np.random.normal(size=nobs)
endog = y

print("DGP")
print("nobs=%d, beta=%r, sig_e=%3.1f" % (nobs, beta, sig_e))

mod_ols = OLS(endog, exog[:, :2])
res_ols = mod_ols.fit()

# 'cv_ls' [1000, 0.5] [0.01, 0.45]
tst = smke.TestFForm(
    endog,
    exog[:, :2],
    bw=[0.01, 0.45],
    var_type="cc",
    fform=lambda x, p: mod_ols.predict(p, x),
    estimator=lambda y, x: OLS(y, x).fit().params,
    nboot=1000,
)
print("bw", tst.bw)
print("tst.test_stat", tst.test_stat)
print(tst.sig)
print("tst.boots_results mean, min, max", (
def test_ols():
    # More comprehensive tests against OLS estimates
    mod = RecursiveLS(endog, dta['m1'])
    res = mod.fit()

    mod_ols = OLS(endog, dta['m1'])
    res_ols = mod_ols.fit()

    # Regression coefficients, standard errors, and estimated scale
    assert_allclose(res.params, res_ols.params)
    assert_allclose(res.bse, res_ols.bse)
    # Note: scale here is computed according to Harvey, 1989, 4.2.5, and is
    # called the ML estimator and sometimes (e.g. later in section 5)
    # denoted \tilde \sigma_*^2
    assert_allclose(res.filter_results.obs_cov[0, 0], res_ols.scale)

    # OLS residuals are equivalent to smoothed forecast errors
    # (the latter are defined as e_t|T by Harvey, 1989, 5.4.5)
    # (this follows since the smoothed state simply contains the
    # full-information estimates of the regression coefficients)
    actual = (mod.endog[:, 0] -
              np.sum(mod['design', 0, :, :] * res.smoothed_state, axis=0))
    assert_allclose(actual, res_ols.resid)

    # Given the estimate of scale as `sum(v_t^2 / f_t) / (T - d)` (see
    # Harvey, 1989, 4.2.5 on p. 183), then llf_recursive is equivalent to
    # the full OLS loglikelihood (i.e. without the scale concentrated out).
    desired = mod_ols.loglike(res_ols.params, scale=res_ols.scale)
    assert_allclose(res.llf_recursive, desired)

    # Alternatively, we can construct the concentrated OLS loglikelihood
    # by computing the scale term with `nobs` in the denominator rather than
    # `nobs - d`.
    scale_alternative = np.sum((
        res.standardized_forecasts_error[0, 1:] *
        res.filter_results.obs_cov[0, 0]**0.5)**2) / mod.nobs
    llf_alternative = np.log(norm.pdf(res.resid_recursive, loc=0,
                                      scale=scale_alternative**0.5)).sum()
    assert_allclose(llf_alternative, res_ols.llf)

    # Prediction
    actual = res.forecast(10, design=np.ones((1, 1, 10)))
    assert_allclose(actual, res_ols.predict(np.ones((10, 1))))

    # Sums of squares, R^2
    assert_allclose(res.ess, res_ols.ess)
    assert_allclose(res.ssr, res_ols.ssr)
    assert_allclose(res.centered_tss, res_ols.centered_tss)
    assert_allclose(res.uncentered_tss, res_ols.uncentered_tss)
    assert_allclose(res.rsquared, res_ols.rsquared)

    # Mean squares
    assert_allclose(res.mse_model, res_ols.mse_model)
    assert_allclose(res.mse_resid, res_ols.mse_resid)
    assert_allclose(res.mse_total, res_ols.mse_total)

    # Hypothesis tests
    actual = res.t_test('m1 = 0')
    desired = res_ols.t_test('m1 = 0')
    assert_allclose(actual.statistic, desired.statistic)
    assert_allclose(actual.pvalue, desired.pvalue, atol=1e-15)

    actual = res.f_test('m1 = 0')
    desired = res_ols.f_test('m1 = 0')
    assert_allclose(actual.statistic, desired.statistic)
    assert_allclose(actual.pvalue, desired.pvalue, atol=1e-15)

    # Information criteria
    # Note: the llf and llf_obs given in the results are based on the Kalman
    # filter and so the ic given in results will not be identical to the
    # OLS versions. Additionally, llf_recursive is comparable to the
    # non-concentrated llf, and not the concentrated llf that is by default
    # used in OLS. Compute new ic based on llf_alternative to compare.
    actual_aic = aic(llf_alternative, res.nobs_effective, res.df_model)
    assert_allclose(actual_aic, res_ols.aic)
    actual_bic = bic(llf_alternative, res.nobs_effective, res.df_model)
    assert_allclose(actual_bic, res_ols.bic)
def fit(self):
    """Estimate the model and compute the ANOVA table.

    Returns
    -------
    AnovaResults instance
    """
    y = self.data[self.depvar].values

    # Construct OLS endog and exog from string using patsy
    within = ['C(%s, Sum)' % i for i in self.within]
    subject = 'C(%s, Sum)' % self.subject
    factors = within + [subject]
    x = patsy.dmatrix('*'.join(factors), data=self.data)
    term_slices = x.design_info.term_name_slices
    for key in term_slices:
        ind = np.array([False] * x.shape[1])
        ind[term_slices[key]] = True
        term_slices[key] = np.array(ind)
    term_exclude = [':'.join(factors)]
    ind = _not_slice(term_slices, term_exclude, x.shape[1])
    x = x[:, ind]

    # Fit OLS
    model = OLS(y, x)
    results = model.fit()
    if model.rank < x.shape[1]:
        raise ValueError('Independent variables are collinear.')
    for i in term_exclude:
        term_slices.pop(i)
    for key in term_slices:
        term_slices[key] = term_slices[key][ind]
    params = results.params
    df_resid = results.df_resid
    ssr = results.ssr

    anova_table = pd.DataFrame(
        {'F Value': [], 'Num DF': [], 'Den DF': [], 'Pr > F': []})

    for key in term_slices:
        if self.subject not in key and key != 'Intercept':
            # Independent variables are orthogonal
            ssr1, df_resid1 = _ssr_reduced_model(
                y, x, term_slices, params, [key])
            df1 = df_resid1 - df_resid
            msm = (ssr1 - ssr) / df1
            if (key == ':'.join(factors[:-1]) or
                    (key + ':' + subject not in term_slices)):
                mse = ssr / df_resid
                df2 = df_resid
            else:
                ssr1, df_resid1 = _ssr_reduced_model(
                    y, x, term_slices, params, [key + ':' + subject])
                df2 = df_resid1 - df_resid
                mse = (ssr1 - ssr) / df2
            F = msm / mse
            p = stats.f.sf(F, df1, df2)
            term = key.replace('C(', '').replace(', Sum)', '')
            anova_table.loc[term, 'F Value'] = F
            anova_table.loc[term, 'Num DF'] = df1
            anova_table.loc[term, 'Den DF'] = df2
            anova_table.loc[term, 'Pr > F'] = p

    return AnovaResults(anova_table.iloc[:, [1, 2, 0, 3]])
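# Hedged usage sketch of the public API this fit() implements (statsmodels'
# AnovaRM for repeated-measures ANOVA); the data and column names below are
# invented for illustration:
import numpy as np
import pandas as pd
from statsmodels.stats.anova import AnovaRM

rng = np.random.default_rng(0)
df = pd.DataFrame({
    'subject': np.repeat(np.arange(10), 2),
    'cond': ['a', 'b'] * 10,
    'rt': rng.normal(loc=1.0, scale=0.1, size=20),
})
res = AnovaRM(df, depvar='rt', subject='subject', within=['cond']).fit()
print(res.anova_table)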
# Initialize the dataset
data = []

# Query all contracts
cursor.execute("select distinct vari,deli from contract_daily where deli between '1401' and '1712'")
for vari, deli in cursor.fetchall():
    # Query the contract's settlement prices
    cursor.execute("select settle from contract_daily where vari=%s and deli=%s order by day asc", (vari, deli))
    # Min-max normalization
    scaler = MinMaxScaler()
    settle = scaler.fit_transform(cursor.fetchall())
    settle = [row[0] for row in settle]
    # Estimate the coefficients of the first-order difference equation
    ols = OLS(settle[1:], [[1., x] for x in settle[:-1]])
    result = ols.fit()
    data.append([vari, deli, result.params[0], result.params[1], result.rsquared])

# Build the DataFrame
df = pd.DataFrame(data, columns=['vari', 'deli', 'beta0', 'beta1', 'R2'])
# Z-standardize beta0, beta1, and R2

# Save to file
df.to_csv('ols.csv', index=False)

# Close the database connection
cursor.close()
conn.close()
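# The OLS-on-lagged-level step above is the same AR(1) regression that
# statsmodels' AutoReg fits directly; a minimal equivalence sketch on
# synthetic data (illustrative only, not part of the original script):
import numpy as np
from statsmodels.regression.linear_model import OLS
from statsmodels.tsa.ar_model import AutoReg

rng = np.random.default_rng(1)
settle = rng.standard_normal(200).cumsum()

ols_params = OLS(settle[1:], [[1., x] for x in settle[:-1]]).fit().params
ar_params = AutoReg(settle, lags=1, trend='c').fit().params
print(ols_params, ar_params)   # beta0 ~ const, beta1 ~ lag-1 coefficient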
print('\n\n')
tt = res_hac4.t_test(np.eye(len(res_hac4.params)))
print(tt.summary())
print('\n\n')
print(tt.summary_frame())

print(vars(res_hac4.f_test(np.eye(len(res_hac4.params))[:-1])))
print(vars(res_hac4.wald_test(np.eye(len(res_hac4.params))[:-1], use_f=True)))
print(vars(res_hac4.wald_test(np.eye(len(res_hac4.params))[:-1], use_f=False)))

# new cov_type can be set in fit method of model
mod_olsg = OLS(g_inv, exogg)
res_hac4b = mod_olsg.fit(cov_type='HAC',
                         cov_kwds=dict(maxlags=4, use_correction=True))
print(res_hac4b.summary())

res_hc1b = mod_olsg.fit(cov_type='HC1')
print(res_hc1b.summary())

# force t-distribution
res_hc1c = mod_olsg.fit(cov_type='HC1', cov_kwds={'use_t': True})
print(res_hc1c.summary())

# force t-distribution
decade = (d2['year'][1:] // 10).astype(int)  # just make up a group variable
res_clu = mod_olsg.fit(cov_type='cluster',
                       cov_kwds={'groups': decade, 'use_t': True})
print(res_clu.summary())
def table_2_t_test():
    output = {}
    # t-test on return diffs for dollar ports, carry-timed dollar ports,
    # and its alpha to carry
    for prefix in ['part1_dollar_port#', 'part1_carry_timed_dollar_port#']:
        for suffix in ['', ' #no peg']:
            ret = get_port_ret_df(prefix, suffix)
            ret.loc[:, 'carry'] = data['carry' + suffix]
            ret = ret.dropna()
            for i in range(5):
                y = ret[i + 1] - ret[i]

                if prefix == 'part1_carry_timed_dollar_port#':
                    # t test on alpha diff
                    x = sm.add_constant(ret['carry'])
                    model = OLS(y, x)
                    results = model.fit()
                    label = ('part1_carry_timed_dollar_port_alpha_to_carry#' +
                             suffix + str(i + 1) + '-' + str(i))
                    output[label] = pd.Series(
                        {'annualized ret diff': results.params['const'] * 12,
                         't': results.tvalues['const'],
                         'nobs': results.nobs})

                # t test on return diff
                x = np.ones(len(y))
                model = OLS(y, x)
                results = model.fit()
                label = prefix + suffix + str(i + 1) + '-' + str(i)
                output[label] = pd.Series(
                    {'annualized ret diff': results.params['const'] * 12,
                     't': results.tvalues['const'],
                     'nobs': results.nobs})

    output = pd.DataFrame(output).T
    return output
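# Why the all-ones regressor works (illustrative, not from the source):
# regressing a series on a constant alone recovers its mean as the single
# coefficient, and that coefficient's t-statistic is the t-test on the mean.
import numpy as np
from statsmodels.regression.linear_model import OLS

rng = np.random.default_rng(3)
r = rng.normal(loc=0.01, scale=0.05, size=120)
fit = OLS(r, np.ones(len(r))).fit()
print(fit.params[0], r.mean())   # identical values
print(fit.tvalues[0])            # t-stat on the mean return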