def setup_class(cls):
    """Fit three OLS variants on macro growth-rate data (pandas path)."""
    data = macrodata.load_pandas().data
    # Annualized growth rates from log-differences.
    data['gs_l_realinv'] = 400 * np.log(data['realinv']).diff()
    data['gs_l_realgdp'] = 400 * np.log(data['realgdp']).diff()
    # One-period lags of the interest-rate series.
    data['lint'] = data['realint'].shift(1)
    data['tbilrate'] = data['tbilrate'].shift(1)
    data = data.dropna()
    cls.d = data

    endog = data['gs_l_realinv']
    exog1 = add_constant(data[['gs_l_realgdp', 'lint']])
    exog2 = add_constant(data[['gs_l_realgdp', 'tbilrate']])
    exog3 = add_constant(data[['gs_l_realgdp']])

    cls.res = OLS(endog, exog1).fit()
    cls.res2 = OLS(endog, exog2).fit()
    cls.res3 = OLS(endog, exog3).fit()
    cls.endog = cls.res.model.endog
    cls.exog = cls.res.model.exog
def test_add_constant_has_constant2darray(self):
    """has_constant options on a 2d array that already has a constant."""
    x = np.asarray([[1, 1, 1, 1], [1, 2, 3, 4.]]).T
    # 'skip' leaves the array unchanged.
    y = tools.add_constant(x, has_constant='skip')
    assert_equal(x, y)
    # 'raise' must error out when a constant is present.
    with pytest.raises(ValueError):
        tools.add_constant(x, has_constant='raise')
    # 'add' prepends another constant column regardless.
    expected = np.column_stack((np.ones(4), x))
    assert_equal(tools.add_constant(x, has_constant='add'), expected)
def test_add_constant_has_constant1darray(self):
    """has_constant options on a 1d array of ones."""
    x = np.ones(5)
    # 'skip' returns the data as a single column, no extra constant.
    x = tools.add_constant(x, has_constant='skip')
    assert_equal(x, np.ones((5, 1)))
    # 'raise' must error out when a constant is present.
    with pytest.raises(ValueError):
        tools.add_constant(x, has_constant='raise')
    # 'add' appends a second column of ones anyway.
    assert_equal(tools.add_constant(x, has_constant='add'),
                 np.ones((5, 2)))
def test_wls_tss():
    """Centered TSS of WLS with case weights matches the expanded OLS."""
    y = np.array([22, 22, 22, 23, 23, 23])
    X = [[1, 0], [1, 0], [1, 1], [0, 1], [0, 1], [0, 1]]
    ols_fit = OLS(y, add_constant(X, prepend=False)).fit()

    # Collapsed data with frequency weights carrying the same information.
    y_w = np.array([22, 22, 23.])
    X_w = [[1, 0], [1, 1], [0, 1]]
    weights = np.array([2, 1, 3.])
    wls_fit = WLS(y_w, add_constant(X_w, prepend=False),
                  weights=weights).fit()

    assert_equal(ols_fit.centered_tss, wls_fit.centered_tss)
def test_poisson_residuals():
    """Poisson GLM residuals with exposure equal those with var_weights.

    Fits the same Poisson model twice -- once via ``exposure`` on count
    endog, once via rate endog with ``var_weights`` -- and checks that
    each residual type agrees (response residuals agree up to the
    exposure scaling).
    """
    nobs, k_exog = 100, 5
    np.random.seed(987125)
    x = np.random.randn(nobs, k_exog - 1)
    x = add_constant(x)
    y_true = x.sum(1) / 2
    # NOTE: `y` is unused below but the draw must stay -- removing it
    # would shift the RNG stream and change `yp` under the fixed seed.
    y = y_true + 2 * np.random.randn(nobs)
    exposure = 1 + np.arange(nobs) // 4

    yp = np.random.poisson(np.exp(y_true) * exposure)
    yp[10:15] += 10  # inject a few outliers

    fam = sm.families.Poisson()
    mod_poi_e = GLM(yp, x, family=fam, exposure=exposure)
    res_poi_e = mod_poi_e.fit()
    mod_poi_w = GLM(yp / exposure, x, family=fam, var_weights=exposure)
    res_poi_w = mod_poi_w.fit()

    assert_allclose(res_poi_e.resid_response / exposure,
                    res_poi_w.resid_response)
    assert_allclose(res_poi_e.resid_pearson, res_poi_w.resid_pearson)
    assert_allclose(res_poi_e.resid_deviance, res_poi_w.resid_deviance)
    assert_allclose(res_poi_e.resid_anscombe, res_poi_w.resid_anscombe)
    # FIX: compare the *unscaled* Anscombe residuals on BOTH sides; the
    # original asymmetrically compared unscaled (exposure model) against
    # scaled (weights model).
    assert_allclose(res_poi_e.resid_anscombe_unscaled,
                    res_poi_w.resid_anscombe_unscaled)
def setup_class(cls):
    """Load the Filardo CSV data and delegate to the parent class setup."""
    path = os.path.join(current_path, 'results', 'mar_filardo.csv')
    cls.mar_filardo = pd.read_csv(path)
    cls.mar_filardo.index = pd.date_range('1948-02-01', '1991-04-01',
                                          freq='MS')
    # Reference parameter values and log-likelihoods.
    true = {
        'params': np.r_[4.35941747, -1.6493936, 1.7702123, 0.9945672,
                        0.517298, -0.865888, np.exp(-0.362469)**2,
                        0.189474, 0.079344, 0.110944, 0.122251],
        'llf': -586.5718,
        'llf_fit': -586.5718,
        'llf_fit_em': -586.5718,
    }
    endog = cls.mar_filardo['dlip'].iloc[1:]
    # Lagged leading indicator drives the time-varying transition probs.
    exog_tvtp = add_constant(cls.mar_filardo['dmdlleading'].iloc[:-1])
    super(TestFilardoPandas, cls).setup_class(
        true, endog, k_regimes=2, order=4, switching_ar=False,
        exog_tvtp=exog_tvtp)
def setup_class(cls):
    """t_test with a single contrast row on the Longley OLS fit."""
    # Contrast: coefficient 4 minus coefficient 5 equals zero.
    R = np.zeros(7)
    R[4:6] = [1, -1]
    data = datasets.longley.load(as_pandas=False)
    data.exog = add_constant(data.exog, prepend=False)
    res1 = OLS(data.endog, data.exog).fit()
    cls.Ttest1 = res1.t_test(R)
def test_pandas_const_series_prepend(self):
    """Prepended constant lands in the first column with zero variance."""
    dta = longley.load_pandas()
    gnp = dta.exog['GNP']
    result = tools.add_constant(gnp, prepend=True)
    assert_string_equal('const', result.columns[0])
    # The constant column has zero variance.
    assert_equal(result.var(0)[0], 0)
def test_pandas_const_df_prepend():
    """Prepended constant is the first, zero-variance DataFrame column."""
    # GH#1025
    dta = longley.load_pandas().exog
    # Rescale one column so the data is not ill-conditioned.
    dta['UNEMP'] /= dta['UNEMP'].std()
    dta = tools.add_constant(dta, prepend=True)
    assert_string_equal('const', dta.columns[0])
    assert_equal(dta.var(0)[0], 0)
def test_missing(self):
    """missing='drop' removes the observations with NaN endog."""
    data = datasets.longley.load(as_pandas=False)
    data.exog = add_constant(data.exog, prepend=False)
    # Poke NaNs into three observations.
    data.endog[[3, 7, 14]] = np.nan
    mod = OLS(data.endog, data.exog, missing='drop')
    # Longley has 16 observations; dropping 3 leaves 13.
    assert mod.endog.shape[0] == 13
    assert mod.exog.shape[0] == 13
def setup_class(cls):
    """t_test via identity matrix and via the equivalent string form."""
    data = datasets.longley.load(as_pandas=False)
    data.exog = add_constant(data.exog, prepend=False)
    cls.res1 = OLS(data.endog, data.exog).fit()
    # Test every coefficient against zero, both ways of spelling it.
    cls.Ttest = cls.res1.t_test(np.identity(7))
    hyp = 'x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0, x6 = 0, const = 0'
    cls.NewTTest = cls.res1.t_test(hyp)
def setup_class(cls):
    """OLS of investment growth on GDP growth and lagged real interest."""
    d2 = macrodata.load_pandas().data
    # Annualized growth rates from log-differences.
    g_gdp = 400 * np.diff(np.log(d2['realgdp'].values))
    g_inv = 400 * np.diff(np.log(d2['realinv'].values))
    exogg = add_constant(np.c_[g_gdp, d2['realint'][:-1].values],
                         prepend=False)
    cls.res1 = OLS(g_inv, exogg).fit()
def test_const_indicator():
    """R-squared agrees for dummies+const vs full dummy set + hasconst."""
    np.random.seed(12345)
    X = np.random.randint(0, 3, size=30)
    X = categorical(X, drop=True)
    y = np.dot(X, [1., 2., 3.]) + np.random.normal(size=30)
    # Drop one dummy and add an explicit constant ...
    resc = OLS(y, add_constant(X[:, 1:], prepend=True)).fit()
    # ... versus the full dummy set with hasconst declared.
    res = OLS(y, X, hasconst=True).fit()
    assert_almost_equal(resc.rsquared, res.rsquared, 12)
def test_add_constant_dataframe(self):
    """add_constant on a mixed-dtype DataFrame prepends a 'const' column."""
    df = pd.DataFrame([[1.0, 'a', 4],
                       [2.0, 'bc', 9],
                       [3.0, 'def', 16]])
    output = tools.add_constant(df)
    expected = pd.Series([1.0, 1.0, 1.0], name='const')
    tm.assert_series_equal(expected, output['const'])
    # The full frame equals the original with 'const' inserted first.
    dfc = df.copy()
    dfc.insert(0, 'const', np.ones(3))
    tm.assert_frame_equal(dfc, output)
def setup_class(cls):
    """f_test via restriction matrix and via the string hypothesis."""
    data = datasets.longley.load(as_pandas=False)
    data.exog = add_constant(data.exog, prepend=False)
    res1 = OLS(data.endog, data.exog).fit()
    # x2 == x3 and x5 == x6, written as contrast rows.
    R2 = [[0, 1, -1, 0, 0, 0, 0],
          [0, 0, 0, 0, 1, -1, 0]]
    cls.Ftest1 = res1.f_test(R2)
    cls.NewFtest1 = res1.f_test('x2 = x3, x5 = x6')
def setup_class(cls):
    """GLSAR(1) iterative fit on the investment-growth regression."""
    d2 = macrodata.load_pandas().data
    # Annualized growth rates from log-differences.
    g_gdp = 400 * np.diff(np.log(d2['realgdp'].values))
    g_inv = 400 * np.diff(np.log(d2['realinv'].values))
    exogg = add_constant(np.c_[g_gdp, d2['realint'][:-1].values],
                         prepend=False)
    cls.res = GLSAR(g_inv, exogg, 1).iterative_fit(5)
def setup_class(cls):
    """WLS with a scalar weight versus an equal-valued weight vector."""
    dta = datasets.longley.load(as_pandas=False)
    dta.exog = add_constant(dta.exog, prepend=True)
    # Scalar weight 1/3 for every observation ...
    cls.res1 = WLS(dta.endog, dta.exog, weights=1. / 3).fit()
    # ... versus the same weight spelled out per observation.
    w = [1 / 3.] * len(dta.endog)
    cls.res2 = WLS(dta.endog, dta.exog, weights=w).fit()
def setup_class(cls):
    """Binomial GLM on counts versus proportions with var_weights."""
    data = sm.datasets.star98.load(as_pandas=False)
    data.exog = add_constant(data.exog, prepend=False)
    # Two-column success/failure counts.
    cls.res1 = GLM(data.endog, data.exog,
                   family=sm.families.Binomial()).fit()
    # Equivalent: success proportion with trial counts as var_weights.
    trials = data.endog.sum(axis=1)
    proportion = data.endog[:, 0] / trials
    cls.res2 = GLM(proportion, data.exog, family=sm.families.Binomial(),
                   var_weights=trials).fit()
def test_wls_example():
    """WLS docstring example; reference values from R's lm summary.

    There was once a note about a bug here; it should be fixed now.
    """
    y = [1, 3, 4, 5, 2, 3, 4]
    x = add_constant(list(range(1, 8)), prepend=False)
    res = WLS(y, x, weights=list(range(1, 8))).fit()
    assert_almost_equal(res.fvalue, 0.127337843215, 6)
    assert_almost_equal(res.scale, 2.44608530786**2, 6)
def setup_class(cls):
    """f_test with an explicit (R, q) restriction pair on Longley."""
    data = datasets.longley.load(as_pandas=False)
    data.exog = add_constant(data.exog, prepend=False)
    res1 = OLS(data.endog, data.exog).fit()
    R = np.array([[0, 1, 1, 0, 0, 0, 0],
                  [0, 1, 0, 1, 0, 0, 0],
                  [0, 1, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 1, 0, 0],
                  [0, 0, 0, 0, 0, 1, 0]])
    q = np.array([0, 0, 0, 1, 0])
    cls.Ftest1 = res1.f_test((R, q))
def setup_class(cls):
    """GLS with default sigma should reproduce OLS on Longley data."""
    data = datasets.longley.load(as_pandas=False)
    data.exog = add_constant(data.exog, prepend=False)
    cls.endog = data.endog
    cls.exog = data.exog
    cls.res1 = GLS(data.endog, data.exog).fit()
    cls.res2 = OLS(data.endog, data.exog).fit()
    # Unfitted model with a scalar sigma.
    cls.res3 = GLS(data.endog, data.exog, sigma=1)
    # TODO: Do something with this?
def test_add_constant_recarray(self):
    """add_constant on a recarray adds 'const' and keeps field dtypes."""
    dt = np.dtype([('', int), ('', '<S4'),
                   ('', np.float32), ('', np.float64)])
    x = np.array([(1, 'abcd', 1.0, 2.0),
                  (7, 'abcd', 2.0, 4.0),
                  (21, 'abcd', 2.0, 8.0)], dt)
    x = x.view(np.recarray)
    y = tools.add_constant(x)
    assert_equal(y['const'], np.array([1.0, 1.0, 1.0]))
    # Pre-existing fields keep their original dtypes.
    for name in x.dtype.fields:
        assert y[name].dtype == x[name].dtype
def setup_class(cls):
    """OLS, unit-weight WLS and (rescaled) GLS fits of the same data."""
    data = datasets.longley.load(as_pandas=False)
    data.exog = add_constant(data.exog, prepend=False)
    y, X = data.endog, data.exog
    w = np.ones(y.shape[0])
    # All four should agree up to the weight scaling.
    cls.results = [
        OLS(y, X).fit(),
        WLS(y, X, w).fit(),
        GLS(y, X, 100 * w).fit(),
        GLS(y, X, np.diag(0.1 * w)).fit(),
    ]
def setup_class(cls):
    """Nested Binomial GLMs (full vs restricted exog) with an offset."""
    from sm2.datasets.star98 import load
    data = load(as_pandas=False)
    exog = add_constant(data.exog, prepend=True)
    offset = np.ones(len(data.endog))
    # The restricted model drops the last five regressors.
    exog_keep = exog[:, :-5]
    cls.mod2 = GLM(data.endog, exog_keep, family=family.Binomial(),
                   offset=offset)
    cls.mod1 = GLM(data.endog, exog, family=family.Binomial(),
                   offset=offset)
    cls.init()
def setup_class(cls):
    """Fit three OLS variants on macro growth-rate data (numpy path)."""
    d = macrodata.load_pandas().data
    # Annualized growth rates from log-differences.
    gs_l_realinv = 400 * np.diff(np.log(d['realinv'].values))
    gs_l_realgdp = 400 * np.diff(np.log(d['realgdp'].values))
    # One-period lags by truncating the last observation.
    lint = d['realint'][:-1].values
    tbilrate = d['tbilrate'][:-1].values

    endogg = gs_l_realinv
    exogg = add_constant(np.c_[gs_l_realgdp, lint])
    exogg2 = add_constant(np.c_[gs_l_realgdp, tbilrate])
    exogg3 = add_constant(np.c_[gs_l_realgdp])

    cls.res = OLS(endogg, exogg).fit()
    cls.res2 = OLS(endogg, exogg2).fit()
    cls.res3 = OLS(endogg, exogg3).fit()
    cls.endog = cls.res.model.endog
    cls.exog = cls.res.model.exog
def test_outlier_influence_funcs(reset_randomstate):
    """summary_table's alpha tightens/widens the interval columns."""
    x = add_constant(np.random.randn(10, 2))
    y = x.sum(1) + np.random.randn(10)
    res = OLS(y, x).fit()
    out_05 = oi.summary_table(res)
    # GH#3344 : Check alpha has an effect
    out_01 = oi.summary_table(res, alpha=0.01)
    # Narrower alpha => lower bound no larger, upper bound no smaller.
    assert np.all(out_01[1][:, 6] <= out_05[1][:, 6])
    assert np.all(out_01[1][:, 7] >= out_05[1][:, 7])

    # Smoke-test the single-regressor path and the influence summary.
    res2 = OLS(y, x[:, 0]).fit()
    oi.summary_table(res2, alpha=0.05)
    infl = res2.get_influence()
    infl.summary_table()
def setup_class(cls):
    """WLS and GLS fits with random weights / inverse weights."""
    data = datasets.longley.load(as_pandas=False)
    data.exog = add_constant(data.exog, prepend=False)
    y, X = data.endog, data.exog
    np.random.seed(5)
    w = np.random.uniform(0.5, 1, y.shape[0])
    w_inv = 1. / w
    # WLS weights and GLS sigmas are reciprocals up to scale.
    cls.results = [
        WLS(y, X, w).fit(),
        WLS(y, X, 0.01 * w).fit(),
        GLS(y, X, 100 * w_inv).fit(),
        GLS(y, X, np.diag(0.1 * w_inv)).fit(),
    ]
def setup_class(cls):
    """GLS with an AR(1)-style Toeplitz sigma built from OLS residuals."""
    data = datasets.longley.load(as_pandas=False)
    exog = add_constant(np.column_stack((data.exog[:, 1],
                                         data.exog[:, 4])),
                        prepend=False)
    tmp_results = OLS(data.endog, exog).fit()
    # Lag-1 autocorrelation of the OLS residuals; AR(1) by assumption.
    rho = np.corrcoef(tmp_results.resid[1:], tmp_results.resid[:-1])[0][1]
    order = toeplitz(np.arange(16))
    sigma = rho**order
    cls.res1 = GLS(data.endog, exog, sigma=sigma).fit()
    # Attach pieces for test_missing.
    cls.sigma = sigma
    cls.exog = exog
    cls.endog = data.endog
def setup_class(cls):
    """Grunfeld panel OLS plus group/time indices for panel-cov tests."""
    dtapa = grunfeld.data.load_pandas()
    # Stata example/data seems to miss the last firm.
    endog = dtapa.endog[:200]
    exog_df = dtapa.exog[:200]
    exog = add_constant(exog_df[['value', 'capital']], prepend=False)
    cls.res1 = OLS(endog, exog).fit()

    # Integer firm ids as the grouping variable.
    firm_names, firm_id = np.unique(
        np.asarray(exog_df[['firm']], 'S20'), return_inverse=True)
    cls.groups = firm_id
    # Time indicator in range(max Ti).
    time = np.asarray(exog_df[['year']])
    time -= time.min()
    cls.time = np.squeeze(time).astype(int)
    # nw_panel function requires interval bounds.
    cls.tidx = [(i * 20, 20 * (i + 1)) for i in range(10)]
def setup_class(cls):
    """Default (pivoting) fit versus QR fits, including a manual QR."""
    data = datasets.longley.load(as_pandas=False)
    data.exog = add_constant(data.exog, prepend=False)
    cls.res1 = OLS(data.endog, data.exog).fit()
    cls.res_qr = OLS(data.endog, data.exog).fit(method="qr")

    # Pre-compute the QR pieces by hand and fit through them.
    model_qr = OLS(data.endog, data.exog)
    Q, R = np.linalg.qr(data.exog)
    model_qr.exog_Q, model_qr.exog_R = Q, R
    model_qr.normalized_cov_params = np.linalg.inv(np.dot(R.T, R))
    model_qr.rank = np.linalg.matrix_rank(R)
    cls.res_qr_manual = model_qr.fit(method="qr")