def test_formula():
    """Fit SUR models built from dict and string formulas; check parsed labels."""
    frame = DataFrame(np.random.standard_normal((500, 4)),
                      columns=['y1', 'y2', 'x1', 'x2'])

    # Dictionary specification, default covariance.
    by_name = {'eq1': 'y1 ~ 1 + x1', 'eq2': 'y2 ~ 1 + x2'}
    SUR.from_formula(by_name, frame).fit()

    # Brace-delimited string specifications without explicit labels.
    # NOTE(review): the triple-quoted literal was presumably multi-line
    # originally -- confirm against version control.
    unlabeled = (
        '{y1 ~ 1 + x1} {y2 ~ 1 + x2}',
        ''' {y1 ~ 1 + x1} {y2 ~ 1 + x2} ''',
    )
    for spec in unlabeled:
        SUR.from_formula(spec, frame).fit(cov_type='heteroskedastic')

    # Brace-delimited specification where each equation carries a label prefix.
    labeled = ''' {eq.a:y1 ~ 1 + x1} {second: y2 ~ 1 + x2} '''
    res = SUR.from_formula(labeled, frame).fit(cov_type='heteroskedastic')
    labels = res.equation_labels
    assert 'eq.a' in labels
    assert 'second' in labels
def test_formula():
    """Exercise ``SUR.from_formula`` with a dict and with brace-delimited strings."""
    columns = ["y1", "y2", "x1", "x2"]
    frame = DataFrame(np.random.standard_normal((500, 4)), columns=columns)

    # Dictionary input, default covariance.
    dict_spec = {"eq1": "y1 ~ 1 + x1", "eq2": "y2 ~ 1 + x2"}
    dict_model = SUR.from_formula(dict_spec, frame)
    dict_model.fit()

    # String input on one line.
    inline_model = SUR.from_formula("{y1 ~ 1 + x1} {y2 ~ 1 + x2}", frame)
    inline_model.fit(cov_type="heteroskedastic")

    # String input with surrounding whitespace.
    # NOTE(review): this triple-quoted literal was presumably multi-line
    # originally -- confirm against version control.
    padded_model = SUR.from_formula(""" {y1 ~ 1 + x1} {y2 ~ 1 + x2} """, frame)
    padded_model.fit(cov_type="heteroskedastic")

    # String input with explicit equation labels; the labels must be reported.
    labeled_spec = """ {eq.a:y1 ~ 1 + x1} {second: y2 ~ 1 + x2} """
    labeled_model = SUR.from_formula(labeled_spec, frame)
    res = labeled_model.fit(cov_type="heteroskedastic")
    for expected_label in ("eq.a", "second"):
        assert expected_label in res.equation_labels
def test_ols_against_gls(data):
    """Compare GLS parameters with OLS on weighted, sigma-transformed data."""
    mod = SUR(data)
    res = mod.fit(method='gls')
    sigma_m12 = inv_matrix_sqrt(res.sigma)

    # Equations are either mappings (string keys) or sequences (positions);
    # pick the matching accessors and the error raised when weights are absent.
    first = data[next(iter(data))]
    if isinstance(first, Mapping):
        dep_at, exog_at, weight_at, absent = 'dependent', 'exog', 'weights', KeyError
    else:
        dep_at, exog_at, weight_at, absent = 0, 1, 2, IndexError

    y = [data[name][dep_at] for name in data]
    x = [data[name][exog_at] for name in data]
    try:
        w = [data[name][weight_at] for name in data]
    except absent:
        # No weights supplied: fall back to unit weights shaped like dependent.
        w = [np.ones_like(dep) for dep in y]

    # Scale each equation by the square root of its mean-normalized weights.
    wy = [dep * np.sqrt(wt / wt.mean()) for dep, wt in zip(y, w)]
    wx = [reg * np.sqrt(wt / wt.mean()) for reg, wt in zip(x, w)]
    wy = blocked_column_product(wy, sigma_m12)
    wx = blocked_diag_product(wx, sigma_m12)

    ols_res = OLS(wy, wx).fit(debiased=False)
    assert_allclose(res.params, ols_res.params)
def test_ols_against_gls(data):
    """Compare GLS parameters with OLS on weighted, sigma-transformed data.

    For dict-based equations, also check that ``predict`` returns the same
    values for 1-d parameters and for a column-vector of parameters.
    """
    mod = SUR(data)
    res = mod.fit(method="gls")

    first = data[next(iter(data))]
    if isinstance(first, dict):
        from_flat = mod.predict(res.params, equations=data)
        from_column = mod.predict(np.asarray(res.params)[:, None], equations=data)
        assert_allclose(from_flat, from_column)

    sigma_m12 = inv_matrix_sqrt(np.asarray(res.sigma))

    # Equations are either mappings (string keys) or sequences (positions);
    # pick the matching accessors and the error raised when weights are absent.
    if isinstance(first, Mapping):
        dep_at, exog_at, weight_at, absent = "dependent", "exog", "weights", KeyError
    else:
        dep_at, exog_at, weight_at, absent = 0, 1, 2, IndexError

    y = [data[name][dep_at] for name in data]
    x = [data[name][exog_at] for name in data]
    try:
        w = [data[name][weight_at] for name in data]
    except absent:
        # No weights supplied: fall back to unit weights shaped like dependent.
        w = [np.ones_like(dep) for dep in y]

    # Scale each equation by the square root of its mean-normalized weights.
    wy = [dep * np.sqrt(wt / wt.mean()) for dep, wt in zip(y, w)]
    wx = [reg * np.sqrt(wt / wt.mean()) for reg, wt in zip(x, w)]
    wy = blocked_column_product(wy, sigma_m12)
    wx = blocked_diag_product(wx, sigma_m12)

    ols_res = OLS(wy, wx).fit(debiased=False)
    assert_allclose(res.params, ols_res.params)
def test_formula_partial_weights():
    """Check the warning and default weights when only one equation has weights."""
    data = DataFrame(np.random.standard_normal((500, 4)),
                     columns=['y1', 'y2', 'x1', 'x2'])

    # Dict formula: weights are provided for 'eq2' only.
    weights = DataFrame(np.random.chisquare(5, (500, 1)), columns=['eq2'])
    formula = OrderedDict([('eq1', 'y1 ~ 1 + x1'), ('eq2', 'y2 ~ 1 + x1')])
    with warnings.catch_warnings(record=True) as caught:
        mod = SUR.from_formula(formula, data, weights=weights)
        assert len(caught) == 1
        text = caught[0].message.args[0]
        assert 'Weights' in text
        assert 'eq1' in text
        assert 'eq2' not in text
    mod.fit()
    unit = np.ones((500, 1))
    assert_allclose(mod._w[0], unit / unit.mean())
    supplied = weights.values[:, [0]]
    assert_allclose(mod._w[1], supplied / supplied.mean())

    # String formula: weights are provided for 'y2' only.
    formula = '{y1 ~ 1 + x1} {y2 ~ 1 + x2}'
    weights = DataFrame(np.random.chisquare(5, (500, 1)), columns=['y2'])
    with warnings.catch_warnings(record=True) as caught:
        mod = SUR.from_formula(formula, data, weights=weights)
        assert len(caught) == 1
        text = caught[0].message.args[0]
        assert 'y1' in text
        assert 'y2' not in text
    unit = np.ones((500, 1))
    assert_allclose(mod._w[0], unit / unit.mean())
    supplied = weights.values[:, [0]]
    assert_allclose(mod._w[1], supplied / supplied.mean())
def test_predict(missing_data):
    """Check the outputs of ``predict`` for each combination of its options."""
    res = SUR(missing_data).fit()
    equations = res.equations

    # Default call: per-equation fitted values.
    fitted = res.predict()
    for label in fitted:
        assert_series_equal(fitted[label].iloc[:, 0],
                            equations[label].fitted_values,
                            check_names=False)

    # Idiosyncratic only: per-equation residuals.
    resid = res.predict(fitted=False, idiosyncratic=True)
    for label in resid:
        assert_series_equal(resid[label].iloc[:, 0],
                            equations[label].resids,
                            check_names=False)

    # Both components: a dict with one entry per equation.
    both = res.predict(fitted=True, idiosyncratic=True)
    assert isinstance(both, dict)
    for label in equations:
        assert label in both

    # DataFrame output with a single component returns a DataFrame.
    fitted_frame = res.predict(dataframe=True)
    assert isinstance(fitted_frame, DataFrame)
    assert_frame_equal(fitted_frame, res.fitted_values)

    resid_frame = res.predict(fitted=False, idiosyncratic=True, dataframe=True)
    assert isinstance(resid_frame, DataFrame)
    assert_frame_equal(resid_frame, res.resids)

    # DataFrame output with both components returns a dict keyed by component.
    by_component = res.predict(fitted=True, idiosyncratic=True, dataframe=True)
    assert isinstance(by_component, dict)
    assert 'fitted_values' in by_component
    assert_frame_equal(by_component['fitted_values'], res.fitted_values)
    assert 'idiosyncratic' in by_component
    assert_frame_equal(by_component['idiosyncratic'], res.resids)

    # With missing=True the row count equals that of the first dependent.
    first_label = next(iter(missing_data.keys()))
    nobs = missing_data[first_label]['dependent'].shape[0]
    full_frame = res.predict(fitted=True, idiosyncratic=False,
                             dataframe=True, missing=True)
    assert full_frame.shape[0] == nobs
    full_by_eq = res.predict(fitted=True, idiosyncratic=True, missing=True)
    for label in full_by_eq:
        assert full_by_eq[label].shape[0] == nobs
def test_formula_partial_weights():
    """Check the warning and default weights when only one equation has weights."""
    data = DataFrame(
        np.random.standard_normal((500, 4)), columns=["y1", "y2", "x1", "x2"]
    )

    # Dict formula: weights are provided for "eq2" only.
    weights = DataFrame(np.random.chisquare(5, (500, 1)), columns=["eq2"])
    formula = {"eq1": "y1 ~ 1 + x1", "eq2": "y2 ~ 1 + x1"}
    with warnings.catch_warnings(record=True) as caught:
        mod = SUR.from_formula(formula, data, weights=weights)
        assert len(caught) == 1
        text = caught[0].message.args[0]
        assert "Weights" in text
        assert "eq1" in text
        assert "eq2" not in text
    mod.fit()
    unit = np.ones((500, 1))
    assert_allclose(mod._w[0], unit / unit.mean())
    supplied = weights.values[:, [0]]
    assert_allclose(mod._w[1], supplied / supplied.mean())

    # String formula: weights are provided for "y2" only.
    formula = "{y1 ~ 1 + x1} {y2 ~ 1 + x2}"
    weights = DataFrame(np.random.chisquare(5, (500, 1)), columns=["y2"])
    with warnings.catch_warnings(record=True) as caught:
        mod = SUR.from_formula(formula, data, weights=weights)
        assert len(caught) == 1
        text = caught[0].message.args[0]
        assert "y1" in text
        assert "y2" not in text
    unit = np.ones((500, 1))
    assert_allclose(mod._w[0], unit / unit.mean())
    supplied = weights.values[:, [0]]
    assert_allclose(mod._w[1], supplied / supplied.mean())
def test_restricted_f_statistic():
    """With a constraint added, the first equation's F-statistic is invalid."""
    mod = SUR(generate_data(k=2, p=2))

    # One-row restriction matrix selecting the second parameter.
    restriction = DataFrame(np.zeros((1, 6)), columns=mod.param_names)
    restriction.iloc[0, 1] = 1.0
    mod.add_constraints(restriction)

    res = mod.fit()
    first_label = res.equation_labels[0]
    first_eq = res.equations[first_label]
    assert isinstance(first_eq.f_statistic, InvalidTestStatistic)
def test_mv_ols_hac_smoke(kernel_options):
    """Fit with a kernel covariance and check the summary text.

    When the bandwidth is zero, t-stats must match the robust covariance fit.
    """
    data = generate_data(p=3, const=True, rho=0.8, common_exog=False,
                         included_weights=False, output_dict=True)
    mod = SUR(data)
    res = mod.fit(cov_type='kernel', **kernel_options)

    assert 'Kernel (HAC) ' in str(res)
    kernel_line = 'Kernel: {0}'.format(kernel_options['kernel'])
    assert kernel_line in str(res)

    if kernel_options['bandwidth'] != 0:
        return
    res_base = mod.fit(cov_type='robust', debiased=kernel_options['debiased'])
    assert_allclose(res.tstats, res_base.tstats)
def test_errors():
    """Check that invalid inputs to ``SUR`` and ``fit`` raise the expected errors."""
    # Equations must not be a list, nor a dict of non-equation values.
    with pytest.raises(TypeError):
        SUR([])
    with pytest.raises(TypeError):
        SUR({'a': 'absde', 'b': 12345})

    # Unknown covariance type. The model is built outside the ``raises`` block
    # so that only ``fit`` can satisfy the expected ValueError.
    moddata = {'a': {'dependent': np.random.standard_normal((100, 1)),
                     'exog': np.random.standard_normal((100, 5))}}
    mod = SUR(moddata)
    with pytest.raises(ValueError):
        mod.fit(cov_type='unknown')

    # dependent has 100 rows but exog has 101.
    moddata = {'a': {'dependent': np.random.standard_normal((100, 1)),
                     'exog': np.random.standard_normal((101, 5))}}
    with pytest.raises(ValueError):
        SUR(moddata)

    # 20 exog columns with only 10 observations.
    moddata = {'a': {'dependent': np.random.standard_normal((10, 1)),
                     'exog': np.random.standard_normal((10, 20))}}
    with pytest.raises(ValueError):
        SUR(moddata)

    # exog contains the same two columns twice.
    x = np.random.standard_normal((100, 2))
    x = np.c_[x, x]
    moddata = {'a': {'dependent': np.random.standard_normal((100, 1)),
                     'exog': x}}
    with pytest.raises(ValueError):
        SUR(moddata)
def test_smoke(data):
    """Run ``fit`` under several option sets and pass the last result to ``get_res``."""
    mod = SUR(data)
    # These fits only need to complete; their results are discarded.
    for options in (
        {},
        {"cov_type": "unadjusted"},
        {"cov_type": "unadjusted", "method": "ols"},
    ):
        mod.fit(**options)
    get_res(mod.fit(full_cov=False))
def test_smoke(data):
    """Run ``fit`` under several option sets and pass the last result to ``get_res``."""
    model = SUR(data)
    # These fits only need to complete; their results are discarded.
    model.fit()
    unadjusted = dict(cov_type='unadjusted')
    model.fit(**unadjusted)
    model.fit(method='ols', **unadjusted)
    # Only the fit with full_cov=False is inspected further.
    partial_cov_result = model.fit(full_cov=False)
    get_res(partial_cov_result)
def test_system_r2_direct():
    """Compare system R-squared values with the reference implementations."""
    eqns = generate_data(k=3)
    mod = SUR(eqns)

    def stacked_dependent():
        # Column-stack the dependent variable of every equation.
        return np.hstack([eqns[name]["dependent"] for name in eqns])

    # OLS fit: both measures are compared at default tolerance.
    res = mod.fit(method="ols", cov_type="unadjusted")
    y = stacked_dependent()
    assert_allclose(
        reference_mcelroy(res.resids, y, res.sigma), res.system_rsquared.mcelroy
    )
    assert_allclose(reference_berndt(res.resids, y), res.system_rsquared.berndt)

    # GLS fit: the Berndt measure is compared with looser tolerances.
    res = mod.fit(method="gls", cov_type="unadjusted", iter_limit=100)
    y = stacked_dependent()
    assert_allclose(
        reference_mcelroy(res.resids, y, res.sigma), res.system_rsquared.mcelroy
    )
    assert_allclose(
        reference_berndt(res.resids, y),
        res.system_rsquared.berndt,
        atol=1e-3,
        rtol=1e-3,
    )
def test_fitted(data):
    """Check fitted values per equation and for the system against ``x @ b``."""
    mod = SUR(data)
    res = mod.fit()
    labels = list(res.equations)

    columns = []
    for position, label in enumerate(labels):
        eq = res.equations[label]

        # The system-level column, once renamed, must equal the equation's series.
        system_column = res.fitted_values[label].copy()
        system_column.name = 'fitted_values'
        assert_series_equal(eq.fitted_values, system_column)

        # Recompute directly from the model's regressors and the parameters.
        direct = mod._x[position] @ eq.params.values
        columns.append(direct[:, None])
        assert_allclose(eq.fitted_values, direct, atol=1e-8)

    # The index is taken from the last equation's dependent variable.
    index = mod._dependent[len(labels) - 1].pandas.index
    expected = DataFrame(np.concatenate(columns, 1), index=index, columns=labels)
    assert_frame_equal(expected, res.fitted_values)
def test_mv_ols_hac_smoke(kernel_options):
    """Fit with a kernel covariance and check the summary text.

    When the bandwidth is zero, t-stats must match the robust covariance fit.
    """
    generator_options = dict(
        p=3,
        const=True,
        rho=0.8,
        common_exog=False,
        included_weights=False,
        output_dict=True,
    )
    mod = SUR(generate_data(**generator_options))
    res = mod.fit(cov_type="kernel", **kernel_options)

    assert "Kernel (HAC) " in str(res)
    expected_kernel = "Kernel: {0}".format(kernel_options["kernel"])
    assert expected_kernel in str(res)

    zero_bandwidth = kernel_options["bandwidth"] == 0
    if zero_bandwidth:
        res_base = mod.fit(cov_type="robust", debiased=kernel_options["debiased"])
        assert_allclose(res.tstats, res_base.tstats)
def test_mv_reg_smoke(mvreg_data):
    """Run a ``multivariate_ls`` model under several fit options."""
    dependent, exog = mvreg_data
    model = SUR.multivariate_ls(dependent, exog)

    # These fits only need to complete.
    model.fit()
    model.fit(cov_type='unadjusted')

    # Requesting OLS must be reflected in the reported method.
    ols_result = model.fit(cov_type='unadjusted', method='ols')
    assert ols_result.method == 'OLS'

    get_res(model.fit(full_cov=False))
def test_mv_reg_smoke(mvreg_data):
    """Run a ``multivariate_ls`` model under several fit options."""
    lhs, rhs = mvreg_data
    mod = SUR.multivariate_ls(lhs, rhs)

    # These fits only need to complete; their results are discarded.
    for options in ({}, {"cov_type": "unadjusted"}):
        mod.fit(**options)

    # Requesting OLS must be reflected in the reported method.
    res = mod.fit(cov_type="unadjusted", method="ols")
    assert res.method == "OLS"

    res = mod.fit(full_cov=False)
    get_res(res)
def test_mv_ols_equivalence_hetero_debiased(mvreg_data):
    """Compare each equation's robust, debiased results with a separate OLS fit."""
    dependent, exog = mvreg_data
    fit_options = dict(cov_type='robust', debiased=True)

    system_result = SUR.multivariate_ls(dependent, exog).fit(**fit_options)
    labels = system_result.equation_labels

    # One single-equation OLS per column of dependent.
    n_equations = dependent.shape[1]
    for column in range(n_equations):
        single = OLS(dependent[:, column], exog).fit(**fit_options)
        check_results(system_result.equations[labels[column]], single)
def test_against_direct_model(data):
    """Compare OLS and GLS parameters with ``simple_sur`` on up to three equations.

    Returns without checking when equations are not mappings or carry weights.
    """
    keys = list(data.keys())
    first = data[keys[0]]
    if not isinstance(first, Mapping) or 'weights' in first:
        return

    # Restrict the system to at most the first three equations.
    selected = keys[:3]
    data_copy = OrderedDict((name, data[name]) for name in selected)
    y = [data[name]['dependent'] for name in selected]
    x = [data[name]['exog'] for name in selected]

    direct = simple_sur(y, x)
    mod = SUR(data_copy)

    ols_params = mod.fit(method='ols').params.values[:, None]
    assert_allclose(ols_params, direct.beta0)

    gls_params = mod.fit(method='gls').params.values[:, None]
    assert_allclose(gls_params, direct.beta1)
def test_formula_repeated_key():
    """Two equations sharing a label produce 'first' and 'first.0'."""
    frame = DataFrame(np.random.standard_normal((500, 4)),
                      columns=['y1', 'y2', 'x1', 'x2'])

    # Both equations use the label "first".
    # NOTE(review): this triple-quoted literal was presumably multi-line
    # originally -- confirm against version control.
    spec = ''' {first:y1 ~ 1 + x1} {first: y2 ~ 1 + x2} '''
    res = SUR.from_formula(spec, frame).fit()

    labels = res.equation_labels
    assert 'first' in labels
    assert 'first.0' in labels
def test_missing(data):
    """Inject NaNs into each dependent and expect exactly one 'missing' warning."""
    strides = [11, 13, 17, 19, 23]

    # Blank every stride-th observation, cycling through the strides by equation.
    for position, label in enumerate(data):
        step = strides[position % 5]
        equation = data[label]
        target = equation['dependent'] if isinstance(equation, Mapping) else equation[0]
        target[::step] = np.nan

    with warnings.catch_warnings(record=True) as caught:
        SUR(data)
        assert len(caught) == 1
        assert 'missing' in caught[0].message.args[0]
def test_formula_repeated_key():
    """Two equations sharing a label produce "first" and "first.0"."""
    columns = ["y1", "y2", "x1", "x2"]
    data = DataFrame(np.random.standard_normal((500, 4)), columns=columns)

    # Both equations use the label "first".
    # NOTE(review): this triple-quoted literal was presumably multi-line
    # originally -- confirm against version control.
    formula = """ {first:y1 ~ 1 + x1} {first: y2 ~ 1 + x2} """
    model = SUR.from_formula(formula, data)
    result = model.fit()

    for expected_label in ("first", "first.0"):
        assert expected_label in result.equation_labels
def test_against_direct_model(data):
    """Compare OLS and GLS parameters with ``simple_sur`` on up to three equations.

    Returns without checking when equations are not mappings or carry weights.
    """
    keys = list(data.keys())
    head = data[keys[0]]
    if not isinstance(head, Mapping):
        return
    if "weights" in head:
        return

    # Restrict the system to at most the first three equations.
    subset = keys[: min(3, len(data))]
    data_copy = {name: data[name] for name in subset}
    y = [data[name]["dependent"] for name in subset]
    x = [data[name]["exog"] for name in subset]

    direct = simple_sur(y, x)
    mod = SUR(data_copy)

    # (fit method, attribute of the direct result holding the reference).
    for method, reference in (("ols", "beta0"), ("gls", "beta1")):
        res = mod.fit(method=method)
        assert_allclose(res.params.values[:, None], getattr(direct, reference))
def test_formula_weights():
    """Check that supplied weights are stored per equation, scaled by their mean."""
    data = DataFrame(
        np.random.standard_normal((500, 4)), columns=["y1", "y2", "x1", "x2"]
    )

    def check_weights(model, frame):
        # Equation i must hold column i of the weights divided by its mean.
        for position in range(2):
            expected = frame.values[:, [position]]
            assert_allclose(model._w[position], expected / expected.mean())

    # Dict formula: weight columns are named after the dict keys.
    weights = DataFrame(np.random.chisquare(5, (500, 2)), columns=["eq1", "eq2"])
    formula = {"eq1": "y1 ~ 1 + x1", "eq2": "y2 ~ 1 + x1"}
    mod = SUR.from_formula(formula, data, weights=weights)
    mod.fit()
    check_weights(mod, weights)

    # String formula: weight columns are named "y1" and "y2".
    formula = "{y1 ~ 1 + x1} {y2 ~ 1 + x2}"
    weights = DataFrame(np.random.chisquare(5, (500, 2)), columns=["y1", "y2"])
    mod = SUR.from_formula(formula, data, weights=weights)
    mod.fit()
    check_weights(mod, weights)
def test_tvalues_homogeneity(method, cov_type):
    """Check that t-stats are unchanged when every dependent is divided by 100."""
    eqns = generate_data(k=3)
    mod = SUR(eqns)

    def tstats_of(estimate):
        # estimate[0] holds the parameters, estimate[1] their covariance.
        return np.squeeze(estimate[0]) / np.sqrt(np.diag(estimate[1]))

    # Reference t-stats from direct_gls with second argument 1 and 1 / 100.
    base_tstat = tstats_of(direct_gls(eqns, 1))
    base_100_tstat = tstats_of(direct_gls(eqns, 1 / 100))
    assert_allclose(base_tstat, base_100_tstat)

    # Extra fit arguments required by some covariance types.
    extra = {}
    if cov_type == "hac":
        extra["bandwidth"] = 1
    elif cov_type == "clustered":
        first_label = list(eqns.keys())[0]
        nobs = eqns[first_label]["dependent"].shape[0]
        rng = np.random.RandomState(231823)
        extra["clusters"] = rng.randint(0, nobs // 5, size=(nobs, 1))

    res0 = mod.fit(method=method, cov_type=cov_type, **extra)

    # Rescale every dependent variable and refit from scratch.
    for label in eqns:
        eqns[label]["dependent"] = eqns[label]["dependent"] / 100.0
    res1 = SUR(eqns).fit(method=method, cov_type=cov_type, **extra)

    assert_allclose(res0.tstats, res1.tstats)
    if method == "gls" and cov_type == "robust":
        assert_allclose(res0.tstats, base_tstat)
        assert_allclose(res1.tstats, base_100_tstat)
def test_likelihood_ratio(k):
    """Compare ``likelihood_ratio`` with a direct computation from the residuals.

    With a single equation the statistic must be an ``InvalidTestStatistic``
    whose value is NaN. Otherwise the statistic, degrees of freedom and
    p-value are checked against values computed from the residual covariance.
    """
    eqns = generate_data(k=k)
    mod = SUR(eqns)
    res = mod.fit()
    stat = res.likelihood_ratio()
    if k == 1:
        assert isinstance(stat, InvalidTestStatistic)
        assert "Likelihood Ratio Test" in str(stat)
        assert np.isnan(stat.stat)
        return

    eps = np.asarray(res.resids)
    sigma = eps.T @ eps / eps.shape[0]
    nobs = res.resids.shape[0]
    # log|diag(sigma)| - log|sigma|, scaled by the number of observations.
    direct = np.linalg.slogdet(sigma * np.eye(k))[1]
    direct -= np.linalg.slogdet(sigma)[1]
    direct *= nobs

    # One restriction per distinct off-diagonal element of a k-by-k covariance;
    # derived from k rather than hard-coded so the check holds for any k > 1.
    df = k * (k - 1) // 2

    assert isinstance(stat, WaldTestStatistic)
    assert_allclose(stat.stat, direct)
    assert stat.df == df
    assert_allclose(stat.pval, 1.0 - scipy.stats.chi2(df).cdf(direct))
    assert "Covariance is diagonal" in stat.null
    assert "Likelihood Ratio Test" in str(stat)
def test_errors():
    """Check that invalid inputs to ``SUR`` and ``fit`` raise the expected errors."""
    # Equations must not be a list, nor a dict of non-equation values.
    with pytest.raises(TypeError):
        SUR([])
    with pytest.raises(TypeError):
        SUR({"a": "absde", "b": 12345})

    # Unknown covariance type. The model is built outside the ``raises`` block
    # so that only ``fit`` can satisfy the expected ValueError.
    moddata = {
        "a": {
            "dependent": np.random.standard_normal((100, 1)),
            "exog": np.random.standard_normal((100, 5)),
        }
    }
    mod = SUR(moddata)
    with pytest.raises(ValueError):
        mod.fit(cov_type="unknown")

    # dependent has 100 rows but exog has 101.
    moddata = {
        "a": {
            "dependent": np.random.standard_normal((100, 1)),
            "exog": np.random.standard_normal((101, 5)),
        }
    }
    with pytest.raises(ValueError):
        SUR(moddata)

    # 20 exog columns with only 10 observations.
    moddata = {
        "a": {
            "dependent": np.random.standard_normal((10, 1)),
            "exog": np.random.standard_normal((10, 20)),
        }
    }
    with pytest.raises(ValueError):
        SUR(moddata)

    # exog contains the same two columns twice.
    x = np.random.standard_normal((100, 2))
    x = np.c_[x, x]
    moddata = {
        "a": {
            "dependent": np.random.standard_normal((100, 1)),
            "exog": x,
        }
    }
    with pytest.raises(ValueError):
        SUR(moddata)
def test_mv_ols_equivalence(mvreg_data):
    """Compare each equation's unadjusted results with a separate OLS fit."""
    dependent, exog = mvreg_data
    system_result = SUR.multivariate_ls(dependent, exog).fit(cov_type='unadjusted')
    labels = system_result.equation_labels
    assert system_result.method == 'OLS'

    # One single-equation OLS per column of dependent.
    n_equations = dependent.shape[1]
    for column in range(n_equations):
        single = OLS(dependent[:, column], exog).fit(cov_type='unadjusted',
                                                     debiased=False)
        equation_result = system_result.equations[labels[column]]
        assert equation_result.method == 'OLS'
        check_results(equation_result, single)
def test_formula_weights():
    """Check that supplied weights are stored per equation, scaled by their mean."""
    data = DataFrame(np.random.standard_normal((500, 4)),
                     columns=['y1', 'y2', 'x1', 'x2'])

    # Dict formula: weight columns are named after the formula keys.
    weights = DataFrame(np.random.chisquare(5, (500, 2)), columns=['eq1', 'eq2'])
    formula = OrderedDict([('eq1', 'y1 ~ 1 + x1'), ('eq2', 'y2 ~ 1 + x1')])
    mod = SUR.from_formula(formula, data, weights=weights)
    mod.fit()
    for position in (0, 1):
        column = weights.values[:, [position]]
        assert_allclose(mod._w[position], column / column.mean())

    # String formula: weight columns are named 'y1' and 'y2'.
    formula = '{y1 ~ 1 + x1} {y2 ~ 1 + x2}'
    weights = DataFrame(np.random.chisquare(5, (500, 2)), columns=['y1', 'y2'])
    mod = SUR.from_formula(formula, data, weights=weights)
    mod.fit()
    for position in (0, 1):
        column = weights.values[:, [position]]
        assert_allclose(mod._w[position], column / column.mean())
def test_brequsch_pagan(k):
    """Compare ``breusch_pagan`` with a direct computation from the residuals.

    With a single equation the statistic must be an ``InvalidTestStatistic``
    whose value is NaN. Otherwise the statistic, degrees of freedom and
    p-value are checked against values computed from residual correlations.
    """
    eqns = generate_data(k=k)
    mod = SUR(eqns)
    res = mod.fit()
    stat = res.breusch_pagan()
    if k == 1:
        assert isinstance(stat, InvalidTestStatistic)
        assert "Breusch-Pagan" in str(stat)
        assert np.isnan(stat.stat)
        return

    rho = np.asarray(res.resids.corr())
    nobs = res.resids.shape[0]
    # Sum the squared correlations strictly above the diagonal. The bounds
    # follow k rather than a hard-coded 3 so every equation pair is covered.
    direct = 0.0
    for i in range(k):
        for j in range(i + 1, k):
            direct += rho[i, j] ** 2
    direct *= nobs

    # One restriction per distinct pair of equations.
    df = k * (k - 1) // 2

    assert isinstance(stat, WaldTestStatistic)
    assert_allclose(stat.stat, direct)
    assert stat.df == df
    assert_allclose(stat.pval, 1.0 - scipy.stats.chi2(df).cdf(direct))
    assert "Residuals are uncorrelated" in stat.null
    assert "Breusch-Pagan" in str(stat)