def test_panel_effects_sanity(data):
    """Fitted values + residuals + estimated effects must reconstruct the
    dependent variable exactly for every effects/weights configuration."""

    def _check_decomposition(mod):
        # y == X @ beta + resid + effects is an exact identity, not a fit check.
        res = mod.fit(auto_df=False, count_effects=False)
        expected = mod.exog.values2d @ res.params.values[:, None]
        expected += res.resids.values[:, None]
        expected += res.estimated_effects.values
        assert_allclose(mod.dependent.values2d, expected)

    _check_decomposition(PanelOLS(data.y, data.x, entity_effects=True))
    _check_decomposition(
        PanelOLS(data.y, data.x, entity_effects=True, time_effects=True)
    )
    _check_decomposition(PanelOLS(data.y, data.x, weights=data.w, entity_effects=True))
    _check_decomposition(
        PanelOLS(data.y, data.x, weights=data.w, entity_effects=True, time_effects=True)
    )
def test_cluster_smoke(data):
    """Smoke-test clustered covariance options, including rejected combinations."""
    mod = PanelOLS(data.y, data.x, entity_effects=True)
    mod.fit(cov_type="clustered", cluster_time=True, debiased=False)
    mod.fit(cov_type="clustered", cluster_entity=True, debiased=False)
    two_way = PanelData(data.vc2)
    one_way = PanelData(data.vc1)
    mod.fit(cov_type="clustered", clusters=two_way, debiased=False)
    mod.fit(cov_type="clustered", cluster_entity=True, clusters=one_way, debiased=False)
    mod.fit(cov_type="clustered", cluster_time=True, clusters=one_way, debiased=False)
    # More than two total cluster variables must be rejected.
    with pytest.raises(ValueError):
        mod.fit(cov_type="clustered", cluster_time=True, clusters=two_way, debiased=False)
    with pytest.raises(ValueError):
        mod.fit(cov_type="clustered", cluster_entity=True, clusters=two_way, debiased=False)
    with pytest.raises(ValueError):
        mod.fit(
            cov_type="clustered",
            cluster_entity=True,
            cluster_time=True,
            clusters=one_way,
            debiased=False,
        )
    # Cluster assignments that cover only part of the sample must be rejected.
    with pytest.raises(ValueError):
        half = one_way.dataframe.iloc[: one_way.dataframe.shape[0] // 2]
        mod.fit(cov_type="clustered", clusters=half, debiased=False)
def test_other_weighted_smoke(data):
    """Weighted estimation with other (general) effects runs without error."""
    model = PanelOLS(data.y, data.x, weights=data.w, other_effects=data.c)
    model.fit(debiased=False)
def test_too_many_effects(data):
    """Specifying three effects (entity + time + other) must raise ValueError.

    FIX: the original used the misspelled keywords ``entity_effect``/``time_effect``
    (singular), which would raise ``TypeError`` for an unexpected keyword rather
    than exercising the intended over-specification ``ValueError``.
    """
    with pytest.raises(ValueError):
        PanelOLS(
            data.y,
            data.x,
            entity_effects=True,
            time_effects=True,
            other_effects=data.c,
        )
def test_alt_rsquared_weighted(data):
    """With entity effects, the reported R2 equals the within R2 (weighted case)."""
    model = PanelOLS(data.y, data.x, entity_effects=True, weights=data.w)
    result = model.fit(debiased=False)
    assert_allclose(result.rsquared, result.rsquared_within)
def test_panel_no_effects(data):
    """PanelOLS with no effects is numerically identical to PooledOLS."""
    panel_res = PanelOLS(data.y, data.x).fit()
    pooled_res = PooledOLS(data.y, data.x).fit()
    assert_results_equal(panel_res, pooled_res)
def test_panel_time_lsdv(large_data):
    """Time-effects PanelOLS matches an explicit time-dummy IV2SLS regression
    under unadjusted, robust, and several clustered covariance estimators."""
    mod = PanelOLS(large_data.y, large_data.x, time_effects=True)
    res = mod.fit(auto_df=False, count_effects=False, debiased=False)
    y = mod.dependent.dataframe
    x = mod.exog.dataframe
    # Time dummies; drop one column if the model already has a constant.
    d = mod.dependent.dummies("time", drop_first=mod.has_constant)
    d_cols = list(d.columns)
    d = d.values
    if mod.has_constant:
        # Partial the constant out of the dummies so the augmented regressor
        # matrix stays full rank.
        z = np.ones_like(y)
        d = d - z @ lstsq(z, d, rcond=None)[0]
    xd = np.c_[x.values, d]
    xd = pd.DataFrame(xd, index=x.index, columns=list(x.columns) + d_cols)
    ols_mod = IV2SLS(y, xd, None, None)
    res2 = ols_mod.fit(cov_type="unadjusted")
    assert_results_equal(res, res2, test_fit=False)
    assert_allclose(res.rsquared_inclusive, res2.rsquared)
    # Heteroskedasticity-robust covariance.
    res = mod.fit(cov_type="robust", auto_df=False, count_effects=False, debiased=False)
    res2 = ols_mod.fit(cov_type="robust")
    assert_results_equal(res, res2, test_fit=False)
    # One-way clustering with externally supplied clusters.
    clusters = large_data.vc1
    ols_clusters = mod.reformat_clusters(clusters)
    res = mod.fit(
        cov_type="clustered",
        clusters=clusters,
        auto_df=False,
        count_effects=False,
        debiased=False,
    )
    res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
    assert_results_equal(res, res2, test_fit=False)
    # Two-way clustering with externally supplied clusters.
    clusters = large_data.vc2
    ols_clusters = mod.reformat_clusters(clusters)
    res = mod.fit(
        cov_type="clustered",
        clusters=clusters,
        auto_df=False,
        count_effects=False,
        debiased=False,
    )
    res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
    assert_results_equal(res, res2, test_fit=False)
    # Clustering by time period.
    res = mod.fit(
        cov_type="clustered",
        cluster_time=True,
        auto_df=False,
        count_effects=False,
        debiased=False,
    )
    clusters = pd.DataFrame(mod.dependent.time_ids,
                            index=mod.dependent.index, columns=["var.clust"])
    res2 = ols_mod.fit(cov_type="clustered", clusters=clusters)
    assert_results_equal(res, res2, test_fit=False)
    # Clustering by entity.
    res = mod.fit(
        cov_type="clustered",
        cluster_entity=True,
        auto_df=False,
        count_effects=False,
        debiased=False,
    )
    clusters = pd.DataFrame(mod.dependent.entity_ids,
                            index=mod.dependent.index, columns=["var.clust"])
    res2 = ols_mod.fit(cov_type="clustered", clusters=clusters)
    assert_results_equal(res, res2, test_fit=False)
def test_panel_other_lsdv(data):
    """Other-effects PanelOLS matches an explicit dummy-variable IV2SLS
    regression under unadjusted, robust, and clustered covariances."""
    mod = PanelOLS(data.y, data.x, other_effects=data.c)
    assert "Num Other Effects: 2" in str(mod)
    res = mod.fit(auto_df=False, count_effects=False, debiased=False)
    y = mod.dependent.dataframe.copy()
    x = mod.exog.dataframe.copy()
    c = mod._other_effect_cats.dataframe.copy()
    # Build dummies per categorical effect; drop the first level when a
    # constant is present, and always for effects after the first, to avoid
    # perfect collinearity.
    d = []
    d_columns = []
    for i, col in enumerate(c):
        s = c[col].copy()
        dummies = pd.get_dummies(s.astype(np.int64), drop_first=(mod.has_constant or i > 0))
        # NOTE: the comprehension variable ``c`` shadows the outer dataframe
        # ``c`` here but does not leak in Python 3.
        dummies.columns = [s.name + "_val_" + str(c) for c in dummies.columns]
        d_columns.extend(list(dummies.columns))
        d.append(dummies.values)
    d = np.column_stack(d)
    if mod.has_constant:
        # Partial the constant out of the dummies to keep full rank.
        z = np.ones_like(y)
        d = d - z @ lstsq(z, d, rcond=None)[0]
    xd = np.c_[x.values, d]
    xd = pd.DataFrame(xd, index=x.index, columns=list(x.columns) + list(d_columns))
    ols_mod = IV2SLS(y, xd, None, None)
    res2 = ols_mod.fit(cov_type="unadjusted")
    assert_results_equal(res, res2, test_fit=False)
    # Explicit "unadjusted" must match the default covariance used above.
    res3 = mod.fit(cov_type="unadjusted", auto_df=False, count_effects=False, debiased=False)
    assert_results_equal(res, res3)
    # Heteroskedasticity-robust covariance.
    res = mod.fit(cov_type="robust", auto_df=False, count_effects=False, debiased=False)
    res2 = ols_mod.fit(cov_type="robust")
    assert_results_equal(res, res2, test_fit=False)
    # One-way clustering with externally supplied clusters.
    clusters = data.vc1
    ols_clusters = mod.reformat_clusters(clusters)
    res = mod.fit(
        cov_type="clustered",
        clusters=clusters,
        auto_df=False,
        count_effects=False,
        debiased=False,
    )
    res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
    assert_results_equal(res, res2, test_fit=False)
    # Two-way clustering with externally supplied clusters.
    clusters = data.vc2
    ols_clusters = mod.reformat_clusters(clusters)
    res = mod.fit(
        cov_type="clustered",
        clusters=clusters,
        auto_df=False,
        count_effects=False,
        debiased=False,
    )
    res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
    assert_results_equal(res, res2, test_fit=False)
    # Clustering by time period.
    res = mod.fit(
        cov_type="clustered",
        cluster_time=True,
        auto_df=False,
        count_effects=False,
        debiased=False,
    )
    clusters = pd.DataFrame(mod.dependent.time_ids,
                            index=mod.dependent.index, columns=["var.clust"])
    res2 = ols_mod.fit(cov_type="clustered", clusters=clusters)
    assert_results_equal(res, res2, test_fit=False)
    # Clustering by entity.
    res = mod.fit(
        cov_type="clustered",
        cluster_entity=True,
        auto_df=False,
        count_effects=False,
        debiased=False,
    )
    clusters = pd.DataFrame(mod.dependent.entity_ids,
                            index=mod.dependent.index, columns=["var.clust"])
    res2 = ols_mod.fit(cov_type="clustered", clusters=clusters)
    assert_results_equal(res, res2, test_fit=False)
def test_panel_lsdv(data):
    """Effect-model estimates agree with explicit LSDV (dummy-variable) fits."""
    mod = PanelOLS(data.y, data.x, entity_effects=True)
    y, x = mod.dependent.dataframe, mod.exog.dataframe
    assert_allclose(mod.fit().params.squeeze(), lsdv(y, x, has_const=False, entity=True))

    mod = PanelOLS(data.y, data.x, time_effects=True)
    assert_allclose(mod.fit().params.squeeze(), lsdv(y, x, has_const=False, time=True))

    mod = PanelOLS(data.y, data.x, entity_effects=True, time_effects=True)
    assert_allclose(
        mod.fit().params.squeeze(),
        lsdv(y, x, has_const=False, entity=True, time=True),
        rtol=1e-4,
    )

    # Construct an integer categorical with roughly three repeats per level
    # to serve as a general (other) effect.
    other = y.copy()
    other.iloc[:, :] = 0
    other = other.astype(np.int64)
    step = other.shape[0] // 3
    for level in range(step):
        other.iloc[level::step] = level
    mod = PanelOLS(y, x, other_effects=other)
    assert_allclose(
        mod.fit().params.squeeze(),
        lsdv(y, x, has_const=False, general=other.iloc[:, 0].values),
        rtol=1e-4,
    )
def test_panel_time_lsdv_weighted(large_data):
    """Weighted time-effects PanelOLS matches a weighted time-dummy IV2SLS
    regression under unadjusted, robust, and clustered covariances."""
    mod = PanelOLS(large_data.y, large_data.x, time_effects=True, weights=large_data.w)
    res = mod.fit(auto_df=False, count_effects=False, debiased=False)
    y = mod.dependent.dataframe
    x = mod.exog.dataframe
    w = mod.weights.dataframe
    # Time dummies; drop one column if the model already has a constant.
    d = mod.dependent.dummies('time', drop_first=mod.has_constant)
    d_cols = d.columns
    d = d.values
    if mod.has_constant:
        # Partial the constant out of the dummies using the weighted
        # projection so the augmented regressor matrix stays full rank.
        z = np.ones_like(y)
        root_w = np.sqrt(w.values)
        wd = root_w * d
        wz = root_w * z
        # FIX: pass rcond=None explicitly, matching the other LSDV tests in
        # this file and silencing numpy's FutureWarning about the default.
        d = d - z @ lstsq(wz, wd, rcond=None)[0]
    xd = np.c_[x.values, d]
    xd = pd.DataFrame(xd, index=x.index, columns=list(x.columns) + list(d_cols))
    ols_mod = IV2SLS(y, xd, None, None, weights=w)
    res2 = ols_mod.fit(cov_type='unadjusted')
    assert_results_equal(res, res2, test_fit=False)
    # Heteroskedasticity-robust covariance.
    res = mod.fit(cov_type='robust', auto_df=False, count_effects=False, debiased=False)
    res2 = ols_mod.fit(cov_type='robust')
    assert_results_equal(res, res2, test_fit=False)
    # One-way clustering with externally supplied clusters.
    clusters = large_data.vc1
    ols_clusters = mod.reformat_clusters(clusters)
    res = mod.fit(
        cov_type='clustered',
        clusters=clusters,
        auto_df=False,
        count_effects=False,
        debiased=False,
    )
    res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
    assert_results_equal(res, res2, test_fit=False)
    # Two-way clustering with externally supplied clusters.
    clusters = large_data.vc2
    ols_clusters = mod.reformat_clusters(clusters)
    res = mod.fit(
        cov_type='clustered',
        clusters=clusters,
        auto_df=False,
        count_effects=False,
        debiased=False,
    )
    res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
    assert_results_equal(res, res2, test_fit=False)
    # Clustering by time period.
    res = mod.fit(
        cov_type='clustered',
        cluster_time=True,
        auto_df=False,
        count_effects=False,
        debiased=False,
    )
    clusters = pd.DataFrame(mod.dependent.time_ids,
                            index=mod.dependent.index, columns=['var.clust'])
    res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
    assert_results_equal(res, res2, test_fit=False)
    # Clustering by entity.
    res = mod.fit(
        cov_type='clustered',
        cluster_entity=True,
        auto_df=False,
        count_effects=False,
        debiased=False,
    )
    clusters = pd.DataFrame(mod.dependent.entity_ids,
                            index=mod.dependent.index, columns=['var.clust'])
    res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
    assert_results_equal(res, res2, test_fit=False)
def test_valid_weight_shape(data):
    """Weights supplied per-observation, per-time, per-entity, or as a full
    panel are accepted and normalized to mean 1 over the non-missing sample.

    NOTE(review): a later function of the same name in this file shadows this
    definition, so under pytest only the later one runs — confirm which
    version is intended to survive.
    """
    # Same size
    n = np.prod(data.y.shape)
    weights = 1 + np.random.random_sample(n)
    mod = PanelOLS(data.y, data.x, weights=weights)
    mod.fit()
    w = mod.weights.values2d
    # Observations dropped for missing y or x are excluded before normalizing.
    missing = PanelData(data.y).isnull | PanelData(data.x).isnull
    expected = weights[~missing.squeeze()][:, None]
    expected = expected / expected.mean()
    assert_equal(w, expected)
    # Per time
    # The (n, k) layout depends on the container type of the input data.
    if isinstance(data.x, pd.DataFrame):
        n = len(data.y.index.levels[1])
        k = len(data.y.index.levels[0])
    elif isinstance(data.x, np.ndarray):
        n = data.y.shape[0]
        k = data.y.shape[1]
    else:
        n = data.y.shape[1]
        k = data.y.shape[2]
    weights = 1 + np.random.random_sample(n)
    mod = PanelOLS(data.y, data.x, weights=weights)
    mod.fit()
    w = mod.weights.values2d
    # Broadcast the per-period weights across entities.
    expected = weights[:, None] @ np.ones((1, k))
    expected = expected.T.ravel()
    expected = expected[~missing.squeeze()][:, None]
    expected = expected / expected.mean()
    assert_equal(w, expected)
    # Per entity
    if isinstance(data.x, pd.DataFrame):
        n = len(data.y.index.levels[0])
        k = len(data.y.index.levels[1])
    elif isinstance(data.x, np.ndarray):
        n = data.y.shape[1]
        k = data.y.shape[0]
    else:
        n = data.y.shape[2]
        k = data.y.shape[1]
    weights = 1 + np.random.random_sample(n)
    mod = PanelOLS(data.y, data.x, weights=weights)
    mod.fit()
    w = mod.weights.values2d
    # Broadcast the per-entity weights across time periods.
    expected = np.ones((k, 1)) @ weights[None, :]
    expected = expected.T.ravel()
    expected = expected[~missing.squeeze()][:, None]
    expected = expected / expected.mean()
    assert_equal(w, expected)
    # Full panel of weights, one per (time, entity) cell.
    weights = 1 + np.random.random_sample(data.y.shape)
    mod = PanelOLS(data.y, data.x, weights=weights)
    mod.fit()
    w = mod.weights.values2d
    expected = weights.T.ravel()
    expected = expected[~missing.squeeze()][:, None]
    expected = expected / expected.mean()
    assert_equal(w, expected)
def test_panel_both_lsdv(data):
    """Entity+time effects PanelOLS matches an explicit entity/time dummy
    IV2SLS regression under unadjusted, robust, and clustered covariances."""
    mod = PanelOLS(data.y, data.x, entity_effects=True, time_effects=True)
    res = mod.fit(auto_df=False, count_effects=False, debiased=False)
    y = mod.dependent.dataframe
    x = mod.exog.dataframe
    # Entity dummies drop a column only when a constant is present; time
    # dummies always drop one to avoid collinearity with the entity set.
    d1 = mod.dependent.dummies('entity', drop_first=mod.has_constant)
    d2 = mod.dependent.dummies('time', drop_first=True)
    d = np.c_[d1.values, d2.values]
    if mod.has_constant:
        # Partial the constant out of the dummies so the augmented regressor
        # matrix stays full rank.
        z = np.ones_like(y)
        # FIX: pass rcond=None explicitly, matching the other LSDV tests in
        # this file and silencing numpy's FutureWarning about the default.
        d = d - z @ lstsq(z, d, rcond=None)[0]
    xd = np.c_[x.values, d]
    xd = pd.DataFrame(
        xd,
        index=x.index,
        columns=list(x.columns) + list(d1.columns) + list(d2.columns),
    )
    ols_mod = IV2SLS(y, xd, None, None)
    res2 = ols_mod.fit(cov_type='unadjusted')
    assert_results_equal(res, res2, test_fit=False)
    assert_allclose(res.rsquared_inclusive, res2.rsquared)
    # Heteroskedasticity-robust covariance.
    res = mod.fit(cov_type='robust', auto_df=False, count_effects=False, debiased=False)
    res2 = ols_mod.fit(cov_type='robust')
    assert_results_equal(res, res2, test_fit=False)
    # One-way clustering with externally supplied clusters.
    clusters = data.vc1
    ols_clusters = mod.reformat_clusters(clusters)
    res = mod.fit(
        cov_type='clustered',
        clusters=clusters,
        auto_df=False,
        count_effects=False,
        debiased=False,
    )
    res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
    assert_results_equal(res, res2, test_fit=False)
    # Two-way clustering with externally supplied clusters.
    clusters = data.vc2
    ols_clusters = mod.reformat_clusters(clusters)
    res = mod.fit(
        cov_type='clustered',
        clusters=clusters,
        auto_df=False,
        count_effects=False,
        debiased=False,
    )
    res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
    assert_results_equal(res, res2, test_fit=False)
    # Clustering by time period.
    res = mod.fit(
        cov_type='clustered',
        cluster_time=True,
        auto_df=False,
        count_effects=False,
        debiased=False,
    )
    clusters = pd.DataFrame(mod.dependent.time_ids,
                            index=mod.dependent.index, columns=['var.clust'])
    res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
    assert_results_equal(res, res2, test_fit=False)
    # Clustering by entity.
    res = mod.fit(
        cov_type='clustered',
        cluster_entity=True,
        auto_df=False,
        count_effects=False,
        debiased=False,
    )
    clusters = pd.DataFrame(mod.dependent.entity_ids,
                            index=mod.dependent.index, columns=['var.clust'])
    res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
    assert_results_equal(res, res2, test_fit=False)
def test_results_access(data):
    """Smoke test: every public attribute/method of a results object is
    accessible (and callable members can be called) for each model spec.

    NOTE(review): a later definition of ``test_results_access`` in this file
    shadows this one, so under pytest only the later version runs.
    """

    def _access_all(res):
        # Touch every public member, calling it when callable.
        for key in dir(res):
            if not key.startswith('_'):
                val = getattr(res, key)
                if callable(val):
                    val()

    _access_all(PanelOLS(data.y, data.x, entity_effects=True).fit())
    _access_all(PanelOLS(data.y, data.x, other_effects=data.c).fit())
    _access_all(PanelOLS(data.y, data.x, time_effects=True, entity_effects=True).fit())
    _access_all(PanelOLS(data.y, data.x).fit())
    # Constant-only regressor.
    const = PanelData(data.y).copy()
    const.dataframe.iloc[:, :] = 1
    const.dataframe.columns = ['const']
    _access_all(PanelOLS(data.y, const).fit())
def test_lsdv_options(data):
    """``fit()`` and ``fit(use_lsdv=True)`` must give identical results for
    every supported combination of weights and effects."""
    c1 = PanelData(data.c).dataframe.iloc[:, [0]]
    # Each entry is one model specification; order matches the original
    # sequence of pairwise comparisons.
    specs = [
        dict(weights=data.w),
        dict(weights=data.w, entity_effects=True),
        dict(time_effects=True),
        dict(time_effects=True, entity_effects=True),
        dict(entity_effects=True, other_effects=c1),
        dict(time_effects=True, other_effects=c1),
        dict(weights=data.w, entity_effects=True, other_effects=c1),
        dict(weights=data.w, time_effects=True, other_effects=c1),
        dict(weights=data.w, other_effects=data.c),
    ]
    for kwargs in specs:
        mod = PanelOLS(data.y, data.x, **kwargs)
        res1 = mod.fit()
        res2 = mod.fit(use_lsdv=True)
        assert_results_equal(res1, res2)
def test_const_data_only_weights(const_data):
    """Weighted PanelOLS on constant-only data matches weighted IV2SLS."""
    y, x = const_data.y, const_data.x
    panel_res = PanelOLS(y, x, weights=const_data.w).fit(debiased=False)
    iv_res = IV2SLS(y, x, None, None, weights=const_data.w).fit()
    assert_allclose(panel_res.params, iv_res.params)
def test_panel_ols(data):
    """Smoke test: PanelOLS fits with no effects, entity effects, and time effects."""
    for kwargs in ({}, {"entity_effects": True}, {"time_effects": True}):
        PanelOLS(data.y, data.x, **kwargs).fit()
def test_absorbed_option(data):
    """``drop_absorbed`` has no effect when no regressor is absorbed by the effects."""
    fit_opts = dict(auto_df=False, count_effects=False, debiased=False)
    res_drop = PanelOLS(data.y, data.x, entity_effects=True, drop_absorbed=True).fit(**fit_opts)
    res_keep = PanelOLS(data.y, data.x, entity_effects=True, drop_absorbed=False).fit(**fit_opts)
    assert_results_equal(res_drop, res_keep)
def test_valid_weight_shape(data):
    """Weights per-observation, per-time, per-entity, or as a full panel are
    all normalized to mean 1 over the non-missing sample."""
    # NOTE(review): an earlier function in this file has the same name; this
    # later definition shadows it, so only this version runs under pytest.
    missing = PanelData(data.y).isnull | PanelData(data.x).isnull

    def _expected(raw_flat):
        kept = raw_flat[~missing.squeeze()][:, None]
        return kept / kept.mean()

    # One weight per observation.
    nobs = np.prod(data.y.shape)
    weights = 1 + np.random.random_sample(nobs)
    mod = PanelOLS(data.y, data.x, weights=weights)
    mod.fit()
    assert_equal(mod.weights.values2d, _expected(weights))

    # One weight per time period, broadcast across entities.
    nperiods = data.y.shape[0]
    weights = 1 + np.random.random_sample(nperiods)
    mod = PanelOLS(data.y, data.x, weights=weights)
    mod.fit()
    broadcast = weights[:, None] @ np.ones((1, data.y.shape[1]))
    assert_equal(mod.weights.values2d, _expected(broadcast.T.ravel()))

    # One weight per entity, broadcast across time.
    nentity = data.y.shape[1]
    weights = 1 + np.random.random_sample(nentity)
    mod = PanelOLS(data.y, data.x, weights=weights)
    mod.fit()
    broadcast = np.ones((data.y.shape[0], 1)) @ weights[None, :]
    assert_equal(mod.weights.values2d, _expected(broadcast.T.ravel()))

    # Full panel of weights, one per (time, entity) cell.
    weights = 1 + np.random.random_sample(data.y.shape)
    mod = PanelOLS(data.y, data.x, weights=weights)
    mod.fit()
    assert_equal(mod.weights.values2d, _expected(weights.T.ravel()))
def test_panel_no_effects_weighted(data):
    """Weighted PanelOLS without effects matches weighted PooledOLS."""
    panel_res = PanelOLS(data.y, data.x, weights=data.w).fit()
    pooled_res = PooledOLS(data.y, data.x, weights=data.w).fit()
    assert_results_equal(panel_res, pooled_res)
def test_results_access(data):
    """All public members of fit results are accessible for each model spec."""
    # NOTE(review): this shadows an earlier same-named test in this file.
    specs = [
        dict(entity_effects=True),
        dict(other_effects=data.c),
        dict(time_effects=True, entity_effects=True),
        dict(),
    ]
    for kwargs in specs:
        access_attributes(PanelOLS(data.y, data.x, **kwargs).fit())

    # Constant-only regressor.
    const = PanelData(data.y).copy()
    const.dataframe.iloc[:, :] = 1
    const.dataframe.columns = ["const"]
    access_attributes(PanelOLS(data.y, const).fit())
def test_panel_both_lsdv_weighted(data):
    """Weighted entity+time effects PanelOLS matches an explicit weighted
    dummy-variable IV2SLS regression across covariance estimators."""
    mod = PanelOLS(data.y, data.x, entity_effects=True, time_effects=True, weights=data.w)
    res = mod.fit(auto_df=False, count_effects=False, debiased=False)
    y = mod.dependent.dataframe
    x = mod.exog.dataframe
    w = mod.weights.dataframe
    # Entity dummies drop a column only when a constant is present; time
    # dummies always drop one to avoid collinearity with the entity set.
    d1 = mod.dependent.dummies("entity", drop_first=mod.has_constant)
    d2 = mod.dependent.dummies("time", drop_first=True)
    d = np.c_[d1.values, d2.values]
    if mod.has_constant:
        # Partial the constant out of the dummies using the weighted
        # projection so the augmented regressor matrix stays full rank.
        z = np.ones_like(y)
        root_w = np.sqrt(w.values)
        wd = root_w * d
        wz = root_w * z
        d = d - z @ lstsq(wz, wd, rcond=None)[0]
    xd = np.c_[x.values, d]
    xd = pd.DataFrame(xd, index=x.index,
                      columns=list(x.columns) + list(d1.columns) + list(d2.columns))
    ols_mod = IV2SLS(y, xd, None, None, weights=w)
    res2 = ols_mod.fit(cov_type="unadjusted")
    assert_results_equal(res, res2, test_fit=False)
    assert_allclose(res.rsquared_inclusive, res2.rsquared)
    # Heteroskedasticity-robust covariance.
    res = mod.fit(cov_type="robust", auto_df=False, count_effects=False, debiased=False)
    res2 = ols_mod.fit(cov_type="robust")
    assert_results_equal(res, res2, test_fit=False)
    # One-way clustering with externally supplied clusters.
    clusters = data.vc1
    ols_clusters = mod.reformat_clusters(clusters)
    res = mod.fit(
        cov_type="clustered",
        clusters=clusters,
        auto_df=False,
        count_effects=False,
        debiased=False,
    )
    res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
    assert_results_equal(res, res2, test_fit=False)
    # Two-way clustering with externally supplied clusters.
    clusters = data.vc2
    ols_clusters = mod.reformat_clusters(clusters)
    res = mod.fit(
        cov_type="clustered",
        clusters=clusters,
        auto_df=False,
        count_effects=False,
        debiased=False,
    )
    res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
    assert_results_equal(res, res2, test_fit=False)
    # Clustering by time period.
    res = mod.fit(
        cov_type="clustered",
        cluster_time=True,
        auto_df=False,
        count_effects=False,
        debiased=False,
    )
    clusters = pd.DataFrame(mod.dependent.time_ids,
                            index=mod.dependent.index, columns=["var.clust"])
    res2 = ols_mod.fit(cov_type="clustered", clusters=clusters)
    assert_results_equal(res, res2, test_fit=False)
    # Clustering by entity.
    res = mod.fit(
        cov_type="clustered",
        cluster_entity=True,
        auto_df=False,
        count_effects=False,
        debiased=False,
    )
    clusters = pd.DataFrame(mod.dependent.entity_ids,
                            index=mod.dependent.index, columns=["var.clust"])
    res2 = ols_mod.fit(cov_type="clustered", clusters=clusters)
    assert_results_equal(res, res2, test_fit=False)
# NOTE(review): notebook-style script fragment. It depends on ``Data``,
# ``params``, and ``copy`` defined earlier (outside this view) and continues
# past it — confirm the surrounding cells before running in isolation.
from linearmodels.panel.model import PanelOLS
import statsmodels.api as sm

# Use (id, Age) as the panel (entity, time) index.
Data = Data.set_index(["id", "Age"])
# Create the variables they actually use
# Sign of the expected income change: +1 / 0 / -1.
Data["ExpBin"] = 0
Data.loc[Data["ExpIncChange"] > 0, "ExpBin"] = 1
Data.loc[Data["ExpIncChange"] < 0, "ExpBin"] = -1
# Sign of the realized income change: +1 / 0 / -1.
Data["ChangeBin"] = 0
Data.loc[Data["Y_change"] > 0, "ChangeBin"] = 1
Data.loc[Data["Y_change"] < 0, "ChangeBin"] = -1
# Entity fixed-effects regression of expected-change sign on realized-change sign.
mod = PanelOLS(Data.ExpBin, sm.add_constant(Data.ChangeBin), entity_effects=True)
fe_res = mod.fit()
print(fe_res)
# %% [markdown]
# The estimated $\hat{\gamma}_{1}$ is negative because in usual life-cycle calibrations, transitory shocks are volatile enough that mean reversion of transitory fluctuations is a stronger force than persistent trends in income age-profiles.
#
# However, with less volatile transitory shocks, the regression coefficient would be positive. We demonstrate this by shutting off transitory shocks, simulating another population of agents, and re-running the regression.
# %% tags=[]
# %%capture
# Shut off transitory shocks by zeroing their standard deviations.
params_no_transitory = copy(params)
params_no_transitory.update({"TranShkStd": [0.0] * len(params["TranShkStd"])})
# Create agent