Example #1
0
def test_single_entity(data):
    x = data.x
    y = data.y
    if isinstance(x, pd.DataFrame):
        time = y.index.levels[1][0]
        y = y.xs(time, level=1, drop_level=False)
        x = x.xs(time, level=1, drop_level=False)
    elif isinstance(x, np.ndarray):
        x = x[:, [0]]
        y = y[[0]]
    else:
        # xarray DataArray
        x = x[:, [0]]
        y = y[:, [0]]
    mod = BetweenOLS(y, x)
    res = mod.fit(reweight=True, debiased=False)

    dep = mod.dependent.dataframe
    exog = mod.exog.dataframe
    ols = IV2SLS(dep, exog, None, None)
    ols_res = ols.fit(cov_type="unadjusted")
    assert_results_equal(res, ols_res)

    res = mod.fit(cov_type="robust", debiased=False)
    ols_res = ols.fit(cov_type="robust")
    assert_results_equal(res, ols_res)

    clusters = pd.DataFrame(np.random.randint(0, 9, dep.shape),
                            index=dep.index)
    res = mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
    ols_res = ols.fit(cov_type="clustered", clusters=clusters)
    assert_results_equal(res, ols_res)
Example #2
0
def test_multiple_obs_per_entity(data):
    mod = BetweenOLS(data.y, data.x)
    res = mod.fit(reweight=True, debiased=False)

    dep = mod.dependent.values3d.mean(1).T
    exog = pd.DataFrame(mod.exog.values3d.mean(1).T,
                        columns=mod.exog.vars)
    ols = IV2SLS(dep, exog, None, None)
    ols_res = ols.fit(cov_type='unadjusted')
    assert_results_equal(res, ols_res)

    res = mod.fit(cov_type='robust', debiased=False)
    ols_res = ols.fit(cov_type='robust', debiased=False)
    assert_results_equal(res, ols_res)

    clusters = mod.dependent.dataframe.copy()
    clusters.loc[:, :] = 0
    clusters = clusters.astype(np.int32)
    for entity in mod.dependent.entities:
        clusters.loc[entity] = np.random.randint(9)

    ols_clusters = PanelData(clusters).values3d.mean(1).T.astype(np.int32)
    res = mod.fit(cov_type='clustered', clusters=clusters, debiased=False)
    ols_res = ols.fit(cov_type='clustered', clusters=ols_clusters)
    assert_results_equal(res, ols_res)
Example #3
0
def test_fitted_effects_residuals(both_data_types):
    mod = BetweenOLS(both_data_types.y, both_data_types.x)
    res = mod.fit(reweight=True, debiased=False)
    expected = pd.DataFrame(
        mod.exog.values2d @ res.params.values,
        mod.dependent.index,
        columns=["fitted_values"],
    )
    assert_allclose(expected, res.fitted_values)
    assert_frame_similar(res.fitted_values, expected)

    index = mod.dependent.dataframe.index
    reindex = index.levels[0][get_codes(index)[0]]
    resids = res.resids.copy()
    resids = resids.reindex(reindex)
    resids.index = index
    expected = pd.DataFrame(resids)
    expected.columns = ["estimated_effects"]
    assert_allclose(expected, res.estimated_effects)
    assert_frame_similar(res.estimated_effects, expected)

    fitted_effects = res.fitted_values.values + res.estimated_effects.values
    expected.iloc[:, 0] = mod.dependent.values2d - fitted_effects
    expected.columns = ["idiosyncratic"]
    assert_allclose(expected, res.idiosyncratic, atol=1e-8)
    assert_frame_similar(res.idiosyncratic, expected)
Example #4
0
def test_missing(missing_data):
    mod = BetweenOLS(missing_data.y, missing_data.x)
    res = mod.fit(reweight=True, debiased=False)

    dep = mod.dependent.dataframe.groupby(level=0).mean()
    exog = mod.exog.dataframe.groupby(level=0).mean()
    weights = mod.weights.dataframe.groupby(level=0).sum()

    dep = dep.reindex(mod.dependent.entities)
    exog = exog.reindex(mod.dependent.entities)
    weights = weights.reindex(mod.dependent.entities)

    ols = IV2SLS(dep, exog, None, None, weights=weights)
    ols_res = ols.fit(cov_type="unadjusted")
    assert_results_equal(res, ols_res)

    res = mod.fit(reweight=True, cov_type="robust", debiased=False)
    ols_res = ols.fit(cov_type="robust")
    assert_results_equal(res, ols_res)

    vc1 = PanelData(missing_data.vc1)
    ols_clusters = vc1.dataframe.groupby(level=0).mean().astype(np.int32)
    ols_clusters = ols_clusters.reindex(mod.dependent.entities)

    res = mod.fit(reweight=True,
                  cov_type="clustered",
                  clusters=missing_data.vc1,
                  debiased=False)
    ols_res = ols.fit(cov_type="clustered", clusters=ols_clusters)
    assert_results_equal(res, ols_res)
Example #5
0
def test_2way_cluster(data):
    mod = BetweenOLS(data.y, data.x)

    dep = mod.dependent.dataframe.groupby(level=0).mean()
    exog = mod.exog.dataframe.groupby(level=0).mean()

    clusters = mod.dependent.dataframe.copy()
    clusters.columns = ["cluster.0"]
    clusters["cluster.1"] = mod.dependent.dataframe.copy()
    clusters.loc[:, :] = 0
    clusters = clusters.astype(np.int32)
    for entity in mod.dependent.entities:
        clusters.loc[entity, :] = np.random.randint(33, size=(1, 2))

    res = mod.fit(cov_type="clustered", clusters=clusters, debiased=False)

    dep = dep.reindex(list(res.resids.index))
    exog = exog.reindex(list(res.resids.index))

    ols = IV2SLS(dep, exog, None, None)
    ols_clusters = clusters.groupby(level=0).max()
    ols_clusters = ols_clusters.reindex(list(res.resids.index))

    ols_res = ols.fit(cov_type="clustered", clusters=ols_clusters)
    assert_results_equal(res, ols_res)
Example #6
0
def test_multiple_obs_per_entity_weighted(data):
    mod = BetweenOLS(data.y, data.x, weights=data.w)
    res = mod.fit(reweight=True, debiased=False)

    weights = np.nansum(mod.weights.values3d, axis=1).T
    wdep = np.nansum(mod.weights.values3d * mod.dependent.values3d, axis=1).T
    wexog = np.nansum(mod.weights.values3d * mod.exog.values3d, axis=1).T
    wdep = wdep / weights
    wexog = wexog / weights

    dep = wdep
    exog = pd.DataFrame(wexog, columns=mod.exog.vars)

    ols = IV2SLS(dep, exog, None, None, weights=weights)
    ols_res = ols.fit(cov_type="unadjusted")
    assert_results_equal(res, ols_res)

    res = mod.fit(cov_type="robust", debiased=False)
    ols_res = ols.fit(cov_type="robust")
    assert_results_equal(res, ols_res)

    clusters = mod.dependent.dataframe.copy()
    clusters.loc[:, :] = 0
    clusters = clusters.astype(np.int32)
    for entity in mod.dependent.entities:
        clusters.loc[entity] = np.random.randint(9)

    ols_clusters = PanelData(clusters).values3d.mean(1).T.astype(np.int32)
    res = mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
    ols_res = ols.fit(cov_type="clustered", clusters=ols_clusters)
    assert_results_equal(res, ols_res)
def test_single_entity_weights(data):
    x = data.x
    y = data.y
    w = data.w
    if isinstance(x, pd.Panel):
        x = x.iloc[:, [0], :]
        y = y.iloc[[0], :]
        w = w.iloc[[0], :]
    else:
        x = x[:, [0]]
        y = y[[0]]
        w = w[[0]]

    mod = BetweenOLS(y, x, weights=w)
    res = mod.fit(reweight=True, debiased=False)

    dep = mod.dependent.dataframe
    exog = mod.exog.dataframe
    ols = IV2SLS(dep, exog, None, None, weights=mod.weights.values2d)
    ols_res = ols.fit(cov_type='unadjusted')
    assert_results_equal(res, ols_res)

    res = mod.fit(cov_type='robust', debiased=False)
    ols_res = ols.fit(cov_type='robust', debiased=False)
    assert_results_equal(res, ols_res)

    clusters = pd.DataFrame(np.random.randint(0, 9, dep.shape),
                            index=dep.index)
    res = mod.fit(cov_type='clustered', clusters=clusters, debiased=False)
    ols_res = ols.fit(cov_type='clustered', clusters=clusters, debiased=False)
    assert_results_equal(res, ols_res)
def test_results_access(data):
    mod = BetweenOLS(data.y, data.x)
    res = mod.fit(debiased=False)
    d = dir(res)
    for key in d:
        if not key.startswith('_'):
            val = getattr(res, key)
            if callable(val):
                val()
Example #9
0
def test_cluster_error(data):
    mod = BetweenOLS(data.y, data.x)
    clusters = mod.dependent.dataframe.copy()
    clusters.loc[:, :] = 0
    clusters = clusters.astype(np.int32)
    for entity in mod.dependent.entities:
        clusters.loc[entity] = np.random.randint(9)
    clusters.iloc[::7, :] = 0

    with pytest.raises(ValueError):
        mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
Example #10
0
def test_default_clusters(data):
    x = data.x
    y = data.y
    if isinstance(x, pd.Panel):
        x = x.iloc[:, [0], :]
        y = y.iloc[[0], :]
    else:
        x = x[:, [0]]
        y = y[[0]]
    mod = BetweenOLS(y, x)
    res = mod.fit(reweight=True, cov_type='clustered', debiased=False)

    dep = mod.dependent.dataframe
    exog = mod.exog.dataframe
    ols = IV2SLS(dep, exog, None, None)
    ols_res = ols.fit(cov_type='clustered')
    assert_results_equal(res, ols_res)
Example #11
0
def test_missing_weighted(missing_data):
    mod = BetweenOLS(missing_data.y, missing_data.x, weights=missing_data.w)
    res = mod.fit(reweight=True, debiased=False)

    weights = mod.weights.dataframe.groupby(level=0).sum()
    weights = weights.reindex(mod.dependent.entities)

    dep = mod.dependent.dataframe * mod.weights.dataframe.values
    dep = dep.groupby(level=0).sum()
    dep = dep.reindex(mod.dependent.entities)
    dep = dep / weights.values

    exog = mod.weights.dataframe.values * mod.exog.dataframe
    exog = exog.groupby(level=0).sum()
    exog = exog.reindex(mod.dependent.entities)
    exog = (1.0 / weights.values) * exog

    ols = IV2SLS(dep, exog, None, None, weights=weights)
    ols_res = ols.fit(cov_type="unadjusted")
    assert_results_equal(res, ols_res)
Example #12
0
def test_default_clusters(data):
    x = data.x
    y = data.y
    if isinstance(x, pd.DataFrame):
        time = y.index.levels[1][0]
        y = y.xs(time, level=1, drop_level=False)
        x = x.xs(time, level=1, drop_level=False)
    elif isinstance(x, np.ndarray):
        x = x[:, [0]]
        y = y[[0]]
    else:
        # xarray DataArray
        x = x[:, [0]]
        y = y[:, [0]]
    mod = BetweenOLS(y, x)
    res = mod.fit(reweight=True, cov_type="clustered", debiased=False)

    dep = mod.dependent.dataframe
    exog = mod.exog.dataframe
    ols = IV2SLS(dep, exog, None, None)
    ols_res = ols.fit(cov_type="clustered")
    assert_results_equal(res, ols_res)
Example #13
0
def test_unknown_covariance(data):
    mod = BetweenOLS(data.y, data.x)
    with pytest.raises(KeyError):
        mod.fit(cov_type="unknown")
Example #14
0
def test_alt_rsquared_weighted_missing(missing_data):
    mod = BetweenOLS(missing_data.y, missing_data.x, weights=missing_data.w)
    res = mod.fit(debiased=False)
    assert_allclose(res.rsquared, res.rsquared_between)
Example #15
0
def test_alt_rsquared(data):
    mod = BetweenOLS(data.y, data.x)
    res = mod.fit(debiased=False)
    assert_allclose(res.rsquared, res.rsquared_between)
Example #16
0
def test_results_access(data):
    mod = BetweenOLS(data.y, data.x)
    res = mod.fit(debiased=False)
    access_attributes(res)
def test_extra_df(data):
    mod = BetweenOLS(data.y, data.x)
    res = mod.fit()
    res_extra = mod.fit(extra_df=10)
    assert np.all(np.diag(res_extra.cov) > np.diag(res.cov))