コード例 #1
0
def test_categorical_input(data):
    """Fit PanelOLS with categorical other-effects and categorical clusters.

    Both the ``other_effects`` frame and the ``clusters`` frame are built
    from random integer codes wrapped in ``pd.Categorical``; the test passes
    if both fits complete without raising.
    """
    panel = PanelData(data.y)
    nobs = panel.values2d.shape[0]

    def as_categorical_frame(codes):
        # One categorical column per column of ``codes``, named "effect.<k>".
        columns = {}
        for k in range(codes.shape[1]):
            label = "effect." + str(k)
            series = pd.Series(codes[:, k], index=panel.index, name=label)
            columns[label] = pd.Categorical(series)
        return pd.DataFrame(columns, index=panel.index)

    effect_codes = np.random.randint(0, 5, size=(nobs, 2))
    model = PanelOLS(data.y, data.x, other_effects=as_categorical_frame(effect_codes))
    model.fit()

    cluster_codes = np.random.randint(0, panel.shape[2] // 2, size=(nobs, 2))
    model.fit(cov_type="clustered", clusters=as_categorical_frame(cluster_codes))
コード例 #2
0
def test_incorrect_type(data):
    """compare() must raise TypeError when given a non-panel result.

    A PanelOLS result and an IV2SLS result (fitted on the same dependent and
    exogenous frames) are passed together to ``compare``.
    """
    indexed = data.set_index(['nr', 'year'])
    lwage = indexed.lwage
    regressors = sm.add_constant(indexed[['expersq', 'married', 'union']])

    panel_model = PanelOLS(lwage, regressors)
    panel_res = panel_model.fit()

    iv_model = IV2SLS(panel_model.dependent.dataframe,
                      panel_model.exog.dataframe,
                      None,
                      None)
    iv_res = iv_model.fit()

    mixed_results = {'model1': panel_res, 'model2': iv_res}
    with pytest.raises(TypeError):
        compare(mixed_results)
コード例 #3
0
def test_mixed_input(data):
    """Fit PanelOLS when effects and clusters mix categorical, string and int columns.

    The test passes if both the plain fit and the clustered fit complete
    without raising.
    """
    panel = PanelData(data.y)
    idx = panel.index
    nobs = panel.values2d.shape[0]

    # Effects: one categorical integer column, one raw string column.
    int_codes = np.random.randint(0, 5, size=nobs)
    letters = ['a', 'b', 'c', 'd', 'e']
    effect_columns = {
        'effect.0': pd.Categorical(pd.Series(int_codes, index=idx)),
        'effect.1': pd.Series(np.random.choice(letters, size=nobs), index=idx),
    }
    model = PanelOLS(data.y, data.x,
                     other_effects=pd.DataFrame(effect_columns, index=idx))
    model.fit()

    # Clusters: one two-letter string column, one integer column.
    int_clusters = np.random.randint(0, panel.shape[2] // 2, size=(nobs, 2))
    letter_pairs = [''.join(pair) for pair in product(ascii_lowercase, ascii_lowercase)]
    cluster_columns = {
        'var.cluster.0': pd.Series(np.random.choice(letter_pairs, size=nobs), index=idx),
        'var.cluster.1': pd.Series(int_clusters[:, 1], index=idx),
    }
    model.fit(cov_type='clustered',
              clusters=pd.DataFrame(cluster_columns, index=idx))
コード例 #4
0
def test_incorrect_type(data):
    """compare() must raise TypeError when given a non-panel result.

    A PanelOLS result and an IV2SLS result (fitted on the same dependent and
    exogenous frames) are passed together to ``compare``.
    """
    indexed = data.set_index(["nr", "year"])
    lwage = indexed.lwage
    regressors = add_constant(indexed[["expersq", "married", "union"]])

    panel_model = PanelOLS(lwage, regressors)
    panel_res = panel_model.fit()

    iv_model = IV2SLS(
        panel_model.dependent.dataframe, panel_model.exog.dataframe, None, None
    )
    iv_res = iv_model.fit()

    mixed_results = {"model1": panel_res, "model2": iv_res}
    with pytest.raises(TypeError):
        compare(mixed_results)
コード例 #5
0
def test_predict_no_selection(generated_data):
    """predict() raises ValueError when every output component is switched off."""
    results = PanelOLS(
        generated_data.y, generated_data.x, entity_effects=True
    ).fit()

    # Each keyword set deselects all prediction components.
    empty_selections = (
        dict(fitted=False),
        dict(fitted=False, effects=False, idiosyncratic=False, missing=True),
    )
    for selection in empty_selections:
        with pytest.raises(ValueError):
            results.predict(**selection)
コード例 #6
0
def test_singleton_removal_mixed(singleton_data, other_effects):
    """Dropping singletons must not change parameters, only (possibly) nobs.

    ``other_effects`` selects the effects input: 1 uses only the first column
    of ``singleton_data.c``, 2 uses all of it, anything else is passed
    through to PanelOLS unchanged.
    """
    if other_effects == 1:
        effects = PanelData(singleton_data.c).dataframe.iloc[:, [0]]
    elif other_effects == 2:
        effects = singleton_data.c
    else:
        effects = other_effects

    keep_model = PanelOLS(singleton_data.y, singleton_data.x, other_effects=effects)
    kept = keep_model.fit(use_lsmr=True)

    drop_model = PanelOLS(singleton_data.y, singleton_data.x,
                          other_effects=effects, singletons=False)
    dropped = drop_model.fit(cov_type='clustered',
                             clusters=singleton_data.vc2,
                             use_lsmr=True)

    assert_allclose(kept.params, dropped.params)
    assert dropped.nobs <= kept.nobs
コード例 #7
0
def test_methods_equivalent(data, lsdv_config):
    """Default, LSDV and LSMR estimation paths must give matching results.

    ``lsdv_config`` supplies the effect flags, whether weights are used, and
    which form of ``other_effects`` (none, first column, or all columns of
    ``data.c``) is passed to the model.
    """
    selector = lsdv_config.other_effects
    if selector == 1:
        effects = PanelData(data.c).dataframe.iloc[:, [0]]
    elif selector == 2:
        effects = data.c
    else:
        effects = None

    if lsdv_config.weights:
        weights = data.w
    else:
        weights = None

    model = PanelOLS(
        data.y,
        data.x,
        weights=weights,
        entity_effects=lsdv_config.entity_effects,
        time_effects=lsdv_config.time_effects,
        other_effects=effects,
    )
    default_res = model.fit()
    lsdv_res = model.fit(use_lsdv=True)
    lsmr_res = model.fit(use_lsmr=True)

    assert_results_equal(default_res, lsdv_res)
    # The LSMR path is compared with the looser (non-strict) check.
    assert_results_equal(lsdv_res, lsmr_res, strict=False)
コード例 #8
0
def test_predict(generated_data):
    """Check the columns, shapes and contents returned by ``predict``.

    The same checks run for an entity-effects model and for a model without
    effects. The expected in-sample row count is taken from the first
    model's dependent frame and reused for the second.
    """
    all_components = ["fitted_values", "estimated_effects", "idiosyncratic"]

    def check_predictions(results, nobs):
        # Default call: fitted values only.
        basic = results.predict()
        assert list(basic.columns) == ["fitted_values"]
        assert basic.shape == (nobs, 1)

        # All three components, in-sample rows only.
        detailed = results.predict(effects=True, idiosyncratic=True)
        assert list(detailed.columns) == all_components
        assert detailed.shape == (nobs, 3)
        assert_series_equal(detailed.fitted_values,
                            results.fitted_values.iloc[:, 0])
        assert_series_equal(detailed.estimated_effects,
                            results.estimated_effects.iloc[:, 0])
        assert_series_equal(detailed.idiosyncratic,
                            results.idiosyncratic.iloc[:, 0])

        # missing=True: row count matches the full input panel.
        padded = results.predict(effects=True, idiosyncratic=True, missing=True)
        assert list(padded.columns) == all_components
        full_rows = PanelData(generated_data.y).dataframe.shape[0]
        assert padded.shape == (full_rows, 3)

    effects_model = PanelOLS(generated_data.y, generated_data.x, entity_effects=True)
    effects_res = effects_model.fit()
    in_sample_rows = effects_model.dependent.dataframe.shape[0]
    check_predictions(effects_res, in_sample_rows)

    pooled_res = PanelOLS(generated_data.y, generated_data.x).fit()
    check_predictions(pooled_res, in_sample_rows)
コード例 #9
0
def test_string_input(data):
    """Fit PanelOLS when other-effects and clusters are plain string columns.

    The test passes if both the plain fit and the clustered fit complete
    without raising.
    """
    panel = PanelData(data.y)
    nobs = panel.values2d.shape[0]

    def random_string_frame(pool, ncols):
        # ``ncols`` columns named "effect.<k>", each drawn at random from ``pool``.
        columns = {}
        for k in range(ncols):
            label = 'effect.' + str(k)
            draws = np.random.choice(pool, size=nobs)
            columns[label] = pd.Series(draws, index=panel.index, name=label)
        return pd.DataFrame(columns, index=panel.index)

    effects = random_string_frame(['a', 'b', 'c', 'd', 'e'], 2)
    model = PanelOLS(data.y, data.x, other_effects=effects)
    model.fit()

    # The integer draw only fixes the number of cluster columns.
    int_clusters = np.random.randint(0, panel.shape[2] // 2, size=(nobs, 2))
    letter_pairs = [''.join(pair) for pair in product(ascii_lowercase, ascii_lowercase)]
    clusters = random_string_frame(letter_pairs, int_clusters.shape[1])
    model.fit(cov_type='clustered', clusters=clusters)
コード例 #10
0
ファイル: test_model.py プロジェクト: peteos123/linearmodels
def test_pickle(data):
    """Check that PanelOLS models and results survive a pickle round trip.

    Three results are compared: ``res`` from the original model, ``reres``
    from a model that was pickled and restored before fitting, and
    ``rereres`` which is ``res`` itself pickled and restored. Parameters and
    covariances must match, and ``f_statistic_robust`` must be a
    ``WaldTestStatistic`` on all three.
    """
    mod = PanelOLS(data.y, data.x, entity_effects=True, time_effects=True)
    remod = pickle.loads(pickle.dumps(mod))
    res = mod.fit()
    reres = remod.fit()
    rereres = pickle.loads(pickle.dumps(res))

    for restored in (reres, rereres):
        assert_allclose(res.params, restored.params)
        assert_allclose(res.cov, restored.cov)

    # The original checked ``res`` twice and never looked at the unpickled
    # result; cover all three objects.
    for result in (res, reres, rereres):
        assert isinstance(result.f_statistic_robust, WaldTestStatistic)
コード例 #11
0
def test_absorbed(absorbed_data):
    """Dropping an absorbed regressor must match fitting without it.

    With ``drop_absorbed=True`` the fit is expected to emit an
    ``AbsorbingEffectWarning`` naming the absorbed variable. The result is
    then compared with a model fitted on the data minus its last variable.
    """
    y, x_full = absorbed_data.y, absorbed_data.x

    absorbing_model = PanelOLS(y, x_full, drop_absorbed=True, entity_effects=True)
    # The warning names the variable differently depending on the type of y.
    warned_name = "x_absorbed" if isinstance(y, pd.DataFrame) else "Exog.3"
    with pytest.warns(AbsorbingEffectWarning, match=warned_name):
        absorbed_res = absorbing_model.fit()

    # Remove the last variable: leading axis for ndarrays, last column otherwise.
    if isinstance(x_full, np.ndarray):
        x_reduced = x_full[:-1]
    else:
        x_reduced = x_full.iloc[:, :-1]

    reference_model = PanelOLS(y, x_reduced, drop_absorbed=False, entity_effects=True)
    reference_res = reference_model.fit()

    assert_allclose(absorbed_res.params, reference_res.params)
    assert_results_equal(absorbed_res, reference_res)
コード例 #12
0
def test_panel_time_fwl(data):
    """Time-effects PanelOLS must match OLS on dummy-residualised data.

    Time dummies are projected out of both the dependent and the exogenous
    frames by least squares, and the residualised data are fitted with
    IV2SLS (no endogenous variables or instruments) for comparison.
    """
    model = PanelOLS(data.y, data.x, time_effects=True)
    panel_res = model.fit(auto_df=False, count_effects=False, debiased=False)

    dep = model.dependent.dataframe
    exog = model.exog.dataframe
    dummies = model.dependent.dummies('time', drop_first=model.has_constant).values
    if model.has_constant:
        # Remove the column means from the dummies.
        ones = np.ones_like(dep)
        dummies = dummies - ones @ lstsq(ones, dummies)[0]

    exog_resid = exog - dummies @ lstsq(dummies, exog)[0]
    dep_resid = dep - dummies @ lstsq(dummies, dep)[0]

    ols_model = IV2SLS(dep_resid, exog_resid, None, None)
    ols_res = ols_model.fit(cov_type='unadjusted')
    assert_results_equal(panel_res, ols_res, test_df=False)

    panel_res = model.fit(cov_type='robust',
                          auto_df=False,
                          count_effects=False,
                          debiased=False)
    ols_res = ols_model.fit(cov_type='robust')
    assert_results_equal(panel_res, ols_res, test_df=False)
コード例 #13
0
def test_mixed_input(data):
    """Fit PanelOLS when effects and clusters mix categorical, string and int columns.

    The test passes if both the plain fit and the clustered fit complete
    without raising.
    """
    panel = PanelData(data.y)
    idx = panel.index
    nobs = panel.values2d.shape[0]

    # Effects: a categorical integer column followed by a raw string column.
    int_codes = np.random.randint(0, 5, size=nobs)
    letters = ["a", "b", "c", "d", "e"]
    effect_columns = {}
    effect_columns["effect.0"] = pd.Categorical(pd.Series(int_codes, index=idx))
    effect_columns["effect.1"] = pd.Series(
        np.random.choice(letters, size=nobs), index=idx
    )
    model = PanelOLS(
        data.y, data.x, other_effects=pd.DataFrame(effect_columns, index=idx)
    )
    model.fit()

    # Clusters: a two-letter string column followed by an integer column.
    int_clusters = np.random.randint(0, panel.shape[2] // 2, size=(nobs, 2))
    letter_pairs = ["".join(pair) for pair in product(ascii_lowercase, ascii_lowercase)]
    cluster_columns = {
        "var.cluster.0": pd.Series(
            np.random.choice(letter_pairs, size=nobs), index=idx
        ),
        "var.cluster.1": pd.Series(int_clusters[:, 1], index=idx),
    }
    model.fit(
        cov_type="clustered", clusters=pd.DataFrame(cluster_columns, index=idx)
    )
コード例 #14
0
def test_panel_entity_lsdv_weighted(data):
    """Weighted entity-effects PanelOLS must match a weighted dummy-variable fit.

    The reference model is IV2SLS (no endogenous variables or instruments)
    on the exogenous variables plus entity dummies, using the same weights.
    Agreement is checked for unadjusted, robust and several clustered
    covariance estimators.
    """
    panel_opts = dict(auto_df=False, count_effects=False, debiased=False)
    model = PanelOLS(data.y, data.x, entity_effects=True, weights=data.w)
    panel_res = model.fit(**panel_opts)

    dep = model.dependent.dataframe
    exog = model.exog.dataframe
    weights = model.weights.dataframe
    dummy_frame = model.dependent.dummies('entity', drop_first=model.has_constant)
    dummy_names = list(dummy_frame.columns)
    dummies = dummy_frame.values
    if model.has_constant:
        # Remove the weighted projection of the dummies on a constant.
        ones = np.ones_like(dep)
        sqrt_w = np.sqrt(weights.values)
        dummies = dummies - ones @ lstsq(sqrt_w * ones, sqrt_w * dummies)[0]

    design = pd.DataFrame(np.c_[exog.values, dummies],
                          index=exog.index,
                          columns=list(exog.columns) + dummy_names)

    lsdv_model = IV2SLS(dep, design, None, None, weights=weights)
    lsdv_res = lsdv_model.fit(cov_type='unadjusted')
    assert_results_equal(panel_res, lsdv_res, test_fit=False)
    assert_allclose(panel_res.rsquared_inclusive, lsdv_res.rsquared)

    panel_res = model.fit(cov_type='robust', **panel_opts)
    lsdv_res = lsdv_model.fit(cov_type='robust')
    assert_results_equal(panel_res, lsdv_res, test_fit=False)

    # User-supplied cluster variables.
    for cluster_attr in ('vc1', 'vc2'):
        raw_clusters = getattr(data, cluster_attr)
        lsdv_clusters = model.reformat_clusters(raw_clusters)
        panel_res = model.fit(cov_type='clustered', clusters=raw_clusters, **panel_opts)
        lsdv_res = lsdv_model.fit(cov_type='clustered',
                                  clusters=lsdv_clusters.dataframe)
        assert_results_equal(panel_res, lsdv_res, test_fit=False)

    # Clustering by time, then by entity, via the convenience flags.
    flag_to_ids = (('cluster_time', 'time_ids'), ('cluster_entity', 'entity_ids'))
    for flag, ids_attr in flag_to_ids:
        panel_res = model.fit(cov_type='clustered', **{flag: True}, **panel_opts)
        id_clusters = pd.DataFrame(getattr(model.dependent, ids_attr),
                                   index=model.dependent.index,
                                   columns=['var.clust'])
        lsdv_res = lsdv_model.fit(cov_type='clustered', clusters=id_clusters)
        assert_results_equal(panel_res, lsdv_res, test_fit=False)
コード例 #15
0
def test_results_access(data):
    """Run ``access_attributes`` on results from several model configurations."""
    configurations = (
        dict(entity_effects=True),
        dict(other_effects=data.c),
        dict(time_effects=True, entity_effects=True),
        dict(),
    )
    for options in configurations:
        results = PanelOLS(data.y, data.x, **options).fit()
        access_attributes(results)

    # A model whose only regressor is a column of ones named "const".
    constant = PanelData(data.y).copy()
    constant.dataframe.iloc[:, :] = 1
    constant.dataframe.columns = ["const"]
    results = PanelOLS(data.y, constant).fit()
    access_attributes(results)
コード例 #16
0
def test_masked_singleton_removal():
    """Singleton removal on a small 8-row panel must leave 6 observations.

    The panel has four entities observed twice each; time value 0 occurs in
    a single row.
    """
    entity_labels = ["A", "B", "C", "D"] * 2
    time_labels = [0, 1, 1, 1, 1, 2, 2, 2]
    nobs = 8
    idx = pd.MultiIndex.from_arrays((entity_labels, time_labels))

    exog = pd.DataFrame(np.random.randn(nobs, 1), index=idx, columns=["x"])
    dep = pd.DataFrame(np.random.randn(nobs, 1), index=idx)

    options = dict(singletons=False, entity_effects=True, time_effects=True)
    results = PanelOLS(dep, exog, **options).fit()
    assert results.nobs == 6
コード例 #17
0
def test_const_data_entity(const_data):
    """Entity-effects fit on ``const_data`` must match an explicit dummy regression.

    Entity dummies (first dropped) are made orthogonal to the exogenous
    data, appended to it, and fitted with IV2SLS. Only the first parameter
    of that fit is compared with the PanelOLS parameters.
    """
    model = PanelOLS(const_data.y, const_data.x, entity_effects=True)
    panel_res = model.fit(debiased=False)

    exog = model.exog.dataframe
    dummies = model.dependent.dummies('entity', drop_first=True)
    # Overwrite in place with the residual from projecting onto exog.
    projection = exog.values @ lstsq(exog.values, dummies.values)[0]
    dummies.iloc[:, :] = dummies.values - projection

    design = pd.DataFrame(np.c_[exog.values, dummies.values],
                          index=exog.index,
                          columns=list(exog.columns) + list(dummies.columns))

    dummy_res = IV2SLS(model.dependent.dataframe, design, None, None).fit()
    assert_allclose(panel_res.params, dummy_res.params.iloc[:1])
コード例 #18
0
def test_panel_both_lsdv(data):
    """Entity+time effects PanelOLS must match an explicit dummy-variable fit.

    The reference model is IV2SLS (no endogenous variables or instruments)
    on the exogenous variables plus entity and time dummies. Agreement is
    checked for unadjusted, robust and several clustered covariance
    estimators.
    """
    panel_opts = dict(auto_df=False, count_effects=False, debiased=False)
    model = PanelOLS(data.y, data.x, entity_effects=True, time_effects=True)
    panel_res = model.fit(**panel_opts)

    dep = model.dependent.dataframe
    exog = model.exog.dataframe
    entity_dummies = model.dependent.dummies('entity', drop_first=model.has_constant)
    time_dummies = model.dependent.dummies('time', drop_first=True)
    dummies = np.c_[entity_dummies.values, time_dummies.values]

    if model.has_constant:
        # Remove the column means from the dummies.
        ones = np.ones_like(dep)
        dummies = dummies - ones @ lstsq(ones, dummies)[0]

    design_names = (list(exog.columns)
                    + list(entity_dummies.columns)
                    + list(time_dummies.columns))
    design = pd.DataFrame(np.c_[exog.values, dummies],
                          index=exog.index,
                          columns=design_names)

    lsdv_model = IV2SLS(dep, design, None, None)
    lsdv_res = lsdv_model.fit(cov_type='unadjusted')
    assert_results_equal(panel_res, lsdv_res, test_fit=False)
    assert_allclose(panel_res.rsquared_inclusive, lsdv_res.rsquared)

    panel_res = model.fit(cov_type='robust', **panel_opts)
    lsdv_res = lsdv_model.fit(cov_type='robust')
    assert_results_equal(panel_res, lsdv_res, test_fit=False)

    # User-supplied cluster variables.
    for cluster_attr in ('vc1', 'vc2'):
        raw_clusters = getattr(data, cluster_attr)
        lsdv_clusters = model.reformat_clusters(raw_clusters)
        panel_res = model.fit(cov_type='clustered', clusters=raw_clusters, **panel_opts)
        lsdv_res = lsdv_model.fit(cov_type='clustered',
                                  clusters=lsdv_clusters.dataframe)
        assert_results_equal(panel_res, lsdv_res, test_fit=False)

    # Clustering by time, then by entity, via the convenience flags.
    flag_to_ids = (('cluster_time', 'time_ids'), ('cluster_entity', 'entity_ids'))
    for flag, ids_attr in flag_to_ids:
        panel_res = model.fit(cov_type='clustered', **{flag: True}, **panel_opts)
        id_clusters = pd.DataFrame(getattr(model.dependent, ids_attr),
                                   index=model.dependent.index,
                                   columns=['var.clust'])
        lsdv_res = lsdv_model.fit(cov_type='clustered', clusters=id_clusters)
        assert_results_equal(panel_res, lsdv_res, test_fit=False)
コード例 #19
0
def areg(df, y_var, X_vars, absorb_var, add_intercept=True):
    """
    Replicate Stata's ``areg`` using ``PanelOLS`` with entity effects.

    Inputs.
    ---------
    df:pd.DataFrame, the data for OLS. It is copied, not modified.
    y_var:str, the column name of the dependent variable
    X_vars:list of str, the list of explanatory variable names (column names in df)
    absorb_var:str, the name of the column (variable) to be absorbed.
          This column should contain only discrete values. They may be str,
          float or int, e.g. company name, company code or year. They should
          not be values of a continuous variable such as temperature or
          stock returns; the code will still run on such values, but the
          model then has no economic meaning.
    add_intercept:bool, when True a constant column named 'intercept' is
          placed in front of the explanatory variables.

    Outputs.
    ---------
    res:obj, the object returned by ``PanelOLS.fit()``

    """
    panel = df.copy()

    # Build the (entity, time) index: the absorbed variable is the entity
    # and every row shares the same dummy time value.
    # NOTE(review): an entity with several rows therefore yields repeated
    # (entity, time) index pairs; confirm PanelOLS accepts that.
    panel['time_index'] = 1.0
    panel['entity_index'] = panel[absorb_var]
    panel = panel.set_index(['entity_index', 'time_index'])

    # Dependent variable.
    dependent = panel[y_var]

    # Explanatory variables, optionally led by a constant.
    regressor_names = X_vars
    if add_intercept:
        panel['intercept'] = 1.0
        regressor_names = ['intercept'] + X_vars
    exog = panel[regressor_names]

    # weights: unused for now; entity_effects: absorb the entity level
    # (absorb_var); time_effects: off, the time index is a placeholder.
    model = PanelOLS(dependent=dependent,
                     exog=exog,
                     weights=None,
                     entity_effects=True,
                     time_effects=False,
                     singletons=False,
                     drop_absorbed=True)
    return model.fit()
コード例 #20
0
def test_predict_exception(generated_data):
    """predict() accepts the training exog but rejects a flat-indexed frame.

    Skipped when the exogenous data contain NaN.
    """
    exog = generated_data.x
    if np.any(np.isnan(exog)):
        pytest.skip("Cannot test with missing values")

    results = PanelOLS(generated_data.y, exog, entity_effects=True).fit()

    # Predicting from the training exog reproduces the in-sample prediction.
    in_sample = results.predict()
    from_exog = results.predict(generated_data.x)
    assert_allclose(in_sample, from_exog, atol=1e-3)

    # Replace the panel index with a plain integer range.
    flat_indexed = PanelData(generated_data.x, copy=True).dataframe
    flat_indexed.index = np.arange(flat_indexed.shape[0])
    expected_message = "exog does not have the correct number"
    with pytest.raises(ValueError, match=expected_message):
        results.predict(flat_indexed)
コード例 #21
0
def test_panel_entity_fwl(data):
    """Entity-effects PanelOLS must match OLS on dummy-residualised data.

    Entity dummies are projected out of both the dependent and the exogenous
    frames by least squares, and the residualised data are fitted with
    IV2SLS (no endogenous variables or instruments) for comparison.
    """
    model = PanelOLS(data.y, data.x, entity_effects=True)
    panel_res = model.fit(auto_df=False, count_effects=False, debiased=False)

    dep = model.dependent.dataframe
    exog = model.exog.dataframe
    if not model.has_constant:
        # No constant: use the full dummy set as is.
        basis = model.dependent.dummies('entity', drop_first=False).values
    else:
        # Constant present: drop one dummy and remove the column means.
        raw = model.dependent.dummies('entity', drop_first=True).values
        ones = np.ones_like(dep)
        basis = raw - ones @ lstsq(ones, raw)[0]

    exog_resid = exog - basis @ lstsq(basis, exog)[0]
    dep_resid = dep - basis @ lstsq(basis, dep)[0]

    ols_model = IV2SLS(dep_resid, exog_resid, None, None)
    ols_res = ols_model.fit(cov_type='unadjusted')
    assert_results_equal(panel_res, ols_res, test_df=False)

    panel_res = model.fit(cov_type='robust',
                          auto_df=False,
                          count_effects=False,
                          debiased=False)
    ols_res = ols_model.fit(cov_type='robust')
    assert_results_equal(panel_res, ols_res, test_df=False)
コード例 #22
0
def test_singleton_removal():
    """Parameters must be identical with and without singleton removal.

    The panel is unbalanced: pass ``i`` of the construction adds entities
    ``entity.0`` .. ``entity.(5 - i)``, so later entities appear less often.
    """
    entity_labels = [
        "entity.{j}".format(j=j) for i in range(6) for j in range(6 - i)
    ]
    nobs = len(entity_labels)
    time_labels = np.arange(nobs) % 6
    idx = pd.MultiIndex.from_arrays((entity_labels, time_labels))

    exog_names = ["x{0}".format(k) for k in range(3)]
    exog = pd.DataFrame(np.random.randn(nobs, 3), index=idx, columns=exog_names)
    dep = pd.DataFrame(np.random.randn(nobs, 1), index=idx)

    def fit_with(singletons):
        # Two-way effects model; only the ``singletons`` flag differs.
        model = PanelOLS(dep, exog,
                         singletons=singletons,
                         entity_effects=True,
                         time_effects=True)
        return model.fit()

    without_singletons = fit_with(False)
    with_singletons = fit_with(True)
    assert_allclose(without_singletons.params, with_singletons.params)
コード例 #23
0
def test_panel_effects_sanity(data):
    """Fitted values + residuals + estimated effects must rebuild the dependent.

    Checked for entity and entity+time effects, each with and without weights.
    """
    configurations = (
        dict(entity_effects=True),
        dict(entity_effects=True, time_effects=True),
        dict(weights=data.w, entity_effects=True),
        dict(weights=data.w, entity_effects=True, time_effects=True),
    )
    for options in configurations:
        model = PanelOLS(data.y, data.x, **options)
        results = model.fit(auto_df=False, count_effects=False)

        # Start from X @ beta and add the remaining components in place.
        rebuilt = model.exog.values2d @ results.params.values[:, None]
        rebuilt += results.resids.values[:, None]
        rebuilt += results.estimated_effects.values
        assert_allclose(model.dependent.values2d, rebuilt)
コード例 #24
0
def xtreg(df, y_var, other_X_vars, fix1, fix2=None, add_intercept=True):
    """
    Replicate Stata's ``xtreg`` for the linear fixed-effects model.
    At least one fixed-effect variable is required and at most two are
    supported.

    Inputs.
    ---------
    df:pd.DataFrame, the data for OLS. It is copied, not modified.
    y_var:str, the column name of the dependent variable
    other_X_vars:list of str, the explanatory variable names, excluding the
          fixed-effect variables
    fix1:str, the column name of the first fixed-effect variable (used as
          the entity index)
    fix2:str or None, the column name of the second fixed-effect variable,
          if there is one (used as the time index)
    add_intercept:bool, when True a constant column named 'intercept' is
          placed in front of the explanatory variables.

    Outputs.
    ---------
    res:obj, the object returned by ``PanelOLS.fit()``

    """
    # NOTE: dropna() removes rows with a missing value in any column of df,
    # not only in the columns used by the model.
    panel = df.copy().dropna()

    use_second_effect = fix2 is not None
    if not use_second_effect:
        # No second effect: add a constant placeholder column for the time
        # level of the index and leave time effects off.
        fix2 = 'time_index'
        panel[fix2] = 1.0

    # Entity first, then time.
    panel = panel.set_index([fix1, fix2])
    dependent = panel[y_var]

    regressor_names = other_X_vars
    if add_intercept:
        panel['intercept'] = 1.0
        regressor_names = ['intercept'] + other_X_vars
    exog = panel[regressor_names]

    model = PanelOLS(dependent=dependent,
                     exog=exog,
                     weights=None,
                     entity_effects=True,
                     time_effects=use_second_effect,
                     other_effects=None,
                     drop_absorbed=True)
    return model.fit()
コード例 #25
0
def test_valid_weight_shape(data):
    """PanelOLS must accept weights of several shapes and normalise them.

    Four layouts are tried: one weight per observation (flat), one per
    element of the first axis of ``data.y``, one per element of the second
    axis, and an array with the full shape of ``data.y``. The model's stored
    weights must equal the expanded weights with missing observations
    removed, divided by their mean.
    """
    def stored_weights(raw):
        # Fit with ``raw`` as weights and return what the model kept.
        model = PanelOLS(data.y, data.x, weights=raw)
        model.fit()
        return model.weights.values2d

    def normalised(flat):
        # Drop missing observations, make a column, scale to mean one.
        kept = flat[~missing.squeeze()][:, None]
        return kept / kept.mean()

    # Same size
    count = np.prod(data.y.shape)
    raw = 1 + np.random.random_sample(count)
    actual = stored_weights(raw)
    missing = PanelData(data.y).isnull | PanelData(data.x).isnull
    assert_equal(actual, normalised(raw))

    # Per time
    count = data.y.shape[0]
    raw = 1 + np.random.random_sample(count)
    actual = stored_weights(raw)
    expanded = raw[:, None] @ np.ones((1, data.y.shape[1]))
    assert_equal(actual, normalised(expanded.T.ravel()))

    # Per entity
    count = data.y.shape[1]
    raw = 1 + np.random.random_sample(count)
    actual = stored_weights(raw)
    expanded = np.ones((data.y.shape[0], 1)) @ raw[None, :]
    assert_equal(actual, normalised(expanded.T.ravel()))

    # Full shape of data.y
    raw = 1 + np.random.random_sample(data.y.shape)
    actual = stored_weights(raw)
    assert_equal(actual, normalised(raw.T.ravel()))
コード例 #26
0
def test_const_data_both(const_data):
    """Two-way effects fit on ``const_data`` must match an explicit dummy regression.

    Entity and time dummies (first of each dropped) are made orthogonal to
    the exogenous data, appended to it, and fitted with IV2SLS. Only the
    first parameter of that fit is compared with the PanelOLS parameters.
    """
    model = PanelOLS(const_data.y, const_data.x,
                     entity_effects=True, time_effects=True)
    panel_res = model.fit(debiased=False)

    exog = model.exog.dataframe

    entity_dummies = model.dependent.dummies('entity', drop_first=True)
    entity_dummies.columns = ['d.entity.{0}'.format(c) for c in entity_dummies]
    time_dummies = model.dependent.dummies('time', drop_first=True)
    time_dummies.columns = ['d.time.{0}'.format(c) for c in time_dummies]

    dummy_names = list(entity_dummies.columns) + list(time_dummies.columns)
    dummies = pd.DataFrame(np.c_[entity_dummies.values, time_dummies.values],
                           index=exog.index,
                           columns=dummy_names)
    # Overwrite in place with the residual from projecting onto exog.
    projection = exog.values @ lstsq(exog.values, dummies.values)[0]
    dummies.iloc[:, :] = dummies.values - projection

    design = pd.DataFrame(np.c_[exog.values, dummies.values],
                          index=exog.index,
                          columns=list(exog.columns) + list(dummies.columns))

    dummy_res = IV2SLS(model.dependent.dataframe, design, None, None).fit()
    assert_allclose(panel_res.params, dummy_res.params.iloc[:1])
コード例 #27
0
def test_fitted_effects_residuals(data, entity_eff, time_eff):
    """Check fitted values, idiosyncratic residuals and estimated effects.

    Parameters
    ----------
    data : fixture
        Supplies the dependent (``data.y``) and exogenous (``data.x``) data.
    entity_eff : bool
        Passed to PanelOLS as ``entity_effects``.
    time_eff : bool
        Passed to PanelOLS as ``time_effects``.

    Each result attribute is compared with a frame built directly from the
    model's arrays: ``X @ params`` for fitted values, ``res.resids`` for the
    idiosyncratic part, and the remainder of the dependent variable for the
    estimated effects.
    """
    # Use time_eff for the time effects; the original passed entity_eff to
    # both arguments, so time effects were never varied independently.
    mod = PanelOLS(data.y, data.x,
                   entity_effects=entity_eff,
                   time_effects=time_eff)
    res = mod.fit()

    expected = mod.exog.values2d @ res.params.values
    expected = pd.DataFrame(expected, index=mod.exog.index, columns=['fitted_values'])
    assert_allclose(res.fitted_values, expected)
    assert_frame_similar(res.fitted_values, expected)

    # Reuse the frame (same index) for the residual comparison.
    expected.iloc[:, 0] = res.resids
    expected.columns = ['idiosyncratic']
    assert_allclose(res.idiosyncratic, expected)
    assert_frame_similar(res.idiosyncratic, expected)

    # Whatever the fitted values and residuals leave unexplained must be
    # the estimated effects.
    fitted_error = res.fitted_values + res.idiosyncratic.values
    expected.iloc[:, 0] = mod.dependent.values2d - fitted_error
    expected.columns = ['estimated_effects']
    assert_allclose(res.estimated_effects, expected, atol=1e-8)
    assert_frame_similar(res.estimated_effects, expected)
コード例 #28
0
def test_const_data_entity_weights(const_data):
    """Weighted entity-effects fit on ``const_data`` must match a weighted dummy regression.

    Entity dummies (first dropped) have their weighted projection removed,
    are appended to the exogenous data, and fitted with weighted IV2SLS.
    Only the first parameter of that fit is compared with the PanelOLS
    parameters.
    """
    model = PanelOLS(const_data.y, const_data.x,
                     entity_effects=True, weights=const_data.w)
    panel_res = model.fit(debiased=False)

    dep = model.dependent.dataframe
    weights = model.weights.dataframe
    exog = model.exog.dataframe
    dummies = model.dependent.dummies('entity', drop_first=True)
    dummy_names = list(dummies.columns)

    # Regress sqrt(w)-scaled dummies on sqrt(w) and subtract the fit.
    sqrt_w = np.sqrt(weights.values)
    ones = np.ones_like(exog)
    weighted_dummies = sqrt_w * dummies.values
    coefs = lstsq(sqrt_w, weighted_dummies)[0]
    dummies = dummies - ones @ coefs

    design = pd.DataFrame(np.c_[exog.values, dummies.values],
                          index=exog.index,
                          columns=list(exog.columns) + dummy_names)

    dummy_res = IV2SLS(dep, design, None, None, weights=weights).fit()
    assert_allclose(panel_res.params, dummy_res.params.iloc[:1])
コード例 #29
0
def test_const_data_both_weights(const_data):
    """Weighted two-way effects fit on ``const_data`` must match a weighted dummy regression.

    Entity and time dummies (first of each dropped) have their weighted
    projection removed, are appended to the exogenous data, and fitted with
    weighted IV2SLS. Only the first parameter of that fit is compared with
    the PanelOLS parameters.
    """
    y, x = const_data.y, const_data.x
    # Use the plural keyword names (entity_effects / time_effects), matching
    # every other PanelOLS call in this file; the original passed the
    # singular entity_effect / time_effect.
    mod = PanelOLS(y, x, entity_effects=True, time_effects=True, weights=const_data.w)
    res = mod.fit(debiased=False)

    w = mod.weights.dataframe
    x = mod.exog.dataframe

    d1 = mod.dependent.dummies('entity', drop_first=True)
    d2 = mod.dependent.dummies('time', drop_first=True)
    d = np.c_[d1.values, d2.values]
    # Regress sqrt(w)-scaled dummies on sqrt(w) and subtract the fit.
    root_w = np.sqrt(w.values)
    z = np.ones_like(x)
    wd = root_w * d
    wz = root_w
    d = d - z @ np.linalg.lstsq(wz, wd)[0]
    d = pd.DataFrame(d, index=x.index, columns=list(d1.columns) + list(d2.columns))

    xd = np.c_[x.values, d.values]
    xd = pd.DataFrame(xd, index=x.index, columns=list(x.columns) + list(d.columns))

    res2 = IV2SLS(mod.dependent.dataframe, xd, None, None, weights=w).fit()
    assert_allclose(res.params, res2.params.iloc[:1])
コード例 #30
0
def test_cov_equiv_cluster(data):
    """``cluster_entity`` / ``cluster_time`` flags must equal explicit id clusters.

    Also checks that a clustered fit with no cluster information equals one
    with ``clusters=None``.
    """
    cases = (
        ('entity_effects', 'cluster_entity', 'entity_ids'),
        ('time_effects', 'cluster_time', 'time_ids'),
    )
    for effect_kw, cluster_flag, ids_attr in cases:
        model = PanelOLS(data.y, data.x, **{effect_kw: True})
        via_flag = model.fit(cov_type='clustered', debiased=False,
                             **{cluster_flag: True})

        panel = PanelData(data.y)
        id_clusters = pd.DataFrame(getattr(panel, ids_attr), index=panel.index)
        via_ids = model.fit(cov_type='clustered', clusters=id_clusters,
                            debiased=False)
        assert_results_equal(via_flag, via_ids)

    # ``model`` is now the time-effects model from the last iteration.
    implicit = model.fit(cov_type='clustered', debiased=False)
    explicit_none = model.fit(cov_type='clustered', clusters=None, debiased=False)
    assert_results_equal(implicit, explicit_none)