Example 1
def test_mean_missing(data):
    xpd = PanelData(data.x)
    xpd.drop(xpd.isnull)
    entity_mean = xpd.mean('entity')
    expected = xpd.dataframe.groupby(level=0).mean()
    expected = expected.loc[xpd.entities]
    expected.columns.name = None
    assert_frame_equal(entity_mean, expected)

    time_mean = xpd.mean('time')
    expected = xpd.dataframe.groupby(level=1).mean()
    expected = expected.loc[xpd.time]
    expected.columns.name = None
    assert_frame_equal(time_mean, expected)
Example 2
def data(request):
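    # Parametrized fixture: the test id encodes model type, covariance
    # estimator, weighting scheme and missing-data suffix; these choose the
    # data columns, model options and fit options, and the fitted result is
    # paired with the stored Stata output for comparison.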
    model, vcv, weights, missing = request.param.split('-')
    y_vars = ['y']
    x_vars = ['x1', 'x2', 'x3', 'x4', 'x5']
    vars = y_vars + x_vars
    if missing:
        for i, v in enumerate(vars):
            vars[i] = v + missing
        y_vars = vars[:1]
        x_vars = vars[1:]
    y = sim_data[y_vars]
    x = sim_data[['intercept'] + x_vars]
    mod = MODELS[model]
    mod_options = {}
    if model == 'fixed_effect':
        mod_options = {'entity_effects': True}
    if weights == 'weighted':
        mod_options.update({'weights': sim_data['w']})
    fit_options = {'debiased': True}
    if weights == 'wls':
        fit_options.update({'reweight': True})
    if vcv == 'robust' and model not in ('fixed_effect', 'random_effect'):
        fit_options.update({'cov_type': 'robust'})
    elif vcv in ('cluster', 'robust'):
        y_data = PanelData(y)
        eid = y_data.entity_ids
        entities = pd.DataFrame(eid, index=y_data.index, columns=['firm_ids'])
        fit_options.update({'cov_type': 'clustered', 'clusters': entities})
    else:
        fit_options.update({'cov_type': 'unadjusted'})

    if vcv == 'cluster' or (
            model in ('fixed_effect', 'random_effect') and vcv == 'robust'):
        fit_options.update({'group_debias': True})
    spec_mod = mod(y, x, **mod_options)
    fit = spec_mod.fit(**fit_options)
    return AttrDict(fit=fit, model=spec_mod, model_options=mod_options, y=y,
                    x=x,
                    stata=STATA_RESULTS[request.param],
                    fit_options=fit_options,
                    model_name=model, vcv=vcv, weights=weights,
                    missing=missing)
Example 3
def test_general_demean_oneway(mi_df):
    y = PanelData(mi_df)
    dm1 = y.demean("entity")
    g = DataFrame(y.entity_ids, index=y.index)
    dm2 = y.general_demean(g)
    assert_allclose(dm1.values2d, dm2.values2d)

    dm1 = y.demean("time")
    g = DataFrame(y.time_ids, index=y.index)
    dm2 = y.general_demean(g)
    assert_allclose(dm1.values2d, dm2.values2d)

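    # Random one-way groups: general_demean should match an explicit
    # projection of the data onto the group dummies.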
    g = DataFrame(np.random.randint(0, 10, g.shape), index=y.index)
    dm2 = y.general_demean(g)
    g = Categorical(g.iloc[:, 0])
    d = get_dummies(g)
    dm1 = y.values2d - d @ lstsq(d, y.values2d, rcond=None)[0]
    assert_allclose(dm1, dm2.values2d)
Example 4
def test_general_demean_oneway(panel):
    y = PanelData(panel)
    dm1 = y.demean('entity')
    g = pd.DataFrame(y.entity_ids, index=y.index)
    dm2 = y.general_demean(g)
    assert_allclose(dm1.values2d, dm2.values2d)

    dm1 = y.demean('time')
    g = pd.DataFrame(y.time_ids, index=y.index)
    dm2 = y.general_demean(g)
    assert_allclose(dm1.values2d, dm2.values2d)

    g = pd.DataFrame(np.random.randint(0, 10, g.shape), index=y.index)
    dm2 = y.general_demean(g)
    g = pd.Categorical(g.iloc[:, 0])
    d = pd.get_dummies(g)
    dm1 = y.values2d - d @ np.linalg.lstsq(d, y.values2d, rcond=None)[0]
    assert_allclose(dm1, dm2.values2d)
Example 5
def test_nested_effects(data):
    y = PanelData(data.y)
    effects = pd.DataFrame(y.entity_ids // 2, index=y.index)
    with pytest.raises(ValueError) as exception:
        PanelOLS(data.y, data.x, entity_effects=True, other_effects=effects)
    assert 'entity effects' in str(exception.value)

    effects = pd.DataFrame(y.time_ids // 2, index=y.index)
    with pytest.raises(ValueError) as exception:
        PanelOLS(data.y, data.x, time_effects=True, other_effects=effects)
    assert 'time effects' in str(exception.value)

    effects1 = pd.Series(y.entity_ids.squeeze() // 2, index=y.index)
    effects2 = pd.Series(y.entity_ids.squeeze() // 4, index=y.index)
    effects = pd.DataFrame({'eff1': effects1, 'eff2': effects2})
    with pytest.raises(ValueError) as exception:
        PanelOLS(data.y, data.x, other_effects=effects)
    assert 'by other effects' in str(exception.value)
    assert 'time effects' not in str(exception.value)
    assert 'entity effects' not in str(exception.value)
Example 6
def test_string_conversion():
    t, n = 3, 1000
    string = np.random.choice(['a', 'b', 'c'], (t, n))
    num = np.random.randn(t, n)
    p = pd.Panel({'a': string, 'b': num})
    p = p[['a', 'b']]
    panel = PanelData(p, var_name='OtherEffect')
    df = panel.dataframe
    assert df.shape == (3000, 3)
    s = string.T.ravel()
    a_locs = np.where(s == 'a')
    b_locs = np.where(s == 'b')
    c_locs = np.where(s == 'c')
    assert np.all(df.loc[:, 'a.b'].values[a_locs] == 0.0)
    assert np.all(df.loc[:, 'a.b'].values[b_locs] == 1.0)
    assert np.all(df.loc[:, 'a.b'].values[c_locs] == 0.0)

    assert np.all(df.loc[:, 'a.c'].values[a_locs] == 0.0)
    assert np.all(df.loc[:, 'a.c'].values[b_locs] == 0.0)
    assert np.all(df.loc[:, 'a.c'].values[c_locs] == 1.0)
Example 7
def test_valid_weight_shape(data):
    # Same size
    n = np.prod(data.y.shape)
    weights = 1 + np.random.random_sample(n)
    mod = PanelOLS(data.y, data.x, weights=weights)
    mod.fit()
    w = mod.weights.values2d
    missing = PanelData(data.y).isnull | PanelData(data.x).isnull
    expected = weights[~missing.squeeze()][:, None]
    expected = expected / expected.mean()
    assert_equal(w, expected)

    # Per time
    n = data.y.shape[0]
    weights = 1 + np.random.random_sample(n)
    mod = PanelOLS(data.y, data.x, weights=weights)
    mod.fit()
    w = mod.weights.values2d
    expected = weights[:, None] @ np.ones((1, data.y.shape[1]))
    expected = expected.T.ravel()
    expected = expected[~missing.squeeze()][:, None]
    expected = expected / expected.mean()
    assert_equal(w, expected)

    # Per entity
    n = data.y.shape[1]
    weights = 1 + np.random.random_sample(n)
    mod = PanelOLS(data.y, data.x, weights=weights)
    mod.fit()
    w = mod.weights.values2d
    expected = np.ones((data.y.shape[0], 1)) @ weights[None, :]
    expected = expected.T.ravel()
    expected = expected[~missing.squeeze()][:, None]
    expected = expected / expected.mean()
    assert_equal(w, expected)

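    # One weight per observation (array with the same shape as the dependent)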
    weights = 1 + np.random.random_sample(data.y.shape)
    mod = PanelOLS(data.y, data.x, weights=weights)
    mod.fit()
    w = mod.weights.values2d
    expected = weights.T.ravel()
    expected = expected[~missing.squeeze()][:, None]
    expected = expected / expected.mean()
    assert_equal(w, expected)
Example 8
def test_numpy_3d():
    n, t, k = 11, 7, 3
    x = np.random.random((k, t, n))
    dh = PanelData(x)
    assert_equal(x, dh.values3d)
    assert dh.nentity == n
    assert dh.nobs == t
    assert dh.nvar == k
    assert_equal(np.reshape(x.T, (n * t, k)), dh.values2d)
    items = ['entity.{0}'.format(i) for i in range(n)]
    obs = [i for i in range(t)]
    var_names = ['x.{0}'.format(i) for i in range(k)]
    expected = pd.Panel(np.reshape(x, (k, t, n)),
                        items=var_names,
                        major_axis=obs,
                        minor_axis=items)
    expected_frame = expected.swapaxes(1, 2).to_frame()
    expected_frame.index.levels[0].name = 'entity'
    expected_frame.index.levels[1].name = 'time'
    assert_frame_equal(dh.dataframe, expected_frame)
Example 9
def test_methods_equivalent(data, lsdv_config):
    other_effects = None
    if lsdv_config.other_effects == 1:
        other_effects = PanelData(data.c).dataframe.iloc[:, [0]]
    elif lsdv_config.other_effects == 2:
        other_effects = data.c
    weights = data.w if lsdv_config.weights else None
    mod = PanelOLS(
        data.y,
        data.x,
        weights=weights,
        entity_effects=lsdv_config.entity_effects,
        time_effects=lsdv_config.time_effects,
        other_effects=other_effects,
    )
    res1 = mod.fit()
    res2 = mod.fit(use_lsdv=True)
    res3 = mod.fit(use_lsmr=True)
    assert_results_equal(res1, res2)
    assert_results_equal(res2, res3, strict=False)
Example 10
def test_demean_missing_alt_types(data):
    check = isinstance(data.x, (DataFrame, np.ndarray))
    xpd = PanelData(data.x)
    xpd.drop(xpd.isnull)
    entity_demean = xpd.demean('entity')
    expected = xpd.dataframe.groupby(level=0).transform(lambda s: s - s.mean())
    assert_frame_equal(entity_demean.dataframe, expected,
                       check_index_type=check,
                       check_column_type=check)

    time_demean = xpd.demean('time')
    expected = xpd.dataframe.groupby(level=1).transform(lambda s: s - s.mean())
    assert_frame_equal(time_demean.dataframe, expected,
                       check_index_type=check,
                       check_column_type=check)
Example 11
def test_lsdv_options(data):
    mod = PanelOLS(data.y, data.x, weights=data.w)
    res1 = mod.fit()
    res2 = mod.fit(use_lsdv=True)
    assert_results_equal(res1, res2)

    mod = PanelOLS(data.y, data.x, weights=data.w, entity_effects=True)
    res1 = mod.fit()
    res2 = mod.fit(use_lsdv=True)
    assert_results_equal(res1, res2)

    mod = PanelOLS(data.y, data.x, time_effects=True)
    res1 = mod.fit()
    res2 = mod.fit(use_lsdv=True)
    assert_results_equal(res1, res2)

    mod = PanelOLS(data.y, data.x, time_effects=True, entity_effects=True)
    res1 = mod.fit()
    res2 = mod.fit(use_lsdv=True)
    assert_results_equal(res1, res2)

    c1 = PanelData(data.c).dataframe.iloc[:, [0]]
    mod = PanelOLS(data.y, data.x, entity_effects=True, other_effects=c1)
    res1 = mod.fit()
    res2 = mod.fit(use_lsdv=True)
    assert_results_equal(res1, res2)

    mod = PanelOLS(data.y, data.x, time_effects=True, other_effects=c1)
    res1 = mod.fit()
    res2 = mod.fit(use_lsdv=True)
    assert_results_equal(res1, res2)

    mod = PanelOLS(data.y, data.x, weights=data.w, time_effects=True, other_effects=c1)
    res1 = mod.fit()
    res2 = mod.fit(use_lsdv=True)
    assert_results_equal(res1, res2)

    mod = PanelOLS(data.y, data.x, weights=data.w, other_effects=data.c)
    res1 = mod.fit()
    res2 = mod.fit(use_lsdv=True)
    assert_results_equal(res1, res2)
Example 12
def test_singleton_removal_mixed(singleton_data, other_effects):
    if other_effects == 1:
        other_effects = PanelData(singleton_data.c).dataframe.iloc[:, [0]]
    elif other_effects == 2:
        other_effects = singleton_data.c
    mod = PanelOLS(singleton_data.y,
                   singleton_data.x,
                   other_effects=other_effects)
    res_keep = mod.fit(use_lsmr=True)

    mod = PanelOLS(
        singleton_data.y,
        singleton_data.x,
        other_effects=other_effects,
        singletons=False,
    )
    res = mod.fit(cov_type="clustered",
                  clusters=singleton_data.vc2,
                  use_lsmr=True)
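    # Dropping singletons must not change the slope estimates; it can only
    # reduce the number of observations.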
    assert_allclose(res_keep.params, res.params)
    assert res.nobs <= res_keep.nobs
Example 13
def test_string_input(data):
    y = PanelData(data.y)
    nt = y.values2d.shape[0]
    temp = {}
    prim = ['a', 'b', 'c', 'd', 'e']
    for i in range(2):
        name = 'effect.' + str(i)
        temp[name] = pd.Series(np.random.choice(prim, size=nt), index=y.index, name=name)
    effects = pd.DataFrame(temp, index=y.index)
    mod = PanelOLS(data.y, data.x, other_effects=effects)
    mod.fit()

    clusters = np.random.randint(0, y.shape[2] // 2, size=(nt, 2))
    temp = {}
    prim = list(map(lambda s: ''.join(s), list(product(ascii_lowercase, ascii_lowercase))))

    for i in range(clusters.shape[1]):
        name = 'effect.' + str(i)
        temp[name] = pd.Series(np.random.choice(prim, size=nt), index=y.index, name=name)
    clusters = pd.DataFrame(temp, index=y.index)
    mod.fit(cov_type='clustered', clusters=clusters)
Example 14
def test_numpy_3d():
    n, t, k = 11, 7, 3
    x = np.random.random((k, t, n))
    dh = PanelData(x)
    assert_equal(x, dh.values3d)
    assert dh.nentity == n
    assert dh.nobs == t
    assert dh.nvar == k
    assert_equal(np.reshape(x.T, (n * t, k)), dh.values2d)
    items = ["entity.{0}".format(i) for i in range(n)]
    obs = [i for i in range(t)]
    var_names = ["x.{0}".format(i) for i in range(k)]
    expected_frame = panel_to_frame(
        np.reshape(x, (k, t, n)),
        items=var_names,
        major_axis=obs,
        minor_axis=items,
        swap=True,
    )
    expected_frame.index.set_names(["entity", "time"], inplace=True)
    assert_frame_equal(dh.dataframe, expected_frame)
Example 15
def test_random_effects_small_sample(data):
    y = PanelData(data.y)
    mod = RandomEffects(data.y, data.x)
    no_ss = mod.fit()
    ss = mod.fit(small_sample=True)
    if y.dataframe.shape[0] == mod.dependent.dataframe.shape[0]:
        assert (ss.variance_decomposition.Effects ==
                no_ss.variance_decomposition.Effects)
    else:
        assert (ss.variance_decomposition.Effects !=
                no_ss.variance_decomposition.Effects)

    mod = RandomEffects(data.y, data.x, weights=data.w)
    no_ss = mod.fit()
    ss = mod.fit(small_sample=True)
    if y.dataframe.shape[0] == mod.dependent.dataframe.shape[0]:
        assert (ss.variance_decomposition.Effects ==
                no_ss.variance_decomposition.Effects)
    else:
        assert (ss.variance_decomposition.Effects !=
                no_ss.variance_decomposition.Effects)
Example 16
def test_count(data):
    xpd = PanelData(data.x)
    xpd.drop(xpd.isnull)
    entity_mean = xpd.count("entity")
    expected = xpd.dataframe.groupby(level=0).count()
    expected = expected.loc[xpd.entities]
    expected.columns.name = None
    expected = expected.astype(np.int64)
    assert_frame_equal(entity_mean, expected)

    time_mean = xpd.count("time")
    expected = xpd.dataframe.groupby(level=1).count()
    expected = expected.loc[xpd.time]
    expected.columns.name = None
    expected = expected.astype(np.int64)
    assert_frame_equal(time_mean, expected)
Example 17
def test_mixed_input(data):
    y = PanelData(data.y)
    nt = y.values2d.shape[0]
    effects = np.random.randint(0, 5, size=nt)
    prim = ["a", "b", "c", "d", "e"]
    temp = {
        "effect.0": pd.Categorical(pd.Series(effects, index=y.index)),
        "effect.1": pd.Series(np.random.choice(prim, size=nt), index=y.index),
    }
    effects = pd.DataFrame(temp, index=y.index)
    mod = PanelOLS(data.y, data.x, other_effects=effects)
    mod.fit()

    clusters = np.random.randint(0, y.shape[2] // 2, size=(nt, 2))
    temp = {}
    prim = list(
        map(lambda s: "".join(s), list(product(ascii_lowercase, ascii_lowercase)))
    )
    temp["var.cluster.0"] = pd.Series(np.random.choice(prim, size=nt), index=y.index)
    temp["var.cluster.1"] = pd.Series(clusters[:, 1], index=y.index)
    clusters = pd.DataFrame(temp, index=y.index)
    mod.fit(cov_type="clustered", clusters=clusters)
Example 18
def test_mixed_input(data):
    y = PanelData(data.y)
    nt = y.values2d.shape[0]
    effects = np.random.randint(0, 5, size=nt)
    prim = ['a', 'b', 'c', 'd', 'e']
    temp = {
        'effect.0': pd.Categorical(pd.Series(effects, index=y.index)),
        'effect.1': pd.Series(np.random.choice(prim, size=nt), index=y.index)
    }
    effects = pd.DataFrame(temp, index=y.index)
    mod = PanelOLS(data.y, data.x, other_effects=effects)
    mod.fit()

    clusters = np.random.randint(0, y.shape[2] // 2, size=(nt, 2))
    temp = {}
    prim = list(
        map(lambda s: ''.join(s),
            list(product(ascii_lowercase, ascii_lowercase))))
    temp['var.cluster.0'] = pd.Series(np.random.choice(prim, size=nt),
                                      index=y.index)
    temp['var.cluster.1'] = pd.Series(clusters[:, 1], index=y.index)
    clusters = pd.DataFrame(temp, index=y.index)
    mod.fit(cov_type='clustered', clusters=clusters)
Example 19
def test_results_access(data):
    mod = PanelOLS(data.y, data.x, entity_effects=True)
    res = mod.fit()
    access_attributes(res)

    mod = PanelOLS(data.y, data.x, other_effects=data.c)
    res = mod.fit()
    access_attributes(res)

    mod = PanelOLS(data.y, data.x, time_effects=True, entity_effects=True)
    res = mod.fit()
    access_attributes(res)

    mod = PanelOLS(data.y, data.x)
    res = mod.fit()
    access_attributes(res)

    const = PanelData(data.y).copy()
    const.dataframe.iloc[:, :] = 1
    const.dataframe.columns = ["const"]
    mod = PanelOLS(data.y, const)
    res = mod.fit()
    access_attributes(res)
Example 20
def test_multiple_obs_per_entity(data):
    mod = BetweenOLS(data.y, data.x)
    res = mod.fit(reweight=True, debiased=False)

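    # The between estimator should agree with cross-sectional OLS on the
    # time-averaged data.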
    dep = mod.dependent.values3d.mean(1).T
    exog = pd.DataFrame(mod.exog.values3d.mean(1).T, columns=mod.exog.vars)
    ols = IV2SLS(dep, exog, None, None)
    ols_res = ols.fit(cov_type="unadjusted")
    assert_results_equal(res, ols_res)

    res = mod.fit(cov_type="robust", debiased=False)
    ols_res = ols.fit(cov_type="robust", debiased=False)
    assert_results_equal(res, ols_res)

    clusters = mod.dependent.dataframe.copy()
    clusters.loc[:, :] = 0
    clusters = clusters.astype(np.int32)
    for entity in mod.dependent.entities:
        clusters.loc[entity] = np.random.randint(9)

    ols_clusters = PanelData(clusters).values3d.mean(1).T.astype(np.int32)
    res = mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
    ols_res = ols.fit(cov_type="clustered", clusters=ols_clusters)
    assert_results_equal(res, ols_res)
Example 21
def test_two_way_clustering(data):
    mod = PooledOLS(data.y, data.x)

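    # Two-way clustering: one cluster variable taken from the data (vc1) and
    # one built from the entity ids.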
    y = PanelData(data.y)
    entity_clusters = pd.DataFrame(y.entity_ids, index=y.index)
    vc1 = PanelData(data.vc1)
    clusters = vc1.copy()
    clusters.dataframe['var.cluster.entity'] = entity_clusters
    clusters._frame = clusters._frame.astype(np.int64)
    res = mod.fit(cov_type='clustered', clusters=clusters, debiased=False)

    y = mod.dependent.dataframe.copy()
    x = mod.exog.dataframe.copy()
    y.index = np.arange(len(y))
    x.index = y.index
    clusters = mod.reformat_clusters(clusters)

    ols_mod = IV2SLS(y, x, None, None)
    ols_res = ols_mod.fit(cov_type='clustered', clusters=clusters.dataframe)
    assert_results_equal(res, ols_res)
Example 22
def test_incorrect_time_axis():
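    # Time-axis labels that are strings or of mixed type should raise
    # ValueError.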
    x = np.random.randn(3, 3, 1000)
    entities = ['entity.{0}'.format(i) for i in range(1000)]
    time = ['time.{0}'.format(i) for i in range(3)]
    vars = ['var.{0}'.format(i) for i in range(3)]
    p = pd.Panel(x, items=vars, major_axis=time, minor_axis=entities)
    with pytest.raises(ValueError):
        PanelData(p)
    df = p.swapaxes(1, 2).swapaxes(0, 1).to_frame()
    with pytest.raises(ValueError):
        PanelData(df)
    da = xr.DataArray(x,
                      coords={
                          'entities': entities,
                          'time': time,
                          'vars': vars
                      },
                      dims=['vars', 'time', 'entities'])
    with pytest.raises(ValueError):
        PanelData(da)

    time = [1, pd.datetime(1960, 1, 1), 'a']
    vars = ['var.{0}'.format(i) for i in range(3)]
    p = pd.Panel(x, items=vars, major_axis=time, minor_axis=entities)
    with pytest.raises(ValueError):
        PanelData(p)
    df = p.swapaxes(1, 2).swapaxes(0, 1).to_frame()
    with pytest.raises(ValueError):
        PanelData(df)
    da = xr.DataArray(x,
                      coords={
                          'entities': entities,
                          'time': time,
                          'vars': vars
                      },
                      dims=['vars', 'time', 'entities'])
    with pytest.raises(ValueError):
        PanelData(da)
Example 23
def test_general_unit_weighted_demean_twoway(panel):
    np.random.seed(12345)
    y = PanelData(panel)
    weights = pd.DataFrame(np.random.chisquare(10, (y.dataframe.shape[0], 1)) / 10, index=y.index)
    w = PanelData(weights)

    dm1 = y.demean('both', weights=w)
    g = pd.DataFrame(y.entity_ids, index=y.index)
    g['column2'] = pd.Series(y.time_ids.squeeze(), index=y.index)
    dm2 = y.general_demean(g, weights=w)
    assert_allclose(dm1.values2d - dm2.values2d, np.zeros_like(dm2.values2d), atol=1e-7)

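    # Random two-way groups with weights: compare against weighted least
    # squares on the stacked group dummies.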
    g = pd.DataFrame(np.random.randint(0, 10, g.shape), index=y.index)
    dm2 = y.general_demean(g, weights=w)
    g1 = pd.Categorical(g.iloc[:, 0])
    d1 = pd.get_dummies(g1)
    g2 = pd.Categorical(g.iloc[:, 1])
    d2 = pd.get_dummies(g2, drop_first=True)
    d = np.c_[d1, d2]
    wd = np.sqrt(w.values2d) * d
    wy = np.sqrt(w.values2d) * y.values2d
    dm1 = wy - wd @ np.linalg.lstsq(wd, wy, rcond=None)[0]
    assert_allclose(dm1 - dm2.values2d, np.zeros_like(dm2.values2d), atol=1e-7)
Example 24
def test_numpy_1d():
    n = 11
    x = np.random.random(n)
    with pytest.raises(ValueError):
        PanelData(x)
Example 25
def test_general_weighted_demean_oneway(mi_df):
    y = PanelData(mi_df)
    weights = DataFrame(np.random.chisquare(10,
                                            (y.dataframe.shape[0], 1)) / 10,
                        index=y.index)
    w = PanelData(weights)

    dm1 = y.demean("entity", weights=w)
    g = PanelData(DataFrame(y.entity_ids, index=y.index))
    dm2 = y.general_demean(g, w)
    assert_allclose(dm1.values2d, dm2.values2d)

    dm1 = y.demean("time", weights=w)
    g = PanelData(DataFrame(y.time_ids, index=y.index))
    dm2 = y.general_demean(g, w)
    assert_allclose(dm1.values2d, dm2.values2d)

    g = PanelData(
        DataFrame(np.random.randint(0, 10, g.dataframe.shape), index=y.index))
    dm2 = y.general_demean(g, w)
    g = Categorical(g.dataframe.iloc[:, 0])
    d = get_dummies(g)
    wd = np.sqrt(w.values2d) * d
    wy = np.sqrt(w.values2d) * y.values2d
    dm1 = wy - wd @ lstsq(wd, wy, rcond=None)[0]
    assert_allclose(dm1, dm2.values2d, atol=1e-14)
Example 26
def test_general_unit_weighted_demean_oneway(mi_df):
    y = PanelData(mi_df)
    dm1 = y.demean("entity")
    g = PanelData(DataFrame(y.entity_ids, index=y.index))
    weights = PanelData(g).copy()
    weights.dataframe.iloc[:, :] = 1
    dm2 = y.general_demean(g, weights)
    assert_allclose(dm1.values2d, dm2.values2d)
    dm3 = y.general_demean(g)
    assert_allclose(dm3.values2d, dm2.values2d)

    dm1 = y.demean("time")
    g = PanelData(DataFrame(y.time_ids, index=y.index))
    dm2 = y.general_demean(g, weights)
    assert_allclose(dm1.values2d, dm2.values2d)
    dm3 = y.general_demean(g)
    assert_allclose(dm3.values2d, dm2.values2d)

    g = PanelData(
        DataFrame(np.random.randint(0, 10, g.dataframe.shape), index=y.index))
    dm2 = y.general_demean(g, weights)
    dm3 = y.general_demean(g)
    g = Categorical(g.dataframe.iloc[:, 0])
    d = get_dummies(g)
    dm1 = y.values2d - d @ lstsq(d, y.values2d, rcond=None)[0]
    assert_allclose(dm1, dm2.values2d)
    assert_allclose(dm3.values2d, dm2.values2d)
Example 27
def test_repr_html(mi_df):
    data = PanelData(mi_df)
    html = data._repr_html_()
    assert "<br/>" in html
Example 28
def test_demean_simple_weighted(data):
    x = PanelData(data.x)
    w = PanelData(data.w)
    missing = x.isnull | w.isnull
    x.drop(missing)
    w.drop(missing)
    w.dataframe.iloc[:, 0] = 1
    unweighted_entity_demean = x.demean("entity")
    weighted_entity_demean = x.demean("entity", weights=w)
    assert_allclose(unweighted_entity_demean.dataframe,
                    weighted_entity_demean.dataframe)

    unweighted_entity_demean = x.demean("time")
    weighted_entity_demean = x.demean("time", weights=w)
    assert_allclose(unweighted_entity_demean.dataframe,
                    weighted_entity_demean.dataframe)
Example 29
def test_first_difference(data):
    x = PanelData(data.x)
    x.first_difference()
Example 30
def test_invalid_seires(mi_df):
    si = mi_df.reset_index()
    with pytest.raises(ValueError):
        PanelData(si.iloc[:, 0])