Ejemplo n.º 1
0
def test_parser(config):
    """Check that SystemFormulaParser exposes consistent per-equation data.

    Verifies that ``eval_env`` can be read and reassigned, that the
    ``exog``/``dependent``/``endog``/``instruments`` views match the entries
    of ``parser.data``, that every equation label is a key of that data, and
    that a parser rebuilt from ``parser.formula`` produces equal frames.
    """
    fmla, model, interface = config
    parser = SystemFormulaParser(fmla, joined, eval_env=5)
    original = parser.data
    assert isinstance(original, OrderedDict)
    assert parser.eval_env == 5

    parser.eval_env = 4
    assert parser.eval_env == 4

    # Pair each component name with the parser attribute that exposes it.
    views = (
        ('exog', parser.exog),
        ('dependent', parser.dependent),
        ('endog', parser.endog),
        ('instruments', parser.instruments),
    )
    for label, equation in original.items():
        for component, by_label in views:
            assert_frame_equal(by_label[label], equation[component])

    assert all(label in original for label in parser.equation_labels)

    rebuilt = SystemFormulaParser(parser.formula, joined, eval_env=5)
    rebuilt_data = rebuilt.data
    for label, first in original.items():
        second = rebuilt_data[label]
        for component in first:
            value = first[component]
            # Components that are None have nothing to compare.
            if value is None:
                continue
            assert_frame_equal(value, second[component])
Ejemplo n.º 2
0
def test_parser(data, formula, effects):
    """Exercise PanelFormulaParser on a formula with optional effect terms.

    The test is a no-op unless ``data.y`` is a DataFrame.  It checks that the
    parsed ``dependent``/``exog`` match ``parser.data``, that ``eval_env`` is
    settable, and that the effect flags mirror the terms in the formula.
    Adding ``FixedEffects`` on top of explicit effects is expected to raise
    ``ValueError``; without them it is expected to enable entity effects.
    """
    if not isinstance(data.y, pd.DataFrame):
        return
    if effects:
        formula = formula + ' + EntityEffects + TimeEffects'

    joined = data.x
    joined['y'] = data.y
    parser = PanelFormulaParser(formula, joined)
    dep, exog = parser.data
    assert_frame_equal(parser.dependent, dep)
    assert_frame_equal(parser.exog, exog)

    for env in (3, 2):
        parser.eval_env = env
        assert parser.eval_env == env

    wants_entity = 'EntityEffects' in formula
    wants_time = 'TimeEffects' in formula
    assert parser.entity_effect == wants_entity
    assert parser.time_effect == wants_time

    with_fixed = formula + ' + FixedEffects '
    if not effects:
        parser = PanelFormulaParser(with_fixed, joined)
        assert parser.entity_effect
    else:
        with pytest.raises(ValueError):
            PanelFormulaParser(with_fixed, joined)
Ejemplo n.º 3
0
def assert_results_equal(res1, res2, test_fit=True, test_df=True):
    """Assert that two result objects agree on their shared parameters.

    Only the first ``n`` parameters are compared, where ``n`` is the smaller
    of the two parameter counts.

    Parameters
    ----------
    res1, res2 : object
        Results exposing ``params``, ``pvalues``, ``tstats``, ``cov``,
        ``conf_int()``, ``s2``, ``resids`` and ``wresids``, plus the degree
        of freedom and fit attributes checked below.
    test_fit : bool
        When true, also compare ``rsquared``, ``total_ss``, ``resid_ss`` and
        ``model_ss``.
    test_df : bool
        When true, also compare ``df_model`` and ``df_resid``.
    """
    n = min(res1.params.shape[0], res2.params.shape[0])

    for name in ('params', 'pvalues', 'tstats'):
        assert_series_equal(getattr(res1, name).iloc[:n],
                            getattr(res2, name).iloc[:n])
    assert_frame_equal(res1.cov.iloc[:n, :n], res2.cov.iloc[:n, :n])
    assert_frame_equal(res1.conf_int().iloc[:n], res2.conf_int().iloc[:n])
    assert_allclose(res1.s2, res2.s2)

    # Residual differences are scaled by the larger standard deviation and
    # shifted to 1 so the comparison is relative to the residual scale.
    for name in ('resids', 'wresids'):
        first, second = getattr(res1, name), getattr(res2, name)
        scale = max(first.std(), second.std())
        delta = 1 + (first.values - second.values) / scale
        assert_allclose(delta, np.ones_like(delta))

    scalars = []
    if test_df:
        scalars += ['df_model', 'df_resid']
    if test_fit:
        scalars += ['rsquared', 'total_ss', 'resid_ss', 'model_ss']
    for name in scalars:
        assert_allclose(getattr(res1, name), getattr(res2, name))
Ejemplo n.º 4
0
def test_demean_missing_alt_types(data):
    """Compare PanelData.demean against a groupby-based demeaning.

    After dropping the rows flagged by ``isnull``, demeaning by ``'entity'``
    and by ``'time'`` must equal subtracting the group mean over index
    level 0 and level 1 respectively.
    """
    xpd = PanelData(data.x)
    xpd.drop(xpd.isnull)
    for group, level in (('entity', 0), ('time', 1)):
        demeaned = xpd.demean(group)
        grouped = xpd.dataframe.groupby(level=level)
        expected = grouped.transform(lambda s: s - s.mean())
        assert_frame_equal(demeaned.dataframe, expected)
Ejemplo n.º 5
0
def test_existing_panel_data():
    """Wrapping a PanelData in PanelData must yield an equal dataframe."""
    n, t, k = 11, 7, 3
    values = np.random.random((k, t, n))
    dates = pd.date_range('12-31-1999', periods=7)
    var_names = ['var.' + str(i) for i in range(1, k + 1)]
    entity_names = ['entities.' + str(i) for i in range(1, n + 1)]
    panel = pd.Panel(values, items=var_names, major_axis=dates,
                     minor_axis=entity_names)
    first = PanelData(panel)
    second = PanelData(first)
    assert_frame_equal(first.dataframe, second.dataframe)
Ejemplo n.º 6
0
def test_predict_formula(data, model_and_func, formula):
    """Predictions from explicit exog/endog must match those from ``data``.

    Both prediction routes are also compared with the fitted values of the
    estimated model.
    """
    model, _ = model_and_func
    res = model.from_formula(formula, data).fit()
    exog_columns = ['Intercept', 'x3', 'x4', 'x5']
    endog_columns = ['x1', 'x2']
    from_arrays = res.predict(data[exog_columns], data[endog_columns])
    from_data = res.predict(data=data)
    assert_frame_equal(from_arrays, from_data)
    assert_allclose(res.fitted_values, from_arrays)
Ejemplo n.º 7
0
 def test_numpy_1d(self):
     """A 1-d array is exposed by IVData as a single column named ``x``."""
     x = np.empty(10)
     xdh = IVData(x)
     as_column = x[:, None]
     assert xdh.ndim == 2
     assert xdh.cols == ['x']
     assert xdh.rows == list(np.arange(10))
     assert_equal(xdh.ndarray, as_column)
     expected = pd.DataFrame(as_column, columns=xdh.cols, index=xdh.rows)
     assert_frame_equal(xdh.pandas, expected)
     assert xdh.shape == (10, 1)
Ejemplo n.º 8
0
 def test_string_cat_equiv(self):
     """String and categorical versions of a column give equal IVData."""
     letters = pd.Series(['a', 'b', 'a', 'b', 'c', 'd', 'a', 'b'])
     numbers = pd.Series(np.arange(8.0))
     fruit = pd.Series(['apple', 'banana', 'apple', 'banana',
                        'cherry', 'date', 'apple', 'banana'])
     df = pd.DataFrame({'string': letters,
                        'number': numbers,
                        'other_string': fruit})
     from_strings = IVData(df)
     # Same data, but with the 'string' column stored as a categorical.
     df_cat = df.copy()
     df_cat['string'] = df_cat['string'].astype('category')
     from_categorical = IVData(df_cat)
     assert_frame_equal(from_strings.pandas, from_categorical.pandas)
Ejemplo n.º 9
0
 def test_numpy_2d(self):
     """A 2-d array keeps its shape and gets ``x.<i>`` column names."""
     x = np.empty((10, 2))
     xdh = IVData(x)
     assert xdh.ndim == x.ndim
     assert xdh.cols == ['x.0', 'x.1']
     assert xdh.rows == list(np.arange(10))
     assert_equal(xdh.ndarray, x)
     expected = pd.DataFrame(x, columns=xdh.cols, index=xdh.rows)
     assert_frame_equal(xdh.pandas, expected)
     assert xdh.shape == (10, 2)
     expected_labels = {0: xdh.rows, 1: xdh.cols}
     assert xdh.labels == expected_labels
Ejemplo n.º 10
0
 def test_categorical_no_conversion(self):
     """With ``convert_dummies=False`` the input series is kept as one column."""
     index = pd.date_range('2017-01-01', periods=10)
     cat = pd.Categorical(['a', 'b', 'a', 'b', 'a', 'a', 'b', 'c', 'c', 'a'])
     # NOTE(review): a dict keyed by 'cat' combined with a date index may not
     # produce a Series holding the categorical values - confirm that this
     # construction is what the test intends.
     s = pd.Series({'cat': cat}, index=index, name='cat')
     dh = IVData(s, convert_dummies=False)
     assert dh.ndim == 2
     assert dh.shape == (10, 1)
     assert dh.cols == ['cat']
     assert dh.rows == list(index)
     expected = pd.DataFrame(s)
     assert_frame_equal(dh.pandas, expected)
Ejemplo n.º 11
0
 def test_existing_datahandler(self):
     """IVData built from an IVData is a distinct but equal object."""
     values = np.empty((10, 2))
     dates = pd.date_range('2017-01-01', periods=10)
     frame = pd.DataFrame(values, columns=['a', 'b'], index=dates)
     first = IVData(frame)
     second = IVData(first)
     assert first is not second
     assert first.cols == second.cols
     assert first.rows == second.rows
     assert_equal(first.ndarray, second.ndarray)
     assert first.ndim == second.ndim
     assert_frame_equal(first.pandas, second.pandas)
Ejemplo n.º 12
0
 def test_pandas_df_numeric(self):
     """A numeric DataFrame round-trips through IVData unchanged."""
     x = np.empty((10, 2))
     dates = pd.date_range('2017-01-01', periods=10)
     xdf = pd.DataFrame(x, columns=['a', 'b'], index=dates)
     xdh = IVData(xdf)
     assert xdh.ndim == 2
     assert xdh.cols == list(xdf.columns)
     assert xdh.rows == list(xdf.index)
     assert_equal(xdh.ndarray, x)
     expected = pd.DataFrame(x, columns=xdh.cols, index=xdh.rows)
     assert_frame_equal(xdh.pandas, expected)
     assert xdh.shape == (10, 2)
Ejemplo n.º 13
0
 def test_pandas_series_numeric(self):
     """A numeric Series becomes a single column named after the Series."""
     x = np.empty(10)
     dates = pd.date_range('2017-01-01', periods=10)
     xs = pd.Series(x, name='charlie', index=dates)
     xdh = IVData(xs)
     as_column = x[:, None]
     assert xdh.ndim == 2
     assert xdh.cols == [xs.name]
     assert xdh.rows == list(xs.index)
     assert_equal(xdh.ndarray, as_column)
     expected = pd.DataFrame(as_column, columns=xdh.cols, index=xdh.rows)
     assert_frame_equal(xdh.pandas, expected)
     assert xdh.shape == (10, 1)
Ejemplo n.º 14
0
def test_predict(config):
    """Predicting from ``data`` and from the model's equations must agree.

    Configurations whose formulas contain a ``[`` block are skipped unless
    the model is IVSystemGMM or IV3SLS.
    """
    fmla, model, interface = config
    has_bracket = any('[' in fmla[key] for key in fmla)
    if has_bracket and model not in (IVSystemGMM, IV3SLS):
        return

    mod = model.from_formula(fmla, joined)
    res = mod.fit()
    as_dict = res.predict(data=joined)
    assert isinstance(as_dict, dict)
    from_data = res.predict(data=joined, dataframe=True)
    from_equations = res.predict(equations=mod._equations, dataframe=True)
    assert_frame_equal(from_data, from_equations)
Ejemplo n.º 15
0
def test_mean_missing(data):
    """Compare PanelData.mean with a groupby mean after dropping nulls.

    The expected frame is reordered with ``xpd.entities`` / ``xpd.time`` and
    has its column-index name cleared before the comparison.
    """
    xpd = PanelData(data.x)
    xpd.drop(xpd.isnull)
    cases = (('entity', 0, 'entities'), ('time', 1, 'time'))
    for group, level, order_attr in cases:
        result = xpd.mean(group)
        expected = xpd.dataframe.groupby(level=level).mean()
        expected = expected.loc[getattr(xpd, order_attr)]
        expected.columns.name = None
        assert_frame_equal(result, expected)
Ejemplo n.º 16
0
def test_original_unmodified(data):
    """Fitting PanelOLS must leave the caller's inputs untouched.

    Copies of ``y``, ``x`` and ``w`` are taken before fitting and compared
    afterwards with a check suited to the input type.  For pandas inputs the
    same check is repeated on MultiIndex dataframes built via PanelData.
    """
    names = ('y', 'x', 'w')
    before = {name: getattr(data, name).copy() for name in names}
    PanelOLS(data.y, data.x, weights=data.w).fit(debiased=True)

    if isinstance(data.y, (pd.DataFrame, pd.Panel)):
        for name in names:
            snapshot, current = before[name], getattr(data, name)
            if isinstance(snapshot, pd.DataFrame):
                assert_frame_equal(snapshot, current)
            else:
                assert_panel_equal(snapshot, current)

        # Repeat with MultiIndex frames; y and x get custom index names.
        mi_df_y = PanelData(data.y).dataframe
        mi_df_x = PanelData(data.x).dataframe
        mi_df_y.index.names = ['firm', 'period']
        mi_df_x.index.names = ['firm', 'period']
        mi_df_w = PanelData(data.w).dataframe
        saved_y, saved_x, saved_w = (mi_df_y.copy(), mi_df_x.copy(),
                                     mi_df_w.copy())
        PanelOLS(mi_df_y, mi_df_x, weights=mi_df_w).fit(debiased=True)
        for current, saved in ((mi_df_w, saved_w),
                               (mi_df_y, saved_y),
                               (mi_df_x, saved_x)):
            assert_frame_equal(current, saved)
    elif isinstance(data.y, xr.DataArray):
        for name in ('y', 'w', 'x'):
            xr.testing.assert_identical(getattr(data, name), before[name])
    else:
        for name in names:
            assert_allclose(getattr(data, name), before[name])
Ejemplo n.º 17
0
def test_count(data):
    """Compare PanelData.count with a groupby count after dropping nulls.

    The expected frame is reordered with ``xpd.entities`` / ``xpd.time``,
    has its column-index name cleared and is cast to ``int64``.
    """
    xpd = PanelData(data.x)
    xpd.drop(xpd.isnull)
    cases = (('entity', 0, 'entities'), ('time', 1, 'time'))
    for group, level, order_attr in cases:
        result = xpd.count(group)
        expected = xpd.dataframe.groupby(level=level).count()
        expected = expected.loc[getattr(xpd, order_attr)]
        expected.columns.name = None
        expected = expected.astype(np.int64)
        assert_frame_equal(result, expected)
Ejemplo n.º 18
0
def test_fitted(data):
    """Fitted values must equal ``x @ params`` for every equation.

    Each equation's fitted values are compared with the matching column of
    the system-level ``fitted_values`` and with the direct matrix product;
    the stacked direct products must equal the full ``fitted_values`` frame.
    """
    mod = IVSystemGMM(data.eqns)
    res = mod.fit()
    direct_columns = []
    for i, key in enumerate(res.equations):
        eq = res.equations[key]
        renamed = res.fitted_values[key].copy()
        renamed.name = 'fitted_values'
        assert_series_equal(eq.fitted_values, renamed)
        direct = mod._x[i] @ eq.params.values
        direct_columns.append(direct[:, None])
        assert_allclose(eq.fitted_values, direct, atol=1e-8)

    stacked = np.concatenate(direct_columns, axis=1)
    # ``i`` still holds the index of the last equation; that equation's
    # dependent variable supplies the index of the expected frame.
    expected = pd.DataFrame(stacked,
                            index=mod._dependent[i].pandas.index,
                            columns=list(res.equations))
    assert_frame_equal(expected, res.fitted_values)
Ejemplo n.º 19
0
def test_demean_missing_alt_types(data):
    """Compare PanelData.demean against a groupby-based demeaning.

    Index and column type checks are enabled only when ``data.x`` is a
    DataFrame or an ndarray.
    """
    strict = isinstance(data.x, (pd.DataFrame, np.ndarray))
    type_checks = {'check_index_type': strict, 'check_column_type': strict}
    xpd = PanelData(data.x)
    xpd.drop(xpd.isnull)
    for group, level in (('entity', 0), ('time', 1)):
        demeaned = xpd.demean(group)
        grouped = xpd.dataframe.groupby(level=level)
        expected = grouped.transform(lambda s: s - s.mean())
        assert_frame_equal(demeaned.dataframe, expected, **type_checks)
Ejemplo n.º 20
0
def test_numpy_3d():
    """A 3-d array of shape (k, t, n) maps to n entities, t obs and k vars.

    The 2-d values and the dataframe are compared with a reshaped array and
    with a frame built from an equivalent ``pd.Panel``.
    """
    n, t, k = 11, 7, 3
    x = np.random.random((k, t, n))
    dh = PanelData(x)
    assert_equal(x, dh.values3d)
    assert dh.nentity == n
    assert dh.nobs == t
    assert dh.nvar == k
    assert_equal(np.reshape(x.T, (n * t, k)), dh.values2d)

    entity_names = ['entity.' + str(i) for i in range(n)]
    periods = list(range(t))
    var_names = ['x.' + str(i) for i in range(k)]
    expected = pd.Panel(np.reshape(x, (k, t, n)), items=var_names,
                        major_axis=periods, minor_axis=entity_names)
    expected_frame = expected.swapaxes(1, 2).to_frame()
    for level, level_name in enumerate(('entity', 'time')):
        expected_frame.index.levels[level].name = level_name
    assert_frame_equal(dh.dataframe, expected_frame)
Ejemplo n.º 21
0
    def test_xarray_1d(self):
        """1-d DataArrays become one column named ``<var_name>.0``.

        Checked once with the default coordinates and once with a date
        index supplied as the ``time`` coordinate.
        """
        x_np = np.random.randn(10)
        as_column = x_np[:, None]

        # Default integer coordinates.
        dh = IVData(xr.DataArray(x_np), 'some_variable')
        assert_equal(dh.ndarray, as_column)
        assert dh.rows == list(np.arange(10))
        assert dh.cols == ['some_variable.0']
        expected = pd.DataFrame(x_np, columns=dh.cols, index=dh.rows)
        assert_frame_equal(expected, dh.pandas)

        # Explicit date coordinate.
        dates = pd.date_range('2017-01-01', periods=10)
        dh = IVData(xr.DataArray(x_np, [('time', dates)]), 'some_variable')
        assert_equal(dh.ndarray, as_column)
        assert_series_equal(pd.Series(dh.rows), pd.Series(list(dates)))
        assert dh.cols == ['some_variable.0']
        expected = pd.DataFrame(as_column, columns=dh.cols, index=dh.rows)
        assert_frame_equal(expected, dh.pandas)
Ejemplo n.º 22
0
def test_pandas_panel():
    """PanelData built from a ``pd.Panel`` preserves values and dimensions."""
    n, t, k = 11, 7, 3
    values = np.random.random((k, t, n))
    dates = pd.date_range('12-31-1999', periods=7)
    var_names = ['var.' + str(i) for i in range(1, k + 1)]
    entity_names = ['entities.' + str(i) for i in range(1, n + 1)]
    x = pd.Panel(values, items=var_names, major_axis=dates,
                 minor_axis=entity_names)
    dh = PanelData(x)
    assert dh.nentity == n
    assert dh.nobs == t
    assert dh.nvar == k
    assert_equal(dh.values3d, x.values)
    flattened = np.reshape(x.swapaxes(0, 2).values, (n * t, k))
    assert_equal(dh.values2d, flattened)
    expected_frame = x.swapaxes(1, 2).to_frame()
    for level, level_name in enumerate(('entity', 'time')):
        expected_frame.index.levels[level].name = level_name
    assert_frame_equal(dh.dataframe, expected_frame)
Ejemplo n.º 23
0
    def test_xarray_2d(self):
        """2-d DataArrays keep their values; labels come from coordinates.

        Without coordinates the columns are ``x.0``/``x.1``; with named
        coordinates the supplied variable labels are used.
        """
        x_np = np.random.randn(10, 2)

        # Default integer coordinates.
        dh = IVData(xr.DataArray(x_np))
        assert_equal(dh.ndarray, x_np)
        assert dh.rows == list(np.arange(10))
        assert dh.cols == ['x.0', 'x.1']
        expected = pd.DataFrame(x_np, columns=dh.cols, index=dh.rows)
        assert_frame_equal(expected, dh.pandas)

        # Explicit time and variable coordinates.
        dates = pd.date_range('2017-01-01', periods=10)
        coords = [('time', dates), ('variables', ['apple', 'banana'])]
        dh = IVData(xr.DataArray(x_np, coords))
        assert_equal(dh.ndarray, x_np)
        assert_series_equal(pd.Series(dh.rows), pd.Series(list(dates)))
        assert dh.cols == ['apple', 'banana']
        expected = pd.DataFrame(x_np, columns=dh.cols, index=dh.rows)
        assert_frame_equal(expected, dh.pandas)
Ejemplo n.º 24
0
def test_predict_partial(config):
    """Predicting with a subset of equations returns only those equations.

    Three variants are checked against the full prediction from ``data``:
    all equations but the first, the same with the first equation given as
    empty (``None``) inputs, and all equations with zero-column inputs
    removed.  Configurations whose formulas contain ``[`` are skipped unless
    the model is IVSystemGMM or IV3SLS.
    """
    fmla, model, interface = config
    has_bracket = any('[' in fmla[key] for key in fmla)
    if has_bracket and model not in (IVSystemGMM, IV3SLS):
        return

    mod = model.from_formula(fmla, joined)
    res = mod.fit()
    full = res.predict(data=joined, dataframe=True)

    # Variant 1: every equation except the first.
    eqns = AttrDict()
    for key in list(mod._equations.keys())[1:]:
        eqns[key] = mod._equations[key]
    partial = res.predict(equations=eqns, dataframe=True)
    for key in mod._equations:
        assert (key in partial) == (key in eqns)
    assert_frame_equal(full[partial.columns], partial)

    # Variant 2: as above, plus the first equation with no inputs at all.
    eqns = AttrDict()
    all_keys = list(mod._equations.keys())
    for key in all_keys[1:]:
        eqns[key] = mod._equations[key]
    eqns[all_keys[0]] = {'exog': None, 'endog': None}
    with_empty = res.predict(equations=eqns, dataframe=True)
    assert_frame_equal(full[with_empty.columns], with_empty)

    # Variant 3: all equations, dropping inputs that have zero columns.
    eqns = AttrDict()
    for key in mod._equations:
        non_empty = {}
        for k, v in mod._equations[key].items():
            if v.shape[1] > 0:
                non_empty[k] = v
        eqns[key] = non_empty
    trimmed = res.predict(equations=eqns, dataframe=True)
    assert_frame_equal(full, trimmed)
Ejemplo n.º 25
0
def test_predict(missing_data):
    """Check SUR predictions in dict and dataframe form.

    Fitted values and idiosyncratic residuals are compared with the results'
    own ``fitted_values``/``resids``, both per equation and as dataframes.
    With ``missing=True`` the predictions must have as many rows as the
    first equation's dependent variable.
    """
    mod = SUR(missing_data)
    res = mod.fit()

    per_equation_cases = (
        ({}, 'fitted_values'),
        ({'fitted': False, 'idiosyncratic': True}, 'resids'),
    )
    for options, attr in per_equation_cases:
        pred = res.predict(**options)
        for key in pred:
            assert_series_equal(pred[key].iloc[:, 0],
                                getattr(res.equations[key], attr),
                                check_names=False)

    pred = res.predict(fitted=True, idiosyncratic=True)
    assert isinstance(pred, dict)
    assert all(key in pred for key in res.equations)

    for options, attr in per_equation_cases:
        pred = res.predict(dataframe=True, **options)
        assert isinstance(pred, DataFrame)
        assert_frame_equal(pred, getattr(res, attr))

    # Requesting both components as dataframes returns a dict of frames.
    both = res.predict(fitted=True, idiosyncratic=True, dataframe=True)
    assert isinstance(both, dict)
    for name, attr in (('fitted_values', 'fitted_values'),
                       ('idiosyncratic', 'resids')):
        assert name in both
        assert_frame_equal(both[name], getattr(res, attr))

    first_key = list(missing_data.keys())[0]
    nobs = missing_data[first_key]['dependent'].shape[0]
    pred = res.predict(fitted=True,
                       idiosyncratic=False,
                       dataframe=True,
                       missing=True)
    assert pred.shape[0] == nobs

    pred = res.predict(fitted=True, idiosyncratic=True, missing=True)
    for key in pred:
        assert pred[key].shape[0] == nobs
Ejemplo n.º 26
0
def test_predict_formula_function(data, model_and_func):
    """Predictions must agree when the formula contains function calls.

    The exogenous regressors are rebuilt by hand (applying ``sigmoid`` and
    ``np.exp`` to the same columns the formula transforms) and predictions
    from these arrays are compared with predictions from ``data``, both for
    the model class and for the functional interface ``func``.
    """
    model, func = model_and_func
    fmla = 'y ~ 1 + sigmoid(x3) + x4 + [x1 + x2 ~ z1 + z2 + z3] + np.exp(x5)'
    mod = model.from_formula(fmla, data)
    res = mod.fit()

    exog = [data[['Intercept']], sigmoid(data[['x3']]), data[['x4']],
            np.exp(data[['x5']])]
    # ``axis`` is passed by keyword: pandas deprecated and later removed
    # support for positional arguments to ``concat`` other than ``objs``.
    exog = pd.concat(exog, axis=1)
    endog = data[['x1', 'x2']]
    pred = res.predict(exog, endog)
    pred2 = res.predict(data=data)
    assert_frame_equal(pred, pred2)
    assert_allclose(res.fitted_values, pred)

    res2 = func(fmla, data).fit()
    pred3 = res2.predict(exog, endog)
    pred4 = res2.predict(data=data)
    assert_frame_equal(pred, pred3)
    assert_frame_equal(pred, pred4)
Ejemplo n.º 27
0
def assert_frame_similar(result, expected):
    """Assert two frames match in everything except their cell values.

    Both frames are copied and every cell of each copy is overwritten with
    ``0.0`` before the copies are compared, so the inputs are not modified.
    """
    zeroed = []
    for frame in (result, expected):
        blank = frame.copy()
        blank.iloc[:, :] = 0.0
        zeroed.append(blank)
    assert_frame_equal(zeroed[0], zeroed[1])
Ejemplo n.º 28
0
def test_series_multiindex(mi_df):
    """A column passed as a Series or as a one-column frame gives equal data."""
    as_frame = mi_df.iloc[:, [0]]
    as_series = mi_df.iloc[:, 0]
    assert_frame_equal(PanelData(as_frame).dataframe,
                       PanelData(as_series).dataframe)
Ejemplo n.º 29
0
def test_non_traded_risk_free(data, non_traded_model):
    """Formula and array construction must give the same risk-free model.

    Checked for a formula that names the portfolios (``FORMULA``) and for a
    factors-only formula (``FORMULA_FACTORS``) with the portfolios passed
    separately.  The formula-built model records its formula; the
    array-built model has ``formula`` set to ``None``.
    """
    for fmla, pass_portfolios in ((FORMULA, False), (FORMULA_FACTORS, True)):
        extra = {'portfolios': data.portfolios} if pass_portfolios else {}
        from_formula = non_traded_model.from_formula(
            fmla, data.joined, risk_free=True, **extra)
        from_arrays = non_traded_model(data.portfolios, data.factors,
                                       risk_free=True)
        res_formula = from_formula.fit()
        res_arrays = from_arrays.fit()
        assert_frame_equal(from_formula.factors.pandas,
                           from_arrays.factors.pandas)
        assert_frame_equal(from_formula.portfolios.pandas,
                           from_arrays.portfolios.pandas)
        assert_frame_equal(res_formula.params, res_arrays.params)
        assert from_formula.formula == fmla
        assert from_arrays.formula is None
Ejemplo n.º 30
0
def test_series_multiindex(panel):
    """A column passed as a Series or as a one-column frame gives equal data.

    The MultiIndex frame is built from ``panel`` without filtering
    observations.
    """
    swapped = panel.swapaxes(1, 2)
    mi = swapped.to_frame(filter_observations=False)
    as_frame = mi.iloc[:, [0]]
    as_series = mi.iloc[:, 0]
    assert_frame_equal(PanelData(as_frame).dataframe,
                       PanelData(as_series).dataframe)