def load(
    file: str,
    file_types: list,
    data_files: Dict[Any, Any],
    data_examples: Dict[Any, Any],
) -> pd.DataFrame:
    """Load a custom file or an example dataset into a dataframe.

    Lookup order: ``file`` is first checked against ``data_examples``, then
    against ``data_files`` (where it is replaced by the mapped value), and
    finally treated as a path on disk.

    Parameters
    ----------
    file: str
        Path to file, or a key of ``data_files`` / ``data_examples``
    file_types: list
        Supported file types (only used to build the error message)
    data_files: dict
        Contains all available data files within the Export folder
    data_examples: dict
        Contains all available examples from Statsmodels

    Returns
    -------
    pd.DataFrame:
        Dataframe with custom data. An empty dataframe is returned when the
        file does not exist or its extension is not supported.
    """
    if file in data_examples:
        if file == "wage_panel":
            return wage_panel.load()
        # Plain attribute lookup instead of eval(): same dataset module is
        # resolved, but no string is ever executed as code.
        return getattr(sm.datasets, file).load_pandas().data

    if file in data_files:
        file = data_files[file]

    path = Path(file)
    if not path.exists():
        console.print(f"[red]Can not find the file {file}[/red]\n")
        return pd.DataFrame()

    file_type = path.suffix

    if file_type == ".xlsx":
        return pd.read_excel(file)
    if file_type == ".csv":
        return pd.read_csv(file)

    # Report the problem, then return an empty frame so the result always
    # matches the annotated return type (console.print itself returns None).
    console.print(
        f"The file type {file_type} is not supported. Please choose one of the following: "
        f"{', '.join(file_types)}")
    return pd.DataFrame()
# Example no. 2
# 0
from itertools import product

import numpy as np
from numpy.testing import assert_allclose
from pandas.testing import assert_series_equal
import pytest
from statsmodels.tools.tools import add_constant

from linearmodels.datasets import wage_panel
from linearmodels.iv.model import IV2SLS
from linearmodels.panel.data import PanelData
from linearmodels.panel.model import PanelOLS, PooledOLS, RandomEffects
from linearmodels.panel.results import compare
from linearmodels.tests.panel._utility import datatypes, generate_data


@pytest.fixture(params=[wage_panel.load()])
def data(request):
    """Hand the parametrized dataset to the requesting test.

    The fixture has a single parameter: the result of ``wage_panel.load()``,
    which is evaluated once, when the decorator is applied.
    """
    dataset = request.param
    return dataset


# Parameter grid consumed by the ``generated_data`` fixture below: the
# Cartesian product of the three lists. ``missing`` and ``has_const`` are
# presumably the missing-value fractions and the include-a-constant switch
# passed to ``generate_data`` -- confirm against that helper.
missing = [0.0, 0.02, 0.20]
has_const = [True, False]
perms = list(product(missing, datatypes, has_const))
# One readable id per combination: the str() of each element joined by "-".
ids = ["-".join(map(str, perm)) for perm in perms]


@pytest.fixture(params=perms, ids=ids)
def generated_data(request):
    missing, datatype, const = request.param
    return generate_data(missing,
                         datatype,
# Example no. 3
# 0
# Demo script: fit several panel models on the wage_panel data and print a
# comparison of the fitted results. Imports stay interleaved with the
# statements, in their original order.
import numpy as np
import linearmodels as lm

# Switch the package-level WARN_ON_MISSING flag off, then call
# missing_warning with a mask that contains True entries.
lm.WARN_ON_MISSING = False
from linearmodels import utility

utility.missing_warning(np.array([True, True, False]))

from linearmodels.panel import PanelOLS, RandomEffects, PooledOLS
from linearmodels.datasets import wage_panel
import statsmodels.api as sm

# Load the dataset and index it by the ("nr", "year") column pair.
data = wage_panel.load().set_index(["nr", "year"])
dependent = data["lwage"]

# PanelOLS with entity and time effects, fitted with two covariance types.
exog = sm.add_constant(data[["expersq", "married", "union"]])
mod = PanelOLS(dependent, exog, entity_effects=True, time_effects=True)
res = mod.fit(cov_type="unadjusted")
res2 = mod.fit(cov_type="robust")

# PanelOLS with entity effects only; "exper" joins the regressors.
exog = sm.add_constant(data[["exper", "expersq", "married", "union"]])
mod = PanelOLS(dependent, exog, entity_effects=True)
res3 = mod.fit(cov_type="clustered", cluster_entity=True)

# RandomEffects on the same regressors.
mod = RandomEffects(dependent, exog)
res4 = mod.fit(cov_type="robust")
from linearmodels.panel.results import compare

# PooledOLS on a copy of the regressors with "year" added as a categorical.
exog = sm.add_constant(data[["exper", "expersq", "married", "union"]].copy())
import pandas as pd

exog["year"] = pd.Categorical(data.reset_index()["year"])
mod = PooledOLS(dependent, exog)
res5 = mod.fit(cov_type="robust")
print(compare([res, res2, res3, res4, res5]))

print(data.columns)