def load(
    file: str,
    file_types: list,
    data_files: Dict[Any, Any],
    data_examples: Dict[Any, Any],
) -> pd.DataFrame:
    """Load custom file into dataframe.

    Parameters
    ----------
    file: str
        Path to file, a key of ``data_files``, or a key of ``data_examples``
    file_types: list
        Supported file types (only used to build the "not supported" message)
    data_files: dict
        Contains all available data files within the Export folder
    data_examples: dict
        Contains all available examples from Statsmodels

    Returns
    -------
    pd.DataFrame:
        Dataframe with custom data. An empty dataframe is returned when the
        file cannot be found or its extension is not supported.
    """
    if file in data_examples:
        if file == "wage_panel":
            return wage_panel.load()
        # Plain attribute lookup; equivalent to the former eval() of
        # "sm.datasets.<file>" but never executes a constructed string.
        return getattr(sm.datasets, file).load_pandas().data

    # A known data-file key is translated to its stored location.
    if file in data_files:
        file = data_files[file]

    path = Path(file)
    if not path.exists():
        console.print(f"[red]Can not find the file {file}[/red]\n")
        return pd.DataFrame()

    file_type = path.suffix
    # Compare case-insensitively so that e.g. ".CSV" is treated like ".csv".
    normalized_type = file_type.lower()

    if normalized_type == ".xlsx":
        return pd.read_excel(file)
    if normalized_type == ".csv":
        return pd.read_csv(file)

    console.print(
        f"The file type {file_type} is not supported. Please choose one of the following: "
        f"{', '.join(file_types)}"
    )
    # console.print() returns None; hand back an empty frame so the declared
    # return type holds on every path.
    return pd.DataFrame()
import numpy as np
from numpy.testing import assert_allclose
from pandas.testing import assert_series_equal
import pytest
from statsmodels.tools.tools import add_constant

from linearmodels.datasets import wage_panel
from linearmodels.iv.model import IV2SLS
from linearmodels.panel.data import PanelData
from linearmodels.panel.model import PanelOLS, PooledOLS, RandomEffects
from linearmodels.panel.results import compare
from linearmodels.tests.panel._utility import datatypes, generate_data


# wage_panel.load() is evaluated once, when the decorator arguments are built,
# and the result is the fixture's single parameter.
@pytest.fixture(params=[wage_panel.load()])
def data(request):
    """Return the fixture parameter, i.e. the object produced by wage_panel.load()."""
    return request.param


# Parametrisation axes for the generated-data fixture below.
missing = [0.0, 0.02, 0.20]
has_const = [True, False]
# NOTE(review): `product` is not imported in the visible code — presumably
# itertools.product, giving every (missing, datatype, has_const) combination;
# confirm the import exists.
perms = list(product(missing, datatypes, has_const))
# One id per combination: the str() of each element joined with "-".
ids = list(map(lambda s: "-".join(map(str, s)), perms))


@pytest.fixture(params=perms, ids=ids)
def generated_data(request):
    # Each parameter is one 3-tuple taken from `perms`.
    missing, datatype, const = request.param
    # NOTE(review): the call below is cut off in the visible source; the
    # remaining arguments are not shown here.
    return generate_data(missing, datatype,
# Fit several panel estimators to the wage panel data and print a side-by-side
# comparison of the results.
import numpy as np
import pandas as pd
import statsmodels.api as sm

import linearmodels as lm

# Set the package-level flag before missing_warning() is called below
# (presumably this silences the missing-value warning — confirm).
lm.WARN_ON_MISSING = False

from linearmodels import utility
from linearmodels.datasets import wage_panel
from linearmodels.panel import PanelOLS, PooledOLS, RandomEffects
from linearmodels.panel.results import compare

utility.missing_warning(np.array([True, True, False]))

# Regressor sets shared by the models below.
_BASE_REGRESSORS = ["expersq", "married", "union"]
_FULL_REGRESSORS = ["exper"] + _BASE_REGRESSORS

# Index the panel by ('nr', 'year') and pick the dependent variable.
data = wage_panel.load().set_index(["nr", "year"])
dependent = data["lwage"]

# PanelOLS with entity and time effects, under two covariance estimators.
exog = sm.add_constant(data[_BASE_REGRESSORS])
mod = PanelOLS(dependent, exog, entity_effects=True, time_effects=True)
res = mod.fit(cov_type="unadjusted")
res2 = mod.fit(cov_type="robust")

# PanelOLS with entity effects only, entity-clustered covariance.
exog = sm.add_constant(data[_FULL_REGRESSORS])
mod = PanelOLS(dependent, exog, entity_effects=True)
res3 = mod.fit(cov_type="clustered", cluster_entity=True)

# Random effects on the same regressors.
mod = RandomEffects(dependent, exog)
res4 = mod.fit(cov_type="robust")

# Pooled OLS with 'year' added as a categorical regressor; the copy keeps the
# new column out of `data`.
exog = sm.add_constant(data[_FULL_REGRESSORS].copy())
exog["year"] = pd.Categorical(data.reset_index()["year"])
mod = PooledOLS(dependent, exog)
res5 = mod.fit(cov_type="robust")

results = [res, res2, res3, res4, res5]
print(compare(results))
print(data.columns)