def test_get_update() -> None:
    """
    Check that ``get_update`` yields one populated table per source.

    ``get_update`` is called with the current NFL year
    (``_date.today().nfl_year``).  The result is treated as a mapping
    whose values are DataFrames: every value must contain at least one
    row, and the number of entries must be one less than the number of
    configuration sections.

    Raises
    ------
    AssertionError
        If any returned table is empty, if the result does not expose
        ``.values()`` / DataFrame-like values, or if the number of
        returned tables does not match the expected count.
    """
    # The result is consumed through ``.values()``, so it is a mapping,
    # not a list as the original annotation claimed.
    dfs: Dict[str, pd.DataFrame] = get_update(_date.today().nfl_year)

    not_empty: List[bool]
    try:
        not_empty = [len(df.index) > 0 for df in dfs.values()]
    except AttributeError:
        # Either ``dfs`` has no ``.values()`` or a value has no ``.index``;
        # record a failure instead of letting the AttributeError escape.
        not_empty = [False]

    urls: List[str] = list(_config.sections())

    # Two separate assertions so a failure states which expectation broke.
    assert all(not_empty), (
        'get_update returned an empty table or a result that is not a '
        'mapping of DataFrames'
    )
    # NOTE(review): the "- 1" presumably discounts the 'test' section,
    # which is not a real data source -- confirm against get_update.
    assert len(dfs) == len(urls) - 1, (
        f'expected {len(urls) - 1} tables, got {len(dfs)}'
    )
def test_config() -> None:
    """
    Verify the type and section layout of the shared configuration.

    The test passes only when ``_config`` is a ``ConfigParser`` instance
    and ``_config.sections()`` equals the expected section names, in the
    expected order.

    Raises
    ------
    AssertionError
        If ``_config`` has the wrong type or its sections differ.
    """
    expected_sections: List[str] = [
        'offense',
        'kicking',
        'op_defense',
        'rb_defense',
        'te_defense',
        'qb_defense',
        'wr_defense',
        'coaches',
        'schedule',
        'test',
    ]
    # Type check first: the section comparison is only attempted once
    # the object is known to be a ConfigParser.
    assert isinstance(_config, ConfigParser)
    assert _config.sections() == expected_sections
def test_update() -> None:
    """
    Update the database for the previous NFL year and check its views.

    ``db.update`` is called with ``_date.today().nfl_year - 1``.  After
    the update, every configuration section name other than
    ``section_name`` must be present in ``db.views``.

    Raises
    ------
    AssertionError
        If any expected name is missing from ``db.views``.
    """
    db.update(_date.today().nfl_year - 1)

    # Every section except the one named by ``section_name`` is expected
    # to appear among the database views.
    expected_views: List[str] = [
        name for name in _config.sections() if name != section_name
    ]
    present: List[bool] = [name in db.views for name in expected_views]
    assert all(present)
# %%% User-Defined from nfetl import DB from nfetl.core import _config from nfetl._datetime import _date # %% Variables db: DB = DB(os.path.join(os.path.dirname(os.path.dirname(__file__)), 'NFL.db'), False) source: pd.DataFrame = pd.read_hdf( _config['DEFAULT']['test_url_data'], 'test_url_data') clean_source: pd.DataFrame = pd.read_hdf( _config['DEFAULT']['test_url_data'], 'test_clean_data') section_name: str = [i for i in _config.sections() if 'test' in i][0] source_table_name: str = section_name + '_2019' extracted_data: Dict[str, pd.DataFrame] = {source_table_name: source} archive: pd.DataFrame = source.copy() archive.insert(len(archive.columns), 'DML_Type', 'I') arc_data: Dict[str, pd.DataFrame] = {f'{source_table_name}': archive} # %% Functions # %%% Private def _truncate_table(prefix: str, dfs: Dict[str, pd.DataFrame] = extracted_data) -> None: for table in extracted_data.keys(): if pd.read_sql( f"SELECT COUNT(1) FROM sqlite_master WHERE name = '{prefix}{table}'", db.connection).iat[0, 0] > 0:
from dask import distributed # %%% User Defined from nfetl._datetime import _date from nfetl.core import _config # %% Variables # %%% System __all__ = ['get_url_data', 'get_update'] # %%% Private _client: object = distributed.Client(processes=False) _default_sets: List[Tuple[str, str, str]] = [ (key, _config[key]['url'], _config[key]['extract_columns']) for key in _config.sections() if key != 'test' ] _default_start_year: int = int(_config['DEFAULT']['start_year']) # %% Functions def get_url_data(url: str, headers: List[str] = []) -> pd.DataFrame: """ Retrieve data table from URL. Parameters ---------- url : str URL for HTML page where data table is found. headers : List[str], optional Headers for retrieved table. Default is source headers.