def _load(data_loc: Union[str, PathLike, Connection, Engine],
          multiindex=True,
          table_name: Optional[str] = None):
    """Read previously stored data from a SQL table or a CSV file.

    Parameters
    ----------
    data_loc : str, os.PathLike, SQLAlchemy Connection or Engine
        SQL connectable, or path of the CSV file to read.
    multiindex : bool, default True
        When exactly ``True``, the data is read with multi-level column
        headers (``sqlutil.read`` for SQL, nine header rows for CSV).
    table_name : str or None, default None
        Table to read when ``data_loc`` is a SQL connectable.

    Returns
    -------
    pd.DataFrame
        The stored data, or an empty frame when the table or file could
        not be found.
    """
    from_sql = isinstance(data_loc, (Engine, Connection))
    with_levels = multiindex is True
    try:
        if from_sql and with_levels:
            return sqlutil.read(con=data_loc, table_name=table_name)
        if from_sql:
            return pd.read_sql(sql=table_name, con=data_loc,
                               index_col="index", parse_dates="index")
        if with_levels:
            loaded = pd.read_csv(data_loc, index_col=0, parse_dates=True,
                                 header=list(range(9)),
                                 float_precision="high")
            # Only the multi-header CSV path passes through metadata._set.
            metadata._set(loaded)
            return loaded
        return pd.read_csv(data_loc, index_col=0, parse_dates=True,
                           float_precision="high")
    except (ProgrammingError, OperationalError, FileNotFoundError):
        # Missing table/file is not fatal: report it and hand back an
        # empty frame.
        print("Data does not exist. No data will be updated")
        return pd.DataFrame()
def terms_of_trade(update_loc: Union[str, PathLike, Engine,
                                     Connection, None] = None,
                   save_loc: Union[str, PathLike, Engine,
                                   Connection, None] = None,
                   name: str = "tfm_tot",
                   index_label: str = "index",
                   only_get: bool = True) -> pd.DataFrame:
    """Compute the terms of trade as total exports over total imports.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None
        Forwarded to ``trade.get`` as the location of existing data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None
        Forwarded to ``trade.get``; when not ``None`` the result of this
        function is also saved there.
    name : str, default 'tfm_tot'
        Name used when saving the result.
    index_label : str, default 'index'
        Index label used when saving the result.
    only_get : bool, default True
        Forwarded to ``trade.get``.

    Returns
    -------
    Terms of trade (exports/imports) : pd.DataFrame
    """
    trade_data = trade.get(update_loc=update_loc, save_loc=save_loc,
                           only_get=only_get)
    # Give both totals the same label so the division aligns them.
    sold = trade_data["tb_x_dest_pri"].rename(
        columns={"Total exportaciones": "Total"})
    bought = trade_data["tb_m_orig_pri"].rename(
        columns={"Total importaciones": "Total"})

    ratio = sold.div(bought).loc[:, ["Total"]]
    ratio = ratio.rename(columns={"Total": "Términos de intercambio"})
    ratio = transform.base_index(ratio, start_date="2005-01-01",
                                 end_date="2005-12-31")
    metadata._set(ratio, ts_type="-")

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=ratio,
                name=name, index_label=index_label)

    return ratio
def net_public_debt(update_loc: Union[str, PathLike, Engine,
                                      Connection, None] = None,
                    save_loc: Union[str, PathLike, Engine,
                                    Connection, None] = None,
                    only_get: bool = True) -> pd.DataFrame:
    """Get net public debt excluding deposits at the central bank.

    Combines the "Total deuda" and "Total activos" series returned by
    ``_public_debt_retriever`` with the quarterly (end of period)
    "Obligaciones en ME con el sector financiero" series from the
    reserves dataset.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None
        Forwarded to the underlying retrieval functions.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None
        Forwarded to the underlying retrieval functions; when not
        ``None`` the result is also saved there.
    only_get : bool, default True
        Forwarded to the underlying retrieval functions.

    Returns
    -------
    Net public debt excl. deposits at the central bank : pd.DataFrame
    """
    name = "net_public_debt"
    label = "Deuda neta del sector público global excl. encajes"

    debt_tables = _public_debt_retriever(update_loc=update_loc,
                                         save_loc=save_loc,
                                         only_get=only_get)
    liabilities = debt_tables["gps"].loc[:, ["Total deuda"]]
    holdings = debt_tables["assets"].loc[:, ["Total activos"]]
    # Same single column label on both frames so .add() lines them up.
    liabilities.columns = [label]
    holdings.columns = liabilities.columns

    reserves_data = econuy.retrieval.external_sector.reserves(
        update_loc=update_loc, save_loc=save_loc, only_get=only_get)
    fin_sector = reserves_data.loc[
        :, ["Obligaciones en ME con el sector financiero"]]
    quarterly = transform.resample(fin_sector, rule="Q-DEC",
                                   operation="last")
    fin_sector = quarterly.reindex(liabilities.index).squeeze()

    output = liabilities.add(holdings).add(fin_sector, axis=0).dropna()
    metadata._set(output, area="Sector público", currency="USD",
                  inf_adj="No", unit="Millones", seas_adj="NSA",
                  ts_type="Stock", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
def real_wages(update_loc: Union[str, PathLike, Engine,
                                 Connection, None] = None,
               save_loc: Union[str, PathLike, Engine,
                               Connection, None] = None,
               only_get: bool = True) -> pd.DataFrame:
    """Get real wages.

    Relabels the frame returned by ``nominal_wages``, converts it with
    ``transform.convert_real`` and rebases it with ``start_date``
    "2008-07-31".

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None
        Forwarded to ``nominal_wages`` and ``transform.convert_real``.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None
        When not ``None`` the result is saved there.
    only_get : bool, default True
        Forwarded to ``nominal_wages`` and ``transform.convert_real``.

    Returns
    -------
    Real wages data : pd.DataFrame
    """
    name = "real_wages"
    real_labels = [
        "Índice medio de salarios reales",
        "Índice medio de salarios reales privados",
        "Índice medio de salarios reales públicos",
    ]

    nominal = nominal_wages(update_loc=update_loc, only_get=only_get)
    nominal.columns = real_labels
    metadata._set(nominal, area="Mercado laboral", currency="UYU",
                  inf_adj="Sí", seas_adj="NSA", ts_type="-", cumperiods=1)

    output = transform.rebase(
        transform.convert_real(nominal, update_loc=update_loc,
                               only_get=only_get),
        start_date="2008-07-31")

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
def dummy_df(freq, periods=200, area="Test", currency="Test",
             inf_adj="Test", unit="Test", seas_adj="Test",
             ts_type="Test", cumperiods=1):
    """Build a random three-column frame ("A", "B", "C") for tests.

    The index starts at 2000-01-31 with ``periods`` entries at frequency
    ``freq``. Column "A" is drawn uniformly from [-100, 100), "B" from
    [1, 50) and "C" from [-100, -50). The remaining arguments are passed
    to ``metadata._set``.
    """
    index = pd.date_range("2000-01-31", periods=periods, freq=freq)
    # (low, high) per column, drawn in this order from the global
    # NumPy random state.
    bounds = ((-100, 100), (1, 50), (-100, -50))
    draws = [np.random.uniform(low, high, [periods, 1])
             for low, high in bounds]
    frame = pd.DataFrame(index=index, columns=["A", "B", "C"],
                         data=np.hstack(draws))
    metadata._set(frame, area=area, currency=currency, inf_adj=inf_adj,
                  unit=unit, seas_adj=seas_adj, ts_type=ts_type,
                  cumperiods=cumperiods)
    return frame
def _balance_retriever(update_loc: Union[str, PathLike, Engine,
                                         Connection, None] = None,
                       revise_rows: Union[str, int] = "nodup",
                       save_loc: Union[str, PathLike, Engine,
                                       Connection, None] = None,
                       only_get: bool = False) -> Dict[str, pd.DataFrame]:
    """Helper function. See any of the `balance_...()` functions.

    Returns one frame per key of ``fiscal_sheets``. When ``only_get`` is
    ``True`` and ``update_loc`` is given, stored data is returned as long
    as none of the stored frames is empty; otherwise the first ``.xlsx``
    link found on the source page is downloaded and parsed sheet by sheet.
    """
    if only_get is True and update_loc is not None:
        stored = {
            dataset: ops._io(operation="update", data_loc=update_loc,
                             name=f"balance_{dataset}")
            for dataset in fiscal_sheets
        }
        if not any(frame.equals(pd.DataFrame())
                   for frame in stored.values()):
            return stored

    page = requests.get(urls["balance_gps"]["dl"]["main"])
    parsed = BeautifulSoup(page.content, "html.parser")
    workbook_url = parsed.find_all(href=re.compile("\\.xlsx$"))[0]["href"]
    workbook = pd.ExcelFile(workbook_url)

    output = {}
    for dataset, meta in fiscal_sheets.items():
        sheet = pd.read_excel(workbook, sheet_name=meta["sheet"])
        sheet = sheet.dropna(axis=0, thresh=4).dropna(axis=1, thresh=4)
        table = sheet.transpose().set_index(2, drop=True)
        table.columns = table.iloc[0]
        # Rows whose index is null are dropped before shifting the index.
        table = table[table.index.notnull()].rename_axis(None)
        table.index = table.index + MonthEnd(1)
        table.columns = meta["colnames"]
        table = table.apply(pd.to_numeric, errors="coerce")
        metadata._set(table, area="Sector público", currency="UYU",
                      inf_adj="No", unit="Millones", seas_adj="NSA",
                      ts_type="Flujo", cumperiods=1)

        stored_name = f"balance_{dataset}"
        if update_loc is not None:
            earlier = ops._io(operation="update", data_loc=update_loc,
                              name=stored_name)
            table = ops._revise(new_data=table, prev_data=earlier,
                                revise_rows=revise_rows)
        if save_loc is not None:
            ops._io(operation="save", data_loc=save_loc, data=table,
                    name=stored_name)

        output[dataset] = table

    return output
def test_reserves_changes():
    """Check "reserves_changes" against the stored CSV and the cache."""
    remove_clutter()
    session = Session(location=TEST_CON)

    csv_path = path.join(TEST_DIR, "reserves_changes.csv")
    expected = pd.read_csv(csv_path, index_col=0, header=list(range(9)),
                           float_precision="high")
    metadata._set(expected)

    downloaded = session.get(dataset="reserves_changes").dataset
    expected.index = pd.to_datetime(expected.index)

    # Compare only the dates present in the reference file, at 3 decimals.
    overlap = downloaded.loc[expected.index].round(3)
    overlap.columns = expected.columns
    assert overlap.equals(expected.round(3))

    # A second retrieval with only_get set must match the first one.
    session.only_get = True
    cached = session.get(dataset="reserves_changes").dataset
    assert downloaded.round(3).equals(cached.round(3))

    remove_clutter()
def test_nxr_daily():
    """Check "nxr_daily" against the stored CSV and the cache."""
    remove_clutter()

    csv_path = path.join(TEST_DIR, "nxr_daily.csv")
    expected = pd.read_csv(csv_path, index_col=0, header=list(range(9)))
    metadata._set(expected)
    expected.index = pd.to_datetime(expected.index)

    session = Session(location=TEST_DIR)
    downloaded = session.get(dataset="nxr_daily").dataset

    # Compare only the dates present in the reference file, at 4 decimals.
    overlap = downloaded.loc[expected.index].round(4)
    overlap.columns = expected.columns
    assert overlap.equals(expected.round(4))

    # A second retrieval with only_get set must match the first one.
    session.only_get = True
    cached = session.get(dataset="nxr_daily").dataset
    assert cached.round(4).equals(downloaded.round(4))

    session.only_get = False
    remove_clutter()
def test_fiscal():
    """Check ``Session.get_frequent(dataset="fiscal", ...)`` per unit.

    The expected frame is rebuilt by hand from the raw "fiscal" tables and
    compared with the session output for the "gdp", "usd", "real" and
    "real_usd" units. Invalid ``unit``/``aggregation`` values must raise
    ``ValueError``, and an ``only_get`` retrieval must match a regular one.
    """
    remove_clutter()
    session = Session(location=TEST_CON)
    assert isinstance(session, Session)
    assert isinstance(session.dataset, pd.DataFrame)
    fiscal_tfm = session.get_frequent(dataset="fiscal", aggregation="nfps",
                                      fss=True, unit="gdp").dataset
    remove_clutter()
    fiscal_ = session.get(dataset="fiscal").dataset
    nfps = fiscal_["nfps"]
    gc = fiscal_["gc-bps"]
    # Rebuild the NFPS aggregation by hand, column by column.
    proc = pd.DataFrame(index=nfps.index)
    proc["Ingresos: SPNF-SPC"] = nfps["Ingresos: SPNF"]
    proc["Egresos: Primarios SPNF-SPC"] = nfps["Egresos: Primarios SPNF"]
    proc["Egresos: Inversiones SPNF-SPC"] = nfps["Egresos: Inversiones"]
    proc["Intereses: SPNF"] = nfps["Intereses: Totales"]
    proc["Egresos: Totales SPNF"] = (proc["Egresos: Primarios SPNF-SPC"]
                                     + proc["Intereses: SPNF"])
    proc["Resultado: Primario intendencias"] = nfps[
        "Resultado: Primario intendencias"]
    proc["Resultado: Primario BSE"] = nfps["Resultado: Primario BSE"]
    proc["Resultado: Primario SPNF"] = nfps["Resultado: Primario SPNF"]
    proc["Resultado: Global SPNF"] = nfps["Resultado: Global SPNF"]
    # FSS series come from the "gc-bps" table; the "aj. FSS" columns below
    # subtract them from the corresponding NFPS series.
    proc["Ingresos: FSS"] = gc["Ingresos: FSS"]
    proc["Intereses: FSS"] = gc["Intereses: BPS-FSS"]
    proc["Ingresos: SPNF-SPC aj. FSS"] = (proc["Ingresos: SPNF-SPC"]
                                          - proc["Ingresos: FSS"])
    proc["Intereses: SPNF aj. FSS"] = (proc["Intereses: SPNF"]
                                       - proc["Intereses: FSS"])
    proc["Egresos: Totales SPNF aj. FSS"] = (proc["Egresos: Totales SPNF"]
                                             - proc["Intereses: FSS"])
    proc["Resultado: Primario SPNF aj. FSS"] = (
        proc["Resultado: Primario SPNF"] - proc["Ingresos: FSS"])
    proc["Resultado: Global SPNF aj. FSS"] = (proc["Resultado: Global SPNF"]
                                              - proc["Ingresos: FSS"]
                                              + proc["Intereses: FSS"])
    # Keep only the columns listed for the FSS-adjusted NFPS aggregation.
    cols = fiscal_metadata["nfps"][True]
    compare = proc.loc[:, cols]
    metadata._set(compare, area="Cuentas fiscales y deuda", currency="UYU",
                  inf_adj="No", unit="No", seas_adj="NSA",
                  ts_type="Flujo", cumperiods=1)

    # unit="gdp": 12-period rolling sum, then conversion with convert_gdp.
    compare_gdp = transform.rolling(compare, periods=12, operation="sum")
    compare_gdp = transform.convert_gdp(compare_gdp)
    compare_gdp.columns = fiscal_tfm.columns
    assert compare_gdp.equals(fiscal_tfm)
    remove_clutter()

    # unit="usd"
    fiscal_tfm = session.get_frequent(dataset="fiscal", aggregation="nfps",
                                      fss=True, unit="usd").dataset
    compare_usd = transform.convert_usd(compare)
    compare_usd.columns = fiscal_tfm.columns
    assert compare_usd.equals(fiscal_tfm)
    remove_clutter()

    # unit="real"
    fiscal_tfm = session.get_frequent(dataset="fiscal", aggregation="nfps",
                                      fss=True, unit="real").dataset
    compare_real = transform.convert_real(compare)
    compare_real.columns = fiscal_tfm.columns
    assert compare_real.equals(fiscal_tfm)
    remove_clutter()

    # unit="real_usd": deflate over the given window, then divide by the
    # element at position 1 of the mean of the monthly exchange rate frame
    # over the same window.
    start_date = "2010-01-31"
    end_date = "2010-12-31"
    fiscal_tfm = session.get_frequent(dataset="fiscal", aggregation="nfps",
                                      fss=True, unit="real_usd",
                                      start_date=start_date,
                                      end_date=end_date).dataset
    compare_real_usd = transform.convert_real(compare,
                                              start_date=start_date,
                                              end_date=end_date)
    xr = nxr.get_monthly(update_loc=None, save_loc=None)
    compare_real_usd = compare_real_usd.divide(
        xr[start_date:end_date].mean()[1])
    compare_real_usd.columns = fiscal_tfm.columns
    assert compare_real_usd.equals(fiscal_tfm)
    remove_clutter()

    # Invalid keywords must be rejected.
    with pytest.raises(ValueError):
        session.get_frequent(dataset="fiscal", aggregation="nfps",
                             unit="wrong")
    with pytest.raises(ValueError):
        session.get_frequent(dataset="fiscal", aggregation="wrong")
    remove_clutter()

    # A retrieval with only_get set must match a regular one, table by
    # table, at 4 decimals.
    fiscal_ = session.get(dataset="fiscal").dataset
    session.only_get = True
    compare = session.get(dataset="fiscal").dataset
    for v, v2 in zip(fiscal_.values(), compare.values()):
        assert v.round(4).equals(v2.round(4))
    remove_clutter()
    session.only_get = False
def get(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        name: str = "cpi",
        index_label: str = "index",
        only_get: bool = False) -> pd.DataFrame:
    """Get CPI data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None
        Location of previously stored data. ``None`` means no stored data
        is read.
    revise_rows : {'nodup', 'auto', int}
        Passed to ``ops._revise`` when merging new data with stored data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None
        Where to save the result. ``None`` means don't save.
    name : str, default 'cpi'
        Name used for reading and saving.
    index_label : str, default 'index'
        Index label used for reading and saving.
    only_get : bool, default False
        If ``True`` and ``update_loc`` is given, return the stored data
        without downloading, unless the stored data is empty.

    Returns
    -------
    Monthly CPI index : pd.DataFrame
    """
    if only_get is True and update_loc is not None:
        stored = ops._io(operation="update", data_loc=update_loc,
                         name=name, index_label=index_label)
        if not stored.equals(pd.DataFrame()):
            return stored

    sheet = pd.read_excel(urls["cpi"]["dl"]["main"], skiprows=7)
    sheet = sheet.dropna(axis=0, thresh=2)
    # Only the index level is kept; the variation columns are discarded.
    trimmed = sheet.drop(["Mensual", "Acum.año", "Acum.12 meses"], axis=1)
    trimmed = trimmed.dropna(axis=0, how="all")
    cpi = trimmed.set_index("Mes y año").rename_axis(None)
    cpi.columns = ["Índice de precios al consumo"]
    cpi.index = cpi.index + MonthEnd(1)

    if update_loc is not None:
        earlier = ops._io(operation="update", data_loc=update_loc,
                          name=name, index_label=index_label)
        cpi = ops._revise(new_data=cpi, prev_data=earlier,
                          revise_rows=revise_rows)

    cpi = cpi.apply(pd.to_numeric, errors="coerce")
    metadata._set(cpi, area="Precios y salarios", currency="-",
                  inf_adj="No", unit="2010-10=100", seas_adj="NSA",
                  ts_type="-", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=cpi,
                name=name, index_label=index_label)

    return cpi
def balance_summary(update_loc: Union[str, PathLike, Engine,
                                      Connection, None] = None,
                    save_loc: Union[str, PathLike, Engine,
                                    Connection, None] = None,
                    only_get: bool = True) -> pd.DataFrame:
    """
    Get the summary fiscal balance table found in the `Budget Law
    <https://www.gub.uy/contaduria-general-nacion/sites/
    contaduria-general-nacion/files/2020-09/
    Mensaje%20y%20Exposici%C3%B3n%20de%20motivos.pdf>`_. Includes adjustments
    for the `Social Security Fund <https://www.impo.com.uy/bases/decretos/
    71-2018/25>`_.

    The table is assembled column by column from the "gps", "nfps",
    "cg-bps" and "pe" frames returned by ``_balance_retriever``; the order
    of the assignments below is the column order of the result.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None
        Passed to ``_balance_retriever`` as the location of existing data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None
        Passed to ``_balance_retriever``; when not ``None`` the summary
        table is also saved there under the name "balance_summary".
    only_get : bool, default True
        Passed to ``_balance_retriever``.

    Returns
    -------
    Summary fiscal balance table : pd.DataFrame

    """
    name = "balance_summary"
    data = _balance_retriever(update_loc=update_loc, save_loc=save_loc,
                              only_get=only_get)
    gps = data["gps"]
    nfps = data["nfps"]
    # NOTE(review): key is "cg-bps" here while other code in this file
    # reads "gc-bps" from a different retriever -- confirm against
    # the keys of fiscal_sheets.
    gc = data["cg-bps"]
    pe = data["pe"]

    proc = pd.DataFrame(index=gps.index)

    # --- GC-BPS: revenues ---
    proc["Ingresos: GC-BPS"] = gc["Ingresos: GC-BPS"]
    proc["Ingresos: GC-BPS ex. FSS"] = (gc["Ingresos: GC-BPS"]
                                        - gc["Ingresos: FSS - Cincuentones"])
    proc["Ingresos: GC"] = gc["Ingresos: GC"]
    proc["Ingresos: DGI"] = gc["Ingresos: DGI"]
    proc["Ingresos: Comercio ext."] = gc["Ingresos: Comercio ext."]
    # Residual: GC revenues that are neither DGI nor foreign trade.
    proc["Ingresos: Otros"] = (gc["Ingresos: GC"]
                               - gc["Ingresos: DGI"]
                               - gc["Ingresos: Comercio ext."])
    proc["Ingresos: BPS"] = gc["Ingresos: BPS neto"]
    proc["Ingresos: FSS - Cincuentones"] = gc["Ingresos: FSS - Cincuentones"]
    proc["Ingresos: BPS ex FSS"] = (gc["Ingresos: BPS neto"]
                                    - gc["Ingresos: FSS - Cincuentones"])

    # --- GC-BPS: expenditures ---
    # Primary expenditure = total expenditure minus interest.
    proc["Egresos: Primarios GC-BPS"] = (gc["Egresos: GC-BPS"]
                                         - gc["Intereses: Total"])
    proc["Egresos: Primarios corrientes GC-BPS"] = (
        proc["Egresos: Primarios GC-BPS"]
        - gc["Egresos: Inversión"].squeeze())
    proc["Egresos: Remuneraciones"] = gc["Egresos: Remuneraciones"]
    proc["Egresos: No personales"] = gc["Egresos: No personales"]
    proc["Egresos: Pasividades"] = gc["Egresos: Pasividades"]
    proc["Egresos: Transferencias"] = gc["Egresos: Transferencias"]
    proc["Egresos: Inversión"] = gc["Egresos: Inversión"]

    # --- GC-BPS: balances and interest ---
    proc["Resultado: Primario GC-BPS"] = (proc["Ingresos: GC-BPS"]
                                          - proc["Egresos: Primarios GC-BPS"])
    proc["Resultado: Primario GC-BPS ex FSS"] = (
        proc["Ingresos: GC-BPS ex. FSS"]
        - proc["Egresos: Primarios GC-BPS"])
    proc["Intereses: GC-BPS"] = gc["Intereses: Total"]
    proc["Intereses: FSS - Cincuentones"] = gc["Intereses: FSS - Cincuentones"]
    proc["Intereses: GC-BPS ex FSS"] = (proc["Intereses: GC-BPS"]
                                        - proc["Intereses: FSS - Cincuentones"])
    proc["Resultado: Global GC-BPS"] = (proc["Resultado: Primario GC-BPS"]
                                        - proc["Intereses: GC-BPS"])
    proc["Resultado: Global GC-BPS ex FSS"] = (
        proc["Resultado: Primario GC-BPS ex FSS"]
        - proc["Intereses: GC-BPS ex FSS"])

    # --- EEPP (series taken from the "nfps" and "pe" tables) ---
    proc["Resultado: Primario corriente EEPP"] = nfps[
        "Ingresos: Res. primario corriente EEPP"]
    proc["Egresos: Inversiones EEPP"] = pe["Egresos: Inversiones"]
    proc["Resultado: Primario EEPP"] = (
        proc["Resultado: Primario corriente EEPP"]
        - proc["Egresos: Inversiones EEPP"])
    proc["Intereses: EEPP"] = pe["Intereses"]
    proc["Resultado: Global EEPP"] = (proc["Resultado: Primario EEPP"]
                                      - proc["Intereses: EEPP"])

    # --- Intendencias ---
    proc["Resultado: Primario intendencias"] = nfps[
        "Resultado: Primario intendencias"]
    proc["Intereses: Intendencias"] = nfps["Intereses: Intendencias"]
    proc["Resultado: Global intendencias"] = (
        proc["Resultado: Primario intendencias"]
        - proc["Intereses: Intendencias"])

    # --- BSE ---
    proc["Resultado: Primario BSE"] = nfps["Resultado: Primario BSE"]
    proc["Intereses: BSE"] = nfps["Intereses: BSE"]
    proc["Resultado: Global BSE"] = (proc["Resultado: Primario BSE"]
                                     - proc["Intereses: BSE"])

    # --- Rest of SPNF: sum of EEPP, intendencias and BSE ---
    proc["Resultado: Primario resto SPNF"] = (
        proc["Resultado: Primario EEPP"]
        + proc["Resultado: Primario intendencias"]
        + proc["Resultado: Primario BSE"])
    proc["Intereses: Resto SPNF"] = (proc["Intereses: EEPP"]
                                     + proc["Intereses: Intendencias"]
                                     + proc["Intereses: BSE"])
    proc["Resultado: Global resto SPNF"] = (
        proc["Resultado: Global EEPP"]
        + proc["Resultado: Global intendencias"]
        + proc["Resultado: Global BSE"])

    # --- SPNF totals, with and without FSS ---
    proc["Resultado: Primario SPNF"] = nfps["Resultado: Primario SPNF"]
    proc["Resultado: Primario SPNF ex FSS"] = (
        proc["Resultado: Primario SPNF"]
        - proc["Ingresos: FSS - Cincuentones"])
    proc["Intereses: SPNF"] = nfps["Intereses: Totales"]
    proc["Intereses: SPNF ex FSS"] = (proc["Intereses: SPNF"]
                                      - proc["Intereses: FSS - Cincuentones"])
    proc["Resultado: Global SPNF"] = nfps["Resultado: Global SPNF"]
    proc["Resultado: Global SPNF ex FSS"] = (
        proc["Resultado: Primario SPNF ex FSS"]
        - proc["Intereses: SPNF ex FSS"])

    # --- BCU (series taken from the "gps" table) ---
    proc["Resultado: Primario BCU"] = gps["Resultado: Primario BCU"]
    proc["Intereses: BCU"] = gps["Intereses: BCU"]
    proc["Resultado: Global BCU"] = gps["Resultado: Global BCU"]

    # --- SPC totals: SPNF plus BCU ---
    proc["Resultado: Primario SPC"] = gps["Resultado: Primario SPC"]
    proc["Resultado: Primario SPC ex FSS"] = (
        proc["Resultado: Primario SPNF ex FSS"]
        + proc["Resultado: Primario BCU"])
    proc["Intereses: SPC"] = proc["Intereses: SPNF"] + proc["Intereses: BCU"]
    proc["Intereses: SPC ex FSS"] = (proc["Intereses: SPNF ex FSS"]
                                     + proc["Intereses: BCU"])
    proc["Resultado: Global SPC"] = (proc["Resultado: Global SPNF"]
                                     + proc["Resultado: Global BCU"])
    proc["Resultado: Global SPC ex FSS"] = (
        proc["Resultado: Global SPNF ex FSS"]
        + proc["Resultado: Global BCU"])
    output = proc

    metadata._set(output, area="Sector público", currency="UYU",
                  inf_adj="No", unit="Millones", seas_adj="NSA",
                  ts_type="Flujo", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
def labor_rate_people(seas_adj: Union[str, None] = None,
                      update_loc: Union[str, PathLike, Engine,
                                        Connection, None] = None,
                      save_loc: Union[str, PathLike, Engine,
                                      Connection, None] = None,
                      name: str = "tfm_labor",
                      index_label: str = "index",
                      only_get: bool = True) -> pd.DataFrame:
    """
    Get labor data, both rates and persons. Allow choosing seasonal
    adjustment.

    Parameters
    ----------
    seas_adj : {None, 'trend', 'seas'}
        Whether to seasonally adjust the rates, and which component of
        ``transform.decompose`` to keep.
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None
        Forwarded to ``labor.get_rates`` as the location of existing data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None
        Where to save the result. ``None`` means don't save.
    name : str, default 'tfm_labor'
        Base name used when saving; the lower-cased seasonal adjustment
        tag ("nsa", "trend" or "sa") is appended.
    index_label : str, default 'index'
        Index label used when saving.
    only_get : bool, default True
        Forwarded to ``labor.get_rates``.

    Returns
    -------
    Labor market data : pd.DataFrame

    Raises
    ------
    ValueError
        If ``seas_adj`` is given an invalid keyword.

    """
    if seas_adj not in ["trend", "seas", None]:
        raise ValueError("'seas_adj' can be 'trend', 'seas' or None.")

    rates = labor.get_rates(update_loc=update_loc, only_get=only_get)
    rates = rates.loc[:, ["Tasa de actividad: total",
                          "Tasa de empleo: total",
                          "Tasa de desempleo: total"]]
    # MultiIndex.set_levels no longer accepts ``inplace`` (removed in
    # pandas 2.0); reassigning the returned index works on every version.
    rates.columns = rates.columns.set_levels(
        rates.columns.levels[0].str.replace(": total", ""), level=0)

    if seas_adj in ["trend", "seas"]:
        trend, seasadj = transform.decompose(rates, trading=True,
                                             outlier=True)
        rates = trend if seas_adj == "trend" else seasadj

    # Yearly population by age; rows for ages 14 and over are summed per
    # year and the yearly totals are interpolated linearly to months.
    working_age = pd.read_excel(urls["tfm_labor"]["dl"]["population"],
                                skiprows=7, index_col=0,
                                nrows=92).dropna(how="all")
    ages = list(range(14, 90)) + ["90 y más"]
    working_age = working_age.loc[ages].sum()
    working_age.index = pd.date_range(start="1996-06-30", end="2050-06-30",
                                      freq="A-JUN")
    monthly_working_age = working_age.resample("M").interpolate("linear")
    monthly_working_age = monthly_working_age.loc[rates.index]

    # First two rate columns apply to the working-age population; the
    # third rate column is applied to the first persons column.
    persons = rates.iloc[:, [0, 1]].div(100).mul(monthly_working_age,
                                                 axis=0)
    persons["Desempleados"] = rates.iloc[:, 2].div(100).mul(
        persons.iloc[:, 0])
    persons.columns = ["Activos", "Empleados", "Desempleados"]

    seas_text = {"trend": "Trend", "seas": "SA"}.get(seas_adj, "NSA")
    metadata._set(persons, area="Mercado laboral", currency="-",
                  inf_adj="No", unit="Personas", seas_adj=seas_text,
                  ts_type="-", cumperiods=1)

    output = pd.concat([rates, persons], axis=1)

    name = f"{name}_{seas_text.lower()}"
    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output,
                name=name, index_label=index_label)

    return output
def consumer_confidence(
        update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get monthly consumer confidence data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None
        Location of previously stored data. ``None`` means no stored data
        is read.
    revise_rows : {'nodup', 'auto', int}
        Passed to ``ops._revise`` when merging new data with stored data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None
        Where to save the result. ``None`` means don't save.
    only_get : bool, default False
        If ``True`` and ``update_loc`` is given, return the stored data
        without downloading, unless the stored data is empty.

    Returns
    -------
    Monthly consumer confidence data : pd.DataFrame
    """
    name = "consumer_confidence"
    labels = ["Subíndice: Situación Económica Personal",
              "Subíndice: Situación Económica del País",
              "Subíndice: Predisposición a la Compra de Durables",
              "Índice de Confianza del Consumidor"]

    if only_get is True and update_loc is not None:
        stored = ops._io(operation="update", data_loc=update_loc,
                         name=name)
        if not stored.equals(pd.DataFrame()):
            return stored

    sheet = pd.read_excel(urls[name]["dl"]["main"], skiprows=3,
                          usecols="B:F", index_col=0)
    # Rows without an index value are discarded.
    output = sheet.loc[sheet.index.notna()]
    output.index = output.index + MonthEnd(0)
    output.columns = labels
    output = output.apply(pd.to_numeric, errors="coerce")

    if update_loc is not None:
        earlier = ops._io(operation="update", data_loc=update_loc,
                          name=name)
        output = ops._revise(new_data=output, prev_data=earlier,
                             revise_rows=revise_rows)

    metadata._set(output, area="Actividad económica", currency="-",
                  inf_adj="No", unit="50 = neutralidad", seas_adj="NSA",
                  ts_type="-", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
def long_rates(update_loc: Union[str, PathLike, Engine,
                                 Connection, None] = None,
               revise_rows: Union[str, int] = "nodup",
               save_loc: Union[str, PathLike, Engine,
                               Connection, None] = None,
               only_get: bool = False) -> pd.DataFrame:
    """Get 10-year government bonds interest rates.

    Countries selected are US, Germany, France, Italy, Spain, United
    Kingdom, Japan and China. The US series is requested with the
    ``FRED_API_KEY`` environment variable; the remaining series are
    requested in windows of 5000 days starting on 2000-01-01.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None
        Location of previously stored data. ``None`` means no stored data
        is read.
    revise_rows : {'nodup', 'auto', int}
        Passed to ``ops._revise`` when merging new data with stored data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None
        Where to save the result. ``None`` means don't save.
    only_get : bool, default False
        If ``True`` and ``update_loc`` is given, return the stored data
        without downloading, unless the stored data is empty.

    Returns
    -------
    Daily 10-year government bonds interest rates : pd.DataFrame
    """
    name = "global_long_rates"
    if only_get is True and update_loc is not None:
        stored = ops._io(operation="update", data_loc=update_loc,
                         name=name)
        if not stored.equals(pd.DataFrame()):
            return stored

    # United States series.
    load_dotenv(Path(get_project_root(), ".env"))
    fred_api_key = os.environ.get("FRED_API_KEY")
    fred_reply = requests.get(f"{urls[name]['dl']['fred']}DGS10&api_key="
                              f"{fred_api_key}&file_type=json")
    treasuries = pd.DataFrame.from_records(
        fred_reply.json()["observations"])
    treasuries = treasuries[["date", "value"]].set_index("date")
    treasuries.index = pd.to_datetime(treasuries.index)
    treasuries.columns = ["United States"]
    yields = [treasuries.apply(pd.to_numeric, errors="coerce").dropna()]

    # Remaining countries: (column name, series id sent as "curr_id").
    series_ids = (("Germany", "23693"), ("France", "23778"),
                  ("Italy", "23738"), ("Spain", "23806"),
                  ("United Kingdom", "23673"), ("Japan", "23901"),
                  ("China", "29227"))
    for country, sid in series_ids:
        window_end = dt.datetime(2000, 1, 1)
        window_start = dt.datetime(2000, 1, 1)
        pages = []
        # Request consecutive windows until one ends after the current
        # time.
        while window_end < dt.datetime.now():
            window_end = window_start + dt.timedelta(days=5000)
            payload = {
                "curr_id": sid,
                "smlID": str(randint(1000000, 99999999)),
                "header": f"{country} 10-Year Bond Yield Historical Data",
                "st_date": window_start.strftime("%m/%d/%Y"),
                "end_date": window_end.strftime("%m/%d/%Y"),
                "interval_sec": "Daily",
                "sort_col": "date",
                "sort_ord": "DESC",
                "action": "historical_data"
            }
            reply = requests.post(urls["global_long_rates"]["dl"]["main"],
                                  headers=investing_headers, data=payload)
            tables = pd.read_html(reply.content, match="Price",
                                  index_col=0, parse_dates=True)
            pages.append(tables[0])
            window_start = window_end + dt.timedelta(days=1)
        history = pd.concat(pages, axis=0)[["Price"]].sort_index()
        history.columns = [country]
        yields.append(history)

    # Left join on the US dates, then fill interior gaps linearly.
    output = yields[0].join(yields[1:], how="left")
    output = output.interpolate(method="linear", limit_area="inside")
    output.columns = ["Estados Unidos", "Alemania", "Francia", "Italia",
                      "España", "Reino Unido", "Japón", "China"]

    if update_loc is not None:
        earlier = ops._io(operation="update", data_loc=update_loc,
                          name=name)
        output = ops._revise(new_data=output, prev_data=earlier,
                             revise_rows=revise_rows)

    metadata._set(output, area="Global", currency="USD", inf_adj="No",
                  seas_adj="NSA", unit="Tasa", ts_type="-", cumperiods=1)
    # Level 3 of the column index is overwritten with one currency code
    # per column.
    currencies = ["USD", "EUR", "EUR", "EUR", "EUR", "GBP", "JPY", "CNY"]
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[currencies])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
def policy_rates(update_loc: Union[str, PathLike, Engine,
                                   Connection, None] = None,
                 revise_rows: Union[str, int] = "nodup",
                 save_loc: Union[str, PathLike, Engine,
                                 Connection, None] = None,
                 only_get: bool = False) -> pd.DataFrame:
    """Get central bank policy interest rates data.

    Countries/aggregates selected are US, Euro Area, Japan and China.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None
        Location of previously stored data. ``None`` means no stored data
        is read.
    revise_rows : {'nodup', 'auto', int}
        Passed to ``ops._revise`` when merging new data with stored data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None
        Where to save the result. ``None`` means don't save.
    only_get : bool, default False
        If ``True`` and ``update_loc`` is given, return the stored data
        without downloading, unless the stored data is empty.

    Returns
    -------
    Daily policy interest rates : pd.DataFrame

    """
    name = "global_policy_rates"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc,
                         name=name)
        if not output.equals(pd.DataFrame()):
            return output

    r = requests.get(urls[name]["dl"]["main"])
    # The archive is unpacked into a temporary directory that is removed
    # as soon as the CSV has been parsed, so no files are left behind.
    with tempfile.TemporaryDirectory() as temp_dir:
        with zipfile.ZipFile(BytesIO(r.content), "r") as f:
            f.extractall(path=temp_dir)
        path_temp = path.join(temp_dir,
                              "WEBSTATS_CBPOL_D_DATAFLOW_csv_row.csv")
        raw = pd.read_csv(path_temp, usecols=[0, 7, 19, 36, 37],
                          index_col=0, header=2,
                          parse_dates=True).dropna(how="all")

    # Non-numeric cells become NaN; only interior gaps are interpolated.
    output = (raw.apply(pd.to_numeric, errors="coerce")
              .interpolate(method="linear", limit_area="inside"))
    output.columns = ["China", "Japón", "Estados Unidos", "Eurozona"]
    output = output[["Estados Unidos", "Eurozona", "Japón", "China"]]

    if update_loc is not None:
        previous_data = ops._io(operation="update", data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output, area="Global", currency="USD", inf_adj="No",
                  seas_adj="NSA", unit="Tasa", ts_type="-", cumperiods=1)
    # Level 3 of the column index is overwritten with one currency code
    # per column.
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["USD", "EUR", "JPY", "CNY"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
def fiscal(aggregation: str = "gps", fss: bool = True,
           unit: Optional[str] = None,
           start_date: Union[str, date, None] = None,
           end_date: Union[str, date, None] = None,
           update_loc: Union[str, PathLike, Engine,
                             Connection, None] = None,
           save_loc: Union[str, PathLike, Engine,
                           Connection, None] = None,
           only_get: bool = True,
           name: str = "tfm_fiscal",
           index_label: str = "index") -> pd.DataFrame:
    """
    Get fiscal accounts data.

    Allow choosing government aggregation, whether to exclude the FSS
    (Fideicomiso de la Seguridad Social, Social Security Trust Fund) and the
    unit (UYU, real UYU, USD, real USD or percent of GDP).

    Parameters
    ----------
    aggregation : {'gps', 'nfps', 'gc'}
        Government aggregation. Can be ``gps`` (consolidated public sector),
        ``nfps`` (non-financial public sector) or ``gc`` (central government).
    fss : bool, default True
        If ``True``, exclude the `FSS's <https://www.impo.com.uy/bases/decretos
        /71-2018/25>`_ income from gov't revenues and the FSS's
        interest revenues from gov't interest payments.
    unit : {None, 'gdp', 'usd', 'real', 'real_usd'}
        Unit in which data should be expressed. Possible values are
        ``real``, ``usd``, ``real_usd`` and ``gdp``. If ``None``, no unit
        calculations will be performed, rendering the data as is
        (current UYU).
    start_date : str, datetime.date or None, default None
        If ``unit`` is set to ``real`` or ``real_usd``, this parameter and
        ``end_date`` control how deflation is calculated.
    end_date : str, datetime.date or None, default None
        If ``unit`` is set to ``real`` or ``real_usd``, this parameter and
        ``start_date`` control how deflation is calculated.
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default True
        If True, don't download data, retrieve what is available from
        ``update_loc``.
    name : str, default 'tfm_fiscal'
        Either CSV filename for updating and/or saving, or table name if
        using SQL. Options will be appended to the base name.
    index_label : str, default 'index'
        Label for SQL indexes.

    Returns
    -------
    Fiscal aggregation : pd.DataFrame

    Raises
    ------
    ValueError
        If ``unit`` or ``aggregation`` are given an invalid keyword.

    """
    if unit not in ["gdp", "usd", "real", "real_usd", None]:
        raise ValueError("'unit' can be 'gdp', 'usd', 'real', 'real_usd' or"
                         " None.")
    if aggregation not in ["gps", "nfps", "gc"]:
        raise ValueError("'aggregation' can be 'gps', 'nfps' or 'gc'.")

    # The saved name encodes the chosen options, e.g. tfm_fiscal_gps_uyu_fssadj
    if unit is None:
        unit = "uyu"
    name = f"{name}_{aggregation}_{unit}"
    if fss:
        name = name + "_fssadj"

    data = fiscal_accounts.get(update_loc=update_loc, save_loc=save_loc,
                               only_get=only_get)
    gps = data["gps"]
    nfps = data["nfps"]
    gc = data["gc-bps"]

    # Build one wide frame with every series any aggregation may need; the
    # relevant subset is selected afterwards through ``fiscal_metadata``.
    proc = pd.DataFrame(index=gps.index)
    proc["Ingresos: SPNF-SPC"] = nfps["Ingresos: SPNF"]
    proc["Ingresos: GC-BPS"] = gc["Ingresos: GC-BPS"]
    proc["Egresos: Primarios SPNF-SPC"] = nfps["Egresos: Primarios SPNF"]
    proc["Egresos: Totales GC-BPS"] = gc["Egresos: GC-BPS"]
    proc["Egresos: Inversiones SPNF-SPC"] = nfps["Egresos: Inversiones"]
    proc["Egresos: Inversiones GC-BPS"] = gc["Egresos: Inversión"]
    proc["Intereses: SPNF"] = nfps["Intereses: Totales"]
    proc["Intereses: BCU"] = gps["Intereses: BCU"]
    proc["Intereses: SPC"] = proc["Intereses: SPNF"] + proc["Intereses: BCU"]
    proc["Intereses: GC-BPS"] = gc["Intereses: Total"]
    proc["Egresos: Totales SPNF"] = (proc["Egresos: Primarios SPNF-SPC"]
                                     + proc["Intereses: SPNF"])
    proc["Egresos: Totales SPC"] = (proc["Egresos: Totales SPNF"]
                                    + proc["Intereses: BCU"])
    proc["Egresos: Primarios GC-BPS"] = (proc["Egresos: Totales GC-BPS"]
                                         - proc["Intereses: GC-BPS"])
    proc["Resultado: Primario intendencias"] = nfps[
        "Resultado: Primario intendencias"]
    proc["Resultado: Primario BSE"] = nfps["Resultado: Primario BSE"]
    proc["Resultado: Primario BCU"] = gps["Resultado: Primario BCU"]
    proc["Resultado: Primario SPNF"] = nfps["Resultado: Primario SPNF"]
    proc["Resultado: Global SPNF"] = nfps["Resultado: Global SPNF"]
    proc["Resultado: Primario SPC"] = gps["Resultado: Primario SPC"]
    proc["Resultado: Global SPC"] = gps["Resultado: Global SPC"]
    proc["Resultado: Primario GC-BPS"] = (proc["Ingresos: GC-BPS"]
                                          - proc["Egresos: Primarios GC-BPS"])
    proc["Resultado: Global GC-BPS"] = gc["Resultado: Global GC-BPS"]

    # FSS-adjusted variants: FSS income is taken out of revenues and FSS
    # interest out of interest payments (and therefore out of total outlays).
    proc["Ingresos: FSS"] = gc["Ingresos: FSS"]
    proc["Intereses: FSS"] = gc["Intereses: BPS-FSS"]
    proc["Ingresos: SPNF-SPC aj. FSS"] = (proc["Ingresos: SPNF-SPC"]
                                          - proc["Ingresos: FSS"])
    proc["Ingresos: GC-BPS aj. FSS"] = (proc["Ingresos: GC-BPS"]
                                        - proc["Ingresos: FSS"])
    proc["Intereses: SPNF aj. FSS"] = (proc["Intereses: SPNF"]
                                       - proc["Intereses: FSS"])
    proc["Intereses: SPC aj. FSS"] = (proc["Intereses: SPC"]
                                      - proc["Intereses: FSS"])
    proc["Intereses: GC-BPS aj. FSS"] = (proc["Intereses: GC-BPS"]
                                         - proc["Intereses: FSS"])
    proc["Egresos: Totales SPNF aj. FSS"] = (proc["Egresos: Totales SPNF"]
                                             - proc["Intereses: FSS"])
    proc["Egresos: Totales SPC aj. FSS"] = (proc["Egresos: Totales SPC"]
                                            - proc["Intereses: FSS"])
    proc["Egresos: Totales GC-BPS aj. FSS"] = (
        proc["Egresos: Totales GC-BPS"] - proc["Intereses: FSS"])
    proc["Resultado: Primario SPNF aj. FSS"] = (
        proc["Resultado: Primario SPNF"] - proc["Ingresos: FSS"])
    proc["Resultado: Global SPNF aj. FSS"] = (proc["Resultado: Global SPNF"]
                                              - proc["Ingresos: FSS"]
                                              + proc["Intereses: FSS"])
    proc["Resultado: Primario SPC aj. FSS"] = (
        proc["Resultado: Primario SPC"] - proc["Ingresos: FSS"])
    proc["Resultado: Global SPC aj. FSS"] = (proc["Resultado: Global SPC"]
                                             - proc["Ingresos: FSS"]
                                             + proc["Intereses: FSS"])
    proc["Resultado: Primario GC-BPS aj. FSS"] = (
        proc["Resultado: Primario GC-BPS"] - proc["Ingresos: FSS"])
    proc["Resultado: Global GC-BPS aj. FSS"] = (
        proc["Resultado: Global GC-BPS"] - proc["Ingresos: FSS"]
        + proc["Intereses: FSS"])

    output = proc.loc[:, fiscal_metadata[aggregation][fss]]
    metadata._set(output, area="Cuentas fiscales y deuda",
                  currency="UYU", inf_adj="No", unit="Millones",
                  seas_adj="NSA", ts_type="Flujo", cumperiods=1)

    if unit == "gdp":
        # Flows are accumulated over 12 periods before dividing by GDP.
        output = transform.rolling(output, periods=12, operation="sum")
        output = transform.convert_gdp(output, update_loc=update_loc,
                                       save_loc=save_loc,
                                       only_get=only_get)
    elif unit == "usd":
        output = transform.convert_usd(output, update_loc=update_loc,
                                       save_loc=save_loc,
                                       only_get=only_get)
    elif unit == "real_usd":
        output = transform.convert_real(output, start_date=start_date,
                                        end_date=end_date,
                                        update_loc=update_loc,
                                        save_loc=save_loc,
                                        only_get=only_get)
        xr = nxr.get_monthly(update_loc=update_loc, save_loc=save_loc,
                             only_get=only_get)
        # Explicit positional access: an integer key on a label-indexed
        # Series is deprecated. Position 1 is presumably the monthly-average
        # exchange rate column -- verify against nxr.get_monthly.
        output = output.divide(xr[start_date:end_date].mean().iloc[1])
        metadata._set(output, currency="USD")
    elif unit == "real":
        output = transform.convert_real(output, start_date=start_date,
                                        end_date=end_date,
                                        update_loc=update_loc,
                                        save_loc=save_loc,
                                        only_get=only_get)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name, index_label=index_label)

    return output
def cpi_measures(update_loc: Union[str, PathLike, Engine,
                                   Connection, None] = None,
                 revise_rows: Union[str, int] = "nodup",
                 save_loc: Union[str, PathLike, Engine,
                                 Connection, None] = None,
                 only_get: bool = False) -> pd.DataFrame:
    """
    Get core CPI, Winsorized CPI, tradable CPI, non-tradable CPI and residual
    CPI.

    The tradable/non-tradable/core/residual indexes are built separately from
    the 1997-base and 2010-base product data and chained through their
    percentage changes. The Winsorized index is built from the 2010-base
    items only.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI measures : pd.DataFrame

    """
    name = "cpi_measures"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        # An empty frame means nothing was stored; fall through to download.
        if not output.equals(pd.DataFrame()):
            return output

    try:
        xls_10_14 = pd.ExcelFile(urls[name]["dl"]["2010-14"])
        xls_15 = pd.ExcelFile(urls[name]["dl"]["2015-"])
        prod_97 = (pd.read_excel(
            urls[name]["dl"]["1997"], skiprows=5).dropna(how="any").set_index(
            "Rubros, Agrupaciones, Subrubros, Familias y Artículos").T)
    except URLError as err:
        # On a certificate verification failure, retry all three downloads
        # through requests using the certificate bundle shipped with the
        # project; any other URL error is re-raised.
        if "SSL: CERTIFICATE_VERIFY_FAILED" in str(err):
            certificate = Path(get_project_root(), "utils", "files",
                               "ine_certs.pem")
            r = requests.get(urls[name]["dl"]["2010-14"], verify=certificate)
            xls_10_14 = pd.ExcelFile(BytesIO(r.content))
            r = requests.get(urls[name]["dl"]["2015-"], verify=certificate)
            xls_15 = pd.ExcelFile(BytesIO(r.content))
            r = requests.get(urls[name]["dl"]["1997"], verify=certificate)
            prod_97 = (pd.read_excel(BytesIO(
                r.content), skiprows=5).dropna(how="any").set_index(
                "Rubros, Agrupaciones, Subrubros, Familias y Artículos").T)
        else:
            raise err
    weights_97 = (pd.read_excel(urls[name]["dl"]["1997_weights"],
                                index_col=0).drop_duplicates(
        subset="Descripción", keep="first"))
    # 2010-base weights come from the first sheet of the 2010-14 workbook.
    weights = pd.read_excel(xls_10_14, sheet_name=xls_10_14.sheet_names[0],
                            usecols="A:C", skiprows=13,
                            index_col=0).dropna(how="any")
    weights.columns = ["Item", "Weight"]
    # Rows whose index code is 8 characters long; these feed the Winsorized
    # index below.
    weights_8 = weights.loc[weights.index.str.len() == 8]

    # Every sheet of both workbooks contributes its index columns; sheets
    # are transposed and stacked in workbook/sheet order.
    sheets = []
    for excel_file in [xls_10_14, xls_15]:
        for sheet in excel_file.sheet_names:
            raw = pd.read_excel(excel_file, sheet_name=sheet,
                                usecols="D:IN",
                                skiprows=8).dropna(how="all")
            proc = raw.loc[:,
                           raw.columns.str.contains("Indice|Índice")].dropna(
                how="all")
            sheets.append(proc.T)
    complete_10 = pd.concat(sheets)
    complete_10 = complete_10.iloc[:, 1:]
    # Two-level columns: level 0 is the item name, level 1 the item code.
    complete_10.columns = [weights["Item"], weights.index]
    complete_10.index = pd.date_range(start="2010-12-31",
                                      periods=len(complete_10), freq="M")

    # Winsorized CPI: clip the 5% tails of the cross-section of monthly
    # changes (axis=1), weight them, and cumulate into an index.
    # `winsorize` is presumably scipy.stats.mstats.winsorize -- confirm
    # against the file's imports.
    diff_8 = complete_10.loc[:,
                             complete_10.columns.get_level_values(
                                 level=1).str.len() == 8].pct_change()
    win = pd.DataFrame(winsorize(diff_8, limits=(0.05, 0.05), axis=1))
    win.index = diff_8.index
    win.columns = diff_8.columns.get_level_values(level=1)
    cpi_win = win.mul(weights_8.loc[:, "Weight"].T)
    cpi_win = cpi_win.sum(axis=1).add(1).cumprod().mul(100)

    # 1997-base weights: the weight is the first non-missing value among the
    # three level columns.
    weights_97["Weight"] = (weights_97["Rubro"].fillna(
        weights_97["Agrupación, subrubro, familia"]).fillna(
        weights_97["Artículo"]).drop(
        columns=["Rubro", "Agrupación, subrubro, familia", "Artículo"])
    )
    prod_97 = prod_97.loc[:, list(cpi_details["1997_base"].keys())]
    prod_97.index = pd.date_range(start="1997-03-31", periods=len(prod_97),
                                  freq="M")
    weights_97 = (weights_97[weights_97["Descripción"].isin(
        cpi_details["1997_weights"])].set_index("Descripción").drop(
        columns=["Rubro", "Agrupación, subrubro, "
                          "familia", "Artículo"])).div(100)
    # Positional alignment: the filtered weights are assumed to be in the
    # same order as the selected 1997 products -- TODO confirm.
    weights_97.index = prod_97.columns
    prod_10 = complete_10.loc[:, list(cpi_details["2010_base"].keys())]
    prod_10 = prod_10.loc[:, ~prod_10.columns.get_level_values(
        level=0).duplicated()]
    prod_10.columns = prod_10.columns.get_level_values(level=0)
    weights_10 = (weights.loc[weights["Item"].isin(
        list(cpi_details["2010_base"].keys()))].drop_duplicates(
        subset="Item", keep="first")).set_index("Item")

    # For each base (2010 first, then 1997) collect four groups in a fixed
    # order: tradable, non-tradable, core, non-core. Weights are
    # renormalized to sum to one within each group. Note that `weights` is
    # rebound to a list from here on.
    items = []
    weights = []
    for item, weight, details in zip([prod_10, prod_97],
                                     [weights_10, weights_97],
                                     ["2010_base", "1997_base"]):
        for tradable in [True, False]:
            items.append(
                item.loc[:, [k for k, v in cpi_details[details].items()
                             if v["Tradable"] is tradable]])
            aux = weight.loc[[k for k, v in cpi_details[details].items()
                              if v["Tradable"] is tradable]]
            weights.append(aux.div(aux.sum()))
        for core in [True, False]:
            items.append(
                item.loc[:, [k for k, v in cpi_details[details].items()
                             if v["Core"] is core]])
            aux = weight.loc[[k for k, v in cpi_details[details].items()
                              if v["Core"] is core]]
            weights.append(aux.div(aux.sum()))

    # Weighted sum of item indexes for each of the eight groups.
    intermediate = []
    for item, weight in zip(items, weights):
        intermediate.append(item.mul(weight.squeeze()).sum(1))

    # Chain each 2010-base series (x) to its 1997-base counterpart (y):
    # 1997-base changes before 2011, 2010-base changes afterwards.
    output = []
    for x, y in zip(intermediate[:4], intermediate[4:]):
        aux = pd.concat([
            y.pct_change().loc[y.index < "2011-01-01"],
            x.pct_change().loc[x.index > "2011-01-01"]
        ])
        output.append(aux.fillna(0).add(1).cumprod().mul(100))

    # Headline CPI is always read with only_get=True.
    cpi_re = cpi(update_loc=update_loc, save_loc=save_loc, only_get=True)
    cpi_re = cpi_re.loc[cpi_re.index >= "1997-03-31"]
    output = pd.concat([cpi_re] + output + [cpi_win], axis=1)
    output.columns = [
        "Índice de precios al consumo: total",
        "Índice de precios al consumo: transables",
        "Índice de precios al consumo: no transables",
        "Índice de precios al consumo: subyacente",
        "Índice de precios al consumo: residual",
        "Índice de precios al consumo: Winsorized 0.05"
    ]
    output = output.apply(pd.to_numeric, errors="coerce")
    metadata._set(output, area="Precios y salarios", currency="-",
                  inf_adj="No", unit="2010-12=100", seas_adj="NSA",
                  ts_type="-", cumperiods=1)
    output = transform.rebase(output, start_date="2010-12-01",
                              end_date="2010-12-31")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
def cpi(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get CPI data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI index : pd.DataFrame

    """
    name = "cpi"

    stored_only = only_get is True and update_loc is not None
    if stored_only:
        stored = ops._io(operation="update", data_loc=update_loc, name=name)
        # An empty frame means nothing was stored; download instead.
        if not stored.equals(pd.DataFrame()):
            return stored

    excel_options = {"skiprows": 7, "usecols": "A:B", "index_col": 0}
    try:
        output = pd.read_excel(urls[name]["dl"]["main"],
                               **excel_options).dropna()
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" not in str(err):
            raise err
        # Certificate verification failed: retry through requests with the
        # certificate bundle shipped with the project.
        certificate = Path(get_project_root(), "utils", "files",
                           "ine_certs.pem")
        r = requests.get(urls[name]["dl"]["main"], verify=certificate)
        output = pd.read_excel(BytesIO(r.content), **excel_options).dropna()

    output.columns = ["Índice de precios al consumo"]
    output = output.rename_axis(None)
    # Shift every date to a month end.
    output.index = output.index + MonthEnd(1)

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    output = output.apply(pd.to_numeric, errors="coerce")
    metadata._set(output, area="Precios", currency="-",
                  inf_adj="No", unit="2010-10=100", seas_adj="NSA",
                  ts_type="-", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
def nxr_daily(update_loc: Union[str, PathLike, Engine,
                                Connection, None] = None,
              save_loc: Union[str, PathLike, Engine,
                              Connection, None] = None,
              only_get: bool = False) -> pd.DataFrame:
    """Get daily nominal exchange rate data.

    Data is requested in 30-day windows starting the day after the last
    stored observation (or after 1999-12-31 if nothing is stored), followed
    by one final request up to yesterday.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc`` (returned as is, even if empty).

    Returns
    -------
    Daily nominal exchange rates : pd.DataFrame
        Sell rate. If nothing could be downloaded, the previously stored
        data is returned (an empty dataframe when ``update_loc`` is None).

    """
    name = "nxr_daily"
    date_format = '%d/%m/%Y'

    if only_get is True and update_loc is not None:
        return ops._io(operation="update", data_loc=update_loc, name=name)

    start_date = dt.datetime(1999, 12, 31)
    # Always bound so the "nothing downloaded" path can return it even when
    # no update location was given.
    previous_data = pd.DataFrame()
    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc, name=name)
        metadata._set(previous_data)
        try:
            start_date = previous_data.index[-1]
        except IndexError:
            # Nothing stored yet: keep the default start date.
            pass

    today = dt.datetime.now() - dt.timedelta(days=1)
    runs = (today - start_date).days // 30
    data = []
    base_url = urls[name]['dl']['main']

    def _window_url(from_, to_):
        # Percent-encoded request fragment for one date window.
        dates = f"%22FechaDesde%22:%22{from_}%22,%22FechaHasta%22:%22{to_}"
        return f"{base_url}{dates}%22,%22Grupo%22:%222%22}}" + "}"

    for _ in range(runs):
        from_ = (start_date + dt.timedelta(days=1)).strftime(date_format)
        to_ = (start_date + dt.timedelta(days=30)).strftime(date_format)
        try:
            data.append(pd.read_excel(_window_url(from_, to_)))
        except (TypeError, BadZipFile):
            # Best effort: a failed window leaves start_date untouched, so
            # the next request starts from the same date.
            pass
        else:
            start_date = dt.datetime.strptime(to_, date_format)

    # Final request for the days left after the last full window.
    from_ = (start_date + dt.timedelta(days=1)).strftime(date_format)
    to_ = today.strftime(date_format)
    try:
        data.append(pd.read_excel(_window_url(from_, to_)))
    except (TypeError, BadZipFile):
        pass

    if not data:
        # Nothing new was downloaded; hand back whatever was already stored.
        return previous_data

    output = pd.concat(data, axis=0)
    output = output.pivot(index="Fecha", columns="Moneda",
                          values="Venta").rename_axis(None)
    output.index = pd.to_datetime(output.index, format="%d/%m/%Y",
                                  errors="coerce")
    output.sort_index(inplace=True)
    # Values use a decimal comma; convert before casting to numeric.
    output.replace(",", ".", regex=True, inplace=True)
    output.columns = ["Tipo de cambio US$, Cable"]
    output = output.apply(pd.to_numeric, errors="coerce")

    metadata._set(output, area="Precios", currency="UYU/USD",
                  inf_adj="No", unit="-", seas_adj="NSA",
                  ts_type="-", cumperiods=1)
    output.columns = output.columns.set_levels(["-"], level=2)

    if update_loc is not None:
        output = pd.concat([previous_data, output])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
def nxr_monthly(update_loc: Union[str, PathLike, Engine,
                                  Connection, None] = None,
                revise_rows: Union[str, int] = "nodup",
                save_loc: Union[str, PathLike, Engine,
                                Connection, None] = None,
                only_get: bool = False) -> pd.DataFrame:
    """Get monthly nominal exchange rate data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly nominal exchange rates : pd.DataFrame
        Sell rate, monthly average and end of period.

    """
    name = "nxr_monthly"

    stored_only = only_get is True and update_loc is not None
    if stored_only:
        stored = ops._io(operation="update", data_loc=update_loc, name=name)
        # An empty frame means nothing was stored; download instead.
        if not stored.equals(pd.DataFrame()):
            return stored

    excel_options = {"skiprows": 4, "index_col": 0, "usecols": "A,C,F"}
    try:
        raw = pd.read_excel(urls[name]["dl"]["main"], **excel_options)
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" not in str(err):
            raise err
        # Certificate verification failed: retry through requests with the
        # certificate bundle shipped with the project.
        certificate = Path(get_project_root(), "utils", "files",
                           "ine_certs.pem")
        r = requests.get(urls[name]["dl"]["main"], verify=certificate)
        raw = pd.read_excel(BytesIO(r.content), **excel_options)

    output = raw.dropna(how="any", axis=0)
    output.columns = [
        "Tipo de cambio venta, fin de período",
        "Tipo de cambio venta, promedio"
    ]
    # Shift every date to a month end.
    output.index = output.index + MonthEnd(1)
    output = output.apply(pd.to_numeric, errors="coerce")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output, area="Precios", currency="UYU/USD",
                  inf_adj="No", unit="-", seas_adj="NSA",
                  ts_type="-", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
def cpi_measures(update_loc: Union[str, PathLike, Engine,
                                   Connection, None] = None,
                 revise_rows: Union[str, int] = "nodup",
                 save_loc: Union[str, PathLike, Engine,
                                 Connection, None] = None,
                 name: str = "tfm_prices",
                 index_label: str = "index",
                 only_get: bool = False) -> pd.DataFrame:
    """Get core CPI, Winsorized CPI, tradable CPI and non-tradable CPI.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'tfm_prices'
        Either CSV filename for updating and/or saving, or table name if
        using SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI measures : pd.DataFrame

    """
    # NOTE(review): another ``cpi_measures`` is defined earlier in this file;
    # this later definition rebinds the name -- confirm that is intended.
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc,
                         name=name, index_label=index_label)
        # An empty frame means nothing was stored; fall through to download.
        if not output.equals(pd.DataFrame()):
            return output

    xls = pd.ExcelFile(urls["tfm_prices"]["dl"]["main"])
    # Weights come from the first sheet of the workbook.
    weights = pd.read_excel(xls, sheet_name=xls.sheet_names[0],
                            usecols="A:C", skiprows=14,
                            index_col=0).dropna(how="any")
    weights.columns = ["Item", "Weight"]
    # Rows whose index code is 8 characters long; these feed the Winsorized
    # index below.
    weights_8 = weights.loc[weights.index.str.len() == 8]

    # Every sheet contributes its index columns; sheets are transposed and
    # stacked in sheet order.
    sheets = []
    for sheet in xls.sheet_names:
        raw = pd.read_excel(xls, sheet_name=sheet, usecols="D:IN",
                            skiprows=9).dropna(how="all")
        proc = raw.loc[:, raw.columns.str.contains("Indice|Índice")].dropna(
            how="all")
        sheets.append(proc.T)
    output = pd.concat(sheets)
    output = output.iloc[:, 1:]
    # Two-level columns: level 0 is the item name, level 1 the item code.
    output.columns = [weights["Item"], weights.index]
    output.index = pd.date_range(start="2010-12-31", periods=len(output),
                                 freq="M")

    # Winsorized CPI: clip the 5% tails of the cross-section of monthly
    # changes (axis=1), weight them, and cumulate into an index.
    # `winsorize` is presumably scipy.stats.mstats.winsorize -- confirm
    # against the file's imports.
    diff_8 = output.loc[:, output.columns.get_level_values(
        level=1).str.len() == 8].pct_change()
    win = pd.DataFrame(winsorize(diff_8, limits=(0.05, 0.05), axis=1))
    win.index = diff_8.index
    win.columns = diff_8.columns.get_level_values(level=1)
    cpi_win = win.mul(weights_8.loc[:, "Weight"].T)
    cpi_win = cpi_win.sum(axis=1).add(1).cumprod().mul(100)

    # Historical product data, spliced in front of the data read above
    # through monthly percentage changes. ``prod_details`` is indexed
    # positionally: [0] and [1] select columns from each source, [2] and [3]
    # are used as per-column flags (presumably tradable and core -- verify
    # against where prod_details is defined).
    prod_97 = (pd.read_excel(
        urls["tfm_prices"]["dl"]["historical"], skiprows=5).dropna(
        how="any").set_index("Rubros, Agrupaciones y Subrubros").T)
    prod_97 = prod_97.loc[:, prod_details[1]].pct_change()
    output_8 = output.loc[:, prod_details[0]].pct_change()
    output_8 = output_8.loc[:, ~output_8.columns.get_level_values(
        level=0).duplicated()]
    output_8.columns = output_8.columns.get_level_values(level=0)
    # Positional alignment: historical columns take the names of the newer
    # data, so both selections are assumed to be in the same order.
    prod_97.columns = output_8.columns.get_level_values(level=0)
    # The first row of output_8 is dropped (it is NaN after pct_change).
    complete = pd.concat([prod_97, output_8.iloc[1:]])
    complete.index = pd.date_range(start="1997-03-31", freq="M",
                                   periods=len(complete))

    weights_complete = weights.loc[weights["Item"].isin(complete.columns)]
    weights_complete = weights_complete.loc[~weights_complete["Item"].
                                            duplicated()].set_index("Item")

    # Each sub-index: select columns by flag, renormalize weights to sum to
    # one within the group, weight the monthly changes and cumulate.
    tradable = complete.loc[:, [bool(x) for x in prod_details[2]]]
    tradable_weights = weights_complete.loc[
        weights_complete.index.isin(tradable.columns), "Weight"].T
    tradable_weights = tradable_weights.div(tradable_weights.sum())
    tradable = (tradable.mul(tradable_weights).sum(
        axis=1).add(1).cumprod().mul(100))

    non_tradable = complete.loc[:, [not bool(x) for x in prod_details[2]]]
    non_tradable_weights = weights_complete.loc[
        weights_complete.index.isin(non_tradable.columns), "Weight"].T
    non_tradable_weights = non_tradable_weights.div(
        non_tradable_weights.sum())
    non_tradable = (non_tradable.mul(non_tradable_weights).sum(
        axis=1).add(1).cumprod().mul(100))

    core = complete.loc[:, [bool(x) for x in prod_details[3]]]
    core_weights = weights_complete.loc[
        weights_complete.index.isin(core.columns), "Weight"].T
    core_weights = core_weights.div(core_weights.sum())
    core = (core.mul(core_weights).sum(axis=1).add(1).cumprod().mul(100))

    # Headline CPI is always read with only_get=True.
    # NOTE(review): ``cpi.get`` expects ``cpi`` to be a module, but this file
    # also defines a function named ``cpi`` -- confirm which one is in scope.
    cpi_re = cpi.get(update_loc=update_loc, save_loc=save_loc, only_get=True)
    cpi_re = cpi_re.loc[cpi_re.index >= "1997-03-31"]
    output = pd.concat([cpi_re, tradable, non_tradable, core, cpi_win],
                       axis=1)
    output = transform.base_index(output, start_date="2010-12-01",
                                  end_date="2010-12-31")
    output.columns = [
        "Índice de precios al consumo: total",
        "Índice de precios al consumo: transables",
        "Índice de precios al consumo: no transables",
        "Índice de precios al consumo: subyacente",
        "Índice de precios al consumo: Winsorized 0.05"
    ]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name,
                                index_label=index_label)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    output = output.apply(pd.to_numeric, errors="coerce")
    metadata._set(output, area="Precios y salarios", currency="-",
                  inf_adj="No", unit="2010-12=100", seas_adj="NSA",
                  ts_type="-", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name, index_label=index_label)

    return output
def get(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        name: str = "naccounts",
        index_label: str = "index",
        only_get: bool = False) -> Dict[str, pd.DataFrame]:
    """Get national accounts data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'naccounts'
        Either CSV filename for updating and/or saving, or table name if
        using SQL. The table key is appended to it.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Quarterly national accounts : Dict[str, pd.DataFrame]
        Each dataframe corresponds to a national accounts table.

    """
    if only_get is True and update_loc is not None:
        stored = {
            filename: ops._io(operation="update", data_loc=update_loc,
                              name=f"{name}_{filename}",
                              index_label=index_label)
            for filename in na_metadata
        }
        # Stored data is only used when every table is available.
        if not any(table.equals(pd.DataFrame())
                   for table in stored.values()):
            return stored

    parsed_excels = {}
    for filename, meta in na_metadata.items():
        full_name = f"{name}_{filename}"

        raw = pd.read_excel(meta["url"], skiprows=9, nrows=meta["Rows"])
        trimmed = raw.drop(columns=["Unnamed: 0"])
        trimmed = trimmed.dropna(axis=0, how="all")
        trimmed = trimmed.dropna(axis=1, how="all")
        proc = trimmed.transpose()
        proc.columns = meta["Colnames"]
        proc = proc.drop(["Unnamed: 1"])
        _fix_dates(proc)

        # Tables reported in thousands are rescaled to millions.
        in_thousands = meta["Unit"] == "Miles"
        if in_thousands:
            proc = proc.divide(1000)
        unit_ = "Millones" if in_thousands else meta["Unit"]

        if update_loc is not None:
            previous_data = ops._io(operation="update",
                                    data_loc=update_loc,
                                    name=full_name,
                                    index_label=index_label)
            proc = ops._revise(new_data=proc, prev_data=previous_data,
                               revise_rows=revise_rows)

        proc = proc.apply(pd.to_numeric, errors="coerce")
        metadata._set(proc, area="Actividad económica", currency="UYU",
                      inf_adj=meta["Inf. Adj."], unit=unit_,
                      seas_adj=meta["Seas"], ts_type="Flujo", cumperiods=1)

        if save_loc is not None:
            ops._io(operation="save", data_loc=save_loc,
                    data=proc, name=full_name,
                    index_label=index_label)

        parsed_excels[filename] = proc

    return parsed_excels
def stocks(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
           revise_rows: Union[str, int] = "nodup",
           save_loc: Union[str, PathLike, Engine, Connection, None] = None,
           only_get: bool = False) -> pd.DataFrame:
    """Get stock market index data.

    Indexes selected are S&P 500, Euronext 100, Nikkei 225 and Shanghai
    Composite.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily stock market index in USD : pd.DataFrame

    """
    name = "global_stocks"

    stored_only = only_get is True and update_loc is not None
    if stored_only:
        stored = ops._io(operation="update", data_loc=update_loc, name=name)
        # An empty frame means nothing was stored; download instead.
        if not stored.equals(pd.DataFrame()):
            return stored

    def _download(ticker):
        # One single-column frame per index, temporarily named by ticker.
        frame = pd.read_csv(urls[name]["dl"][ticker], index_col=0,
                            usecols=[0, 4], parse_dates=True)
        frame.columns = [ticker]
        return frame

    tickers = ["spy", "n100", "nikkei", "sse"]
    combined = pd.concat([_download(ticker) for ticker in tickers], axis=1)
    # Only gaps between valid observations are interpolated.
    output = combined.interpolate(method="linear", limit_area="inside")
    output.columns = [
        "S&P 500", "Euronext 100", "Nikkei 225",
        "Shanghai Stock Exchange Composite"
    ]

    metadata._set(output, area="Global", currency="USD",
                  inf_adj="No", seas_adj="NSA",
                  ts_type="-", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["USD", "EUR", "JPY", "CNY"]])
    output = rebase(output, start_date="2019-01-02")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
def tax_revenue(
        update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """
    Get tax revenues data.

    This retrieval function requires that Ghostscript and Tkinter be found in
    your system.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``, don't
        save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``. If nothing is found there, the data is downloaded
        anyway.

    Returns
    -------
    Monthly tax revenues : pd.DataFrame

    """
    name = "taxes"

    if only_get is True and update_loc is not None:
        stored = ops._io(operation="update", data_loc=update_loc, name=name)
        # An empty frame means nothing was found; fall through and download.
        if not stored.equals(pd.DataFrame()):
            return stored

    sheet = pd.read_excel(urls[name]["dl"]["main"],
                          usecols="C:AO", index_col=0)
    # Index labels that are not dates become NaT and are filtered out below.
    sheet.index = pd.to_datetime(sheet.index, errors="coerce")
    output = sheet.loc[pd.notna(sheet.index)]
    # Roll every date forward to the end of its month.
    output.index = output.index + MonthEnd(0)
    output.columns = taxes_columns
    output = output.div(1_000_000)

    # Rows from the spreadsheet take precedence over rows from the PDF when
    # both cover the same date (keep="first").
    from_pdf = _get_taxes_from_pdf(output)
    merged = pd.concat([output, from_pdf], sort=False)
    repeated = merged.index.duplicated(keep="first")
    output = merged.loc[~repeated]

    if update_loc is not None:
        previous = ops._io(operation="update", data_loc=update_loc, name=name)
        output = ops._revise(new_data=output, prev_data=previous,
                             revise_rows=revise_rows)

    output = output.apply(pd.to_numeric, errors="coerce")
    metadata._set(output, area="Sector público", currency="UYU",
                  inf_adj="No", unit="Millones", seas_adj="NSA",
                  ts_type="Flujo", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
def gdp(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get seasonally adjusted real quarterly GDP for select countries.

    Countries/aggregates are US, EU-27, Japan and China. The US, EU and Japan
    series are requested from the FRED API, which requires a ``FRED_API_KEY``
    environment variable (a ``.env`` file in the project root is loaded
    first). The China series is assembled from two growth-rate series and an
    observed volume spreadsheet.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``, don't
        save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``. If nothing is found there, the data is downloaded
        anyway.

    Returns
    -------
    Quarterly real GDP in seasonally adjusted terms : pd.DataFrame

    """
    name = "global_gdp"
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        # An empty frame means nothing was found; fall through and download.
        if not output.equals(pd.DataFrame()):
            return output
    # The request URL is suffixed with Q4 of next year; presumably an
    # end-of-period bound so every available observation is returned
    # -- TODO confirm against the API.
    chn_y = dt.datetime.now().year + 1
    chn_r = requests.get(f"{urls[name]['dl']['chn_oecd']}{chn_y}-Q4")
    chn_json = chn_r.json()
    chn_datasets = []
    # Series "0" is indexed from 2011-03-31 and series "1" from 1993-03-31,
    # both quarterly. Only the first element of each observation is kept.
    for dataset, start in zip(["0", "1"], ["2011-03-31", "1993-03-31"]):
        raw = chn_json["dataSets"][0]["series"][f"0:0:{dataset}:0"][
            "observations"]
        values = [x[0] for x in raw.values()]
        df = pd.DataFrame(data=values,
                          index=pd.date_range(start=start, freq="Q-DEC",
                                              periods=len(values)),
                          columns=["China"])
        chn_datasets.append(df)
    # NOTE(review): names suggest quarter-on-quarter and year-on-year growth;
    # the arithmetic below treats both as percentages.
    chn_qoq = chn_datasets[0]
    chn_yoy = chn_datasets[1]
    # Observed volumes: drop fully empty columns and rows, then keep only
    # dates strictly between 2011-01-01 and 2016-01-01.
    chn_obs = pd.read_excel(urls["global_gdp"]["dl"]["chn_obs"],
                            index_col=0).dropna(how="all",
                                                axis=1).dropna(how="all",
                                                               axis=0)
    chn_obs = chn_obs.loc[(chn_obs.index > "2011-01-01")
                          & (chn_obs.index < "2016-01-01")]
    chn_yoy["volume"] = chn_obs
    # Back-cast missing volumes before 2011, walking from latest to earliest:
    # volume[t] = volume[t + 4] / (1 + growth[t + 4] / 100).
    # Iterating in reverse guarantees row + 4 has already been filled.
    for row in reversed(range(len(chn_yoy.loc[chn_yoy.index
                                              < "2011-01-01"]))):
        if pd.isna(chn_yoy.iloc[row, 1]):
            chn_yoy.iloc[row, 1] = (chn_yoy.iloc[row + 4, 1]
                                    / (1 + chn_yoy.iloc[row + 4, 0] / 100))
    chn_yoy = chn_yoy[["volume"]].loc[chn_yoy.index < "2016-01-01"]
    metadata._set(chn_yoy)
    # Request the "seas" component from the project's decompose helper
    # using the "x13" method.
    chn_sa = decompose(chn_yoy[["volume"]].loc[chn_yoy.index < "2016-01-01"],
                       component="seas", method="x13")
    chn_sa = pd.concat([chn_sa, chn_qoq], axis=1)
    # Wherever column 1 (from chn_qoq) has a value, overwrite column 0 by
    # chaining from the previous row: level[t] = level[t-1] * (1 + g[t]/100).
    # Rows are processed in order so each step builds on the updated level.
    for row in range(len(chn_sa)):
        if not pd.isna(chn_sa.iloc[row, 1]):
            chn_sa.iloc[row, 0] = (chn_sa.iloc[row - 1, 0]
                                   * (1 + chn_sa.iloc[row, 1] / 100))
    # NOTE(review): division by 10 is presumably a unit conversion so China
    # matches the other series -- confirm.
    chn = chn_sa.iloc[:, [0]].div(10)
    gdps = []
    load_dotenv(Path(get_project_root(), ".env"))
    fred_api_key = os.environ.get("FRED_API_KEY")
    for series in ["GDPC1", "CLVMNACSCAB1GQEU272020", "JPNRGDPEXP"]:
        r = requests.get(f"{urls[name]['dl']['fred']}{series}&api_key="
                         f"{fred_api_key}&file_type=json")
        aux = pd.DataFrame.from_records(r.json()["observations"])
        aux = aux[["date", "value"]].set_index("date")
        aux.index = pd.to_datetime(aux.index)
        # Shift dates three months forward and snap to month end.
        aux.index = aux.index.shift(3, freq="M") + MonthEnd(0)
        aux.columns = [series]
        aux = aux.apply(pd.to_numeric, errors="coerce")
        # NOTE(review): per-series rescaling; presumably GDPC1 is annualized
        # (hence / 4) and the EU series is in millions (hence / 1000)
        # -- confirm against the FRED series definitions.
        if series == "GDPC1":
            aux = aux.div(4)
        elif series == "CLVMNACSCAB1GQEU272020":
            aux = aux.div(1000)
        gdps.append(aux)
    gdps = pd.concat(gdps, axis=1)
    output = pd.concat([gdps, chn], axis=1)
    output.columns = ["Estados Unidos", "Unión Europea", "Japón", "China"]
    if update_loc is not None:
        previous_data = ops._io(operation="update", data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)
    metadata._set(output, area="Global", currency="USD", inf_adj="Const.",
                  unit="Miles de millones", seas_adj="SA", ts_type="Flujo",
                  cumperiods=1)
    # Level 3 of the column metadata is overridden with one entry per column.
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["USD", "EUR", "JPY", "CNY"]])
    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)
    return output
def _public_debt_retriever(update_loc: Union[str, PathLike, Engine,
                                             Connection, None] = None,
                           revise_rows: Union[str, int] = "nodup",
                           save_loc: Union[str, PathLike, Engine,
                                           Connection, None] = None,
                           only_get: bool = False) -> Dict[str, pd.DataFrame]:
    """Helper function. See any of the `public_debt_...()` functions.

    Reads four tables from a single Excel workbook (sheets "SPG2",
    "SPNM bruta", "BCU bruta" and "Activos Neta") and gives each a quarterly
    index starting at 1999-12-31.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Where to find previous data for updating, or ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Passed to the revision step when ``update_loc`` is given.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Where to save each table, or ``None``, don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``. If any of the four tables is missing there, all of
        them are downloaded.

    Returns
    -------
    Public debt tables : Dict[str, pd.DataFrame]
        Keys are ``gps``, ``nfps``, ``cb`` and ``assets``. Tables are stored
        under the names ``public_debt_<key>``.

    """
    tables = ["gps", "nfps", "cb", "assets"]
    if only_get is True and update_loc is not None:
        output = {}
        for meta in tables:
            data = ops._io(operation="update", data_loc=update_loc,
                           name=f"public_debt_{meta}")
            output.update({meta: data})
        # Only short-circuit when every table was found.
        if all(not value.equals(pd.DataFrame())
               for value in output.values()):
            return output

    colnames = ["Total deuda", "Plazo contractual: hasta 1 año",
                "Plazo contractual: entre 1 y 5 años",
                "Plazo contractual: más de 5 años",
                "Plazo residual: hasta 1 año",
                "Plazo residual: entre 1 y 5 años",
                "Plazo residual: más de 5 años",
                "Moneda: pesos", "Moneda: dólares", "Moneda: euros",
                "Moneda: yenes", "Moneda: DEG", "Moneda: otras",
                "Residencia: no residentes", "Residencia: residentes"]

    # Four rows per year since 1999 bounds how many rows each read can need.
    # Computed once so all reads agree even across a year boundary.
    max_rows = (dt.datetime.now().year - 1999) * 4

    xls = pd.ExcelFile(urls["public_debt_gps"]["dl"]["main"])
    gps_raw = pd.read_excel(xls, sheet_name="SPG2",
                            usecols="B:Q", index_col=0, skiprows=10,
                            nrows=max_rows)
    # Keep rows with at least two non-missing values. `how` must not be
    # passed together with `thresh`: pandas >= 2.0 raises TypeError, and
    # older versions ignored `how` whenever `thresh` was given.
    gps = gps_raw.dropna(thresh=2)
    gps.index = pd.date_range(start="1999-12-31", periods=len(gps),
                              freq="Q-DEC")
    gps.columns = colnames

    nfps_raw = pd.read_excel(xls, sheet_name="SPNM bruta",
                             usecols="B:O", index_col=0)
    # Locate the table by its title row; data starts five rows below it.
    loc = nfps_raw.index.get_loc("9. Deuda Bruta del Sector Público no "
                                 "monetario por plazo y moneda.")
    nfps = nfps_raw.iloc[loc + 5:, :].dropna(how="any")
    nfps.index = pd.date_range(start="1999-12-31", periods=len(nfps),
                               freq="Q-DEC")
    # The last two columns are read from another region of the same sheet.
    nfps_extra_raw = pd.read_excel(xls, sheet_name="SPNM bruta",
                                   usecols="O:P", skiprows=11,
                                   nrows=max_rows)
    nfps_extra = nfps_extra_raw.dropna(how="all")
    nfps_extra.index = nfps.index
    nfps = pd.concat([nfps, nfps_extra], axis=1)
    nfps.columns = colnames

    # Equivalent to (year - 1999) * 8 + 20: the skipped region grows by
    # eight rows per year.
    cb_raw = pd.read_excel(xls, sheet_name="BCU bruta",
                           usecols="B:O", index_col=0,
                           skiprows=max_rows * 2 + 20)
    cb = cb_raw.dropna(how="any")
    cb.index = pd.date_range(start="1999-12-31", periods=len(cb),
                             freq="Q-DEC")
    cb_extra_raw = pd.read_excel(xls, sheet_name="BCU bruta",
                                 usecols="O:P", skiprows=11,
                                 nrows=max_rows)
    bcu_extra = cb_extra_raw.dropna(how="all")
    bcu_extra.index = cb.index
    cb = pd.concat([cb, bcu_extra], axis=1)
    cb.columns = colnames

    assets_raw = pd.read_excel(xls, sheet_name="Activos Neta",
                               usecols="B,C,D,K", index_col=0, skiprows=13,
                               nrows=max_rows)
    assets = assets_raw.dropna(how="any")
    assets.index = pd.date_range(start="1999-12-31", periods=len(assets),
                                 freq="Q-DEC")
    assets.columns = ["Total activos", "Sector público no monetario", "BCU"]

    downloaded = {"gps": gps, "nfps": nfps, "cb": cb, "assets": assets}
    # Build a fresh dict rather than writing to the one being iterated.
    output = {}
    for meta, data in downloaded.items():
        if update_loc is not None:
            previous_data = ops._io(operation="update",
                                    data_loc=update_loc,
                                    name=f"public_debt_{meta}")
            data = ops._revise(new_data=data, prev_data=previous_data,
                               revise_rows=revise_rows)
        metadata._set(data, area="Sector público", currency="USD",
                      inf_adj="No", unit="Millones", seas_adj="NSA",
                      ts_type="Stock", cumperiods=1)

        if save_loc is not None:
            ops._io(operation="save", data_loc=save_loc,
                    data=data, name=f"public_debt_{meta}")

        output[meta] = data

    return output
def nxr(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get currencies data.

    Selected currencies are the US dollar index, USDEUR, USDJPY and USDCNY.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``, don't
        save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``. If nothing is found there, the data is downloaded
        anyway.

    Returns
    -------
    Daily currencies : pd.DataFrame

    """
    name = "global_nxr"

    if only_get is True and update_loc is not None:
        stored = ops._io(operation="update", data_loc=update_loc, name=name)
        # An empty frame means nothing was found; fall through and download.
        if not stored.equals(pd.DataFrame()):
            return stored

    frames = []
    for ticker in ("dollar", "eur", "jpy", "cny"):
        # Only the first and fifth CSV columns are read: the first becomes
        # the (date-parsed) index, the fifth the single data column.
        frame = pd.read_csv(urls[name]["dl"][ticker], index_col=0,
                            usecols=[0, 4], parse_dates=True)
        frame.columns = [ticker]
        if ticker == "dollar":
            # This frame is the left side of the join below, so its index
            # (after dropping missing rows) defines the rows of the result.
            frame = frame.dropna()
        frames.append(frame)

    base, *others = frames
    joined = base.join(others)
    # Fill gaps linearly, but only those surrounded by valid observations.
    output = joined.interpolate(method="linear", limit_area="inside")
    output.columns = ["Índice Dólar", "Euro", "Yen", "Renminbi"]

    if update_loc is not None:
        previous = ops._io(operation="update", data_loc=update_loc, name=name)
        output = ops._revise(new_data=output, prev_data=previous,
                             revise_rows=revise_rows)

    metadata._set(output, area="Global", currency="USD", inf_adj="No",
                  seas_adj="NSA", ts_type="-", cumperiods=1)
    # Levels 3 and 5 of the column metadata are overridden per column.
    metadata._modify_multiindex(
        output,
        levels=[3, 5],
        new_arrays=[["USD", "EUR", "JPY", "CNY"],
                    ["Canasta/USD", "EUR/USD", "JPY/USD", "CNY/USD"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
def get(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        name: str = "commodity_index",
        index_label: str = "index",
        only_get: bool = False,
        only_get_prices: bool = False,
        only_get_weights: bool = True) -> pd.DataFrame:
    """Get export-weighted commodity price index for Uruguay.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``, don't
        save.
    name : str, default 'commodity_index'
        Either CSV filename for updating and/or saving, or table name if
        using SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc`` for the commodity index.
    only_get_prices : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc`` for commodity prices.
    only_get_weights : bool, default True
        If True, don't download data, retrieve what is available from
        ``update_loc`` for commodity weights.

    Returns
    -------
    Monthly export-weighted commodity index : pd.DataFrame
        Export-weighted average of commodity prices relevant to Uruguay.

    """
    if only_get is True and update_loc is not None:
        stored = ops._io(operation="update", data_loc=update_loc,
                         name=name, index_label=index_label)
        # An empty frame means nothing was found; fall through and compute.
        if not stored.equals(pd.DataFrame()):
            return stored

    raw_prices = _prices(update_loc=update_loc, revise_rows="nodup",
                         save_loc=save_loc, only_get=only_get_prices)
    # Bridge single-period gaps, drop any remaining incomplete rows, and
    # convert levels to one-period percentage changes.
    complete = raw_prices.interpolate(method="linear", limit=1)
    complete = complete.dropna(how="any")
    changes = complete.pct_change(periods=1)

    raw_weights = _weights(update_loc=update_loc, revise_rows="nodup",
                           save_loc=save_loc, only_get=only_get_weights)
    # Align weights to the price columns and dates, carrying the latest
    # available weights forward.
    aligned = raw_weights[changes.columns]
    aligned = aligned.reindex(changes.index, method="ffill")

    contributions = pd.DataFrame(changes.values * aligned.values,
                                 columns=changes.columns,
                                 index=changes.index)
    # Sum weighted changes per period, then chain them into an index level.
    growth_factor = contributions.sum(axis=1).add(1)
    product = growth_factor.to_frame().cumprod().multiply(100)
    product.columns = ["Índice de precios de productos primarios"]

    metadata._set(product, area="Sector externo", currency="USD",
                  inf_adj="No", unit="2002-01=100", seas_adj="NSA",
                  ts_type="Flujo", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=product,
                name=name, index_label=index_label)

    return product
def income_household(update_loc: Union[str, PathLike, Engine,
                                       Connection, None] = None,
                     revise_rows: Union[str, int] = "nodup",
                     save_loc: Union[str, PathLike, Engine,
                                     Connection, None] = None,
                     only_get: bool = False) -> pd.DataFrame:
    """Get average household income.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``, don't
        save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``. If nothing is found there, the data is downloaded
        anyway.

    Returns
    -------
    Monthly average household income : pd.DataFrame

    Raises
    ------
    urllib.error.URLError
        If the download fails for any reason other than a certificate
        verification error.

    """
    name = "income_household"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        # An empty frame means nothing was found; fall through and download.
        if not output.equals(pd.DataFrame()):
            return output

    try:
        raw = pd.read_excel(urls[name]["dl"]["main"], sheet_name="Mensual",
                            skiprows=5, index_col=0).dropna(how="all")
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" in str(err):
            # Retry through requests, verifying against the certificate
            # bundle shipped with the project.
            certificate = Path(get_project_root(), "utils", "files",
                               "ine_certs.pem")
            r = requests.get(urls[name]["dl"]["main"], verify=certificate)
            raw = pd.read_excel(BytesIO(r.content), sheet_name="Mensual",
                                skiprows=5, index_col=0).dropna(how="all")
        else:
            # Bare raise preserves the original traceback.
            raise

    raw.index = pd.to_datetime(raw.index)
    output = raw.loc[~pd.isna(raw.index)]
    # Roll every date forward to the end of its month.
    output.index = output.index + MonthEnd(0)
    output.columns = ["Total país", "Montevideo", "Interior: total",
                      "Interior: localidades de más de 5 mil hab.",
                      "Interior: localidades pequeñas y rural"]

    # A second file supplies extra rows, but only for the first three
    # columns; the remaining columns are left missing for those rows.
    missing = pd.read_excel(urls[name]["dl"]["missing"],
                            index_col=0, header=0).iloc[:, 10:13]
    missing.columns = output.columns[:3]
    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # equivalent and works on older versions too.
    output = pd.concat([output, missing], sort=False)
    output = output.apply(pd.to_numeric, errors="coerce")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output, area="Ingresos", currency="UYU",
                  inf_adj="No", unit="Pesos", seas_adj="NSA",
                  ts_type="Flujo", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
def labor_real_wages(seas_adj: Union[str, None] = None,
                     update_loc: Union[str, PathLike, Engine,
                                       Connection, None] = None,
                     save_loc: Union[str, PathLike, Engine,
                                     Connection, None] = None,
                     name: str = "tfm_wages",
                     index_label: str = "index",
                     only_get: bool = True) -> pd.DataFrame:
    """
    Get real wages. Allow choosing seasonal adjustment.

    The result holds the wage columns as retrieved followed by the same
    columns converted to real terms, all rebased with a start date of
    2008-07-31.

    Parameters
    ----------
    seas_adj : {'trend', 'seas', None}
        Whether to seasonally adjust.
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``, don't
        save.
    name : str, default 'tfm_wages'
        Either CSV filename for updating and/or saving, or table name if
        using SQL. A suffix is appended on saving: ``_nsa``, ``_trend`` or
        ``_sa`` depending on ``seas_adj``.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default True
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Real wages data : pd.DataFrame

    Raises
    ------
    ValueError
        If ``seas_adj`` is given an invalid keyword.

    """
    allowed = ("trend", "seas", None)
    if seas_adj not in allowed:
        raise ValueError("'seas_adj' can be 'trend', 'seas' or None.")

    wages = labor.get_wages(update_loc=update_loc, only_get=only_get)

    # Work on a relabelled copy so the original columns are kept alongside
    # the converted ones in the final frame.
    real_wages = wages.copy()
    real_wages.columns = [
        "Índice medio de salarios reales",
        "Índice medio de salarios reales privados",
        "Índice medio de salarios reales públicos",
    ]
    metadata._set(real_wages, area="Mercado laboral", currency="UYU",
                  inf_adj="Sí", seas_adj="NSA", ts_type="-", cumperiods=1)
    real_wages = transform.convert_real(real_wages, update_loc=update_loc,
                                        only_get=only_get)

    output = pd.concat([wages, real_wages], axis=1)

    if seas_adj is None:
        seas_text = "nsa"
    else:
        # After validation a non-None value is either "trend" or "seas".
        trend, seasadj = transform.decompose(output, trading=True,
                                             outlier=False)
        if seas_adj == "trend":
            output, seas_text = trend, "trend"
        else:
            output, seas_text = seasadj, "sa"

    output = transform.base_index(output, start_date="2008-07-31")

    name = f"{name}_{seas_text}"
    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output,
                name=name, index_label=index_label)

    return output