def _balance_retriever(update_loc: Union[str, PathLike, Engine,
                                         Connection, None] = None,
                       revise_rows: Union[str, int] = "nodup",
                       save_loc: Union[str, PathLike, Engine,
                                       Connection, None] = None,
                       only_get: bool = False) -> Dict[str, pd.DataFrame]:
    """Helper function. See any of the `balance_...()` functions."""
    if only_get is True and update_loc is not None:
        output = {}
        for dataset in fiscal_sheets.keys():
            data = ops._io(operation="update", data_loc=update_loc,
                           name=f"balance_{dataset}")
            output.update({dataset: data})
        if all(not value.equals(pd.DataFrame()) for value in output.values()):
            return output

    response = requests.get(urls["balance_gps"]["dl"]["main"])
    soup = BeautifulSoup(response.content, "html.parser")
    links = soup.find_all(href=re.compile("\\.xlsx$"))
    link = links[0]["href"]
    xls = pd.ExcelFile(link)
    output = {}
    for dataset, meta in fiscal_sheets.items():
        data = (pd.read_excel(xls, sheet_name=meta["sheet"])
                .dropna(axis=0, thresh=4)
                .dropna(axis=1, thresh=4)
                .transpose()
                .set_index(2, drop=True))
        data.columns = data.iloc[0]
        data = data[data.index.notnull()].rename_axis(None)
        data.index = data.index + MonthEnd(1)
        data.columns = meta["colnames"]
        data = data.apply(pd.to_numeric, errors="coerce")
        metadata._set(data, area="Sector público", currency="UYU",
                      inf_adj="No", unit="Millones", seas_adj="NSA",
                      ts_type="Flujo", cumperiods=1)

        if update_loc is not None:
            previous_data = ops._io(operation="update",
                                    data_loc=update_loc,
                                    name=f"balance_{dataset}")
            data = ops._revise(new_data=data, prev_data=previous_data,
                               revise_rows=revise_rows)

        if save_loc is not None:
            ops._io(operation="save", data_loc=save_loc,
                    data=data, name=f"balance_{dataset}")

        output.update({dataset: data})

    return output
def get_wages(update_loc: Union[str, PathLike, Engine,
                                Connection, None] = None,
              revise_rows: Union[str, int] = "nodup",
              save_loc: Union[str, PathLike, Engine,
                              Connection, None] = None,
              name: str = "wages",
              index_label: str = "index",
              only_get: bool = False) -> pd.DataFrame:
    """Get general, public and private sector wages data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'wages'
        Either CSV filename for updating and/or saving, or table name if
        using SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly wages separated by public and private sector : pd.DataFrame

    """
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc,
                         name=name, index_label=index_label)
        if not output.equals(pd.DataFrame()):
            return output

    historical = pd.read_excel(urls["wages"]["dl"]["historical"],
                               skiprows=8, usecols="A:B")
    historical = historical.dropna(how="any").set_index("Unnamed: 0")
    current = pd.read_excel(urls["wages"]["dl"]["current"],
                            skiprows=8, usecols="A,C:D")
    current = current.dropna(how="any").set_index("Unnamed: 0")
    wages = pd.concat([historical, current], axis=1)
    wages.index = wages.index + MonthEnd(1)
    wages.columns = ["Índice medio de salarios",
                     "Índice medio de salarios privados",
                     "Índice medio de salarios públicos"]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name, index_label=index_label)
        wages = ops._revise(new_data=wages, prev_data=previous_data,
                            revise_rows=revise_rows)

    wages = wages.apply(pd.to_numeric, errors="coerce")
    metadata._set(wages, area="Mercado laboral", currency="UYU",
                  inf_adj="No", unit="2008-07=100", seas_adj="NSA",
                  ts_type="-", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=wages, name=name, index_label=index_label)

    return wages
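# Usage sketch (illustrative, not part of the library; the "econ-data"
# directory is an assumption). The first call downloads the series and saves
# a CSV; the second updates that CSV in place, with revise_rows="nodup"
# replacing any overlapping periods with freshly downloaded data:
#
#     wages = get_wages(save_loc="econ-data")
#     wages = get_wages(update_loc="econ-data", save_loc="econ-data",
#                       revise_rows="nodup")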
def consumer_confidence(
        update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get monthly consumer confidence data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly consumer confidence data : pd.DataFrame

    """
    name = "consumer_confidence"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    raw = pd.read_excel(urls[name]["dl"]["main"], skiprows=3,
                        usecols="B:F", index_col=0)
    output = raw.loc[~pd.isna(raw.index)]
    output.index = output.index + MonthEnd(0)
    output.columns = ["Subíndice: Situación Económica Personal",
                      "Subíndice: Situación Económica del País",
                      "Subíndice: Predisposición a la Compra de Durables",
                      "Índice de Confianza del Consumidor"]
    output = output.apply(pd.to_numeric, errors="coerce")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output, area="Actividad económica", currency="-",
                  inf_adj="No", unit="50 = neutralidad", seas_adj="NSA",
                  ts_type="-", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
def long_rates(update_loc: Union[str, PathLike, Engine,
                                 Connection, None] = None,
               revise_rows: Union[str, int] = "nodup",
               save_loc: Union[str, PathLike, Engine,
                               Connection, None] = None,
               only_get: bool = False) -> pd.DataFrame:
    """Get 10-year government bond interest rates.

    Countries/aggregates selected are US, Germany, France, Italy, Spain,
    United Kingdom, Japan and China.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily 10-year government bond interest rates : pd.DataFrame

    """
    name = "global_long_rates"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    bonds = []
    load_dotenv(Path(get_project_root(), ".env"))
    fred_api_key = os.environ.get("FRED_API_KEY")
    r = requests.get(f"{urls[name]['dl']['fred']}DGS10&api_key="
                     f"{fred_api_key}&file_type=json")
    us = pd.DataFrame.from_records(r.json()["observations"])
    us = us[["date", "value"]].set_index("date")
    us.index = pd.to_datetime(us.index)
    us.columns = ["United States"]
    bonds.append(us.apply(pd.to_numeric, errors="coerce").dropna())

    for country, sid in zip(["Germany", "France", "Italy", "Spain",
                             "United Kingdom", "Japan", "China"],
                            ["23693", "23778", "23738", "23806",
                             "23673", "23901", "29227"]):
        end_date_dt = dt.datetime(2000, 1, 1)
        start_date_dt = dt.datetime(2000, 1, 1)
        aux = []
        while end_date_dt < dt.datetime.now():
            end_date_dt = start_date_dt + dt.timedelta(days=5000)
            params = {
                "curr_id": sid,
                "smlID": str(randint(1000000, 99999999)),
                "header": f"{country} 10-Year Bond Yield Historical Data",
                "st_date": start_date_dt.strftime("%m/%d/%Y"),
                "end_date": end_date_dt.strftime("%m/%d/%Y"),
                "interval_sec": "Daily",
                "sort_col": "date",
                "sort_ord": "DESC",
                "action": "historical_data"
            }
            r = requests.post(urls["global_long_rates"]["dl"]["main"],
                              headers=investing_headers, data=params)
            aux.append(pd.read_html(r.content, match="Price",
                                    index_col=0, parse_dates=True)[0])
            start_date_dt = end_date_dt + dt.timedelta(days=1)
        aux = pd.concat(aux, axis=0)[["Price"]].sort_index()
        aux.columns = [country]
        bonds.append(aux)

    output = bonds[0].join(bonds[1:], how="left")
    output = output.interpolate(method="linear", limit_area="inside")
    output.columns = ["Estados Unidos", "Alemania", "Francia", "Italia",
                      "España", "Reino Unido", "Japón", "China"]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output, area="Global", currency="USD",
                  inf_adj="No", seas_adj="NSA", unit="Tasa",
                  ts_type="-", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["USD", "EUR", "EUR", "EUR",
                                             "EUR", "GBP", "JPY", "CNY"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
def policy_rates(update_loc: Union[str, PathLike, Engine,
                                   Connection, None] = None,
                 revise_rows: Union[str, int] = "nodup",
                 save_loc: Union[str, PathLike, Engine,
                                 Connection, None] = None,
                 only_get: bool = False) -> pd.DataFrame:
    """Get central bank policy interest rates data.

    Countries/aggregates selected are US, Euro Area, Japan and China.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily policy interest rates : pd.DataFrame

    """
    name = "global_policy_rates"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    r = requests.get(urls[name]["dl"]["main"])
    temp_dir = tempfile.TemporaryDirectory()
    with zipfile.ZipFile(BytesIO(r.content), "r") as f:
        f.extractall(path=temp_dir.name)
    path_temp = path.join(temp_dir.name,
                          "WEBSTATS_CBPOL_D_DATAFLOW_csv_row.csv")
    raw = pd.read_csv(path_temp, usecols=[0, 7, 19, 36, 37], index_col=0,
                      header=2, parse_dates=True).dropna(how="all")
    output = (raw.apply(pd.to_numeric, errors="coerce")
              .interpolate(method="linear", limit_area="inside"))
    output.columns = ["China", "Japón", "Estados Unidos", "Eurozona"]
    output = output[["Estados Unidos", "Eurozona", "Japón", "China"]]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output, area="Global", currency="USD",
                  inf_adj="No", seas_adj="NSA", unit="Tasa",
                  ts_type="-", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["USD", "EUR", "JPY", "CNY"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
def get(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        name: str = "naccounts",
        index_label: str = "index",
        only_get: bool = False) -> Dict[str, pd.DataFrame]:
    """Get national accounts data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'naccounts'
        Either CSV filename for updating and/or saving, or table name if
        using SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Quarterly national accounts : Dict[str, pd.DataFrame]
        Each dataframe corresponds to a national accounts table.

    """
    if only_get is True and update_loc is not None:
        output = {}
        for filename, meta in na_metadata.items():
            data = ops._io(operation="update", data_loc=update_loc,
                           name=f"{name}_{filename}",
                           index_label=index_label)
            output.update({filename: data})
        if all(not value.equals(pd.DataFrame()) for value in output.values()):
            return output

    parsed_excels = {}
    for filename, meta in na_metadata.items():
        raw = pd.read_excel(meta["url"], skiprows=9, nrows=meta["Rows"])
        proc = (raw.drop(columns=["Unnamed: 0"])
                .dropna(axis=0, how="all")
                .dropna(axis=1, how="all"))
        proc = proc.transpose()
        proc.columns = meta["Colnames"]
        proc.drop(["Unnamed: 1"], inplace=True)
        _fix_dates(proc)
        if meta["Unit"] == "Miles":
            proc = proc.divide(1000)
            unit_ = "Millones"
        else:
            unit_ = meta["Unit"]

        if update_loc is not None:
            previous_data = ops._io(operation="update",
                                    data_loc=update_loc,
                                    name=f"{name}_{filename}",
                                    index_label=index_label)
            proc = ops._revise(new_data=proc, prev_data=previous_data,
                               revise_rows=revise_rows)

        proc = proc.apply(pd.to_numeric, errors="coerce")
        metadata._set(proc, area="Actividad económica", currency="UYU",
                      inf_adj=meta["Inf. Adj."], unit=unit_,
                      seas_adj=meta["Seas"], ts_type="Flujo", cumperiods=1)

        if save_loc is not None:
            ops._io(operation="save", data_loc=save_loc, data=proc,
                    name=f"{name}_{filename}", index_label=index_label)

        parsed_excels.update({filename: proc})

    return parsed_excels
def _public_debt_retriever(update_loc: Union[str, PathLike, Engine,
                                             Connection, None] = None,
                           revise_rows: Union[str, int] = "nodup",
                           save_loc: Union[str, PathLike, Engine,
                                           Connection, None] = None,
                           only_get: bool = False) -> Dict[str, pd.DataFrame]:
    """Helper function. See any of the `public_debt_...()` functions."""
    if only_get is True and update_loc is not None:
        output = {}
        for meta in ["gps", "nfps", "cb", "assets"]:
            data = ops._io(operation="update", data_loc=update_loc,
                           name=f"public_debt_{meta}")
            output.update({meta: data})
        if all(not value.equals(pd.DataFrame()) for value in output.values()):
            return output

    colnames = ["Total deuda", "Plazo contractual: hasta 1 año",
                "Plazo contractual: entre 1 y 5 años",
                "Plazo contractual: más de 5 años",
                "Plazo residual: hasta 1 año",
                "Plazo residual: entre 1 y 5 años",
                "Plazo residual: más de 5 años",
                "Moneda: pesos", "Moneda: dólares", "Moneda: euros",
                "Moneda: yenes", "Moneda: DEG", "Moneda: otras",
                "Residencia: no residentes", "Residencia: residentes"]

    xls = pd.ExcelFile(urls["public_debt_gps"]["dl"]["main"])
    gps_raw = pd.read_excel(xls, sheet_name="SPG2", usecols="B:Q",
                            index_col=0, skiprows=10,
                            nrows=(dt.datetime.now().year - 1999) * 4)
    gps = gps_raw.dropna(how="any", thresh=2)
    gps.index = pd.date_range(start="1999-12-31", periods=len(gps),
                              freq="Q-DEC")
    gps.columns = colnames

    nfps_raw = pd.read_excel(xls, sheet_name="SPNM bruta",
                             usecols="B:O", index_col=0)
    loc = nfps_raw.index.get_loc("9. Deuda Bruta del Sector Público no "
                                 "monetario por plazo y moneda.")
    nfps = nfps_raw.iloc[loc + 5:, :].dropna(how="any")
    nfps.index = pd.date_range(start="1999-12-31", periods=len(nfps),
                               freq="Q-DEC")
    nfps_extra_raw = pd.read_excel(xls, sheet_name="SPNM bruta",
                                   usecols="O:P", skiprows=11,
                                   nrows=(dt.datetime.now().year - 1999) * 4)
    nfps_extra = nfps_extra_raw.dropna(how="all")
    nfps_extra.index = nfps.index
    nfps = pd.concat([nfps, nfps_extra], axis=1)
    nfps.columns = colnames

    cb_raw = pd.read_excel(xls, sheet_name="BCU bruta",
                           usecols="B:O", index_col=0,
                           skiprows=(dt.datetime.now().year - 1999) * 8 + 20)
    cb = cb_raw.dropna(how="any")
    cb.index = pd.date_range(start="1999-12-31", periods=len(cb),
                             freq="Q-DEC")
    cb_extra_raw = pd.read_excel(xls, sheet_name="BCU bruta",
                                 usecols="O:P", skiprows=11,
                                 nrows=(dt.datetime.now().year - 1999) * 4)
    bcu_extra = cb_extra_raw.dropna(how="all")
    bcu_extra.index = cb.index
    cb = pd.concat([cb, bcu_extra], axis=1)
    cb.columns = colnames

    assets_raw = pd.read_excel(xls, sheet_name="Activos Neta",
                               usecols="B,C,D,K", index_col=0, skiprows=13,
                               nrows=(dt.datetime.now().year - 1999) * 4)
    assets = assets_raw.dropna(how="any")
    assets.index = pd.date_range(start="1999-12-31", periods=len(assets),
                                 freq="Q-DEC")
    assets.columns = ["Total activos", "Sector público no monetario", "BCU"]

    output = {"gps": gps, "nfps": nfps, "cb": cb, "assets": assets}

    for meta, data in output.items():
        if update_loc is not None:
            previous_data = ops._io(operation="update",
                                    data_loc=update_loc,
                                    name=f"public_debt_{meta}")
            data = ops._revise(new_data=data, prev_data=previous_data,
                               revise_rows=revise_rows)
        metadata._set(data, area="Sector público", currency="USD",
                      inf_adj="No", unit="Millones", seas_adj="NSA",
                      ts_type="Stock", cumperiods=1)

        if save_loc is not None:
            ops._io(operation="save", data_loc=save_loc, data=data,
                    name=f"public_debt_{meta}")

        output.update({meta: data})

    return output
def embi_spreads(update_loc: Union[str, PathLike, Engine,
                                   Connection, None] = None,
                 revise_rows: Union[str, int] = "nodup",
                 save_loc: Union[str, PathLike, Engine,
                                 Connection, None] = None,
                 only_get: bool = False) -> pd.DataFrame:
    """Get EMBI spreads for Argentina, Brazil and the EMBI Global.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily EMBI spreads : pd.DataFrame

    """
    name = "regional_embi_spreads"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    global_ = pd.read_excel(urls[name]["dl"]["global"], usecols="A:B",
                            skiprows=1, index_col=0, parse_dates=True)
    global_ = global_.loc[~pd.isna(global_.index)].mul(100)
    region = []
    for cnt in ["argentina", "brasil"]:
        r = requests.get(urls[name]["dl"][cnt])
        aux = pd.DataFrame(r.json())
        aux.set_index(0, drop=True, inplace=True)
        aux.drop("Fecha", inplace=True)
        aux = aux.replace(",", ".", regex=True).apply(pd.to_numeric)
        aux.index = pd.to_datetime(aux.index, format="%d-%m-%Y")
        aux.sort_index(inplace=True)
        aux.columns = [cnt]
        region.append(aux)
    region = region[0].join(region[1]).interpolate(limit_area="inside")
    output = region.join(global_, how="left").interpolate(
        method="linear", limit_area="inside")
    output.columns = ["Argentina", "Brasil", "EMBI Global"]
    output = output.apply(pd.to_numeric, errors="coerce")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output, area="Regional", currency="USD",
                  inf_adj="No", seas_adj="NSA", unit="PBS",
                  ts_type="-", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
def embi_yields(update_loc: Union[str, PathLike, Engine,
                                  Connection, None] = None,
                revise_rows: Union[str, int] = "nodup",
                save_loc: Union[str, PathLike, Engine,
                                Connection, None] = None,
                only_get: bool = False) -> pd.DataFrame:
    """Get EMBI yields for Argentina, Brazil and the EMBI Global.

    Yields are calculated by adding EMBI spreads to the 10-year US Treasury
    bond rate.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily EMBI yields : pd.DataFrame

    """
    name = "regional_embi_yields"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    treasuries = long_rates(update_loc=update_loc, save_loc=save_loc,
                            only_get=only_get)["Estados Unidos"]
    spreads = embi_spreads(update_loc=update_loc, save_loc=save_loc,
                           only_get=only_get)
    treasuries = (treasuries.reindex(spreads.index)
                  .interpolate(method="linear", limit_direction="forward"))
    output = spreads.div(100).add(treasuries.squeeze(), axis=0)

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output, area="Regional", currency="USD",
                  inf_adj="No", seas_adj="NSA", unit="Tasa",
                  ts_type="-", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
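# Worked example of the yield construction in embi_yields(), with made-up
# numbers: spreads come in basis points, so they are divided by 100 before
# being added to the 10-year Treasury rate in percent.
#
#     >>> spread_bps, treasury_pct = 250.0, 1.5  # hypothetical values
#     >>> spread_bps / 100 + treasury_pct
#     4.0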
def cpi(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get consumer price index for Argentina and Brazil.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI : pd.DataFrame

    """
    name = "regional_cpi"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    arg = requests.get(urls[name]["dl"]["ar"],
                       params=urls[name]["dl"]["ar_payload"])
    arg = pd.read_html(arg.content)[0]
    arg.set_index("Fecha", drop=True, inplace=True)
    arg.index = pd.to_datetime(arg.index, format="%d/%m/%Y")
    arg.columns = ["nivel"]
    arg = arg.divide(10)

    arg_unoff = pd.read_excel(urls[name]["dl"]["ar_unofficial"])
    arg_unoff.set_index("date", drop=True, inplace=True)
    arg_unoff.index = arg_unoff.index + MonthEnd(0)
    arg_unoff = arg_unoff.loc[(arg_unoff.index >= "2006-12-01")
                              & (arg_unoff.index <= "2016-12-01"), "index"]
    arg_unoff = arg_unoff.to_frame().pct_change(
        periods=1).multiply(100).dropna()
    arg_unoff.columns = ["nivel"]
    arg = (arg.append(arg_unoff)
           .reset_index()
           .drop_duplicates(subset="index", keep="last")
           .set_index("index", drop=True)
           .sort_index())
    arg = arg.divide(100).add(1).cumprod()

    bra_r = requests.get(urls[name]["dl"]["bra"])
    bra = pd.DataFrame(bra_r.json())[["v"]]
    bra.index = pd.date_range(start="1979-12-31", freq="M",
                              periods=len(bra))
    bra = bra.apply(pd.to_numeric, errors="coerce")
    bra = bra.divide(100).add(1).cumprod()

    output = pd.concat([arg, bra], axis=1)
    output.columns = ["Argentina", "Brasil"]
    metadata._set(output, area="Regional", currency="-",
                  inf_adj="No", seas_adj="NSA", ts_type="-", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["ARS", "BRL"]])
    output = rebase(output, start_date="2010-10-01", end_date="2010-10-31")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
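# The Argentine series in cpi() splices two sources and then rebuilds index
# levels from monthly percent changes with div(100).add(1).cumprod(). A
# minimal sketch of that chaining step with made-up inflation rates:
#
#     >>> import pandas as pd
#     >>> pct = pd.Series([2.0, 1.5, 3.0])  # monthly % changes
#     >>> pct.div(100).add(1).cumprod().round(4).tolist()
#     [1.02, 1.0353, 1.0664]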
def gdp(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False,
        driver: WebDriver = None) -> pd.DataFrame:
    """Get seasonally adjusted real GDP for Argentina and Brazil.

    This function requires a Selenium webdriver. It can be provided in the
    driver parameter, or it will attempt to configure a Chrome webdriver.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.
    driver : selenium.webdriver.chrome.webdriver.WebDriver, default None
        Selenium webdriver for scraping. If None, build a Chrome webdriver.

    Returns
    -------
    Quarterly real GDP : pd.DataFrame

    """
    name = "regional_gdp"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    if driver is None:
        driver = _build()
    driver.get(urls[name]["dl"]["arg_new"])
    time.sleep(5)
    soup = BeautifulSoup(driver.page_source, "lxml")
    driver.quit()
    url = soup.find_all(href=re.compile("desest"))[0]["href"]
    full_url = f"https://www.indec.gob.ar{url}"
    arg = pd.read_excel(full_url, skiprows=3, usecols="D").dropna(how="all")
    arg.index = pd.date_range(start="2004-03-31", freq="Q-DEC",
                              periods=len(arg))
    arg_old = pd.read_excel(urls[name]["dl"]["arg_old"], skiprows=7,
                            usecols="D").dropna(how="all")
    arg_old.index = pd.date_range(start="1993-03-31", freq="Q-DEC",
                                  periods=len(arg_old))
    arg = pd.concat([arg, arg_old], axis=1)
    for row in reversed(range(len(arg))):
        if pd.isna(arg.iloc[row, 0]):
            arg.iloc[row, 0] = (arg.iloc[row, 1] / arg.iloc[row + 1, 1]
                                * arg.iloc[row + 1, 0])
    arg = arg.iloc[:, [0]]

    r = requests.get(urls[name]["dl"]["bra"])
    temp_dir = tempfile.TemporaryDirectory()
    with zipfile.ZipFile(BytesIO(r.content), "r") as f:
        f.extractall(path=temp_dir.name)
    path_temp = path.join(temp_dir.name, listdir(temp_dir.name)[0])
    bra = pd.read_excel(path_temp, usecols="Q", skiprows=3,
                        sheet_name="Val encad preços 95 com ajuste")
    bra.index = pd.date_range(start="1996-03-31", freq="Q-DEC",
                              periods=len(bra))

    output = pd.concat([arg, bra], axis=1).div(1000)
    output.columns = ["Argentina", "Brasil"]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output, area="Regional", currency="-",
                  inf_adj="Const.", seas_adj="SA",
                  unit="Miles de millones", ts_type="Flujo", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["ARS", "BRL"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
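# A caller can pass a preconfigured driver to gdp() instead of relying on
# _build(). Sketch with stock Selenium, assuming chromedriver is on PATH
# (the options shown are illustrative; gdp() quits the driver when done):
#
#     from selenium import webdriver
#
#     options = webdriver.ChromeOptions()
#     options.add_argument("--headless")
#     regional_gdp = gdp(driver=webdriver.Chrome(options=options))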
def monthly_gdp(update_loc: Union[str, PathLike, Engine,
                                  Connection, None] = None,
                revise_rows: Union[str, int] = "nodup",
                save_loc: Union[str, PathLike, Engine,
                                Connection, None] = None,
                only_get: bool = False) -> pd.DataFrame:
    """Get monthly GDP data.

    Countries/aggregates selected are Argentina and Brazil.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly GDP : pd.DataFrame

    """
    name = "regional_monthly_gdp"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    arg = pd.read_excel(urls[name]["dl"]["arg"], usecols="D",
                        skiprows=4).dropna(how="all")
    arg.index = pd.date_range(start="2004-01-31", freq="M",
                              periods=len(arg))
    bra = pd.read_csv(urls[name]["dl"]["bra"], sep=";",
                      index_col=0, decimal=",")
    bra.index = pd.date_range(start="2003-01-31", freq="M",
                              periods=len(bra))

    output = pd.concat([arg, bra], axis=1)
    output.columns = ["Argentina", "Brasil"]
    metadata._set(output, area="Regional", currency="-",
                  inf_adj="Const.", seas_adj="SA", ts_type="Flujo",
                  cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["ARS", "BRL"]])
    output = rebase(output, start_date="2010-01-01", end_date="2010-12-31")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
def get_monthly(update_loc: Union[str, PathLike, Engine,
                                  Connection, None] = None,
                revise_rows: Union[str, int] = "nodup",
                save_loc: Union[str, PathLike, Engine,
                                Connection, None] = None,
                name: str = "nxr_monthly",
                index_label: str = "index",
                only_get: bool = False) -> pd.DataFrame:
    """Get monthly nominal exchange rate data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'nxr_monthly'
        Either CSV filename for updating and/or saving, or table name if
        using SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly nominal exchange rates : pd.DataFrame
        Sell rate, monthly average and end of period.

    """
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc,
                         name=name, index_label=index_label)
        if not output.equals(pd.DataFrame()):
            return output

    nxr_raw = pd.read_excel(urls["nxr_monthly"]["dl"]["main"],
                            skiprows=4, index_col=0, usecols="A,C,F")
    nxr = nxr_raw.dropna(how="any", axis=0)
    nxr.columns = ["Tipo de cambio venta, fin de período",
                   "Tipo de cambio venta, promedio"]
    nxr.index = nxr.index + MonthEnd(1)
    nxr = nxr.apply(pd.to_numeric, errors="coerce")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name, index_label=index_label)
        nxr = ops._revise(new_data=nxr, prev_data=previous_data,
                          revise_rows=revise_rows)

    metadata._set(nxr, area="Precios y salarios", currency="UYU/USD",
                  inf_adj="No", unit="-", seas_adj="NSA",
                  ts_type="-", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=nxr, name=name, index_label=index_label)

    return nxr
def get_rates(update_loc: Union[str, PathLike, Engine,
                                Connection, None] = None,
              revise_rows: Union[str, int] = "nodup",
              save_loc: Union[str, PathLike, Engine,
                              Connection, None] = None,
              name: str = "labor",
              index_label: str = "index",
              only_get: bool = False) -> pd.DataFrame:
    """Get labor market data.

    Get monthly labor force participation rate, employment rate (employment
    to working-age population) and unemployment rate.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'labor'
        Either CSV filename for updating and/or saving, or table name if
        using SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly participation, employment and unemployment rates : pd.DataFrame

    """
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc,
                         name=name, index_label=index_label)
        if not output.equals(pd.DataFrame()):
            return output

    labor_raw = pd.read_excel(urls["labor"]["dl"]["main"],
                              skiprows=39).dropna(axis=0, thresh=2)
    labor = labor_raw[~labor_raw["Unnamed: 0"]
                      .str.contains("-|/|Total", regex=True)]
    labor.index = pd.date_range(start="2006-01-01",
                                periods=len(labor), freq="M")
    labor = labor.drop(columns="Unnamed: 0")
    labor.columns = ["Tasa de actividad: total",
                     "Tasa de actividad: hombres",
                     "Tasa de actividad: mujeres",
                     "Tasa de empleo: total",
                     "Tasa de empleo: hombres",
                     "Tasa de empleo: mujeres",
                     "Tasa de desempleo: total",
                     "Tasa de desempleo: hombres",
                     "Tasa de desempleo: mujeres"]
    missing = pd.read_excel(urls["labor"]["dl"]["missing"],
                            index_col=0, header=0)
    missing.columns = labor.columns
    labor = labor.append(missing)
    labor = labor.loc[~labor.index.duplicated(keep="first")]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name, index_label=index_label)
        labor = ops._revise(new_data=labor, prev_data=previous_data,
                            revise_rows=revise_rows)

    labor = labor.apply(pd.to_numeric, errors="coerce")
    metadata._set(labor, area="Mercado laboral", currency="-",
                  inf_adj="No", unit="Tasa", seas_adj="NSA",
                  ts_type="-", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=labor, name=name, index_label=index_label)

    return labor
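# update_loc/save_loc also accept SQLAlchemy objects, in which case `name`
# is used as the table name instead of a CSV filename. Sketch with an
# on-disk SQLite database (the file name is an assumption):
#
#     from sqlalchemy import create_engine
#
#     eng = create_engine("sqlite:///econuy.db")
#     labor = get_rates(update_loc=eng, save_loc=eng, name="labor")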
def cpi(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get CPI data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI index : pd.DataFrame

    """
    name = "cpi"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    try:
        cpi = pd.read_excel(urls[name]["dl"]["main"], skiprows=7,
                            usecols="A:B", index_col=0).dropna()
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" in str(err):
            certificate = Path(get_project_root(), "utils", "files",
                               "ine_certs.pem")
            r = requests.get(urls[name]["dl"]["main"], verify=certificate)
            cpi = pd.read_excel(BytesIO(r.content), skiprows=7,
                                usecols="A:B", index_col=0).dropna()
        else:
            raise err
    cpi.columns = ["Índice de precios al consumo"]
    cpi.rename_axis(None, inplace=True)
    cpi.index = cpi.index + MonthEnd(1)

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        cpi = ops._revise(new_data=cpi, prev_data=previous_data,
                          revise_rows=revise_rows)

    cpi = cpi.apply(pd.to_numeric, errors="coerce")
    metadata._set(cpi, area="Precios", currency="-",
                  inf_adj="No", unit="2010-10=100", seas_adj="NSA",
                  ts_type="-", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=cpi, name=name)

    return cpi
def nxr(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get USDARS and USDBRL.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily exchange rates : pd.DataFrame

    """
    name = "regional_nxr"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    arg = []
    for dollar in ["ar", "ar_unofficial"]:
        r = requests.get(urls[name]["dl"][dollar])
        aux = pd.DataFrame(r.json())[[0, 2]]
        aux.set_index(0, drop=True, inplace=True)
        aux.drop("Fecha", inplace=True)
        aux = aux.replace(",", ".", regex=True).apply(pd.to_numeric)
        aux.index = pd.to_datetime(aux.index, format="%d-%m-%Y")
        aux.sort_index(inplace=True)
        aux.columns = [dollar]
        arg.append(aux)
    arg = arg[0].join(arg[1], how="left")
    arg.columns = ["Argentina - oficial", "Argentina - informal"]

    r = requests.get(urls[name]["dl"]["bra"])
    bra = pd.DataFrame(r.json())
    bra = [(x["VALDATA"], x["VALVALOR"]) for x in bra["value"]]
    bra = pd.DataFrame.from_records(bra).dropna(how="any")
    bra.set_index(0, inplace=True)
    bra.index = pd.to_datetime(
        bra.index.str[:-4], format="%Y-%m-%dT%H:%M:%S").tz_localize(None)
    bra.columns = ["Brasil"]

    output = arg.join(bra, how="left").interpolate(method="linear",
                                                   limit_area="inside")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output, area="Regional", currency="USD",
                  inf_adj="No", seas_adj="NSA", unit="Tasa",
                  ts_type="-", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3, 5],
                                new_arrays=[["ARS", "ARS", "BRL"],
                                            ["ARS/USD", "ARS/USD",
                                             "BRL/USD"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
def cpi_measures(update_loc: Union[str, PathLike, Engine,
                                   Connection, None] = None,
                 revise_rows: Union[str, int] = "nodup",
                 save_loc: Union[str, PathLike, Engine,
                                 Connection, None] = None,
                 only_get: bool = False) -> pd.DataFrame:
    """Get core CPI, Winsorized CPI, tradable CPI, non-tradable CPI and
    residual CPI.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI measures : pd.DataFrame

    """
    name = "cpi_measures"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    try:
        xls_10_14 = pd.ExcelFile(urls[name]["dl"]["2010-14"])
        xls_15 = pd.ExcelFile(urls[name]["dl"]["2015-"])
        prod_97 = (pd.read_excel(urls[name]["dl"]["1997"], skiprows=5)
                   .dropna(how="any")
                   .set_index("Rubros, Agrupaciones, Subrubros, "
                              "Familias y Artículos")
                   .T)
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" in str(err):
            certificate = Path(get_project_root(), "utils", "files",
                               "ine_certs.pem")
            r = requests.get(urls[name]["dl"]["2010-14"], verify=certificate)
            xls_10_14 = pd.ExcelFile(BytesIO(r.content))
            r = requests.get(urls[name]["dl"]["2015-"], verify=certificate)
            xls_15 = pd.ExcelFile(BytesIO(r.content))
            r = requests.get(urls[name]["dl"]["1997"], verify=certificate)
            prod_97 = (pd.read_excel(BytesIO(r.content), skiprows=5)
                       .dropna(how="any")
                       .set_index("Rubros, Agrupaciones, Subrubros, "
                                  "Familias y Artículos")
                       .T)
        else:
            raise err

    weights_97 = (pd.read_excel(urls[name]["dl"]["1997_weights"],
                                index_col=0)
                  .drop_duplicates(subset="Descripción", keep="first"))
    weights = pd.read_excel(xls_10_14, sheet_name=xls_10_14.sheet_names[0],
                            usecols="A:C", skiprows=13,
                            index_col=0).dropna(how="any")
    weights.columns = ["Item", "Weight"]
    weights_8 = weights.loc[weights.index.str.len() == 8]

    sheets = []
    for excel_file in [xls_10_14, xls_15]:
        for sheet in excel_file.sheet_names:
            raw = pd.read_excel(excel_file, sheet_name=sheet,
                                usecols="D:IN", skiprows=8).dropna(how="all")
            proc = raw.loc[:, raw.columns.str
                           .contains("Indice|Índice")].dropna(how="all")
            sheets.append(proc.T)
    complete_10 = pd.concat(sheets)
    complete_10 = complete_10.iloc[:, 1:]
    complete_10.columns = [weights["Item"], weights.index]
    complete_10.index = pd.date_range(start="2010-12-31",
                                      periods=len(complete_10), freq="M")
    diff_8 = complete_10.loc[:, complete_10.columns.get_level_values(
        level=1).str.len() == 8].pct_change()
    win = pd.DataFrame(winsorize(diff_8, limits=(0.05, 0.05), axis=1))
    win.index = diff_8.index
    win.columns = diff_8.columns.get_level_values(level=1)
    cpi_win = win.mul(weights_8.loc[:, "Weight"].T)
    cpi_win = cpi_win.sum(axis=1).add(1).cumprod().mul(100)

    weights_97["Weight"] = (weights_97["Rubro"]
                            .fillna(weights_97["Agrupación, subrubro, "
                                               "familia"])
                            .fillna(weights_97["Artículo"])
                            .drop(columns=["Rubro",
                                           "Agrupación, subrubro, familia",
                                           "Artículo"]))
    prod_97 = prod_97.loc[:, list(cpi_details["1997_base"].keys())]
    prod_97.index = pd.date_range(start="1997-03-31",
                                  periods=len(prod_97), freq="M")
    weights_97 = (weights_97[weights_97["Descripción"]
                  .isin(cpi_details["1997_weights"])]
                  .set_index("Descripción")
                  .drop(columns=["Rubro", "Agrupación, subrubro, familia",
                                 "Artículo"])).div(100)
    weights_97.index = prod_97.columns

    prod_10 = complete_10.loc[:, list(cpi_details["2010_base"].keys())]
    prod_10 = prod_10.loc[:, ~prod_10.columns.get_level_values(
        level=0).duplicated()]
    prod_10.columns = prod_10.columns.get_level_values(level=0)
    weights_10 = (weights.loc[weights["Item"]
                  .isin(list(cpi_details["2010_base"].keys()))]
                  .drop_duplicates(subset="Item",
                                   keep="first")).set_index("Item")

    items = []
    weights = []
    for item, weight, details in zip([prod_10, prod_97],
                                     [weights_10, weights_97],
                                     ["2010_base", "1997_base"]):
        for tradable in [True, False]:
            items.append(
                item.loc[:, [k for k, v in cpi_details[details].items()
                             if v["Tradable"] is tradable]])
            aux = weight.loc[[k for k, v in cpi_details[details].items()
                              if v["Tradable"] is tradable]]
            weights.append(aux.div(aux.sum()))
        for core in [True, False]:
            items.append(
                item.loc[:, [k for k, v in cpi_details[details].items()
                             if v["Core"] is core]])
            aux = weight.loc[[k for k, v in cpi_details[details].items()
                              if v["Core"] is core]]
            weights.append(aux.div(aux.sum()))

    intermediate = []
    for item, weight in zip(items, weights):
        intermediate.append(item.mul(weight.squeeze()).sum(1))

    output = []
    for x, y in zip(intermediate[:4], intermediate[4:]):
        aux = pd.concat([y.pct_change().loc[y.index < "2011-01-01"],
                         x.pct_change().loc[x.index > "2011-01-01"]])
        output.append(aux.fillna(0).add(1).cumprod().mul(100))

    cpi_re = cpi(update_loc=update_loc, save_loc=save_loc, only_get=True)
    cpi_re = cpi_re.loc[cpi_re.index >= "1997-03-31"]
    output = pd.concat([cpi_re] + output + [cpi_win], axis=1)
    output.columns = ["Índice de precios al consumo: total",
                      "Índice de precios al consumo: transables",
                      "Índice de precios al consumo: no transables",
                      "Índice de precios al consumo: subyacente",
                      "Índice de precios al consumo: residual",
                      "Índice de precios al consumo: Winsorized 0.05"]
    output = output.apply(pd.to_numeric, errors="coerce")
    metadata._set(output, area="Precios y salarios", currency="-",
                  inf_adj="No", unit="2010-12=100", seas_adj="NSA",
                  ts_type="-", cumperiods=1)
    output = transform.rebase(output, start_date="2010-12-01",
                              end_date="2010-12-31")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
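# Winsorizing in cpi_measures() caps the most extreme item-level price
# changes instead of dropping them. A small sketch of scipy's winsorize;
# limits of 0.2 are used here (rather than the 0.05 above) so the clipping
# is visible on five made-up points:
#
#     >>> from scipy.stats.mstats import winsorize
#     >>> winsorize([1, 2, 3, 4, 100], limits=(0.2, 0.2)).tolist()
#     [2, 2, 3, 4, 4]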
def stocks(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
           revise_rows: Union[str, int] = "nodup",
           save_loc: Union[str, PathLike, Engine, Connection, None] = None,
           only_get: bool = False) -> pd.DataFrame:
    """Get stock market index data in USD terms.

    Indexes selected are MERVAL and BOVESPA.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily stock market index in USD terms : pd.DataFrame

    """
    name = "regional_stocks"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    end_date_dt = dt.datetime(2000, 1, 1)
    start_date_dt = dt.datetime(2000, 1, 1)
    aux = []
    while end_date_dt < dt.datetime.now():
        end_date_dt = start_date_dt + dt.timedelta(days=5000)
        params = {
            "curr_id": "13376",
            "smlID": str(randint(1000000, 99999999)),
            "header": "S&P Merval Historical Data",
            "st_date": start_date_dt.strftime("%m/%d/%Y"),
            "end_date": end_date_dt.strftime("%m/%d/%Y"),
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }
        r = requests.post(urls[name]["dl"]["arg"],
                          headers=investing_headers, data=params)
        aux.append(pd.read_html(r.content, match="Price",
                                index_col=0, parse_dates=True)[0])
        start_date_dt = end_date_dt + dt.timedelta(days=1)
    arg = pd.concat(aux, axis=0)[["Price"]].sort_index()

    bra = pd.read_csv(urls[name]["dl"]["bra"], index_col=0,
                      parse_dates=True)[["Close"]]
    bra = bra.loc[bra.index >= "2000-01-01"]

    converters = nxr(update_loc=update_loc, only_get=only_get)
    converters.columns = converters.columns.get_level_values(0)
    arg = pd.merge_asof(arg, converters[["Argentina - informal"]],
                        left_index=True, right_index=True)
    arg = (arg.iloc[:, 0] / arg.iloc[:, 1]).to_frame()
    arg.columns = ["Argentina"]
    bra = pd.merge_asof(bra, converters[["Brasil"]],
                        left_index=True, right_index=True)
    bra = (bra.iloc[:, 0] / bra.iloc[:, 1]).to_frame()
    bra.columns = ["Brasil"]

    output = arg.join(bra, how="left").interpolate(method="linear",
                                                   limit_area="inside")
    metadata._set(output, area="Regional", currency="USD",
                  inf_adj="No", seas_adj="NSA", ts_type="-", cumperiods=1)
    output = rebase(output, start_date="2019-01-02").dropna(how="all")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
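# pd.merge_asof() in stocks() pairs each index observation with the most
# recent exchange rate at or before its date, which tolerates differing
# trading calendars. Minimal sketch with made-up dates and values:
#
#     >>> import pandas as pd
#     >>> px = pd.DataFrame({"Price": [10.0]},
#     ...                   index=pd.to_datetime(["2020-01-02"]))
#     >>> fx = pd.DataFrame({"Brasil": [4.0]},
#     ...                   index=pd.to_datetime(["2020-01-01"]))
#     >>> pd.merge_asof(px, fx, left_index=True, right_index=True)
#                 Price  Brasil
#     2020-01-02   10.0     4.0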
def tax_revenue(
        update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get tax revenues data.

    This retrieval function requires that Ghostscript and Tkinter be found
    in your system.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
        default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly tax revenues : pd.DataFrame

    """
    name = "taxes"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    raw = pd.read_excel(urls[name]["dl"]["main"],
                        usecols="C:AO", index_col=0)
    raw.index = pd.to_datetime(raw.index, errors="coerce")
    output = raw.loc[~pd.isna(raw.index)]
    output.index = output.index + MonthEnd(0)
    output.columns = taxes_columns
    output = output.div(1000000)
    latest = _get_taxes_from_pdf(output)
    output = pd.concat([output, latest], sort=False)
    output = output.loc[~output.index.duplicated(keep="first")]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    output = output.apply(pd.to_numeric, errors="coerce")
    metadata._set(output, area="Sector público", currency="UYU",
                  inf_adj="No", unit="Millones", seas_adj="NSA",
                  ts_type="Flujo", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
def rxr(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get real exchange rates vis-à-vis the US dollar for Argentina and
    Brazil.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly real exchange rate : pd.DataFrame

    """
    name = "regional_rxr"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    proc = _ifs(update_loc=update_loc, save_loc=save_loc, only_get=only_get)

    output = pd.DataFrame()
    output["Argentina"] = (proc["Argentina - oficial"] * proc["US.PCPI_IX"]
                           / proc["ARG CPI"])
    output["Brasil"] = proc["Brasil"] * proc["US.PCPI_IX"] / proc["BRA CPI"]

    metadata._set(output, area="Regional", currency="-",
                  inf_adj="-", seas_adj="NSA", ts_type="-", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["ARS/USD", "BRL/USD"]])
    output = rebase(output, start_date="2019-01-01",
                    end_date="2019-01-31").dropna(how="all")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
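# Hedged note (illustration only): ``rxr`` computes the standard bilateral
# real exchange rate, RXR = NXR * P_us / P_local, where NXR is units of
# local currency per USD and the P's are CPI levels, so a rising value
# indicates a real depreciation of the local currency. Minimal sketch with
# made-up inputs:
def _example_bilateral_rxr(nxr_local: float = 60.0,
                           cpi_us: float = 110.0,
                           cpi_local: float = 400.0) -> float:
    """Return a bilateral real exchange rate from toy inputs."""
    return nxr_local * cpi_us / cpi_local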
def get(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        name: str = "cpi",
        index_label: str = "index",
        only_get: bool = False) -> pd.DataFrame:
    """Get CPI data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'cpi'
        Either CSV filename for updating and/or saving, or table name if
        using SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI index : pd.DataFrame

    """
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc,
                         name=name, index_label=index_label)
        if not output.equals(pd.DataFrame()):
            return output

    cpi_raw = pd.read_excel(urls["cpi"]["dl"]["main"],
                            skiprows=7).dropna(axis=0, thresh=2)
    cpi = (cpi_raw.drop(["Mensual", "Acum.año", "Acum.12 meses"], axis=1)
           .dropna(axis=0, how="all")
           .set_index("Mes y año")
           .rename_axis(None))
    cpi.columns = ["Índice de precios al consumo"]
    cpi.index = cpi.index + MonthEnd(1)

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name,
                                index_label=index_label)
        cpi = ops._revise(new_data=cpi, prev_data=previous_data,
                          revise_rows=revise_rows)

    cpi = cpi.apply(pd.to_numeric, errors="coerce")
    metadata._set(cpi, area="Precios y salarios", currency="-",
                  inf_adj="No", unit="2010-10=100", seas_adj="NSA",
                  ts_type="-", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=cpi, name=name, index_label=index_label)

    return cpi
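# Hedged usage sketch (the directory path is hypothetical): every retrieval
# function in this module shares the same update/save/only_get contract.
# Calling with ``save_loc`` persists a CSV; a later call with ``update_loc``
# and ``only_get=True`` returns the stored data without downloading.
def _example_cpi_roundtrip(data_dir: str = "econuy-data") -> pd.DataFrame:
    fresh = get(save_loc=data_dir)                    # download and persist
    cached = get(update_loc=data_dir, only_get=True)  # reload, no download
    # Barring CSV round-trip formatting, the two frames should match.
    return cached if not cached.empty else fresh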
def _prices(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
            revise_rows: Union[str, int] = "nodup",
            save_loc: Union[str, PathLike, Engine, Connection, None] = None,
            only_get: bool = True) -> pd.DataFrame:
    """Get commodity prices for Uruguay.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default True
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Commodity prices : pd.DataFrame
        Prices and price indexes of relevant commodities for Uruguay.

    """
    bushel_conv = 36.74 / 100
    name = "commodity_prices"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    url = urls["commodity_index"]["dl"]

    # Beef: weekly data aggregated to monthly means. Observations more than
    # two standard deviations above the mean are assumed to be expressed in
    # the wrong unit and are divided by 1,000.
    raw_beef = (pd.read_excel(url["beef"], header=4,
                              index_col=0).dropna(how="all"))
    raw_beef.columns = raw_beef.columns.str.strip()
    proc_beef = raw_beef["Ing. Prom./Ton."].to_frame()
    proc_beef.index = pd.date_range(start="2002-01-04",
                                    periods=len(proc_beef), freq="W-SAT")
    proc_beef["Ing. Prom./Ton."] = np.where(
        proc_beef > np.mean(proc_beef) + np.std(proc_beef) * 2,
        proc_beef / 1000,
        proc_beef,
    )
    beef = proc_beef.resample("M").mean()

    # Pulp: monthly CSV distributed inside a zip archive.
    raw_pulp_r = requests.get(url["pulp"])
    temp_dir = tempfile.TemporaryDirectory()
    with zipfile.ZipFile(BytesIO(raw_pulp_r.content), "r") as f:
        f.extractall(path=temp_dir.name)
    path_temp = path.join(temp_dir.name, "monthly_values.csv")
    raw_pulp = pd.read_csv(path_temp, sep=";").dropna(how="any")
    proc_pulp = raw_pulp.copy().sort_index(ascending=False)
    proc_pulp.index = pd.date_range(start="1990-01-31",
                                    periods=len(proc_pulp), freq="M")
    proc_pulp.drop(["Label", "Codes"], axis=1, inplace=True)
    pulp = proc_pulp

    # Soybean and wheat: daily settle prices in USD/bushel, converted to
    # USD/ton and aggregated to monthly means.
    soy_wheat = []
    for link in [url["soybean"], url["wheat"]]:
        raw = pd.read_csv(link, index_col=0)
        proc = (raw["Settle"] * bushel_conv).to_frame()
        proc.index = pd.to_datetime(proc.index, format="%Y-%m-%d")
        proc.sort_index(inplace=True)
        soy_wheat.append(proc.resample("M").mean())
    soybean = soy_wheat[0]
    wheat = soy_wheat[1]

    # Milk: current data scraped from the Oceania price report, backfilled
    # with an older EUR-denominated series converted to USD.
    milk_r = requests.get(url["milk1"])
    milk_soup = BeautifulSoup(milk_r.content, "html.parser")
    links = milk_soup.find_all(href=re.compile("Oceanía"))
    xls = links[0]["href"]
    raw_milk = pd.read_excel(requests.utils.quote(xls).replace("%3A", ":"),
                             skiprows=14,
                             nrows=dt.datetime.now().year - 2006)
    raw_milk.dropna(how="all", axis=1, inplace=True)
    raw_milk.drop(["Promedio ", "Variación"], axis=1, inplace=True)
    raw_milk.columns = ["Año/Mes"] + list(range(1, 13))
    proc_milk = pd.melt(raw_milk, id_vars=["Año/Mes"])
    proc_milk.sort_values(by=["Año/Mes", "variable"], inplace=True)
    proc_milk.index = pd.date_range(start="2007-01-31",
                                    periods=len(proc_milk), freq="M")
    proc_milk = proc_milk.iloc[:, 2].to_frame()

    prev_milk = pd.read_excel(url["milk2"],
                              sheet_name="Dairy Products Prices",
                              index_col=0, usecols="A,D", skiprows=5)
    prev_milk = prev_milk.resample("M").mean()
    eurusd_r = requests.get(
        "http://fx.sauder.ubc.ca/cgi/fxdata",
        params=f"b=USD&c=EUR&rd=&fd=1&fm=1&fy=2001&ld=31&lm=12&ly="
               f"{dt.datetime.now().year}&y=monthly&q=volume&f=html&o=&cu=on")
    eurusd = pd.read_html(eurusd_r.content)[0].drop("MMM YYYY", axis=1)
    eurusd.index = pd.date_range(start="2001-01-31",
                                 periods=len(eurusd), freq="M")
    eurusd = eurusd.reindex(prev_milk.index)
    prev_milk = prev_milk.divide(eurusd.values).multiply(10)
    prev_milk = prev_milk.loc[prev_milk.index < min(proc_milk.index)]
    prev_milk.columns, proc_milk.columns = ["Price"], ["Price"]
    milk = pd.concat([prev_milk, proc_milk])

    # Rice, wood, wool, barley and gold: IMF primary commodity prices.
    raw_imf = pd.read_excel(url["imf"])
    raw_imf.columns = raw_imf.iloc[0, :]
    proc_imf = raw_imf.iloc[3:, 1:]
    proc_imf.index = pd.date_range(start="1980-01-31",
                                   periods=len(proc_imf), freq="M")
    rice = proc_imf[proc_imf.columns[proc_imf.columns.str.contains("Rice")]]
    wood = proc_imf[proc_imf.columns[proc_imf.columns.str.contains(
        "Sawnwood")]]
    wood = wood.mean(axis=1).to_frame()
    wool = proc_imf[proc_imf.columns[proc_imf.columns.str.startswith("Wool")]]
    wool = wool.mean(axis=1).to_frame()
    barley = proc_imf[proc_imf.columns[proc_imf.columns.str.startswith(
        "Barley")]]
    gold = proc_imf[proc_imf.columns[proc_imf.columns.str.startswith("Gold")]]

    complete = pd.concat(
        [beef, pulp, soybean, milk, rice, wood, wool, barley, gold, wheat],
        axis=1)
    complete = complete.reindex(beef.index).dropna(thresh=8)
    complete.columns = ["Beef", "Pulp", "Soybeans", "Milk", "Rice", "Wood",
                        "Wool", "Barley", "Gold", "Wheat"]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        complete = ops._revise(new_data=complete, prev_data=previous_data,
                               revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=complete, name=name)

    return complete
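# Hedged sketch (illustration only) of the unit-repair heuristic applied to
# the beef series in ``_prices``: values more than two standard deviations
# above the mean are treated as mis-scaled by a factor of 1,000 and divided
# accordingly. Toy, self-contained version:
def _example_unit_repair() -> pd.Series:
    values = pd.Series([1.20, 1.30, 1.25, 1.28, 1.22, 1.31, 1.27, 1290.0])
    cutoff = np.mean(values) + 2 * np.std(values)
    # Only the last observation exceeds the cutoff; it is rescaled to 1.29.
    return pd.Series(np.where(values > cutoff, values / 1000, values),
                     index=values.index)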
def stocks(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
           revise_rows: Union[str, int] = "nodup",
           save_loc: Union[str, PathLike, Engine, Connection, None] = None,
           only_get: bool = False) -> pd.DataFrame:
    """Get stock market index data.

    Indexes selected are S&P 500, Euronext 100, Nikkei 225 and Shanghai
    Composite.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily stock market index in USD : pd.DataFrame

    """
    name = "global_stocks"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    yahoo = []
    for series in ["spy", "n100", "nikkei", "sse"]:
        aux = pd.read_csv(urls[name]["dl"][series], index_col=0,
                          usecols=[0, 4], parse_dates=True)
        aux.columns = [series]
        yahoo.append(aux)
    output = pd.concat(yahoo, axis=1).interpolate(method="linear",
                                                  limit_area="inside")
    output.columns = ["S&P 500", "Euronext 100", "Nikkei 225",
                      "Shanghai Stock Exchange Composite"]

    metadata._set(output, area="Global", currency="USD",
                  inf_adj="No", seas_adj="NSA", ts_type="-", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["USD", "EUR", "JPY", "CNY"]])
    output = rebase(output, start_date="2019-01-02")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
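# Hedged sketch (``rebase`` is the library helper used above; this is a
# hand-rolled, minimal equivalent for illustration): re-indexing each column
# so its value on a chosen date equals 100 is what makes indexes with very
# different levels comparable on one chart.
def _example_rebase(start_date: str = "2019-01-02") -> pd.DataFrame:
    df = pd.DataFrame(
        {"A": [50.0, 55.0, 60.0], "B": [2000.0, 1900.0, 2100.0]},
        index=pd.to_datetime(["2019-01-02", "2019-01-03", "2019-01-04"]))
    # Divide every row by the base-date row, then scale to 100.
    return df.div(df.loc[start_date]).mul(100)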
def _weights(
        update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = True) -> pd.DataFrame:
    """Get commodity export weights for Uruguay.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default True
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Commodity weights : pd.DataFrame
        Export-based weights for relevant commodities to Uruguay.

    """
    name = "commodity_weights"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc,
                         name=name, multiindex=False)
        if not output.equals(pd.DataFrame()):
            return output

    base_url = ("http://comtrade.un.org/api/get?max=1000&type=C&freq=A"
                "&px=S3&ps")
    prods = "%2C".join(["0011", "011", "01251", "01252", "0176", "022",
                        "041", "042", "043", "2222", "24", "25", "268",
                        "97"])
    raw = []
    for year in range(1992, dt.datetime.now().year - 1):
        full_url = f"{base_url}={year}&r=all&p=858&rg=1&cc={prods}"
        un_r = requests.get(full_url)
        raw.append(pd.DataFrame(un_r.json()["dataset"]))
    raw = pd.concat(raw, axis=0)

    table = raw.groupby(["period", "cmdDescE"]).sum().reset_index()
    table = table.pivot(index="period", columns="cmdDescE",
                        values="TradeValue")
    table.fillna(0, inplace=True)
    percentage = table.div(table.sum(axis=1), axis=0)
    percentage.index = (pd.to_datetime(percentage.index, format="%Y")
                        + YearEnd(1))
    roll = percentage.rolling(window=3, min_periods=3).mean()
    output = roll.resample("M").bfill()

    beef = ["BOVINE MEAT",
            "Edible offal of bovine animals, fresh or chilled",
            "Meat and offal (other than liver), of bovine animals, "
            "prepared or preserv",
            "Edible offal of bovine animals, frozen",
            "Bovine animals, live"]
    output["Beef"] = output[beef].sum(axis=1, min_count=len(beef))
    output.drop(beef, axis=1, inplace=True)
    output.columns = ["Barley", "Wood", "Gold", "Milk", "Pulp", "Rice",
                      "Soybeans", "Wheat", "Wool", "Beef"]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
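# Hedged sketch (illustration only) of the smoothing step in ``_weights``:
# annual export shares are averaged over a 3-year rolling window and then
# spread to a monthly index with backfill, so each month carries the latest
# available smoothed weight. Toy, self-contained version:
def _example_weight_smoothing() -> pd.DataFrame:
    shares = pd.DataFrame(
        {"Beef": [0.30, 0.34, 0.26, 0.31]},
        index=pd.date_range("2016-12-31", periods=4, freq="A-DEC"))
    rolled = shares.rolling(window=3, min_periods=3).mean()
    # Months between year-ends are backfilled from the next annual value.
    return rolled.resample("M").bfill()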
def gdp(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get seasonally adjusted real quarterly GDP for select countries.

    Countries/aggregates are US, EU-27, Japan and China.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Quarterly real GDP in seasonally adjusted terms : pd.DataFrame

    """
    name = "global_gdp"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    # China: OECD QoQ and YoY series, spliced with observed levels and
    # seasonally adjusted.
    chn_y = dt.datetime.now().year + 1
    chn_r = requests.get(f"{urls[name]['dl']['chn_oecd']}{chn_y}-Q4")
    chn_json = chn_r.json()
    chn_datasets = []
    for dataset, start in zip(["0", "1"], ["2011-03-31", "1993-03-31"]):
        raw = chn_json["dataSets"][0]["series"][f"0:0:{dataset}:0"][
            "observations"]
        values = [x[0] for x in raw.values()]
        df = pd.DataFrame(data=values,
                          index=pd.date_range(start=start, freq="Q-DEC",
                                              periods=len(values)),
                          columns=["China"])
        chn_datasets.append(df)
    chn_qoq = chn_datasets[0]
    chn_yoy = chn_datasets[1]
    chn_obs = pd.read_excel(urls["global_gdp"]["dl"]["chn_obs"],
                            index_col=0).dropna(how="all",
                                                axis=1).dropna(how="all",
                                                               axis=0)
    chn_obs = chn_obs.loc[(chn_obs.index > "2011-01-01") &
                          (chn_obs.index < "2016-01-01")]
    chn_yoy["volume"] = chn_obs
    # Back-cast levels before 2011 from YoY growth:
    # level[t] = level[t+4] / (1 + yoy[t+4] / 100).
    for row in reversed(range(len(chn_yoy.loc[chn_yoy.index <
                                              "2011-01-01"]))):
        if pd.isna(chn_yoy.iloc[row, 1]):
            chn_yoy.iloc[row, 1] = (chn_yoy.iloc[row + 4, 1]
                                    / (1 + chn_yoy.iloc[row + 4, 0] / 100))
    chn_yoy = chn_yoy[["volume"]].loc[chn_yoy.index < "2016-01-01"]
    metadata._set(chn_yoy)
    chn_sa = decompose(chn_yoy[["volume"]].loc[chn_yoy.index < "2016-01-01"],
                       component="seas", method="x13")
    chn_sa = pd.concat([chn_sa, chn_qoq], axis=1)
    # Extend the SA level series forward with QoQ growth where available.
    for row in range(len(chn_sa)):
        if not pd.isna(chn_sa.iloc[row, 1]):
            chn_sa.iloc[row, 0] = (chn_sa.iloc[row - 1, 0]
                                   * (1 + chn_sa.iloc[row, 1] / 100))
    chn = chn_sa.iloc[:, [0]].div(10)

    # US, EU and Japan: FRED series, rescaled to a common unit.
    gdps = []
    load_dotenv(Path(get_project_root(), ".env"))
    fred_api_key = os.environ.get("FRED_API_KEY")
    for series in ["GDPC1", "CLVMNACSCAB1GQEU272020", "JPNRGDPEXP"]:
        r = requests.get(f"{urls[name]['dl']['fred']}{series}&api_key="
                         f"{fred_api_key}&file_type=json")
        aux = pd.DataFrame.from_records(r.json()["observations"])
        aux = aux[["date", "value"]].set_index("date")
        aux.index = pd.to_datetime(aux.index)
        aux.index = aux.index.shift(3, freq="M") + MonthEnd(0)
        aux.columns = [series]
        aux = aux.apply(pd.to_numeric, errors="coerce")
        if series == "GDPC1":
            aux = aux.div(4)
        elif series == "CLVMNACSCAB1GQEU272020":
            aux = aux.div(1000)
        gdps.append(aux)
    gdps = pd.concat(gdps, axis=1)

    output = pd.concat([gdps, chn], axis=1)
    output.columns = ["Estados Unidos", "Unión Europea", "Japón", "China"]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output, area="Global", currency="USD",
                  inf_adj="Const.", unit="Miles de millones", seas_adj="SA",
                  ts_type="Flujo", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["USD", "EUR", "JPY", "CNY"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
def cpi_measures(
        update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        name: str = "tfm_prices",
        index_label: str = "index",
        only_get: bool = False) -> pd.DataFrame:
    """Get core CPI, Winsorized CPI, tradable CPI and non-tradable CPI.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'tfm_prices'
        Either CSV filename for updating and/or saving, or table name if
        using SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI measures : pd.DataFrame

    """
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc,
                         name=name, index_label=index_label)
        if not output.equals(pd.DataFrame()):
            return output

    xls = pd.ExcelFile(urls["tfm_prices"]["dl"]["main"])
    weights = pd.read_excel(xls, sheet_name=xls.sheet_names[0],
                            usecols="A:C", skiprows=14,
                            index_col=0).dropna(how="any")
    weights.columns = ["Item", "Weight"]
    weights_8 = weights.loc[weights.index.str.len() == 8]

    sheets = []
    for sheet in xls.sheet_names:
        raw = pd.read_excel(xls, sheet_name=sheet, usecols="D:IN",
                            skiprows=9).dropna(how="all")
        proc = raw.loc[:, raw.columns.str.contains(
            "Indice|Índice")].dropna(how="all")
        sheets.append(proc.T)
    output = pd.concat(sheets)
    output = output.iloc[:, 1:]
    output.columns = [weights["Item"], weights.index]
    output.index = pd.date_range(start="2010-12-31",
                                 periods=len(output), freq="M")

    # Winsorized CPI: item-level monthly inflation, trimmed at the 5th and
    # 95th percentiles each month, weighted and cumulated into an index.
    diff_8 = output.loc[:, output.columns.get_level_values(
        level=1).str.len() == 8].pct_change()
    win = pd.DataFrame(winsorize(diff_8, limits=(0.05, 0.05), axis=1))
    win.index = diff_8.index
    win.columns = diff_8.columns.get_level_values(level=1)
    cpi_win = win.mul(weights_8.loc[:, "Weight"].T)
    cpi_win = cpi_win.sum(axis=1).add(1).cumprod().mul(100)

    prod_97 = (pd.read_excel(urls["tfm_prices"]["dl"]["historical"],
                             skiprows=5).dropna(how="any")
               .set_index("Rubros, Agrupaciones y Subrubros").T)
    prod_97 = prod_97.loc[:, prod_details[1]].pct_change()
    output_8 = output.loc[:, prod_details[0]].pct_change()
    output_8 = output_8.loc[:, ~output_8.columns.get_level_values(
        level=0).duplicated()]
    output_8.columns = output_8.columns.get_level_values(level=0)
    prod_97.columns = output_8.columns.get_level_values(level=0)
    complete = pd.concat([prod_97, output_8.iloc[1:]])
    complete.index = pd.date_range(start="1997-03-31", freq="M",
                                   periods=len(complete))
    weights_complete = weights.loc[weights["Item"].isin(complete.columns)]
    weights_complete = weights_complete.loc[
        ~weights_complete["Item"].duplicated()].set_index("Item")

    # Tradable, non-tradable and core CPI: weighted cumulated inflation over
    # the relevant item subsets.
    tradable = complete.loc[:, [bool(x) for x in prod_details[2]]]
    tradable_weights = weights_complete.loc[
        weights_complete.index.isin(tradable.columns), "Weight"].T
    tradable_weights = tradable_weights.div(tradable_weights.sum())
    tradable = (tradable.mul(tradable_weights).sum(axis=1)
                .add(1).cumprod().mul(100))
    non_tradable = complete.loc[:, [not bool(x) for x in prod_details[2]]]
    non_tradable_weights = weights_complete.loc[
        weights_complete.index.isin(non_tradable.columns), "Weight"].T
    non_tradable_weights = non_tradable_weights.div(
        non_tradable_weights.sum())
    non_tradable = (non_tradable.mul(non_tradable_weights).sum(axis=1)
                    .add(1).cumprod().mul(100))
    core = complete.loc[:, [bool(x) for x in prod_details[3]]]
    core_weights = weights_complete.loc[
        weights_complete.index.isin(core.columns), "Weight"].T
    core_weights = core_weights.div(core_weights.sum())
    core = core.mul(core_weights).sum(axis=1).add(1).cumprod().mul(100)

    cpi_re = cpi.get(update_loc=update_loc, save_loc=save_loc, only_get=True)
    cpi_re = cpi_re.loc[cpi_re.index >= "1997-03-31"]
    output = pd.concat([cpi_re, tradable, non_tradable, core, cpi_win],
                       axis=1)
    output = transform.base_index(output, start_date="2010-12-01",
                                  end_date="2010-12-31")
    output.columns = ["Índice de precios al consumo: total",
                      "Índice de precios al consumo: transables",
                      "Índice de precios al consumo: no transables",
                      "Índice de precios al consumo: subyacente",
                      "Índice de precios al consumo: Winsorized 0.05"]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name,
                                index_label=index_label)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    output = output.apply(pd.to_numeric, errors="coerce")
    metadata._set(output, area="Precios y salarios", currency="-",
                  inf_adj="No", unit="2010-12=100", seas_adj="NSA",
                  ts_type="-", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name, index_label=index_label)

    return output
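# Hedged sketch (illustration only) of the Winsorized-CPI construction in
# ``cpi_measures``: item-level monthly inflation rates are winsorized across
# items each month (5% per tail), weighted, summed, and cumulated back into
# an index. Uses the same ``scipy.stats.mstats.winsorize`` call assumed to
# be imported at the top of this module; the data here are synthetic.
def _example_winsorized_index() -> pd.Series:
    rng = np.random.default_rng(0)
    diffs = pd.DataFrame(rng.normal(0.005, 0.01, size=(12, 20)),
                         index=pd.date_range("2020-01-31", periods=12,
                                             freq="M"))
    win = pd.DataFrame(winsorize(diffs, limits=(0.05, 0.05), axis=1),
                       index=diffs.index)
    weights = pd.Series(1 / 20, index=win.columns)  # equal toy weights
    return win.mul(weights).sum(axis=1).add(1).cumprod().mul(100)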
def nxr(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get currencies data.

    Selected currencies are the US dollar index, USDEUR, USDJPY and USDCNY.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily currencies : pd.DataFrame

    """
    name = "global_nxr"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    output = []
    for series in ["dollar", "eur", "jpy", "cny"]:
        aux = pd.read_csv(urls[name]["dl"][series], index_col=0,
                          usecols=[0, 4], parse_dates=True)
        aux.columns = [series]
        if series == "dollar":
            aux.dropna(inplace=True)
        output.append(aux)
    output = output[0].join(output[1:]).interpolate(method="linear",
                                                    limit_area="inside")
    output.columns = ["Índice Dólar", "Euro", "Yen", "Renminbi"]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output, area="Global", currency="USD",
                  inf_adj="No", seas_adj="NSA", ts_type="-", cumperiods=1)
    metadata._modify_multiindex(
        output, levels=[3, 5],
        new_arrays=[["USD", "EUR", "JPY", "CNY"],
                    ["Canasta/USD", "EUR/USD", "JPY/USD", "CNY/USD"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
def nxr_monthly(
        update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get monthly nominal exchange rate data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly nominal exchange rates : pd.DataFrame
        Sell rate, monthly average and end of period.

    """
    name = "nxr_monthly"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    try:
        nxr_raw = pd.read_excel(urls[name]["dl"]["main"], skiprows=4,
                                index_col=0, usecols="A,C,F")
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" in str(err):
            certificate = Path(get_project_root(), "utils", "files",
                               "ine_certs.pem")
            r = requests.get(urls[name]["dl"]["main"],
                             verify=certificate)
            nxr_raw = pd.read_excel(BytesIO(r.content), skiprows=4,
                                    index_col=0, usecols="A,C,F")
        else:
            raise err
    nxr = nxr_raw.dropna(how="any", axis=0)
    nxr.columns = ["Tipo de cambio venta, fin de período",
                   "Tipo de cambio venta, promedio"]
    nxr.index = nxr.index + MonthEnd(1)
    nxr = nxr.apply(pd.to_numeric, errors="coerce")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        nxr = ops._revise(new_data=nxr, prev_data=previous_data,
                          revise_rows=revise_rows)

    metadata._set(nxr, area="Precios", currency="UYU/USD",
                  inf_adj="No", unit="-", seas_adj="NSA",
                  ts_type="-", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=nxr, name=name)

    return nxr
def income_household(
        update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get average household income.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly average household income : pd.DataFrame

    """
    name = "income_household"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    try:
        raw = pd.read_excel(urls[name]["dl"]["main"], sheet_name="Mensual",
                            skiprows=5, index_col=0).dropna(how="all")
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" in str(err):
            certificate = Path(get_project_root(), "utils", "files",
                               "ine_certs.pem")
            r = requests.get(urls[name]["dl"]["main"],
                             verify=certificate)
            raw = pd.read_excel(BytesIO(r.content), sheet_name="Mensual",
                                skiprows=5, index_col=0).dropna(how="all")
        else:
            raise err
    raw.index = pd.to_datetime(raw.index)
    output = raw.loc[~pd.isna(raw.index)]
    output.index = output.index + MonthEnd(0)
    output.columns = ["Total país", "Montevideo", "Interior: total",
                      "Interior: localidades de más de 5 mil hab.",
                      "Interior: localidades pequeñas y rural"]

    missing = pd.read_excel(urls[name]["dl"]["missing"],
                            index_col=0, header=0).iloc[:, 10:13]
    missing.columns = output.columns[:3]
    output = pd.concat([output, missing], sort=False)
    output = output.apply(pd.to_numeric, errors="coerce")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output, area="Ingresos", currency="UYU",
                  inf_adj="No", unit="Pesos", seas_adj="NSA",
                  ts_type="Flujo", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
def get_official(
        update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        name: str = "rxr_official",
        index_label: str = "index",
        only_get: bool = False) -> pd.DataFrame:
    """Get official real exchange rates from the BCU website.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'rxr_official'
        Either CSV filename for updating and/or saving, or table name if
        using SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly real exchange rates vs select countries/regions : pd.DataFrame
        Available: global, regional, extraregional, Argentina, Brazil, US,
        Mexico, Germany, Spain, UK, Italy and China.

    """
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc,
                         name=name, index_label=index_label)
        if not output.equals(pd.DataFrame()):
            return output

    r = requests.get(urls["rxr_official"]["dl"]["main"])
    soup = BeautifulSoup(r.content, "html.parser")
    links = soup.find_all(href=re.compile("eese[A-Za-z0-9_]+\\.xls$"))
    xls = "https://www.bcu.gub.uy" + links[0]["href"]
    raw = pd.read_excel(xls, skiprows=8, usecols="B:N", index_col=0)
    proc = raw.dropna(how="any")
    proc.columns = ["Global", "Extrarregional", "Regional", "Argentina",
                    "Brasil", "EE.UU.", "México", "Alemania", "España",
                    "Reino Unido", "Italia", "China"]
    proc.index = pd.to_datetime(proc.index) + MonthEnd(1)

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name,
                                index_label=index_label)
        proc = ops._revise(new_data=proc, prev_data=previous_data,
                           revise_rows=revise_rows)

    metadata._set(proc, area="Precios y salarios", currency="UYU/Otro",
                  inf_adj="No", unit="2017=100", seas_adj="NSA",
                  ts_type="-", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=proc, name=name, index_label=index_label)

    return proc