def real_wages(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
               save_loc: Union[str, PathLike, Engine, Connection, None] = None,
               only_get: bool = True) -> pd.DataFrame:
    """
    Get real wages.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default True
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Real wages data : pd.DataFrame

    """
    name = "real_wages"

    wages = nominal_wages(update_loc=update_loc, only_get=only_get)
    wages.columns = ["Índice medio de salarios reales",
                     "Índice medio de salarios reales privados",
                     "Índice medio de salarios reales públicos"]
    metadata._set(wages, area="Mercado laboral", currency="UYU",
                  inf_adj="Sí", seas_adj="NSA", ts_type="-", cumperiods=1)

    output = transform.convert_real(wages, update_loc=update_loc,
                                    only_get=only_get)
    output = transform.rebase(output, start_date="2008-07-31")

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
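# Hedged sketch (not part of the library): illustrates the deflate-then-rebase
# pattern used by real_wages(), i.e. dividing a nominal index by a price index
# and expressing the result with a chosen period equal to 100. All names and
# numbers below are invented for illustration.
def _example_deflate_and_rebase():
    import pandas as pd

    idx = pd.date_range("2008-06-30", periods=4, freq="M")
    nominal = pd.Series([100.0, 103.0, 107.0, 112.0], index=idx)
    cpi = pd.Series([100.0, 101.0, 102.5, 104.0], index=idx)
    real = nominal.div(cpi)
    # Rebase so that 2008-07-31 equals 100, mirroring the transform.rebase call above.
    return real.div(real.loc["2008-07-31"]).mul(100)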
def stocks(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
           revise_rows: Union[str, int] = "nodup",
           save_loc: Union[str, PathLike, Engine, Connection, None] = None,
           only_get: bool = False) -> pd.DataFrame:
    """Get stock market index data.

    Indexes selected are S&P 500, Euronext 100, Nikkei 225 and Shanghai
    Composite.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily stock market index in USD : pd.DataFrame

    """
    name = "global_stocks"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    yahoo = []
    for series in ["spy", "n100", "nikkei", "sse"]:
        aux = pd.read_csv(urls[name]["dl"][series], index_col=0,
                          usecols=[0, 4], parse_dates=True)
        aux.columns = [series]
        yahoo.append(aux)
    output = pd.concat(yahoo, axis=1).interpolate(method="linear",
                                                  limit_area="inside")
    output.columns = ["S&P 500", "Euronext 100", "Nikkei 225",
                      "Shanghai Stock Exchange Composite"]
    metadata._set(output, area="Global", currency="USD",
                  inf_adj="No", seas_adj="NSA",
                  ts_type="-", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["USD", "EUR", "JPY", "CNY"]])
    output = rebase(output, start_date="2019-01-02")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
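# Hedged sketch (not from the library): shows how interpolate(limit_area="inside")
# fills gaps between observed values without extrapolating beyond the first or
# last observation, which is how the indexes above are aligned before rebasing.
def _example_inside_interpolation():
    import pandas as pd

    s = pd.Series([1.0, None, 3.0, None],
                  index=pd.date_range("2019-01-01", periods=4, freq="D"))
    # The interior gap (Jan 2) is filled; the trailing gap (Jan 4) stays NaN.
    return s.interpolate(method="linear", limit_area="inside")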
def store_transformed_data(
    real_start,
    real_end,
    resample_freq,
    resample_operation,
    rolling_periods,
    rolling_operation,
    chg_diff_operation,
    chg_diff_period,
    rebase_start,
    rebase_end,
    rebase_base,
    decompose_method,
    decompose_component,
    order,
    query_data,
    query_metadata,
):
    # No transformations selected: pass the queried data through unchanged.
    if not order:
        return query_data, query_metadata
    # Nothing queried yet: return empty stores.
    if not query_data:
        return {}, {}
    # Abort the callback if a selected transformation is missing its parameters.
    if (
        ("resample" in order and (not resample_freq or not resample_operation))
        or ("rolling" in order and (not rolling_periods or not rolling_operation))
        or ("chg-diff" in order and (not chg_diff_operation or not chg_diff_period))
        or ("rebase" in order and (not rebase_start or not rebase_base))
        or (
            "decompose" in order
            and (not decompose_method or not decompose_component)
        )
    ):
        raise PreventUpdate

    # Rebuild the DataFrame and its metadata MultiIndex from the stored records.
    data = pd.DataFrame.from_records(query_data, coerce_float=True, index="index")
    data.index = pd.to_datetime(data.index)
    metadata = pd.DataFrame.from_records(query_metadata)
    data.columns = pd.MultiIndex.from_frame(metadata)
    p = Pipeline(location=db.engine, download=False)

    # Map each transformation name to a callable that captures its parameters.
    transformations = {
        "usd": lambda x: convert_usd(x, pipeline=p, errors="ignore"),
        "real": lambda x: convert_real(
            x, start_date=real_start, end_date=real_end, pipeline=p, errors="ignore"
        ),
        "gdp": lambda x: convert_gdp(x, pipeline=p, errors="ignore"),
        "resample": lambda x: resample(
            x, rule=resample_freq, operation=resample_operation
        ),
        "rolling": lambda x: rolling(
            x, window=rolling_periods, operation=rolling_operation
        ),
        "chg-diff": lambda x: chg_diff(
            x, operation=chg_diff_operation, period=chg_diff_period
        ),
        "rebase": lambda x: rebase(
            x, start_date=rebase_start, end_date=rebase_end, base=rebase_base
        ),
        "decompose": lambda x: decompose(
            x,
            component=decompose_component,
            method=decompose_method,
            force_x13=True,
            errors="ignore",
        ),
    }

    # Apply the selected transformations in the order chosen by the user.
    transformed_data = data.copy()
    for t in order:
        transformed_data = transformations[t](transformed_data)
    transformed_metadata = transformed_data.columns.to_frame()
    transformed_data.columns = transformed_data.columns.get_level_values(0)
    transformed_data.reset_index(inplace=True)
    return transformed_data.to_dict("records"), transformed_metadata.to_dict(
        "records"
    )
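# Hedged sketch (not part of the callback above): demonstrates the dispatch
# pattern it relies on, i.e. mapping transformation names to callables and
# applying them in the user-selected order. The toy transformations here are
# placeholders, not econuy's convert_*/resample/rebase functions.
def _example_ordered_transformations():
    import pandas as pd

    data = pd.DataFrame({"x": [1.0, 2.0, 4.0, 8.0]})
    transformations = {
        "double": lambda df: df * 2,
        "pct": lambda df: df.pct_change(),
    }
    order = ["double", "pct"]  # applied left to right, as in the loop above
    out = data.copy()
    for name in order:
        out = transformations[name](out)
    return out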
def cpi_measures(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
                 revise_rows: Union[str, int] = "nodup",
                 save_loc: Union[str, PathLike, Engine, Connection, None] = None,
                 only_get: bool = False) -> pd.DataFrame:
    """
    Get core CPI, Winsorized CPI, tradable CPI, non-tradable CPI and residual
    CPI.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI measures : pd.DataFrame

    """
    name = "cpi_measures"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    try:
        xls_10_14 = pd.ExcelFile(urls[name]["dl"]["2010-14"])
        xls_15 = pd.ExcelFile(urls[name]["dl"]["2015-"])
        prod_97 = (pd.read_excel(urls[name]["dl"]["1997"], skiprows=5)
                   .dropna(how="any")
                   .set_index("Rubros, Agrupaciones, Subrubros, Familias y Artículos")
                   .T)
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" in str(err):
            certificate = Path(get_project_root(), "utils", "files",
                               "ine_certs.pem")
            r = requests.get(urls[name]["dl"]["2010-14"], verify=certificate)
            xls_10_14 = pd.ExcelFile(BytesIO(r.content))
            r = requests.get(urls[name]["dl"]["2015-"], verify=certificate)
            xls_15 = pd.ExcelFile(BytesIO(r.content))
            r = requests.get(urls[name]["dl"]["1997"], verify=certificate)
            prod_97 = (pd.read_excel(BytesIO(r.content), skiprows=5)
                       .dropna(how="any")
                       .set_index("Rubros, Agrupaciones, Subrubros, Familias y Artículos")
                       .T)
        else:
            raise err

    weights_97 = (pd.read_excel(urls[name]["dl"]["1997_weights"], index_col=0)
                  .drop_duplicates(subset="Descripción", keep="first"))
    weights = pd.read_excel(xls_10_14, sheet_name=xls_10_14.sheet_names[0],
                            usecols="A:C", skiprows=13,
                            index_col=0).dropna(how="any")
    weights.columns = ["Item", "Weight"]
    weights_8 = weights.loc[weights.index.str.len() == 8]

    sheets = []
    for excel_file in [xls_10_14, xls_15]:
        for sheet in excel_file.sheet_names:
            raw = pd.read_excel(excel_file, sheet_name=sheet, usecols="D:IN",
                                skiprows=8).dropna(how="all")
            proc = raw.loc[:, raw.columns.str.contains(
                "Indice|Índice")].dropna(how="all")
            sheets.append(proc.T)
    complete_10 = pd.concat(sheets)
    complete_10 = complete_10.iloc[:, 1:]
    complete_10.columns = [weights["Item"], weights.index]
    complete_10.index = pd.date_range(start="2010-12-31",
                                      periods=len(complete_10), freq="M")
    diff_8 = complete_10.loc[:, complete_10.columns.get_level_values(
        level=1).str.len() == 8].pct_change()
    win = pd.DataFrame(winsorize(diff_8, limits=(0.05, 0.05), axis=1))
    win.index = diff_8.index
    win.columns = diff_8.columns.get_level_values(level=1)
    cpi_win = win.mul(weights_8.loc[:, "Weight"].T)
    cpi_win = cpi_win.sum(axis=1).add(1).cumprod().mul(100)

    weights_97["Weight"] = (weights_97["Rubro"]
                            .fillna(weights_97["Agrupación, subrubro, familia"])
                            .fillna(weights_97["Artículo"])
                            .drop(columns=["Rubro",
                                           "Agrupación, subrubro, familia",
                                           "Artículo"]))
    prod_97 = prod_97.loc[:, list(cpi_details["1997_base"].keys())]
    prod_97.index = pd.date_range(start="1997-03-31", periods=len(prod_97),
                                  freq="M")
    weights_97 = (weights_97[weights_97["Descripción"].isin(
        cpi_details["1997_weights"])].set_index("Descripción")
        .drop(columns=["Rubro", "Agrupación, subrubro, familia",
                       "Artículo"])).div(100)
    weights_97.index = prod_97.columns

    prod_10 = complete_10.loc[:, list(cpi_details["2010_base"].keys())]
    prod_10 = prod_10.loc[:, ~prod_10.columns.get_level_values(
        level=0).duplicated()]
    prod_10.columns = prod_10.columns.get_level_values(level=0)
    weights_10 = (weights.loc[weights["Item"].isin(
        list(cpi_details["2010_base"].keys()))].drop_duplicates(
            subset="Item", keep="first")).set_index("Item")

    items = []
    weights = []
    for item, weight, details in zip([prod_10, prod_97],
                                     [weights_10, weights_97],
                                     ["2010_base", "1997_base"]):
        for tradable in [True, False]:
            items.append(
                item.loc[:, [k for k, v in cpi_details[details].items()
                             if v["Tradable"] is tradable]])
            aux = weight.loc[[k for k, v in cpi_details[details].items()
                              if v["Tradable"] is tradable]]
            weights.append(aux.div(aux.sum()))
        for core in [True, False]:
            items.append(
                item.loc[:, [k for k, v in cpi_details[details].items()
                             if v["Core"] is core]])
            aux = weight.loc[[k for k, v in cpi_details[details].items()
                              if v["Core"] is core]]
            weights.append(aux.div(aux.sum()))

    intermediate = []
    for item, weight in zip(items, weights):
        intermediate.append(item.mul(weight.squeeze()).sum(1))

    output = []
    for x, y in zip(intermediate[:4], intermediate[4:]):
        aux = pd.concat([y.pct_change().loc[y.index < "2011-01-01"],
                         x.pct_change().loc[x.index > "2011-01-01"]])
        output.append(aux.fillna(0).add(1).cumprod().mul(100))

    cpi_re = cpi(update_loc=update_loc, save_loc=save_loc, only_get=True)
    cpi_re = cpi_re.loc[cpi_re.index >= "1997-03-31"]
    output = pd.concat([cpi_re] + output + [cpi_win], axis=1)
    output.columns = ["Índice de precios al consumo: total",
                      "Índice de precios al consumo: transables",
                      "Índice de precios al consumo: no transables",
                      "Índice de precios al consumo: subyacente",
                      "Índice de precios al consumo: residual",
                      "Índice de precios al consumo: Winsorized 0.05"]
    output = output.apply(pd.to_numeric, errors="coerce")
    metadata._set(output, area="Precios y salarios", currency="-",
                  inf_adj="No", unit="2010-12=100", seas_adj="NSA",
                  ts_type="-", cumperiods=1)
    output = transform.rebase(output, start_date="2010-12-01",
                              end_date="2010-12-31")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
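# Hedged sketch (not from the library): the Winsorized CPI above caps each
# month's cross-section of item price changes at the 5th/95th percentiles,
# weights the capped changes, and chains the weighted means into an index.
# The weights and changes below are invented; the winsorize call mirrors the
# one used above.
def _example_winsorized_index():
    import numpy as np
    import pandas as pd
    from scipy.stats.mstats import winsorize

    rng = np.random.default_rng(0)
    # Monthly fractional price changes for 10 items over 6 months.
    changes = pd.DataFrame(rng.normal(0.005, 0.01, size=(6, 10)))
    changes.iloc[2, 0] = 0.5  # outlier that winsorizing will cap
    weights = np.full(10, 0.1)
    capped = pd.DataFrame(winsorize(changes, limits=(0.05, 0.05), axis=1),
                          index=changes.index, columns=changes.columns)
    # Weighted mean change per month, chained into a level index (first = ~100).
    return capped.mul(weights).sum(axis=1).add(1).cumprod().mul(100)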
def rxr(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get real exchange rates vis-à-vis the US dollar for Argentina and
    Brazil.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly real exchange rate : pd.DataFrame

    """
    name = "regional_rxr"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    proc = _ifs(update_loc=update_loc, save_loc=save_loc, only_get=only_get)

    output = pd.DataFrame()
    output["Argentina"] = (proc["Argentina - oficial"] * proc["US.PCPI_IX"]
                           / proc["ARG CPI"])
    output["Brasil"] = proc["Brasil"] * proc["US.PCPI_IX"] / proc["BRA CPI"]

    metadata._set(output, area="Regional", currency="-",
                  inf_adj="-", seas_adj="NSA",
                  ts_type="-", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["ARS/USD", "BRL/USD"]])
    output = rebase(output, start_date="2019-01-01",
                    end_date="2019-01-31").dropna(how="all")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
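# Hedged sketch (not from the library): the bilateral real exchange rate above
# is the nominal ARS/USD (or BRL/USD) rate times the US CPI divided by the
# local CPI. The figures below are illustrative only.
def _example_real_exchange_rate():
    nominal_ars_per_usd = 350.0
    us_cpi = 305.0
    arg_cpi = 12000.0
    # Higher local inflation relative to the US lowers the real exchange rate.
    return nominal_ars_per_usd * us_cpi / arg_cpi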
def stocks(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
           revise_rows: Union[str, int] = "nodup",
           save_loc: Union[str, PathLike, Engine, Connection, None] = None,
           only_get: bool = False) -> pd.DataFrame:
    """Get stock market index data in USD terms.

    Indexes selected are MERVAL and BOVESPA.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily stock market index in USD terms : pd.DataFrame

    """
    name = "regional_stocks"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    end_date_dt = dt.datetime(2000, 1, 1)
    start_date_dt = dt.datetime(2000, 1, 1)
    aux = []
    while end_date_dt < dt.datetime.now():
        end_date_dt = start_date_dt + dt.timedelta(days=5000)
        params = {
            "curr_id": "13376",
            "smlID": str(randint(1000000, 99999999)),
            "header": "S&P Merval Historical Data",
            "st_date": start_date_dt.strftime("%m/%d/%Y"),
            "end_date": end_date_dt.strftime("%m/%d/%Y"),
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }
        r = requests.post(urls[name]["dl"]["arg"],
                          headers=investing_headers, data=params)
        aux.append(pd.read_html(r.content, match="Price",
                                index_col=0, parse_dates=True)[0])
        start_date_dt = end_date_dt + dt.timedelta(days=1)
    arg = pd.concat(aux, axis=0)[["Price"]].sort_index()

    bra = pd.read_csv(urls[name]["dl"]["bra"], index_col=0,
                      parse_dates=True)[["Close"]]
    bra = bra.loc[bra.index >= "2000-01-01"]

    converters = nxr(update_loc=update_loc, only_get=only_get)
    converters.columns = converters.columns.get_level_values(0)
    arg = pd.merge_asof(arg, converters[["Argentina - informal"]],
                        left_index=True, right_index=True)
    arg = (arg.iloc[:, 0] / arg.iloc[:, 1]).to_frame()
    arg.columns = ["Argentina"]
    bra = pd.merge_asof(bra, converters[["Brasil"]],
                        left_index=True, right_index=True)
    bra = (bra.iloc[:, 0] / bra.iloc[:, 1]).to_frame()
    bra.columns = ["Brasil"]
    output = arg.join(bra, how="left").interpolate(method="linear",
                                                   limit_area="inside")

    metadata._set(output, area="Regional", currency="USD",
                  inf_adj="No", seas_adj="NSA",
                  ts_type="-", cumperiods=1)
    output = rebase(output, start_date="2019-01-02").dropna(how="all")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
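# Hedged sketch (not from the library): merge_asof pairs each stock-index
# observation with the most recent available exchange rate, which is how the
# MERVAL and BOVESPA series above are converted to USD even though the two
# series do not share identical calendars. Dates and values are made up.
def _example_asof_usd_conversion():
    import pandas as pd

    prices = pd.DataFrame(
        {"Price": [100.0, 102.0, 105.0]},
        index=pd.to_datetime(["2023-01-02", "2023-01-03", "2023-01-04"]))
    fx = pd.DataFrame(
        {"ARS/USD": [180.0, 182.0]},
        index=pd.to_datetime(["2023-01-01", "2023-01-03"]))
    # Each price is matched with the latest exchange rate at or before its date.
    merged = pd.merge_asof(prices, fx, left_index=True, right_index=True)
    return (merged["Price"] / merged["ARS/USD"]).to_frame("USD")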
def cpi(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get consumer price index for Argentina and Brazil.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI : pd.DataFrame

    """
    name = "regional_cpi"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    arg = requests.get(urls[name]["dl"]["ar"],
                       params=urls[name]["dl"]["ar_payload"])
    arg = pd.read_html(arg.content)[0]
    arg.set_index("Fecha", drop=True, inplace=True)
    arg.index = pd.to_datetime(arg.index, format="%d/%m/%Y")
    arg.columns = ["nivel"]
    arg = arg.divide(10)

    arg_unoff = pd.read_excel(urls[name]["dl"]["ar_unofficial"])
    arg_unoff.set_index("date", drop=True, inplace=True)
    arg_unoff.index = arg_unoff.index + MonthEnd(0)
    arg_unoff = arg_unoff.loc[(arg_unoff.index >= "2006-12-01")
                              & (arg_unoff.index <= "2016-12-01"), "index"]
    arg_unoff = arg_unoff.to_frame().pct_change(
        periods=1).multiply(100).dropna()
    arg_unoff.columns = ["nivel"]
    arg = (arg.append(arg_unoff)
           .reset_index()
           .drop_duplicates(subset="index", keep="last")
           .set_index("index", drop=True)
           .sort_index())
    arg = arg.divide(100).add(1).cumprod()

    bra_r = requests.get(urls[name]["dl"]["bra"])
    bra = pd.DataFrame(bra_r.json())[["v"]]
    bra.index = pd.date_range(start="1979-12-31", freq="M", periods=len(bra))
    bra = bra.apply(pd.to_numeric, errors="coerce")
    bra = bra.divide(100).add(1).cumprod()

    output = pd.concat([arg, bra], axis=1)
    output.columns = ["Argentina", "Brasil"]

    metadata._set(output, area="Regional", currency="-",
                  inf_adj="No", seas_adj="NSA",
                  ts_type="-", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["ARS", "BRL"]])
    output = rebase(output, start_date="2010-10-01", end_date="2010-10-31")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
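# Hedged sketch (not from the library): the Argentine series above splices the
# official index with unofficial monthly inflation rates by stacking percentage
# changes and chaining them with cumprod(). The rates below are invented.
def _example_chain_inflation_rates():
    import pandas as pd

    monthly_pct = pd.Series([1.0, 1.5, 2.0, 1.2],
                            index=pd.date_range("2007-01-31", periods=4,
                                                freq="M"))
    # Convert monthly % changes into a cumulative price level.
    return monthly_pct.div(100).add(1).cumprod()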
def monthly_gdp(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
                revise_rows: Union[str, int] = "nodup",
                save_loc: Union[str, PathLike, Engine, Connection, None] = None,
                only_get: bool = False) -> pd.DataFrame:
    """Get monthly GDP data.

    Countries/aggregates selected are Argentina and Brazil.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly GDP : pd.DataFrame

    """
    name = "regional_monthly_gdp"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    arg = pd.read_excel(urls[name]["dl"]["arg"], usecols="D",
                        skiprows=4).dropna(how="all")
    arg.index = pd.date_range(start="2004-01-31", freq="M", periods=len(arg))

    bra = pd.read_csv(urls[name]["dl"]["bra"], sep=";",
                      index_col=0, decimal=",")
    bra.index = pd.date_range(start="2003-01-31", freq="M", periods=len(bra))

    output = pd.concat([arg, bra], axis=1)
    output.columns = ["Argentina", "Brasil"]

    metadata._set(output, area="Regional", currency="-",
                  inf_adj="Const.", seas_adj="SA",
                  ts_type="Flujo", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["ARS", "BRL"]])
    output = rebase(output, start_date="2010-01-01", end_date="2010-12-31")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
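# Hedged sketch (not from the library, and an assumption about rebase's range
# semantics): rebasing over start_date="2010-01-01" to end_date="2010-12-31",
# as above, presumably sets the *average* of that window to 100 rather than a
# single period. Values below are illustrative.
def _example_range_rebase():
    import pandas as pd

    s = pd.Series([95.0, 100.0, 105.0, 110.0],
                  index=pd.date_range("2010-03-31", periods=4, freq="Q"))
    # Divide by the 2010 average so the window mean equals 100 (assumed behavior).
    base = s.loc["2010-01-01":"2010-12-31"].mean()
    return s.div(base).mul(100)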