Example #1
def _balance_retriever(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
                       revise_rows: Union[str, int] = "nodup",
                       save_loc: Union[str, PathLike, Engine, Connection, None] = None,
                       only_get: bool = False) -> Dict[str, pd.DataFrame]:
    """Helper function. See any of the `balance_...()` functions."""
    if only_get is True and update_loc is not None:
        output = {}
        for dataset in fiscal_sheets.keys():
            data = ops._io(
                operation="update", data_loc=update_loc,
                name=f"balance_{dataset}")
            output.update({dataset: data})
        if all(not value.equals(pd.DataFrame()) for value in output.values()):
            return output

    response = requests.get(urls["balance_gps"]["dl"]["main"])
    soup = BeautifulSoup(response.content, "html.parser")
    links = soup.find_all(href=re.compile(r"\.xlsx$"))
    link = links[0]["href"]
    xls = pd.ExcelFile(link)
    output = {}
    for dataset, meta in fiscal_sheets.items():
        data = (pd.read_excel(xls, sheet_name=meta["sheet"]).
                dropna(axis=0, thresh=4).dropna(axis=1, thresh=4).
                transpose().set_index(2, drop=True))
        data.columns = data.iloc[0]
        data = data[data.index.notnull()].rename_axis(None)
        data.index = data.index + MonthEnd(1)
        data.columns = meta["colnames"]
        data = data.apply(pd.to_numeric, errors="coerce")
        metadata._set(
            data, area="Sector público", currency="UYU",
            inf_adj="No", unit="Millones", seas_adj="NSA",
            ts_type="Flujo", cumperiods=1
        )

        if update_loc is not None:
            previous_data = ops._io(operation="update", data_loc=update_loc,
                                    name=f"balance_{dataset}")
            data = ops._revise(new_data=data,
                               prev_data=previous_data,
                               revise_rows=revise_rows)

        if save_loc is not None:
            ops._io(operation="save", data_loc=save_loc, data=data,
                    name=f"balance_{dataset}")

        output.update({dataset: data})

    return output
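
A minimal sketch of the link-scraping step above, run against a synthetic page (in the real code the URL comes from urls and the first .xlsx match is fed to pd.ExcelFile):

import re

from bs4 import BeautifulSoup

html = '<a href="a.xlsx">data</a> <a href="b.pdf">doc</a>'  # synthetic page
soup = BeautifulSoup(html, "html.parser")
links = soup.find_all(href=re.compile(r"\.xlsx$"))
print(links[0]["href"])  # "a.xlsx", the first spreadsheet link
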
Example #2
def get_wages(update_loc: Union[str, PathLike, Engine, Connection,
                                None] = None,
              revise_rows: Union[str, int] = "nodup",
              save_loc: Union[str, PathLike, Engine, Connection, None] = None,
              name: str = "wages",
              index_label: str = "index",
              only_get: bool = False) -> pd.DataFrame:
    """Get general, public and private sector wages data

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'wages'
        Either CSV filename for updating and/or saving, or table name if
        using SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly wages separated by public and private sector : pd.DataFrame

    """
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update",
                         data_loc=update_loc,
                         name=name,
                         index_label=index_label)
        if not output.equals(pd.DataFrame()):
            return output

    historical = pd.read_excel(urls["wages"]["dl"]["historical"],
                               skiprows=8,
                               usecols="A:B")
    historical = historical.dropna(how="any").set_index("Unnamed: 0")
    current = pd.read_excel(urls["wages"]["dl"]["current"],
                            skiprows=8,
                            usecols="A,C:D")
    current = current.dropna(how="any").set_index("Unnamed: 0")
    wages = pd.concat([historical, current], axis=1)
    wages.index = wages.index + MonthEnd(1)
    wages.columns = [
        "Índice medio de salarios", "Índice medio de salarios privados",
        "Índice medio de salarios públicos"
    ]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name,
                                index_label=index_label)
        wages = ops._revise(new_data=wages,
                            prev_data=previous_data,
                            revise_rows=revise_rows)

    wages = wages.apply(pd.to_numeric, errors="coerce")
    metadata._set(wages,
                  area="Mercado laboral",
                  currency="UYU",
                  inf_adj="No",
                  unit="2008-07=100",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save",
                data_loc=save_loc,
                data=wages,
                name=name,
                index_label=index_label)

    return wages
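
A hedged sketch of the update cycle the docstring describes; "data/" is a hypothetical directory:

wages = get_wages(save_loc="data/")                      # first run: download and save a CSV
wages = get_wages(update_loc="data/", save_loc="data/")  # later: "nodup" replaces overlapping periods
wages = get_wages(update_loc="data/", revise_rows=6)     # or drop the last 6 saved rows before revising
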
Example #3
def consumer_confidence(
        update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get monthly consumer confidence data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly consumer confidence data : pd.DataFrame

    """
    name = "consumer_confidence"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc,
                         name=name)
        if not output.equals(pd.DataFrame()):
            return output

    raw = pd.read_excel(urls[name]["dl"]["main"], skiprows=3,
                        usecols="B:F", index_col=0)
    output = raw.loc[~pd.isna(raw.index)]
    output.index = output.index + MonthEnd(0)
    output.columns = ["Subíndice: Situación Económica Personal",
                      "Subíndice: Situación Económica del País",
                      "Subíndice: Predisposición a la Compra de Durables",
                      "Índice de Confianza del Consumidor"]
    output = output.apply(pd.to_numeric, errors="coerce")

    if update_loc is not None:
        previous_data = ops._io(operation="update", data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output, area="Actividad económica", currency="-",
                  inf_adj="No", unit="50 = neutralidad", seas_adj="NSA",
                  ts_type="-", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
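
Note the use of MonthEnd(0) here versus MonthEnd(1) in earlier examples; the difference only shows for dates already sitting on a month end:

import pandas as pd
from pandas.tseries.offsets import MonthEnd

eom = pd.Timestamp("2020-01-31")
print(eom + MonthEnd(0))  # 2020-01-31: already a month end, left in place
print(eom + MonthEnd(1))  # 2020-02-29: pushed to the next month end
mid = pd.Timestamp("2020-01-15")
print(mid + MonthEnd(0), mid + MonthEnd(1))  # both roll forward to 2020-01-31
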
Example #4
def long_rates(update_loc: Union[str, PathLike, Engine, Connection,
                                 None] = None,
               revise_rows: Union[str, int] = "nodup",
               save_loc: Union[str, PathLike, Engine, Connection, None] = None,
               only_get: bool = False) -> pd.DataFrame:
    """Get 10-year government bonds interest rates.

    Countries/aggregates selected are US, Germany, France, Italy, Spain,
    United Kingdom, Japan and China.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily 10-year government bonds interest rates : pd.DataFrame

    """
    name = "global_long_rates"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    bonds = []
    load_dotenv(Path(get_project_root(), ".env"))
    fred_api_key = os.environ.get("FRED_API_KEY")
    r = requests.get(f"{urls[name]['dl']['fred']}DGS10&api_key="
                     f"{fred_api_key}&file_type=json")
    us = pd.DataFrame.from_records(r.json()["observations"])
    us = us[["date", "value"]].set_index("date")
    us.index = pd.to_datetime(us.index)
    us.columns = ["United States"]
    bonds.append(us.apply(pd.to_numeric, errors="coerce").dropna())

    for country, sid in zip([
            "Germany", "France", "Italy", "Spain", "United Kingdom", "Japan",
            "China"
    ], ["23693", "23778", "23738", "23806", "23673", "23901", "29227"]):
        end_date_dt = dt.datetime(2000, 1, 1)
        start_date_dt = dt.datetime(2000, 1, 1)
        aux = []
        while end_date_dt < dt.datetime.now():
            end_date_dt = start_date_dt + dt.timedelta(days=5000)
            params = {
                "curr_id": sid,
                "smlID": str(randint(1000000, 99999999)),
                "header": f"{country} 10-Year Bond Yield Historical Data",
                "st_date": start_date_dt.strftime("%m/%d/%Y"),
                "end_date": end_date_dt.strftime("%m/%d/%Y"),
                "interval_sec": "Daily",
                "sort_col": "date",
                "sort_ord": "DESC",
                "action": "historical_data"
            }
            r = requests.post(urls["global_long_rates"]["dl"]["main"],
                              headers=investing_headers,
                              data=params)
            aux.append(
                pd.read_html(r.content,
                             match="Price",
                             index_col=0,
                             parse_dates=True)[0])
            start_date_dt = end_date_dt + dt.timedelta(days=1)
        aux = pd.concat(aux, axis=0)[["Price"]].sort_index()
        aux.columns = [country]
        bonds.append(aux)

    output = bonds[0].join(bonds[1:], how="left")
    output = output.interpolate(method="linear", limit_area="inside")
    output.columns = [
        "Estados Unidos", "Alemania", "Francia", "Italia", "España",
        "Reino Unido", "Japón", "China"
    ]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output,
                  area="Global",
                  currency="USD",
                  inf_adj="No",
                  seas_adj="NSA",
                  unit="Tasa",
                  ts_type="-",
                  cumperiods=1)
    metadata._modify_multiindex(
        output,
        levels=[3],
        new_arrays=[["USD", "EUR", "EUR", "EUR", "EUR", "GBP", "JPY", "CNY"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
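
The FRED leg of the download, isolated. The endpoint below is the standard FRED observations API; the key value is a placeholder (the real code loads FRED_API_KEY from a .env file):

import pandas as pd
import requests

api_key = "your-fred-api-key"  # placeholder; keep real keys out of source
url = ("https://api.stlouisfed.org/fred/series/observations"
       f"?series_id=DGS10&api_key={api_key}&file_type=json")
us = pd.DataFrame.from_records(requests.get(url).json()["observations"])
us = us[["date", "value"]].set_index("date")
us.index = pd.to_datetime(us.index)
us = us.apply(pd.to_numeric, errors="coerce").dropna()  # "." placeholders become NaN
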
Example #5
def policy_rates(update_loc: Union[str, PathLike, Engine, Connection,
                                   None] = None,
                 revise_rows: Union[str, int] = "nodup",
                 save_loc: Union[str, PathLike, Engine, Connection,
                                 None] = None,
                 only_get: bool = False) -> pd.DataFrame:
    """Get central bank policy interest rates data.

    Countries/aggregates selected are US, Euro Area, Japan and China.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily policy interest rates : pd.DataFrame

    """
    name = "global_policy_rates"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    r = requests.get(urls[name]["dl"]["main"])
    temp_dir = tempfile.TemporaryDirectory()
    with zipfile.ZipFile(BytesIO(r.content), "r") as f:
        f.extractall(path=temp_dir.name)
        path_temp = path.join(temp_dir.name,
                              "WEBSTATS_CBPOL_D_DATAFLOW_csv_row.csv")
        raw = pd.read_csv(path_temp,
                          usecols=[0, 7, 19, 36, 37],
                          index_col=0,
                          header=2,
                          parse_dates=True).dropna(how="all")
    output = (raw.apply(pd.to_numeric,
                        errors="coerce").interpolate(method="linear",
                                                     limit_area="inside"))
    output.columns = ["China", "Japón", "Estados Unidos", "Eurozona"]
    output = output[["Estados Unidos", "Eurozona", "Japón", "China"]]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output,
                  area="Global",
                  currency="USD",
                  inf_adj="No",
                  seas_adj="NSA",
                  unit="Tasa",
                  ts_type="-",
                  cumperiods=1)
    metadata._modify_multiindex(output,
                                levels=[3],
                                new_arrays=[["USD", "EUR", "JPY", "CNY"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
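
The in-memory zip extraction above as a standalone sketch, using the context-manager form of TemporaryDirectory so cleanup is guaranteed (the URL is a placeholder; the CSV name matches the code):

import tempfile
import zipfile
from io import BytesIO
from os import path

import pandas as pd
import requests

r = requests.get("https://example.org/cbpol.zip")  # placeholder for urls[name]["dl"]["main"]
with tempfile.TemporaryDirectory() as tmp:
    with zipfile.ZipFile(BytesIO(r.content), "r") as f:
        f.extractall(path=tmp)
    raw = pd.read_csv(path.join(tmp, "WEBSTATS_CBPOL_D_DATAFLOW_csv_row.csv"),
                      index_col=0, header=2, parse_dates=True).dropna(how="all")
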
Example #6
def get(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        name: str = "naccounts",
        index_label: str = "index",
        only_get: bool = False) -> Dict[str, pd.DataFrame]:
    """Get national accounts data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'naccounts'
        Either CSV filename for updating and/or saving, or table name if
        using SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Quarterly national accounts : Dict[str, pd.DataFrame]
        Each dataframe corresponds to a national accounts table.

    """
    if only_get is True and update_loc is not None:
        output = {}
        for filename, meta in na_metadata.items():
            data = ops._io(operation="update",
                           data_loc=update_loc,
                           name=f"{name}_{filename}",
                           index_label=index_label)
            output.update({filename: data})
        if all(not value.equals(pd.DataFrame()) for value in output.values()):
            return output

    parsed_excels = {}
    for filename, meta in na_metadata.items():
        raw = pd.read_excel(meta["url"], skiprows=9, nrows=meta["Rows"])
        proc = (raw.drop(columns=["Unnamed: 0"]).dropna(
            axis=0, how="all").dropna(axis=1, how="all"))
        proc = proc.transpose()
        proc.columns = meta["Colnames"]
        proc.drop(["Unnamed: 1"], inplace=True)
        _fix_dates(proc)
        if meta["Unit"] == "Miles":
            proc = proc.divide(1000)
            unit_ = "Millones"
        else:
            unit_ = meta["Unit"]

        if update_loc is not None:
            previous_data = ops._io(operation="update",
                                    data_loc=update_loc,
                                    name=f"{name}_{filename}",
                                    index_label=index_label)
            proc = ops._revise(new_data=proc,
                               prev_data=previous_data,
                               revise_rows=revise_rows)
        proc = proc.apply(pd.to_numeric, errors="coerce")

        metadata._set(proc,
                      area="Actividad económica",
                      currency="UYU",
                      inf_adj=meta["Inf. Adj."],
                      unit=unit_,
                      seas_adj=meta["Seas"],
                      ts_type="Flujo",
                      cumperiods=1)

        if save_loc is not None:
            ops._io(operation="save",
                    data_loc=save_loc,
                    data=proc,
                    name=f"{name}_{filename}",
                    index_label=index_label)

        parsed_excels.update({filename: proc})

    return parsed_excels
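
A minimal usage sketch for the dict-of-tables return value; "data/" is a hypothetical directory and the keys come from na_metadata:

tables = get(save_loc="data/")       # one CSV per national accounts table
for key, df in tables.items():
    print(key, df.index[-1], df.shape)
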
Example #7
def _public_debt_retriever(update_loc: Union[str, PathLike,
                                             Engine, Connection, None] = None,
                           revise_rows: Union[str, int] = "nodup",
                           save_loc: Union[str, PathLike,
                                           Engine, Connection, None] = None,
                           only_get: bool = False) -> Dict[str, pd.DataFrame]:
    """Helper function. See any of the `public_debt_...()` functions."""
    if only_get is True and update_loc is not None:
        output = {}
        for meta in ["gps", "nfps", "cb", "assets"]:
            data = ops._io(operation="update", data_loc=update_loc,
                           name=f"public_debt_{meta}")
            output.update({meta: data})
        if all(not value.equals(pd.DataFrame()) for value in output.values()):
            return output

    colnames = ["Total deuda", "Plazo contractual: hasta 1 año",
                "Plazo contractual: entre 1 y 5 años",
                "Plazo contractual: más de 5 años",
                "Plazo residual: hasta 1 año",
                "Plazo residual: entre 1 y 5 años",
                "Plazo residual: más de 5 años",
                "Moneda: pesos", "Moneda: dólares", "Moneda: euros",
                "Moneda: yenes", "Moneda: DEG", "Moneda: otras",
                "Residencia: no residentes", "Residencia: residentes"]

    xls = pd.ExcelFile(urls["public_debt_gps"]["dl"]["main"])
    gps_raw = pd.read_excel(xls, sheet_name="SPG2",
                            usecols="B:Q", index_col=0, skiprows=10,
                            nrows=(dt.datetime.now().year - 1999) * 4)
    gps = gps_raw.dropna(thresh=2)
    gps.index = pd.date_range(start="1999-12-31", periods=len(gps),
                              freq="Q-DEC")
    gps.columns = colnames

    nfps_raw = pd.read_excel(xls, sheet_name="SPNM bruta",
                             usecols="B:O", index_col=0)
    loc = nfps_raw.index.get_loc("9. Deuda Bruta del Sector Público no "
                                 "monetario por plazo y  moneda.")
    nfps = nfps_raw.iloc[loc + 5:, :].dropna(how="any")
    nfps.index = pd.date_range(start="1999-12-31", periods=len(nfps),
                               freq="Q-DEC")
    nfps_extra_raw = pd.read_excel(xls, sheet_name="SPNM bruta",
                                   usecols="O:P", skiprows=11,
                                   nrows=(dt.datetime.now().year - 1999) * 4)
    nfps_extra = nfps_extra_raw.dropna(how="all")
    nfps_extra.index = nfps.index
    nfps = pd.concat([nfps, nfps_extra], axis=1)
    nfps.columns = colnames

    cb_raw = pd.read_excel(xls, sheet_name="BCU bruta",
                           usecols="B:O", index_col=0,
                           skiprows=(dt.datetime.now().year - 1999) * 8 + 20)
    cb = cb_raw.dropna(how="any")
    cb.index = pd.date_range(start="1999-12-31", periods=len(cb),
                             freq="Q-DEC")
    cb_extra_raw = pd.read_excel(xls, sheet_name="BCU bruta",
                                 usecols="O:P", skiprows=11,
                                 nrows=(dt.datetime.now().year - 1999) * 4)
    bcu_extra = cb_extra_raw.dropna(how="all")
    bcu_extra.index = cb.index
    cb = pd.concat([cb, bcu_extra], axis=1)
    cb.columns = colnames

    assets_raw = pd.read_excel(xls, sheet_name="Activos Neta",
                               usecols="B,C,D,K", index_col=0, skiprows=13,
                               nrows=(dt.datetime.now().year - 1999) * 4)
    assets = assets_raw.dropna(how="any")
    assets.index = pd.date_range(start="1999-12-31", periods=len(assets),
                                 freq="Q-DEC")
    assets.columns = ["Total activos", "Sector público no monetario",
                      "BCU"]

    output = {"gps": gps, "nfps": nfps, "cb": cb, "assets": assets}

    for meta, data in output.items():
        if update_loc is not None:
            previous_data = ops._io(operation="update", data_loc=update_loc,
                                    name=f"public_debt_{meta}")
            data = ops._revise(new_data=data,
                               prev_data=previous_data,
                               revise_rows=revise_rows)
        metadata._set(data, area="Sector público", currency="USD",
                      inf_adj="No", unit="Millones", seas_adj="NSA",
                      ts_type="Stock", cumperiods=1)

        if save_loc is not None:
            ops._io(operation="save", data_loc=save_loc,
                    data=data, name=f"public_debt_{meta}")

        output.update({meta: data})

    return output
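
Every sheet above gets its quarterly index regenerated with pd.date_range rather than parsed from the spreadsheet; the pattern, isolated:

import pandas as pd

idx = pd.date_range(start="1999-12-31", periods=8, freq="Q-DEC")
print(idx)  # 1999-12-31, 2000-03-31, ..., 2001-09-30: quarter ends on a December year-end
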
Example #8
def embi_spreads(update_loc: Union[str, PathLike, Engine, Connection,
                                   None] = None,
                 revise_rows: Union[str, int] = "nodup",
                 save_loc: Union[str, PathLike, Engine, Connection,
                                 None] = None,
                 only_get: bool = False) -> pd.DataFrame:
    """Get EMBI spread for Argentina, Brazil and the EMBI Global.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily EMBI spreads : pd.DataFrame

    """
    name = "regional_embi_spreads"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    global_ = pd.read_excel(urls[name]["dl"]["global"],
                            usecols="A:B",
                            skiprows=1,
                            index_col=0,
                            parse_dates=True)
    global_ = global_.loc[~pd.isna(global_.index)].mul(100)
    region = []
    for cnt in ["argentina", "brasil"]:
        r = requests.get(urls[name]["dl"][cnt])
        aux = pd.DataFrame(r.json())
        aux.set_index(0, drop=True, inplace=True)
        aux.drop("Fecha", inplace=True)
        aux = aux.replace(",", ".", regex=True).apply(pd.to_numeric)
        aux.index = pd.to_datetime(aux.index, format="%d-%m-%Y")
        aux.sort_index(inplace=True)
        aux.columns = [cnt]
        region.append(aux)
    region = region[0].join(region[1]).interpolate(limit_area="inside")
    output = region.join(global_, how="left").interpolate(method="linear",
                                                          limit_area="inside")
    output.columns = ["Argentina", "Brasil", "EMBI Global"]
    output = output.apply(pd.to_numeric, errors="coerce")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output,
                  area="Regional",
                  currency="USD",
                  inf_adj="No",
                  seas_adj="NSA",
                  unit="PBS",
                  ts_type="-",
                  cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
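
The decimal-comma cleanup applied to both country feeds, shown on synthetic values:

import pandas as pd

raw = pd.DataFrame({"spread": ["234,5", "240,0"]})  # synthetic feed values
clean = raw.replace(",", ".", regex=True).apply(pd.to_numeric)
print(clean["spread"].tolist())  # [234.5, 240.0]
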
Example #9
def embi_yields(update_loc: Union[str, PathLike, Engine, Connection,
                                  None] = None,
                revise_rows: Union[str, int] = "nodup",
                save_loc: Union[str, PathLike, Engine, Connection,
                                None] = None,
                only_get: bool = False) -> pd.DataFrame:
    """Get EMBI yields for Argentina, Brazil and the EMBI Global.

    Yields are calculated by adding EMBI spreads to the 10-year US Treasury
    bond rate.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily EMBI yields : pd.DataFrame

    """
    name = "regional_embi_yields"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    treasuries = long_rates(update_loc=update_loc,
                            save_loc=save_loc,
                            only_get=only_get)["Estados Unidos"]
    spreads = embi_spreads(update_loc=update_loc,
                           save_loc=save_loc,
                           only_get=only_get)
    treasuries = (treasuries.reindex(spreads.index).interpolate(
        method="linear", limit_direction="forward"))
    output = spreads.div(100).add(treasuries.squeeze(), axis=0)

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output,
                  area="Regional",
                  currency="USD",
                  inf_adj="No",
                  seas_adj="NSA",
                  unit="Tasa",
                  ts_type="-",
                  cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
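
The alignment step, isolated: the Treasury series is reindexed onto the spreads' calendar and gaps are filled by forward linear interpolation (synthetic data):

import pandas as pd

treasuries = pd.Series([1.5, 1.6],
                       index=pd.to_datetime(["2021-01-01", "2021-01-05"]))
daily = pd.date_range("2021-01-01", "2021-01-06", freq="D")
aligned = treasuries.reindex(daily).interpolate(method="linear",
                                                limit_direction="forward")
print(aligned.tolist())  # [1.5, 1.525, 1.55, 1.575, 1.6, 1.6]
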
Example #10
def cpi(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get consumer price index for Argentina and Brazil.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI : pd.DataFrame

    """
    name = "regional_cpi"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    arg = requests.get(urls[name]["dl"]["ar"],
                       params=urls[name]["dl"]["ar_payload"])
    arg = pd.read_html(arg.content)[0]
    arg.set_index("Fecha", drop=True, inplace=True)
    arg.index = pd.to_datetime(arg.index, format="%d/%m/%Y")
    arg.columns = ["nivel"]
    arg = arg.divide(10)

    arg_unoff = pd.read_excel(urls[name]["dl"]["ar_unofficial"])
    arg_unoff.set_index("date", drop=True, inplace=True)
    arg_unoff.index = arg_unoff.index + MonthEnd(0)
    arg_unoff = arg_unoff.loc[(arg_unoff.index >= "2006-12-01") &
                              (arg_unoff.index <= "2016-12-01"), "index"]
    arg_unoff = arg_unoff.to_frame().pct_change(
        periods=1).multiply(100).dropna()
    arg_unoff.columns = ["nivel"]
    arg = (pd.concat([arg, arg_unoff])
           .reset_index()
           .drop_duplicates(subset="index", keep="last")
           .set_index("index", drop=True)
           .sort_index())
    arg = arg.divide(100).add(1).cumprod()

    bra_r = requests.get(urls[name]["dl"]["bra"])
    bra = pd.DataFrame(bra_r.json())[["v"]]
    bra.index = pd.date_range(start="1979-12-31", freq="M", periods=len(bra))
    bra = bra.apply(pd.to_numeric, errors="coerce")
    bra = bra.divide(100).add(1).cumprod()

    output = pd.concat([arg, bra], axis=1)
    output.columns = ["Argentina", "Brasil"]
    metadata._set(output,
                  area="Regional",
                  currency="-",
                  inf_adj="No",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)
    metadata._modify_multiindex(output,
                                levels=[3],
                                new_arrays=[["ARS", "BRL"]])
    output = rebase(output, start_date="2010-10-01", end_date="2010-10-31")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
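
The splice above rebuilds an index level from monthly percentage changes via a cumulative product; the core transform on synthetic data:

import pandas as pd

mom = pd.Series([1.0, 2.0, 0.5])        # month-over-month inflation, percent
level = mom.div(100).add(1).cumprod()   # chain gross rates into an index level
print(level.round(4).tolist())          # [1.01, 1.0302, 1.0354]
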
Example #11
def gdp(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False,
        driver: WebDriver = None) -> pd.DataFrame:
    """Get seasonally adjusted real GDP for Argentina and Brazil.

    This function requires a Selenium webdriver. It can be provided in the
    driver parameter, or it will attempt to configure a Chrome webdriver.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.
    driver : selenium.webdriver.chrome.webdriver.WebDriver, default None
        Selenium webdriver for scraping. If None, build a Chrome webdriver.

    Returns
    -------
    Quarterly real GDP : pd.DataFrame

    """
    name = "regional_gdp"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    if driver is None:
        driver = _build()
    driver.get(urls[name]["dl"]["arg_new"])
    time.sleep(5)
    soup = BeautifulSoup(driver.page_source, "lxml")
    driver.quit()
    url = soup.find_all(href=re.compile("desest"))[0]["href"]
    full_url = f"https://www.indec.gob.ar{url}"
    arg = pd.read_excel(full_url, skiprows=3, usecols="D").dropna(how="all")
    arg.index = pd.date_range(start="2004-03-31",
                              freq="Q-DEC",
                              periods=len(arg))
    arg_old = pd.read_excel(urls[name]["dl"]["arg_old"],
                            skiprows=7,
                            usecols="D").dropna(how="all")
    arg_old.index = pd.date_range(start="1993-03-31",
                                  freq="Q-DEC",
                                  periods=len(arg_old))
    arg = pd.concat([arg, arg_old], axis=1)
    for row in reversed(range(len(arg))):
        if pd.isna(arg.iloc[row, 0]):
            arg.iloc[row, 0] = (arg.iloc[row, 1] / arg.iloc[row + 1, 1] *
                                arg.iloc[row + 1, 0])
    arg = arg.iloc[:, [0]]

    r = requests.get(urls[name]["dl"]["bra"])
    temp_dir = tempfile.TemporaryDirectory()
    with zipfile.ZipFile(BytesIO(r.content), "r") as f:
        f.extractall(path=temp_dir.name)
    path_temp = path.join(temp_dir.name, listdir(temp_dir.name)[0])
    bra = pd.read_excel(path_temp,
                        usecols="Q",
                        skiprows=3,
                        sheet_name="Val encad preços 95 com ajuste")
    bra.index = pd.date_range(start="1996-03-31",
                              freq="Q-DEC",
                              periods=len(bra))

    output = pd.concat([arg, bra], axis=1).div(1000)
    output.columns = ["Argentina", "Brasil"]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output,
                  area="Regional",
                  currency="-",
                  inf_adj="Const.",
                  seas_adj="SA",
                  unit="Miles de millones",
                  ts_type="Flujo",
                  cumperiods=1)
    metadata._modify_multiindex(output,
                                levels=[3],
                                new_arrays=[["ARS", "BRL"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
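
The backfill loop splices the old-base Argentine series onto the new one by scaling with the overlap ratio; one step of the recursion with made-up levels:

new_next, old_now, old_next = 500.0, 95.0, 100.0  # synthetic levels
new_now = old_now / old_next * new_next           # mirrors arg.iloc[row, 0] above
print(new_now)                                    # 475.0
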
Example #12
def monthly_gdp(update_loc: Union[str, PathLike, Engine, Connection,
                                  None] = None,
                revise_rows: Union[str, int] = "nodup",
                save_loc: Union[str, PathLike, Engine, Connection,
                                None] = None,
                only_get: bool = False) -> pd.DataFrame:
    """Get monthly GDP data.

    Countries/aggregates selected are Argentina and Brazil.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly GDP : pd.DataFrame

    """
    name = "regional_monthly_gdp"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    arg = pd.read_excel(urls[name]["dl"]["arg"], usecols="D",
                        skiprows=4).dropna(how="all")
    arg.index = pd.date_range(start="2004-01-31", freq="M", periods=len(arg))

    bra = pd.read_csv(urls[name]["dl"]["bra"],
                      sep=";",
                      index_col=0,
                      decimal=",")
    bra.index = pd.date_range(start="2003-01-31", freq="M", periods=len(bra))

    output = pd.concat([arg, bra], axis=1)
    output.columns = ["Argentina", "Brasil"]
    metadata._set(output,
                  area="Regional",
                  currency="-",
                  inf_adj="Const.",
                  seas_adj="SA",
                  ts_type="Flujo",
                  cumperiods=1)
    metadata._modify_multiindex(output,
                                levels=[3],
                                new_arrays=[["ARS", "BRL"]])
    output = rebase(output, start_date="2010-01-01", end_date="2010-12-31")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
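
rebase comes from the library itself; conceptually it rescales the series so the base window averages 100. A hand-rolled sketch of that idea on synthetic data (not the library function):

import pandas as pd

s = pd.Series([90.0, 100.0, 110.0, 120.0],
              index=pd.date_range("2010-01-31", periods=4, freq="M"))
base = s["2010-01-01":"2010-02-28"].mean()  # average over the base window
print((s / base * 100).round(2).tolist())   # [94.74, 105.26, 115.79, 126.32]
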
Example #13
def get_monthly(update_loc: Union[str, PathLike, Engine, Connection,
                                  None] = None,
                revise_rows: Union[str, int] = "nodup",
                save_loc: Union[str, PathLike, Engine, Connection,
                                None] = None,
                name: str = "nxr_monthly",
                index_label: str = "index",
                only_get: bool = False) -> pd.DataFrame:
    """Get monthly nominal exchange rate data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'nxr_monthly'
        Either CSV filename for updating and/or saving, or table name if
        using SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly nominal exchange rates : pd.DataFrame
        Sell rate, monthly average and end of period.

    """
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update",
                         data_loc=update_loc,
                         name=name,
                         index_label=index_label)
        if not output.equals(pd.DataFrame()):
            return output

    nxr_raw = pd.read_excel(urls["nxr_monthly"]["dl"]["main"],
                            skiprows=4,
                            index_col=0,
                            usecols="A,C,F")
    nxr = nxr_raw.dropna(how="any", axis=0)
    nxr.columns = [
        "Tipo de cambio venta, fin de período",
        "Tipo de cambio venta, promedio"
    ]
    nxr.index = nxr.index + MonthEnd(1)
    nxr = nxr.apply(pd.to_numeric, errors="coerce")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name,
                                index_label=index_label)
        nxr = ops._revise(new_data=nxr,
                          prev_data=previous_data,
                          revise_rows=revise_rows)

    metadata._set(nxr,
                  area="Precios y salarios",
                  currency="UYU/USD",
                  inf_adj="No",
                  unit="-",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save",
                data_loc=save_loc,
                data=nxr,
                name=name,
                index_label=index_label)

    return nxr
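
Since save_loc also accepts a SQLAlchemy engine, the same call can target a database instead of a CSV directory; the connection string here is hypothetical:

from sqlalchemy import create_engine

eng = create_engine("sqlite:///econuy.db")  # hypothetical local database
nxr = get_monthly(save_loc=eng, name="nxr_monthly",
                  index_label="index")      # written to table "nxr_monthly"
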
Example #14
def get_rates(update_loc: Union[str, PathLike, Engine, Connection,
                                None] = None,
              revise_rows: Union[str, int] = "nodup",
              save_loc: Union[str, PathLike, Engine, Connection, None] = None,
              name: str = "labor",
              index_label: str = "index",
              only_get: bool = False) -> pd.DataFrame:
    """Get labor market data.

    Get monthly labor force participation rate, employment rate (employment to
    working-age population) and unemployment rate.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'labor'
        Either CSV filename for updating and/or saving, or table name if
        using SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly participation, employment and unemployment rates : pd.DataFrame

    """
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update",
                         data_loc=update_loc,
                         name=name,
                         index_label=index_label)
        if not output.equals(pd.DataFrame()):
            return output

    labor_raw = pd.read_excel(urls["labor"]["dl"]["main"],
                              skiprows=39).dropna(axis=0, thresh=2)
    labor = labor_raw[~labor_raw["Unnamed: 0"].str.
                      contains("-|/|Total", regex=True)]
    labor.index = pd.date_range(start="2006-01-01",
                                periods=len(labor),
                                freq="M")
    labor = labor.drop(columns="Unnamed: 0")
    labor.columns = [
        "Tasa de actividad: total", "Tasa de actividad: hombres",
        "Tasa de actividad: mujeres", "Tasa de empleo: total",
        "Tasa de empleo: hombres", "Tasa de empleo: mujeres",
        "Tasa de desempleo: total", "Tasa de desempleo: hombres",
        "Tasa de desempleo: mujeres"
    ]
    missing = pd.read_excel(urls["labor"]["dl"]["missing"],
                            index_col=0,
                            header=0)
    missing.columns = labor.columns
    labor = pd.concat([labor, missing])
    labor = labor.loc[~labor.index.duplicated(keep="first")]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name,
                                index_label=index_label)
        labor = ops._revise(new_data=labor,
                            prev_data=previous_data,
                            revise_rows=revise_rows)

    labor = labor.apply(pd.to_numeric, errors="coerce")
    metadata._set(labor,
                  area="Mercado laboral",
                  currency="-",
                  inf_adj="No",
                  unit="Tasa",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save",
                data_loc=save_loc,
                data=labor,
                name=name,
                index_label=index_label)

    return labor
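
The row filter above drops period aggregates by their labels; a synthetic illustration (the labels are made up):

import pandas as pd

labels = pd.Series(["Enero", "Ene-Mar", "2006/2007", "Total"])
print(labels[~labels.str.contains("-|/|Total", regex=True)].tolist())  # ['Enero']
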
Example #15
def cpi(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get CPI data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI index : pd.DataFrame

    """
    name = "cpi"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    try:
        cpi = pd.read_excel(urls[name]["dl"]["main"],
                            skiprows=7,
                            usecols="A:B",
                            index_col=0).dropna()
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" in str(err):
            certificate = Path(get_project_root(), "utils", "files",
                               "ine_certs.pem")
            r = requests.get(urls[name]["dl"]["main"], verify=certificate)
            cpi = pd.read_excel(BytesIO(r.content),
                                skiprows=7,
                                usecols="A:B",
                                index_col=0).dropna()
        else:
            raise err
    cpi.columns = ["Índice de precios al consumo"]
    cpi.rename_axis(None, inplace=True)
    cpi.index = cpi.index + MonthEnd(1)

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        cpi = ops._revise(new_data=cpi,
                          prev_data=previous_data,
                          revise_rows=revise_rows)

    cpi = cpi.apply(pd.to_numeric, errors="coerce")
    metadata._set(cpi,
                  area="Precios",
                  currency="-",
                  inf_adj="No",
                  unit="2010-10=100",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=cpi, name=name)

    return cpi
Example #16
def nxr(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get USDARS and USDBRL.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily exchange rates : pd.DataFrame

    """
    name = "regional_nxr"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    arg = []
    for dollar in ["ar", "ar_unofficial"]:
        r = requests.get(urls[name]["dl"][dollar])
        aux = pd.DataFrame(r.json())[[0, 2]]
        aux.set_index(0, drop=True, inplace=True)
        aux.drop("Fecha", inplace=True)
        aux = aux.replace(",", ".", regex=True).apply(pd.to_numeric)
        aux.index = pd.to_datetime(aux.index, format="%d-%m-%Y")
        aux.sort_index(inplace=True)
        aux.columns = [dollar]
        arg.append(aux)
    arg = arg[0].join(arg[1], how="left")
    arg.columns = ["Argentina - oficial", "Argentina - informal"]

    r = requests.get(urls[name]["dl"]["bra"])
    bra = pd.DataFrame(r.json())
    bra = [(x["VALDATA"], x["VALVALOR"]) for x in bra["value"]]
    bra = pd.DataFrame.from_records(bra).dropna(how="any")
    bra.set_index(0, inplace=True)
    bra.index = pd.to_datetime(bra.index.str[:-4],
                               format="%Y-%m-%dT%H:%M:%S").tz_localize(None)
    bra.columns = ["Brasil"]

    output = arg.join(bra, how="left").interpolate(method="linear",
                                                   limit_area="inside")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output,
                  area="Regional",
                  currency="USD",
                  inf_adj="No",
                  seas_adj="NSA",
                  unit="Tasa",
                  ts_type="-",
                  cumperiods=1)
    metadata._modify_multiindex(output,
                                levels=[3, 5],
                                new_arrays=[["ARS", "ARS", "BRL"],
                                            ["ARS/USD", "ARS/USD", "BRL/USD"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
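
The Argentine endpoints return numbers with decimal commas, which is why the snippet above routes everything through `replace(",", ".", regex=True)` before `pd.to_numeric`. A self-contained toy version of that cleanup (values and dates made up):

import pandas as pd

aux = pd.DataFrame({"ar": ["102,35", "103,10"]},
                   index=["03-01-2022", "04-01-2022"])
aux = aux.replace(",", ".", regex=True).apply(pd.to_numeric)
aux.index = pd.to_datetime(aux.index, format="%d-%m-%Y")
print(aux.sort_index())  # float column with a proper DatetimeIndex
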
Example #17
def cpi_measures(update_loc: Union[str, PathLike, Engine, Connection,
                                   None] = None,
                 revise_rows: Union[str, int] = "nodup",
                 save_loc: Union[str, PathLike, Engine, Connection,
                                 None] = None,
                 only_get: bool = False) -> pd.DataFrame:
    """
    Get core CPI, Winsorized CPI, tradable CPI, non-tradable CPI and residual
    CPI.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI measures : pd.DataFrame

    """
    name = "cpi_measures"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output
    try:
        xls_10_14 = pd.ExcelFile(urls[name]["dl"]["2010-14"])
        xls_15 = pd.ExcelFile(urls[name]["dl"]["2015-"])
        prod_97 = (pd.read_excel(
            urls[name]["dl"]["1997"], skiprows=5).dropna(how="any").set_index(
                "Rubros, Agrupaciones, Subrubros, Familias y Artículos").T)
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" in str(err):
            certificate = Path(get_project_root(), "utils", "files",
                               "ine_certs.pem")
            r = requests.get(urls[name]["dl"]["2010-14"], verify=certificate)
            xls_10_14 = pd.ExcelFile(BytesIO(r.content))
            r = requests.get(urls[name]["dl"]["2015-"], verify=certificate)
            xls_15 = pd.ExcelFile(BytesIO(r.content))
            r = requests.get(urls[name]["dl"]["1997"], verify=certificate)
            prod_97 = (pd.read_excel(BytesIO(
                r.content), skiprows=5).dropna(how="any").set_index(
                    "Rubros, Agrupaciones, Subrubros, Familias y Artículos").T)
        else:
            raise err
    weights_97 = (pd.read_excel(urls[name]["dl"]["1997_weights"],
                                index_col=0).drop_duplicates(
                                    subset="Descripción", keep="first"))
    weights = pd.read_excel(xls_10_14,
                            sheet_name=xls_10_14.sheet_names[0],
                            usecols="A:C",
                            skiprows=13,
                            index_col=0).dropna(how="any")
    weights.columns = ["Item", "Weight"]
    weights_8 = weights.loc[weights.index.str.len() == 8]

    sheets = []
    for excel_file in [xls_10_14, xls_15]:
        for sheet in excel_file.sheet_names:
            raw = pd.read_excel(excel_file,
                                sheet_name=sheet,
                                usecols="D:IN",
                                skiprows=8).dropna(how="all")
            proc = raw.loc[:,
                           raw.columns.str.contains("Indice|Índice")].dropna(
                               how="all")
            sheets.append(proc.T)
    complete_10 = pd.concat(sheets)
    complete_10 = complete_10.iloc[:, 1:]
    complete_10.columns = [weights["Item"], weights.index]
    complete_10.index = pd.date_range(start="2010-12-31",
                                      periods=len(complete_10),
                                      freq="M")
    diff_8 = complete_10.loc[:,
                             complete_10.columns.get_level_values(
                                 level=1).str.len() == 8].pct_change()
    win = pd.DataFrame(winsorize(diff_8, limits=(0.05, 0.05), axis=1))
    win.index = diff_8.index
    win.columns = diff_8.columns.get_level_values(level=1)
    cpi_win = win.mul(weights_8.loc[:, "Weight"].T)
    cpi_win = cpi_win.sum(axis=1).add(1).cumprod().mul(100)

    weights_97["Weight"] = (weights_97["Rubro"].fillna(
        weights_97["Agrupación, subrubro, familia"]).fillna(
            weights_97["Artículo"]).drop(
                columns=["Rubro", "Agrupación, subrubro, familia", "Artículo"])
                            )
    prod_97 = prod_97.loc[:, list(cpi_details["1997_base"].keys())]
    prod_97.index = pd.date_range(start="1997-03-31",
                                  periods=len(prod_97),
                                  freq="M")
    weights_97 = (weights_97[weights_97["Descripción"].isin(
        cpi_details["1997_weights"])].set_index("Descripción").drop(
            columns=["Rubro", "Agrupación, subrubro, "
                     "familia", "Artículo"])).div(100)
    weights_97.index = prod_97.columns
    prod_10 = complete_10.loc[:, list(cpi_details["2010_base"].keys())]
    prod_10 = prod_10.loc[:, ~prod_10.columns.get_level_values(
        level=0).duplicated()]
    prod_10.columns = prod_10.columns.get_level_values(level=0)
    weights_10 = (weights.loc[weights["Item"].isin(
        list(cpi_details["2010_base"].keys()))].drop_duplicates(
            subset="Item", keep="first")).set_index("Item")
    items = []
    weights = []
    for item, weight, details in zip([prod_10, prod_97],
                                     [weights_10, weights_97],
                                     ["2010_base", "1997_base"]):
        for tradable in [True, False]:
            items.append(item.loc[:, [
                k for k, v in cpi_details[details].items()
                if v["Tradable"] is tradable
            ]])
            aux = weight.loc[[
                k for k, v in cpi_details[details].items()
                if v["Tradable"] is tradable
            ]]
            weights.append(aux.div(aux.sum()))
        for core in [True, False]:
            items.append(item.loc[:, [
                k for k, v in cpi_details[details].items() if v["Core"] is core
            ]])
            aux = weight.loc[[
                k for k, v in cpi_details[details].items() if v["Core"] is core
            ]]
            weights.append(aux.div(aux.sum()))

    intermediate = []
    for item, weight in zip(items, weights):
        intermediate.append(item.mul(weight.squeeze()).sum(1))

    output = []
    for x, y in zip(intermediate[:4], intermediate[4:]):
        aux = pd.concat([
            y.pct_change().loc[y.index < "2011-01-01"],
            x.pct_change().loc[x.index > "2011-01-01"]
        ])
        output.append(aux.fillna(0).add(1).cumprod().mul(100))

    cpi_re = cpi(update_loc=update_loc, save_loc=save_loc, only_get=True)
    cpi_re = cpi_re.loc[cpi_re.index >= "1997-03-31"]
    output = pd.concat([cpi_re] + output + [cpi_win], axis=1)
    output.columns = [
        "Índice de precios al consumo: total",
        "Índice de precios al consumo: transables",
        "Índice de precios al consumo: no transables",
        "Índice de precios al consumo: subyacente",
        "Índice de precios al consumo: residual",
        "Índice de precios al consumo: Winsorized 0.05"
    ]
    output = output.apply(pd.to_numeric, errors="coerce")
    metadata._set(output,
                  area="Precios y salarios",
                  currency="-",
                  inf_adj="No",
                  unit="2010-12=100",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)
    output = transform.rebase(output,
                              start_date="2010-12-01",
                              end_date="2010-12-31")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
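
The Winsorized CPI step above is easy to lose in the surrounding reshaping: each month's cross-section of item inflation rates is clipped at the 5th and 95th percentiles before being weighted and chained into an index. A standalone sketch with made-up items and equal weights:

import numpy as np
import pandas as pd
from scipy.stats.mstats import winsorize

rng = np.random.default_rng(0)
diff = pd.DataFrame(rng.normal(0.005, 0.01, size=(12, 40)),
                    index=pd.date_range("2011-01-31", periods=12, freq="M"),
                    columns=[f"item{i}" for i in range(40)])
weights = pd.Series(1 / 40, index=diff.columns)

# Clip each row (month) at the 5th/95th percentiles, then weight and chain.
win = pd.DataFrame(winsorize(diff, limits=(0.05, 0.05), axis=1),
                   index=diff.index, columns=diff.columns)
cpi_win = win.mul(weights).sum(axis=1).add(1).cumprod().mul(100)
print(cpi_win.tail())
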
Example #18
def stocks(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
           revise_rows: Union[str, int] = "nodup",
           save_loc: Union[str, PathLike, Engine, Connection, None] = None,
           only_get: bool = False) -> pd.DataFrame:
    """Get stock market index data in USD terms.

    Indexes selected are MERVAL and BOVESPA.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily stock market index in USD terms : pd.DataFrame

    """
    name = "regional_stocks"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    end_date_dt = dt.datetime(2000, 1, 1)
    start_date_dt = dt.datetime(2000, 1, 1)
    aux = []
    while end_date_dt < dt.datetime.now():
        end_date_dt = start_date_dt + dt.timedelta(days=5000)
        params = {
            "curr_id": "13376",
            "smlID": str(randint(1000000, 99999999)),
            "header": "S&amp;P Merval Historical Data",
            "st_date": start_date_dt.strftime("%m/%d/%Y"),
            "end_date": end_date_dt.strftime("%m/%d/%Y"),
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }
        r = requests.post(urls[name]["dl"]["arg"],
                          headers=investing_headers,
                          data=params)
        aux.append(
            pd.read_html(r.content,
                         match="Price",
                         index_col=0,
                         parse_dates=True)[0])
        start_date_dt = end_date_dt + dt.timedelta(days=1)
    arg = pd.concat(aux, axis=0)[["Price"]].sort_index()

    bra = pd.read_csv(urls[name]["dl"]["bra"], index_col=0,
                      parse_dates=True)[["Close"]]
    bra = bra.loc[bra.index >= "2000-01-01"]

    converters = nxr(update_loc=update_loc, only_get=only_get)
    converters.columns = converters.columns.get_level_values(0)
    arg = pd.merge_asof(arg,
                        converters[["Argentina - informal"]],
                        left_index=True,
                        right_index=True)
    arg = (arg.iloc[:, 0] / arg.iloc[:, 1]).to_frame()
    arg.columns = ["Argentina"]
    bra = pd.merge_asof(bra,
                        converters[["Brasil"]],
                        left_index=True,
                        right_index=True)
    bra = (bra.iloc[:, 0] / bra.iloc[:, 1]).to_frame()
    bra.columns = ["Brasil"]

    output = arg.join(bra, how="left").interpolate(method="linear",
                                                   limit_area="inside")
    metadata._set(output,
                  area="Regional",
                  currency="USD",
                  inf_adj="No",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)
    output = rebase(output, start_date="2019-01-02").dropna(how="all")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
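
`pd.merge_asof` in the snippet above matches each trading day's index level with the latest exchange rate observed on or before that day, which tolerates the two series having different holiday calendars. A minimal illustration with made-up values:

import pandas as pd

prices = pd.DataFrame({"Price": [100.0, 110.0]},
                      index=pd.to_datetime(["2021-01-04", "2021-01-05"]))
fx = pd.DataFrame({"ARS": [85.0, 86.0]},
                  index=pd.to_datetime(["2021-01-01", "2021-01-05"]))
merged = pd.merge_asof(prices, fx, left_index=True, right_index=True)
print((merged["Price"] / merged["ARS"]).to_frame("USD"))
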
Example #19
def tax_revenue(
        update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """
    Get tax revenues data.

    This retrieval function requires that Ghostscript and Tkinter be
    available on your system.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly tax revenues : pd.DataFrame

    """
    name = "taxes"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc,
                         name=name)
        if not output.equals(pd.DataFrame()):
            return output

    raw = pd.read_excel(urls[name]["dl"]["main"],
                        usecols="C:AO", index_col=0)
    raw.index = pd.to_datetime(raw.index, errors="coerce")
    output = raw.loc[~pd.isna(raw.index)]
    output.index = output.index + MonthEnd(0)
    output.columns = taxes_columns
    output = output.div(1000000)
    latest = _get_taxes_from_pdf(output)
    output = pd.concat([output, latest], sort=False)
    output = output.loc[~output.index.duplicated(keep="first")]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    output = output.apply(pd.to_numeric, errors="coerce")
    metadata._set(output, area="Sector público", currency="UYU",
                  inf_adj="No", unit="Millones", seas_adj="NSA",
                  ts_type="Flujo", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
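
Note the `MonthEnd(0)` above versus the `MonthEnd(1)` used elsewhere: `MonthEnd(0)` rolls forward to the end of the current month and leaves dates that already are month-ends untouched, while `MonthEnd(1)` always advances to the next month-end. A quick check:

import pandas as pd
from pandas.tseries.offsets import MonthEnd

idx = pd.to_datetime(["2021-01-01", "2021-01-31"])
print(idx + MonthEnd(0))  # 2021-01-31, 2021-01-31
print(idx + MonthEnd(1))  # 2021-01-31, 2021-02-28
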
Example #20
def rxr(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get real exchange rates vis-á-vis the US dollar for Argentina and Brasil .

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly real exchange rate : pd.DataFrame

    """
    name = "regional_rxr"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    proc = _ifs(update_loc=update_loc, save_loc=save_loc, only_get=only_get)

    output = pd.DataFrame()
    output["Argentina"] = (proc["Argentina - oficial"] * proc["US.PCPI_IX"] /
                           proc["ARG CPI"])
    output["Brasil"] = proc["Brasil"] * proc["US.PCPI_IX"] / proc["BRA CPI"]
    metadata._set(output,
                  area="Regional",
                  currency="-",
                  inf_adj="-",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)
    metadata._modify_multiindex(output,
                                levels=[3],
                                new_arrays=[["ARS/USD", "BRL/USD"]])
    output = rebase(output, start_date="2019-01-01",
                    end_date="2019-01-31").dropna(how="all")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
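
The two assignments above implement the standard bilateral real exchange rate: the nominal per-dollar rate multiplied by US consumer prices and divided by domestic consumer prices. A worked example with made-up index levels:

nominal_ars_usd = 100.0  # ARS per USD (made up)
us_cpi = 120.0           # US.PCPI_IX (made up)
arg_cpi = 400.0          # ARG CPI (made up)
rxr_arg = nominal_ars_usd * us_cpi / arg_cpi
print(rxr_arg)  # 30.0; after rebasing, only movements relative to 2019 matter
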
Example #21
def get(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        name: str = "cpi",
        index_label: str = "index",
        only_get: bool = False) -> pd.DataFrame:
    """Get CPI data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'cpi'
        Either CSV filename for updating and/or saving, or table name if
        using SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI index : pd.DataFrame

    """
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update",
                         data_loc=update_loc,
                         name=name,
                         index_label=index_label)
        if not output.equals(pd.DataFrame()):
            return output

    cpi_raw = pd.read_excel(urls["cpi"]["dl"]["main"],
                            skiprows=7).dropna(axis=0, thresh=2)
    cpi = (cpi_raw.drop(
        ["Mensual", "Acum.año", "Acum.12 meses"],
        axis=1).dropna(axis=0,
                       how="all").set_index("Mes y año").rename_axis(None))
    cpi.columns = ["Índice de precios al consumo"]
    cpi.index = cpi.index + MonthEnd(1)

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name,
                                index_label=index_label)
        cpi = ops._revise(new_data=cpi,
                          prev_data=previous_data,
                          revise_rows=revise_rows)

    cpi = cpi.apply(pd.to_numeric, errors="coerce")
    metadata._set(cpi,
                  area="Precios y salarios",
                  currency="-",
                  inf_adj="No",
                  unit="2010-10=100",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save",
                data_loc=save_loc,
                data=cpi,
                name=name,
                index_label=index_label)

    return cpi
Example #22
def _prices(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
            revise_rows: Union[str, int] = "nodup",
            save_loc: Union[str, PathLike, Engine, Connection, None] = None,
            only_get: bool = True) -> pd.DataFrame:
    """Get commodity prices for Uruguay.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default True
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Commodity prices : pd.DataFrame
        Prices and price indexes of relevant commodities for Uruguay.

    """
    bushel_conv = 36.74 / 100
    name = "commodity_prices"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    url = urls["commodity_index"]["dl"]
    raw_beef = (pd.read_excel(url["beef"], header=4,
                              index_col=0).dropna(how="all"))
    raw_beef.columns = raw_beef.columns.str.strip()
    proc_beef = raw_beef["Ing. Prom./Ton."].to_frame()
    proc_beef.index = pd.date_range(start="2002-01-04",
                                    periods=len(proc_beef),
                                    freq="W-SAT")
    proc_beef["Ing. Prom./Ton."] = np.where(
        proc_beef > np.mean(proc_beef) + np.std(proc_beef) * 2,
        proc_beef / 1000,
        proc_beef,
    )
    beef = proc_beef.resample("M").mean()

    raw_pulp_r = requests.get(url["pulp"])
    temp_dir = tempfile.TemporaryDirectory()
    with zipfile.ZipFile(BytesIO(raw_pulp_r.content), "r") as f:
        f.extractall(path=temp_dir.name)
        path_temp = path.join(temp_dir.name, "monthly_values.csv")
        raw_pulp = pd.read_csv(path_temp, sep=";").dropna(how="any")
    proc_pulp = raw_pulp.copy().sort_index(ascending=False)
    proc_pulp.index = pd.date_range(start="1990-01-31",
                                    periods=len(proc_pulp),
                                    freq="M")
    proc_pulp.drop(["Label", "Codes"], axis=1, inplace=True)
    pulp = proc_pulp

    soy_wheat = []
    for link in [url["soybean"], url["wheat"]]:
        raw = pd.read_csv(link, index_col=0)
        proc = (raw["Settle"] * bushel_conv).to_frame()
        proc.index = pd.to_datetime(proc.index, format="%Y-%m-%d")
        proc.sort_index(inplace=True)
        soy_wheat.append(proc.resample("M").mean())
    soybean = soy_wheat[0]
    wheat = soy_wheat[1]

    milk_r = requests.get(url["milk1"])
    milk_soup = BeautifulSoup(milk_r.content, "html.parser")
    links = milk_soup.find_all(href=re.compile("Oceanía"))
    xls = links[0]["href"]
    raw_milk = pd.read_excel(requests.utils.quote(xls).replace("%3A", ":"),
                             skiprows=14,
                             nrows=dt.datetime.now().year - 2006)
    raw_milk.dropna(how="all", axis=1, inplace=True)
    raw_milk.drop(["Promedio ", "Variación"], axis=1, inplace=True)
    raw_milk.columns = ["Año/Mes"] + list(range(1, 13))
    proc_milk = pd.melt(raw_milk, id_vars=["Año/Mes"])
    proc_milk.sort_values(by=["Año/Mes", "variable"], inplace=True)
    proc_milk.index = pd.date_range(start="2007-01-31",
                                    periods=len(proc_milk),
                                    freq="M")
    proc_milk = proc_milk.iloc[:, 2].to_frame()

    prev_milk = pd.read_excel(url["milk2"],
                              sheet_name="Dairy Products Prices",
                              index_col=0,
                              usecols="A,D",
                              skiprows=5)
    prev_milk = prev_milk.resample("M").mean()
    eurusd_r = requests.get(
        "http://fx.sauder.ubc.ca/cgi/fxdata",
        params=f"b=USD&c=EUR&rd=&fd=1&fm=1&fy=2001&ld=31&lm=12&ly="
        f"{dt.datetime.now().year}&y=monthly&q=volume&f=html&o=&cu=on")
    eurusd = pd.read_html(eurusd_r.content)[0].drop("MMM YYYY", axis=1)
    eurusd.index = pd.date_range(start="2001-01-31",
                                 periods=len(eurusd),
                                 freq="M")
    eurusd = eurusd.reindex(prev_milk.index)
    prev_milk = prev_milk.divide(eurusd.values).multiply(10)
    prev_milk = prev_milk.loc[prev_milk.index < min(proc_milk.index)]
    prev_milk.columns, proc_milk.columns = ["Price"], ["Price"]
    milk = pd.concat([prev_milk, proc_milk])

    raw_imf = pd.read_excel(url["imf"])
    raw_imf.columns = raw_imf.iloc[0, :]
    proc_imf = raw_imf.iloc[3:, 1:]
    proc_imf.index = pd.date_range(start="1980-01-31",
                                   periods=len(proc_imf),
                                   freq="M")
    rice = proc_imf[proc_imf.columns[proc_imf.columns.str.contains("Rice")]]
    wood = proc_imf[proc_imf.columns[proc_imf.columns.str.contains(
        "Sawnwood")]]
    wood = wood.mean(axis=1).to_frame()
    wool = proc_imf[proc_imf.columns[proc_imf.columns.str.startswith("Wool")]]
    wool = wool.mean(axis=1).to_frame()
    barley = proc_imf[proc_imf.columns[proc_imf.columns.str.startswith(
        "Barley")]]
    gold = proc_imf[proc_imf.columns[proc_imf.columns.str.startswith("Gold")]]

    complete = pd.concat(
        [beef, pulp, soybean, milk, rice, wood, wool, barley, gold, wheat],
        axis=1)
    complete = complete.reindex(beef.index).dropna(thresh=8)
    complete.columns = [
        "Beef", "Pulp", "Soybeans", "Milk", "Rice", "Wood", "Wool", "Barley",
        "Gold", "Wheat"
    ]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        complete = ops._revise(new_data=complete,
                               prev_data=previous_data,
                               revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=complete, name=name)

    return complete
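
The `bushel_conv` constant above converts CME settlement prices quoted in US cents per bushel into USD per metric ton: soybeans and wheat are both quoted at 60 lb per bushel, so a metric ton is roughly 36.74 bushels, and the division by 100 turns cents into dollars.

settle = 1000.0              # CME settle in US cents per bushel (made up)
bushel_conv = 36.74 / 100    # bushels per ton, with cents -> USD
print(settle * bushel_conv)  # ~367.4 USD per metric ton
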
Example #23
def stocks(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
           revise_rows: Union[str, int] = "nodup",
           save_loc: Union[str, PathLike, Engine, Connection, None] = None,
           only_get: bool = False) -> pd.DataFrame:
    """Get stock market index data.

    Indexes selected are S&P 500, Euronext 100, Nikkei 225 and Shanghai
    Composite.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily stock market index in USD : pd.DataFrame

    """
    name = "global_stocks"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    yahoo = []
    for series in ["spy", "n100", "nikkei", "sse"]:
        aux = pd.read_csv(urls[name]["dl"][series],
                          index_col=0,
                          usecols=[0, 4],
                          parse_dates=True)
        aux.columns = [series]
        yahoo.append(aux)
    output = pd.concat(yahoo, axis=1).interpolate(method="linear",
                                                  limit_area="inside")
    output.columns = [
        "S&P 500", "Euronext 100", "Nikkei 225",
        "Shanghai Stock Exchange Composite"
    ]
    metadata._set(output,
                  area="Global",
                  currency="USD",
                  inf_adj="No",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)
    metadata._modify_multiindex(output,
                                levels=[3],
                                new_arrays=[["USD", "EUR", "JPY", "CNY"]])
    output = rebase(output, start_date="2019-01-02")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
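
`rebase` is imported from elsewhere in the library, so as a hedge, here is a minimal sketch of what rebasing to a single date amounts to: scale every column so its value at `start_date` equals 100 (the real function also handles date ranges and missing data).

import pandas as pd

def rebase_simple(df: pd.DataFrame, start_date: str) -> pd.DataFrame:
    # Divide each column by its value at start_date and scale to 100.
    return df.div(df.loc[start_date]).mul(100)

prices = pd.DataFrame({"S&P 500": [2500.0, 2550.0, 2600.0]},
                      index=pd.to_datetime(["2019-01-02", "2019-01-03",
                                            "2019-01-04"]))
print(rebase_simple(prices, "2019-01-02"))  # 100.0, 102.0, 104.0
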
Example #24
def _weights(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
             revise_rows: Union[str, int] = "nodup",
             save_loc: Union[str, PathLike, Engine, Connection, None] = None,
             only_get: bool = True) -> pd.DataFrame:
    """Get commodity export weights for Uruguay.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default True
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Commodity weights : pd.DataFrame
        Export-based weights for commodities relevant to Uruguay.

    """
    name = "commodity_weights"
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update",
                         data_loc=update_loc,
                         name=name,
                         multiindex=False)
        if not output.equals(pd.DataFrame()):
            return output

    base_url = "http://comtrade.un.org/api/get?max=1000&type=C&freq=A&px=S3&ps"
    prods = "%2C".join([
        "0011", "011", "01251", "01252", "0176", "022", "041", "042", "043",
        "2222", "24", "25", "268", "97"
    ])
    raw = []
    for year in range(1992, dt.datetime.now().year - 1):
        full_url = f"{base_url}={year}&r=all&p=858&rg=1&cc={prods}"
        un_r = requests.get(full_url)
        raw.append(pd.DataFrame(un_r.json()["dataset"]))
    raw = pd.concat(raw, axis=0)

    table = raw.groupby(["period", "cmdDescE"]).sum().reset_index()
    table = table.pivot(index="period",
                        columns="cmdDescE",
                        values="TradeValue")
    table.fillna(0, inplace=True)
    percentage = table.div(table.sum(axis=1), axis=0)
    percentage.index = (pd.to_datetime(percentage.index, format="%Y") +
                        YearEnd(1))
    roll = percentage.rolling(window=3, min_periods=3).mean()
    output = roll.resample("M").bfill()

    beef = [
        "BOVINE MEAT", "Edible offal of bovine animals, fresh or chilled",
        "Meat and offal (other than liver), of bovine animals, "
        "prepared or preserv", "Edible offal of bovine animals, frozen",
        "Bovine animals, live"
    ]
    output["Beef"] = output[beef].sum(axis=1, min_count=len(beef))
    output.drop(beef, axis=1, inplace=True)
    output.columns = [
        "Barley", "Wood", "Gold", "Milk", "Pulp", "Rice", "Soybeans", "Wheat",
        "Wool", "Beef"
    ]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
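
The weighting scheme above smooths annual export shares with a three-year rolling mean and back-fills them to monthly frequency, so a given year's commodity weights lag the underlying trade data. A toy version with a single made-up commodity share:

import pandas as pd

shares = pd.DataFrame({"Beef": [0.30, 0.32, 0.28, 0.31]},
                      index=pd.to_datetime(["2016-12-31", "2017-12-31",
                                            "2018-12-31", "2019-12-31"]))
roll = shares.rolling(window=3, min_periods=3).mean()  # NaN until 3 years exist
monthly = roll.resample("M").bfill()
print(monthly.dropna().head())
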
Example #25
def gdp(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get seasonally adjusted real quarterly GDP for select countries.

    Countries/aggregates are US, EU-27, Japan and China.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Quarterly real GDP in seasonally adjusted terms : pd.DataFrame

    """
    name = "global_gdp"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    chn_y = dt.datetime.now().year + 1
    chn_r = requests.get(f"{urls[name]['dl']['chn_oecd']}{chn_y}-Q4")
    chn_json = chn_r.json()
    chn_datasets = []
    for dataset, start in zip(["0", "1"], ["2011-03-31", "1993-03-31"]):
        raw = chn_json["dataSets"][0]["series"][f"0:0:{dataset}:0"][
            "observations"]
        values = [x[0] for x in raw.values()]
        df = pd.DataFrame(data=values,
                          index=pd.date_range(start=start,
                                              freq="Q-DEC",
                                              periods=len(values)),
                          columns=["China"])
        chn_datasets.append(df)
    chn_qoq = chn_datasets[0]
    chn_yoy = chn_datasets[1]
    chn_obs = pd.read_excel(urls["global_gdp"]["dl"]["chn_obs"],
                            index_col=0).dropna(how="all",
                                                axis=1).dropna(how="all",
                                                               axis=0)
    chn_obs = chn_obs.loc[(chn_obs.index > "2011-01-01")
                          & (chn_obs.index < "2016-01-01")]
    chn_yoy["volume"] = chn_obs
    for row in reversed(range(len(chn_yoy.loc[chn_yoy.index < "2011-01-01"]))):
        if pd.isna(chn_yoy.iloc[row, 1]):
            chn_yoy.iloc[row, 1] = (chn_yoy.iloc[row + 4, 1] /
                                    (1 + chn_yoy.iloc[row + 4, 0] / 100))
    chn_yoy = chn_yoy[["volume"]].loc[chn_yoy.index < "2016-01-01"]
    metadata._set(chn_yoy)
    chn_sa = decompose(chn_yoy[["volume"]].loc[chn_yoy.index < "2016-01-01"],
                       component="seas",
                       method="x13")
    chn_sa = pd.concat([chn_sa, chn_qoq], axis=1)
    for row in range(len(chn_sa)):
        if not pd.isna(chn_sa.iloc[row, 1]):
            chn_sa.iloc[row, 0] = (chn_sa.iloc[row - 1, 0] *
                                   (1 + chn_sa.iloc[row, 1] / 100))
    chn = chn_sa.iloc[:, [0]].div(10)

    gdps = []
    load_dotenv(Path(get_project_root(), ".env"))
    fred_api_key = os.environ.get("FRED_API_KEY")
    for series in ["GDPC1", "CLVMNACSCAB1GQEU272020", "JPNRGDPEXP"]:
        r = requests.get(f"{urls[name]['dl']['fred']}{series}&api_key="
                         f"{fred_api_key}&file_type=json")
        aux = pd.DataFrame.from_records(r.json()["observations"])
        aux = aux[["date", "value"]].set_index("date")
        aux.index = pd.to_datetime(aux.index)
        aux.index = aux.index.shift(3, freq="M") + MonthEnd(0)
        aux.columns = [series]
        aux = aux.apply(pd.to_numeric, errors="coerce")
        if series == "GDPC1":
            aux = aux.div(4)
        elif series == "CLVMNACSCAB1GQEU272020":
            aux = aux.div(1000)
        gdps.append(aux)
    gdps = pd.concat(gdps, axis=1)

    output = pd.concat([gdps, chn], axis=1)
    output.columns = ["Estados Unidos", "Unión Europea", "Japón", "China"]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output,
                  area="Global",
                  currency="USD",
                  inf_adj="Const.",
                  unit="Miles de millones",
                  seas_adj="SA",
                  ts_type="Flujo",
                  cumperiods=1)
    metadata._modify_multiindex(output,
                                levels=[3],
                                new_arrays=[["USD", "EUR", "JPY", "CNY"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
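
The row loop above chain-links China's quarter-on-quarter growth onto the last available seasonally adjusted level, i.e. level_t = level_{t-1} * (1 + qoq_t / 100). A toy version of that recursion:

import pandas as pd

level = pd.Series([100.0], index=pd.period_range("2015Q4", periods=1, freq="Q"))
for g in [1.5, 1.2, -0.4]:  # made-up QoQ growth rates in percent
    next_q = level.index[-1] + 1
    level[next_q] = level.iloc[-1] * (1 + g / 100)
print(level)  # 100.0, 101.5, ~102.72, ~102.31
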
Example #26
def cpi_measures(update_loc: Union[str, PathLike, Engine, Connection,
                                   None] = None,
                 revise_rows: Union[str, int] = "nodup",
                 save_loc: Union[str, PathLike, Engine, Connection,
                                 None] = None,
                 name: str = "tfm_prices",
                 index_label: str = "index",
                 only_get: bool = False) -> pd.DataFrame:
    """Get core CPI, Winsorized CPI, tradabe CPI and non-tradable CPI.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'tfm_prices'
        Either CSV filename for updating and/or saving, or table name if
        using SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI measures : pd.DataFrame

    """
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update",
                         data_loc=update_loc,
                         name=name,
                         index_label=index_label)
        if not output.equals(pd.DataFrame()):
            return output

    xls = pd.ExcelFile(urls["tfm_prices"]["dl"]["main"])
    weights = pd.read_excel(xls,
                            sheet_name=xls.sheet_names[0],
                            usecols="A:C",
                            skiprows=14,
                            index_col=0).dropna(how="any")
    weights.columns = ["Item", "Weight"]
    weights_8 = weights.loc[weights.index.str.len() == 8]
    sheets = []
    for sheet in xls.sheet_names:
        raw = pd.read_excel(xls, sheet_name=sheet, usecols="D:IN",
                            skiprows=9).dropna(how="all")
        proc = raw.loc[:, raw.columns.str.contains("Indice|Índice")].dropna(
            how="all")
        sheets.append(proc.T)
    output = pd.concat(sheets)
    output = output.iloc[:, 1:]
    output.columns = [weights["Item"], weights.index]
    output.index = pd.date_range(start="2010-12-31",
                                 periods=len(output),
                                 freq="M")
    diff_8 = output.loc[:,
                        output.columns.get_level_values(
                            level=1).str.len() == 8].pct_change()
    win = pd.DataFrame(winsorize(diff_8, limits=(0.05, 0.05), axis=1))
    win.index = diff_8.index
    win.columns = diff_8.columns.get_level_values(level=1)
    cpi_win = win.mul(weights_8.loc[:, "Weight"].T)
    cpi_win = cpi_win.sum(axis=1).add(1).cumprod().mul(100)

    prod_97 = (pd.read_excel(
        urls["tfm_prices"]["dl"]["historical"], skiprows=5).dropna(
            how="any").set_index("Rubros, Agrupaciones y Subrubros").T)
    prod_97 = prod_97.loc[:, prod_details[1]].pct_change()
    output_8 = output.loc[:, prod_details[0]].pct_change()
    output_8 = output_8.loc[:, ~output_8.columns.get_level_values(
        level=0).duplicated()]
    output_8.columns = output_8.columns.get_level_values(level=0)
    prod_97.columns = output_8.columns.get_level_values(level=0)
    complete = pd.concat([prod_97, output_8.iloc[1:]])
    complete.index = pd.date_range(start="1997-03-31",
                                   freq="M",
                                   periods=len(complete))
    weights_complete = weights.loc[weights["Item"].isin(complete.columns)]
    weights_complete = weights_complete.loc[~weights_complete["Item"].
                                            duplicated()].set_index("Item")
    tradable = complete.loc[:, [bool(x) for x in prod_details[2]]]
    tradable_weights = weights_complete.loc[
        weights_complete.index.isin(tradable.columns), "Weight"].T
    tradable_weights = tradable_weights.div(tradable_weights.sum())
    tradable = (tradable.mul(tradable_weights).sum(
        axis=1).add(1).cumprod().mul(100))

    non_tradable = complete.loc[:, [not bool(x) for x in prod_details[2]]]
    non_tradable_weights = weights_complete.loc[
        weights_complete.index.isin(non_tradable.columns), "Weight"].T
    non_tradable_weights = non_tradable_weights.div(non_tradable_weights.sum())
    non_tradable = (non_tradable.mul(non_tradable_weights).sum(
        axis=1).add(1).cumprod().mul(100))

    core = complete.loc[:, [bool(x) for x in prod_details[3]]]
    core_weights = weights_complete.loc[
        weights_complete.index.isin(core.columns), "Weight"].T
    core_weights = core_weights.div(core_weights.sum())
    core = (core.mul(core_weights).sum(axis=1).add(1).cumprod().mul(100))

    cpi_re = cpi.get(update_loc=update_loc, save_loc=save_loc, only_get=True)
    cpi_re = cpi_re.loc[cpi_re.index >= "1997-03-31"]
    output = pd.concat([cpi_re, tradable, non_tradable, core, cpi_win], axis=1)
    output = transform.base_index(output,
                                  start_date="2010-12-01",
                                  end_date="2010-12-31")
    output.columns = [
        "Índice de precios al consumo: total",
        "Índice de precios al consumo: transables",
        "Índice de precios al consumo: no transables",
        "Índice de precios al consumo: subyacente",
        "Índice de precios al consumo: Winsorized 0.05"
    ]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name,
                                index_label=index_label)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    output = output.apply(pd.to_numeric, errors="coerce")
    metadata._set(output,
                  area="Precios y salarios",
                  currency="-",
                  inf_adj="No",
                  unit="2010-12=100",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save",
                data_loc=save_loc,
                data=output,
                name=name,
                index_label=index_label)

    return output
Example #27
def nxr(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get currencies data.

    Selected currencies are the US dollar index, USDEUR, USDJPY and USDCNY.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily currencies : pd.DataFrame

    """
    name = "global_nxr"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    output = []
    for series in ["dollar", "eur", "jpy", "cny"]:
        aux = pd.read_csv(urls[name]["dl"][series],
                          index_col=0,
                          usecols=[0, 4],
                          parse_dates=True)
        aux.columns = [series]
        if series == "dollar":
            aux.dropna(inplace=True)
        output.append(aux)
    output = output[0].join(output[1:]).interpolate(method="linear",
                                                    limit_area="inside")
    output.columns = ["Índice Dólar", "Euro", "Yen", "Renminbi"]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output,
                  area="Global",
                  currency="USD",
                  inf_adj="No",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)
    metadata._modify_multiindex(
        output,
        levels=[3, 5],
        new_arrays=[["USD", "EUR", "JPY", "CNY"],
                    ["Canasta/USD", "EUR/USD", "JPY/USD", "CNY/USD"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
Example #28
def nxr_monthly(update_loc: Union[str, PathLike, Engine, Connection,
                                  None] = None,
                revise_rows: Union[str, int] = "nodup",
                save_loc: Union[str, PathLike, Engine, Connection,
                                None] = None,
                only_get: bool = False) -> pd.DataFrame:
    """Get monthly nominal exchange rate data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data; retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly nominal exchange rates : pd.DataFrame
        Sell rate, monthly average and end of period.

    """
    name = "nxr_monthly"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output
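    # INE downloads sometimes fail SSL verification; in that case, fall back
    # to the certificate bundle shipped with the package and retry.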
    try:
        nxr_raw = pd.read_excel(urls[name]["dl"]["main"],
                                skiprows=4,
                                index_col=0,
                                usecols="A,C,F")
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" in str(err):
            certificate = Path(get_project_root(), "utils", "files",
                               "ine_certs.pem")
            r = requests.get(urls[name]["dl"]["main"], verify=certificate)
            nxr_raw = pd.read_excel(BytesIO(r.content),
                                    skiprows=4,
                                    index_col=0,
                                    usecols="A,C,F")
        else:
            raise
    nxr = nxr_raw.dropna(how="any", axis=0)
    nxr.columns = [
        "Tipo de cambio venta, fin de período",
        "Tipo de cambio venta, promedio"
    ]
    nxr.index = nxr.index + MonthEnd(1)
    nxr = nxr.apply(pd.to_numeric, errors="coerce")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        nxr = ops._revise(new_data=nxr,
                          prev_data=previous_data,
                          revise_rows=revise_rows)

    metadata._set(nxr,
                  area="Precios",
                  currency="UYU/USD",
                  inf_adj="No",
                  unit="-",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=nxr, name=name)

    return nxr
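A hypothetical usage sketch (the "data" directory is an assumption; any CSV directory or SQLAlchemy engine works, per the docstring): download the series, revise the cached copy, and save it back.

nxr = nxr_monthly(update_loc="data", revise_rows="nodup", save_loc="data")
print(nxr.tail())

# Offline mode: skip the download and return whatever the local cache holds.
cached = nxr_monthly(update_loc="data", only_get=True)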
Example #29
def income_household(update_loc: Union[str, PathLike,
                                       Engine, Connection, None] = None,
                     revise_rows: Union[str, int] = "nodup",
                     save_loc: Union[str, PathLike,
                                     Engine, Connection, None] = None,
                     only_get: bool = False) -> pd.DataFrame:
    """Get average household income.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data; retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly average household income : pd.DataFrame

    """
    name = "income_household"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc,
                         name=name)
        if not output.equals(pd.DataFrame()):
            return output
    try:
        raw = pd.read_excel(urls[name]["dl"]["main"], sheet_name="Mensual",
                            skiprows=5, index_col=0).dropna(how="all")
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" in str(err):
            certificate = Path(get_project_root(), "utils", "files",
                               "ine_certs.pem")
            r = requests.get(urls[name]["dl"]["main"],
                             verify=certificate)
            raw = pd.read_excel(BytesIO(r.content),
                                sheet_name="Mensual",
                                skiprows=5, index_col=0).dropna(how="all")
        else:
            raise
    raw.index = pd.to_datetime(raw.index)
    output = raw.loc[~pd.isna(raw.index)]
    output.index = output.index + MonthEnd(0)
    output.columns = ["Total país", "Montevideo", "Interior: total",
                      "Interior: localidades de más de 5 mil hab.",
                      "Interior: localidades pequeñas y rural"]

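    # Append rows from the auxiliary "missing" file, which covers only the
    # first three series (columns 10-12 of that spreadsheet).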
    missing = pd.read_excel(urls[name]["dl"]["missing"],
                            index_col=0, header=0).iloc[:, 10:13]
    missing.columns = output.columns[:3]
    output = pd.concat([output, missing], sort=False)
    output = output.apply(pd.to_numeric, errors="coerce")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output, area="Ingresos", currency="UYU",
                  inf_adj="No", unit="Pesos", seas_adj="NSA",
                  ts_type="Flujo", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
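Note the offset choice above: income_household shifts its index with MonthEnd(0), while nxr_monthly uses MonthEnd(1). An illustrative sketch of the difference, with made-up dates:

import pandas as pd
from pandas.tseries.offsets import MonthEnd

idx = pd.to_datetime(["2020-01-01", "2020-01-31"])
# MonthEnd(0) rolls forward to the end of the current month; dates already
# on a month-end stay put.
print(idx + MonthEnd(0))  # ['2020-01-31', '2020-01-31']
# MonthEnd(1) always advances to the next month-end.
print(idx + MonthEnd(1))  # ['2020-01-31', '2020-02-29']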
Example #30
def get_official(update_loc: Union[str, PathLike, Engine, Connection,
                                   None] = None,
                 revise_rows: Union[str, int] = "nodup",
                 save_loc: Union[str, PathLike, Engine, Connection,
                                 None] = None,
                 name: str = "rxr_official",
                 index_label: str = "index",
                 only_get: bool = False) -> pd.DataFrame:
    """Get official real exchange rates from the BCU website.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'rxr_official'
        Either CSV filename for updating and/or saving, or table name if
        using SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default False
        If True, don't download data; retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly real exchange rates vs select countries/regions : pd.DataFrame
        Available: global, extraregional, regional, Argentina, Brazil, US,
        Mexico, Germany, Spain, UK, Italy, China.

    """
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update",
                         data_loc=update_loc,
                         name=name,
                         index_label=index_label)
        if not output.equals(pd.DataFrame()):
            return output

    r = requests.get(urls["rxr_official"]["dl"]["main"])
    soup = BeautifulSoup(r.content, "html.parser")
    links = soup.find_all(href=re.compile("eese[A-Za-z0-9]+\\.xls$"))
    xls = "https://www.bcu.gub.uy" + links[0]["href"]
    raw = pd.read_excel(xls, skiprows=8, usecols="B:N", index_col=0)
    proc = raw.dropna(how="any")
    proc.columns = [
        "Global", "Extrarregional", "Regional", "Argentina", "Brasil",
        "EE.UU.", "México", "Alemania", "España", "Reino Unido", "Italia",
        "China"
    ]
    proc.index = pd.to_datetime(proc.index) + MonthEnd(1)

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name,
                                index_label=index_label)
        proc = ops._revise(new_data=proc,
                           prev_data=previous_data,
                           revise_rows=revise_rows)

    metadata._set(proc,
                  area="Precios y salarios",
                  currency="UYU/Otro",
                  inf_adj="No",
                  unit="2017=100",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save",
                data_loc=save_loc,
                data=proc,
                name=name,
                index_label=index_label)

    return proc
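Finally, a minimal offline sketch of the link-scraping step in get_official: find the first anchor whose href matches the Excel-file pattern and build an absolute URL. The inlined HTML is a stand-in for the real BCU page.

import re
from bs4 import BeautifulSoup

html = '<a href="/stats/eeseAbc123.xls">serie</a>'  # stand-in for the BCU page
soup = BeautifulSoup(html, "html.parser")
links = soup.find_all(href=re.compile(r"eese[A-Za-z0-9]+\.xls$"))
xls = "https://www.bcu.gub.uy" + links[0]["href"]
print(xls)  # https://www.bcu.gub.uy/stats/eeseAbc123.xls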