Esempi in Python per _io

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: econuy.utils.ops

Metodo/funzione: _io

Esempi su hotexamples.com: 30

_io in Python: 30 esempi trovati. Questi sono i migliori esempi reali in Python per econuy.utils.ops._io, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Esempio n. 1

Mostra file

File: frequent.py Progetto: vierja/econuy

def terms_of_trade(update_loc: Union[str, PathLike, Engine, Connection,
                                     None] = None,
                   save_loc: Union[str, PathLike, Engine, Connection,
                                   None] = None,
                   name: str = "tfm_tot",
                   index_label: str = "index",
                   only_get: bool = True) -> pd.DataFrame:
    """
    Build the terms of trade series (exports divided by imports).

    The ``tb_x_dest_pri`` and ``tb_m_orig_pri`` tables returned by
    ``trade.get`` are divided, only the aggregate column is kept, and the
    result is passed through ``transform.base_index`` using calendar year
    2005 as the base window.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Forwarded to ``trade.get``. Either Path or path-like string pointing
        to a directory where to find a CSV for updating, SQLAlchemy
        connection or engine object, or ``None``, don't update.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save. Also forwarded to ``trade.get``.
    name : str, default 'tfm_tot'
        CSV filename or SQL table name used when saving the result.
    index_label : str, default 'index'
        Label for SQL indexes, used when saving the result.
    only_get : bool, default True
        Forwarded to ``trade.get``. If True, don't download data, retrieve
        what is available from ``update_loc``.

    Returns
    -------
    Terms of trade (exports/imports) : pd.DataFrame

    """
    total_col = "Total"

    trade_data = trade.get(update_loc=update_loc, save_loc=save_loc,
                           only_get=only_get)
    # Give both tables a common aggregate column name so that the division
    # below lines the totals up.
    numerator = trade_data["tb_x_dest_pri"].rename(
        columns={"Total exportaciones": total_col})
    denominator = trade_data["tb_m_orig_pri"].rename(
        columns={"Total importaciones": total_col})

    ratio = (numerator / denominator).loc[:, [total_col]]
    ratio = ratio.rename(columns={total_col: "Términos de intercambio"})

    tot = transform.base_index(ratio, start_date="2005-01-01",
                               end_date="2005-12-31")
    metadata._set(tot, ts_type="-")

    if save_loc is None:
        return tot

    ops._io(operation="save", data_loc=save_loc, data=tot,
            name=name, index_label=index_label)
    return tot

Esempio n. 2

Mostra file

def net_public_debt(update_loc: Union[str, PathLike, Engine,
                                      Connection, None] = None,
                    save_loc: Union[str, PathLike, Engine,
                                    Connection, None] = None,
                    only_get: bool = True) -> pd.DataFrame:
    """
    Get net public debt excluding deposits at the central bank.

    The result adds three pieces, aligned on the gross debt index: the
    "Total deuda" column of the ``gps`` table, the "Total activos" column
    of the ``assets`` table, and the quarterly (last observation)
    "Obligaciones en ME con el sector financiero" reserves column. Rows
    with missing values are dropped.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default True
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Net public debt excl. deposits at the central bank : pd.DataFrame

    """
    name = "net_public_debt"
    source_kwargs = dict(update_loc=update_loc, save_loc=save_loc,
                         only_get=only_get)

    debt_tables = _public_debt_retriever(**source_kwargs)
    liabilities = debt_tables["gps"].loc[:, ["Total deuda"]]
    holdings = debt_tables["assets"].loc[:, ["Total activos"]]
    # Both frames get the same single column label so DataFrame.add()
    # matches them column-wise.
    liabilities.columns = ["Deuda neta del sector"
                           " público global excl. encajes"]
    holdings.columns = liabilities.columns

    reserves_table = econuy.retrieval.external_sector.reserves(
        **source_kwargs)
    bank_deposits = reserves_table.loc[
        :, ["Obligaciones en ME con el sector financiero"]]
    quarterly_deposits = transform.resample(bank_deposits, rule="Q-DEC",
                                            operation="last")
    # squeeze() on the single-column frame lets it be added row-wise
    # (axis=0) below.
    deposit_series = quarterly_deposits.reindex(liabilities.index).squeeze()

    output = (liabilities.add(holdings)
              .add(deposit_series, axis=0)
              .dropna())

    metadata._set(output, area="Sector público",
                  currency="USD", inf_adj="No", unit="Millones",
                  seas_adj="NSA", ts_type="Stock", cumperiods=1)

    if save_loc is None:
        return output

    ops._io(operation="save", data_loc=save_loc, data=output, name=name)
    return output

Esempio n. 3

Mostra file

File: frequent.py Progetto: vierja/econuy

def trade_balance(update_loc: Union[str, PathLike, Engine, Connection,
                                    None] = None,
                  save_loc: Union[str, PathLike, Engine, Connection,
                                  None] = None,
                  name: str = "tfm_tb",
                  index_label: str = "index",
                  only_get: bool = True) -> pd.DataFrame:
    """
    Get trade balance values by country/region.

    Computed as the ``tb_x_dest_val`` table minus the ``tb_m_orig_val``
    table returned by ``trade.get``, after renaming each table's aggregate
    column to "Total".

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Forwarded to ``trade.get``. Either Path or path-like string pointing
        to a directory where to find a CSV for updating, SQLAlchemy
        connection or engine object, or ``None``, don't update.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save. Also forwarded to ``trade.get``.
    name : str, default 'tfm_tb'
        CSV filename or SQL table name used when saving the result.
    index_label : str, default 'index'
        Label for SQL indexes, used when saving the result.
    only_get : bool, default True
        Forwarded to ``trade.get``. If True, don't download data, retrieve
        what is available from ``update_loc``.

    Returns
    -------
    Net trade balance value by region/country : pd.DataFrame

    """
    total_col = "Total"

    trade_data = trade.get(update_loc=update_loc, save_loc=save_loc,
                           only_get=only_get)
    # Align the aggregate column names so the subtraction matches them.
    sold = trade_data["tb_x_dest_val"].rename(
        columns={"Total exportaciones": total_col})
    bought = trade_data["tb_m_orig_val"].rename(
        columns={"Total importaciones": total_col})
    net = sold - bought

    if save_loc is None:
        return net

    ops._io(operation="save", data_loc=save_loc, data=net,
            name=name, index_label=index_label)
    return net

Esempio n. 4

Mostra file

File: labor.py Progetto: rxavier/econuy

def real_wages(update_loc: Union[str, PathLike, Engine, Connection,
                                 None] = None,
               save_loc: Union[str, PathLike, Engine, Connection, None] = None,
               only_get: bool = True) -> pd.DataFrame:
    """
    Get real wages.

    Nominal wages are relabelled, tagged with metadata, converted with
    ``transform.convert_real`` and rebased with ``transform.rebase``
    using a start date of 2008-07-31.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default True
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Real wages data : pd.DataFrame

    """
    name = "real_wages"
    real_labels = [
        "Índice medio de salarios reales",
        "Índice medio de salarios reales privados",
        "Índice medio de salarios reales públicos"
    ]
    wage_metadata = dict(area="Mercado laboral", currency="UYU",
                         inf_adj="Sí", seas_adj="NSA", ts_type="-",
                         cumperiods=1)

    base = nominal_wages(update_loc=update_loc, only_get=only_get)
    base.columns = real_labels
    metadata._set(base, **wage_metadata)

    deflated = transform.convert_real(base, update_loc=update_loc,
                                      only_get=only_get)
    output = transform.rebase(deflated, start_date="2008-07-31")

    if save_loc is None:
        return output

    ops._io(operation="save", data_loc=save_loc, data=output, name=name)
    return output

Esempio n. 5

Mostra file

def _balance_retriever(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
                       revise_rows: Union[str, int] = "nodup",
                       save_loc: Union[str, PathLike, Engine, Connection, None] = None,
                       only_get: bool = False) -> Dict[str, pd.DataFrame]:
    """Helper function. See any of the `balance_...()` functions.

    Returns a dict with one DataFrame per key of ``fiscal_sheets``. When
    ``only_get`` is True and ``update_loc`` is given, stored tables named
    ``balance_<dataset>`` are returned if none of them is empty; otherwise
    the first ``.xlsx`` link found on the source page is downloaded and
    every sheet is parsed, optionally revised against stored data and
    optionally saved.
    """
    if only_get is True and update_loc is not None:
        stored = {
            dataset: ops._io(operation="update", data_loc=update_loc,
                             name=f"balance_{dataset}")
            for dataset in fiscal_sheets
        }
        # Fall through to a fresh download as soon as one table is empty.
        if not any(frame.equals(pd.DataFrame()) for frame in stored.values()):
            return stored

    page = requests.get(urls["balance_gps"]["dl"]["main"])
    parsed_page = BeautifulSoup(page.content, "html.parser")
    xlsx_links = parsed_page.find_all(href=re.compile("\\.xlsx$"))
    workbook = pd.ExcelFile(xlsx_links[0]["href"])

    output = {}
    for dataset, meta in fiscal_sheets.items():
        sheet = pd.read_excel(workbook, sheet_name=meta["sheet"])
        # Drop rows/columns with fewer than 4 non-missing cells, then flip
        # the table and index it by the column labelled 2.
        sheet = sheet.dropna(axis=0, thresh=4).dropna(axis=1, thresh=4)
        data = sheet.transpose().set_index(2, drop=True)
        data.columns = data.iloc[0]
        data = data[data.index.notnull()].rename_axis(None)
        data.index = data.index + MonthEnd(1)
        data.columns = meta["colnames"]
        data = data.apply(pd.to_numeric, errors="coerce")
        metadata._set(data, area="Sector público", currency="UYU",
                      inf_adj="No", unit="Millones", seas_adj="NSA",
                      ts_type="Flujo", cumperiods=1)

        table_name = f"balance_{dataset}"
        if update_loc is not None:
            previous_data = ops._io(operation="update", data_loc=update_loc,
                                    name=table_name)
            data = ops._revise(new_data=data, prev_data=previous_data,
                               revise_rows=revise_rows)

        if save_loc is not None:
            ops._io(operation="save", data_loc=save_loc, data=data,
                    name=table_name)

        output[dataset] = data

    return output

Esempio n. 6

Mostra file

File: session.py Progetto: vierja/econuy

    def save(self, name: str, index_label: str = "index"):
        """Save :attr:`dataset` attribute to a CSV or SQL.

        Any file suffix in ``name`` is stripped. When :attr:`dataset` is a
        dict, every value is saved separately as ``<name>_<key>``.
        """
        base_name = Path(name).with_suffix("").as_posix()

        if isinstance(self.dataset, dict):
            targets = [(f"{base_name}_{key}", frame)
                       for key, frame in self.dataset.items()]
        else:
            targets = [(base_name, self.dataset)]

        for target_name, frame in targets:
            ops._io(operation="save", data_loc=self.location,
                    data=frame, name=target_name,
                    index_label=index_label)

        self.logger.info(f"Saved dataset to '{self.location}'.")

Esempio n. 7

Mostra file

def balance_summary(update_loc: Union[str, PathLike, Engine,
                                      Connection, None] = None,
                    save_loc: Union[str, PathLike, Engine,
                                    Connection, None] = None,
                    only_get: bool = True) -> pd.DataFrame:
    """
    Get the summary fiscal balance table found in the `Budget Law
    <https://www.gub.uy/contaduria-general-nacion/sites/
    contaduria-general-nacion/files/2020-09/
    Mensaje%20y%20Exposici%C3%B3n%20de%20motivos.pdf>`_. Includes adjustments
    for the `Social Security Fund <https://www.impo.com.uy/bases/decretos/
    71-2018/25>`_.

    The table is assembled column by column from the ``gps``, ``nfps``,
    ``cg-bps`` and ``pe`` tables returned by ``_balance_retriever``. Columns
    are appended in the order they are assigned below, so the statement
    order defines the layout of the output.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default True
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Summary fiscal balance table : pd.DataFrame

    """
    # Table/file name used when saving the result.
    name = "balance_summary"

    data = _balance_retriever(update_loc=update_loc,
                              save_loc=save_loc, only_get=only_get)
    gps = data["gps"]
    nfps = data["nfps"]
    gc = data["cg-bps"]
    pe = data["pe"]

    # Start from an empty frame sharing the index of the "gps" table.
    proc = pd.DataFrame(index=gps.index)

    # --- GC-BPS revenues; "ex FSS" variants subtract the
    # "FSS - Cincuentones" revenue column ---
    proc["Ingresos: GC-BPS"] = gc["Ingresos: GC-BPS"]
    proc["Ingresos: GC-BPS ex. FSS"] = (gc["Ingresos: GC-BPS"]
                                        - gc["Ingresos: FSS - Cincuentones"])
    proc["Ingresos: GC"] = gc["Ingresos: GC"]
    proc["Ingresos: DGI"] = gc["Ingresos: DGI"]
    proc["Ingresos: Comercio ext."] = gc["Ingresos: Comercio ext."]
    # Residual: GC revenues not explained by DGI or foreign trade.
    proc["Ingresos: Otros"] = (gc["Ingresos: GC"]
                               - gc["Ingresos: DGI"]
                               - gc["Ingresos: Comercio ext."])
    proc["Ingresos: BPS"] = gc["Ingresos: BPS neto"]
    proc["Ingresos: FSS - Cincuentones"] = gc["Ingresos: FSS - Cincuentones"]
    proc["Ingresos: BPS ex FSS"] = (gc["Ingresos: BPS neto"]
                                    - gc["Ingresos: FSS - Cincuentones"])
    # --- GC-BPS expenditures: primary = total minus interest; current
    # primary additionally removes investment ---
    proc["Egresos: Primarios GC-BPS"] = (gc["Egresos: GC-BPS"]
                                         - gc["Intereses: Total"])
    # NOTE(review): squeeze() presumably guards against a one-column frame
    # being returned for this label - confirm against the "cg-bps" table.
    proc["Egresos: Primarios corrientes GC-BPS"] = (proc["Egresos: Primarios GC-BPS"]
                                                    - gc["Egresos: Inversión"].squeeze())
    proc["Egresos: Remuneraciones"] = gc["Egresos: Remuneraciones"]
    proc["Egresos: No personales"] = gc["Egresos: No personales"]
    proc["Egresos: Pasividades"] = gc["Egresos: Pasividades"]
    proc["Egresos: Transferencias"] = gc["Egresos: Transferencias"]
    proc["Egresos: Inversión"] = gc["Egresos: Inversión"]
    # --- GC-BPS results: primary = revenues - primary expenditures;
    # global = primary - interest ---
    proc["Resultado: Primario GC-BPS"] = (proc["Ingresos: GC-BPS"]
                                          - proc["Egresos: Primarios GC-BPS"])
    proc["Resultado: Primario GC-BPS ex FSS"] = (proc["Ingresos: GC-BPS ex. FSS"]
                                                 - proc["Egresos: Primarios GC-BPS"])
    proc["Intereses: GC-BPS"] = gc["Intereses: Total"]
    proc["Intereses: FSS - Cincuentones"] = gc["Intereses: FSS - Cincuentones"]
    proc["Intereses: GC-BPS ex FSS"] = (proc["Intereses: GC-BPS"]
                                        - proc["Intereses: FSS - Cincuentones"])
    proc["Resultado: Global GC-BPS"] = (proc["Resultado: Primario GC-BPS"]
                                        - proc["Intereses: GC-BPS"])
    proc["Resultado: Global GC-BPS ex FSS"] = (proc["Resultado: Primario GC-BPS ex FSS"]
                                               - proc["Intereses: GC-BPS ex FSS"])

    # --- EEPP: current primary result comes from "nfps"; investment and
    # interest come from "pe" ---
    proc["Resultado: Primario corriente EEPP"] = nfps["Ingresos: Res. primario corriente EEPP"]
    proc["Egresos: Inversiones EEPP"] = pe["Egresos: Inversiones"]
    proc["Resultado: Primario EEPP"] = (proc["Resultado: Primario corriente EEPP"]
                                        - proc["Egresos: Inversiones EEPP"])
    proc["Intereses: EEPP"] = pe["Intereses"]
    proc["Resultado: Global EEPP"] = (proc["Resultado: Primario EEPP"]
                                      - proc["Intereses: EEPP"])

    # --- Intendencias (from "nfps") ---
    proc["Resultado: Primario intendencias"] = nfps["Resultado: Primario intendencias"]
    proc["Intereses: Intendencias"] = nfps["Intereses: Intendencias"]
    proc["Resultado: Global intendencias"] = (proc["Resultado: Primario intendencias"]
                                              - proc["Intereses: Intendencias"])

    # --- BSE (from "nfps") ---
    proc["Resultado: Primario BSE"] = nfps["Resultado: Primario BSE"]
    proc["Intereses: BSE"] = nfps["Intereses: BSE"]
    proc["Resultado: Global BSE"] = (proc["Resultado: Primario BSE"]
                                     - proc["Intereses: BSE"])

    # --- "Resto SPNF" aggregates: sums of the EEPP, intendencias and BSE
    # columns computed above ---
    proc["Resultado: Primario resto SPNF"] = (proc["Resultado: Primario EEPP"]
                                              + proc["Resultado: Primario intendencias"]
                                              + proc["Resultado: Primario BSE"])
    proc["Intereses: Resto SPNF"] = (proc["Intereses: EEPP"]
                                     + proc["Intereses: Intendencias"]
                                     + proc["Intereses: BSE"])
    proc["Resultado: Global resto SPNF"] = (proc["Resultado: Global EEPP"]
                                            + proc["Resultado: Global intendencias"]
                                            + proc["Resultado: Global BSE"])
    # --- SPNF totals: headline figures are read directly from "nfps";
    # the "ex FSS" variants are derived here ---
    proc["Resultado: Primario SPNF"] = nfps["Resultado: Primario SPNF"]
    proc["Resultado: Primario SPNF ex FSS"] = (proc["Resultado: Primario SPNF"]
                                               - proc["Ingresos: FSS - Cincuentones"])
    proc["Intereses: SPNF"] = nfps["Intereses: Totales"]
    proc["Intereses: SPNF ex FSS"] = (proc["Intereses: SPNF"]
                                      - proc["Intereses: FSS - Cincuentones"])
    proc["Resultado: Global SPNF"] = nfps["Resultado: Global SPNF"]
    proc["Resultado: Global SPNF ex FSS"] = (proc["Resultado: Primario SPNF ex FSS"]
                                             - proc["Intereses: SPNF ex FSS"])

    # --- BCU (read directly from "gps") ---
    proc["Resultado: Primario BCU"] = gps["Resultado: Primario BCU"]
    proc["Intereses: BCU"] = gps["Intereses: BCU"]
    proc["Resultado: Global BCU"] = gps["Resultado: Global BCU"]

    # --- SPC: the primary result is read from "gps"; the remaining columns
    # are SPNF plus BCU ---
    proc["Resultado: Primario SPC"] = gps["Resultado: Primario SPC"]
    proc["Resultado: Primario SPC ex FSS"] = (proc["Resultado: Primario SPNF ex FSS"]
                                              + proc["Resultado: Primario BCU"])
    proc["Intereses: SPC"] = proc["Intereses: SPNF"] + proc["Intereses: BCU"]
    proc["Intereses: SPC ex FSS"] = (proc["Intereses: SPNF ex FSS"]
                                     + proc["Intereses: BCU"])
    proc["Resultado: Global SPC"] = (proc["Resultado: Global SPNF"]
                                     + proc["Resultado: Global BCU"])
    proc["Resultado: Global SPC ex FSS"] = (proc["Resultado: Global SPNF ex FSS"]
                                            + proc["Resultado: Global BCU"])
    output = proc

    metadata._set(output, area="Sector público",
                  currency="UYU", inf_adj="No", unit="Millones",
                  seas_adj="NSA", ts_type="Flujo", cumperiods=1)

    # Persist only when a save location was supplied.
    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output

Esempio n. 8

Mostra file

def _weights(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
             revise_rows: Union[str, int] = "nodup",
             save_loc: Union[str, PathLike, Engine, Connection, None] = None,
             only_get: bool = True) -> pd.DataFrame:
    """Get commodity export weights for Uruguay.

    Yearly trade values are requested from the UN Comtrade API for each
    year from 1992 on, converted to shares of the yearly total, averaged
    over a 3-year rolling window and resampled to monthly frequency with
    backfilling. The bovine-related products are merged into a single
    "Beef" column.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default True
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Commodity weights : pd.DataFrame
        Export-based weights for relevant commodities to Uruguay.

    """
    name = "commodity_weights"
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update",
                         data_loc=update_loc,
                         name=name,
                         multiindex=False)
        # An empty frame means nothing usable was stored; fall through to
        # a fresh download.
        if not output.equals(pd.DataFrame()):
            return output

    base_url = "http://comtrade.un.org/api/get?max=1000&type=C&freq=A&px=S3&ps"
    prods = "%2C".join([
        "0011", "011", "01251", "01252", "0176", "022", "041", "042", "043",
        "2222", "24", "25", "268", "97"
    ])
    raw = []
    for year in range(1992, dt.datetime.now().year - 1):
        full_url = f"{base_url}={year}&r=all&p=858&rg=1&cc={prods}"
        un_r = requests.get(full_url)
        raw.append(pd.DataFrame(un_r.json()["dataset"]))
    raw = pd.concat(raw, axis=0)

    # One row per period, one column per commodity description.
    table = raw.groupby(["period", "cmdDescE"]).sum().reset_index()
    table = table.pivot(index="period",
                        columns="cmdDescE",
                        values="TradeValue")
    table.fillna(0, inplace=True)
    # Share of each commodity in the period's total.
    percentage = table.div(table.sum(axis=1), axis=0)
    percentage.index = (pd.to_datetime(percentage.index, format="%Y") +
                        YearEnd(1))
    roll = percentage.rolling(window=3, min_periods=3).mean()
    output = roll.resample("M").bfill()

    beef = [
        "BOVINE MEAT", "Edible offal of bovine animals, fresh or chilled",
        "Meat and offal (other than liver), of bovine animals, "
        "prepared or preserv", "Edible offal of bovine animals, frozen",
        "Bovine animals, live"
    ]
    # min_count=len(beef) keeps the sum missing unless every bovine
    # component is present.
    output["Beef"] = output[beef].sum(axis=1, min_count=len(beef))
    output.drop(beef, axis=1, inplace=True)
    output.columns = [
        "Barley", "Wood", "Gold", "Milk", "Pulp", "Rice", "Soybeans", "Wheat",
        "Wool", "Beef"
    ]

    if update_loc is not None:
        # Read the stored table with the same settings as the ``only_get``
        # path above; the original omitted ``multiindex=False`` here, so the
        # same table was parsed in two different ways.
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name,
                                multiindex=False)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        # NOTE(review): the save call is left as it was; confirm whether
        # ``ops._io`` also needs ``multiindex=False`` when saving this table.
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output

Esempio n. 9

Mostra file

File: frequent.py Progetto: vierja/econuy

def labor_real_wages(seas_adj: Union[str, None] = None,
                     update_loc: Union[str, PathLike, Engine, Connection,
                                       None] = None,
                     save_loc: Union[str, PathLike, Engine, Connection,
                                     None] = None,
                     name: str = "tfm_wages",
                     index_label: str = "index",
                     only_get: bool = True) -> pd.DataFrame:
    """
    Get nominal and real wages. Allow choosing seasonal adjustment.

    The output holds the wage columns returned by ``labor.get_wages``
    followed by their real counterparts, optionally replaced by the trend
    or seasonally adjusted component, and passed through
    ``transform.base_index`` with a start date of 2008-07-31.

    Parameters
    ----------
    seas_adj : {'trend', 'seas', None}
        Whether to seasonally adjust.
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'tfm_wages'
        Base CSV filename or SQL table name used when saving. A suffix
        (``_nsa``, ``_trend`` or ``_sa``) is appended according to
        ``seas_adj``.
    index_label : str, default 'index'
        Label for SQL indexes, used when saving the result.
    only_get : bool, default True
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Real wages data : pd.DataFrame

    Raises
    ------
    ValueError
        If ``seas_adj`` is given an invalid keyword.

    """
    adjusted_modes = ("trend", "seas")
    if seas_adj not in (*adjusted_modes, None):
        raise ValueError("'seas_adj' can be 'trend', 'seas' or None.")

    nominal = labor.get_wages(update_loc=update_loc, only_get=only_get)

    # Work on a copy so the nominal frame keeps its own labels.
    deflated = nominal.copy()
    deflated.columns = [
        "Índice medio de salarios reales",
        "Índice medio de salarios reales privados",
        "Índice medio de salarios reales públicos"
    ]
    metadata._set(deflated, area="Mercado laboral", currency="UYU",
                  inf_adj="Sí", seas_adj="NSA", ts_type="-", cumperiods=1)
    deflated = transform.convert_real(deflated, update_loc=update_loc,
                                      only_get=only_get)

    output = pd.concat([nominal, deflated], axis=1)
    suffix = "nsa"
    if seas_adj in adjusted_modes:
        trend_part, adjusted_part = transform.decompose(output,
                                                        trading=True,
                                                        outlier=False)
        if seas_adj == "trend":
            output, suffix = trend_part, "trend"
        else:
            output, suffix = adjusted_part, "sa"

    output = transform.base_index(output, start_date="2008-07-31")

    if save_loc is None:
        return output

    ops._io(operation="save", data_loc=save_loc, data=output,
            name=f"{name}_{suffix}", index_label=index_label)
    return output

Esempio n. 10

Mostra file

File: income.py Progetto: rxavier/econuy

def consumer_confidence(
        update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get monthly consumer confidence data.

    Reads columns B:F of the source Excel file (skipping the first three
    rows), drops rows whose index is missing, moves dates to month end and
    coerces all values to numeric.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly consumer confidence data : pd.DataFrame

    """
    name = "consumer_confidence"

    if only_get is True and update_loc is not None:
        stored = ops._io(operation="update", data_loc=update_loc, name=name)
        # An empty frame means nothing usable was stored; download instead.
        if not stored.equals(pd.DataFrame()):
            return stored

    sheet = pd.read_excel(urls[name]["dl"]["main"], skiprows=3,
                          usecols="B:F", index_col=0)
    has_date = ~pd.isna(sheet.index)
    output = sheet.loc[has_date]
    output.index = output.index + MonthEnd(0)
    output.columns = ["Subíndice: Situación Económica Personal",
                      "Subíndice: Situación Económica del País",
                      "Subíndice: Predisposición a la Compra de Durables",
                      "Índice de Confianza del Consumidor"]
    output = output.apply(pd.to_numeric, errors="coerce")

    if update_loc is not None:
        earlier = ops._io(operation="update", data_loc=update_loc, name=name)
        output = ops._revise(new_data=output, prev_data=earlier,
                             revise_rows=revise_rows)

    # Metadata is attached after revising, on the frame that is returned.
    metadata._set(output, area="Actividad económica", currency="-",
                  inf_adj="No", unit="50 = neutralidad", seas_adj="NSA",
                  ts_type="-", cumperiods=1)

    if save_loc is None:
        return output

    ops._io(operation="save", data_loc=save_loc, data=output, name=name)
    return output

Esempio n. 11

Mostra file

def long_rates(update_loc: Union[str, PathLike, Engine, Connection,
                                 None] = None,
               revise_rows: Union[str, int] = "nodup",
               save_loc: Union[str, PathLike, Engine, Connection, None] = None,
               only_get: bool = False) -> pd.DataFrame:
    """Get 10-year government bond yields.

    The United States series is requested from the FRED endpoint (series
    ``DGS10``); Germany, France, Italy, Spain, United Kingdom, Japan and
    China are requested from the main endpoint in windows of 5000 days
    starting on 2000-01-01.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``. If nothing is stored there the data is downloaded
        anyway.

    Returns
    -------
    Daily 10-year government bonds interest rates : pd.DataFrame

    """
    name = "global_long_rates"

    if update_loc is not None and only_get is True:
        stored = ops._io(operation="update", data_loc=update_loc, name=name)
        if not stored.equals(pd.DataFrame()):
            return stored

    # United States: FRED API, key read from the project's .env file.
    load_dotenv(Path(get_project_root(), ".env"))
    fred_api_key = os.environ.get("FRED_API_KEY")
    fred_response = requests.get(f"{urls[name]['dl']['fred']}DGS10&api_key="
                                 f"{fred_api_key}&file_type=json")
    us = pd.DataFrame.from_records(fred_response.json()["observations"])
    us = us[["date", "value"]].set_index("date")
    us.index = pd.to_datetime(us.index)
    us.columns = ["United States"]
    bonds = [us.apply(pd.to_numeric, errors="coerce").dropna()]

    # Remaining countries: one POST per 5000-day window until the window end
    # passes the current date.
    country_ids = {
        "Germany": "23693",
        "France": "23778",
        "Italy": "23738",
        "Spain": "23806",
        "United Kingdom": "23673",
        "Japan": "23901",
        "China": "29227",
    }
    for country, sid in country_ids.items():
        window_start = dt.datetime(2000, 1, 1)
        window_end = dt.datetime(2000, 1, 1)
        chunks = []
        while window_end < dt.datetime.now():
            window_end = window_start + dt.timedelta(days=5000)
            payload = {
                "curr_id": sid,
                "smlID": str(randint(1000000, 99999999)),
                "header": f"{country} 10-Year Bond Yield Historical Data",
                "st_date": window_start.strftime("%m/%d/%Y"),
                "end_date": window_end.strftime("%m/%d/%Y"),
                "interval_sec": "Daily",
                "sort_col": "date",
                "sort_ord": "DESC",
                "action": "historical_data"
            }
            response = requests.post(urls["global_long_rates"]["dl"]["main"],
                                     headers=investing_headers,
                                     data=payload)
            tables = pd.read_html(response.content,
                                  match="Price",
                                  index_col=0,
                                  parse_dates=True)
            chunks.append(tables[0])
            window_start = window_end + dt.timedelta(days=1)
        yields = pd.concat(chunks, axis=0)[["Price"]].sort_index()
        yields.columns = [country]
        bonds.append(yields)

    # Align every country on the US dates and fill interior gaps only.
    output = bonds[0].join(bonds[1:], how="left").interpolate(
        method="linear", limit_area="inside")
    output.columns = [
        "Estados Unidos", "Alemania", "Francia", "Italia", "España",
        "Reino Unido", "Japón", "China"
    ]

    if update_loc is not None:
        stored = ops._io(operation="update", data_loc=update_loc, name=name)
        output = ops._revise(new_data=output,
                             prev_data=stored,
                             revise_rows=revise_rows)

    metadata._set(output,
                  area="Global",
                  currency="USD",
                  inf_adj="No",
                  seas_adj="NSA",
                  unit="Tasa",
                  ts_type="-",
                  cumperiods=1)
    # Overwrite level 3 of the column metadata with one currency per country.
    currencies = ["USD", "EUR", "EUR", "EUR", "EUR", "GBP", "JPY", "CNY"]
    metadata._modify_multiindex(output, levels=[3], new_arrays=[currencies])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output

Esempio n. 12

Mostra file

def policy_rates(update_loc: Union[str, PathLike, Engine, Connection,
                                   None] = None,
                 revise_rows: Union[str, int] = "nodup",
                 save_loc: Union[str, PathLike, Engine, Connection,
                                 None] = None,
                 only_get: bool = False) -> pd.DataFrame:
    """Get central bank policy interest rates data.

    Countries/aggregates selected are US, Euro Area, Japan and China.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``. If nothing is stored there the data is downloaded
        anyway.

    Returns
    -------
    Daily policy interest rates : pd.DataFrame

    """
    name = "global_policy_rates"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    r = requests.get(urls[name]["dl"]["main"])
    # The temporary directory is a context manager so the extracted archive
    # is removed as soon as the CSV has been loaded into memory.
    with tempfile.TemporaryDirectory() as temp_dir:
        with zipfile.ZipFile(BytesIO(r.content), "r") as f:
            f.extractall(path=temp_dir)
        path_temp = path.join(temp_dir,
                              "WEBSTATS_CBPOL_D_DATAFLOW_csv_row.csv")
        raw = pd.read_csv(path_temp,
                          usecols=[0, 7, 19, 36, 37],
                          index_col=0,
                          header=2,
                          parse_dates=True).dropna(how="all")
    # Coerce to numbers and fill interior gaps only.
    output = (raw.apply(pd.to_numeric,
                        errors="coerce").interpolate(method="linear",
                                                     limit_area="inside"))
    # Label the selected columns in file order, then reorder them.
    output.columns = ["China", "Japón", "Estados Unidos", "Eurozona"]
    output = output[["Estados Unidos", "Eurozona", "Japón", "China"]]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output,
                  area="Global",
                  currency="USD",
                  inf_adj="No",
                  seas_adj="NSA",
                  unit="Tasa",
                  ts_type="-",
                  cumperiods=1)
    # Overwrite level 3 of the column metadata with one currency per column.
    metadata._modify_multiindex(output,
                                levels=[3],
                                new_arrays=[["USD", "EUR", "JPY", "CNY"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output

Esempio n. 13

Mostra file

File: national_accounts.py Progetto: vierja/econuy

def get(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        name: str = "naccounts",
        index_label: str = "index",
        only_get: bool = False) -> Dict[str, pd.DataFrame]:
    """Get national accounts data.

    One table is produced per entry in ``na_metadata``; each is stored and
    retrieved under ``{name}_{key}``.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'naccounts'
        Either CSV filename for updating and/or saving, or table name if
        using SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``. If any table is missing there, everything is
        downloaded anyway.

    Returns
    -------
    Quarterly national accounts : Dict[str, pd.DataFrame]
        Each dataframe corresponds to a national accounts table.

    """
    if update_loc is not None and only_get is True:
        stored = {
            table: ops._io(operation="update",
                           data_loc=update_loc,
                           name=f"{name}_{table}",
                           index_label=index_label)
            for table in na_metadata
        }
        # Only short-circuit when every single table was found.
        if not any(frame.equals(pd.DataFrame()) for frame in stored.values()):
            return stored

    tables = {}
    for table, meta in na_metadata.items():
        table_name = f"{name}_{table}"
        sheet = pd.read_excel(meta["url"], skiprows=9, nrows=meta["Rows"])
        frame = sheet.drop(columns=["Unnamed: 0"])
        frame = frame.dropna(axis=0, how="all").dropna(axis=1, how="all")
        # Series are laid out in rows in the spreadsheet; put dates on rows.
        frame = frame.transpose()
        frame.columns = meta["Colnames"]
        frame = frame.drop(["Unnamed: 1"])
        _fix_dates(frame)

        # Tables expressed in thousands are rescaled to millions.
        in_thousands = meta["Unit"] == "Miles"
        if in_thousands:
            frame = frame.divide(1000)
        unit_ = "Millones" if in_thousands else meta["Unit"]

        if update_loc is not None:
            earlier = ops._io(operation="update",
                              data_loc=update_loc,
                              name=table_name,
                              index_label=index_label)
            frame = ops._revise(new_data=frame,
                                prev_data=earlier,
                                revise_rows=revise_rows)
        frame = frame.apply(pd.to_numeric, errors="coerce")

        metadata._set(frame,
                      area="Actividad económica",
                      currency="UYU",
                      inf_adj=meta["Inf. Adj."],
                      unit=unit_,
                      seas_adj=meta["Seas"],
                      ts_type="Flujo",
                      cumperiods=1)

        if save_loc is not None:
            ops._io(operation="save",
                    data_loc=save_loc,
                    data=frame,
                    name=table_name,
                    index_label=index_label)

        tables[table] = frame

    return tables

Esempio n. 14

Mostra file

def nxr_daily(update_loc: Union[str, PathLike, Engine, Connection,
                                None] = None,
              save_loc: Union[str, PathLike, Engine, Connection, None] = None,
              only_get: bool = False) -> pd.DataFrame:
    """Get daily nominal exchange rate data.

    Data is requested in 30-day windows starting the day after the last
    stored observation (or after 1999-12-31 when nothing is stored), plus a
    final window that ends yesterday. Newly downloaded rows are appended to
    the stored data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily nominal exchange rate : pd.DataFrame
        Sell rate. If nothing could be downloaded, the data found in
        ``update_loc`` is returned unchanged (an empty dataframe when
        ``update_loc`` is ``None``).

    """
    name = "nxr_daily"

    if only_get is True and update_loc is not None:
        return ops._io(operation="update", data_loc=update_loc, name=name)

    start_date = dt.datetime(1999, 12, 31)
    # Always bound, so the "nothing downloaded" path below has something to
    # return even when no update location was given.
    previous_data = pd.DataFrame()

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        metadata._set(previous_data)
        if len(previous_data) > 0:
            # Resume from the last stored observation.
            start_date = previous_data.index[len(previous_data) - 1]

    base_url = urls[name]["dl"]["main"]

    def _window_url(from_: str, to_: str) -> str:
        # Query fragment expected by the endpoint, already percent-encoded.
        dates = f"%22FechaDesde%22:%22{from_}%22,%22FechaHasta%22:%22{to_}"
        return f"{base_url}{dates}%22,%22Grupo%22:%222%22}}" + "}"

    today = dt.datetime.now() - dt.timedelta(days=1)
    runs = (today - start_date).days // 30
    data = []
    for _ in range(runs):
        from_ = (start_date + dt.timedelta(days=1)).strftime("%d/%m/%Y")
        to_ = (start_date + dt.timedelta(days=30)).strftime("%d/%m/%Y")
        try:
            chunk = pd.read_excel(_window_url(from_, to_))
        except (TypeError, BadZipFile):
            # Best effort: a window that cannot be parsed is skipped and
            # the start date is left where it was.
            continue
        data.append(chunk)
        start_date = dt.datetime.strptime(to_, "%d/%m/%Y")

    # Remaining days between the last full window and yesterday.
    from_ = (start_date + dt.timedelta(days=1)).strftime("%d/%m/%Y")
    to_ = today.strftime("%d/%m/%Y")
    try:
        data.append(pd.read_excel(_window_url(from_, to_)))
    except (TypeError, BadZipFile):
        pass

    if not data:
        # Nothing new was downloaded; do not touch the stored data.
        return previous_data

    output = pd.concat(data, axis=0)
    output = output.pivot(index="Fecha", columns="Moneda",
                          values="Venta").rename_axis(None)
    output.index = pd.to_datetime(output.index,
                                  format="%d/%m/%Y",
                                  errors="coerce")
    output.sort_index(inplace=True)
    # Decimal commas to decimal points before numeric conversion.
    output.replace(",", ".", regex=True, inplace=True)
    output.columns = ["Tipo de cambio US$, Cable"]
    output = output.apply(pd.to_numeric, errors="coerce")

    metadata._set(output,
                  area="Precios",
                  currency="UYU/USD",
                  inf_adj="No",
                  unit="-",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)
    output.columns = output.columns.set_levels(["-"], level=2)

    if update_loc is not None:
        output = pd.concat([previous_data, output])

    if save_loc is not None:
        ops._io(operation="save",
                data_loc=save_loc,
                data=output,
                name=name)

    return output

Esempio n. 15

Mostra file

def nxr_monthly(update_loc: Union[str, PathLike, Engine, Connection,
                                  None] = None,
                revise_rows: Union[str, int] = "nodup",
                save_loc: Union[str, PathLike, Engine, Connection,
                                None] = None,
                only_get: bool = False) -> pd.DataFrame:
    """Get monthly nominal exchange rate data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``. If nothing is stored there the data is downloaded
        anyway.

    Returns
    -------
    Monthly nominal exchange rates : pd.DataFrame
        Sell rate, monthly average and end of period.

    """
    name = "nxr_monthly"

    if update_loc is not None and only_get is True:
        stored = ops._io(operation="update", data_loc=update_loc, name=name)
        if not stored.equals(pd.DataFrame()):
            return stored

    excel_options = {"skiprows": 4, "index_col": 0, "usecols": "A,C,F"}
    try:
        nxr_raw = pd.read_excel(urls[name]["dl"]["main"], **excel_options)
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" not in str(err):
            raise err
        # Certificate verification failed: download again, verifying
        # against the certificate bundle shipped with the project.
        certificate = Path(get_project_root(), "utils", "files",
                           "ine_certs.pem")
        r = requests.get(urls[name]["dl"]["main"], verify=certificate)
        nxr_raw = pd.read_excel(BytesIO(r.content), **excel_options)

    rates = nxr_raw.dropna(how="any", axis=0)
    rates.columns = [
        "Tipo de cambio venta, fin de período",
        "Tipo de cambio venta, promedio"
    ]
    # Roll every date forward to a month end.
    rates.index = rates.index + MonthEnd(1)
    rates = rates.apply(pd.to_numeric, errors="coerce")

    if update_loc is not None:
        stored = ops._io(operation="update", data_loc=update_loc, name=name)
        rates = ops._revise(new_data=rates,
                            prev_data=stored,
                            revise_rows=revise_rows)

    metadata._set(rates,
                  area="Precios",
                  currency="UYU/USD",
                  inf_adj="No",
                  unit="-",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=rates, name=name)

    return rates

Esempio n. 16

Mostra file

File: frequent.py Progetto: vierja/econuy

def cpi_measures(update_loc: Union[str, PathLike, Engine, Connection,
                                   None] = None,
                 revise_rows: Union[str, int] = "nodup",
                 save_loc: Union[str, PathLike, Engine, Connection,
                                 None] = None,
                 name: str = "tfm_prices",
                 index_label: str = "index",
                 only_get: bool = False) -> pd.DataFrame:
    """Get core CPI, Winsorized CPI, tradable CPI and non-tradable CPI.

    The returned dataframe also carries the total CPI as its first column.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'tfm_prices'
        Either CSV filename for updating and/or saving, or table name if
        using SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI measures : pd.DataFrame

    """
    # Return stored data when asked to; fall through to a full download if
    # nothing was found.
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update",
                         data_loc=update_loc,
                         name=name,
                         index_label=index_label)
        if not output.equals(pd.DataFrame()):
            return output

    # Item names and weights are read from the first sheet of the workbook.
    xls = pd.ExcelFile(urls["tfm_prices"]["dl"]["main"])
    weights = pd.read_excel(xls,
                            sheet_name=xls.sheet_names[0],
                            usecols="A:C",
                            skiprows=14,
                            index_col=0).dropna(how="any")
    weights.columns = ["Item", "Weight"]
    # Keep only rows whose index label is 8 characters long.
    # NOTE(review): presumably the most detailed product codes - confirm.
    weights_8 = weights.loc[weights.index.str.len() == 8]
    # Every sheet contributes its index columns; sheets are stacked in order.
    sheets = []
    for sheet in xls.sheet_names:
        raw = pd.read_excel(xls, sheet_name=sheet, usecols="D:IN",
                            skiprows=9).dropna(how="all")
        proc = raw.loc[:, raw.columns.str.contains("Indice|Índice")].dropna(
            how="all")
        sheets.append(proc.T)
    output = pd.concat(sheets)
    # Drop the first column, then label columns with (item name, code).
    output = output.iloc[:, 1:]
    output.columns = [weights["Item"], weights.index]
    # Dates are generated rather than parsed: consecutive month ends
    # starting at 2010-12-31.
    output.index = pd.date_range(start="2010-12-31",
                                 periods=len(output),
                                 freq="M")
    # Winsorized CPI: monthly changes of the 8-character-code columns,
    # winsorized along axis 1 with 0.05 limits on each tail, then weighted,
    # summed and chained into an index.
    diff_8 = output.loc[:,
                        output.columns.get_level_values(
                            level=1).str.len() == 8].pct_change()
    win = pd.DataFrame(winsorize(diff_8, limits=(0.05, 0.05), axis=1))
    win.index = diff_8.index
    win.columns = diff_8.columns.get_level_values(level=1)
    # NOTE(review): unlike the tradable/non-tradable/core blocks below, these
    # weights are not rescaled to sum to one - confirm that is intended.
    cpi_win = win.mul(weights_8.loc[:, "Weight"].T)
    cpi_win = cpi_win.sum(axis=1).add(1).cumprod().mul(100)

    # Historical file: monthly changes for the columns listed in
    # prod_details[1], spliced before the changes of the current-basket
    # columns listed in prod_details[0].
    prod_97 = (pd.read_excel(
        urls["tfm_prices"]["dl"]["historical"], skiprows=5).dropna(
            how="any").set_index("Rubros, Agrupaciones y Subrubros").T)
    prod_97 = prod_97.loc[:, prod_details[1]].pct_change()
    output_8 = output.loc[:, prod_details[0]].pct_change()
    # Drop columns with a repeated item name, then keep only the name level.
    output_8 = output_8.loc[:, ~output_8.columns.get_level_values(
        level=0).duplicated()]
    output_8.columns = output_8.columns.get_level_values(level=0)
    prod_97.columns = output_8.columns.get_level_values(level=0)
    # The first row of output_8 is skipped when stacking (pct_change leaves
    # it empty).
    complete = pd.concat([prod_97, output_8.iloc[1:]])
    complete.index = pd.date_range(start="1997-03-31",
                                   freq="M",
                                   periods=len(complete))
    # Weights for the spliced items, one row per item name.
    weights_complete = weights.loc[weights["Item"].isin(complete.columns)]
    weights_complete = weights_complete.loc[~weights_complete["Item"].
                                            duplicated()].set_index("Item")
    # Tradable index: columns flagged truthy in prod_details[2], weights
    # rescaled to sum to one, weighted changes chained into an index.
    tradable = complete.loc[:, [bool(x) for x in prod_details[2]]]
    tradable_weights = weights_complete.loc[
        weights_complete.index.isin(tradable.columns), "Weight"].T
    tradable_weights = tradable_weights.div(tradable_weights.sum())
    tradable = (tradable.mul(tradable_weights).sum(
        axis=1).add(1).cumprod().mul(100))

    # Non-tradable index: the complement of the prod_details[2] flags.
    non_tradable = complete.loc[:, [not bool(x) for x in prod_details[2]]]
    non_tradable_weights = weights_complete.loc[
        weights_complete.index.isin(non_tradable.columns), "Weight"].T
    non_tradable_weights = non_tradable_weights.div(non_tradable_weights.sum())
    non_tradable = (non_tradable.mul(non_tradable_weights).sum(
        axis=1).add(1).cumprod().mul(100))

    # Core index: columns flagged truthy in prod_details[3].
    core = complete.loc[:, [bool(x) for x in prod_details[3]]]
    core_weights = weights_complete.loc[
        weights_complete.index.isin(core.columns), "Weight"].T
    core_weights = core_weights.div(core_weights.sum())
    core = (core.mul(core_weights).sum(axis=1).add(1).cumprod().mul(100))

    # Total CPI is always requested with only_get=True, restricted to the
    # period covered by the spliced series.
    cpi_re = cpi.get(update_loc=update_loc, save_loc=save_loc, only_get=True)
    cpi_re = cpi_re.loc[cpi_re.index >= "1997-03-31"]
    output = pd.concat([cpi_re, tradable, non_tradable, core, cpi_win], axis=1)
    # All five series are passed through transform.base_index over December
    # 2010 (the unit recorded below is "2010-12=100").
    output = transform.base_index(output,
                                  start_date="2010-12-01",
                                  end_date="2010-12-31")
    output.columns = [
        "Índice de precios al consumo: total",
        "Índice de precios al consumo: transables",
        "Índice de precios al consumo: no transables",
        "Índice de precios al consumo: subyacente",
        "Índice de precios al consumo: Winsorized 0.05"
    ]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name,
                                index_label=index_label)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    output = output.apply(pd.to_numeric, errors="coerce")
    metadata._set(output,
                  area="Precios y salarios",
                  currency="-",
                  inf_adj="No",
                  unit="2010-12=100",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save",
                data_loc=save_loc,
                data=output,
                name=name,
                index_label=index_label)

    return output

Esempio n. 17

Mostra file

def get(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        name: str = "commodity_index",
        index_label: str = "index",
        only_get: bool = False,
        only_get_prices: bool = False,
        only_get_weights: bool = True) -> pd.DataFrame:
    """Get export-weighted commodity price index for Uruguay.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'commodity_index'
        Either CSV filename for updating and/or saving, or table name if
        using SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc`` for the commodity index.
    only_get_prices : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc`` for commodity prices.
    only_get_weights : bool, default True
        If True, don't download data, retrieve what is available from
        ``update_loc`` for commodity weights.

    Returns
    -------
    Monthly export-weighted commodity index : pd.DataFrame
        Export-weighted average of commodity prices relevant to Uruguay.

    """
    if update_loc is not None and only_get is True:
        stored = ops._io(operation="update",
                         data_loc=update_loc,
                         name=name,
                         index_label=index_label)
        if not stored.equals(pd.DataFrame()):
            return stored

    # Price changes: fill single-period gaps, drop incomplete rows, then take
    # period-on-period changes.
    price_levels = _prices(update_loc=update_loc,
                           revise_rows="nodup",
                           save_loc=save_loc,
                           only_get=only_get_prices)
    changes = (price_levels.interpolate(method="linear", limit=1)
               .dropna(how="any")
               .pct_change(periods=1))

    # Weights: same columns as the prices, carried forward onto their dates.
    all_weights = _weights(update_loc=update_loc,
                           revise_rows="nodup",
                           save_loc=save_loc,
                           only_get=only_get_weights)
    aligned = all_weights[changes.columns].reindex(changes.index,
                                                   method="ffill")

    # Weighted change per period, chained into an index.
    weighted = pd.DataFrame(changes.values * aligned.values,
                            columns=changes.columns,
                            index=changes.index)
    index_ = weighted.sum(axis=1).add(1).to_frame().cumprod().multiply(100)
    index_.columns = ["Índice de precios de productos primarios"]

    metadata._set(index_,
                  area="Sector externo",
                  currency="USD",
                  inf_adj="No",
                  unit="2002-01=100",
                  seas_adj="NSA",
                  ts_type="Flujo",
                  cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save",
                data_loc=save_loc,
                data=index_,
                name=name,
                index_label=index_label)

    return index_

Esempio n. 18

Mostra file

File: frequent.py Progetto: vierja/econuy

def fiscal(aggregation: str = "gps",
           fss: bool = True,
           unit: Optional[str] = None,
           start_date: Union[str, date, None] = None,
           end_date: Union[str, date, None] = None,
           update_loc: Union[str, PathLike, Engine, Connection, None] = None,
           save_loc: Union[str, PathLike, Engine, Connection, None] = None,
           only_get: bool = True,
           name: str = "tfm_fiscal",
           index_label: str = "index") -> pd.DataFrame:
    """
    Get fiscal accounts data.

    Allow choosing government aggregation, whether to exclude the FSS
    (Fideicomiso de la Seguridad Social, Social Security Trust Fund) and the
    unit (UYU, real UYU, USD, real USD or percent of GDP). When ``unit`` is
    ``gdp`` the data is accumulated over 12 periods before being converted.

    Parameters
    ----------
    aggregation : {'gps', 'nfps', 'gc'}
        Government aggregation. Can be ``gps`` (consolidated public sector),
        ``nfps`` (non-financial public sector) or ``gc`` (central government).
    fss : bool, default True
        If ``True``, exclude the `FSS's <https://www.impo.com.uy/bases/decretos
        /71-2018/25>`_ income from gov't revenues and the FSS's
        interest revenues from gov't interest payments.
    unit : {None, 'gdp', 'usd', 'real', 'real_usd'}
        Unit in which data should be expressed. Possible values are ``real``,
        ``usd``, ``real_usd`` and ``gdp``. If ``None``, no unit calculations
        will be performed, rendering the data as is (current UYU).
    start_date : str, datetime.date or None, default None
        If ``unit`` is set to ``real`` or ``real_usd``, this parameter and
        ``end_date`` control how deflation is calculated.
    end_date : str, datetime.date or None, default None
        If ``unit`` is set to ``real`` or ``real_usd``, this parameter and
        ``start_date`` control how deflation is calculated.
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default True
        If True, don't download data, retrieve what is available from
        ``update_loc``. The flag is forwarded to the fiscal accounts getter
        and to every unit conversion helper.
    name : str, default 'tfm_fiscal'
        Either CSV filename for updating and/or saving, or table name if
        using SQL. Options will be appended to the base name.
    index_label : str, default 'index'
        Label for SQL indexes.

    Returns
    -------
    Fiscal aggregation : pd.DataFrame

    Raises
    ------
    ValueError
        If ``unit`` or ``aggregation`` are given an invalid keyword.

    """
    if unit not in ["gdp", "usd", "real", "real_usd", None]:
        raise ValueError("'unit' can be 'gdp', 'usd', 'real', 'real_usd' or"
                         " None.")
    if aggregation not in ["gps", "nfps", "gc"]:
        raise ValueError("'aggregation' can be 'gps', 'nfps' or 'gc'.")

    # Build the final CSV/table name from the chosen options.
    if unit is None:
        unit = "uyu"
    name = f"{name}_{aggregation}_{unit}"
    if fss:
        name = name + "_fssadj"

    data = fiscal_accounts.get(update_loc=update_loc,
                               save_loc=save_loc,
                               only_get=only_get)
    gps = data["gps"]
    nfps = data["nfps"]
    gc = data["gc-bps"]

    # Assemble every candidate series; the subset actually returned is picked
    # afterwards through ``fiscal_metadata``.
    proc = pd.DataFrame(index=gps.index)
    proc["Ingresos: SPNF-SPC"] = nfps["Ingresos: SPNF"]
    proc["Ingresos: GC-BPS"] = gc["Ingresos: GC-BPS"]
    proc["Egresos: Primarios SPNF-SPC"] = nfps["Egresos: Primarios SPNF"]
    proc["Egresos: Totales GC-BPS"] = gc["Egresos: GC-BPS"]
    proc["Egresos: Inversiones SPNF-SPC"] = nfps["Egresos: Inversiones"]
    proc["Egresos: Inversiones GC-BPS"] = gc["Egresos: Inversión"]
    proc["Intereses: SPNF"] = nfps["Intereses: Totales"]
    proc["Intereses: BCU"] = gps["Intereses: BCU"]
    proc["Intereses: SPC"] = proc["Intereses: SPNF"] + proc["Intereses: BCU"]
    proc["Intereses: GC-BPS"] = gc["Intereses: Total"]
    proc["Egresos: Totales SPNF"] = (proc["Egresos: Primarios SPNF-SPC"] +
                                     proc["Intereses: SPNF"])
    proc["Egresos: Totales SPC"] = (proc["Egresos: Totales SPNF"] +
                                    proc["Intereses: BCU"])
    proc["Egresos: Primarios GC-BPS"] = (proc["Egresos: Totales GC-BPS"] -
                                         proc["Intereses: GC-BPS"])
    proc["Resultado: Primario intendencias"] = nfps[
        "Resultado: Primario intendencias"]
    proc["Resultado: Primario BSE"] = nfps["Resultado: Primario BSE"]
    proc["Resultado: Primario BCU"] = gps["Resultado: Primario BCU"]
    proc["Resultado: Primario SPNF"] = nfps["Resultado: Primario SPNF"]
    proc["Resultado: Global SPNF"] = nfps["Resultado: Global SPNF"]
    proc["Resultado: Primario SPC"] = gps["Resultado: Primario SPC"]
    proc["Resultado: Global SPC"] = gps["Resultado: Global SPC"]
    proc["Resultado: Primario GC-BPS"] = (proc["Ingresos: GC-BPS"] -
                                          proc["Egresos: Primarios GC-BPS"])
    proc["Resultado: Global GC-BPS"] = gc["Resultado: Global GC-BPS"]

    # FSS-adjusted variants: FSS income is removed from revenues and FSS
    # interest is removed from interest payments / total expenditure.
    proc["Ingresos: FSS"] = gc["Ingresos: FSS"]
    proc["Intereses: FSS"] = gc["Intereses: BPS-FSS"]
    proc["Ingresos: SPNF-SPC aj. FSS"] = (proc["Ingresos: SPNF-SPC"] -
                                          proc["Ingresos: FSS"])
    proc["Ingresos: GC-BPS aj. FSS"] = (proc["Ingresos: GC-BPS"] -
                                        proc["Ingresos: FSS"])
    proc["Intereses: SPNF aj. FSS"] = (proc["Intereses: SPNF"] -
                                       proc["Intereses: FSS"])
    proc["Intereses: SPC aj. FSS"] = (proc["Intereses: SPC"] -
                                      proc["Intereses: FSS"])
    proc["Intereses: GC-BPS aj. FSS"] = (proc["Intereses: GC-BPS"] -
                                         proc["Intereses: FSS"])
    proc["Egresos: Totales SPNF aj. FSS"] = (proc["Egresos: Totales SPNF"] -
                                             proc["Intereses: FSS"])
    proc["Egresos: Totales SPC aj. FSS"] = (proc["Egresos: Totales SPC"] -
                                            proc["Intereses: FSS"])
    proc["Egresos: Totales GC-BPS aj. FSS"] = (
        proc["Egresos: Totales GC-BPS"] - proc["Intereses: FSS"])
    proc["Resultado: Primario SPNF aj. FSS"] = (
        proc["Resultado: Primario SPNF"] - proc["Ingresos: FSS"])
    proc["Resultado: Global SPNF aj. FSS"] = (proc["Resultado: Global SPNF"] -
                                              proc["Ingresos: FSS"] +
                                              proc["Intereses: FSS"])
    proc["Resultado: Primario SPC aj. FSS"] = (
        proc["Resultado: Primario SPC"] - proc["Ingresos: FSS"])
    proc["Resultado: Global SPC aj. FSS"] = (proc["Resultado: Global SPC"] -
                                             proc["Ingresos: FSS"] +
                                             proc["Intereses: FSS"])
    proc["Resultado: Primario GC-BPS aj. FSS"] = (
        proc["Resultado: Primario GC-BPS"] - proc["Ingresos: FSS"])
    proc["Resultado: Global GC-BPS aj. FSS"] = (
        proc["Resultado: Global GC-BPS"] - proc["Ingresos: FSS"] +
        proc["Intereses: FSS"])

    # Keep only the columns registered for this aggregation / FSS choice.
    output = proc.loc[:, fiscal_metadata[aggregation][fss]]
    metadata._set(output,
                  area="Cuentas fiscales y deuda",
                  currency="UYU",
                  inf_adj="No",
                  unit="Millones",
                  seas_adj="NSA",
                  ts_type="Flujo",
                  cumperiods=1)

    if unit == "gdp":
        output = transform.rolling(output, periods=12, operation="sum")
        output = transform.convert_gdp(output,
                                       update_loc=update_loc,
                                       save_loc=save_loc,
                                       only_get=only_get)
    elif unit == "usd":
        output = transform.convert_usd(output,
                                       update_loc=update_loc,
                                       save_loc=save_loc,
                                       only_get=only_get)
    elif unit == "real_usd":
        output = transform.convert_real(output,
                                        start_date=start_date,
                                        end_date=end_date,
                                        update_loc=update_loc,
                                        save_loc=save_loc,
                                        only_get=only_get)
        xr = nxr.get_monthly(update_loc=update_loc,
                             save_loc=save_loc,
                             only_get=only_get)
        # Divide by the mean of the second exchange-rate column over the
        # chosen window. ``.iloc`` makes the positional lookup explicit
        # instead of relying on the deprecated integer-key fallback of
        # ``Series.__getitem__``.
        output = output.divide(xr[start_date:end_date].mean().iloc[1])
        metadata._set(output, currency="USD")
    elif unit == "real":
        output = transform.convert_real(output,
                                        start_date=start_date,
                                        end_date=end_date,
                                        update_loc=update_loc,
                                        save_loc=save_loc,
                                        only_get=only_get)

    if save_loc is not None:
        ops._io(operation="save",
                data_loc=save_loc,
                data=output,
                name=name,
                index_label=index_label)

    return output

Esempio n. 19

Mostra file

def get(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        name: str = "cpi",
        index_label: str = "index",
        only_get: bool = False) -> pd.DataFrame:
    """Download, revise and optionally persist the consumer price index.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Directory (Path or path-like string) holding a CSV to update from,
        a SQLAlchemy connection or engine, or ``None`` to skip updating.
    revise_rows : {'nodup', 'auto', int}
        How previously stored data is combined with new data. An integer is
        the number of tail rows to replace, ``auto`` infers that number from
        the data frequency and ``nodup`` replaces existing periods with new
        data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Directory (Path or path-like string) where the CSV is written,
        a SQLAlchemy connection or engine, or ``None`` to skip saving.
    name : str, default 'cpi'
        CSV filename for updating and/or saving, or table name when using
        SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default False
        If True, return what is stored in ``update_loc`` without
        downloading; the download still happens when nothing is stored.

    Returns
    -------
    Monthly CPI index : pd.DataFrame

    """
    # Short-circuit: serve stored data when asked to and when it exists.
    if only_get is True and update_loc is not None:
        stored = ops._io(operation="update",
                         data_loc=update_loc,
                         name=name,
                         index_label=index_label)
        if not stored.equals(pd.DataFrame()):
            return stored

    raw = pd.read_excel(urls["cpi"]["dl"]["main"], skiprows=7)
    raw = raw.dropna(axis=0, thresh=2)

    # Only the index level column is kept; the variation columns are dropped.
    trimmed = raw.drop(columns=["Mensual", "Acum.año", "Acum.12 meses"])
    trimmed = trimmed.dropna(axis=0, how="all")
    cpi = trimmed.set_index("Mes y año").rename_axis(None)
    cpi.columns = ["Índice de precios al consumo"]
    # Shift the dates with a one-month-end offset.
    cpi.index = cpi.index + MonthEnd(1)

    if update_loc is not None:
        earlier = ops._io(operation="update",
                          data_loc=update_loc,
                          name=name,
                          index_label=index_label)
        cpi = ops._revise(new_data=cpi,
                          prev_data=earlier,
                          revise_rows=revise_rows)

    cpi = cpi.apply(pd.to_numeric, errors="coerce")
    metadata._set(cpi,
                  area="Precios y salarios",
                  currency="-",
                  inf_adj="No",
                  unit="2010-10=100",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)

    if save_loc is None:
        return cpi

    ops._io(operation="save",
            data_loc=save_loc,
            data=cpi,
            name=name,
            index_label=index_label)
    return cpi

Esempio n. 20

Mostra file

def cpi(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Download, revise and optionally persist the consumer price index.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Directory (Path or path-like string) holding a CSV to update from,
        a SQLAlchemy connection or engine, or ``None`` to skip updating.
    revise_rows : {'nodup', 'auto', int}
        How previously stored data is combined with new data. An integer is
        the number of tail rows to replace, ``auto`` infers that number from
        the data frequency and ``nodup`` replaces existing periods with new
        data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Directory (Path or path-like string) where the CSV is written,
        a SQLAlchemy connection or engine, or ``None`` to skip saving.
    only_get : bool, default False
        If True, return what is stored in ``update_loc`` without
        downloading; the download still happens when nothing is stored.

    Returns
    -------
    Monthly CPI index : pd.DataFrame

    """
    name = "cpi"

    if only_get is True and update_loc is not None:
        stored = ops._io(operation="update", data_loc=update_loc, name=name)
        if not stored.equals(pd.DataFrame()):
            return stored

    # Same parsing options for the direct download and the fallback path.
    read_opts = {"skiprows": 7, "usecols": "A:B", "index_col": 0}
    try:
        frame = pd.read_excel(urls[name]["dl"]["main"], **read_opts).dropna()
    except URLError as err:
        # Only certificate verification failures are retried.
        if "SSL: CERTIFICATE_VERIFY_FAILED" not in str(err):
            raise err
        certificate = Path(get_project_root(), "utils", "files",
                           "ine_certs.pem")
        response = requests.get(urls[name]["dl"]["main"], verify=certificate)
        frame = pd.read_excel(BytesIO(response.content), **read_opts).dropna()

    frame.columns = ["Índice de precios al consumo"]
    frame = frame.rename_axis(None)
    # Shift the dates with a one-month-end offset.
    frame.index = frame.index + MonthEnd(1)

    if update_loc is not None:
        earlier = ops._io(operation="update",
                          data_loc=update_loc,
                          name=name)
        frame = ops._revise(new_data=frame,
                            prev_data=earlier,
                            revise_rows=revise_rows)

    frame = frame.apply(pd.to_numeric, errors="coerce")
    metadata._set(frame,
                  area="Precios",
                  currency="-",
                  inf_adj="No",
                  unit="2010-10=100",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)

    if save_loc is None:
        return frame

    ops._io(operation="save", data_loc=save_loc, data=frame, name=name)
    return frame

Esempio n. 21

Mostra file

File: national_accounts.py Progetto: vierja/econuy

def _lin_gdp(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
             save_loc: Union[str, PathLike, Engine, Connection, None] = None,
             name: str = "lin_gdp",
             index_label: str = "index",
             only_get: bool = True,
             only_get_na: bool = True) -> pd.DataFrame:
    """Get nominal GDP data in UYU and USD with forecasts.

    Update nominal GDP data for use in the `convert.convert_gdp()` function.
    Get IMF forecasts for year of last available data point and the next
    year (for example, if the last period available at the BCU website is
    september 2019, fetch forecasts for 2019 and 2020).

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'lin_gdp'
        Either CSV filename for updating and/or saving, or table name if
        using SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default True
        If True, don't download data, retrieve what is available from
        ``update_loc``. Also forwarded to the USD conversion.
    only_get_na : bool, default True
        If True, don't download national accounts data,
        retrieve what is available from ``update_loc``.

    Returns
    -------
    output : Pandas dataframe
        Quarterly GDP in UYU and USD with 1 year forecasts.

    """
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update",
                         data_loc=update_loc,
                         name=name,
                         index_label=index_label)
        if not output.equals(pd.DataFrame()):
            return output

    data_uyu = get(update_loc=update_loc, only_get=only_get_na)["gdp_cur_nsa"]
    data_uyu = transform.rolling(data_uyu, periods=4, operation="sum")
    data_usd = transform.convert_usd(data_uyu,
                                     update_loc=update_loc,
                                     only_get=only_get)

    data = [data_uyu, data_usd]
    last_year = data_uyu.index.max().year
    # A December observation means that year is complete, so the first
    # forecast year is the following one.
    if data_uyu.index.max().month == 12:
        last_year += 1

    # Last full-year observation used as the base that IMF growth is applied
    # to; the two forecasts are dated at the end of the next two years.
    base_date = dt.datetime(last_year - 1, 12, 31)
    fcast_date = dt.datetime(last_year, 12, 31)
    next_fcast_date = dt.datetime(last_year + 1, 12, 31)

    results = []
    for table, gdp in zip(["NGDP", "NGDPD"], data):
        # NOTE(review): the WEO edition (2019/02) is hard-coded in the URL;
        # confirm it still serves the requested years.
        table_url = (f"https://www.imf.org/external/pubs/ft/weo/2019/02/weodat"
                     f"a/weorept.aspx?sy={last_year - 1}&ey={last_year + 1}"
                     f"&scsm=1&ssd=1&sort=country&ds=.&br=1&pr1.x=27&pr1.y=9&c"
                     f"=298&s={table}&grp=0&a=")
        imf_data = pd.to_numeric(pd.read_html(table_url)[4].iloc[2, [5, 6, 7]])
        imf_data = imf_data.reset_index(drop=True)

        # Scale the base observation by the IMF ratios (year / base year).
        base = gdp.loc[[base_date]]
        fcast = base.multiply(imf_data.iloc[1]).divide(imf_data.iloc[0])
        fcast = fcast.rename(index={base_date: fcast_date})
        next_fcast = base.multiply(imf_data.iloc[2]).divide(imf_data.iloc[0])
        next_fcast = next_fcast.rename(index={base_date: next_fcast_date})

        # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` stacks
        # the same rows in the same order.
        results.append(pd.concat([gdp, fcast, next_fcast]))

    output = pd.concat(results, axis=1)
    output = output.resample("Q-DEC").interpolate("linear").dropna(how="all")

    # Rebuild the 9-level column MultiIndex, renaming only the first level.
    arrays = [list(output.columns.get_level_values(level))
              for level in range(0, 9)]
    arrays[0] = ["PBI UYU", "PBI USD"]
    tuples = list(zip(*arrays))
    output.columns = pd.MultiIndex.from_tuples(tuples,
                                               names=[
                                                   "Indicador", "Área",
                                                   "Frecuencia", "Moneda",
                                                   "Inf. adj.", "Unidad",
                                                   "Seas. Adj.", "Tipo",
                                                   "Acum. períodos"
                                               ])

    if save_loc is not None:
        ops._io(operation="save",
                data_loc=save_loc,
                data=output,
                name=name,
                index_label=index_label)

    return output

Esempio n. 22

Mostra file

def cpi_measures(update_loc: Union[str, PathLike, Engine, Connection,
                                   None] = None,
                 revise_rows: Union[str, int] = "nodup",
                 save_loc: Union[str, PathLike, Engine, Connection,
                                 None] = None,
                 only_get: bool = False) -> pd.DataFrame:
    """
    Get core CPI, Winsorized CPI, tradable CPI, non-tradable CPI and residual
    CPI.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI measures : pd.DataFrame

    """
    name = "cpi_measures"

    # Serve stored data when requested and available; otherwise fall through
    # to a full download.
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output
    # Fetch the 2010-14 and 2015- workbooks plus the 1997-base product table.
    # On an SSL certificate verification failure, retry through ``requests``
    # using the ``ine_certs.pem`` file under the project's utils/files folder.
    try:
        xls_10_14 = pd.ExcelFile(urls[name]["dl"]["2010-14"])
        xls_15 = pd.ExcelFile(urls[name]["dl"]["2015-"])
        prod_97 = (pd.read_excel(
            urls[name]["dl"]["1997"], skiprows=5).dropna(how="any").set_index(
                "Rubros, Agrupaciones, Subrubros, Familias y Artículos").T)
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" in str(err):
            certificate = Path(get_project_root(), "utils", "files",
                               "ine_certs.pem")
            r = requests.get(urls[name]["dl"]["2010-14"], verify=certificate)
            xls_10_14 = pd.ExcelFile(BytesIO(r.content))
            r = requests.get(urls[name]["dl"]["2015-"], verify=certificate)
            xls_15 = pd.ExcelFile(BytesIO(r.content))
            r = requests.get(urls[name]["dl"]["1997"], verify=certificate)
            prod_97 = (pd.read_excel(BytesIO(
                r.content), skiprows=5).dropna(how="any").set_index(
                    "Rubros, Agrupaciones, Subrubros, Familias y Artículos").T)
        else:
            raise err
    # NOTE(review): this read sits outside the try block above, so it does not
    # get the certificate fallback - confirm whether that is intended.
    weights_97 = (pd.read_excel(urls[name]["dl"]["1997_weights"],
                                index_col=0).drop_duplicates(
                                    subset="Descripción", keep="first"))
    # 2010-base weights come from the first sheet of the 2010-14 workbook.
    weights = pd.read_excel(xls_10_14,
                            sheet_name=xls_10_14.sheet_names[0],
                            usecols="A:C",
                            skiprows=13,
                            index_col=0).dropna(how="any")
    weights.columns = ["Item", "Weight"]
    # Keep only the rows whose index label is 8 characters long.
    weights_8 = weights.loc[weights.index.str.len() == 8]

    # Read every sheet of both workbooks, keep the columns whose header
    # contains "Indice"/"Índice" and stack them transposed (one row per sheet
    # column, i.e. per period after concatenation).
    sheets = []
    for excel_file in [xls_10_14, xls_15]:
        for sheet in excel_file.sheet_names:
            raw = pd.read_excel(excel_file,
                                sheet_name=sheet,
                                usecols="D:IN",
                                skiprows=8).dropna(how="all")
            proc = raw.loc[:,
                           raw.columns.str.contains("Indice|Índice")].dropna(
                               how="all")
            sheets.append(proc.T)
    complete_10 = pd.concat(sheets)
    # Drop the first column, then label the remaining columns positionally
    # with (item name, item code) taken from the weights table.
    complete_10 = complete_10.iloc[:, 1:]
    complete_10.columns = [weights["Item"], weights.index]
    # Rows are assumed to be consecutive months starting at 2010-12-31.
    complete_10.index = pd.date_range(start="2010-12-31",
                                      periods=len(complete_10),
                                      freq="M")
    # Monthly percent changes of the series whose code has 8 characters.
    diff_8 = complete_10.loc[:,
                             complete_10.columns.get_level_values(
                                 level=1).str.len() == 8].pct_change()
    # Winsorize along axis=1 with 0.05 limits on both tails, then restore
    # the index and the code labels lost by the array conversion.
    win = pd.DataFrame(winsorize(diff_8, limits=(0.05, 0.05), axis=1))
    win.index = diff_8.index
    win.columns = diff_8.columns.get_level_values(level=1)
    # Weighted sum of the winsorized changes, chained into an index
    # (cumulative product of 1 + change, times 100).
    cpi_win = win.mul(weights_8.loc[:, "Weight"].T)
    cpi_win = cpi_win.sum(axis=1).add(1).cumprod().mul(100)

    # 1997-base weight: first non-null value among the three level columns.
    weights_97["Weight"] = (weights_97["Rubro"].fillna(
        weights_97["Agrupación, subrubro, familia"]).fillna(
            weights_97["Artículo"]).drop(
                columns=["Rubro", "Agrupación, subrubro, familia", "Artículo"])
                            )
    # Restrict the 1997-base products to those listed in ``cpi_details`` and
    # date them as consecutive months starting at 1997-03-31.
    prod_97 = prod_97.loc[:, list(cpi_details["1997_base"].keys())]
    prod_97.index = pd.date_range(start="1997-03-31",
                                  periods=len(prod_97),
                                  freq="M")
    # Matching 1997 weights, divided by 100 and relabelled positionally with
    # the product column names.
    weights_97 = (weights_97[weights_97["Descripción"].isin(
        cpi_details["1997_weights"])].set_index("Descripción").drop(
            columns=["Rubro", "Agrupación, subrubro, "
                     "familia", "Artículo"])).div(100)
    weights_97.index = prod_97.columns
    # Same selection for the 2010 base, dropping duplicated item names and
    # flattening the columns to the item-name level.
    prod_10 = complete_10.loc[:, list(cpi_details["2010_base"].keys())]
    prod_10 = prod_10.loc[:, ~prod_10.columns.get_level_values(
        level=0).duplicated()]
    prod_10.columns = prod_10.columns.get_level_values(level=0)
    weights_10 = (weights.loc[weights["Item"].isin(
        list(cpi_details["2010_base"].keys()))].drop_duplicates(
            subset="Item", keep="first")).set_index("Item")
    # Build, for each base (2010 first, then 1997), four item groups in this
    # order: tradable, non-tradable, core, non-core, each with weights
    # renormalized to sum to one.
    # Note: ``weights`` is rebound here from the weights DataFrame to a list.
    items = []
    weights = []
    for item, weight, details in zip([prod_10, prod_97],
                                     [weights_10, weights_97],
                                     ["2010_base", "1997_base"]):
        for tradable in [True, False]:
            items.append(item.loc[:, [
                k for k, v in cpi_details[details].items()
                if v["Tradable"] is tradable
            ]])
            aux = weight.loc[[
                k for k, v in cpi_details[details].items()
                if v["Tradable"] is tradable
            ]]
            weights.append(aux.div(aux.sum()))
        for core in [True, False]:
            items.append(item.loc[:, [
                k for k, v in cpi_details[details].items() if v["Core"] is core
            ]])
            aux = weight.loc[[
                k for k, v in cpi_details[details].items() if v["Core"] is core
            ]]
            weights.append(aux.div(aux.sum()))

    # Weighted aggregate for each of the eight groups.
    intermediate = []
    for item, weight in zip(items, weights):
        intermediate.append(item.mul(weight.squeeze()).sum(1))

    # Splice each measure: 1997-base changes (y) before 2011-01-01 and
    # 2010-base changes (x) after it, chained into an index.
    output = []
    for x, y in zip(intermediate[:4], intermediate[4:]):
        aux = pd.concat([
            y.pct_change().loc[y.index < "2011-01-01"],
            x.pct_change().loc[x.index > "2011-01-01"]
        ])
        output.append(aux.fillna(0).add(1).cumprod().mul(100))

    # Headline CPI is always requested with only_get=True, regardless of this
    # function's own ``only_get`` argument.
    cpi_re = cpi(update_loc=update_loc, save_loc=save_loc, only_get=True)
    cpi_re = cpi_re.loc[cpi_re.index >= "1997-03-31"]
    output = pd.concat([cpi_re] + output + [cpi_win], axis=1)
    output.columns = [
        "Índice de precios al consumo: total",
        "Índice de precios al consumo: transables",
        "Índice de precios al consumo: no transables",
        "Índice de precios al consumo: subyacente",
        "Índice de precios al consumo: residual",
        "Índice de precios al consumo: Winsorized 0.05"
    ]
    output = output.apply(pd.to_numeric, errors="coerce")
    metadata._set(output,
                  area="Precios y salarios",
                  currency="-",
                  inf_adj="No",
                  unit="2010-12=100",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)
    # Rebase using December 2010 as the reference window.
    output = transform.rebase(output,
                              start_date="2010-12-01",
                              end_date="2010-12-31")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output

Esempio n. 23

Mostra file

def stocks(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
           revise_rows: Union[str, int] = "nodup",
           save_loc: Union[str, PathLike, Engine, Connection, None] = None,
           only_get: bool = False) -> pd.DataFrame:
    """Download, revise and optionally persist global stock market indexes.

    The indexes are S&P 500, Euronext 100, Nikkei 225 and Shanghai Stock
    Exchange Composite. The currency metadata level is set to USD, EUR, JPY
    and CNY respectively.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Directory (Path or path-like string) holding a CSV to update from,
        a SQLAlchemy connection or engine, or ``None`` to skip updating.
    revise_rows : {'nodup', 'auto', int}
        How previously stored data is combined with new data. An integer is
        the number of tail rows to replace, ``auto`` infers that number from
        the data frequency and ``nodup`` replaces existing periods with new
        data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Directory (Path or path-like string) where the CSV is written,
        a SQLAlchemy connection or engine, or ``None`` to skip saving.
    only_get : bool, default False
        If True, return what is stored in ``update_loc`` without
        downloading; the download still happens when nothing is stored.

    Returns
    -------
    Daily stock market index in USD : pd.DataFrame

    """
    name = "global_stocks"

    if only_get is True and update_loc is not None:
        stored = ops._io(operation="update", data_loc=update_loc, name=name)
        if not stored.equals(pd.DataFrame()):
            return stored

    # One single-column frame per ticker, built from CSV columns 0 and 4.
    tickers = ["spy", "n100", "nikkei", "sse"]
    frames = []
    for ticker in tickers:
        frame = pd.read_csv(urls[name]["dl"][ticker],
                            index_col=0,
                            usecols=[0, 4],
                            parse_dates=True)
        frame.columns = [ticker]
        frames.append(frame)

    # Align on dates and fill interior gaps only.
    combined = pd.concat(frames, axis=1)
    output = combined.interpolate(method="linear", limit_area="inside")
    output.columns = [
        "S&P 500", "Euronext 100", "Nikkei 225",
        "Shanghai Stock Exchange Composite"
    ]
    metadata._set(output,
                  area="Global",
                  currency="USD",
                  inf_adj="No",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)
    # Override the currency level (level 3) with one currency per index.
    metadata._modify_multiindex(output,
                                levels=[3],
                                new_arrays=[["USD", "EUR", "JPY", "CNY"]])
    output = rebase(output, start_date="2019-01-02")

    if update_loc is not None:
        earlier = ops._io(operation="update",
                          data_loc=update_loc,
                          name=name)
        output = ops._revise(new_data=output,
                             prev_data=earlier,
                             revise_rows=revise_rows)

    if save_loc is None:
        return output

    ops._io(operation="save", data_loc=save_loc, data=output, name=name)
    return output

Esempio n. 24

Mostra file

def _prices(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
            revise_rows: Union[str, int] = "nodup",
            save_loc: Union[str, PathLike, Engine, Connection, None] = None,
            only_get: bool = True) -> pd.DataFrame:
    """Get commodity prices for Uruguay.

    Downloads beef, pulp, soybean, wheat, milk and IMF commodity series,
    brings them to a monthly frequency and joins them in a single dataframe
    with one column per commodity.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default True
        If True, don't download data, retrieve what is available from
        ``update_loc``. If nothing is found there, data is downloaded anyway.

    Returns
    -------
    Commodity prices : pd.DataFrame
        Prices and price indexes of relevant commodities for Uruguay.

    """
    # Factor applied to the soybean and wheat "Settle" quotes.
    # NOTE(review): presumably converts US cents per bushel into USD per
    # metric ton (about 36.74 bushels per ton) -- confirm against the source.
    bushel_conv = 36.74 / 100
    name = "commodity_prices"

    # Short-circuit: return stored data when it exists and is not empty.
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    url = urls["commodity_index"]["dl"]

    # --- Beef: weekly series, averaged to monthly ---
    raw_beef = (pd.read_excel(url["beef"], header=4,
                              index_col=0).dropna(how="all"))
    raw_beef.columns = raw_beef.columns.str.strip()
    proc_beef = raw_beef["Ing. Prom./Ton."].to_frame()
    # The index is rebuilt positionally: one Saturday per row from 2002-01-04.
    proc_beef.index = pd.date_range(start="2002-01-04",
                                    periods=len(proc_beef),
                                    freq="W-SAT")
    # Values more than two standard deviations above the mean are divided by
    # 1000 (presumably to undo misplaced thousands separators in the sheet).
    proc_beef["Ing. Prom./Ton."] = np.where(
        proc_beef > np.mean(proc_beef) + np.std(proc_beef) * 2,
        proc_beef / 1000,
        proc_beef,
    )
    beef = proc_beef.resample("M").mean()

    # --- Pulp: CSV shipped inside a zip archive ---
    raw_pulp_r = requests.get(url["pulp"])
    # The temporary directory is a context manager so that the extracted
    # files are removed deterministically once the CSV has been read.
    with tempfile.TemporaryDirectory() as temp_dir, \
            zipfile.ZipFile(BytesIO(raw_pulp_r.content), "r") as f:
        f.extractall(path=temp_dir)
        path_temp = path.join(temp_dir, "monthly_values.csv")
        raw_pulp = pd.read_csv(path_temp, sep=";").dropna(how="any")
    # Row order is reversed before assigning a monthly index from 1990-01-31.
    proc_pulp = raw_pulp.copy().sort_index(ascending=False)
    proc_pulp.index = pd.date_range(start="1990-01-31",
                                    periods=len(proc_pulp),
                                    freq="M")
    proc_pulp.drop(["Label", "Codes"], axis=1, inplace=True)
    pulp = proc_pulp

    # --- Soybean and wheat: settle prices, monthly averages ---
    soy_wheat = []
    for link in [url["soybean"], url["wheat"]]:
        raw = pd.read_csv(link, index_col=0)
        proc = (raw["Settle"] * bushel_conv).to_frame()
        proc.index = pd.to_datetime(proc.index, format="%Y-%m-%d")
        proc.sort_index(inplace=True)
        soy_wheat.append(proc.resample("M").mean())
    soybean = soy_wheat[0]
    wheat = soy_wheat[1]

    # --- Milk (recent): spreadsheet linked from a scraped page ---
    milk_r = requests.get(url["milk1"])
    milk_soup = BeautifulSoup(milk_r.content, "html.parser")
    links = milk_soup.find_all(href=re.compile("Oceanía"))
    xls = links[0]["href"]
    # Percent-encode the link but keep the scheme separator readable.
    raw_milk = pd.read_excel(requests.utils.quote(xls).replace("%3A", ":"),
                             skiprows=14,
                             nrows=dt.datetime.now().year - 2006)
    raw_milk.dropna(how="all", axis=1, inplace=True)
    raw_milk.drop(["Promedio ", "Variación"], axis=1, inplace=True)
    raw_milk.columns = ["Año/Mes"] + list(range(1, 13))
    # Wide (one column per month) to long, ordered by year and month.
    proc_milk = pd.melt(raw_milk, id_vars=["Año/Mes"])
    proc_milk.sort_values(by=["Año/Mes", "variable"], inplace=True)
    proc_milk.index = pd.date_range(start="2007-01-31",
                                    periods=len(proc_milk),
                                    freq="M")
    # Third column of the melted frame holds the values.
    proc_milk = proc_milk.iloc[:, 2].to_frame()

    # --- Milk (history): older series, rescaled with the EUR/USD rate ---
    prev_milk = pd.read_excel(url["milk2"],
                              sheet_name="Dairy Products Prices",
                              index_col=0,
                              usecols="A,D",
                              skiprows=5)
    prev_milk = prev_milk.resample("M").mean()
    eurusd_r = requests.get(
        "http://fx.sauder.ubc.ca/cgi/fxdata",
        params=f"b=USD&c=EUR&rd=&fd=1&fm=1&fy=2001&ld=31&lm=12&ly="
        f"{dt.datetime.now().year}&y=monthly&q=volume&f=html&o=&cu=on")
    eurusd = pd.read_html(eurusd_r.content)[0].drop("MMM YYYY", axis=1)
    eurusd.index = pd.date_range(start="2001-01-31",
                                 periods=len(eurusd),
                                 freq="M")
    eurusd = eurusd.reindex(prev_milk.index)
    # NOTE(review): the division by the exchange rate and the factor of 10
    # look like a currency/unit conversion -- confirm the intended units.
    prev_milk = prev_milk.divide(eurusd.values).multiply(10)
    # Only keep history that precedes the first observation of the new series.
    prev_milk = prev_milk.loc[prev_milk.index < min(proc_milk.index)]
    prev_milk.columns, proc_milk.columns = ["Price"], ["Price"]
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    milk = pd.concat([prev_milk, proc_milk])

    # --- IMF sheet: rice, wood, wool, barley and gold ---
    raw_imf = pd.read_excel(url["imf"])
    raw_imf.columns = raw_imf.iloc[0, :]
    proc_imf = raw_imf.iloc[3:, 1:]
    proc_imf.index = pd.date_range(start="1980-01-31",
                                   periods=len(proc_imf),
                                   freq="M")
    rice = proc_imf[proc_imf.columns[proc_imf.columns.str.contains("Rice")]]
    wood = proc_imf[proc_imf.columns[proc_imf.columns.str.contains(
        "Sawnwood")]]
    # Several matching columns are collapsed into their row-wise mean.
    wood = wood.mean(axis=1).to_frame()
    wool = proc_imf[proc_imf.columns[proc_imf.columns.str.startswith("Wool")]]
    wool = wool.mean(axis=1).to_frame()
    barley = proc_imf[proc_imf.columns[proc_imf.columns.str.startswith(
        "Barley")]]
    gold = proc_imf[proc_imf.columns[proc_imf.columns.str.startswith("Gold")]]

    complete = pd.concat(
        [beef, pulp, soybean, milk, rice, wood, wool, barley, gold, wheat],
        axis=1)
    # Align to the beef dates and drop rows with fewer than 8 valid values.
    complete = complete.reindex(beef.index).dropna(thresh=8)
    complete.columns = [
        "Beef", "Pulp", "Soybeans", "Milk", "Rice", "Wood", "Wool", "Barley",
        "Gold", "Wheat"
    ]

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        complete = ops._revise(new_data=complete,
                               prev_data=previous_data,
                               revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=complete, name=name)

    return complete

Esempio n. 25

Mostra file

def gdp(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get seasonally adjusted real quarterly GDP for select countries.

    Countries/aggregates are US, EU-27, Japan and China. The first three are
    requested from the FRED API (the key is read from the ``FRED_API_KEY``
    environment variable, after loading the project's ``.env`` file), while
    the Chinese series is built from an OECD JSON response and an Excel file
    with observed levels.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``. If nothing is found there, data is downloaded anyway.

    Returns
    -------
    Quarterly real GDP in seasonally adjusted terms : pd.DataFrame

    """
    name = "global_gdp"

    # Short-circuit: return stored data when it exists and is not empty.
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    # The request's end period is Q4 of next year (presumably so that every
    # available observation is included -- confirm against the API).
    chn_y = dt.datetime.now().year + 1
    chn_r = requests.get(f"{urls[name]['dl']['chn_oecd']}{chn_y}-Q4")
    chn_json = chn_r.json()
    chn_datasets = []
    # Two series are read from the response; each gets a positional quarterly
    # index starting at its own hard-coded date.
    for dataset, start in zip(["0", "1"], ["2011-03-31", "1993-03-31"]):
        raw = chn_json["dataSets"][0]["series"][f"0:0:{dataset}:0"][
            "observations"]
        # The first element of each observation is kept as the value.
        values = [x[0] for x in raw.values()]
        df = pd.DataFrame(data=values,
                          index=pd.date_range(start=start,
                                              freq="Q-DEC",
                                              periods=len(values)),
                          columns=["China"])
        chn_datasets.append(df)
    # NOTE(review): judging by the names, series "0" holds quarter-on-quarter
    # growth and series "1" year-on-year growth, both in percent -- confirm.
    chn_qoq = chn_datasets[0]
    chn_yoy = chn_datasets[1]
    chn_obs = pd.read_excel(urls["global_gdp"]["dl"]["chn_obs"],
                            index_col=0).dropna(how="all",
                                                axis=1).dropna(how="all",
                                                               axis=0)
    # Only observations strictly between 2011-01-01 and 2016-01-01 are used.
    chn_obs = chn_obs.loc[(chn_obs.index > "2011-01-01")
                          & (chn_obs.index < "2016-01-01")]
    chn_yoy["volume"] = chn_obs
    # Backcast: walking backwards over the rows dated before 2011, a missing
    # "volume" is computed from the value four rows ahead, deflated by the
    # growth rate (first column) recorded four rows ahead.
    for row in reversed(range(len(chn_yoy.loc[chn_yoy.index < "2011-01-01"]))):
        if pd.isna(chn_yoy.iloc[row, 1]):
            chn_yoy.iloc[row, 1] = (chn_yoy.iloc[row + 4, 1] /
                                    (1 + chn_yoy.iloc[row + 4, 0] / 100))
    chn_yoy = chn_yoy[["volume"]].loc[chn_yoy.index < "2016-01-01"]
    metadata._set(chn_yoy)
    # Seasonal adjustment of the pre-2016 volume series (x13 method).
    chn_sa = decompose(chn_yoy[["volume"]].loc[chn_yoy.index < "2016-01-01"],
                       component="seas",
                       method="x13")
    chn_sa = pd.concat([chn_sa, chn_qoq], axis=1)
    # Chain forward: wherever the second column (from chn_qoq) has a value,
    # the first column is overwritten with the previous row's level grown by
    # that rate. Rows are processed in order, so each step uses the already
    # updated previous level.
    for row in range(len(chn_sa)):
        if not pd.isna(chn_sa.iloc[row, 1]):
            chn_sa.iloc[row, 0] = (chn_sa.iloc[row - 1, 0] *
                                   (1 + chn_sa.iloc[row, 1] / 100))
    # NOTE(review): division by 10 looks like a unit rescaling -- confirm.
    chn = chn_sa.iloc[:, [0]].div(10)

    gdps = []
    load_dotenv(Path(get_project_root(), ".env"))
    # NOTE(review): if FRED_API_KEY is not set this is None and the literal
    # text "None" ends up in the request URL below.
    fred_api_key = os.environ.get("FRED_API_KEY")
    for series in ["GDPC1", "CLVMNACSCAB1GQEU272020", "JPNRGDPEXP"]:
        r = requests.get(f"{urls[name]['dl']['fred']}{series}&api_key="
                         f"{fred_api_key}&file_type=json")
        aux = pd.DataFrame.from_records(r.json()["observations"])
        aux = aux[["date", "value"]].set_index("date")
        aux.index = pd.to_datetime(aux.index)
        # Dates are shifted three months forward and rolled to month end.
        aux.index = aux.index.shift(3, freq="M") + MonthEnd(0)
        aux.columns = [series]
        # Non-numeric values become NaN.
        aux = aux.apply(pd.to_numeric, errors="coerce")
        # Series-specific rescaling. NOTE(review): presumably GDPC1 is an
        # annualized rate (hence / 4) and the EU series is in millions
        # (hence / 1000) -- confirm against the FRED series notes.
        if series == "GDPC1":
            aux = aux.div(4)
        elif series == "CLVMNACSCAB1GQEU272020":
            aux = aux.div(1000)
        gdps.append(aux)
    gdps = pd.concat(gdps, axis=1)

    output = pd.concat([gdps, chn], axis=1)
    output.columns = ["Estados Unidos", "Unión Europea", "Japón", "China"]

    # Revision against stored data happens before metadata is attached.
    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output,
                  area="Global",
                  currency="USD",
                  inf_adj="Const.",
                  unit="Miles de millones",
                  seas_adj="SA",
                  ts_type="Flujo",
                  cumperiods=1)
    # Replace column level 3 with one entry per column.
    metadata._modify_multiindex(output,
                                levels=[3],
                                new_arrays=[["USD", "EUR", "JPY", "CNY"]])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output

Esempio n. 26

Mostra file

def tax_revenue(
        update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """
    Get tax revenues data.

    This retrieval function requires that Ghostscript and Tkinter be found in
    your system.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Directory (Path or path-like string) holding a CSV to update from,
        a SQLAlchemy connection or engine object, or ``None`` to skip
        updating.
    revise_rows : {'nodup', 'auto', int}
        How stored data is combined with new data. An integer is the number
        of rows dropped from the tail of the stored dataframe and replaced
        with new data; ``auto`` derives that number from the inferred data
        frequency; ``nodup`` replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Directory (Path or path-like string) where the CSV is saved,
        a SQLAlchemy connection or engine object, or ``None`` to skip saving.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly tax revenues : pd.DataFrame

    """
    name = "taxes"

    # Serve stored data when requested, falling through if nothing is stored.
    if only_get is True and update_loc is not None:
        stored = ops._io(operation="update", data_loc=update_loc, name=name)
        if not stored.equals(pd.DataFrame()):
            return stored

    sheet = pd.read_excel(urls[name]["dl"]["main"],
                          usecols="C:AO",
                          index_col=0)
    # Labels that cannot be parsed as dates become NaT and are filtered out.
    sheet.index = pd.to_datetime(sheet.index, errors="coerce")
    valid_rows = ~pd.isna(sheet.index)
    output = sheet.loc[valid_rows]
    output.index = output.index + MonthEnd(0)
    output.columns = taxes_columns
    output = output.div(1_000_000)

    # Append the figures read from the PDF report; on overlapping dates the
    # spreadsheet rows (which come first) win.
    from_pdf = _get_taxes_from_pdf(output)
    output = pd.concat([output, from_pdf], sort=False)
    is_repeated = output.index.duplicated(keep="first")
    output = output.loc[~is_repeated]

    if update_loc is not None:
        stored = ops._io(operation="update", data_loc=update_loc, name=name)
        output = ops._revise(new_data=output,
                             prev_data=stored,
                             revise_rows=revise_rows)

    output = output.apply(pd.to_numeric, errors="coerce")
    metadata._set(output,
                  area="Sector público",
                  currency="UYU",
                  inf_adj="No",
                  unit="Millones",
                  seas_adj="NSA",
                  ts_type="Flujo",
                  cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output

Esempio n. 27

Mostra file

def nxr(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get currencies data.

    Selected currencies are the US dollar index, USDEUR, USDJPY and USDCNY.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Directory (Path or path-like string) holding a CSV to update from,
        a SQLAlchemy connection or engine object, or ``None`` to skip
        updating.
    revise_rows : {'nodup', 'auto', int}
        How stored data is combined with new data. An integer is the number
        of rows dropped from the tail of the stored dataframe and replaced
        with new data; ``auto`` derives that number from the inferred data
        frequency; ``nodup`` replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Directory (Path or path-like string) where the CSV is saved,
        a SQLAlchemy connection or engine object, or ``None`` to skip saving.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily currencies : pd.DataFrame

    """
    name = "global_nxr"

    # Serve stored data when requested, falling through if nothing is stored.
    if only_get is True and update_loc is not None:
        stored = ops._io(operation="update", data_loc=update_loc, name=name)
        if not stored.equals(pd.DataFrame()):
            return stored

    frames = []
    for ticker in ["dollar", "eur", "jpy", "cny"]:
        # Only the date column and the fifth column of each CSV are read.
        quotes = pd.read_csv(urls[name]["dl"][ticker],
                             index_col=0,
                             usecols=[0, 4],
                             parse_dates=True)
        quotes.columns = [ticker]
        if ticker == "dollar":
            # The dollar index defines the base calendar for the join below,
            # so its missing rows are discarded first.
            quotes = quotes.dropna()
        frames.append(quotes)

    base, *others = frames
    joined = base.join(others)
    # Fill gaps linearly, but only between valid observations.
    output = joined.interpolate(method="linear", limit_area="inside")
    output.columns = ["Índice Dólar", "Euro", "Yen", "Renminbi"]

    if update_loc is not None:
        stored = ops._io(operation="update", data_loc=update_loc, name=name)
        output = ops._revise(new_data=output,
                             prev_data=stored,
                             revise_rows=revise_rows)

    metadata._set(output,
                  area="Global",
                  currency="USD",
                  inf_adj="No",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)
    # Replace column levels 3 and 5 with one entry per column.
    currency_codes = ["USD", "EUR", "JPY", "CNY"]
    pair_labels = ["Canasta/USD", "EUR/USD", "JPY/USD", "CNY/USD"]
    metadata._modify_multiindex(output,
                                levels=[3, 5],
                                new_arrays=[currency_codes, pair_labels])

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output

Esempio n. 28

Mostra file

def _public_debt_retriever(update_loc: Union[str, PathLike,
                                             Engine, Connection, None] = None,
                           revise_rows: Union[str, int] = "nodup",
                           save_loc: Union[str, PathLike,
                                           Engine, Connection, None] = None,
                           only_get: bool = False) -> Dict[str, pd.DataFrame]:
    """Helper function. See any of the `public_debt_...()` functions.

    Reads four tables from a single Excel workbook and returns them in a
    dictionary keyed ``"gps"``, ``"nfps"``, ``"cb"`` and ``"assets"``. Each
    table is stored/updated under the name ``public_debt_<key>``.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Where to find previously stored data, or ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Passed to ``ops._revise`` to define how updates are processed.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Where to save each table, or ``None``, don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``. Stored data is returned only if all four tables are
        non-empty; otherwise everything is downloaded.

    Returns
    -------
    Quarterly public debt tables : Dict[str, pd.DataFrame]

    """
    tables = ["gps", "nfps", "cb", "assets"]

    if only_get is True and update_loc is not None:
        output = {meta: ops._io(operation="update", data_loc=update_loc,
                                name=f"public_debt_{meta}")
                  for meta in tables}
        if all(not value.equals(pd.DataFrame()) for value in output.values()):
            return output

    colnames = ["Total deuda", "Plazo contractual: hasta 1 año",
                "Plazo contractual: entre 1 y 5 años",
                "Plazo contractual: más de 5 años",
                "Plazo residual: hasta 1 año",
                "Plazo residual: entre 1 y 5 años",
                "Plazo residual: más de 5 años",
                "Moneda: pesos", "Moneda: dólares", "Moneda: euros",
                "Moneda: yenes", "Moneda: DEG", "Moneda: otras",
                "Residencia: no residentes", "Residencia: residentes"]

    # Upper bound on rows to read: four quarters per year since 1999.
    years_elapsed = dt.datetime.now().year - 1999
    max_rows = years_elapsed * 4

    xls = pd.ExcelFile(urls["public_debt_gps"]["dl"]["main"])

    # --- Global public sector ---
    gps_raw = pd.read_excel(xls, sheet_name="SPG2",
                            usecols="B:Q", index_col=0, skiprows=10,
                            nrows=max_rows)
    # Keep rows with at least two valid values. Only ``thresh`` is passed:
    # ``how`` and ``thresh`` are mutually exclusive (pandas >= 2.0 raises
    # TypeError), and older versions ignored ``how`` when ``thresh`` was set.
    gps = gps_raw.dropna(thresh=2)
    gps.index = pd.date_range(start="1999-12-31", periods=len(gps),
                              freq="Q-DEC")
    gps.columns = colnames

    # --- Non-monetary public sector ---
    nfps_raw = pd.read_excel(xls, sheet_name="SPNM bruta",
                             usecols="B:O", index_col=0)
    # The table of interest starts five rows below this title row.
    loc = nfps_raw.index.get_loc("9. Deuda Bruta del Sector Público no "
                                 "monetario por plazo y  moneda.")
    nfps = nfps_raw.iloc[loc + 5:, :].dropna(how="any")
    nfps.index = pd.date_range(start="1999-12-31", periods=len(nfps),
                               freq="Q-DEC")
    # The last two columns come from a different area of the same sheet.
    nfps_extra_raw = pd.read_excel(xls, sheet_name="SPNM bruta",
                                   usecols="O:P", skiprows=11,
                                   nrows=max_rows)
    nfps_extra = nfps_extra_raw.dropna(how="all")
    nfps_extra.index = nfps.index
    nfps = pd.concat([nfps, nfps_extra], axis=1)
    nfps.columns = colnames

    # --- Central bank ---
    # NOTE(review): the skiprows offset grows by eight rows per year, which
    # presumably tracks how the sheet layout grows over time -- confirm.
    cb_raw = pd.read_excel(xls, sheet_name="BCU bruta",
                           usecols="B:O", index_col=0,
                           skiprows=years_elapsed * 8 + 20)
    cb = cb_raw.dropna(how="any")
    cb.index = pd.date_range(start="1999-12-31", periods=len(cb),
                             freq="Q-DEC")
    cb_extra_raw = pd.read_excel(xls, sheet_name="BCU bruta",
                                 usecols="O:P", skiprows=11,
                                 nrows=max_rows)
    bcu_extra = cb_extra_raw.dropna(how="all")
    bcu_extra.index = cb.index
    cb = pd.concat([cb, bcu_extra], axis=1)
    cb.columns = colnames

    # --- Assets ---
    assets_raw = pd.read_excel(xls, sheet_name="Activos Neta",
                               usecols="B,C,D,K", index_col=0, skiprows=13,
                               nrows=max_rows)
    assets = assets_raw.dropna(how="any")
    assets.index = pd.date_range(start="1999-12-31", periods=len(assets),
                                 freq="Q-DEC")
    assets.columns = ["Total activos", "Sector público no monetario",
                      "BCU"]

    downloaded = {"gps": gps, "nfps": nfps, "cb": cb, "assets": assets}

    # Results go into a fresh dict so the one being iterated is not mutated.
    output = {}
    for meta, data in downloaded.items():
        if update_loc is not None:
            previous_data = ops._io(operation="update", data_loc=update_loc,
                                    name=f"public_debt_{meta}")
            data = ops._revise(new_data=data,
                               prev_data=previous_data,
                               revise_rows=revise_rows)
        metadata._set(data, area="Sector público", currency="USD",
                      inf_adj="No", unit="Millones", seas_adj="NSA",
                      ts_type="Stock", cumperiods=1)

        if save_loc is not None:
            ops._io(operation="save", data_loc=save_loc,
                    data=data, name=f"public_debt_{meta}")

        output[meta] = data

    return output

Esempio n. 29

Mostra file

File: income.py Progetto: rxavier/econuy

def income_household(update_loc: Union[str, PathLike,
                                       Engine, Connection, None] = None,
                     revise_rows: Union[str, int] = "nodup",
                     save_loc: Union[str, PathLike,
                                     Engine, Connection, None] = None,
                     only_get: bool = False) -> pd.DataFrame:
    """Get average household income.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``. If nothing is found there, data is downloaded anyway.

    Returns
    -------
    Monthly average household income : pd.DataFrame

    Raises
    ------
    urllib.error.URLError
        If the download fails for a reason other than SSL certificate
        verification.

    """
    name = "income_household"

    # Short-circuit: return stored data when it exists and is not empty.
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc,
                         name=name)
        if not output.equals(pd.DataFrame()):
            return output
    try:
        raw = pd.read_excel(urls[name]["dl"]["main"], sheet_name="Mensual",
                            skiprows=5, index_col=0).dropna(how="all")
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" in str(err):
            # Retry through requests, verifying against the certificate
            # bundle shipped with the project.
            certificate = Path(get_project_root(), "utils", "files",
                               "ine_certs.pem")
            r = requests.get(urls[name]["dl"]["main"],
                             verify=certificate)
            raw = pd.read_excel(BytesIO(r.content),
                                sheet_name="Mensual",
                                skiprows=5, index_col=0).dropna(how="all")
        else:
            # Bare raise keeps the original traceback intact.
            raise
    raw.index = pd.to_datetime(raw.index)
    output = raw.loc[~pd.isna(raw.index)]
    output.index = output.index + MonthEnd(0)
    output.columns = ["Total país", "Montevideo", "Interior: total",
                      "Interior: localidades de más de 5 mil hab.",
                      "Interior: localidades pequeñas y rural"]

    # A second file provides extra rows, with values only for the first three
    # columns; the other columns are left as NaN for those rows.
    missing = pd.read_excel(urls[name]["dl"]["missing"],
                            index_col=0, header=0).iloc[:, 10:13]
    missing.columns = output.columns[:3]
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    output = pd.concat([output, missing], sort=False)
    output = output.apply(pd.to_numeric, errors="coerce")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    metadata._set(output, area="Ingresos", currency="UYU",
                  inf_adj="No", unit="Pesos", seas_adj="NSA",
                  ts_type="Flujo", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output

Esempio n. 30

Mostra file

File: frequent.py Progetto: vierja/econuy

def labor_rate_people(seas_adj: Union[str, None] = None,
                      update_loc: Union[str, PathLike, Engine, Connection,
                                        None] = None,
                      save_loc: Union[str, PathLike, Engine, Connection,
                                      None] = None,
                      name: str = "tfm_labor",
                      index_label: str = "index",
                      only_get: bool = True) -> pd.DataFrame:
    """
    Get labor data, both rates and persons. Allow choosing seasonal adjustment.

    Parameters
    ----------
    seas_adj : {None, 'trend', 'seas'}
        Whether to seasonally adjust.
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update. Passed on to the labor rates retrieval.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'tfm_labor'
        Either CSV filename for updating and/or saving, or table name if
        using SQL. A suffix with the seasonal adjustment ('_nsa', '_trend'
        or '_sa') is appended before saving.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default True
        If True, don't download data, retrieve what is available from
        ``update_loc`` for the labor rates.

    Returns
    -------
    Labor market data : pd.DataFrame

    Raises
    ------
    ValueError
        If ``seas_adj`` is given an invalid keyword.

    """
    if seas_adj not in ["trend", "seas", None]:
        raise ValueError("'seas_adj' can be 'trend', 'seas' or None.")

    rates = labor.get_rates(update_loc=update_loc, only_get=only_get)
    rates = rates.loc[:, [
        "Tasa de actividad: total", "Tasa de empleo: total",
        "Tasa de desempleo: total"
    ]]
    # Strip the ": total" suffix from the first column level. The result is
    # assigned back because ``set_levels(..., inplace=True)`` was removed in
    # pandas 2.0.
    clean_names = rates.columns.levels[0].str.replace(": total", "")
    rates.columns = rates.columns.set_levels(clean_names, level=0)

    if seas_adj in ["trend", "seas"]:
        trend, seasadj = transform.decompose(rates, trading=True, outlier=True)
        if seas_adj == "trend":
            rates = trend
        elif seas_adj == "seas":
            rates = seasadj

    working_age = pd.read_excel(urls["tfm_labor"]["dl"]["population"],
                                skiprows=7,
                                index_col=0,
                                nrows=92).dropna(how="all")
    # Sum the rows for single ages 14 through 89 plus the open-ended group.
    ages = list(range(14, 90)) + ["90 y más"]
    working_age = working_age.loc[ages].sum()
    # One value per year, dated at June 30th, from 1996 to 2050.
    working_age.index = pd.date_range(start="1996-06-30",
                                      end="2050-06-30",
                                      freq="A-JUN")
    # Monthly values by linear interpolation, restricted to the rates' dates.
    monthly_working_age = working_age.resample("M").interpolate("linear")
    monthly_working_age = monthly_working_age.loc[rates.index]
    # The first two rates are applied to the working age population; the
    # third rate is applied to the first resulting column.
    persons = rates.iloc[:, [0, 1]].div(100).mul(monthly_working_age, axis=0)
    persons["Desempleados"] = rates.iloc[:, 2].div(100).mul(persons.iloc[:, 0])
    persons.columns = ["Activos", "Empleados", "Desempleados"]
    seas_text = "NSA"
    if seas_adj == "trend":
        seas_text = "Trend"
    elif seas_adj == "seas":
        seas_text = "SA"
    metadata._set(persons,
                  area="Mercado laboral",
                  currency="-",
                  inf_adj="No",
                  unit="Personas",
                  seas_adj=seas_text,
                  ts_type="-",
                  cumperiods=1)

    output = pd.concat([rates, persons], axis=1)

    name = f"{name}_{seas_text.lower()}"

    if save_loc is not None:
        ops._io(operation="save",
                data_loc=save_loc,
                data=output,
                name=name,
                index_label=index_label)

    return output