Ejemplo n.º 1
0
    def base_index(self, start_date: Union[str, date],
                   end_date: Union[str, date, None] = None, base: float = 100):
        """
        Rebase the held dataset to a period or range of periods.

        When the dataset is a dictionary, every table in it is rebased
        individually with the same arguments. The call is logged, and the
        result is either stored on this session (``inplace``) or wrapped in a
        new :class:`Session` carrying the same settings.

        See Also
        --------
        :func:`~econuy.transform.base_index`

        """
        # Same keyword arguments for every call to the underlying transform.
        options = {"start_date": start_date, "end_date": end_date,
                   "base": base}
        if isinstance(self.dataset, dict):
            rebased = {label: transform.base_index(frame, **options)
                       for label, frame in self.dataset.items()}
        else:
            rebased = transform.base_index(self.dataset, **options)
        self.logger.info("Applied 'base_index' transformation.")

        if self.inplace is not True:
            # Leave this session untouched and hand back a sibling session.
            return Session(location=self.location,
                           revise_rows=self.revise_rows,
                           only_get=self.only_get,
                           dataset=rebased,
                           logger=self.logger,
                           inplace=self.inplace)
        self.dataset = rebased
        return self
Ejemplo n.º 2
0
def terms_of_trade(update_loc: Union[str, PathLike, Engine, Connection,
                                     None] = None,
                   save_loc: Union[str, PathLike, Engine, Connection,
                                   None] = None,
                   name: str = "tfm_tot",
                   index_label: str = "index",
                   only_get: bool = True) -> pd.DataFrame:
    """
    Get terms of trade (export prices over import prices, 2005=100).

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Forwarded to ``trade.get``. Either Path or path-like string pointing
        to a directory where to find a CSV for updating, SQLAlchemy
        connection or engine object, or ``None``, don't update.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save. Also forwarded to ``trade.get``.
    name : str, default 'tfm_tot'
        CSV filename or SQL table name used when saving the result.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default True
        Forwarded to ``trade.get``. If True, don't download data, retrieve
        what is available from ``update_loc``.

    Returns
    -------
    Terms of trade (exports/imports) : pd.DataFrame

    """
    trade_tables = trade.get(update_loc=update_loc,
                             save_loc=save_loc,
                             only_get=only_get)

    # Give both totals a common label so the division aligns on it.
    export_prices = trade_tables["tb_x_dest_pri"].rename(
        columns={"Total exportaciones": "Total"})
    import_prices = trade_tables["tb_m_orig_pri"].rename(
        columns={"Total importaciones": "Total"})

    ratio = (export_prices / import_prices).loc[:, ["Total"]]
    ratio = ratio.rename(columns={"Total": "Términos de intercambio"})

    # Rebase so that the 2005 average equals the default base.
    tot = transform.base_index(ratio,
                               start_date="2005-01-01",
                               end_date="2005-12-31")
    metadata._set(tot, ts_type="-")

    if save_loc is None:
        return tot

    ops._io(operation="save",
            data_loc=save_loc,
            data=tot,
            name=name,
            index_label=index_label)
    return tot
Ejemplo n.º 3
0
def test_wages():
    """Check the frequently-used wages dataset against manual transformations.

    Covers three variants of ``Session.get_frequent`` (trend, seasonally
    adjusted and unadjusted), the ``only_get`` retrieval path of
    ``Session.get`` and the ``ValueError`` raised for an invalid ``seas_adj``.
    """
    remove_clutter()
    session = Session(location=TEST_CON)
    assert isinstance(session, Session)
    assert isinstance(session.dataset, pd.DataFrame)
    # NOTE(review): this call uses dataset="real_wages" while the later calls
    # use dataset="wages" -- confirm both keys resolve to the same dataset.
    full_wages = session.get_frequent(dataset="real_wages",
                                      seas_adj="trend").dataset
    # First three columns are compared against the trend of the raw wages.
    wages_tfm = full_wages.iloc[:, [0, 1, 2]]
    remove_clutter()
    wages_ = session.get(dataset="wages").dataset
    # Retrieving with only_get=True must match the data just obtained.
    session.only_get = True
    compare = session.get(dataset="wages").dataset
    assert wages_.round(4).equals(compare.round(4))
    session.only_get = False
    remove_clutter()
    # Rebuild the trend and seasonally adjusted series by hand.
    wages_trend, wages_sa = transform.decompose(wages_,
                                                outlier=False,
                                                trading=True)
    wages_trend = transform.base_index(wages_trend, start_date="2008-07-31")
    # Align labels so that equals() compares values only.
    wages_trend.columns = wages_tfm.columns
    assert wages_trend.equals(wages_tfm)
    remove_clutter()
    full_wages = session.get_frequent(dataset="wages", seas_adj="seas").dataset
    wages_tfm = full_wages.iloc[:, [0, 1, 2]]
    wages_sa = transform.base_index(wages_sa, start_date="2008-07-31")
    wages_sa.columns = wages_tfm.columns
    assert wages_sa.equals(wages_tfm)
    remove_clutter()
    full_wages = session.get_frequent(dataset="wages", seas_adj=None).dataset
    # Columns 3-5 are compared against convert_real() applied to raw wages.
    real_wages = full_wages.iloc[:, [3, 4, 5]]
    compare = transform.convert_real(wages_)
    compare = transform.base_index(compare, start_date="2008-07-31")
    compare.columns = real_wages.columns
    assert real_wages.equals(compare)
    remove_clutter()
    # Any seas_adj outside the accepted keywords must be rejected.
    with pytest.raises(ValueError):
        session.get_frequent(dataset="wages", seas_adj="wrong")
Ejemplo n.º 4
0
def test_prices_inflation():
    """Check that the first price measure equals the rebased headline CPI.

    The CPI is cut to start on 1997-03-31 and rebased to December 2010 before
    being compared with the first column of the ``price_measures`` dataset.
    """
    remove_clutter()
    session = Session(location=TEST_CON)
    assert isinstance(session, Session)
    assert isinstance(session.dataset, pd.DataFrame)

    price_measures = session.get_frequent(dataset="price_measures").dataset
    remove_clutter()

    raw_cpi = session.get(dataset="cpi").dataset
    since_1997 = raw_cpi.index >= "1997-03-31"
    expected = transform.base_index(raw_cpi.loc[since_1997],
                                    start_date="2010-12-01",
                                    end_date="2010-12-31")

    headline = price_measures.iloc[:, [0]]
    # Align labels so that equals() compares values only.
    headline.columns = expected.columns
    assert headline.equals(expected)
Ejemplo n.º 5
0
def test_tot():
    """Check the ``tot`` dataset against a manual terms-of-trade calculation.

    The expected value is the ratio of total export prices to total import
    prices taken from the ``trade`` tables, rebased to 2005.
    """
    remove_clutter()
    session = Session(location=TEST_CON)
    assert isinstance(session, Session)
    assert isinstance(session.dataset, pd.DataFrame)

    trade_tables = session.get(dataset="trade").dataset
    assert isinstance(trade_tables, dict)
    assert len(trade_tables) == 12
    remove_clutter()

    net = session.get_frequent(dataset="tot").dataset

    # Give both totals a common label so the division aligns on it.
    export_prices = trade_tables["tb_x_dest_pri"].rename(
        columns={"Total exportaciones": "Total"})
    import_prices = trade_tables["tb_m_orig_pri"].rename(
        columns={"Total importaciones": "Total"})
    expected = (export_prices / import_prices).loc[:, ["Total"]]
    expected = transform.base_index(expected,
                                    start_date="2005-01-01",
                                    end_date="2005-12-31")

    # Align labels so that equals() compares values only.
    expected.columns = net.columns
    assert net.equals(expected)
    remove_clutter()
Ejemplo n.º 6
0
def cpi_measures(update_loc: Union[str, PathLike, Engine, Connection,
                                   None] = None,
                 revise_rows: Union[str, int] = "nodup",
                 save_loc: Union[str, PathLike, Engine, Connection,
                                 None] = None,
                 name: str = "tfm_prices",
                 index_label: str = "index",
                 only_get: bool = False) -> pd.DataFrame:
    """Get core CPI, Winsorized CPI, tradable CPI and non-tradable CPI.

    The returned frame also carries the total CPI as its first column. All
    five series are rebased so that December 2010 equals 100.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'tfm_prices'
        Either CSV filename for updating and/or saving, or table name if
        using SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI measures : pd.DataFrame

    """
    # Short-circuit: return stored data when asked to and it is not empty;
    # otherwise fall through to the full download.
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update",
                         data_loc=update_loc,
                         name=name,
                         index_label=index_label)
        if not output.equals(pd.DataFrame()):
            return output

    xls = pd.ExcelFile(urls["tfm_prices"]["dl"]["main"])
    # Item names and weights are read from the first sheet, indexed by the
    # values of its first column (used below as item codes).
    weights = pd.read_excel(xls,
                            sheet_name=xls.sheet_names[0],
                            usecols="A:C",
                            skiprows=14,
                            index_col=0).dropna(how="any")
    weights.columns = ["Item", "Weight"]
    # Keep only the items whose code is 8 characters long.
    weights_8 = weights.loc[weights.index.str.len() == 8]
    sheets = []
    # From every sheet keep the columns whose header contains "Indice" or
    # "Índice", transposed so that each sheet column becomes a row.
    for sheet in xls.sheet_names:
        raw = pd.read_excel(xls, sheet_name=sheet, usecols="D:IN",
                            skiprows=9).dropna(how="all")
        proc = raw.loc[:, raw.columns.str.contains("Indice|Índice")].dropna(
            how="all")
        sheets.append(proc.T)
    output = pd.concat(sheets)
    # Drop the first column before labelling the rest.
    # NOTE(review): assumes the remaining columns line up one-to-one with the
    # rows of ``weights`` -- confirm against the source workbook.
    output = output.iloc[:, 1:]
    # Two-level columns: item name (level 0) and item code (level 1).
    output.columns = [weights["Item"], weights.index]
    # NOTE(review): freq="M" is reported as deprecated in recent pandas
    # releases in favour of "ME" -- confirm the supported pandas version.
    output.index = pd.date_range(start="2010-12-31",
                                 periods=len(output),
                                 freq="M")
    # Monthly changes of the items with 8-character codes.
    diff_8 = output.loc[:,
                        output.columns.get_level_values(
                            level=1).str.len() == 8].pct_change()
    # Winsorize with 0.05 limits on each side along axis=1.
    win = pd.DataFrame(winsorize(diff_8, limits=(0.05, 0.05), axis=1))
    win.index = diff_8.index
    win.columns = diff_8.columns.get_level_values(level=1)
    # Weighted sum of the winsorized changes, cumulated into an index.
    cpi_win = win.mul(weights_8.loc[:, "Weight"].T)
    cpi_win = cpi_win.sum(axis=1).add(1).cumprod().mul(100)

    # Historical item-level data, transposed so that items become columns.
    prod_97 = (pd.read_excel(
        urls["tfm_prices"]["dl"]["historical"], skiprows=5).dropna(
            how="any").set_index("Rubros, Agrupaciones y Subrubros").T)
    # ``prod_details`` is defined outside this function; here [0] and [1] are
    # used as column selectors and [2] and [3] as truthy/falsy flags.
    # NOTE(review): presumably item names in the current and historical
    # files plus tradable/core markers -- confirm at its definition.
    prod_97 = prod_97.loc[:, prod_details[1]].pct_change()
    output_8 = output.loc[:, prod_details[0]].pct_change()
    # Keep the first occurrence of every item name and flatten the columns.
    output_8 = output_8.loc[:, ~output_8.columns.get_level_values(
        level=0).duplicated()]
    output_8.columns = output_8.columns.get_level_values(level=0)
    prod_97.columns = output_8.columns.get_level_values(level=0)
    # Append the current changes (minus their first row) to the historical
    # ones and rebuild a monthly index starting on 1997-03-31.
    complete = pd.concat([prod_97, output_8.iloc[1:]])
    complete.index = pd.date_range(start="1997-03-31",
                                   freq="M",
                                   periods=len(complete))
    weights_complete = weights.loc[weights["Item"].isin(complete.columns)]
    weights_complete = weights_complete.loc[~weights_complete["Item"].
                                            duplicated()].set_index("Item")
    # Tradable index: columns flagged truthy in prod_details[2], weights
    # renormalized to sum to one.
    tradable = complete.loc[:, [bool(x) for x in prod_details[2]]]
    tradable_weights = weights_complete.loc[
        weights_complete.index.isin(tradable.columns), "Weight"].T
    tradable_weights = tradable_weights.div(tradable_weights.sum())
    tradable = (tradable.mul(tradable_weights).sum(
        axis=1).add(1).cumprod().mul(100))

    # Non-tradable index: the complement of the tradable selection.
    non_tradable = complete.loc[:, [not bool(x) for x in prod_details[2]]]
    non_tradable_weights = weights_complete.loc[
        weights_complete.index.isin(non_tradable.columns), "Weight"].T
    non_tradable_weights = non_tradable_weights.div(non_tradable_weights.sum())
    non_tradable = (non_tradable.mul(non_tradable_weights).sum(
        axis=1).add(1).cumprod().mul(100))

    # Core index: columns flagged truthy in prod_details[3].
    core = complete.loc[:, [bool(x) for x in prod_details[3]]]
    core_weights = weights_complete.loc[
        weights_complete.index.isin(core.columns), "Weight"].T
    core_weights = core_weights.div(core_weights.sum())
    core = (core.mul(core_weights).sum(axis=1).add(1).cumprod().mul(100))

    # Total CPI is always requested with only_get=True, regardless of the
    # ``only_get`` argument received by this function.
    cpi_re = cpi.get(update_loc=update_loc, save_loc=save_loc, only_get=True)
    cpi_re = cpi_re.loc[cpi_re.index >= "1997-03-31"]
    output = pd.concat([cpi_re, tradable, non_tradable, core, cpi_win], axis=1)
    # Rebase every series to December 2010.
    output = transform.base_index(output,
                                  start_date="2010-12-01",
                                  end_date="2010-12-31")
    output.columns = [
        "Índice de precios al consumo: total",
        "Índice de precios al consumo: transables",
        "Índice de precios al consumo: no transables",
        "Índice de precios al consumo: subyacente",
        "Índice de precios al consumo: Winsorized 0.05"
    ]

    # Merge with previously stored data according to ``revise_rows``.
    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name,
                                index_label=index_label)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    output = output.apply(pd.to_numeric, errors="coerce")
    metadata._set(output,
                  area="Precios y salarios",
                  currency="-",
                  inf_adj="No",
                  unit="2010-12=100",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save",
                data_loc=save_loc,
                data=output,
                name=name,
                index_label=index_label)

    return output
Ejemplo n.º 7
0
def labor_real_wages(seas_adj: Union[str, None] = None,
                     update_loc: Union[str, PathLike, Engine, Connection,
                                       None] = None,
                     save_loc: Union[str, PathLike, Engine, Connection,
                                     None] = None,
                     name: str = "tfm_wages",
                     index_label: str = "index",
                     only_get: bool = True) -> pd.DataFrame:
    """
    Get nominal and real wages, optionally seasonally adjusted.

    The result holds the nominal wage indexes followed by their real
    counterparts, rebased to 2008-07-31.

    Parameters
    ----------
    seas_adj : {'trend', 'seas', None}
        Whether to seasonally adjust. ``'trend'`` keeps the trend component
        of the decomposition, ``'seas'`` the seasonally adjusted series.
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Forwarded to the wages retrieval and to the real conversion. Either
        Path or path-like string pointing to a directory where to find a CSV
        for updating, SQLAlchemy connection or engine object, or ``None``,
        don't update.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'tfm_wages'
        Base CSV filename or SQL table name used when saving. A suffix is
        appended according to ``seas_adj``: ``_nsa``, ``_trend`` or ``_sa``.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default True
        Forwarded to the wages retrieval and to the real conversion. If True,
        don't download data, retrieve what is available from ``update_loc``.

    Returns
    -------
    Real wages data : pd.DataFrame

    Raises
    ------
    ValueError
        If ``seas_adj`` is given an invalid keyword.

    """
    accepted = ("trend", "seas", None)
    if seas_adj not in accepted:
        raise ValueError("'seas_adj' can be 'trend', 'seas' or None.")

    nominal = labor.get_wages(update_loc=update_loc, only_get=only_get)

    # The real series start as a relabelled copy of the nominal ones.
    deflated = nominal.copy()
    deflated.columns = [
        "Índice medio de salarios reales",
        "Índice medio de salarios reales privados",
        "Índice medio de salarios reales públicos"
    ]
    metadata._set(deflated,
                  area="Mercado laboral",
                  currency="UYU",
                  inf_adj="Sí",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)
    deflated = transform.convert_real(deflated,
                                      update_loc=update_loc,
                                      only_get=only_get)
    output = pd.concat([nominal, deflated], axis=1)

    # Suffix appended to the saved name; "nsa" means no decomposition.
    suffix = {"trend": "trend", "seas": "sa"}.get(seas_adj, "nsa")
    if suffix != "nsa":
        trend, seasadj = transform.decompose(output,
                                             trading=True,
                                             outlier=False)
        output = trend if suffix == "trend" else seasadj

    output = transform.base_index(output, start_date="2008-07-31")

    name = f"{name}_{suffix}"

    if save_loc is None:
        return output

    ops._io(operation="save",
            data_loc=save_loc,
            data=output,
            name=name,
            index_label=index_label)
    return output
Ejemplo n.º 8
0
def get_custom(update_loc: Union[str, PathLike, Engine, Connection,
                                 None] = None,
               revise_rows: Union[str, int] = "nodup",
               save_loc: Union[str, PathLike, Engine, Connection, None] = None,
               name: str = "rxr_custom",
               index_label: str = "index",
               only_get: bool = False) -> pd.DataFrame:
    """Get custom real exchange rates built from IMF and local data.

    Monthly consumer prices (``PCPI_IX``) and exchange rates
    (``ENDA_XDC_USD_RATE``) for the US, Brazil and Argentina are downloaded
    from the IMF data service (IFS database), complemented with the
    Argentine series returned by ``_missing_ar`` and with Uruguayan CPI and
    exchange rate data. Bilateral real exchange rates are then computed and
    rebased so that the 2010 average equals 100.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many rows
        to remove from the tail of the dataframe and replace with new data.
        String can either be ``auto``, which automatically determines number of
        rows to replace from the inferred data frequency, or ``nodup``,
        which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    name : str, default 'rxr_custom'
        Either CSV filename for updating and/or saving, or table name if
        using SQL.
    index_label : str, default 'index'
        Label for SQL indexes.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly real exchange rates vs select countries : pd.DataFrame
        Uruguay vs Argentina, Brazil and the US, plus Argentina vs the US
        and Brazil vs the US.

    """
    # Short-circuit: return stored data when asked to and it is not empty;
    # otherwise fall through to the full download.
    if only_get is True and update_loc is not None:
        output = ops._io(operation="update",
                         data_loc=update_loc,
                         name=name,
                         index_label=index_label)
        if not output.equals(pd.DataFrame()):
            return output

    url_ = "http://dataservices.imf.org/REST/SDMX_JSON.svc/CompactData/IFS/M."
    url_extra = ".?startPeriod=1970&endPeriod="
    raw = []
    # One request per country/indicator pair. The append order fixes the
    # column positions relied upon further down (``iloc[:, [5, 6]]``).
    for country in ["US", "BR", "AR"]:
        for indicator in ["PCPI_IX", "ENDA_XDC_USD_RATE"]:
            base_url = (f"{url_}{country}.{indicator}{url_extra}"
                        f"{dt.datetime.now().year}")
            r_json = requests.get(base_url).json()
            data = r_json["CompactData"]["DataSet"]["Series"]["Obs"]
            try:
                data = pd.DataFrame(data)
                data.set_index("@TIME_PERIOD", drop=True, inplace=True)
            except ValueError:
                # Observations that cannot be turned into a frame are
                # replaced by an all-NaN placeholder series.
                # NOTE(review): this placeholder index already holds
                # month-end dates, so the MonthEnd(1) shift below appears to
                # move it one month forward -- confirm whether that matters.
                data = pd.DataFrame(np.nan,
                                    index=pd.date_range(start="1970-01-01",
                                                        end=dt.datetime.now(),
                                                        freq="M"),
                                    columns=[f"{country}.{indicator}"])
            if "@OBS_STATUS" in data.columns:
                data.drop("@OBS_STATUS", inplace=True, axis=1)
            # Periods come as "YYYY-MM"; stamp them at the end of the month.
            data.index = (pd.to_datetime(data.index, format="%Y-%m") +
                          MonthEnd(1))
            data.columns = [f"{country}.{indicator}"]
            raw.append(data)
    raw = pd.concat(raw, axis=1, sort=True).apply(pd.to_numeric)

    # NOTE(review): ``_missing_ar`` is defined elsewhere; judging by the
    # names it returns an alternative Argentine exchange rate and CPI.
    ar_black_xr, ar_cpi = _missing_ar()
    proc = raw.copy()
    proc["AR.PCPI_IX"] = ar_cpi
    ar_black_xr = pd.concat([ar_black_xr, proc["AR.ENDA_XDC_USD_RATE"]],
                            axis=1)
    # Fill gaps in the alternative rate (column 0) with the IMF rate.
    ar_black_xr[0] = np.where(pd.isna(ar_black_xr[0]),
                              ar_black_xr["AR.ENDA_XDC_USD_RATE"],
                              ar_black_xr[0])
    proc["AR.ENDA_XDC_USD_RATE_black"] = ar_black_xr.iloc[:, 0]
    # Average of the two Argentine exchange rates (positions 5 and 6).
    proc["AR_E_A"] = proc.iloc[:, [5, 6]].mean(axis=1)

    uy_cpi = cpi.get(update_loc=update_loc, save_loc=save_loc, only_get=True)
    uy_e = nxr.get_monthly(update_loc=update_loc,
                           save_loc=save_loc,
                           only_get=True).iloc[:, [1]]
    proc = pd.concat([proc, uy_cpi, uy_e], axis=1)
    proc = proc.interpolate(method="linear", limit_area="inside")
    proc = proc.dropna(how="any")
    proc.columns = [
        "US_P", "US_E", "BR_P", "BR_E", "AR_P", "AR_E", "AR_E_B", "AR_E_A",
        "UY_P", "UY_E"
    ]

    output = pd.DataFrame()
    output["UY_E_P"] = proc["UY_E"] / proc["UY_P"]
    output["TCR_UY_AR"] = output["UY_E_P"] / proc["AR_E_A"] * proc["AR_P"]
    output["TCR_UY_BR"] = output["UY_E_P"] / proc["BR_E"] * proc["BR_P"]
    output["TCR_UY_US"] = output["UY_E_P"] * proc["US_P"]
    # Each bilateral rate against the US uses that country's own exchange
    # rate and prices, matching the "ARS/USD" and "BRL/USD" labels assigned
    # below. The two inputs were previously swapped.
    output["TCR_AR_US"] = proc["AR_E"] * proc["US_P"] / proc["AR_P"]
    output["TCR_BR_US"] = proc["BR_E"] * proc["US_P"] / proc["BR_P"]
    output.drop("UY_E_P", axis=1, inplace=True)
    output.rename_axis(None, inplace=True)

    metadata._set(output,
                  area="Precios y salarios",
                  currency="-",
                  inf_adj="No",
                  unit="-",
                  seas_adj="NSA",
                  ts_type="-",
                  cumperiods=1)
    output = transform.base_index(output,
                                  start_date="2010-01-01",
                                  end_date="2010-12-31",
                                  base=100)
    # Rebuild the nine-level column index, overriding only the currency
    # level (position 3) with one currency pair per column.
    arrays = [list(output.columns.get_level_values(level))
              for level in range(9)]
    arrays[3] = ["UYU/ARS", "UYU/BRL", "UYU/USD", "ARS/USD", "BRL/USD"]
    tuples = list(zip(*arrays))
    output.columns = pd.MultiIndex.from_tuples(tuples,
                                               names=[
                                                   "Indicador", "Área",
                                                   "Frecuencia", "Moneda",
                                                   "Inf. adj.", "Unidad",
                                                   "Seas. Adj.", "Tipo",
                                                   "Acum. períodos"
                                               ])

    # Merge with previously stored data according to ``revise_rows``.
    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name,
                                index_label=index_label)
        output = ops._revise(new_data=output,
                             prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save",
                data_loc=save_loc,
                data=output,
                name=name,
                index_label=index_label)

    return output