def real_wages(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
               save_loc: Union[str, PathLike, Engine, Connection, None] = None,
               only_get: bool = True) -> pd.DataFrame:
    """
    Get real wages.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default True
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Real wages data : pd.DataFrame

    """
    name = "real_wages"

    wages = nominal_wages(update_loc=update_loc, only_get=only_get)
    wages.columns = ["Índice medio de salarios reales",
                     "Índice medio de salarios reales privados",
                     "Índice medio de salarios reales públicos"]
    metadata._set(wages, area="Mercado laboral", currency="UYU",
                  inf_adj="Sí", seas_adj="NSA", ts_type="-", cumperiods=1)

    output = transform.convert_real(wages, update_loc=update_loc,
                                    only_get=only_get)
    output = transform.rebase(output, start_date="2008-07-31")

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
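# Hedged sketch (not part of the library): illustrates the deflate-then-rebase
# pattern used by real_wages(), i.e. dividing a nominal index by a price index
# and expressing the result with a chosen period equal to 100. All names and
# numbers below are invented for illustration.
def _example_deflate_and_rebase():
    import pandas as pd

    idx = pd.date_range("2008-06-30", periods=4, freq="M")
    nominal = pd.Series([100.0, 103.0, 107.0, 112.0], index=idx)
    cpi = pd.Series([100.0, 101.0, 102.5, 104.0], index=idx)
    real = nominal.div(cpi)
    # Rebase so that 2008-07-31 equals 100, mirroring the transform.rebase call above.
    return real.div(real.loc["2008-07-31"]).mul(100)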
def stocks(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
           revise_rows: Union[str, int] = "nodup",
           save_loc: Union[str, PathLike, Engine, Connection, None] = None,
           only_get: bool = False) -> pd.DataFrame:
    """Get stock market index data.

    Indexes selected are S&P 500, Euronext 100, Nikkei 225 and Shanghai
    Composite.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily stock market index in USD : pd.DataFrame

    """
    name = "global_stocks"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    yahoo = []
    for series in ["spy", "n100", "nikkei", "sse"]:
        aux = pd.read_csv(urls[name]["dl"][series], index_col=0,
                          usecols=[0, 4], parse_dates=True)
        aux.columns = [series]
        yahoo.append(aux)
    output = pd.concat(yahoo, axis=1).interpolate(method="linear",
                                                  limit_area="inside")
    output.columns = ["S&P 500", "Euronext 100", "Nikkei 225",
                      "Shanghai Stock Exchange Composite"]
    metadata._set(output, area="Global", currency="USD",
                  inf_adj="No", seas_adj="NSA",
                  ts_type="-", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["USD", "EUR", "JPY", "CNY"]])
    output = rebase(output, start_date="2019-01-02")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
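# Hedged sketch (not from the library): shows how interpolate(limit_area="inside")
# fills gaps between observed values without extrapolating beyond the first or
# last observation, which is how the indexes above are aligned before rebasing.
def _example_inside_interpolation():
    import pandas as pd

    s = pd.Series([1.0, None, 3.0, None],
                  index=pd.date_range("2019-01-01", periods=4, freq="D"))
    # The interior gap (Jan 2) is filled; the trailing gap (Jan 4) stays NaN.
    return s.interpolate(method="linear", limit_area="inside")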
def store_transformed_data(
    real_start,
    real_end,
    resample_freq,
    resample_operation,
    rolling_periods,
    rolling_operation,
    chg_diff_operation,
    chg_diff_period,
    rebase_start,
    rebase_end,
    rebase_base,
    decompose_method,
    decompose_component,
    order,
    query_data,
    query_metadata,
):
    # No transformations selected: pass the queried data through unchanged.
    if not order:
        return query_data, query_metadata
    # Nothing queried yet: return empty stores.
    if not query_data:
        return {}, {}
    # Abort the callback if a selected transformation is missing its parameters.
    if (
        ("resample" in order and (not resample_freq or not resample_operation))
        or ("rolling" in order and (not rolling_periods or not rolling_operation))
        or ("chg-diff" in order and (not chg_diff_operation or not chg_diff_period))
        or ("rebase" in order and (not rebase_start or not rebase_base))
        or (
            "decompose" in order
            and (not decompose_method or not decompose_component)
        )
    ):
        raise PreventUpdate

    # Rebuild the DataFrame and its metadata MultiIndex from the stored records.
    data = pd.DataFrame.from_records(query_data, coerce_float=True, index="index")
    data.index = pd.to_datetime(data.index)
    metadata = pd.DataFrame.from_records(query_metadata)
    data.columns = pd.MultiIndex.from_frame(metadata)
    p = Pipeline(location=db.engine, download=False)

    # Map each transformation name to a callable that captures its parameters.
    transformations = {
        "usd": lambda x: convert_usd(x, pipeline=p, errors="ignore"),
        "real": lambda x: convert_real(
            x, start_date=real_start, end_date=real_end, pipeline=p, errors="ignore"
        ),
        "gdp": lambda x: convert_gdp(x, pipeline=p, errors="ignore"),
        "resample": lambda x: resample(
            x, rule=resample_freq, operation=resample_operation
        ),
        "rolling": lambda x: rolling(
            x, window=rolling_periods, operation=rolling_operation
        ),
        "chg-diff": lambda x: chg_diff(
            x, operation=chg_diff_operation, period=chg_diff_period
        ),
        "rebase": lambda x: rebase(
            x, start_date=rebase_start, end_date=rebase_end, base=rebase_base
        ),
        "decompose": lambda x: decompose(
            x,
            component=decompose_component,
            method=decompose_method,
            force_x13=True,
            errors="ignore",
        ),
    }

    # Apply the selected transformations in the order chosen by the user.
    transformed_data = data.copy()
    for t in order:
        transformed_data = transformations[t](transformed_data)
    transformed_metadata = transformed_data.columns.to_frame()
    transformed_data.columns = transformed_data.columns.get_level_values(0)
    transformed_data.reset_index(inplace=True)
    return transformed_data.to_dict("records"), transformed_metadata.to_dict(
        "records"
    )
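# Hedged sketch (not part of the callback above): demonstrates the dispatch
# pattern it relies on, i.e. mapping transformation names to callables and
# applying them in the user-selected order. The toy transformations here are
# placeholders, not econuy's convert_*/resample/rebase functions.
def _example_ordered_transformations():
    import pandas as pd

    data = pd.DataFrame({"x": [1.0, 2.0, 4.0, 8.0]})
    transformations = {
        "double": lambda df: df * 2,
        "pct": lambda df: df.pct_change(),
    }
    order = ["double", "pct"]  # applied left to right, as in the loop above
    out = data.copy()
    for name in order:
        out = transformations[name](out)
    return out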
def cpi_measures(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
                 revise_rows: Union[str, int] = "nodup",
                 save_loc: Union[str, PathLike, Engine, Connection, None] = None,
                 only_get: bool = False) -> pd.DataFrame:
    """
    Get core CPI, Winsorized CPI, tradable CPI, non-tradable CPI and residual
    CPI.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI measures : pd.DataFrame

    """
    name = "cpi_measures"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    try:
        xls_10_14 = pd.ExcelFile(urls[name]["dl"]["2010-14"])
        xls_15 = pd.ExcelFile(urls[name]["dl"]["2015-"])
        prod_97 = (pd.read_excel(urls[name]["dl"]["1997"], skiprows=5)
                   .dropna(how="any")
                   .set_index("Rubros, Agrupaciones, Subrubros, Familias y Artículos")
                   .T)
    except URLError as err:
        if "SSL: CERTIFICATE_VERIFY_FAILED" in str(err):
            certificate = Path(get_project_root(), "utils", "files",
                               "ine_certs.pem")
            r = requests.get(urls[name]["dl"]["2010-14"], verify=certificate)
            xls_10_14 = pd.ExcelFile(BytesIO(r.content))
            r = requests.get(urls[name]["dl"]["2015-"], verify=certificate)
            xls_15 = pd.ExcelFile(BytesIO(r.content))
            r = requests.get(urls[name]["dl"]["1997"], verify=certificate)
            prod_97 = (pd.read_excel(BytesIO(r.content), skiprows=5)
                       .dropna(how="any")
                       .set_index("Rubros, Agrupaciones, Subrubros, Familias y Artículos")
                       .T)
        else:
            raise err

    weights_97 = (pd.read_excel(urls[name]["dl"]["1997_weights"], index_col=0)
                  .drop_duplicates(subset="Descripción", keep="first"))
    weights = pd.read_excel(xls_10_14, sheet_name=xls_10_14.sheet_names[0],
                            usecols="A:C", skiprows=13,
                            index_col=0).dropna(how="any")
    weights.columns = ["Item", "Weight"]
    weights_8 = weights.loc[weights.index.str.len() == 8]

    sheets = []
    for excel_file in [xls_10_14, xls_15]:
        for sheet in excel_file.sheet_names:
            raw = pd.read_excel(excel_file, sheet_name=sheet, usecols="D:IN",
                                skiprows=8).dropna(how="all")
            proc = raw.loc[:, raw.columns.str.contains(
                "Indice|Índice")].dropna(how="all")
            sheets.append(proc.T)
    complete_10 = pd.concat(sheets)
    complete_10 = complete_10.iloc[:, 1:]
    complete_10.columns = [weights["Item"], weights.index]
    complete_10.index = pd.date_range(start="2010-12-31",
                                      periods=len(complete_10), freq="M")
    diff_8 = complete_10.loc[:, complete_10.columns.get_level_values(
        level=1).str.len() == 8].pct_change()
    win = pd.DataFrame(winsorize(diff_8, limits=(0.05, 0.05), axis=1))
    win.index = diff_8.index
    win.columns = diff_8.columns.get_level_values(level=1)
    cpi_win = win.mul(weights_8.loc[:, "Weight"].T)
    cpi_win = cpi_win.sum(axis=1).add(1).cumprod().mul(100)

    weights_97["Weight"] = (weights_97["Rubro"]
                            .fillna(weights_97["Agrupación, subrubro, familia"])
                            .fillna(weights_97["Artículo"])
                            .drop(columns=["Rubro",
                                           "Agrupación, subrubro, familia",
                                           "Artículo"]))
    prod_97 = prod_97.loc[:, list(cpi_details["1997_base"].keys())]
    prod_97.index = pd.date_range(start="1997-03-31", periods=len(prod_97),
                                  freq="M")
    weights_97 = (weights_97[weights_97["Descripción"].isin(
        cpi_details["1997_weights"])].set_index("Descripción")
        .drop(columns=["Rubro", "Agrupación, subrubro, familia",
                       "Artículo"])).div(100)
    weights_97.index = prod_97.columns

    prod_10 = complete_10.loc[:, list(cpi_details["2010_base"].keys())]
    prod_10 = prod_10.loc[:, ~prod_10.columns.get_level_values(
        level=0).duplicated()]
    prod_10.columns = prod_10.columns.get_level_values(level=0)
    weights_10 = (weights.loc[weights["Item"].isin(
        list(cpi_details["2010_base"].keys()))].drop_duplicates(
            subset="Item", keep="first")).set_index("Item")

    items = []
    weights = []
    for item, weight, details in zip([prod_10, prod_97],
                                     [weights_10, weights_97],
                                     ["2010_base", "1997_base"]):
        for tradable in [True, False]:
            items.append(
                item.loc[:, [k for k, v in cpi_details[details].items()
                             if v["Tradable"] is tradable]])
            aux = weight.loc[[k for k, v in cpi_details[details].items()
                              if v["Tradable"] is tradable]]
            weights.append(aux.div(aux.sum()))
        for core in [True, False]:
            items.append(
                item.loc[:, [k for k, v in cpi_details[details].items()
                             if v["Core"] is core]])
            aux = weight.loc[[k for k, v in cpi_details[details].items()
                              if v["Core"] is core]]
            weights.append(aux.div(aux.sum()))

    intermediate = []
    for item, weight in zip(items, weights):
        intermediate.append(item.mul(weight.squeeze()).sum(1))

    output = []
    for x, y in zip(intermediate[:4], intermediate[4:]):
        aux = pd.concat([y.pct_change().loc[y.index < "2011-01-01"],
                         x.pct_change().loc[x.index > "2011-01-01"]])
        output.append(aux.fillna(0).add(1).cumprod().mul(100))

    cpi_re = cpi(update_loc=update_loc, save_loc=save_loc, only_get=True)
    cpi_re = cpi_re.loc[cpi_re.index >= "1997-03-31"]
    output = pd.concat([cpi_re] + output + [cpi_win], axis=1)
    output.columns = ["Índice de precios al consumo: total",
                      "Índice de precios al consumo: transables",
                      "Índice de precios al consumo: no transables",
                      "Índice de precios al consumo: subyacente",
                      "Índice de precios al consumo: residual",
                      "Índice de precios al consumo: Winsorized 0.05"]
    output = output.apply(pd.to_numeric, errors="coerce")
    metadata._set(output, area="Precios y salarios", currency="-",
                  inf_adj="No", unit="2010-12=100", seas_adj="NSA",
                  ts_type="-", cumperiods=1)
    output = transform.rebase(output, start_date="2010-12-01",
                              end_date="2010-12-31")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
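# Hedged sketch (not from the library): the Winsorized CPI above caps each
# month's cross-section of item price changes at the 5th/95th percentiles,
# weights the capped changes, and chains the weighted means into an index.
# The weights and changes below are invented; the winsorize call mirrors the
# one used above.
def _example_winsorized_index():
    import numpy as np
    import pandas as pd
    from scipy.stats.mstats import winsorize

    rng = np.random.default_rng(0)
    # Monthly fractional price changes for 10 items over 6 months.
    changes = pd.DataFrame(rng.normal(0.005, 0.01, size=(6, 10)))
    changes.iloc[2, 0] = 0.5  # outlier that winsorizing will cap
    weights = np.full(10, 0.1)
    capped = pd.DataFrame(winsorize(changes, limits=(0.05, 0.05), axis=1),
                          index=changes.index, columns=changes.columns)
    # Weighted mean change per month, chained into a level index (first = ~100).
    return capped.mul(weights).sum(axis=1).add(1).cumprod().mul(100)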
def rxr(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get real exchange rates vis-à-vis the US dollar for Argentina and
    Brazil.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly real exchange rate : pd.DataFrame

    """
    name = "regional_rxr"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    proc = _ifs(update_loc=update_loc, save_loc=save_loc, only_get=only_get)

    output = pd.DataFrame()
    output["Argentina"] = (proc["Argentina - oficial"] * proc["US.PCPI_IX"]
                           / proc["ARG CPI"])
    output["Brasil"] = proc["Brasil"] * proc["US.PCPI_IX"] / proc["BRA CPI"]

    metadata._set(output, area="Regional", currency="-",
                  inf_adj="-", seas_adj="NSA",
                  ts_type="-", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["ARS/USD", "BRL/USD"]])
    output = rebase(output, start_date="2019-01-01",
                    end_date="2019-01-31").dropna(how="all")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
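# Hedged sketch (not from the library): the bilateral real exchange rate above
# is the nominal ARS/USD (or BRL/USD) rate times the US CPI divided by the
# local CPI. The figures below are illustrative only.
def _example_real_exchange_rate():
    nominal_ars_per_usd = 350.0
    us_cpi = 305.0
    arg_cpi = 12000.0
    # Higher local inflation relative to the US lowers the real exchange rate.
    return nominal_ars_per_usd * us_cpi / arg_cpi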
def stocks(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
           revise_rows: Union[str, int] = "nodup",
           save_loc: Union[str, PathLike, Engine, Connection, None] = None,
           only_get: bool = False) -> pd.DataFrame:
    """Get stock market index data in USD terms.

    Indexes selected are MERVAL and BOVESPA.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Daily stock market index in USD terms : pd.DataFrame

    """
    name = "regional_stocks"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    end_date_dt = dt.datetime(2000, 1, 1)
    start_date_dt = dt.datetime(2000, 1, 1)
    aux = []
    while end_date_dt < dt.datetime.now():
        end_date_dt = start_date_dt + dt.timedelta(days=5000)
        params = {
            "curr_id": "13376",
            "smlID": str(randint(1000000, 99999999)),
            "header": "S&P Merval Historical Data",
            "st_date": start_date_dt.strftime("%m/%d/%Y"),
            "end_date": end_date_dt.strftime("%m/%d/%Y"),
            "interval_sec": "Daily",
            "sort_col": "date",
            "sort_ord": "DESC",
            "action": "historical_data"
        }
        r = requests.post(urls[name]["dl"]["arg"],
                          headers=investing_headers, data=params)
        aux.append(pd.read_html(r.content, match="Price",
                                index_col=0, parse_dates=True)[0])
        start_date_dt = end_date_dt + dt.timedelta(days=1)
    arg = pd.concat(aux, axis=0)[["Price"]].sort_index()

    bra = pd.read_csv(urls[name]["dl"]["bra"], index_col=0,
                      parse_dates=True)[["Close"]]
    bra = bra.loc[bra.index >= "2000-01-01"]

    converters = nxr(update_loc=update_loc, only_get=only_get)
    converters.columns = converters.columns.get_level_values(0)
    arg = pd.merge_asof(arg, converters[["Argentina - informal"]],
                        left_index=True, right_index=True)
    arg = (arg.iloc[:, 0] / arg.iloc[:, 1]).to_frame()
    arg.columns = ["Argentina"]
    bra = pd.merge_asof(bra, converters[["Brasil"]],
                        left_index=True, right_index=True)
    bra = (bra.iloc[:, 0] / bra.iloc[:, 1]).to_frame()
    bra.columns = ["Brasil"]
    output = arg.join(bra, how="left").interpolate(method="linear",
                                                   limit_area="inside")

    metadata._set(output, area="Regional", currency="USD",
                  inf_adj="No", seas_adj="NSA",
                  ts_type="-", cumperiods=1)
    output = rebase(output, start_date="2019-01-02").dropna(how="all")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
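# Hedged sketch (not from the library): merge_asof pairs each stock-index
# observation with the most recent available exchange rate, which is how the
# MERVAL and BOVESPA series above are converted to USD even though the two
# series do not share identical calendars. Dates and values are made up.
def _example_asof_usd_conversion():
    import pandas as pd

    prices = pd.DataFrame(
        {"Price": [100.0, 102.0, 105.0]},
        index=pd.to_datetime(["2023-01-02", "2023-01-03", "2023-01-04"]))
    fx = pd.DataFrame(
        {"ARS/USD": [180.0, 182.0]},
        index=pd.to_datetime(["2023-01-01", "2023-01-03"]))
    # Each price is matched with the latest exchange rate at or before its date.
    merged = pd.merge_asof(prices, fx, left_index=True, right_index=True)
    return (merged["Price"] / merged["ARS/USD"]).to_frame("USD")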
def cpi(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
        revise_rows: Union[str, int] = "nodup",
        save_loc: Union[str, PathLike, Engine, Connection, None] = None,
        only_get: bool = False) -> pd.DataFrame:
    """Get consumer price index for Argentina and Brazil.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly CPI : pd.DataFrame

    """
    name = "regional_cpi"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    arg = requests.get(urls[name]["dl"]["ar"],
                       params=urls[name]["dl"]["ar_payload"])
    arg = pd.read_html(arg.content)[0]
    arg.set_index("Fecha", drop=True, inplace=True)
    arg.index = pd.to_datetime(arg.index, format="%d/%m/%Y")
    arg.columns = ["nivel"]
    arg = arg.divide(10)

    arg_unoff = pd.read_excel(urls[name]["dl"]["ar_unofficial"])
    arg_unoff.set_index("date", drop=True, inplace=True)
    arg_unoff.index = arg_unoff.index + MonthEnd(0)
    arg_unoff = arg_unoff.loc[(arg_unoff.index >= "2006-12-01")
                              & (arg_unoff.index <= "2016-12-01"), "index"]
    arg_unoff = arg_unoff.to_frame().pct_change(
        periods=1).multiply(100).dropna()
    arg_unoff.columns = ["nivel"]
    arg = (arg.append(arg_unoff)
           .reset_index()
           .drop_duplicates(subset="index", keep="last")
           .set_index("index", drop=True)
           .sort_index())
    arg = arg.divide(100).add(1).cumprod()

    bra_r = requests.get(urls[name]["dl"]["bra"])
    bra = pd.DataFrame(bra_r.json())[["v"]]
    bra.index = pd.date_range(start="1979-12-31", freq="M", periods=len(bra))
    bra = bra.apply(pd.to_numeric, errors="coerce")
    bra = bra.divide(100).add(1).cumprod()

    output = pd.concat([arg, bra], axis=1)
    output.columns = ["Argentina", "Brasil"]

    metadata._set(output, area="Regional", currency="-",
                  inf_adj="No", seas_adj="NSA",
                  ts_type="-", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["ARS", "BRL"]])
    output = rebase(output, start_date="2010-10-01", end_date="2010-10-31")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
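# Hedged sketch (not from the library): the Argentine series above splices the
# official index with unofficial monthly inflation rates by stacking percentage
# changes and chaining them with cumprod(). The rates below are invented.
def _example_chain_inflation_rates():
    import pandas as pd

    monthly_pct = pd.Series([1.0, 1.5, 2.0, 1.2],
                            index=pd.date_range("2007-01-31", periods=4,
                                                freq="M"))
    # Convert monthly % changes into a cumulative price level.
    return monthly_pct.div(100).add(1).cumprod()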
def monthly_gdp(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
                revise_rows: Union[str, int] = "nodup",
                save_loc: Union[str, PathLike, Engine, Connection, None] = None,
                only_get: bool = False) -> pd.DataFrame:
    """Get monthly GDP data.

    Countries/aggregates selected are Argentina and Brazil.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                 default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    revise_rows : {'nodup', 'auto', int}
        Defines how to process data updates. An integer indicates how many
        rows to remove from the tail of the dataframe and replace with new
        data. String can either be ``auto``, which automatically determines
        number of rows to replace from the inferred data frequency, or
        ``nodup``, which replaces existing periods with new data.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
               default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQLAlchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default False
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Monthly GDP : pd.DataFrame

    """
    name = "regional_monthly_gdp"

    if only_get is True and update_loc is not None:
        output = ops._io(operation="update", data_loc=update_loc, name=name)
        if not output.equals(pd.DataFrame()):
            return output

    arg = pd.read_excel(urls[name]["dl"]["arg"], usecols="D",
                        skiprows=4).dropna(how="all")
    arg.index = pd.date_range(start="2004-01-31", freq="M", periods=len(arg))

    bra = pd.read_csv(urls[name]["dl"]["bra"], sep=";",
                      index_col=0, decimal=",")
    bra.index = pd.date_range(start="2003-01-31", freq="M", periods=len(bra))

    output = pd.concat([arg, bra], axis=1)
    output.columns = ["Argentina", "Brasil"]

    metadata._set(output, area="Regional", currency="-",
                  inf_adj="Const.", seas_adj="SA",
                  ts_type="Flujo", cumperiods=1)
    metadata._modify_multiindex(output, levels=[3],
                                new_arrays=[["ARS", "BRL"]])
    output = rebase(output, start_date="2010-01-01", end_date="2010-12-31")

    if update_loc is not None:
        previous_data = ops._io(operation="update",
                                data_loc=update_loc,
                                name=name)
        output = ops._revise(new_data=output, prev_data=previous_data,
                             revise_rows=revise_rows)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc, data=output, name=name)

    return output
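# Hedged sketch (not from the library, and an assumption about rebase's range
# semantics): rebasing over start_date="2010-01-01" to end_date="2010-12-31",
# as above, presumably sets the *average* of that window to 100 rather than a
# single period. Values below are illustrative.
def _example_range_rebase():
    import pandas as pd

    s = pd.Series([95.0, 100.0, 105.0, 110.0],
                  index=pd.date_range("2010-03-31", periods=4, freq="Q"))
    # Divide by the 2010 average so the window mean equals 100 (assumed behavior).
    base = s.loc["2010-01-01":"2010-12-31"].mean()
    return s.div(base).mul(100)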