Example 1
0
    def resample(self, target: str, operation: str = "sum",
                 interpolation: str = "linear"):
        """
        Resample to target frequencies.

        See Also
        --------
        :func:`~econuy.transform.resample`

        """
        def _convert(df):
            # Delegate the actual frequency conversion to the transform module.
            return transform.resample(df, target=target, operation=operation,
                                      interpolation=interpolation)

        if isinstance(self.dataset, dict):
            # Apply the transformation table-by-table, keeping the keys.
            output = {name: _convert(df) for name, df in self.dataset.items()}
        else:
            output = _convert(self.dataset)
        self.logger.info(f"Applied 'resample' transformation with '{target}' "
                         f"and '{operation}' operation.")
        if self.inplace is True:
            # Mutate this session and return it for chaining.
            self.dataset = output
            return self
        # Otherwise hand back a fresh session holding the transformed data.
        return Session(location=self.location,
                       revise_rows=self.revise_rows,
                       only_get=self.only_get,
                       dataset=output,
                       logger=self.logger,
                       inplace=self.inplace)
Example 2
0
def concat(dfs: List[pd.DataFrame]) -> pd.DataFrame:
    """Concatenate DataFrames column-wise at a single common frequency.

    If all inputs share the same inferred index frequency they are
    concatenated directly. Otherwise the highest-priority frequency present
    among the inputs (annual > quarterly > monthly > biweekly > weekly) is
    chosen as the target, and every non-matching DataFrame is resampled to
    it: stocks take the last observation of each period, plain flows are
    summed, and ratio/index-like series are averaged.

    Parameters
    ----------
    dfs : List[pd.DataFrame]
        DataFrames with datetime indexes. For mixed-frequency inputs the
        columns must carry econuy-style MultiIndex levels including "Tipo"
        and "Unidad".

    Returns
    -------
    pd.DataFrame
        Column-wise concatenation of the (possibly resampled) inputs.

    Raises
    ------
    ValueError
        If the inputs have mixed frequencies and none of them matches a
        supported target frequency.
    """
    freqs = [pd.infer_freq(df.index) for df in dfs]
    if all(freq == freqs[0] for freq in freqs):
        # Fast path: homogeneous frequencies need no resampling.
        return pd.concat(dfs, axis=1)
    # Pick the lowest available frequency among the inputs as the target.
    for freq_opt in ["A-DEC", "A", "Q-DEC", "Q", "M", "2W-SUN", "W-SUN"]:
        if freq_opt not in freqs:
            continue
        output = []
        for df in dfs:
            if pd.infer_freq(df.index) == freq_opt:
                df_match = df.copy()
            else:
                type_df = df.columns.get_level_values("Tipo")[0]
                unit_df = df.columns.get_level_values("Unidad")[0]
                if type_df == "Stock":
                    # Stocks are sampled at the end of each period.
                    df_match = transform.resample(df,
                                                  rule=freq_opt,
                                                  operation="last")
                elif type_df == "Flujo" and not any(
                        x in unit_df for x in ["%", "=", "Cambio"]):
                    # Plain flows aggregate by summation.
                    df_match = transform.resample(df,
                                                  rule=freq_opt,
                                                  operation="sum")
                else:
                    # Ratios, indexes and change series are averaged.
                    df_match = transform.resample(df,
                                                  rule=freq_opt,
                                                  operation="mean")
            output.append(df_match)
        return pd.concat(output, axis=1)
    # Bug fix: previously `combined` was left unbound here and the final
    # `return combined` raised UnboundLocalError. Fail with a clear error.
    raise ValueError("Input DataFrames have mixed frequencies and none of "
                     "them matches a supported target frequency.")
Example 3
0
def net_public_debt(update_loc: Union[str, PathLike, Engine,
                                      Connection, None] = None,
                    save_loc: Union[str, PathLike, Engine,
                                    Connection, None] = None,
                    only_get: bool = True) -> pd.DataFrame:
    """
    Get net public debt excluding deposits at the central bank.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Either Path or path-like string pointing to a directory where to find
        a CSV for updating, SQLAlchemy connection or engine object, or
        ``None``, don't update.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Either Path or path-like string pointing to a directory where to save
        the CSV, SQL Alchemy connection or engine object, or ``None``,
        don't save.
    only_get : bool, default True
        If True, don't download data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    Net public debt excl. deposits at the central bank : pd.DataFrame

    """
    name = "net_public_debt"

    # Gross debt and assets of the global public sector come from the same
    # retriever helper, keyed "gps" and "assets" respectively.
    data = _public_debt_retriever(update_loc=update_loc,
                                  save_loc=save_loc, only_get=only_get)
    gross_debt = data["gps"].loc[:, ["Total deuda"]]
    assets = data["assets"].loc[:, ["Total activos"]]
    # Both series are renamed to the single output column label so that
    # element-wise `add` aligns them.
    gross_debt.columns = ["Deuda neta del sector"
                          " público global excl. encajes"]
    assets.columns = gross_debt.columns
    # FX obligations with the financial sector (from the reserves dataset)
    # proxy the deposits to be excluded.
    deposits = econuy.retrieval.external_sector.reserves(
        update_loc=update_loc, save_loc=save_loc,
        only_get=only_get).loc[:,
                               ["Obligaciones en ME con el sector financiero"]]
    # Reserves are higher-frequency: take the quarter-end observation and
    # align it to the debt index before combining.
    deposits = (transform.resample(deposits, rule="Q-DEC", operation="last")
                .reindex(gross_debt.index).squeeze())
    # NOTE(review): assets and deposits are *added* to gross debt to obtain a
    # net figure — presumably they are stored with negative sign upstream;
    # confirm against _public_debt_retriever and reserves().
    output = gross_debt.add(assets).add(deposits, axis=0).dropna()

    # Tag the output with standard econuy metadata.
    metadata._set(output, area="Sector público",
                  currency="USD", inf_adj="No", unit="Millones",
                  seas_adj="NSA", ts_type="Stock", cumperiods=1)

    if save_loc is not None:
        ops._io(operation="save", data_loc=save_loc,
                data=output, name=name)

    return output
Example 4
0
def _ifs(update_loc: Union[str, PathLike, Engine, Connection, None] = None,
         save_loc: Union[str, PathLike, Engine, Connection, None] = None,
         only_get: bool = False) -> pd.DataFrame:
    """Get extra data from the IMF IFS.

    Downloads monthly CPI ("PCPI_IX") and exchange rate
    ("ENDA_XDC_USD_RATE") series for the US, Brazil and Argentina from the
    IMF IFS SDMX endpoint, then uses the exchange-rate series to fill gaps
    in the locally-retrieved Argentina/Brazil FX data.

    Parameters
    ----------
    update_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                  default None
        Passed through to :func:`nxr` and :func:`cpi`.
    save_loc : str, os.PathLike, SQLAlchemy Connection or Engine, or None, \
                default None
        Passed through to :func:`nxr` and :func:`cpi`.
    only_get : bool, default False
        If True, don't download local data, retrieve what is available from
        ``update_loc``.

    Returns
    -------
    pd.DataFrame
        Columns: Argentina official and informal FX, Brazil FX, ARG CPI,
        BRA CPI and the US CPI index.
    """
    url_ = "http://dataservices.imf.org/REST/SDMX_JSON.svc/CompactData/IFS/M."
    url_extra = ".?startPeriod=1970&endPeriod="
    ifs = []
    for country in ["US", "BR", "AR"]:
        for indicator in ["PCPI_IX", "ENDA_XDC_USD_RATE"]:
            base_url = (f"{url_}{country}.{indicator}{url_extra}"
                        f"{dt.datetime.now().year}")
            # Bug fix: requests.get() without a timeout can hang forever if
            # the IMF endpoint stalls; bound the wait.
            r_json = requests.get(base_url, timeout=30).json()
            data = r_json["CompactData"]["DataSet"]["Series"]["Obs"]
            try:
                data = pd.DataFrame(data)
                data.set_index("@TIME_PERIOD", drop=True, inplace=True)
            except ValueError:
                # Empty or malformed series: fall back to an all-NaN monthly
                # frame so concatenation below still aligns.
                data = pd.DataFrame(np.nan,
                                    index=pd.date_range(start="1970-01-01",
                                                        end=dt.datetime.now(),
                                                        freq="M"),
                                    columns=[f"{country}.{indicator}"])
            if "@OBS_STATUS" in data.columns:
                data.drop("@OBS_STATUS", inplace=True, axis=1)
            # Timestamps arrive as "YYYY-MM"; anchor them to month-end.
            data.index = (pd.to_datetime(data.index, format="%Y-%m") +
                          MonthEnd(1))
            data.columns = [f"{country}.{indicator}"]
            ifs.append(data)
    ifs = pd.concat(ifs, axis=1, sort=True).apply(pd.to_numeric)

    # Local exchange rates, downsampled to monthly means for alignment.
    xr = nxr(update_loc=update_loc, save_loc=save_loc, only_get=only_get)
    xr = resample(xr, rule="M", operation="mean")
    xr.columns = xr.columns.get_level_values(0)
    prices = cpi(update_loc=update_loc, save_loc=save_loc, only_get=only_get)
    prices.columns = ["ARG CPI", "BRA CPI"]

    # Fill gaps in the local FX series with the IMF series.
    proc = pd.concat([xr, prices, ifs], axis=1)
    proc["Argentina - oficial"] = np.where(
        pd.isna(proc["Argentina - oficial"]), proc["AR.ENDA_XDC_USD_RATE"],
        proc["Argentina - oficial"])
    proc["Argentina - informal"] = np.where(
        pd.isna(proc["Argentina - informal"]), proc["AR.ENDA_XDC_USD_RATE"],
        proc["Argentina - informal"])
    proc["Brasil"] = np.where(pd.isna(proc["Brasil"]),
                              proc["BR.ENDA_XDC_USD_RATE"], proc["Brasil"])
    proc = proc[[
        "Argentina - oficial", "Argentina - informal", "Brasil", "ARG CPI",
        "BRA CPI", "US.PCPI_IX"
    ]]

    return proc
Example 5
0
def test_resample():
    """Exercise Session.resample and transform.resample across frequencies,
    operations, dict datasets and the invalid-operation error path."""
    data_m = dummy_df(freq="M", periods=204, ts_type="Flujo", cumperiods=2)
    session = Session(location=TEST_CON, dataset=data_m)
    trf_none = session.resample(target="Q-DEC", operation="sum").dataset
    trf_none.columns = data_m.columns
    assert trf_none.equals(data_m.resample("Q-DEC").sum())
    data_q1 = dummy_df(freq="Q", ts_type="Flujo")
    data_q2 = dummy_df(freq="Q", ts_type="Flujo")
    data_dict = {"data_q1": data_q1, "data_q2": data_q2}
    session = Session(location=TEST_CON, dataset=data_dict, inplace=True)
    trf_inter = session.resample(target="A-DEC", operation="average").dataset
    trf_inter["data_q1"].columns = trf_inter[
        "data_q2"].columns = data_q1.columns
    assert trf_inter["data_q1"].equals(data_q1.resample("A-DEC").mean())
    assert trf_inter["data_q2"].equals(data_q2.resample("A-DEC").mean())
    data_m = dummy_df(freq="Q-DEC")
    trf_none = transform.resample(data_m, target="M", operation="upsample")
    trf_none.columns = data_m.columns
    assert trf_none.equals(data_m.resample("M").interpolate("linear"))
    data_m = dummy_df(freq="Q-DEC")
    trf_none = transform.resample(data_m, target="A-DEC", operation="end")
    trf_none.columns = data_m.columns
    assert trf_none.equals(data_m.asfreq(freq="A-DEC"))
    data_m = dummy_df(freq="Q-DEC")
    # Bug fix: MultiIndex.set_levels(inplace=True) was deprecated in pandas
    # 1.2 and removed in 2.0 — assign the returned index instead.
    data_m.columns = data_m.columns.set_levels(["-"], level=2)
    trf_none = transform.resample(data_m, target="M", operation="upsample")
    trf_none.columns = data_m.columns
    assert trf_none.equals(data_m.resample("M").interpolate("linear"))
    data_m = dummy_df(freq="Q-DEC")
    data_m.columns = data_m.columns.set_levels(["-"], level=2)
    trf_none = transform.resample(data_m, target="A-DEC", operation="end")
    trf_none.columns = data_m.columns
    assert trf_none.equals(data_m.asfreq(freq="A-DEC"))
    with pytest.raises(ValueError):
        data_m = dummy_df(freq="M", periods=204, ts_type="Flujo")
        transform.resample(data_m, target="Q-DEC", operation="wrong")
Example 6
0
    def store_transformed_data(
        real_start,
        real_end,
        resample_freq,
        resample_operation,
        rolling_periods,
        rolling_operation,
        chg_diff_operation,
        chg_diff_period,
        rebase_start,
        rebase_end,
        rebase_base,
        decompose_method,
        decompose_component,
        order,
        query_data,
        query_metadata,
    ):
        """Apply the user-selected transformations, in order, to the queried
        data and return (records, metadata-records) dicts for the Dash store.

        `order` lists transformation keys; each transformation's own
        parameters come in as the earlier arguments. Returns the input
        unchanged when no transformation is selected, and empty dicts when
        there is no data.
        """
        if not order:
            # Nothing to apply: pass the stored records through untouched.
            return query_data, query_metadata
        if not query_data:
            return {}, {}
        # Every selected transformation must have its required parameters
        # filled in; otherwise abort the callback without updating outputs.
        if (
            ("resample" in order and (not resample_freq or not resample_operation))
            or ("rolling" in order and (not rolling_periods or not rolling_operation))
            or ("chg-diff" in order and (not chg_diff_operation or not chg_diff_period))
            or ("rebase" in order and (not rebase_start or not rebase_base))
            or (
                "decompose" in order
                and (not decompose_method or not decompose_component)
            )
        ):
            raise PreventUpdate
        # Rebuild the DataFrame (datetime index, metadata as MultiIndex
        # columns) from the serialized store records.
        data = pd.DataFrame.from_records(query_data, coerce_float=True, index="index")
        data.index = pd.to_datetime(data.index)
        metadata = pd.DataFrame.from_records(query_metadata)
        data.columns = pd.MultiIndex.from_frame(metadata)
        p = Pipeline(location=db.engine, download=False)

        # Dispatch table: transformation key -> callable closing over the
        # parameters captured above.
        transformations = {
            "usd": lambda x: convert_usd(x, pipeline=p, errors="ignore"),
            "real": lambda x: convert_real(
                x, start_date=real_start, end_date=real_end, pipeline=p, errors="ignore"
            ),
            "gdp": lambda x: convert_gdp(x, pipeline=p, errors="ignore"),
            "resample": lambda x: resample(
                x, rule=resample_freq, operation=resample_operation
            ),
            "rolling": lambda x: rolling(
                x, window=rolling_periods, operation=rolling_operation
            ),
            "chg-diff": lambda x: chg_diff(
                x, operation=chg_diff_operation, period=chg_diff_period
            ),
            "rebase": lambda x: rebase(
                x, start_date=rebase_start, end_date=rebase_end, base=rebase_base
            ),
            "decompose": lambda x: decompose(
                x,
                component=decompose_component,
                method=decompose_method,
                force_x13=True,
                errors="ignore",
            ),
        }
        # Apply transformations in the user-selected order — ordering matters
        # (e.g. resample-then-chg-diff differs from chg-diff-then-resample).
        transformed_data = data.copy()
        for t in order:
            transformed_data = transformations[t](transformed_data)

        # Split the MultiIndex columns back into flat data + metadata frames
        # for serialization into the Dash stores.
        transformed_metadata = transformed_data.columns.to_frame()
        transformed_data.columns = transformed_data.columns.get_level_values(0)
        transformed_data.reset_index(inplace=True)

        return transformed_data.to_dict("records"), transformed_metadata.to_dict(
            "records"
        )
Example 7
0
    def update_chart(
        final_data_record,
        final_metadata_record,
        title,
        subtitle,
        start_date,
        end_date,
        chart_type,
        *tables_indicators,
    ):
        """Build the requested visualization (plotly chart or data table)
        from the stored records.

        Returns a (component, html-export-string, download-button-class)
        tuple for the Dash outputs. Assumes ``tables_indicators`` packs up
        to 3 table selections followed by their indicator selections — TODO
        confirm against the callback's Input/State declaration.
        """
        if not final_data_record:
            # No data yet: empty graph and a disabled download button.
            return dcc.Graph(id="graph"), "", "d-inline btn btn-primary disabled"
        data = pd.DataFrame.from_records(
            final_data_record, coerce_float=True, index="index"
        )
        final_metadata = pd.DataFrame.from_records(final_metadata_record)
        data.index = pd.to_datetime(data.index)
        # Default to an effectively unbounded date window.
        start_date = start_date or "1970-01-01"
        end_date = end_date or "2100-01-01"
        data = data.loc[(data.index >= start_date) & (data.index <= end_date), :]
        if len(data) > 7000:
            # Too many points to render responsively: downsample to monthly.
            data = resample(data, rule="M", operation="mean")

        # First 3 varargs are table selections, the rest their indicators;
        # keep only tables that actually have an indicator selected.
        tables = tables_indicators[:3]
        indicators = tables_indicators[3:]
        tables = [table for table, indicator in zip(tables, indicators) if indicator]
        labels = utils.get_labels(tables)
        labels_dedup = list(dict.fromkeys(labels))
        if not title:
            title = "<br>".join(labels_dedup)
        if subtitle:
            title = f"{title}<br><span style='font-size: 14px'>{subtitle}</span>"
        # Grow the figure with the number of distinct series labels.
        height = 600 + 20 * len(labels_dedup)
        if chart_type != "table":
            if chart_type == "bar":
                fig = px.bar(
                    data,
                    y=data.columns,
                    height=height,
                    title=title,
                    color_discrete_sequence=px.colors.qualitative.Bold,
                    barmode="group",
                )
            elif chart_type == "stackbar":
                fig = px.bar(
                    data,
                    y=data.columns,
                    height=height,
                    title=title,
                    color_discrete_sequence=px.colors.qualitative.Bold,
                    barmode="stack",
                )
            elif chart_type == "area":
                fig = px.area(
                    data,
                    y=data.columns,
                    height=height,
                    title=title,
                    color_discrete_sequence=px.colors.qualitative.Bold,
                )
            elif chart_type == "normarea":
                fig = px.area(
                    data,
                    y=data.columns,
                    height=height,
                    title=title,
                    color_discrete_sequence=px.colors.qualitative.Bold,
                    groupnorm="fraction",
                )
            elif chart_type == "lineyears":
                # One line per calendar year, x-axis is the within-year
                # period (month name, week number or day of year depending
                # on the data's frequency).
                aux = data.copy()
                aux["Año"] = aux.index.year
                if pd.infer_freq(aux.index) in ["M", "MS", "Q", "Q-DEC"]:
                    aux["Período"] = aux.index.month_name()
                elif pd.infer_freq(aux.index) in ["A", "A-DEC"]:
                    # Annual data has no within-year dimension to plot.
                    raise PreventUpdate
                elif pd.infer_freq(aux.index) in ["W", "W-SUN"]:
                    aux["Período"] = aux.index.strftime("%U").astype("int32")
                else:
                    aux["Período"] = aux.index.dayofyear
                fig = px.line(
                    aux,
                    y=aux.columns,
                    color="Año",
                    x="Período",
                    height=height,
                    title=title,
                    color_discrete_sequence=px.colors.qualitative.Bold,
                )
            else:
                # Default chart type: plain line chart.
                fig = px.line(
                    data,
                    y=data.columns,
                    height=height,
                    title=title,
                    color_discrete_sequence=px.colors.qualitative.Bold,
                )
            # Compose a y-axis label from each series' currency, unit and
            # inflation-adjustment metadata; only show it when every series
            # shares the same label.
            ylabels = []
            for currency, unit, inf in zip(
                final_metadata["Moneda"],
                final_metadata["Unidad"],
                final_metadata["Inf. adj."],
            ):
                text = []
                if currency != "-":
                    text += [currency]
                text += [unit]
                if inf != "No":
                    text += [inf]
                ylabels.append(" | ".join(text))
            if all(x == ylabels[0] for x in ylabels):
                ylabels = ylabels[0]
            else:
                ylabels = ""
            fig.update_layout(
                {
                    "margin": {"l": 20, "r": 20},
                    "legend": {
                        "orientation": "h",
                        "yanchor": "top",
                        "y": -0.1,
                        "xanchor": "left",
                        "x": 0,
                    },
                    "legend_orientation": "h",
                    "xaxis_title": "",
                    "yaxis_title": ylabels,
                    "legend_title": "",
                    "title": {"y": 0.9, "yanchor": "top", "font": {"size": 16}},
                }
            )
            # Watermark the chart with the app's logo, anchored to the
            # top-right of the plotting area.
            path_to_logo = path.join(current_app.root_path, "static", "cards.jpg")
            fig.add_layout_image(
                dict(
                    source=Image.open(path_to_logo),
                    sizex=0.1,
                    sizey=0.1,
                    xanchor="right",
                    yanchor="bottom",
                    xref="paper",
                    yref="paper",
                    x=1,
                    y=1.01,
                )
            )
            # fig.update_xaxes(
            #     rangeselector=dict(yanchor="bottom", y=1.01, xanchor="right",
            #                         x=0.9,
            #                         buttons=list([
            #                             dict(count=1, label="1m", step="month",
            #                                 stepmode="backward"),
            #                             dict(count=6, label="6m", step="month",
            #                                 stepmode="backward"),
            #                             dict(count=1, label="YTD", step="year",
            #                                 stepmode="todate"),
            #                             dict(count=1, label="1a", step="year",
            #                                 stepmode="backward"),
            #                             dict(count=5, label="5a", step="year",
            #                                 stepmode="backward"),
            #                             dict(label="todos", step="all")])))
            # Serialize the figure to standalone HTML for the download link.
            html_string = StringIO()
            fig.write_html(html_string)
            html_string.seek(0)
            viz = dcc.Graph(figure=fig, id="graph", config={"displayModeBar": False})
            return viz, html_string.read(), "d-inline btn btn-primary"
        else:
            # Table view: render a DataTable instead of a chart; the HTML
            # download is unavailable so its button stays disabled.
            data.reset_index(inplace=True)
            data.rename(columns={"index": "Fecha"}, inplace=True)
            data["Fecha"] = data["Fecha"].dt.strftime("%d-%m-%Y")
            viz = html.Div(
                [
                    html.Br(),
                    dt.DataTable(
                        id="table",
                        columns=[{"name": "Fecha", "id": "Fecha", "type": "datetime"}]
                        + [
                            {
                                "name": i,
                                "id": i,
                                "type": "numeric",
                                "format": Format(
                                    precision=2,
                                    scheme=Scheme.fixed,
                                    group=Group.yes,
                                    groups=3,
                                    group_delimiter=",",
                                    decimal_delimiter=".",
                                ),
                            }
                            for i in data.columns[1:]
                        ],
                        data=data.to_dict("records"),
                        style_cell={"textAlign": "center"},
                        style_header={
                            "whiteSpace": "normal",
                            "height": "auto",
                            "textAlign": "center",
                        },
                        page_action="none",
                        fixed_rows={"headers": True},
                    ),
                ]
            )
            return viz, [], "d-inline btn btn-primary disabled"