def _calculate_return_value(
    args: typing.Tuple[pd.Series,  # ts (time series)
                       float,  # return_period
                       typing.Union[str, pd.Timedelta],  # return_period_size
                       float,  # threshold
                       typing.Union[str, pd.Timedelta],  # r
                       str,  # extremes_type
                       typing.Union[str,
                                    scipy.stats.rv_continuous],  # distribution
                       str,  # distribution_name
                       typing.Optional[float],  # alpha
                       int,  # n_samples
                       ],
) -> typing.Dict[str, typing.Union[str, typing.Optional[float]]]:
    """
    Compute a return value for a single threshold/distribution combination.

    All inputs arrive packed in one tuple so that the function can be used
    as a single-argument worker.

    Parameters
    ----------
    args : tuple
        A 10-tuple, in this order: `ts`, `return_period`,
        `return_period_size`, `threshold`, `r`, `extremes_type`,
        `distribution`, `distribution_name`, `alpha`, `n_samples`.

    Returns
    -------
    dict
        Dictionary with keys "distribution_name", "threshold", "rv", "cil"
        and "ciu", where the last three are the values returned by
        `EVA.get_return_value`.

    """
    (ts, return_period, return_period_size, threshold, r,
     extremes_type, distribution, distribution_name, alpha, n_samples) = args

    # Arguments which are identical for every `get_return_value` call below
    rv_kwargs = {
        "return_period": return_period,
        "return_period_size": return_period_size,
        "alpha": alpha,
    }

    # Build the model: POT extremes for this threshold, fitted using MLE
    model = EVA(data=ts)
    model.get_extremes(
        method="POT", extremes_type=extremes_type, threshold=threshold, r=r
    )
    model.fit_model(model="MLE", distribution=distribution)

    # TODO - this is a hack to avoid spawning nested subprocesses:
    # the requested number of samples is approached in steps of 10
    # (presumably the model reuses samples from earlier calls - confirm in EVA)
    warm_up_samples = n_samples % 10
    while warm_up_samples < n_samples:
        warm_up_samples += 10
        model.get_return_value(n_samples=warm_up_samples, **rv_kwargs)

    # Final call with the number of samples which was actually requested
    estimate, lower_bound, upper_bound = model.get_return_value(
        n_samples=n_samples, **rv_kwargs
    )

    result = {"distribution_name": distribution_name, "threshold": threshold}
    result.update(rv=estimate, cil=lower_bound, ciu=upper_bound)
    return result
# Example #2
0
def plot_return_value_stability(
        ts: pd.Series,
        return_period,
        return_period_size: typing.Union[str, pd.Timedelta] = "365.2425D",
        thresholds=None,
        r: typing.Union[str, pd.Timedelta] = "24H",
        extremes_type: str = "high",
        distributions: typing.Optional[typing.List[typing.Union[
            str, scipy.stats.rv_continuous]]] = None,
        alpha: typing.Optional[float] = None,
        n_samples: int = 100,
        figsize: tuple = (8, 5),
) -> tuple:  # pragma: no cover
    """
    Plot return value stability plot for given threshold values.

    The return value stability plot shows return values for given return period
    for given thresholds.
    The purpose of this plot is to investigate stability and sensitivity of the
    Generalized Pareto Distribution model to threshold value.
    Threshold value selection should still be guided by the mean residual life plot
    and the parameter stability plot. This plot should be used as additional check.

    Parameters
    ----------
    ts : pandas.Series
        Time series of the signal.
    return_period : number
        Return period.
        Given as a multiple of `return_period_size`.
    return_period_size : str or pandas.Timedelta, optional
        Size of return period (default='365.2425D').
        If set to '30D', then a return period of 12
        would be roughly equivalent to a 1 year return period (360 days).
    thresholds : array-like, optional
        An array of thresholds for which the return values are calculated
        and plotted.
        If None (default), uses 100 equally-spaced thresholds
        between 90th (10th if extremes_type='low') percentile
        and 10th largest (smallest if extremes_type='low') value in the series.
    r : str or pandas.Timedelta, optional
        Duration of window used to decluster the exceedances.
        By default r='24H' (24 hours).
    extremes_type : str, optional
        high (default) - extreme high values
        low - extreme low values
    distributions : list, optional
        List of distributions for which the return value curves are plotted.
        By default these are "genpareto" and "expon".
        A distribution must be either a name of distribution from scipy.stats
        or a subclass of scipy.stats.rv_continuous.
        See https://docs.scipy.org/doc/scipy/reference/stats.html
    alpha : float, optional
        Confidence interval width in the range (0, 1).
        If None (default), then confidence interval is not shown.
    n_samples : int, optional
        Number of bootstrap samples used to estimate
        confidence interval bounds (default=100).
        Ignored if `alpha` is None.
    figsize : tuple, optional
        Figure size in inches in format (width, height).
        By default it is (8, 5).

    Returns
    -------
    figure : matplotlib.figure.Figure
        Figure object.
    axes : matplotlib.axes._axes.Axes
        Axes object.

    """
    # Get default `thresholds`
    if thresholds is None:
        thresholds = get_default_thresholds(
            ts=ts,
            extremes_type=extremes_type,
            num=100,
        )

    # Get default `distributions`
    if distributions is None:
        distributions = [
            "genpareto",
            "expon",
        ]

    # Instantiate model
    model = EVA(data=ts)

    # Calculate return values for each threshold and distribution.
    # Keys are the entries of `distributions` (names or distribution objects).
    return_values: typing.Dict[typing.Any, list] = {}
    ci_lower: typing.Dict[typing.Any, list] = {}
    ci_upper: typing.Dict[typing.Any, list] = {}
    for distribution in distributions:
        # Every distribution gets its own fresh lists, which are always
        # registered below: this keeps the plotting stage working for empty
        # `thresholds` and keeps list lengths equal to len(thresholds)
        # even if the same distribution is listed more than once
        rv_curve: list = []
        cil_curve: list = []
        ciu_curve: list = []
        for threshold in thresholds:
            model.get_extremes(
                method="POT",
                extremes_type=extremes_type,
                threshold=threshold,
                r=r,
            )
            model.fit_model(
                model="MLE",
                distribution=distribution,
            )
            rv, cil, ciu = model.get_return_value(
                return_period=return_period,
                return_period_size=return_period_size,
                alpha=alpha,
                n_samples=n_samples,
            )
            rv_curve.append(rv)
            cil_curve.append(cil)
            ciu_curve.append(ciu)
        return_values[distribution] = rv_curve
        ci_lower[distribution] = cil_curve
        ci_upper[distribution] = ciu_curve

    with plt.rc_context(rc=pyextremes_rc):
        # Create figure and axes
        fig, ax = plt.subplots(figsize=figsize, dpi=96)
        ax.grid(False)

        # Colors are looked up once; the index wraps around so that having
        # more distributions than colors in the cycle does not fail
        colors = pyextremes_rc["axes.prop_cycle"].by_key()["color"]

        # Plot central estimate of return values
        for i, distribution in enumerate(distributions):
            color = colors[i % len(colors)]
            ax.plot(
                thresholds,
                return_values[distribution],
                color=color,
                lw=2,
                ls="-",
                label=distribution,
                zorder=(i + 3) * 5,
            )

            # Plot confidence bounds
            if alpha is not None:
                for ci in [ci_lower[distribution], ci_upper[distribution]]:
                    ax.plot(
                        thresholds,
                        ci,
                        color=color,
                        lw=1,
                        ls="--",
                        zorder=(i + 2) * 5,
                    )
                ax.fill_between(
                    thresholds,
                    ci_lower[distribution],
                    ci_upper[distribution],
                    facecolor=color,
                    edgecolor="None",
                    alpha=0.25,
                    zorder=(i + 1) * 5,
                )

        # Plot legend
        ax.legend(
            frameon=True,
            framealpha=0.9,
        )

        # Label axes
        ax.set_xlabel("Threshold")
        ax.set_ylabel("Return value")

        return fig, ax