def test_single_cluster():
    index = pd.date_range(start="2021/01/01", end="2021/01/02", freq="1H")
    data = pd.Series(data=np.random.random(size=len(index)), index=index)

    # Test high extremes
    extremes = get_extremes(
        ts=data,
        method="POT",
        extremes_type="high",
        threshold=data.min() - 1,
        r="24H",
    )
    assert len(extremes) == 1
    assert np.isclose(extremes.values[0], data.max())

    # Test low extremes
    extremes = get_extremes(
        ts=data,
        method="POT",
        extremes_type="low",
        threshold=data.max() + 1,
        r="24H",
    )
    assert len(extremes) == 1
    assert np.isclose(extremes.values[0], data.min())
def test_invalid_arguments(battery_wl):
    # Test wrong extremes_type value
    with pytest.raises(ValueError, match=r"invalid value.*extremes_type"):
        get_extremes(
            ts=battery_wl,
            method="POT",
            extremes_type="BAD EXTREMES_TYPE VALUE",
            threshold=2,
            r="24H",
        )

    # Test wrong r type
    with pytest.raises(ValueError, match=r"invalid value.*'r' argument"):
        get_extremes(
            ts=battery_wl, method="POT", extremes_type="high", threshold=2, r="abc"
        )
def test_invalid_arguments(battery_wl):
    # Test wrong extremes_type value
    with pytest.raises(ValueError, match=r"invalid value.*extremes_type"):
        get_extremes(
            ts=battery_wl,
            method="BM",
            extremes_type="BAD EXTREMES_TYPE VALUE",
            block_size="365.2425D",
            errors="coerce",
        )

    # Test wrong block_size type
    with pytest.raises(TypeError, match=r"invalid type.*block_size"):
        get_extremes(
            ts=battery_wl,
            method="BM",
            extremes_type="high",
            block_size=1,
            errors="coerce",
        )

    # Test wrong errors value
    with pytest.raises(ValueError, match=r"invalid value.*errors.*argument"):
        get_extremes(
            ts=battery_wl,
            method="BM",
            extremes_type="high",
            block_size="365.2425D",
            errors="BAD ERRORS VALUE",
        )
def test_extreme_value_extraction(battery_wl, extremes_type):
    # Test errors=raise
    with pytest.raises(ValueError):
        get_extremes(
            ts=battery_wl,
            method="BM",
            extremes_type=extremes_type,
            block_size="365.2425D",
            errors="raise",
        )

    # Test errors=ignore
    with pytest.warns(NoDataBlockWarning, match=r"blocks contained no data"):
        extremes_ignored = get_extremes(
            ts=battery_wl,
            method="BM",
            extremes_type=extremes_type,
            block_size="365.2425D",
            errors="ignore",
        )
        assert len(extremes_ignored) == 96

    # Test errors=coerce
    with pytest.warns(NoDataBlockWarning, match=r"blocks contained no data"):
        extremes_coerced = get_extremes(
            ts=battery_wl,
            method="BM",
            extremes_type=extremes_type,
            block_size="365.2425D",
            errors="coerce",
        )
        assert len(extremes_coerced) == 100

    if extremes_type == "high":
        assert np.isclose(extremes_ignored.max(), extremes_coerced.max())
    else:
        assert np.isclose(extremes_ignored.min(), extremes_coerced.min())
    assert np.isclose(extremes_ignored.mean(), extremes_coerced.mean())
def test_extreme_value_extraction(battery_wl, extremes_type, threshold):
    extremes = get_extremes(
        ts=battery_wl,
        method="POT",
        extremes_type=extremes_type,
        threshold=threshold,
        r="24H",
    )
    if extremes_type == "high":
        assert np.isclose(extremes.max(), battery_wl.max())
        assert len(extremes) == 117
    elif extremes_type == "low":
        assert np.isclose(extremes.min(), battery_wl.min())
        assert len(extremes) == 104
    assert np.all(np.diff(extremes.index) > pd.to_timedelta("24H").to_numpy())
def test_min_last_block(battery_wl, extremes_type):
    with pytest.warns(NoDataBlockWarning, match=r"blocks contained no data"):
        extremes_full = get_extremes(
            ts=battery_wl,
            method="BM",
            extremes_type=extremes_type,
            block_size="365.2425D",
            errors="coerce",
            min_last_block=None,
        )
    with pytest.warns(NoDataBlockWarning, match=r"blocks contained no data"):
        extremes_trimmed = get_extremes(
            ts=battery_wl,
            method="BM",
            extremes_type=extremes_type,
            block_size="365.2425D",
            errors="coerce",
            min_last_block=0.9,
        )
    assert len(extremes_full) - len(extremes_trimmed) == 1
    assert np.allclose(extremes_full.values[:-1],
                       extremes_trimmed.values,
                       atol=0.01,
                       rtol=0)
    def get_extremes(self, method: str, extremes_type: str = "high", **kwargs) -> None:
        """
        Get extreme events from time series.

        Extracts extreme values from the 'self.data' attribute.
        Stores extreme values in the 'self.extremes' attribute.

        Parameters
        ----------
        method : str
            Extreme value extraction method.
            Supported values:
                BM - Block Maxima
                POT - Peaks Over Threshold
        extremes_type : str, optional
            high (default) - get extreme high values
            low - get extreme low values
        kwargs
            if method is BM:
                block_size : str or pandas.Timedelta, optional
                    Block size (default='1Y').
                errors : str, optional
                    raise (default) - raise an exception
                        when encountering a block with no data
                    ignore - ignore blocks with no data
                    coerce - get extreme values for blocks with no data
                        as mean of all other extreme events in the series
                        with index being the middle point of corresponding interval
            if method is POT:
                threshold : float
                    Threshold used to find exceedances.
                r : str or pandas.Timedelta, optional
                    Duration of window used to decluster the exceedances.
                    By default r='24H' (24 hours).

        """
        message = f"for method='{method}' and extremes_type='{extremes_type}'"
        logger.debug(f"extracting extreme values {message}")
        self.__extremes = get_extremes(
            method=method, ts=self.data, extremes_type=extremes_type, **kwargs
        )
        self.__extremes_method = method
        self.__extremes_type = extremes_type
        logger.info(f"successfully extracted extreme values {message}")

        logger.debug("collecting extreme value properties ")
        self.__extremes_kwargs = {}
        if method == "BM":
            self.__extremes_kwargs["block_size"] = pd.to_timedelta(
                kwargs.get("block_size", "1Y")
            )
            self.__extremes_kwargs["errors"] = kwargs.get("errors", "raise")
        elif method == "POT":
            self.__extremes_kwargs["threshold"] = kwargs["threshold"]
            self.__extremes_kwargs["r"] = pd.to_timedelta(kwargs.get("r", "24H"))
        else:
            raise AssertionError
        logger.info("successfully collected extreme value properties")

        logger.debug("creating extremes transformer")
        self.__extremes_transformer = ExtremesTransformer(
            extremes=self.__extremes,
            extremes_type=self.__extremes_type,
        )
        logger.info("successfully created extremes transformer")

        logger.info("removing any previously declared models")
        self.__model = None
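
# A minimal usage sketch (not part of the method above) showing how the
# module-level get_extremes function it wraps might be called directly;
# it assumes get_extremes is importable from the pyextremes package and
# uses a synthetic hourly series purely for illustration.
import numpy as np
import pandas as pd
from pyextremes import get_extremes

index = pd.date_range(start="2020-01-01", end="2022-12-31", freq="1H")
series = pd.Series(data=np.random.random(size=len(index)), index=index)

# Block Maxima: one extreme value per block of 'block_size'
bm_extremes = get_extremes(
    ts=series, method="BM", block_size="365.2425D", errors="coerce"
)

# Peaks Over Threshold: declustered exceedances above 'threshold',
# separated by at least 'r'
pot_extremes = get_extremes(
    ts=series, method="POT", threshold=series.quantile(0.95), r="24H"
)
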
def _calculate_modified_parameters(
    args: typing.Tuple[pd.Series,  # ts (time series)
                       str,  # extremes_type
                       float,  # threshold
                       typing.Union[str, pd.Timedelta],  # r
                       typing.Optional[float],  # alpha
                       int,  # n_samples
                       int,  # seed
                       ],
) -> typing.Dict[str, typing.Optional[float]]:
    (
        ts,
        extremes_type,
        threshold,
        r,
        alpha,
        n_samples,
        seed,
    ) = args

    result: typing.Dict[str, typing.Optional[float]] = {"threshold": threshold}

    # Get extremes
    extremes = get_extremes(
        ts=ts,
        method="POT",
        extremes_type=extremes_type,
        threshold=threshold,
        r=r,
    )
    extremes_transformer = ExtremesTransformer(
        extremes=extremes,
        extremes_type=extremes_type,
    )

    # Get central estimates for shape and scale parameters
    c, _, scale = scipy.stats.genpareto.fit(
        data=extremes_transformer.transformed_extremes,
        floc=threshold,
    )
    result["shape"] = c
    result["scale"] = scale - c * threshold

    # Get confidence bounds
    if alpha is None:
        result["shape_ci_lower"] = None
        result["shape_ci_upper"] = None
        result["scale_ci_lower"] = None
        result["scale_ci_upper"] = None
    else:
        # Get fit parameters
        rng_generator = np.random.default_rng(seed=seed)
        fit_parameters = [
            scipy.stats.genpareto.fit(
                data=rng_generator.choice(
                    a=extremes.values,
                    size=len(extremes),
                    replace=True,
                ),
                floc=threshold,
            ) for _ in range(n_samples)
        ]

        # Calculate confidence bounds for shape and scale parameters
        result["shape_ci_lower"], result["shape_ci_upper"] = np.quantile(
            a=np.transpose(fit_parameters)[0],
            q=[(1 - alpha) / 2, (1 + alpha) / 2],
        )
        result["scale_ci_lower"], result["scale_ci_upper"] = np.quantile(
            a=np.transpose(fit_parameters)[2] -
            np.transpose(fit_parameters)[0] * threshold,
            q=[(1 - alpha) / 2, (1 + alpha) / 2],
        )

    return result
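
# A standalone sketch (illustrative, not part of the function above) of the
# "modified scale" relationship it computes: sigma* = sigma - c * u, where c
# is the GPD shape, sigma the fitted scale, and u the threshold passed as
# floc. The synthetic exceedances below are purely hypothetical.
import numpy as np
import scipy.stats

u = 1.5  # hypothetical threshold
exceedances = scipy.stats.genpareto.rvs(
    c=0.1, loc=u, scale=0.5, size=1000, random_state=42
)
c, _, scale = scipy.stats.genpareto.fit(data=exceedances, floc=u)
modified_scale = scale - c * u  # same expression as result["scale"] above
print(f"shape={c:.3f}, modified scale={modified_scale:.3f}")
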
def plot_parameter_stability(
        ts: pd.Series,
        thresholds=None,
        r: typing.Union[str, pd.Timedelta] = "24H",
        extremes_type: str = "high",
        alpha: typing.Optional[float] = None,
        n_samples: int = 100,
        figsize: tuple = (8, 5),
) -> tuple:  # pragma: no cover
    """
    Plot parameter stability plot for given threshold values.

    The parameter stability plot shows shape and modified scale parameters
    of the Generalized Pareto Distribution (GPD).
    Both shape and modified scale parameters should be approximately constant above
    a threshold for which the GPD model is valid.
    The strategy is to select the smallest (largest for extremes_type='low')
    threshold value above (below for extremes_type='low') which
    the GPD parameters remain approximately constant.

    Parameters
    ----------
    ts : pandas.Series
        Time series of the signal.
    thresholds : array-like, optional
        An array of thresholds for which the parameter stability plot is plotted.
        If None (default), plots parameter stability for 100 equally-spaced thresholds
        between 90th (10th if extremes_type='low') percentile
        and 10th largest (smallest if extremes_type='low') value in the series.
    r : str or pandas.Timedelta, optional
        Duration of window used to decluster the exceedances.
        By default r='24H' (24 hours).
    extremes_type : str, optional
        high (default) - extreme high values
        low - extreme low values
    alpha : float, optional
        Confidence interval width in the range (0, 1).
        If None (default), then confidence interval is not shown.
    n_samples : int, optional
        Number of bootstrap samples used to estimate
        confidence interval bounds (default=100).
        Ignored if `alpha` is None.
    figsize : tuple, optional
        Figure size in inches in format (width, height).
        By default it is (8, 5).

    Returns
    -------
    figure : matplotlib.figure.Figure
        Figure object.
    axes : matplotlib.axes._axes.Axes
        Axes object.

    """
    # Get default thresholds
    if thresholds is None:
        thresholds = get_default_thresholds(
            ts=ts,
            extremes_type=extremes_type,
            num=100,
        )

    # Calculate shape and modified scale parameters for each threshold
    shape_parameters: typing.Dict[str, typing.List[float]] = {
        "values": [],
        "ci_lower": [],
        "ci_upper": [],
    }
    scale_parameters: typing.Dict[str, typing.List[float]] = {
        "values": [],
        "ci_lower": [],
        "ci_upper": [],
    }
    distribution = scipy.stats.genpareto
    for threshold in thresholds:
        # Get extremes
        extremes = get_extremes(
            ts=ts,
            method="POT",
            extremes_type=extremes_type,
            threshold=threshold,
            r=r,
        )
        extremes_transformer = ExtremesTransformer(
            extremes=extremes,
            extremes_type=extremes_type,
        )

        # Get central estimates for shape and scale parameters
        c, _, scale = distribution.fit(
            data=extremes_transformer.transformed_extremes,
            floc=threshold,
        )
        shape_parameters["values"].append(c)
        scale_parameters["values"].append(scale - c * threshold)

        # Get confidence bounds
        if alpha is not None:
            # Prepare local variables used by fit parameter calculator
            fit_function = distribution.fit
            fixed_parameters = {"floc": threshold}

            min_samples_per_core = 50
            if n_samples <= min_samples_per_core:
                # Calculate without multiprocessing
                seed = np.random.randint(low=0, high=1e6, size=None)
                fit_parameters = get_fit_parameters(params=(
                    n_samples,
                    fit_function,
                    extremes,
                    fixed_parameters,
                    seed,
                ))
            else:
                # Find number of cores
                n_cores = min(
                    os.cpu_count() or 2,
                    int(np.ceil(n_samples / min_samples_per_core)),
                )

                # Calculate number of samples per core
                min_samples_per_core = int(n_samples / n_cores)
                core_samples = [min_samples_per_core for _ in range(n_cores)]

                # Distribute remaining samples evenly across cores
                for i in range(n_samples - sum(core_samples)):
                    core_samples[i] += 1
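                # At this point, for example, n_samples=130 with the initial
                # min_samples_per_core=50 would give n_cores=3 (on a machine
                # with at least 3 cores), core_samples=[43, 43, 43] before the
                # loop above, and [44, 43, 43] after the leftover sample is
                # distributed.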

                # Get unique random seed for each core
                seeds: typing.List[int] = []
                while len(seeds) < n_cores:
                    seed = np.random.randint(low=0, high=1e6, size=None)
                    if seed not in seeds:
                        seeds.append(seed)

                # Calculate new fit parameters using processor pool
                with multiprocessing.Pool(processes=n_cores) as pool:
                    fit_parameters = list(
                        itertools.chain(*pool.map(
                            get_fit_parameters,
                            zip(
                                core_samples,
                                [fit_function for _ in range(n_cores)],
                                [extremes for _ in range(n_cores)],
                                [fixed_parameters for _ in range(n_cores)],
                                seeds,
                            ),
                        )))

            # Calculate confidence bounds
            shapes = np.transpose(fit_parameters)[0]
            scales = np.transpose(fit_parameters)[2] - shapes * threshold
            cil, ciu = np.quantile(
                a=shapes,
                q=[(1 - alpha) / 2, (1 + alpha) / 2],
            )
            shape_parameters["ci_lower"].append(cil)
            shape_parameters["ci_upper"].append(ciu)
            cil, ciu = np.quantile(
                a=scales,
                q=[(1 - alpha) / 2, (1 + alpha) / 2],
            )
            scale_parameters["ci_lower"].append(cil)
            scale_parameters["ci_upper"].append(ciu)

    with plt.rc_context(rc=pyextremes_rc):
        # Create figure
        fig = plt.figure(figsize=figsize, dpi=96)

        # Create gridspec
        gs = matplotlib.gridspec.GridSpec(
            nrows=2,
            ncols=1,
            wspace=0.1,
            hspace=0.1,
            width_ratios=[1],
            height_ratios=[1, 1],
        )

        # Create and configure axes
        ax_shape = fig.add_subplot(gs[0, 0])
        ax_scale = fig.add_subplot(gs[1, 0])

        # Plot central estimates of shape and modified scale parameters
        ax_shape.plot(
            thresholds,
            shape_parameters["values"],
            ls="-",
            color="#F85C50",
            lw=2,
            zorder=15,
        )
        ax_scale.plot(
            thresholds,
            scale_parameters["values"],
            ls="-",
            color="#F85C50",
            lw=2,
            zorder=15,
        )

        # Plot confidence bounds
        if alpha is not None:
            for ci in [
                    shape_parameters["ci_lower"], shape_parameters["ci_upper"]
            ]:
                ax_shape.plot(
                    thresholds,
                    ci,
                    color="#5199FF",
                    lw=1,
                    ls="--",
                    zorder=10,
                )
            ax_shape.fill_between(
                thresholds,
                shape_parameters["ci_lower"],
                shape_parameters["ci_upper"],
                facecolor="#5199FF",
                edgecolor="None",
                alpha=0.25,
                zorder=5,
            )
            for ci in [
                    scale_parameters["ci_lower"], scale_parameters["ci_upper"]
            ]:
                ax_scale.plot(
                    thresholds,
                    ci,
                    color="#5199FF",
                    lw=1,
                    ls="--",
                    zorder=10,
                )
            ax_scale.fill_between(
                thresholds,
                scale_parameters["ci_lower"],
                scale_parameters["ci_upper"],
                facecolor="#5199FF",
                edgecolor="None",
                alpha=0.25,
                zorder=5,
            )

        # Configure axes
        ax_shape.tick_params(axis="x",
                             which="both",
                             labelbottom=False,
                             length=0)
        ax_scale.set_xlim(ax_shape.get_xlim())

        # Label axes
        ax_shape.set_ylabel(r"Shape, $\xi$")
        ax_scale.set_ylabel(r"Modified scale, $\sigma^*$")
        ax_scale.set_xlabel("Threshold")

        return fig, (ax_shape, ax_scale)
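
# A hypothetical usage sketch of plot_parameter_stability above, run on a
# synthetic series purely for illustration; real applications would pass an
# observed time series such as the 'battery_wl' fixture used in the tests.
import numpy as np
import pandas as pd

wl_index = pd.date_range(start="2015-01-01", end="2020-12-31", freq="1H")
wl_series = pd.Series(
    data=np.random.exponential(scale=1.0, size=len(wl_index)), index=wl_index
)
fig, (ax_shape, ax_scale) = plot_parameter_stability(
    ts=wl_series,
    r="24H",
    extremes_type="high",
    alpha=None,      # skip bootstrap confidence bounds to keep the run fast
    n_samples=100,   # ignored when alpha is None
)
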
def test_get_extremes():
    with pytest.raises(ValueError, match=r"invalid value.*method.*argument"):
        get_extremes(ts=pd.Series([1, 2, 3]),
                     method="BAD METHOD",
                     extremes_type="high")