Esempio n. 1
0
    def _adjust(
        self,
        sim: xr.DataArray,
        scen: xr.DataArray,
        *,
        frac: float = 0.25,
        power: float = 1.0,
        interp: str = "linear",
        extrapolation: str = "constant",
    ):
        """Run `extremes_adjust` on `sim` and `scen` using the trained dataset.

        Parameters
        ----------
        sim : xr.DataArray
          Stored as the ``sim`` variable of the dataset given to `extremes_adjust`.
        scen : xr.DataArray
          Stored as the ``scen`` variable of the dataset given to `extremes_adjust`.
        frac, power, interp, extrapolation
          Forwarded unchanged to `extremes_adjust`.

        Returns
        -------
        The output of `extremes_adjust`.
        """
        # Cheat on the quantiles coordinate: replace it with evenly spaced
        # values k / (n + 1), k = 1..n, so that `extrapolate_qm` can be used.
        n_quantiles = self.ds.quantiles.size
        fake_quantiles = (np.arange(n_quantiles) + 1) / (n_quantiles + 1)
        trained = self.ds.assign(quantiles=fake_quantiles)

        return extremes_adjust(
            trained.assign(sim=sim, scen=scen),
            cluster_thresh=self.cluster_thresh,
            dist=stats.get_dist("genpareto"),
            frac=frac,
            power=power,
            interp=interp,
            extrapolation=extrapolation,
            group="time",
        )
Esempio n. 2
0
def ts_fit_graph(ts, params):
    """Create graphic showing an histogram of the data and the distribution fitted to it.

    One panel is drawn per entry along the ``nbasins`` dimension.

    Parameters
    ----------
    ts : xr.DataArray
      Time series with ``time`` and ``nbasins`` dimensions, exposing
      ``long_name`` and ``units``.
    params : xr.DataArray
      Distribution parameters with an ``nbasins`` dimension. Its
      ``scipy_dist`` attribute names the distribution.

    Returns
    -------
    fig
      Matplotlib figure with, per basin, the density histogram, the count
      histogram (on a twin y-axis) and the pdf of the fitted distribution.
    """
    from xclim.indices.stats import get_dist

    n = ts.nbasins.size
    dist = params.attrs["scipy_dist"]

    fig, axes = plt.subplots(n, figsize=(10, 6), squeeze=False)

    for i in range(n):
        ax = axes.flat[i]
        ax2 = plt.twinx(ax)
        p = params.isel(nbasins=i)
        # Select and clean the series once; both histograms share it.
        t = ts.isel(nbasins=i).dropna(dim="time")

        # Plot histogram of time series as density then as a normal count.
        # The bin edges of the first histogram are reused for the second one.
        _, bins, _ = ax.hist(
            t,
            alpha=0.5,
            density=True,
            bins="auto",
            label="__nolabel__",
        )
        ax2.hist(
            t,
            bins=bins,
            facecolor=(1, 1, 1, 0.01),
            edgecolor="gray",
            linewidth=1,
        )

        # Plot pdf of distribution between its 1st and 99th percentiles.
        dc = get_dist(dist)(*p)
        mn = dc.ppf(0.01)
        mx = dc.ppf(0.99)
        q = np.linspace(mn, mx, 200)
        pdf = dc.pdf(q)
        ps = ", ".join([f"{x:.1f}" for x in p.values])
        ax.plot(q, pdf, "-", label=f"{dist}({ps})")

        # Labels
        ax.set_xlabel(f"{ts.long_name} (${units2pint(ts.units):~P}$)")
        ax.set_ylabel("Probability density")
        ax2.set_ylabel("Histogram count")

        ax.legend(frameon=False)

    plt.tight_layout()
    return fig
Esempio n. 3
0
 def test_get_lm3_dist(self, dist):
     """Check that the lmoments3 and scipy distributions give the same pdf for the same parameters."""
     pytest.importorskip("lmoments3")
     scipy_dist = stats.get_dist(dist)
     lm3_dist = stats.get_lm3_dist(dist)
     kwargs = self.params[dist]
     x = self.inputs_pdf
     # Same keyword parameters, same evaluation points, two implementations.
     scipy_pdf = scipy_dist(**kwargs).pdf(x)
     lm3_pdf = lm3_dist(**kwargs).pdf(x)
     np.testing.assert_array_almost_equal(lm3_pdf, scipy_pdf)
Esempio n. 4
0
def robust_data(request):
    """Build seeded normal ``tas`` samples for a reference and a future period.

    Both arrays have dims ``(lon, realization, time)`` with shape (4, 4, 40).
    The reference holds the same four realizations repeated at every ``lon``.
    Each ``lon`` of the future array uses its own set of (loc, scale, seed)
    triplets. The time coordinates are yearly ("YS"), starting in 2000 for the
    reference and in 2040 for the future.

    When ``request.param`` is truthy, both arrays are chunked with one ``lon``
    per chunk and converted to datasets.

    Returns
    -------
    tuple
      ``(ref, fut)``.
    """
    norm = get_dist("norm")

    def draw(loc, scale, seed):
        # One series of 40 values, reproducible through its own seed.
        return norm.rvs(loc=loc, scale=scale, size=(40, ), random_state=seed)

    ref_seeds = [101083, 19377, 473820, 483625]
    ref_members = np.array([draw(274, 0.8, seed) for seed in ref_seeds])
    # Repeat the same four realizations at each of the four lon points.
    ref = np.tile(ref_members, (4, 1, 1))

    # One list of (loc, scale, seed) per lon point.
    fut_specs = (
        [
            (274.0, 0.7, 176378),
            (274.0, 0.6, 839789),
            (274.0, 0.7, 393239),
            (275.6, 1.1, 747390),
        ],  # 3 no change, 1 positive change
        [
            (272.5, 1.2, 743920),
            (272.4, 0.8, 138489),
            (275.5, 0.8, 673683),
            (275.6, 1.1, 969383),
        ],  # 2 neg change
        [
            (275.6, 0.8, 696857),
            (275.8, 1.2, 379949),
            (276.5, 0.8, 268395),
            (277.6, 1.1, 456544),
        ],  # All pos change
        [
            (np.nan, 0.3, 746323),
            (np.nan, 1.2, 5643723),
            (275.5, 0.8, 118294),
            (275.6, 1.1, 574732),
        ],  # Some NaN
    )
    fut = np.array(
        [[draw(loc, sc, r) for loc, sc, r in members] for members in fut_specs]
    )

    dims = ("lon", "realization", "time")
    ref = xr.DataArray(ref, dims=dims, name="tas")
    ref["time"] = xr.cftime_range("2000-01-01", periods=40, freq="YS")
    fut = xr.DataArray(fut, dims=dims, name="tas")
    fut["time"] = xr.cftime_range("2040-01-01", periods=40, freq="YS")

    if request.param:
        ref = ref.chunk({"lon": 1}).to_dataset()
        fut = fut.chunk({"lon": 1}).to_dataset()
    return ref, fut
Esempio n. 5
0
    def _train(
        cls,
        ref: xr.DataArray,
        hist: xr.DataArray,
        *,
        cluster_thresh: str,
        ref_params: xr.Dataset = None,
        q_thresh: float = 0.95,
    ):
        """Train the extremes adjustment by calling `extremes_train` on `ref` and `hist`.

        Parameters
        ----------
        ref : xr.DataArray
          Stored as the ``ref`` variable of the training dataset.
        hist : xr.DataArray
          Stored as the ``hist`` variable of the training dataset.
        cluster_thresh : str
          Threshold, converted to the units of `ref` with `convert_units_to`.
        ref_params : optional
          Stored as the ``ref_params`` variable of the training dataset.
          When None, a float32 NaN placeholder is used instead.
        q_thresh : float
          Forwarded to `extremes_train`. Also used to size the ``quantiles``
          argument.

        Returns
        -------
        tuple
          The trained dataset without its ``quantiles`` variable, and a dict
          holding the converted ``cluster_thresh``.
        """
        cluster_thresh = convert_units_to(cluster_thresh, ref)

        # Approximation of how many "quantiles" values we will get:
        N = (1 - q_thresh) * ref.time.size

        # ref_params: cast nan to f32 not to interfere with map_blocks dtype parsing
        #   ref and hist are f32, we want to have f32 in the output.
        # An explicit None check is used because truth-testing an xarray object
        # is ambiguous (it raises for multi-element arrays). `np.nan` replaces
        # the `np.NaN` alias, which was removed in NumPy 2.0.
        if ref_params is None:
            ref_params = np.float32(np.nan)

        ds = extremes_train(
            xr.Dataset(
                {
                    "ref": ref,
                    "hist": hist,
                    "ref_params": ref_params,
                }
            ),
            q_thresh=q_thresh,
            cluster_thresh=cluster_thresh,
            dist=stats.get_dist("genpareto"),
            quantiles=np.arange(int(N)),
            group="time",
        )

        ds.px_hist.attrs.update(
            long_name="Probability of extremes in hist",
            description="Parametric probabilities of extremes in the common domain of hist and ref.",
        )
        ds.af.attrs.update(
            long_name="Extremes adjustment factor",
            description="Multiplicative adjustment factor of extremes from hist to ref.",
        )
        ds.thresh.attrs.update(
            long_name=f"{q_thresh * 100}th percentile extreme value threshold",
            description=f"Mean of the {q_thresh * 100}th percentile of large values (x > {cluster_thresh}) of ref and hist.",
        )

        return ds.drop_vars(["quantiles"]), {"cluster_thresh": cluster_thresh}
Esempio n. 6
0
    def test_pwm_fit(self, dist):
        """Test that the PWM fit of a random sample equals the fit computed directly by lmoments3."""
        pytest.importorskip("lmoments3")
        n = 500
        dc = stats.get_dist(dist)
        par = self.params[dist]
        # Random sample drawn from the scipy distribution, on a time axis.
        da = xr.DataArray(
            dc(**par).rvs(size=n),
            dims=("time", ),
            coords={"time": xr.cftime_range("1980-01-01", periods=n)},
        )
        out = stats.fit(da, dist=dist, method="PWM").compute()

        # Check that values are identical to lmoments3's output dict
        l3dc = stats.get_lm3_dist(dist)
        expected = l3dc.lmom_fit(da.values)
        for key, val in expected.items():
            # No extra positional argument: the third positional parameter of
            # `assert_array_equal` is `err_msg`, not a tolerance.
            np.testing.assert_array_equal(out.sel(dparams=key), val)
Esempio n. 7
0
    def adjust(
        self,
        scen: xr.DataArray,
        sim: xr.DataArray,
        frac: float = 0.25,
        power: float = 1.0,
    ):
        """Return second order bias-adjusted data. Refer to the class documentation for the algorithm details.

        Parameters
        ----------
        scen: DataArray
          Bias-adjusted time series.
        sim : DataArray
          Time series to be bias-adjusted, source of scen.
        frac : float
          Divisor applied to the normalized position of each cluster maximum
          of sim (0 at the smallest maximum, 1 at the largest) when building
          the transition weights.
        power : float
          Exponent applied to the scaled position. The resulting weights are
          clipped to a maximum of 1.

        Returns
        -------
        DataArray
          Copy of scen where the values at the cluster maxima of sim are
          replaced by a weighted mix of the corrected value and scen.

        Raises
        ------
        ValueError
          If the object has not been trained.
        """
        if not self._trained:
            raise ValueError("train() must be called before adjusting.")

        def _adjust_extremes_1d(scen_1d, sim_1d, ref_params, thresh, *, dist,
                                cluster_thresh):
            """Adjust one 1D series. `frac` and `power` come from the enclosing call."""
            # Positions and values of the maxima of the clusters of large sim values.
            _, _, peak_idx, peaks = get_clusters_1d(sim_1d, thresh,
                                                    cluster_thresh)

            adjusted = scen_1d.copy()
            if peak_idx.size == 0:
                # Happens if everything is under `cluster_thresh`
                return adjusted

            # Fit the dist on the maxima, location forced at thresh.
            fitted = stats._fitfunc_1d(
                peaks,
                dist=dist,
                nparams=len(ref_params),
                method="ML",
                floc=thresh,
            )

            # Probability of each maximum in sim's fitted distribution, then
            # the value with the same probability in ref's distribution.
            probs = dist.cdf(peaks, *fitted)
            corrected = dist.ppf(probs, *ref_params) + thresh

            # Transition weights based on frac and power, capped at 1.
            # NOTE(review): if all maxima are equal the range is 0 and the
            # weights are undefined (0 / 0) - confirm whether this can occur.
            lowest = peaks.min()
            weights = (((peaks - lowest) / ((peaks.max()) - lowest)) /
                       frac)**power
            np.clip(weights, None, 1, out=weights)

            # Blend the corrected values with scen at the maxima positions
            # and write the result in the copy.
            adjusted[peak_idx] = (corrected * weights) + (scen_1d[peak_idx] *
                                                          (1.0 - weights))
            return adjusted

        out = xr.apply_ufunc(
            _adjust_extremes_1d,
            scen,
            sim,
            self.ds.fit_params,
            self.ds.thresh,
            input_core_dims=[["time"], ["time"], ["dparams"], []],
            output_core_dims=[["time"]],
            vectorize=True,
            kwargs={
                "dist": stats.get_dist("genpareto"),
                "cluster_thresh": convert_units_to(self.cluster_thresh, sim),
            },
            dask="parallelized",
            output_dtypes=[scen.dtype],
        )

        call_args = f"frac={frac}, power={power}"
        out.attrs["xclim_history"] = update_history(
            f"Second order bias-adjustment with {str(self)}.adjust(sim, {call_args})",
            sim)
        return out
Esempio n. 8
0
def ts_fit_graph(ts, params):
    """Create graphic showing an histogram of the data and the distribution fitted to it.

    Only a single watershed is supported: the first (and only) entry along
    ``nbasins`` is plotted.

    Parameters
    ----------
    ts : xr.DataArray
      Stream flow time series with dimensions (time, nbasins).
    params : xr.DataArray
      Fitted distribution parameters returned by `xclim.land.fit` indicator.

    Returns
    -------
    fig
      Holoviews object combining a histogram and the parameterized pdf line.

    Raises
    ------
    NotImplementedError
      If `ts` holds more than one basin.
    """
    # Note: The hover tool could be customized to show the histogram count in addition to the frequency.
    from xclim.indices.stats import get_dist

    n = ts.nbasins.size
    if n > 1:
        raise NotImplementedError(
            "ts_fit_graph only supports a single basin (nbasins == 1)."
        )

    ts = ts.isel(nbasins=0)
    params = params.isel(nbasins=0)

    # Using matplotlib's default binning strategy
    hist, bins, _ = plt.hist(ts, bins="auto", density=True)

    # Histogram graphic object
    h = hv.Histogram((hist, bins), kdims=ts.name, label="Histogram")

    # PDF domain: the span of the histogram bins.
    mn = np.min(bins)
    mx = np.max(bins)
    q = np.linspace(mn, mx, 200)

    # Compute PDF
    dist = params.attrs["scipy_dist"]
    dc = get_dist(dist)(*params)  # Works because dparams is the first dimension.
    pdf = xr.DataArray(
        data=dc.pdf(q),
        dims=(ts.name,) + params.dims[1:],
        coords={ts.name: q},
        name="pdf",
    )

    # PDF line label. The f-string is already fully formatted, so no extra
    # `.format` call is needed.
    ps = ", ".join(
        f"{key}={x:.1f}" for (key, x) in zip(params.dparams.data, params.values)
    )
    label = f"{dist}({ps})"

    # PDF graphic object
    p = pdf.hvplot.line(label=label, xlabel=ts.attrs["long_name"], color="orange")

    # Layout
    return (h * p).opts(hv.opts.Histogram(tools=["hover"]))
Esempio n. 9
0
def ts_fit_graph(ts, params):
    """Draw, for each watershed, a histogram of the series and the pdf fitted to it.

    The graphic contains one panel per watershed.

    Parameters
    ----------
    ts : xr.DataArray
      Stream flow time series with dimensions (time, nbasins).
    params : xr.DataArray
      Fitted distribution parameters returned by `xclim.land.fit` indicator.

    Returns
    -------
    fig
      Figure showing a histogram and the parameterized pdf.
    """
    from xclim.indices.stats import get_dist

    n_basins = ts.nbasins.size
    dist_name = params.attrs["scipy_dist"]

    fig, axes = plt.subplots(n_basins, figsize=(10, 6), squeeze=False)

    for idx, ax in enumerate(axes.flat):
        count_ax = plt.twinx(ax)
        basin_params = params.isel(nbasins=idx)
        series = ts.isel(nbasins=idx).dropna(dim="time")

        # Density histogram first; its bin edges are reused for the count
        # histogram drawn on the twin axis.
        _, edges, _ = ax.hist(
            series,
            alpha=0.5,
            density=True,
            bins="auto",
            label="__nolabel__",
        )
        count_ax.hist(
            series,
            bins=edges,
            facecolor=(1, 1, 1, 0.01),
            edgecolor="gray",
            linewidth=1,
        )

        # Pdf of the fitted distribution between its 1st and 99th percentiles.
        frozen = get_dist(dist_name)(*basin_params)
        x = np.linspace(frozen.ppf(0.01), frozen.ppf(0.99), 200)
        param_text = ", ".join([f"{value:.1f}" for value in basin_params.values])
        ax.plot(x, frozen.pdf(x), "-", label=f"{dist_name}({param_text})")

        # Labels
        ax.set_xlabel(f"{ts.long_name} (${units2pint(ts.units):~P}$)")
        ax.set_ylabel("Probability density")
        count_ax.set_ylabel("Histogram count")

        ax.legend(frameon=False)

    plt.tight_layout()
    return fig