Beispiel #1
0
    def prepare(self, da, freq, **indexer):
        """Build the null mask and the expected number of daily values per period.

        Parameters
        ----------
        da : xr.DataArray
          Input data; forwarded to `self.is_null`. Its `time.encoding` is read
          for the calendar when the resampled time index is a CFTimeIndex.
        freq : str
          Resampling frequency defining the periods.
        **indexer : {dim: indexer, }, optional
          Time selection forwarded to `self.is_null` and `generic.select_time`.

        Returns
        -------
        null, count
          `null` is whatever `self.is_null` returns; `count` is a DataArray
          along `time` holding the expected number of days in each period.
        """
        pfreq, _ = self.split_freq(freq)
        null = self.is_null(da, freq, **indexer)

        totals = null.sum(dim="time")
        labels = totals.indexes["time"]

        # Frequencies whose base ends with "S" are treated as labelling the
        # start of each period; all others as labelling its end.
        if pfreq.endswith("S"):
            period_start = labels
            period_end = labels.shift(1, freq=freq)
        else:
            period_end = labels
            period_start = labels.shift(-1, freq=freq)

        if not indexer:
            # No sub-selection: the expected count is simply the number of
            # days between the bounds of each period.
            n_days = (period_end - period_start).days
            expected = xr.DataArray(
                n_days.values, coords={"time": totals.time}, dims="time"
            )
            return null, expected

        # With an indexer, build a complete daily series of ones spanning all
        # periods, apply the same time selection, and count what remains.
        first_day = str(period_start[0].date())
        last_day = str(period_end[-1].date())
        if isinstance(labels, xr.CFTimeIndex):
            calendar = da.time.encoding.get("calendar")
            timeline = xr.cftime_range(
                first_day, last_day, freq="D", calendar=calendar
            )
        else:
            timeline = pd.date_range(first_day, last_day, freq="D")

        template = xr.DataArray(
            data=np.ones(len(timeline)), coords={"time": timeline}, dims=("time",)
        )
        kept = generic.select_time(template, **indexer)
        expected = kept.notnull().resample(time=freq).sum(dim="time")

        return null, expected
Beispiel #2
0
    def is_null(da, freq, **indexer):
        """Return the null mask of the time-selected data, resampled at `freq`.

        NOTE(review): takes no `self`; presumably declared as a static method
        by a decorator outside this view -- confirm.

        Parameters
        ----------
        da : xr.DataArray
          Input data.
        freq : str
          Resampling frequency passed to `resample(time=freq)`.
        **indexer : {dim: indexer, }, optional
          Time selection forwarded to `generic.select_time`.

        Returns
        -------
        The result of `resample(time=freq)` applied to the boolean null mask.

        Raises
        ------
        ValueError
          If the time selection leaves no data.
        """
        subset = generic.select_time(da, **indexer)
        if not subset.time.size:
            raise ValueError("No data for selected period.")

        null_mask = subset.isnull()
        return null_mask.resample(time=freq)
Beispiel #3
0
    def is_null(da, freq, **indexer):
        """Return a boolean mask of null values in the time-selected data.

        NOTE(review): takes no `self`; presumably declared as a static method
        by a decorator outside this view -- confirm.

        Parameters
        ----------
        da : xr.DataArray
          Input data.
        freq : str or None
          Resampling frequency. When falsy, the mask is returned as is.
        **indexer : {dim: indexer, }, optional
          Time selection forwarded to `generic.select_time`.

        Returns
        -------
        The boolean null mask, or the result of `resample(time=freq)` on that
        mask when `freq` is given.

        Raises
        ------
        ValueError
          If the time selection leaves no data.
        """
        subset = generic.select_time(da, **indexer)
        if not subset.time.size:
            raise ValueError("No data for selected period.")

        null_mask = subset.isnull()
        if not freq:
            return null_mask

        return null_mask.resample(time=freq)
Beispiel #4
0
    def prepare(self, da, freq, src_timestep, **indexer):
        """Build the inputs expected by the `is_missing` function.

        Parameters
        ----------
        da : xr.DataArray
          Input data.
        freq : str
          Resampling frequency defining the periods, as described in
          http://pandas.pydata.org/pandas-docs/stable/timeseries.html#resampling.
          A falsy value means the whole series is treated as a single period.
        src_timestep : {"D", "H"}
          Expected input frequency.
        **indexer : {dim: indexer, }, optional
          Time attribute and values over which to subset the array. For example, use season='DJF' to select winter
          values, month=1 to select January, or month=[6,7,8] to select summer months. If no indexer is given,
          all values are considered.

        Returns
        -------
        xr.DataArray, xr.DataArray
          Boolean array indicating which values are null, array of expected number of valid values.

        Notes
        -----
        If `freq=None` and an indexer is given, then missing values during period at the start or end of array won't be
        flagged.
        """
        # This function can probably be made simpler once CFPeriodIndex is implemented.
        null = self.is_null(da, freq, **indexer)

        pfreq, _ = self.split_freq(freq)

        totals = null.sum(dim="time")

        # Work out the bounds of each period from the resampled labels, or
        # from the first/last time stamps of the input when there is no
        # resampling frequency.
        if pfreq.endswith("S"):
            period_start = totals.indexes["time"]
            period_end = period_start.shift(1, freq=freq)
        elif pfreq:
            period_end = totals.indexes["time"]
            period_start = period_end.shift(-1, freq=freq)
        else:
            source_index = da.time.to_index()
            period_start = source_index[:1]
            period_end = source_index[-1:]

        if not indexer:
            # No sub-selection: the expected count follows directly from the
            # length of each period expressed in units of the source timestep.
            span = period_end - period_start
            steps = span.astype(_np_timedelta64[src_timestep])
            if freq:
                expected = xr.DataArray(
                    steps.values, coords={"time": totals.time}, dims="time"
                )
            else:
                # Both end points of the series are included, hence the +1.
                expected = xr.DataArray(steps.values[0] + 1)
            return null, expected

        # With an indexer, create a full synthetic series at the source
        # timestep, apply the same selection, and count what is left.
        first_stamp = period_start[0]
        last_stamp = period_end[-1]
        timeline = date_range(
            first_stamp, last_stamp, freq=src_timestep, calendar=get_calendar(da)
        )
        template = xr.DataArray(
            data=np.ones(len(timeline)), coords={"time": timeline}, dims=("time",)
        )
        present = generic.select_time(template, **indexer).notnull()
        if freq:
            expected = present.resample(time=freq).sum(dim="time")
        else:
            expected = present.sum(dim="time")

        return null, expected
 def compute(da: xr.DataArray, **indexer):
     """Return the mean of `da` along `time` after applying the time selection.

     `indexer` is forwarded to `select_time`; attributes are kept on the
     result (`keep_attrs=True`).
     """
     return select_time(da, **indexer).mean(dim="time", keep_attrs=True)