Example #1
def test_infer_freq_invalid_inputs():
    # Non-datetime DataArray
    with pytest.raises(ValueError, match="must contain datetime-like objects"):
        xr.infer_freq(xr.DataArray([0, 1, 2]))

    indx = xr.cftime_range("1990-02-03", periods=4, freq="MS")
    # 2D DataArray
    with pytest.raises(ValueError, match="must be 1D"):
        xr.infer_freq(xr.DataArray([indx, indx]))

    # CFTimeIndex too short
    with pytest.raises(ValueError, match="Need at least 3 dates to infer frequency"):
        xr.infer_freq(indx[:2])

    # Non-monotonic input
    assert xr.infer_freq(indx[np.array([0, 2, 1, 3])]) is None

    # Non-unique input
    assert xr.infer_freq(indx[np.array([0, 1, 1, 2])]) is None

    # No unique frequency (here 1st step is MS, second is 2MS)
    assert xr.infer_freq(indx[np.array([0, 1, 3])]) is None

    # Same, but for QS
    indx = xr.cftime_range("1990-02-03", periods=4, freq="QS")
    assert xr.infer_freq(indx[np.array([0, 1, 3])]) is None
Example #2
def missing_from_context(da, freq, src_timestep=None, **indexer):  # noqa: D103
    src_timestep = src_timestep or xr.infer_freq(da.time)
    return FromContext.execute(da,
                               freq,
                               src_timestep,
                               options={},
                               indexer=indexer)
Example #3
def check_freq(var: xr.DataArray,
               freq: Union[str, Sequence[str]],
               strict: bool = True):
    """Raise an error if not series has not the expected temporal frequency or is not monotonically increasing.

    Parameters
    ----------
    var : xr.DataArray
      Input array.
    freq : str or sequence of str
      The expected temporal frequencies, using Pandas frequency terminology ({'A', 'M', 'D', 'H', 'T', 'S', 'L', 'U'}
      and multiples thereof). To test strictly for 'W', pass '7D' with `strict=True`.
      This ignores the start flag and the anchor (ex: 'AS-JUL' will validate against 'Y').
    strict : bool
      Whether multiples of the frequencies are considered invalid or not. With `strict` set to False, a '3H' series
      will not raise an error if freq is set to 'H'.
    """
    if isinstance(freq, str):
        freq = [freq]
    exp_base = [parse_offset(frq)[1] for frq in freq]
    v_freq = xr.infer_freq(var.time)
    if v_freq is None:
        raise ValidationError(
            "Unable to infer the frequency of the time series. "
            "To mute this, set xclim's option data_validation='log'.")
    v_base = parse_offset(v_freq)[1]
    if v_base not in exp_base or (strict and all(
            compare_offsets(v_freq, "!=", frq) for frq in freq)):
        raise ValidationError(
            f"Frequency of time series not {'strictly' if strict else ''} in {freq}. "
            "To mute this, set xclim's option data_validation='log'.")
Example #4
def at_least_n_valid(da,
                     freq,
                     n=1,
                     src_timestep=None,
                     **indexer):  # noqa: D103
    src_timestep = src_timestep or xr.infer_freq(da.time)
    return AtLeastNValid(da, freq, src_timestep, **indexer)(n=n)
Example #5
def missing_pct(da,
                freq,
                tolerance,
                src_timestep=None,
                **indexer):  # noqa: D103
    src_timestep = src_timestep or xr.infer_freq(da.time)
    return MissingPct(da, freq, src_timestep, **indexer)(tolerance=tolerance)
Example #6
def get_wrf_pw_at_dsea_gnss_coord(path=des_path,
                                  work_path=work_yuval,
                                  point=None):
    from PW_stations import produce_geo_gnss_solved_stations
    import xarray as xr
    from aux_gps import get_nearest_lat_lon_for_xy
    from aux_gps import path_glob
    from aux_gps import get_unique_index
    df = produce_geo_gnss_solved_stations(path=work_path / 'gis', plot=False)
    dsea_point = df.loc['dsea'][['lat', 'lon']].astype(float).values
    files = path_glob(path, 'pw_wrfout*.nc')
    pw_list = []
    for file in files:
        pw_all = xr.load_dataset(file)
        freq = xr.infer_freq(pw_all['Time'])
        print(freq)
        if point is not None:
            print('looking for {} at wrf.'.format(point))
            dsea_point = point
        loc = get_nearest_lat_lon_for_xy(pw_all['XLAT'], pw_all['XLONG'],
                                         dsea_point)
        print(loc)
        pw = pw_all.isel(south_north=loc[0][0], west_east=loc[0][1])
        pw_list.append(pw)
    pw_ts = xr.concat(pw_list, 'Time')
    pw_ts = get_unique_index(pw_ts, dim='Time')
    return pw_ts
Example #7
def test_convert_calendar_missing(source, target, freq):
    src = DataArray(
        date_range(
            "2004-01-01",
            "2004-12-31" if source != "360_day" else "2004-12-30",
            freq=freq,
            calendar=source,
        ),
        dims=("time", ),
        name="time",
    )
    da_src = DataArray(np.linspace(0, 1, src.size),
                       dims=("time", ),
                       coords={"time": src})
    out = convert_calendar(da_src, target, missing=np.nan, align_on="date")
    assert infer_freq(out.time) == freq

    expected = date_range(
        "2004-01-01",
        "2004-12-31" if target != "360_day" else "2004-12-30",
        freq=freq,
        calendar=target,
    )
    np.testing.assert_array_equal(out.time, expected)

    if freq != "M":
        out_without_missing = convert_calendar(da_src, target, align_on="date")
        expected_nan = out.isel(time=~out.time.isin(out_without_missing.time))
        assert expected_nan.isnull().all()

        expected_not_nan = out.sel(time=out_without_missing.time)
        assert_identical(expected_not_nan, out_without_missing)
Example #8
def check_daily(var: xr.DataArray):
    """Raise an error if not series has a frequency other that daily, or is not monotonically increasing.

    Note that this does not check for gaps in the series.
    """
    if xr.infer_freq(var.time) != "D":
        raise ValidationError("time series is not recognized as daily.")
Example #9
def __init__(self, da, freq, src_timestep, **indexer):
    if src_timestep is None:
        src_timestep = xr.infer_freq(da.time)
        if src_timestep is None:
            raise ValueError(
                "`src_timestep` must be given as it cannot be inferred.")
    self.null, self.count = self.prepare(da, freq, src_timestep, **indexer)
Example #10
def test_infer_freq(freq, calendar):
    indx = xr.cftime_range("2000-01-01",
                           periods=3,
                           freq=freq,
                           calendar=calendar)
    out = xr.infer_freq(indx)
    assert out == freq
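
The test above presumably ran under a pytest parametrization that was stripped when the snippet was extracted. A plausible reconstruction, with assumed freq and calendar values:

import pytest

@pytest.mark.parametrize("freq", ["D", "6H", "MS", "2MS"])
@pytest.mark.parametrize("calendar", ["standard", "noleap", "360_day"])
def test_infer_freq(freq, calendar):
    ...  # body as in the snippet above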
Example #11
def align_synoptic_class_with_daily_dataset(ds, time_dim='time'):
    import xarray as xr
    assert xr.infer_freq(ds[time_dim]) == 'D'
    #    ds = ds.resample({time_dim: '1D'}, keep_attrs=True).mean(keep_attrs=True)
    syn = read_synoptic_classification(report=False).to_xarray()
    ds['syn_class'] = syn['class']
    ds['upper_class'] = syn['upper_class']
    return ds
Example #12
def missing_wmo(da,
                freq,
                nm=11,
                nc=5,
                src_timestep=None,
                **indexer):  # noqa: D103
    src_timestep = src_timestep or xr.infer_freq(da.time)
    missing = MissingWMO(da, "M", src_timestep, **indexer)(nm=nm, nc=nc)
    return missing.resample(time=freq).any()
Example #13
def date_range_like(source, calendar):
    """Generate a datetime array with the same frequency, start and end as another one, but in a different calendar.

    Parameters
    ----------
    source : xr.DataArray
      1D datetime coordinate DataArray
    calendar : str
      New calendar name.

    Raises
    ------
    ValueError
      If the source's frequency was not found.

    Returns
    -------
    xr.DataArray
      1D datetime coordinate with the same start, end and frequency as the source, but in the new calendar.
      The start date is assumed to exist in the target calendar.
      If the end date doesn't exist, the code tries 1 and 2 calendar days before.
      Exception: when the source is in 360_day and the end of the range is the 30th of a 31-day month,
      the 31st is appended to the range.
    """
    freq = xr.infer_freq(source)
    if freq is None:
        raise ValueError(
            "`date_range_like` was unable to generate a range as the source frequency was not inferrable."
        )

    src_cal = get_calendar(source)
    if src_cal == calendar:
        return source

    index = source.indexes[source.dims[0]]
    end_src = index[-1]
    end = _convert_datetime(end_src, calendar=calendar)
    if end is np.nan:  # Day is invalid, happens at the end of months.
        end = _convert_datetime(end_src.replace(day=end_src.day - 1), calendar=calendar)
        if end is np.nan:  # Still invalid : 360_day to non-leap february.
            end = _convert_datetime(
                end_src.replace(day=end_src.day - 2), calendar=calendar
            )
    if src_cal == "360_day" and end_src.day == 30 and end.daysinmonth == 31:
        # For the specific case of daily data from 360_day source, the last day is expected to be "missing"
        end = end.replace(day=31)

    return xr.DataArray(
        date_range(
            _convert_datetime(index[0], calendar=calendar),
            end,
            freq=freq,
            calendar=calendar,
        ),
        dims=source.dims,
        name=source.dims[0],
    )
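
A hedged usage sketch for date_range_like above, assuming its helpers (get_calendar, _convert_datetime, date_range) are importable from the same xclim module: converting a standard-calendar daily axis to noleap keeps the start, end and frequency while dropping the leap day.

import xarray as xr

src = xr.DataArray(
    xr.cftime_range("2000-01-01", "2000-12-31", freq="D", calendar="standard"),
    dims=("time",),
    name="time",
)
noleap_time = date_range_like(src, "noleap")
assert noleap_time.size == 365  # 2000-02-29 has no noleap counterpart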
Example #14
def check_daily(var):
    """Assert that the series is daily and monotonic (no jumps in time index).

    A ValidationError is raised otherwise.
    """
    if xr.infer_freq(var.time.to_pandas()) != "D":
        raise ValidationError("time series is not recognized as daily.")

    # Check that the series does not go backward in time
    if not var.indexes["time"].is_monotonic_increasing:
        raise ValidationError("time index is not monotonically increasing.")
Example #15
def missing_wmo(da,
                freq,
                nm=11,
                nc=5,
                src_timestep=None,
                **indexer):  # noqa: D103
    src_timestep = src_timestep or xr.infer_freq(da.time)
    return MissingWMO.execute(da,
                              freq,
                              src_timestep,
                              options=dict(nm=nm, nc=nc),
                              indexer=indexer)
Example #16
def _to_quarter(
    pr: Optional[xarray.DataArray] = None,
    tas: Optional[xarray.DataArray] = None,
) -> xarray.DataArray:
    """Convert daily, weekly or monthly time series to quarterly time series according to ANUCLIM specifications."""
    if tas is not None and pr is not None:
        raise ValueError(
            "Supply only one variable, 'tas' (exclusive) or 'pr'.")

    freq = xarray.infer_freq((tas if tas is not None else pr).time)
    if freq is None:
        raise ValueError("Can't infer sampling frequency of the input data.")

    if freq.upper().startswith("D"):
        if tas is not None:
            tas = tg_mean(tas, freq="7D")

        if pr is not None:
            # Accumulate on a week
            # Ensure units are back to a "rate" for rate2amount below
            pr = convert_units_to(precip_accumulation(pr, freq="7D"), "mm")
            pr.attrs["units"] = "mm/week"

        freq = "W"

    if freq.upper().startswith("W"):
        window = 13

    elif freq.upper().startswith("M"):
        window = 3

    else:
        raise NotImplementedError(
            f'Unknown input time frequency "{freq}": must be one of "D", "W" or "M".'
        )

    if tas is not None:
        tas = ensure_chunk_size(tas, time=np.ceil(window / 2))
        out = tas.rolling(time=window, center=False).mean(skipna=False)
        out.attrs = tas.attrs
    elif pr is not None:
        pr = ensure_chunk_size(pr, time=np.ceil(window / 2))
        pram = rate2amount(pr)
        out = pram.rolling(time=window, center=False).sum()
        out.attrs = pr.attrs
        out.attrs["units"] = pram.units
    else:
        raise ValueError("No variables supplied.")

    out = ensure_chunk_size(out, time=-1)
    return out
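
A hedged sketch of the monthly branch of _to_quarter above (ensure_chunk_size is assumed to be an xclim utility): monthly input skips the weekly resampling and reduces to a 3-month rolling mean, i.e. quarterly values at a monthly step.

import numpy as np
import pandas as pd
import xarray as xr

tas = xr.DataArray(
    np.arange(12.0),
    dims=("time",),
    coords={"time": pd.date_range("2000-01-31", periods=12, freq="M")},
    attrs={"units": "K"},
)
quarterly = _to_quarter(tas=tas)  # rolling(time=3).mean(); first two steps are NaN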
Example #17
def water_budget(
    pr: xarray.DataArray,
    tasmin: Optional[xarray.DataArray] = None,
    tasmax: Optional[xarray.DataArray] = None,
    tas: Optional[xarray.DataArray] = None,
    method: str = "BR65",
) -> xarray.DataArray:
    r"""Precipitation minus potential evapotranspiration.

    Precipitation minus potential evapotranspiration as a measure of an approximated surface water budget,
    where the potential evapotranspiration is calculated with a given method.

    Parameters
    ----------
    pr : xarray.DataArray
      Daily precipitation.
    tasmin : xarray.DataArray
      Minimum daily temperature.
    tasmax : xarray.DataArray
      Maximum daily temperature.
    tas : xarray.DataArray
      Mean daily temperature.
    method : str
      Method to use to calculate the potential evapotranspiration.

    Notes
    -----
    Available methods are listed in the description of xclim.indicators.atmos.potential_evapotranspiration.

    Returns
    -------
    xarray.DataArray
      Precipitation minus potential evapotranspiration.
    """
    pr = convert_units_to(pr, "kg m-2 s-1")

    pet = xci.potential_evapotranspiration(tasmin=tasmin,
                                           tasmax=tasmax,
                                           tas=tas,
                                           method=method)

    if xarray.infer_freq(pet.time) == "MS":
        with xarray.set_options(keep_attrs=True):
            pr = pr.resample(time="MS").mean(dim="time")

    out = pr - pet

    out.attrs["units"] = pr.attrs["units"]
    return out
Example #18
def infer_sampling_units(da: xr.DataArray,
                         deffreq: str = "D") -> Tuple[int, str]:
    """Infer a multiplicator and the units corresponding to one sampling period.

    If `xr.infer_freq` fails, returns `deffreq`.
    """
    freq = xr.infer_freq(da.time)
    if freq is None:
        freq = deffreq

    multi, base, _ = parse_offset(freq)
    try:
        return int(multi or "1"), FREQ_UNITS[base]
    except KeyError:
        raise ValueError(
            f"Sampling frequency {freq} has no corresponding units.")
Example #19
def test_infer_freq_valid_types():
    cf_indx = xr.cftime_range("2000-01-01", periods=3, freq="D")
    assert xr.infer_freq(cf_indx) == "D"
    assert xr.infer_freq(xr.DataArray(cf_indx)) == "D"

    pd_indx = pd.date_range("2000-01-01", periods=3, freq="D")
    assert xr.infer_freq(pd_indx) == "D"
    assert xr.infer_freq(xr.DataArray(pd_indx)) == "D"

    pd_td_indx = pd.timedelta_range(start="1D", periods=3, freq="D")
    assert xr.infer_freq(pd_td_indx) == "D"
    assert xr.infer_freq(xr.DataArray(pd_td_indx)) == "D"
Example #20
def test_convert_calendar_missing(source, target, freq):
    src = xr.DataArray(
        date_range(
            "2004-01-01",
            "2004-12-31" if source != "360_day" else "2004-12-30",
            freq=freq,
            calendar=source,
        ),
        dims=("time",),
        name="time",
    )
    da_src = xr.DataArray(
        np.linspace(0, 1, src.size), dims=("time",), coords={"time": src}
    )
    out = convert_calendar(da_src, target, missing=np.nan, align_on="date")
    assert xr.infer_freq(out.time) == freq
    if source == "360_day":
        assert out.time[-1].dt.day == 31
Example #21
def infer_sampling_units(
    da: xr.DataArray,
    deffreq: Optional[str] = "D",
    dim: str = "time",
) -> Tuple[int, str]:
    """Infer a multiplicator and the units corresponding to one sampling period.

    Parameters
    ----------
    da : xr.DataArray
      A DataArray from which to take coordinate `dim`.
    deffreq : str
      If no frequency is inferred from `da[dim]`, take this one.
    dim : str
      Dimension from which to infer the frequency.

    Raises
    ------
    ValueError
      If the frequency has no exact corresponding units.

    Returns
    -------
    m : int
      The magnitude (number of base periods per period)
    u : str
      Units as a string, understandable by pint.
    """
    dimmed = getattr(da, dim)
    freq = xr.infer_freq(dimmed)
    if freq is None:
        freq = deffreq

    multi, base, _, _ = parse_offset(freq)
    try:
        out = multi, FREQ_UNITS[base]
    except KeyError:
        raise ValueError(
            f"Sampling frequency {freq} has no corresponding units.")
    if out == (7, "d"):
        # Special case for weekly frequency. xarray's CFTimeOffsets do not have "W".
        return 1, "week"
    return out
Example #22
def check_freq(var: xr.DataArray, freq: str, strict: bool = True):
    """Raise an error if not series has not the expected temporal frequency or is not monotonically increasing.

    Parameters
    ----------
    var : xr.DataArray
      Input array.
    freq : str
      The temporal frequency defined using the Pandas frequency strings, e.g. 'A', 'M', 'D', 'H', 'T',
      'S'. Note that a 3-hourly time series is declared as '3H'.
    strict : bool
      Whether or not multiples of the frequency are considered invalid. With `strict` set to False, a '3H' series
      will not raise an error if freq is set to 'H'.
    """
    v_freq = xr.infer_freq(var.time)
    if v_freq != freq:
        if v_freq is not None and (freq in v_freq) and not strict:
            return
        raise ValidationError(
            f"Time series has temporal frequency `{v_freq}`, expected `{freq}`."
        )
Example #23
def _get_number_of_elements_by_year(time):
    """Get the number of elements in time in a year by inferring its sampling frequency.

    Only calendars with uniform year lengths are supported: 360_day, noleap, all_leap.
    """
    cal = get_calendar(time)

    # Calendar check
    if cal in ["standard", "gregorian", "default", "proleptic_gregorian"]:
        raise ValueError(
            "For moving window computations, the data must have a uniform calendar (360_day, no_leap or all_leap)"
        )

    mult, freq, _, _ = parse_offset(xr.infer_freq(time))
    days_in_year = max_doy[cal]
    elements_in_year = {"Q": 4, "M": 12, "D": days_in_year, "H": days_in_year * 24}
    N_in_year = elements_in_year.get(freq, 1) / mult
    if N_in_year % 1 != 0:
        raise ValueError(
            f"Sampling frequency of the data must be Q, M, D or H and evenly divide a year (got {mult}{freq})."
        )

    return int(N_in_year)
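
A quick sanity check for _get_number_of_elements_by_year above (get_calendar, parse_offset and max_doy are assumed xclim calendar utilities): a daily noleap axis has 365 elements per year.

import xarray as xr

time = xr.cftime_range("2000-01-01", periods=730, freq="D", calendar="noleap")
assert _get_number_of_elements_by_year(time) == 365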
Example #24
def _rate_and_amount_converter(da: xr.DataArray,
                               dim: str = "time",
                               to: str = "amount",
                               out_units: str = None) -> xr.DataArray:
    """Private function performing the actual conversion for :py:func:`rate2amount` and :py:func:`amount2rate`."""
    m = 1
    u = None  # Default to assume a non-uniform axis
    label = "lower"
    time = da[dim]

    try:
        freq = xr.infer_freq(da[dim])
    except ValueError:
        freq = None
    if freq is not None:
        multi, base, start_anchor, _ = parse_offset(freq)
        if base in ["M", "Q", "A"]:
            start = time.indexes[dim][0]
            if not start_anchor:
                # Anchor is on the end of the period, subtract 1 period.
                start = start - xr.coding.cftime_offsets.to_offset(freq)
                # In the diff below, assign to upper label!
                label = "upper"
            # We generate "time" with an extra element, so we do not need to repeat the last element below.
            time = xr.DataArray(
                date_range(start,
                           periods=len(time) + 1,
                           freq=freq,
                           calendar=get_calendar(time)),
                dims=(dim, ),
                name=dim,
                attrs=da[dim].attrs,
            )
        else:
            m, u = multi, FREQ_UNITS[base]

    # Freq is month, season or year, which are not constant units, or simply freq is not inferrable.
    if u is None:
        # Get sampling period lengths in nanoseconds
        # In the case with no freq, the last period has the same length as the one before.
        # In the case with freq in M, Q, A, this has been dealt with above in `time`
        # and `label` has been updated accordingly.
        dt = (time.diff(dim,
                        label=label).reindex({
                            dim: da[dim]
                        }, method="ffill").astype(float))
        dt = dt / 1e9  # Convert to seconds

        if to == "amount":
            tu = (str2pint(da.units) * str2pint("s")).to_reduced_units()
            out = da * dt * tu.m
        elif to == "rate":
            tu = (str2pint(da.units) / str2pint("s")).to_reduced_units()
            out = (da / dt) * tu.m
        else:
            raise ValueError("Argument `to` must be one of 'amout' or 'rate'.")

        out.attrs["units"] = pint2cfunits(tu)

    else:
        q = units.Quantity(m, u)
        if to == "amount":
            out = pint_multiply(da, q)
        elif to == "rate":
            out = pint_multiply(da, 1 / q)
        else:
            raise ValueError("Argument `to` must be one of 'amout' or 'rate'.")

    if out_units:
        out = convert_units_to(out, out_units)

    return out
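
A hedged sketch of the uniform-frequency branch of the converter above (units, pint_multiply and FREQ_UNITS are assumed xclim internals): for a daily rate in mm/d, one sampling period is exactly one day, so converting to an amount keeps the same numbers with units of mm.

import pandas as pd
import xarray as xr

pr = xr.DataArray(
    [1.0, 2.0, 3.0],
    dims=("time",),
    coords={"time": pd.date_range("2000-01-01", periods=3, freq="D")},
    attrs={"units": "mm/d"},
)
pram = _rate_and_amount_converter(pr, to="amount")  # values 1, 2, 3; units "mm"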
Example #25
def visualize_synoptic_class_on_time_series(da_ts,
                                            path=climate_path,
                                            ax=None,
                                            leg_ncol=1,
                                            add_mm=False,
                                            leg_loc=1,
                                            second_da_ts=None,
                                            twin=None):
    import xarray as xr
    import matplotlib.pyplot as plt
    from aux_gps import replace_time_series_with_its_group
    time_dim = list(set(da_ts.dims))[0]
    assert xr.infer_freq(da_ts[time_dim]) == 'D'
    if ax is None:
        fig, ax = plt.subplots()
    # also calc the monthly means:
    if add_mm:
        da_ts_mm = replace_time_series_with_its_group(da_ts, grp='month')
        da_ts_mm.plot.line('k-.', ax=ax)
    if isinstance(da_ts, xr.Dataset):
        styles = ['r-', 'g-', 'b-']
        lns = []
        for i, st in enumerate(da_ts):
            lbl = st.upper()
            ln = da_ts[st].plot.line(styles[i],
                                     lw=2,
                                     ax=ax,
                                     zorder=20,
                                     label=lbl)
            lns.append(ln)
        da_ts = da_ts[st]
    else:
        # plot daily values:
        da_ts.plot.line('k-', lw=2, ax=ax, zorder=20)
    if second_da_ts is not None:
        # record the corr between second_da_ts and da_ts:
        corr_all = xr.corr(da_ts, second_da_ts).item()
        corr_oct = xr.corr(
            da_ts.sel(time=da_ts['time.month'] == 10),
            second_da_ts.sel(time=second_da_ts['time.month'] == 10)).item()
        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        textstr = '\n'.join([
            'r_all = {:.2f}'.format(corr_all),
            'r_just_Oct = {:.2f}'.format(corr_oct)
        ])
        # textstr = 'r_all = {:.2f}'.format(corr)
        # place a text box in upper left in axes coords
        ax.text(0.05,
                0.95,
                textstr,
                transform=ax.transAxes,
                fontsize=14,
                verticalalignment='top',
                bbox=props)
        try:
            if second_da_ts.attrs['units'] == da_ts.attrs['units']:
                second_da_ts.plot.line('k--', lw=2, ax=ax, marker='o')
            else:
                twinx = ax.twinx()
                second_da_ts.plot.line('k--', lw=2, ax=twinx, marker='o')
                if twin is not None:
                    twinx.set_ylim(*twin)
        except KeyError:
            twinx = ax.twinx()
            second_da_ts.plot.line('k--', lw=2, ax=twinx, marker='o')
            if twin is not None:
                twinx.set_ylim(*twin)
    # ymin, ymax = ax.get_ylim()
    df = read_synoptic_classification(path, report=False)
    ind = da_ts.to_dataframe().index
    da_ts = align_synoptic_class_with_daily_dataset(da_ts)
    df = df.loc[ind]
    color_dict, edge_dict = choose_color_for_synoptic_classification()
    #    df['color'] = df['class'].map(color_dict)
    # monthly count of synoptics:
    month_counts = agg_month_count_syn_class(freq=False)
    min_year = da_ts[time_dim].min().dt.year.item()
    min_month = da_ts[time_dim].min().dt.month.item()
    max_year = da_ts[time_dim].max().dt.year.item()
    max_month = da_ts[time_dim].max().dt.month.item()
    min_dt = '{}-{}'.format(min_year, min_month)
    max_dt = '{}-{}'.format(max_year, max_month)
    month_counts = month_counts.sel(time=slice(min_dt, max_dt))
    # alternative count since we need not just monthly but by time slice:
    grp_dict = df.groupby('class').groups
    for key_class, key_ind in grp_dict.items():
        color = color_dict[key_class]
        edge_color = edge_dict[key_class]
        abbr = add_class_abbr(key_class)
        # abbr_count = month_counts.sel(syn_cls=key_class).sum().item()
        abbr_count = df[df['class'] == key_class].count().values[0]
        abbr_label = r'${{{}}}$: {}'.format(abbr, int(abbr_count))
        #    for ind, row in df.iterrows():
        da_ts[da_ts['syn_class'] == key_class].plot.line(
            'k-',
            lw=0,
            ax=ax,
            marker='o',
            markersize=20,
            markerfacecolor=color,
            markeredgewidth=2,
            markeredgecolor=edge_color,
            label=abbr_label)
        # ax.vlines(key_ind, 0, 80, colors=color, alpha=0.4, lw=10,
        #           label=abbr_label)
    ax.legend(ncol=leg_ncol, labelspacing=1.5, fontsize=12, loc=leg_loc)
    ax.grid()
    return ax
Example #26
def freq(self):
    if self.is_temporal:
        return xr.infer_freq(self._obj)
    else:
        return None
Example #27
def missing_any(da, freq, src_timestep=None, **indexer):  # noqa: D103
    src_timestep = src_timestep or xr.infer_freq(da.time)
    return MissingAny(da, freq, src_timestep, **indexer)()
Example #28
def aggregate_between_dates(
    data: xr.DataArray,
    start: Union[xr.DataArray, DayOfYearStr],
    end: Union[xr.DataArray, DayOfYearStr],
    op: str = "sum",
    freq: Optional[str] = None,
) -> xr.DataArray:
    """Aggregate the data over a period between start and end dates and apply the operator on the aggregated data.

    Parameters
    ----------
    data : xr.DataArray
      Data to aggregate between start and end dates.
    start : xr.DataArray or DayOfYearStr
      Start dates (as day-of-year) for the aggregation periods.
    end : xr.DataArray or DayOfYearStr
      End (as day-of-year) dates for the aggregation periods.
    op : {'min', 'max', 'sum', 'mean', 'std'}
      Operator.
    freq : str
      Resampling frequency.

    Returns
    -------
    xarray.DataArray, [dimensionless]
      Aggregated data between the start and end dates. If the end date is before the start date, returns np.nan.
      If there is no start and/or end date, returns np.nan.
    """
    def _get_days(_bound, _group, _base_time):
        """Get bound in number of days since base_time. Bound can be a days_since array or a DayOfYearStr."""
        if isinstance(_bound, str):
            b_i = rl.index_of_date(_group.time, _bound, max_idxs=1)  # noqa
            if not len(b_i):
                return None
            return (_group.time.isel(time=b_i[0]) -
                    _group.time.isel(time=0)).dt.days
        if _base_time in _bound.time:
            return _bound.sel(time=_base_time)
        return None

    if freq is None:
        frequencies = []
        for i, bound in enumerate([start, end], start=1):
            try:
                frequencies.append(xr.infer_freq(bound.time))
            except AttributeError:
                frequencies.append(None)

        good_freq = set(frequencies) - {None}

        if len(good_freq) != 1:
            raise ValueError(
                f"Non-inferrable resampling frequency or inconsistent frequencies. Got start, end = {frequencies}."
                " Please consider providing `freq` manually.")
        freq = good_freq.pop()

    cal = get_calendar(data, dim="time")

    if not isinstance(start, str):
        start = convert_calendar(start, cal)
        start.attrs["calendar"] = cal
        start = doy_to_days_since(start)
    if not isinstance(end, str):
        end = convert_calendar(end, cal)
        end.attrs["calendar"] = cal
        end = doy_to_days_since(end)

    out = list()
    for base_time, indexes in data.resample(time=freq).groups.items():
        # get group slice
        group = data.isel(time=indexes)

        start_d = _get_days(start, group, base_time)
        end_d = _get_days(end, group, base_time)

        # convert bounds for this group
        if start_d is not None and end_d is not None:

            days = (group.time - base_time).dt.days
            days[days < 0] = np.nan

            masked = group.where((days >= start_d) & (days <= end_d - 1))
            res = getattr(masked, op)(dim="time", skipna=True)
            res = xr.where(
                ((start_d > end_d) | (start_d.isnull()) | (end_d.isnull())),
                np.nan, res)
            # Re-add the time dimension with the period's base time.
            res = res.expand_dims(time=[base_time])
            out.append(res)
        else:
            # Get an array with the good shape, put nans and add the new time.
            res = (group.isel(time=0) * np.nan).expand_dims(time=[base_time])
            out.append(res)
            continue

    out = xr.concat(out, dim="time")
    return out
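
A hypothetical call of aggregate_between_dates above with day-of-year strings as bounds. Note the mask uses `days <= end_d - 1`, so the end day itself is excluded:

import numpy as np
import pandas as pd
import xarray as xr

data = xr.DataArray(
    np.ones(731),
    dims=("time",),
    coords={"time": pd.date_range("2000-01-01", periods=731, freq="D")},
)
# One value per year: the sum of the daily values from April 1 through June 29.
totals = aggregate_between_dates(data, start="04-01", end="06-30", op="sum", freq="AS")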
Example #29
def open_dataset(
    infiles,
    file_format=None,
    chunks=None,
    metadata_file=None,
    variables=[],
    spatial_coords=None,
    shapefile=None,
    shapefile_label_header=None,
    shape_overlap=None,
    combine_shapes=False,
    spatial_agg="none",
    lat_dim="lat",
    lon_dim="lon",
    standard_calendar=False,
    no_leap_days=False,
    rolling_sum_window=None,
    time_freq=None,
    time_agg=None,
    month=None,
    reset_times=False,
    complete_time_agg_periods=False,
    input_freq=None,
    time_dim="time",
    isel={},
    sel={},
    scale_factors={},
    units={},
    units_timing="end",
):
    """Create an xarray Dataset from one or more data files.

    Parameters
    ----------
    infiles : str or list
        Input file path/s
    file_format : str, optional
        Formats/engines accepted by xarray.open_dataset (e.g. netcdf4, zarr, cfgrib).
        Estimated if not provided.
    chunks : dict, optional
        Chunks for xarray.open_mfdataset
    metadata_file : str
        YAML file path specifying required file metadata changes
    variables : list, optional
        Subset of variables of interest
    spatial_coords : list, optional
        Coordinates for spatial point or box selection.
        List of length 2 [lat, lon], 4 [south bound, north bound, east bound, west bound]
    shapefile : str, optional
        Shapefile for spatial subsetting
    shapefile_label_header : str
        Name of the shapefile column containing the region names
    shape_overlap : float, optional
        Fraction that a grid cell must overlap with a shape to be included.
        If no fraction is provided, grid cells are selected if their centre
        point falls within the shape.
    combine_shapes : bool, default False
        Add a region that combines all shapes
    spatial_agg : {'mean', 'sum', 'weighted_mean'}, optional
        Spatial aggregation method
    lat_dim: str, default 'lat'
        Name of the latitude dimension in infiles
    lon_dim: str, default 'lon'
        Name of the longitude dimension in infiles
    no_leap_days : bool, default False
        Remove leap days from data
    rolling_sum_window : int, default None
        Apply a rolling sum with this window width
    time_freq : {'A-DEC', 'M', 'Q-NOV', 'A-NOV', 'A-AUG'}, optional
        Target temporal frequency for resampling
    time_agg : {'mean', 'sum', 'min', 'max'}, optional
        Temporal aggregation method
    standard_calendar : bool, default False
        Force a common calendar on all input files
    month : {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, optional
        Select a single month from the dataset
    reset_times : bool, default False
        Shift time values after resampling so months match initial date
    complete_time_agg_periods : bool, default False
        Limit temporal aggregation output to complete years/months
    input_freq : {'A', 'Q', 'M', 'D'}, optional
        Input time frequency for resampling (estimated if not provided)
    time_dim: str, default 'time'
        Name of the time dimension in infiles
    isel : dict, optional
        Selection using xarray.Dataset.isel
    sel : dict, optional
        Selection using xarray.Dataset.sel
    scale_factors : dict, optional
        Divide input data by this value.
        Variable/s (keys) and scale factor (values).
        Scale factors can be a float or "days_in_month"
    units : dict, optional
        Variable/s (keys) and desired units (values)
    units_timing : str, {'start', 'middle', 'end'}, default 'end'
        When to perform the unit conversions in units
        Middle is after the spatial aggregation but before the temporal aggregation

    Returns
    -------
    ds : xarray Dataset
    """

    preprocess = time_utils.switch_calendar if standard_calendar else None
    engine = file_format if file_format else _guess_file_format(infiles)
    ds = xr.open_mfdataset(infiles,
                           engine=engine,
                           preprocess=preprocess,
                           use_cftime=True,
                           chunks=chunks)

    # Metadata
    if metadata_file:
        ds = _fix_metadata(ds, metadata_file)

    # Variable selection
    if variables:
        ds = ds[variables]

    # General selection/subsetting
    if isel:
        ds = ds.isel(isel)
    if sel:
        ds = ds.sel(sel)
    if month:
        ds = time_utils.select_month(ds,
                                     month,
                                     init_month=reset_times,
                                     time_dim=time_dim)

    # Scale factors
    if scale_factors:
        with xr.set_options(keep_attrs=True):
            for var, scale_factor in scale_factors.items():
                if scale_factor == "days_in_month":
                    ds[var] = ds[var] / ds[time_dim].dt.days_in_month
                else:
                    ds[var] = ds[var] / scale_factor

    # Unit conversion (at start)
    if units and (units_timing == "start"):
        for var, target_units in units.items():
            ds[var] = general_utils.convert_units(ds[var], target_units)

    # Spatial subsetting and aggregation
    spatial_coord_agg = "none" if shapefile else spatial_agg
    if spatial_coords is None:
        pass
    elif len(spatial_coords) == 4:
        ds = spatial_selection.select_box_region(ds,
                                                 spatial_coords,
                                                 agg=spatial_coord_agg,
                                                 lat_dim=lat_dim,
                                                 lon_dim=lon_dim)
    elif len(spatial_coords) == 2:
        ds = spatial_selection.select_point_region(ds,
                                                   spatial_coords,
                                                   lat_dim=lat_dim,
                                                   lon_dim=lon_dim)
    else:
        msg = "coordinate selection must be None, a box (list of 4 floats) or a point (list of 2 floats)"
        raise ValueError(msg)
    if shapefile:
        shapes = gp.read_file(shapefile)
        ds = spatial_selection.select_shapefile_regions(
            ds,
            shapes,
            agg=spatial_agg,
            overlap_fraction=shape_overlap,
            header=shapefile_label_header,
            combine_shapes=combine_shapes,
            lat_dim=lat_dim,
            lon_dim=lon_dim,
        )

    # Unit conversion (at middle)
    if units and (units_timing == "middle"):
        for var, target_units in units.items():
            ds[var] = general_utils.convert_units(ds[var], target_units)

    # Temporal aggregation
    if no_leap_days:
        ds = ds.sel(
            time=~((ds[time_dim].dt.month == 2) & (ds[time_dim].dt.day == 29)))
    if rolling_sum_window:
        ds = ds.rolling({time_dim: rolling_sum_window}).sum()
    if time_freq:
        assert time_agg, "Provide a time_agg"
        assert variables, "Variables argument is required for temporal aggregation"
        if not input_freq:
            input_freq = xr.infer_freq(ds.indexes[time_dim][0:3])[0]
        ds = time_utils.temporal_aggregation(
            ds,
            time_freq,
            input_freq,
            time_agg,
            variables,
            reset_times=reset_times,
            complete=complete_time_agg_periods,
        )
    output_freq = time_freq[0] if time_freq else input_freq
    if output_freq:
        ds[time_dim].attrs["frequency"] = output_freq

    # Unit conversion (at end)
    if units and (units_timing == "end"):
        for var, target_units in units.items():
            ds[var] = general_utils.convert_units(ds[var], target_units)

    assert type(ds) == xr.core.dataset.Dataset
    ds = ds.squeeze(drop=True)

    return ds
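
A hypothetical invocation of open_dataset above; the file pattern and variable name are placeholders, and the box follows the docstring's [south, north, east, west] order:

ds = open_dataset(
    "pr_day_*.nc",  # placeholder input files
    variables=["pr"],
    spatial_coords=[-44, -10, 154, 112],  # south, north, east, west
    spatial_agg="mean",
    time_freq="M",
    time_agg="mean",
)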
Example #30
def my_shift(init, lead):
    """Shift CFTimeIndex init by amount lead in units lead_unit."""
    if isinstance(init, xr.DataArray):
        init = init.to_index()
    init_calendar = init.calendar
    if isinstance(lead, xr.DataArray):
        lead_unit = lead.attrs["units"]
        lead = lead.values

    if lead_unit in ["years", "seasons", "months"
                     ] and "360" not in init_calendar:
        if int(lead) != float(lead):
            raise CoordinateError(
                f'Require integer leads if lead.attrs["units"]="{lead_unit}" in '
                f'["years", "seasons", "months"] and calendar="{init_calendar}" '
                'not "360_day".')
        lead = int(lead)

    if "360" in init_calendar:  # use pd.Timedelta
        if lead_unit == "years":
            lead = lead * 360
            lead_unit = "D"
        elif lead_unit == "seasons":
            lead = lead * 90
            lead_unit = "D"
        elif lead_unit == "months":
            lead_unit = "D"
            lead = lead * 30

    if lead_unit in ["years", "seasons", "months"]:
        # use init_freq reconstructed from anchor and lead unit
        from xarray.coding.frequencies import month_anchor_check

        anchor_check = month_anchor_check(init)  # returns None, ce or cs
        if anchor_check is not None:
            lead_freq_string = lead_unit[0].upper()  # Y for years, S for seasons, M for months
            # go down to monthly freq
            if lead_freq_string == "Y":
                lead_freq_string = "12M"
            elif lead_freq_string == "S":
                lead_freq_string = "3M"
            anchor = anchor_check[-1].upper()  # S/E for start/end of month
            if anchor == "E":
                anchor = ""
            lead_freq = f"{lead_freq_string}{anchor}"
            if lead_freq_string in ["A", "Q"]:  # add month info again
                init_freq = xr.infer_freq(init)
                if init_freq:
                    if "-" in init_freq:
                        lead_freq = lead_freq + "-" + init_freq.split("-")[-1]
        else:
            raise ValueError(
                f"could not shift init={init} in calendar={init_calendar} by "
                f" lead={lead} {lead_unit}")
        return init.shift(lead, lead_freq)
    else:  # lower freq
        # reducing pentads, weeks (W) to days
        if lead_unit == "weeks":
            lead_unit = "W"
        elif lead_unit == "pentads":
            lead = lead * 5
            lead_unit = "D"
        return init + pd.Timedelta(float(lead), lead_unit)
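
A hedged example for my_shift above (CoordinateError and the lead-units convention are assumed from climpred): a month-start CFTimeIndex shifted by a 3-month lead goes through the 'MS' branch.

import xarray as xr

init = xr.cftime_range("2000-01-01", periods=4, freq="MS")
lead = xr.DataArray(3, attrs={"units": "months"})
shifted = my_shift(init, lead)  # expected to start at 2000-04-01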