Ejemplo n.º 1
0
def temporal_aggregation(ds: DatasetLike.TYPE,
                         method: str = 'mean',
                         monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Perform monthly aggregation of a daily dataset according to the given
    method.

    :param ds: Dataset to aggregate
    :param method: Aggregation method
    :return: Aggregated dataset
    """
    ds = DatasetLike.convert(ds)
    # Check if time dtype is what we want
    if 'datetime64[ns]' != ds.time.dtype:
        raise ValueError(
            'Temporal aggregation operation expects a dataset with the'
            ' time coordinate of type datetime64[ns], but received'
            ' {}. Running the normalize operation on this'
            ' dataset may help'.format(ds.time.dtype))

    # Check if we have a daily dataset
    try:
        if ds.attrs['time_coverage_resolution'] != 'P1D':
            raise ValueError(
                'Temporal aggregation operation expects a daily dataset')
    except KeyError:
        raise ValueError('Could not determine temporal resolution. Running'
                         ' the adjust_temporal_attrs operation beforehand may'
                         ' help.')

    with monitor.observing("resample dataset"):
        retset = ds.resample(freq='MS',
                             dim='time',
                             keep_attrs=True,
                             how=method)

    for var in retset.data_vars:
        try:
            retset[var].attrs['cell_methods'] = \
                    retset[var].attrs['cell_methods'] + \
                    ' time: {} within years'.format(method)
        except KeyError:
            retset[var].attrs['cell_methods'] = 'time: {} within years'.format(
                method)

    return adjust_temporal_attrs(retset)
Ejemplo n.º 2
0
def temporal_aggregation(ds: DatasetLike.TYPE,
                         method: str = 'mean',
                         output_resolution: str = 'month',
                         custom_resolution: str = None,
                         monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Perform aggregation of dataset according to the given
    method and output resolution.

    Note that the operation does not perform weighting. Depending on the
    combination of input and output resolutions, as well as aggregation
    method, the resulting dataset might yield unexpected results.

    Resolution 'month' will result in a monthly dataset with each month
    denoted by its first date. Resolution 'season' will result in a dataset
    aggregated to DJF, MAM, JJA, SON seasons, each denoted by the first
    date of the season.

    The operation also works with custom resolution strings, see:
    http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
    If ``custom_resolution`` is provided, it will override ``output_resolution``.

    Some examples:
      'QS-JUN' produces an output dataset on a quarterly resolution where the
      year ends in 1st of June and each quarter is denoted by its first date
      '8MS' produces an output dataset on an eight-month resolution where each
      period is denoted by the first date. Note that such periods will not be
      consistent over years.
      '8D' produces a dataset on an eight day resolution

    :param ds: Dataset to aggregate
    :param method: Aggregation method
    :param output_resolution: Desired temporal resolution of the output dataset
    :param custom_resolution: Custom temporal resolution, overrides output_resolution
    :return: Aggregated dataset
    """
    ds = DatasetLike.convert(ds)
    # Check if time dtype is what we want
    if 'datetime64[ns]' != ds.time.dtype:
        raise ValidationError(
            'Temporal aggregation operation expects a dataset with the'
            ' time coordinate of type datetime64[ns], but received'
            ' {}. Running the normalize operation on this'
            ' dataset may help'.format(ds.time.dtype))

    # Try to figure out the input frequency
    try:
        in_freq = ds.attrs['time_coverage_resolution']
    except KeyError:
        raise ValidationError(
            'Could not determine temporal resolution of input dataset.'
            ' Running the adjust_temporal_attrs operation beforehand may'
            ' help.')

    if custom_resolution:
        freq = custom_resolution
    else:
        frequencies = {'month': 'MS', 'season': 'QS-DEC'}
        freq = frequencies[output_resolution]

    _validate_freq(in_freq, freq)

    with monitor.observing("resample dataset"):
        try:
            retset = getattr(resampler, method)(ds.resample(time=freq,
                                                            keep_attrs=True))
        except AttributeError:
            raise ValidationError(
                f'Provided aggregation method {method} is not valid.')

    for var in retset.data_vars:
        try:
            retset[var].attrs['cell_methods'] = \
                retset[var].attrs['cell_methods'] + \
                ' time: {} within years'.format(method)
        except KeyError:
            retset[var].attrs['cell_methods'] = 'time: {} within years'.format(
                method)

    return adjust_temporal_attrs(retset)
Ejemplo n.º 3
0
def temporal_aggregation(ds: DatasetLike.TYPE,
                         method: str = 'mean',
                         output_resolution: str = 'month',
                         custom_resolution: str = None,
                         monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Perform aggregation of dataset according to the given
    method and output resolution.

    Note that the operation does not perform weighting. Depending on the
    combination of input and output resolutions, as well as aggregation
    method, the resulting dataset might yield unexpected results.

    Resolution 'month' will result in a monthly dataset with each month
    denoted by its first date. Resolution 'season' will result in a dataset
    aggregated to DJF, MAM, JJA, SON seasons, each denoted by the first
    date of the season.

    The operation also works with custom resolution strings, see:
    http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
    If ``custom_resolution`` is provided, it will override ``output_resolution``.

    Some examples:
      'QS-JUN' produces an output dataset on a quarterly resolution where the
      year ends in 1st of June and each quarter is denoted by its first date
      '8MS' produces an output dataset on an eight-month resolution where each
      period is denoted by the first date. Note that such periods will not be
      consistent over years.
      '8D' produces a dataset on an eight day resolution

    :param ds: Dataset to aggregate
    :param method: Aggregation method
    :param output_resolution: Desired temporal resolution of the output dataset
    :param custom_resolution: Custom temporal resolution, overrides output_resolution
    :return: Aggregated dataset
    """
    ds = DatasetLike.convert(ds)
    # Check if time dtype is what we want
    if 'datetime64[ns]' != ds.time.dtype:
        raise ValidationError('Temporal aggregation operation expects a dataset with the'
                              ' time coordinate of type datetime64[ns], but received'
                              ' {}. Running the normalize operation on this'
                              ' dataset may help'.format(ds.time.dtype))

    # Try to figure out the input frequency
    try:
        in_freq = ds.attrs['time_coverage_resolution']
    except KeyError:
        raise ValidationError('Could not determine temporal resolution of input dataset.'
                              ' Running the adjust_temporal_attrs operation beforehand may'
                              ' help.')

    if custom_resolution:
        freq = custom_resolution
    else:
        frequencies = {'month': 'MS', 'season': 'QS-DEC'}
        freq = frequencies[output_resolution]

    _validate_freq(in_freq, freq)

    with monitor.observing("resample dataset"):
        try:
            retset = getattr(resampler, method)(ds.resample(time=freq, keep_attrs=True))
        except AttributeError:
            raise ValidationError(f'Provided aggregation method {method} is not valid.')

    for var in retset.data_vars:
        try:
            retset[var].attrs['cell_methods'] = \
                retset[var].attrs['cell_methods'] + \
                ' time: {} within years'.format(method)
        except KeyError:
            retset[var].attrs['cell_methods'] = 'time: {} within years'.format(method)

    return adjust_temporal_attrs(retset)