Example #1
    def test_single_slice(self):
        """Test a case when the dataset is a single time slice"""
        # With bnds
        ds = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])),
            'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90),
            'nv': [0, 1],
            'time': [datetime(2000, 1, 1)]
        })
        ds.time.attrs['bounds'] = 'time_bnds'
        ds['time_bnds'] = (['time', 'nv'],
                           [(datetime(2000, 1, 1), datetime(2000, 1, 31))])

        ds1 = adjust_temporal_attrs(ds)

        # Make sure original dataset is not altered
        with self.assertRaises(KeyError):
            # noinspection PyStatementEffect
            ds.attrs['time_coverage_start']

        # Make sure expected values are in the new dataset
        self.assertEqual(ds1.attrs['time_coverage_start'],
                         '2000-01-01T00:00:00.000000000')
        self.assertEqual(ds1.attrs['time_coverage_end'],
                         '2000-01-31T00:00:00.000000000')
        self.assertEqual(ds1.attrs['time_coverage_duration'], 'P31D')
        with self.assertRaises(KeyError):
            # Resolution is not defined for a single slice
            # noinspection PyStatementEffect
            ds1.attrs['time_coverage_resolution']

        # Without bnds
        ds = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])),
            'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90),
            'time': [datetime(2000, 1, 1)]
        })

        ds1 = adjust_temporal_attrs(ds)

        self.assertEqual(ds1.attrs['time_coverage_start'],
                         '2000-01-01T00:00:00.000000000')
        self.assertEqual(ds1.attrs['time_coverage_end'],
                         '2000-01-01T00:00:00.000000000')
        # Without bounds, neither resolution nor duration can be derived
        # from a single time value
        with self.assertRaises(KeyError):
            # noinspection PyStatementEffect
            ds1.attrs['time_coverage_resolution']
        with self.assertRaises(KeyError):
            # noinspection PyStatementEffect
            ds1.attrs['time_coverage_duration']
Example #2
    def test_single_slice(self):
        """Test a case when the dataset is a single time slice"""
        # With bnds
        ds = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])),
            'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90),
            'nv': [0, 1],
            'time': [datetime(2000, 1, 1)]})
        ds.time.attrs['bounds'] = 'time_bnds'
        ds['time_bnds'] = (['time', 'nv'],
                           [(datetime(2000, 1, 1), datetime(2000, 1, 31))])

        ds1 = adjust_temporal_attrs(ds)

        # Make sure original dataset is not altered
        with self.assertRaises(KeyError):
            # noinspection PyStatementEffect
            ds.attrs['time_coverage_start']

        # Make sure expected values are in the new dataset
        self.assertEqual(ds1.attrs['time_coverage_start'],
                         '2000-01-01T00:00:00.000000000')
        self.assertEqual(ds1.attrs['time_coverage_end'],
                         '2000-01-31T00:00:00.000000000')
        self.assertEqual(ds1.attrs['time_coverage_duration'],
                         'P31D')
        with self.assertRaises(KeyError):
            # Resolution is not defined for a single slice
            # noinspection PyStatementEffect
            ds1.attrs['time_coverage_resolution']

        # Without bnds
        ds = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])),
            'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 1])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90),
            'time': [datetime(2000, 1, 1)]})

        ds1 = adjust_temporal_attrs(ds)

        self.assertEqual(ds1.attrs['time_coverage_start'],
                         '2000-01-01T00:00:00.000000000')
        self.assertEqual(ds1.attrs['time_coverage_end'],
                         '2000-01-01T00:00:00.000000000')
        # Without bounds, neither resolution nor duration can be derived
        # from a single time value
        with self.assertRaises(KeyError):
            # noinspection PyStatementEffect
            ds1.attrs['time_coverage_resolution']
        with self.assertRaises(KeyError):
            # noinspection PyStatementEffect
            ds1.attrs['time_coverage_duration']
Example #3
File: io.py Project: whigg/cate
def read_netcdf(file: str,
                drop_variables: VarNamesLike.TYPE = None,
                decode_cf: bool = True,
                normalize: bool = True,
                decode_times: bool = True,
                engine: str = None) -> xr.Dataset:
    """
    Read a dataset from a netCDF 3/4 or HDF file.

    :param file: The netCDF file path.
    :param drop_variables: List of variables to be dropped.
    :param decode_cf: Whether to decode CF attributes and coordinate variables.
    :param normalize: Whether to normalize the dataset's geo- and time-coding upon opening. See operation ``normalize``.
    :param decode_times: Whether to decode time information (convert time coordinates to ``datetime`` objects).
    :param engine: Optional netCDF engine name.
    """
    drop_variables = VarNamesLike.convert(drop_variables)
    ds = xr.open_dataset(file,
                         drop_variables=drop_variables,
                         decode_cf=decode_cf,
                         decode_times=decode_times,
                         engine=engine)
    chunks = get_spatial_ext_chunk_sizes(ds)
    if chunks and 'time' in ds.dims:
        chunks['time'] = 1
    if chunks:
        ds = ds.chunk(chunks)
    if normalize:
        return adjust_temporal_attrs(normalize_op(ds))
    return ds
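A minimal usage sketch, assuming the operation above is importable; the file name and dropped variable are placeholders:

# Hypothetical call: open a local netCDF file, drop an unneeded variable, and
# inspect the temporal attributes set by adjust_temporal_attrs during normalization.
ds = read_netcdf('sst_monthly.nc', drop_variables=['quality_flag'])
print(ds.attrs.get('time_coverage_start'), ds.attrs.get('time_coverage_end'))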
Example #4
def subset_temporal(ds: DatasetLike.TYPE,
                    time_range: TimeRangeLike.TYPE) -> xr.Dataset:
    """
    Do a temporal subset of the dataset.

    :param ds: Dataset or dataframe to subset
    :param time_range: Time range to select
    :return: Subset dataset
    """
    ds = DatasetLike.convert(ds)
    time_range = TimeRangeLike.convert(time_range)
    return adjust_temporal_attrs(subset_temporal_impl(ds, time_range))
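A hedged call sketch, assuming TimeRangeLike accepts a (start, end) pair of datetime objects; the input dataset is a placeholder:

from datetime import datetime

# Hypothetical call: keep only the first half of 2000; the returned subset
# carries refreshed time_coverage_* attributes from adjust_temporal_attrs.
subset = subset_temporal(ds, (datetime(2000, 1, 1), datetime(2000, 6, 30)))
print(subset.attrs.get('time_coverage_start'), subset.attrs.get('time_coverage_end'))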
Example #5
def subset_temporal(ds: DatasetLike.TYPE,
                    time_range: TimeRangeLike.TYPE) -> xr.Dataset:
    """
    Do a temporal subset of the dataset.

    :param ds: Dataset or dataframe to subset
    :param time_range: Time range to select
    :return: Subset dataset
    """
    ds = DatasetLike.convert(ds)
    time_range = TimeRangeLike.convert(time_range)
    return adjust_temporal_attrs(subset_temporal_impl(ds, time_range))
Example #6
    def test_nominal(self):
        ds = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
            'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90),
            'time': [datetime(2000, x, 1) for x in range(1, 13)]
        })

        ds1 = adjust_temporal_attrs(ds)

        # Make sure original dataset is not altered
        with self.assertRaises(KeyError):
            # noinspection PyStatementEffect
            ds.attrs['time_coverage_start']

        # Make sure expected values are in the new dataset
        self.assertEqual(ds1.attrs['time_coverage_start'],
                         '2000-01-01T00:00:00.000000000')
        self.assertEqual(ds1.attrs['time_coverage_end'],
                         '2000-12-01T00:00:00.000000000')
        self.assertEqual(ds1.attrs['time_coverage_resolution'], 'P1M')
        self.assertEqual(ds1.attrs['time_coverage_duration'], 'P336D')

        # Test existing attributes update
        # noinspection PyTypeChecker
        indexers = {
            'time': slice(datetime(2000, 2, 15), datetime(2000, 6, 15))
        }
        ds2 = ds1.sel(**indexers)
        ds2 = adjust_temporal_attrs(ds2)

        self.assertEqual(ds2.attrs['time_coverage_start'],
                         '2000-03-01T00:00:00.000000000')
        self.assertEqual(ds2.attrs['time_coverage_end'],
                         '2000-06-01T00:00:00.000000000')
        self.assertEqual(ds2.attrs['time_coverage_resolution'], 'P1M')
        self.assertEqual(ds2.attrs['time_coverage_duration'], 'P93D')
Example #7
    def test_nominal(self):
        ds = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
            'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90),
            'time': [datetime(2000, x, 1) for x in range(1, 13)]})

        ds1 = adjust_temporal_attrs(ds)

        # Make sure original dataset is not altered
        with self.assertRaises(KeyError):
            # noinspection PyStatementEffect
            ds.attrs['time_coverage_start']

        # Make sure expected values are in the new dataset
        self.assertEqual(ds1.attrs['time_coverage_start'],
                         '2000-01-01T00:00:00.000000000')
        self.assertEqual(ds1.attrs['time_coverage_end'],
                         '2000-12-01T00:00:00.000000000')
        self.assertEqual(ds1.attrs['time_coverage_resolution'],
                         'P1M')
        self.assertEqual(ds1.attrs['time_coverage_duration'],
                         'P336D')

        # Test existing attributes update
        # noinspection PyTypeChecker
        indexers = {'time': slice(datetime(2000, 2, 15), datetime(2000, 6, 15))}
        ds2 = ds1.sel(**indexers)
        ds2 = adjust_temporal_attrs(ds2)

        self.assertEqual(ds2.attrs['time_coverage_start'],
                         '2000-03-01T00:00:00.000000000')
        self.assertEqual(ds2.attrs['time_coverage_end'],
                         '2000-06-01T00:00:00.000000000')
        self.assertEqual(ds2.attrs['time_coverage_resolution'],
                         'P1M')
        self.assertEqual(ds2.attrs['time_coverage_duration'],
                         'P93D')
Example #8
    def test_wrong_type(self):
        ds = xr.Dataset({
            'first': (['time', 'lat', 'lon'], np.zeros([12, 45, 90])),
            'second': (['time', 'lat', 'lon'], np.zeros([12, 45, 90])),
            'lon': (['lon'], np.linspace(-178, 178, 90)),
            'lat': (['lat'], np.linspace(-88, 88, 45)),
            'time': (['time'], np.linspace(0, 1, 12))})

        ds1 = adjust_temporal_attrs(ds)

        self.assertIs(ds1, ds)
        self.assertNotIn('time_coverage_start', ds1)
        self.assertNotIn('time_coverage_end', ds1)
        self.assertNotIn('time_coverage_resolution', ds1)
        self.assertNotIn('time_coverage_duration', ds1)
Example #9
File: io.py Project: whigg/cate
def read_zarr(path: str,
              key: str = None,
              secret: str = None,
              token: str = None,
              drop_variables: VarNamesLike.TYPE = None,
              decode_cf: bool = True,
              decode_times: bool = True,
              normalize: bool = True) -> xr.Dataset:
    """
    Read a dataset from a Zarr directory, Zarr ZIP archive, or remote Zarr object storage.

    For the Zarr format, refer to http://zarr.readthedocs.io/en/stable/.

    :param path: Zarr directory path, Zarr ZIP archive path, or (S3) object storage URL.
    :param key: Optional (AWS) access key identifier. Valid only if *path* is a URL.
    :param secret: Optional (AWS) secret access key. Valid only if *path* is a URL.
    :param token: Optional (AWS) access token. Valid only if *path* is a URL.
    :param drop_variables: List of variables to be dropped.
    :param decode_cf: Whether to decode CF attributes and coordinate variables.
    :param decode_times: Whether to decode time information (convert time coordinates to ``datetime`` objects).
    :param normalize: Whether to normalize the dataset's geo- and time-coding upon opening. See operation ``normalize``.
    """
    drop_variables = VarNamesLike.convert(drop_variables)

    is_s3_url = path.startswith('s3://')
    is_http_url = path.startswith('http://') or path.startswith('https://')
    if is_s3_url or is_http_url:
        root = path
        client_kwargs = None
        if is_http_url:
            url = urllib.parse.urlparse(path)
            root = url.path[1:] if url.path.startswith('/') else url.path
            client_kwargs = dict(endpoint_url=f'{url.scheme}://{url.netloc}')
        store = s3fs.S3Map(root, s3=s3fs.S3FileSystem(anon=not (key or secret or token),
                                                      key=key,
                                                      secret=secret,
                                                      token=token,
                                                      client_kwargs=client_kwargs))
    else:
        store = path

    ds = xr.open_zarr(store,
                      drop_variables=drop_variables,
                      decode_cf=decode_cf,
                      decode_times=decode_times)
    if normalize:
        return adjust_temporal_attrs(normalize_op(ds))
    return ds
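Two hedged call sketches, one for a local Zarr directory and one for S3 object storage; the paths are placeholders, and anonymous S3 access applies because no key, secret, or token is passed:

# Hypothetical local Zarr directory
ds_local = read_zarr('/data/sst.zarr')

# Hypothetical public S3 bucket; anonymous access is implied when no credentials are given
ds_remote = read_zarr('s3://example-bucket/sst.zarr')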
Example #10
    def test_wrong_type(self):
        ds = xr.Dataset({
            'first': (['time', 'lat', 'lon'], np.zeros([12, 45, 90])),
            'second': (['time', 'lat', 'lon'], np.zeros([12, 45, 90])),
            'lon': (['lon'], np.linspace(-178, 178, 90)),
            'lat': (['lat'], np.linspace(-88, 88, 45)),
            'time': (['time'], np.linspace(0, 1, 12))
        })

        ds1 = adjust_temporal_attrs(ds)

        self.assertIs(ds1, ds)
        self.assertNotIn('time_coverage_start', ds1)
        self.assertNotIn('time_coverage_end', ds1)
        self.assertNotIn('time_coverage_resolution', ds1)
        self.assertNotIn('time_coverage_duration', ds1)
Example #11
def temporal_aggregation(ds: DatasetLike.TYPE,
                         method: str = 'mean',
                         monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Perform monthly aggregation of a daily dataset according to the given
    method.

    :param ds: Dataset to aggregate
    :param method: Aggregation method
    :return: Aggregated dataset
    """
    ds = DatasetLike.convert(ds)
    # Check if time dtype is what we want
    if 'datetime64[ns]' != ds.time.dtype:
        raise ValueError(
            'Temporal aggregation operation expects a dataset with the'
            ' time coordinate of type datetime64[ns], but received'
            ' {}. Running the normalize operation on this'
            ' dataset may help'.format(ds.time.dtype))

    # Check if we have a daily dataset
    try:
        if ds.attrs['time_coverage_resolution'] != 'P1D':
            raise ValueError(
                'Temporal aggregation operation expects a daily dataset')
    except KeyError:
        raise ValueError('Could not determine temporal resolution. Running'
                         ' the adjust_temporal_attrs operation beforehand may'
                         ' help.')

    with monitor.observing("resample dataset"):
        retset = ds.resample(freq='MS',
                             dim='time',
                             keep_attrs=True,
                             how=method)

    for var in retset.data_vars:
        try:
            retset[var].attrs['cell_methods'] = \
                    retset[var].attrs['cell_methods'] + \
                    ' time: {} within years'.format(method)
        except KeyError:
            retset[var].attrs['cell_methods'] = 'time: {} within years'.format(
                method)

    return adjust_temporal_attrs(retset)
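This variant appears to target the older xarray resample API (freq/dim/how keywords) and requires a daily input whose time_coverage_resolution attribute is already 'P1D'. A hedged sketch of the intended call pattern, with a placeholder daily dataset:

# Hypothetical call: make sure the resolution attribute exists, then aggregate to monthly means.
daily_ds = adjust_temporal_attrs(daily_ds)   # expected to yield time_coverage_resolution == 'P1D'
monthly_ds = temporal_aggregation(daily_ds, method='mean')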
Example #12
    def test_only_time_dim_generated(self):
        ds = xr.Dataset({'first': (['lat', 'lon'], np.zeros([90, 180])),
                         'second': (['lat', 'lon'], np.zeros([90, 180]))},
                        coords={'lat': np.linspace(-89.5, 89.5, 90),
                                'lon': np.linspace(-179.5, 179.5, 180)},
                        attrs={'time_coverage_start': '20120101'})

        new_ds = adjust_temporal_attrs(ds)

        self.assertIsNot(ds, new_ds)
        self.assertEqual(len(new_ds.coords), 3)
        self.assertIn('lon', new_ds.coords)
        self.assertIn('lat', new_ds.coords)
        self.assertIn('time', new_ds.coords)
        self.assertNotIn('time_bnds', new_ds.coords)

        import pandas as pd
        self.assertEqual(new_ds.first.shape, (1, 90, 180))
        self.assertEqual(new_ds.second.shape, (1, 90, 180))
        self.assertEqual(new_ds.coords['time'][0], xr.DataArray(pd.to_datetime('2012-01-01')))
Example #13
    def test_bnds(self):
        """Test a case when time_bnds is available"""
        time = [datetime(2000, x, 1) for x in range(1, 13)]
        ds = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
            'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90),
            'nv': [0, 1],
            'time': time
        })

        month_ends = list()
        for x in ds.time.values:
            year = int(str(x)[0:4])
            month = int(str(x)[5:7])
            day = calendar.monthrange(year, month)[1]
            month_ends.append(datetime(year, month, day))

        ds['time_bnds'] = (['time', 'nv'], list(zip(time, month_ends)))
        ds.time.attrs['bounds'] = 'time_bnds'

        ds1 = adjust_temporal_attrs(ds)

        # Make sure original dataset is not altered
        with self.assertRaises(KeyError):
            # noinspection PyStatementEffect
            ds.attrs['time_coverage_start']

        # Make sure expected values are in the new dataset
        self.assertEqual(ds1.attrs['time_coverage_start'],
                         '2000-01-01T00:00:00.000000000')
        self.assertEqual(ds1.attrs['time_coverage_end'],
                         '2000-12-31T00:00:00.000000000')
        self.assertEqual(ds1.attrs['time_coverage_resolution'], 'P1M')
        self.assertEqual(ds1.attrs['time_coverage_duration'], 'P366D')
Example #14
File: io.py Project: pwambach/cate
def read_zarr(path: str,
              file_system: str = 'Local',
              drop_variables: VarNamesLike.TYPE = None,
              decode_cf: bool = True,
              decode_times: bool = True,
              normalize: bool = True) -> xr.Dataset:
    """
    Read a dataset from a Zarr directory, Zarr ZIP archive, or remote Zarr object storage.

    For the Zarr format, refer to http://zarr.readthedocs.io/en/stable/.

    :param path: Zarr directory path, Zarr ZIP archive path, or object storage path or bucket name.
    :param file_system: File system identifier, "Local" is your locally mounted file system,
           for Amazon S3 use "S3", for general Object Storage use "OBS".
    :param drop_variables: List of variables to be dropped.
    :param decode_cf: Whether to decode CF attributes and coordinate variables.
    :param decode_times: Whether to decode time information (convert time coordinates to ``datetime`` objects).
    :param normalize: Whether to normalize the dataset's geo- and time-coding upon opening. See operation ``normalize``.
    """
    drop_variables = VarNamesLike.convert(drop_variables)

    if file_system == 'Local':
        ds = xr.open_zarr(path,
                          drop_variables=drop_variables,
                          decode_cf=decode_cf,
                          decode_times=decode_times)
    elif file_system == 'S3' or file_system == 'OBS':
        import s3fs
        store = s3fs.S3Map(path, s3=(s3fs.S3FileSystem(anon=True)))
        ds = xr.open_zarr(store,
                          drop_variables=drop_variables,
                          decode_cf=decode_cf,
                          decode_times=decode_times)
    else:
        raise ValidationError(f'Unknown file_system {file_system!r}')

    if normalize:
        return adjust_temporal_attrs(normalize_op(ds))
    return ds
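A hedged sketch for this variant, which selects the backend through the file_system parameter instead of a URL scheme; the paths are placeholders and S3 access is anonymous only:

# Hypothetical local directory (default file_system='Local')
ds = read_zarr('/data/sst.zarr')

# Hypothetical S3 bucket/key; this variant always connects anonymously
ds = read_zarr('example-bucket/sst.zarr', file_system='S3')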
Example #15
    def test_bnds(self):
        """Test a case when time_bnds is available"""
        time = [datetime(2000, x, 1) for x in range(1, 13)]
        ds = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
            'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
            'lat': np.linspace(-88, 88, 45),
            'lon': np.linspace(-178, 178, 90),
            'nv': [0, 1],
            'time': time})

        month_ends = list()
        for x in ds.time.values:
            year = int(str(x)[0:4])
            month = int(str(x)[5:7])
            day = calendar.monthrange(year, month)[1]
            month_ends.append(datetime(year, month, day))

        ds['time_bnds'] = (['time', 'nv'], list(zip(time, month_ends)))
        ds.time.attrs['bounds'] = 'time_bnds'

        ds1 = adjust_temporal_attrs(ds)

        # Make sure original dataset is not altered
        with self.assertRaises(KeyError):
            # noinspection PyStatementEffect
            ds.attrs['time_coverage_start']

        # Make sure expected values are in the new dataset
        self.assertEqual(ds1.attrs['time_coverage_start'],
                         '2000-01-01T00:00:00.000000000')
        self.assertEqual(ds1.attrs['time_coverage_end'],
                         '2000-12-31T00:00:00.000000000')
        self.assertEqual(ds1.attrs['time_coverage_resolution'],
                         'P1M')
        self.assertEqual(ds1.attrs['time_coverage_duration'],
                         'P366D')
Example #16
File: io.py Project: whigg/cate
def open_dataset(ds_name: str = '',
                 ds_id: str = '',
                 time_range: TimeRangeLike.TYPE = None,
                 region: PolygonLike.TYPE = None,
                 var_names: VarNamesLike.TYPE = None,
                 normalize: bool = True,
                 force_local: bool = False,
                 local_ds_id: str = None,
                 monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Open a dataset from a data source identified by *ds_name*.

    :param ds_name: The name of the data source. This parameter is deprecated; please use *ds_id* instead.
    :param ds_id: The identifier for the data source.
    :param time_range: Optional time range of the requested dataset
    :param region: Optional spatial region of the requested dataset
    :param var_names: Optional names of variables of the requested dataset
    :param normalize: Whether to normalize the dataset's geo- and time-coding upon opening. See operation ``normalize``.
    :param force_local: Whether to make a local copy of remote data source if it's not present
    :param local_ds_id: Optional local identifier for newly created local copy of remote data source.
           Used only if force_local=True.
    :param monitor: A progress monitor
    :return: A new dataset instance.
    """
    import cate.core.ds
    ds = cate.core.ds.open_dataset(data_source=ds_id or ds_name,
                                   time_range=time_range,
                                   var_names=var_names,
                                   region=region,
                                   force_local=force_local,
                                   local_ds_id=local_ds_id,
                                   monitor=monitor)
    if ds and normalize:
        return adjust_temporal_attrs(normalize_op(ds))

    return ds
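A hedged call sketch; the data source identifier, time range, and variable name are placeholders for whatever data stores are configured:

from datetime import datetime

# Hypothetical call: open one year of a registered data source and let
# normalize/adjust_temporal_attrs set the geo- and time-coding attributes.
ds = open_dataset(ds_id='example.data.source',
                  time_range=(datetime(2007, 1, 1), datetime(2007, 12, 31)),
                  var_names=['analysed_sst'])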
Example #17
def anomaly_external(ds: xr.Dataset,
                     file: str,
                     transform: str = None,
                     monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Calculate anomaly with external reference data, for example, a climatology.
    The given reference dataset is expected to consist of 12 time slices, one
    for each month.

    The returned dataset will contain the variable names found in both the
    reference and the given dataset. Names found in the given dataset, but not in
    the reference, will be dropped from the resulting dataset. The calculated
    anomaly will be against the corresponding month of the reference data.
    E.g. January against January, etc.

    In case spatial extents differ between the reference and the given dataset,
    the anomaly will be calculated on the intersection.

    :param ds: The dataset to calculate anomalies from
    :param file: Path to reference data file
    :param transform: Apply the given transformation before calculating the anomaly.
                      For supported operations see help on 'ds_arithmetics' operation.
    :param monitor: a progress monitor.
    :return: The anomaly dataset
    """
    # Check if the time coordinate is of dtype datetime
    try:
        if ds.time.dtype != 'datetime64[ns]':
            raise ValidationError('The dataset provided for anomaly calculation'
                                  ' is required to have a time coordinate of'
                                  ' dtype datetime64[ns]. Running the normalize'
                                  ' operation on this dataset might help.')
    except AttributeError:
        raise ValidationError('The dataset provided for anomaly calculation'
                              ' is required to have a time coordinate.')

    try:
        if ds.attrs['time_coverage_resolution'] != 'P1M':
            raise ValidationError('anomaly_external expects a monthly dataset'
                                  ' got: {} instead.'.format(ds.attrs['time_coverage_resolution']))
    except KeyError:
        try:
            ds = adjust_temporal_attrs(ds)
            if ds.attrs['time_coverage_resolution'] != 'P1M':
                raise ValidationError('anomaly_external expects a monthly dataset'
                                      ' got: {} instead.'.format(ds.attrs['time_coverage_resolution']))
        except KeyError:
            raise ValidationError('Could not determine temporal resolution'
                                  ' of the given input dataset.')

    clim = xr.open_dataset(file)
    try:
        if len(clim.time) != 12:
            raise ValidationError('The reference dataset is expected to be a '
                                  'monthly climatology. The provided dataset has'
                                  ' a time dimension with length: {}'.format(len(clim.time)))
    except AttributeError:
        raise ValidationError('The reference dataset is required to '
                              'have a time coordinate.')

    ret = ds.copy()
    if transform:
        ret = ds_arithmetics(ds, transform)
    # Group by months, subtract the appropriate slice from the reference
    # Note that this requires that 'time' coordinate labels are of type
    # datetime64[ns]
    total_work = 100
    step = 100 / 12

    with monitor.starting('Anomaly', total_work=total_work):
        monitor.progress(work=0)
        kwargs = {'ref': clim, 'monitor': monitor, 'step': step}
        ret = ret.groupby(ds['time.month']).apply(_group_anomaly,
                                                  **kwargs)

    # Running groupby results in a redundant 'month' variable being added to
    # the dataset
    ret = ret.drop('month')
    ret.attrs = ds.attrs
    # The dataset may be cropped
    return adjust_spatial_attrs(ret)
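A hedged usage sketch; the input is assumed to be monthly (or such that adjust_temporal_attrs can infer 'P1M'), and the climatology path and transform value are placeholders (see the ds_arithmetics operation for supported transforms):

# Hypothetical call: subtract a 12-slice monthly climatology from a monthly dataset.
anomalies = anomaly_external(monthly_ds, 'climatology_1981_2010.nc', transform='log10')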
Example #18
def temporal_aggregation(ds: DatasetLike.TYPE,
                         method: str = 'mean',
                         output_resolution: str = 'month',
                         custom_resolution: str = None,
                         monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Perform aggregation of dataset according to the given
    method and output resolution.

    Note that the operation does not perform weighting. Depending on the
    combination of input and output resolutions, as well as aggregation
    method, the resulting dataset might yield unexpected results.

    Resolution 'month' will result in a monthly dataset with each month
    denoted by its first date. Resolution 'season' will result in a dataset
    aggregated to DJF, MAM, JJA, SON seasons, each denoted by the first
    date of the season.

    The operation also works with custom resolution strings, see:
    http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
    If ``custom_resolution`` is provided, it will override ``output_resolution``.

    Some examples:
      'QS-JUN' produces an output dataset on a quarterly resolution where the
      year ends in 1st of June and each quarter is denoted by its first date
      '8MS' produces an output dataset on an eight-month resolution where each
      period is denoted by the first date. Note that such periods will not be
      consistent over years.
      '8D' produces a dataset on an eight day resolution

    :param ds: Dataset to aggregate
    :param method: Aggregation method
    :param output_resolution: Desired temporal resolution of the output dataset
    :param custom_resolution: Custom temporal resolution, overrides output_resolution
    :return: Aggregated dataset
    """
    ds = DatasetLike.convert(ds)
    # Check if time dtype is what we want
    if 'datetime64[ns]' != ds.time.dtype:
        raise ValidationError('Temporal aggregation operation expects a dataset with the'
                              ' time coordinate of type datetime64[ns], but received'
                              ' {}. Running the normalize operation on this'
                              ' dataset may help'.format(ds.time.dtype))

    # Try to figure out the input frequency
    try:
        in_freq = ds.attrs['time_coverage_resolution']
    except KeyError:
        raise ValidationError('Could not determine temporal resolution of input dataset.'
                              ' Running the adjust_temporal_attrs operation beforehand may'
                              ' help.')

    if custom_resolution:
        freq = custom_resolution
    else:
        frequencies = {'month': 'MS', 'season': 'QS-DEC'}
        freq = frequencies[output_resolution]

    _validate_freq(in_freq, freq)

    with monitor.observing("resample dataset"):
        try:
            retset = getattr(resampler, method)(ds.resample(time=freq, keep_attrs=True))
        except AttributeError:
            raise ValidationError(f'Provided aggregation method {method} is not valid.')

    for var in retset.data_vars:
        try:
            retset[var].attrs['cell_methods'] = \
                retset[var].attrs['cell_methods'] + \
                ' time: {} within years'.format(method)
        except KeyError:
            retset[var].attrs['cell_methods'] = 'time: {} within years'.format(method)

    return adjust_temporal_attrs(retset)
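A few hedged call sketches matching the docstring's examples; the input dataset is a placeholder assumed to already carry a valid time_coverage_resolution attribute (for instance after adjust_temporal_attrs), and the chosen frequencies are subject to the _validate_freq check:

# Monthly means, each month labelled by its first date (default output_resolution)
monthly = temporal_aggregation(ds, method='mean')

# Seasonal (DJF/MAM/JJA/SON) aggregation
seasonal = temporal_aggregation(ds, method='mean', output_resolution='season')

# Custom pandas offset alias: eight-day periods, overriding output_resolution
eight_day = temporal_aggregation(ds, method='mean', custom_resolution='8D')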
Example #19
def temporal_aggregation(ds: DatasetLike.TYPE,
                         method: str = 'mean',
                         output_resolution: str = 'month',
                         custom_resolution: str = None,
                         monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Perform aggregation of dataset according to the given
    method and output resolution.

    Note that the operation does not perform weighting. Depending on the
    combination of input and output resolutions, as well as aggregation
    method, the resulting dataset might yield unexpected results.

    Resolution 'month' will result in a monthly dataset with each month
    denoted by its first date. Resolution 'season' will result in a dataset
    aggregated to DJF, MAM, JJA, SON seasons, each denoted by the first
    date of the season.

    The operation also works with custom resolution strings, see:
    http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
    If ``custom_resolution`` is provided, it will override ``output_resolution``.

    Some examples:
      'QS-JUN' produces an output dataset on a quarterly resolution where the
      year ends in 1st of June and each quarter is denoted by its first date
      '8MS' produces an output dataset on an eight-month resolution where each
      period is denoted by the first date. Note that such periods will not be
      consistent over years.
      '8D' produces a dataset on an eight day resolution

    :param ds: Dataset to aggregate
    :param method: Aggregation method
    :param output_resolution: Desired temporal resolution of the output dataset
    :param custom_resolution: Custom temporal resolution, overrides output_resolution
    :return: Aggregated dataset
    """
    ds = DatasetLike.convert(ds)
    # Check if time dtype is what we want
    if 'datetime64[ns]' != ds.time.dtype:
        raise ValidationError(
            'Temporal aggregation operation expects a dataset with the'
            ' time coordinate of type datetime64[ns], but received'
            ' {}. Running the normalize operation on this'
            ' dataset may help'.format(ds.time.dtype))

    # Try to figure out the input frequency
    try:
        in_freq = ds.attrs['time_coverage_resolution']
    except KeyError:
        raise ValidationError(
            'Could not determine temporal resolution of input dataset.'
            ' Running the adjust_temporal_attrs operation beforehand may'
            ' help.')

    if custom_resolution:
        freq = custom_resolution
    else:
        frequencies = {'month': 'MS', 'season': 'QS-DEC'}
        freq = frequencies[output_resolution]

    _validate_freq(in_freq, freq)

    with monitor.observing("resample dataset"):
        try:
            retset = getattr(resampler, method)(ds.resample(time=freq,
                                                            keep_attrs=True))
        except AttributeError:
            raise ValidationError(
                f'Provided aggregation method {method} is not valid.')

    for var in retset.data_vars:
        try:
            retset[var].attrs['cell_methods'] = \
                retset[var].attrs['cell_methods'] + \
                ' time: {} within years'.format(method)
        except KeyError:
            retset[var].attrs['cell_methods'] = 'time: {} within years'.format(
                method)

    return adjust_temporal_attrs(retset)
Example #20
def anomaly_external(ds: xr.Dataset,
                     file: str,
                     transform: str = None,
                     monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Calculate anomaly with external reference data, for example, a climatology.
    The given reference dataset is expected to consist of 12 time slices, one
    for each month.

    The returned dataset will contain the variable names found in both the
    reference and the given dataset. Names found in the given dataset, but not in
    the reference, will be dropped from the resulting dataset. The calculated
    anomaly will be against the corresponding month of the reference data.
    E.g. January against January, etc.

    In case spatial extents differ between the reference and the given dataset,
    the anomaly will be calculated on the intersection.

    :param ds: The dataset to calculate anomalies from
    :param file: Path to reference data file
    :param transform: Apply the given transformation before calculating the anomaly.
                      For supported operations see help on 'ds_arithmetics' operation.
    :param monitor: a progress monitor.
    :return: The anomaly dataset
    """
    # Check if the time coordinate is of dtype datetime
    try:
        if ds.time.dtype != 'datetime64[ns]':
            raise ValidationError('The dataset provided for anomaly calculation'
                                  ' is required to have a time coordinate of'
                                  ' dtype datetime64[ns]. Running the normalize'
                                  ' operation on this dataset might help.')
    except AttributeError:
        raise ValidationError('The dataset provided for anomaly calculation'
                              ' is required to have a time coordinate.')

    try:
        if ds.attrs['time_coverage_resolution'] != 'P1M':
            raise ValidationError('anomaly_external expects a monthly dataset'
                                  ' got: {} instead.'.format(ds.attrs['time_coverage_resolution']))
    except KeyError:
        try:
            ds = adjust_temporal_attrs(ds)
            if ds.attrs['time_coverage_resolution'] != 'P1M':
                raise ValidationError('anomaly_external expects a monthly dataset'
                                      ' got: {} instead.'.format(ds.attrs['time_coverage_resolution']))
        except KeyError:
            raise ValidationError('Could not determine temporal resolution'
                                  ' of the given input dataset.')

    clim = xr.open_dataset(file)
    try:
        if len(clim.time) != 12:
            raise ValidationError('The reference dataset is expected to be a '
                                  'monthly climatology. The provided dataset has'
                                  ' a time dimension with length: {}'.format(len(clim.time)))
    except AttributeError:
        raise ValidationError('The reference dataset is required to '
                              'have a time coordinate.')

    ret = ds.copy()
    if transform:
        ret = ds_arithmetics(ds, transform)
    # Group by months, subtract the appropriate slice from the reference
    # Note that this requires that 'time' coordinate labels are of type
    # datetime64[ns]
    total_work = 100
    step = 100 / 12

    with monitor.starting('Anomaly', total_work=total_work):
        monitor.progress(work=0)
        kwargs = {'ref': clim, 'monitor': monitor, 'step': step}
        ret = ret.groupby(ds['time.month']).apply(_group_anomaly,
                                                  **kwargs)

    # Running groupby results in a redundant 'month' variable being added to
    # the dataset
    ret = ret.drop('month')
    ret.attrs = ds.attrs
    # The dataset may be cropped
    return adjust_spatial_attrs(ret)