Example #1
    def test_nominal_inverted(self):
        # Inverted lat
        ds = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
            'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
            'lat': np.linspace(88, -88, 45),
            'lon': np.linspace(-178, 178, 90),
            'time': [datetime(2000, x, 1) for x in range(1, 13)]
        })

        ds.lon.attrs['units'] = 'degrees_east'
        ds.lat.attrs['units'] = 'degrees_north'

        ds1 = adjust_spatial_attrs(ds)

        # Make sure original dataset is not altered
        with self.assertRaises(KeyError):
            # noinspection PyStatementEffect
            ds.attrs['geospatial_lat_min']

        # Make sure expected values are in the new dataset
        self.assertEqual(ds1.attrs['geospatial_lat_min'], -90)
        self.assertEqual(ds1.attrs['geospatial_lat_max'], 90)
        self.assertEqual(ds1.attrs['geospatial_lat_units'], 'degrees_north')
        self.assertEqual(ds1.attrs['geospatial_lat_resolution'], 4)
        self.assertEqual(ds1.attrs['geospatial_lon_min'], -180)
        self.assertEqual(ds1.attrs['geospatial_lon_max'], 180)
        self.assertEqual(ds1.attrs['geospatial_lon_units'], 'degrees_east')
        self.assertEqual(ds1.attrs['geospatial_lon_resolution'], 4)
        self.assertEqual(
            ds1.attrs['geospatial_bounds'],
            'POLYGON((-180.0 -90.0, -180.0 90.0, 180.0 90.0,'
            ' 180.0 -90.0, -180.0 -90.0))')

        # Test existing attributes update
        lon_min, lat_min, lon_max, lat_max = -20, -40, 60, 40
        indexers = {
            'lon': slice(lon_min, lon_max),
            'lat': slice(lat_max, lat_min)
        }
        ds2 = ds1.sel(**indexers)
        ds2 = adjust_spatial_attrs(ds2)

        self.assertEqual(ds2.attrs['geospatial_lat_min'], -42)
        self.assertEqual(ds2.attrs['geospatial_lat_max'], 42)
        self.assertEqual(ds2.attrs['geospatial_lat_units'], 'degrees_north')
        self.assertEqual(ds2.attrs['geospatial_lat_resolution'], 4)
        self.assertEqual(ds2.attrs['geospatial_lon_min'], -20)
        self.assertEqual(ds2.attrs['geospatial_lon_max'], 60)
        self.assertEqual(ds2.attrs['geospatial_lon_units'], 'degrees_east')
        self.assertEqual(ds2.attrs['geospatial_lon_resolution'], 4)
        self.assertEqual(
            ds2.attrs['geospatial_bounds'],
            'POLYGON((-20.0 -42.0, -20.0 42.0, 60.0 42.0, 60.0'
            ' -42.0, -20.0 -42.0))')
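
The expected values above come from extending the outermost cell centres by half a grid cell: centres from 88 to -88 with a 4-degree spacing give an extent of -90 to 90. A minimal sketch of that derivation (the helper name is hypothetical and is not the actual adjust_spatial_attrs internals):

import numpy as np

def extent_from_centres(coords):
    # Illustrative only: assumes a regular 1-D grid of cell centres and
    # extends the outermost centres by half a cell on each side.
    res = abs(float(coords[1] - coords[0]))
    return float(coords.min()) - res / 2, float(coords.max()) + res / 2, res

lat = np.linspace(88, -88, 45)
print(extent_from_centres(lat))  # (-90.0, 90.0, 4.0)
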
Example #2
    def test_nominal_inverted(self):
        # Inverted lat
        ds = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
            'second': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
            'lat': np.linspace(88, -88, 45),
            'lon': np.linspace(-178, 178, 90),
            'time': [datetime(2000, x, 1) for x in range(1, 13)]})

        ds.lon.attrs['units'] = 'degrees_east'
        ds.lat.attrs['units'] = 'degrees_north'

        ds1 = adjust_spatial_attrs(ds)

        # Make sure original dataset is not altered
        with self.assertRaises(KeyError):
            # noinspection PyStatementEffect
            ds.attrs['geospatial_lat_min']

        # Make sure expected values are in the new dataset
        self.assertEqual(ds1.attrs['geospatial_lat_min'], -90)
        self.assertEqual(ds1.attrs['geospatial_lat_max'], 90)
        self.assertEqual(ds1.attrs['geospatial_lat_units'], 'degrees_north')
        self.assertEqual(ds1.attrs['geospatial_lat_resolution'], 4)
        self.assertEqual(ds1.attrs['geospatial_lon_min'], -180)
        self.assertEqual(ds1.attrs['geospatial_lon_max'], 180)
        self.assertEqual(ds1.attrs['geospatial_lon_units'], 'degrees_east')
        self.assertEqual(ds1.attrs['geospatial_lon_resolution'], 4)
        self.assertEqual(ds1.attrs['geospatial_bounds'],
                         'POLYGON((-180.0 -90.0, -180.0 90.0, 180.0 90.0,'
                         ' 180.0 -90.0, -180.0 -90.0))')

        # Test existing attributes update
        lon_min, lat_min, lon_max, lat_max = -20, -40, 60, 40
        indexers = {'lon': slice(lon_min, lon_max),
                    'lat': slice(lat_max, lat_min)}
        ds2 = ds1.sel(**indexers)
        ds2 = adjust_spatial_attrs(ds2)

        self.assertEqual(ds2.attrs['geospatial_lat_min'], -42)
        self.assertEqual(ds2.attrs['geospatial_lat_max'], 42)
        self.assertEqual(ds2.attrs['geospatial_lat_units'], 'degrees_north')
        self.assertEqual(ds2.attrs['geospatial_lat_resolution'], 4)
        self.assertEqual(ds2.attrs['geospatial_lon_min'], -20)
        self.assertEqual(ds2.attrs['geospatial_lon_max'], 60)
        self.assertEqual(ds2.attrs['geospatial_lon_units'], 'degrees_east')
        self.assertEqual(ds2.attrs['geospatial_lon_resolution'], 4)
        self.assertEqual(ds2.attrs['geospatial_bounds'],
                         'POLYGON((-20.0 -42.0, -20.0 42.0, 60.0 42.0, 60.0'
                         ' -42.0, -20.0 -42.0))')
Example #3
    def test_once_cell_without_bnds(self):
        # Only one cell in lat/lon
        ds = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.zeros([1, 1, 12])),
            'second': (['lat', 'lon', 'time'], np.zeros([1, 1, 12])),
            'lat': np.array([52.5]),
            'lon': np.array([11.5]),
            'time': [datetime(2000, x, 1) for x in range(1, 13)]})
        ds.lon.attrs['units'] = 'degrees_east'
        ds.lat.attrs['units'] = 'degrees_north'

        with self.assertRaises(ValueError) as cm:
            adjust_spatial_attrs(ds)

        self.assertEqual(str(cm.exception), 'Cannot determine spatial extent for dimension "lon"')
Example #4
    def test_once_cell_with_bnds(self):
        # Only one cell in lat/lon
        ds = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.zeros([1, 1, 12])),
            'second': (['lat', 'lon', 'time'], np.zeros([1, 1, 12])),
            'lat': np.array([52.5]),
            'lon': np.array([11.5]),
            'lat_bnds': (['lat', 'bnds'], np.array([[52.4, 52.6]])),
            'lon_bnds': (['lon', 'bnds'], np.array([[11.4, 11.6]])),
            'time': [datetime(2000, x, 1) for x in range(1, 13)]
        })
        ds.lon.attrs['units'] = 'degrees_east'
        ds.lat.attrs['units'] = 'degrees_north'

        ds1 = adjust_spatial_attrs(ds)
        self.assertAlmostEqual(ds1.attrs['geospatial_lat_resolution'], 0.2)
        self.assertAlmostEqual(ds1.attrs['geospatial_lat_min'], 52.4)
        self.assertAlmostEqual(ds1.attrs['geospatial_lat_max'], 52.6)
        self.assertEqual(ds1.attrs['geospatial_lat_units'], 'degrees_north')
        self.assertAlmostEqual(ds1.attrs['geospatial_lon_resolution'], 0.2)
        self.assertAlmostEqual(ds1.attrs['geospatial_lon_min'], 11.4)
        self.assertAlmostEqual(ds1.attrs['geospatial_lon_max'], 11.6)
        self.assertEqual(ds1.attrs['geospatial_lon_units'], 'degrees_east')
        self.assertEqual(
            ds1.attrs['geospatial_bounds'],
            'POLYGON((11.4 52.4, 11.4 52.6, 11.6 52.6, 11.6 52.4, 11.4 52.4))')
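
With explicit 'lat_bnds'/'lon_bnds' variables present, extent and resolution can be read directly from the bounds instead of being extrapolated from cell centres, which is why a single cell succeeds here while the bounds-free case above raises an error. A rough sketch under that assumption (helper name hypothetical):

import numpy as np

def extent_from_bounds(bnds):
    # Illustrative only: 'bnds' is a (cell, 2) array such as lat_bnds above.
    values = np.asarray(bnds, dtype=float)
    res = float(np.mean(values[:, 1] - values[:, 0]))
    return float(values.min()), float(values.max()), res

print(extent_from_bounds(np.array([[52.4, 52.6]])))  # (52.4, 52.6, ~0.2)
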
Example #5
def _resample_dataset(ds_master: xr.Dataset, ds_replica: xr.Dataset,
                      method_us: int, method_ds: int,
                      monitor: Monitor) -> xr.Dataset:
    """
    Resample replica onto the grid of the master.
    This spatially resamples the whole dataset, i.e., all
    variables in the replica dataset.
    This method works only if both datasets have (time, lat, lon) dimensions.

    Note that dataset attributes are not propagated, because the set of CDM attributes has not yet been decided.

    :param ds_master: xr.Dataset whose lat/lon coordinates are used as the resampling grid
    :param ds_replica: xr.Dataset that will be resampled onto the master's grid
    :param method_us: Interpolation method for upsampling, see resampling.py
    :param method_ds: Interpolation method for downsampling, see resampling.py
    :param monitor: a progress monitor.
    :return: xr.Dataset The resampled replica dataset
    """
    # Find lat/lon bounds of the intersection of master and replica grids. The
    # bounds should fall on pixel boundaries for both spatial dimensions for
    # both datasets
    lat_min, lat_max = _find_intersection(ds_master['lat'].values,
                                          ds_replica['lat'].values,
                                          global_bounds=(-90, 90))
    lon_min, lon_max = _find_intersection(ds_master['lon'].values,
                                          ds_replica['lon'].values,
                                          global_bounds=(-180, 180))

    # Subset replica dataset and master grid. We're not using here the subset
    # operation, because the subset operation may produce datasets that cross
    # the anti-meridian by design. However, such a disjoint dataset can not be
    # resampled using our current resampling methods.
    lat_slice = slice(lat_min, lat_max)
    lon_slice = slice(lon_min, lon_max)

    lon = ds_master['lon'].sel(lon=lon_slice)
    lat = ds_master['lat'].sel(lat=lat_slice)
    ds_replica = ds_replica.sel(lon=lon_slice, lat=lat_slice)

    # Don't do anything if datasets already have the same spatial definition
    if _grids_equal(ds_master, ds_replica):
        return ds_replica

    with monitor.starting("coregister dataset", len(ds_replica.data_vars)):
        kwargs = {
            'lon': lon,
            'lat': lat,
            'method_us': method_us,
            'method_ds': method_ds,
            'parent_monitor': monitor
        }
        retset = ds_replica.apply(_resample_array, keep_attrs=True, **kwargs)

    return adjust_spatial_attrs(retset)
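
The intersection step above clips both grids to their common lat/lon coverage before resampling. A much simplified sketch of what such an intersection amounts to; the real _find_intersection additionally snaps the result onto pixel boundaries of both grids:

import numpy as np

def simple_intersection(coords_a, coords_b, global_bounds):
    # Illustrative only: common coverage of two 1-D coordinate axes,
    # clamped to the given global bounds.
    lo = max(coords_a.min(), coords_b.min(), global_bounds[0])
    hi = min(coords_a.max(), coords_b.max(), global_bounds[1])
    if lo >= hi:
        raise ValueError('Datasets do not overlap spatially.')
    return float(lo), float(hi)

lat_min, lat_max = simple_intersection(np.linspace(-88, 88, 45),
                                       np.linspace(-80, 80, 17),
                                       global_bounds=(-90, 90))
print(lat_min, lat_max)  # -80.0 80.0
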
Example #6
File: subset.py  Project: stratosgear/cate
def subset_spatial(ds: xr.Dataset,
                   region: PolygonLike.TYPE,
                   mask: bool = True) -> xr.Dataset:
    """
    Do a spatial subset of the dataset

    :param ds: Dataset to subset
    :param region: Spatial region to subset
    :param mask: Whether values falling within the bounding box of the polygon, but not within the polygon itself, should be masked with NaN.
    :return: Subset dataset
    """
    region = PolygonLike.convert(region)
    return adjust_spatial_attrs(subset_spatial_impl(ds, region, mask))
Example #7
def subset_spatial(ds: xr.Dataset,
                   region: PolygonLike.TYPE,
                   mask: bool = True,
                   monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Do a spatial subset of the dataset

    :param ds: Dataset to subset
    :param region: Spatial region to subset
    :param mask: Whether values falling within the bounding box of the polygon, but not within the polygon itself, should be masked with NaN.
    :param monitor: a progress monitor.
    :return: Subset dataset
    """
    return adjust_spatial_attrs(subset_spatial_impl(ds, region, mask, monitor), allow_point=True)
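
A short usage sketch for the operation above. It assumes that PolygonLike.convert accepts a WKT polygon string (as the geospatial_bounds attributes elsewhere in this section suggest) and that subset_spatial is in scope; the dataset construction mirrors the tests above:

import numpy as np
import xarray as xr
from datetime import datetime

ds = xr.Dataset({
    'first': (['lat', 'lon', 'time'], np.zeros([45, 90, 12])),
    'lat': np.linspace(-88, 88, 45),
    'lon': np.linspace(-178, 178, 90),
    'time': [datetime(2000, x, 1) for x in range(1, 13)]})
ds.lon.attrs['units'] = 'degrees_east'
ds.lat.attrs['units'] = 'degrees_north'

# Hypothetical usage; the region is given as a WKT polygon string.
region = 'POLYGON((-20 -40, -20 40, 60 40, 60 -40, -20 -40))'
subset = subset_spatial(ds, region, mask=True)
print(subset.attrs.get('geospatial_lon_min'),
      subset.attrs.get('geospatial_lon_max'))
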
Example #8
    def test_once_cell_without_bnds(self):
        # Only one cell in lat/lon
        ds = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.zeros([1, 1, 12])),
            'second': (['lat', 'lon', 'time'], np.zeros([1, 1, 12])),
            'lat': np.array([52.5]),
            'lon': np.array([11.5]),
            'time': [datetime(2000, x, 1) for x in range(1, 13)]})
        ds.lon.attrs['units'] = 'degrees_east'
        ds.lat.attrs['units'] = 'degrees_north'

        ds2 = adjust_spatial_attrs(ds)
        # Datasets should be the same --> not modified
        self.assertIs(ds2, ds)
Example #9
def _resample_dataset(ds_master: xr.Dataset, ds_replica: xr.Dataset, method_us: int, method_ds: int, monitor: Monitor) -> xr.Dataset:
    """
    Resample replica onto the grid of the master.
    This spatially resamples the whole dataset, i.e., all
    variables in the replica dataset.
    This method works only if both datasets have (time, lat, lon) dimensions.

    Note that dataset attributes are not propagated, because the set of CDM attributes has not yet been decided.

    :param ds_master: xr.Dataset whose lat/lon coordinates are used as the resampling grid
    :param ds_replica: xr.Dataset that will be resampled onto the master's grid
    :param method_us: Interpolation method for upsampling, see resampling.py
    :param method_ds: Interpolation method for downsampling, see resampling.py
    :param monitor: a progress monitor.
    :return: xr.Dataset The resampled replica dataset
    """
    # Find lat/lon bounds of the intersection of master and replica grids. The
    # bounds should fall on pixel boundaries for both spatial dimensions for
    # both datasets
    lat_min, lat_max = _find_intersection(ds_master['lat'].values,
                                          ds_replica['lat'].values,
                                          global_bounds=(-90, 90))
    lon_min, lon_max = _find_intersection(ds_master['lon'].values,
                                          ds_replica['lon'].values,
                                          global_bounds=(-180, 180))

    # Subset replica dataset and master grid. We're not using here the subset
    # operation, because the subset operation may produce datasets that cross
    # the anti-meridian by design. However, such a disjoint dataset can not be
    # resampled using our current resampling methods.
    lat_slice = slice(lat_min, lat_max)
    lon_slice = slice(lon_min, lon_max)

    lon = ds_master['lon'].sel(lon=lon_slice)
    lat = ds_master['lat'].sel(lat=lat_slice)
    ds_replica = ds_replica.sel(lon=lon_slice, lat=lat_slice)

    # Don't do anything if datasets already have the same spatial definition
    if _grids_equal(ds_master, ds_replica):
        return ds_replica

    with monitor.starting("coregister dataset", len(ds_replica.data_vars)):
        kwargs = {'lon': lon, 'lat': lat, 'method_us': method_us, 'method_ds': method_ds, 'parent_monitor': monitor}
        retset = ds_replica.apply(_resample_array, keep_attrs=True, **kwargs)

    return adjust_spatial_attrs(retset)
Example #10
    def test_once_cell_with_bnds(self):
        # Only one cell in lat/lon
        ds = xr.Dataset({
            'first': (['lat', 'lon', 'time'], np.zeros([1, 1, 12])),
            'second': (['lat', 'lon', 'time'], np.zeros([1, 1, 12])),
            'lat': np.array([52.5]),
            'lon': np.array([11.5]),
            'lat_bnds': (['lat', 'bnds'], np.array([[52.4, 52.6]])),
            'lon_bnds': (['lon', 'bnds'], np.array([[11.4, 11.6]])),
            'time': [datetime(2000, x, 1) for x in range(1, 13)]})
        ds.lon.attrs['units'] = 'degrees_east'
        ds.lat.attrs['units'] = 'degrees_north'

        ds1 = adjust_spatial_attrs(ds)
        self.assertAlmostEqual(ds1.attrs['geospatial_lat_resolution'], 0.2)
        self.assertAlmostEqual(ds1.attrs['geospatial_lat_min'], 52.4)
        self.assertAlmostEqual(ds1.attrs['geospatial_lat_max'], 52.6)
        self.assertEqual(ds1.attrs['geospatial_lat_units'], 'degrees_north')
        self.assertAlmostEqual(ds1.attrs['geospatial_lon_resolution'], 0.2)
        self.assertAlmostEqual(ds1.attrs['geospatial_lon_min'], 11.4)
        self.assertAlmostEqual(ds1.attrs['geospatial_lon_max'], 11.6)
        self.assertEqual(ds1.attrs['geospatial_lon_units'], 'degrees_east')
        self.assertEqual(ds1.attrs['geospatial_bounds'],
                         'POLYGON((11.4 52.4, 11.4 52.6, 11.6 52.6, 11.6 52.4, 11.4 52.4))')
Example #11
def pearson_correlation(ds_x: DatasetLike.TYPE,
                        ds_y: DatasetLike.TYPE,
                        var_x: VarName.TYPE,
                        var_y: VarName.TYPE,
                        monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Do product moment `Pearson's correlation <http://www.statsoft.com/Textbook/Statistics-Glossary/P/button/p#Pearson%20Correlation>`_ analysis.

    Perform Pearson correlation on two datasets and produce a lon/lat map of
    correlation coefficients and the corresponding p_values.

    In case two 3D lon/lat/time datasets are provided, a pixel by pixel
    correlation is performed, producing a lat/lon map of correlation
    coefficients and p_values of the underlying timeseries in the provided
    datasets. It is also possible to provide a combination of a 3D
    lon/lat/time dataset and a 1D timeseries.

    The lat/lon definition of both datasets has to be the same. The time
    dimensions must be of equal length, but not necessarily of the same
    definition, e.g., it is possible to correlate different time periods of
    the same area.

    There are 'x' and 'y' datasets. Positive correlations imply that as x
    grows, so does y. Negative correlations imply that as x increases, y
    decreases.

    For more information on how to interpret the results, see
    `here <http://support.minitab.com/en-us/minitab-express/1/help-and-how-to/modeling-statistics/regression/how-to/correlation/interpret-the-results/>`_,
    and `here <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.pearsonr.html>`_.

    :param ds_x: The 'x' dataset
    :param ds_y: The 'y' dataset
    :param var_x: Dataset variable to use for correlation analysis in the 'x' dataset
    :param var_y: Dataset variable to use for correlation analysis in the 'y' dataset
    :param monitor: a progress monitor.
    :return: a dataset containing a map of correlation coefficients and p_values
    """
    ds_x = DatasetLike.convert(ds_x)
    ds_y = DatasetLike.convert(ds_y)
    var_x = VarName.convert(var_x)
    var_y = VarName.convert(var_y)

    array_y = ds_y[var_y]
    array_x = ds_x[var_x]

    # Further validate inputs
    if array_x.dims == array_y.dims:
        if len(array_x.dims) != 3 or len(array_y.dims) != 3:
            raise ValidationError('A correlation coefficient map can only be produced'
                                  ' if both provided datasets are 3D datasets with'
                                  ' lon/lat/time dimensionality, or if a combination'
                                  ' of a 3D lon/lat/time dataset and a 1D timeseries'
                                  ' is provided.')

        if array_x.values.shape != array_y.values.shape:
            raise ValidationError(f'The provided variables {var_x} and {var_y} do not have the'
                                  ' same shape, so Pearson correlation cannot be'
                                  ' performed. Please review the operation'
                                  ' documentation.')

        if (not ds_x['lat'].equals(ds_y['lat']) or not ds_x['lon'].equals(ds_y['lon'])):
            raise ValidationError('When performing a pixel by pixel correlation the'
                                  ' datasets have to have the same lat/lon'
                                  ' definition. Consider running coregistration'
                                  ' first')

    elif (((len(array_x.dims) == 3) and (len(array_y.dims) != 1))
          or ((len(array_x.dims) == 1) and (len(array_y.dims) != 3))
          or ((len(array_x.dims) != 3) and (len(array_y.dims) == 1))
          or ((len(array_x.dims) != 1) and (len(array_y.dims) == 3))):
        raise ValidationError('A correlation coefficient map can only be produced'
                              ' if both provided datasets are 3D datasets with'
                              ' lon/lat/time dimensionality, or if a combination'
                              ' of a 3D lon/lat/time dataset and a 1D timeseries'
                              ' is provided.')

    if len(array_x['time']) != len(array_y['time']):
        raise ValidationError('The length of the time dimension differs between'
                              ' the given datasets. Cannot perform the calculation;'
                              ' please review the operation documentation.')

    if len(array_x['time']) < 3:
        raise ValidationError('The length of the time dimension should not be less'
                              ' than three to run the calculation.')

    # Do pixel by pixel correlation
    retset = _pearsonr(array_x, array_y, monitor)
    retset.attrs['Cate_Description'] = f'Correlation between {var_y} and {var_x}'

    return adjust_spatial_attrs(retset)
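
The helper _pearsonr is not shown here; conceptually, it correlates the two time series at every grid cell and returns maps of the coefficient and p_value. A rough sketch of that per-pixel computation with scipy.stats.pearsonr, assuming (time, lat, lon) ordering; this is an illustration, not the actual implementation (which also handles the 3D-versus-1D case and progress monitoring):

import numpy as np
import xarray as xr
from scipy.stats import pearsonr

def pixelwise_pearson(array_x, array_y):
    # Illustrative only: correlate the time series of two (time, lat, lon)
    # DataArrays cell by cell, returning maps of r and p values.
    corr = np.empty(array_x.shape[1:])
    pval = np.empty(array_x.shape[1:])
    for i in range(array_x.shape[1]):
        for j in range(array_x.shape[2]):
            corr[i, j], pval[i, j] = pearsonr(array_x.values[:, i, j],
                                              array_y.values[:, i, j])
    return xr.Dataset({'corr_coef': (['lat', 'lon'], corr),
                       'p_value': (['lat', 'lon'], pval)},
                      coords={'lat': array_x['lat'], 'lon': array_x['lon']})
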
Example #12
def pearson_correlation(ds_x: DatasetLike.TYPE,
                        ds_y: DatasetLike.TYPE,
                        var_x: VarName.TYPE,
                        var_y: VarName.TYPE,
                        monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Do product moment `Pearson's correlation <http://www.statsoft.com/Textbook/Statistics-Glossary/P/button/p#Pearson%20Correlation>`_ analysis.

    Perform Pearson correlation on two datasets and produce a lon/lat map of
    correlation coefficients and the corresponding p_values.

    In case two 3D lon/lat/time datasets are provided, a pixel by pixel
    correlation is performed, producing a lat/lon map of correlation
    coefficients and p_values of the underlying timeseries in the provided
    datasets. It is also possible to provide a combination of a 3D
    lon/lat/time dataset and a 1D timeseries.

    The lat/lon definition of both datasets has to be the same. The time
    dimensions must be of equal length, but not necessarily of the same
    definition, e.g., it is possible to correlate different time periods of
    the same area.

    There are 'x' and 'y' datasets. Positive correlations imply that as x
    grows, so does y. Negative correlations imply that as x increases, y
    decreases.

    For more information on how to interpret the results, see
    `here <http://support.minitab.com/en-us/minitab-express/1/help-and-how-to/modeling-statistics/regression/how-to/correlation/interpret-the-results/>`_,
    and `here <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.pearsonr.html>`_.

    :param ds_x: The 'x' dataset
    :param ds_y: The 'y' dataset
    :param var_x: Dataset variable to use for correlation analysis in the 'x' dataset
    :param var_y: Dataset variable to use for correlation analysis in the 'y' dataset
    :param monitor: a progress monitor.
    :return: a dataset containing a map of correlation coefficients and p_values
    """
    ds_x = DatasetLike.convert(ds_x)
    ds_y = DatasetLike.convert(ds_y)
    var_x = VarName.convert(var_x)
    var_y = VarName.convert(var_y)

    array_y = ds_y[var_y]
    array_x = ds_x[var_x]

    # Further validate inputs
    if array_x.dims == array_y.dims:
        if len(array_x.dims) != 3 or len(array_y.dims) != 3:
            raise ValueError(
                'A correlation coefficient map can only be produced'
                ' if both provided datasets are 3D datasets with'
                ' lon/lat/time dimensionality, or if a combination'
                ' of a 3D lon/lat/time dataset and a 1D timeseries'
                ' is provided.')

        if array_x.values.shape != array_y.values.shape:
            raise ValueError('The provided variables {} and {} do not have the'
                             ' same shape, so Pearson correlation cannot be'
                             ' performed. Please review the operation'
                             ' documentation.'.format(var_x, var_y))

        if (not ds_x['lat'].equals(ds_y['lat'])
                or not ds_x['lon'].equals(ds_y['lon'])):
            raise ValueError('When performing a pixel by pixel correlation the'
                             ' datasets have to have the same lat/lon'
                             ' definition. Consider running coregistration'
                             ' first')

    elif (((len(array_x.dims) == 3) and (len(array_y.dims) != 1))
          or ((len(array_x.dims) == 1) and (len(array_y.dims) != 3))
          or ((len(array_x.dims) != 3) and (len(array_y.dims) == 1))
          or ((len(array_x.dims) != 1) and (len(array_y.dims) == 3))):
        raise ValueError('A correlation coefficient map can only be produced'
                         ' if both provided datasets are 3D datasets with'
                         ' lon/lat/time dimensionality, or if a combination'
                         ' of a 3D lon/lat/time dataset and a 1D timeseries'
                         ' is provided.')

    if len(array_x['time']) != len(array_y['time']):
        raise ValueError('The length of the time dimension differs between'
                         ' the given datasets. Cannot perform the calculation;'
                         ' please review the operation documentation.')

    if len(array_x['time']) < 3:
        raise ValueError('The length of the time dimension should not be less'
                         ' than three to run the calculation.')

    # Do pixel by pixel correlation
    retset = _pearsonr(array_x, array_y, monitor)
    retset.attrs['Cate_Description'] = 'Correlation between {} and {}'.format(
        var_y, var_x)

    return adjust_spatial_attrs(retset)
Example #13
def anomaly_external(ds: xr.Dataset,
                     file: str,
                     transform: str = None,
                     monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Calculate anomaly with external reference data, for example, a climatology.
    The given reference dataset is expected to consist of 12 time slices, one
    for each month.

    The returned dataset will contain the variable names found in both the
    reference and the given dataset. Names found in the given dataset, but not in
    the reference, will be dropped from the resulting dataset. The calculated
    anomaly will be against the corresponding month of the reference data.
    E.g. January against January, etc.

    In case spatial extents differ between the reference and the given dataset,
    the anomaly will be calculated on the intersection.

    :param ds: The dataset to calculate anomalies from
    :param file: Path to reference data file
    :param transform: Apply the given transformation before calculating the anomaly.
                      For supported operations see help on 'ds_arithmetics' operation.
    :param monitor: a progress monitor.
    :return: The anomaly dataset
    """
    # Check if the time coordinate is of dtype datetime
    try:
        if ds.time.dtype != 'datetime64[ns]':
            raise ValidationError('The dataset provided for anomaly calculation'
                                  ' is required to have a time coordinate of'
                                  ' dtype datetime64[ns]. Running the normalize'
                                  ' operation on this dataset might help.')
    except AttributeError:
        raise ValidationError('The dataset provided for anomaly calculation'
                              ' is required to have a time coordinate.')

    try:
        if ds.attrs['time_coverage_resolution'] != 'P1M':
            raise ValidationError('anomaly_external expects a monthly dataset'
                                  ' got: {} instead.'.format(ds.attrs['time_coverage_resolution']))
    except KeyError:
        try:
            ds = adjust_temporal_attrs(ds)
            if ds.attrs['time_coverage_resolution'] != 'P1M':
                raise ValidationError('anomaly_external expects a monthly dataset'
                                      ' got: {} instead.'.format(ds.attrs['time_coverage_resolution']))
        except KeyError:
            raise ValidationError('Could not determine the temporal resolution'
                                  ' of the given input dataset.')

    clim = xr.open_dataset(file)
    try:
        if len(clim.time) != 12:
            raise ValidationError('The reference dataset is expected to be a '
                                  'monthly climatology. The provided dataset has'
                                  ' a time dimension with length: {}'.format(len(clim.time)))
    except AttributeError:
        raise ValidationError('The reference dataset is required to '
                              'have a time coordinate.')

    ret = ds.copy()
    if transform:
        ret = ds_arithmetics(ds, transform)
    # Group by months, subtract the appropriate slice from the reference
    # Note that this requires that 'time' coordinate labels are of type
    # datetime64[ns]
    total_work = 100
    step = 100 / 12

    with monitor.starting('Anomaly', total_work=total_work):
        monitor.progress(work=0)
        kwargs = {'ref': clim, 'monitor': monitor, 'step': step}
        ret = ret.groupby(ds['time.month']).apply(_group_anomaly,
                                                  **kwargs)

    # Running groupby results in a redundant 'month' variable being added to
    # the dataset
    ret = ret.drop('month')
    ret.attrs = ds.attrs
    # The dataset may be cropped
    return adjust_spatial_attrs(ret)
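
The helper _group_anomaly is not shown; the core idea is that each monthly group of the input has the matching reference slice subtracted. A hedged sketch of that month-by-month subtraction without the monitor plumbing, assuming the reference file holds exactly one time slice per calendar month as the docstring requires:

import xarray as xr

def simple_monthly_anomaly(ds, clim):
    # Illustrative only: subtract the reference slice of the matching
    # calendar month from every time step of the input dataset.
    def subtract_reference(group):
        month = int(group['time'].dt.month[0])
        # Reference is assumed to hold 12 slices ordered Jan..Dec.
        return group - clim.isel(time=month - 1).drop_vars('time')
    ret = ds.groupby('time.month').map(subtract_reference)
    return ret.drop_vars('month', errors='ignore')
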
Example #14
def anomaly_external(ds: xr.Dataset,
                     file: str,
                     transform: str = None,
                     monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Calculate anomaly with external reference data, for example, a climatology.
    The given reference dataset is expected to consist of 12 time slices, one
    for each month.

    The returned dataset will contain the variable names found in both the
    reference and the given dataset. Names found in the given dataset, but not in
    the reference, will be dropped from the resulting dataset. The calculated
    anomaly will be against the corresponding month of the reference data.
    E.g. January against January, etc.

    In case spatial extents differ between the reference and the given dataset,
    the anomaly will be calculated on the intersection.

    :param ds: The dataset to calculate anomalies from
    :param file: Path to reference data file
    :param transform: Apply the given transformation before calculating the anomaly.
                      For supported operations see help on 'ds_arithmetics' operation.
    :param monitor: a progress monitor.
    :return: The anomaly dataset
    """
    # Check if the time coordinate is of dtype datetime
    try:
        if ds.time.dtype != 'datetime64[ns]':
            raise ValidationError('The dataset provided for anomaly calculation'
                                  ' is required to have a time coordinate of'
                                  ' dtype datetime64[ns]. Running the normalize'
                                  ' operation on this dataset might help.')
    except AttributeError:
        raise ValidationError('The dataset provided for anomaly calculation'
                              ' is required to have a time coordinate.')

    try:
        if ds.attrs['time_coverage_resolution'] != 'P1M':
            raise ValidationError('anomaly_external expects a monthly dataset'
                                  ' got: {} instead.'.format(ds.attrs['time_coverage_resolution']))
    except KeyError:
        try:
            ds = adjust_temporal_attrs(ds)
            if ds.attrs['time_coverage_resolution'] != 'P1M':
                raise ValidationError('anomaly_external expects a monthly dataset'
                                      ' got: {} instead.'.format(ds.attrs['time_coverage_resolution']))
        except KeyError:
            raise ValidationError('Could not determine the temporal resolution'
                                  ' of the given input dataset.')

    clim = xr.open_dataset(file)
    try:
        if len(clim.time) != 12:
            raise ValidationError('The reference dataset is expected to be a '
                                  'monthly climatology. The provided dataset has'
                                  ' a time dimension with length: {}'.format(len(clim.time)))
    except AttributeError:
        raise ValidationError('The reference dataset is required to '
                              'have a time coordinate.')

    ret = ds.copy()
    if transform:
        ret = ds_arithmetics(ds, transform)
    # Group by months, subtract the appropriate slice from the reference
    # Note that this requires that 'time' coordinate labels are of type
    # datetime64[ns]
    total_work = 100
    step = 100 / 12

    with monitor.starting('Anomaly', total_work=total_work):
        monitor.progress(work=0)
        kwargs = {'ref': clim, 'monitor': monitor, 'step': step}
        ret = ret.groupby(ds['time.month']).apply(_group_anomaly,
                                                  **kwargs)

    # Running groupby results in a redundant 'month' variable being added to
    # the dataset
    ret = ret.drop('month')
    ret.attrs = ds.attrs
    # The dataset may be cropped
    return adjust_spatial_attrs(ret)