Ejemplo n.º 1
0
 def test_convert_to_point(self):
     """convert_geometry() accepts a point given in several equivalent forms."""
     point = shapely.geometry.Point(12.8, -34.4)
     # An existing shapely geometry is passed through as the same object.
     self.assertIs(point, convert_geometry(point))
     # Coordinate list, numpy array, WKT string, and GeoJSON mapping all
     # convert to an equal point.
     for source in ([12.8, -34.4],
                    np.array([12.8, -34.4]),
                    point.wkt,
                    point.__geo_interface__):
         self.assertEqual(point, convert_geometry(source))
Ejemplo n.º 2
0
    def test_invalid(self):
        """Inputs that are not valid geometry-likes raise ValueError with the standard message."""
        from xcube.core.geom import _INVALID_GEOMETRY_MSG

        # Both a coordinates-only mapping and a list with a non-numeric
        # element must be rejected with the same message.
        for bad_input in (dict(coordinates=[12.8, -34.4]),
                          [12.8, -34.4, '?']):
            with self.assertRaises(ValueError) as cm:
                convert_geometry(bad_input)
            self.assertEqual(_INVALID_GEOMETRY_MSG, f'{cm.exception}')
Ejemplo n.º 3
0
 def test_convert_to_box(self):
     """convert_geometry() accepts a box given in several equivalent forms."""
     box = shapely.geometry.box(12.8, -34.4, 14.2, 20.6)
     # An existing shapely geometry is passed through as the same object.
     self.assertIs(box, convert_geometry(box))
     # Coordinate list, numpy array, WKT string, and GeoJSON mapping all
     # convert to an equal box polygon.
     for source in ([12.8, -34.4, 14.2, 20.6],
                    np.array([12.8, -34.4, 14.2, 20.6]),
                    box.wkt,
                    box.__geo_interface__):
         self.assertEqual(box, convert_geometry(source))
Ejemplo n.º 4
0
    def test_convert_from_geojson_feature_dict(self):
        """GeoJSON Feature and FeatureCollection dicts convert to geometries."""
        box1 = shapely.geometry.box(-10, -20, 20, 10)
        box2 = shapely.geometry.box(30, 20, 50, 40)
        feature1 = dict(type='Feature', geometry=box1.__geo_interface__)
        feature2 = dict(type='Feature', geometry=box2.__geo_interface__)
        collection = dict(type='FeatureCollection',
                          features=(feature1, feature2))

        # A single Feature yields its embedded geometry.
        self.assertEqual(box1, convert_geometry(feature1))
        self.assertEqual(box2, convert_geometry(feature2))

        # A FeatureCollection yields a GeometryCollection of all features'
        # geometries, in order.
        self.assertEqual(
            shapely.geometry.GeometryCollection(geoms=[box1, box2]),
            convert_geometry(collection))
Ejemplo n.º 5
0
 def test_convert_to_split_box(self):
     """A box whose x1 > x2 crosses the antimeridian and is split in two."""
     # Eastern part: from x1=172.1 up to the +180 meridian.
     eastern_part = shapely.geometry.Polygon(
         ((180.0, -34.4), (180.0, 20.6), (172.1, 20.6), (172.1, -34.4),
          (180.0, -34.4)))
     # Western part: from the -180 meridian up to x2=-165.7.
     western_part = shapely.geometry.Polygon(
         ((-165.7, -34.4), (-165.7, 20.6), (-180.0, 20.6), (-180.0, -34.4),
          (-165.7, -34.4)))
     expected = shapely.geometry.MultiPolygon(
         polygons=[eastern_part, western_part])
     self.assertEqual(expected,
                      convert_geometry([172.1, -34.4, -165.7, 20.6]))
Ejemplo n.º 6
0
    def test_convert_invalid_box(self):
        """Box coordinate lists that cannot form a valid box raise ValueError."""
        from xcube.core.geom import _INVALID_BOX_COORDS_MSG

        def assert_invalid_box(coords):
            # Conversion must fail with the standard invalid-box message.
            with self.assertRaises(ValueError) as cm:
                convert_geometry(coords)
            self.assertEqual(_INVALID_BOX_COORDS_MSG, f'{cm.exception}')

        # y2 < y1: inverted latitude order.
        assert_invalid_box([12.8, 20.6, 14.2, -34.4])
        # x1 == x2 with y1 != y2: zero-width box.
        assert_invalid_box([12.8, -34.4, 12.8, 20.6])
        # y1 == y2 with x1 != x2: zero-height box.
        # NOTE(review): the original test repeated the zero-width case here
        # verbatim; a zero-height box was presumably intended — confirm that
        # convert_geometry rejects it with the same message.
        assert_invalid_box([12.8, -34.4, 14.2, -34.4])
Ejemplo n.º 7
0
def get_time_series(cube: xr.Dataset,
                    geometry: GeometryLike = None,
                    var_names: Sequence[str] = None,
                    start_date: Date = None,
                    end_date: Date = None,
                    include_count: bool = False,
                    include_stdev: bool = False,
                    use_groupby: bool = False,
                    cube_asserted: bool = False) -> Optional[xr.Dataset]:
    """
    Get a time series dataset from a data *cube*.

    *geometry* may be provided as a (shapely) geometry object, a valid GeoJSON object, a valid WKT string,
    a sequence of box coordinates (x1, y1, x2, y2), or point coordinates (x, y). If *geometry* covers an area,
    i.e. is not a point, the function aggregates the variables to compute a mean value and if desired,
    the number of valid observations and the standard deviation.

    *start_date* and *end_date* may be provided as a numpy.datetime64 or an ISO datetime string.

    Returns a time-series dataset whose data variables have a time dimension but no longer have spatial dimensions,
    hence the resulting dataset's variables will only have N-2 dimensions.
    A global attribute ``max_number_of_observations`` will be set to the maximum number of observations
    that could have been made in each time step.
    If the given *geometry* does not overlap the cube's boundaries, or if no output variables remain,
    the function returns ``None``.

    :param cube: The xcube dataset
    :param geometry: Optional geometry
    :param var_names: Optional sequence of names of variables to be included.
    :param start_date: Optional start date.
    :param end_date: Optional end date.
    :param include_count: Whether to include the number of valid observations for each time step.
           Ignored if geometry is a point.
    :param include_stdev: Whether to include standard deviation for each time step.
           Ignored if geometry is a point.
    :param use_groupby: Use group-by operation. May increase or decrease runtime performance and/or memory consumption.
    :param cube_asserted:  If False, *cube* will be verified, otherwise it is expected to be a valid cube.
    :return: A new dataset with time-series for each variable.
    """

    if not cube_asserted:
        assert_cube(cube)

    # Normalize geometry-like input (list, WKT, GeoJSON, ...) to shapely.
    geometry = convert_geometry(geometry)

    dataset = select_variables_subset(cube, var_names)
    if len(dataset.data_vars) == 0:
        # No variables selected -> nothing to aggregate.
        return None

    if start_date is not None or end_date is not None:
        # noinspection PyTypeChecker
        dataset = dataset.sel(time=slice(start_date, end_date))

    if isinstance(geometry, shapely.geometry.Point):
        # Point geometry: no spatial aggregation, just pick the nearest cell.
        bounds = get_dataset_geometry(dataset)
        if not bounds.contains(geometry):
            return None
        dataset = dataset.sel(lon=geometry.x, lat=geometry.y, method='Nearest')
        return dataset.assign_attrs(max_number_of_observations=1)

    if geometry is not None:
        # Mask the dataset to the geometry; the mask is kept temporarily as
        # variable '__mask__' so we can count the covered cells.
        dataset = mask_dataset_by_geometry(dataset,
                                           geometry,
                                           save_geometry_mask='__mask__')
        if dataset is None:
            # Geometry does not overlap the dataset's bounds.
            return None
        mask = dataset['__mask__']
        max_number_of_observations = np.count_nonzero(mask)
        # drop_vars(): Dataset.drop() is deprecated/removed in newer xarray;
        # this also matches the usage elsewhere in this module.
        dataset = dataset.drop_vars(['__mask__'])
    else:
        # No geometry: every spatial cell may contribute.
        max_number_of_observations = dataset.lat.size * dataset.lon.size

    ds_count = None
    ds_stdev = None
    if use_groupby:
        time_group = dataset.groupby('time')
        ds_mean = time_group.mean(skipna=True, dim=xr.ALL_DIMS)
        if include_count:
            ds_count = time_group.count(dim=xr.ALL_DIMS)
        if include_stdev:
            ds_stdev = time_group.std(skipna=True, dim=xr.ALL_DIMS)
    else:
        ds_mean = dataset.mean(dim=('lat', 'lon'), skipna=True)
        if include_count:
            ds_count = dataset.count(dim=('lat', 'lon'))
        if include_stdev:
            ds_stdev = dataset.std(dim=('lat', 'lon'), skipna=True)

    # Suffix the optional outputs so they can be merged with the means
    # without name clashes.
    if ds_count is not None:
        ds_count = ds_count.rename(
            name_dict=dict({v: f"{v}_count"
                            for v in ds_count.data_vars}))

    if ds_stdev is not None:
        ds_stdev = ds_stdev.rename(
            name_dict=dict({v: f"{v}_stdev"
                            for v in ds_stdev.data_vars}))

    if ds_count is not None and ds_stdev is not None:
        ts_dataset = xr.merge([ds_mean, ds_stdev, ds_count])
    elif ds_count is not None:
        ts_dataset = xr.merge([ds_mean, ds_count])
    elif ds_stdev is not None:
        ts_dataset = xr.merge([ds_mean, ds_stdev])
    else:
        ts_dataset = ds_mean

    ts_dataset = ts_dataset.assign_attrs(
        max_number_of_observations=max_number_of_observations)

    return ts_dataset
Ejemplo n.º 8
0
 def test_convert_box_as_point(self):
     """A degenerate box whose two corners coincide converts to a point."""
     degenerate_box = [12.8, -34.4, 12.8, -34.4]
     converted = convert_geometry(degenerate_box)
     self.assertEqual(shapely.geometry.Point(12.8, -34.4), converted)
Ejemplo n.º 9
0
 def test_convert_null(self):
     """None passes through convert_geometry() unchanged."""
     converted = convert_geometry(None)
     self.assertIs(None, converted)
Ejemplo n.º 10
0
def get_time_series(cube: xr.Dataset,
                    geometry: GeometryLike = None,
                    var_names: Sequence[str] = None,
                    start_date: Date = None,
                    end_date: Date = None,
                    agg_methods: Union[str, Sequence[str],
                                       AbstractSet[str]] = AGG_MEAN,
                    include_count: bool = False,
                    include_stdev: bool = False,
                    use_groupby: bool = False,
                    cube_asserted: bool = False) -> Optional[xr.Dataset]:
    """
    Get a time series dataset from a data *cube*.

    *geometry* may be provided as a (shapely) geometry object, a valid GeoJSON object, a valid WKT string,
    a sequence of box coordinates (x1, y1, x2, y2), or point coordinates (x, y). If *geometry* covers an area,
    i.e. is not a point, the function aggregates the variables to compute a mean value and if desired,
    the number of valid observations and the standard deviation.

    *start_date* and *end_date* may be provided as a numpy.datetime64 or an ISO datetime string.

    Returns a time-series dataset whose data variables have a time dimension but no longer have spatial dimensions,
    hence the resulting dataset's variables will only have N-2 dimensions.
    A global attribute ``max_number_of_observations`` will be set to the maximum number of observations
    that could have been made in each time step.
    If the given *geometry* does not overlap the cube's boundaries, or if no output variables remain,
    the function returns ``None``.

    :param cube: The xcube dataset
    :param geometry: Optional geometry
    :param var_names: Optional sequence of names of variables to be included.
    :param start_date: Optional start date.
    :param end_date: Optional end date.
    :param agg_methods: Aggregation methods. May be single string or sequence of strings. Possible values are
           'mean', 'median', 'min', 'max', 'std', 'count'. Defaults to 'mean'.
           Ignored if geometry is a point.
    :param include_count: Deprecated. Whether to include the number of valid observations for each time step.
           Ignored if geometry is a point.
    :param include_stdev: Deprecated. Whether to include standard deviation for each time step.
           Ignored if geometry is a point.
    :param use_groupby: Use group-by operation. May increase or decrease runtime performance and/or memory consumption.
    :param cube_asserted:  If False, *cube* will be verified, otherwise it is expected to be a valid cube.
    :return: A new dataset with time-series for each variable.
    """

    if not cube_asserted:
        assert_cube(cube)

    # Normalize geometry-like input (list, WKT, GeoJSON, ...) to shapely.
    geometry = convert_geometry(geometry)

    # NOTE(review): normalize_agg_methods presumably returns a mutable set —
    # add() is called below — confirm against its definition.
    agg_methods = normalize_agg_methods(agg_methods)
    # Map the deprecated boolean flags onto the agg_methods set.
    if include_count:
        warnings.warn("keyword argument 'include_count' has been deprecated, "
                      f"use 'agg_methods=[{AGG_COUNT!r}, ...]' instead")
        agg_methods.add(AGG_COUNT)
    if include_stdev:
        warnings.warn("keyword argument 'include_stdev' has been deprecated, "
                      f"use 'agg_methods=[{AGG_STD!r}, ...]' instead")
        agg_methods.add(AGG_STD)

    dataset = select_variables_subset(cube, var_names)
    if len(dataset.data_vars) == 0:
        # No variables selected -> nothing to aggregate.
        return None

    if start_date is not None or end_date is not None:
        # noinspection PyTypeChecker
        dataset = dataset.sel(time=slice(start_date, end_date))

    if isinstance(geometry, shapely.geometry.Point):
        # Point geometry: no spatial aggregation, just pick the nearest cell.
        bounds = get_dataset_geometry(dataset)
        if not bounds.contains(geometry):
            return None
        dataset = dataset.sel(lon=geometry.x, lat=geometry.y, method='Nearest')
        return dataset.assign_attrs(max_number_of_observations=1)

    if geometry is not None:
        # Mask the dataset to the geometry; the mask is kept temporarily as
        # variable '__mask__' so the covered cells can be counted.
        dataset = mask_dataset_by_geometry(dataset,
                                           geometry,
                                           save_geometry_mask='__mask__')
        if dataset is None:
            # Geometry does not overlap the dataset's bounds.
            return None
        mask = dataset['__mask__']
        max_number_of_observations = np.count_nonzero(mask)
        dataset = dataset.drop_vars(['__mask__'])
    else:
        # No geometry: every spatial cell may contribute.
        max_number_of_observations = dataset.lat.size * dataset.lon.size

    # Eagerly load the (possibly lazy) data when multiple aggregations will
    # traverse it, or when a method is flagged MUST_LOAD in AGG_METHODS.
    must_load = len(agg_methods) > 1 or any(
        AGG_METHODS[agg_method] == MUST_LOAD for agg_method in agg_methods)
    if must_load:
        dataset.load()

    # Run each requested aggregation; 'count' takes no skipna argument.
    agg_datasets = []
    if use_groupby:
        time_group = dataset.groupby('time')
        for agg_method in agg_methods:
            method = getattr(time_group, agg_method)
            if agg_method == 'count':
                agg_dataset = method(dim=xr.ALL_DIMS)
            else:
                agg_dataset = method(dim=xr.ALL_DIMS, skipna=True)
            agg_datasets.append(agg_dataset)
    else:
        for agg_method in agg_methods:
            method = getattr(dataset, agg_method)
            if agg_method == 'count':
                agg_dataset = method(dim=('lat', 'lon'))
            else:
                agg_dataset = method(dim=('lat', 'lon'), skipna=True)
            agg_datasets.append(agg_dataset)

    # Suffix each result's variables with its method name before merging.
    # NOTE(review): this zip relies on agg_methods iterating in the same
    # order as the loop above; that holds because the set is not modified
    # in between, but is fragile — confirm if agg_methods' type changes.
    agg_datasets = [
        agg_dataset.rename(name_dict=dict(
            {v: f"{v}_{agg_method}"
             for v in agg_dataset.data_vars}))
        for agg_method, agg_dataset in zip(agg_methods, agg_datasets)
    ]

    ts_dataset = xr.merge(agg_datasets)
    ts_dataset = ts_dataset.assign_attrs(
        max_number_of_observations=max_number_of_observations)

    return ts_dataset