Example 1
    def test_resample_f_all(self):
        resampled_cube = resample_in_time(self.input_cube, 'all', ['min', 'max'])
        self.assertIsNot(resampled_cube, self.input_cube)
        self.assertIn('time', resampled_cube)
        self.assertIn('temperature_min', resampled_cube)
        self.assertIn('temperature_max', resampled_cube)
        self.assertIn('precipitation_min', resampled_cube)
        self.assertIn('precipitation_max', resampled_cube)
        self.assertEqual(('time',), resampled_cube.time.dims)
        self.assertEqual(('time', 'lat', 'lon'), resampled_cube.temperature_min.dims)
        self.assertEqual(('time', 'lat', 'lon'), resampled_cube.temperature_max.dims)
        self.assertEqual(('time', 'lat', 'lon'), resampled_cube.precipitation_min.dims)
        self.assertEqual(('time', 'lat', 'lon'), resampled_cube.precipitation_max.dims)
        self.assertEqual((1,), resampled_cube.time.shape)
        self.assertEqual((1, 180, 360), resampled_cube.temperature_min.shape)
        self.assertEqual((1, 180, 360), resampled_cube.temperature_max.shape)
        self.assertEqual((1, 180, 360), resampled_cube.precipitation_min.shape)
        self.assertEqual((1, 180, 360), resampled_cube.precipitation_max.shape)
        np.testing.assert_allclose(resampled_cube.temperature_min.values[..., 0, 0],
                                   np.array([272.0]))
        np.testing.assert_allclose(resampled_cube.temperature_max.values[..., 0, 0],
                                   np.array([274.9]))
        np.testing.assert_allclose(resampled_cube.precipitation_min.values[..., 0, 0],
                                   np.array([114.2]))
        np.testing.assert_allclose(resampled_cube.precipitation_max.values[..., 0, 0],
                                   np.array([120.0]))

        schema = CubeSchema.new(resampled_cube)
        self.assertEqual(3, schema.ndim)
        self.assertEqual(('time', 'lat', 'lon'), schema.dims)
        self.assertEqual((1, 180, 360), schema.shape)
Example 2
    def test_resample_in_time_with_time_chunk_size(self):
        resampled_cube = resample_in_time(self.input_cube, '2D', ['min', 'max'], time_chunk_size=5)
        schema = CubeSchema.new(resampled_cube)
        self.assertEqual(3, schema.ndim)
        self.assertEqual(('time', 'lat', 'lon'), schema.dims)
        self.assertEqual((33, 180, 360), schema.shape)
        self.assertEqual((5, 90, 180), schema.chunks)
Example 3
    def test_repr_html(self):
        cube = new_cube(variables=dict(a=2, b=3, c=4))
        cube = cube.chunk(dict(time=1, lat=90, lon=90))
        schema = CubeSchema.new(cube)
        self.assertEqual("<table>"
                         "<tr><td>Shape:</td><td>(5, 180, 360)</td></tr>"
                         "<tr><td>Chunk sizes:</td><td>(1, 90, 90)</td></tr>"
                         "<tr><td>Dimensions:</td><td>('time', 'lat', 'lon')</td></tr>"
                         "</table>",
                         schema._repr_html_())
Example 4
    def test_resample_in_time_min_max(self):
        resampled_cube = resample_in_time(self.input_cube, '2W',
                                          ['min', 'max'])
        self.assertIsNot(resampled_cube, self.input_cube)
        self.assertIn('time', resampled_cube)
        self.assertIn('temperature_min', resampled_cube)
        self.assertIn('temperature_max', resampled_cube)
        self.assertIn('precipitation_min', resampled_cube)
        self.assertIn('precipitation_max', resampled_cube)
        self.assertEqual(('time', ), resampled_cube.time.dims)
        self.assertEqual(('time', 'lat', 'lon'),
                         resampled_cube.temperature_min.dims)
        self.assertEqual(('time', 'lat', 'lon'),
                         resampled_cube.temperature_max.dims)
        self.assertEqual(('time', 'lat', 'lon'),
                         resampled_cube.precipitation_min.dims)
        self.assertEqual(('time', 'lat', 'lon'),
                         resampled_cube.precipitation_max.dims)
        self.assertEqual((6, ), resampled_cube.time.shape)
        self.assertEqual((6, 180, 360), resampled_cube.temperature_min.shape)
        self.assertEqual((6, 180, 360), resampled_cube.temperature_max.shape)
        self.assertEqual((6, 180, 360), resampled_cube.precipitation_min.shape)
        self.assertEqual((6, 180, 360), resampled_cube.precipitation_max.shape)
        np.testing.assert_equal(
            resampled_cube.time.values,
            np.array([
                '2017-06-25T00:00:00', '2017-07-09T00:00:00',
                '2017-07-23T00:00:00', '2017-08-06T00:00:00',
                '2017-08-20T00:00:00', '2017-09-03T00:00:00'
            ],
                     dtype=np.datetime64))
        np.testing.assert_allclose(
            resampled_cube.temperature_min.values[..., 0, 0],
            np.array([272.0, 272.4, 273.0, 273.8, 274.4, 274.9]))
        np.testing.assert_allclose(
            resampled_cube.temperature_max.values[..., 0, 0],
            np.array([272.3, 272.9, 273.7, 274.3, 274.8, 274.9]))
        np.testing.assert_allclose(
            resampled_cube.precipitation_min.values[..., 0, 0],
            np.array([119.4, 118.2, 116.6, 115.4, 114.4, 114.2]))
        np.testing.assert_allclose(
            resampled_cube.precipitation_max.values[..., 0, 0],
            np.array([120.0, 119.2, 118.0, 116.4, 115.2, 114.2]))

        schema = CubeSchema.new(resampled_cube)
        self.assertEqual(3, schema.ndim)
        self.assertEqual(('time', 'lat', 'lon'), schema.dims)
        self.assertEqual((6, 180, 360), schema.shape)
        self.assertEqual((1, 90, 180), schema.chunks)
Example 5
    def test_without_inputs(self):
        calls = []

        def my_cube_func(
                input_params: Dict[str, Any] = None,
                dim_coords: Dict[str, np.ndarray] = None,
                dim_ranges: Dict[str, Tuple[int,
                                            int]] = None) -> CubeFuncOutput:
            nonlocal calls
            calls.append((input_params, dim_coords, dim_ranges))
            lon_range = dim_ranges['lon']
            lat_range = dim_ranges['lat']
            time_range = dim_ranges['time']
            n_lon = lon_range[1] - lon_range[0]
            n_lat = lat_range[1] - lat_range[0]
            n_time = time_range[1] - time_range[0]
            fill_value = input_params['fill_value']
            return np.full((n_time, n_lat, n_lon),
                           fill_value,
                           dtype=np.float64)

        output_cube = compute_cube(my_cube_func,
                                   input_cube_schema=CubeSchema.new(self.cube),
                                   input_params=dict(fill_value=0.74))

        self.assertIsInstance(output_cube, xr.Dataset)
        self.assertIn('output', output_cube.data_vars)
        output_var = output_cube.output
        self.assertEqual(0, len(calls))
        self.assertEqual(('time', 'lat', 'lon'), output_var.dims)
        self.assertEqual((6, 180, 360), output_var.shape)

        values = output_var.values
        self.assertEqual(2 * 2 * 4, len(calls))
        self.assertEqual((6, 180, 360), values.shape)
        self.assertAlmostEqual(0.74, values[0, 0, 0])
        self.assertAlmostEqual(0.74, values[-1, -1, -1])
Example 6
    def test_constructor_with_invalid_args(self):
        cube = new_cube(variables=dict(t=273))
        schema = CubeSchema.new(cube)
        with self.assertRaises(ValueError) as cm:
            # noinspection PyTypeChecker
            CubeSchema(None, schema.coords)
        self.assertEqual('shape must be a sequence of integer sizes',
                         f'{cm.exception}')

        with self.assertRaises(ValueError) as cm:
            # noinspection PyTypeChecker
            CubeSchema(schema.shape, None)
        self.assertEqual('coords must be a mapping from dimension names to label arrays',
                         f'{cm.exception}')

        with self.assertRaises(ValueError) as cm:
            # noinspection PyTypeChecker
            CubeSchema(schema.shape, cube.coords, x_name=None)
        self.assertEqual('x_name must be given',
                         f'{cm.exception}')

        with self.assertRaises(ValueError) as cm:
            # noinspection PyTypeChecker
            CubeSchema(schema.shape, cube.coords, y_name=None)
        self.assertEqual('y_name must be given',
                         f'{cm.exception}')

        with self.assertRaises(ValueError) as cm:
            # noinspection PyTypeChecker
            CubeSchema(schema.shape, cube.coords, time_name=None)
        self.assertEqual('time_name must be given',
                         f'{cm.exception}')

        with self.assertRaises(ValueError) as cm:
            CubeSchema(schema.shape[1:], schema.coords)
        self.assertEqual('shape must have at least three dimensions',
                         f'{cm.exception}')

        with self.assertRaises(ValueError) as cm:
            CubeSchema(schema.shape, schema.coords, dims=('lat', 'lon'))
        self.assertEqual('dims must have same length as shape',
                         f'{cm.exception}')

        with self.assertRaises(ValueError) as cm:
            CubeSchema(schema.shape, schema.coords, dims=('lat', 'lon', 'time'))
        self.assertEqual("the first dimension in dims must be 'time'",
                         f'{cm.exception}')

        with self.assertRaises(ValueError) as cm:
            CubeSchema(schema.shape, schema.coords, dims=('time', 'lon', 'lat'))
        self.assertEqual("the last two dimensions in dims must be 'lat' and 'lon'",
                         f'{cm.exception}')

        with self.assertRaises(ValueError) as cm:
            CubeSchema(schema.shape, schema.coords, dims=schema.dims, chunks=(90, 90))
        self.assertEqual("chunks must have same length as shape",
                         f'{cm.exception}')

        with self.assertRaises(ValueError) as cm:
            coords = dict(schema.coords)
            del coords['lat']
            CubeSchema(schema.shape, coords, dims=schema.dims, chunks=(1, 90, 90))
        self.assertEqual("missing variables 'lon', 'lat', 'time' in coords",
                         f'{cm.exception}')

        with self.assertRaises(ValueError) as cm:
            coords = dict(schema.coords)
            lat = coords['lat']
            coords['lat'] = xr.DataArray(lat.values.reshape((1, len(lat))), dims=('b', lat.dims[0]), attrs=lat.attrs)
            CubeSchema(schema.shape, coords, dims=schema.dims, chunks=(1, 90, 90))
        self.assertEqual("variables 'lon', 'lat', 'time' in coords must be 1-D",
                         f'{cm.exception}')

        with self.assertRaises(ValueError) as cm:
            coords = dict(schema.coords)
            lat = coords['lat']
            coords['lat'] = xr.DataArray(lat.values[1:], dims=('lat',), attrs=lat.attrs)
            CubeSchema(schema.shape, coords, dims=schema.dims, chunks=(1, 90, 90))
        self.assertEqual("number of labels of 'lat' in coords does not match shape",
                         f'{cm.exception}')
Example 7
    def test_new_chunked(self):
        cube = new_cube(variables=dict(a=2, b=3, c=4))
        cube = cube.chunk(dict(time=1, lat=90, lon=90))
        schema = CubeSchema.new(cube)
        self._assert_schema(schema, expected_shape=cube.a.shape, expected_chunks=(1, 90, 90))
Example 8
    def test_new_with_cube(self):
        cube = new_cube()
        with self.assertRaises(ValueError) as cm:
            CubeSchema.new(cube)
        self.assertEqual("cube is empty",
                         f'{cm.exception}')

        cube = new_cube()
        del cube.coords['lon']
        with self.assertRaises(ValueError) as cm:
            CubeSchema.new(cube)
        self.assertEqual("cube has no valid spatial coordinate variables",
                         f'{cm.exception}')

        cube = new_cube()
        del cube.coords['time']
        with self.assertRaises(ValueError) as cm:
            CubeSchema.new(cube)
        self.assertEqual("cube has no valid time coordinate variable",
                         f'{cm.exception}')

        cube = new_cube(variables=dict(a=1, b=2))
        cube['c'] = xr.DataArray(np.array([1, 2, 3, 4, 5]), dims=('q',))
        with self.assertRaises(ValueError) as cm:
            CubeSchema.new(cube)
        self.assertEqual("all variables must have same dimensions, but variable 'c' has dimensions ('q',)",
                         f'{cm.exception}')

        cube = new_cube(variables=dict(a=1, b=2))
        cube = cube.chunk(dict(time=1, lat=90, lon=90))
        cube['b'] = cube['b'].chunk(dict(time=1, lat=45, lon=90))
        with self.assertRaises(ValueError) as cm:
            CubeSchema.new(cube)
        self.assertEqual("all variables must have same chunks, but variable 'b' has chunks (1, 45, 90)",
                         f'{cm.exception}')

        cube = new_cube(variables=dict(a=1, b=2))
        cube = cube.chunk(dict(time=1, lat=(44, 43, 46, 47), lon=90))
        with self.assertRaises(ValueError) as cm:
            CubeSchema.new(cube)
        self.assertEqual("dimension 'lat' of variable 'a' has chunks of different sizes: (44, 43, 46, 47)",
                         f'{cm.exception}')
Example 9
    def test_new(self):
        cube = new_cube(variables=dict(a=2, b=3, c=4))
        schema = CubeSchema.new(cube)
        self._assert_schema(schema, expected_shape=cube.a.shape)
Example 10
def resample_in_time(dataset: xr.Dataset,
                     frequency: str,
                     method: Union[str, Sequence[str]],
                     offset=None,
                     base: int = 0,
                     tolerance=None,
                     interp_kind=None,
                     time_chunk_size=None,
                     var_names: Sequence[str] = None,
                     metadata: Dict[str, Any] = None,
                     cube_asserted: bool = False) -> xr.Dataset:
    """
    Resample a dataset in the time dimension.

    The argument *method* may be one or a sequence of
    ``'all'``, ``'any'``,
    ``'argmax'``, ``'argmin'``, ``'count'``,
    ``'first'``, ``'last'``,
    ``'max'``, ``'min'``, ``'mean'``, ``'median'``,
    ``'percentile_<p>'``,
    ``'std'``, ``'sum'``, ``'var'``.

    In the value ``'percentile_<p>'``, ``'<p>'`` is a placeholder
    that must be replaced by an integer percentage,
    e.g. ``'percentile_90'`` is the 90% percentile.

    *Important note:* As of xarray 0.14 and dask 2.8, the
    methods ``'median'`` and ``'percentile_<p>'`` cannot be
    used if the variables in *dataset* comprise chunked dask arrays.
    In this case, use the ``compute()`` or ``load()`` method
    to convert dask arrays into numpy arrays first.

    :param dataset: The xcube dataset.
    :param frequency: Temporal aggregation frequency.
        Use format "<count><offset>" where <offset> is one of
        'H', 'D', 'W', 'M', 'Q', 'Y'.
    :param method: Resampling method or sequence of
        resampling methods.
    :param offset: Offset used to adjust the resampled time labels.
        Uses same syntax as *frequency*.
    :param base: For frequencies that evenly subdivide 1 day,
        the "origin" of the aggregated intervals. For example,
        for '24H' frequency, base could range from 0 through 23.
    :param time_chunk_size: If not None, the chunk size to be
        used for the "time" dimension.
    :param var_names: Variable names to include.
    :param tolerance: Time tolerance for selective
        upsampling methods. Defaults to *frequency*.
    :param interp_kind: Kind of interpolation
        if *method* is 'interpolation'.
    :param metadata: Output metadata.
    :param cube_asserted: If False, *dataset* will be verified,
        otherwise it is expected to be a valid cube.
    :return: A new xcube dataset resampled in time.
    """
    if not cube_asserted:
        assert_cube(dataset)

    if frequency == 'all':
        time_gap = np.array(dataset.time[-1]) - np.array(dataset.time[0])
        days = int((np.timedelta64(time_gap, 'D') / np.timedelta64(1, 'D')) +
                   1)
        frequency = f'{days}D'

    if var_names:
        dataset = select_variables_subset(dataset, var_names)

    resampler = dataset.resample(skipna=True,
                                 closed='left',
                                 label='left',
                                 time=frequency,
                                 loffset=offset,
                                 base=base)

    if isinstance(method, str):
        methods = [method]
    else:
        methods = list(method)

    percentile_prefix = 'percentile_'

    resampled_cubes = []
    for method in methods:
        method_args = []
        method_postfix = method
        if method.startswith(percentile_prefix):
            p = int(method[len(percentile_prefix):])
            q = p / 100.0
            method_args = [q]
            method_postfix = f'p{p}'
            method = 'quantile'
        resampling_method = getattr(resampler, method)
        method_kwargs = get_method_kwargs(method, frequency, interp_kind,
                                          tolerance)
        resampled_cube = resampling_method(*method_args, **method_kwargs)
        resampled_cube = resampled_cube.rename({
            var_name: f'{var_name}_{method_postfix}'
            for var_name in resampled_cube.data_vars
        })
        resampled_cubes.append(resampled_cube)

    if len(resampled_cubes) == 1:
        resampled_cube = resampled_cubes[0]
    else:
        resampled_cube = xr.merge(resampled_cubes)

    # TODO: add time_bnds to resampled_ds
    time_coverage_start = '%s' % dataset.time[0]
    time_coverage_end = '%s' % dataset.time[-1]

    resampled_cube.attrs.update(metadata or {})
    # TODO: add other time_coverage_ attributes
    resampled_cube.attrs.update(time_coverage_start=time_coverage_start,
                                time_coverage_end=time_coverage_end)

    schema = CubeSchema.new(dataset)
    chunk_sizes = {
        schema.dims[i]: schema.chunks[i]
        for i in range(schema.ndim)
    }

    if isinstance(time_chunk_size, int) and time_chunk_size >= 0:
        chunk_sizes['time'] = time_chunk_size

    return resampled_cube.chunk(chunk_sizes)
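
A hedged usage sketch for the function above (not taken from the xcube source). The names `dataset`, `resampled`, and `overall` are assumptions for illustration; `dataset` stands for any valid xcube dataset with a `time` dimension:

    # Monthly means plus the 90%-percentile of every data variable;
    # each input variable <name> yields <name>_mean and <name>_p90.
    resampled = resample_in_time(dataset, '1M', ['mean', 'percentile_90'])

    # frequency='all' aggregates the entire time range into a single step:
    overall = resample_in_time(dataset, 'all', 'max', time_chunk_size=1)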
Example 11
def tile(cube: str,
         variables: Optional[str],
         labels: Optional[str],
         tile_size: Optional[str],
         config_path: Optional[str],
         style_id: Optional[str],
         output_path: Optional[str],
         verbose: List[bool],
         dry_run: bool):
    """
    Create RGBA tiles from CUBE.

    Color bars and value ranges for variables can be specified in a CONFIG file.
    Here the color mappings are defined for a style named "ocean_color":

    \b
    Styles:
      - Identifier: ocean_color
        ColorMappings:
          conc_chl:
            ColorBar: "plasma"
            ValueRange: [0., 24.]
          conc_tsm:
            ColorBar: "PuBuGn"
            ValueRange: [0., 100.]
          kd489:
            ColorBar: "jet"
            ValueRange: [0., 6.]

    This is the same styles syntax as used in the configuration file
    of "xcube serve", hence such a configuration can be reused.

    """
    import fractions
    import itertools
    import json
    import os.path
    # noinspection PyPackageRequirements
    import yaml
    import xarray as xr
    import numpy as np

    from xcube.core.mldataset import open_ml_dataset
    from xcube.core.mldataset import MultiLevelDataset
    from xcube.core.schema import CubeSchema
    from xcube.core.tile import get_ml_dataset_tile
    from xcube.core.tile import get_var_valid_range
    from xcube.core.tile import get_var_cmap_params
    from xcube.core.tile import parse_non_spatial_labels
    from xcube.core.select import select_variables_subset
    from xcube.cli.common import parse_cli_kwargs
    from xcube.cli.common import parse_cli_sequence
    from xcube.cli.common import assert_positive_int_item
    from xcube.util.tilegrid import TileGrid
    from xcube.util.tiledimage import DEFAULT_COLOR_MAP_NUM_COLORS

    # noinspection PyShadowingNames
    def write_tile_map_resource(path: str,
                                resolutions: List[fractions.Fraction],
                                tile_grid: TileGrid,
                                title='',
                                abstract='',
                                srs='CRS:84'):
        num_levels = len(resolutions)
        z_and_upp = zip(range(num_levels), map(float, resolutions))
        x1, y1, x2, y2 = tile_grid.geo_extent
        xml = [f'<TileMap version="1.0.0" tilemapservice="http://tms.osgeo.org/1.0.0">',
               f'  <Title>{title}</Title>',
               f'  <Abstract>{abstract}</Abstract>',
               f'  <SRS>{srs}</SRS>',
               f'  <BoundingBox minx="{x1}" miny="{y1}" maxx="{x2}" maxy="{y2}"/>',
               f'  <Origin x="{x1}" y="{y1}"/>',
               f'  <TileFormat width="{tile_grid.tile_width}" height="{tile_grid.tile_height}"'
               f' mime-type="image/png" extension="png"/>',
               f'  <TileSets profile="local">'] + [
                  f'    <TileSet href="{z}" order="{z}" units-per-pixel="{upp}"/>' for z, upp in z_and_upp] + [
                  f'  </TileSets>',
                  f'</TileMap>']
        with open(path, 'w') as fp:
            fp.write('\n'.join(xml))

    # noinspection PyShadowingNames
    def _convert_coord_var(coord_var: xr.DataArray):
        values = coord_var.values
        if np.issubdtype(values.dtype, np.datetime64):
            return list(np.datetime_as_string(values, timezone='UTC'))
        elif np.issubdtype(values.dtype, np.integer):
            return [int(value) for value in values]
        else:
            return [float(value) for value in values]

    # noinspection PyShadowingNames
    def _get_color_mappings(ml_dataset: MultiLevelDataset,
                            var_name: str,
                            config: Mapping[str, Any],
                            style_id: str):
        cmap_name = None
        cmap_range = None, None

        if config:
            style_id = style_id or 'default'
            styles = config.get('Styles')
            if styles:
                color_mappings = None
                for style in styles:
                    if style.get('Identifier') == style_id:
                        color_mappings = style.get('ColorMappings')
                        break
                if color_mappings:
                    color_mapping = color_mappings.get(var_name)
                    if color_mapping:
                        cmap_name = color_mapping.get('ColorBar')
                        cmap_vmin, cmap_vmax = color_mapping.get('ValueRange', (None, None))
                        cmap_range = cmap_vmin, cmap_vmax

        if cmap_name is not None and None not in cmap_range:
            return cmap_name, cmap_range
        var = ml_dataset.base_dataset[var_name]
        valid_range = get_var_valid_range(var)
        return get_var_cmap_params(var, cmap_name, cmap_range, valid_range)

    variables = parse_cli_sequence(variables, metavar='VARIABLES', num_items_min=1,
                                   item_plural_name='variables')

    tile_size = parse_cli_sequence(tile_size, num_items=2, metavar='TILE_SIZE',
                                   item_parser=int,
                                   item_validator=assert_positive_int_item,
                                   item_plural_name='tile sizes')

    labels = parse_cli_kwargs(labels, metavar='LABELS')

    verbosity = len(verbose)

    config = {}
    if config_path:
        if verbosity:
            print(f'Opening {config_path}...')
        with open(config_path, 'r') as fp:
            config = yaml.safe_load(fp)

    if verbosity:
        print(f'Opening {cube}...')

    ml_dataset = open_ml_dataset(cube, chunks='auto')
    tile_grid = ml_dataset.tile_grid
    base_dataset = ml_dataset.base_dataset
    schema = CubeSchema.new(base_dataset)
    spatial_dims = schema.x_dim, schema.y_dim

    if tile_size:
        tile_width, tile_height = tile_size
    else:
        if verbosity:
            print(f'Warning: using default tile sizes derived from CUBE')
        tile_width, tile_height = tile_grid.tile_width, tile_grid.tile_height

    indexers = None
    if labels:
        indexers = parse_non_spatial_labels(labels,
                                            schema.dims,
                                            schema.coords,
                                            allow_slices=True,
                                            exception_type=click.ClickException)

    def transform(ds: xr.Dataset) -> xr.Dataset:
        if variables:
            ds = select_variables_subset(ds, var_names=variables)
        if indexers:
            ds = ds.sel(**indexers)
        chunk_sizes = {dim: 1 for dim in ds.dims}
        chunk_sizes[spatial_dims[0]] = tile_width
        chunk_sizes[spatial_dims[1]] = tile_height
        return ds.chunk(chunk_sizes)

    ml_dataset = ml_dataset.apply(transform)
    tile_grid = ml_dataset.tile_grid
    base_dataset = ml_dataset.base_dataset
    schema = CubeSchema.new(base_dataset)
    spatial_dims = schema.x_dim, schema.y_dim

    x1, _, x2, _ = tile_grid.geo_extent
    num_levels = tile_grid.num_levels
    resolutions = [fractions.Fraction(fractions.Fraction(x2 - x1), tile_grid.width(z))
                   for z in range(num_levels)]

    if verbosity:
        print(f'Writing tile sets...')
        print(f'  Zoom levels: {num_levels}')
        print(f'  Resolutions: {", ".join(map(str, resolutions))} units/pixel')
        print(f'  Tile size:   {tile_width} x {tile_height} pixels')

    image_cache = {}

    for var_name, var in base_dataset.data_vars.items():
        color_bar, (value_min, value_max) = _get_color_mappings(ml_dataset, str(var_name), config, style_id)

        label_names = []
        label_indexes = []
        for dim in var.dims:
            if dim not in spatial_dims:
                label_names.append(dim)
                label_indexes.append(list(range(var[dim].size)))

        var_path = os.path.join(output_path, str(var_name))
        metadata_path = os.path.join(var_path, 'metadata.json')
        metadata = dict(name=str(var_name),
                        attrs={name: value
                               for name, value in var.attrs.items()},
                        dims=[str(dim)
                              for dim in var.dims],
                        dim_sizes={dim: int(var[dim].size)
                                   for dim in var.dims},
                        color_mapping=dict(color_bar=color_bar,
                                           value_min=value_min,
                                           value_max=value_max,
                                           num_colors=DEFAULT_COLOR_MAP_NUM_COLORS),
                        coordinates={name: _convert_coord_var(coord_var)
                                     for name, coord_var in var.coords.items() if coord_var.ndim == 1})
        if verbosity:
            print(f'Writing {metadata_path}')
        if not dry_run:
            os.makedirs(var_path, exist_ok=True)
            with open(metadata_path, 'w') as fp:
                json.dump(metadata, fp, indent=2)

        for label_index in itertools.product(*label_indexes):
            labels = {name: index for name, index in zip(label_names, label_index)}
            tilemap_path = os.path.join(var_path, *[str(l) for l in label_index])
            tilemap_resource_path = os.path.join(tilemap_path, 'tilemapresource.xml')
            if verbosity > 1:
                print(f'Writing {tilemap_resource_path}')
            if not dry_run:
                os.makedirs(tilemap_path, exist_ok=True)
                write_tile_map_resource(tilemap_resource_path, resolutions, tile_grid, title=f'{var_name}')
            for z in range(num_levels):
                num_tiles_x = tile_grid.num_tiles_x(z)
                num_tiles_y = tile_grid.num_tiles_y(z)
                tile_z_path = os.path.join(tilemap_path, str(z))
                if not dry_run and not os.path.exists(tile_z_path):
                    os.mkdir(tile_z_path)
                for x in range(num_tiles_x):
                    tile_zx_path = os.path.join(tile_z_path, str(x))
                    if not dry_run and not os.path.exists(tile_zx_path):
                        os.mkdir(tile_zx_path)
                    for y in range(num_tiles_y):
                        tile_bytes = get_ml_dataset_tile(ml_dataset,
                                                         str(var_name),
                                                         x, y, z,
                                                         labels=labels,
                                                         labels_are_indices=True,
                                                         cmap_name=color_bar,
                                                         cmap_range=(value_min, value_max),
                                                         image_cache=image_cache,
                                                         trace_perf=True,
                                                         exception_type=click.ClickException)
                        tile_path = os.path.join(tile_zx_path, f'{num_tiles_y - 1 - y}.png')
                        if verbosity > 2:
                            print(f'Writing tile {tile_path}')
                        if not dry_run:
                            with open(tile_path, 'wb') as fp:
                                fp.write(tile_bytes)

    print(f'Done writing tile sets.')
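
For reference, a minimal sketch (inferred from `_get_color_mappings` above, not taken verbatim from the source) of the parsed CONFIG structure this command expects after `yaml.safe_load`:

    # Hypothetical parsed form of the "ocean_color" style from the docstring:
    # config['Styles'] is a list of styles; each style has an 'Identifier'
    # and 'ColorMappings'; each mapping has 'ColorBar' and 'ValueRange'.
    config = {
        'Styles': [
            {
                'Identifier': 'ocean_color',
                'ColorMappings': {
                    'conc_chl': {'ColorBar': 'plasma',
                                 'ValueRange': [0.0, 24.0]},
                },
            },
        ],
    }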
Example 12
def compute_dataset(cube_func: CubeFunc,
                    *input_cubes: xr.Dataset,
                    input_cube_schema: CubeSchema = None,
                    input_var_names: Sequence[str] = None,
                    input_params: Dict[str, Any] = None,
                    output_var_name: str = 'output',
                    output_var_dims: AbstractSet[str] = None,
                    output_var_dtype: Any = np.float64,
                    output_var_attrs: Dict[str, Any] = None,
                    vectorize: bool = None,
                    cube_asserted: bool = False) -> xr.Dataset:
    """
    Compute a new output dataset with a single variable named *output_var_name*
    from variables named *input_var_names* contained in zero, one, or more
    input data cubes in *input_cubes* using a cube factory function *cube_func*.

    *cube_func* is called concurrently for each of the chunks of the input variables.
    It is expected to return a chunk block, which is of type ``np.ndarray``.

    If *input_cubes* is not empty, *cube_func* receives variables as specified by *input_var_names*.
    If *input_cubes* is empty, *input_var_names* must be empty too, and *input_cube_schema*
    must be given, so that a new cube can be created.

    The full signature of *cube_func* is::

        def cube_func(*input_vars: np.ndarray,
                      input_params: Dict[str, Any] = None,
                      dim_coords: Dict[str, np.ndarray] = None,
                      dim_ranges: Dict[str, Tuple[int, int]] = None) -> np.ndarray:
            pass

    The arguments are:

    * ``input_vars``: the variables according to the given *input_var_names*;
    * ``input_params``: is this call's *input_params*, a mapping from parameter name to value;
    * ``dim_coords``: a mapping from dimension names to the current chunk's coordinate arrays;
    * ``dim_ranges``: a mapping from dimension names to the current chunk's index ranges.

    Only the ``input_vars`` argument is mandatory. The keyword arguments
    ``input_params``, ``dim_coords``, and ``dim_ranges`` need not be present at all.

    *output_var_dims* may be given in the case where ...
    TODO: describe new output_var_dims...

    :param cube_func: The cube factory function.
    :param input_cubes: An optional sequence of input cube datasets, must be provided if *input_cube_schema* is not.
    :param input_cube_schema: An optional input cube schema, must be provided if *input_cubes* is not.
    :param input_var_names: A sequence of variable names
    :param input_params: Optional dictionary with processing parameters passed to *cube_func*.
    :param output_var_name: Optional name of the output variable, defaults to ``'output'``.
    :param output_var_dims: Optional set of names of the output dimensions,
        used in the case *cube_func* reduces dimensions.
    :param output_var_dtype: Optional numpy datatype of the output variable, defaults to ``np.float64``.
    :param output_var_attrs: Optional metadata attributes for the output variable.
    :param vectorize: Whether all *input_cubes* have the same variables which are concatenated and passed as vectors
        to *cube_func*. Not implemented yet.
    :param cube_asserted: If False, each cube in *input_cubes* will be verified, otherwise all cubes are expected to be valid.
    :return: A new dataset that contains the computed output variable.
    """
    if vectorize is not None:
        # TODO: support vectorize = all cubes have same variables and cube_func
        #       receives variables as vectors (with extra dim)
        raise NotImplementedError('vectorize is not supported yet')

    if not cube_asserted:
        for cube in input_cubes:
            assert_cube(cube)

    # Check compatibility of inputs
    if input_cubes:
        input_cube_schema = CubeSchema.new(input_cubes[0])
        for cube in input_cubes:
            if not cube_asserted:
                assert_cube(cube)
            if cube != input_cubes[0]:
                # noinspection PyUnusedLocal
                other_schema = CubeSchema.new(cube)
                # TODO (forman): broadcast all cubes to same shape, rechunk to same chunks
    elif input_cube_schema is None:
        raise ValueError('input_cube_schema must be given')

    output_var_name = output_var_name or 'output'

    # Collect named input variables, raise if not found
    input_var_names = input_var_names or []
    input_vars = []
    for var_name in input_var_names:
        input_var = None
        for cube in input_cubes:
            if var_name in cube.data_vars:
                input_var = cube[var_name]
                break
        if input_var is None:
            raise ValueError(
                f'variable {var_name!r} not found in any of cubes')
        input_vars.append(input_var)

    # Find out, if cube_func uses any of _PREDEFINED_KEYWORDS
    has_input_params, has_dim_coords, has_dim_ranges = _inspect_cube_func(
        cube_func, input_var_names)

    def cube_func_wrapper(index_chunk, *input_var_chunks):
        nonlocal input_cube_schema, input_var_names, input_params, input_vars
        nonlocal has_input_params, has_dim_coords, has_dim_ranges

        # Note, xarray.apply_ufunc does a test call with empty input arrays,
        # so index_chunk.size == 0 is a valid case
        empty_call = index_chunk.size == 0

        # TODO: when output_var_dims is given, index_chunk must be reordered
        #   as core dimensions are moved to the end of index_chunk and input_var_chunks
        if not empty_call:
            index_chunk = index_chunk.ravel()

        if index_chunk.size < 2 * input_cube_schema.ndim:
            if not empty_call:
                warnings.warn(
                    f"unexpected index_chunk of size {index_chunk.size} received!"
                )
                return None

        dim_ranges = None
        if has_dim_ranges or has_dim_coords:
            dim_ranges = {}
            for i in range(input_cube_schema.ndim):
                dim_name = input_cube_schema.dims[i]
                if not empty_call:
                    start = int(index_chunk[2 * i + 0])
                    end = int(index_chunk[2 * i + 1])
                    dim_ranges[dim_name] = start, end
                else:
                    dim_ranges[dim_name] = ()

        dim_coords = None
        if has_dim_coords:
            dim_coords = {}
            for coord_var_name, coord_var in input_cube_schema.coords.items():
                coord_slices = [slice(None)] * coord_var.ndim
                for i in range(input_cube_schema.ndim):
                    dim_name = input_cube_schema.dims[i]
                    if dim_name in coord_var.dims:
                        j = coord_var.dims.index(dim_name)
                        coord_slices[j] = slice(*dim_ranges[dim_name])
                dim_coords[coord_var_name] = coord_var[tuple(
                    coord_slices)].values

        kwargs = {}
        if has_input_params:
            kwargs['input_params'] = input_params
        if has_dim_ranges:
            kwargs['dim_ranges'] = dim_ranges
        if has_dim_coords:
            kwargs['dim_coords'] = dim_coords

        return cube_func(*input_var_chunks, **kwargs)

    index_var = _gen_index_var(input_cube_schema)

    all_input_vars = [index_var] + input_vars

    input_core_dims = None
    if output_var_dims:
        input_core_dims = []
        has_warned = False
        for i in range(len(all_input_vars)):
            input_var = all_input_vars[i]
            var_core_dims = [
                dim for dim in input_var.dims if dim not in output_var_dims
            ]
            must_rechunk = False
            if var_core_dims and input_var.chunks:
                for var_core_dim in var_core_dims:
                    dim_index = input_var.dims.index(var_core_dim)
                    dim_chunk_size = input_var.chunks[dim_index][0]
                    dim_shape_size = input_var.shape[dim_index]
                    if dim_chunk_size != dim_shape_size:
                        must_rechunk = True
                        break
            if must_rechunk:
                if not has_warned:
                    warnings.warn(
                        f'Input variables must not be chunked in dimension(s): {", ".join(var_core_dims)}.\n'
                        f'Rechunking applies, which may drastically decrease runtime performance '
                        f'and increase memory usage.')
                    has_warned = True
                all_input_vars[i] = input_var.chunk(
                    {var_core_dim: -1
                     for var_core_dim in var_core_dims})
            input_core_dims.append(var_core_dims)

    output_var = xr.apply_ufunc(cube_func_wrapper,
                                *all_input_vars,
                                dask='parallelized',
                                input_core_dims=input_core_dims,
                                output_dtypes=[output_var_dtype])
    if output_var_attrs:
        output_var.attrs.update(output_var_attrs)
    return xr.Dataset({output_var_name: output_var},
                      coords=input_cube_schema.coords)
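
A hedged usage sketch (not from the xcube source). The cube and the variable name 'temperature' are assumptions for illustration; `_inspect_cube_func` only passes the keyword arguments that the given function actually declares:

    import numpy as np

    def scale_temperature(temperature: np.ndarray,
                          input_params=None) -> np.ndarray:
        # Called once per chunk; `temperature` is one chunk of the variable.
        return input_params['factor'] * temperature

    output_ds = compute_dataset(scale_temperature,
                                cube,  # an assumed, valid xcube dataset
                                input_var_names=['temperature'],
                                input_params=dict(factor=0.5),
                                output_var_name='temperature_scaled')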
Example 13
def compute_cube(cube_func: CubeFunc,
                 *input_cubes: xr.Dataset,
                 input_cube_schema: CubeSchema = None,
                 input_var_names: Sequence[str] = None,
                 input_params: Dict[str, Any] = None,
                 output_var_name: str = 'output',
                 output_var_dtype: Any = np.float64,
                 output_var_attrs: Dict[str, Any] = None,
                 vectorize: bool = None,
                 cube_asserted: bool = False) -> xr.Dataset:
    """
    Compute a new output data cube with a single variable named *output_var_name*
    from variables named *input_var_names* contained in zero, one, or more
    input data cubes in *input_cubes* using a cube factory function *cube_func*.

    *cube_func* is called concurrently for each of the chunks of the input variables.
    It is expected to return a chunk block, which is of type ``np.ndarray``.

    If *input_cubes* is not empty, *cube_func* receives variables as specified by *input_var_names*.
    If *input_cubes* is empty, *input_var_names* must be empty too, and *input_cube_schema*
    must be given, so that a new cube can be created.

    The full signature of *cube_func* is::

        def cube_func(*input_vars: np.ndarray,
                      input_params: Dict[str, Any] = None,
                      dim_coords: Dict[str, np.ndarray] = None,
                      dim_ranges: Dict[str, Tuple[int, int]] = None) -> np.ndarray:
            pass

    The arguments are:

    * ``input_vars``: the variables according to the given *input_var_names*;
    * ``input_params``: is this call's *input_params*, a mapping from parameter name to value;
    * ``dim_coords``: a mapping from dimension names to the current chunk's coordinate arrays;
    * ``dim_ranges``: a mapping from dimension names to the current chunk's index ranges.

    Only the ``input_vars`` argument is mandatory. The keyword arguments
    ``input_params``, ``dim_coords``, and ``dim_ranges`` need not be present at all.

    :param cube_func: The cube factory function.
    :param input_cubes: An optional sequence of input cube datasets, must be provided if *input_cube_schema* is not.
    :param input_cube_schema: An optional input cube schema, must be provided if *input_cubes* is not.
    :param input_var_names: A sequence of variable names
    :param input_params: Optional dictionary with processing parameters passed to *cube_func*.
    :param output_var_name: Optional name of the output variable, defaults to ``'output'``.
    :param output_var_dtype: Optional numpy datatype of the output variable, defaults to ``np.float64``.
    :param output_var_attrs: Optional metadata attributes for the output variable.
    :param vectorize: Whether all *input_cubes* have the same variables which are concatenated and passed as vectors
        to *cube_func*. Not implemented yet.
    :param cube_asserted: If False, each cube in *input_cubes* will be verified, otherwise all cubes are expected to be valid.
    :return: A new dataset that contains the computed output variable.
    """
    if vectorize is not None:
        raise NotImplementedError('vectorize is not supported yet')

    if not cube_asserted:
        for cube in input_cubes:
            assert_cube(cube)

    if input_cubes:
        input_cube_schema = CubeSchema.new(input_cubes[0])
        for cube in input_cubes:
            if not cube_asserted:
                assert_cube(cube)
            if cube != input_cubes[0]:
                # noinspection PyUnusedLocal
                other_schema = CubeSchema.new(cube)
                # TODO (forman): broadcast all cubes to same shape, rechunk to same chunks
    elif input_cube_schema is None:
        raise ValueError('input_cube_schema must be given')

    if output_var_name is None:
        output_var_name = 'output'

    input_var_names = input_var_names or []
    input_vars = []
    for var_name in input_var_names:
        var = None
        for cube in input_cubes:
            if var_name in cube.data_vars:
                var = cube[var_name]
                break
        if var is None:
            raise ValueError(f'variable {var_name!r} not found in any of cubes')
        input_vars.append(var)

    has_input_params, has_dim_coords, has_dim_ranges = _inspect_cube_func(cube_func, input_var_names)

    def cube_func_wrapper(index_chunk, *input_var_chunks):
        nonlocal input_cube_schema, input_var_names, input_params, input_vars
        nonlocal has_input_params, has_dim_coords, has_dim_ranges

        index_chunk = index_chunk.ravel()

        if index_chunk.size < 2 * input_cube_schema.ndim:
            warnings.warn(f"weird index_chunk of size {index_chunk.size} received!")
            return

        dim_ranges = None
        if has_dim_ranges or has_dim_coords:
            dim_ranges = {}
            for i in range(input_cube_schema.ndim):
                dim_name = input_cube_schema.dims[i]
                start = int(index_chunk[2 * i + 0])
                end = int(index_chunk[2 * i + 1])
                dim_ranges[dim_name] = start, end

        dim_coords = None
        if has_dim_coords:
            dim_coords = {}
            for coord_var_name, coord_var in input_cube_schema.coords.items():
                coord_slices = [slice(None)] * coord_var.ndim
                for i in range(input_cube_schema.ndim):
                    dim_name = input_cube_schema.dims[i]
                    if dim_name in coord_var.dims:
                        j = coord_var.dims.index(dim_name)
                        coord_slices[j] = slice(*dim_ranges[dim_name])
                dim_coords[coord_var_name] = coord_var[tuple(coord_slices)].values

        kwargs = {}
        if has_input_params:
            kwargs['input_params'] = input_params
        if has_dim_ranges:
            kwargs['dim_ranges'] = dim_ranges
        if has_dim_coords:
            kwargs['dim_coords'] = dim_coords

        return cube_func(*input_var_chunks, **kwargs)

    index_var = _gen_index_var(input_cube_schema)

    output_var = xr.apply_ufunc(cube_func_wrapper,
                                index_var,
                                *input_vars,
                                dask='parallelized',
                                output_dtypes=[output_var_dtype])
    if output_var_attrs:
        output_var.attrs.update(output_var_attrs)
    return xr.Dataset({output_var_name: output_var}, coords=input_cube_schema.coords)
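
A hedged usage sketch (not from the xcube source) combining two variables of one cube; the cube and the variable names 'a' and 'b' are assumptions for illustration:

    import numpy as np

    def weighted_sum(a: np.ndarray, b: np.ndarray,
                     input_params=None) -> np.ndarray:
        # Receives one chunk of each of the variables 'a' and 'b'.
        w = input_params['weight']
        return w * a + (1.0 - w) * b

    result = compute_cube(weighted_sum,
                          cube,  # an assumed, valid xcube dataset
                          input_var_names=['a', 'b'],
                          input_params=dict(weight=0.25))
    # The computed variable is result.output (default output_var_name).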
Example 14
def resample_in_time(cube: xr.Dataset,
                     frequency: str,
                     method: Union[str, Sequence[str]],
                     offset=None,
                     base: int = 0,
                     tolerance=None,
                     interp_kind=None,
                     time_chunk_size=None,
                     var_names: Sequence[str] = None,
                     metadata: Dict[str, Any] = None,
                     cube_asserted: bool = False) -> xr.Dataset:
    """
    Resample an xcube dataset in the time dimension.

    :param cube: The xcube dataset.
    :param frequency: Temporal aggregation frequency. Use format "<count><offset>"
        where <offset> is one of 'H', 'D', 'W', 'M', 'Q', 'Y'.
    :param method: Resampling method or sequence of resampling methods.
    :param offset: Offset used to adjust the resampled time labels.
        Uses same syntax as *frequency*.
    :param base: For frequencies that evenly subdivide 1 day, the "origin" of the
        aggregated intervals. For example, for '24H' frequency, base could range from 0 through 23.
    :param time_chunk_size: If not None, the chunk size to be used for the "time" dimension.
    :param var_names: Variable names to include.
    :param tolerance: Time tolerance for selective upsampling methods. Defaults to *frequency*.
    :param interp_kind: Kind of interpolation if *method* is 'interpolation'.
    :param metadata: Output metadata.
    :param cube_asserted: If False, *cube* will be verified, otherwise it is expected to be a valid cube.
    :return: A new xcube dataset resampled in time.
    """
    if not cube_asserted:
        assert_cube(cube)

    if var_names:
        cube = select_vars(cube, var_names)

    resampler = cube.resample(skipna=True,
                              closed='left',
                              label='left',
                              keep_attrs=True,
                              time=frequency,
                              loffset=offset,
                              base=base)

    if isinstance(method, str):
        methods = [method]
    else:
        methods = list(method)

    resampled_cubes = []
    for method in methods:
        resampling_method = getattr(resampler, method)
        kwargs = get_method_kwargs(method, frequency, interp_kind, tolerance)
        resampled_cube = resampling_method(**kwargs)
        resampled_cube = resampled_cube.rename(
            {var_name: f'{var_name}_{method}' for var_name in resampled_cube.data_vars})
        resampled_cubes.append(resampled_cube)

    if len(resampled_cubes) == 1:
        resampled_cube = resampled_cubes[0]
    else:
        resampled_cube = xr.merge(resampled_cubes)

    # TODO: add time_bnds to resampled_ds
    time_coverage_start = '%s' % cube.time[0]
    time_coverage_end = '%s' % cube.time[-1]

    resampled_cube.attrs.update(metadata or {})
    # TODO: add other time_coverage_ attributes
    resampled_cube.attrs.update(time_coverage_start=time_coverage_start,
                                time_coverage_end=time_coverage_end)

    schema = CubeSchema.new(cube)
    chunk_sizes = {schema.dims[i]: schema.chunks[i] for i in range(schema.ndim)}

    if isinstance(time_chunk_size, int) and time_chunk_size >= 0:
        chunk_sizes['time'] = time_chunk_size

    return resampled_cube.chunk(chunk_sizes)