Ejemplo n.º 1
0
def vars2dim(cube, variable, dim_name, output=None, format=None):
    """
    Convert cube variables into new dimension.
    Moves all variables of CUBE into a single new variable <var-name>
    with a new dimension DIM-NAME and writes the results to OUTPUT.
    """
    import os

    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_dataset, write_dataset
    from xcube.core.vars2dim import vars_to_dim

    # Default OUTPUT: "<cube>-vars2dim<ext>" placed next to the input cube.
    if not output:
        cube_dir = os.path.dirname(cube)
        cube_name, cube_ext = os.path.splitext(os.path.basename(cube))
        output = os.path.join(cube_dir, cube_name + '-vars2dim' + cube_ext)

    # Use the explicit format if given, otherwise derive it from OUTPUT.
    format_name = format or guess_dataset_format(output)

    with open_dataset(input_path=cube) as dataset:
        converted = vars_to_dim(dataset,
                                dim_name=dim_name,
                                var_name=variable)
        write_dataset(converted,
                      output_path=output,
                      format_name=format_name)
Ejemplo n.º 2
0
    def test_write_dataset(self):
        # write_dataset() is expected to create a NetCDF file on disk.
        # Always remove the file afterwards, even if the assertion fails.
        cube = new_cube()
        try:
            write_dataset(cube, TEST_NC_FILE_2)
            self.assertTrue(os.path.isfile(TEST_NC_FILE_2))
        finally:
            if os.path.isfile(TEST_NC_FILE_2):
                os.remove(TEST_NC_FILE_2)
Ejemplo n.º 3
0
def _rectify(input_path: str, xy_names: Optional[Tuple[str, str]],
             var_names: Optional[Sequence[str]], output_path: str,
             output_format: Optional[str],
             output_size: Optional[Tuple[int, int]],
             output_tile_size: Optional[Tuple[int, int]],
             output_point: Optional[Tuple[float, float]],
             output_res: Optional[float],
             delta: float, dry_run: bool, monitor):
    """
    Rectify the dataset at *input_path* and write the result to *output_path*.

    :param input_path: Path of the source dataset; Sentinel-3 products are
        detected and opened with the dedicated reader.
    :param xy_names: Optional names of the x/y coordinate variables.
    :param var_names: Optional names of the variables to rectify.
    :param output_path: Path of the rectified output dataset.
    :param output_format: Optional output format name; guessed from
        *output_path* if not given.
    :param output_size: Optional output image size (width, height).
    :param output_tile_size: Optional output tile size (width, height).
    :param output_point: Optional (x, y) of the output's minimum corner.
    :param output_res: Optional output x/y resolution.
    :param delta: UV-delta passed to the rectification.
    :param dry_run: If True, do everything except writing the output.
    :param monitor: Callable used to report progress messages.
    :raises click.ClickException: If only some of SIZE, POINT, RES are given.
    """
    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_dataset
    from xcube.core.dsio import write_dataset
    from xcube.core.rectify import rectify_dataset
    from xcube.core.rectify import ImageGeom
    from xcube.core.sentinel3 import is_sentinel3_product
    from xcube.core.sentinel3 import open_sentinel3_product

    if not output_format:
        output_format = guess_dataset_format(output_path)

    # SIZE, POINT, and RES together define the output geometry; they must
    # be given all together or not at all (same all/any pattern as the
    # CRS-aware variant of this command).
    geom_given = (output_size is not None,
                  output_point is not None,
                  output_res is not None)
    output_geom = None
    if all(geom_given):
        output_geom = ImageGeom(size=output_size,
                                x_min=output_point[0],
                                y_min=output_point[1],
                                xy_res=output_res)
    elif any(geom_given):
        raise click.ClickException(
            'SIZE, POINT, and RES must all be given or none of them.')

    monitor(f'Opening dataset from {input_path!r}...')

    # Sentinel-3 products need the dedicated reader.
    if is_sentinel3_product(input_path):
        src_ds = open_sentinel3_product(input_path)
    else:
        src_ds = open_dataset(input_path)

    monitor('Rectifying...')
    reproj_ds = rectify_dataset(src_ds,
                                xy_names=xy_names,
                                var_names=var_names,
                                output_geom=output_geom,
                                tile_size=output_tile_size,
                                uv_delta=delta)

    if reproj_ds is None:
        monitor(
            f'Dataset {input_path} does not seem to have an intersection with bounding box'
        )
        return

    monitor(f'Writing rectified dataset to {output_path!r}...')
    if not dry_run:
        write_dataset(reproj_ds, output_path, output_format)

    # Plain string: no placeholders, so an f-string prefix was superfluous.
    monitor('Done.')
Ejemplo n.º 4
0
    def _write_test_cube(cls):
        # Credentials and endpoint for the moto-mocked S3 service.
        credentials = dict(key='test_fake_id', secret='test_fake_secret')
        client_kwargs = dict(endpoint_url=MOTO_SERVER_ENDPOINT_URL)
        file_system = s3fs.S3FileSystem(**credentials,
                                        client_kwargs=client_kwargs)
        # The bucket must exist before the test cube can be written into it.
        file_system.mkdir('xcube-test-cube')

        # Load the demo cube and reduce it to the subset under test.
        zarr_path = os.path.join(os.path.dirname(__file__), '../../examples/serve/demo/cube-1-250-250.zarr')
        test_cube = cls._make_subset(xr.open_zarr(zarr_path))

        # Write test cube into the bucket.
        write_dataset(test_cube,
                      'xcube-test-cube/cube-1-250-250.zarr',
                      s3_kwargs=credentials,
                      s3_client_kwargs=client_kwargs)
Ejemplo n.º 5
0
    def _write_test_cube(cls):
        # The bucket must exist before we can write the test pyramid into it.
        boto3.client('s3').create_bucket(Bucket='xcube-test',
                                         ACL='public-read')

        # Load the demo cube and keep only the variable "conc_chl".
        demo_path = os.path.join(
            os.path.dirname(__file__),
            '../../examples/serve/demo/cube-1-250-250.zarr')
        source = xr.open_zarr(demo_path)
        test_cube = xr.Dataset(dict(conc_chl=source.conc_chl))

        # Write test cube to the bucket URL.
        write_dataset(
            test_cube,
            'https://s3.amazonaws.com/xcube-test/cube-1-250-250.zarr',
            client_kwargs=dict(provider_access_key_id='test_fake_id',
                               provider_secret_access_key='test_fake_secret'))
Ejemplo n.º 6
0
def chunk(cube, output, format=None, params=None, chunks=None):
    """
    (Re-)chunk xcube dataset.
    Changes the external chunking of all variables of CUBE according to CHUNKS and writes
    the result to OUTPUT.

    Note: There is a possibly more efficient way to (re-)chunk datasets through the
    dedicated tool "rechunker", see https://rechunker.readthedocs.io.
    """
    from xcube.core.chunk import chunk_dataset
    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_dataset, write_dataset

    # Parse and validate CHUNKS before touching the dataset.
    chunk_sizes = None
    if chunks:
        chunk_sizes = parse_cli_kwargs(chunks, metavar="CHUNKS")
        for size in chunk_sizes.values():
            if not isinstance(size, int) or size <= 0:
                raise click.ClickException(
                    "Invalid value for CHUNKS, "
                    f"chunk sizes must be positive integers: {chunks}")

    # Extra keyword arguments forwarded to the writer.
    write_kwargs = parse_cli_kwargs(params, metavar="PARAMS") if params else dict()

    format_name = format if format else guess_dataset_format(output)

    with open_dataset(input_path=cube) as dataset:
        # Chunk keys must name existing dataset dimensions.
        if chunk_sizes:
            for dim in chunk_sizes:
                if dim not in dataset.dims:
                    raise click.ClickException(
                        "Invalid value for CHUNKS, "
                        f"{dim!r} is not the name of any dimension: {chunks}")

        rechunked = chunk_dataset(dataset,
                                  chunk_sizes=chunk_sizes,
                                  format_name=format_name)
        write_dataset(rechunked,
                      output_path=output,
                      format_name=format_name,
                      **write_kwargs)
Ejemplo n.º 7
0
def _rectify(input_path: str,
             xy_names: Optional[Tuple[str, str]],
             var_names: Optional[Sequence[str]],
             output_path: str,
             output_format: Optional[str],
             output_size: Optional[Tuple[int, int]],
             output_tile_size: Optional[Tuple[int, int]],
             output_point: Optional[Tuple[float, float]],
             output_res: Optional[float],
             output_crs: Optional[str],
             delta: float,
             dry_run: bool,
             monitor):
    """
    Rectify the dataset at *input_path* onto a regular target grid and write
    the result to *output_path*.

    :param input_path: Path of the source dataset; Sentinel-3 products are
        detected and opened with the dedicated reader.
    :param xy_names: Optional names of the x/y coordinate variables.
    :param var_names: Optional names of the variables to rectify.
    :param output_path: Path of the rectified output dataset.
    :param output_format: Optional output format name; guessed from
        *output_path* if not given.
    :param output_size: Optional output image size (width, height).
    :param output_tile_size: Optional output tile size (width, height).
    :param output_point: Optional (x, y) of the output's minimum corner.
    :param output_res: Optional output x/y resolution.
    :param output_crs: Optional CRS given in any form accepted by pyproj.
    :param delta: UV-delta passed to the rectification.
    :param dry_run: If True, do everything except writing the output.
    :param monitor: Callable used to report progress messages.
    :raises click.ClickException: If only some of SIZE, POINT, RES, CRS
        are given.
    """
    import pyproj.crs

    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_dataset
    from xcube.core.dsio import write_dataset
    from xcube.core.gridmapping import GridMapping
    from xcube.core.resampling import rectify_dataset
    from xcube.core.sentinel3 import is_sentinel3_product
    from xcube.core.sentinel3 import open_sentinel3_product

    if not output_format:
        output_format = guess_dataset_format(output_path)

    # SIZE, POINT, RES, and CRS together define the target grid mapping;
    # they must be given all together or not at all.
    output_gm = None
    output_gm_given = (output_size is not None,
                       output_point is not None,
                       output_res is not None,
                       output_crs is not None)
    if all(output_gm_given):
        output_gm = GridMapping.regular(
            size=output_size,
            xy_min=output_point,
            xy_res=output_res,
            crs=pyproj.crs.CRS.from_user_input(output_crs))
    elif any(output_gm_given):
        raise click.ClickException(
            'SIZE, POINT, RES, and CRS must all be given or none of them.')

    monitor(f'Opening dataset from {input_path!r}...')

    # Sentinel-3 products need the dedicated reader.
    if is_sentinel3_product(input_path):
        src_ds = open_sentinel3_product(input_path)
    else:
        src_ds = open_dataset(input_path)

    monitor('Rectifying...')
    rectified_ds = rectify_dataset(src_ds,
                                   xy_var_names=xy_names,
                                   var_names=var_names,
                                   target_gm=output_gm,
                                   tile_size=output_tile_size,
                                   uv_delta=delta)

    if rectified_ds is None:
        monitor(f'Dataset {input_path} does not seem to have an intersection with bounding box')
        return

    monitor(f'Writing rectified dataset to {output_path!r}...')
    if not dry_run:
        write_dataset(rectified_ds, output_path, output_format)

    # Plain string: no placeholders, so an f-string prefix was superfluous.
    monitor('Done.')
Ejemplo n.º 8
0
def gen(request: Optional[str],
        dataset_name: Optional[str],
        band_names: Optional[Tuple],
        tile_size: Optional[str],
        geometry: Optional[str],
        spatial_res: Optional[float],
        crs: Optional[str],
        time_range: Optional[str],
        time_period: Optional[str],
        time_tolerance: Optional[str],
        output_path: Optional[str],
        four_d: bool,
        verbose: bool):
    """
    Generate a data cube from SENTINEL Hub.

    By default, the command will create a Zarr dataset with 3D arrays
    for each band e.g. "B01", "B02" with dimensions "time", "lat", "lon".
    Use option "--4d" to write a single 4D array "band_data"
    with dimensions "time", "lat", "lon", "band".

    Please use command "xcube sh req" to generate example request files that can be passed as REQUEST.
    REQUEST may have JSON or YAML format.
    You can also pipe a JSON request into this command. In this case
    no REQUEST argument needs to be given.
    """
    import json
    import os.path
    import sys
    import xarray as xr
    from xcube.core.dsio import write_dataset
    from xcube.util.perf import measure_time
    from xcube_sh.config import CubeConfig
    from xcube_sh.observers import Observers
    from xcube_sh.sentinelhub import SentinelHub
    from xcube_sh.chunkstore import SentinelHubChunkStore

    # Request source precedence: explicit REQUEST argument, then JSON piped
    # via stdin, otherwise an empty request (configured by CLI options only).
    if request:
        request_dict = _load_request(request)
    elif not sys.stdin.isatty():
        request_dict = json.load(sys.stdin)
    else:
        request_dict = {}

    # CLI options override the corresponding entries in the request's
    # "cube_config" section.
    cube_config_dict = request_dict.get('cube_config', {})
    _overwrite_config_params(cube_config_dict,
                             dataset_name=dataset_name,
                             band_names=band_names if band_names else None,  # because of multiple=True
                             tile_size=tile_size,
                             geometry=geometry,
                             spatial_res=spatial_res,
                             crs=crs,
                             time_range=time_range,
                             time_period=time_period,
                             time_tolerance=time_tolerance,
                             four_d=four_d)

    # Any datastore_id other than 'sentinelhub' is dropped with a warning
    # rather than raising an error.
    input_config_dict = request_dict.get('input_config', {})
    if 'datastore_id' in input_config_dict:
        # Copy before popping so the original request dict stays untouched.
        input_config_dict = dict(input_config_dict)
        datastore_id = input_config_dict.pop('datastore_id')
        if datastore_id != 'sentinelhub':
            warnings.warn(f'Unknown datastore_id={datastore_id!r} encountered in request. Ignoring it...')
    # _overwrite_config_params(input_config_dict, ...)
    # TODO: validate input_config_dict

    output_config_dict = request_dict.get('output_config', {})
    _overwrite_config_params(output_config_dict,
                             path=output_path)
    # TODO: validate output_config_dict

    cube_config = CubeConfig.from_dict(cube_config_dict,
                                       exception_type=click.ClickException)

    # Effective output path: request/CLI value, else the built-in default.
    if 'path' in output_config_dict:
        output_path = output_config_dict.pop('path')
    else:
        output_path = DEFAULT_GEN_OUTPUT_PATH
    # Refuse to overwrite an existing local output; bucket URLs are not
    # checked for existence here.
    if not _is_bucket_url(output_path) and os.path.exists(output_path):
        raise click.ClickException(f'Output {output_path} already exists. Move it away first.')

    sentinel_hub = SentinelHub(**input_config_dict)

    print(f'Writing cube to {output_path}...')

    with measure_time() as cm:
        # The cube is opened as a Zarr view over the chunk store; data is
        # presumably fetched from SENTINEL Hub as chunks are read during
        # write_dataset() — see SentinelHubChunkStore for details.
        store = SentinelHubChunkStore(sentinel_hub, cube_config)
        request_collector = Observers.request_collector()
        store.add_observer(request_collector)
        if verbose:
            store.add_observer(Observers.request_dumper())
        cube = xr.open_zarr(store)
        if _is_bucket_url(output_path):
            # For bucket outputs, pass provider credentials (if present in
            # the output config) to the writer as client_kwargs.
            client_kwargs = {k: output_config_dict.pop(k)
                             for k in ('provider_access_key_id', 'provider_secret_access_key')
                             if k in output_config_dict}
            write_dataset(cube, output_path, format_name='zarr', client_kwargs=client_kwargs, **output_config_dict)
        else:
            write_dataset(cube, output_path, **output_config_dict)

    print(f"Cube written to {output_path}, took {'%.2f' % cm.duration} seconds.")

    if verbose:
        # Dump statistics about the requests made against SENTINEL Hub.
        request_collector.stats.dump()