Example No. 1
def extract(
    cube,
    points,
    output=None,
    format=None,
    coords=False,
    bounds=False,
    indexes=False,
    refs=False,
):
    """
    Extract cube points.

    Extracts data cells from CUBE at the coordinates given in each POINTS
    record and writes the resulting values to the given output path in the
    given format.

    POINTS must be a CSV file that provides at least the columns "lon", "lat", and "time". The "lon" and "lat"
    columns provide a point's location in decimal degrees. The "time" column provides a point's date or
    date-time. Its format should preferably be ISO, but other formats may work as well.
    """
    import click
    import sys

    if format != 'csv':
        raise click.ClickException(f'Format {format!r} is not supported.')

    import pandas as pd

    cube_path = cube
    points_path = points
    output_path = output
    include_coords = coords
    include_bounds = bounds
    include_indexes = indexes
    include_refs = refs

    from xcube.core.dsio import open_dataset
    from xcube.core.extract import get_cube_values_for_points, DEFAULT_INDEX_NAME_PATTERN, DEFAULT_REF_NAME_PATTERN

    # We may turn the following into CLI options later.
    index_name_pattern = DEFAULT_INDEX_NAME_PATTERN
    ref_name_pattern = DEFAULT_REF_NAME_PATTERN
    time_col_names = ["time"]

    points = pd.read_csv(points_path,
                         parse_dates=time_col_names,
                         infer_datetime_format=True)
    with open_dataset(cube_path) as cube:
        values = get_cube_values_for_points(
            cube,
            points,
            include_coords=include_coords,
            include_bounds=include_bounds,
            include_indexes=include_indexes,
            index_name_pattern=index_name_pattern,
            include_refs=include_refs,
            ref_name_pattern=ref_name_pattern).to_dataframe()
        values.to_csv(
            output_path if output_path else sys.stdout,
            # We may turn the following into CLI options later.
            sep=',',
            date_format='%Y-%m-%dT%H:%M:%SZ',
            index=True)
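
# Usage sketch (not from the original source; all paths are hypothetical).
# POINTS must provide at least the "lon", "lat", and "time" columns:
import pandas as pd

pd.DataFrame({
    "lon": [10.5, 11.2],
    "lat": [52.1, 52.9],
    "time": ["2019-05-01", "2019-05-02"],
}).to_csv("points.csv", index=False)

extract("cube.zarr", "points.csv", output="values.csv", format="csv")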
Example No. 2
def vars2dim(cube, variable, dim_name, output=None, format=None):
    """
    Convert cube variables into new dimension.
    Moves all variables of CUBE into into a single new variable <var-name>
    with a new dimension DIM-NAME and writes the results to OUTPUT.
    """

    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_dataset, write_dataset
    from xcube.core.vars2dim import vars_to_dim
    import os

    if not output:
        dirname = os.path.dirname(cube)
        basename = os.path.basename(cube)
        basename, ext = os.path.splitext(basename)
        output = os.path.join(dirname, basename + '-vars2dim' + ext)

    format_name = format if format else guess_dataset_format(output)

    with open_dataset(input_path=cube) as ds:
        converted_dataset = vars_to_dim(ds,
                                        dim_name=dim_name,
                                        var_name=variable)
        write_dataset(converted_dataset,
                      output_path=output,
                      format_name=format_name)
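
# Usage sketch (hypothetical path and names): moves all variables into a
# new "var" dimension; when OUTPUT is omitted it defaults to
# "cube-vars2dim.zarr" next to the input, as the code above derives it.
vars2dim("cube.zarr", variable="data", dim_name="var")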
Example No. 3
import os
from typing import List

import xarray as xr

from xcube.core.dsio import open_dataset
# compute_levels and PyramidLevelCallback are assumed to come from the
# same xcube module (xcube.core.level) that defines this function.
from xcube.core.level import compute_levels, PyramidLevelCallback


def write_levels(output_path: str,
                 dataset: xr.Dataset = None,
                 input_path: str = None,
                 link_input: bool = False,
                 progress_monitor: PyramidLevelCallback = None,
                 **kwargs) -> List[xr.Dataset]:
    """
    Transform the dataset given by a *dataset* instance or an *input_path*
    string into the levels of a multi-level pyramid whose spatial resolution
    decreases by a factor of two in both spatial dimensions, and write them
    to *output_path*.

    One of *dataset* and *input_path* must be given.

    :param output_path: Output path
    :param dataset: Dataset to be converted and written as levels.
    :param input_path: Input path to a dataset to be transformed and written as levels.
    :param link_input: Just link the dataset at level zero instead of writing it.
    :param progress_monitor: An optional progress monitor.
    :param kwargs: Keyword-arguments accepted by the ``compute_levels()`` function.
    :return: A list of dataset instances representing the multi-level pyramid.
    """
    if dataset is None and input_path is None:
        raise ValueError("at least one of dataset or input_path must be given")

    if link_input and input_path is None:
        raise ValueError("input_path must be provided to link input")

    _post_process_level = kwargs.pop("post_process_level", None)

    def post_process_level(level_dataset, index, num_levels):
        if _post_process_level is not None:
            level_dataset = _post_process_level(level_dataset, index,
                                                num_levels)

        if index == 0 and link_input:
            with open(os.path.join(output_path, f"{index}.link"), "w") as fp:
                fp.write(input_path)
        else:
            path = os.path.join(output_path, f"{index}.zarr")
            level_dataset.to_zarr(path)
            level_dataset.close()
            level_dataset = xr.open_zarr(path)

        if progress_monitor is not None:
            progress_monitor(level_dataset, index, num_levels)

        return level_dataset

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    if dataset is None:
        dataset = open_dataset(input_path)

    return compute_levels(dataset,
                          post_process_level=post_process_level,
                          **kwargs)
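
# Usage sketch (hypothetical paths): writes levels "0.zarr", "1.zarr", ...
# into the output directory; the callback merely reports progress.
def on_level(level_dataset, index, num_levels):
    print(f"written level {index + 1} of {num_levels}")

levels = write_levels("cube.levels",
                      input_path="cube.zarr",
                      progress_monitor=on_level)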
Example No. 4
def test_open_dataset(self):
    with open_dataset(TEST_NC_FILE) as ds:
        self.assertIsNotNone(ds)
        np.testing.assert_array_equal(ds.time.values,
                                      self.dataset.time.values)
        np.testing.assert_array_equal(ds.lat.values,
                                      self.dataset.lat.values)
        np.testing.assert_array_equal(ds.lon.values,
                                      self.dataset.lon.values)
Example No. 5
def dump(input, variable, encoding):
    """
    Dump contents of an input dataset.
    """
    from xcube.core.dsio import open_dataset
    from xcube.core.dump import dump_dataset
    with open_dataset(input) as ds:
        text = dump_dataset(ds, var_names=variable, show_var_encoding=encoding)
        print(text)
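
# Usage sketch (hypothetical path and variable name):
dump("cube.zarr", variable=["precipitation"], encoding=True)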
Example No. 6
def _rectify(input_path: str,
             xy_names: Optional[Tuple[str, str]],
             var_names: Optional[Sequence[str]],
             output_path: str,
             output_format: Optional[str],
             output_size: Optional[Tuple[int, int]],
             output_tile_size: Optional[Tuple[int, int]],
             output_point: Optional[Tuple[float, float]],
             output_res: Optional[float],
             delta: float,
             dry_run: bool,
             monitor):
    import click

    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_dataset
    from xcube.core.dsio import write_dataset
    from xcube.core.rectify import rectify_dataset
    from xcube.core.rectify import ImageGeom
    from xcube.core.sentinel3 import is_sentinel3_product
    from xcube.core.sentinel3 import open_sentinel3_product

    if not output_format:
        output_format = guess_dataset_format(output_path)

    output_geom = None
    if output_size is not None and output_point is not None and output_res is not None:
        output_geom = ImageGeom(size=output_size,
                                x_min=output_point[0],
                                y_min=output_point[1],
                                xy_res=output_res)
    elif output_size is not None or output_point is not None or output_res is not None:
        raise click.ClickException(
            'SIZE, POINT, and RES must all be given or none of them.')

    monitor(f'Opening dataset from {input_path!r}...')

    if is_sentinel3_product(input_path):
        src_ds = open_sentinel3_product(input_path)
    else:
        src_ds = open_dataset(input_path)

    monitor('Rectifying...')
    reproj_ds = rectify_dataset(src_ds,
                                xy_names=xy_names,
                                var_names=var_names,
                                output_geom=output_geom,
                                tile_size=output_tile_size,
                                uv_delta=delta)

    if reproj_ds is None:
        monitor(f'Dataset {input_path} does not seem to have '
                'an intersection with the bounding box')
        return

    monitor(f'Writing rectified dataset to {output_path!r}...')
    if not dry_run:
        write_dataset(reproj_ds, output_path, output_format)

    monitor('Done.')
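
# Usage sketch (hypothetical file and variable names). SIZE, POINT, and
# RES must be given together, as the all-or-none check above enforces;
# dry_run=True skips the final write.
_rectify(input_path='input.nc',
         xy_names=('lon', 'lat'),
         var_names=['radiance'],
         output_path='rectified.zarr',
         output_format=None,  # inferred from output_path
         output_size=(1024, 512),
         output_tile_size=None,
         output_point=(0.0, 40.0),
         output_res=0.01,
         delta=0.001,
         dry_run=True,
         monitor=print)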
Example No. 7
def _verify(input_path: str = None, monitor=None):
    import sys

    from xcube.core.dsio import open_dataset
    from xcube.core.verify import verify_cube

    monitor(f'Opening cube from {input_path!r}...')
    with open_dataset(input_path) as cube:
        report = verify_cube(cube)

    if not report:
        monitor("INPUT is a valid cube.")
        return

    monitor('INPUT is not a valid cube due to the following reasons:')
    monitor('- ' + '\n- '.join(report))
    # According to http://tldp.org/LDP/abs/html/exitcodes.html, exit code 3 is not reserved
    sys.exit(3)
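
# Usage sketch (hypothetical path): prints the verification report via
# the monitor and exits the process with code 3 if the cube is invalid.
_verify('cube.zarr', monitor=print)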
Example No. 8
def chunk(cube, output, format=None, params=None, chunks=None):
    """
    (Re-)chunk an xcube dataset.
    Changes the external chunking of all variables of CUBE according to CHUNKS and writes
    the result to OUTPUT.

    Note: There is a possibly more efficient way to (re-)chunk datasets through the
    dedicated tool "rechunker", see https://rechunker.readthedocs.io.
    """
    import click

    # parse_cli_kwargs is assumed to be the CLI helper from
    # xcube.cli.common in the xcube sources.
    from xcube.cli.common import parse_cli_kwargs

    chunk_sizes = None
    if chunks:
        chunk_sizes = parse_cli_kwargs(chunks, metavar="CHUNKS")
        for k, v in chunk_sizes.items():
            if not isinstance(v, int) or v <= 0:
                raise click.ClickException(
                    "Invalid value for CHUNKS, "
                    f"chunk sizes must be positive integers: {chunks}")

    write_kwargs = dict()
    if params:
        write_kwargs = parse_cli_kwargs(params, metavar="PARAMS")

    from xcube.core.chunk import chunk_dataset
    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_dataset, write_dataset

    format_name = format if format else guess_dataset_format(output)

    with open_dataset(input_path=cube) as ds:
        if chunk_sizes:
            for k in chunk_sizes:
                if k not in ds.dims:
                    raise click.ClickException(
                        "Invalid value for CHUNKS, "
                        f"{k!r} is not the name of any dimension: {chunks}")

        chunked_dataset = chunk_dataset(ds,
                                        chunk_sizes=chunk_sizes,
                                        format_name=format_name)
        write_dataset(chunked_dataset,
                      output_path=output,
                      format_name=format_name,
                      **write_kwargs)
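
# Usage sketch (hypothetical paths; the CHUNKS syntax is assumed to be
# comma-separated key=value pairs as understood by parse_cli_kwargs):
chunk('cube.zarr', 'rechunked.zarr', chunks='time=1,lat=180,lon=180')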
Example No. 9
def _prune(input_path: str, dry_run: bool, monitor: Monitor):
    import click

    # FORMAT_NAME_ZARR is assumed to come from xcube.constants; the
    # Monitor type and _delete_block_file are defined alongside this
    # function in the original CLI module.
    from xcube.constants import FORMAT_NAME_ZARR
    from xcube.core.chunk import get_empty_dataset_chunks
    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_dataset

    input_format = guess_dataset_format(input_path)
    if input_format != FORMAT_NAME_ZARR:
        raise click.ClickException("input must be a dataset in Zarr format")

    num_deleted_total = 0

    monitor(f'Opening dataset from {input_path!r}...', 1)
    with open_dataset(input_path) as dataset:
        monitor('Identifying empty chunks...', 1)
        for var_name, chunk_indices in get_empty_dataset_chunks(dataset):
            num_empty_chunks = 0
            num_deleted = 0
            for chunk_index in chunk_indices:
                num_empty_chunks += 1
                if num_empty_chunks == 1:
                    monitor(
                        f'Found empty chunks in variable {var_name!r}, '
                        f'deleting block files...', 2)

                ok = _delete_block_file(input_path, var_name, chunk_index,
                                        dry_run, monitor)
                if ok:
                    num_deleted += 1
            if num_deleted > 0:
                monitor(
                    f'Deleted {num_deleted} block file(s) '
                    f'for variable {var_name!r}.', 2)
            elif num_empty_chunks > 0:
                monitor(
                    f'No block files for variable {var_name!r} '
                    f'could be deleted.', 2)
            num_deleted_total += num_deleted

    monitor(f'Done, {num_deleted_total} block file(s) deleted total.', 1)
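
# Usage sketch (hypothetical path): a minimal monitor that indents its
# message by level; dry_run=True only reports what would be deleted.
def print_monitor(message, level):
    print('  ' * (level - 1) + message)

_prune('cube.zarr', dry_run=True, monitor=print_monitor)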
Example No. 10
def _rectify(input_path: str,
             xy_names: Optional[Tuple[str, str]],
             var_names: Optional[Sequence[str]],
             output_path: str,
             output_format: Optional[str],
             output_size: Optional[Tuple[int, int]],
             output_tile_size: Optional[Tuple[int, int]],
             output_point: Optional[Tuple[float, float]],
             output_res: Optional[float],
             output_crs: Optional[str],
             delta: float,
             dry_run: bool,
             monitor):
    import click
    import pyproj.crs

    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_dataset
    from xcube.core.dsio import write_dataset
    from xcube.core.gridmapping import GridMapping
    from xcube.core.resampling import rectify_dataset
    from xcube.core.sentinel3 import is_sentinel3_product
    from xcube.core.sentinel3 import open_sentinel3_product

    if not output_format:
        output_format = guess_dataset_format(output_path)

    output_gm = None
    output_gm_given = (output_size is not None,
                       output_point is not None,
                       output_res is not None,
                       output_crs is not None)
    if all(output_gm_given):
        output_gm = GridMapping.regular(size=output_size,
                                        xy_min=output_point,
                                        xy_res=output_res,
                                        crs=pyproj.crs.CRS.from_user_input(output_crs))
    elif any(output_gm_given):
        raise click.ClickException('SIZE, POINT, RES, and CRS must all be given or none of them.')

    monitor(f'Opening dataset from {input_path!r}...')

    if is_sentinel3_product(input_path):
        src_ds = open_sentinel3_product(input_path)
    else:
        src_ds = open_dataset(input_path)

    monitor('Rectifying...')
    rectified_ds = rectify_dataset(src_ds,
                                   xy_var_names=xy_names,
                                   var_names=var_names,
                                   target_gm=output_gm,
                                   tile_size=output_tile_size,
                                   uv_delta=delta)

    if rectified_ds is None:
        monitor(f'Dataset {input_path} does not seem to have '
                'an intersection with the bounding box')
        return

    monitor(f'Writing rectified dataset to {output_path!r}...')
    if not dry_run:
        write_dataset(rectified_ds, output_path, output_format)

    monitor('Done.')
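
# Usage sketch, analogous to the one above, but for the CRS variant:
# SIZE, POINT, RES, and CRS must now all be given together (paths and
# names hypothetical).
_rectify(input_path='input.nc',
         xy_names=('lon', 'lat'),
         var_names=['radiance'],
         output_path='rectified.zarr',
         output_format=None,
         output_size=(1024, 512),
         output_tile_size=None,
         output_point=(0.0, 40.0),
         output_res=0.01,
         output_crs='EPSG:4326',
         delta=0.001,
         dry_run=True,
         monitor=print)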
Example No. 11
def test_open_dataset(self):
    ds = open_dataset(f's3://{self.CUBE}')
    self.assertIn('B02', ds)
    print(ds)
Example No. 12
import s3fs

s3 = s3fs.S3FileSystem(anon=True)

# In[ ]:

# List the available Colombia data cubes:
s3.ls('esdl-esdc-v2.0.1')

# In[ ]:

# Commands used to download a cube:
s3.get(s3.ls('esdl-esdc-v2.0.1/Cube_2019highColombiaCube_184x120x120.zarr'),
       './mylocalhighrescube',
       recursive=True)

# Commands to access the data cube using xcube, then continue the analysis with xarray:

# In[ ]:

import xarray as xr
import xcube

# In[ ]:

from xcube.core.dsio import open_dataset

ds = open_dataset(
    "https://s3.eu-central-1.amazonaws.com/esdl-esdc-v2.0.1/Cube_2019highColombiaCube_184x120x120.zarr",
    s3_kwargs=dict(anon=True))

# In[ ]:
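
# A possible follow-up analysis with plain xarray (the time range is
# arbitrary; inspect ds.data_vars for the actual variable names):
subset = ds.sel(time=slice('2010-01-01', '2010-12-31'))
print(subset.data_vars)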