def vars2dim(cube, variable, dim_name, output=None, format=None):
    """
    Convert cube variables into new dimension. Moves all variables of CUBE
    into a single new variable VARIABLE with a new dimension DIM-NAME and
    writes the results to OUTPUT.
    """
    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_dataset, write_dataset
    from xcube.core.vars2dim import vars_to_dim
    import os

    if not output:
        # Default output path: "<input-basename>-vars2dim<ext>" next to the input.
        dirname = os.path.dirname(cube)
        basename = os.path.basename(cube)
        basename, ext = os.path.splitext(basename)
        output = os.path.join(dirname, basename + '-vars2dim' + ext)
    # Explicit format wins; otherwise guess it from the output path.
    format_name = format if format else guess_dataset_format(output)
    with open_dataset(input_path=cube) as ds:
        converted_dataset = vars_to_dim(ds, dim_name=dim_name, var_name=variable)
        write_dataset(converted_dataset, output_path=output, format_name=format_name)
def test_write_dataset(self):
    # Write a fresh cube to disk and verify the target file was created.
    target = TEST_NC_FILE_2
    try:
        write_dataset(new_cube(), target)
        self.assertTrue(os.path.isfile(target))
    finally:
        # Always remove the artifact, even if the assertion fails.
        if os.path.isfile(target):
            os.remove(target)
def _rectify(input_path: str,
             xy_names: Optional[Tuple[str, str]],
             var_names: Optional[Sequence[str]],
             output_path: str,
             output_format: Optional[str],
             output_size: Optional[Tuple[int, int]],
             output_tile_size: Optional[Tuple[int, int]],
             output_point: Optional[Tuple[float, float]],
             output_res: Optional[float],
             delta: float,
             dry_run: bool,
             monitor):
    """
    Open the dataset at *input_path*, rectify it onto a regular grid, and
    write the result to *output_path*.

    SIZE, POINT, and RES (output_size/output_point/output_res) must be given
    all together or not at all; when given they define the output image
    geometry, otherwise it is derived from the source.

    :param monitor: callable taking a progress message string.
    :raises click.ClickException: if only some of SIZE, POINT, RES are given.
    """
    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_dataset
    from xcube.core.dsio import write_dataset
    from xcube.core.rectify import rectify_dataset
    from xcube.core.rectify import ImageGeom
    from xcube.core.sentinel3 import is_sentinel3_product
    from xcube.core.sentinel3 import open_sentinel3_product

    if not output_format:
        output_format = guess_dataset_format(output_path)

    output_geom = None
    if output_size is not None and output_point is not None and output_res is not None:
        output_geom = ImageGeom(size=output_size,
                                x_min=output_point[0],
                                y_min=output_point[1],
                                xy_res=output_res)
    elif output_size is not None or output_point is not None or output_res is not None:
        # Partial geometry specification is ambiguous - reject it.
        raise click.ClickException(
            'SIZE, POINT, and RES must all be given or none of them.')

    monitor(f'Opening dataset from {input_path!r}...')
    # Sentinel-3 products need a dedicated opener.
    if is_sentinel3_product(input_path):
        src_ds = open_sentinel3_product(input_path)
    else:
        src_ds = open_dataset(input_path)

    monitor('Rectifying...')
    reproj_ds = rectify_dataset(src_ds,
                                xy_names=xy_names,
                                var_names=var_names,
                                output_geom=output_geom,
                                tile_size=output_tile_size,
                                uv_delta=delta)

    if reproj_ds is None:
        # rectify_dataset() signals "no spatial overlap" by returning None.
        monitor(
            f'Dataset {input_path} does not seem to have an intersection with bounding box'
        )
        return

    monitor(f'Writing rectified dataset to {output_path!r}...')
    if not dry_run:
        write_dataset(reproj_ds, output_path, output_format)

    # Fix: was f'Done.' - an f-string without placeholders (lint F541).
    monitor('Done.')
def _write_test_cube(cls):
    # Credentials and endpoint for the mocked (moto) S3 server.
    credentials = dict(key='test_fake_id', secret='test_fake_secret')
    client_kwargs = dict(endpoint_url=MOTO_SERVER_ENDPOINT_URL)
    filesystem = s3fs.S3FileSystem(**credentials, client_kwargs=client_kwargs)
    # Ensure bucket 'xcube-test-cube' exists before we write the test cube.
    filesystem.mkdir('xcube-test-cube')
    # Load the demo cube and reduce it to the subset used by the tests.
    zarr_path = os.path.join(os.path.dirname(__file__),
                             '../../examples/serve/demo/cube-1-250-250.zarr')
    subset = cls._make_subset(xr.open_zarr(zarr_path))
    # Upload the subset as the test cube.
    write_dataset(subset,
                  'xcube-test-cube/cube-1-250-250.zarr',
                  s3_kwargs=credentials,
                  s3_client_kwargs=client_kwargs)
def _write_test_cube(cls):
    # Make sure bucket 'xcube-test' exists before we write the test pyramid.
    boto3.client('s3').create_bucket(Bucket='xcube-test', ACL='public-read')
    # Build a test cube containing only the "conc_chl" variable
    # from the demo cube shipped with the examples.
    zarr_path = os.path.join(
        os.path.dirname(__file__),
        '../../examples/serve/demo/cube-1-250-250.zarr')
    source = xr.open_zarr(zarr_path)
    cube = xr.Dataset(dict(conc_chl=source.conc_chl))
    # Upload the test cube using fake credentials.
    write_dataset(
        cube,
        'https://s3.amazonaws.com/xcube-test/cube-1-250-250.zarr',
        client_kwargs=dict(provider_access_key_id='test_fake_id',
                           provider_secret_access_key='test_fake_secret'))
def chunk(cube, output, format=None, params=None, chunks=None):
    """
    (Re-)chunk xcube dataset.

    Changes the external chunking of all variables of CUBE according to CHUNKS
    and writes the result to OUTPUT.

    Note: There is a possibly more efficient way to (re-)chunk datasets through
    the dedicated tool "rechunker", see https://rechunker.readthedocs.io.
    """
    chunk_sizes = None
    if chunks:
        chunk_sizes = parse_cli_kwargs(chunks, metavar="CHUNKS")
        # Every requested chunk size must be a positive integer.
        for size in chunk_sizes.values():
            if not isinstance(size, int) or size <= 0:
                raise click.ClickException(
                    "Invalid value for CHUNKS, "
                    f"chunk sizes must be positive integers: {chunks}")

    write_kwargs = parse_cli_kwargs(params, metavar="PARAMS") if params else dict()

    from xcube.core.chunk import chunk_dataset
    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_dataset, write_dataset

    # Explicit format wins; otherwise guess it from the output path.
    format_name = format if format else guess_dataset_format(output)

    with open_dataset(input_path=cube) as dataset:
        if chunk_sizes:
            # Every chunk key must name an existing dataset dimension.
            for dim in chunk_sizes:
                if dim not in dataset.dims:
                    raise click.ClickException(
                        "Invalid value for CHUNKS, "
                        f"{dim!r} is not the name of any dimension: {chunks}")
        rechunked = chunk_dataset(dataset,
                                  chunk_sizes=chunk_sizes,
                                  format_name=format_name)
        write_dataset(rechunked,
                      output_path=output,
                      format_name=format_name,
                      **write_kwargs)
def _rectify(input_path: str,
             xy_names: Optional[Tuple[str, str]],
             var_names: Optional[Sequence[str]],
             output_path: str,
             output_format: Optional[str],
             output_size: Optional[Tuple[int, int]],
             output_tile_size: Optional[Tuple[int, int]],
             output_point: Optional[Tuple[float, float]],
             output_res: Optional[float],
             output_crs: Optional[str],
             delta: float,
             dry_run: bool,
             monitor):
    """
    Open the dataset at *input_path*, rectify it onto a regular grid, and
    write the result to *output_path*.

    SIZE, POINT, RES, and CRS (output_size/output_point/output_res/output_crs)
    must be given all together or not at all; when given they define the
    target grid mapping, otherwise it is derived from the source.

    :param monitor: callable taking a progress message string.
    :raises click.ClickException: if only some of SIZE, POINT, RES, CRS are given.
    """
    import pyproj.crs

    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_dataset
    from xcube.core.dsio import write_dataset
    from xcube.core.gridmapping import GridMapping
    from xcube.core.resampling import rectify_dataset
    from xcube.core.sentinel3 import is_sentinel3_product
    from xcube.core.sentinel3 import open_sentinel3_product

    if not output_format:
        output_format = guess_dataset_format(output_path)

    output_gm = None
    output_gm_given = (output_size is not None,
                       output_point is not None,
                       output_res is not None,
                       output_crs is not None)
    if all(output_gm_given):
        output_gm = GridMapping.regular(size=output_size,
                                        xy_min=output_point,
                                        xy_res=output_res,
                                        crs=pyproj.crs.CRS.from_user_input(output_crs))
    elif any(output_gm_given):
        # Partial grid-mapping specification is ambiguous - reject it.
        raise click.ClickException('SIZE, POINT, RES, and CRS must all be given or none of them.')

    monitor(f'Opening dataset from {input_path!r}...')
    # Sentinel-3 products need a dedicated opener.
    if is_sentinel3_product(input_path):
        src_ds = open_sentinel3_product(input_path)
    else:
        src_ds = open_dataset(input_path)

    monitor('Rectifying...')
    rectified_ds = rectify_dataset(src_ds,
                                   xy_var_names=xy_names,
                                   var_names=var_names,
                                   target_gm=output_gm,
                                   tile_size=output_tile_size,
                                   uv_delta=delta)

    if rectified_ds is None:
        # rectify_dataset() signals "no spatial overlap" by returning None.
        monitor(f'Dataset {input_path} does not seem to have an intersection with bounding box')
        return

    monitor(f'Writing rectified dataset to {output_path!r}...')
    if not dry_run:
        write_dataset(rectified_ds, output_path, output_format)

    # Fix: was f'Done.' - an f-string without placeholders (lint F541).
    monitor('Done.')
def gen(request: Optional[str],
        dataset_name: Optional[str],
        band_names: Optional[Tuple],
        tile_size: Optional[str],
        geometry: Optional[str],
        spatial_res: Optional[float],
        crs: Optional[str],
        time_range: Optional[str],
        time_period: Optional[str],
        time_tolerance: Optional[str],
        output_path: Optional[str],
        four_d: bool,
        verbose: bool):
    """
    Generate a data cube from SENTINEL Hub.

    By default, the command will create a Zarr dataset with 3D arrays for each band
    e.g. "B01", "B02" with dimensions "time", "lat", "lon". Use option "--4d" to
    write a single 4D array "band_data" with dimensions "time", "lat", "lon", "band".

    Please use command "xcube sh req" to generate example request files that can be
    passed as REQUEST. REQUEST may have JSON or YAML format.

    You can also pipe a JSON request into this command. In this case, the request
    is read from standard input.
    """
    import json
    import os.path
    import sys

    import xarray as xr

    from xcube.core.dsio import write_dataset
    from xcube.util.perf import measure_time
    from xcube_sh.config import CubeConfig
    from xcube_sh.observers import Observers
    from xcube_sh.sentinelhub import SentinelHub
    from xcube_sh.chunkstore import SentinelHubChunkStore

    # Resolve the request source: explicit file > piped stdin > empty request.
    if request:
        request_dict = _load_request(request)
    elif not sys.stdin.isatty():
        request_dict = json.load(sys.stdin)
    else:
        request_dict = {}

    # CLI options override values from the request file.
    cube_config_dict = request_dict.get('cube_config', {})
    _overwrite_config_params(cube_config_dict,
                             dataset_name=dataset_name,
                             band_names=band_names if band_names else None,  # because of multiple=True
                             tile_size=tile_size,
                             geometry=geometry,
                             spatial_res=spatial_res,
                             crs=crs,
                             time_range=time_range,
                             time_period=time_period,
                             time_tolerance=time_tolerance,
                             four_d=four_d)

    input_config_dict = request_dict.get('input_config', {})
    if 'datastore_id' in input_config_dict:
        # Copy before popping so the original request dict stays untouched.
        input_config_dict = dict(input_config_dict)
        datastore_id = input_config_dict.pop('datastore_id')
        if datastore_id != 'sentinelhub':
            warnings.warn(f'Unknown datastore_id={datastore_id!r} encountered in request. Ignoring it...')
    # _overwrite_config_params(input_config_dict, ...)
    # TODO: validate input_config_dict

    output_config_dict = request_dict.get('output_config', {})
    _overwrite_config_params(output_config_dict, path=output_path)
    # TODO: validate output_config_dict

    cube_config = CubeConfig.from_dict(cube_config_dict,
                                       exception_type=click.ClickException)

    if 'path' in output_config_dict:
        output_path = output_config_dict.pop('path')
    else:
        output_path = DEFAULT_GEN_OUTPUT_PATH
    # Refuse to clobber an existing local output; bucket URLs are not checked.
    if not _is_bucket_url(output_path) and os.path.exists(output_path):
        raise click.ClickException(f'Output {output_path} already exists. Move it away first.')

    sentinel_hub = SentinelHub(**input_config_dict)

    print(f'Writing cube to {output_path}...')

    with measure_time() as cm:
        store = SentinelHubChunkStore(sentinel_hub, cube_config)
        request_collector = Observers.request_collector()
        store.add_observer(request_collector)
        if verbose:
            store.add_observer(Observers.request_dumper())
        cube = xr.open_zarr(store)
        if _is_bucket_url(output_path):
            # Credentials go into client_kwargs; everything else is passed through.
            client_kwargs = {k: output_config_dict.pop(k)
                             for k in ('provider_access_key_id', 'provider_secret_access_key')
                             if k in output_config_dict}
            write_dataset(cube,
                          output_path,
                          format_name='zarr',
                          client_kwargs=client_kwargs,
                          **output_config_dict)
        else:
            write_dataset(cube, output_path, **output_config_dict)

    print(f"Cube written to {output_path}, took {'%.2f' % cm.duration} seconds.")

    if verbose:
        request_collector.stats.dump()