Code example #1
import click


def _prune(input_path: str = None, dry_run: bool = False, monitor=None):
    # FORMAT_NAME_ZARR and _delete_block_file are defined elsewhere in the
    # xcube CLI module this function comes from. The CLI always passes a
    # monitor callable; use e.g. ``print`` when calling this directly.
    from xcube.core.chunk import get_empty_dataset_chunks
    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_cube

    input_format = guess_dataset_format(input_path)
    if input_format != FORMAT_NAME_ZARR:
        raise click.ClickException("input must be a cube in ZARR format")

    monitor(f'Opening cube from {input_path!r}...')
    with open_cube(input_path) as cube:
        monitor('Identifying empty blocks...')
        empty_chunks = get_empty_dataset_chunks(cube)

    num_deleted = 0
    for var_name, chunk_indices in empty_chunks.items():
        monitor(f'Deleting {len(chunk_indices)} empty block file(s)'
                f' for variable {var_name!r}...')
        for chunk_index in chunk_indices:
            ok = _delete_block_file(input_path, var_name, chunk_index,
                                    dry_run, monitor)
            if ok:
                num_deleted += 1

    monitor(f'Done, {num_deleted} block file(s) deleted.')
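
A minimal invocation sketch: the cube path below is a placeholder, and any callable such as print can serve as the monitor, since _prune calls it unconditionally.

_prune('my-cube.zarr', dry_run=True, monitor=print)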
Code example #2
File: test_esdc.py Project: IPL-UV/gauss4eo
def test_infoearth_variables():
    # ONLINE_CUBE, LOW_RES and INFO_EARTH are module-level constants of this
    # test suite; open_cube comes from xcube.core.dsio.

    # Load the cube from the server.
    path = f"{ONLINE_CUBE}/{LOW_RES}"
    esdc = open_cube(str(path))

    # Selecting each variable raises a KeyError if it is missing from the
    # cube, which is enough to fail the test.
    for variable_name in INFO_EARTH:
        _ = esdc[variable_name]
Code example #3
File: xarray.py Project: sfoucher/xcube
    @classmethod
    def open(cls, input_path: str, format_name: str = None, **kwargs) -> xr.Dataset:
        """
        Open the data cube at ``input_path`` and return it as a dataset;
        delegates to :func:`open_cube`.

        :param input_path: input path
        :param format_name: format, e.g. "zarr" or "netcdf4"
        :param kwargs: format-specific keyword arguments
        :return: dataset object
        """
        return open_cube(input_path, format_name=format_name, **kwargs)
Code example #4
File: show_remote_cubes.py Project: sfoucher/xcube
import pandas as pd
import s3fs

from xcube.core.dsio import open_cube


def show_remote_cubes(bucket, endpoint_url, region_name='eu-central-1'):
    s3_client_kwargs = {
        'endpoint_url': endpoint_url,
        'region_name': region_name,
    }
    obs_file_system = s3fs.S3FileSystem(anon=True,
                                        client_kwargs=s3_client_kwargs)

    # Collect one record per cube and build the DataFrame once at the end;
    # DataFrame.append() is deprecated and was removed in pandas 2.0.
    rows = []
    for filepath in sorted(obs_file_system.ls(bucket)):
        if not filepath.endswith('.zarr'):
            continue
        with open_cube(f'{endpoint_url}/{filepath}') as ds:
            var_list = list(ds.data_vars)
            # Use the last path component as the cube name.
            filename = filepath.split('/')[-1]
            start_date = pd.to_datetime(str(ds.time.values[0])).strftime('%Y-%m-%d')
            end_date = pd.to_datetime(str(ds.time.values[-1])).strftime('%Y-%m-%d')
            # Report the chunking of the first variable, one entry per dimension.
            first_var = ds[var_list[0]]
            chunksize = [f"{dim}: {first_var.data.chunksize[idx]}"
                         for idx, dim in enumerate(first_var.dims)]
            try:
                spat_cov = ', '.join([
                    f"lon_min: {ds.attrs['geospatial_lon_min']}",
                    f"lat_min: {ds.attrs['geospatial_lat_min']}",
                    f"lon_max: {ds.attrs['geospatial_lon_max']}",
                    f"lat_max: {ds.attrs['geospatial_lat_max']}",
                ])
            except KeyError:
                # The cube lacks geospatial bounds attributes.
                spat_cov = None
            rows.append({
                'cube_name': filename,
                'chunks': ', '.join(chunksize),
                'number_of_variables': len(var_list),
                'variables': ', '.join(var_list),
                'start_date': start_date,
                'end_date': end_date,
                'spatial_coverage': spat_cov,
            })

    return pd.DataFrame(rows, columns=[
        'cube_name', 'chunks', 'number_of_variables', 'variables',
        'start_date', 'end_date', 'spatial_coverage'
    ])
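
A hedged usage sketch; the bucket name and endpoint URL below are placeholders, not values from the source:

df = show_remote_cubes('my-cube-bucket', 'https://s3.eu-central-1.amazonaws.com')
print(df[['cube_name', 'start_date', 'end_date']])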
Code example #5
File: test_s3buckethandlers.py Project: micder/xcube
    def test_open_cube_from_xube_server(self):
        ds = open_cube('s3bucket/local',
                       format_name='zarr',
                       endpoint_url=SERVER_URL)
        self.assertIsNotNone(ds)
        self.assertEqual((5, 1000, 2000), ds.conc_chl.shape)
        self.assertEqual(('time', 'lat', 'lon'), ds.conc_chl.dims)
        conc_chl_values = ds.conc_chl.values
        self.assertEqual((5, 1000, 2000), conc_chl_values.shape)
        self.assertAlmostEqual(0.00005656,
                               float(np.nanmin(conc_chl_values)),
                               delta=1e-6)
        self.assertAlmostEqual(22.4421215,
                               float(np.nanmax(conc_chl_values)),
                               delta=1e-6)
Code example #6
from typing import Any, Dict, Sequence


# DEFAULT_OUTPUT_PATH, DEFAULT_RESAMPLING_METHOD, DEFAULT_RESAMPLING_FREQUENCY,
# DEFAULT_RESAMPLING_BASE and DEFAULT_INTERPOLATION_KIND are module-level
# constants of the xcube CLI module this function comes from.
def _resample_in_time(input_path: str = None,
                      variables: Sequence[str] = None,
                      metadata: Dict[str, Any] = None,
                      output_path: str = DEFAULT_OUTPUT_PATH,
                      output_format: str = None,
                      methods: Sequence[str] = (DEFAULT_RESAMPLING_METHOD,),
                      frequency: str = DEFAULT_RESAMPLING_FREQUENCY,
                      offset: str = None,
                      base: int = DEFAULT_RESAMPLING_BASE,
                      interp_kind: str = DEFAULT_INTERPOLATION_KIND,
                      tolerance: str = None,
                      dry_run: bool = False,
                      monitor=None):
    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_cube
    from xcube.core.dsio import write_cube
    from xcube.core.resample import resample_in_time
    from xcube.core.update import update_dataset_chunk_encoding

    if not output_format:
        output_format = guess_dataset_format(output_path)

    monitor(f'Opening cube from {input_path!r}...')
    with open_cube(input_path) as ds:

        monitor('Resampling...')
        agg_ds = resample_in_time(ds,
                                  frequency=frequency,
                                  method=methods,
                                  offset=offset,
                                  base=base,
                                  interp_kind=interp_kind,
                                  tolerance=tolerance,
                                  time_chunk_size=1,
                                  var_names=variables,
                                  metadata=metadata)

        agg_ds = update_dataset_chunk_encoding(agg_ds,
                                               chunk_sizes={},
                                               format_name=output_format,
                                               in_place=True)

        monitor(f'Writing resampled cube to {output_path!r}...')
        if not dry_run:
            write_cube(agg_ds, output_path, output_format, cube_asserted=True)

        monitor('Done.')
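
A hypothetical call sketch: the paths, method and frequency are illustrative placeholders, and print again serves as the monitor callable.

_resample_in_time('input-cube.zarr',
                  output_path='resampled-cube.zarr',
                  methods=('mean',),
                  frequency='1W',
                  monitor=print)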
Code example #7
    def test_write_to_and_read_from_s3(self):
        # MOTO_SERVER_ENDPOINT_URL points at a local moto mock-S3 server
        # started by the test fixture.
        s3_conn = boto3.client('s3', endpoint_url=MOTO_SERVER_ENDPOINT_URL)
        s3_conn.create_bucket(Bucket='upload_bucket', ACL='public-read')

        s3_kwargs = dict(key='test_fake_id', secret='test_fake_secret')
        s3_client_kwargs = {'endpoint_url': MOTO_SERVER_ENDPOINT_URL}

        ds1 = new_cube(width=36, height=18, variables=dict(chl=0.5, tsm=0.2))

        write_cube(ds1,
                   'upload_bucket/cube-1-250-250.zarr',
                   format_name='zarr',
                   s3_kwargs=s3_kwargs,
                   s3_client_kwargs=s3_client_kwargs)

        ds2 = open_cube('upload_bucket/cube-1-250-250.zarr',
                        format_name='zarr',
                        s3_kwargs=s3_kwargs,
                        s3_client_kwargs=s3_client_kwargs)

        self.assertEqual(set(ds1.coords), set(ds2.coords))
        self.assertEqual(set(ds1.data_vars), set(ds2.data_vars))
Code example #8
    def test_open_cube_from_xube_server_abs_path(self):
        ds = open_cube('http://localhost:8080/s3bucket/local',
                       format_name='zarr',
                       s3_kwargs={'anon': True})
        self.assertCubeOk(ds)
Code example #9
    def test_open_cube_from_xube_server_rel_path(self):
        ds = open_cube('s3bucket/local',
                       format_name='zarr',
                       s3_kwargs={'anon': True},
                       s3_client_kwargs=dict(endpoint_url=SERVER_URL))
        self.assertCubeOk(ds)
Code example #10
from typing import List


# In xcube this is a click command; the @click decorators that populate the
# parameters are omitted from this excerpt.
def compute(script: str,
            cube: List[str],
            input_var_names: str,
            input_params: str,
            output_path: str,
            output_format: str,
            output_var_name: str,
            output_var_dtype: str):
    """
    Compute a cube from one or more other cubes.

    The command computes a cube variable from other cube variables in CUBEs
    using a user-provided Python function in SCRIPT.

    The SCRIPT must define a function named "compute":

    \b
        def compute(*input_vars: numpy.ndarray,
                    input_params: Mapping[str, Any] = None,
                    dim_coords: Mapping[str, np.ndarray] = None,
                    dim_ranges: Mapping[str, Tuple[int, int]] = None) \\
                    -> numpy.ndarray:
            # Compute new numpy array from inputs
            # output_array = ...
            return output_array

    where input_vars are numpy arrays (chunks) in the order given by VARIABLES or by the variable names returned
    by an optional "initialize" function that may also be defined in SCRIPT, see below. input_params is a mapping of
    parameter names to values according to PARAMS or the ones returned by the aforesaid "initialize" function.
    dim_coords is a mapping from dimension name to coordinate labels for the current chunk to be computed.
    dim_ranges is a mapping from dimension name to index ranges into coordinate arrays of the cube.

    The SCRIPT may define a function named "initialize":

    \b
        def initialize(input_cubes: Sequence[xr.Dataset],
                       input_var_names: Sequence[str],
                       input_params: Mapping[str, Any]) \\
                       -> Tuple[Sequence[str], Mapping[str, Any]]:
            # Compute new variable names and/or new parameters
            # new_input_var_names = ...
            # new_input_params = ...
            return new_input_var_names, new_input_params

    where input_cubes are the respective CUBEs, input_var_names the respective VARIABLES, and input_params
    are the respective PARAMS. The "initialize" function can be used to validate the data cubes, to extract
    the desired variables in the desired order, and to provide extra processing parameters that are passed to
    the "compute" function.

    Note that if no input variable names are specified, no variables are passed to the "compute" function.

    The SCRIPT may also define a function named "finalize":

    \b
        def finalize(output_cube: xr.Dataset,
                     input_params: Mapping[str, Any]) \\
                     -> Optional[xr.Dataset]:
            # Optionally modify output_cube and return it or return None
            return output_cube

    If defined, the "finalize" function will be called before the command writes the
    new cube and then exits. The function may perform cleanup or side effects, such
    as writing the cube to some sink. If the function returns None, the CLI will *not* write
    any cube data.

    """
    from xcube.cli.common import parse_cli_kwargs
    from xcube.core.compute import compute_cube
    from xcube.core.dsio import open_cube
    from xcube.core.dsio import guess_dataset_format, find_dataset_io

    input_paths = cube

    compute_function_name = "compute"
    initialize_function_name = "initialize"
    finalize_function_name = "finalize"

    with open(script, "r") as fp:
        code = fp.read()

    locals_dict = dict()
    exec(code, globals(), locals_dict)

    input_var_names = list(map(lambda s: s.strip(), input_var_names.split(","))) if input_var_names else None

    compute_function = _get_function(locals_dict, compute_function_name, script, force=True)
    initialize_function = _get_function(locals_dict, initialize_function_name, script, force=False)
    finalize_function = _get_function(locals_dict, finalize_function_name, script, force=False)

    input_params = parse_cli_kwargs(input_params, "PARAMS")

    input_cubes = []
    for input_path in input_paths:
        input_cubes.append(open_cube(input_path=input_path))

    if initialize_function:
        input_var_names, input_params = initialize_function(input_cubes, input_var_names, input_params)

    output_cube = compute_cube(compute_function,
                               *input_cubes,
                               input_var_names=input_var_names,
                               input_params=input_params,
                               output_var_name=output_var_name,
                               output_var_dtype=output_var_dtype)

    if finalize_function:
        # Pass input_params too, matching the "finalize" signature documented above.
        output_cube = finalize_function(output_cube, input_params)

    if output_cube is not None:
        output_format = output_format or guess_dataset_format(output_path)
        dataset_io = find_dataset_io(output_format, {"w"})
        dataset_io.write(output_cube, output_path)
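
To make the compute/initialize protocol described above concrete, here is a minimal, hypothetical SCRIPT the command could load; the variable names chl and tsm, the factor parameter, and the formula are invented for illustration.

import numpy as np


def initialize(input_cubes, input_var_names, input_params):
    # Fix the variable order expected by compute() and normalize parameters.
    return ['chl', 'tsm'], {'factor': float(input_params.get('factor', 1.0))}


def compute(chl: np.ndarray, tsm: np.ndarray,
            input_params=None, dim_coords=None, dim_ranges=None) -> np.ndarray:
    # Combine the two input chunks into one output chunk.
    return input_params['factor'] * (chl + tsm)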
Code example #11
File: test_esdc.py Project: IPL-UV/gauss4eo
def test_lo_esdc_online():
    # ONLINE_CUBE and LOW_RES are module-level constants of this test suite.
    path = f"{ONLINE_CUBE}/{LOW_RES}"
    esdc = open_cube(str(path))