Esempio n. 1
0
File: io.py Progetto: whigg/cate
def read_netcdf(file: str,
                drop_variables: VarNamesLike.TYPE = None,
                decode_cf: bool = True,
                normalize: bool = True,
                decode_times: bool = True,
                engine: str = None) -> xr.Dataset:
    """
    Open a netCDF 3/4 or HDF file as an xarray dataset.

    After opening, the dataset is re-chunked whenever ``get_spatial_ext_chunk_sizes`` yields
    chunk sizes for it; if the dataset has a ``time`` dimension, that dimension is chunked
    with size 1.

    :param file: The netCDF file path.
    :param drop_variables: List of variables to be dropped.
    :param decode_cf: Whether to decode CF attributes and coordinate variables.
    :param normalize: Whether to normalize the dataset's geo- and time-coding upon opening. See operation ``normalize``.
    :param decode_times: Whether to decode time information (convert time coordinates to ``datetime`` objects).
    :param engine: Optional netCDF engine name.
    :return: The opened dataset, passed through ``normalize_op`` and ``adjust_temporal_attrs``
             if *normalize* is set.
    """
    names_to_drop = VarNamesLike.convert(drop_variables)
    dataset = xr.open_dataset(file,
                              drop_variables=names_to_drop,
                              decode_cf=decode_cf,
                              decode_times=decode_times,
                              engine=engine)

    # Re-chunk only when chunk sizes could be determined (result is non-empty).
    chunk_sizes = get_spatial_ext_chunk_sizes(dataset)
    if chunk_sizes:
        if 'time' in dataset.dims:
            # One time step per chunk.
            chunk_sizes['time'] = 1
        dataset = dataset.chunk(chunk_sizes)

    if not normalize:
        return dataset
    return adjust_temporal_attrs(normalize_op(dataset))
Esempio n. 2
0
File: io.py Progetto: whigg/cate
def read_zarr(path: str,
              key: str = None,
              secret: str = None,
              token: str = None,
              drop_variables: VarNamesLike.TYPE = None,
              decode_cf: bool = True,
              decode_times: bool = True,
              normalize: bool = True) -> xr.Dataset:
    """
    Open a Zarr dataset from a local directory, a Zarr ZIP archive, or remote object storage.

    Paths starting with ``s3://``, ``http://`` or ``https://`` are opened through ``s3fs``.
    For HTTP(S) URLs, the scheme and host are used as the S3 endpoint URL and the URL path
    (without its leading slash) is used as the storage root. Any other path is handed to
    xarray unchanged.

    For the Zarr format, refer to http://zarr.readthedocs.io/en/stable/.

    :param path: Zarr directory path, Zarr ZIP archive path, or (S3) object storage URL.
    :param key: Optional (AWS) access key identifier. Valid only if *path* is a URL.
    :param secret: Optional (AWS) secret access key. Valid only if *path* is a URL.
    :param token: Optional (AWS) access token. Valid only if *path* is a URL.
    :param drop_variables: List of variables to be dropped.
    :param decode_cf: Whether to decode CF attributes and coordinate variables.
    :param decode_times: Whether to decode time information (convert time coordinates to ``datetime`` objects).
    :param normalize: Whether to normalize the dataset's geo- and time-coding upon opening. See operation ``normalize``.
    :return: The opened dataset, passed through ``normalize_op`` and ``adjust_temporal_attrs``
             if *normalize* is set.
    """
    names_to_drop = VarNamesLike.convert(drop_variables)

    uses_http = path.startswith(('http://', 'https://'))
    if not (uses_http or path.startswith('s3://')):
        # Plain local path: xarray opens it directly.
        store = path
    else:
        if uses_http:
            parsed = urllib.parse.urlparse(path)
            # Strip a single leading slash so the root is relative to the endpoint.
            storage_root = parsed.path[1:] if parsed.path.startswith('/') else parsed.path
            client_kwargs = {'endpoint_url': f'{parsed.scheme}://{parsed.netloc}'}
        else:
            storage_root = path
            client_kwargs = None
        # Fall back to anonymous access when no credential at all was supplied.
        anonymous = not (key or secret or token)
        s3_file_system = s3fs.S3FileSystem(anon=anonymous,
                                           key=key,
                                           secret=secret,
                                           token=token,
                                           client_kwargs=client_kwargs)
        store = s3fs.S3Map(storage_root, s3=s3_file_system)

    dataset = xr.open_zarr(store,
                           drop_variables=names_to_drop,
                           decode_cf=decode_cf,
                           decode_times=decode_times)
    if not normalize:
        return dataset
    return adjust_temporal_attrs(normalize_op(dataset))
Esempio n. 3
0
File: io.py Progetto: pwambach/cate
def read_zarr(path: str,
              file_system: str = 'Local',
              drop_variables: VarNamesLike.TYPE = None,
              decode_cf: bool = True,
              decode_times: bool = True,
              normalize: bool = True) -> xr.Dataset:
    """
    Open a Zarr dataset from a local directory, a Zarr ZIP archive, or remote object storage.

    Remote storage ("S3" or "OBS") is always accessed anonymously through ``s3fs``.

    For the Zarr format, refer to http://zarr.readthedocs.io/en/stable/.

    :param path: Zarr directory path, Zarr ZIP archive path, or object storage path or bucket name.
    :param file_system: File system identifier, "Local" is your locally mounted file system,
           for Amazon S3 use "S3", for general Object Storage use "OBS".
    :param drop_variables: List of variables to be dropped.
    :param decode_cf: Whether to decode CF attributes and coordinate variables.
    :param decode_times: Whether to decode time information (convert time coordinates to ``datetime`` objects).
    :param normalize: Whether to normalize the dataset's geo- and time-coding upon opening. See operation ``normalize``.
    :return: The opened dataset, passed through ``normalize_op`` and ``adjust_temporal_attrs``
             if *normalize* is set.
    :raise ValidationError: If *file_system* is none of "Local", "S3", "OBS".
    """
    names_to_drop = VarNamesLike.convert(drop_variables)

    # Resolve the store first so that the dataset is opened in a single place.
    if file_system == 'Local':
        store = path
    elif file_system in ('S3', 'OBS'):
        # Imported lazily: only needed for remote object storage.
        import s3fs
        store = s3fs.S3Map(path, s3=s3fs.S3FileSystem(anon=True))
    else:
        raise ValidationError(f'Unknown file_system {file_system!r}')

    dataset = xr.open_zarr(store,
                           drop_variables=names_to_drop,
                           decode_cf=decode_cf,
                           decode_times=decode_times)
    if not normalize:
        return dataset
    return adjust_temporal_attrs(normalize_op(dataset))
Esempio n. 4
0
File: io.py Progetto: whigg/cate
def open_dataset(ds_name: str = '',
                 ds_id: str = '',
                 time_range: TimeRangeLike.TYPE = None,
                 region: PolygonLike.TYPE = None,
                 var_names: VarNamesLike.TYPE = None,
                 normalize: bool = True,
                 force_local: bool = False,
                 local_ds_id: str = None,
                 monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Open a dataset from a data source identified by *ds_id* (or, if that is empty, *ds_name*).

    :param ds_name: The name of data source. This parameter has been deprecated, please use *ds_id* instead.
    :param ds_id: The identifier for the data source. Takes precedence over *ds_name*.
    :param time_range: Optional time range of the requested dataset
    :param region: Optional spatial region of the requested dataset
    :param var_names: Optional names of variables of the requested dataset
    :param normalize: Whether to normalize the dataset's geo- and time-coding upon opening. See operation ``normalize``.
    :param force_local: Whether to make a local copy of remote data source if it's not present
    :param local_ds_id: Optional local identifier for newly created local copy of remote data source.
           Used only if force_local=True.
    :param monitor: A progress monitor
    :return: A new dataset instance.
    """
    import cate.core.ds

    # Prefer the identifier; the deprecated name is only a fallback.
    source = ds_id or ds_name
    dataset = cate.core.ds.open_dataset(data_source=source,
                                        time_range=time_range,
                                        var_names=var_names,
                                        region=region,
                                        force_local=force_local,
                                        local_ds_id=local_ds_id,
                                        monitor=monitor)

    # NOTE(review): this is a truthiness test on the dataset, not an ``is not None`` check,
    # so any dataset that evaluates as falsy is returned without normalization -
    # confirm that this is intended.
    if not (dataset and normalize):
        return dataset
    return adjust_temporal_attrs(normalize_op(dataset))