import urllib.parse

import s3fs
import xarray as xr


def read_netcdf(file: str,
                drop_variables: VarNamesLike.TYPE = None,
                decode_cf: bool = True,
                normalize: bool = True,
                decode_times: bool = True,
                engine: str = None) -> xr.Dataset:
    """
    Read a dataset from a netCDF 3/4 or HDF file.

    :param file: The netCDF file path.
    :param drop_variables: List of variables to be dropped.
    :param decode_cf: Whether to decode CF attributes and coordinate variables.
    :param normalize: Whether to normalize the dataset's geo- and time-coding upon opening.
        See operation ``normalize``.
    :param decode_times: Whether to decode time information
        (convert time coordinates to ``datetime`` objects).
    :param engine: Optional netCDF engine name.
    """
    drop_variables = VarNamesLike.convert(drop_variables)
    ds = xr.open_dataset(file,
                         drop_variables=drop_variables,
                         decode_cf=decode_cf,
                         decode_times=decode_times,
                         engine=engine)
    # Chunk the dataset spatially, with one time step per chunk, so that
    # subsequent operations can process it lazily.
    chunks = get_spatial_ext_chunk_sizes(ds)
    if chunks and 'time' in ds.dims:
        chunks['time'] = 1
    if chunks:
        ds = ds.chunk(chunks)
    if normalize:
        return adjust_temporal_attrs(normalize_op(ds))
    return ds
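
# A minimal usage sketch for read_netcdf, assuming a hypothetical local file
# 'demo.nc'; the file name, dropped variable, and engine below are
# illustrative only, not part of the API.

def _example_read_netcdf():
    # Drop an unneeded ancillary variable and let read_netcdf apply its
    # default spatial/temporal chunking and normalization.
    ds = read_netcdf('demo.nc', drop_variables=['crs'], engine='netcdf4')
    print(ds.dims)
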
def read_zarr(path: str,
              key: str = None,
              secret: str = None,
              token: str = None,
              drop_variables: VarNamesLike.TYPE = None,
              decode_cf: bool = True,
              decode_times: bool = True,
              normalize: bool = True) -> xr.Dataset:
    """
    Read a dataset from a Zarr directory, Zarr ZIP archive, or remote Zarr object storage.

    For the Zarr format, refer to http://zarr.readthedocs.io/en/stable/.

    :param path: Zarr directory path, Zarr ZIP archive path, or (S3) object storage URL.
    :param key: Optional (AWS) access key identifier. Valid only if *path* is a URL.
    :param secret: Optional (AWS) secret access key. Valid only if *path* is a URL.
    :param token: Optional (AWS) access token. Valid only if *path* is a URL.
    :param drop_variables: List of variables to be dropped.
    :param decode_cf: Whether to decode CF attributes and coordinate variables.
    :param decode_times: Whether to decode time information
        (convert time coordinates to ``datetime`` objects).
    :param normalize: Whether to normalize the dataset's geo- and time-coding upon opening.
        See operation ``normalize``.
    """
    drop_variables = VarNamesLike.convert(drop_variables)

    is_s3_url = path.startswith('s3://')
    is_http_url = path.startswith('http://') or path.startswith('https://')
    if is_s3_url or is_http_url:
        root = path
        client_kwargs = None
        if is_http_url:
            # For HTTP(S) URLs, split the endpoint from the object path so
            # that s3fs can talk to non-AWS, S3-compatible object storage.
            url = urllib.parse.urlparse(path)
            root = url.path[1:] if url.path.startswith('/') else url.path
            client_kwargs = dict(endpoint_url=f'{url.scheme}://{url.netloc}')
        # Access is anonymous unless at least one credential is given.
        store = s3fs.S3Map(root,
                           s3=s3fs.S3FileSystem(anon=not (key or secret or token),
                                                key=key,
                                                secret=secret,
                                                token=token,
                                                client_kwargs=client_kwargs))
    else:
        store = path

    ds = xr.open_zarr(store,
                      drop_variables=drop_variables,
                      decode_cf=decode_cf,
                      decode_times=decode_times)
    if normalize:
        return adjust_temporal_attrs(normalize_op(ds))
    return ds
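
# A minimal usage sketch for the credential-aware read_zarr above. The bucket
# name, endpoint, and credentials are hypothetical placeholders.

def _example_read_zarr_remote():
    # Anonymous access: with no key/secret/token, read_zarr opens the S3
    # file system with anon=True.
    ds_public = read_zarr('s3://some-bucket/cube.zarr')
    # Credentialed access to an S3-compatible endpoint given as an HTTP(S)
    # URL; the endpoint is derived from the URL's scheme and netloc.
    ds_private = read_zarr('https://obs.example.com/some-bucket/cube.zarr',
                           key='MY_KEY_ID', secret='MY_SECRET')
    return ds_public, ds_private
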
def read_zarr(path: str,
              file_system: str = 'Local',
              drop_variables: VarNamesLike.TYPE = None,
              decode_cf: bool = True,
              decode_times: bool = True,
              normalize: bool = True) -> xr.Dataset:
    """
    Read a dataset from a Zarr directory, Zarr ZIP archive, or remote Zarr object storage.

    For the Zarr format, refer to http://zarr.readthedocs.io/en/stable/.

    :param path: Zarr directory path, Zarr ZIP archive path, or object storage path or bucket name.
    :param file_system: File system identifier: "Local" is your locally mounted file system;
        for Amazon S3 use "S3", for general Object Storage use "OBS".
    :param drop_variables: List of variables to be dropped.
    :param decode_cf: Whether to decode CF attributes and coordinate variables.
    :param decode_times: Whether to decode time information
        (convert time coordinates to ``datetime`` objects).
    :param normalize: Whether to normalize the dataset's geo- and time-coding upon opening.
        See operation ``normalize``.
    """
    drop_variables = VarNamesLike.convert(drop_variables)
    if file_system == 'Local':
        ds = xr.open_zarr(path,
                          drop_variables=drop_variables,
                          decode_cf=decode_cf,
                          decode_times=decode_times)
    elif file_system == 'S3' or file_system == 'OBS':
        import s3fs
        # This variant always accesses object storage anonymously.
        store = s3fs.S3Map(path, s3=s3fs.S3FileSystem(anon=True))
        ds = xr.open_zarr(store,
                          drop_variables=drop_variables,
                          decode_cf=decode_cf,
                          decode_times=decode_times)
    else:
        raise ValidationError(f'Unknown file_system {file_system!r}')
    if normalize:
        return adjust_temporal_attrs(normalize_op(ds))
    return ds
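
# A minimal usage sketch for the file_system-based read_zarr variant; the
# paths and bucket names are hypothetical.

def _example_read_zarr_file_system():
    # Locally mounted Zarr directory (the default file system):
    ds_local = read_zarr('/data/cube.zarr')
    # Public bucket on S3; note this variant supports anonymous access only:
    ds_remote = read_zarr('some-bucket/cube.zarr', file_system='S3')
    return ds_local, ds_remote
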
def open_dataset(ds_name: str = '',
                 ds_id: str = '',
                 time_range: TimeRangeLike.TYPE = None,
                 region: PolygonLike.TYPE = None,
                 var_names: VarNamesLike.TYPE = None,
                 normalize: bool = True,
                 force_local: bool = False,
                 local_ds_id: str = None,
                 monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Open a dataset from a data source identified by *ds_name*.

    :param ds_name: The name of the data source. This parameter has been deprecated,
        please use *ds_id* instead.
    :param ds_id: The identifier of the data source.
    :param time_range: Optional time range of the requested dataset.
    :param region: Optional spatial region of the requested dataset.
    :param var_names: Optional names of variables of the requested dataset.
    :param normalize: Whether to normalize the dataset's geo- and time-coding upon opening.
        See operation ``normalize``.
    :param force_local: Whether to make a local copy of the remote data source if it is not present.
    :param local_ds_id: Optional local identifier for a newly created local copy of the remote
        data source. Used only if *force_local* is ``True``.
    :param monitor: A progress monitor.
    :return: A new dataset instance.
    """
    import cate.core.ds
    ds = cate.core.ds.open_dataset(data_source=ds_id or ds_name,
                                   time_range=time_range,
                                   var_names=var_names,
                                   region=region,
                                   force_local=force_local,
                                   local_ds_id=local_ds_id,
                                   monitor=monitor)
    if ds and normalize:
        return adjust_temporal_attrs(normalize_op(ds))
    return ds
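
# A minimal usage sketch for open_dataset. The data source identifier, time
# range, and variable name are hypothetical placeholders (actual identifiers
# depend on the configured data stores), and the time range is written as a
# "start, end" date string on the assumption that TimeRangeLike accepts it.

def _example_open_dataset():
    ds = open_dataset(ds_id='some.data.source.id',
                      time_range='2007-01-01, 2007-01-31',
                      var_names=['some_var'],
                      force_local=False)
    return ds
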