def add_store_config(self,
                     store_instance_id: str,
                     store_config: DataStoreConfig):
    assert_instance(store_instance_id, str, 'store_instance_id')
    assert_instance(store_config, DataStoreConfig, 'store_config')
    if store_instance_id in self._instances:
        self._instances[store_instance_id].close()
    self._instances[store_instance_id] = DataStoreInstance(store_config)

def write_data(self,
               data: Any,
               data_id: str = None,
               writer_id: str = None,
               replace: bool = False,
               **write_params) -> str:
    assert_instance(data, (xr.Dataset, MultiLevelDataset))
    if not writer_id:
        if isinstance(data, MultiLevelDataset):
            predicate = get_data_accessor_predicate(
                type_specifier=TYPE_SPECIFIER_MULTILEVEL_DATASET,
                format_id='levels',
                storage_id=_STORAGE_ID)
        elif isinstance(data, xr.Dataset):
            predicate = get_data_accessor_predicate(
                type_specifier=TYPE_SPECIFIER_DATASET,
                format_id='zarr',
                storage_id=_STORAGE_ID)
        else:
            raise DataStoreError(f'Unsupported data type "{type(data)}"')
        extensions = find_data_writer_extensions(predicate=predicate)
        writer_id = extensions[0].name
    data_id = self._ensure_valid_data_id(data_id, data)
    path = self._resolve_data_id_to_path(data_id)
    self._new_s3_writer(writer_id).write_data(data,
                                              data_id=path,
                                              replace=replace,
                                              **write_params)
    self.register_data(data_id, data)
    return data_id

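# Hedged usage sketch for the writer-selection fallback above: with
# writer_id omitted, a plain xr.Dataset is routed to the 'zarr' writer and
# a MultiLevelDataset to the 'levels' writer. `store`, `dataset` and
# `ml_dataset` are placeholders, not names from this listing.
#
#   data_id = store.write_data(dataset, replace=True)
#   data_id = store.write_data(ml_dataset, data_id='demo.levels')
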
def write_data(self,
               data: xr.Dataset,
               data_id: str,
               replace=False,
               **write_params):
    assert_instance(data, xr.Dataset, 'data')
    data.to_zarr(data_id, mode='w' if replace else None, **write_params)

def write_data(self,
               data: xr.Dataset,
               data_id: str,
               replace=False,
               **write_params):
    assert_instance(data, xr.Dataset, 'data')
    # Note: xarray's to_netcdf() overwrites an existing file by default,
    # so the `replace` flag has no effect here.
    data.to_netcdf(data_id, **write_params)

def write_data(self,
               data: xr.Dataset,
               data_id: str,
               replace=False,
               **write_params) -> str:
    assert_instance(data, xr.Dataset, name='data')
    assert_instance(data_id, str, name='data_id')
    fs, root, write_params = self.load_fs(write_params)
    if not replace and fs.exists(data_id):
        raise DataStoreError(f'Data resource {data_id} already exists')
    # This doesn't yet work as expected with fsspec and netcdf:
    # engine = write_params.pop('engine', 'scipy')
    # with fs.open(data_id, 'wb') as file:
    #     data.to_netcdf(file, engine=engine, **write_params)
    is_local = is_local_fs(fs)
    if is_local:
        file_path = data_id
    else:
        _, file_path = new_temp_file(suffix='.nc')
    engine = write_params.pop('engine', 'netcdf4')
    data.to_netcdf(file_path, engine=engine, **write_params)
    if not is_local:
        fs.put_file(file_path, data_id)
    return data_id

def _assert_valid_xy_coords(xy_coords: Any):
    assert_instance(xy_coords, xr.DataArray, name='xy_coords')
    assert_true(xy_coords.ndim == 3
                and xy_coords.shape[0] == 2
                and xy_coords.shape[1] >= 2
                and xy_coords.shape[2] >= 2,
                'xy_coords must have dimensions'
                ' (2, height, width) with height >= 2 and width >= 2')

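# Minimal, self-contained illustration of the shape contract checked above:
# an xy_coords array with dimensions (2, height, width). Dimension names and
# values are illustrative only.
import numpy as np
import xarray as xr

example_xy_coords = xr.DataArray(
    np.zeros((2, 3, 4)),              # (2, height=3, width=4)
    dims=('coord', 'y', 'x'))
assert example_xy_coords.ndim == 3 and example_xy_coords.shape[0] == 2
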
def __init__(self,
             cube_config: CubeConfig,
             store_pool: DataStorePool = None):
    assert_instance(cube_config, CubeConfig, 'cube_config')
    if store_pool is not None:
        assert_instance(store_pool, DataStorePool, 'store_pool')
    self._cube_config = cube_config
    self._store_pool = store_pool

def _inject_attrs(self, attrs: Dict[str, Any]):
    assert_instance(attrs, dict, name='attrs')
    schema = self.get_schema()
    assert_true(isinstance(schema, JsonObjectSchema),
                message='schema must be a JSON object schema')
    all_attrs = {k: None for k in (schema.properties or {}).keys()}
    all_attrs.update(attrs)
    JsonObjectSchema.inject_attrs(self, all_attrs)

def __init__(self, store_configs: Dict[str, DataStoreConfig] = None):
    if store_configs is not None:
        assert_instance(store_configs, dict, name='store_configs')
        self._instances: Dict[str, DataStoreInstance] = {
            k: DataStoreInstance(v) for k, v in store_configs.items()
        }
    else:
        self._instances: Dict[str, DataStoreInstance] = {}

def __init__(self,
             request: CubeGeneratorRequest,
             store_pool: DataStorePool = None):
    assert_instance(request, CubeGeneratorRequest, name='request')
    if store_pool is not None:
        assert_instance(store_pool, DataStorePool, name='store_pool')
    self._request: CubeGeneratorRequest = request
    self._store_pool: Optional[DataStorePool] = store_pool
    self._dataset_descriptors: Optional[Sequence[DatasetDescriptor]] = None

def __init__(self,
             input_configs: Sequence[InputConfig],
             store_pool: DataStorePool = None):
    assert_true(len(input_configs) > 0, 'At least one input must be given')
    if store_pool is not None:
        assert_instance(store_pool, DataStorePool, 'store_pool')
    self._input_configs = input_configs
    self._store_pool = store_pool

def __init__(self, store_configs: DataStoreConfigDict = None):
    if store_configs is not None:
        assert_instance(store_configs, dict, name='store_configs')
    else:
        store_configs = {}
    self._instances: DataStoreInstanceDict = {
        k: DataStoreInstance(v) for k, v in store_configs.items()
    }

def __init__(self,
             base_dataset: xr.Dataset,
             tile_grid: TileGrid = None,
             ds_id: str = None):
    assert_instance(base_dataset, xr.Dataset, name='base_dataset')
    self._base_cube, grid_mapping, _ = decode_cube(base_dataset,
                                                   force_non_empty=True)
    super().__init__(grid_mapping=grid_mapping,
                     tile_grid=tile_grid,
                     ds_id=ds_id)

def __init__(self,
             service_config: ServiceConfig,
             progress_period: float = 1.0,
             raise_on_error: bool = False,
             verbosity: int = 0):
    super().__init__(raise_on_error=raise_on_error, verbosity=verbosity)
    assert_instance(service_config, ServiceConfig, 'service_config')
    assert_instance(progress_period, (int, float), 'progress_period')
    self._service_config: ServiceConfig = service_config
    self._access_token: Optional[str] = service_config.access_token
    self._progress_period: float = progress_period

def __init__(self,
             store_id: str,
             store_params: Dict[str, Any] = None,
             title: str = None,
             description: str = None):
    assert_given(store_id, name='store_id')
    if store_params is not None:
        assert_instance(store_params, dict, name='store_params')
    self._store_id = store_id
    self._store_params = store_params
    self._title = title
    self._description = description

def __init__(self,
             other: Union[Mapping, MutableMapping],
             logger: Logger = LOG,
             name: Optional[str] = None):
    assert_instance(other, Mapping)
    self._other = other
    self._measure_time = measure_time_cm(logger=logger)
    self._name = name or 'chunk_store'
    if hasattr(other, 'listdir'):
        setattr(self, 'listdir', self.__listdir)
    if hasattr(other, 'getsize'):
        setattr(self, 'getsize', self.__getsize)

def __init__(self,
             *args,
             status_code: Optional[int] = None,
             remote_traceback: Optional[List[str]] = None,
             remote_output: Optional[List[str]] = None,
             **kwargs):
    # noinspection PyArgumentList
    super().__init__(*args, **kwargs)
    if status_code is not None:
        assert_instance(status_code, int, 'status_code')
    self._status_code = status_code
    self._remote_traceback = remote_traceback
    self._remote_output = remote_output

def __init__(self,
             fs: Optional[fsspec.AbstractFileSystem] = None,
             root: str = '',
             max_depth: Optional[int] = 1,
             read_only: bool = False):
    if fs is not None:
        assert_instance(fs, fsspec.AbstractFileSystem, name='fs')
    self._fs = fs
    self._raw_root: str = root or ''
    self._root: Optional[str] = None
    self._max_depth = max_depth
    self._read_only = read_only
    self._lock = RLock()

def __init__(self,
             store_pool: DataStorePool = None,
             raise_on_error: bool = False,
             verbosity: int = 0):
    super().__init__(raise_on_error=raise_on_error, verbosity=verbosity)
    if store_pool is not None:
        assert_instance(store_pool, DataStorePool, 'store_pool')
    self._store_pool = store_pool if store_pool is not None \
        else DataStorePool()
    self._generated_data_id: Optional[str] = None
    self._generated_cube: Optional[xr.Dataset] = None
    self._generated_gm: Optional[GridMapping] = None

def get_store_instance_id(self,
                          store_config: DataStoreConfig,
                          strict_check: bool = False) -> Optional[str]:
    assert_instance(store_config, DataStoreConfig, 'store_config')
    for instance_id, instance in self._instances.items():
        if strict_check:
            if instance.store_config == store_config:
                return instance_id
        else:
            if instance.store_config.store_id == store_config.store_id \
                    and instance.store_config.store_params \
                    == store_config.store_params:
                return instance_id
    return None

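# Hedged usage sketch for the non-strict lookup above: two configs with the
# same store_id and store_params resolve to the same instance identifier.
# `pool` is a placeholder for an instance of the defining class, and the
# store identifier and parameters are hypothetical.
#
#   pool.add_store_config('my-s3',
#                         DataStoreConfig('s3', {'root': 'my-bucket'}))
#   pool.get_store_instance_id(DataStoreConfig('s3', {'root': 'my-bucket'}))
#   # -> 'my-s3'
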
def _assert_valid(obj: Optional[Dict[str, Any]],
                  schema: Optional[JsonObjectSchema],
                  name: str,
                  kind: str,
                  validator: Callable[[Dict[str, Any], JsonObjectSchema],
                                      Any]):
    if obj is None:
        return
    assert_instance(obj, dict, name=name)
    if schema is not None:
        assert_instance(schema, JsonObjectSchema, name=f'{name}_schema')
        try:
            validator(obj, schema)
        except jsonschema.ValidationError as e:
            raise DataStoreError(f'Invalid {kind}'
                                 f' detected: {e.message}') from e

def write_data(self,
               data: gpd.GeoDataFrame,
               data_id: str,
               **write_params) -> str:
    # TODO: implement me correctly,
    #  this is not valid for shapefile AND geojson
    assert_instance(data, (gpd.GeoDataFrame, pd.DataFrame), 'data')
    fs, root, write_params = self.load_fs(write_params)
    is_local = is_local_fs(fs)
    if is_local:
        file_path = data_id
    else:
        _, file_path = new_temp_file()
    data.to_file(file_path, driver=self.get_driver_name(), **write_params)
    if not is_local:
        fs.put_file(file_path, data_id)
    return data_id

def __init__(self,
             dtype: Type,
             alias: Union[None, str, Sequence[str]] = None):
    """
    :param dtype: The Python data type.
    :param alias: An alias name or list of aliases.
    """
    assert_instance(dtype, type, name='dtype')
    if alias is not None:
        assert_instance(alias, (str, tuple, list), name='alias')
    self._dtype = dtype
    self._aliases = (([] if alias is None
                      else [alias] if isinstance(alias, str)
                      else list(alias))
                     + [self._get_fully_qualified_type_name(dtype)])
    self._alias_set = set(self._aliases)  # for faster lookup

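# Standalone illustration of the alias normalization above. The private
# helper _get_fully_qualified_type_name is not available here, so an
# f-string with the same intent stands in for it; the dtype/alias values
# are arbitrary examples.
example_dtype = float
example_alias = 'float'
example_aliases = ([] if example_alias is None
                   else [example_alias] if isinstance(example_alias, str)
                   else list(example_alias)) \
                  + [f'{example_dtype.__module__}.{example_dtype.__name__}']
assert example_aliases == ['float', 'builtins.float']
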
def open_data(self, data_id: str, **open_params) -> xr.Dataset:
    assert_instance(data_id, str, name='data_id')
    fs, root, open_params = self.load_fs(open_params)
    # This doesn't yet work as expected with fsspec and netcdf:
    # engine = open_params.pop('engine', 'scipy')
    # with fs.open(data_id, 'rb') as file:
    #     return xr.open_dataset(file, engine=engine, **open_params)
    is_local = is_local_fs(fs)
    if is_local:
        file_path = data_id
    else:
        _, file_path = new_temp_file(suffix='.nc')
        fs.get_file(data_id, file_path)
    engine = open_params.pop('engine', 'netcdf4')
    return xr.open_dataset(file_path, engine=engine, **open_params)

def __init__(self,
             status: str,
             status_code: Optional[int] = None,
             result: Optional[R] = None,
             message: Optional[str] = None,
             output: Optional[Sequence[str]] = None,
             traceback: Optional[Sequence[str]] = None,
             versions: Optional[Dict[str, str]] = None):
    assert_instance(status, str, name='status')
    assert_in(status, STATUS_IDS, name='status')
    self.status = status
    self.status_code = status_code
    self.result = result
    self.message = message if message else None
    self.output = list(output) if output else None
    self.traceback = list(traceback) if traceback else None
    self.versions = dict(versions) if versions else None

def __init__(self,
             tile_size: int = 256,
             min_level: Optional[int] = None,
             max_level: Optional[int] = None):
    """
    :param tile_size: Tile size in pixels, applied to both width and height.
    :param min_level: Optional minimum level of detail.
    :param max_level: Optional maximum level of detail.
    """
    assert_instance(tile_size, int, name='tile_size')
    super().__init__(tile_size=(tile_size, tile_size),
                     num_level_0_tiles=(2, 1),
                     crs=GEOGRAPHIC_CRS,
                     max_resolution=180. / tile_size,
                     extent=(-180., -90., 180., 90.),
                     min_level=min_level,
                     max_level=max_level)

def write_data(self,
               data: xr.Dataset,
               data_id: str,
               replace=False,
               **write_params):
    assert_instance(data, xr.Dataset, 'data')
    s3 = self._s3
    if s3 is None:
        s3, write_params = self.consume_s3fs_params(write_params)
    bucket_name, write_params = self.consume_bucket_name_param(write_params)
    try:
        data.to_zarr(s3fs.S3Map(root=f'{bucket_name}/{data_id}'
                                if bucket_name else data_id,
                                s3=s3,
                                check=False),
                     mode='w' if replace else None,
                     **write_params)
    except ValueError as e:
        raise DataStoreError(f'{e}') from e

def open_data(self, data_id: str, **open_params) -> xr.Dataset:
    assert_instance(data_id, str, name='data_id')
    fs, root, open_params = self.load_fs(open_params)
    zarr_store = fs.get_mapper(data_id)
    cache_size = open_params.pop('cache_size', None)
    if isinstance(cache_size, int) and cache_size > 0:
        zarr_store = zarr.LRUStoreCache(zarr_store, max_size=cache_size)
    log_access = open_params.pop('log_access', None)
    if log_access:
        zarr_store = LoggingStore(zarr_store,
                                  name=f'zarr_store({data_id!r})')
    consolidated = open_params.pop('consolidated',
                                   fs.exists(f'{data_id}/.zmetadata'))
    try:
        return xr.open_zarr(zarr_store,
                            consolidated=consolidated,
                            **open_params)
    except ValueError as e:
        raise DataStoreError(f'Failed to open'
                             f' dataset {data_id!r}: {e}') from e

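# Hedged usage sketch: `store` stands for an instance of the class defining
# open_data() above; cache_size and log_access are the optional parameters
# consumed by that method, and the data identifier is hypothetical.
#
#   ds = store.open_data('cube.zarr',
#                        cache_size=2 ** 28,   # wrap store in a zarr LRU cache
#                        log_access=True)      # wrap store in a LoggingStore
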
def __init__(self,
             path: str,
             sub_path: str = None,
             includes: Collection[str] = None,
             excludes: Collection[str] = None,
             storage_params: Dict[str, Any] = None):
    assert_instance(path, str, 'path')
    assert_given(path, 'path')
    if sub_path is not None:
        assert_instance(sub_path, str, 'sub_path')
    self._path = path
    self._sub_path = sub_path
    self._storage_params = dict(storage_params) \
        if storage_params is not None else None
    self._includes = list(includes) if includes is not None else None
    self._excludes = list(excludes) if excludes is not None else None
    # computed members
    self._include_patterns = _translate_patterns(includes or [])
    self._exclude_patterns = _translate_patterns(excludes or [])
    # cached, computed members
    self._details: Optional[_FileSetDetails] = None

def __init__(self,
             source_images: Sequence[TiledImage],
             image_id: str = None,
             encode: bool = False,
             format: str = None,
             tile_cache: Cache = None,
             trace_perf: bool = False):
    assert_instance(source_images, (list, tuple), name='source_images')
    assert_true(len(source_images) == 3,
                message='source_images must have length 3')
    proto_source_image = source_images[0]
    super().__init__(size=proto_source_image.size,
                     tile_size=proto_source_image.tile_size,
                     num_tiles=proto_source_image.num_tiles,
                     image_id=image_id,
                     format=format,
                     mode='RGBA',
                     tile_cache=tile_cache,
                     trace_perf=trace_perf)
    self._source_images = tuple(source_images)
    self._encode = encode