def get_open_data_params_schema(self, data_id: str = None) -> JsonObjectSchema:
    """Describe the parameters accepted when opening a zarr dataset.

    :param data_id: Optional data identifier. Unused here — the schema
        is identical for all datasets of this opener.
    :return: A ``JsonObjectSchema`` whose properties mirror keyword
        arguments understood by the zarr/xarray open machinery.
    """
    properties = {}
    properties['group'] = JsonStringSchema(
        description='Group path. (a.k.a. path in zarr terminology.).',
        min_length=1,
    )
    properties['chunks'] = JsonObjectSchema(
        description='Optional chunk sizes along each dimension.'
                    ' Chunk size values may be None, "auto"'
                    ' or an integer value.',
        examples=[
            {'time': None, 'lat': 'auto', 'lon': 90},
            {'time': 1, 'y': 512, 'x': 512},
        ],
        additional_properties=True,
    )
    properties['decode_cf'] = JsonBooleanSchema(
        description='Whether to decode these variables,'
                    ' assuming they were saved'
                    ' according to CF conventions.',
        default=True,
    )
    properties['mask_and_scale'] = JsonBooleanSchema(
        description='If True, replace array values equal to'
                    ' attribute "_FillValue" with NaN.'
                    ' Use "scaling_factor" and "add_offset" attributes'
                    ' to compute actual values.',
        default=True,
    )
    properties['decode_times'] = JsonBooleanSchema(
        description='If True, decode times encoded in the standard'
                    ' NetCDF datetime format into datetime objects.'
                    ' Otherwise, leave them encoded as numbers.',
        default=True,
    )
    properties['decode_coords'] = JsonBooleanSchema(
        description='If True, decode the "coordinates" attribute'
                    ' to identify coordinates in the resulting dataset.',
        default=True,
    )
    properties['drop_variables'] = JsonArraySchema(
        items=JsonStringSchema(min_length=1),
    )
    properties['consolidated'] = JsonBooleanSchema(
        description='Whether to open the store using'
                    " zarr's consolidated metadata capability."
                    ' Only works for stores that have already'
                    ' been consolidated.',
        default=False,
    )
    return JsonObjectSchema(properties=properties,
                            required=[],
                            additional_properties=False)
def get_schema(cls) -> JsonObjectSchema:
    """Return the JSON schema for this configuration object.

    Two optional properties are accepted: a substitute flag and the
    list of required scope identifiers. No other keys are allowed.
    """
    return JsonObjectSchema(
        properties=dict(
            IsSubstitute=JsonBooleanSchema(),
            RequiredScopes=JsonArraySchema(items=IdentifierSchema),
        ),
        additional_properties=False,
    )
def test_from_json_object_additional_properties_is_schema(self):
    """additional_properties may itself be a schema: every value of the
    outer object is then converted via that schema (here into Person
    namedtuples via its factory)."""
    Person = namedtuple('Person', ['name', 'age', 'deleted'])
    person_schema = JsonObjectSchema(
        properties=dict(
            name=JsonStringSchema(),
            age=JsonIntegerSchema(),
            deleted=JsonBooleanSchema(default=False),
        ),
        factory=Person,
    )
    schema = JsonObjectSchema(additional_properties=person_schema)
    value = {
        'p1': {'name': 'Bibo', 'age': 15, 'deleted': True},
        'p2': {'name': 'Ernie', 'age': 12, 'deleted': False},
    }
    expected = {
        'p1': Person(name='Bibo', age=15, deleted=True),
        'p2': Person(name='Ernie', age=12, deleted=False),
    }
    self.assertEqual(expected, schema.from_instance(value))
def test_from_json_object_object(self):
    """Nested object properties fill in defaults and honour factories
    at both the outer and the nested level."""
    person_schema = JsonObjectSchema(
        properties=dict(
            name=JsonStringSchema(),
            age=JsonIntegerSchema(),
            deleted=JsonBooleanSchema(default=False),
        ))
    schema = JsonObjectSchema(properties=dict(person=person_schema))
    value = {'person': {'name': 'Bibo', 'age': 15}}
    # The missing "deleted" key is filled in from its default.
    expected_person = {'name': 'Bibo', 'age': 15, 'deleted': False}
    self.assertEqual({'person': expected_person},
                     schema.from_instance(value))
    # An outer factory wraps the converted mapping.
    Assignment = namedtuple('Assignment', ['person'])
    schema.factory = Assignment
    self.assertEqual(Assignment(person=expected_person),
                     schema.from_instance(value))
    # A nested factory additionally converts the inner object.
    Person = namedtuple('Person', ['name', 'age', 'deleted'])
    person_schema.factory = Person
    self.assertEqual(
        Assignment(person=Person(name='Bibo', age=15, deleted=False)),
        schema.from_instance(value))
def test_to_json_object(self):
    """to_instance passes plain dicts through and applies a serializer
    when one is set on the schema."""
    person_schema = JsonObjectSchema(
        properties=dict(
            name=JsonStringSchema(),
            age=JsonIntegerSchema(),
            deleted=JsonBooleanSchema(default=False),
        ))
    value = {'name': 'Bibo', 'age': 12, 'deleted': True}
    self.assertEqual(value, person_schema.to_instance(value))
    # Extra keys are ok, because person_schema does not explicitly say
    # additional_properties=False.
    value_extra = dict(value, comment='Hello!')
    self.assertEqual(value_extra, person_schema.to_instance(value_extra))

    Person = namedtuple('Person', ['name', 'age', 'deleted'])

    def serialize(person: Person) -> Dict[str, Any]:
        # Convert the namedtuple back into a plain mapping.
        return person._asdict()

    person_schema.serializer = serialize
    person = Person(**value)
    self.assertEqual(value, person_schema.to_instance(person))
def test_from_instance_tuple(self):
    """A positional items list converts each element with its own schema."""
    item_schemas = [JsonBooleanSchema(),
                    JsonIntegerSchema(),
                    JsonStringSchema()]
    array_schema = JsonArraySchema(items=item_schemas)
    self.assertEqual([False, 2, 'U'],
                     array_schema.from_instance([False, 2, 'U']))
def get_write_data_params_schema(self) -> JsonObjectSchema:
    """Describe the parameters accepted when writing a zarr dataset.

    :return: A ``JsonObjectSchema`` covering group path, per-variable
        encodings, metadata consolidation, and the append dimension.
    """
    properties = {}
    properties['group'] = JsonStringSchema(
        description='Group path. (a.k.a. path in zarr terminology.).',
        min_length=1,
    )
    properties['encoding'] = JsonObjectSchema(
        description='Nested dictionary with variable names as keys and'
                    ' dictionaries of variable specific encodings'
                    ' as values.',
        examples=[{'my_variable': {'dtype': 'int16',
                                   'scale_factor': 0.1}}],
        additional_properties=True,
    )
    properties['consolidated'] = JsonBooleanSchema(
        description='If True, apply zarr’s consolidate_metadata()'
                    ' function to the store after writing.')
    properties['append_dim'] = JsonStringSchema(
        description='If set, the dimension on which the data'
                    ' will be appended.',
        min_length=1,
    )
    return JsonObjectSchema(properties=properties,
                            required=[],
                            additional_properties=False)
def get_data_store_params_schema(cls) -> JsonObjectSchema:
    """Describe the constructor parameters of this data store:
    root path, maximum browse depth and a read-only flag."""
    return JsonObjectSchema(
        properties=dict(
            root=JsonStringSchema(default=''),
            max_depth=JsonIntegerSchema(nullable=True, default=1),
            read_only=JsonBooleanSchema(default=False),
        ),
        additional_properties=False,
    )
def get_storage_options_schema(cls) -> JsonObjectSchema:
    """Describe the storage options of this filesystem: the
    ``auto_mkdirs`` flag plus the options common to all filesystems."""
    properties = dict(
        auto_mkdirs=JsonBooleanSchema(
            description='Whether, when opening a file, the directory'
                        ' containing it should be created (if it'
                        " doesn't already exist)."),
    )
    properties.update(COMMON_STORAGE_OPTIONS_SCHEMA_PROPERTIES)
    # Extra fsspec options are tolerated.
    return JsonObjectSchema(properties=properties,
                            additional_properties=True)
def get_delete_data_params_schema(self, data_id: str = None) -> JsonObjectSchema:
    """Describe the parameters accepted when deleting data:
    recursion flag, maximum depth, and filesystem storage options.

    :param data_id: Optional data identifier (unused; the schema is the
        same for all data resources).
    """
    properties = dict(
        recursive=JsonBooleanSchema(),
        maxdepth=JsonIntegerSchema(),
        storage_options=self.get_storage_options_schema(),
    )
    return JsonObjectSchema(
        properties=properties,
        additional_properties=False,
    )
def _get_open_data_params_schema(self, dsd: DatasetDescriptor = None
                                 ) -> JsonObjectSchema:
    """Build the open-parameters schema, optionally specialized for a
    described dataset.

    :param dsd: Optional dataset descriptor. When given, its time range
        constrains ``time_range`` and its variables populate the
        ``variable_names`` enum.
    :return: A ``JsonObjectSchema`` combining cube parameters, cache
        parameters and — when not yet connected to the API — the store
        parameters as well.
    """
    # BUG FIX: dsd defaults to None, but the original code dereferenced
    # dsd.time_range unconditionally, raising AttributeError for a None
    # descriptor. Guard on dsd as the code below already does for
    # dsd.data_vars.
    if dsd is not None and dsd.time_range is not None:
        min_date, max_date = dsd.time_range
    else:
        min_date, max_date = None, None
    cube_params = dict(
        dataset_name=JsonStringSchema(min_length=1),
        variable_names=JsonArraySchema(items=JsonStringSchema(
            enum=[v.name for v in dsd.data_vars]
            if dsd and dsd.data_vars else None)),
        variable_units=JsonArraySchema(),
        variable_sample_types=JsonArraySchema(),
        tile_size=JsonArraySchema(
            items=(JsonNumberSchema(minimum=1,
                                    maximum=2500,
                                    default=DEFAULT_TILE_SIZE),
                   JsonNumberSchema(minimum=1,
                                    maximum=2500,
                                    default=DEFAULT_TILE_SIZE)),
            default=(DEFAULT_TILE_SIZE, DEFAULT_TILE_SIZE)),
        crs=JsonStringSchema(default=DEFAULT_CRS,
                             enum=AVAILABLE_CRS_IDS),
        bbox=JsonArraySchema(items=(JsonNumberSchema(),
                                    JsonNumberSchema(),
                                    JsonNumberSchema(),
                                    JsonNumberSchema())),
        spatial_res=JsonNumberSchema(exclusive_minimum=0.0),
        time_range=JsonDateSchema.new_range(min_date=min_date,
                                            max_date=max_date),
        # TODO: add pattern
        time_period=JsonStringSchema(
            default='1D',
            nullable=True,
            enum=[None, *map(lambda n: f'{n}D', range(1, 14)),
                  '1W', '2W']),
        time_tolerance=JsonStringSchema(default=DEFAULT_TIME_TOLERANCE,
                                        format='^([1-9]*[0-9]*)[NULSTH]$'),
        collection_id=JsonStringSchema(),
        four_d=JsonBooleanSchema(default=False),
    )
    cache_params = dict(
        max_cache_size=JsonIntegerSchema(minimum=0),
    )
    # required cube_params
    required = [
        'bbox',
        'spatial_res',
        'time_range',
    ]
    sh_params = {}
    if self._sentinel_hub is None:
        # If we are NOT connected to the API (yet), we also include
        # store parameters
        sh_schema = SentinelHubDataStore.get_data_store_params_schema()
        sh_params = sh_schema.properties
        required.extend(sh_schema.required or [])
    return JsonObjectSchema(properties=dict(**sh_params,
                                            **cube_params,
                                            **cache_params),
                            required=required)
def get_data_store_params_schema(cls) -> JsonObjectSchema:
    """Describe the constructor parameters of the CDS data store."""
    params = dict(normalize_names=JsonBooleanSchema(default=False))
    # For now, let CDS API use defaults or environment variables for
    # most parameters.
    params['num_retries'] = JsonIntegerSchema(default=DEFAULT_NUM_RETRIES,
                                              minimum=0)
    return JsonObjectSchema(properties=params,
                            required=None,
                            additional_properties=False)
def test_to_dict(self):
    """to_dict serializes a schema into plain JSON-schema mapping form."""
    schema = JsonObjectSchema(
        properties=dict(consolidated=JsonBooleanSchema()))
    expected = {
        'type': 'object',
        'properties': {'consolidated': {'type': 'boolean'}},
    }
    self.assertEqual(expected, schema.to_dict())
def test_from_json_object_array_object(self):
    """Objects nested inside arrays fill in defaults and honour
    factories at both the outer and the item level."""
    person_schema = JsonObjectSchema(
        properties=dict(
            name=JsonStringSchema(),
            age=JsonIntegerSchema(),
            deleted=JsonBooleanSchema(default=False),
        ))
    schema = JsonObjectSchema(
        properties=dict(persons=JsonArraySchema(items=person_schema)))
    value = {
        'persons': [{'name': 'Bibo', 'age': 15},
                    {'name': 'Ernie', 'age': 12}],
    }
    # Each array item gets the "deleted" default filled in.
    expected_persons = [
        {'name': 'Bibo', 'age': 15, 'deleted': False},
        {'name': 'Ernie', 'age': 12, 'deleted': False},
    ]
    self.assertEqual({'persons': expected_persons},
                     schema.from_instance(value))
    # An outer factory wraps the converted mapping.
    Assignment = namedtuple('Assignment', ['persons'])
    schema.factory = Assignment
    self.assertEqual(Assignment(persons=expected_persons),
                     schema.from_instance(value))
    # An item factory converts each array element too.
    Person = namedtuple('Person', ['name', 'age', 'deleted'])
    person_schema.factory = Person
    self.assertEqual(
        Assignment(persons=[
            Person(name='Bibo', age=15, deleted=False),
            Person(name='Ernie', age=12, deleted=False),
        ]),
        schema.from_instance(value))
def get_schema(cls) -> JsonObjectSchema:
    """Get the JSON schema for CodeConfig objects."""
    properties = dict(
        callable_ref=JsonStringSchema(min_length=1),
        callable_params=JsonObjectSchema(additional_properties=True),
        inline_code=JsonStringSchema(min_length=1),
        file_set=FileSet.get_schema(),
        install_required=JsonBooleanSchema(),
    )
    # Instances are materialized through the class itself.
    return JsonObjectSchema(
        properties=properties,
        additional_properties=False,
        factory=cls,
    )
def get_schema(cls):
    """Get the JSON schema for this write-configuration object."""
    properties = dict(
        store_id=JsonStringSchema(min_length=1),
        writer_id=JsonStringSchema(min_length=1),
        data_id=JsonStringSchema(default=None),
        store_params=JsonObjectSchema(additional_properties=True),
        write_params=JsonObjectSchema(additional_properties=True),
        replace=JsonBooleanSchema(default=False),
    )
    # No property is mandatory; instances come from the class factory.
    return JsonObjectSchema(
        properties=properties,
        additional_properties=False,
        required=[],
        factory=cls,
    )
def get_data_store_params_schema(cls) -> JsonObjectSchema:
    """Describe the constructor parameters of the CCI ODP data store:
    endpoints, warnings, retry behaviour, and user agent."""
    cciodp_params = dict(
        endpoint_url=JsonStringSchema(default=OPENSEARCH_CEDA_URL),
        endpoint_description_url=JsonStringSchema(default=CCI_ODD_URL),
        enable_warnings=JsonBooleanSchema(
            default=False,
            title='Whether to output warnings'),
        num_retries=JsonIntegerSchema(
            default=DEFAULT_NUM_RETRIES,
            minimum=0,
            title='Number of retries when requesting data fails'),
        retry_backoff_max=JsonIntegerSchema(
            default=DEFAULT_RETRY_BACKOFF_MAX,
            minimum=0),
        retry_backoff_base=JsonNumberSchema(
            default=DEFAULT_RETRY_BACKOFF_BASE,
            exclusive_minimum=1.0),
        user_agent=JsonStringSchema(default=None),
    )
    return JsonObjectSchema(properties=cciodp_params,
                            required=None,
                            additional_properties=False)
def get_write_data_params_schema(self) -> JsonObjectSchema:
    """Extend the inherited write-params schema with options specific
    to multi-level datasets."""
    schema = super().get_write_data_params_schema()  # creates deep copy
    # TODO: remove use_saved_levels, instead see #619
    schema.properties.update(
        use_saved_levels=JsonBooleanSchema(
            description='Whether to open an already saved level'
                        ' and downscale it then.'
                        ' May be used to avoid computation of'
                        ' entire Dask graphs at each level.',
            default=False,
        ),
        base_dataset_id=JsonStringSchema(
            description='If given, avoids writing the base dataset'
                        ' at level 0. Instead a file "{data_id}/0.link"'
                        ' is created whose content is the given base dataset'
                        ' identifier.',
        ),
        tile_size=JsonIntegerSchema(
            description='Tile size to be used for all levels of the'
                        ' written multi-level dataset.',
        ),
    )
    return schema
def get_data_store_params_schema(cls) -> JsonObjectSchema:
    """Describe the constructor parameters of the Sentinel Hub store.

    Credentials become required properties when no defaults are
    available from the environment.
    """
    sh_params = dict(
        client_id=JsonStringSchema(
            title='SENTINEL Hub API client identifier',
            description=
            'Preferably set by environment variable SH_CLIENT_ID'),
        client_secret=JsonStringSchema(
            title='SENTINEL Hub API client secret',
            description=
            'Preferably set by environment variable SH_CLIENT_SECRET'),
        api_url=JsonStringSchema(default=DEFAULT_SH_API_URL,
                                 title='SENTINEL Hub API URL'),
        oauth2_url=JsonStringSchema(
            default=DEFAULT_SH_OAUTH2_URL,
            title='SENTINEL Hub API authorisation URL'),
        enable_warnings=JsonBooleanSchema(
            default=False, title='Whether to output warnings'),
        error_policy=JsonStringSchema(
            default='fail',
            enum=['fail', 'warn', 'ignore'],
            title='Policy for errors while requesting data'),
        num_retries=JsonIntegerSchema(
            default=DEFAULT_NUM_RETRIES,
            minimum=0,
            title='Number of retries when requesting data fails'),
        retry_backoff_max=JsonIntegerSchema(
            default=DEFAULT_RETRY_BACKOFF_MAX, minimum=0),
        retry_backoff_base=JsonNumberSchema(
            default=DEFAULT_RETRY_BACKOFF_BASE, exclusive_minimum=1.0),
    )
    required = None
    if not DEFAULT_CLIENT_ID or not DEFAULT_CLIENT_SECRET:
        required = []
        # FIX: use the same falsiness test as the enclosing condition.
        # The original used "is None" here, so an empty-string default
        # (e.g. an env var set to "") produced required == [] without
        # marking either credential as required.
        if not DEFAULT_CLIENT_ID:
            required.append('client_id')
        if not DEFAULT_CLIENT_SECRET:
            required.append('client_secret')
    return JsonObjectSchema(properties=sh_params,
                            required=required,
                            additional_properties=False)
def get_s3_params_schema(self) -> JsonObjectSchema:
    """Describe the AWS S3 connection parameters accepted by this store."""
    # TODO: Use defaults as described in
    # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html
    properties = dict(
        anon=JsonBooleanSchema(
            title='Whether to anonymously connect to AWS S3'),
        aws_access_key_id=JsonStringSchema(
            min_length=1,
            title='AWS access key identifier',
            description='Can also be set in profile section of'
                        ' ~/.aws/config, or by environment variable'
                        ' AWS_ACCESS_KEY_ID'),
        aws_secret_access_key=JsonStringSchema(
            min_length=1,
            title='AWS secret access key',
            description='Can also be set in profile section of'
                        ' ~/.aws/config, or by environment variable'
                        ' AWS_SECRET_ACCESS_KEY'),
        aws_session_token=JsonStringSchema(
            min_length=1,
            title='Session token.',
            description='Can also be set in profile section of'
                        ' ~/.aws/config, or by environment variable'
                        ' AWS_SESSION_TOKEN'),
        endpoint_url=JsonStringSchema(
            min_length=1,
            format='uri',
            title='Alternative endpoint URL'),
        bucket_name=JsonStringSchema(
            min_length=1,
            title='Name of the bucket'),
        profile_name=JsonStringSchema(
            min_length=1,
            title='Name of the AWS configuration profile',
            description='Section name with within ~/.aws/config file, '
                        'which provides AWS configurations and credentials.'),
        region_name=JsonStringSchema(
            min_length=1,
            default='eu-central-1',
            # Restrict to the regions known to this store instance.
            enum=[r[1] for r in self._regions],
            title='AWS storage region name'),
    )
    return JsonObjectSchema(properties=properties)
def test_from_json_object(self):
    """from_instance passes complete dicts through, fills defaults for
    missing keys, and applies a factory when one is set."""
    value = {'name': 'Bibo', 'age': 12, 'deleted': True}
    person_schema = JsonObjectSchema(
        properties=dict(
            name=JsonStringSchema(),
            age=JsonIntegerSchema(),
            deleted=JsonBooleanSchema(default=False),
        ))
    self.assertEqual(value, person_schema.from_instance(value))
    # "deleted" is absent -> comes back as its default, False.
    self.assertEqual(
        {'name': 'Bibo', 'age': 12, 'deleted': False},
        person_schema.from_instance({'name': 'Bibo', 'age': 12}))
    Person = namedtuple('Person', ['name', 'age', 'deleted'])
    person_schema.factory = Person
    self.assertEqual(Person(name='Bibo', age=12, deleted=True),
                     person_schema.from_instance(value))
def test_to_dict(self):
    """A fully-populated DatasetDescriptor round-trips into the
    expected plain-dict representation."""
    open_schema = JsonObjectSchema(
        properties=dict(consolidated=JsonBooleanSchema()),
        additional_properties=False,
    )
    descriptor = DatasetDescriptor(
        data_id='xyz',
        crs='EPSG:9346',
        bbox=(10., 20., 30., 40.),
        spatial_res=20.,
        time_range=('2017-06-05', '2017-06-27'),
        time_period='daily',
        open_params_schema=open_schema,
    )
    expected = {
        'data_id': 'xyz',
        'crs': 'EPSG:9346',
        'data_type': 'dataset',
        'bbox': [10.0, 20.0, 30.0, 40.0],
        'spatial_res': 20.0,
        'time_range': ['2017-06-05', '2017-06-27'],
        'time_period': 'daily',
        'open_params_schema': {
            'type': 'object',
            'properties': {'consolidated': {'type': 'boolean'}},
            'additionalProperties': False,
        },
    }
    self.assertEqual(expected, descriptor.to_dict())
def get_data_store_params_schema(cls) -> JsonObjectSchema:
    """Describe the constructor parameters of this store; only
    ``base_dir`` is mandatory."""
    return JsonObjectSchema(
        properties=dict(
            base_dir=JsonStringSchema(min_length=1),
            read_only=JsonBooleanSchema(default=False),
        ),
        required=['base_dir'],
        additional_properties=False,
    )
from xcube.util.assertions import assert_instance
from xcube.util.jsonschema import JsonBooleanSchema
from xcube.util.jsonschema import JsonIntegerSchema
from xcube.util.jsonschema import JsonNumberSchema
from xcube.util.jsonschema import JsonObjectSchema
from ..accessor import DataOpener
from ..accessor import DataWriter
from ..datatype import DataType
from ..error import DataStoreError

# Storage-option schema properties shared by all fsspec-backed
# filesystems; individual stores merge these into their own
# storage-options schemas.
COMMON_STORAGE_OPTIONS_SCHEMA_PROPERTIES = dict(
    # passed to ``DirCache``, if the implementation supports
    # directory listing caching. Pass use_listings_cache=False
    # to disable such caching.
    use_listings_cache=JsonBooleanSchema(),
    listings_expiry_time=JsonNumberSchema(),
    max_paths=JsonIntegerSchema(),
    # If this is a cachable implementation, pass True here to force
    # creating a new instance even if a matching instance exists, and prevent
    # storing this instance.
    skip_instance_cache=JsonBooleanSchema(),
    asynchronous=JsonBooleanSchema(),
)

# Well-known parameter names used across the fsspec-based accessors.
PROTOCOL_PARAM_NAME = 'protocol'
STORAGE_OPTIONS_PARAM_NAME = 'storage_options'
FS_PARAM_NAME = 'fs'
ROOT_PARAM_NAME = 'root'
def test_from_json_boolean(self):
    """A boolean instance passes through a boolean schema unchanged."""
    schema = JsonBooleanSchema()
    self.assertEqual(True, schema.from_instance(True))
def get_storage_options_schema(cls) -> JsonObjectSchema:
    """Describe the S3 storage options accepted by this filesystem:
    credentials, SSL/requester-pays flags, extra S3 API parameters,
    botocore client parameters, and the common fsspec options.
    """
    # We may use here AWS S3 defaults as described in
    # https://boto3.amazonaws.com/v1/documentation/api/
    # latest/guide/configuration.html
    return JsonObjectSchema(
        properties=dict(
            anon=JsonBooleanSchema(
                title='Whether to anonymously connect to AWS S3.'),
            key=JsonStringSchema(
                min_length=1,
                title='AWS access key identifier.',
                description='Can also be set in profile section'
                            ' of ~/.aws/config, or by environment'
                            ' variable AWS_ACCESS_KEY_ID.'),
            secret=JsonStringSchema(
                min_length=1,
                title='AWS secret access key.',
                description='Can also be set in profile section'
                            ' of ~/.aws/config, or by environment'
                            ' variable AWS_SECRET_ACCESS_KEY.'),
            token=JsonStringSchema(
                min_length=1,
                title='Session token.',
                description='Can also be set in profile section'
                            ' of ~/.aws/config, or by environment'
                            ' variable AWS_SESSION_TOKEN.'),
            use_ssl=JsonBooleanSchema(
                description='Whether to use SSL in connections to S3;'
                            ' may be faster without, but insecure.',
                default=True,
            ),
            requester_pays=JsonBooleanSchema(
                description='If "RequesterPays" buckets are supported.',
                default=False,
            ),
            s3_additional_kwargs=JsonObjectSchema(
                # FIX: this description string was corrupted by a stray
                # mojibake character / line break in the source;
                # reconstructed from the surrounding text.
                description='parameters that are used when calling'
                            ' S3 API methods. Typically used for'
                            ' things like "ServerSideEncryption".',
                additional_properties=True,
            ),
            client_kwargs=JsonObjectSchema(
                description='Parameters for the botocore client.',
                properties=dict(
                    endpoint_url=JsonStringSchema(
                        min_length=1,
                        format='uri',
                        title='Alternative endpoint URL.'),
                    # bucket_name=JsonStringSchema(
                    #     min_length=1,
                    #     title='Name of the bucket'
                    # ),
                    profile_name=JsonStringSchema(
                        min_length=1,
                        title='Name of the AWS configuration profile',
                        description='Section name with within'
                                    ' ~/.aws/config file,'
                                    ' which provides AWS configurations'
                                    ' and credentials.'),
                    region_name=JsonStringSchema(
                        min_length=1,
                        title='AWS storage region name'),
                ),
                additional_properties=True,
            ),
            **COMMON_STORAGE_OPTIONS_SCHEMA_PROPERTIES,
        ),
        additional_properties=True,
    )
from abc import ABC from xcube.util.jsonschema import JsonArraySchema from xcube.util.jsonschema import JsonBooleanSchema from xcube.util.jsonschema import JsonNumberSchema from xcube.util.jsonschema import JsonObject from xcube.util.jsonschema import JsonObjectSchema from xcube.util.jsonschema import JsonStringSchema BooleanSchema = JsonBooleanSchema() NumberSchema = JsonNumberSchema() UrlSchema = JsonStringSchema(format='uri') IdentifierSchema = JsonStringSchema(min_length=1) ChunkSizeSchema = JsonStringSchema(min_length=2) # TODO: use pattern StringSchema = JsonStringSchema() PathSchema = JsonStringSchema(min_length=1) BoundingBoxSchema = JsonArraySchema( items=[NumberSchema, NumberSchema, NumberSchema, NumberSchema]) FileSystemSchema = JsonStringSchema( enum=['memory', 'obs', 'local', 's3', 'file']) class _ConfigObject(JsonObject, ABC): def __init__(self, **kwargs): self._inject_attrs(kwargs) class ServiceConfig(_ConfigObject): @classmethod def get_schema(cls) -> JsonObjectSchema: return JsonObjectSchema(
from xcube.core.chunkstore import LoggingStore from xcube.util.assertions import assert_instance from xcube.util.jsonschema import JsonArraySchema from xcube.util.jsonschema import JsonBooleanSchema from xcube.util.jsonschema import JsonIntegerSchema from xcube.util.jsonschema import JsonObjectSchema from xcube.util.jsonschema import JsonStringSchema from xcube.util.temp import new_temp_file from ..accessor import FsDataAccessor from ..helpers import is_local_fs from ...datatype import DATASET_TYPE from ...datatype import DataType from ...error import DataStoreError ZARR_OPEN_DATA_PARAMS_SCHEMA = JsonObjectSchema(properties=dict( log_access=JsonBooleanSchema(default=False), cache_size=JsonIntegerSchema(minimum=0, ), group=JsonStringSchema( description='Group path.' ' (a.k.a. path in zarr terminology.).', min_length=1, ), chunks=JsonObjectSchema( description='Optional chunk sizes along each dimension.' ' Chunk size values may be None, "auto"' ' or an integer value.', examples=[{ 'time': None, 'lat': 'auto', 'lon': 90 }, {