Example #1: builds a DatasetDescriptor from dataset metadata fetched from the CCI ODP service
    def _get_data_descriptor_from_metadata(
            self, data_id: str, metadata: dict) -> DatasetDescriptor:
        ds_metadata = metadata.copy()
        dims = self._normalize_dims(ds_metadata.get('dimensions', {}))
        if 'time' not in dims:
            dims['time'] = ds_metadata.get('time_dimension_size')
        else:
            dims['time'] *= ds_metadata.get('time_dimension_size')
        temporal_resolution = _get_temporal_resolution_from_id(data_id)
        dataset_info = self._cci_odp.get_dataset_info(data_id, ds_metadata)
        spatial_resolution = dataset_info['lat_res']
        if spatial_resolution <= 0:
            spatial_resolution = None
        bbox = dataset_info['bbox']
        # Use only the date part of the temporal coverage values
        temporal_coverage = (
            dataset_info['temporal_coverage_start'].split('T')[0],
            dataset_info['temporal_coverage_end'].split('T')[0])
        var_infos = ds_metadata.get('variable_infos', {})
        var_descriptors = self._get_variable_descriptors(
            dataset_info['var_names'], var_infos)
        coord_descriptors = self._get_variable_descriptors(
            dataset_info['coord_names'], var_infos, normalize_dims=False)
        if 'time' not in coord_descriptors and 't' not in coord_descriptors:
            time_attrs = {
                "units": "seconds since 1970-01-01T00:00:00Z",
                "calendar": "proleptic_gregorian",
                "standard_name": "time"
            }
            coord_descriptors['time'] = VariableDescriptor('time',
                                                           dtype='int64',
                                                           dims=('time', ),
                                                           attrs=time_attrs)

        if 'variables' in ds_metadata:
            ds_metadata.pop('variables')
        ds_metadata.pop('dimensions')
        ds_metadata.pop('variable_infos')
        attrs = ds_metadata.get('attributes', {}).get('NC_GLOBAL', {})
        ds_metadata.pop('attributes')
        attrs.update(ds_metadata)
        self._remove_irrelevant_metadata_attributes(attrs)
        descriptor = DatasetDescriptor(data_id,
                                       type_specifier=self._type_specifier,
                                       dims=dims,
                                       coords=coord_descriptors,
                                       data_vars=var_descriptors,
                                       attrs=attrs,
                                       bbox=bbox,
                                       spatial_res=spatial_resolution,
                                       time_range=temporal_coverage,
                                       time_period=temporal_resolution)
        data_schema = self._get_open_data_params_schema(descriptor)
        descriptor.open_params_schema = data_schema
        return descriptor
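
For comparison, a DatasetDescriptor can also be built by hand. The sketch below uses made-up values and assumes that DatasetDescriptor and VariableDescriptor can be imported from xcube.core.store:

    from xcube.core.store import DatasetDescriptor, VariableDescriptor

    # Hand-written descriptor; every value here is illustrative only
    descriptor = DatasetDescriptor(
        'example.zarr',
        dims={'time': 12, 'lat': 720, 'lon': 1440},
        data_vars={
            'sst': VariableDescriptor('sst',
                                      dtype='float32',
                                      dims=('time', 'lat', 'lon')),
        },
        bbox=(-180.0, -90.0, 180.0, 90.0),
        spatial_res=0.25,
        time_range=('2000-01-01', '2000-12-31'),
        time_period='1M')
    print(descriptor.to_dict())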
Example #2: round-trips a CubeInfoResult through to_dict() and from_dict()
    def test_serialisation(self):
        result = CubeInfoResult(
            status='ok',
            message='Success!',
            result=CubeInfo(
                dataset_descriptor=DatasetDescriptor(data_id='bibo.zarr'),
                size_estimation={}))

        result_dict = result.to_dict()
        self.assertIsInstance(result_dict, dict)
        self.assertEqual(
            {
                'status': 'ok',
                'message': 'Success!',
                'result': {
                    'dataset_descriptor': {
                        'data_id': 'bibo.zarr',
                        'data_type': 'dataset'
                    },
                    'size_estimation': {}
                },
            }, result_dict)

        result2 = CubeInfoResult.from_dict(result_dict)
        self.assertIsInstance(result2, CubeInfoResult)
Example #3: checks that a store's describe_data() returns the expected descriptor, with and without an explicit type specifier
    def test_describe_data(self):
        dd = self.store.describe_data('cube_1')
        self.assertIsInstance(dd, DatasetDescriptor)
        self.assertEqual(
            DatasetDescriptor(
                data_id='cube_1',
                type_specifier=TYPE_SPECIFIER_CUBE,
            ).to_dict(), dd.to_dict())

        dd = self.store.describe_data('cube_1', type_specifier='dataset[cube]')
        self.assertIsInstance(dd, DatasetDescriptor)
        self.assertEqual(
            DatasetDescriptor(
                data_id='cube_1',
                type_specifier=TYPE_SPECIFIER_CUBE,
            ).to_dict(), dd.to_dict())
Example #4: like Example #2, but for CubeInfoWithCostsResult with an embedded CostEstimation
    def test_serialisation(self):
        result = CubeInfoWithCostsResult(
            status='ok',
            message='Success!',
            result=CubeInfoWithCosts(
                dataset_descriptor=DatasetDescriptor(data_id='bibo.zarr'),
                size_estimation={},
                cost_estimation=CostEstimation(required=10,
                                               available=20,
                                               limit=100)))

        result_dict = result.to_dict()
        self.assertIsInstance(result_dict, dict)
        self.assertEqual(
            {
                'status': 'ok',
                'message': 'Success!',
                'result': {
                    'dataset_descriptor': {
                        'data_id': 'bibo.zarr',
                        'data_type': 'dataset'
                    },
                    'size_estimation': {},
                    'cost_estimation': {
                        'available': 20,
                        'limit': 100,
                        'required': 10
                    },
                },
            }, result_dict)

        result2 = CubeInfoWithCostsResult.from_dict(result_dict)
        self.assertIsInstance(result2, GenericCubeGeneratorResult)
        self.assertIsInstance(result2.result, CubeInfoWithCosts)
Example #5: declares the JSON object schema of a type that embeds a DatasetDescriptor
    @classmethod
    def get_schema(cls) -> JsonObjectSchema:
        return JsonObjectSchema(
            properties=dict(
                dataset_descriptor=DatasetDescriptor.get_schema(),
                size_estimation=JsonObjectSchema(additional_properties=True)),
            required=['dataset_descriptor', 'size_estimation'],
            additional_properties=True,
            factory=cls)
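
The factory=cls argument is what lets from_dict() rebuild the object from plain JSON data. Assuming xcube's JsonObjectSchema exposes from_instance() as these snippets suggest, a round trip could look like this (values are illustrative):

    # Hypothetical reconstruction of a CubeInfo from a JSON dict
    schema = CubeInfo.get_schema()
    cube_info = schema.from_instance({
        'dataset_descriptor': {'data_id': 'bibo.zarr', 'data_type': 'dataset'},
        'size_estimation': {},
    })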
Example #6: assembles a DatasetDescriptor from a preloaded dataset dictionary
    def describe_data(self, data_id: str) -> DataDescriptor:
        ds_info = self._dataset_dicts[data_id.split(':')[0]]

        return DatasetDescriptor(
            data_id=data_id,
            data_vars=self._create_variable_descriptors(data_id),
            crs=ds_info['crs'],
            bbox=tuple(ds_info['bbox']),
            spatial_res=ds_info['spatial_res'],
            time_range=tuple(ds_info['time_range']),
            time_period=ds_info['time_period'],
            open_params_schema=self.get_open_data_params_schema(data_id))
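
Example #6 only works if self._dataset_dicts contains entries with exactly the keys it reads. A hypothetical entry (all values made up) could look like this:

    # Assumed shape of the dictionaries consumed by Example #6
    _dataset_dicts = {
        'cube_1': {
            'crs': 'WGS84',
            'bbox': [-180.0, -90.0, 180.0, 90.0],
            'spatial_res': 0.25,
            'time_range': ['2010-01-01', '2010-12-31'],
            'time_period': '1D',
        },
    }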
Example #7: derives a DatasetDescriptor from Sentinel Hub dataset and collection metadata
    def _describe_data(self, data_id: str) -> DatasetDescriptor:
        dataset_metadata, collection_metadata = self._get_dataset_and_collection_metadata(
            data_id)
        band_metadatas = dataset_metadata.get('bands', {})

        if self._sentinel_hub is not None:
            # If we are connected to the API, we return band names by API
            band_names = self._sentinel_hub.band_names(data_id)
        else:
            # Otherwise fall back to the band names known from metadata
            band_names = band_metadatas.keys()

        data_vars = []
        for band_name in band_names:
            band_metadata = band_metadatas.get(band_name,
                                               dict(sample_type='FLOAT32'))
            data_vars.append(
                VariableDescriptor(name=band_name,
                                   dtype=band_metadata.get(
                                       'sample_type', 'FLOAT32'),
                                   dims=('time', 'lat', 'lon'),
                                   attrs=band_metadata.copy()))

        dataset_attrs = dataset_metadata.copy()

        bbox = None
        time_range = None
        if collection_metadata is not None:
            extent = collection_metadata.get('extent')
            if extent is not None:
                bbox = extent.get("spatial", {}).get('bbox')
                interval = extent.get("temporal", {}).get('interval')
                if isinstance(interval, list) and len(interval) == 2:
                    min_datetime, max_datetime = interval
                    # Get rid of time part
                    time_range = (min_datetime.split('T')[0]
                                  if min_datetime is not None else None,
                                  max_datetime.split('T')[0]
                                  if max_datetime is not None else None)

            if 'title' in collection_metadata:
                dataset_attrs['title'] = collection_metadata['title']
            if 'description' in collection_metadata:
                dataset_attrs['description'] = collection_metadata[
                    'description']

        return DatasetDescriptor(
            data_id=data_id,
            data_vars=data_vars,
            bbox=bbox,
            time_range=time_range,
            time_period=dataset_metadata.get('request_period'),
            attrs=dataset_attrs)
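
The extent handling above expects a STAC-like collection document. A hypothetical collection_metadata value that exercises every branch (all values made up):

    # Illustrative collection metadata as accepted by Example #7
    collection_metadata = {
        'title': 'Some Collection',
        'description': 'A made-up collection used for illustration.',
        'extent': {
            'spatial': {'bbox': [-180.0, -56.0, 180.0, 83.0]},
            'temporal': {'interval': ['2015-06-27T00:00:00Z', None]},
        },
    }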
Example #8: estimates cube size and builds a synthetic DatasetDescriptor for a cube generation request
    def generate(self) -> CubeInfo:
        try:
            cube_config, resolved_crs, resolved_time_range = \
                 self._compute_effective_cube_config()
        except (TypeError, ValueError) as e:
            raise CubeGeneratorError(f'{e}', status_code=400) from e

        x_min, y_min, x_max, y_max = cube_config.bbox
        spatial_res = cube_config.spatial_res

        width = round((x_max - x_min) / spatial_res)
        height = round((y_max - y_min) / spatial_res)
        # Enforce a minimum image size of 2 x 2 pixels.
        width = max(2, width)
        height = max(2, height)

        num_tiles_x = 1
        num_tiles_y = 1
        tile_width = width
        tile_height = height

        tile_size = cube_config.tile_size
        if tile_size is None and cube_config.chunks is not None:
            # TODO: this is just an assumption, with new
            #   Resampling module, use GridMapping
            #   to identify the actual names for the
            #   spatial tile dimensions.
            tile_size_x = cube_config.chunks.get('lon',
                                                 cube_config.chunks.get('x'))
            tile_size_y = cube_config.chunks.get('lat',
                                                 cube_config.chunks.get('y'))
            if tile_size_x and tile_size_y:
                tile_size = tile_size_x, tile_size_y

        if tile_size is not None:
            tile_width, tile_height = tile_size

            # TODO: this must be made common store logic
            if width > 1.5 * tile_width:
                num_tiles_x = _idiv(width, tile_width)
                width = num_tiles_x * tile_width

            # TODO: this must be made common store logic
            if height > 1.5 * tile_height:
                num_tiles_y = _idiv(height, tile_height)
                height = num_tiles_y * tile_height

        variable_names = cube_config.variable_names

        num_times = len(resolved_time_range)
        num_variables = len(variable_names)
        num_requests = num_variables \
                       * num_times \
                       * num_tiles_x * num_tiles_y
        # TODO: get original data types from dataset descriptors
        num_bytes_per_pixel = 4
        num_bytes = num_variables \
                    * num_times \
                    * (height * width * num_bytes_per_pixel)

        x_name, y_name = ('lon', 'lat') \
            if resolved_crs.is_geographic else ('x', 'y')

        data_id = self._request.output_config.data_id or 'unnamed'
        # TODO: get original variable descriptors from input dataset descriptors
        data_vars = {
            name: VariableDescriptor(name,
                                     dtype='float32',
                                     dims=('time', y_name, x_name))
            for name in variable_names
        }
        dims = {'time': num_times, y_name: height, x_name: width}
        dataset_descriptor = DatasetDescriptor(
            data_id,
            crs=cube_config.crs,
            bbox=cube_config.bbox,
            spatial_res=cube_config.spatial_res,
            time_range=cube_config.time_range,
            time_period=cube_config.time_period,
            dims=dims,
            data_vars=data_vars)
        size_estimation = dict(image_size=[width, height],
                               tile_size=[tile_width, tile_height],
                               num_variables=num_variables,
                               num_tiles=[num_tiles_x, num_tiles_y],
                               num_requests=num_requests,
                               num_bytes=num_bytes)

        return CubeInfo(dataset_descriptor=dataset_descriptor,
                        size_estimation=size_estimation)
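
To put the num_bytes formula in perspective, here is a small worked example (the numbers are made up, not from a real request):

    # 2 variables x 100 time steps x 2048 x 2048 pixels x 4 bytes per pixel
    num_bytes = 2 * 100 * 2048 * 2048 * 4
    print(num_bytes)  # 3355443200, i.e. roughly 3.4 GB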
Example #9: builds a DatasetDescriptor with hard-coded variable descriptors for a soil-moisture data id
    def describe_data(self, data_id: str) -> DatasetDescriptor:
        _, variable_spec, aggregation = data_id.split(':')

        sm_attrs = dict(saturation=('percent',
                                    'Percent of Saturation Soil Moisture'),
                        volumetric=('m3 m-3',
                                    'Volumetric Soil Moisture'))[variable_spec]

        descriptors_common = [
            VariableDescriptor(name='sensor',
                               dtype='int16',
                               dims=('time', 'lat', 'lon'),
                               attrs={'long_name': 'Sensor'}),
            VariableDescriptor(
                name='freqbandID',
                dtype='int16',
                dims=('time', 'lat', 'lon'),
                attrs={'long_name': 'Frequency Band Identification'}),
            VariableDescriptor(name='sm',
                               dtype='float32',
                               dims=('time', 'lat', 'lon'),
                               attrs={
                                   'units': sm_attrs[0],
                                   'long_name': sm_attrs[1]
                               }),
        ]

        descriptors_daily = [
            VariableDescriptor(
                # The product user guide claims that sm_uncertainty is
                # available for all three aggregation periods, but in practice
                # it only seems to be present in the daily data.
                name='sm_uncertainty',
                dtype='float32',
                dims=('time', 'lat', 'lon'),
                attrs={
                    'units': sm_attrs[0],
                    'long_name': sm_attrs[1] + ' Uncertainty'
                }),
            VariableDescriptor(name='t0',
                               dtype='float64',
                               dims=('time', 'lat', 'lon'),
                               attrs={
                                   'units':
                                   'days since 1970-01-01 00:00:00 UTC',
                                   'long_name': 'Observation Timestamp'
                               }),
            VariableDescriptor(name='dnflag',
                               dtype='int8',
                               dims=('time', 'lat', 'lon'),
                               attrs={'long_name': 'Day / Night Flag'}),
            VariableDescriptor(name='flag',
                               dtype='int8',
                               dims=('time', 'lat', 'lon'),
                               attrs={'long_name': 'Flag'}),
            VariableDescriptor(
                name='mode',
                dtype='int8',
                dims=('time', 'lat', 'lon'),
                # Note: the product user guide gives the long name as
                # 'Satellite Mode' with one space, but the long name in the
                # actual NetCDF files has two spaces.
                attrs={'long_name': 'Satellite  Mode'}),
        ]
        descriptors_aggregated = [
            VariableDescriptor(
                name='nobs',
                dtype='int16',
                dims=('time', 'lat', 'lon'),
                attrs={'long_name': 'Number of valid observation'}),
        ]

        descriptors = descriptors_common + \
            (descriptors_daily if aggregation == 'daily'
             else descriptors_aggregated)

        return DatasetDescriptor(
            data_id=data_id,
            data_vars={desc.name: desc
                       for desc in descriptors},
            crs='WGS84',
            bbox=(-180, -90, 180, 90),
            spatial_res=0.25,
            time_range=('1978-11-01', None),
            time_period=self._aggregation_map[aggregation],
            open_params_schema=self.get_open_data_params_schema(data_id))
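
Finally, a sketch of how these describe_data() implementations are typically consumed. The store instance and the data id are placeholders; the real values depend on which of the stores above you use:

    # 'store' stands for any data store shown above; the id is made up
    descriptor = store.describe_data('esa-cci-sm:volumetric:daily')
    print(descriptor.data_id)
    print(descriptor.bbox, descriptor.spatial_res)
    print(descriptor.time_range, descriptor.time_period)
    print(sorted(descriptor.data_vars))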