Example #1
    def test_convert(self):
        expected = (datetime(2001, 1, 1), datetime(2002, 1, 1, 23, 59, 59))
        actual = TimeRangeLike.convert('2001-01-01, 2002-01-01')
        self.assertEqual(actual, expected)

        with self.assertRaises(ValueError) as err:
            TimeRangeLike.convert('2002-01-01, 2001-01-01')
        self.assertTrue('cannot convert' in str(err.exception))
        self.assertEqual(None, TimeRangeLike.convert(None))
Example #2
    def test_make_local(self):
        data_source = self._local_data_store.query('local_w_temporal')[0]

        with unittest.mock.patch.object(EsaCciOdpDataStore,
                                        'query',
                                        return_value=[]):
            new_ds = data_source.make_local(
                'from_local_to_local', None,
                (datetime.datetime(1978, 11, 14, 0, 0),
                 datetime.datetime(1978, 11, 15, 23, 59)))
            self.assertEqual(new_ds.name, 'local.from_local_to_local')
            self.assertEqual(
                new_ds.temporal_coverage(),
                TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                       datetime.datetime(1978, 11, 15, 23,
                                                         59))))

            data_source.update_local(new_ds.name,
                                     (datetime.datetime(1978, 11, 15, 00, 00),
                                      datetime.datetime(1978, 11, 16, 23, 59)))
            self.assertEqual(
                new_ds.temporal_coverage(),
                TimeRangeLike.convert((datetime.datetime(1978, 11, 15, 0, 0),
                                       datetime.datetime(1978, 11, 16, 23,
                                                         59))))

            with self.assertRaises(ValueError) as context:
                data_source.update_local(
                    "wrong_ds_name", (datetime.datetime(1978, 11, 15, 00, 00),
                                      datetime.datetime(1978, 11, 16, 23, 59)))
            self.assertTrue("Couldn't find local DataSource",
                            context.exception.args[0])

            new_ds_w_one_variable = data_source.make_local(
                'from_local_to_local_var', None,
                (datetime.datetime(1978, 11, 14, 0, 0),
                 datetime.datetime(1978, 11, 15, 23, 59)), None, ['sm'])
            self.assertEqual(new_ds_w_one_variable.name,
                             'local.from_local_to_local_var')
            data_set = new_ds_w_one_variable.open_dataset()
            self.assertSetEqual(set(data_set.variables),
                                {'sm', 'lat', 'lon', 'time'})

            new_ds_w_region = data_source.make_local(
                'from_local_to_local_region', None,
                (datetime.datetime(1978, 11, 14, 0, 0),
                 datetime.datetime(1978, 11, 15, 23, 59)), "10,10,20,20",
                ['sm'])  # type: LocalDataSource
            self.assertEqual(new_ds_w_region.name,
                             'local.from_local_to_local_region')
            self.assertEqual(new_ds_w_region.spatial_coverage(),
                             PolygonLike.convert("10,10,20,20"))
            data_set = new_ds_w_region.open_dataset()
            self.assertSetEqual(set(data_set.variables),
                                {'sm', 'lat', 'lon', 'time'})
Example #3
    def test_load_old_datasource_from_json_dict(self):
        test_data = {
            'name':
            'local.test_name',
            'meta_data': {
                'temporal_coverage': "2001-01-01 00:00:00,2001-01-31 23:59:59",
                'spatial_coverage': "-180,-90,180,90",
                'variables': ['var_test_1', 'var_test_2'],
            },
            "meta_info": {},
            'files': [['file_1', '2002-02-01 00:00:00', '2002-02-01 23:59:59'],
                      ['file_2', '2002-03-01 00:00:00', '2002-03-01 23:59:59']]
        }
        data_source = LocalDataSource.from_json_dict(
            json_dict=test_data, data_store=self.data_store)
        self.assertIsNotNone(data_source)
        self.assertEqual(
            data_source.temporal_coverage(),
            TimeRangeLike.convert(
                test_data.get('meta_data').get('temporal_coverage')))
        self.assertEqual(
            data_source.spatial_coverage(),
            PolygonLike.convert(
                test_data.get('meta_data').get('spatial_coverage')))
        self.assertListEqual(
            [var.get('name') for var in data_source.variables_info],
            test_data.get('meta_data').get('variables'))

        test_data = {
            'name':
            'local.test_name',
            'meta_data': {
                'temporal_covrage': "2001-01-01 00:00:00,2001-01-31 23:59:59",
                'spatial_coverage': "-180,-90,180,90",
                'variables': ['var_test_1', 'var_test_2'],
            },
            "meta_info": {},
            'files': [['file_1', '2002-02-01 00:00:00', '2002-02-01 23:59:59'],
                      ['file_2', '2002-03-01 00:00:00', '2002-03-01 23:59:59']]
        }
        data_source = LocalDataSource.from_json_dict(
            json_dict=test_data, data_store=self.data_store)
        self.assertIsNotNone(data_source)
        self.assertEqual(
            data_source.temporal_coverage(),
            TimeRangeLike.convert(
                test_data.get('meta_data').get('temporal_covrage')))
        self.assertEqual(
            data_source.spatial_coverage(),
            PolygonLike.convert(
                test_data.get('meta_data').get('spatial_coverage')))
        self.assertListEqual(
            [var.get('name') for var in data_source.variables_info],
            test_data.get('meta_data').get('variables'))
Example #4
    def __init__(self,
                 ds_id: str,
                 files: Union[Sequence[str], OrderedDict],
                 data_store: 'LocalDataStore',
                 temporal_coverage: TimeRangeLike.TYPE = None,
                 spatial_coverage: PolygonLike.TYPE = None,
                 variables: VarNamesLike.TYPE = None,
                 meta_info: dict = None,
                 status: DataSourceStatus = None):
        self._id = ds_id
        if isinstance(files, Sequence):
            self._files = OrderedDict.fromkeys(files)
        else:
            self._files = files
        self._data_store = data_store

        initial_temporal_coverage = TimeRangeLike.convert(
            temporal_coverage) if temporal_coverage else None
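        # No explicit temporal coverage given: derive one from the first and
        # last entries of the ordered file-to-time mapping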
        if not initial_temporal_coverage:
            files_number = len(self._files.items())
            if files_number > 0:
                files_range = list(self._files.values())
                if files_range:
                    if isinstance(files_range[0], Tuple):
                        initial_temporal_coverage = TimeRangeLike.convert(
                            tuple([
                                files_range[0][0],
                                files_range[files_number - 1][1]
                            ]))
                    elif isinstance(files_range[0], datetime):
                        initial_temporal_coverage = TimeRangeLike.convert(
                            (files_range[0], files_range[files_number - 1]))

        self._temporal_coverage = initial_temporal_coverage
        self._spatial_coverage = PolygonLike.convert(
            spatial_coverage) if spatial_coverage else None
        self._variables = VarNamesLike.convert(variables) if variables else []

        self._meta_info = meta_info if meta_info else OrderedDict()

        if self._variables and not self._meta_info.get('variables', None):
            self._meta_info['variables'] = [{
                'name': var_name,
                'units': '',
                'long_name': '',
                'standard_name': ''
            } for var_name in self._variables]

        self._status = status if status else DataSourceStatus.READY
Example #5
    def test_make_local(self):
        data_source = self._local_data_store.query('local_w_temporal')[0]

        with unittest.mock.patch.object(EsaCciOdpDataStore, 'query', return_value=[]):
            new_ds_title = 'from_local_to_local'
            new_ds_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                      datetime.datetime(1978, 11, 15, 23, 59)))
            new_ds = data_source.make_local(new_ds_title, time_range=new_ds_time_range)
            self.assertIsNotNone(new_ds)

            self.assertEqual(new_ds.id, "local.%s" % new_ds_title)
            self.assertEqual(new_ds.temporal_coverage(), TimeRangeLike.convert(
                (datetime.datetime(1978, 11, 14, 0, 0),
                 datetime.datetime(1978, 11, 15, 23, 59))))

            new_ds_2_title = 'from_local_to_local_var'
            new_ds_2_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                         datetime.datetime(1978, 11, 15, 23, 59)))
            new_ds_2_vars = VarNamesLike.convert(['sm'])

            new_ds_w_one_variable = data_source.make_local(new_ds_2_title,
                                                           time_range=new_ds_2_time_range,
                                                           var_names=new_ds_2_vars)
            self.assertIsNotNone(new_ds_w_one_variable)
            self.assertEqual(new_ds_w_one_variable.id, "local.%s" % new_ds_2_title)
            data_set = new_ds_w_one_variable.open_dataset()
            self.assertSetEqual(set(data_set.variables), {'sm', 'lat', 'lon', 'time'})

            new_ds_3_title = 'from_local_to_local_range'
            new_ds_3_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                         datetime.datetime(1978, 11, 15, 23, 59)))
            new_ds_3_vars = VarNamesLike.convert(['sm'])
            new_ds_3_region = PolygonLike.convert("10,10,20,20")

            new_ds_w_region = data_source.make_local(new_ds_3_title,
                                                     time_range=new_ds_3_time_range,
                                                     var_names=new_ds_3_vars,
                                                     region=new_ds_3_region)  # type: LocalDataSource
            self.assertIsNotNone(new_ds_w_region)
            self.assertEqual(new_ds_w_region.id, "local.%s" % new_ds_3_title)
            self.assertEqual(new_ds_w_region.spatial_coverage(), PolygonLike.convert("10,10,20,20"))
            data_set = new_ds_w_region.open_dataset()
            self.assertSetEqual(set(data_set.variables), {'sm', 'lat', 'lon', 'time'})

            no_data = data_source.make_local('no_data',
                                             time_range=(datetime.datetime(2020, 11, 14, 0, 0),
                                                         datetime.datetime(2020, 11, 15, 23, 59)))
            self.assertIsNone(no_data)
Example #6
    def update_temporal_coverage(self, time_range: TimeRangeLike.TYPE):
        """

        :param time_range: Time range to be added to data source temporal coverage
        :return:
        """
        self._extend_temporal_coverage(TimeRangeLike.convert(time_range))
Example #7
 def temporal_coverage(self,
                       monitor: Monitor = Monitor.NONE
                       ) -> Optional[TimeRange]:
     if not self._temporal_coverage:
         temp_coverage_start = self._catalogue_data.get(
             'temporal_coverage_start', None)
         temp_coverage_end = self._catalogue_data.get(
             'temporal_coverage_end', None)
         if temp_coverage_start and temp_coverage_end:
             self._temporal_coverage = TimeRangeLike.convert("{},{}".format(
                 temp_coverage_start, temp_coverage_end))
             # ODP datasets that are split into per-year datasets
             # have the year they are covering in the 'realization' attribute
             # the CSW does not have separate temporal coverages for them
             realization = self._json_dict.get('realization', None)
             if realization and len(realization):
                 matcher = _YEAR_REALIZATION.match(realization[0])
                 if matcher:
                     year = matcher.group(0)
                     rel_start = max(self._temporal_coverage[0],
                                     datetime(int(year), 1, 1))
                     rel_end = min(
                         self._temporal_coverage[1],
                         datetime(int(year) + 1, 1, 1) -
                         timedelta(seconds=1))
                     self._temporal_coverage = (rel_start, rel_end)
         else:
             self.update_file_list(monitor)
     if self._temporal_coverage:
         return self._temporal_coverage
     return None
Example #8
    def update_temporal_coverage(self, time_range: TimeRangeLike.TYPE):
        """

        :param time_range: Time range to be added to data source temporal coverage
        :return:
        """
        self._extend_temporal_coverage(TimeRangeLike.convert(time_range))
Example #9
def anomaly_internal(ds: xr.Dataset,
                     time_range: TimeRangeLike.TYPE = None,
                     region: PolygonLike.TYPE = None,
                     monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Calculate anomaly using as reference data the mean of an optional region
    and time slice from the given dataset. If no time slice/spatial region is
    given, the operation will calculate anomaly using the mean of the whole
    dataset as the reference.

    This is done for each data array in the dataset.
    :param ds: The dataset to calculate anomalies from
    :param time_range: Time range to use for reference data
    :param region: Spatial region to use for reference data
    :param monitor: a progress monitor.
    :return: The anomaly dataset
    """
    ref = ds.copy()
    if time_range:
        time_range = TimeRangeLike.convert(time_range)
        ref = subset_temporal(ref, time_range)
    if region:
        region = PolygonLike.convert(region)
        ref = subset_spatial(ref, region)
    with monitor.observing("Calculating anomaly"):
        ref = ref.mean(keep_attrs=True, skipna=True)
        diff = ds - ref
    return diff
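A minimal usage sketch (not part of the snippet above; the dataset ds, the reference period, and the region are assumptions):

    # Anomalies relative to the mean over an assumed reference period and region
    anomalies = anomaly_internal(ds,
                                 time_range='2001-01-01, 2005-12-31',
                                 region='-10,35,30,70')
    # With no arguments, the mean of the whole dataset serves as the reference
    anomalies_full = anomaly_internal(ds)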
Example #10
 def test_format(self):
     expected = '2001-01-01T00:00:00, 2002-01-01T00:00:00'
     actual = TimeRangeLike.format((datetime(2001, 1,
                                             1), datetime(2002, 1, 1)))
     self.assertTrue(expected, actual)
     converted = TimeRangeLike.convert(actual)
     self.assertTrue(converted, expected)
Example #11
def anomaly_internal(ds: xr.Dataset,
                     time_range: TimeRangeLike.TYPE = None,
                     region: PolygonLike.TYPE = None,
                     monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Calculate anomaly using as reference data the mean of an optional region
    and time slice from the given dataset. If no time slice/spatial region is
    given, the operation will calculate anomaly using the mean of the whole
    dataset as the reference.

    This is done for each data array in the dataset.
    :param ds: The dataset to calculate anomalies from
    :param time_range: Time range to use for reference data
    :param region: Spatial region to use for reference data
    :param monitor: a progress monitor.
    :return: The anomaly dataset
    """
    ref = ds.copy()
    if time_range:
        time_range = TimeRangeLike.convert(time_range)
        ref = subset_temporal(ref, time_range)
    if region:
        region = PolygonLike.convert(region)
        ref = subset_spatial(ref, region)
    with monitor.observing("Calculating anomaly"):
        ref = ref.mean(keep_attrs=True, skipna=True)
        diff = ds - ref
    return diff
Example #12
    def open_dataset(self,
                     time_range: TimeRangeLike.TYPE = None,
                     region: PolygonLike.TYPE = None,
                     var_names: VarNamesLike.TYPE = None,
                     protocol: str = None) -> Any:
        time_range = TimeRangeLike.convert(time_range) if time_range else None
        var_names = VarNamesLike.convert(var_names) if var_names else None

        selected_file_list = self._find_files(time_range)
        if not selected_file_list:
            msg = 'CCI Open Data Portal data source "{}"\ndoes not seem to have any datasets'.format(self.id)
            if time_range is not None:
                msg += ' in given time range {}'.format(TimeRangeLike.format(time_range))
            raise DataAccessError(msg)

        files = self._get_urls_list(selected_file_list, _ODP_PROTOCOL_OPENDAP)
        try:
            ds = open_xarray_dataset(files)
            if region:
                ds = normalize_impl(ds)
                ds = subset_spatial_impl(ds, region)
            if var_names:
                ds = ds.drop([var_name for var_name in ds.data_vars.keys() if var_name not in var_names])
            return ds

        except OSError as e:
            if time_range:
                raise DataAccessError("Cannot open remote dataset for time range {}:\n"
                                      "{}"
                                      .format(TimeRangeLike.format(time_range), e), source=self) from e
            else:
                raise DataAccessError("Cannot open remote dataset:\n"
                                      "{}"
                                      .format(TimeRangeLike.format(time_range), e), source=self) from e
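A hedged usage sketch (the soil-moisture data source and the variable name 'sm' are assumptions; any TimeRangeLike value is accepted):

    # Open a one-month subset of the remote dataset, keeping only 'sm'
    ds = data_source.open_dataset(time_range='2007-01-01, 2007-01-31',
                                  var_names=['sm'])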
Example #13
 def open_dataset(self,
                  time_range: TimeRangeLike.TYPE = None,
                  region: PolygonLike.TYPE = None,
                  var_names: VarNamesLike.TYPE = None,
                  protocol: str = None) -> Any:
     time_range = TimeRangeLike.convert(time_range) if time_range else None
     if region:
         region = PolygonLike.convert(region)
     if var_names:
         var_names = VarNamesLike.convert(var_names)
     paths = []
     if time_range:
         time_series = list(self._files.values())
         file_paths = list(self._files.keys())
         for i in range(len(time_series)):
             if time_series[i]:
                 if isinstance(time_series[i], Tuple) and \
                         time_series[i][0] >= time_range[0] and \
                         time_series[i][1] <= time_range[1]:
                     paths.extend(self._resolve_file_path(file_paths[i]))
                 elif isinstance(
                         time_series[i], datetime
                 ) and time_range[0] <= time_series[i] < time_range[1]:
                     paths.extend(self._resolve_file_path(file_paths[i]))
     else:
         for file in self._files.items():
             paths.extend(self._resolve_file_path(file[0]))
     if paths:
         paths = sorted(set(paths))
         try:
             ds = open_xarray_dataset(paths)
             if region:
                 ds = normalize_impl(ds)
                 ds = subset_spatial_impl(ds, region)
             if var_names:
                 ds = ds.drop([
                     var_name for var_name in ds.data_vars.keys()
                     if var_name not in var_names
                 ])
             return ds
         except OSError as e:
             if time_range:
                 raise DataAccessError(
                     "Cannot open local dataset for time range {}:\n"
                     "{}".format(TimeRangeLike.format(time_range), e),
                     source=self) from e
             else:
                 raise DataAccessError("Cannot open local dataset:\n"
                                       "{}".format(e),
                                       source=self) from e
     else:
         if time_range:
             raise DataAccessError(
                 "No local datasets available for\nspecified time range {}".
                 format(TimeRangeLike.format(time_range)),
                 source=self)
         else:
             raise DataAccessError("No local datasets available",
                                   source=self)
Example #14
    def test_convert(self):
        self.assertEqual(TimeRangeLike.convert(None), None)
        self.assertEqual(TimeRangeLike.convert((None, None)), None)
        self.assertEqual(TimeRangeLike.convert([None, None]), None)
        self.assertEqual(TimeRangeLike.convert(''), None)
        self.assertEqual(TimeRangeLike.convert((datetime(2001, 1, 1), datetime(2002, 2, 1))),
                         (datetime(2001, 1, 1), datetime(2002, 2, 1)))
        self.assertEqual(TimeRangeLike.convert([datetime(2001, 1, 1), datetime(2002, 2, 1)]),
                         (datetime(2001, 1, 1), datetime(2002, 2, 1)))
        self.assertEqual(TimeRangeLike.convert('2001-01-01, 2002-01-01'),
                         (datetime(2001, 1, 1), datetime(2002, 1, 1, 23, 59, 59)))
        self.assertEqual(TimeRangeLike.convert('2001-01-01, 2002-01-01'),
                         (datetime(2001, 1, 1), datetime(2002, 1, 1, 23, 59, 59)))

        with self.assertRaises(ValidationError) as err:
            TimeRangeLike.convert('2002-01-01, 2001-01-01')
        self.assertTrue('cannot be converted into a' in str(err.exception))
Example #15
 def temporal_coverage(self, monitor: Monitor = Monitor.NONE) -> Optional[TimeRange]:
     start_time = self._cube.config.start_time
     end_time = self._cube.config.end_time
     if start_time and end_time:
         try:
             return TimeRangeLike.convert("{},{}".format(start_time, end_time))
         except ValueError:
             pass
     return None
Example #16
    def __init__(self,
                 ds_id: str,
                 files: Union[Sequence[str], OrderedDict],
                 data_store: 'LocalDataStore',
                 temporal_coverage: TimeRangeLike.TYPE = None,
                 spatial_coverage: PolygonLike.TYPE = None,
                 variables: VarNamesLike.TYPE = None,
                 meta_info: dict = None,
                 status: DataSourceStatus = None):
        self._id = ds_id
        if isinstance(files, Sequence):
            self._files = OrderedDict.fromkeys(files)
        else:
            self._files = files
        self._data_store = data_store

        initial_temporal_coverage = TimeRangeLike.convert(temporal_coverage) if temporal_coverage else None
        if not initial_temporal_coverage:
            files_number = len(self._files.items())
            if files_number > 0:
                files_range = list(self._files.values())
                if files_range:
                    if isinstance(files_range[0], Tuple):
                        initial_temporal_coverage = TimeRangeLike.convert(tuple([files_range[0][0],
                                                                                 files_range[files_number - 1][1]]))
                    elif isinstance(files_range[0], datetime):
                        initial_temporal_coverage = TimeRangeLike.convert((files_range[0],
                                                                           files_range[files_number - 1]))

        self._temporal_coverage = initial_temporal_coverage
        self._spatial_coverage = PolygonLike.convert(spatial_coverage) if spatial_coverage else None
        self._variables = VarNamesLike.convert(variables) if variables else []

        self._meta_info = meta_info if meta_info else OrderedDict()

        if self._variables and not self._meta_info.get('variables', None):
            self._meta_info['variables'] = [
                {'name': var_name,
                 'units': '',
                 'long_name': '',
                 'standard_name': ''
                 } for var_name in self._variables]

        self._status = status if status else DataSourceStatus.READY
Example #17
 def test_empty_error(self):
     data_source = SimpleDataSource("foo")
     error = data_source._empty_error()
     self.assertIsInstance(error, DataAccessError)
     self.assertEqual('Data source "foo" does not seem to have any datasets',
                      f"{error}")
     error = data_source._empty_error(TimeRangeLike.convert("2010-01-01,2010-01-06"))
     self.assertIsInstance(error, DataAccessError)
     self.assertEqual('Data source "foo" does not seem to have any datasets'
                      ' in given time range 2010-01-01, 2010-01-06T23:59:59',
                      f"{error}")
Example #18
 def open_dataset(self,
                  time_range: TimeRangeLike.TYPE = None,
                  region: PolygonLike.TYPE = None,
                  var_names: VarNamesLike.TYPE = None,
                  protocol: str = None) -> Any:
     paths = self.resolve_paths(TimeRangeLike.convert(time_range) if time_range else (None, None))
     unique_paths = list(set(paths))
     existing_paths = [p for p in unique_paths if os.path.exists(p)]
     if len(existing_paths) == 0:
         raise ValueError('No local file available. Consider syncing the dataset.')
     return open_xarray_dataset(existing_paths)
Example #19
    def __init__(self,
                 name: str,
                 files: Union[Sequence[str], OrderedDict],
                 data_store: 'LocalDataStore',
                 temporal_coverage: TimeRangeLike.TYPE = None,
                 spatial_coverage: PolygonLike.TYPE = None,
                 variables: VarNamesLike.TYPE = None,
                 reference_type: str = None,
                 reference_name: str = None):
        self._name = name
        if isinstance(files, Sequence):
            self._files = OrderedDict.fromkeys(files)
        else:
            self._files = files
        self._data_store = data_store

        initial_temporal_coverage = TimeRangeLike.convert(
            temporal_coverage) if temporal_coverage else None
        if not initial_temporal_coverage:
            files_number = len(self._files.items())
            if files_number > 0:
                files_range = list(self._files.values())
                if files_range:
                    if isinstance(files_range[0], Tuple):
                        initial_temporal_coverage = TimeRangeLike.convert(
                            tuple([
                                files_range[0][0],
                                files_range[files_number - 1][1]
                            ]))
                    elif isinstance(files_range[0], datetime):
                        initial_temporal_coverage = TimeRangeLike.convert(
                            (files_range[0], files_range[files_number - 1]))

        self._temporal_coverage = initial_temporal_coverage
        self._spatial_coverage = PolygonLike.convert(
            spatial_coverage) if spatial_coverage else None
        self._variables = VarNamesLike.convert(
            variables) if variables else None

        self._reference_type = reference_type if reference_type else None
        self._reference_name = reference_name
Example #20
 def test_empty_error(self):
     data_source = SimpleDataSource("foo")
     error = data_source._empty_error()
     self.assertIsInstance(error, DataAccessError)
     self.assertEqual(
         'Data source "foo" does not seem to have any datasets', f"{error}")
     error = data_source._empty_error(
         TimeRangeLike.convert("2010-01-01,2010-01-06"))
     self.assertIsInstance(error, DataAccessError)
     self.assertEqual(
         'Data source "foo" does not seem to have any datasets'
         ' in given time range 2010-01-01, 2010-01-06T23:59:59', f"{error}")
Example #21
def subset_temporal(ds: DatasetLike.TYPE,
                    time_range: TimeRangeLike.TYPE) -> xr.Dataset:
    """
    Do a temporal subset of the dataset.

    :param ds: Dataset or dataframe to subset
    :param time_range: Time range to select
    :return: Subset dataset
    """
    ds = DatasetLike.convert(ds)
    time_range = TimeRangeLike.convert(time_range)
    return adjust_temporal_attrs(subset_temporal_impl(ds, time_range))
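A minimal usage sketch (the dataset ds is an assumption; the time range may be given as a string or a tuple of datetimes):

    # Select one year from the dataset; the string is converted via TimeRangeLike
    ds_2001 = subset_temporal(ds, '2001-01-01, 2001-12-31')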
Example #22
 def open_dataset(self,
                  time_range: TimeRangeLike.TYPE = None,
                  region: PolygonLike.TYPE = None,
                  var_names: VarNamesLike.TYPE = None,
                  protocol: str = None,
                  monitor: Monitor = Monitor.NONE) -> Any:
     paths = self.resolve_paths(TimeRangeLike.convert(time_range) if time_range else (None, None))
     unique_paths = list(set(paths))
     existing_paths = [p for p in unique_paths if os.path.exists(p)]
     if len(existing_paths) == 0:
         raise ValueError('No local file available. Consider syncing the dataset.')
     return open_xarray_dataset(existing_paths, region=region, var_names=var_names, monitor=monitor)
Example #23
def subset_temporal(ds: DatasetLike.TYPE,
                    time_range: TimeRangeLike.TYPE) -> xr.Dataset:
    """
    Do a temporal subset of the dataset.

    :param ds: Dataset or dataframe to subset
    :param time_range: Time range to select
    :return: Subset dataset
    """
    ds = DatasetLike.convert(ds)
    time_range = TimeRangeLike.convert(time_range)
    return adjust_temporal_attrs(subset_temporal_impl(ds, time_range))
Example #24
    def make_local(self,
                   local_name: str,
                   local_id: str = None,
                   time_range: TimeRangeLike.TYPE = None,
                   region: PolygonLike.TYPE = None,
                   var_names: VarNamesLike.TYPE = None,
                   monitor: Monitor = Monitor.NONE) -> Optional[DataSource]:

        time_range = TimeRangeLike.convert(time_range) if time_range else None
        region = PolygonLike.convert(region) if region else None
        var_names = VarNamesLike.convert(var_names) if var_names else None

        local_store = DATA_STORE_REGISTRY.get_data_store('local')
        if not local_store:
            add_to_data_store_registry()
            local_store = DATA_STORE_REGISTRY.get_data_store('local')
        if not local_store:
            raise ValueError('Cannot initialize `local` DataStore')

        _uuid = LocalDataStore.generate_uuid(ref_id=self.id, time_range=time_range, region=region, var_names=var_names)

        if not local_name or len(local_name) == 0:
            local_name = "local.{}.{}".format(self.id, _uuid)
            existing_ds_list = local_store.query(ds_id=local_name)
            if len(existing_ds_list) == 1:
                return existing_ds_list[0]
        else:
            existing_ds_list = local_store.query(ds_id='local.%s' % local_name)
            if len(existing_ds_list) == 1:
                if existing_ds_list[0].meta_info.get('uuid', None) == _uuid:
                    return existing_ds_list[0]
                else:
                    raise ValueError('Datastore {} already contains dataset {}'.format(local_store.id, local_name))

        local_meta_info = self.meta_info.copy()
        local_meta_info['ref_uuid'] = local_meta_info.get('uuid', None)
        local_meta_info['uuid'] = _uuid

        local_ds = local_store.create_data_source(local_name, region, local_name,
                                                  time_range=time_range, var_names=var_names,
                                                  meta_info=local_meta_info)
        if local_ds:
            if not local_ds.is_complete:
                self._make_local(local_ds, time_range, region, var_names, monitor=monitor)

            if local_ds.is_empty:
                local_store.remove_data_source(local_ds)
                return None

            local_store.register_ds(local_ds)
            return local_ds
        return None
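A hedged usage sketch (the data source instance, the target name, and the variable name 'sm' are assumptions):

    # Materialize one month of the remote source locally, restricted to a
    # lon/lat box and a single variable
    local_ds = data_source.make_local('sm_nov_1978',
                                      time_range='1978-11-01, 1978-11-30',
                                      region='10,10,20,20',
                                      var_names=['sm'])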
Example #25
    def _reduce_temporal_coverage(self, time_interval: TimeRangeLike.TYPE):
        """

        :param time_interval: Time range to be removed from data source temporal coverage
        :return:
        """
        time_range = TimeRangeLike.convert(time_interval)
        if not time_range or not self._temporal_coverage:
            return
        if time_range[0] > self._temporal_coverage[0] and time_range[1] == self._temporal_coverage[1]:
            self._temporal_coverage = (self._temporal_coverage[0], time_range[0])
        if time_range[1] < self._temporal_coverage[1] and time_range[0] == self._temporal_coverage[0]:
            self._temporal_coverage = (time_range[1], self._temporal_coverage[1])
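Illustrative behaviour (not part of the snippet) for a coverage of (2001-01-01, 2001-12-31): a removed range that ends exactly at the current end trims the tail, one that starts exactly at the current start trims the head, and anything else leaves the coverage unchanged.

    # reduce by (2001-10-01, 2001-12-31) -> (2001-01-01, 2001-10-01)
    # reduce by (2001-01-01, 2001-03-01) -> (2001-03-01, 2001-12-31)
    # reduce by (2001-05-01, 2001-06-01) -> unchanged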
Example #26
    def open_dataset(self,
                     time_range: TimeRangeLike.TYPE = None,
                     region: PolygonLike.TYPE = None,
                     var_names: VarNamesLike.TYPE = None,
                     protocol: str = None,
                     monitor: Monitor = Monitor.NONE) -> Any:
        time_range = TimeRangeLike.convert(time_range) if time_range else None
        var_names = VarNamesLike.convert(var_names) if var_names else None

        paths = []
        if time_range:
            time_series = list(self._files.values())
            file_paths = list(self._files.keys())
            for i in range(len(time_series)):
                if time_series[i]:
                    if isinstance(time_series[i], Tuple) and \
                            time_series[i][0] >= time_range[0] and \
                            time_series[i][1] <= time_range[1]:
                        paths.extend(self._resolve_file_path(file_paths[i]))
                    elif isinstance(time_series[i], datetime) and time_range[0] <= time_series[i] < time_range[1]:
                        paths.extend(self._resolve_file_path(file_paths[i]))
        else:
            for file in self._files.items():
                paths.extend(self._resolve_file_path(file[0]))

        if not paths:
            raise self._empty_error(time_range)

        paths = sorted(set(paths))
        try:
            excluded_variables = self._meta_info.get('exclude_variables')
            if excluded_variables:
                drop_variables = [variable.get('name') for variable in excluded_variables]
            else:
                drop_variables = None
            # TODO: combine var_names and drop_variables
            return open_xarray_dataset(paths,
                                       region=region,
                                       var_names=var_names,
                                       drop_variables=drop_variables,
                                       monitor=monitor)
        except HTTPError as e:
            raise self._cannot_access_error(time_range, region, var_names,
                                            verb="open", cause=e) from e
        except (URLError, socket.timeout) as e:
            raise self._cannot_access_error(time_range, region, var_names,
                                            verb="open", cause=e, error_cls=NetworkError) from e
        except OSError as e:
            raise self._cannot_access_error(time_range, region, var_names,
                                            verb="open", cause=e) from e
Example #27
 def test_load_datasource_from_json_dict(self):
     test_data = {
         'name':
         'local.test_name2',
         "meta_info": {
             "temporal_coverage_start":
             "2001-01-01T00:00:00",
             "temporal_coverage_end":
             "2001-01-31T23:59:59",
             "bbox_maxx":
             "180.0",
             "bbox_maxy":
             "90.0",
             "bbox_minx":
             "-180.0",
             "bbox_miny":
             "-90.0",
             "variables": [{
                 "name": "var_1",
                 "units": "kelvin",
                 "long_name": "var_1 long name..",
                 "standard_name": "std_var_1"
             }, {
                 "name": "var_2",
                 "units": "celsius",
                 "long_name": "var_2 long name..",
                 "standard_name": "std_var_2"
             }]
         },
         'files': [['file_1', '2002-02-01 00:00:00', '2002-02-01 23:59:59'],
                   ['file_2', '2002-03-01 00:00:00', '2002-03-01 23:59:59']]
     }
     data_source = LocalDataSource.from_json_dict(
         json_dict=test_data, data_store=self.data_store)
     self.assertIsNotNone(data_source)
     self.assertEqual(
         data_source.temporal_coverage(),
         TimeRangeLike.convert("{},{}".format(
             test_data.get('meta_info').get('temporal_coverage_start'),
             test_data.get('meta_info').get('temporal_coverage_end'))))
     self.assertEqual(
         data_source.spatial_coverage(),
         PolygonLike.convert(",".join([
             test_data.get('meta_info').get('bbox_minx'),
             test_data.get('meta_info').get('bbox_miny'),
             test_data.get('meta_info').get('bbox_maxx'),
             test_data.get('meta_info').get('bbox_maxy'),
         ])))
     self.assertListEqual(data_source.variables_info,
                          test_data.get('meta_info').get('variables'))
Example #28
    def test_convert(self):
        self.assertEqual(TimeRangeLike.convert(None), None)
        self.assertEqual(TimeRangeLike.convert((None, None)), None)
        self.assertEqual(TimeRangeLike.convert([None, None]), None)
        self.assertEqual(TimeRangeLike.convert(''), None)
        self.assertEqual(
            TimeRangeLike.convert((datetime(2001, 1, 1), datetime(2002, 2,
                                                                  1))),
            (datetime(2001, 1, 1), datetime(2002, 2, 1)))
        self.assertEqual(
            TimeRangeLike.convert([datetime(2001, 1, 1),
                                   datetime(2002, 2, 1)]),
            (datetime(2001, 1, 1), datetime(2002, 2, 1)))
        self.assertEqual(
            TimeRangeLike.convert('2001-01-01, 2002-01-01'),
            (datetime(2001, 1, 1), datetime(2002, 1, 1, 23, 59, 59)))
        self.assertEqual(
            TimeRangeLike.convert('2001-01-01, 2002-01-01'),
            (datetime(2001, 1, 1), datetime(2002, 1, 1, 23, 59, 59)))

        with self.assertRaises(ValidationError) as err:
            TimeRangeLike.convert('2002-01-01, 2001-01-01')
        self.assertTrue('cannot be converted into a' in str(err.exception))
Example #29
 def add_dataset(self, file, time_coverage: TimeRangeLike.TYPE = None, update: bool = False,
                 extract_meta_info: bool = False):
     if update or self._files.keys().isdisjoint([file]):
         self._files[file] = time_coverage
         if time_coverage:
             self._extend_temporal_coverage(TimeRangeLike.convert(time_coverage))
     self._files = OrderedDict(sorted(self._files.items(),
                                      key=lambda f: f[1] if isinstance(f, Tuple) and f[1] else datetime.max))
     if extract_meta_info:
         try:
             ds = xr.open_dataset(file)
             self._meta_info.update(ds.attrs)
         except OSError:
             pass
     self.save()
Example #30
    def _reduce_temporal_coverage(self, time_interval: TimeRangeLike.TYPE):
        """

        :param time_interval: Time range to be removed from data source temporal coverage
        :return:
        """
        time_range = TimeRangeLike.convert(time_interval)
        if not time_range or not self._temporal_coverage:
            return
        if time_range[0] > self._temporal_coverage[0] and time_range[
                1] == self._temporal_coverage[1]:
            self._temporal_coverage = (self._temporal_coverage[0],
                                       time_range[0])
        if time_range[1] < self._temporal_coverage[1] and time_range[
                0] == self._temporal_coverage[0]:
            self._temporal_coverage = (time_range[1],
                                       self._temporal_coverage[1])
Example #31
 def open_dataset(self,
                  time_range: TimeRangeLike.TYPE = None,
                  region: PolygonLike.TYPE = None,
                  var_names: VarNamesLike.TYPE = None,
                  protocol: str = None) -> Any:
     time_range = TimeRangeLike.convert(time_range) if time_range else None
     if region:
         region = PolygonLike.convert(region)
     if var_names:
         var_names = VarNamesLike.convert(var_names)
     paths = []
     if time_range:
         time_series = list(self._files.values())
         file_paths = list(self._files.keys())
         for i in range(len(time_series)):
             if time_series[i]:
                 if isinstance(time_series[i], Tuple) and \
                         time_series[i][0] >= time_range[0] and \
                         time_series[i][1] <= time_range[1]:
                     paths.extend(self._resolve_file_path(file_paths[i]))
                 elif isinstance(time_series[i], datetime) and \
                         time_range[0] <= time_series[i] < time_range[1]:
                     paths.extend(self._resolve_file_path(file_paths[i]))
     else:
         for file in self._files.items():
             paths.extend(self._resolve_file_path(file[0]))
     if paths:
         paths = sorted(set(paths))
         try:
             ds = open_xarray_dataset(paths)
             if region:
                  # bounds order is (minx, miny, maxx, maxy), i.e. (lon, lat, lon, lat)
                  [lon_min, lat_min, lon_max, lat_max] = region.bounds
                 ds = ds.sel(drop=False,
                             lat=slice(lat_min, lat_max),
                             lon=slice(lon_min, lon_max))
             if var_names:
                 ds = ds.drop([
                     var_name for var_name in ds.variables.keys()
                     if var_name not in var_names
                 ])
             return ds
         except OSError as e:
             raise IOError("Files: {} caused:\nOSError({}): {}".format(
                 paths, e.errno, e.strerror))
     else:
         return None
Example #32
    def _extend_temporal_coverage(self, time_interval: TimeRangeLike.TYPE):
        """

        :param time_interval: Time range to be added to data source temporal coverage
        :return:
        """

        time_range = TimeRangeLike.convert(time_interval)
        if not time_range or None in time_range:
            return

        if self._temporal_coverage and not (None in self._temporal_coverage):
            if time_range[0] >= self._temporal_coverage[1]:
                self._temporal_coverage = tuple([self._temporal_coverage[0], time_range[1]])
            elif time_range[1] <= self._temporal_coverage[0]:
                self._temporal_coverage = tuple([time_range[0], self._temporal_coverage[1]])
        else:
            self._temporal_coverage = time_range
        self.save()
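Illustrative behaviour (not part of the snippet) for a coverage of (2001-01-01, 2001-12-31): the merge only grows the coverage when the new range lies entirely at or beyond one of its ends.

    # extend by (2002-01-01, 2002-12-31) -> (2001-01-01, 2002-12-31)
    # extend by (2000-01-01, 2000-12-31) -> (2000-01-01, 2001-12-31)
    # extend by (2001-06-01, 2001-07-01) -> unchanged (lies inside the coverage)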
Example #33
    def update_local(self,
                     local_id: str,
                     time_range: TimeRangeLike.TYPE,
                     monitor: Monitor = Monitor.NONE) -> bool:

        data_sources = query_data_sources(
            None, local_id)  # type: Sequence['DataSource']
        data_source = next(
            (ds for ds in data_sources
             if isinstance(ds, LocalDataSource) and ds.name == local_id),
            None)  # type: LocalDataSource
        if not data_source:
            raise ValueError("Couldn't find local DataSource",
                             (local_id, data_sources))

        time_range = TimeRangeLike.convert(time_range) if time_range else None

        to_remove = []
        to_add = []
        if time_range and time_range[1] > time_range[0]:
            if time_range[0] != data_source.temporal_coverage()[0]:
                if time_range[0] > data_source.temporal_coverage()[0]:
                    to_remove.append(
                        (data_source.temporal_coverage()[0], time_range[0]))
                else:
                    to_add.append(
                        (time_range[0], data_source.temporal_coverage()[0]))

            if time_range[1] != data_source.temporal_coverage()[1]:
                if time_range[1] < data_source.temporal_coverage()[1]:
                    to_remove.append(
                        (time_range[1], data_source.temporal_coverage()[1]))
                else:
                    to_add.append(
                        (data_source.temporal_coverage()[1], time_range[1]))
        if to_remove:
            for time_range_to_remove in to_remove:
                data_source.reduce_temporal_coverage(time_range_to_remove)
        if to_add:
            for time_range_to_add in to_add:
                self._make_local(data_source, time_range_to_add, None,
                                 data_source.variables_info, monitor)
        return bool(to_remove or to_add)
Example #34
def subset_temporal(ds: xr.Dataset,
                    time_range: TimeRangeLike.TYPE) -> xr.Dataset:
    """
    Do a temporal subset of the dataset.

    :param ds: Dataset to subset
    :param time_range: Time range to select
    :return: Subset dataset
    """
    time_range = TimeRangeLike.convert(time_range)
    # If it can be selected, go ahead
    try:
        time_slice = slice(time_range[0], time_range[1])
        indexers = {'time': time_slice}
        return ds.sel(**indexers)
    except TypeError:
        raise ValueError('Time subset operation expects a dataset with the'
                         ' time coordinate of type datetime64[ns], but received'
                         ' {}. Running the harmonization operation on this'
                         ' dataset may help'.format(ds.time.dtype))
Example #35
 def add_dataset(self,
                 file,
                 time_coverage: TimeRangeLike.TYPE = None,
                 update: bool = False,
                 extract_meta_info: bool = False):
     if update or self._files.keys().isdisjoint([file]):
         self._files[file] = time_coverage
         if time_coverage:
             self._extend_temporal_coverage(
                 TimeRangeLike.convert(time_coverage))
     self._files = OrderedDict(
         sorted(self._files.items(),
                key=lambda f: f[1]
                if isinstance(f, Tuple) and f[1] else datetime.max))
     if extract_meta_info:
         try:
             ds = xr.open_dataset(file)
             self._meta_info.update(ds.attrs)
         except OSError:
             pass
     self.save()
Example #36
    def _extend_temporal_coverage(self, time_interval: TimeRangeLike.TYPE):
        """

        :param time_interval: Time range to be added to data source temporal coverage
        :return:
        """

        time_range = TimeRangeLike.convert(time_interval)
        if not time_range or None in time_range:
            return

        if self._temporal_coverage and not (None in self._temporal_coverage):
            if time_range[0] >= self._temporal_coverage[1]:
                self._temporal_coverage = tuple(
                    [self._temporal_coverage[0], time_range[1]])
            elif time_range[1] <= self._temporal_coverage[0]:
                self._temporal_coverage = tuple(
                    [time_range[0], self._temporal_coverage[1]])
        else:
            self._temporal_coverage = time_range
        self.save()
Example #37
    def open_dataset(self,
                     time_range: TimeRangeLike.TYPE = None,
                     region: PolygonLike.TYPE = None,
                     var_names: VarNamesLike.TYPE = None,
                     protocol: str = None) -> Any:
        time_range = TimeRangeLike.convert(time_range) if time_range else None
        region = PolygonLike.convert(region) if region else None
        var_names = VarNamesLike.convert(var_names) if var_names else None

        selected_file_list = self._find_files(time_range)
        if not selected_file_list:
            msg = 'Data source \'{}\' does not seem to have any data files'.format(
                self.name)
            if time_range is not None:
                msg += ' in given time range {}'.format(
                    TimeRangeLike.format(time_range))
            raise IOError(msg)

        files = self._get_urls_list(selected_file_list, _ODP_PROTOCOL_OPENDAP)
        try:
            ds = open_xarray_dataset(files)
            if region:
                # bounds order is (minx, miny, maxx, maxy), i.e. (lon, lat, lon, lat)
                [lon_min, lat_min, lon_max, lat_max] = region.bounds
                ds = ds.sel(drop=False,
                            lat=slice(lat_min, lat_max),
                            lon=slice(lon_min, lon_max))
            if var_names:
                ds = ds.drop([
                    var_name for var_name in ds.variables.keys()
                    if var_name not in var_names
                ])
            return ds

        except OSError as e:
            raise IOError("Files: {} caused:\nOSError({}): {}".format(
                files, e.errno, e.strerror))
Example #38
    def test_make_local_and_update(self):

        soilmoisture_data_sources = self.data_store.query(
            query_expr='esacci.SOILMOISTURE.day.L3S.SSMV.multi-sensor.multi-platform.COMBINED.02-1.r1')
        soilmoisture_data_source = soilmoisture_data_sources[0]

        reference_path = os.path.join(os.path.dirname(__file__),
                                      os.path.normpath('resources/datasources/local/files/'))

        def find_files_mock(_, time_range):

            def build_file_item(item_name: str, date_from: datetime, date_to: datetime, size: int):

                return [item_name, date_from, date_to, size,
                        {'OPENDAP': os.path.join(reference_path, item_name),
                         'HTTPServer': 'file:' + urllib.request.pathname2url(os.path.join(reference_path, item_name))}]

            reference_files = {
                'ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-19781114000000-fv02.2.nc': {
                    'date_from': datetime.datetime(1978, 11, 14, 0, 0),
                    'date_to': datetime.datetime(1978, 11, 14, 23, 59),
                    'size': 21511378
                },
                'ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-19781115000000-fv02.2.nc': {
                    'date_from': datetime.datetime(1978, 11, 15, 0, 0),
                    'date_to': datetime.datetime(1978, 11, 15, 23, 59),
                    'size': 21511378
                },
                'ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-19781116000000-fv02.2.nc': {
                    'date_from': datetime.datetime(1978, 11, 16, 0, 0),
                    'date_to': datetime.datetime(1978, 11, 16, 23, 59),
                    'size': 21511378
                }
            }

            reference_files_list = []

            for reference_file in reference_files.items():
                file_name = reference_file[0]
                file_date_from = reference_file[1].get('date_from')
                file_date_to = reference_file[1].get('date_to')
                file_size = reference_file[1].get('size')
                if time_range:
                    if file_date_from >= time_range[0] and file_date_to <= time_range[1]:
                        reference_files_list.append(build_file_item(file_name,
                                                                    file_date_from,
                                                                    file_date_to,
                                                                    file_size))
                else:
                    reference_files_list.append(build_file_item(file_name,
                                                                file_date_from,
                                                                file_date_to,
                                                                file_size))
            return reference_files_list

        with unittest.mock.patch('cate.ds.esa_cci_odp.EsaCciOdpDataSource._find_files', find_files_mock):
            with unittest.mock.patch.object(EsaCciOdpDataStore, 'query', return_value=[]):

                new_ds_title = 'local_ds_test'
                new_ds_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                           datetime.datetime(1978, 11, 16, 23, 59)))
                try:
                    new_ds = soilmoisture_data_source.make_local(new_ds_title, time_range=new_ds_time_range)
                except Exception:
                    raise ValueError(reference_path, os.listdir(reference_path))
                self.assertIsNotNone(new_ds)

                self.assertEqual(new_ds.id, "local.%s" % new_ds_title)
                self.assertEqual(new_ds.temporal_coverage(), new_ds_time_range)

                new_ds_w_one_variable_title = 'local_ds_test_var'
                new_ds_w_one_variable_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                                          datetime.datetime(1978, 11, 16, 23, 59)))
                new_ds_w_one_variable_var_names = VarNamesLike.convert(['sm'])

                new_ds_w_one_variable = soilmoisture_data_source.make_local(
                    new_ds_w_one_variable_title,
                    time_range=new_ds_w_one_variable_time_range,
                    var_names=new_ds_w_one_variable_var_names
                )
                self.assertIsNotNone(new_ds_w_one_variable)

                self.assertEqual(new_ds_w_one_variable.id, "local.%s" % new_ds_w_one_variable_title)
                ds = new_ds_w_one_variable.open_dataset()

                new_ds_w_one_variable_var_names.extend(['lat', 'lon', 'time'])

                self.assertSetEqual(set(ds.variables),
                                    set(new_ds_w_one_variable_var_names))

                new_ds_w_region_title = 'from_local_to_local_region'
                new_ds_w_region_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                                    datetime.datetime(1978, 11, 16, 23, 59)))
                new_ds_w_region_spatial_coverage = PolygonLike.convert("10,20,30,40")

                new_ds_w_region = soilmoisture_data_source.make_local(
                    new_ds_w_region_title,
                    time_range=new_ds_w_region_time_range,
                    region=new_ds_w_region_spatial_coverage)  # type: LocalDataSource

                self.assertIsNotNone(new_ds_w_region)

                self.assertEqual(new_ds_w_region.id, "local.%s" % new_ds_w_region_title)

                self.assertEqual(new_ds_w_region.spatial_coverage(), new_ds_w_region_spatial_coverage)

                new_ds_w_region_title = 'from_local_to_local_region_one_var'
                new_ds_w_region_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                                    datetime.datetime(1978, 11, 16, 23, 59)))
                new_ds_w_region_var_names = VarNamesLike.convert(['sm'])
                new_ds_w_region_spatial_coverage = PolygonLike.convert("10,20,30,40")

                new_ds_w_region = soilmoisture_data_source.make_local(
                    new_ds_w_region_title,
                    time_range=new_ds_w_region_time_range,
                    var_names=new_ds_w_region_var_names,
                    region=new_ds_w_region_spatial_coverage)  # type: LocalDataSource

                self.assertIsNotNone(new_ds_w_region)

                self.assertEqual(new_ds_w_region.id, "local.%s" % new_ds_w_region_title)

                self.assertEqual(new_ds_w_region.spatial_coverage(), new_ds_w_region_spatial_coverage)
                data_set = new_ds_w_region.open_dataset()
                new_ds_w_region_var_names.extend(['lat', 'lon', 'time'])

                self.assertSetEqual(set(data_set.variables), set(new_ds_w_region_var_names))

                new_ds_w_region_title = 'from_local_to_local_region_two_var_sm_uncertainty'
                new_ds_w_region_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                                    datetime.datetime(1978, 11, 16, 23, 59)))
                new_ds_w_region_var_names = VarNamesLike.convert(['sm', 'sm_uncertainty'])
                new_ds_w_region_spatial_coverage = PolygonLike.convert("10,20,30,40")

                new_ds_w_region = soilmoisture_data_source.make_local(
                    new_ds_w_region_title,
                    time_range=new_ds_w_region_time_range,
                    var_names=new_ds_w_region_var_names,
                    region=new_ds_w_region_spatial_coverage)  # type: LocalDataSource

                self.assertIsNotNone(new_ds_w_region)

                self.assertEqual(new_ds_w_region.id, "local.%s" % new_ds_w_region_title)

                self.assertEqual(new_ds_w_region.spatial_coverage(), new_ds_w_region_spatial_coverage)
                data_set = new_ds_w_region.open_dataset()
                new_ds_w_region_var_names.extend(['lat', 'lon', 'time'])

                self.assertSetEqual(set(data_set.variables), set(new_ds_w_region_var_names))

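                # A time range with no matching remote files must raise a DataAccessError.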
                empty_ds_timerange = (datetime.datetime(2017, 12, 1, 0, 0), datetime.datetime(2017, 12, 31, 23, 59))
                with self.assertRaises(DataAccessError) as cm:
                    soilmoisture_data_source.make_local('empty_ds', time_range=empty_ds_timerange)
                self.assertEqual(f'Data source "{soilmoisture_data_source.id}" does not'
                                 f' seem to have any datasets in given'
                                 f' time range {TimeRangeLike.format(empty_ds_timerange)}',
                                 str(cm.exception))

                new_ds_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                           datetime.datetime(1978, 11, 14, 23, 59)))

                new_ds = soilmoisture_data_source.make_local("title_test_copy", time_range=new_ds_time_range)
                self.assertIsNotNone(new_ds)
                self.assertEqual(new_ds.meta_info['title'], soilmoisture_data_source.meta_info['title'])

                title = "Title Test!"
                new_ds = soilmoisture_data_source.make_local("title_test_set", title, time_range=new_ds_time_range)
                self.assertIsNotNone(new_ds)
                self.assertEqual(new_ds.meta_info['title'], title)
Beispiel #39
0
    def make_local(self,
                   local_name: str,
                   local_id: str = None,
                   time_range: TimeRangeLike.TYPE = None,
                   region: PolygonLike.TYPE = None,
                   var_names: VarNamesLike.TYPE = None,
                   monitor: Monitor = Monitor.NONE) -> Optional[DataSource]:

        time_range = TimeRangeLike.convert(time_range) if time_range else None
        region = PolygonLike.convert(region) if region else None
        var_names = VarNamesLike.convert(var_names) if var_names else None

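        # The requested name becomes the id of the new local data source; 'local_id' (if given) is used as its title.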
        ds_id = local_name
        title = local_id

        local_store = DATA_STORE_REGISTRY.get_data_store('local')
        if not local_store:
            add_to_data_store_registry()
            local_store = DATA_STORE_REGISTRY.get_data_store('local')
        if not local_store:
            raise ValueError('Cannot initialize `local` DataStore')

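        # Derive a UUID from the remote id and the subset parameters so that an identical
        # request can be recognized and an existing local copy reused.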
        uuid = LocalDataStore.generate_uuid(ref_id=self.id,
                                            time_range=time_range,
                                            region=region,
                                            var_names=var_names)

        if not ds_id or len(ds_id) == 0:
            ds_id = "local.{}.{}".format(self.id, uuid)
            existing_ds_list = local_store.query(ds_id=ds_id)
            if len(existing_ds_list) == 1:
                return existing_ds_list[0]
        else:
            existing_ds_list = local_store.query(ds_id='local.%s' % ds_id)
            if len(existing_ds_list) == 1:
                if existing_ds_list[0].meta_info.get('uuid', None) == uuid:
                    return existing_ds_list[0]
                else:
                    raise ValueError(
                        'Datastore {} already contains dataset {}'.format(
                            local_store.id, ds_id))

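        # Copy the remote metadata and keep a back-reference to the remote UUID.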
        local_meta_info = self.meta_info.copy()
        local_meta_info['ref_uuid'] = local_meta_info.get('uuid', None)
        local_meta_info['uuid'] = uuid

        local_ds = local_store.create_data_source(ds_id,
                                                  title=title,
                                                  time_range=time_range,
                                                  region=region,
                                                  var_names=var_names,
                                                  meta_info=local_meta_info,
                                                  lock_file=True)
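        # Fetch the actual data unless the local source is already complete; on cancellation,
        # or if nothing could be written, remove the half-created local data source again.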
        if local_ds:
            if not local_ds.is_complete:
                try:
                    self._make_local(local_ds,
                                     time_range,
                                     region,
                                     var_names,
                                     monitor=monitor)
                except Cancellation as c:
                    local_store.remove_data_source(local_ds)
                    raise c
                except Exception as e:
                    if local_ds.is_empty:
                        local_store.remove_data_source(local_ds)
                    raise e

            if local_ds.is_empty:
                local_store.remove_data_source(local_ds)
                return None

            local_store.register_ds(local_ds)
            return local_ds
        else:
            return None
Beispiel #40
0
    def _make_local(self,
                    local_ds: LocalDataSource,
                    time_range: TimeRangeLike.TYPE = None,
                    region: PolygonLike.TYPE = None,
                    var_names: VarNamesLike.TYPE = None,
                    monitor: Monitor = Monitor.NONE):

        local_id = local_ds.id
        time_range = TimeRangeLike.convert(time_range)
        region = PolygonLike.convert(region)
        var_names = VarNamesLike.convert(var_names)

        time_range, region, var_names = self._apply_make_local_fixes(
            time_range, region, var_names)

        compression_level = get_config_value('NETCDF_COMPRESSION_LEVEL',
                                             NETCDF_COMPRESSION_LEVEL)
        compression_enabled = compression_level > 0

        do_update_of_verified_time_coverage_start_once = True
        verified_time_coverage_start = None
        verified_time_coverage_end = None

        encoding_update = dict()
        if compression_enabled:
            encoding_update.update({
                'zlib': True,
                'complevel': compression_level
            })

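        # Variable or spatial subsetting requires server-side access via OPeNDAP;
        # otherwise complete files are downloaded via plain HTTP.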
        if region or var_names:
            protocol = _ODP_PROTOCOL_OPENDAP
        else:
            protocol = _ODP_PROTOCOL_HTTP

        local_path = os.path.join(local_ds.data_store.data_store_path,
                                  local_id)
        if not os.path.exists(local_path):
            os.makedirs(local_path)

        selected_file_list = self._find_files(time_range)
        if not selected_file_list:
            msg = 'CCI Open Data Portal data source "{}"\ndoes not seem to have any datasets'.format(
                self.id)
            if time_range is not None:
                msg += ' in given time range {}'.format(
                    TimeRangeLike.format(time_range))
            raise DataAccessError(msg)
        try:
            if protocol == _ODP_PROTOCOL_OPENDAP:

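                # OPeNDAP: open each granule remotely, apply the variable/region subset,
                # and write the result to a local NetCDF file.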
                do_update_of_variables_meta_info_once = True
                do_update_of_region_meta_info_once = True

                files = self._get_urls_list(selected_file_list, protocol)
                monitor.start('Sync ' + self.id, total_work=len(files))
                for idx, dataset_uri in enumerate(files):
                    child_monitor = monitor.child(work=1)

                    file_name = os.path.basename(dataset_uri)
                    local_filepath = os.path.join(local_path, file_name)

                    time_coverage_start = selected_file_list[idx][1]
                    time_coverage_end = selected_file_list[idx][2]

                    remote_dataset = None
                    try:
                        child_monitor.start(label=file_name, total_work=1)

                        remote_dataset = xr.open_dataset(dataset_uri)

                        if var_names:
                            remote_dataset = remote_dataset.drop([
                                var_name for var_name in
                                remote_dataset.data_vars.keys()
                                if var_name not in var_names
                            ])

                        if region:
                            remote_dataset = normalize_impl(remote_dataset)
                            remote_dataset = subset_spatial_impl(
                                remote_dataset, region)
                            geo_lon_min, geo_lat_min, geo_lon_max, geo_lat_max = region.bounds

                            remote_dataset.attrs['geospatial_lat_min'] = geo_lat_min
                            remote_dataset.attrs['geospatial_lat_max'] = geo_lat_max
                            remote_dataset.attrs['geospatial_lon_min'] = geo_lon_min
                            remote_dataset.attrs['geospatial_lon_max'] = geo_lon_max
                            if do_update_of_region_meta_info_once:
                                local_ds.meta_info['bbox_maxx'] = geo_lon_max
                                local_ds.meta_info['bbox_minx'] = geo_lon_min
                                local_ds.meta_info['bbox_maxy'] = geo_lat_max
                                local_ds.meta_info['bbox_miny'] = geo_lat_min
                                do_update_of_region_meta_info_once = False

                        if compression_enabled:
                            for sel_var_name in remote_dataset.variables.keys():
                                remote_dataset.variables.get(sel_var_name).encoding.update(encoding_update)

                        remote_dataset.to_netcdf(local_filepath)

                        child_monitor.progress(work=1,
                                               msg=str(time_coverage_start))
                    finally:
                        if do_update_of_variables_meta_info_once and remote_dataset is not None:
                            variables_info = local_ds.meta_info.get('variables', [])
                            local_ds.meta_info['variables'] = [
                                var_info for var_info in variables_info
                                if var_info.get('name') in remote_dataset.variables.keys()
                                and var_info.get('name') not in remote_dataset.dims.keys()
                            ]
                            do_update_of_variables_meta_info_once = False

                        local_ds.add_dataset(
                            os.path.join(local_id, file_name),
                            (time_coverage_start, time_coverage_end))

                        if do_update_of_verified_time_coverage_start_once:
                            verified_time_coverage_start = time_coverage_start
                            do_update_of_verified_time_coverage_start_once = False
                        verified_time_coverage_end = time_coverage_end
                    child_monitor.done()
            else:
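                # Plain HTTP: download only files that are missing locally or whose size differs
                # from the size reported by the remote index.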
                outdated_file_list = []
                for file_rec in selected_file_list:
                    filename, _, _, file_size, url = file_rec
                    dataset_file = os.path.join(local_path, filename)
                    # todo (forman, 20160915): must perform better checks on dataset_file if it is...
                    # ... outdated or incomplete or corrupted.
                    # JSON also includes "checksum" and "checksum_type" fields.
                    if not os.path.isfile(dataset_file) or (
                            file_size
                            and os.path.getsize(dataset_file) != file_size):
                        outdated_file_list.append(file_rec)

                if outdated_file_list:
                    with monitor.starting('Sync ' + self.id,
                                          len(outdated_file_list)):
                        bytes_to_download = sum(
                            [file_rec[3] for file_rec in outdated_file_list])
                        dl_stat = _DownloadStatistics(bytes_to_download)

                        file_number = 1

                        for filename, coverage_from, coverage_to, file_size, url in outdated_file_list:
                            dataset_file = os.path.join(local_path, filename)
                            sub_monitor = monitor.child(work=1.0)

                            # noinspection PyUnusedLocal
                            def reporthook(block_number, read_size,
                                           total_file_size):
                                dl_stat.handle_chunk(read_size)
                                sub_monitor.progress(work=read_size,
                                                     msg=str(dl_stat))

                            sub_monitor_msg = "file %d of %d" % (
                                file_number, len(outdated_file_list))
                            with sub_monitor.starting(sub_monitor_msg,
                                                      file_size):
                                urllib.request.urlretrieve(
                                    url[protocol],
                                    filename=dataset_file,
                                    reporthook=reporthook)
                            file_number += 1
                            local_ds.add_dataset(
                                os.path.join(local_id, filename),
                                (coverage_from, coverage_to))

                            if do_update_of_verified_time_coverage_start_once:
                                verified_time_coverage_start = coverage_from
                                do_update_of_verified_time_coverage_start_once = False
                            verified_time_coverage_end = coverage_to
        except OSError as e:
            raise DataAccessError(
                "Copying remote data source failed: {}".format(e),
                source=self) from e
        local_ds.meta_info['temporal_coverage_start'] = TimeLike.format(
            verified_time_coverage_start)
        local_ds.meta_info['temporal_coverage_end'] = TimeLike.format(
            verified_time_coverage_end)
        local_ds.save(True)
Beispiel #41
0
    def test_make_local_and_update(self):

        reference_path = os.path.join(os.path.dirname(__file__),
                                      os.path.normpath('resources/datasources/local/files/'))

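        # Fake the ODP file index: return the three local reference files (with OPENDAP and
        # HTTPServer URLs) that fall within the requested time range.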
        def find_files_mock(_, time_range):

            def build_file_item(item_name: str, date_from: datetime.datetime, date_to: datetime.datetime, size: int):

                return [item_name, date_from, date_to, size,
                        {'OPENDAP': os.path.join(reference_path, item_name),
                         'HTTPServer': 'file:' + urllib.request.pathname2url(os.path.join(reference_path, item_name))}]

            reference_files = {
                'ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-19781114000000-fv02.2.nc': {
                    'date_from': datetime.datetime(1978, 11, 14, 0, 0),
                    'date_to': datetime.datetime(1978, 11, 14, 23, 59),
                    'size': 21511378
                },
                'ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-19781115000000-fv02.2.nc': {
                    'date_from': datetime.datetime(1978, 11, 15, 0, 0),
                    'date_to': datetime.datetime(1978, 11, 15, 23, 59),
                    'size': 21511378
                },
                'ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-19781116000000-fv02.2.nc': {
                    'date_from': datetime.datetime(1978, 11, 16, 0, 0),
                    'date_to': datetime.datetime(1978, 11, 16, 23, 59),
                    'size': 21511378
                }
            }

            reference_files_list = []

            for file_name, file_props in reference_files.items():
                file_date_from = file_props.get('date_from')
                file_date_to = file_props.get('date_to')
                file_size = file_props.get('size')
                if time_range:
                    if file_date_from >= time_range[0] and file_date_to <= time_range[1]:
                        reference_files_list.append(build_file_item(file_name,
                                                                    file_date_from,
                                                                    file_date_to,
                                                                    file_size))
                else:
                    reference_files_list.append(build_file_item(file_name,
                                                                file_date_from,
                                                                file_date_to,
                                                                file_size))
            return reference_files_list

        with unittest.mock.patch('cate.ds.esa_cci_odp.EsaCciOdpDataSource._find_files', find_files_mock):
            with unittest.mock.patch.object(EsaCciOdpDataStore, 'query', return_value=[]):
                try:
                    new_ds = self.data_source.make_local('local_ds_test', None,
                                                         (datetime.datetime(1978, 11, 14, 0, 0),
                                                          datetime.datetime(1978, 11, 15, 23, 59)))
                except Exception:
                    raise ValueError(reference_path, os.listdir(reference_path))

                self.assertEqual(new_ds.name, 'local.local_ds_test')
                self.assertEqual(new_ds.temporal_coverage(),
                                 (datetime.datetime(1978, 11, 14, 0, 0),
                                  datetime.datetime(1978, 11, 15, 23, 59)))

                self.data_source.update_local(new_ds.name, (datetime.datetime(1978, 11, 15, 00, 00),
                                                            datetime.datetime(1978, 11, 16, 23, 59)))
                self.assertEqual(new_ds.temporal_coverage(), TimeRangeLike.convert(
                                 (datetime.datetime(1978, 11, 15, 0, 0),
                                  datetime.datetime(1978, 11, 16, 23, 59))))

                self.data_source.update_local(new_ds.name, (datetime.datetime(1978, 11, 14, 00, 00),
                                                            datetime.datetime(1978, 11, 15, 23, 59)))
                self.assertEqual(new_ds.temporal_coverage(), TimeRangeLike.convert(
                                 (datetime.datetime(1978, 11, 14, 0, 0),
                                  datetime.datetime(1978, 11, 15, 23, 59))))

                with self.assertRaises(ValueError) as context:
                    self.data_source.update_local("wrong_ds_name", (datetime.datetime(1978, 11, 15, 00, 00),
                                                                    datetime.datetime(1978, 11, 16, 23, 59)))
                self.assertTrue("Couldn't find local DataSource", context.exception.args[0])

                new_ds_w_one_variable = self.data_source.make_local(
                    'local_ds_test_2', None, (datetime.datetime(1978, 11, 14, 0, 0),
                                              datetime.datetime(1978, 11, 15, 23, 59)), None, ['sm'])
                self.assertEqual(new_ds_w_one_variable.name, 'local.local_ds_test_2')
                ds = new_ds_w_one_variable.open_dataset()
                self.assertSetEqual(set(ds.variables), {'sm', 'lat', 'lon', 'time'})

                new_ds_w_region = self.data_source.make_local(
                    'from_local_to_local_region', None, (datetime.datetime(1978, 11, 14, 0, 0),
                                                         datetime.datetime(1978, 11, 15, 23, 59)),
                    "10,10,20,20", ['sm'])  # type: LocalDataSource
                self.assertEqual(new_ds_w_region.name, 'local.from_local_to_local_region')
                self.assertEqual(new_ds_w_region.spatial_coverage(), PolygonLike.convert("10,10,20,20"))
                data_set = new_ds_w_region.open_dataset()
                self.assertSetEqual(set(data_set.variables), {'sm', 'lat', 'lon', 'time'})
Beispiel #42
0
    def test_make_local_and_update(self):

        soilmoisture_data_sources = self.data_store.query(
            query_expr='esacci.SOILMOISTURE.day.L3S.SSMV.multi-sensor.multi-platform.COMBINED.02-1.r1')
        soilmoisture_data_source = soilmoisture_data_sources[0]

        reference_path = os.path.join(os.path.dirname(__file__),
                                      os.path.normpath('resources/datasources/local/files/'))

        def find_files_mock(_, time_range):

            def build_file_item(item_name: str, date_from: datetime.datetime, date_to: datetime.datetime, size: int):

                return [item_name, date_from, date_to, size,
                        {'OPENDAP': os.path.join(reference_path, item_name),
                         'HTTPServer': 'file:' + urllib.request.pathname2url(os.path.join(reference_path, item_name))}]

            reference_files = {
                'ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-19781114000000-fv02.2.nc': {
                    'date_from': datetime.datetime(1978, 11, 14, 0, 0),
                    'date_to': datetime.datetime(1978, 11, 14, 23, 59),
                    'size': 21511378
                },
                'ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-19781115000000-fv02.2.nc': {
                    'date_from': datetime.datetime(1978, 11, 15, 0, 0),
                    'date_to': datetime.datetime(1978, 11, 15, 23, 59),
                    'size': 21511378
                },
                'ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-19781116000000-fv02.2.nc': {
                    'date_from': datetime.datetime(1978, 11, 16, 0, 0),
                    'date_to': datetime.datetime(1978, 11, 16, 23, 59),
                    'size': 21511378
                }
            }

            reference_files_list = []

            for file_name, file_props in reference_files.items():
                file_date_from = file_props.get('date_from')
                file_date_to = file_props.get('date_to')
                file_size = file_props.get('size')
                if time_range:
                    if file_date_from >= time_range[0] and file_date_to <= time_range[1]:
                        reference_files_list.append(build_file_item(file_name,
                                                                    file_date_from,
                                                                    file_date_to,
                                                                    file_size))
                else:
                    reference_files_list.append(build_file_item(file_name,
                                                                file_date_from,
                                                                file_date_to,
                                                                file_size))
            return reference_files_list

        with unittest.mock.patch('cate.ds.esa_cci_odp.EsaCciOdpDataSource._find_files', find_files_mock):
            with unittest.mock.patch.object(EsaCciOdpDataStore, 'query', return_value=[]):

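                # With _find_files and the remote query mocked out, make_local() operates
                # entirely on the local reference files.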
                new_ds_title = 'local_ds_test'
                new_ds_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                           datetime.datetime(1978, 11, 16, 23, 59)))
                try:
                    new_ds = soilmoisture_data_source.make_local(new_ds_title, time_range=new_ds_time_range)
                except Exception:
                    raise ValueError(reference_path, os.listdir(reference_path))
                self.assertIsNotNone(new_ds)

                self.assertEqual(new_ds.id, "local.%s" % new_ds_title)
                self.assertEqual(new_ds.temporal_coverage(), new_ds_time_range)

                new_ds_w_one_variable_title = 'local_ds_test_var'
                new_ds_w_one_variable_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                                          datetime.datetime(1978, 11, 16, 23, 59)))
                new_ds_w_one_variable_var_names = VarNamesLike.convert(['sm'])

                new_ds_w_one_variable = soilmoisture_data_source.make_local(
                    new_ds_w_one_variable_title,
                    time_range=new_ds_w_one_variable_time_range,
                    var_names=new_ds_w_one_variable_var_names
                )
                self.assertIsNotNone(new_ds_w_one_variable)

                self.assertEqual(new_ds_w_one_variable.id, "local.%s" % new_ds_w_one_variable_title)
                ds = new_ds_w_one_variable.open_dataset()

                new_ds_w_one_variable_var_names.extend(['lat', 'lon', 'time'])

                self.assertSetEqual(set(ds.variables),
                                    set(new_ds_w_one_variable_var_names))

                new_ds_w_region_title = 'from_local_to_local_region'
                new_ds_w_region_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                                    datetime.datetime(1978, 11, 16, 23, 59)))
                new_ds_w_region_spatial_coverage = PolygonLike.convert("10,20,30,40")

                new_ds_w_region = soilmoisture_data_source.make_local(
                    new_ds_w_region_title,
                    time_range=new_ds_w_region_time_range,
                    region=new_ds_w_region_spatial_coverage)

                self.assertIsNotNone(new_ds_w_region)

                self.assertEqual(new_ds_w_region.id, "local.%s" % new_ds_w_region_title)

                self.assertEqual(new_ds_w_region.spatial_coverage(), new_ds_w_region_spatial_coverage)

                new_ds_w_region_title = 'from_local_to_local_region_one_var'
                new_ds_w_region_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                                    datetime.datetime(1978, 11, 16, 23, 59)))
                new_ds_w_region_var_names = VarNamesLike.convert(['sm'])
                new_ds_w_region_spatial_coverage = PolygonLike.convert("10,20,30,40")

                new_ds_w_region = soilmoisture_data_source.make_local(
                    new_ds_w_region_title,
                    time_range=new_ds_w_region_time_range,
                    var_names=new_ds_w_region_var_names,
                    region=new_ds_w_region_spatial_coverage)

                self.assertIsNotNone(new_ds_w_region)

                self.assertEqual(new_ds_w_region.id, "local.%s" % new_ds_w_region_title)

                self.assertEqual(new_ds_w_region.spatial_coverage(), new_ds_w_region_spatial_coverage)
                data_set = new_ds_w_region.open_dataset()
                new_ds_w_region_var_names.extend(['lat', 'lon', 'time'])

                self.assertSetEqual(set(data_set.variables), set(new_ds_w_region_var_names))

                new_ds_w_region_title = 'from_local_to_local_region_two_var_sm_uncertainty'
                new_ds_w_region_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                                    datetime.datetime(1978, 11, 16, 23, 59)))
                new_ds_w_region_var_names = VarNamesLike.convert(['sm', 'sm_uncertainty'])
                new_ds_w_region_spatial_coverage = PolygonLike.convert("10,20,30,40")

                new_ds_w_region = soilmoisture_data_source.make_local(
                    new_ds_w_region_title,
                    time_range=new_ds_w_region_time_range,
                    var_names=new_ds_w_region_var_names,
                    region=new_ds_w_region_spatial_coverage)

                self.assertIsNotNone(new_ds_w_region)

                self.assertEqual(new_ds_w_region.id, "local.%s" % new_ds_w_region_title)

                self.assertEqual(new_ds_w_region.spatial_coverage(), new_ds_w_region_spatial_coverage)
                data_set = new_ds_w_region.open_dataset()
                new_ds_w_region_var_names.extend(['lat', 'lon', 'time'])

                self.assertSetEqual(set(data_set.variables), set(new_ds_w_region_var_names))

                empty_ds_timerange = (datetime.datetime(2017, 12, 1, 0, 0), datetime.datetime(2017, 12, 31, 23, 59))
                with self.assertRaises(DataAccessError) as cm:
                    soilmoisture_data_source.make_local('empty_ds', time_range=empty_ds_timerange)
                self.assertEqual(f'Data source "{soilmoisture_data_source.id}" does not'
                                 f' seem to have any datasets in given'
                                 f' time range {TimeRangeLike.format(empty_ds_timerange)}',
                                 str(cm.exception))

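                # Without an explicit title the remote title is carried over; an explicit
                # second argument overrides it.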
                new_ds_time_range = TimeRangeLike.convert((datetime.datetime(1978, 11, 14, 0, 0),
                                                           datetime.datetime(1978, 11, 14, 23, 59)))

                new_ds = soilmoisture_data_source.make_local("title_test_copy", time_range=new_ds_time_range)
                self.assertIsNotNone(new_ds)
                self.assertEqual(new_ds.meta_info['title'], soilmoisture_data_source.meta_info['title'])

                title = "Title Test!"
                new_ds = soilmoisture_data_source.make_local("title_test_set", title, time_range=new_ds_time_range)
                self.assertIsNotNone(new_ds)
                self.assertEqual(new_ds.meta_info['title'], title)
Beispiel #43
0
    def _make_local(self,
                    local_ds: 'LocalDataSource',
                    time_range: TimeRangeLike.TYPE = None,
                    region: PolygonLike.TYPE = None,
                    var_names: VarNamesLike.TYPE = None,
                    monitor: Monitor = Monitor.NONE):

        local_id = local_ds.id

        time_range = TimeRangeLike.convert(time_range) if time_range else None
        region = PolygonLike.convert(region) if region else None
        var_names = VarNamesLike.convert(var_names) if var_names else None  # type: Sequence

        compression_level = get_config_value('NETCDF_COMPRESSION_LEVEL',
                                             NETCDF_COMPRESSION_LEVEL)
        compression_enabled = compression_level > 0

        encoding_update = dict()
        if compression_enabled:
            encoding_update.update({
                'zlib': True,
                'complevel': compression_level
            })

        local_path = os.path.join(local_ds.data_store.data_store_path,
                                  local_id)
        data_store_path = local_ds.data_store.data_store_path
        if not os.path.exists(local_path):
            os.makedirs(local_path)

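        # Walk over the files registered for this data source and copy or subset each one
        # into the local data store.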
        monitor.start("Sync " + self.id, total_work=len(self._files.items()))
        for remote_relative_filepath, coverage in self._files.items():
            child_monitor = monitor.child(work=1)

            file_name = os.path.basename(remote_relative_filepath)
            local_relative_filepath = os.path.join(local_id, file_name)
            local_absolute_filepath = os.path.join(data_store_path,
                                                   local_relative_filepath)

            remote_absolute_filepath = os.path.join(
                self._data_store.data_store_path, remote_relative_filepath)

            if isinstance(coverage, Tuple):

                time_coverage_start = coverage[0]
                time_coverage_end = coverage[1]

                if not time_range or (time_coverage_start >= time_range[0]
                                      and time_coverage_end <= time_range[1]):
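                    # With a region or variable subset the file is rewritten through xarray;
                    # otherwise it is copied verbatim.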
                    if region or var_names:

                        do_update_of_variables_meta_info_once = True
                        do_update_of_region_meta_info_once = True

                        remote_dataset = None
                        try:
                            remote_dataset = xr.open_dataset(remote_absolute_filepath)

                            if var_names:
                                remote_dataset = remote_dataset.drop([
                                    var_name for var_name in
                                    remote_dataset.data_vars.keys()
                                    if var_name not in var_names
                                ])

                            if region:
                                remote_dataset = normalize_impl(remote_dataset)
                                remote_dataset = subset_spatial_impl(
                                    remote_dataset, region)
                                geo_lon_min, geo_lat_min, geo_lon_max, geo_lat_max = region.bounds

                                remote_dataset.attrs['geospatial_lat_min'] = geo_lat_min
                                remote_dataset.attrs['geospatial_lat_max'] = geo_lat_max
                                remote_dataset.attrs['geospatial_lon_min'] = geo_lon_min
                                remote_dataset.attrs['geospatial_lon_max'] = geo_lon_max
                                if do_update_of_region_meta_info_once:
                                    local_ds.meta_info['bbox_maxx'] = geo_lon_max
                                    local_ds.meta_info['bbox_minx'] = geo_lon_min
                                    local_ds.meta_info['bbox_maxy'] = geo_lat_max
                                    local_ds.meta_info['bbox_miny'] = geo_lat_min
                                    do_update_of_region_meta_info_once = False

                            if compression_enabled:
                                for sel_var_name in remote_dataset.variables.keys():
                                    remote_dataset.variables.get(sel_var_name).encoding.update(encoding_update)

                            remote_dataset.to_netcdf(local_absolute_filepath)

                            child_monitor.progress(
                                work=1, msg=str(time_coverage_start))
                        finally:
                            if do_update_of_variables_meta_info_once and remote_dataset is not None:
                                variables_info = local_ds.meta_info.get('variables', [])
                                local_ds.meta_info['variables'] = [
                                    var_info for var_info in variables_info
                                    if var_info.get('name') in remote_dataset.variables.keys()
                                    and var_info.get('name') not in remote_dataset.dims.keys()
                                ]
                                do_update_of_variables_meta_info_once = False

                            local_ds.add_dataset(
                                os.path.join(local_id, file_name),
                                (time_coverage_start, time_coverage_end))

                        child_monitor.done()
                    else:
                        shutil.copy(remote_absolute_filepath,
                                    local_absolute_filepath)
                        local_ds.add_dataset(
                            local_relative_filepath,
                            (time_coverage_start, time_coverage_end))
                        child_monitor.done()
        monitor.done()
        return local_id
Beispiel #44
0
    def _make_local(self,
                    local_ds: 'LocalDataSource',
                    time_range: TimeRangeLike.TYPE = None,
                    region: PolygonLike.TYPE = None,
                    var_names: VarNamesLike.TYPE = None,
                    monitor: Monitor = Monitor.NONE):

        local_id = local_ds.id

        time_range = TimeRangeLike.convert(time_range) if time_range else None
        var_names = VarNamesLike.convert(var_names) if var_names else None  # type: Sequence

        compression_level = get_config_value('NETCDF_COMPRESSION_LEVEL', NETCDF_COMPRESSION_LEVEL)
        compression_enabled = compression_level > 0

        encoding_update = dict()
        if compression_enabled:
            encoding_update.update({'zlib': True, 'complevel': compression_level})

        local_path = os.path.join(local_ds.data_store.data_store_path, local_id)
        data_store_path = local_ds.data_store.data_store_path
        if not os.path.exists(local_path):
            os.makedirs(local_path)

        monitor.start("Sync " + self.id, total_work=len(self._files.items()))
        for remote_relative_filepath, coverage in self._files.items():
            child_monitor = monitor.child(work=1)

            file_name = os.path.basename(remote_relative_filepath)
            local_relative_filepath = os.path.join(local_id, file_name)
            local_absolute_filepath = os.path.join(data_store_path, local_relative_filepath)

            remote_absolute_filepath = os.path.join(self._data_store.data_store_path, remote_relative_filepath)

            if isinstance(coverage, Tuple):

                time_coverage_start = coverage[0]
                time_coverage_end = coverage[1]

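                # Skip files whose time coverage lies outside the requested time range.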
                if not time_range or (time_coverage_start >= time_range[0] and time_coverage_end <= time_range[1]):
                    if region or var_names:

                        do_update_of_variables_meta_info_once = True
                        do_update_of_region_meta_info_once = True

                        remote_dataset = None
                        try:
                            remote_dataset = xr.open_dataset(remote_absolute_filepath)

                            if var_names:
                                remote_dataset = remote_dataset.drop(
                                    [var_name for var_name in remote_dataset.data_vars.keys()
                                     if var_name not in var_names])

                            if region:
                                remote_dataset = normalize_impl(remote_dataset)
                                remote_dataset = adjust_spatial_attrs_impl(subset_spatial_impl(remote_dataset, region),
                                                                           allow_point=False)

                                if do_update_of_region_meta_info_once:
                                    # bounding box taken from the geospatial_* attrs set by adjust_spatial_attrs_impl above
                                    local_ds.meta_info['bbox_maxx'] = remote_dataset.attrs['geospatial_lon_max']
                                    local_ds.meta_info['bbox_minx'] = remote_dataset.attrs['geospatial_lon_min']
                                    local_ds.meta_info['bbox_maxy'] = remote_dataset.attrs['geospatial_lat_max']
                                    local_ds.meta_info['bbox_miny'] = remote_dataset.attrs['geospatial_lat_min']
                                    do_update_of_region_meta_info_once = False

                            if compression_enabled:
                                for sel_var_name in remote_dataset.variables.keys():
                                    remote_dataset.variables.get(sel_var_name).encoding.update(encoding_update)

                            remote_dataset.to_netcdf(local_absolute_filepath)

                            child_monitor.progress(work=1, msg=str(time_coverage_start))
                        finally:
                            if do_update_of_variables_meta_info_once and remote_dataset is not None:
                                variables_info = local_ds.meta_info.get('variables', [])
                                local_ds.meta_info['variables'] = [var_info for var_info in variables_info
                                                                   if var_info.get('name')
                                                                   in remote_dataset.variables.keys()
                                                                   and var_info.get('name')
                                                                   not in remote_dataset.dims.keys()]
                                # noinspection PyUnusedLocal
                                do_update_of_variables_meta_info_once = False

                            local_ds.add_dataset(os.path.join(local_id, file_name),
                                                 (time_coverage_start, time_coverage_end))

                        child_monitor.done()
                    else:
                        shutil.copy(remote_absolute_filepath, local_absolute_filepath)
                        local_ds.add_dataset(local_relative_filepath, (time_coverage_start, time_coverage_end))
                        child_monitor.done()
        monitor.done()
        return local_id