Example #1
    def test_find_data_sources_default_data_store(self):
        size_before = len(DATA_STORE_REGISTRY)
        orig_stores = list(DATA_STORE_REGISTRY.get_data_stores())
        try:
            DATA_STORE_REGISTRY._data_stores.clear()
            self.assertEqual(0, len(DATA_STORE_REGISTRY))

            from cate.ds.esa_cci_ftp import set_default_data_store as set_default_data_store_ftp
            set_default_data_store_ftp()
            self.assertEqual(1, len(DATA_STORE_REGISTRY))

            data_sources = find_data_sources()
            self.assertIsNotNone(data_sources)
            self.assertEqual(len(data_sources), 98)
            self.assertEqual(data_sources[0].id, "AEROSOL_ATSR2_SU_L3_V4.2_DAILY")

            data_sources = find_data_sources(ds_id="AEROSOL_ATSR2_SU_L3_V4.2_DAILY")
            self.assertIsNotNone(data_sources)
            self.assertEqual(len(data_sources), 1)

            data_sources = find_data_sources(ds_id="ZZ")
            self.assertIsNotNone(data_sources)
            self.assertEqual(len(data_sources), 0)
        finally:
            DATA_STORE_REGISTRY._data_stores.clear()
            for data_store in orig_stores:
                DATA_STORE_REGISTRY.add_data_store(data_store)
        self.assertEqual(size_before, len(DATA_STORE_REGISTRY))
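The save/clear/restore pattern used above (snapshot the registered stores, empty the registry, restore everything in the finally block) recurs in several of the examples that follow. A minimal sketch of the same idea as a reusable context manager, relying only on the registry calls already shown here (get_data_stores, add_data_store and the private _data_stores dict), could look like this:

from contextlib import contextmanager

from cate.core.ds import DATA_STORE_REGISTRY


@contextmanager
def isolated_data_store_registry():
    # Snapshot the registered stores, hand out an empty registry, and restore
    # the original stores afterwards, even when the enclosed code fails.
    orig_stores = list(DATA_STORE_REGISTRY.get_data_stores())
    DATA_STORE_REGISTRY._data_stores.clear()
    try:
        yield DATA_STORE_REGISTRY
    finally:
        DATA_STORE_REGISTRY._data_stores.clear()
        for data_store in orig_stores:
            DATA_STORE_REGISTRY.add_data_store(data_store)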
Example #2
    def test_find_data_sources_default_data_store(self):
        size_before = len(DATA_STORE_REGISTRY)
        orig_stores = list(DATA_STORE_REGISTRY.get_data_stores())
        try:
            DATA_STORE_REGISTRY._data_stores.clear()
            self.assertEqual(0, len(DATA_STORE_REGISTRY))

            from cate.ds.esa_cci_ftp import set_default_data_store as set_default_data_store_ftp
            set_default_data_store_ftp()
            self.assertEqual(1, len(DATA_STORE_REGISTRY))

            data_sources = find_data_sources()
            self.assertIsNotNone(data_sources)
            self.assertEqual(len(data_sources), 98)
            self.assertEqual(data_sources[0].id,
                             "AEROSOL_ATSR2_SU_L3_V4.2_DAILY")

            data_sources = find_data_sources(
                ds_id="AEROSOL_ATSR2_SU_L3_V4.2_DAILY")
            self.assertIsNotNone(data_sources)
            self.assertEqual(len(data_sources), 1)

            data_sources = find_data_sources(ds_id="ZZ")
            self.assertIsNotNone(data_sources)
            self.assertEqual(len(data_sources), 0)
        finally:
            DATA_STORE_REGISTRY._data_stores.clear()
            for data_store in orig_stores:
                DATA_STORE_REGISTRY.add_data_store(data_store)
        self.assertEqual(size_before, len(DATA_STORE_REGISTRY))
Example #3
    def tearDownClass(cls):
        # clean up frozen files
        for d in DATA_STORE_REGISTRY.get_data_stores():
            d.get_updates(reset=True)

        DATA_STORE_REGISTRY._data_stores.clear()
        for data_store in cls._orig_stores:
            DATA_STORE_REGISTRY.add_data_store(data_store)
Example #4
def _create_test_data_store():
    with open(os.path.join(os.path.dirname(__file__), 'esgf-index-cache.json')) as fp:
        json_text = fp.read()
    json_dict = json.loads(json_text)
    # The EsaCciOdpDataStore created with an initial json_dict avoids fetching it from remote
    data_store = EsaCciOdpDataStore('test-odp', index_cache_json_dict=json_dict)
    DATA_STORE_REGISTRY.add_data_store(data_store)
    return data_store
Example #5
def set_default_data_store():
    """
    Defines the ESA CCI ODP data store and makes it the default data store.

    All data sources of the FTP data store are read from a JSON file ``esa_cci_ftp.json`` contained in this package.
    This JSON file has been generated from a scan of the entire FTP tree.
    """
    DATA_STORE_REGISTRY.add_data_store(EsaCciOdpDataStore())
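A short usage sketch for the function above. It assumes the ODP variant lives in cate.ds.esa_cci_odp (mirroring the cate.ds.esa_cci_ftp import from Example #1) and that the store is registered under the id 'esa_cci_odp' used by later examples; adjust both if your cate version differs.

from cate.core.ds import DATA_STORE_REGISTRY
from cate.ds.esa_cci_odp import set_default_data_store

set_default_data_store()
# 'esa_cci_odp' is the store id other examples in this listing look up.
odp_store = DATA_STORE_REGISTRY.get_data_store('esa_cci_odp')
print(odp_store.id if odp_store is not None else 'ODP store not registered')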
Example #6
    def setUp(self):
        self.data_store = _create_test_data_store()
        data_sources = self.data_store.query('OC')
        self.assertIsNotNone(data_sources)
        self.assertIsNotNone(data_sources[0])
        self.data_source = data_sources[0]
        self.tmp_dir = tempfile.mkdtemp()

        self._existing_local_data_store = DATA_STORE_REGISTRY.get_data_store('local')
        DATA_STORE_REGISTRY.add_data_store(LocalDataStore('local', self.tmp_dir))
Example #7
def _create_test_data_store():
    with open(os.path.join(os.path.dirname(__file__), 'esgf-index-cache.json')) as fp:
        json_text = fp.read()
    json_dict = json.loads(json_text)
    for d in DATA_STORE_REGISTRY.get_data_stores():
        d.get_updates(reset=True)
    # The EsaCciOdpDataStore created with an initial json_dict avoids fetching it from remote
    data_store = EsaCciOdpDataStore('test-odp', index_cache_json_dict=json_dict, index_cache_update_tag='test1')
    DATA_STORE_REGISTRY.add_data_store(data_store)
    return data_store
Example #8
    def setUp(self):
        self.data_store = _create_test_data_store()
        oc_data_sources = self.data_store.query(query_expr='OC')
        self.assertIsNotNone(oc_data_sources)
        self.assertIsNotNone(oc_data_sources[0])
        self.first_oc_data_source = oc_data_sources[0]
        self.tmp_dir = tempfile.mkdtemp()

        self._existing_local_data_store = DATA_STORE_REGISTRY.get_data_store('local')
        DATA_STORE_REGISTRY.add_data_store(LocalDataStore('local', self.tmp_dir))
Example #9
 def test_open_dataset_duplicated_names(self):
     try:
         ds_a1 = SimpleDataSource('duplicate')
         ds_a2 = SimpleDataSource('duplicate')
         duplicated_cat = SimpleDataStore('duplicated_cat', [ds_a1, ds_a2])
         DATA_STORE_REGISTRY.add_data_store(duplicated_cat)
         with self.assertRaises(ValueError) as cm:
             open_dataset('duplicate')
         self.assertEqual("2 data_sources found for the given query term 'duplicate'", str(cm.exception))
     finally:
         DATA_STORE_REGISTRY.remove_data_store('duplicated_cat')
Example #10
    def setUp(self):
        self.tmp_dir = tempfile.mkdtemp()
        self.data_store = LocalDataStore('test', self.tmp_dir)
        self.assertTrue(os.path.isdir(self.tmp_dir))
        self.assertEqual(0, len(os.listdir(self.tmp_dir)))
        self.data_store.add_pattern("ozone", "/DATA/ozone/*/*.nc")
        self.data_store.add_pattern("aerosol", ["/DATA/aerosol/*/*/AERO_V1*.nc", "/DATA/aerosol/*/*/AERO_V2*.nc"])
        self.assertEqual(2, len(os.listdir(self.tmp_dir)))

        self._existing_local_data_store = DATA_STORE_REGISTRY.get_data_store('local')
        DATA_STORE_REGISTRY.add_data_store(LocalDataStore('local', self.tmp_dir))
Example #11
    def make_local(self,
                   local_name: str,
                   local_id: str = None,
                   time_range: TimeRangeLike.TYPE = None,
                   region: PolygonLike.TYPE = None,
                   var_names: VarNamesLike.TYPE = None,
                   monitor: Monitor = Monitor.NONE) -> Optional[DataSource]:

        time_range = TimeRangeLike.convert(time_range) if time_range else None
        region = PolygonLike.convert(region) if region else None
        var_names = VarNamesLike.convert(var_names) if var_names else None

        local_store = DATA_STORE_REGISTRY.get_data_store('local')
        if not local_store:
            add_to_data_store_registry()
            local_store = DATA_STORE_REGISTRY.get_data_store('local')
        if not local_store:
            raise ValueError('Cannot initialize `local` DataStore')

        _uuid = LocalDataStore.generate_uuid(ref_id=self.id, time_range=time_range, region=region, var_names=var_names)

        if not local_name or len(local_name) == 0:
            local_name = "local.{}.{}".format(self.id, _uuid)
            existing_ds_list = local_store.query(ds_id=local_name)
            if len(existing_ds_list) == 1:
                return existing_ds_list[0]
        else:
            existing_ds_list = local_store.query(ds_id='local.%s' % local_name)
            if len(existing_ds_list) == 1:
                if existing_ds_list[0].meta_info.get('uuid', None) == _uuid:
                    return existing_ds_list[0]
                else:
                    raise ValueError('Datastore {} already contains dataset {}'.format(local_store.id, local_name))

        local_meta_info = self.meta_info.copy()
        local_meta_info['ref_uuid'] = local_meta_info.get('uuid', None)
        local_meta_info['uuid'] = _uuid

        local_ds = local_store.create_data_source(local_name, region, local_name,
                                                  time_range=time_range, var_names=var_names,
                                                  meta_info=local_meta_info)
        if local_ds:
            if not local_ds.is_complete:
                self._make_local(local_ds, time_range, region, var_names, monitor=monitor)

            if local_ds.is_empty:
                local_store.remove_data_source(local_ds)
                return None

            local_store.register_ds(local_ds)
            return local_ds
        return None
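Seen from the caller's side, make_local above either returns an existing local copy, creates a new data source named local.<local_name>, or raises ValueError when a local data source of that name already exists with different parameters. A hedged sketch, in which the remote dataset id and the variable name are placeholders only:

from cate.core.ds import DATA_STORE_REGISTRY

odp_store = DATA_STORE_REGISTRY.get_data_store('esa_cci_odp')  # assumes the ODP store is registered
data_source = odp_store.query(ds_id='esacci.EXAMPLE.remote-dataset-id')[0]  # placeholder id
try:
    local_ds = data_source.make_local('my_subset',
                                      time_range=['2007-01-01', '2007-01-31'],
                                      var_names=['example_variable'])  # placeholder variable
except ValueError:
    # 'local.my_subset' already exists with different subset parameters.
    local_ds = None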
Example #12
def set_default_data_store():
    """
    Defines the ESA CCI Data Portal's FTP data store and makes it the default data store.

    All data sources of the FTP data store are read from a JSON file ``esa_cci_ftp.json``
    contained in this package.
    This JSON file has been generated from a scan of the entire FTP tree.
    """
    cate_data_root_dir = os.environ.get('CATE_ESA_CCI_FTP_DATA_STORE_PATH',
                                        os.path.join(get_data_stores_path(), 'esa_cci_ftp'))
    json_data = pkgutil.get_data('cate.ds', 'esa_cci_ftp.json')
    data_store = FileSetDataStore.from_json('esa_cci_ftp', cate_data_root_dir, json_data.decode('utf-8'))
    DATA_STORE_REGISTRY.add_data_store(data_store)
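The CATE_ESA_CCI_FTP_DATA_STORE_PATH environment variable read above lets you relocate the FTP store's root directory before registering it; the path below is only an example:

import os

from cate.ds.esa_cci_ftp import set_default_data_store

os.environ['CATE_ESA_CCI_FTP_DATA_STORE_PATH'] = '/data/esa_cci_ftp'  # example path
set_default_data_store()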
Example #13
def _create_test_data_store():
    with open(os.path.join(os.path.dirname(__file__), 'resources/os-data-list.json')) as fp:
        json_text = fp.read()
    json_dict = json.loads(json_text)
    with open(os.path.join(os.path.dirname(__file__), 'resources/drs_ids.txt')) as fp:
        drs_ids = fp.read().split('\n')
    for d in DATA_STORE_REGISTRY.get_data_stores():
        d.get_updates(reset=True)
    metadata_path = os.path.join(os.path.dirname(__file__), 'resources/datasources/metadata')
    # The EsaCciOdpDataStore created with an initial json_dict and a metadata dir avoids fetching from remote
    data_store = EsaCciOdpDataStore('test-odp', index_cache_json_dict=json_dict, index_cache_update_tag='test1',
                                    meta_data_store_path=metadata_path, drs_ids=drs_ids)
    DATA_STORE_REGISTRY.add_data_store(data_store)
    return data_store
Example #14
    def get_data_source_temporal_coverage(self, data_store_id: str, data_source_id: str, monitor: Monitor) \
            -> Dict[str, Any]:
        """
        Get the temporal coverage of the data source.

        :param data_store_id: ID of the data store
        :param data_source_id: ID of the data source
        :param monitor: a progress monitor
        :return: JSON-serializable dictionary with the temporal coverage of the data source.
        """
        data_store = DATA_STORE_REGISTRY.get_data_store(data_store_id)
        if data_store is None:
            raise ValueError('Unknown data store: "%s"' % data_store_id)
        data_sources = data_store.query(ds_id=data_source_id)
        if not data_sources:
            raise ValueError('data source "%s" not found' % data_source_id)
        data_source = data_sources[0]
        temporal_coverage = data_source.temporal_coverage(monitor=monitor)
        meta_info = OrderedDict()
        if temporal_coverage:
            start, end = temporal_coverage
            meta_info['temporal_coverage_start'] = start.strftime('%Y-%m-%d')
            meta_info['temporal_coverage_end'] = end.strftime('%Y-%m-%d')
        # TODO mz add available data information
        return meta_info
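The returned dictionary thus holds at most the two date strings, and is empty when the data source has no known temporal coverage. An illustrative (made-up) result:

example_result = {'temporal_coverage_start': '1997-09-03',
                  'temporal_coverage_end': '2012-12-31'}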
Example #15
    def get_data_sources(self, data_store_id: str, monitor: Monitor) -> list:
        """
        Get data sources for a given data store.

        :param data_store_id: ID of the data store
        :param monitor: a progress monitor
        :return: JSON-serializable list of data sources, sorted by name.
        """
        data_store = DATA_STORE_REGISTRY.get_data_store(data_store_id)
        if data_store is None:
            raise ValueError('Unknown data store: "%s"' % data_store_id)
        data_sources = data_store.query(monitor=monitor)
        if data_store_id == 'esa_cci_odp':
            # Filter ESA Open Data Portal data sources
            data_source_dict = {ds.id: ds for ds in data_sources}
            # noinspection PyTypeChecker
            data_source_ids = filter_fileset(data_source_dict.keys(),
                                             includes=conf.get_config_value('included_data_sources', default=None),
                                             excludes=conf.get_config_value('excluded_data_sources', default=None))
            data_sources = [data_source_dict[ds_id] for ds_id in data_source_ids]

        data_sources = sorted(data_sources, key=lambda ds: ds.title or ds.id)
        return [dict(id=data_source.id,
                     title=data_source.title,
                     meta_info=data_source.meta_info) for data_source in data_sources]
Example #16
    def get_data_sources(self, data_store_id: str,
                         monitor: Monitor) -> List[Dict[str, Any]]:
        """
        Get data sources for a given data store.

        :param data_store_id: ID of the data store
        :param monitor: a progress monitor
        :return: JSON-serializable list of data sources, sorted by name.
        """
        data_store = DATA_STORE_REGISTRY.get_data_store(data_store_id)
        if data_store is None:
            raise ValueError('Unknown data store: "%s"' % data_store_id)
        data_sources = data_store.query(monitor=monitor)
        if data_store_id == 'esa_cci_odp_os':
            # Filter ESA Open Data Portal data sources
            data_source_dict = {
                ds.id: ds
                for ds in data_sources if ds.cate_openable
            }
            data_source_ids = list(data_source_dict.keys())
            data_sources = [
                data_source_dict[ds_id] for ds_id in data_source_ids
            ]

        data_sources = sorted(data_sources, key=lambda ds: ds.title or ds.id)
        return [
            dict(id=data_source.id,
                 title=data_source.title,
                 meta_info=data_source.meta_info)
            for data_source in data_sources
        ]
Example #17
 def test_set_default_data_store(self):
     set_default_data_store()
     data_stores_path = get_data_stores_path()
     data_store = DATA_STORE_REGISTRY.get_data_store('esa_cci_ftp')
     self.assertIsInstance(data_store, FileSetDataStore)
     self.assertEqual(data_store.root_dir,
                      os.path.join(data_stores_path, 'esa_cci_ftp'))
Example #18
 def test_make_local_wo_subsets(self):
     data_store = EsaCciOdpDataStore()
     local_data_store = DATA_STORE_REGISTRY.get_data_store('local')
     cci_dataset_collection = 'esacci.OZONE.mon.L3.NP.multi-sensor.multi-platform.MERGED.fv0002.r1'
     data_source = data_store.query(cci_dataset_collection)[0]
     random_string = f"test{random.choice(string.ascii_lowercase)}"
     ds = data_source.make_local(random_string)
     self.assertIsNotNone(ds)
     local_data_store.remove_data_source(f"local.{random_string}")
Example #19
    def get_data_stores(self) -> list:
        """
        Get registered data stores.

        :return: JSON-serializable list of data stores, sorted by name.
        """
        data_stores = sorted(DATA_STORE_REGISTRY.get_data_stores(), key=lambda ds: ds.title or ds.id)
        return [dict(id=data_store.id,
                     title=data_store.title,
                     isLocal=data_store.is_local) for data_store in data_stores]
Example #20
    def get_data_stores(self) -> List[Dict[str, Any]]:
        """
        Get registered data stores.

        :return: JSON-serializable list of data stores, sorted by name.
        """
        data_stores = sorted(DATA_STORE_REGISTRY.get_data_stores(), key=lambda ds: ds.title or ds.id)
        return [dict(id=data_store.id,
                     title=data_store.title,
                     isLocal=data_store.is_local,
                     description=data_store.description,
                     notices=[notice.to_dict() for notice in data_store.notices]) for data_store in data_stores]
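An illustrative (made-up) entry of the list returned by the richer variant above; only the key names are taken from the code:

example_entry = {
    'id': 'esa_cci_odp',
    'title': 'ESA CCI Open Data Portal',
    'isLocal': False,
    'description': 'Remote data store of the ESA CCI Open Data Portal',
    'notices': [],
}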
Example #21
    def test_cate_init_registers_data_store(self):
        if DATA_STORE_REGISTRY is None:
            print("EsdcDataStoreTest not executed, no Cate found.")
            return

        from esdl.cate.esdc import cate_init
        cate_init()

        data_store = DATA_STORE_REGISTRY.get_data_store('esdc')
        self.assertIsNotNone(data_store)
        self.assertEqual(data_store.id, 'esdc')
        self.assertEqual(data_store.title, 'Earth System Data Cube')
Example #22
    def setUp(self):
        self.tmp_dir = tempfile.mkdtemp()
        self._dummy_store = LocalDataStore('dummy', 'dummy')

        self._local_data_store = LocalDataStore('test', os.path.join(os.path.dirname(__file__),
                                                                     'resources/datasources/local/'))

        self.ds1 = LocalDataSource("ozone",
                                   ["/DATA/ozone/*/*.nc"],
                                   self._dummy_store)
        self.ds2 = LocalDataSource("aerosol",
                                   ["/DATA/aerosol/*/A*.nc", "/DATA/aerosol/*/B*.nc"],
                                   self._dummy_store)

        self.empty_ds = LocalDataSource("empty",
                                        [],
                                        self._dummy_store)

        self.ds3 = LocalDataSource("w_temporal_1",
                                   OrderedDict([
                                       ("/DATA/file1.nc",
                                        (datetime.datetime(2017, 1, 27, 0, 0),
                                         datetime.datetime(2017, 1, 28, 0, 0))),
                                       ("/DATA/file2.nc",
                                        (datetime.datetime(2017, 1, 28, 0, 0),
                                         datetime.datetime(2017, 1, 29, 0, 0)))]),
                                   self._dummy_store)

        self.ds4 = LocalDataSource("w_temporal_2",
                                   OrderedDict(),
                                   self._dummy_store)

        self.assertIsNotNone(self.ds1)
        self.assertIsNotNone(self.ds2)
        self.assertIsNotNone(self.empty_ds)
        self.assertIsNotNone(self.ds3)
        self.assertIsNotNone(self.ds4)

        self._existing_local_data_store = DATA_STORE_REGISTRY.get_data_store('local')
        DATA_STORE_REGISTRY.add_data_store(LocalDataStore('local', self.tmp_dir))
Example #23
    def get_data_stores(self) -> list:
        """
        Get registered data stores.

        :return: JSON-serializable list of data stores, sorted by name.
        """
        data_stores = DATA_STORE_REGISTRY.get_data_stores()
        data_store_list = []
        for data_store in data_stores:
            data_store_list.append(
                dict(id=data_store.name, name=data_store.name, description=''))

        return sorted(data_store_list, key=lambda ds: ds['name'])
Example #24
    def remove_local_datasource(self, data_source_name: str,
                                remove_files: bool) -> list:
        """
        Removes the data source (and optionally the files belonging to it) from the local data store.

        :param data_source_name: The name of the local data source.
        :param remove_files: Whether to remove the files belonging to this data source.
        :return: JSON-serializable list of 'local' data sources, sorted by name.
        """
        data_store = DATA_STORE_REGISTRY.get_data_store('local')
        if data_store is None:
            raise ValueError('Unknown data store: "%s"' % 'local')
        data_store.remove_data_source(data_source_name, remove_files)
        return self.get_data_sources('local', monitor=Monitor.NONE)
Example #25
    def remove_local_data_source(self, data_source_id: str, remove_files: bool, monitor: Monitor) -> list:
        """
        Removes the data source (and optionally the files belonging to it) from the local data store.

        :param data_source_id: The identifier of the local data source.
        :param remove_files: Whether to remove the files belonging to this data source.
        :param monitor: a progress monitor.
        :return: JSON-serializable list of 'local' data sources, sorted by name.
        """
        data_store = DATA_STORE_REGISTRY.get_data_store('local')
        if data_store is None:
            raise ValueError('Unknown data store: "%s"' % 'local')
        # TODO use monitor, while removing files
        data_store.remove_data_source(data_source_id, remove_files)
        return self.get_data_sources('local', monitor=monitor)
Example #26
 def create_catalog_differences(self, new_ds):
     for d in DATA_STORE_REGISTRY.get_data_stores():
         diff_file = os.path.join(d.data_store_path, d._get_update_tag() + '-diff.json')
         if os.path.isfile(diff_file):
             with open(diff_file, 'r') as json_in:
                 report = json.load(json_in)
             report['new'].append(new_ds)
         else:
             generated = datetime.datetime.now()
             report = {"generated": str(generated),
                       "source_ref_time": str(generated),
                       "new": [new_ds],
                       "del": list()}
         with open(diff_file, 'w') as json_out:
             json.dump(report, json_out)
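For reference, the '<update-tag>-diff.json' report written above has the following shape; all values are illustrative and the dataset id is made up:

example_report = {
    "generated": "2021-01-01 12:00:00.000000",
    "source_ref_time": "2021-01-01 12:00:00.000000",
    "new": ["esacci.EXAMPLE.new-dataset-id"],
    "del": [],
}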
Example #27
    def make_local(self,
                   local_name: str,
                   local_id: str = None,
                   time_range: TimeRangeLike.TYPE = None,
                   region: PolygonLike.TYPE = None,
                   var_names: VarNamesLike.TYPE = None,
                   monitor: Monitor = Monitor.NONE) -> 'DataSource':
        if not local_name:
            raise ValueError('local_name is required')

        local_store = DATA_STORE_REGISTRY.get_data_store('local')
        if not local_store:
            add_to_data_store_registry()
            local_store = DATA_STORE_REGISTRY.get_data_store('local')
        if not local_store:
            raise ValueError('Cannot initialize `local` DataStore')

        local_ds = local_store.create_data_source(local_name, region,
                                                  _REFERENCE_DATA_SOURCE_TYPE,
                                                  self.name)
        self._make_local(local_ds, time_range, region, var_names, monitor)
        return local_ds
Example #28
 def test_make_local_spatial_2(self):
     data_store = EsaCciOdpDataStore()
     local_data_store = DATA_STORE_REGISTRY.get_data_store('local')
     # The following reproduces Cate issues #823, #822, #818, #816, #783, #892, #900:
     cci_dataset_collection = 'esacci.SST.day.L4.SSTdepth.multi-sensor.multi-platform.OSTIA.1-1.r1'
     data_source = data_store.query(cci_dataset_collection)[0]
     ds_from_remote_source = data_source.open_dataset(time_range=['1991-09-01', '1991-09-03'],
                                                      var_names=['sea_ice_fraction', 'analysed_sst'],
                                                      region='-2.8, 70.6,-2.7, 70.7')
     self.assertIsNotNone(ds_from_remote_source)
     random_string = f"test{random.choice(string.ascii_lowercase)}"
     ds = data_source.make_local(random_string,
                                 time_range=['1991-09-01', '1991-09-03'],
                                 region='-2.8, 70.6,-2.7, 70.7')
     self.assertIsNotNone(ds)
     local_data_store.remove_data_source(f"local.{random_string}")
Example #29
    def add_local_data_source(self, data_source_id: str, file_path_pattern: str, monitor: Monitor):
        """
        Adds a local data source made up of the specified files.

        :param data_source_id: The identifier of the local data source.
        :param file_path_pattern: The file path pattern, which may contain wildcards.
        :param monitor: a progress monitor.
        :return: JSON-serializable list of 'local' data sources, sorted by name.
        """
        data_store = DATA_STORE_REGISTRY.get_data_store('local')
        if data_store is None:
            raise ValueError('Unknown data store: "%s"' % 'local')
        with monitor.starting('Adding local data source', 100):
            # TODO use monitor, while extracting metadata
            data_store.add_pattern(data_source_id=data_source_id, files=file_path_pattern)
            return self.get_data_sources('local', monitor=monitor.child(100))
Example #30
    def test_make_local_spatial_1(self):
        data_store = EsaCciOdpDataStore()
        local_data_store = DATA_STORE_REGISTRY.get_data_store('local')
        # The following reproduces Cate issues #823, #822, #818, #816, #783, #892, #900:

        cci_dataset_collection = 'esacci.SST.satellite-orbit-frequency.L3U.SSTskin.AVHRR-3.Metop-A.AVHRRMTA_G.2-1.r1'
        data_source = data_store.query(cci_dataset_collection)[0]
        ds_from_remote_source = data_source.open_dataset(time_range=['2006-11-21', '2006-11-23'],
                                                         var_names=['sst_dtime', 'sea_surface_temperature_depth'],
                                                         region='-49.8, 13.1,-49.7, 13.2')
        self.assertIsNotNone(ds_from_remote_source)
        random_string = f"test{random.choice(string.ascii_lowercase)}"
        ds = data_source.make_local(random_string,
                                    time_range=['2006-11-21', '2006-11-23'],
                                    region='-49.8, 13.1,-49.7, 13.2')
        self.assertIsNotNone(ds)
        local_data_store.remove_data_source(f"local.{random_string}")
Example #31
    def get_data_sources(self, data_store_id: str, monitor: Monitor) -> list:
        """
        Get data sources for a given data store.

        :param data_store_id: ID of the data store
        :param monitor: a progress monitor
        :return: JSON-serializable list of data sources, sorted by name.
        """
        data_store = DATA_STORE_REGISTRY.get_data_store(data_store_id)
        if data_store is None:
            raise ValueError('Unknown data store: "%s"' % data_store_id)

        data_sources = data_store.query(monitor=monitor)
        data_source_list = []
        for data_source in data_sources:
            data_source_list.append(
                dict(id=data_source.name,
                     name=data_source.name,
                     meta_info=data_source.meta_info))

        return sorted(data_source_list, key=lambda ds: ds['name'])
Example #32
    def get_data_sources(self, data_store_id: str,
                         monitor: Monitor) -> List[Dict[str, Any]]:
        """
        Get data sources for a given data store.

        :param data_store_id: ID of the data store
        :param monitor: a progress monitor
        :return: JSON-serializable list of data sources, sorted by name.
        """
        data_store = DATA_STORE_REGISTRY.get_data_store(data_store_id)
        if data_store is None:
            raise ValueError('Unknown data store: "%s"' % data_store_id)
        data_sources = data_store.query(monitor=monitor)
        if data_store_id == 'esa_cci_odp_os':
            # Filter ESA Open Data Portal data sources
            data_source_dict = {ds.id: ds for ds in data_sources}
            data_source_ids = list(data_source_dict.keys())
            data_sources = [
                data_source_dict[ds_id] for ds_id in data_source_ids
            ]

        data_sources = sorted(data_sources, key=lambda ds: ds.title or ds.id)

        serialized_data_sources = []
        for data_source in data_sources:
            verification_flags = list(data_source.verification_flags) \
                if data_source.verification_flags is not None else None
            type_specifier = data_source.type_specifier \
                if data_source.type_specifier is not None else None
            serialized_data_sources.append(
                dict(id=data_source.id,
                     title=data_source.title,
                     metaInfo=data_source.meta_info,
                     verificationFlags=verification_flags,
                     typeSpecifier=type_specifier))
        return serialized_data_sources
Example #33
    def make_local(self,
                   local_name: str,
                   local_id: str = None,
                   time_range: TimeRangeLike.TYPE = None,
                   region: PolygonLike.TYPE = None,
                   var_names: VarNamesLike.TYPE = None,
                   monitor: Monitor = Monitor.NONE) -> Optional[DataSource]:

        time_range = TimeRangeLike.convert(time_range) if time_range else None
        region = PolygonLike.convert(region) if region else None
        var_names = VarNamesLike.convert(var_names) if var_names else None

        ds_id = local_name
        title = local_id

        local_store = DATA_STORE_REGISTRY.get_data_store('local')
        if not local_store:
            add_to_data_store_registry()
            local_store = DATA_STORE_REGISTRY.get_data_store('local')
        if not local_store:
            raise ValueError('Cannot initialize `local` DataStore')

        uuid = LocalDataStore.generate_uuid(ref_id=self.id,
                                            time_range=time_range,
                                            region=region,
                                            var_names=var_names)

        if not ds_id or len(ds_id) == 0:
            ds_id = "local.{}.{}".format(self.id, uuid)
            existing_ds_list = local_store.query(ds_id=ds_id)
            if len(existing_ds_list) == 1:
                return existing_ds_list[0]
        else:
            existing_ds_list = local_store.query(ds_id='local.%s' % ds_id)
            if len(existing_ds_list) == 1:
                if existing_ds_list[0].meta_info.get('uuid', None) == uuid:
                    return existing_ds_list[0]
                else:
                    raise ValueError(
                        'Datastore {} already contains dataset {}'.format(
                            local_store.id, ds_id))

        local_meta_info = self.meta_info.copy()
        local_meta_info['ref_uuid'] = local_meta_info.get('uuid', None)
        local_meta_info['uuid'] = uuid

        local_ds = local_store.create_data_source(ds_id,
                                                  title=title,
                                                  time_range=time_range,
                                                  region=region,
                                                  var_names=var_names,
                                                  meta_info=local_meta_info,
                                                  lock_file=True)
        if local_ds:
            if not local_ds.is_complete:
                try:
                    self._make_local(local_ds,
                                     time_range,
                                     region,
                                     var_names,
                                     monitor=monitor)
                except Cancellation as c:
                    local_store.remove_data_source(local_ds)
                    raise c
                except Exception as e:
                    if local_ds.is_empty:
                        local_store.remove_data_source(local_ds)
                    raise e

            if local_ds.is_empty:
                local_store.remove_data_source(local_ds)
                return None

            local_store.register_ds(local_ds)
            return local_ds
        else:
            return None
Example #34
def long_term_average(source: str,
                      year_min: int,
                      year_max: int,
                      file: str,
                      var: VarNamesLike.TYPE = None,
                      save: bool = False,
                      monitor: Monitor = Monitor.NONE) -> xr.Dataset:
    """
    Perform the long term monthly average of the given monthly or daily data
    source for the given range of years.

    Depending on the given year range, data size, as well as internet
    connection quality, this operation can potentially take a very long time
    to finish.

    Careful consideration is needed in choosing the var parameter to create
    meaningful outputs. This is unique for each data source.

    :param source: The data source from which to extract the monthly average
    :param year_min: The earliest year of the desired time range
    :param year_max: The most recent year of the desired time range
    :param file: filepath where to save the long term average dataset
    :param var: If given, only these variable names will be preserved in the
    output.
    :param save: If True, saves the data downloaded during this operation. This
    can potentially be a very large amount of data.
    :param monitor: A progress monitor to use
    :return: The Long Term Average dataset.
    """
    var = VarNamesLike.convert(var)

    n_years = year_max - year_min + 1
    res = 0
    total_work = 100

    # Select the appropriate data source
    data_store_list = DATA_STORE_REGISTRY.get_data_stores()
    data_sources = query_data_sources(data_store_list, name=source)
    if len(data_sources) == 0:
        raise ValueError("No data_source found for the given query "
                         "term {}".format(source))
    elif len(data_sources) > 1:
        raise ValueError("{} data_sources found for the given query "
                         "term {}".format(len(data_sources), source))

    data_source = data_sources[0]
    source_info = data_source.cache_info

    # Check if we have a monthly data source
    fq = data_source.meta_info['time_frequency']
    if fq != 'mon':
        raise ValueError("Only monthly datasets are supported for time being.")

    with monitor.starting('LTA', total_work=total_work):
        # Set up the monitor
        monitor.progress(work=0)
        step = total_work * 0.9 / n_years

        # Process the data source year by year
        year = year_min
        while year != year_max + 1:

            tmin = "{}-01-01".format(year)
            tmax = "{}-12-31".format(year)

            # Determine if the data for the given year are already downloaded
            # If at least one file of the given time range is present, we
            # don't delete the data for this year, we do the syncing anyway.
            was_already_downloaded = False
            dt_range = to_datetime_range(tmin, tmax)
            for date in source_info:
                if dt_range[0] <= date <= dt_range[1]:
                    was_already_downloaded = True
                    # One is enough
                    break

            worked = monitor._worked
            data_source.sync(dt_range, monitor=monitor.child(work=step * 0.9))
            if worked == monitor._worked:
                monitor.progress(work=step * 0.9)

            ds = data_source.open_dataset(dt_range)

            # Filter the dataset
            ds = select_var(ds, var)

            try:
                if res == 0:
                    res = ds / n_years
                else:
                    # Xarray doesn't do automatic alignment for in place
                    # operations, hence we have to do it manually
                    res = res + ds.reindex_like(res) / n_years
            except TypeError:
                raise TypeError('One or more data arrays feature a dtype that '
                                'cannot be divided. Consider using the var '
                                'parameter to filter the dataset.')

            ds.close()
            # delete data for the current year, if it should be deleted and it
            # was not already downloaded.
            if (not save) and (not was_already_downloaded):
                data_source.delete_local(dt_range)

            monitor.progress(work=step * 0.1)

            year = year + 1

        monitor.progress(msg='Saving the LTA dataset')
        save_dataset(res, file)
        monitor.progress(total_work * 0.1)

    return res
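A minimal call sketch for the operation above. The dataset id is the monthly OZONE collection that appears elsewhere in this listing, and the output path is an arbitrary example; both may need to be adapted:

lta = long_term_average(source='esacci.OZONE.mon.L3.NP.multi-sensor.multi-platform.MERGED.fv0002.r1',
                        year_min=2007,
                        year_max=2008,
                        file='/tmp/ozone_lta.nc')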
Example #35
 def setUpClass(cls):
     cls._orig_stores = list(DATA_STORE_REGISTRY.get_data_stores())
     DATA_STORE_REGISTRY._data_stores.clear()
     DATA_STORE_REGISTRY.add_data_store(_create_test_data_store())
Example #36
 def tearDown(self):
     if self._existing_local_data_store:
         DATA_STORE_REGISTRY.add_data_store(self._existing_local_data_store)
     shutil.rmtree(self.tmp_dir, ignore_errors=True)
     self.data_store.get_updates(reset=True)
Example #37
 def test_set_default_data_store(self):
     set_default_data_store()
     data_stores_path = get_data_stores_path()
     data_store = DATA_STORE_REGISTRY.get_data_store('esa_cci_ftp')
     self.assertIsInstance(data_store, FileSetDataStore)
     self.assertEqual(data_store.root_dir, os.path.join(data_stores_path, 'esa_cci_ftp'))
Example #38
def add_to_data_store_registry():
    data_store = LocalDataStore('local', get_data_store_path())
    DATA_STORE_REGISTRY.add_data_store(data_store)
Example #39
def get_data_store_ids():
    data_store = DATA_STORE_REGISTRY.get_data_store(ESA_CCI_ODP_DATA_STORE_ID)
    data_sources = data_store.query()
    return data_sources
Example #40
from cate.core.ds import DATA_STORE_REGISTRY
from cate.core.monitor import ConsoleMonitor
import cate.ops as ops
import xarray as xr
from datetime import datetime
import time

# Open with chunks
t = time.time()
monitor = ConsoleMonitor()
data_store = DATA_STORE_REGISTRY.get_data_store('esa_cci_odp')

print('1')
# sst = xr.open_mfdataset('/home/ccitbx/Desktop/sst/*.nc', chunks = {'lat':36,
#                                                             'lon':72,
#                                                             'time':31},
#                       concat_dim = 'time')
sst = xr.open_mfdataset('/home/ccitbx/Desktop/sst/*.nc', concat_dim='time')
print(sst.nbytes * (2 ** -30))
print('2')

# sm = xr.open_mfdataset('/home/ccitbx/Desktop/sm/*.nc', chunks = {'lat':72,
#                                                           'lon':144,
#                                                           'time':31},
#                      concat_dim = 'time')
sm = xr.open_mfdataset('/home/ccitbx/Desktop/sm/*.nc', concat_dim='time')
print(sm.nbytes * (2 ** -30))
print('3')

sm_mean = sm.mean('time', keep_attrs=True, skipna=True)
print('4')