class AcquisitionRecord(object):
    """AcquisitionRecord database interface class.

    Wraps a single acquisition record: copies acquisition metadata from
    a dataset, resolves the satellite and sensor ids via the database,
    and looks up (or inserts) the matching acquisition record.
    """

    # Metadata fields copied verbatim from dataset.metadata_dict into
    # acquisition_dict.
    ACQUISITION_METADATA_FIELDS = ['satellite_tag',
                                   'sensor_name',
                                   'x_ref',
                                   'y_ref',
                                   'start_datetime',
                                   'end_datetime',
                                   'll_lon',
                                   'll_lat',
                                   'lr_lon',
                                   'lr_lat',
                                   'ul_lon',
                                   'ul_lat',
                                   'ur_lon',
                                   'ur_lat',
                                   'gcp_count',
                                   'mtl_text',
                                   'cloud_cover'
                                   ]

    def __init__(self, collection, dataset):
        """Build the acquisition record for the given dataset.

        Fills acquisition_dict from the dataset metadata, resolves
        satellite_id and sensor_id in the database, then looks up the
        acquisition id, inserting a new acquisition record if the
        lookup finds nothing.
        """
        self.collection = collection
        self.datacube = collection.datacube
        self.db = IngestDBWrapper(self.datacube.db_connection)
        self.acquisition_dict = {}
        # Bug fix: this attribute was previously misspelt 'acquisiton_id',
        # leaving a dead attribute; 'acquisition_id' is the name assigned
        # and read below.
        self.acquisition_id = None  # set below

        # Fill a dictionary with data for the acquisition.
        # Start with fields from the dataset metadata.
        for field in self.ACQUISITION_METADATA_FIELDS:
            self.acquisition_dict[field] = dataset.metadata_dict[field]

        # Next look up the satellite_id and sensor_id in the
        # database and fill these in.
        self.acquisition_dict['satellite_id'] = \
            self.db.get_satellite_id(self.acquisition_dict['satellite_tag'])
        self.acquisition_dict['sensor_id'] = \
            self.db.get_sensor_id(self.acquisition_dict['satellite_id'],
                                  self.acquisition_dict['sensor_name'])

        # Finally look up the acquisition_id, or create a new record if it
        # does not exist, and fill it into the dictionary.
        self.acquisition_id = \
            self.db.get_acquisition_id_fuzzy(self.acquisition_dict)
        if self.acquisition_id is None:
            self.acquisition_id = \
                self.db.insert_acquisition_record(self.acquisition_dict)
        # TODO: decide whether an existing acquisition record should be
        # updated with the new metadata when the lookup succeeds.
        self.acquisition_dict['acquisition_id'] = self.acquisition_id

    def create_dataset_record(self, dataset):
        """Factory method to create an instance of the DatasetRecord class.

        This method creates a new record in the database if one does not
        already exist. It will overwrite an earlier dataset record (and
        its tiles) if one exists. It will raise a DatasetError if a
        later (or equal time) record for this dataset already exists in
        the database.
        """
        return DatasetRecord(self.collection, self, dataset)
class Collection(object):
    """Collection database interface class."""

    #
    # Interface methods
    #

    def __init__(self, datacube):
        """Initialise the collection object.

        Sets up the database wrapper, the reindexed band dictionary,
        the transaction stack, and a process-specific temporary tile
        directory.
        """
        self.datacube = datacube
        self.db = IngestDBWrapper(datacube.db_connection)
        self.new_bands = self.__reindex_bands(datacube.bands)
        self.transaction_stack = []

        self.temp_tile_directory = os.path.join(self.datacube.tile_root,
                                                'ingest_temp',
                                                self.datacube.process_id)
        create_directory(self.temp_tile_directory)

    def cleanup(self):
        """Do end-of-process cleanup.

        Deletes the process-specific temporary directory. Does not
        close the database connection (at present), because the
        datacube object has a destructor which does that.
        """
        shutil.rmtree(self.temp_tile_directory, ignore_errors=True)

    @staticmethod
    def get_dataset_key(dataset):
        """Return the dataset key for use with the new_bands dictionary.

        This is a tuple (satellite_tag, sensor_name, processing_level)
        except that for derived datasets (currently PQA and FC) the
        satellite_tag is replaced with 'DERIVED' and the
        processing_level is used as the sensor_name. So the tuple looks
        like: ('DERIVED', processing_level, processing_level).
        """
        derived_levels = {'PQA', 'FC'}

        satellite = dataset.get_satellite_tag()
        sensor = dataset.get_sensor_name()
        level = dataset.get_processing_level()

        if level in derived_levels:
            satellite = 'DERIVED'
            sensor = level

        return (satellite, sensor, level)

    def get_temp_tile_directory(self):
        """Return a path to a directory for temporary tile related files."""
        return self.temp_tile_directory

    def check_metadata(self, dataset):
        """Check that the satellite, sensor, and bands are in the database.

        Checks that the dataset is of a kind that the database knows
        about (by checking basic metadata), and that the bands the
        database expects are present. Raises a DatasetError if the
        checks fail.
        """
        self.__check_satellite_and_sensor(dataset)
        self.__check_processing_level(dataset)
        self.__check_bands(dataset)

    def transaction(self, db=None):
        """Returns a Transaction context manager object.

        This is for use in a 'with' statement. It uses the Collection's
        database connection if one is not provided.
        """
        return Transaction(self.db if db is None else db,
                           self.transaction_stack)

    def lock_datasets(self, dataset_list):
        """Returns a Lock context manager object.

        dataset_list is a list of dataset ids for the datasets to be
        locked. This is for use in a 'with' statement. It uses the
        Collection's datacube object to manage the individual locks.
        """
        lock_list = ['Dataset-' + str(dataset_id)
                     for dataset_id in dataset_list]
        return Lock(self.datacube, lock_list)

    def create_acquisition_record(self, dataset):
        """Factory method to create an instance of the AcquisitionRecord class.

        This method creates a corresponding record in the database if
        one does not already exist.
        """
        return AcquisitionRecord(self, dataset)

    def create_tile_contents(self, tile_type_id, tile_footprint,
                             band_stack):
        """Factory method to create an instance of the TileContents class.

        The tile type entry (from the datacube's tile_type_dict)
        contains the information required for resampling extents and
        resolution.
        """
        tile_type_info = self.datacube.tile_type_dict[tile_type_id]
        tile_contents = TileContents(self.datacube.tile_root,
                                     tile_type_info,
                                     tile_footprint,
                                     band_stack)
        return tile_contents

    def current_transaction(self):
        """Returns the current transaction."""
        return self.transaction_stack[-1]

    def mark_tile_for_removal(self, tile_pathname):
        """Mark a tile file for removal on transaction commit."""
        self.current_transaction().mark_tile_for_removal(tile_pathname)

    def mark_tile_for_creation(self, tile_contents):
        """Mark a tile file for creation on transaction commit."""
        self.current_transaction().mark_tile_for_creation(tile_contents)

    #
    # worker methods
    #

    @staticmethod
    def __reindex_bands(bands):
        """Reindex the datacube.bands nested dict structure.

        This method returns the new nested dict which is indexed by:
            new_bands[dataset_key][tile_type][file_number]
        where dataset_key is a tuple:
            (satellite_tag, sensor_name, processing_level).

        The original indexing is:
            bands[tile_type][satellite_sensor][file_number]
        where satellite_sensor is a tuple:
            (satellite_tag, sensor_name)

        Note that satellite_tag and sensor_name are replaced by
        'DERIVED' and the processing_level for PQA and FC datasets.
        This needs to be taken into account when constructing a
        dataset_key.
        """
        new_bands = {}

        for (tile_type, band_dict) in bands.items():
            for ((satellite, sensor), sensor_dict) in band_dict.items():
                for (file_number, band_info) in sensor_dict.items():
                    dataset_key = (satellite, sensor,
                                   band_info['level_name'])
                    new_bands.setdefault(dataset_key, {})
                    new_bands[dataset_key].setdefault(tile_type, {})
                    new_bands[dataset_key][tile_type][file_number] = \
                        band_info

        return new_bands

    def __check_satellite_and_sensor(self, dataset):
        """Check that the dataset's satellite and sensor are in the database.

        Raises a DatasetError if they are not.
        """
        satellite_id = self.db.get_satellite_id(dataset.get_satellite_tag())
        if satellite_id is None:
            raise DatasetError("Unknown satellite tag: '%s'"
                               % dataset.get_satellite_tag())

        sensor_id = self.db.get_sensor_id(satellite_id,
                                          dataset.get_sensor_name())
        if sensor_id is None:
            msg = ("Unknown satellite and sensor pair: '%s', '%s'"
                   % (dataset.get_satellite_tag(),
                      dataset.get_sensor_name()))
            raise DatasetError(msg)

    def __check_processing_level(self, dataset):
        """Check that the dataset's processing_level is in the database.

        Raises a DatasetError if it is not.
        """
        level_id = self.db.get_level_id(dataset.get_processing_level())
        if level_id is None:
            raise DatasetError("Unknown processing level: '%s'"
                               % dataset.get_processing_level())

    def __check_bands(self, dataset):
        """Check that the dataset has the expected bands.

        Raises a DatasetError if any band expected for this dataset
        (according to the database) is missing.
        """
        try:
            dataset_bands = self.new_bands[self.get_dataset_key(dataset)]
        except KeyError:
            raise DatasetError('No tile types for this dataset.')

        for tile_type_bands in dataset_bands.values():
            for band_info in tile_type_bands.values():
                dataset.find_band_file(band_info['file_pattern'])
class Collection(object):
    """Database interface for the tile collection."""

    #
    # Interface methods
    #

    def __init__(self, datacube):
        """Initialise the collection object."""
        self.datacube = datacube
        self.db = IngestDBWrapper(datacube.db_connection)
        self.new_bands = self.__reindex_bands(datacube.bands)
        self.transaction_stack = []

        temp_dir = os.path.join(self.datacube.tile_root,
                                'ingest_temp',
                                self.datacube.process_id)
        self.temp_tile_directory = temp_dir
        create_directory(temp_dir)

    def cleanup(self):
        """Do end-of-process cleanup.

        Removes this process's temporary directory. The database
        connection is left open (at present) because the datacube
        object closes it in its destructor.
        """
        shutil.rmtree(self.temp_tile_directory, ignore_errors=True)

    @staticmethod
    def get_dataset_key(dataset):
        """Return the key used to index the new_bands dictionary.

        The key is a tuple (satellite_tag, sensor_name,
        processing_level), except that derived products (currently PQA
        and FC) yield ('DERIVED', processing_level, processing_level).
        """
        satellite = dataset.get_satellite_tag()
        sensor = dataset.get_sensor_name()
        level = dataset.get_processing_level()

        if level in ('PQA', 'FC'):
            return ('DERIVED', level, level)
        return (satellite, sensor, level)

    def get_temp_tile_directory(self):
        """Return the directory used for temporary tile related files."""
        return self.temp_tile_directory

    def check_metadata(self, dataset):
        """Validate a dataset's satellite, sensor, level, and bands.

        Raises a DatasetError if the database does not know about this
        kind of dataset (basic metadata checks) or if a band the
        database expects is missing.
        """
        self.__check_satellite_and_sensor(dataset)
        self.__check_processing_level(dataset)
        self.__check_bands(dataset)

    def transaction(self, db=None):
        """Return a Transaction context manager for a 'with' statement.

        Uses the Collection's own database connection when db is None.
        """
        if db is None:
            db = self.db
        return Transaction(db, self.transaction_stack)

    def lock_datasets(self, dataset_list):
        """Return a Lock context manager covering the given datasets.

        dataset_list holds the dataset ids to be locked; this is for
        use in a 'with' statement, with the Collection's datacube
        object managing the individual locks.
        """
        lock_names = []
        for dataset_id in dataset_list:
            lock_names.append('Dataset-' + str(dataset_id))
        return Lock(self.datacube, lock_names)

    def create_acquisition_record(self, dataset):
        """Factory method returning an AcquisitionRecord for dataset.

        A corresponding database record is created if one does not
        already exist.
        """
        return AcquisitionRecord(self, dataset)

    def create_tile_contents(self, tile_type_id, tile_footprint,
                             band_stack):
        """Factory method returning a TileContents instance.

        The tile type entry looked up via tile_type_id supplies the
        information required for resampling extents and resolution.
        """
        type_info = self.datacube.tile_type_dict[tile_type_id]
        return TileContents(self.datacube.tile_root, type_info,
                            tile_footprint, band_stack)

    def current_transaction(self):
        """Return the transaction on top of the stack."""
        return self.transaction_stack[-1]

    def mark_tile_for_removal(self, tile_pathname):
        """Queue a tile file for deletion on transaction commit."""
        self.current_transaction().mark_tile_for_removal(tile_pathname)

    def mark_tile_for_creation(self, tile_contents):
        """Queue a tile file for creation on transaction commit."""
        self.current_transaction().mark_tile_for_creation(tile_contents)

    #
    # worker methods
    #

    @staticmethod
    def __reindex_bands(bands):
        """Reindex the datacube.bands nested dict structure.

        Input indexing:
            bands[tile_type][(satellite_tag, sensor_name)][file_number]
        Returned indexing:
            new_bands[dataset_key][tile_type][file_number]
        where dataset_key is (satellite_tag, sensor_name,
        processing_level).

        Note that satellite_tag and sensor_name are replaced by
        'DERIVED' and the processing_level for PQA and FC datasets;
        dataset keys must be built accordingly (see get_dataset_key).
        """
        reindexed = {}
        for tile_type, by_sat_sensor in bands.items():
            for (satellite, sensor), file_dict in by_sat_sensor.items():
                for file_number, band_info in file_dict.items():
                    key = (satellite, sensor, band_info['level_name'])
                    per_type = reindexed.setdefault(key, {})
                    per_type.setdefault(tile_type, {})[file_number] = \
                        band_info
        return reindexed

    def __check_satellite_and_sensor(self, dataset):
        """Raise DatasetError unless the satellite/sensor pair is known."""
        satellite_id = self.db.get_satellite_id(dataset.get_satellite_tag())
        if satellite_id is None:
            raise DatasetError("Unknown satellite tag: '%s'" %
                               dataset.get_satellite_tag())

        sensor_id = self.db.get_sensor_id(satellite_id,
                                          dataset.get_sensor_name())
        if sensor_id is None:
            raise DatasetError(
                "Unknown satellite and sensor pair: '%s', '%s'" %
                (dataset.get_satellite_tag(), dataset.get_sensor_name()))

    def __check_processing_level(self, dataset):
        """Raise DatasetError unless the processing level is known."""
        if self.db.get_level_id(dataset.get_processing_level()) is None:
            raise DatasetError("Unknown processing level: '%s'" %
                               dataset.get_processing_level())

    def __check_bands(self, dataset):
        """Raise DatasetError if any expected band file is missing."""
        dataset_key = self.get_dataset_key(dataset)
        if dataset_key not in self.new_bands:
            raise DatasetError('No tile types for this dataset.')

        for bands_for_type in self.new_bands[dataset_key].values():
            for band_info in bands_for_type.values():
                dataset.find_band_file(band_info['file_pattern'])
class AcquisitionRecord(object):
    """AcquisitionRecord database interface class.

    Copies acquisition metadata from a dataset, resolves the satellite
    and sensor ids via the database, and looks up (or inserts) the
    matching acquisition record.
    """

    # Metadata fields copied verbatim from dataset.metadata_dict into
    # acquisition_dict.
    ACQUISITION_METADATA_FIELDS = ['satellite_tag',
                                   'sensor_name',
                                   'x_ref',
                                   'y_ref',
                                   'start_datetime',
                                   'end_datetime',
                                   'll_lon',
                                   'll_lat',
                                   'lr_lon',
                                   'lr_lat',
                                   'ul_lon',
                                   'ul_lat',
                                   'ur_lon',
                                   'ur_lat',
                                   'gcp_count',
                                   'mtl_text',
                                   'cloud_cover'
                                   ]

    def __init__(self, collection, dataset):
        """Build the acquisition record for the given dataset.

        Fills acquisition_dict from the dataset metadata, resolves
        satellite_id and sensor_id in the database, then looks up the
        acquisition id, inserting a new acquisition record if the
        lookup finds nothing.
        """
        self.collection = collection
        self.datacube = collection.datacube
        self.db = IngestDBWrapper(self.datacube.db_connection)
        self.acquisition_dict = {}
        # NOTE(review): 'acquisiton_id' looks like a typo for
        # 'acquisition_id' (which is assigned further down); this
        # misspelt attribute is never read again in this class --
        # confirm no external caller relies on it before fixing.
        self.acquisiton_id = None  # set below

        # Fill a dictionary with data for the acquisition.
        # Start with fields from the dataset metadata.
        for field in self.ACQUISITION_METADATA_FIELDS:
            self.acquisition_dict[field] = dataset.metadata_dict[field]

        # Next look up the satellite_id and sensor_id in the
        # database and fill these in.
        self.acquisition_dict['satellite_id'] = \
            self.db.get_satellite_id(self.acquisition_dict['satellite_tag'])
        self.acquisition_dict['sensor_id'] = \
            self.db.get_sensor_id(self.acquisition_dict['satellite_id'],
                                  self.acquisition_dict['sensor_name'])

        # Finally look up the acquisition_id, or create a new record if it
        # does not exist, and fill it into the dictionary.
        self.acquisition_id = \
            self.db.get_acquisition_id_fuzzy(self.acquisition_dict)
        if self.acquisition_id is None:
            self.acquisition_id = \
                self.db.insert_acquisition_record(self.acquisition_dict)
        else:
            # Do we update the acquisition record here?
            pass
        self.acquisition_dict['acquisition_id'] = self.acquisition_id

    def create_dataset_record(self, dataset):
        """Factory method to create an instance of the DatasetRecord class.

        This method creates a new record in the database if one does not
        already exist. It will overwrite an earlier dataset record (and
        its tiles) if one exists. It will raise a DatasetError if a
        later (or equal time) record for this dataset already exists in
        the database.
        """
        return DatasetRecord(self.collection, self, dataset)