class DatasetRecord(object):
    """DatasetRecord database interface class.

    Wraps a single row of the dataset table: looks up (or creates) the
    database record on construction, and provides tiling, mosaicking and
    removal operations for the dataset's tiles.
    """

    # Metadata fields copied verbatim from the dataset's metadata_dict
    # into the database record dictionary.
    DATASET_METADATA_FIELDS = ['dataset_path',
                               'datetime_processed',
                               'dataset_size',
                               'll_x',
                               'll_y',
                               'lr_x',
                               'lr_y',
                               'ul_x',
                               'ul_y',
                               'ur_x',
                               'ur_y',
                               'x_pixels',
                               'y_pixels',
                               'xml_text'
                               ]

    def __init__(self, collection, acquisition, dataset):
        """Fetch or create the database record for this dataset.

        'collection' is the Collection object, 'acquisition' the parent
        AcquisitionRecord, and 'dataset' the dataset being ingested.
        Sets self.needs_update to True when a matching record already
        exists (after checking that updating it is allowed).
        """
        self.collection = collection
        self.datacube = collection.datacube
        self.db = IngestDBWrapper(self.datacube.db_connection)
        dataset_key = collection.get_dataset_key(dataset)
        self.dataset_bands = collection.new_bands[dataset_key]

        self.dataset = dataset
        self.mdd = dataset.metadata_dict

        self.dataset_dict = {}
        for field in self.DATASET_METADATA_FIELDS:
            self.dataset_dict[field] = self.mdd[field]

        self.dataset_dict['acquisition_id'] = acquisition.acquisition_id
        self.dataset_dict['crs'] = self.mdd['projection']
        self.dataset_dict['level_name'] = self.mdd['processing_level']
        self.dataset_dict['level_id'] = \
            self.db.get_level_id(self.dataset_dict['level_name'])

        self.dataset_dict['dataset_id'] = \
            self.db.get_dataset_id(self.dataset_dict)
        if self.dataset_dict['dataset_id'] is None:
            # create a new dataset record in the database
            self.dataset_dict['dataset_id'] = \
                self.db.insert_dataset_record(self.dataset_dict)
            self.needs_update = False
        else:
            # check that the old dataset record can be updated
            self.__check_update_ok()
            self.needs_update = True

        self.dataset_id = self.dataset_dict['dataset_id']

    def remove_mosaics(self, dataset_filter):
        """Remove mosaics associated with the dataset.

        This will mark mosaic files for removal, delete mosaic database
        records if they exist, and update the tile class of overlapping
        tiles (from other datasets) to reflect the lack of a mosaic. The
        'dataset_filter' is a list of dataset_ids to filter on. It should
        be the list of dataset_ids that have been locked (including this
        dataset). It is used to avoid operating on the tiles of an
        unlocked dataset.
        """

        # remove new mosaics (those with database records)
        overlap_dict = self.db.get_overlapping_tiles_for_dataset(
            self.dataset_id,
            input_tile_class_filter=(TC_SINGLE_SCENE,
                                     TC_SUPERSEDED,
                                     TC_MOSAIC),
            output_tile_class_filter=(TC_MOSAIC,),
            dataset_filter=dataset_filter
            )

        for tile_record_list in overlap_dict.values():
            for tr in tile_record_list:
                self.db.remove_tile_record(tr['tile_id'])
                self.collection.mark_tile_for_removal(tr['tile_pathname'])

        # build a dictionary of overlaps (ignoring mosaics)
        overlap_dict = self.db.get_overlapping_tiles_for_dataset(
            self.dataset_id,
            input_tile_class_filter=(TC_SINGLE_SCENE, TC_SUPERSEDED),
            output_tile_class_filter=(TC_SINGLE_SCENE, TC_SUPERSEDED),
            dataset_filter=dataset_filter
            )

        # update tile classes for overlap tiles from other datasets
        for tile_record_list in overlap_dict.values():
            if len(tile_record_list) > 2:
                raise DatasetError("Attempt to update a mosaic of three or " +
                                   "more datasets. Handling for this case " +
                                   "is not yet implemented.")
            for tr in tile_record_list:
                if tr['dataset_id'] != self.dataset_id:
                    self.db.update_tile_class(tr['tile_id'], TC_SINGLE_SCENE)

        # remove old mosaics (those without database records)
        for tile_record_list in overlap_dict.values():
            if len(tile_record_list) > 1:
                # tile_record_list is sorted by acquisition start time, so
                # the first record should be the one the mosaic filename is
                # based on.
                tr = tile_record_list[0]
                mosaic_pathname = \
                    self.__make_mosaic_pathname(tr['tile_pathname'])
                if os.path.isfile(mosaic_pathname):
                    self.collection.mark_tile_for_removal(mosaic_pathname)

    def remove_tiles(self):
        """Remove the tiles associated with the dataset.

        This will remove ALL the tiles belonging to this dataset, deleting
        database records and marking tile files for removal on commit.
        Mosaics should be removed BEFORE calling this (as it will delete
        the tiles needed to figure out the overlaps, but may not delete
        all the mosaics).
        """

        tile_list = self.db.get_dataset_tile_ids(self.dataset_id)

        for tile_id in tile_list:
            tile_pathname = self.db.get_tile_pathname(tile_id)
            self.db.remove_tile_record(tile_id)
            self.collection.mark_tile_for_removal(tile_pathname)

    def update(self):
        """Update the dataset record in the database.

        This first checks that the new dataset is more recent than
        the record in the database. If not it raises a dataset error.
        """

        self.__check_update_ok()
        self.db.update_dataset_record(self.dataset_dict)

    def make_tiles(self, tile_type_id, band_stack):
        """Tile the dataset, returning a list of tile_content objects.

        Tiles with no data are removed rather than returned.
        """

        tile_list = []
        tile_footprint_list = self.get_coverage(tile_type_id)
        for tile_footprint in tile_footprint_list:
            tile_contents = self.collection.create_tile_contents(
                tile_type_id,
                tile_footprint,
                band_stack
                )
            tile_contents.reproject()

            if tile_contents.has_data():
                tile_list.append(tile_contents)
            else:
                tile_contents.remove()

        return tile_list

    def store_tiles(self, tile_list):
        """Store tiles in the database and file store.

        'tile_list' is a list of tile_contents objects. This method will
        create the corresponding database records and mark tiles for
        creation when the transaction commits.
        """

        tile_record_list = []
        for tile_contents in tile_list:
            tile_record = self.create_tile_record(tile_contents)
            tile_record_list.append(tile_record)

        return tile_record_list

    def create_mosaics(self, dataset_filter):
        """Create mosaics associated with the dataset.

        'dataset_filter' is a list of dataset_ids to filter on. It should
        be the list of dataset_ids that have been locked (including this
        dataset). It is used to avoid operating on the tiles of an
        unlocked dataset.
        """

        # Build a dictionary of overlaps (ignoring mosaics, including
        # pending).
        overlap_dict = self.db.get_overlapping_tiles_for_dataset(
            self.dataset_id,
            input_tile_class_filter=(TC_PENDING,
                                     TC_SINGLE_SCENE,
                                     TC_SUPERSEDED),
            output_tile_class_filter=(TC_PENDING,
                                      TC_SINGLE_SCENE,
                                      TC_SUPERSEDED),
            dataset_filter=dataset_filter
            )

        # Make mosaics and update tile classes as needed.
        for tile_record_list in overlap_dict.values():
            if len(tile_record_list) > 2:
                raise DatasetError("Attempt to create a mosaic of three or " +
                                   "more datasets. Handling for this case " +
                                   "is not yet implemented.")
            elif len(tile_record_list) == 2:
                self.__make_one_mosaic(tile_record_list)
                for tr in tile_record_list:
                    self.db.update_tile_class(tr['tile_id'], TC_SUPERSEDED)
            else:
                for tr in tile_record_list:
                    self.db.update_tile_class(tr['tile_id'], TC_SINGLE_SCENE)

    def get_removal_overlaps(self):
        """Returns a list of overlapping dataset ids for mosaic removal."""

        tile_class_filter = (TC_SINGLE_SCENE,
                             TC_SUPERSEDED,
                             TC_MOSAIC)
        return self.get_overlaps(tile_class_filter)

    def get_creation_overlaps(self):
        """Returns a list of overlapping dataset_ids for mosaic creation."""

        tile_class_filter = (TC_PENDING,
                             TC_SINGLE_SCENE,
                             TC_SUPERSEDED)
        return self.get_overlaps(tile_class_filter)

    def get_overlaps(self, tile_class_filter):
        """Returns a list of overlapping dataset ids, including this dataset.

        A dataset is overlapping if it contains tiles that overlap with
        tiles belonging to this dataset. Only tiles in the tile_class_filter
        are considered.
        """

        dataset_list = self.db.get_overlapping_dataset_ids(
            self.dataset_id,
            tile_class_filter=tile_class_filter
            )

        # An empty result means no overlaps were recorded; the dataset
        # still overlaps itself, so return it alone.
        if not dataset_list:
            dataset_list = [self.dataset_id]

        return dataset_list

    def create_tile_record(self, tile_contents):
        """Factory method to create an instance of the TileRecord class.

        The created object will be responsible for inserting tile table
        records into the database for reprojected or mosaiced tiles."""

        self.collection.mark_tile_for_creation(tile_contents)
        return TileRecord(self.collection, self, tile_contents)

    def mark_as_tiled(self):
        """Flag the dataset record as tiled in the database.

        This flag does not exist in the current database schema,
        so this method does nothing at the moment."""

        pass

    def list_tile_types(self):
        """Returns a list of the tile type ids for this dataset."""

        return self.dataset_bands.keys()

    def get_tile_bands(self, tile_type_id):
        """Returns a dictionary containing the band info for one tile type.

        The tile_type_id must valid for this dataset, available from
        list_tile_types above.
        """

        return self.dataset_bands[tile_type_id]

    def get_coverage(self, tile_type_id):
        """Given the coordinate reference system of the dataset and that of
        the tile_type_id, return a list of tiles within the dataset
        footprint"""

        tile_type_info = self.collection.datacube.tile_type_dict[tile_type_id]
        #Get geospatial information from the dataset.
        dataset_crs = self.mdd['projection']
        dataset_geotransform = self.mdd['geo_transform']
        pixels = self.mdd['x_pixels']
        lines = self.mdd['y_pixels']
        #Look up the datacube's projection information for this tile_type
        tile_crs = tile_type_info['crs']
        #Get the transformation between the two projections
        transformation = self.define_transformation(dataset_crs, tile_crs)
        #Determine the bounding quadrilateral of the dataset extent
        #in tile coordinates
        dataset_bbox = self.get_bbox(transformation, dataset_geotransform,
                                     pixels, lines)
        #Determine maximum inner rectangle, which is guaranteed to need tiling
        #and the minimum outer rectangle outside which no tiles will exist.
        cube_origin = (tile_type_info['x_origin'], tile_type_info['y_origin'])
        cube_tile_size = (tile_type_info['x_size'], tile_type_info['y_size'])
        coverage = self.get_touched_tiles(dataset_bbox,
                                          cube_origin, cube_tile_size)
        return coverage

    #
    # worker methods
    #

    def __check_update_ok(self):
        """Checks if an update is possible, raises a DatasetError otherwise."""

        tile_class_filter = (TC_SINGLE_SCENE,
                             TC_SUPERSEDED)
        if self.db.dataset_older_than_database(
                self.dataset_dict['dataset_id'],
                self.dataset_dict['datetime_processed'],
                tile_class_filter):
            raise DatasetError("Dataset to be ingested is older than " +
                               "the version in the database.")

    def __make_one_mosaic(self, tile_record_list):
        """Create a single mosaic.

        This create the mosaic contents, creates the database record,
        and marks the mosaic contents for creation on transaction commit.
        """
        mosaic = MosaicContents(
            tile_record_list,
            self.datacube.tile_type_dict,
            self.dataset_dict['level_name'],
            self.collection.get_temp_tile_directory()
            )
        mosaic.create_record(self.db)
        self.collection.mark_tile_for_creation(mosaic)

    def __make_mosaic_pathname(self, tile_pathname):
        """Return the pathname of the mosaic corresponding to a tile."""

        (tile_dir, tile_basename) = os.path.split(tile_pathname)

        mosaic_dir = os.path.join(tile_dir, 'mosaic_cache')
        if self.dataset_dict['level_name'] == 'PQA':
            mosaic_basename = tile_basename
        else:
            # Non-PQA mosaics are virtual rasters, so swap the extension.
            mosaic_basename = re.sub(r'\.\w+$', '.vrt', tile_basename)

        return os.path.join(mosaic_dir, mosaic_basename)

    #
    # Worker methods for coverage.
    #
    # These are public so that they can be called by test_dataset_record.
    #

    def define_transformation(self, dataset_crs, tile_crs):
        """Return the transformation between dataset_crs
        and tile_crs projections"""

        osr.UseExceptions()
        try:
            dataset_spatial_reference = self.create_spatial_ref(dataset_crs)
            tile_spatial_reference = self.create_spatial_ref(tile_crs)
            if dataset_spatial_reference is None:
                raise DatasetError('Unknown projecton %s'
                                   % str(dataset_crs))
            if tile_spatial_reference is None:
                raise DatasetError('Unknown projecton %s' % str(tile_crs))
            return osr.CoordinateTransformation(dataset_spatial_reference,
                                                tile_spatial_reference)
        except Exception:
            # NOTE(review): this also catches the DatasetErrors raised
            # above, replacing their message with the generic one below.
            raise DatasetError('Coordinate transformation error ' +
                               'for transforming %s to %s' %
                               (str(dataset_crs), str(tile_crs)))

    @staticmethod
    def create_spatial_ref(crs):
        """Create a spatial reference system for projection crs.

        Called by define_transformation().  Tries to interpret 'crs'
        first as WKT, then as an 'EPSG:<code>' string; returns None if
        neither interpretation succeeds."""
        # pylint: disable=broad-except

        osr.UseExceptions()
        try:
            spatial_ref = osr.SpatialReference()
        except Exception:
            raise DatasetError('No spatial reference done for %s' % str(crs))
        try:
            spatial_ref.ImportFromWkt(crs)
            return spatial_ref
        except Exception:
            pass
        try:
            matchobj = re.match(r'EPSG:(\d+)', crs)
            epsg_code = int(matchobj.group(1))
            spatial_ref.ImportFromEPSG(epsg_code)
            return spatial_ref
        except Exception:
            return None

    @staticmethod
    def get_bbox(transform, geotrans, pixels, lines):
        """Return the coordinates of the dataset footprint in clockwise order
        from upper-left"""

        xul, yul, dummy_z = \
            transform.TransformPoint(geotrans[0], geotrans[3], 0)
        xur, yur, dummy_z = \
            transform.TransformPoint(geotrans[0] + geotrans[1] * pixels,
                                     geotrans[3] + geotrans[4] * pixels, 0)
        xll, yll, dummy_z = \
            transform.TransformPoint(geotrans[0] + geotrans[2] * lines,
                                     geotrans[3] + geotrans[5] * lines, 0)
        xlr, ylr, dummy_z = \
            transform.TransformPoint(
                geotrans[0] + geotrans[1] * pixels + geotrans[2] * lines,
                geotrans[3] + geotrans[4] * pixels + geotrans[5] * lines, 0)
        return [(xul, yul), (xur, yur), (xlr, ylr), (xll, yll)]

    def get_touched_tiles(self, dataset_bbox,
                          cube_origin, cube_tile_size):
        """Return a list of tuples (itile, jtile) comprising all tiles
        footprints that intersect the dataset bounding box"""

        definite_tiles, possible_tiles = \
            self.get_definite_and_possible_tiles(dataset_bbox,
                                                 cube_origin, cube_tile_size)
        coverage_set = definite_tiles
        #Check possible tiles:
        #Check if the tile perimeter intersects the dataset bbox perimeter:
        intersected_tiles = \
            self.get_intersected_tiles(possible_tiles, dataset_bbox,
                                       cube_origin, cube_tile_size)
        coverage_set = coverage_set.union(intersected_tiles)
        possible_tiles = possible_tiles.difference(intersected_tiles)
        #Otherwise the tile might be wholly contained in the dataset bbox
        contained_tiles = \
            self.get_contained_tiles(possible_tiles, dataset_bbox,
                                     cube_origin, cube_tile_size)
        coverage_set = coverage_set.union(contained_tiles)
        return coverage_set

    @staticmethod
    def get_definite_and_possible_tiles(bbox, cube_origin, cube_tile_size):
        """Return two lists of tile footprints: from the largest rectangle
        wholly contained within the dataset bbox and the smallest rectangle
        containing the bbox."""
        #pylint: disable=too-many-locals

        #unpack the bbox vertices in clockwise order from upper-left
        xyul, xyur, xylr, xyll = bbox
        xul, yul = xyul
        xur, yur = xyur
        xlr, ylr = xylr
        xll, yll = xyll
        #unpack the origin of the tiled datacube (e.g. lat=0, lon=0) and the
        #datacube tile size
        xorigin, yorigin = cube_origin
        xsize, ysize = cube_tile_size
        #Define the largest rectangle wholly contained within footprint
        xmin = max(xll, xul)
        xmax = min(xlr, xur)
        ymin = max(yll, ylr)
        ymax = min(yul, yur)
        xmin_index = int(floor((xmin - xorigin) / xsize))
        xmax_index = int(floor((xmax - xorigin) / xsize))
        ymin_index = int(floor((ymin - yorigin) / ysize))
        ymax_index = int(floor((ymax - yorigin) / ysize))
        definite_tiles = set([(itile, jtile)
                              for itile in range(xmin_index, xmax_index + 1)
                              for jtile in range(ymin_index, ymax_index + 1)])
        #Define the smallest rectangle which is guaranteed to include all tiles
        #in the footprint.
        xmin = min(xll, xul)
        xmax = max(xlr, xur)
        ymin = min(yll, ylr)
        ymax = max(yul, yur)
        xmin_index = int(floor((xmin - xorigin) / xsize))
        xmax_index = int(floor((xmax - xorigin) / xsize))
        ymin_index = int(floor((ymin - yorigin) / ysize))
        ymax_index = int(floor((ymax - yorigin) / ysize))
        possible_tiles = set([(itile, jtile)
                              for itile in range(xmin_index, xmax_index + 1)
                              for jtile in range(ymin_index, ymax_index + 1)
                              ]).difference(definite_tiles)
        return (definite_tiles, possible_tiles)

    def get_intersected_tiles(self, candidate_tiles, dset_bbox,
                              cube_origin, cube_tile_size):
        """Return the subset of candidate_tiles that have an intersection with
        the dataset bounding box"""
        #pylint: disable=too-many-locals

        xorigin, yorigin = cube_origin
        xsize, ysize = cube_tile_size
        keep_list = []
        for itile, jtile in candidate_tiles:
            intersection_exists = False
            # Tile corners in clockwise order from the upper-left, matching
            # the ordering of the dataset bbox.
            (x0, y0) = (xorigin + itile * xsize,
                        yorigin + (jtile + 1) * ysize)
            tile_bbox = [(x0, y0), (x0 + xsize, y0),
                         (x0 + xsize, y0 - ysize), (x0, y0 - ysize)]
            tile_vtx_number = len(tile_bbox)
            dset_vtx_number = len(dset_bbox)
            # Test every tile edge against every dataset edge, stopping at
            # the first intersection found.
            for tile_vtx in range(tile_vtx_number):
                x1, y1 = tile_bbox[tile_vtx]
                x2, y2 = tile_bbox[(tile_vtx + 1) % tile_vtx_number]
                for dset_vtx in range(dset_vtx_number):
                    x3, y3 = dset_bbox[dset_vtx]
                    x4, y4 = dset_bbox[(dset_vtx + 1) % dset_vtx_number]
                    xcoords = [x1, x2, x3, x4]
                    ycoords = [y1, y2, y3, y4]
                    intersection_exists = \
                        self.check_intersection(xcoords, ycoords)
                    if intersection_exists:
                        keep_list.append((itile, jtile))
                        break
                if intersection_exists:
                    break
        return set(keep_list)

    @staticmethod
    def get_contained_tiles(candidate_tiles, dset_bbox,
                            cube_origin, cube_tile_size):
        """Return the subset of candidate tiles that lie wholly within the
        dataset bounding box"""
        #pylint: disable=too-many-locals

        xorigin, yorigin = cube_origin
        xsize, ysize = cube_tile_size
        keep_list = []
        for itile, jtile in candidate_tiles:
            tile_vtx_inside = []
            (x0, y0) = (xorigin + itile * xsize,
                        yorigin + (jtile + 1) * ysize)
            tile_bbox = [(x0, y0), (x0 + xsize, y0),
                         (x0 + xsize, y0 - ysize), (x0, y0 - ysize)]
            dset_vtx_number = len(dset_bbox)
            for x, y in tile_bbox:
                #Check if this vertex lies within the dataset bounding box:
                # (crossing-number / winding test against each bbox edge)
                winding_number = 0
                for dset_vtx in range(dset_vtx_number):
                    x1, y1 = dset_bbox[dset_vtx]
                    x2, y2 = dset_bbox[(dset_vtx + 1) % dset_vtx_number]
                    if y >= y1 and y < y2:
                        if (x - x1) * (y2 - y1) > (x2 - x1) * (y - y1):
                            winding_number += 1
                    elif y <= y1 and y > y2:
                        if (x - x1) * (y2 - y1) < (x2 - x1) * (y - y1):
                            winding_number += 1
                tile_vtx_inside.append(winding_number % 2 == 1)
            if tile_vtx_inside.count(True) == len(tile_bbox):
                keep_list.append((itile, jtile))
            # Candidates reaching here have no edge intersection, so all
            # four vertices must be inside or all outside.
            assert tile_vtx_inside.count(True) == 4 or \
                tile_vtx_inside.count(True) == 0, \
                "Tile partially inside dataset bounding box but has" \
                "no intersection"
        return set(keep_list)

    @staticmethod
    def check_intersection(xpts, ypts):
        """Determines if the line segments (xpts[0], ypts[0]) to
        (xpts[1], ypts[1]) and (xpts[2], ypts[2]) to (xpts[3], ypts[3])
        intersect.

        Uses the parametric cross-product test: returns True on a strict
        interior crossing, False for parallel segments, and falls through
        (returning None, which is falsy) otherwise."""

        pvec = (xpts[0], ypts[0])
        qvec = (xpts[2], ypts[2])
        rvec = (xpts[1] - xpts[0], ypts[1] - ypts[0])
        svec = (xpts[3] - xpts[2], ypts[3] - ypts[2])
        rvec_cross_svec = rvec[0] * svec[1] - rvec[1] * svec[0]
        if rvec_cross_svec == 0:
            # Parallel (or degenerate) segments never count as intersecting.
            return False
        qminusp_cross_svec = \
            (qvec[0] - pvec[0]) * svec[1] - (qvec[1] - pvec[1]) * svec[0]
        qminusp_cross_rvec = \
            (qvec[0] - pvec[0]) * rvec[1] - (qvec[1] - pvec[1]) * rvec[0]
        tparameter = qminusp_cross_svec / rvec_cross_svec
        uparameter = qminusp_cross_rvec / rvec_cross_svec
        if tparameter > 0 and tparameter < 1 and \
           uparameter > 0 and uparameter < 1:
            return True
class Collection(object):
    """Collection database interface class."""

    #
    # Interface methods
    #

    def __init__(self, datacube):
        """Initialise the collection object from a datacube instance."""

        self.datacube = datacube
        self.db = IngestDBWrapper(datacube.db_connection)
        self.new_bands = self.__reindex_bands(datacube.bands)
        self.transaction_stack = []

        # Per-process scratch area for tile files awaiting commit.
        self.temp_tile_directory = os.path.join(self.datacube.tile_root,
                                                'ingest_temp',
                                                self.datacube.process_id)
        create_directory(self.temp_tile_directory)

    def cleanup(self):
        """Do end-of-process cleanup.

        Deletes the process-specific temporary directory. The database
        connection is left open (at present) because the datacube object
        has a destructor which closes it.
        """

        shutil.rmtree(self.temp_tile_directory, ignore_errors=True)

    @staticmethod
    def get_dataset_key(dataset):
        """Return the dataset key for use with the new_bands dictionary.

        This is a tuple (satellite_tag, sensor_name, processing_level)
        except that for derived datasets (currently PQA and FC) the
        satellite_tag is replaced with 'DERIVED' and the processing_level
        is used as the sensor_name. So the tuple looks like:
        ('DERIVED', processing_level, processing_level).
        """

        level = dataset.get_processing_level()

        # Derived products are keyed by level rather than platform.
        if level in ('PQA', 'FC'):
            return ('DERIVED', level, level)

        return (dataset.get_satellite_tag(),
                dataset.get_sensor_name(),
                level)

    def get_temp_tile_directory(self):
        """Return a path to a directory for temporary tile related files."""

        return self.temp_tile_directory

    def check_metadata(self, dataset):
        """Check that the satellite, sensor, and bands are in the database.

        Checks that the dataset is of a kind that the database knows about
        (by checking basic metadata), and that the bands the database
        expects are present. Raises a DatasetError if the checks fail.
        """

        self.__check_satellite_and_sensor(dataset)
        self.__check_processing_level(dataset)
        self.__check_bands(dataset)

    def transaction(self, db=None):
        """Returns a Transaction context manager object.

        This is for use in a 'with' statement. The Collection's own
        database connection is used when 'db' is not provided.
        """

        if db is None:
            db = self.db
        return Transaction(db, self.transaction_stack)

    def lock_datasets(self, dataset_list):
        """Returns a Lock context manager object.

        dataset_list is a list of dataset ids for the datasets to be
        locked. This is for use in a 'with' statement. It uses the
        Collection's datacube object to manage the individual locks.
        """

        lock_names = []
        for dataset_id in dataset_list:
            lock_names.append('Dataset-' + str(dataset_id))
        return Lock(self.datacube, lock_names)

    def create_acquisition_record(self, dataset):
        """Factory method to create an instance of the AcquisitonRecord class.

        This method creates a corresponding record in the database if one
        does not already exist.
        """

        return AcquisitionRecord(self, dataset)

    def create_tile_contents(self, tile_type_id, tile_footprint, band_stack):
        """Factory method to create an instance of the TileContents class.

        The tile type info looked up here carries the extents and
        resolution needed for resampling.
        """

        return TileContents(self.datacube.tile_root,
                            self.datacube.tile_type_dict[tile_type_id],
                            tile_footprint,
                            band_stack)

    def current_transaction(self):
        """Returns the current transaction."""

        return self.transaction_stack[-1]

    def mark_tile_for_removal(self, tile_pathname):
        """Mark a tile file for removal on transaction commit."""

        self.current_transaction().mark_tile_for_removal(tile_pathname)

    def mark_tile_for_creation(self, tile_contents):
        """Mark a tile file for creation on transaction commit."""

        self.current_transaction().mark_tile_for_creation(tile_contents)

    #
    # worker methods
    #

    @staticmethod
    def __reindex_bands(bands):
        """Reindex the datacube.bands nested dict structure.

        Returns a new nested dict indexed by:
            new_bands[dataset_key][tile_type][file_number]
        where dataset_key is (satellite_tag, sensor_name, processing_level).

        The original indexing is
            bands[tile_type][satellite_sensor][file_number]
        where satellite_sensor is (satellite_tag, sensor_name).

        Note that satellite_tag and sensor_name are replaced by 'DERIVED'
        and the processing_level for PQA and FC datasets. This needs to
        be taken into account when constructing a dataset_key.
        """

        reindexed = {}
        for tile_type, band_dict in bands.items():
            for (satellite, sensor), sensor_dict in band_dict.items():
                for file_number, band_info in sensor_dict.items():
                    key = (satellite, sensor, band_info['level_name'])
                    per_tile_type = reindexed.setdefault(key, {})
                    per_tile_type.setdefault(tile_type, {})[file_number] = \
                        band_info
        return reindexed

    def __check_satellite_and_sensor(self, dataset):
        """Check that the dataset's satellite and sensor are in the database.

        Raises a DatasetError if they are not.
        """

        satellite_id = self.db.get_satellite_id(dataset.get_satellite_tag())
        if satellite_id is None:
            raise DatasetError("Unknown satellite tag: '%s'" %
                               dataset.get_satellite_tag())

        sensor_id = self.db.get_sensor_id(satellite_id,
                                          dataset.get_sensor_name())
        if sensor_id is None:
            msg = ("Unknown satellite and sensor pair: '%s', '%s'" %
                   (dataset.get_satellite_tag(), dataset.get_sensor_name()))
            raise DatasetError(msg)

    def __check_processing_level(self, dataset):
        """Check that the dataset's processing_level is in the database.

        Raises a DatasetError if it is not.
        """

        if self.db.get_level_id(dataset.get_processing_level()) is None:
            raise DatasetError("Unknown processing level: '%s'" %
                               dataset.get_processing_level())

    def __check_bands(self, dataset):
        """Check that the dataset has the expected bands.

        Raises a DatasetError if any band expected for this dataset
        (according to the database) is missing.
        """

        dataset_key = self.get_dataset_key(dataset)
        if dataset_key not in self.new_bands:
            raise DatasetError('No tile types for this dataset.')

        for tile_type_bands in self.new_bands[dataset_key].values():
            for band_info in tile_type_bands.values():
                dataset.find_band_file(band_info['file_pattern'])
class Collection(object):
    """Collection database interface class."""

    #
    # Interface methods
    #

    def __init__(self, datacube):
        """Initialise the collection object."""

        self.datacube = datacube
        self.db = IngestDBWrapper(datacube.db_connection)
        self.new_bands = self.__reindex_bands(datacube.bands)
        self.transaction_stack = []

        temp_dir = os.path.join(datacube.tile_root,
                                'ingest_temp',
                                datacube.process_id)
        self.temp_tile_directory = temp_dir
        create_directory(temp_dir)

    def cleanup(self):
        """Do end-of-process cleanup.

        Deletes the process-specific temporary directory. Does not close
        the database connection (at present), because the datacube object
        has a destructor which does that.
        """

        shutil.rmtree(self.temp_tile_directory, ignore_errors=True)

    @staticmethod
    def get_dataset_key(dataset):
        """Return the dataset key for use with the new_bands dictionary.

        This is a tuple (satellite_tag, sensor_name, processing_level)
        except that for derived datasets (currently PQA and FC) the
        satellite_tag is replaced with 'DERIVED' and the processing_level
        is used as the sensor_name, giving
        ('DERIVED', processing_level, processing_level).
        """

        level = dataset.get_processing_level()
        satellite = dataset.get_satellite_tag()
        sensor = dataset.get_sensor_name()

        if level in {'PQA', 'FC'}:
            satellite, sensor = 'DERIVED', level

        return (satellite, sensor, level)

    def get_temp_tile_directory(self):
        """Return a path to a directory for temporary tile related files."""

        return self.temp_tile_directory

    def check_metadata(self, dataset):
        """Check that the satellite, sensor, and bands are in the database.

        Checks that the dataset is of a kind that the database knows
        about (by checking basic metadata), and the bands that the
        database expects are present. Raises a DatasetError if the
        checks fail.
        """

        self.__check_satellite_and_sensor(dataset)
        self.__check_processing_level(dataset)
        self.__check_bands(dataset)

    def transaction(self, db=None):
        """Returns a Transaction context manager object.

        This is for use in a 'with' statement. It uses the Collection's
        database connection if one is not provided.
        """

        chosen_db = db if db is not None else self.db
        return Transaction(chosen_db, self.transaction_stack)

    def lock_datasets(self, dataset_list):
        """Returns a Lock context manager object.

        dataset_list is a list of dataset ids for the datasets to be
        locked. This is for use in a 'with' statement. It uses the
        Collection's datacube object to manage the individual locks.
        """

        names = ['Dataset-' + str(ds_id) for ds_id in dataset_list]
        return Lock(self.datacube, names)

    def create_acquisition_record(self, dataset):
        """Factory method to create an instance of the AcquisitonRecord class.

        This method creates a corresponding record in the database if one
        does not already exist.
        """

        return AcquisitionRecord(self, dataset)

    def create_tile_contents(self, tile_type_id, tile_footprint, band_stack):
        """Factory method to create an instance of the TileContents class.

        The tile type info contains the information required for
        resampling extents and resolution.
        """

        tile_type_info = self.datacube.tile_type_dict[tile_type_id]
        return TileContents(self.datacube.tile_root, tile_type_info,
                            tile_footprint, band_stack)

    def current_transaction(self):
        """Returns the current transaction."""

        return self.transaction_stack[-1]

    def mark_tile_for_removal(self, tile_pathname):
        """Mark a tile file for removal on transaction commit."""

        self.current_transaction().mark_tile_for_removal(tile_pathname)

    def mark_tile_for_creation(self, tile_contents):
        """Mark a tile file for creation on transaction commit."""

        self.current_transaction().mark_tile_for_creation(tile_contents)

    #
    # worker methods
    #

    @staticmethod
    def __reindex_bands(bands):
        """Reindex the datacube.bands nested dict structure.

        This method returns the new nested dict which is indexed by:

            new_bands[dataset_key][tile_type][file_number]

        where dataset_key is a tuple:

            (satellite_tag, sensor_name, processing_level).

        The original indexing is

            bands[tile_type][satellite_sensor][file_number]

        where satellite_sensor is a tuple:

            (satellite_tag, sensor_name)

        Note that satellite_tag and sensor_name are replaced by 'DERIVED'
        and the processing_level for PQA and FC datasets. This needs to
        be taken into account when constructing a dataset_key.
        """

        new_bands = {}

        for tile_type, band_dict in bands.items():
            for satellite_sensor, sensor_dict in band_dict.items():
                satellite, sensor = satellite_sensor
                for file_number, band_info in sensor_dict.items():
                    dataset_key = (satellite, sensor,
                                   band_info['level_name'])
                    if dataset_key not in new_bands:
                        new_bands[dataset_key] = {}
                    if tile_type not in new_bands[dataset_key]:
                        new_bands[dataset_key][tile_type] = {}
                    new_bands[dataset_key][tile_type][file_number] = band_info

        return new_bands

    def __check_satellite_and_sensor(self, dataset):
        """Check that the dataset's satellite and sensor are in the database.

        Raises a DatasetError if they are not.
        """

        satellite_tag = dataset.get_satellite_tag()
        satellite_id = self.db.get_satellite_id(satellite_tag)
        if satellite_id is None:
            raise DatasetError("Unknown satellite tag: '%s'" % satellite_tag)

        sensor_name = dataset.get_sensor_name()
        if self.db.get_sensor_id(satellite_id, sensor_name) is None:
            msg = ("Unknown satellite and sensor pair: '%s', '%s'" %
                   (satellite_tag, sensor_name))
            raise DatasetError(msg)

    def __check_processing_level(self, dataset):
        """Check that the dataset's processing_level is in the database.

        Raises a DatasetError if it is not.
        """

        level = dataset.get_processing_level()
        if self.db.get_level_id(level) is None:
            raise DatasetError("Unknown processing level: '%s'" % level)

    def __check_bands(self, dataset):
        """Check that the dataset has the expected bands.

        Raises a DatasetError if any band expected for this dataset
        (according to the database) is missing.
        """

        try:
            expected_bands = self.new_bands[self.get_dataset_key(dataset)]
        except KeyError:
            raise DatasetError('No tile types for this dataset.')

        for tile_type_bands in expected_bands.values():
            for band_info in tile_type_bands.values():
                dataset.find_band_file(band_info['file_pattern'])
class DatasetRecord(object):
    """DatasetRecord database interface class.

    Wraps the dataset table record for a single dataset: record creation or
    update, tiling, and mosaic creation/removal bookkeeping.
    """

    # Metadata fields copied verbatim from the dataset's metadata_dict into
    # the dataset table record.
    DATASET_METADATA_FIELDS = ['dataset_path',
                               'datetime_processed',
                               'dataset_size',
                               'll_x',
                               'll_y',
                               'lr_x',
                               'lr_y',
                               'ul_x',
                               'ul_y',
                               'ur_x',
                               'ur_y',
                               'x_pixels',
                               'y_pixels',
                               'xml_text'
                               ]

    def __init__(self, collection, acquisition, dataset):
        """Find or create the database record for 'dataset'.

        Builds self.dataset_dict from the dataset metadata, then either
        inserts a new dataset record or (if one already exists) checks that
        it is safe to update. Sets self.needs_update accordingly.
        """
        self.collection = collection
        self.datacube = collection.datacube
        self.db = IngestDBWrapper(self.datacube.db_connection)
        dataset_key = collection.get_dataset_key(dataset)
        self.dataset_bands = collection.new_bands[dataset_key]
        self.dataset = dataset
        self.mdd = dataset.metadata_dict

        # Copy straight-through metadata fields, then the derived ones.
        self.dataset_dict = {}
        for field in self.DATASET_METADATA_FIELDS:
            self.dataset_dict[field] = self.mdd[field]

        self.dataset_dict['acquisition_id'] = acquisition.acquisition_id
        self.dataset_dict['crs'] = self.mdd['projection']
        self.dataset_dict['level_name'] = self.mdd['processing_level']
        self.dataset_dict['level_id'] = \
            self.db.get_level_id(self.dataset_dict['level_name'])

        self.dataset_dict['dataset_id'] = \
            self.db.get_dataset_id(self.dataset_dict)
        if self.dataset_dict['dataset_id'] is None:
            # create a new dataset record in the database
            self.dataset_dict['dataset_id'] = \
                self.db.insert_dataset_record(self.dataset_dict)
            self.needs_update = False
        else:
            # check that the old dataset record can be updated
            self.__check_update_ok()
            self.needs_update = True

        self.dataset_id = self.dataset_dict['dataset_id']

    def remove_mosaics(self, dataset_filter):
        """Remove mosaics associated with the dataset.

        This will mark mosaic files for removal, delete mosaic database
        records if they exist, and update the tile class of overlapping
        tiles (from other datasets) to reflect the lack of a mosaic.

        The 'dataset_filter' is a list of dataset_ids to filter on. It
        should be the list of dataset_ids that have been locked (including
        this dataset). It is used to avoid operating on the tiles of an
        unlocked dataset.
        """

        # remove new mosaics (those with database records)
        overlap_dict = self.db.get_overlapping_tiles_for_dataset(
            self.dataset_id,
            input_tile_class_filter=(TC_SINGLE_SCENE,
                                     TC_SUPERSEDED,
                                     TC_MOSAIC),
            output_tile_class_filter=(TC_MOSAIC,),
            dataset_filter=dataset_filter
            )

        for tile_record_list in overlap_dict.values():
            for tr in tile_record_list:
                # Delete the DB record first, then queue the file removal;
                # the actual file delete happens on transaction commit.
                self.db.remove_tile_record(tr['tile_id'])
                self.collection.mark_tile_for_removal(tr['tile_pathname'])

        # build a dictionary of overlaps (ignoring mosaics)
        overlap_dict = self.db.get_overlapping_tiles_for_dataset(
            self.dataset_id,
            input_tile_class_filter=(TC_SINGLE_SCENE, TC_SUPERSEDED),
            output_tile_class_filter=(TC_SINGLE_SCENE, TC_SUPERSEDED),
            dataset_filter=dataset_filter
            )

        # update tile classes for overlap tiles from other datasets:
        # with the mosaic gone they revert to single-scene tiles.
        for tile_record_list in overlap_dict.values():
            if len(tile_record_list) > 2:
                raise DatasetError("Attempt to update a mosaic of three or " +
                                   "more datasets. Handling for this case " +
                                   "is not yet implemented.")
            for tr in tile_record_list:
                if tr['dataset_id'] != self.dataset_id:
                    self.db.update_tile_class(tr['tile_id'], TC_SINGLE_SCENE)

        # remove old mosaics (those without database records)
        for tile_record_list in overlap_dict.values():
            if len(tile_record_list) > 1:
                # tile_record_list is sorted by acquisition start time, so
                # the first record should be the one the mosaic filename is
                # based on.
                tr = tile_record_list[0]
                mosaic_pathname = \
                    self.__make_mosaic_pathname(tr['tile_pathname'])
                if os.path.isfile(mosaic_pathname):
                    self.collection.mark_tile_for_removal(mosaic_pathname)

    def remove_tiles(self):
        """Remove the tiles associated with the dataset.

        This will remove ALL the tiles belonging to this dataset, deleting
        database records and marking tile files for removal on commit.

        Mosaics should be removed BEFORE calling this (as it will delete
        the tiles needed to figure out the overlaps, but may not delete
        all the mosaics).
        """
        tile_list = self.db.get_dataset_tile_ids(self.dataset_id)

        for tile_id in tile_list:
            # Fetch the pathname before deleting the record it lives in.
            tile_pathname = self.db.get_tile_pathname(tile_id)
            self.db.remove_tile_record(tile_id)
            self.collection.mark_tile_for_removal(tile_pathname)

    def update(self):
        """Update the dataset record in the database.

        This first checks that the new dataset is more recent than
        the record in the database. If not it raises a dataset error.
        """
        self.__check_update_ok()
        self.db.update_dataset_record(self.dataset_dict)

    def make_tiles(self, tile_type_id, band_stack):
        """Tile the dataset, returning a list of tile_content objects."""
        tile_list = []
        tile_footprint_list = self.get_coverage(tile_type_id)

        for tile_footprint in tile_footprint_list:
            tile_contents = self.collection.create_tile_contents(
                tile_type_id,
                tile_footprint,
                band_stack
                )
            tile_contents.reproject()

            # Keep only tiles that end up with actual data after
            # reprojection; discard (and clean up) empty ones.
            if tile_contents.has_data():
                tile_list.append(tile_contents)
            else:
                tile_contents.remove()

        return tile_list

    def store_tiles(self, tile_list):
        """Store tiles in the database and file store.

        'tile_list' is a list of tile_contents objects. This method will
        create the corresponding database records and mark tiles for
        creation when the transaction commits.
        """
        tile_record_list = []
        for tile_contents in tile_list:
            tile_record = self.create_tile_record(tile_contents)
            tile_record_list.append(tile_record)

        return tile_record_list

    def create_mosaics(self, dataset_filter):
        """Create mosaics associated with the dataset.

        'dataset_filter' is a list of dataset_ids to filter on. It should
        be the list of dataset_ids that have been locked (including this
        dataset). It is used to avoid operating on the tiles of an
        unlocked dataset.
        """

        # Build a dictionary of overlaps (ignoring mosaics, including pending).
        overlap_dict = self.db.get_overlapping_tiles_for_dataset(
            self.dataset_id,
            input_tile_class_filter=(TC_PENDING,
                                     TC_SINGLE_SCENE,
                                     TC_SUPERSEDED),
            output_tile_class_filter=(TC_PENDING,
                                      TC_SINGLE_SCENE,
                                      TC_SUPERSEDED),
            dataset_filter=dataset_filter
            )

        # Make mosaics and update tile classes as needed.
        for tile_record_list in overlap_dict.values():
            if len(tile_record_list) > 2:
                raise DatasetError("Attempt to create a mosaic of three or " +
                                   "more datasets. Handling for this case " +
                                   "is not yet implemented.")
            elif len(tile_record_list) == 2:
                # Two overlapping tiles: build the mosaic and mark both
                # source tiles as superseded by it.
                self.__make_one_mosaic(tile_record_list)
                for tr in tile_record_list:
                    self.db.update_tile_class(tr['tile_id'], TC_SUPERSEDED)
            else:
                # No overlap: the (possibly pending) tile stands alone.
                for tr in tile_record_list:
                    self.db.update_tile_class(tr['tile_id'], TC_SINGLE_SCENE)

    def get_removal_overlaps(self):
        """Returns a list of overlapping dataset ids for mosaic removal."""
        tile_class_filter = (TC_SINGLE_SCENE,
                             TC_SUPERSEDED,
                             TC_MOSAIC)
        return self.get_overlaps(tile_class_filter)

    def get_creation_overlaps(self):
        """Returns a list of overlapping dataset_ids for mosaic creation."""
        tile_class_filter = (TC_PENDING,
                             TC_SINGLE_SCENE,
                             TC_SUPERSEDED)
        return self.get_overlaps(tile_class_filter)

    def get_overlaps(self, tile_class_filter):
        """Returns a list of overlapping dataset ids, including this dataset.

        A dataset is overlapping if it contains tiles that overlap with
        tiles belonging to this dataset. Only tiles in the tile_class_filter
        are considered.
        """
        dataset_list = self.db.get_overlapping_dataset_ids(
            self.dataset_id,
            tile_class_filter=tile_class_filter
            )

        # An empty result means this dataset has no tiles in the filter;
        # it is still its own (trivial) overlap set.
        if not dataset_list:
            dataset_list = [self.dataset_id]

        return dataset_list

    def create_tile_record(self, tile_contents):
        """Factory method to create an instance of the TileRecord class.

        The created object will be responsible for inserting tile table
        records into the database for reprojected or mosaiced tiles."""
        self.collection.mark_tile_for_creation(tile_contents)
        return TileRecord(self.collection, self, tile_contents)

    def mark_as_tiled(self):
        """Flag the dataset record as tiled in the database.

        This flag does not exist in the current database schema, so this
        method does nothing at the moment."""
        pass

    def list_tile_types(self):
        """Returns a list of the tile type ids for this dataset."""
        return self.dataset_bands.keys()

    def get_tile_bands(self, tile_type_id):
        """Returns a dictionary containing the band info for one tile type.

        The tile_type_id must valid for this dataset, available from
        list_tile_types above.
        """
        return self.dataset_bands[tile_type_id]

    def get_coverage(self, tile_type_id):
        """Given the coordinate reference system of the dataset and that of
        the tile_type_id, return a list of tiles within the dataset
        footprint"""
        tile_type_info = self.collection.datacube.tile_type_dict[tile_type_id]

        # Get geospatial information from the dataset.
        dataset_crs = self.mdd['projection']
        dataset_geotransform = self.mdd['geo_transform']
        pixels = self.mdd['x_pixels']
        lines = self.mdd['y_pixels']

        # Look up the datacube's projection information for this tile_type.
        tile_crs = tile_type_info['crs']

        # Get the transformation between the two projections.
        transformation = self.define_transformation(dataset_crs, tile_crs)

        # Determine the bounding quadrilateral of the dataset extent
        # in tile coordinates.
        dataset_bbox = self.get_bbox(transformation, dataset_geotransform,
                                     pixels, lines)

        # Determine maximum inner rectangle, which is guaranteed to need
        # tiling, and the minimum outer rectangle outside which no tiles
        # will exist.
        cube_origin = (tile_type_info['x_origin'], tile_type_info['y_origin'])
        cube_tile_size = (tile_type_info['x_size'], tile_type_info['y_size'])
        coverage = self.get_touched_tiles(dataset_bbox,
                                          cube_origin, cube_tile_size)

        return coverage

    #
    # worker methods
    #

    def __check_update_ok(self):
        """Checks if an update is possible, raises a DatasetError otherwise."""
        # A dataset older than the database copy must not overwrite it.
        tile_class_filter = (TC_SINGLE_SCENE,
                            TC_SUPERSEDED)
        if self.db.dataset_older_than_database(
                self.dataset_dict['dataset_id'],
                self.dataset_dict['datetime_processed'],
                tile_class_filter):
            raise DatasetError("Dataset to be ingested is older than " +
                               "the version in the database.")

    def __make_one_mosaic(self, tile_record_list):
        """Create a single mosaic.

        This create the mosaic contents, creates the database record,
        and marks the mosaic contents for creation on transaction commit.
        """
        mosaic = MosaicContents(
            tile_record_list,
            self.datacube.tile_type_dict,
            self.dataset_dict['level_name'],
            self.collection.get_temp_tile_directory()
            )
        mosaic.create_record(self.db)
        self.collection.mark_tile_for_creation(mosaic)

    def __make_mosaic_pathname(self, tile_pathname):
        """Return the pathname of the mosaic corresponding to a tile."""
        (tile_dir, tile_basename) = os.path.split(tile_pathname)

        mosaic_dir = os.path.join(tile_dir, 'mosaic_cache')
        if self.dataset_dict['level_name'] == 'PQA':
            # PQA mosaics keep the tile's own basename (and extension);
            # all other levels are virtual (.vrt) mosaics.
            mosaic_basename = tile_basename
        else:
            mosaic_basename = re.sub(r'\.\w+$', '.vrt', tile_basename)

        return os.path.join(mosaic_dir, mosaic_basename)

    #
    # Worker methods for coverage.
    #
    # These are public so that they can be called by test_dataset_record.
    #

    def define_transformation(self, dataset_crs, tile_crs):
        """Return the transformation between dataset_crs
        and tile_crs projections"""
        osr.UseExceptions()
        try:
            dataset_spatial_reference = self.create_spatial_ref(dataset_crs)
            tile_spatial_reference = self.create_spatial_ref(tile_crs)
            if dataset_spatial_reference is None:
                raise DatasetError('Unknown projecton %s'
                                   % str(dataset_crs))
            if tile_spatial_reference is None:
                raise DatasetError('Unknown projecton %s'
                                   % str(tile_crs))
            return osr.CoordinateTransformation(dataset_spatial_reference,
                                                tile_spatial_reference)
        except Exception:
            # osr raises on failure because of UseExceptions() above;
            # re-raise everything as a DatasetError for the ingest framework.
            raise DatasetError('Coordinate transformation error ' +
                               'for transforming %s to %s' %
                               (str(dataset_crs), str(tile_crs)))

    @staticmethod
    def create_spatial_ref(crs):
        """Create a spatial reference system for projection crs.

        Called by define_transformation()"""
        # pylint: disable=broad-except

        osr.UseExceptions()
        try:
            spatial_ref = osr.SpatialReference()
        except Exception:
            raise DatasetError('No spatial reference done for %s' % str(crs))

        # First try to interpret crs as WKT...
        try:
            spatial_ref.ImportFromWkt(crs)
            return spatial_ref
        except Exception:
            pass

        # ...then as an 'EPSG:nnnn' string. If the regex does not match,
        # matchobj is None and the AttributeError is caught below, so any
        # unrecognised crs yields None.
        try:
            matchobj = re.match(r'EPSG:(\d+)', crs)
            epsg_code = int(matchobj.group(1))
            spatial_ref.ImportFromEPSG(epsg_code)
            return spatial_ref
        except Exception:
            return None

    @staticmethod
    def get_bbox(transform, geotrans, pixels, lines):
        """Return the coordinates of the dataset footprint in clockwise order
        from upper-left"""
        # Corners are computed from the GDAL-style geotransform:
        # x = gt[0] + gt[1]*col + gt[2]*row, y = gt[3] + gt[4]*col + gt[5]*row,
        # then projected into tile coordinates (z is ignored).
        xul, yul, dummy_z = \
            transform.TransformPoint(geotrans[0], geotrans[3], 0)
        xur, yur, dummy_z = \
            transform.TransformPoint(geotrans[0] + geotrans[1] * pixels,
                                     geotrans[3] + geotrans[4] * pixels, 0)
        xll, yll, dummy_z = \
            transform.TransformPoint(geotrans[0] + geotrans[2] * lines,
                                     geotrans[3] + geotrans[5] * lines, 0)
        xlr, ylr, dummy_z = \
            transform.TransformPoint(
                geotrans[0] + geotrans[1] * pixels + geotrans[2] * lines,
                geotrans[3] + geotrans[4] * pixels + geotrans[5] * lines, 0)

        return [(xul, yul), (xur, yur), (xlr, ylr), (xll, yll)]

    def get_touched_tiles(self, dataset_bbox,
                          cube_origin, cube_tile_size):
        """Return a list of tuples (itile, jtile) comprising all tiles
        footprints that intersect the dataset bounding box"""
        definite_tiles, possible_tiles = \
            self.get_definite_and_possible_tiles(dataset_bbox,
                                                 cube_origin, cube_tile_size)
        coverage_set = definite_tiles

        # Check possible tiles:
        # Check if the tile perimeter intersects the dataset bbox perimeter:
        intersected_tiles = \
            self.get_intersected_tiles(possible_tiles, dataset_bbox,
                                       cube_origin, cube_tile_size)
        coverage_set = coverage_set.union(intersected_tiles)
        possible_tiles = possible_tiles.difference(intersected_tiles)

        # Otherwise the tile might be wholly contained in the dataset bbox.
        contained_tiles = \
            self.get_contained_tiles(possible_tiles, dataset_bbox,
                                     cube_origin, cube_tile_size)
        coverage_set = coverage_set.union(contained_tiles)

        return coverage_set

    @staticmethod
    def get_definite_and_possible_tiles(bbox, cube_origin, cube_tile_size):
        """Return two lists of tile footprints: from the largest rectangle
        wholly contained within the dataset bbox and the smallest rectangle
        containing the bbox."""
        # pylint: disable=too-many-locals

        # unpack the bbox vertices in clockwise order from upper-left
        xyul, xyur, xylr, xyll = bbox
        xul, yul = xyul
        xur, yur = xyur
        xlr, ylr = xylr
        xll, yll = xyll

        # unpack the origin of the tiled datacube (e.g. lat=0, lon=0) and
        # the datacube tile size
        xorigin, yorigin = cube_origin
        xsize, ysize = cube_tile_size

        # Define the largest rectangle wholly contained within footprint:
        # every tile it touches is definitely inside the dataset.
        xmin = max(xll, xul)
        xmax = min(xlr, xur)
        ymin = max(yll, ylr)
        ymax = min(yul, yur)

        xmin_index = int(floor((xmin - xorigin) / xsize))
        xmax_index = int(floor((xmax - xorigin) / xsize))
        ymin_index = int(floor((ymin - yorigin) / ysize))
        ymax_index = int(floor((ymax - yorigin) / ysize))

        definite_tiles = set([(itile, jtile)
                              for itile in range(xmin_index, xmax_index + 1)
                              for jtile in range(ymin_index, ymax_index + 1)])

        # Define the smallest rectangle which is guaranteed to include all
        # tiles in the footprint; tiles in it but not definite need a closer
        # geometric test.
        xmin = min(xll, xul)
        xmax = max(xlr, xur)
        ymin = min(yll, ylr)
        ymax = max(yul, yur)

        xmin_index = int(floor((xmin - xorigin) / xsize))
        xmax_index = int(floor((xmax - xorigin) / xsize))
        ymin_index = int(floor((ymin - yorigin) / ysize))
        ymax_index = int(floor((ymax - yorigin) / ysize))

        possible_tiles = set([(itile, jtile)
                              for itile in range(xmin_index, xmax_index + 1)
                              for jtile in range(ymin_index, ymax_index + 1)
                              ]).difference(definite_tiles)

        return (definite_tiles, possible_tiles)

    def get_intersected_tiles(self, candidate_tiles, dset_bbox,
                              cube_origin, cube_tile_size):
        """Return the subset of candidate_tiles that have an intersection with
        the dataset bounding box"""
        # pylint: disable=too-many-locals

        xorigin, yorigin = cube_origin
        xsize, ysize = cube_tile_size
        keep_list = []
        for itile, jtile in candidate_tiles:
            intersection_exists = False
            # Tile corner coordinates, clockwise from upper-left.
            (x0, y0) = (xorigin + itile * xsize,
                        yorigin + (jtile + 1) * ysize)
            tile_bbox = [(x0, y0), (x0 + xsize, y0),
                         (x0 + xsize, y0 - ysize), (x0, y0 - ysize)]
            tile_vtx_number = len(tile_bbox)
            dset_vtx_number = len(dset_bbox)
            # Test every tile edge against every dataset edge; keep the
            # tile on the first intersection found.
            for tile_vtx in range(tile_vtx_number):
                x1, y1 = tile_bbox[tile_vtx]
                x2, y2 = tile_bbox[(tile_vtx + 1) % tile_vtx_number]
                for dset_vtx in range(dset_vtx_number):
                    x3, y3 = dset_bbox[dset_vtx]
                    x4, y4 = dset_bbox[(dset_vtx + 1) % dset_vtx_number]
                    xcoords = [x1, x2, x3, x4]
                    ycoords = [y1, y2, y3, y4]
                    intersection_exists = \
                        self.check_intersection(xcoords, ycoords)
                    if intersection_exists:
                        keep_list.append((itile, jtile))
                        break
                if intersection_exists:
                    break
        return set(keep_list)

    @staticmethod
    def get_contained_tiles(candidate_tiles, dset_bbox,
                            cube_origin, cube_tile_size):
        """Return the subset of candidate tiles that lie wholly within the
        dataset bounding box"""
        # pylint: disable=too-many-locals

        xorigin, yorigin = cube_origin
        xsize, ysize = cube_tile_size
        keep_list = []
        for itile, jtile in candidate_tiles:
            tile_vtx_inside = []
            # Tile corner coordinates, clockwise from upper-left.
            (x0, y0) = (xorigin + itile * xsize,
                        yorigin + (jtile + 1) * ysize)
            tile_bbox = [(x0, y0), (x0 + xsize, y0),
                         (x0 + xsize, y0 - ysize), (x0, y0 - ysize)]
            dset_vtx_number = len(dset_bbox)
            for x, y in tile_bbox:
                # Check if this vertex lies within the dataset bounding box
                # using a winding-number (crossing parity) test against each
                # dataset edge.
                winding_number = 0
                for dset_vtx in range(dset_vtx_number):
                    x1, y1 = dset_bbox[dset_vtx]
                    x2, y2 = dset_bbox[(dset_vtx + 1) % dset_vtx_number]
                    if y >= y1 and y < y2:
                        # Upward crossing: count if the vertex is left of
                        # the edge.
                        if (x - x1) * (y2 - y1) > (x2 - x1) * (y - y1):
                            winding_number += 1
                    elif y <= y1 and y > y2:
                        # Downward crossing: count if the vertex is right
                        # of the edge.
                        if (x - x1) * (y2 - y1) < (x2 - x1) * (y - y1):
                            winding_number += 1
                tile_vtx_inside.append(winding_number % 2 == 1)
            if tile_vtx_inside.count(True) == len(tile_bbox):
                keep_list.append((itile, jtile))
            # Candidates were pre-screened as non-intersecting, so a tile is
            # either wholly inside (4 vertices) or wholly outside (0).
            assert tile_vtx_inside.count(True) == 4 or \
                tile_vtx_inside.count(True) == 0, \
                "Tile partially inside dataset bounding box but has" \
                "no intersection"
        return set(keep_list)

    @staticmethod
    def check_intersection(xpts, ypts):
        """Determines if the line segments (xpts[0], ypts[0]) to
        (xpts[1], ypts[1]) and (xpts[2], ypts[2]) to (xpts[3], ypts[3])
        intersect"""
        # Parametric test: segment 1 is p + t*r, segment 2 is q + u*s;
        # they properly intersect iff 0 < t < 1 and 0 < u < 1.
        pvec = (xpts[0], ypts[0])
        qvec = (xpts[2], ypts[2])
        rvec = (xpts[1] - xpts[0], ypts[1] - ypts[0])
        svec = (xpts[3] - xpts[2], ypts[3] - ypts[2])

        # Cross product of the direction vectors; zero means the segments
        # are parallel (or collinear) and are treated as non-intersecting.
        rvec_cross_svec = rvec[0] * svec[1] - rvec[1] * svec[0]
        if rvec_cross_svec == 0:
            return False

        qminusp_cross_svec = \
            (qvec[0] - pvec[0]) * svec[1] - (qvec[1] - pvec[1]) * svec[0]
        qminusp_cross_rvec = \
            (qvec[0] - pvec[0]) * rvec[1] - (qvec[1] - pvec[1]) * rvec[0]
        tparameter = qminusp_cross_svec / rvec_cross_svec
        uparameter = qminusp_cross_rvec / rvec_cross_svec
        if tparameter > 0 and tparameter < 1 and \
                uparameter > 0 and uparameter < 1:
            # NOTE(review): the visible chunk ends here; if the file does not
            # continue with an explicit 'return False', the non-intersecting
            # case falls through returning None (which is falsy) — confirm
            # against the full file.
            return True