def create_mosaics(self, dataset_filter):
    """Create mosaics associated with the dataset.

    'dataset_filter' is a list of dataset_ids to filter on. It should
    be the list of dataset_ids that have been locked (including this
    dataset). It is used to avoid operating on the tiles of an
    unlocked dataset.
    """

    # Build a dictionary of overlaps (ignoring mosaics, including pending).
    overlap_dict = self.db.get_overlapping_tiles_for_dataset(
        self.dataset_id,
        input_tile_class_filter=(TC_PENDING,
                                 TC_SINGLE_SCENE,
                                 TC_SUPERSEDED),
        output_tile_class_filter=(TC_PENDING,
                                  TC_SINGLE_SCENE,
                                  TC_SUPERSEDED),
        dataset_filter=dataset_filter)

    # Make mosaics and update tile classes as needed.
    for tile_record_list in overlap_dict.values():
        if len(tile_record_list) > 2:
            raise DatasetError("Attempt to create a mosaic of three or "
                               "more datasets. Handling for this case "
                               "is not yet implemented.")
        elif len(tile_record_list) == 2:
            self.__make_one_mosaic(tile_record_list)
            for tr in tile_record_list:
                self.db.update_tile_class(tr['tile_id'], TC_SUPERSEDED)
        else:
            for tr in tile_record_list:
                self.db.update_tile_class(tr['tile_id'], TC_SINGLE_SCENE)
def __init__(self, dataset_path):
    """Opens the dataset and extracts metadata.

    Most of the metadata is kept in self._ds, which is an
    EOtools.DatasetDrivers.SceneDataset object. Some extra metadata is
    extracted and kept in instance attributes.
    """

    self._dataset_path = dataset_path

    self._ds = SceneDataset(default_metadata_required=False, utm_fix=True)
    self._ds = self._ds.Open(self.get_dataset_path())
    if not self._ds:
        raise DatasetError("Unable to open %s" % self.get_dataset_path())

    #
    # Cache extra metadata in instance attributes.
    #

    self._dataset_size = self._get_directory_size()

    if self.get_processing_level() in ['ORTHO', 'L1T', 'MAP']:
        LOGGER.debug('Dataset %s is Level 1', self.get_dataset_path())
        self._gcp_count = self._get_gcp_count()
        self._mtl_text = self._get_mtl_text()
    else:
        self._gcp_count = None
        self._mtl_text = None

    self._xml_text = self._get_xml_text()

    AbstractDataset.__init__(self)
def catalog(self, dataset):
    """Catalog a single dataset into the collection."""

    # Create or locate the acquisition and dataset_record for
    # the dataset we are ingesting. Simultaneous attempts to
    # create the records may cause an IntegrityError; a retry
    # of the transaction should fix this.
    tries = 0
    while tries < self.CATALOG_MAX_TRIES:
        try:
            with self.collection.transaction():
                acquisition_record = \
                    self.collection.create_acquisition_record(dataset)
                dataset_record = \
                    acquisition_record.create_dataset_record(dataset)
            break
        except psycopg2.IntegrityError:
            tries = tries + 1
    else:
        raise DatasetError('Unable to catalog: '
                           'persistent integrity error.')

    # Update the dataset and remove tiles if necessary.
    if dataset_record.needs_update:
        overlap_list = dataset_record.get_removal_overlaps()
        with self.collection.lock_datasets(overlap_list):
            with self.collection.transaction():
                dataset_record.remove_mosaics(overlap_list)
                dataset_record.remove_tiles()
                dataset_record.update()

    return dataset_record
def has_data(self):
    """Check if the reprojection gave rise to a tile with valid data.

    Open the file and check if there is data.
    """
    tile_dataset = gdal.Open(self.temp_tile_output_path)
    data = tile_dataset.ReadAsArray()

    if len(data.shape) == 2:
        data = data[None, :]
    if data.shape[0] != len(self.band_stack.band_dict):
        raise DatasetError(
            ("Number of layers (%d) in tile file\n %s\n"
             "does not match number of bands "
             "(%d) from database.")
            % (data.shape[0], self.temp_tile_output_path,
               len(self.band_stack.band_dict)))

    for file_number in self.band_stack.band_dict:
        nodata_val = self.band_stack.band_dict[file_number]['nodata_value']
        if nodata_val is None:
            if (self.band_stack.band_dict[file_number]['level_name'] ==
                    'PQA'):
                # Check if any pixel has the contiguity bit set.
                if (np.bitwise_and(data, PQA_CONTIGUITY) > 0).any():
                    return True
            else:
                # A nodata_value of None means all array data is valid.
                return True
        else:
            if (data != nodata_val).any():
                return True

    # All comparisons have shown that all array contents are nodata:
    return False
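
# A minimal, standalone sketch of the PQA contiguity test used above, run on
# a toy array. The bit value assumed here for PQA_CONTIGUITY (0x100 = 256) is
# illustrative only; the real constant comes from the ingester's constants.
import numpy as np

PQA_CONTIGUITY_EXAMPLE = 256  # assumed bit mask for the contiguity flag

toy_pqa = np.array([[0, 0],
                    [0, 256 | 3]], dtype=np.uint16)
# True if any pixel has the contiguity bit set, i.e. the tile holds data.
print((np.bitwise_and(toy_pqa, PQA_CONTIGUITY_EXAMPLE) > 0).any())  # True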
def __make_mosaic_vrt(tile_record_list, mosaic_path):
    """From two or more source tiles, create a VRT file at mosaic_path."""

    LOGGER.info('Creating mosaic VRT file %s', mosaic_path)

    source_file_list = [tr['tile_pathname'] for tr in tile_record_list]

    gdalbuildvrt_cmd = ["gdalbuildvrt",
                        "-q",
                        "-overwrite",
                        "%s" % mosaic_path
                        ]
    gdalbuildvrt_cmd.extend(source_file_list)

    result = execute(gdalbuildvrt_cmd, shell=False)

    if result['stdout']:
        log_multiline(LOGGER.info, result['stdout'],
                      'stdout from %s' % gdalbuildvrt_cmd, '\t')

    if result['stderr']:
        log_multiline(LOGGER.debug, result['stderr'],
                      'stderr from %s' % gdalbuildvrt_cmd, '\t')

    if result['returncode'] != 0:
        raise DatasetError('Unable to perform gdalbuildvrt: '
                           '"%s" failed: %s' % (gdalbuildvrt_cmd,
                                                result['stderr']))
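
# A minimal sketch (hypothetical paths) of the command list assembled above,
# with the same gdalbuildvrt flags; handy when reproducing a failed mosaic
# build by hand on the command line.
example_cmd = ["gdalbuildvrt", "-q", "-overwrite",
               "/tmp/mosaic_example.vrt",             # output VRT (made up)
               "/tmp/tile_a.tif", "/tmp/tile_b.tif"]  # source tiles (made up)
print(' '.join(example_cmd))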
def has_data(self):
    """Check if the reprojection gave rise to a tile with valid data.

    Open the file and check if there is data.
    """
    tile_dataset = gdal.Open(self.temp_tile_output_path)

    start_datetime = datetime.now()

    if tile_dataset.RasterCount != len(self.band_stack.band_dict):
        raise DatasetError(("Number of layers (%d) in tile file\n %s\n"
                            "does not match number of bands "
                            "(%d) from database.")
                           % (tile_dataset.RasterCount,
                              self.temp_tile_output_path,
                              len(self.band_stack.band_dict)))

    # Convert self.band_stack.band_dict into a list of elements sorted
    # by tile_layer.
    band_list = [self.band_stack.band_dict[file_number]
                 for file_number in sorted(
                     self.band_stack.band_dict.keys(),
                     key=lambda file_number:
                     self.band_stack.band_dict[file_number]['tile_layer'])]

    result = False
    # Read each band individually - quicker for non-empty tiles but
    # slower for empty ones.
    for band_index in range(tile_dataset.RasterCount):
        band_no = band_index + 1
        band = tile_dataset.GetRasterBand(band_no)
        band_data = band.ReadAsArray()

        # Use the DB value: should be the same for all bands in a given
        # processing level.
        nodata_val = band_list[band_index]['nodata_value']

        if nodata_val is None:
            # Use the value defined in the tile dataset (inherited from
            # the source dataset).
            nodata_val = band.GetNoDataValue()

        LOGGER.debug('nodata_val = %s for layer %d', nodata_val, band_no)

        if nodata_val is None:
            # Special case for PQA with no no-data value defined.
            if band_list[band_index]['level_name'] == 'PQA':
                if (np.bitwise_and(band_data, PQA_CONTIGUITY) > 0).any():
                    LOGGER.debug('Tile is not empty: PQA data contains '
                                 'some contiguous data')
                    result = True
                    break
            else:
                # A nodata_value of None means all array data is valid.
                LOGGER.debug('Tile is not empty: no-data value is not set')
                result = True
                break
        elif (band_data != nodata_val).any():
            LOGGER.debug('Tile is not empty: some values != %s', nodata_val)
            result = True
            break

    LOGGER.info('Tile ' + ('has data' if result else 'is empty') + '.')
    LOGGER.debug('Empty tile detection time = %s',
                 datetime.now() - start_datetime)
    return result
def __check_satellite_and_sensor(self, dataset):
    """Check that the dataset's satellite and sensor are in the database.

    Raises a DatasetError if they are not.
    """
    satellite_id = self.db.get_satellite_id(dataset.get_satellite_tag())
    if satellite_id is None:
        raise DatasetError("Unknown satellite tag: '%s'" %
                           dataset.get_satellite_tag())

    sensor_id = self.db.get_sensor_id(satellite_id,
                                      dataset.get_sensor_name())
    if sensor_id is None:
        msg = ("Unknown satellite and sensor pair: '%s', '%s'" %
               (dataset.get_satellite_tag(), dataset.get_sensor_name()))
        raise DatasetError(msg)
def define_transformation(self, dataset_crs, tile_crs):
    """Return the transformation between the dataset_crs and tile_crs
    projections."""

    osr.UseExceptions()
    try:
        dataset_spatial_reference = self.create_spatial_ref(dataset_crs)
        tile_spatial_reference = self.create_spatial_ref(tile_crs)
        if dataset_spatial_reference is None:
            raise DatasetError('Unknown projection %s' % str(dataset_crs))
        if tile_spatial_reference is None:
            raise DatasetError('Unknown projection %s' % str(tile_crs))
        return osr.CoordinateTransformation(dataset_spatial_reference,
                                            tile_spatial_reference)
    except Exception:
        raise DatasetError('Coordinate transformation error '
                           'for transforming %s to %s' %
                           (str(dataset_crs), str(tile_crs)))
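
# A hedged usage sketch of the transformation built above: reproject a single
# point between two CRSs. The EPSG codes are illustrative (a UTM zone and
# Australian Albers), not values taken from the ingester configuration.
from osgeo import osr

src = osr.SpatialReference()
src.ImportFromEPSG(32755)   # assumed dataset CRS (WGS84 / UTM zone 55S)
dst = osr.SpatialReference()
dst.ImportFromEPSG(3577)    # assumed tile CRS (GDA94 / Australian Albers)
transform = osr.CoordinateTransformation(src, dst)
# TransformPoint returns an (x, y, z) tuple.
x, y, _ = transform.TransformPoint(500000.0, 6100000.0)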
def find_band_file(self, file_pattern):
    """Find the file in dataset_dir matching file_pattern and check
    uniqueness.

    Returns the path to the file if found, raises a DatasetError
    otherwise.
    """
    dataset_dir = os.path.join(self.metadata_dict['dataset_path'],
                               'scene01')
    if not os.path.isdir(dataset_dir):
        raise DatasetError('%s is not a valid directory' % dataset_dir)
    filelist = [filename for filename in os.listdir(dataset_dir)
                if re.match(file_pattern, filename)]
    if not len(filelist) == 1:
        raise DatasetError('Unable to find unique match '
                           'for file pattern %s' % file_pattern)

    return os.path.join(dataset_dir, filelist[0])
def __check_update_ok(self):
    """Checks if an update is possible, raises a DatasetError otherwise."""

    tile_class_filter = (TC_SINGLE_SCENE,
                         TC_SUPERSEDED)
    if self.db.dataset_older_than_database(
            self.dataset_dict['dataset_id'],
            self.dataset_dict['datetime_processed'],
            tile_class_filter):
        raise DatasetError("Dataset to be ingested is older than "
                           "the version in the database.")
def filter_on_metadata(self, dataset):
    """Raises a DatasetError unless the dataset passes the filter."""

    path = dataset.get_x_ref()
    row = dataset.get_y_ref()
    dt = dataset.get_start_datetime()
    date = dt.date() if dt is not None else None

    if not self.filter_dataset(path, row, date):
        raise DatasetError('Filtered by metadata.')
def __check_processing_level(self, dataset):
    """Check that the dataset's processing_level is in the database.

    Raises a DatasetError if it is not.
    """
    level_id = self.db.get_level_id(dataset.get_processing_level())
    if level_id is None:
        raise DatasetError("Unknown processing level: '%s'" %
                           dataset.get_processing_level())
def remove_mosaics(self, dataset_filter):
    """Remove mosaics associated with the dataset.

    This will mark mosaic files for removal, delete mosaic database
    records if they exist, and update the tile class of overlapping
    tiles (from other datasets) to reflect the lack of a mosaic. The
    'dataset_filter' is a list of dataset_ids to filter on. It should
    be the list of dataset_ids that have been locked (including this
    dataset). It is used to avoid operating on the tiles of an
    unlocked dataset.
    """

    # Remove new mosaics (those with database records).
    overlap_dict = self.db.get_overlapping_tiles_for_dataset(
        self.dataset_id,
        input_tile_class_filter=(TC_SINGLE_SCENE,
                                 TC_SUPERSEDED,
                                 TC_MOSAIC),
        output_tile_class_filter=(TC_MOSAIC,),
        dataset_filter=dataset_filter)

    for tile_record_list in overlap_dict.values():
        for tr in tile_record_list:
            self.db.remove_tile_record(tr['tile_id'])
            self.collection.mark_tile_for_removal(tr['tile_pathname'])

    # Build a dictionary of overlaps (ignoring mosaics).
    overlap_dict = self.db.get_overlapping_tiles_for_dataset(
        self.dataset_id,
        input_tile_class_filter=(TC_SINGLE_SCENE, TC_SUPERSEDED),
        output_tile_class_filter=(TC_SINGLE_SCENE, TC_SUPERSEDED),
        dataset_filter=dataset_filter)

    # Update tile classes for overlap tiles from other datasets.
    for tile_record_list in overlap_dict.values():
        if len(tile_record_list) > 2:
            raise DatasetError("Attempt to update a mosaic of three or "
                               "more datasets. Handling for this case "
                               "is not yet implemented.")
        for tr in tile_record_list:
            if tr['dataset_id'] != self.dataset_id:
                self.db.update_tile_class(tr['tile_id'], TC_SINGLE_SCENE)

    # Remove old mosaics (those without database records).
    for tile_record_list in overlap_dict.values():
        if len(tile_record_list) > 1:
            # tile_record_list is sorted by acquisition start time, so
            # the first record should be the one the mosaic filename is
            # based on.
            tr = tile_record_list[0]
            mosaic_pathname = \
                self.__make_mosaic_pathname(tr['tile_pathname'])
            if os.path.isfile(mosaic_pathname):
                self.collection.mark_tile_for_removal(mosaic_pathname)
def nc2vrt(self, nc_path, vrt_path):
    """Create a VRT file to present a netCDF file with multiple
    subdatasets to GDAL as a band stack."""

    nc_abs_path = os.path.abspath(nc_path)
    vrt_abs_path = os.path.abspath(vrt_path)

    # Create the VRT file using absolute pathnames.
    nc2vrt_cmd = ("gdalbuildvrt -separate -allow_projection_difference "
                  "-overwrite %s %s" % (vrt_abs_path, nc_abs_path))
    LOGGER.debug('nc2vrt_cmd = %s', nc2vrt_cmd)
    result = execute(nc2vrt_cmd)
    if result['returncode'] != 0:
        raise DatasetError('Unable to perform gdalbuildvrt: '
                           '"%s" failed: %s' % (nc2vrt_cmd,
                                                result['stderr']))
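
# A small sketch (hypothetical file name) of how GDAL exposes netCDF
# subdatasets, which is what the "-separate" VRT built above flattens into a
# band stack. Each subdataset is addressed as NETCDF:<path>:<variable>.
from osgeo import gdal

ds = gdal.Open('/tmp/example_modis.nc')   # made-up path
if ds is not None:
    for key, value in ds.GetMetadata('SUBDATASETS').items():
        if key.endswith('_NAME'):
            # e.g. NETCDF:"/tmp/example_modis.nc":sur_refl_b01
            print(value)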
def _get_directory_size(self):
    """Calculate the size of the dataset in kB."""

    command = "du -sk %s | cut -f1" % self.get_dataset_path()
    LOGGER.debug('executing "%s"', command)
    result = execute(command)

    if result['returncode'] != 0:
        raise DatasetError('Unable to calculate directory size: '
                           '"%s" failed: %s' % (command,
                                                result['stderr']))

    LOGGER.debug('stdout = %s', result['stdout'])

    return int(result['stdout'])
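
# A rough pure-Python equivalent of the "du -sk | cut -f1" call above, shown
# only as a sketch for platforms without du; it is not what the ingester
# uses, and it reports apparent file size in kB rather than block usage.
import os

def directory_size_kb(path):
    """Return the total size of all files under 'path' in kB (rounded up)."""
    total_bytes = 0
    for dirpath, _, filenames in os.walk(path):
        for name in filenames:
            total_bytes += os.path.getsize(os.path.join(dirpath, name))
    return (total_bytes + 1023) // 1024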
def __check_bands(self, dataset):
    """Check that the dataset has the expected bands.

    Raises a DatasetError if any band expected for this dataset
    (according to the database) is missing.
    """
    try:
        dataset_bands = self.new_bands[self.get_dataset_key(dataset)]
    except KeyError:
        raise DatasetError('No tile types for this dataset.')

    for tile_type_bands in dataset_bands.values():
        for band_info in tile_type_bands.values():
            dataset.find_band_file(band_info['file_pattern'])
def buildvrt(self, temp_dir):
    """Given a dataset_record and corresponding dataset, build the VRT
    that will be used to reproject the dataset's data to tile
    coordinates."""

    # Make the list of filenames from the dataset_path/scene01 and each
    # file_number's file_pattern. Also get the list of nodata values.
    self.source_file_list, self.nodata_list = self.list_source_files()
    nodata_value = self.nodata_list[0]
    # TODO: check that this works for PQA where nodata_value is None.
    if nodata_value is not None:
        nodata_spec = ["-srcnodata",
                       "%d" % nodata_value,
                       "-vrtnodata",
                       "%d" % nodata_value]
    else:
        nodata_spec = []

    # Form the vrt_band_stack_filename and build the VRT.
    create_directory(temp_dir)
    self.vrt_name = self.get_vrt_name(temp_dir)
    buildvrt_cmd = ["gdalbuildvrt",
                    "-separate",
                    "-q"]
    buildvrt_cmd.extend(nodata_spec)
    buildvrt_cmd.extend(["-overwrite", "%s" % self.vrt_name])
    buildvrt_cmd.extend(self.source_file_list)
    result = execute(buildvrt_cmd, shell=False)
    if result['returncode'] != 0:
        raise DatasetError('Unable to perform gdalbuildvrt: '
                           '"%s" failed: %s'
                           % (buildvrt_cmd, result['stderr']))

    # Add the metadata and store the band stack as a GDAL dataset, kept
    # as an attribute of the BandStack object.
    self.vrt_band_stack = self.add_metadata(self.vrt_name)
def create_spatial_ref(crs):
    """Create a spatial reference system for projection crs.

    Called by define_transformation().
    """
    # pylint: disable=broad-except

    osr.UseExceptions()
    try:
        spatial_ref = osr.SpatialReference()
    except Exception:
        raise DatasetError('No spatial reference done for %s' % str(crs))
    try:
        spatial_ref.ImportFromWkt(crs)
        return spatial_ref
    except Exception:
        pass
    try:
        matchobj = re.match(r'EPSG:(\d+)', crs)
        epsg_code = int(matchobj.group(1))
        spatial_ref.ImportFromEPSG(epsg_code)
        return spatial_ref
    except Exception:
        return None
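
# A hedged usage sketch for create_spatial_ref(), shown as if it were a free
# function: both a WKT string and an "EPSG:<code>" string are accepted, and
# anything else yields None. The EPSG code below is only an example.
sref = create_spatial_ref('EPSG:4326')
if sref is None:
    raise DatasetError('Could not build a spatial reference for EPSG:4326')
print(sref.ExportToWkt()[:60])  # WKT of the reference system (truncated)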
def _extract_from_file(file_pattern, file_description, extract_function):
    """Extract metadata from a file.

    Returns the result of running extract_function on the opened file,
    or None if the file cannot be found. file_pattern is a glob pattern
    for the file: the first file found is used. file_description is a
    description of the file for logging and error messages.
    """
    try:
        md_path = glob.glob(file_pattern)[0]
        md_file = open(md_path)
        metadata = extract_function(md_file)
        md_file.close()
    except IndexError:  # File not found
        metadata = None
        LOGGER.debug('No %s file found.', file_description)
    except IOError:  # Open failed
        raise DatasetError('Unable to open %s file.' % file_description)

    return metadata
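
# A hedged usage sketch for _extract_from_file(): the glob pattern and the
# read-the-whole-file extract function below are hypothetical, and the helper
# is shown as if it were a free function rather than a method.
example_mtl_text = _extract_from_file(
    '/tmp/example_scene/scene01/*_MTL.txt',   # made-up glob pattern
    'MTL',                                    # description used in messages
    lambda f: f.read())                       # extract function: whole file
# example_mtl_text is None if no matching file was found.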
def __init__(self, dataset_path):
    """Opens the dataset and extracts metadata."""

    self._satellite_tag = "MT"
    self._satellite_sensor = "MODIS-Terra"

    self._dataset_file = os.path.abspath(dataset_path)
    fileName, fileExtension = os.path.splitext(self._dataset_file)

    if fileName.endswith("RBQ500"):
        self._processor_level = "RBQ500"
    else:
        self._processor_level = "MOD09"

    vrt_file = open(dataset_path, 'r')
    vrt_string = vrt_file.read()
    vrt_file.close()

    self._dataset_path = re.search('NETCDF:(.*):', vrt_string).groups(1)[0]
    self._vrt_file = dataset_path

    self._ds = gdal.Open(self._dataset_path, gdal.GA_ReadOnly)
    if not self._ds:
        raise DatasetError("Unable to open %s" % self.get_dataset_path())

    self._dataset_size = os.path.getsize(self._dataset_path)

    LOGGER.debug('Transform = %s', self._ds.GetGeoTransform())
    LOGGER.debug('Projection = %s', self._ds.GetProjection())
    LOGGER.debug('RasterXSize = %s', self._ds.RasterXSize)
    LOGGER.debug('RasterYSize = %s', self._ds.RasterYSize)

    command = "ncdump -v InputFileGlobalAttributes %s" % self._dataset_path
    result = execute(command)
    if result['returncode'] != 0:
        raise DatasetError('Unable to perform ncdump: '
                           '"%s" failed: %s' % (command,
                                                result['stderr']))

    s = re.sub(r"\s+", "", result['stdout'])
    LOGGER.debug('%s = %s', command, s)

    self._rangeendingdate = re.search(
        'RANGEENDINGDATE\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEENDINGDATE',
        s).groups(1)[0]
    LOGGER.debug('RangeEndingDate = %s', self._rangeendingdate)

    self._rangeendingtime = re.search(
        'RANGEENDINGTIME\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEENDINGTIME',
        s).groups(1)[0]
    LOGGER.debug('RangeEndingTime = %s', self._rangeendingtime)

    self._rangebeginningdate = re.search(
        'RANGEBEGINNINGDATE\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEBEGINNINGDATE',
        s).groups(1)[0]
    LOGGER.debug('RangeBeginningDate = %s', self._rangebeginningdate)

    self._rangebeginningtime = re.search(
        'RANGEBEGINNINGTIME\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEBEGINNINGTIME',
        s).groups(1)[0]
    LOGGER.debug('RangeBeginningTime = %s', self._rangebeginningtime)

    self.scene_start_datetime = (self._rangebeginningdate + " " +
                                 self._rangebeginningtime)
    self.scene_end_datetime = (self._rangeendingdate + " " +
                               self._rangeendingtime)

    self._orbitnumber = int(re.search(
        'ORBITNUMBER\\\\nCLASS=\\\\\"1\\\\\"\\\\nNUM_VAL=1\\\\nVALUE=(.*)\\\\nEND_OBJECT=ORBITNUMBER',
        s).groups(1)[0])
    LOGGER.debug('OrbitNumber = %d', self._orbitnumber)

    self._cloud_cover_percentage = float(
        re.search('Cloudy:\\\\t(.*)\\\\n\\\\tMixed', s).groups(1)[0])
    LOGGER.debug('CloudCover = %f', self._cloud_cover_percentage)

    self._completion_datetime = re.search(
        'PRODUCTIONDATETIME\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)Z\\\\\"\\\\nEND_OBJECT=PRODUCTIONDATETIME',
        s).groups(1)[0]
    LOGGER.debug('ProcessedTime = %s', self._completion_datetime)

    self._metadata = self._ds.GetMetadata('SUBDATASETS')

    band1 = gdal.Open(self._metadata['SUBDATASET_1_NAME'])

    # Get coordinates.
    self._width = band1.RasterXSize
    self._height = band1.RasterYSize

    self._gt = band1.GetGeoTransform()
    self._minx = self._gt[0]
    self._miny = (self._gt[3] + self._width * self._gt[4] +
                  self._height * self._gt[5])
    self._maxx = (self._gt[0] + self._width * self._gt[1] +
                  self._height * self._gt[2])
    self._maxy = self._gt[3]

    LOGGER.debug('min/max x coordinates (%s, %s)',
                 str(self._minx), str(self._maxx))
    LOGGER.debug('min/max y coordinates (%s, %s)',
                 str(self._miny), str(self._maxy))
    LOGGER.debug('pixel size (%s, %s)',
                 str(self._gt[1]), str(self._gt[5]))

    self._pixelX = self._width
    self._pixelY = self._height

    LOGGER.debug('pixels (%s, %s)', str(self._pixelX), str(self._pixelY))

    self._gcp_count = None
    self._mtl_text = None
    self._xml_text = None

    AbstractDataset.__init__(self)
def reproject(self):
    """Reproject the scene dataset into the tile coordinate reference
    system and extent.

    This method uses gdalwarp to do the reprojection.
    """
    # pylint: disable=too-many-locals

    x_origin = self.tile_type_info['x_origin']
    y_origin = self.tile_type_info['y_origin']
    x_size = self.tile_type_info['x_size']
    y_size = self.tile_type_info['y_size']
    x_pixel_size = self.tile_type_info['x_pixel_size']
    y_pixel_size = self.tile_type_info['y_pixel_size']
    x0 = x_origin + self.tile_footprint[0] * x_size
    y0 = y_origin + self.tile_footprint[1] * y_size
    tile_extents = (x0, y0, x0 + x_size, y0 + y_size)
    # Make the tile_extents visible to tile_record.
    self.tile_extents = tile_extents
    nodata_value = self.band_stack.nodata_list[0]

    # Assume the resampling method is the same for all bands, because
    # resampling_method is per processing_level.
    # TODO: assert this is the case.
    first_file_number = self.band_stack.band_dict.keys()[0]
    resampling_method = (
        self.band_stack.band_dict[first_file_number]['resampling_method'])

    if nodata_value is not None:
        # TODO: check this works for PQA, where
        # band_dict[10]['resampling_method'] == None
        nodata_spec = ["-srcnodata",
                       "%d" % nodata_value,
                       "-dstnodata",
                       "%d" % nodata_value]
    else:
        nodata_spec = []

    format_spec = []
    for format_option in self.tile_type_info['format_options'].split(','):
        format_spec.extend(["-co", "%s" % format_option])

    # Work-around to allow existing code to work with netCDF
    # subdatasets as GDAL band stacks.
    temp_tile_output_path = (self.nc_temp_tile_output_path or
                             self.temp_tile_output_path)

    reproject_cmd = ["gdalwarp",
                     "-q",
                     "-of",
                     "%s" % self.tile_type_info['file_format'],
                     "-t_srs",
                     "%s" % self.tile_type_info['crs'],
                     "-te",
                     "%f" % tile_extents[0],
                     "%f" % tile_extents[1],
                     "%f" % tile_extents[2],
                     "%f" % tile_extents[3],
                     "-tr",
                     "%f" % x_pixel_size,
                     "%f" % y_pixel_size,
                     "-tap",
                     "-r",
                     "%s" % resampling_method,
                     ]
    reproject_cmd.extend(nodata_spec)
    reproject_cmd.extend(format_spec)
    reproject_cmd.extend(["-overwrite",
                          "%s" % self.band_stack.vrt_name,
                          # Use the locally-defined output path, not the
                          # class instance value.
                          "%s" % temp_tile_output_path
                          ])

    command_string = ' '.join(reproject_cmd)

    LOGGER.info('Performing gdalwarp for tile %s', self.tile_footprint)
    retry = True
    while retry:
        LOGGER.debug('command_string = %s', command_string)
        start_datetime = datetime.now()
        result = execute(command_string)
        LOGGER.debug('gdalwarp time = %s', datetime.now() - start_datetime)

        if result['stdout']:
            log_multiline(LOGGER.debug, result['stdout'],
                          'stdout from ' + command_string, '\t')

        if result['returncode']:  # Return code is non-zero
            log_multiline(LOGGER.error, result['stderr'],
                          'stderr from ' + command_string, '\t')

            # Work-around for gdalwarp error writing LZW-compressed
            # GeoTIFFs.
            if (result['stderr'].find('LZW') > -1  # LZW-related error
                    and self.tile_type_info['file_format'] == 'GTiff'
                    and 'COMPRESS=LZW' in format_spec):

                # Write the uncompressed tile to a temporary path.
                uncompressed_tile_path = temp_tile_output_path + '.tmp'
                command_string = command_string.replace('COMPRESS=LZW',
                                                        'COMPRESS=NONE')
                command_string = command_string.replace(
                    temp_tile_output_path, uncompressed_tile_path)

                # Translate the temporary uncompressed tile to the final
                # compressed tile.
                command_string += '; gdal_translate -of GTiff'
                command_string += ' ' + ' '.join(format_spec)
                command_string += ' %s %s' % (uncompressed_tile_path,
                                              temp_tile_output_path)

                LOGGER.info('Creating compressed GeoTIFF tile via '
                            'temporary uncompressed GeoTIFF')
            else:
                raise DatasetError('Unable to perform gdalwarp: '
                                   '"%s" failed: %s' % (command_string,
                                                        result['stderr']))
        else:
            retry = False  # No retry on success

    # Work-around to allow existing code to work with netCDF
    # subdatasets as GDAL band stacks.
    if self.nc_temp_tile_output_path:
        self.nc2vrt(self.nc_temp_tile_output_path,
                    self.temp_tile_output_path)
def __make_mosaic_pqa(tile_record_list, tile_type_info, mosaic_path):
    """From the PQA tiles in tile_record_list, create a mosaic tile at
    mosaic_path.
    """

    LOGGER.info('Creating PQA mosaic file %s', mosaic_path)

    mosaic_file_list = [tr['tile_pathname'] for tr in tile_record_list]

    template_dataset = gdal.Open(mosaic_file_list[0])

    gdal_driver = gdal.GetDriverByName(tile_type_info['file_format'])

    # Set datatype formats appropriate to Create() and numpy.
    gdal_dtype = template_dataset.GetRasterBand(1).DataType
    numpy_dtype = gdal.GetDataTypeName(gdal_dtype)

    mosaic_dataset = gdal_driver.Create(
        mosaic_path,
        template_dataset.RasterXSize,
        template_dataset.RasterYSize,
        1,
        gdal_dtype,
        tile_type_info['format_options'].split(','))

    if not mosaic_dataset:
        raise DatasetError(
            'Unable to open output dataset %s' % mosaic_dataset)

    mosaic_dataset.SetGeoTransform(template_dataset.GetGeoTransform())
    mosaic_dataset.SetProjection(template_dataset.GetProjection())

    # TODO: make vrt here - not really needed for a single-layer file
    # if tile_type_info['file_format'] == 'netCDF':
    #     pass

    output_band = mosaic_dataset.GetRasterBand(1)
    # Set all background values of data_array to FFFF (i.e. all ones).
    data_array = numpy.ones(shape=(template_dataset.RasterYSize,
                                   template_dataset.RasterXSize),
                            dtype=numpy_dtype) * -1
    # Set all background values of no_data_array to 0 (i.e. all zeroes).
    no_data_array = numpy.zeros(shape=(template_dataset.RasterYSize,
                                       template_dataset.RasterXSize),
                                dtype=numpy_dtype)
    overall_data_mask = numpy.zeros((mosaic_dataset.RasterYSize,
                                     mosaic_dataset.RasterXSize),
                                    dtype=numpy.bool)
    del template_dataset

    # Populate data_array with masked PQA data.
    for pqa_dataset_index in range(len(mosaic_file_list)):
        pqa_dataset_path = mosaic_file_list[pqa_dataset_index]
        pqa_dataset = gdal.Open(pqa_dataset_path)
        if not pqa_dataset:
            raise DatasetError('Unable to open %s' % pqa_dataset_path)
        pqa_array = pqa_dataset.ReadAsArray()
        del pqa_dataset
        LOGGER.debug('Opened %s', pqa_dataset_path)

        # Treat contiguous and non-contiguous pixels separately.
        # Set all contiguous pixels to true in the data mask.
        pqa_data_mask = (pqa_array & PQA_CONTIGUITY).astype(numpy.bool)
        # Expand overall_data_mask to true for any contiguous pixels.
        overall_data_mask = overall_data_mask | pqa_data_mask
        # Perform bitwise-and on contiguous pixels in data_array.
        data_array[pqa_data_mask] &= pqa_array[pqa_data_mask]
        # Perform bitwise-or on non-contiguous pixels in no_data_array.
        no_data_array[~pqa_data_mask] |= pqa_array[~pqa_data_mask]

    # Set all pixels which don't contain data to the combined no-data
    # values (should be the same as the original no-data values).
    data_array[~overall_data_mask] = no_data_array[~overall_data_mask]

    output_band.WriteArray(data_array)
    mosaic_dataset.FlushCache()
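
# A toy numpy sketch of the PQA mosaic rule implemented above, simplified to
# two input arrays: where both inputs are contiguous, flags are combined with
# bitwise-and; elsewhere the no-data pattern is combined with bitwise-or.
# The contiguity mask value (256) is assumed for illustration only.
import numpy

CONTIG = 256
a = numpy.array([CONTIG | 3, 0], dtype=numpy.uint16)   # pixel 0 contiguous
b = numpy.array([CONTIG | 5, 7], dtype=numpy.uint16)   # pixel 0 contiguous
mosaic = numpy.full_like(a, 0xFFFF)                    # start from all ones
contiguous = (a & CONTIG).astype(bool) & (b & CONTIG).astype(bool)
mosaic[contiguous] &= a[contiguous] & b[contiguous]
mosaic[~contiguous] = a[~contiguous] | b[~contiguous]
print(mosaic)  # [257 7], i.e. [CONTIG | 1, 7]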
def reproject(self):
    """Reproject the scene dataset into the tile coordinate reference
    system and extent.

    This method uses gdalwarp to do the reprojection.
    """
    # pylint: disable=too-many-locals

    x_origin = self.tile_type_info['x_origin']
    y_origin = self.tile_type_info['y_origin']
    x_size = self.tile_type_info['x_size']
    y_size = self.tile_type_info['y_size']
    x_pixel_size = self.tile_type_info['x_pixel_size']
    y_pixel_size = self.tile_type_info['y_pixel_size']
    x0 = x_origin + self.tile_footprint[0] * x_size
    y0 = y_origin + self.tile_footprint[1] * y_size
    tile_extents = (x0, y0, x0 + x_size, y0 + y_size)
    # Make the tile_extents visible to tile_record.
    self.tile_extents = tile_extents
    nodata_value = self.band_stack.nodata_list[0]

    # Assume the resampling method is the same for all bands, because
    # resampling_method is per processing_level.
    # TODO: assert this is the case.
    first_file_number = self.band_stack.band_dict.keys()[0]
    resampling_method = (
        self.band_stack.band_dict[first_file_number]['resampling_method'])

    if nodata_value is not None:
        # TODO: check this works for PQA, where
        # band_dict[10]['resampling_method'] == None
        nodata_spec = ["-srcnodata",
                       "%d" % nodata_value,
                       "-dstnodata",
                       "%d" % nodata_value]
    else:
        nodata_spec = []

    format_spec = []
    for format_option in self.tile_type_info['format_options'].split(','):
        format_spec.extend(["-co", "%s" % format_option])

    reproject_cmd = ["gdalwarp",
                     "-q",
                     "-t_srs",
                     "%s" % self.tile_type_info['crs'],
                     "-te",
                     "%f" % tile_extents[0],
                     "%f" % tile_extents[1],
                     "%f" % tile_extents[2],
                     "%f" % tile_extents[3],
                     "-tr",
                     "%f" % x_pixel_size,
                     "%f" % y_pixel_size,
                     "-tap",
                     "-r",
                     "%s" % resampling_method,
                     ]
    reproject_cmd.extend(nodata_spec)
    reproject_cmd.extend(format_spec)
    reproject_cmd.extend(["-overwrite",
                          "%s" % self.band_stack.vrt_name,
                          "%s" % self.temp_tile_output_path])

    result = execute(reproject_cmd, shell=False)

    if result['returncode'] != 0:
        raise DatasetError('Unable to perform gdalwarp: '
                           '"%s" failed: %s' % (reproject_cmd,
                                                result['stderr']))