def __make_mosaic_vrt(tile_record_list, mosaic_path):
    """From two or more source tiles create a VRT"""

    LOGGER.info('Creating mosaic VRT file %s', mosaic_path)

    source_file_list = [tr['tile_pathname'] for tr in tile_record_list]

    gdalbuildvrt_cmd = ["gdalbuildvrt",
                        "-q",
                        "-overwrite",
                        "%s" % mosaic_path
                        ]
    gdalbuildvrt_cmd.extend(source_file_list)

    result = execute(gdalbuildvrt_cmd, shell=False)

    if result['stdout']:
        log_multiline(LOGGER.info, result['stdout'],
                      'stdout from %s' % gdalbuildvrt_cmd, '\t')

    if result['stderr']:
        log_multiline(LOGGER.debug, result['stderr'],
                      'stderr from %s' % gdalbuildvrt_cmd, '\t')

    if result['returncode'] != 0:
        raise DatasetError('Unable to perform gdalbuildvrt: '
                           '"%s" failed: %s' % (gdalbuildvrt_cmd, result['stderr']))
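
# A minimal usage sketch for __make_mosaic_vrt, assuming it is defined at
# module level and that tile records follow the tile-table layout used
# elsewhere in this codebase (dicts with at least a 'tile_pathname' key).
# The paths below are hypothetical.
example_tile_records = [
    {'tile_pathname': '/g/data/v10/tiles/LS7_ETM_NBAR_150_-025_A.tif'},
    {'tile_pathname': '/g/data/v10/tiles/LS7_ETM_NBAR_150_-025_B.tif'},
]
__make_mosaic_vrt(example_tile_records, '/g/data/v10/tiles/LS7_ETM_NBAR_150_-025.vrt')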
def unlock_object(self, lock_object, lock_type_id=1):
    # Need separate non-persistent connection for lock mechanism to allow
    # independent transaction commits
    lock_connection = self.create_connection()
    lock_cursor = lock_connection.cursor()

    result = False
    sql = """-- Delete lock object if it is owned by this process
delete from lock
where lock_type_id = %(lock_type_id)s
  and lock_object = %(lock_object)s
  and lock_owner = %(lock_owner)s;
"""
    params = {'lock_type_id': lock_type_id,
              'lock_object': lock_object,
              'lock_owner': self.process_id
              }
    log_multiline(logger.debug, lock_cursor.mogrify(sql, params), 'SQL', '\t')
    try:
        lock_cursor.execute(sql, params)
        result = not self.check_object_locked(lock_object, lock_type_id)
    finally:
        lock_connection.close()

    if result:
        logger.debug('Unlocked object %s', lock_object)
    else:
        logger.debug('Unable to unlock object %s', lock_object)
    return result
def flag_records(self):
    params = {'tiles_to_be_deleted_tuple': tuple(sorted(self.tile_records_to_delete.keys())),
              'tiles_to_be_updated_tuple': tuple(sorted(self.tile_records_to_update.keys()))
              }

    if (params['tiles_to_be_deleted_tuple'] or params['tiles_to_be_updated_tuple']):
        sql = ("""-- Change tile class of non-overlapping tiles or overlap source tiles from nominated datasets
update tile
set tile_class_id = tile_class_id + 1000
where tile_class_id < 1000
  and tile_id in %(tiles_to_be_deleted_tuple)s;
""" if params['tiles_to_be_deleted_tuple'] else '') + \
              ("""
-- Change tile class of overlap source tiles NOT from nominated datasets
update tile
set tile_class_id = 1 -- Change 3->1
where tile_class_id = 3
  and tile_id in %(tiles_to_be_updated_tuple)s;
""" if params['tiles_to_be_updated_tuple'] else '')

        log_multiline(logger.debug, self.db_cursor.mogrify(sql, params), 'SQL', '\t')

        if self.dryrun:
            print('\nDRY RUN ONLY!')
            print('Tile-flagging SQL:')
            print(self.db_cursor.mogrify(sql, params))
            print()
        else:
            self.db_cursor.execute(sql, params)
            print('Records updated successfully')
    else:
        print('No tiles to delete or modify')
def lock_object(self, lock_object, lock_type_id=1, lock_status_id=None, lock_detail=None):
    # Need separate non-persistent connection for lock mechanism to allow
    # independent transaction commits
    lock_connection = self.create_connection()
    lock_cursor = lock_connection.cursor()

    result = None
    sql = """-- Insert lock record if it doesn't already exist
insert into lock(
  lock_type_id,
  lock_object,
  lock_owner,
  lock_status_id)
select
  %(lock_type_id)s,
  %(lock_object)s,
  %(lock_owner)s,
  %(lock_status_id)s
where not exists
  (select lock_type_id, lock_object
   from lock
   where lock_type_id = %(lock_type_id)s
     and lock_object = %(lock_object)s);

-- Update lock record if it is not owned or owned by this process
update lock
set lock_owner = %(lock_owner)s,
    lock_status_id = %(lock_status_id)s,
    lock_detail = %(lock_detail)s
where lock_type_id = %(lock_type_id)s
  and lock_object = %(lock_object)s
  and (lock_owner is null or lock_owner = %(lock_owner)s);
"""
    params = {'lock_type_id': lock_type_id,
              'lock_object': lock_object,
              'lock_owner': self.process_id,
              'lock_status_id': lock_status_id,
              'lock_detail': lock_detail
              }
    log_multiline(logger.debug, lock_cursor.mogrify(sql, params), 'SQL', '\t')

    # Need to specifically check object lock record for this process and specified status
    try:
        lock_cursor.execute(sql, params)
        result = self.check_object_locked(lock_object=lock_object,
                                          lock_type_id=lock_type_id,
                                          lock_status_id=lock_status_id,
                                          lock_owner=self.process_id,
                                          lock_connection=lock_connection)
    finally:
        lock_connection.close()

    if result:
        logger.debug('Locked object %s', lock_object)
    else:
        logger.debug('Unable to lock object %s', lock_object)
    return result
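
# A minimal sketch of the intended lock/unlock pattern, assuming `datacube` is
# an instance exposing the methods above and `output_path` names the resource
# being guarded (both hypothetical). unlock_object is placed in a finally
# clause so a failure while writing cannot leave the lock held.
if datacube.lock_object(output_path):
    try:
        pass  # ... write output_path here ...
    finally:
        datacube.unlock_object(output_path)
else:
    logger.info('%s is locked by another process - skipping', output_path)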
def log_sql(sql_query_string):
    """Logs an SQL query to the logger at debug level.

    This uses the log_multiline utility function from eotools.utils.
    sql_query_string is as returned from cursor.mogrify."""
    log_multiline(LOGGER.debug, sql_query_string, title="SQL", prefix="\t")
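
# A minimal usage sketch, assuming a psycopg2 cursor named `cursor`; mogrify
# renders the query with the parameters bound, which is exactly the form
# log_sql expects.
query = "select satellite_tag from satellite where satellite_id = %(satellite_id)s"
log_sql(cursor.mogrify(query, {'satellite_id': 1}))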
def cell_has_data(self, x_index, y_index, start_datetime=None, end_datetime=None,
                  tile_type_id=None):
    db_cursor = self.db_connection.cursor()
    sql = """-- Count of acquisitions which have tiles covering the matching indices
select count(distinct acquisition_id) as acquisition_count
from tile_footprint
inner join tile using(x_index, y_index, tile_type_id)
inner join dataset using (dataset_id)
inner join acquisition using (acquisition_id)
where tile_type_id = %(tile_type_id)s
  and x_index = %(x_index)s
  and y_index = %(y_index)s
  and (%(start_datetime)s is null or start_datetime >= %(start_datetime)s)
  and (%(end_datetime)s is null or end_datetime <= %(end_datetime)s);
"""
    tile_type_id = tile_type_id or self.default_tile_type_id
    params = {'x_index': x_index,
              'y_index': y_index,
              'start_datetime': start_datetime,
              'end_datetime': end_datetime,
              'tile_type_id': tile_type_id
              }
    log_multiline(logger.debug, db_cursor.mogrify(sql, params), 'SQL', '\t')
    db_cursor.execute(sql, params)

    record = db_cursor.fetchone()
    if record:
        return record[0]
    else:
        return 0
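
# A minimal usage sketch, assuming `datacube` is a connected instance exposing
# cell_has_data; the return value is the number of acquisitions with tiles
# covering the cell in the optional time range.
from datetime import datetime
count = datacube.cell_has_data(150, -25,
                               start_datetime=datetime(2000, 1, 1),
                               end_datetime=datetime(2000, 12, 31))
print('%d acquisitions cover cell (150, -25)' % count)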
def __init__(self, source_datacube=None, default_tile_type_id=1):
    '''Constructor for TileRemover class'''
    self.dataset_records = {}
    self.acquisition_records = {}
    self.all_tile_records = {}
    self.tile_records_to_delete = {}
    self.tile_records_to_update = {}

    if source_datacube:
        # Copy values from source_datacube and then override command line args
        self.__dict__ = copy(source_datacube.__dict__)

        args = self.parse_args()
        # Set instance attributes for every value in command line arguments file
        for attribute_name in args.__dict__.keys():
            attribute_value = args.__dict__[attribute_name]
            self.__setattr__(attribute_name, attribute_value)
    else:
        DataCube.__init__(self)  # Call inherited constructor

    if self.debug:
        logger.setLevel(logging.DEBUG)

    if self.action and type(self.action) == str:
        self.action = TileRemover.action_dict.get(self.action[0].lower()) or 'report'
    else:
        self.action = 'report'

    if self.target and type(self.target) == str:
        self.target = TileRemover.target_dict.get(self.target[0].lower()) or 'acquisition'
    else:
        self.target = 'acquisition'

    if self.dataset_name:  # Dataset list specified at command line
        self.dataset_name_list = self.dataset_name.split(',')
    elif self.dataset_list:  # Dataset list file specified
        dataset_list_file = open(self.dataset_list, 'r')
        self.dataset_name_list = [dataset_name.replace('\n', '')
                                  for dataset_name in dataset_list_file.readlines()]
        dataset_list_file.close()
    else:
        raise Exception('No dataset IDs or dataset name list file specified')

    assert self.dataset_name_list, 'No dataset names specified'
    self.dataset_name_list = sorted(self.dataset_name_list)

    # Only need one cursor - create it here
    self.db_cursor = self.db_connection.cursor()

    # Populate field name lists for later use
    self.dataset_field_list = self.get_field_names('dataset', ['xml_text'])
    self.acquisition_field_list = self.get_field_names('acquisition', ['mtl_text'])
    self.tile_field_list = self.get_field_names('tile')
    self.satellite_dict = self.get_satellite_dict()

    log_multiline(logger.debug, self.__dict__, 'self.__dict__', '\t')
def get_field_names(self, table_name, excluded_field_list=()):
    '''Return a list containing all field names for the specified table'''
    sql = """select column_name from information_schema.columns where table_name='""" + table_name + """';"""
    log_multiline(logger.debug, sql, 'SQL', '\t')
    self.db_cursor.execute(sql)

    field_list = [record[0] for record in self.db_cursor
                  if record[0] not in excluded_field_list]
    log_multiline(logger.debug, field_list, table_name + ' field list', '\t')
    return field_list
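
# get_field_names interpolates table_name directly into its query. The value
# is internal here, but a bound parameter is the safer pattern should it ever
# come from user input. A minimal sketch of the same query using psycopg2's
# standard placeholder binding (assumes a psycopg2 cursor named `db_cursor`):
table_name = 'tile'  # Example table name
db_cursor.execute(
    "select column_name from information_schema.columns where table_name = %s;",
    (table_name,))
field_list = [record[0] for record in db_cursor]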
def get_satellite_dict(self):
    '''Return a dict of satellite tags keyed by satellite_id'''
    sql = """select satellite_id, satellite_tag from satellite;"""
    log_multiline(logger.debug, sql, 'SQL', '\t')
    self.db_cursor.execute(sql)

    satellite_dict = dict([(record[0], record[1]) for record in self.db_cursor])
    log_multiline(logger.debug, satellite_dict, 'satellite_dict', '\t')
    return satellite_dict
def create_rgb_tif(self, input_dataset_path, output_dataset_path, pqa_mask=None,
                   rgb_bands=None, input_no_data_value=-999, output_no_data_value=0,
                   input_range=()):
    # Note: this routine relies on self.lock_object/unlock_object, so it must
    # run as a method of an object providing the locking mechanism.
    if os.path.exists(output_dataset_path):
        logger.info('Output dataset %s already exists - skipping', output_dataset_path)
        return

    if not self.lock_object(output_dataset_path):
        logger.info('Output dataset %s already locked - skipping', output_dataset_path)
        return

    if not rgb_bands:
        rgb_bands = [3, 1, 2]

    scale_factor = 10000.0 / 255.0  # Scale factor to translate from +ve int16 to byte

    input_gdal_dataset = gdal.Open(input_dataset_path)
    assert input_gdal_dataset, 'Unable to open input dataset %s' % (input_dataset_path)

    try:
        # Create multi-band dataset for masked data
        logger.debug('output_dataset path = %s', output_dataset_path)

        gdal_driver = gdal.GetDriverByName('GTiff')
        log_multiline(logger.debug, gdal_driver.GetMetadata(), 'gdal_driver.GetMetadata()')

        output_gdal_dataset = gdal_driver.Create(output_dataset_path,
                                                 input_gdal_dataset.RasterXSize,
                                                 input_gdal_dataset.RasterYSize,
                                                 len(rgb_bands), gdal.GDT_Byte,
                                                 ['INTERLEAVE=PIXEL'])
                                                 # ['INTERLEAVE=PIXEL','COMPRESS=NONE','BIGTIFF=YES'])
        assert output_gdal_dataset, 'Unable to create output dataset %s' % output_dataset_path
        output_gdal_dataset.SetGeoTransform(input_gdal_dataset.GetGeoTransform())
        output_gdal_dataset.SetProjection(input_gdal_dataset.GetProjection())

        dest_band_no = 0
        for source_band_no in rgb_bands:
            dest_band_no += 1
            logger.debug('Processing source band %d, destination band %d',
                         source_band_no, dest_band_no)

            input_band_array = input_gdal_dataset.GetRasterBand(source_band_no).ReadAsArray()
            input_gdal_dataset.FlushCache()

            output_band_array = (input_band_array / scale_factor).astype(numpy.byte)

            # Set any out-of-bounds values to no-data
            output_band_array[numpy.logical_or((input_band_array < 0),
                                               (input_band_array > 10000))] = output_no_data_value

            if pqa_mask is not None:  # Need to perform masking
                # Apply PQA mask and no-data value
                output_band_array[numpy.logical_or((input_band_array == input_no_data_value),
                                                   ~pqa_mask)] = output_no_data_value
            else:
                # Re-apply no-data value
                output_band_array[(input_band_array == input_no_data_value)] = output_no_data_value

            output_band = output_gdal_dataset.GetRasterBand(dest_band_no)
            output_band.SetNoDataValue(output_no_data_value)
            output_band.WriteArray(output_band_array)
            output_band.FlushCache()

        output_gdal_dataset.FlushCache()
    finally:
        self.unlock_object(output_dataset_path)
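
# A minimal usage sketch, assuming `stacker` is an object exposing
# create_rgb_tif and get_pqa_mask (as used elsewhere in this codebase).
# The tile paths are hypothetical.
pqa_mask = stacker.get_pqa_mask('/g/data/v10/tiles/LS7_ETM_PQA_150_-025.tif')
stacker.create_rgb_tif('/g/data/v10/tiles/LS7_ETM_NBAR_150_-025.tif',
                       '/g/data/v10/output/LS7_ETM_RGB_150_-025.tif',
                       pqa_mask=pqa_mask)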
def _reproject(tile_type_info, tile_footprint, band_stack, output_path):
    nodata_value = band_stack.nodata_list[0]

    # Assume resampling method is the same for all bands, this is
    # because resampling_method is per processing_level
    # TODO assert this is the case
    first_file_number = list(band_stack.band_dict.keys())[0]

    reproject_cmd = _create_reproject_command(
        band_stack, first_file_number, nodata_value, output_path,
        tile_footprint, tile_type_info
    )
    command_string = " ".join(reproject_cmd)

    LOGGER.info("Performing gdalwarp for tile %s", tile_footprint)
    retry = True
    while retry:
        LOGGER.debug("command_string = %s", command_string)
        start_datetime = datetime.now()
        result = execute(command_string)
        LOGGER.debug("gdalwarp time = %s", datetime.now() - start_datetime)

        if result["stdout"]:
            log_multiline(LOGGER.debug, result["stdout"], "stdout from " + command_string, "\t")

        if result["returncode"]:  # Return code is non-zero
            log_multiline(LOGGER.error, result["stderr"], "stderr from " + command_string, "\t")

            # Work-around for gdalwarp error writing LZW-compressed GeoTIFFs
            if (
                result["stderr"].find("LZW") > -1  # LZW-related error
                and tile_type_info["file_format"] == "GTiff"  # Output format is GeoTIFF
                and "COMPRESS=LZW" in tile_type_info["format_options"]  # LZW compression requested
            ):
                uncompressed_tile_path = output_path + ".tmp"

                # Write uncompressed tile to a temporary path
                command_string = command_string.replace("COMPRESS=LZW", "COMPRESS=NONE")
                command_string = command_string.replace(output_path, uncompressed_tile_path)

                # Translate temporary uncompressed tile to final compressed tile
                command_string += "; gdal_translate -of GTiff"
                command_string += " " + " ".join(_make_format_spec(tile_type_info))
                command_string += " %s %s" % (uncompressed_tile_path, output_path)

                LOGGER.info("Creating compressed GeoTIFF tile via temporary uncompressed GeoTIFF")
            else:
                raise DatasetError(
                    "Unable to perform gdalwarp: "
                    '"%s" failed: %s' % (command_string, result["stderr"])
                )
        else:
            retry = False  # No retry on success
def get_tile_ordinates(self, point_x, point_y, point_date,
                       processing_level='NBAR', satellite=None, tile_type_id=None):
    """
    Function to return tile path and pixel coordinates.
    Arguments should be self-explanatory.

    Returns:
        tile_pathname
        (pixel_x, pixel_y): Pixel coordinates from top-left

    NB: There is a KNOWN ISSUE with N-S overlaps where the southernmost tile
    may contain only no-data for the coordinate. This will be fixed when the
    original mosaic cache data is catalogued in the tile table.
    """
    db_cursor2 = self.db_connection.cursor()

    sql = """-- Find tile path for specified indices and date
select tile_pathname,
  round((%(point_x)s - %(point_x)s::integer) * tile_type.x_pixels)::integer as x_ordinate,
  round((1.0 - (%(point_y)s - %(point_y)s::integer)) * tile_type.y_pixels)::integer as y_ordinate -- Offset from top
from acquisition
inner join satellite using(satellite_id)
inner join dataset using(acquisition_id)
inner join processing_level using(level_id)
inner join tile using(dataset_id)
inner join tile_type using(tile_type_id)
where tile_type_id = %(tile_type_id)s
  and tile_class_id = 1 -- Non-empty tiles
  and (%(satellite)s is null or upper(satellite_tag) = upper(%(satellite)s))
  and upper(level_name) = upper(%(processing_level)s)
  and end_datetime > %(point_date)s and end_datetime < (%(point_date)s + 1)
  and x_index = cast((%(point_x)s - x_origin) / x_size as integer)
  and y_index = cast((%(point_y)s - y_origin) / y_size as integer)
order by x_ref, y_ref desc
limit 1; -- Return southernmost tile
"""
    params = {'point_x': point_x,
              'point_y': point_y,
              'point_date': point_date,
              'processing_level': processing_level,
              'satellite': satellite,
              'tile_type_id': tile_type_id
              }
    log_multiline(logger.debug, db_cursor2.mogrify(sql, params), 'SQL', '\t')
    db_cursor2.execute(sql, params)

    result = db_cursor2.fetchone()
    if result:  # Tile exists
        return result[0], (result[1], result[2])
    else:
        return None
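
# A minimal usage sketch, assuming `datacube` is a connected instance exposing
# get_tile_ordinates. Coordinates are in the tile type's CRS units (degrees
# for the default 1-degree WGS84 tile type); the values are hypothetical.
from datetime import date
result = datacube.get_tile_ordinates(point_x=150.5, point_y=-25.5,
                                     point_date=date(2000, 2, 9),
                                     processing_level='NBAR',
                                     satellite='LS7')
if result:
    tile_pathname, (pixel_x, pixel_y) = result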
def assemble_stack(season_stacker, years=0):
    """
    Returns stack_info_dict - a dict keyed by stack file name containing a
    list of tile_info dicts.
    """
    def date2datetime(input_date, time_offset=time.min):
        if not input_date:
            return None
        return datetime.combine(input_date, time_offset)

    derived_stack_dict = {}
    start_date = season_stacker.start_date
    end_date = season_stacker.end_date
    end_year = end_date.year + years

    while end_date.year <= end_year:
        season_info_dict = season_stacker.stack_derived(
            x_index=season_stacker.x_index,
            y_index=season_stacker.y_index,
            stack_output_dir=season_stacker.output_dir,
            start_datetime=date2datetime(start_date, time.min),
            end_datetime=date2datetime(end_date, time.max),
            satellite=season_stacker.satellite,
            sensor=season_stacker.sensor,
            create_stacks=False)

        for output_stack_path in season_info_dict:
            # Create a new list for each stack if it doesn't already exist
            stack_list = derived_stack_dict.get(output_stack_path, [])
            if not stack_list:
                derived_stack_dict[output_stack_path] = stack_list

            stack_list.extend(season_info_dict[output_stack_path])

        start_date = date(start_date.year + 1, start_date.month, start_date.day)
        end_date = date(end_date.year + 1, end_date.month, end_date.day)

    log_multiline(logger.debug, derived_stack_dict, 'derived_stack_dict', '\t')

    for output_stack_path in sorted(derived_stack_dict.keys()):
        if os.path.exists(output_stack_path) and not season_stacker.refresh:
            logger.info('Skipped existing stack file %s', output_stack_path)
            continue

        if season_stacker.lock_object(output_stack_path):
            logger.debug('Creating temporal stack %s', output_stack_path)
            season_stacker.stack_files(
                timeslice_info_list=derived_stack_dict[output_stack_path],
                stack_dataset_path=output_stack_path,
                band1_vrt_path=None,
                overwrite=True)
            season_stacker.unlock_object(output_stack_path)
            # logger.info('VRT stack file %s created', output_stack_path)

    logger.info('Finished creating %d temporal stack files in %s.',
                len(derived_stack_dict), season_stacker.output_dir)
    return derived_stack_dict
def get_tile_records(self, dataset_records):
    sql = """-- Find tiles and any overlap tiles including those for other datasets
select
  """ + ',\n  '.join(self.tile_field_list) + """
from tile
where dataset_id in %(dataset_id_tuple)s
union
SELECT DISTINCT
  """ + ',\n  '.join(['o.' + tile_field for tile_field in self.tile_field_list]) + """
FROM tile t
JOIN dataset d USING (dataset_id)
JOIN acquisition a USING (acquisition_id)
JOIN tile o ON o.x_index = t.x_index
           AND o.y_index = t.y_index
           AND o.tile_type_id = t.tile_type_id
JOIN dataset od ON od.dataset_id = o.dataset_id
               AND od.level_id = d.level_id
JOIN acquisition oa ON oa.acquisition_id = od.acquisition_id
                   AND oa.satellite_id = a.satellite_id
WHERE d.dataset_id in %(dataset_id_tuple)s
  AND (
    (oa.start_datetime BETWEEN a.start_datetime - (a.end_datetime - a.start_datetime) / 2.0
                           AND a.end_datetime + (a.end_datetime - a.start_datetime) / 2.0)
    OR
    (oa.end_datetime BETWEEN a.start_datetime - (a.end_datetime - a.start_datetime) / 2.0
                         AND a.end_datetime + (a.end_datetime - a.start_datetime) / 2.0)
  );"""
    params = {'dataset_id_tuple': tuple(sorted(set(
        [dataset_record['dataset_id'] for dataset_record in dataset_records.values()])))}
    log_multiline(logger.debug, self.db_cursor.mogrify(sql, params), 'SQL', '\t')
    self.db_cursor.execute(sql, params)

    tile_records = {}
    for record in self.db_cursor:
        tile_records[record[0]] = dict(zip(self.tile_field_list, record))
    log_multiline(logger.debug, tile_records, 'tile_records', '\t')
    return tile_records
def check_object_locked(self, lock_object, lock_type_id=1, lock_status_id=None,
                        lock_owner=None, lock_connection=None):
    # Check whether we need to create a new connection and do it if required
    create_connection = not lock_connection

    # Need separate non-persistent connection for lock mechanism to allow
    # independent transaction commits
    lock_connection = lock_connection or self.create_connection()
    lock_cursor = lock_connection.cursor()

    result = None
    sql = """-- Select lock record if it exists
select lock_object, lock_owner, lock_status_id, lock_detail
from lock
where lock_type_id = %(lock_type_id)s
  and lock_object = %(lock_object)s
  and (%(lock_status_id)s is null or lock_status_id = %(lock_status_id)s)
  and (%(lock_owner)s is null or lock_owner = %(lock_owner)s);
"""
    params = {'lock_type_id': lock_type_id,
              'lock_object': lock_object,
              'lock_owner': lock_owner,
              'lock_status_id': lock_status_id
              }
    log_multiline(logger.debug, lock_cursor.mogrify(sql, params), 'SQL', '\t')
    try:
        lock_cursor.execute(sql, params)
        record = lock_cursor.fetchone()
        if record:
            result = {'lock_type_id': lock_type_id,
                      'lock_object': record[0],
                      'lock_owner': record[1],
                      'lock_status_id': record[2],
                      'lock_detail': record[3]
                      }
    finally:
        # Only close connection if it was created in this function
        if create_connection:
            lock_connection.close()

    return result
def get_acquisition_records(self, dataset_records):
    sql = """-- Find all acquisition records for specified datasets
select
  """ + ',\n  '.join(self.acquisition_field_list) + """
from acquisition
where acquisition_id in %(acquisition_id_tuple)s"""
    params = {'acquisition_id_tuple': tuple(sorted(set(
        [dataset_record['acquisition_id'] for dataset_record in dataset_records.values()])))}
    log_multiline(logger.debug, self.db_cursor.mogrify(sql, params), 'SQL', '\t')
    self.db_cursor.execute(sql, params)

    acquisition_records = {}
    for record in self.db_cursor:
        acquisition_records[record[0]] = dict(zip(self.acquisition_field_list, record))
    log_multiline(logger.debug, acquisition_records, 'acquisition_records', '\t')
    return acquisition_records
def assemble_stack(index_stacker):
    """
    Returns stack_info_dict - a dict keyed by stack file name containing a
    list of tile_info dicts.
    """
    def date2datetime(input_date, time_offset=time.min):
        if not input_date:
            return None
        return datetime.combine(input_date, time_offset)

    stack_info_dict = index_stacker.stack_derived(
        x_index=index_stacker.x_index,
        y_index=index_stacker.y_index,
        stack_output_dir=index_stacker.output_dir,
        start_datetime=date2datetime(index_stacker.start_date, time.min),
        end_datetime=date2datetime(index_stacker.end_date, time.max),
        satellite=index_stacker.satellite,
        sensor=index_stacker.sensor)

    log_multiline(logger.debug, stack_info_dict, 'stack_info_dict', '\t')

    logger.info('Finished creating %d temporal stack files in %s.',
                len(stack_info_dict), index_stacker.output_dir)
    return stack_info_dict
def get_intersecting_tiles(self, geometry_wkt, geometry_srid=4326):
    """
    Function to return all tile_footprint indexes that intersect the specified geometry.

    Arguments:
        geometry_wkt - A Well Known Text geometry specification
        geometry_srid - The spatial reference system ID (EPSG code) that
            geometry_wkt uses. Defaults to 4326.

    Returns:
        A list of tuples in the form (x_index, y_index, tile_type_id)
            x_index - Integer x-index
            y_index - Integer y-index
            tile_type_id - Integer tile type ID
    """
    db_cursor2 = self.db_connection.cursor()

    sql = """-- Find the tile_footprints that intersect geometry_wkt
select x_index, y_index, tile_type_id
from tile_footprint
where bbox && ST_GeomFromText(%(geometry_wkt)s, %(geometry_srid)s)
order by x_index, y_index
"""
    params = {'geometry_wkt': geometry_wkt,
              'geometry_srid': geometry_srid}
    log_multiline(logger.debug, db_cursor2.mogrify(sql, params), 'SQL', '\t')
    db_cursor2.execute(sql, params)

    result_list = []
    for record in db_cursor2:
        result_list.append((record[0], record[1], record[2]))
    return result_list
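
# A minimal usage sketch, assuming `datacube` is a connected instance exposing
# get_intersecting_tiles. The WKT polygon below is a hypothetical bounding box
# in EPSG:4326.
bbox_wkt = 'POLYGON((149 -26, 151 -26, 151 -24, 149 -24, 149 -26))'
for x_index, y_index, tile_type_id in datacube.get_intersecting_tiles(bbox_wkt):
    print(x_index, y_index, tile_type_id)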
def get_dataset_records(self, dataset_name_list):
    '''Return a nested dict containing all dataset record info for datasets
    matching specified names, keyed by dataset_id'''
    dataset_records = {}
    for dataset_name in dataset_name_list:
        if self.target == 'dataset':  # Only return exact matches
            match_pattern = '.*/' + dataset_name + '$'
        else:  # Return all versions
            match_pattern = '.*/' + re.sub(r'_(\d){1,3}$', '', dataset_name) + r'(_(\d){1,3})*$'

        if self.target == 'acquisition':
            sql = """-- Find all datasets derived from acquisition of specified dataset name
select
  """ + ',\n  '.join(self.dataset_field_list) + """
from dataset
join (
  select distinct acquisition_id
  from dataset
  where dataset_path ~ '""" + match_pattern + """'
) a using(acquisition_id);"""
        else:
            sql = """-- Find datasets matching provided name
select
  """ + ',\n  '.join(self.dataset_field_list) + """
from dataset
where dataset_path ~ '""" + match_pattern + """';"""

        log_multiline(logger.debug, sql, 'SQL', '\t')
        self.db_cursor.execute(sql)

        for record in self.db_cursor:
            dataset_records[record[0]] = dict(zip(self.dataset_field_list, record))

    log_multiline(logger.debug, dataset_records, 'dataset_records', '\t')
    return dataset_records
def clear_all_locks(self, lock_object=None, lock_type_id=1, lock_owner=None):
    """
    USE WITH CAUTION - This will affect all processes using specified lock type
    """
    # Need separate non-persistent connection for lock mechanism to allow
    # independent transaction commits
    lock_connection = self.create_connection()
    lock_cursor = lock_connection.cursor()

    sql = """-- Delete ALL lock objects matching any supplied parameters
delete from lock
where (%(lock_type_id)s is null or lock_type_id = %(lock_type_id)s)
  and (%(lock_object)s is null or lock_object = %(lock_object)s)
  and (%(lock_owner)s is null or lock_owner = %(lock_owner)s);
"""
    params = {'lock_type_id': lock_type_id,
              'lock_object': lock_object,
              'lock_owner': lock_owner
              }
    log_multiline(logger.debug, lock_cursor.mogrify(sql, params), 'SQL', '\t')
    try:
        lock_cursor.execute(sql, params)
    finally:
        lock_connection.close()
def __init__(self, data_cube,
             lookup_scheme_name=None,
             tile_type_id=1,  # Should this be None?
             satellite_tag=None,
             sensor_name=None,
             level_name=None):
    '''Constructor for BandLookup class

    Parameters (can all be set later with the exception of data_cube):
        data_cube: Parent data_cube (or descendant) object
        lookup_scheme_name: lookup scheme name. Needs to be a member of
            self.lookup_schemes
        tile_type_id: Tile Type identifier. Defaults to 1 - should this be None?
        satellite_tag: Short name of satellite
        sensor_name: Name of sensor
        level_name: Processing level name
    '''
    assert isinstance(data_cube, DataCube), 'data_cube parameter must be of type DataCube'
    assert not lookup_scheme_name or type(lookup_scheme_name) == str, 'lookup_scheme_name parameter must be of type str'
    assert not tile_type_id or type(tile_type_id) in compat.integer_types, 'tile_type_id parameter must be of type long or int'
    assert not satellite_tag or type(satellite_tag) == str, 'satellite_tag parameter must be of type str'
    assert not sensor_name or type(sensor_name) == str, 'sensor_name parameter must be of type str'
    assert not level_name or type(level_name) == str, 'level_name parameter must be of type str'

    if data_cube.debug:
        logger.setLevel(logging.DEBUG)

    # Set instance values if provided as constructor parameters
    self.lookup_scheme_name = lookup_scheme_name
    self.tile_type_id = tile_type_id
    self.satellite_tag = satellite_tag
    self.sensor_name = sensor_name
    self.level_name = level_name

    self.db_connection = data_cube.db_connection
    db_cursor = self.db_connection.cursor()

    if not BandLookup._band_lookup_dict:  # Check whether class lookup dict has been populated
        sql = """-- Retrieve all band equivalence information
SELECT
  band_lookup_scheme.lookup_scheme_name,
  band_source.tile_type_id,
  coalesce(satellite.satellite_tag, 'DERIVED') as satellite_tag,
  coalesce(sensor_name, level_name) as sensor_name,
  processing_level.level_name,
  band_equivalent.master_band_tag,
  band_source.tile_layer,
  band_equivalent.nominal_centre::float,
  band_equivalent.nominal_bandwidth::float,
  band_equivalent.centre_tolerance::float,
  band_equivalent.bandwidth_tolerance::float,
  COALESCE(band_adjustment.adjustment_offset, 0.0)::float AS adjustment_offset,
  COALESCE(band_adjustment.adjustment_multiplier, 1.0)::float AS adjustment_multiplier,
  band_lookup_scheme.lookup_scheme_id,
  band.satellite_id,
  band.sensor_id,
  band.band_id,
  band_equivalent.master_band_name,
  band_type_name,
  band.min_wavelength::float,
  band.max_wavelength::float,
  band_lookup_scheme.lookup_scheme_description
FROM band
JOIN band_type using(band_type_id)
JOIN band_source using (band_id)
JOIN processing_level using(level_id)
JOIN band_equivalent
  ON band_equivalent.band_type_id = band.band_type_id
  and abs((band.max_wavelength::numeric + band.min_wavelength::numeric) / 2.0
          - band_equivalent.nominal_centre) <= band_equivalent.centre_tolerance
  AND abs(band.max_wavelength::numeric - band.min_wavelength::numeric
          - band_equivalent.nominal_bandwidth) <= band_equivalent.bandwidth_tolerance
JOIN band_lookup_scheme USING (lookup_scheme_id)
LEFT JOIN band_adjustment USING (lookup_scheme_id, band_id)
LEFT JOIN sensor using(satellite_id, sensor_id)
LEFT JOIN satellite using(satellite_id)
ORDER BY 1,2,3,4,5,7
"""
        log_multiline(logger.debug, sql, 'SQL', '\t')
        db_cursor.execute(sql)

        for record in db_cursor:
            # Create nested dict with levels keyed by:
            # lookup_scheme_name, tile_type_id, satellite_tag, sensor_name, level_name, band_tag
            lookup_scheme_dict = BandLookup._band_lookup_dict.get(record[0])
            if lookup_scheme_dict is None:
                lookup_scheme_dict = {}
                BandLookup._band_lookup_dict[record[0]] = lookup_scheme_dict
                BandLookup._lookup_schemes[record[0]] = record[21]  # Set lookup scheme description

            tile_type_id_dict = lookup_scheme_dict.get(record[1])
            if tile_type_id_dict is None:
                tile_type_id_dict = {}
                lookup_scheme_dict[record[1]] = tile_type_id_dict

            satellite_tag_dict = tile_type_id_dict.get(record[2])
            if satellite_tag_dict is None:
                satellite_tag_dict = {}
                tile_type_id_dict[record[2]] = satellite_tag_dict

            sensor_name_dict = satellite_tag_dict.get(record[3])
            if sensor_name_dict is None:
                sensor_name_dict = {}
                satellite_tag_dict[record[3]] = sensor_name_dict

            level_name_dict = sensor_name_dict.get(record[4])
            if level_name_dict is None:
                level_name_dict = {}
                sensor_name_dict[record[4]] = level_name_dict

            assert level_name_dict.get(record[5]) is None, 'Duplicated band_tag record'
            level_name_dict[record[5]] = {
                'tile_layer': record[6],
                'nominal_centre': record[7],
                'nominal_bandwidth': record[8],
                'centre_tolerance': record[9],
                'bandwidth_tolerance': record[10],
                'adjustment_offset': record[11],
                'adjustment_multiplier': record[12],
                'lookup_scheme_id': record[13],
                'satellite_id': record[14],
                'sensor_id': record[15],
                'band_id': record[16],
                'master_band_name': record[17],
                'band_type_name': record[18],
                'min_wavelength': record[19],
                'max_wavelength': record[20]
            }

        log_multiline(logger.debug, BandLookup._band_lookup_dict,
                      'BandLookup._band_lookup_dict', '\t')
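
# A minimal usage sketch, assuming `datacube` is a connected DataCube instance.
# The parameter values mirror those used by the stackers in this codebase, and
# lookup.band_no maps a band tag to its tile layer number (as used elsewhere
# in this codebase).
lookup = BandLookup(data_cube=datacube,
                    lookup_scheme_name='LANDSAT-LS5/7',
                    tile_type_id=1,
                    satellite_tag='LS7',
                    sensor_name='ETM+',
                    level_name='NBAR')
print(lookup.band_no['NIR'])  # Tile layer number of the NIR-equivalent band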
if __name__ == '__main__':
    # Set top level standard output
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(logging.INFO)
    console_formatter = logging.Formatter('%(message)s')
    console_handler.setFormatter(console_formatter)

    datacube = DataCube()

    log_multiline(logger.info, datacube.__dict__, 'Datacube contents', '\t')

    # Test locking mechanism
    datacube.clear_all_locks(lock_object='***lock_test***')
    logger.info('clear_all_locks test passed: %s',
                not datacube.check_object_locked('***lock_test***'))

    datacube.lock_object('***lock_test***')
    logger.info('lock_object test passed: %s',
                bool(datacube.check_object_locked('***lock_test***')))

    datacube.unlock_object('***lock_test***')
    logger.info('unlock_object test passed: %s',
                not datacube.check_object_locked('***lock_test***'))
def delete_records(self):
    params = {'tiles_to_be_deleted_tuple': tuple(sorted(self.tile_records_to_delete.keys())),
              'tiles_to_be_updated_tuple': tuple(sorted(self.tile_records_to_update.keys())),
              'dataset_tuple': tuple(sorted(self.dataset_records.keys())),
              'acquisition_tuple': tuple(sorted(self.acquisition_records.keys()))
              }

    if (params['tiles_to_be_deleted_tuple'] or params['tiles_to_be_updated_tuple']
            or params['dataset_tuple'] or params['acquisition_tuple']):
        sql = ("""-- Delete non-overlapping tiles or overlap source tiles from nominated datasets
delete from tile
where tile_id in %(tiles_to_be_deleted_tuple)s;
""" if params['tiles_to_be_deleted_tuple'] else '') + \
              ("""
-- Change tile class of overlap source tiles NOT from nominated datasets
update tile
set tile_class_id = 1
where tile_class_id = 3
  and tile_id in %(tiles_to_be_updated_tuple)s;
""" if params['tiles_to_be_updated_tuple'] else '') + \
              ("""
-- Delete datasets
delete from dataset
where dataset_id in %(dataset_tuple)s
  and not exists (
    select tile_id from tile
    where dataset_id in %(dataset_tuple)s""" +
               ("""
      and tile_id not in %(tiles_to_be_deleted_tuple)s""" if params['tiles_to_be_deleted_tuple'] else '') + """
  );
""" if params['dataset_tuple'] else '') + \
              ("""
-- Delete acquisitions not shared by other not-nominated datasets
delete from acquisition
where acquisition_id in %(acquisition_tuple)s
  and not exists (
    select dataset_id from dataset
    where acquisition_id in %(acquisition_tuple)s
      and dataset_id not in %(dataset_tuple)s
  );
""" if params['dataset_tuple'] else '')

        log_multiline(logger.debug, self.db_cursor.mogrify(sql, params), 'SQL', '\t')

        if self.dryrun:
            print('\nDRY RUN ONLY!')
            print('Record-deleting SQL:')
            print(self.db_cursor.mogrify(sql, params))
            print()
            print('Tile files which would be deleted:')
            for tile_pathname in sorted([tile_record['tile_pathname']
                                         for tile_record in self.tile_records_to_delete.values()]):
                print('\t%s' % tile_pathname)
            print()
        else:
            self.db_cursor.execute(sql, params)
            print('Records deleted/updated successfully')
            self.remove_files(sorted([tile_record['tile_pathname']
                                      for tile_record in self.tile_records_to_delete.values()]))
            print('Tile files removed successfully')
    else:
        print('No tiles, datasets or acquisitions to delete or modify')
def __init__(self, config=None):
    self.agdc_root = os.path.dirname(__file__)

    self.db_connection = None
    self.host = None
    self.dbname = None
    self.user = None
    self.password = None

    # Default schemas: can be overridden in config file.
    self.schemas = 'agdc, public, gis, topology'

    self.process_id = (os.getenv('PBS_O_HOST', socket.gethostname()) + ':'
                       + os.getenv('PBS_JOBID', str(os.getpid())))

    def open_config(config_file):
        assert os.path.exists(config_file), config_file + " does not exist"

        logger.debug('Opening conf file %s', repr(config_file))
        _config_parser = ConfigParser.SafeConfigParser(allow_no_value=True)
        _config_parser.read(config_file)

        assert _config_parser.has_section(DataCube.SECTION_NAME), \
            'No %s section defined in conf file' % DataCube.SECTION_NAME

        return _config_parser

    def string_to_boolean(bool_string):
        return bool_string[0].lower() in ['t', '1']

    args = self.parse_args()

    self.debug = args.debug
    if self.debug:
        logger.setLevel(logging.DEBUG)
        logger.debug('datacube module logging level set to DEBUG')

    log_multiline(logger.debug, args.__dict__, 'args.__dict__', '\t')

    # Default conf file is agdc_default.conf - show absolute pathname in error messages
    config_file = config or os.path.abspath(args.config_file or
                                            os.path.join(self.agdc_root, 'agdc_default.conf'))

    config_parser = open_config(config_file)

    # Set instance attributes for every value in config file
    for attribute_name in config_parser.options(DataCube.SECTION_NAME):
        attribute_value = config_parser.get(DataCube.SECTION_NAME, attribute_name)
        self.__setattr__(attribute_name, attribute_value)

    # Set instance attributes for every value in command line arguments file
    for attribute_name in args.__dict__.keys():
        attribute_value = args.__dict__[attribute_name]
        if attribute_value:
            self.__setattr__(attribute_name, attribute_value)

    self.create_directory(self.temp_dir)

    self.port = int(self.port)
    self.db_connection = self.create_connection()

    # Store tile type info in dict structure
    db_cursor = self.db_connection.cursor()
    sql = """-- Retrieve all tile_type information
select
  tile_type_id,
  tile_type_name,
  crs,
  x_origin,
  y_origin,
  x_size,
  y_size,
  x_pixels,
  y_pixels,
  unit,
  file_format,
  file_extension,
  format_options,
  tile_directory,
  x_size / x_pixels as x_pixel_size,
  y_size / y_pixels as y_pixel_size
from tile_type
"""
    log_multiline(logger.debug, sql, 'SQL', '\t')
    db_cursor.execute(sql)

    self.tile_type_dict = {}
    for record in db_cursor:
        tile_type_info = {
            'tile_type_id': record[0],
            'tile_type_name': record[1],
            'crs': record[2],
            'x_origin': record[3],
            'y_origin': record[4],
            'x_size': record[5],
            'y_size': record[6],
            'x_pixels': record[7],
            'y_pixels': record[8],
            'unit': record[9],
            'file_format': record[10],
            'file_extension': record[11],
            'format_options': record[12],
            'tile_directory': record[13],
            'x_pixel_size': record[14],
            'y_pixel_size': record[15]
        }
        self.tile_type_dict[record[0]] = tile_type_info

    # Store bands in nested dict structure
    self.bands = {}
    db_cursor = self.db_connection.cursor()
    sql = """-- Retrieve all band information (including derived bands)
select
  tile_type_id,
  coalesce(satellite_tag, 'DERIVED') as satellite_tag,
  coalesce(sensor_name, level_name) as sensor_name,
  band_id,
  sensor_id,
  band_name,
  band_type_name,
  file_number,
  resolution,
  min_wavelength,
  max_wavelength,
  file_pattern,
  level_name,
  tile_layer,
  band_tag,
  resampling_method,
  nodata_value
from band ba
inner join band_type bt using(band_type_id)
inner join band_source bs using (band_id)
inner join processing_level pl using(level_id)
left join sensor se using(satellite_id, sensor_id)
left join satellite sa using(satellite_id)
order by tile_type_id, satellite_name, sensor_name, level_name, tile_layer
"""
    log_multiline(logger.debug, sql, 'SQL', '\t')
    db_cursor.execute(sql)

    for record in db_cursor:
        # self.bands is keyed by tile_type_id
        band_dict = self.bands.get(record[0], {})
        if not band_dict:  # New dict needed
            self.bands[record[0]] = band_dict

        # sensor_dict is keyed by (satellite_tag, sensor_name)
        sensor_dict = band_dict.get((record[1], record[2]), {})
        if not sensor_dict:  # New dict needed
            band_dict[(record[1], record[2])] = sensor_dict

        band_info = {}
        band_info['band_id'] = record[3]
        band_info['band_name'] = record[5]
        band_info['band_type'] = record[6]
        band_info['file_number'] = record[7]
        band_info['resolution'] = record[8]
        band_info['min_wavelength'] = record[9]
        band_info['max_wavelength'] = record[10]
        band_info['file_pattern'] = record[11]
        band_info['level_name'] = record[12]
        band_info['tile_layer'] = record[13]
        band_info['band_tag'] = record[14]
        band_info['resampling_method'] = record[15]
        band_info['nodata_value'] = record[16]

        # file_number must be unique for a given satellite/sensor or derived level
        sensor_dict[record[7]] = band_info

    log_multiline(logger.debug, self.bands, 'self.bands', '\t')
def derive_datasets(self, input_dataset_dict, stack_output_info, tile_type_info):
    """
    Overrides abstract function in Stacker class. Called in
    Stacker.stack_derived() function. Creates PQA-masked RGB tiles.

    Arguments:
        input_dataset_dict: Dict keyed by processing level (e.g. ORTHO, NBAR, PQA, DEM)
            containing all tile info which can be used within the function.
            A sample is shown below (including superfluous band-specific information):

            {'NBAR': {'band_name': 'Visible Blue', 'band_tag': 'B10',
                      'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
                      'end_row': 77, 'level_name': 'NBAR', 'nodata_value': -999L,
                      'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+',
                      'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
                      'start_row': 77, 'tile_layer': 1,
                      'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_NBAR_150_-025_2000-02-09T23-46-12.722217.tif',
                      'x_index': 150, 'y_index': -25},
             'ORTHO': {'band_name': 'Thermal Infrared (Low Gain)', 'band_tag': 'B61',
                      'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
                      'end_row': 77, 'level_name': 'ORTHO', 'nodata_value': 0L,
                      'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+',
                      'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
                      'start_row': 77, 'tile_layer': 1,
                      'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_ORTHO_150_-025_2000-02-09T23-46-12.722217.tif',
                      'x_index': 150, 'y_index': -25},
             'PQA': {'band_name': 'Pixel Quality Assurance', 'band_tag': 'PQA',
                      'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
                      'end_row': 77, 'level_name': 'PQA', 'nodata_value': None,
                      'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+',
                      'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
                      'start_row': 77, 'tile_layer': 1,
                      'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_PQA_150_-025_2000-02-09T23-46-12.722217.tif',
                      'x_index': 150, 'y_index': -25}
            }

        stack_output_info: Dict containing stack output information, obtained
            from the stacker object. A sample is shown below:

            stack_output_info = {'x_index': 144,
                                 'y_index': -36,
                                 'stack_output_dir': '/g/data/v10/tmp/ndvi',
                                 'start_datetime': None,  # Datetime object or None
                                 'end_datetime': None,    # Datetime object or None
                                 'satellite': None,       # String or None
                                 'sensor': None}          # String or None

        tile_type_info: Dict containing tile type information, obtained from
            the stacker object (e.g. stacker.tile_type_dict[tile_type_id]).
            A sample is shown below:

            {'crs': 'EPSG:4326', 'file_extension': '.tif', 'file_format': 'GTiff',
             'format_options': 'COMPRESS=LZW,BIGTIFF=YES',
             'tile_directory': 'EPSG4326_1deg_0.00025pixel', 'tile_type_id': 1L,
             'tile_type_name': 'Unprojected WGS84 1-degree at 4000 pixels/degree',
             'unit': 'degree', 'x_origin': 0.0,
             'x_pixel_size': Decimal('0.00025000000000000000'), 'x_pixels': 4000L,
             'x_size': 1.0, 'y_origin': 0.0,
             'y_pixel_size': Decimal('0.00025000000000000000'), 'y_pixels': 4000L,
             'y_size': 1.0}

    Function must create one or more GDAL-supported output datasets. Useful
    functions in the Stacker class include Stacker.get_pqa_mask(), but it is
    left to the coder to produce exactly what is required for a single slice
    of the temporal stack of derived quantities.

    Returns:
        output_dataset_info: Dict keyed by stack filename containing metadata
            info for GDAL-supported output datasets created by this function.
            Note that the key(s) will be used as the output filename for the
            VRT temporal stack and each dataset created must contain only a
            single band. An example is as follows:

            {'/g/data/v10/tmp/ndvi/NDVI_stack_150_-025.vrt':
                {'band_name': 'Normalised Differential Vegetation Index with PQA applied',
                 'band_tag': 'NDVI',
                 'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
                 'end_row': 77, 'level_name': 'NDVI', 'nodata_value': None,
                 'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+',
                 'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
                 'start_row': 77, 'tile_layer': 1,
                 'tile_pathname': '/g/data/v10/tmp/ndvi/LS7_ETM_NDVI_150_-025_2000-02-09T23-46-12.722217.tif',
                 'x_index': 150, 'y_index': -25}
            }
    """
    log_multiline(logger.debug, input_dataset_dict, "input_dataset_dict", "\t")

    # Definitions for mapping NBAR values to RGB
    rgb_bands = ("SWIR1", "NIR", "G")
    rgb_minmax = ((780, 5100), (200, 4500), (100, 2300))  # Min/Max scaled values to map to 1-255

    nbar_dataset_info = input_dataset_dict.get("NBAR")  # Only need NBAR data
    # thermal_dataset_info = input_dataset_dict['ORTHO']  # Could have one or two thermal bands

    if not nbar_dataset_info:
        log_multiline(logger.warning, input_dataset_dict, "NBAR dict does not exist", "\t")
        return None

    # Instantiate band lookup object with all required lookup parameters
    lookup = BandLookup(
        data_cube=self,
        lookup_scheme_name="LANDSAT-LS5/7",
        tile_type_id=tile_type_info["tile_type_id"],
        satellite_tag=nbar_dataset_info["satellite_tag"],
        sensor_name=nbar_dataset_info["sensor_name"],
        level_name=nbar_dataset_info["level_name"],
    )

    nbar_dataset_path = nbar_dataset_info["tile_pathname"]

    output_tile_path = os.path.join(
        self.output_dir, re.sub(r"\.\w+$", "_RGB.tif", os.path.basename(nbar_dataset_path))
    )
    if os.path.exists(output_tile_path):
        logger.info("Skipping existing file %s", output_tile_path)
        return None

    if not self.lock_object(output_tile_path):
        logger.info("Skipping locked file %s", output_tile_path)
        return None

    input_dataset = gdal.Open(nbar_dataset_path)
    assert input_dataset, "Unable to open dataset %s" % nbar_dataset_path

    # Nasty work-around for bad PQA due to missing thermal bands for LS8-OLI
    if nbar_dataset_info["satellite_tag"] == "LS8" and nbar_dataset_info["sensor_name"] == "OLI":
        pqa_mask = numpy.ones(shape=(input_dataset.RasterYSize, input_dataset.RasterXSize),
                              dtype=bool)
        logger.debug("Work-around for LS8-OLI PQA issue applied: EVERYTHING PASSED")
    else:
        if input_dataset_dict.get("PQA") is None:  # No PQA tile available
            return

        # Get a boolean mask from the PQA dataset (use default parameters for mask and dilation)
        pqa_mask = self.get_pqa_mask(pqa_dataset_path=input_dataset_dict["PQA"]["tile_pathname"])

    log_multiline(logger.debug, pqa_mask, "pqa_mask", "\t")

    gdal_driver = gdal.GetDriverByName("GTiff")
    output_dataset = gdal_driver.Create(
        output_tile_path,
        input_dataset.RasterXSize,
        input_dataset.RasterYSize,
        3,
        gdal.GDT_Byte,
        ["INTERLEAVE=PIXEL", "COMPRESS=LZW"],  # ,'BIGTIFF=YES']
    )
    assert output_dataset, "Unable to open output dataset %s" % output_dataset
    output_dataset.SetGeoTransform(input_dataset.GetGeoTransform())
    output_dataset.SetProjection(input_dataset.GetProjection())

    for band_index in range(3):
        logger.debug(
            "Processing %s band in layer %s as band %s",
            rgb_bands[band_index],
            lookup.band_no[rgb_bands[band_index]],
            band_index + 1,
        )

        # Offset byte values by 1 to avoid transparency bug
        scale = (rgb_minmax[band_index][1] - rgb_minmax[band_index][0]) / 254.0
        offset = 1.0 - rgb_minmax[band_index][0] / scale

        input_array = input_dataset.GetRasterBand(lookup.band_no[rgb_bands[band_index]]).ReadAsArray()
        log_multiline(logger.debug, input_array, "input_array", "\t")

        output_array = (input_array / scale + offset).astype(numpy.byte)

        # Set out-of-range values to minimum or maximum as required
        output_array[input_array < rgb_minmax[band_index][0]] = 1
        output_array[input_array > rgb_minmax[band_index][1]] = 255

        output_array[~pqa_mask] = 0  # Apply PQA mask
        log_multiline(logger.debug, output_array, "output_array", "\t")

        output_band = output_dataset.GetRasterBand(band_index + 1)
        output_band.WriteArray(output_array)
        output_band.SetNoDataValue(0)
        output_band.FlushCache()

    output_dataset.FlushCache()
    self.unlock_object(output_tile_path)
    logger.info("Finished writing RGB file %s", output_tile_path)

    return None  # Don't build a stack file
def derive_datasets(self, input_dataset_dict, stack_output_info, tile_type_info):
    """
    Overrides abstract function in Stacker class. Called in
    Stacker.stack_derived() function. Creates PQA-masked band and band-index
    stacks (NDVI, EVI, NDSI, NDMI, SLAVI, SATVI).

    Arguments:
        input_dataset_dict: Dict keyed by processing level (e.g. ORTHO, NBAR, PQA, DEM)
            containing all tile info which can be used within the function.
            A sample is shown below (including superfluous band-specific information):

            {'NBAR': {'band_name': 'Visible Blue', 'band_tag': 'B10',
                      'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
                      'end_row': 77, 'level_name': 'NBAR', 'nodata_value': -999L,
                      'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+',
                      'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
                      'start_row': 77, 'tile_layer': 1,
                      'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_NBAR_150_-025_2000-02-09T23-46-12.722217.tif',
                      'x_index': 150, 'y_index': -25},
             'ORTHO': {'band_name': 'Thermal Infrared (Low Gain)', 'band_tag': 'B61',
                      'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
                      'end_row': 77, 'level_name': 'ORTHO', 'nodata_value': 0L,
                      'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+',
                      'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
                      'start_row': 77, 'tile_layer': 1,
                      'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_ORTHO_150_-025_2000-02-09T23-46-12.722217.tif',
                      'x_index': 150, 'y_index': -25},
             'PQA': {'band_name': 'Pixel Quality Assurance', 'band_tag': 'PQA',
                      'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
                      'end_row': 77, 'level_name': 'PQA', 'nodata_value': None,
                      'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+',
                      'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
                      'start_row': 77, 'tile_layer': 1,
                      'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_PQA_150_-025_2000-02-09T23-46-12.722217.tif',
                      'x_index': 150, 'y_index': -25}
            }

        stack_output_info: Dict containing stack output information, obtained
            from the stacker object. A sample is shown below:

            stack_output_info = {'x_index': 144,
                                 'y_index': -36,
                                 'stack_output_dir': '/g/data/v10/tmp/ndvi',
                                 'start_datetime': None,  # Datetime object or None
                                 'end_datetime': None,    # Datetime object or None
                                 'satellite': None,       # String or None
                                 'sensor': None}          # String or None

        tile_type_info: Dict containing tile type information, obtained from
            the stacker object (e.g. stacker.tile_type_dict[tile_type_id]).
            A sample is shown below:

            {'crs': 'EPSG:4326', 'file_extension': '.tif', 'file_format': 'GTiff',
             'format_options': 'COMPRESS=LZW,BIGTIFF=YES',
             'tile_directory': 'EPSG4326_1deg_0.00025pixel', 'tile_type_id': 1L,
             'tile_type_name': 'Unprojected WGS84 1-degree at 4000 pixels/degree',
             'unit': 'degree', 'x_origin': 0.0,
             'x_pixel_size': Decimal('0.00025000000000000000'), 'x_pixels': 4000L,
             'x_size': 1.0, 'y_origin': 0.0,
             'y_pixel_size': Decimal('0.00025000000000000000'), 'y_pixels': 4000L,
             'y_size': 1.0}

    Function must create one or more GDAL-supported output datasets. Useful
    functions in the Stacker class include Stacker.get_pqa_mask(), but it is
    left to the coder to produce exactly what is required for a single slice
    of the temporal stack of derived quantities.

    Returns:
        output_dataset_info: Dict keyed by stack filename containing metadata
            info for GDAL-supported output datasets created by this function.
            Note that the key(s) will be used as the output filename for the
            VRT temporal stack and each dataset created must contain only a
            single band. An example is as follows:

            {'/g/data/v10/tmp/ndvi/NDVI_stack_150_-025.vrt':
                {'band_name': 'Normalised Differential Vegetation Index with PQA applied',
                 'band_tag': 'NDVI',
                 'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
                 'end_row': 77, 'level_name': 'NDVI', 'nodata_value': None,
                 'path': 91, 'satellite_tag': 'LS7', 'sensor_name': 'ETM+',
                 'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
                 'start_row': 77, 'tile_layer': 1,
                 'tile_pathname': '/g/data/v10/tmp/ndvi/LS7_ETM_NDVI_150_-025_2000-02-09T23-46-12.722217.tif',
                 'x_index': 150, 'y_index': -25}
            }
    """
    assert type(input_dataset_dict) == dict, 'input_dataset_dict must be a dict'

    dtype = gdalconst.GDT_Float32  # All output is to be float32
    no_data_value = numpy.nan

    log_multiline(logger.debug, input_dataset_dict, 'input_dataset_dict', '\t')

    # Test function to copy ORTHO & NBAR band datasets with pixel quality mask
    # applied to an output directory for stacking

    output_dataset_dict = {}
    nbar_dataset_info = input_dataset_dict.get('NBAR')  # Only need NBAR data
    # thermal_dataset_info = input_dataset_dict['ORTHO']  # Could have one or two thermal bands

    # Need to skip tiles which don't have an NBAR tile (i.e. for non-mosaiced
    # FC tiles at W & E sides of test area)
    if nbar_dataset_info is None:
        logger.warning('NBAR tile does not exist')
        return None

    # Nasty work-around for bad PQA due to missing thermal bands for LS8-OLI
    if nbar_dataset_info['satellite_tag'] == 'LS8' and nbar_dataset_info['sensor_name'] == 'OLI':
        logger.debug('Work-around for LS8-OLI PQA issue applied: TILE SKIPPED')
        return None

    # Instantiate band lookup object with all required lookup parameters
    lookup = BandLookup(data_cube=self,
                        lookup_scheme_name='LANDSAT-LS5/7',
                        tile_type_id=tile_type_info['tile_type_id'],
                        satellite_tag=nbar_dataset_info['satellite_tag'],
                        sensor_name=nbar_dataset_info['sensor_name'],
                        level_name=nbar_dataset_info['level_name']
                        )

    nbar_dataset_path = nbar_dataset_info['tile_pathname']

    if input_dataset_dict.get('PQA') is None:  # No PQA tile available
        return

    # Get a boolean mask from the PQA dataset (use default parameters for mask and dilation)
    pqa_mask = self.get_pqa_mask(input_dataset_dict['PQA']['tile_pathname'])
    log_multiline(logger.debug, pqa_mask, 'pqa_mask', '\t')

    nbar_dataset = gdal.Open(nbar_dataset_path)
    assert nbar_dataset, 'Unable to open dataset %s' % nbar_dataset

    band_array = None

    # List of outputs to generate from each file
    output_tag_list = ['B', 'G', 'R', 'NIR', 'SWIR1', 'SWIR2',
                       'NDVI', 'EVI', 'NDSI', 'NDMI', 'SLAVI', 'SATVI']
    for output_tag in sorted(output_tag_list):
        # TODO: Make the stack file name reflect the date range
        output_stack_path = os.path.join(
            self.output_dir,
            re.sub(r'\+', '', '%s_%+04d_%+04d' % (output_tag,
                                                  stack_output_info['x_index'],
                                                  stack_output_info['y_index'])))

        if stack_output_info['start_datetime']:
            output_stack_path += '_%s' % stack_output_info['start_datetime'].strftime('%Y%m%d')
        if stack_output_info['end_datetime']:
            output_stack_path += '_%s' % stack_output_info['end_datetime'].strftime('%Y%m%d')

        output_stack_path += '_pqa_stack.vrt'

        output_tile_path = os.path.join(
            self.output_dir,
            re.sub(r'\.\w+$', tile_type_info['file_extension'],
                   re.sub('NBAR', output_tag, os.path.basename(nbar_dataset_path))))

        # Copy metadata for eventual inclusion in stack file output.
        # This could also be written to the output tile if required.
        output_dataset_info = dict(nbar_dataset_info)
        # This is the most important modification - used to find tiles to stack
        output_dataset_info['tile_pathname'] = output_tile_path
        output_dataset_info['band_name'] = '%s with PQA mask applied' % output_tag
        output_dataset_info['band_tag'] = '%s-PQA' % output_tag
        output_dataset_info['tile_layer'] = 1
        output_dataset_info['nodata_value'] = no_data_value

        # Check for existing, valid file
        if self.refresh or not os.path.exists(output_tile_path):
            if self.lock_object(output_tile_path):  # Test for concurrent writes to the same file
                try:
                    # Read whole nbar_dataset into one array.
                    # 62MB for float32 data should be OK for memory depending on
                    # what else happens downstream
                    if band_array is None:
                        # Convert to float32 for arithmetic and scale back to 0~1 reflectance
                        band_array = (nbar_dataset.ReadAsArray().astype(numpy.float32)) / SCALE_FACTOR
                        log_multiline(logger.debug, band_array, 'band_array', '\t')

                        # Adjust bands if required
                        for band_tag in lookup.bands:
                            if (lookup.adjustment_multiplier[band_tag] != 1.0
                                    or lookup.adjustment_offset[band_tag] != 0.0):
                                logger.debug('Band values adjusted: %s = %s * %s + %s',
                                             band_tag, band_tag,
                                             lookup.adjustment_multiplier[band_tag],
                                             lookup.adjustment_offset[band_tag])
                                band_array[lookup.band_index[band_tag]] = (
                                    band_array[lookup.band_index[band_tag]]
                                    * lookup.adjustment_multiplier[band_tag]
                                    + lookup.adjustment_offset[band_tag])
                        log_multiline(logger.debug, band_array, 'adjusted band_array', '\t')

                        # Re-projection issues with PQ - redo the contiguity layer
                        non_contiguous = (band_array < 0).any(0)
                        pqa_mask[non_contiguous] = False
                        log_multiline(logger.debug, pqa_mask, 'enhanced pqa_mask', '\t')

                    gdal_driver = gdal.GetDriverByName(tile_type_info['file_format'])
                    # output_dataset = gdal_driver.Create(output_tile_path,
                    #                                     nbar_dataset.RasterXSize, nbar_dataset.RasterYSize,
                    #                                     1, nbar_dataset.GetRasterBand(1).DataType,
                    #                                     tile_type_info['format_options'].split(','))
                    output_dataset = gdal_driver.Create(output_tile_path,
                                                        nbar_dataset.RasterXSize,
                                                        nbar_dataset.RasterYSize,
                                                        1, dtype,
                                                        tile_type_info['format_options'].split(','))
                    assert output_dataset, 'Unable to open output dataset %s' % output_dataset
                    output_dataset.SetGeoTransform(nbar_dataset.GetGeoTransform())
                    output_dataset.SetProjection(nbar_dataset.GetProjection())

                    output_band = output_dataset.GetRasterBand(1)

                    # Calculate each output here
                    # Remember band_array indices are zero-based
                    if output_tag in lookup.bands:  # One of the band tags - copy values
                        data_array = band_array[lookup.band_index[output_tag]].copy()
                    elif output_tag == 'NDVI':
                        data_array = numexpr.evaluate(
                            "((NIR_array - R_array) / (NIR_array + R_array)) + 1",
                            {'NIR_array': band_array[lookup.band_index['NIR']],
                             'R_array': band_array[lookup.band_index['R']]})
                    elif output_tag == 'EVI':
                        data_array = numexpr.evaluate(
                            "(2.5 * ((NIR_array - R_array) / (NIR_array + (6 * R_array) - (7.5 * B_array) + 1))) + 1",
                            {'NIR_array': band_array[lookup.band_index['NIR']],
                             'R_array': band_array[lookup.band_index['R']],
                             'B_array': band_array[lookup.band_index['B']]})
                    elif output_tag == 'NDSI':
                        data_array = numexpr.evaluate(
                            "((R_array - SWIR1_array) / (R_array + SWIR1_array)) + 1",
                            {'SWIR1_array': band_array[lookup.band_index['SWIR1']],
                             'R_array': band_array[lookup.band_index['R']]})
                    elif output_tag == 'NDMI':
                        data_array = numexpr.evaluate(
                            "((NIR_array - SWIR1_array) / (NIR_array + SWIR1_array)) + 1",
                            {'SWIR1_array': band_array[lookup.band_index['SWIR1']],
                             'NIR_array': band_array[lookup.band_index['NIR']]})
                    elif output_tag == 'SLAVI':
                        data_array = numexpr.evaluate(
                            "NIR_array / (R_array + SWIR1_array)",
                            {'SWIR1_array': band_array[lookup.band_index['SWIR1']],
                             'NIR_array': band_array[lookup.band_index['NIR']],
                             'R_array': band_array[lookup.band_index['R']]})
                    elif output_tag == 'SATVI':
                        data_array = numexpr.evaluate(
                            "(((SWIR1_array - R_array) / (SWIR1_array + R_array + 0.5)) * 1.5 - (SWIR2_array / 2)) + 1",
                            {'SWIR1_array': band_array[lookup.band_index['SWIR1']],
                             'SWIR2_array': band_array[lookup.band_index['SWIR2']],
                             'R_array': band_array[lookup.band_index['R']]})
                    else:
                        raise Exception('Invalid operation')

                    log_multiline(logger.debug, data_array, 'data_array', '\t')

                    if no_data_value:
                        self.apply_pqa_mask(data_array=data_array, pqa_mask=pqa_mask,
                                            no_data_value=no_data_value)
                        log_multiline(logger.debug, data_array, 'masked data_array', '\t')

                    output_band.WriteArray(data_array)
                    output_band.SetNoDataValue(output_dataset_info['nodata_value'])
                    output_band.FlushCache()

                    # This is not strictly necessary - copy metadata to output dataset
                    output_dataset_metadata = nbar_dataset.GetMetadata()
                    if output_dataset_metadata:
                        output_dataset.SetMetadata(output_dataset_metadata)
                        log_multiline(logger.debug, output_dataset_metadata,
                                      'output_dataset_metadata', '\t')

                    output_dataset.FlushCache()
                    logger.info('Finished writing dataset %s', output_tile_path)
                finally:
                    self.unlock_object(output_tile_path)
            else:
                logger.info('Skipped locked dataset %s', output_tile_path)
                sleep(5)  # TODO: Find a nicer way of dealing with contention for the same output tile
        else:
            logger.info('Skipped existing dataset %s', output_tile_path)

        output_dataset_dict[output_stack_path] = output_dataset_info
        # log_multiline(logger.debug, output_dataset_info, 'output_dataset_info', '\t')

    log_multiline(logger.debug, output_dataset_dict, 'output_dataset_dict', '\t')
    # All datasets processed - return info
    return output_dataset_dict
# This is the main function when this script is directly executed - you can mostly
# ignore its contents. The bulk of the interesting work is in the class above.
if __name__ == '__main__':
    def date2datetime(input_date, time_offset=time.min):
        if not input_date:
            return None
        return datetime.combine(input_date, time_offset)

    # Stacker class takes care of command line parameters
    stacker = PQAStacker()

    if stacker.debug:
        console_handler.setLevel(logging.DEBUG)

    # Check for required command line parameters
    assert (stacker.x_index and stacker.y_index), 'You must specify Tile X/Y-index (-x/-y or --x_index/--y_index)'
    assert stacker.output_dir, 'Output directory not specified (-o or --output)'

    stack_info_dict = stacker.stack_derived(x_index=stacker.x_index,
                                            y_index=stacker.y_index,
                                            stack_output_dir=stacker.output_dir,
                                            start_datetime=date2datetime(stacker.start_date, time.min),
                                            end_datetime=date2datetime(stacker.end_date, time.max),
                                            satellite=stacker.satellite,
                                            sensor=stacker.sensor)

    log_multiline(logger.debug, stack_info_dict, 'stack_info_dict', '\t')

    logger.info('Finished creating %d temporal stack files in %s.',
                len(stack_info_dict), stacker.output_dir)
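
# A quick illustration (with hypothetical dates) of how date2datetime above turns the
# start/end dates into an inclusive datetime range: time.min pins the start date to
# midnight and time.max pins the end date to the last microsecond of that day.
from datetime import date, datetime, time

def date2datetime(input_date, time_offset=time.min):
    if not input_date:
        return None
    return datetime.combine(input_date, time_offset)

print(date2datetime(date(2000, 2, 9), time.min)) # 2000-02-09 00:00:00
print(date2datetime(date(2000, 2, 9), time.max)) # 2000-02-09 23:59:59.999999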
def derive_datasets(self, input_dataset_dict, stack_output_info, tile_type_info):
    """ Overrides abstract function in stacker class. Called in Stacker.stack_derived() function.
    Creates a PQA-masked NDVI stack.

    Arguments:
        input_dataset_dict: Dict keyed by processing level (e.g. ORTHO, NBAR, PQA, DEM)
            containing all tile info which can be used within the function.
            A sample is shown below (including superfluous band-specific information):

            {'NBAR': {'band_name': 'Visible Blue',
                      'band_tag': 'B10',
                      'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
                      'end_row': 77,
                      'level_name': 'NBAR',
                      'nodata_value': -999L,
                      'path': 91,
                      'satellite_tag': 'LS7',
                      'sensor_name': 'ETM+',
                      'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
                      'start_row': 77,
                      'tile_layer': 1,
                      'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_NBAR_150_-025_2000-02-09T23-46-12.722217.tif',
                      'x_index': 150,
                      'y_index': -25},
             'ORTHO': {'band_name': 'Thermal Infrared (Low Gain)',
                       'band_tag': 'B61',
                       'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
                       'end_row': 77,
                       'level_name': 'ORTHO',
                       'nodata_value': 0L,
                       'path': 91,
                       'satellite_tag': 'LS7',
                       'sensor_name': 'ETM+',
                       'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
                       'start_row': 77,
                       'tile_layer': 1,
                       'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_ORTHO_150_-025_2000-02-09T23-46-12.722217.tif',
                       'x_index': 150,
                       'y_index': -25},
             'PQA': {'band_name': 'Pixel Quality Assurance',
                     'band_tag': 'PQA',
                     'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
                     'end_row': 77,
                     'level_name': 'PQA',
                     'nodata_value': None,
                     'path': 91,
                     'satellite_tag': 'LS7',
                     'sensor_name': 'ETM+',
                     'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
                     'start_row': 77,
                     'tile_layer': 1,
                     'tile_pathname': '/g/data/v10/datacube/EPSG4326_1deg_0.00025pixel/LS7_ETM/150_-025/2000/LS7_ETM_PQA_150_-025_2000-02-09T23-46-12.722217.tif',
                     'x_index': 150,
                     'y_index': -25}}

    Arguments (Cont'd):
        tile_type_info: dict containing tile type information.
            Obtained from stacker object (e.g. stacker.tile_type_dict[tile_type_id]).
            A sample is shown below:

            {'crs': 'EPSG:4326',
             'file_extension': '.tif',
             'file_format': 'GTiff',
             'format_options': 'COMPRESS=LZW,BIGTIFF=YES',
             'tile_directory': 'EPSG4326_1deg_0.00025pixel',
             'tile_type_id': 1L,
             'tile_type_name': 'Unprojected WGS84 1-degree at 4000 pixels/degree',
             'unit': 'degree',
             'x_origin': 0.0,
             'x_pixel_size': Decimal('0.00025000000000000000'),
             'x_pixels': 4000L,
             'x_size': 1.0,
             'y_origin': 0.0,
             'y_pixel_size': Decimal('0.00025000000000000000'),
             'y_pixels': 4000L,
             'y_size': 1.0}

    Function must create one or more GDAL-supported output datasets. Useful functions in
    the Stacker class include Stacker.get_pqa_mask(), but it is left to the coder to
    produce exactly what is required for a single slice of the temporal stack of derived
    quantities.

    Returns:
        output_dataset_info: Dict keyed by stack filename containing metadata info for
            GDAL-supported output datasets created by this function. Note that the key(s)
            will be used as the output filename for the VRT temporal stack and each
            dataset created must contain only a single band. An example is as follows:

            {'/g/data/v10/tmp/ndvi/NDVI_stack_150_-025.vrt':
                {'band_name': 'Normalised Differential Vegetation Index with PQA applied',
                 'band_tag': 'NDVI',
                 'end_datetime': datetime.datetime(2000, 2, 9, 23, 46, 36, 722217),
                 'end_row': 77,
                 'level_name': 'NDVI',
                 'nodata_value': None,
                 'path': 91,
                 'satellite_tag': 'LS7',
                 'sensor_name': 'ETM+',
                 'start_datetime': datetime.datetime(2000, 2, 9, 23, 46, 12, 722217),
                 'start_row': 77,
                 'tile_layer': 1,
                 'tile_pathname': '/g/data/v10/tmp/ndvi/LS7_ETM_NDVI_150_-025_2000-02-09T23-46-12.722217.tif',
                 'x_index': 150,
                 'y_index': -25}}
    """
    assert type(input_dataset_dict) == dict, 'input_dataset_dict must be a dict'

    log_multiline(logger.debug, input_dataset_dict, 'input_dataset_dict', '\t')

    # Test function to copy ORTHO & NBAR band datasets with pixel quality mask applied
    # to an output directory for stacking

    output_dataset_dict = {}
    nbar_dataset_info = input_dataset_dict.get('NBAR') # Only need NBAR data for NDVI
    if nbar_dataset_info is None:
        return
    #thermal_dataset_info = input_dataset_dict['ORTHO'] # Could have one or two thermal bands

    # Instantiate band lookup object with all required lookup parameters
    lookup = BandLookup(data_cube=self,
                        lookup_scheme_name='LANDSAT-LS5/7',
                        tile_type_id=tile_type_info['tile_type_id'],
                        satellite_tag=nbar_dataset_info['satellite_tag'],
                        sensor_name=nbar_dataset_info['sensor_name'],
                        level_name=nbar_dataset_info['level_name'])

    nbar_dataset_path = nbar_dataset_info['tile_pathname']

    #=======================================================================
    # # Generate sorted list of band info for this tile type, satellite and sensor
    # band_dict = self.bands[tile_type_info['tile_type_id']][(nbar_dataset_info['satellite_tag'], nbar_dataset_info['sensor_name'])]
    # band_info_list = [band_dict[tile_layer] for tile_layer in sorted(band_dict.keys()) if band_dict[tile_layer]['level_name'] == 'NBAR']
    #=======================================================================

    # Get a boolean mask from the PQA dataset (use default parameters for mask and dilation)
    pqa_mask = self.get_pqa_mask(input_dataset_dict['PQA']['tile_pathname'])

    nbar_dataset = gdal.Open(nbar_dataset_path)
    assert nbar_dataset, 'Unable to open dataset %s' % nbar_dataset_path
    logger.debug('Opened NBAR dataset %s', nbar_dataset_path)

    #no_data_value = nbar_dataset_info['nodata_value']
    no_data_value = -32767 # Need a value outside the scaled range of -10000 to +10000

    for output_tag in ['NDVI']: # List of outputs to generate from each file - just NDVI at this stage

        output_stack_path = os.path.join(self.output_dir, '%s_pqa_masked.vrt' % output_tag)

        output_tile_path = os.path.join(self.output_dir,
                                        re.sub(r'\.\w+$',
                                               '_%s%s' % (output_tag, tile_type_info['file_extension']),
                                               os.path.basename(nbar_dataset_path)))

        # Copy metadata for eventual inclusion in stack file output
        # This could also be written to the output tile if required
        output_dataset_info = dict(nbar_dataset_info)
        output_dataset_info['tile_pathname'] = output_tile_path # This is the most important modification - used to find tiles to stack
        output_dataset_info['band_name'] = '%s with PQA mask applied' % output_tag
        output_dataset_info['band_tag'] = '%s-PQA' % output_tag
        output_dataset_info['tile_layer'] = 1

        # Check for existing, valid file
        if self.refresh or not os.path.exists(output_tile_path) or not gdal.Open(output_tile_path):
            gdal_driver = gdal.GetDriverByName(tile_type_info['file_format'])
            output_dataset = gdal_driver.Create(output_tile_path,
                                                nbar_dataset.RasterXSize, nbar_dataset.RasterYSize,
                                                1, nbar_dataset.GetRasterBand(1).DataType,
                                                tile_type_info['format_options'].split(','))
            assert output_dataset, 'Unable to open output dataset %s' % output_tile_path
            output_dataset.SetGeoTransform(nbar_dataset.GetGeoTransform())
            output_dataset.SetProjection(nbar_dataset.GetProjection())

            output_band = output_dataset.GetRasterBand(1)

            # Calculate NDVI here
            # Remember band indices are one-based
            try:
                # Read and adjust arrays for NIR and R
                NIR_array = (nbar_dataset.GetRasterBand(lookup.band_no['NIR']).ReadAsArray()
                             * lookup.adjustment_multiplier['NIR']
                             + lookup.adjustment_offset['NIR'] * SCALE_FACTOR)
                R_array = (nbar_dataset.GetRasterBand(lookup.band_no['R']).ReadAsArray()
                           * lookup.adjustment_multiplier['R']
                           + lookup.adjustment_offset['R'] * SCALE_FACTOR)
            except TypeError: # A missing lookup value (None) makes the arithmetic fail - skip this tile
                return

            data_array = numpy.true_divide(NIR_array - R_array, NIR_array + R_array) * SCALE_FACTOR

            self.apply_pqa_mask(data_array, pqa_mask, no_data_value)

            output_band.WriteArray(data_array)
            output_band.SetNoDataValue(no_data_value)
            output_band.FlushCache()

            # This is not strictly necessary - copy metadata to output dataset
            output_dataset_metadata = nbar_dataset.GetMetadata()
            if output_dataset_metadata:
                output_dataset.SetMetadata(output_dataset_metadata)
                log_multiline(logger.debug, output_dataset_metadata, 'output_dataset_metadata', '\t')

            output_dataset.FlushCache()
            logger.info('Finished writing %s', output_tile_path)
        else:
            logger.info('Skipped existing, valid dataset %s', output_tile_path)

        output_dataset_dict[output_stack_path] = output_dataset_info
#        log_multiline(logger.debug, output_dataset_info, 'output_dataset_info', '\t')

    log_multiline(logger.debug, output_dataset_dict, 'output_dataset_dict', '\t')
    # NDVI dataset processed - return info
    return output_dataset_dict
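
# A standalone sketch of the scaled-integer NDVI convention used above, with made-up
# pixel values. It assumes SCALE_FACTOR is 10000, consistent with the -10000..+10000
# range noted in the comments: numpy.true_divide avoids integer division, the ratio is
# scaled back to integers, and -32767 is reserved for no-data because it lies outside
# that range.
import numpy

SCALE_FACTOR = 10000 # Assumed value, matching the scaled range above
NO_DATA = -32767

nir = numpy.array([[4000, 5000], [3000, 0]], dtype=numpy.int16)
red = numpy.array([[1000, 1000], [2500, 0]], dtype=numpy.int16)

ndvi = numpy.true_divide(nir - red, nir + red) * SCALE_FACTOR
# 0/0 yields nan (with a runtime warning) rather than raising; flag such pixels as no-data
ndvi[~numpy.isfinite(ndvi)] = NO_DATA
print(ndvi)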
def derive_datasets(self, input_dataset_dict, stack_output_info, tile_type_info):
    assert type(input_dataset_dict) == dict, 'input_dataset_dict must be a dict'

    log_multiline(logger.debug, input_dataset_dict, 'input_dataset_dict', '\t')

    # Figure out our input/output files
    nbar_dataset_path = input_dataset_dict['NBAR']['tile_pathname']
    nbar_dataset = gdal.Open(nbar_dataset_path)
    assert nbar_dataset, 'Unable to open dataset %s' % nbar_dataset_path
    total_bands = nbar_dataset.RasterCount
    logger.debug('Opened NBAR dataset %s', nbar_dataset_path)

    # Get the pixel mask as a single numpy array
    # Be mindful of memory usage; it should be fine in this instance
    pqa_mask = self.get_pqa_mask(input_dataset_dict['PQA']['tile_pathname'])

    # Instead of receiving one entry, this will have a number of entries equal to the number of bands
    output_dataset_dict = {}

    # Instead of creating one file with many bands, create many files with a single band each
    for index in range(1, total_bands + 1):
        output_tile_path = os.path.join(self.output_dir,
                                        re.sub(r'\.\w+$',
                                               '_pqa_masked_band_%s%s' % (index, tile_type_info['file_extension']),
                                               os.path.basename(nbar_dataset_path)))
        output_stack_path = os.path.join(self.output_dir, 'pqa_masked_band_%s.vrt' % index)

        # Copy metadata for eventual inclusion in stack file output
        # This could also be written to the output tile if required
        output_dataset_info = dict(input_dataset_dict['NBAR'])
        output_dataset_info['tile_pathname'] = output_tile_path # This is the most important modification - used to find tiles to stack
        output_dataset_info['band_name'] = 'NBAR band %s with PQA mask applied' % index
        output_dataset_info['band_tag'] = 'NBAR-PQA-%s' % index
        output_dataset_info['tile_layer'] = 1

        # Create a new GeoTIFF for the masked output
        gdal_driver = gdal.GetDriverByName(tile_type_info['file_format'])
        output_dataset = gdal_driver.Create(output_tile_path,
                                            nbar_dataset.RasterXSize, nbar_dataset.RasterYSize,
                                            1, nbar_dataset.GetRasterBand(index).DataType,
                                            tile_type_info['format_options'].split(','))
        assert output_dataset, 'Unable to open output dataset %s' % output_tile_path
        output_dataset.SetGeoTransform(nbar_dataset.GetGeoTransform())
        output_dataset.SetProjection(nbar_dataset.GetProjection())

        # Mask our band (each band is a numpy array of values)
        input_band = nbar_dataset.GetRasterBand(index)
        input_band_data = input_band.ReadAsArray()

        # Apply the mask in place on input_band_data
        no_data_value = -32767
        self.apply_pqa_mask(input_band_data, pqa_mask, no_data_value)

        # Write the data as a new band
        output_band = output_dataset.GetRasterBand(1)
        output_band.WriteArray(input_band_data)
        output_band.SetNoDataValue(no_data_value)
        output_band.FlushCache()

        # This is not strictly necessary - copy metadata to output dataset
        output_dataset_metadata = nbar_dataset.GetMetadata()
        if output_dataset_metadata:
            output_dataset.SetMetadata(output_dataset_metadata)

        output_dataset.FlushCache()
        logger.info('Finished writing %s', output_tile_path)

        output_dataset_dict[output_stack_path] = output_dataset_info

    log_multiline(logger.debug, output_dataset_dict, 'output_dataset_dict', '\t')

    return output_dataset_dict
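
# apply_pqa_mask is provided by the Stacker base class; its implementation is not shown
# in this document, so the sketch below is a hypothetical stand-in illustrating the
# assumed behaviour: an in-place boolean-indexed overwrite where pixels whose PQA mask
# entry is False are set to the no-data value.
import numpy

def apply_pqa_mask_sketch(data_array, pqa_mask, no_data_value):
    # pqa_mask is True where pixels are good; overwrite everything else (assumption)
    data_array[~pqa_mask] = no_data_value

data = numpy.array([[1.0, 2.0], [3.0, 4.0]])
mask = numpy.array([[True, False], [True, True]])
apply_pqa_mask_sketch(data, mask, -32767)
print(data) # [[1., -32767.], [3., 4.]]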
def derive_datasets(self, input_dataset_dict, stack_output_info, tile_type_info):
    assert type(input_dataset_dict) == dict, 'input_dataset_dict must be a dict'

    log_multiline(logger.debug, input_dataset_dict, 'input_dataset_dict', '\t')

    output_dataset_dict = {}
    nbar_dataset_info = input_dataset_dict['NBAR'] # Only need NBAR data for NDVI
    nbar_dataset_path = nbar_dataset_info['tile_pathname']

    # Get a boolean mask from the PQA dataset (use default parameters for mask and dilation)
    pqa_mask = self.get_pqa_mask(input_dataset_dict['PQA']['tile_pathname'])

    nbar_dataset = gdal.Open(nbar_dataset_path)
    assert nbar_dataset, 'Unable to open dataset %s' % nbar_dataset_path
    logger.debug('Opened NBAR dataset %s', nbar_dataset_path)

    #no_data_value = nbar_dataset_info['nodata_value']
    no_data_value = -32767 # Need a value outside the scaled range of -10000 to +10000

    output_stack_path = os.path.join(self.output_dir, 'NDVI_pqa_masked.vrt')

    output_tile_path = os.path.join(self.output_dir,
                                    re.sub(r'\.\w+$',
                                           '_NDVI%s' % tile_type_info['file_extension'],
                                           os.path.basename(nbar_dataset_path)))

    # Copy metadata for eventual inclusion in stack file output
    # This could also be written to the output tile if required
    output_dataset_info = dict(nbar_dataset_info)
    output_dataset_info['tile_pathname'] = output_tile_path # This is the most important modification - used to find tiles to stack
    output_dataset_info['band_name'] = 'NDVI with PQA mask applied'
    output_dataset_info['band_tag'] = 'NDVI-PQA'
    output_dataset_info['tile_layer'] = 1

    # Read the required NBAR bands into 2D NumPy arrays
    near_ir_band_data = nbar_dataset.GetRasterBand(4).ReadAsArray() # Near infrared
    visible_band_data = nbar_dataset.GetRasterBand(3).ReadAsArray() # Red visible light

    logger.debug('near_ir_band_data = %s', near_ir_band_data)
    logger.debug('visible_band_data = %s', visible_band_data)
    logger.debug('SCALE_FACTOR = %s', SCALE_FACTOR)

    # Calculate NDVI for every element in the array using
    # ((NIR - VIS) / (NIR + VIS)) * SCALE_FACTOR
    # HINT - Use numpy.true_divide(numerator, denominator) to avoid divide-by-zero errors
    data_array = numpy.true_divide(near_ir_band_data - visible_band_data,
                                   near_ir_band_data + visible_band_data) * SCALE_FACTOR

    self.apply_pqa_mask(data_array, pqa_mask, no_data_value)

    # Create our output file
    gdal_driver = gdal.GetDriverByName(tile_type_info['file_format'])
    output_dataset = gdal_driver.Create(output_tile_path,
                                        nbar_dataset.RasterXSize, nbar_dataset.RasterYSize,
                                        1, nbar_dataset.GetRasterBand(1).DataType,
                                        tile_type_info['format_options'].split(','))
    assert output_dataset, 'Unable to open output dataset %s' % output_tile_path
    output_dataset.SetGeoTransform(nbar_dataset.GetGeoTransform())
    output_dataset.SetProjection(nbar_dataset.GetProjection())

    output_band = output_dataset.GetRasterBand(1)
    output_band.WriteArray(data_array)
    output_band.SetNoDataValue(no_data_value)
    output_band.FlushCache()

    # This is not strictly necessary - copy metadata to output dataset
    output_dataset_metadata = nbar_dataset.GetMetadata()
    if output_dataset_metadata:
        output_dataset.SetMetadata(output_dataset_metadata)
        log_multiline(logger.debug, output_dataset_metadata, 'output_dataset_metadata', '\t')

    output_dataset.FlushCache()
    logger.info('Finished writing %s', output_tile_path)

    output_dataset_dict[output_stack_path] = output_dataset_info

    # NDVI dataset processed - return info
    return output_dataset_dict
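
# The Create / SetGeoTransform / SetProjection / WriteArray / SetNoDataValue sequence
# above is the common single-band output pattern across these functions. Below is a
# runnable, self-contained sketch of that pattern using GDAL's in-memory ('MEM')
# driver so no file is written; the geotransform values are arbitrary examples, not
# taken from the datacube configuration.
import numpy
from osgeo import gdal, osr

driver = gdal.GetDriverByName('MEM')
ds = driver.Create('', 4, 4, 1, gdal.GDT_Float32) # 4x4 pixels, one float32 band

# Arbitrary example geotransform: origin, pixel size, no rotation
ds.SetGeoTransform((150.0, 0.00025, 0.0, -24.0, 0.0, -0.00025))
srs = osr.SpatialReference()
srs.ImportFromEPSG(4326) # WGS84, matching the 'EPSG:4326' CRS used above
ds.SetProjection(srs.ExportToWkt())

band = ds.GetRasterBand(1)
band.WriteArray(numpy.full((4, 4), -32767, dtype=numpy.float32))
band.SetNoDataValue(-32767)
band.FlushCache()
print(band.ReadAsArray())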